diff --git a/.gitignore b/.gitignore index f6028fa..0beefad 100644 --- a/.gitignore +++ b/.gitignore @@ -9,8 +9,11 @@ /coverage # next.js -.venv /.next/ + +# Python virtual environment +.venv +venv/ /out/ diff --git a/README.md b/README.md index d229ba7..2388c47 100644 --- a/README.md +++ b/README.md @@ -11,14 +11,12 @@ # ReMarkable Web - [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://GitHub.com/Naereen/StrapDown.js/graphs/commit-activity) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)](http://makeapullrequest.com) [![TypeScript](https://badgen.net/badge/icon/typescript?icon=typescript&label)](https://typescriptlang.org) [![Buymeacoffee](https://badgen.net/badge/icon/buymeacoffee?icon=buymeacoffee&label)](https://www.buymeacoffee.com/andrewbest) - **ReMarkable Web** is a web application deployed [here](https://remarkable-web.vercel.app/) that imports highlights from [ReMarkable](https://remarkable.com/) devices and exports them to providers such as [Readwise](https://readwise.io/). [Features](#features) • @@ -28,7 +26,6 @@ - Screenshot ## Features @@ -38,25 +35,51 @@ ## Getting Started -First install the npm dependencies: +### 1. Install npm dependencies: -``` +```bash npm install ``` -Next run the Next application: +### 2. Set up Python for highlight extraction (reMarkable firmware 3.0+) + +reMarkable firmware 3.0+ stores highlights in a new binary format. To extract them, you need to run the Python highlight parser: +```bash +# Create a virtual environment +python3 -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate + +# Install Python dependencies +pip install rmscene flask ``` -npm run dev + +### 3. 
Run both servers for local development: + +**Terminal 1 - Python highlight parser:** + +```bash +source venv/bin/activate +python python-server.py ``` -If you wish to run it with vercel so that the manual python function will work: +**Terminal 2 - Next.js app:** +```bash +npm run dev ``` + +The application should then be running at http://localhost:3000/ + +### Alternative: Using Vercel CLI + +If you prefer to use Vercel's serverless functions (includes the Python API): + +```bash vercel dev ``` -The application should then be running at http://localhost:3000/ +> **Note:** For highlight extraction to work with reMarkable firmware 3.0+, you must run the Python server alongside the Next.js app during local development. ## Contributing @@ -64,7 +87,7 @@ Contributions are very welcome! Please fork and PRs to build this into a fantast - Move, Rename and Delete files - Neaten up code, remove 'any' uses -- Sort by values other than alphabetical +- Sort by values other than alphabetical - Filter by favourites, ebooks, pdfs - Export to Zotero, Obsidian @@ -73,4 +96,3 @@ Contributions are very welcome! Please fork and PRs to build this into a fantast - [Next](https://nextjs.org/) - [Tailwind](https://tailwindcss.com/) - [Ant design](https://ant.design/) - diff --git a/api/parse-highlights.py b/api/parse-highlights.py new file mode 100644 index 0000000..9781a4c --- /dev/null +++ b/api/parse-highlights.py @@ -0,0 +1,119 @@ +""" +Vercel serverless function for parsing reMarkable .rm files. +Extracts text highlights (GlyphRange) from reMarkable firmware 3.0+ documents. 
class handler(BaseHTTPRequestHandler):
    """HTTP handler that extracts text highlights from reMarkable .rm files.

    ``POST`` accepts a JSON document containing base64-encoded .rm pages and
    answers with the ``GlyphRange`` (highlighted text) items found on each
    page.  ``OPTIONS`` answers CORS preflight requests.

    The lowercase class name is kept as-is because it is this module's public
    entry point.
    """

    def _send_cors_headers(self):
        """Emit the CORS headers shared by every response of this handler."""
        # NOTE(review): browsers ignore ``Allow-Credentials: true`` when the
        # allowed origin is the wildcard ``*``; the pair is kept unchanged so
        # existing clients see the same headers -- confirm whether
        # credentialed requests are actually needed.
        self.send_header('Access-Control-Allow-Credentials', 'true')
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', 'X-Requested-With, Content-type')

    def do_OPTIONS(self):
        """Handle CORS preflight requests."""
        self.send_response(200)
        self._send_cors_headers()
        self.end_headers()

    def do_POST(self):
        """
        Parse .rm files and extract highlights.

        Expected request body:
        {
            "rmFiles": [
                {"pageId": "uuid", "rmData": "base64-encoded .rm file"}
            ]
        }

        Response:
        {
            "success": true,
            "highlights": [
                {"pageId": "uuid", "highlights": [{"text": "...", "color": 0}]}
            ]
        }

        On failure the response is ``{"success": false, "error": "..."}``.
        The HTTP status stays 200 in both cases, so clients must inspect the
        ``success`` flag.
        """
        # Build the complete payload *before* any header is written so that a
        # Content-Length can be sent and a late failure cannot leave a
        # half-written response behind.
        try:
            request_data = self._read_json_body()
            body = json.dumps({
                'success': True,
                'highlights': self._collect_highlights(request_data),
            })
        except Exception as exc:
            # Top-level boundary: every failure is reported to the client in
            # the JSON envelope instead of crashing the function.
            self.log_error('Highlight request failed: %s', exc)
            body = json.dumps({'success': False, 'error': str(exc)})

        encoded = body.encode('utf-8')
        self.send_response(200)
        self._send_cors_headers()
        self.send_header('Content-type', 'application/json')
        self.send_header('Content-Length', str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)

    def _read_json_body(self) -> dict:
        """Read the request body and decode it as a JSON object.

        Raises:
            ValueError: if Content-Length is not a number, the body is not
                valid JSON/UTF-8, or the top-level JSON value is not an object.
        """
        content_length = int(self.headers.get('Content-Length') or 0)
        # A negative length would make ``read`` block until EOF, so anything
        # that is not a positive length is treated as an empty body.
        raw = self.rfile.read(content_length) if content_length > 0 else b''
        request_data = json.loads(raw.decode('utf-8'))
        if not isinstance(request_data, dict):
            raise ValueError('Request body must be a JSON object')
        return request_data

    def _collect_highlights(self, request_data: dict) -> list:
        """Return one ``{'pageId', 'highlights'}`` entry per page with highlights.

        Malformed entries (not an object, missing or undecodable ``rmData``)
        are skipped so that one bad page does not fail the whole request.

        Raises:
            ValueError: if ``rmFiles`` is present but is not a list.
        """
        rm_files = request_data.get('rmFiles') or []
        if not isinstance(rm_files, list):
            raise ValueError("'rmFiles' must be a list")

        pages = []
        for rm_file in rm_files:
            if not isinstance(rm_file, dict):
                continue
            rm_data_b64 = rm_file.get('rmData', '')
            if not rm_data_b64:
                continue
            page_id = rm_file.get('pageId', '')

            try:
                rm_bytes = base64.b64decode(rm_data_b64)
            except (TypeError, ValueError):
                # binascii.Error is a ValueError subclass; TypeError covers
                # non-string payloads such as numbers.
                self.log_error('Skipping page %r: rmData is not valid base64', page_id)
                continue

            highlights = self._extract_highlights_from_rm(rm_bytes)
            if highlights:
                pages.append({'pageId': page_id, 'highlights': highlights})
        return pages

    @staticmethod
    def _json_safe_color(color):
        """Convert a highlight colour into something ``json.dumps`` accepts.

        Values exposing ``__int__`` become ``int`` (as before); ``None`` and
        strings pass through; anything else is stringified so that a single
        unexpected colour object cannot break serialisation of the response.
        """
        if hasattr(color, '__int__'):
            return int(color)
        if color is None or isinstance(color, str):
            return color
        return str(color)

    def _extract_highlights_from_rm(self, rm_bytes: bytes) -> list:
        """
        Parse a .rm file and extract GlyphRange (highlighted text) items.

        Uses rmscene's ``read_tree`` to parse the binary data and walks the
        resulting scene tree, keeping every GlyphRange that has non-empty text.

        Returns a list of dicts with the keys ``text``, ``color``, ``start``
        and ``length``.  Parsing is best effort: if rmscene raises, the
        highlights gathered so far (possibly none) are returned.
        """
        highlights = []

        try:
            tree = read_tree(io.BytesIO(rm_bytes))

            for item in tree.walk():
                if not isinstance(item, GlyphRange):
                    continue
                text = getattr(item, 'text', '')
                if not text:
                    continue
                highlights.append({
                    'text': text,
                    'color': self._json_safe_color(getattr(item, 'color', 0)),
                    'start': getattr(item, 'start', None),
                    'length': getattr(item, 'length', None),
                })
        except Exception as exc:
            # The parser is fed untrusted binary data and may fail in many
            # ways; record the failure and fall back to what was collected.
            self.log_error('Failed to parse .rm data: %s', exc)

        return highlights
- - Manual upload + + Manual upload
diff --git a/components/highlights.tsx b/components/highlights.tsx index c96144f..f877037 100644 --- a/components/highlights.tsx +++ b/components/highlights.tsx @@ -74,9 +74,9 @@ export const Highlights = ({ const onCheck = (checkedKeysValue: string[], e: any) => { const checkedData = e.checkedNodes // @ts-ignore - .filter((node) => !node?.children?.length) + .filter((node: any) => !node?.children?.length && node?.data?.location !== undefined) // @ts-ignore - .map((node) => ({ text: node.key, location: node.data.location })); + .map((node: any) => ({ text: node.key, location: node.data.location })); setCheckedHighlights(checkedData); // @ts-ignore setCheckedKeys(checkedKeysValue); diff --git a/components/register-modal.tsx b/components/register-modal.tsx index 336b7c3..b1379cd 100644 --- a/components/register-modal.tsx +++ b/components/register-modal.tsx @@ -121,10 +121,8 @@ export const RegisterDeviceModal = ({

- - - If you wish to upload a PDF manually, click here - + + If you wish to upload a PDF manually, click here