From 0f7e69c43e9ef3bd1416bfcb6e8f7a9008f08dfd Mon Sep 17 00:00:00 2001 From: okendoken Date: Wed, 11 Feb 2026 13:58:13 +0100 Subject: [PATCH] add readme --- LOCAL_SCREENSHOT_EXTENSION.md | 5 +- README.md | 149 +++++++++++++++++++++++++++++++ chrome_screenshot_ext/popup.html | 11 ++- chrome_screenshot_ext/popup.js | 10 +++ scripts/ai_prepare_responses.py | 4 +- tools/local_screenshot_bridge.py | 9 +- 6 files changed, 181 insertions(+), 7 deletions(-) create mode 100644 README.md diff --git a/LOCAL_SCREENSHOT_EXTENSION.md b/LOCAL_SCREENSHOT_EXTENSION.md index 78fd4ab..8dca23a 100644 --- a/LOCAL_SCREENSHOT_EXTENSION.md +++ b/LOCAL_SCREENSHOT_EXTENSION.md @@ -40,8 +40,9 @@ Notes: 1. Click the extension icon. 2. Confirm the endpoint is `http://127.0.0.1:8765/screenshot`. -3. Click "Capture". -4. Use the "History" dropdown + "Show" to view older AI suggestions. +3. Optionally add "Extra instructions" (saved locally in the extension). +4. Click "Capture". +5. Use the "History" dropdown + "Show" to view older AI suggestions. Saved files land in `screenshots/`: diff --git a/README.md b/README.md new file mode 100644 index 0000000..9e2542c --- /dev/null +++ b/README.md @@ -0,0 +1,149 @@ +# AIEA Local Chrome Screenshot + OpenAI Assistant + +Local-only (unpacked) Chrome extension + Python bridge that: + +1. Captures the visible tab screenshot. +2. Extracts visible page text hierarchy (reduced HTML tree). +3. Saves files to `./screenshots`. +4. Calls OpenAI with: + - screenshot image + - extracted page content + - system instructions from `AI_EA_INSTRUCTIONS.MD` + - optional extra instructions from the extension UI +5. Returns short + medium response suggestions per visible post back to the extension popup. + +No Chrome Web Store publishing is required. 
+ +## Project Structure + +- `chrome_screenshot_ext/` unpacked MV3 extension (popup UI + capture + extraction) +- `tools/local_screenshot_bridge.py` local HTTP server (`127.0.0.1`) + OpenAI call +- `AI_EA_INSTRUCTIONS.MD` system instructions fed to the model +- `screenshots/` generated outputs (`.png`, `.json`, `.content.json`, `.ai.json`) + +## Prerequisites + +- macOS/Linux shell +- Python 3.9+ +- Google Chrome +- OpenAI API key + +## Setup (venv) + +From repo root: + +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install -U pip +pip install -r requirements.txt +``` + +Set API key with either: + +1. Environment variable +```bash +export OPENAI_API_KEY="your_api_key_here" +``` + +2. `.env` file in project root +```bash +OPENAI_API_KEY=your_api_key_here +``` + +## Run the Local Server + +With OpenAI enabled: + +```bash +python3 tools/local_screenshot_bridge.py --port 8765 --out-dir screenshots --ai +``` + +Without OpenAI (save files only): + +```bash +python3 tools/local_screenshot_bridge.py --port 8765 --out-dir screenshots +``` + +Health check: + +```bash +curl http://127.0.0.1:8765/health +``` + +## Load the Chrome Extension (Unpacked) + +1. Open `chrome://extensions` +2. Enable Developer mode +3. Click `Load unpacked` +4. Select `chrome_screenshot_ext/` + +If you change extension code, click `Reload` on this extension page. + +## Use the Extension + +1. Open any normal webpage (not `chrome://` pages). +2. Click extension icon. +3. Keep endpoint: `http://127.0.0.1:8765/screenshot` +4. Optional: add `Extra instructions` (2-line input in popup). +5. Click `Capture`. 
+ +Popup shows: + +- Saved file paths +- AI suggestions per detected post: + - short response + - medium response +- Any AI error details + +History in popup: + +- `History` dropdown stores recent generations locally (`chrome.storage.local`) +- `Show` displays a previous result +- `Clear` removes local popup history + +## Output Files + +Generated in `screenshots/`: + +- `YYYYMMDDTHHMMSSZ-<id>.png` screenshot +- `YYYYMMDDTHHMMSSZ-<id>.json` metadata +- `YYYYMMDDTHHMMSSZ-<id>.content.json` extracted visible content tree +- `YYYYMMDDTHHMMSSZ-<id>.ai.json` structured AI suggestions + +## AI Behavior Notes + +- System instructions source: `AI_EA_INSTRUCTIONS.MD` +- Extra per-capture instructions source: popup `Extra instructions` +- AI output is sanitized post-generation: + - quote/dash/space cleanup + - invisible unicode cleanup + - collapsed extra spaces/newlines + - forced lowercase (no capital letters) + +## Useful CLI Options + +```bash +python3 tools/local_screenshot_bridge.py --help +``` + +Common options: + +- `--ai-model` (default: `gpt-5.2`) +- `--ai-max-posts` (default: `12`) +- `--ai-content-max-chars` (default: `120000`) +- `--ai-image-detail` (default: `auto`) +- `--ai-max-output-tokens` (default: `1400`) +- `--run ...` optional post-save hook command + +## Troubleshooting + +- `missing_openai_api_key`: + set `OPENAI_API_KEY` in shell or `.env`, then restart server. +- `missing_openai_sdk`: + run `pip install -r requirements.txt` inside your venv. +- Capture fails on Chrome internal pages: + `chrome://*`, Web Store, and some protected tabs cannot be scripted. +- No AI results in popup: + verify server started with `--ai`. 
+ diff --git a/chrome_screenshot_ext/popup.html b/chrome_screenshot_ext/popup.html index aca38fe..cb4cb50 100644 --- a/chrome_screenshot_ext/popup.html +++ b/chrome_screenshot_ext/popup.html @@ -42,7 +42,8 @@ margin: 10px 0 6px 0; } input, - select { + select, + textarea { width: 100%; box-sizing: border-box; padding: 10px 10px; @@ -52,8 +53,12 @@ color: var(--text); outline: none; } + textarea { + resize: vertical; + } input:focus, - select:focus { + select:focus, + textarea:focus { border-color: rgba(52, 211, 153, 0.6); box-shadow: 0 0 0 3px rgba(52, 211, 153, 0.15); } @@ -117,6 +122,8 @@
Local Screenshot Saver
+ +
diff --git a/chrome_screenshot_ext/popup.js b/chrome_screenshot_ext/popup.js index 0ac4420..570e386 100644 --- a/chrome_screenshot_ext/popup.js +++ b/chrome_screenshot_ext/popup.js @@ -1,6 +1,7 @@ const DEFAULT_ENDPOINT = "http://127.0.0.1:8765/screenshot"; const HISTORY_KEY = "capture_history_v1"; const HISTORY_LIMIT = 25; +const EXTRA_INSTRUCTIONS_KEY = "extra_instructions_v1"; function $(id) { return document.getElementById(id); @@ -40,6 +41,7 @@ function renderResult(entryOrResp) { lines.push(` META: ${resp.meta_path || "(unknown)"}`); if (resp.content_path) lines.push(` CONTENT: ${resp.content_path}`); if (meta && meta.url) lines.push(` URL: ${meta.url}`); + if (meta && meta.extra_instructions) lines.push(` EXTRA: ${clampString(meta.extra_instructions, 220)}`); if (resp.ai_result) { if (resp.ai_result.ok && resp.ai_result.ai && Array.isArray(resp.ai_result.ai.posts)) { @@ -355,6 +357,7 @@ async function ping(endpoint) { async function main() { const endpointEl = $("endpoint"); + const extraEl = $("extra_instructions"); const captureBtn = $("capture"); const pingBtn = $("ping"); const historySel = $("history"); @@ -362,11 +365,15 @@ async function main() { const clearHistoryBtn = $("clear_history"); endpointEl.value = (await storageGet("endpoint")) || DEFAULT_ENDPOINT; + extraEl.value = (await storageGet(EXTRA_INSTRUCTIONS_KEY)) || ""; await refreshHistoryUI(); endpointEl.addEventListener("change", async () => { await storageSet({ endpoint: endpointEl.value.trim() }); }); + extraEl.addEventListener("change", async () => { + await storageSet({ [EXTRA_INSTRUCTIONS_KEY]: extraEl.value }); + }); pingBtn.addEventListener("click", async () => { const endpoint = endpointEl.value.trim() || DEFAULT_ENDPOINT; @@ -398,6 +405,7 @@ async function main() { captureBtn.addEventListener("click", async () => { const endpoint = endpointEl.value.trim() || DEFAULT_ENDPOINT; + const extraInstructions = (extraEl.value || "").trim(); captureBtn.disabled = true; setStatus("Extracting 
page content...", ""); @@ -423,6 +431,7 @@ async function main() { url: tab.url || "", ts: new Date().toISOString(), content, + extra_instructions: extraInstructions, }); // Persist for later viewing in the popup. @@ -432,6 +441,7 @@ async function main() { saved_at: new Date().toISOString(), title: tab.title || "", url: tab.url || "", + extra_instructions: extraInstructions, resp: { png_path: resp.png_path || "", meta_path: resp.meta_path || "", diff --git a/scripts/ai_prepare_responses.py b/scripts/ai_prepare_responses.py index 55d7ba7..4d59647 100755 --- a/scripts/ai_prepare_responses.py +++ b/scripts/ai_prepare_responses.py @@ -193,6 +193,7 @@ def main(argv: list[str]) -> int: page_url = str(meta.get("url") or "") page_title = str(meta.get("title") or "") + extra_instructions = str(meta.get("extra_instructions") or "").strip() user_payload = { "page_url": page_url, @@ -216,7 +217,8 @@ def main(argv: list[str]) -> int: "Do not invent facts not present in the screenshot/content.\n" "Return JSON matching the provided schema. Include all top-level keys: page_url, page_title, posts, notes.\n" "If a value is unknown, use an empty string.\n\n" - f"PAGE_DATA_JSON={_safe_json_dump(user_payload, args.content_max_chars)}" + + (f"EXTRA_INSTRUCTIONS={extra_instructions}\n\n" if extra_instructions else "") + + f"PAGE_DATA_JSON={_safe_json_dump(user_payload, args.content_max_chars)}" ) # Screenshot is provided as a base64 data URL image input. 
diff --git a/tools/local_screenshot_bridge.py b/tools/local_screenshot_bridge.py index 6c95c00..737e260 100755 --- a/tools/local_screenshot_bridge.py +++ b/tools/local_screenshot_bridge.py @@ -208,6 +208,7 @@ def _maybe_generate_ai(server, png_path: Path, meta: dict, content: object) -> d page_url = str(meta.get("url") or "") page_title = str(meta.get("title") or "") + extra_instructions = str(meta.get("extra_instructions") or "").strip() user_payload = { "page_url": page_url, @@ -231,7 +232,8 @@ def _maybe_generate_ai(server, png_path: Path, meta: dict, content: object) -> d "Do not invent facts not present in the screenshot/content.\n" "Return JSON matching the provided schema. Include all top-level keys: page_url, page_title, posts, notes.\n" "If a value is unknown, use an empty string.\n\n" - f"PAGE_DATA_JSON={_safe_json_dump(user_payload, content_max_chars)}" + + (f"EXTRA_INSTRUCTIONS={extra_instructions}\n\n" if extra_instructions else "") + + f"PAGE_DATA_JSON={_safe_json_dump(user_payload, content_max_chars)}" ) b64 = base64.b64encode(png_path.read_bytes()).decode("ascii") @@ -339,6 +341,7 @@ class Handler(BaseHTTPRequestHandler): page_url = req.get("url") or "" client_ts = req.get("ts") or "" content = req.get("content", None) + extra_instructions = req.get("extra_instructions") or "" m = re.match(r"^data:image/png;base64,(.*)$", data_url) if not m: @@ -393,6 +396,7 @@ class Handler(BaseHTTPRequestHandler): "saved_utc": now.isoformat(), "png_path": str(png_path), "content_path": final_content_path, + "extra_instructions": extra_instructions, }, indent=2, ensure_ascii=True, @@ -411,6 +415,7 @@ class Handler(BaseHTTPRequestHandler): "saved_utc": now.isoformat(), "png_path": str(png_path), "content_path": final_content_path, + "extra_instructions": extra_instructions, } run = getattr(self.server, "run_cmd", None) # type: ignore[attr-defined] @@ -472,7 +477,7 @@ def main(argv: list[str]) -> int: "--run", nargs="+", default=None, - help="Optional command to run 
after saving. Screenshot paths are appended as args: PNG then JSON.", + help="Optional command to run after saving. Args appended: [content_path].", ) args = p.parse_args(argv)