From 0f7e69c43e9ef3bd1416bfcb6e8f7a9008f08dfd Mon Sep 17 00:00:00 2001
From: okendoken <philip@flatlogic.com>
Date: Wed, 11 Feb 2026 13:58:13 +0100
Subject: [PATCH] add readme and extra instructions support

---
 LOCAL_SCREENSHOT_EXTENSION.md    |   5 +-
 README.md                        | 149 +++++++++++++++++++++++++++++++
 chrome_screenshot_ext/popup.html |  11 ++-
 chrome_screenshot_ext/popup.js   |  10 +++
 scripts/ai_prepare_responses.py  |   4 +-
 tools/local_screenshot_bridge.py |   9 +-
 6 files changed, 181 insertions(+), 7 deletions(-)
 create mode 100644 README.md
diff --git a/LOCAL_SCREENSHOT_EXTENSION.md b/LOCAL_SCREENSHOT_EXTENSION.md
index 78fd4ab..8dca23a 100644
--- a/LOCAL_SCREENSHOT_EXTENSION.md
+++ b/LOCAL_SCREENSHOT_EXTENSION.md
@@ -40,8 +40,9 @@ Notes:
 
 1. Click the extension icon.
 2. Confirm the endpoint is `http://127.0.0.1:8765/screenshot`.
-3. Click "Capture".
-4. Use the "History" dropdown + "Show" to view older AI suggestions.
+3. Optionally add "Extra instructions" (saved locally in the extension).
+4. Click "Capture".
+5. Use the "History" dropdown + "Show" to view older AI suggestions.
 
 Saved files land in `screenshots/`:
 
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9e2542c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,149 @@
+# AIEA Local Chrome Screenshot + OpenAI Assistant
+
+Local-only (unpacked) Chrome extension + Python bridge that:
+
+1. Captures a screenshot of the visible tab.
+2. Extracts the visible page text hierarchy (a reduced HTML tree).
+3. Saves files to `./screenshots`.
+4. Calls OpenAI with:
+   - screenshot image
+   - extracted page content
+   - system instructions from `AI_EA_INSTRUCTIONS.MD`
+   - optional extra instructions from the extension UI
+5. Returns short and medium response suggestions for each visible post to the extension popup.
+
+No Chrome Web Store publishing is required.
+
+## Project Structure
+
+- `chrome_screenshot_ext/` unpacked MV3 extension (popup UI + capture + extraction)
+- `tools/local_screenshot_bridge.py` local HTTP server (`127.0.0.1`) + OpenAI call
+- `AI_EA_INSTRUCTIONS.MD` system instructions fed to the model
+- `screenshots/` generated outputs (`.png`, `.json`, `.content.json`, `.ai.json`)
+
+## Prerequisites
+
+- macOS/Linux shell
+- Python 3.9+
+- Google Chrome
+- OpenAI API key
+
+## Setup (venv)
+
+From repo root:
+
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -U pip
+pip install -r requirements.txt
+```
+
+Set API key with either:
+
+1. Environment variable
+```bash
+export OPENAI_API_KEY="your_api_key_here"
+```
+
+2. `.env` file in project root
+```bash
+OPENAI_API_KEY=your_api_key_here
+```
+
+## Run the Local Server
+
+With OpenAI enabled:
+
+```bash
+python3 tools/local_screenshot_bridge.py --port 8765 --out-dir screenshots --ai
+```
+
+Without OpenAI (save files only):
+
+```bash
+python3 tools/local_screenshot_bridge.py --port 8765 --out-dir screenshots
+```
+
+Health check:
+
+```bash
+curl http://127.0.0.1:8765/health
+```
+
+## Load the Chrome Extension (Unpacked)
+
+1. Open `chrome://extensions`
+2. Enable Developer mode
+3. Click `Load unpacked`
+4. Select `chrome_screenshot_ext/`
+
+If you change the extension code, click `Reload` on the extension's card in `chrome://extensions`.
+
+## Use the Extension
+
+1. Open any normal webpage (not `chrome://` pages).
+2. Click the extension icon.
+3. Keep the default endpoint: `http://127.0.0.1:8765/screenshot`
+4. Optional: add `Extra instructions` (the 2-line input in the popup).
+5. Click `Capture`.
+
+Popup shows:
+
+- Saved file paths
+- AI suggestions per detected post:
+  - short response
+  - medium response
+- Any AI error details
+
+History in popup:
+
+- `History` dropdown lists recent generations, which are stored locally (`chrome.storage.local`)
+- `Show` displays a previous result
+- `Clear` removes local popup history
+
+## Output Files
+
+Generated in `screenshots/`:
+
+- `YYYYMMDDTHHMMSSZ-<title-slug>.png` screenshot
+- `YYYYMMDDTHHMMSSZ-<title-slug>.json` metadata
+- `YYYYMMDDTHHMMSSZ-<title-slug>.content.json` extracted visible content tree
+- `YYYYMMDDTHHMMSSZ-<title-slug>.ai.json` structured AI suggestions
+
+## AI Behavior Notes
+
+- System instructions source: `AI_EA_INSTRUCTIONS.MD`
+- Extra per-capture instructions source: popup `Extra instructions`
+- AI output is sanitized post-generation:
+  - quote/dash/space cleanup
+  - invisible unicode cleanup
+  - collapsed extra spaces/newlines
+  - forced lowercase (no capital letters)
+
+## Useful CLI Options
+
+```bash
+python3 tools/local_screenshot_bridge.py --help
+```
+
+Common options:
+
+- `--ai-model` (default: `gpt-5.2`)
+- `--ai-max-posts` (default: `12`)
+- `--ai-content-max-chars` (default: `120000`)
+- `--ai-image-detail` (default: `auto`)
+- `--ai-max-output-tokens` (default: `1400`)
+- `--run ...` optional post-save hook command
+
+## Troubleshooting
+
+- `missing_openai_api_key`:
+  set `OPENAI_API_KEY` in your shell or `.env`, then restart the server.
+- `missing_openai_sdk`:
+  run `pip install -r requirements.txt` inside your venv.
+- Capture fails on Chrome internal pages:
+  `chrome://*`, Web Store, and some protected tabs cannot be scripted.
+- No AI results in popup:
+  verify that the server was started with `--ai`.
+
diff --git a/chrome_screenshot_ext/popup.html b/chrome_screenshot_ext/popup.html
index aca38fe..cb4cb50 100644
--- a/chrome_screenshot_ext/popup.html
+++ b/chrome_screenshot_ext/popup.html
@@ -42,7 +42,8 @@
         margin: 10px 0 6px 0;
       }
       input,
-      select {
+      select,
+      textarea {
         width: 100%;
         box-sizing: border-box;
         padding: 10px 10px;
@@ -52,8 +53,12 @@
         color: var(--text);
         outline: none;
       }
+      textarea {
+        resize: vertical;
+      }
       input:focus,
-      select:focus {
+      select:focus,
+      textarea:focus {
         border-color: rgba(52, 211, 153, 0.6);
         box-shadow: 0 0 0 3px rgba(52, 211, 153, 0.15);
       }
@@ -117,6 +122,8 @@
       <div class="title">Local Screenshot Saver</div>
       <label for="endpoint">Endpoint</label>
       <input id="endpoint" type="text" spellcheck="false" />
+      <label for="extra_instructions">Extra instructions</label>
+      <textarea id="extra_instructions" rows="2" spellcheck="false" placeholder="optional: tone, goal, constraints..."></textarea>
       <div class="row">
         <button id="ping" type="button">Ping</button>
         <button id="capture" type="button">Capture</button>
diff --git a/chrome_screenshot_ext/popup.js b/chrome_screenshot_ext/popup.js
index 0ac4420..570e386 100644
--- a/chrome_screenshot_ext/popup.js
+++ b/chrome_screenshot_ext/popup.js
@@ -1,6 +1,7 @@
 const DEFAULT_ENDPOINT = "http://127.0.0.1:8765/screenshot";
 const HISTORY_KEY = "capture_history_v1";
 const HISTORY_LIMIT = 25;
+const EXTRA_INSTRUCTIONS_KEY = "extra_instructions_v1";
 
 function $(id) {
   return document.getElementById(id);
@@ -40,6 +41,7 @@ function renderResult(entryOrResp) {
   lines.push(`  META: ${resp.meta_path || "(unknown)"}`);
   if (resp.content_path) lines.push(`  CONTENT: ${resp.content_path}`);
   if (meta && meta.url) lines.push(`  URL:  ${meta.url}`);
+  if (meta && meta.extra_instructions) lines.push(`  EXTRA: ${clampString(meta.extra_instructions, 220)}`);
 
   if (resp.ai_result) {
     if (resp.ai_result.ok && resp.ai_result.ai && Array.isArray(resp.ai_result.ai.posts)) {
@@ -355,6 +357,7 @@ async function ping(endpoint) {
 
 async function main() {
   const endpointEl = $("endpoint");
+  const extraEl = $("extra_instructions");
   const captureBtn = $("capture");
   const pingBtn = $("ping");
   const historySel = $("history");
@@ -362,11 +365,15 @@ async function main() {
   const clearHistoryBtn = $("clear_history");
 
   endpointEl.value = (await storageGet("endpoint")) || DEFAULT_ENDPOINT;
+  extraEl.value = (await storageGet(EXTRA_INSTRUCTIONS_KEY)) || "";
   await refreshHistoryUI();
 
   endpointEl.addEventListener("change", async () => {
     await storageSet({ endpoint: endpointEl.value.trim() });
   });
+  extraEl.addEventListener("change", async () => {
+    await storageSet({ [EXTRA_INSTRUCTIONS_KEY]: extraEl.value });
+  });
 
   pingBtn.addEventListener("click", async () => {
     const endpoint = endpointEl.value.trim() || DEFAULT_ENDPOINT;
@@ -398,6 +405,7 @@ async function main() {
 
   captureBtn.addEventListener("click", async () => {
     const endpoint = endpointEl.value.trim() || DEFAULT_ENDPOINT;
+    const extraInstructions = (extraEl.value || "").trim();
     captureBtn.disabled = true;
     setStatus("Extracting page content...", "");
 
@@ -423,6 +431,7 @@ async function main() {
         url: tab.url || "",
         ts: new Date().toISOString(),
         content,
+        extra_instructions: extraInstructions,
       });
 
       // Persist for later viewing in the popup.
@@ -432,6 +441,7 @@ async function main() {
         saved_at: new Date().toISOString(),
         title: tab.title || "",
         url: tab.url || "",
+        extra_instructions: extraInstructions,
         resp: {
           png_path: resp.png_path || "",
           meta_path: resp.meta_path || "",
diff --git a/scripts/ai_prepare_responses.py b/scripts/ai_prepare_responses.py
index 55d7ba7..4d59647 100755
--- a/scripts/ai_prepare_responses.py
+++ b/scripts/ai_prepare_responses.py
@@ -193,6 +193,7 @@ def main(argv: list[str]) -> int:
 
     page_url = str(meta.get("url") or "")
     page_title = str(meta.get("title") or "")
+    extra_instructions = str(meta.get("extra_instructions") or "").strip()
 
     user_payload = {
         "page_url": page_url,
@@ -216,7 +217,8 @@ def main(argv: list[str]) -> int:
         "Do not invent facts not present in the screenshot/content.\n"
         "Return JSON matching the provided schema. Include all top-level keys: page_url, page_title, posts, notes.\n"
         "If a value is unknown, use an empty string.\n\n"
-        f"PAGE_DATA_JSON={_safe_json_dump(user_payload, args.content_max_chars)}"
+        + (f"EXTRA_INSTRUCTIONS={extra_instructions}\n\n" if extra_instructions else "")
+        + f"PAGE_DATA_JSON={_safe_json_dump(user_payload, args.content_max_chars)}"
     )
 
     # Screenshot is provided as a base64 data URL image input.
diff --git a/tools/local_screenshot_bridge.py b/tools/local_screenshot_bridge.py
index 6c95c00..737e260 100755
--- a/tools/local_screenshot_bridge.py
+++ b/tools/local_screenshot_bridge.py
@@ -208,6 +208,7 @@ def _maybe_generate_ai(server, png_path: Path, meta: dict, content: object) -> d
 
     page_url = str(meta.get("url") or "")
     page_title = str(meta.get("title") or "")
+    extra_instructions = str(meta.get("extra_instructions") or "").strip()
 
     user_payload = {
         "page_url": page_url,
@@ -231,7 +232,8 @@ def _maybe_generate_ai(server, png_path: Path, meta: dict, content: object) -> d
         "Do not invent facts not present in the screenshot/content.\n"
         "Return JSON matching the provided schema. Include all top-level keys: page_url, page_title, posts, notes.\n"
         "If a value is unknown, use an empty string.\n\n"
-        f"PAGE_DATA_JSON={_safe_json_dump(user_payload, content_max_chars)}"
+        + (f"EXTRA_INSTRUCTIONS={extra_instructions}\n\n" if extra_instructions else "")
+        + f"PAGE_DATA_JSON={_safe_json_dump(user_payload, content_max_chars)}"
     )
 
     b64 = base64.b64encode(png_path.read_bytes()).decode("ascii")
@@ -339,6 +341,7 @@ class Handler(BaseHTTPRequestHandler):
         page_url = req.get("url") or ""
         client_ts = req.get("ts") or ""
         content = req.get("content", None)
+        extra_instructions = req.get("extra_instructions") or ""
 
         m = re.match(r"^data:image/png;base64,(.*)$", data_url)
         if not m:
@@ -393,6 +396,7 @@ class Handler(BaseHTTPRequestHandler):
                         "saved_utc": now.isoformat(),
                         "png_path": str(png_path),
                         "content_path": final_content_path,
+                        "extra_instructions": extra_instructions,
                     },
                     indent=2,
                     ensure_ascii=True,
@@ -411,6 +415,7 @@ class Handler(BaseHTTPRequestHandler):
             "saved_utc": now.isoformat(),
             "png_path": str(png_path),
             "content_path": final_content_path,
+            "extra_instructions": extra_instructions,
         }
 
         run = getattr(self.server, "run_cmd", None)  # type: ignore[attr-defined]
@@ -472,7 +477,7 @@ def main(argv: list[str]) -> int:
         "--run",
         nargs="+",
         default=None,
-        help="Optional command to run after saving. Screenshot paths are appended as args: PNG then JSON.",
+        help="Optional command to run after saving. Args appended: <png_path> <meta_path> [content_path].",
     )
     args = p.parse_args(argv)