add readme

This commit is contained in:
okendoken 2026-02-11 13:58:13 +01:00
parent a5ab3640ec
commit 0f7e69c43e
6 changed files with 181 additions and 7 deletions

View File

@ -40,8 +40,9 @@ Notes:
1. Click the extension icon.
2. Confirm the endpoint is `http://127.0.0.1:8765/screenshot`.
3. Click "Capture".
4. Use the "History" dropdown + "Show" to view older AI suggestions.
3. Optionally add "Extra instructions" (saved locally in the extension).
4. Click "Capture".
5. Use the "History" dropdown + "Show" to view older AI suggestions.
Saved files land in `screenshots/`:

149
README.md Normal file
View File

@ -0,0 +1,149 @@
# AIEA Local Chrome Screenshot + OpenAI Assistant
Local-only (unpacked) Chrome extension + Python bridge that:
1. Captures the visible tab screenshot.
2. Extracts visible page text hierarchy (reduced HTML tree).
3. Saves files to `./screenshots`.
4. Calls OpenAI with:
- screenshot image
- extracted page content
- system instructions from `AI_EA_INSTRUCTIONS.MD`
- optional extra instructions from the extension UI
5. Returns short + medium response suggestions per visible post back to the extension popup.
No Chrome Web Store publishing is required.
## Project Structure
- `chrome_screenshot_ext/` unpacked MV3 extension (popup UI + capture + extraction)
- `tools/local_screenshot_bridge.py` local HTTP server (`127.0.0.1`) + OpenAI call
- `AI_EA_INSTRUCTIONS.MD` system instructions fed to the model
- `screenshots/` generated outputs (`.png`, `.json`, `.content.json`, `.ai.json`)
## Prerequisites
- macOS/Linux shell
- Python 3.9+
- Google Chrome
- OpenAI API key
## Setup (venv)
From repo root:
```bash
python3 -m venv .venv
source .venv/bin/activate
pip install -U pip
pip install -r requirements.txt
```
Set the API key with either:
1. Environment variable
```bash
export OPENAI_API_KEY="your_api_key_here"
```
2. `.env` file in project root
```bash
OPENAI_API_KEY=your_api_key_here
```
## Run the Local Server
With OpenAI enabled:
```bash
python3 tools/local_screenshot_bridge.py --port 8765 --out-dir screenshots --ai
```
Without OpenAI (save files only):
```bash
python3 tools/local_screenshot_bridge.py --port 8765 --out-dir screenshots
```
Health check:
```bash
curl http://127.0.0.1:8765/health
```
## Load the Chrome Extension (Unpacked)
1. Open `chrome://extensions`
2. Enable Developer mode
3. Click `Load unpacked`
4. Select `chrome_screenshot_ext/`
If you change extension code, click `Reload` on this extension page.
## Use the Extension
1. Open any normal webpage (not `chrome://` pages).
2. Click the extension icon.
3. Keep the endpoint: `http://127.0.0.1:8765/screenshot`.
4. Optional: add `Extra instructions` (2-line input in popup).
5. Click `Capture`.
Popup shows:
- Saved file paths
- AI suggestions per detected post:
- short response
- medium response
- Any AI error details
History in popup:
- `History` dropdown stores recent generations locally (`chrome.storage.local`)
- `Show` displays a previous result
- `Clear` removes local popup history
## Output Files
Generated in `screenshots/`:
- `YYYYMMDDTHHMMSSZ-<title-slug>.png` screenshot
- `YYYYMMDDTHHMMSSZ-<title-slug>.json` metadata
- `YYYYMMDDTHHMMSSZ-<title-slug>.content.json` extracted visible content tree
- `YYYYMMDDTHHMMSSZ-<title-slug>.ai.json` structured AI suggestions
## AI Behavior Notes
- System instructions source: `AI_EA_INSTRUCTIONS.MD`
- Extra per-capture instructions source: popup `Extra instructions`
- AI output is sanitized post-generation:
- quote/dash/space cleanup
- invisible unicode cleanup
- collapsed extra spaces/newlines
- forced lowercase (no capital letters)
## Useful CLI Options
```bash
python3 tools/local_screenshot_bridge.py --help
```
Common options:
- `--ai-model` (default: `gpt-5.2`)
- `--ai-max-posts` (default: `12`)
- `--ai-content-max-chars` (default: `120000`)
- `--ai-image-detail` (default: `auto`)
- `--ai-max-output-tokens` (default: `1400`)
- `--run ...` optional post-save hook command
## Troubleshooting
- `missing_openai_api_key`:
set `OPENAI_API_KEY` in shell or `.env`, then restart server.
- `missing_openai_sdk`:
run `pip install -r requirements.txt` inside your venv.
- Capture fails on Chrome internal pages:
`chrome://*`, Web Store, and some protected tabs cannot be scripted.
- No AI results in popup:
verify server started with `--ai`.

View File

@ -42,7 +42,8 @@
margin: 10px 0 6px 0;
}
input,
select {
select,
textarea {
width: 100%;
box-sizing: border-box;
padding: 10px 10px;
@ -52,8 +53,12 @@
color: var(--text);
outline: none;
}
textarea {
resize: vertical;
}
input:focus,
select:focus {
select:focus,
textarea:focus {
border-color: rgba(52, 211, 153, 0.6);
box-shadow: 0 0 0 3px rgba(52, 211, 153, 0.15);
}
@ -117,6 +122,8 @@
<div class="title">Local Screenshot Saver</div>
<label for="endpoint">Endpoint</label>
<input id="endpoint" type="text" spellcheck="false" />
<label for="extra_instructions">Extra instructions</label>
<textarea id="extra_instructions" rows="2" spellcheck="false" placeholder="optional: tone, goal, constraints..."></textarea>
<div class="row">
<button id="ping" type="button">Ping</button>
<button id="capture" type="button">Capture</button>

View File

@ -1,6 +1,7 @@
const DEFAULT_ENDPOINT = "http://127.0.0.1:8765/screenshot";
const HISTORY_KEY = "capture_history_v1";
const HISTORY_LIMIT = 25;
const EXTRA_INSTRUCTIONS_KEY = "extra_instructions_v1";
function $(id) {
return document.getElementById(id);
@ -40,6 +41,7 @@ function renderResult(entryOrResp) {
lines.push(` META: ${resp.meta_path || "(unknown)"}`);
if (resp.content_path) lines.push(` CONTENT: ${resp.content_path}`);
if (meta && meta.url) lines.push(` URL: ${meta.url}`);
if (meta && meta.extra_instructions) lines.push(` EXTRA: ${clampString(meta.extra_instructions, 220)}`);
if (resp.ai_result) {
if (resp.ai_result.ok && resp.ai_result.ai && Array.isArray(resp.ai_result.ai.posts)) {
@ -355,6 +357,7 @@ async function ping(endpoint) {
async function main() {
const endpointEl = $("endpoint");
const extraEl = $("extra_instructions");
const captureBtn = $("capture");
const pingBtn = $("ping");
const historySel = $("history");
@ -362,11 +365,15 @@ async function main() {
const clearHistoryBtn = $("clear_history");
endpointEl.value = (await storageGet("endpoint")) || DEFAULT_ENDPOINT;
extraEl.value = (await storageGet(EXTRA_INSTRUCTIONS_KEY)) || "";
await refreshHistoryUI();
endpointEl.addEventListener("change", async () => {
await storageSet({ endpoint: endpointEl.value.trim() });
});
extraEl.addEventListener("change", async () => {
await storageSet({ [EXTRA_INSTRUCTIONS_KEY]: extraEl.value });
});
pingBtn.addEventListener("click", async () => {
const endpoint = endpointEl.value.trim() || DEFAULT_ENDPOINT;
@ -398,6 +405,7 @@ async function main() {
captureBtn.addEventListener("click", async () => {
const endpoint = endpointEl.value.trim() || DEFAULT_ENDPOINT;
const extraInstructions = (extraEl.value || "").trim();
captureBtn.disabled = true;
setStatus("Extracting page content...", "");
@ -423,6 +431,7 @@ async function main() {
url: tab.url || "",
ts: new Date().toISOString(),
content,
extra_instructions: extraInstructions,
});
// Persist for later viewing in the popup.
@ -432,6 +441,7 @@ async function main() {
saved_at: new Date().toISOString(),
title: tab.title || "",
url: tab.url || "",
extra_instructions: extraInstructions,
resp: {
png_path: resp.png_path || "",
meta_path: resp.meta_path || "",

View File

@ -193,6 +193,7 @@ def main(argv: list[str]) -> int:
page_url = str(meta.get("url") or "")
page_title = str(meta.get("title") or "")
extra_instructions = str(meta.get("extra_instructions") or "").strip()
user_payload = {
"page_url": page_url,
@ -216,7 +217,8 @@ def main(argv: list[str]) -> int:
"Do not invent facts not present in the screenshot/content.\n"
"Return JSON matching the provided schema. Include all top-level keys: page_url, page_title, posts, notes.\n"
"If a value is unknown, use an empty string.\n\n"
f"PAGE_DATA_JSON={_safe_json_dump(user_payload, args.content_max_chars)}"
+ (f"EXTRA_INSTRUCTIONS={extra_instructions}\n\n" if extra_instructions else "")
+ f"PAGE_DATA_JSON={_safe_json_dump(user_payload, args.content_max_chars)}"
)
# Screenshot is provided as a base64 data URL image input.

View File

@ -208,6 +208,7 @@ def _maybe_generate_ai(server, png_path: Path, meta: dict, content: object) -> d
page_url = str(meta.get("url") or "")
page_title = str(meta.get("title") or "")
extra_instructions = str(meta.get("extra_instructions") or "").strip()
user_payload = {
"page_url": page_url,
@ -231,7 +232,8 @@ def _maybe_generate_ai(server, png_path: Path, meta: dict, content: object) -> d
"Do not invent facts not present in the screenshot/content.\n"
"Return JSON matching the provided schema. Include all top-level keys: page_url, page_title, posts, notes.\n"
"If a value is unknown, use an empty string.\n\n"
f"PAGE_DATA_JSON={_safe_json_dump(user_payload, content_max_chars)}"
+ (f"EXTRA_INSTRUCTIONS={extra_instructions}\n\n" if extra_instructions else "")
+ f"PAGE_DATA_JSON={_safe_json_dump(user_payload, content_max_chars)}"
)
b64 = base64.b64encode(png_path.read_bytes()).decode("ascii")
@ -339,6 +341,7 @@ class Handler(BaseHTTPRequestHandler):
page_url = req.get("url") or ""
client_ts = req.get("ts") or ""
content = req.get("content", None)
extra_instructions = req.get("extra_instructions") or ""
m = re.match(r"^data:image/png;base64,(.*)$", data_url)
if not m:
@ -393,6 +396,7 @@ class Handler(BaseHTTPRequestHandler):
"saved_utc": now.isoformat(),
"png_path": str(png_path),
"content_path": final_content_path,
"extra_instructions": extra_instructions,
},
indent=2,
ensure_ascii=True,
@ -411,6 +415,7 @@ class Handler(BaseHTTPRequestHandler):
"saved_utc": now.isoformat(),
"png_path": str(png_path),
"content_path": final_content_path,
"extra_instructions": extra_instructions,
}
run = getattr(self.server, "run_cmd", None) # type: ignore[attr-defined]
@ -472,7 +477,7 @@ def main(argv: list[str]) -> int:
"--run",
nargs="+",
default=None,
help="Optional command to run after saving. Screenshot paths are appended as args: PNG then JSON.",
help="Optional command to run after saving. Args appended: <png_path> <meta_path> [content_path].",
)
args = p.parse_args(argv)