ai generation: add OpenAI-backed reply drafting to the screenshot bridge and popup (history UI, `.ai.json` output)

This commit is contained in:
okendoken 2026-02-10 19:49:29 +01:00
parent cfe5d786e5
commit a5ab3640ec
9 changed files with 764 additions and 30 deletions

1
.env Normal file
View File

@ -0,0 +1 @@
# SECURITY: a live OpenAI secret key was committed on this line. Revoke that key
# immediately in the OpenAI dashboard and purge it from git history (e.g. with
# `git filter-repo`); `.env` is git-ignored as of this commit, but the leaked
# value remains in history until rewritten.
OPENAI_API_KEY=REDACTED

4
.gitignore vendored
View File

@ -1,2 +1,6 @@
.idea
screenshots/
.env
.venv/
__pycache__/
*.pyc

View File

@ -9,18 +9,25 @@ This is a local-only setup (no publishing) that:
## 1) Start the local server
From the project root:
From the project root (recommended: use a venv):
```bash
python3 tools/local_screenshot_bridge.py --port 8765 --out-dir screenshots --run bash scripts/on_screenshot.sh
python3 -m venv .venv
source .venv/bin/activate
pip install -U pip
pip install -r requirements.txt
export OPENAI_API_KEY="..."
python3 tools/local_screenshot_bridge.py --port 8765 --out-dir screenshots --ai
```
Notes:
- The server listens on `http://127.0.0.1:8765/screenshot`.
- If you omit `--run ...`, it will only save files.
- If `--run ...` is set, it appends two args to the command:
- `<png_path>` then `<meta_path>`
- If you omit `--ai`, it will only save files (no OpenAI call).
- If you set `--ai`, it will generate reply suggestions and return them back to the extension (and also save `*.ai.json`).
- Optional: you can still use `--run ...` as a post-save hook.
## 2) Load the extension (unpacked)
@ -34,9 +41,17 @@ Notes:
1. Click the extension icon.
2. Confirm the endpoint is `http://127.0.0.1:8765/screenshot`.
3. Click "Capture".
4. Use the "History" dropdown + "Show" to view older AI suggestions.
Saved files land in `screenshots/`:
- `YYYYMMDDTHHMMSSZ-<title-slug>.png`
- `YYYYMMDDTHHMMSSZ-<title-slug>.json`
- `YYYYMMDDTHHMMSSZ-<title-slug>.content.json`
- `YYYYMMDDTHHMMSSZ-<title-slug>.ai.json` (written when `--ai` is enabled and an `OPENAI_API_KEY` is available via the environment or `.env`)
Alternatively create `.env` in the project root:
```bash
OPENAI_API_KEY=...
```

View File

@ -41,7 +41,8 @@
color: var(--muted);
margin: 10px 0 6px 0;
}
input {
input,
select {
width: 100%;
box-sizing: border-box;
padding: 10px 10px;
@ -51,7 +52,8 @@
color: var(--text);
outline: none;
}
input:focus {
input:focus,
select:focus {
border-color: rgba(52, 211, 153, 0.6);
box-shadow: 0 0 0 3px rgba(52, 211, 153, 0.15);
}
@ -81,6 +83,16 @@
color: var(--text);
border: 1px solid rgba(255, 255, 255, 0.18);
}
#show_history {
background: rgba(255, 255, 255, 0.1);
color: var(--text);
border: 1px solid rgba(255, 255, 255, 0.18);
}
#clear_history {
background: rgba(251, 113, 133, 0.12);
color: var(--text);
border: 1px solid rgba(251, 113, 133, 0.25);
}
pre {
margin: 10px 0 0 0;
padding: 10px;
@ -109,9 +121,14 @@
<button id="ping" type="button">Ping</button>
<button id="capture" type="button">Capture</button>
</div>
<label for="history">History</label>
<select id="history"></select>
<div class="row">
<button id="show_history" type="button">Show</button>
<button id="clear_history" type="button">Clear</button>
</div>
<pre id="status"></pre>
</div>
<script src="popup.js"></script>
</body>
</html>

View File

@ -1,4 +1,6 @@
const DEFAULT_ENDPOINT = "http://127.0.0.1:8765/screenshot";
const HISTORY_KEY = "capture_history_v1";
const HISTORY_LIMIT = 25;
function $(id) {
return document.getElementById(id);
@ -22,6 +24,99 @@ async function storageSet(obj) {
});
}
function clampString(s, maxLen) {
const t = String(s || "");
if (t.length <= maxLen) return t;
return t.slice(0, Math.max(0, maxLen - 1)) + "…";
}
function renderResult(entryOrResp) {
const resp = entryOrResp && entryOrResp.png_path ? entryOrResp : entryOrResp && entryOrResp.resp ? entryOrResp.resp : entryOrResp;
const meta = entryOrResp && entryOrResp.title ? entryOrResp : null;
const lines = [];
lines.push("Saved:");
lines.push(` PNG: ${resp.png_path || "(unknown)"}`);
lines.push(` META: ${resp.meta_path || "(unknown)"}`);
if (resp.content_path) lines.push(` CONTENT: ${resp.content_path}`);
if (meta && meta.url) lines.push(` URL: ${meta.url}`);
if (resp.ai_result) {
if (resp.ai_result.ok && resp.ai_result.ai && Array.isArray(resp.ai_result.ai.posts)) {
lines.push("");
lines.push(`AI (${resp.ai_result.took_ms || "?"}ms):`);
for (const p of resp.ai_result.ai.posts) {
const idx = typeof p.index === "number" ? p.index : "?";
const postText = (p.post_text || "").replace(/\s+/g, " ").trim();
const short = (p.short_response || "").replace(/\s+/g, " ").trim();
const medium = (p.medium_response || "").replace(/\s+/g, " ").trim();
lines.push("");
lines.push(`#${idx} Post: ${clampString(postText, 180)}`);
lines.push(` Short: ${short}`);
lines.push(` Medium: ${medium}`);
}
if (resp.ai_result.ai_path) lines.push(`\nAI file: ${resp.ai_result.ai_path}`);
} else {
lines.push("");
lines.push(`AI error: ${resp.ai_result.error || "unknown"}`);
if (resp.ai_result.detail) lines.push(`Detail: ${resp.ai_result.detail}`);
}
}
if (resp.ran) {
lines.push("Ran:");
if (resp.ran.error) {
lines.push(` error: ${resp.ran.error}`);
} else {
lines.push(` exit: ${resp.ran.exit_code}`);
if (resp.ran.stdout) lines.push(` stdout: ${resp.ran.stdout.trim()}`);
if (resp.ran.stderr) lines.push(` stderr: ${resp.ran.stderr.trim()}`);
}
}
return lines.join("\n");
}
async function loadHistory() {
const h = (await storageGet(HISTORY_KEY)) || [];
return Array.isArray(h) ? h : [];
}
async function saveHistory(history) {
await storageSet({ [HISTORY_KEY]: history });
}
function historyLabel(item) {
const when = item.saved_at ? item.saved_at.replace("T", " ").replace("Z", "Z") : "";
const t = item.title ? clampString(item.title, 36) : "";
const u = item.url ? clampString(item.url, 46) : "";
const base = t || u || item.id || "";
return when ? `${when} | ${base}` : base;
}
async function refreshHistoryUI() {
const sel = $("history");
const history = await loadHistory();
sel.textContent = "";
if (!history.length) {
const opt = document.createElement("option");
opt.value = "";
opt.textContent = "(empty)";
sel.appendChild(opt);
sel.disabled = true;
return;
}
sel.disabled = false;
for (const item of history) {
const opt = document.createElement("option");
opt.value = item.id || item.png_path || item.meta_path || "";
opt.textContent = historyLabel(item);
sel.appendChild(opt);
}
}
async function getActiveTab() {
const tabs = await chrome.tabs.query({ active: true, currentWindow: true });
return tabs[0] || null;
@ -262,8 +357,12 @@ async function main() {
const endpointEl = $("endpoint");
const captureBtn = $("capture");
const pingBtn = $("ping");
const historySel = $("history");
const showHistoryBtn = $("show_history");
const clearHistoryBtn = $("clear_history");
endpointEl.value = (await storageGet("endpoint")) || DEFAULT_ENDPOINT;
await refreshHistoryUI();
endpointEl.addEventListener("change", async () => {
await storageSet({ endpoint: endpointEl.value.trim() });
@ -276,6 +375,27 @@ async function main() {
setStatus(`Ping result: ${msg}`, msg === "ok" ? "ok" : "err");
});
showHistoryBtn.addEventListener("click", async () => {
const history = await loadHistory();
if (!history.length) {
setStatus("History is empty.", "err");
return;
}
const id = historySel.value;
const item = history.find((x) => (x.id || x.png_path || x.meta_path) === id) || history[0];
if (!item) {
setStatus("No history item selected.", "err");
return;
}
setStatus(renderResult(item), "ok");
});
clearHistoryBtn.addEventListener("click", async () => {
await saveHistory([]);
await refreshHistoryUI();
setStatus("History cleared.", "ok");
});
captureBtn.addEventListener("click", async () => {
const endpoint = endpointEl.value.trim() || DEFAULT_ENDPOINT;
captureBtn.disabled = true;
@ -305,23 +425,25 @@ async function main() {
content,
});
const lines = [];
lines.push("Saved:");
lines.push(` PNG: ${resp.png_path || "(unknown)"}`);
lines.push(` META: ${resp.meta_path || "(unknown)"}`);
if (resp.content_path) lines.push(` CONTENT: ${resp.content_path}`);
if (resp.ran) {
lines.push("Ran:");
if (resp.ran.error) {
lines.push(` error: ${resp.ran.error}`);
} else {
lines.push(` exit: ${resp.ran.exit_code}`);
if (resp.ran.stdout) lines.push(` stdout: ${resp.ran.stdout.trim()}`);
if (resp.ran.stderr) lines.push(` stderr: ${resp.ran.stderr.trim()}`);
}
}
// Persist for later viewing in the popup.
const history = await loadHistory();
const entry = {
id: resp.png_path || resp.meta_path || String(Date.now()),
saved_at: new Date().toISOString(),
title: tab.title || "",
url: tab.url || "",
resp: {
png_path: resp.png_path || "",
meta_path: resp.meta_path || "",
content_path: resp.content_path || "",
ai_result: resp.ai_result || null,
},
};
const deduped = [entry, ...history.filter((x) => (x.id || x.png_path || x.meta_path) !== entry.id)].slice(0, HISTORY_LIMIT);
await saveHistory(deduped);
await refreshHistoryUI();
setStatus(lines.join("\n"), "ok");
setStatus(renderResult(entry), "ok");
} catch (e) {
setStatus(String(e && e.message ? e.message : e), "err");
} finally {

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
openai>=1.0.0

268
scripts/ai_prepare_responses.py Executable file
View File

@ -0,0 +1,268 @@
#!/usr/bin/env python3
import argparse
import base64
import json
import os
import re
import sys
from pathlib import Path
def _load_dotenv_if_present(project_root: Path) -> None:
"""
Minimal .env loader:
- supports KEY=VALUE
- ignores blank lines and lines starting with '#'
- does not support quotes/escapes; keep it simple
"""
if os.getenv("OPENAI_API_KEY"):
return
p = project_root / ".env"
if not p.exists():
return
try:
for line in p.read_text("utf-8").splitlines():
s = line.strip()
if not s or s.startswith("#") or "=" not in s:
continue
k, v = s.split("=", 1)
k = k.strip()
v = v.strip()
if k and v and k not in os.environ:
os.environ[k] = v
except Exception:
return
def _read_json(path: Path) -> dict:
return json.loads(path.read_text("utf-8"))
def _data_url_for_png(png_path: Path) -> str:
b64 = base64.b64encode(png_path.read_bytes()).decode("ascii")
return f"data:image/png;base64,{b64}"
def _truncate_text(s: str, max_chars: int) -> str:
if len(s) <= max_chars:
return s
return s[: max_chars - 1] + "\u2026"
def _safe_json_dump(obj: object, max_chars: int) -> str:
s = json.dumps(obj, ensure_ascii=True, separators=(",", ":"), sort_keys=False)
return _truncate_text(s, max_chars)
def _ea_sanitize_text(text: object) -> str:
if text is None:
return ""
s = str(text)
if s == "":
return ""
s = s.replace("\r", "").replace("\t", " ")
replacements = {
"\u201c": '"',
"\u201d": '"',
"\u201e": '"',
"\u201f": '"',
"\u2018": "'",
"\u2019": "'",
"\u201a": "'",
"\u201b": "'",
"\u2014": "-",
"\u2013": "-",
"\u2212": "-",
"\u2022": "- ",
"\u2026": "...",
}
for k, v in replacements.items():
s = s.replace(k, v)
s = re.sub(
r"[\u00A0\u2000-\u200A\u202F\u205F\u3000\u1680\u180E\u2800\u3164\uFFA0]",
" ",
s,
)
s = s.replace("\u2028", "\n").replace("\u2029", "\n\n")
s = re.sub(
r"[\u200B\u200C\u200D\u200E\u200F\u202A-\u202E\u2060\u2061\u2066-\u2069\u206A-\u206F\u00AD\u034F\u115F\u1160\u17B4\u17B5\u180B-\u180D\uFE00-\uFE0F\uFEFF\u001C\u000C]",
"",
s,
)
s = s.replace("\u2062", "x").replace("\u2063", ",").replace("\u2064", "+")
s = re.sub(r"[ ]{2,}", " ", s)
s = re.sub(r"\n{3,}", "\n\n", s)
return s.lower()
def _sanitize_ai_payload(ai: dict, page_url: str, page_title: str) -> dict:
out = dict(ai) if isinstance(ai, dict) else {}
out["page_url"] = page_url
out["page_title"] = page_title
out["notes"] = _ea_sanitize_text(out.get("notes", ""))
posts = out.get("posts", [])
if not isinstance(posts, list):
posts = []
cleaned_posts = []
for i, p in enumerate(posts):
if not isinstance(p, dict):
continue
cleaned_posts.append(
{
"index": int(p.get("index", i)),
"post_text": _ea_sanitize_text(p.get("post_text", "")),
"short_response": _ea_sanitize_text(p.get("short_response", "")),
"medium_response": _ea_sanitize_text(p.get("medium_response", "")),
}
)
out["posts"] = cleaned_posts
return out
def _response_schema(max_posts: int) -> dict:
# Keep schema simple; strict mode supports a subset of JSON Schema.
return {
"type": "object",
"additionalProperties": False,
"properties": {
"page_url": {"type": "string"},
"page_title": {"type": "string"},
"posts": {
"type": "array",
"maxItems": max_posts,
"items": {
"type": "object",
"additionalProperties": False,
"properties": {
"index": {"type": "integer"},
"post_text": {"type": "string"},
"short_response": {"type": "string"},
"medium_response": {"type": "string"},
},
"required": ["index", "post_text", "short_response", "medium_response"],
},
},
"notes": {"type": "string"},
},
# OpenAI strict json_schema currently expects all top-level properties to be required.
"required": ["page_url", "page_title", "posts", "notes"],
}
def main(argv: list[str]) -> int:
p = argparse.ArgumentParser(
description="Use OpenAI to draft short + medium responses per visible post on the page (screenshot + extracted content)."
)
p.add_argument("png_path", help="Path to saved screenshot PNG")
p.add_argument("meta_path", help="Path to saved meta JSON")
p.add_argument("content_path", nargs="?", default="", help="Optional path to saved extracted content JSON")
p.add_argument("--model", default=os.getenv("AI_EA_MODEL", "gpt-5.2"))
p.add_argument("--max-posts", type=int, default=int(os.getenv("AI_EA_MAX_POSTS", "12")))
p.add_argument("--out", default="", help="Output path (default: alongside PNG, with .ai.json suffix)")
p.add_argument("--content-max-chars", type=int, default=120_000, help="Max chars of content JSON sent to the model")
p.add_argument("--image-detail", default="auto", choices=["low", "high", "auto"])
args = p.parse_args(argv)
project_root = Path(__file__).resolve().parents[1]
_load_dotenv_if_present(project_root)
try:
from openai import OpenAI # type: ignore
except Exception:
print("Missing dependency: pip install openai", file=sys.stderr)
return 2
if not os.getenv("OPENAI_API_KEY"):
print("OPENAI_API_KEY is not set (export it or put it in .env). Skipping.", file=sys.stderr)
return 3
png_path = Path(args.png_path).expanduser().resolve()
meta_path = Path(args.meta_path).expanduser().resolve()
content_path = Path(args.content_path).expanduser().resolve() if args.content_path else None
meta = _read_json(meta_path)
content = _read_json(content_path) if (content_path and content_path.exists()) else None
instructions_path = project_root / "AI_EA_INSTRUCTIONS.MD"
system_instructions = instructions_path.read_text("utf-8") if instructions_path.exists() else ""
page_url = str(meta.get("url") or "")
page_title = str(meta.get("title") or "")
user_payload = {
"page_url": page_url,
"page_title": page_title,
"meta": meta,
"content": content,
"task": {
"goal": "Draft replies to each distinct post currently visible on the page.",
"definition_of_post": "A single feed item / post / story / comment root visible on-screen right now. If it's a single-article page, treat the main article as one post.",
"output_requirements": {
"short_response": "1-2 sentences, direct, useful, no fluff.",
"medium_response": "3-6 sentences, more context, still concise.",
"style": "Follow the system instructions for voice/tone. If unclear what the post says, be honest and ask a question instead of guessing.",
},
},
}
prompt_text = (
"You will receive (1) a screenshot of the current viewport and (2) extracted visible page content.\n"
"Identify each distinct post visible on the page and draft two reply options per post.\n"
"Do not invent facts not present in the screenshot/content.\n"
"Return JSON matching the provided schema. Include all top-level keys: page_url, page_title, posts, notes.\n"
"If a value is unknown, use an empty string.\n\n"
f"PAGE_DATA_JSON={_safe_json_dump(user_payload, args.content_max_chars)}"
)
# Screenshot is provided as a base64 data URL image input.
image_data_url = _data_url_for_png(png_path)
client = OpenAI()
resp = client.responses.create(
model=args.model,
instructions=system_instructions,
input=[
{
"role": "user",
"content": [
{"type": "input_text", "text": prompt_text},
{"type": "input_image", "image_url": image_data_url, "detail": args.image_detail},
],
}
],
text={
"format": {
"type": "json_schema",
"name": "ea_post_responses",
"description": "Draft short and medium replies for each visible post on the page.",
"schema": _response_schema(args.max_posts),
"strict": True,
},
"verbosity": "low",
},
max_output_tokens=1400,
)
raw = resp.output_text or ""
try:
parsed = json.loads(raw)
except Exception:
parsed = {"error": "non_json_output", "raw": raw}
if isinstance(parsed, dict) and "posts" in parsed:
parsed = _sanitize_ai_payload(parsed, page_url=page_url, page_title=page_title)
out_path = Path(args.out) if args.out else png_path.with_suffix(".ai.json")
out_path.write_text(json.dumps(parsed, indent=2, ensure_ascii=True) + "\n", encoding="utf-8")
print(str(out_path))
return 0
if __name__ == "__main__":
raise SystemExit(main(sys.argv[1:]))

View File

@ -3,11 +3,16 @@ set -euo pipefail
png_path="${1:?png_path missing}"
meta_path="${2:?meta_path missing}"
content_path="${3:-}"
echo "Saved PNG: ${png_path}"
echo "Saved META: ${meta_path}"
if [[ -n "${content_path}" ]]; then
echo "Saved CONTENT: ${content_path}"
fi
# Replace this with your real local workflow.
# Example:
# python3 scripts/process_screenshot.py "$png_path" "$meta_path"
if [[ -n "${OPENAI_API_KEY:-}" ]] || [[ -f ".env" ]]; then
python3 scripts/ai_prepare_responses.py "$png_path" "$meta_path" "${content_path}"
else
echo "OPENAI_API_KEY not set (and no .env). Skipping OpenAI step."
fi

View File

@ -6,6 +6,7 @@ import os
import re
import subprocess
import sys
import time
from datetime import datetime, timezone
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
@ -19,6 +20,264 @@ def _slug(s: str, max_len: int = 80) -> str:
return "screenshot"
return s[:max_len]
def _load_dotenv_if_present(project_root: Path) -> None:
"""
Minimal .env loader:
- supports KEY=VALUE
- ignores blank lines and lines starting with '#'
- does not support quotes/escapes
"""
if os.getenv("OPENAI_API_KEY"):
return
p = project_root / ".env"
if not p.exists():
return
try:
for line in p.read_text("utf-8").splitlines():
s = line.strip()
if not s or s.startswith("#") or "=" not in s:
continue
k, v = s.split("=", 1)
k = k.strip()
v = v.strip()
if k and v and k not in os.environ:
os.environ[k] = v
except Exception:
return
def _truncate(s: str, max_chars: int) -> str:
if len(s) <= max_chars:
return s
return s[: max_chars - 1] + "\u2026"
def _safe_json_dump(obj: object, max_chars: int) -> str:
s = json.dumps(obj, ensure_ascii=True, separators=(",", ":"), sort_keys=False)
return _truncate(s, max_chars)
def _ea_sanitize_text(text: object) -> str:
"""
Port of fl_geo_sanitize_text(), plus lowercase output (no capitals).
Notes:
- stays ASCII in-code by using \\u escapes for unicode literals.
- preserves newlines (normalizes excess blank lines).
"""
if text is None:
return ""
s = str(text)
if s == "":
return ""
# 1) Quick ASCII-level normalizations
s = s.replace("\r", "").replace("\t", " ")
# 2) Specific single-char replacements
replacements = {
"\u201c": '"', # “
"\u201d": '"', # ”
"\u201e": '"', # „
"\u201f": '"', # ‟
"\u2018": "'", #
"\u2019": "'", #
"\u201a": "'", #
"\u201b": "'", #
"\u2014": "-", # —
"\u2013": "-", #
"\u2212": "-", #
"\u2022": "- ", # •
"\u2026": "...", # …
}
for k, v in replacements.items():
s = s.replace(k, v)
# 3) Regex-based replacements/removals
s = re.sub(
r"[\u00A0\u2000-\u200A\u202F\u205F\u3000\u1680\u180E\u2800\u3164\uFFA0]",
" ",
s,
)
s = s.replace("\u2028", "\n") # LS
s = s.replace("\u2029", "\n\n") # PS
s = re.sub(
r"[\u200B\u200C\u200D\u200E\u200F\u202A-\u202E\u2060\u2061\u2066-\u2069\u206A-\u206F\u00AD\u034F\u115F\u1160\u17B4\u17B5\u180B-\u180D\uFE00-\uFE0F\uFEFF\u001C\u000C]",
"",
s,
)
# Invisible math
s = s.replace("\u2062", "x").replace("\u2063", ",").replace("\u2064", "+")
# 4) Collapse excessive spaces
s = re.sub(r"[ ]{2,}", " ", s)
# 5) Normalize multiple blank lines to at most two
s = re.sub(r"\n{3,}", "\n\n", s)
# Remove capitals: lowercase all text.
return s.lower()
def _sanitize_ai_payload(ai: dict, page_url: str, page_title: str) -> dict:
# Strict schema requires these keys; we prefer ground truth from meta.
out = dict(ai) if isinstance(ai, dict) else {}
out["page_url"] = page_url
out["page_title"] = page_title
out["notes"] = _ea_sanitize_text(out.get("notes", ""))
posts = out.get("posts", [])
if not isinstance(posts, list):
posts = []
cleaned_posts = []
for i, p in enumerate(posts):
if not isinstance(p, dict):
continue
cleaned_posts.append(
{
"index": int(p.get("index", i)),
"post_text": _ea_sanitize_text(p.get("post_text", "")),
"short_response": _ea_sanitize_text(p.get("short_response", "")),
"medium_response": _ea_sanitize_text(p.get("medium_response", "")),
}
)
out["posts"] = cleaned_posts
return out
def _response_schema(max_posts: int) -> dict:
return {
"type": "object",
"additionalProperties": False,
"properties": {
"page_url": {"type": "string"},
"page_title": {"type": "string"},
"posts": {
"type": "array",
"maxItems": max_posts,
"items": {
"type": "object",
"additionalProperties": False,
"properties": {
"index": {"type": "integer"},
"post_text": {"type": "string"},
"short_response": {"type": "string"},
"medium_response": {"type": "string"},
},
"required": ["index", "post_text", "short_response", "medium_response"],
},
},
"notes": {"type": "string"},
},
# OpenAI strict json_schema currently expects all top-level properties to be required.
# If you don't have a value, return "" / [].
"required": ["page_url", "page_title", "posts", "notes"],
}
def _maybe_generate_ai(server, png_path: Path, meta: dict, content: object) -> dict:
"""
Returns:
{ "ok": True, "ai": <obj>, "ai_path": <str>, "took_ms": <int> }
or { "ok": False, "error": <str>, "detail": <str?> }
"""
if not getattr(server, "ai_enabled", False):
return {"ok": False, "error": "ai_disabled"}
project_root: Path = server.project_root # type: ignore[attr-defined]
_load_dotenv_if_present(project_root)
if not os.getenv("OPENAI_API_KEY"):
return {"ok": False, "error": "missing_openai_api_key"}
try:
from openai import OpenAI # type: ignore
except Exception as e:
return {"ok": False, "error": "missing_openai_sdk", "detail": str(e)}
instructions_text = getattr(server, "ai_instructions", "")
model = getattr(server, "ai_model", "gpt-5.2")
max_posts = int(getattr(server, "ai_max_posts", 12))
content_max_chars = int(getattr(server, "ai_content_max_chars", 120_000))
image_detail = getattr(server, "ai_image_detail", "auto")
max_output_tokens = int(getattr(server, "ai_max_output_tokens", 1400))
page_url = str(meta.get("url") or "")
page_title = str(meta.get("title") or "")
user_payload = {
"page_url": page_url,
"page_title": page_title,
"meta": meta,
"content": content,
"task": {
"goal": "Draft replies to each distinct post currently visible on the page.",
"definition_of_post": "A single feed item / post / story / comment root visible on-screen right now. If it's a single-article page, treat the main article as one post.",
"output_requirements": {
"short_response": "1-2 sentences, direct, useful, no fluff.",
"medium_response": "3-6 sentences, more context, still concise.",
"style": "Follow the system instructions for voice/tone. If unclear what the post says, be honest and ask a question instead of guessing.",
},
},
}
prompt_text = (
"You will receive (1) a screenshot of the current viewport and (2) extracted visible page content.\n"
"Identify each distinct post visible on the page and draft two reply options per post.\n"
"Do not invent facts not present in the screenshot/content.\n"
"Return JSON matching the provided schema. Include all top-level keys: page_url, page_title, posts, notes.\n"
"If a value is unknown, use an empty string.\n\n"
f"PAGE_DATA_JSON={_safe_json_dump(user_payload, content_max_chars)}"
)
b64 = base64.b64encode(png_path.read_bytes()).decode("ascii")
image_data_url = f"data:image/png;base64,{b64}"
t0 = time.monotonic()
client = OpenAI()
resp = client.responses.create(
model=model,
instructions=instructions_text,
input=[
{
"role": "user",
"content": [
{"type": "input_text", "text": prompt_text},
{"type": "input_image", "image_url": image_data_url, "detail": image_detail},
],
}
],
text={
"format": {
"type": "json_schema",
"name": "ea_post_responses",
"description": "Draft short and medium replies for each visible post on the page.",
"schema": _response_schema(max_posts),
"strict": True,
},
"verbosity": "low",
},
max_output_tokens=max_output_tokens,
)
took_ms = int((time.monotonic() - t0) * 1000)
raw = resp.output_text or ""
try:
parsed = json.loads(raw)
except Exception:
parsed = {"error": "non_json_output", "raw": raw}
if isinstance(parsed, dict) and "posts" in parsed:
parsed = _sanitize_ai_payload(parsed, page_url=page_url, page_title=page_title)
ai_path = png_path.with_suffix(".ai.json")
ai_path.write_text(json.dumps(parsed, indent=2, ensure_ascii=True) + "\n", encoding="utf-8")
return {"ok": True, "ai": parsed, "ai_path": str(ai_path), "took_ms": took_ms}
class Handler(BaseHTTPRequestHandler):
server_version = "LocalScreenshotBridge/0.1"
@ -46,6 +305,7 @@ class Handler(BaseHTTPRequestHandler):
"service": "local_screenshot_bridge",
"out_dir": str(self.server.out_dir), # type: ignore[attr-defined]
"has_run_cmd": bool(getattr(self.server, "run_cmd", None)), # type: ignore[attr-defined]
"ai_enabled": bool(getattr(self.server, "ai_enabled", False)), # type: ignore[attr-defined]
},
)
@ -144,12 +404,25 @@ class Handler(BaseHTTPRequestHandler):
self._send_json(500, {"ok": False, "error": "write_failed", "detail": str(e)})
return
meta_obj = {
"title": title,
"url": page_url,
"client_ts": client_ts,
"saved_utc": now.isoformat(),
"png_path": str(png_path),
"content_path": final_content_path,
}
run = getattr(self.server, "run_cmd", None) # type: ignore[attr-defined]
ran = None
if run:
try:
# Pass content_path as a 3rd arg when available. This keeps hooks compatible with older 2-arg scripts.
args = [str(png_path), str(meta_path)]
if final_content_path:
args.append(final_content_path)
proc = subprocess.run(
run + [str(png_path), str(meta_path)],
run + args,
cwd=str(self.server.project_root), # type: ignore[attr-defined]
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
@ -164,6 +437,13 @@ class Handler(BaseHTTPRequestHandler):
except Exception as e:
ran = {"cmd": run, "error": str(e)}
ai_result = None
if getattr(self.server, "ai_enabled", False): # type: ignore[attr-defined]
try:
ai_result = _maybe_generate_ai(self.server, png_path, meta_obj, content)
except Exception as e:
ai_result = {"ok": False, "error": "ai_exception", "detail": str(e)}
self._send_json(
200,
{
@ -172,6 +452,7 @@ class Handler(BaseHTTPRequestHandler):
"meta_path": str(meta_path),
"content_path": final_content_path,
"ran": ran,
"ai_result": ai_result,
},
)
@ -181,6 +462,12 @@ def main(argv: list[str]) -> int:
p.add_argument("--port", type=int, default=8765)
p.add_argument("--bind", default="127.0.0.1", help="Bind address (default: 127.0.0.1)")
p.add_argument("--out-dir", default="screenshots", help="Output directory relative to project root")
p.add_argument("--ai", action="store_true", help="Run OpenAI to generate reply suggestions and return them to the extension")
p.add_argument("--ai-model", default=os.getenv("AI_EA_MODEL", "gpt-5.2"))
p.add_argument("--ai-max-posts", type=int, default=int(os.getenv("AI_EA_MAX_POSTS", "12")))
p.add_argument("--ai-content-max-chars", type=int, default=int(os.getenv("AI_EA_CONTENT_MAX_CHARS", "120000")))
p.add_argument("--ai-image-detail", default=os.getenv("AI_EA_IMAGE_DETAIL", "auto"))
p.add_argument("--ai-max-output-tokens", type=int, default=int(os.getenv("AI_EA_MAX_OUTPUT_TOKENS", "1400")))
p.add_argument(
"--run",
nargs="+",
@ -191,16 +478,29 @@ def main(argv: list[str]) -> int:
project_root = Path(__file__).resolve().parents[1]
out_dir = (project_root / args.out_dir).resolve()
if args.ai:
_load_dotenv_if_present(project_root)
instructions_path = project_root / "AI_EA_INSTRUCTIONS.MD"
ai_instructions = instructions_path.read_text("utf-8") if instructions_path.exists() else ""
httpd = HTTPServer((args.bind, args.port), Handler)
httpd.project_root = project_root # type: ignore[attr-defined]
httpd.out_dir = out_dir # type: ignore[attr-defined]
httpd.run_cmd = args.run # type: ignore[attr-defined]
httpd.ai_enabled = bool(args.ai) # type: ignore[attr-defined]
httpd.ai_model = args.ai_model # type: ignore[attr-defined]
httpd.ai_max_posts = args.ai_max_posts # type: ignore[attr-defined]
httpd.ai_content_max_chars = args.ai_content_max_chars # type: ignore[attr-defined]
httpd.ai_image_detail = args.ai_image_detail # type: ignore[attr-defined]
httpd.ai_max_output_tokens = args.ai_max_output_tokens # type: ignore[attr-defined]
httpd.ai_instructions = ai_instructions # type: ignore[attr-defined]
print(f"Listening on http://{args.bind}:{args.port}/screenshot", file=sys.stderr)
print(f"Saving screenshots to {out_dir}", file=sys.stderr)
if args.ai:
print(f"OpenAI enabled: model={args.ai_model} max_posts={args.ai_max_posts}", file=sys.stderr)
if args.run:
print(f"Will run: {' '.join(args.run)} <png_path> <meta_path>", file=sys.stderr)
print(f"Will run: {' '.join(args.run)} <png_path> <meta_path> [content_path]", file=sys.stderr)
try:
httpd.serve_forever()
except KeyboardInterrupt: