39227-vm/tools/local_screenshot_bridge.py
2026-02-11 13:58:13 +01:00

517 lines
19 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
import argparse
import base64
import json
import os
import re
import subprocess
import sys
import time
from datetime import datetime, timezone
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
def _slug(s: str, max_len: int = 80) -> str:
s = (s or "").strip().lower()
s = re.sub(r"[^a-z0-9]+", "-", s)
s = s.strip("-")
if not s:
return "screenshot"
return s[:max_len]
def _load_dotenv_if_present(project_root: Path) -> None:
"""
Minimal .env loader:
- supports KEY=VALUE
- ignores blank lines and lines starting with '#'
- does not support quotes/escapes
"""
if os.getenv("OPENAI_API_KEY"):
return
p = project_root / ".env"
if not p.exists():
return
try:
for line in p.read_text("utf-8").splitlines():
s = line.strip()
if not s or s.startswith("#") or "=" not in s:
continue
k, v = s.split("=", 1)
k = k.strip()
v = v.strip()
if k and v and k not in os.environ:
os.environ[k] = v
except Exception:
return
def _truncate(s: str, max_chars: int) -> str:
if len(s) <= max_chars:
return s
return s[: max_chars - 1] + "\u2026"
def _safe_json_dump(obj: object, max_chars: int) -> str:
s = json.dumps(obj, ensure_ascii=True, separators=(",", ":"), sort_keys=False)
return _truncate(s, max_chars)
def _ea_sanitize_text(text: object) -> str:
"""
Port of fl_geo_sanitize_text(), plus lowercase output (no capitals).
Notes:
- stays ASCII in-code by using \\u escapes for unicode literals.
- preserves newlines (normalizes excess blank lines).
"""
if text is None:
return ""
s = str(text)
if s == "":
return ""
# 1) Quick ASCII-level normalizations
s = s.replace("\r", "").replace("\t", " ")
# 2) Specific single-char replacements
replacements = {
"\u201c": '"', # “
"\u201d": '"', # ”
"\u201e": '"', # „
"\u201f": '"', # ‟
"\u2018": "'", #
"\u2019": "'", #
"\u201a": "'", #
"\u201b": "'", #
"\u2014": "-", # —
"\u2013": "-", #
"\u2212": "-", #
"\u2022": "- ", # •
"\u2026": "...", # …
}
for k, v in replacements.items():
s = s.replace(k, v)
# 3) Regex-based replacements/removals
s = re.sub(
r"[\u00A0\u2000-\u200A\u202F\u205F\u3000\u1680\u180E\u2800\u3164\uFFA0]",
" ",
s,
)
s = s.replace("\u2028", "\n") # LS
s = s.replace("\u2029", "\n\n") # PS
s = re.sub(
r"[\u200B\u200C\u200D\u200E\u200F\u202A-\u202E\u2060\u2061\u2066-\u2069\u206A-\u206F\u00AD\u034F\u115F\u1160\u17B4\u17B5\u180B-\u180D\uFE00-\uFE0F\uFEFF\u001C\u000C]",
"",
s,
)
# Invisible math
s = s.replace("\u2062", "x").replace("\u2063", ",").replace("\u2064", "+")
# 4) Collapse excessive spaces
s = re.sub(r"[ ]{2,}", " ", s)
# 5) Normalize multiple blank lines to at most two
s = re.sub(r"\n{3,}", "\n\n", s)
# Remove capitals: lowercase all text.
return s.lower()
def _sanitize_ai_payload(ai: dict, page_url: str, page_title: str) -> dict:
    """
    Clean a model response: overwrite page_url/page_title with ground truth
    from the request meta and sanitize every free-text field.

    A non-dict *ai* becomes {}; a non-list "posts" becomes []; non-dict post
    entries are dropped. Each surviving post keeps its own "index" when given,
    otherwise its position in the incoming list.
    """
    result = dict(ai) if isinstance(ai, dict) else {}
    result["page_url"] = page_url
    result["page_title"] = page_title
    result["notes"] = _ea_sanitize_text(result.get("notes", ""))
    raw_posts = result.get("posts", [])
    if not isinstance(raw_posts, list):
        raw_posts = []
    result["posts"] = [
        {
            "index": int(post.get("index", pos)),
            "post_text": _ea_sanitize_text(post.get("post_text", "")),
            "short_response": _ea_sanitize_text(post.get("short_response", "")),
            "medium_response": _ea_sanitize_text(post.get("medium_response", "")),
        }
        for pos, post in enumerate(raw_posts)
        if isinstance(post, dict)
    ]
    return result
def _response_schema(max_posts: int) -> dict:
return {
"type": "object",
"additionalProperties": False,
"properties": {
"page_url": {"type": "string"},
"page_title": {"type": "string"},
"posts": {
"type": "array",
"maxItems": max_posts,
"items": {
"type": "object",
"additionalProperties": False,
"properties": {
"index": {"type": "integer"},
"post_text": {"type": "string"},
"short_response": {"type": "string"},
"medium_response": {"type": "string"},
},
"required": ["index", "post_text", "short_response", "medium_response"],
},
},
"notes": {"type": "string"},
},
# OpenAI strict json_schema currently expects all top-level properties to be required.
# If you don't have a value, return "" / [].
"required": ["page_url", "page_title", "posts", "notes"],
}
def _maybe_generate_ai(server, png_path: Path, meta: dict, content: object) -> dict:
    """
    Run the configured OpenAI model over the screenshot plus extracted page
    content, and persist the parsed reply suggestions next to the PNG as
    <base>.ai.json.

    Returns:
        { "ok": True, "ai": <obj>, "ai_path": <str>, "took_ms": <int> }
        or { "ok": False, "error": <str>, "detail": <str?> }
    """
    if not getattr(server, "ai_enabled", False):
        return {"ok": False, "error": "ai_disabled"}
    project_root: Path = server.project_root  # type: ignore[attr-defined]
    # Best-effort: pull OPENAI_API_KEY from .env if the environment lacks it.
    _load_dotenv_if_present(project_root)
    if not os.getenv("OPENAI_API_KEY"):
        return {"ok": False, "error": "missing_openai_api_key"}
    try:
        # Deferred import so the bridge runs without the SDK when --ai is off.
        from openai import OpenAI  # type: ignore
    except Exception as e:
        return {"ok": False, "error": "missing_openai_sdk", "detail": str(e)}
    # All tunables come from attributes set on the HTTPServer in main().
    instructions_text = getattr(server, "ai_instructions", "")
    model = getattr(server, "ai_model", "gpt-5.2")
    max_posts = int(getattr(server, "ai_max_posts", 12))
    content_max_chars = int(getattr(server, "ai_content_max_chars", 120_000))
    image_detail = getattr(server, "ai_image_detail", "auto")
    max_output_tokens = int(getattr(server, "ai_max_output_tokens", 1400))
    page_url = str(meta.get("url") or "")
    page_title = str(meta.get("title") or "")
    extra_instructions = str(meta.get("extra_instructions") or "").strip()
    # Structured task description embedded in the prompt as JSON.
    user_payload = {
        "page_url": page_url,
        "page_title": page_title,
        "meta": meta,
        "content": content,
        "task": {
            "goal": "Draft replies to each distinct post currently visible on the page.",
            "definition_of_post": "A single feed item / post / story / comment root visible on-screen right now. If it's a single-article page, treat the main article as one post.",
            "output_requirements": {
                "short_response": "1-2 sentences, direct, useful, no fluff.",
                "medium_response": "3-6 sentences, more context, still concise.",
                "style": "Follow the system instructions for voice/tone. If unclear what the post says, be honest and ask a question instead of guessing.",
            },
        },
    }
    prompt_text = (
        "You will receive (1) a screenshot of the current viewport and (2) extracted visible page content.\n"
        "Identify each distinct post visible on the page and draft two reply options per post.\n"
        "Do not invent facts not present in the screenshot/content.\n"
        "Return JSON matching the provided schema. Include all top-level keys: page_url, page_title, posts, notes.\n"
        "If a value is unknown, use an empty string.\n\n"
        + (f"EXTRA_INSTRUCTIONS={extra_instructions}\n\n" if extra_instructions else "")
        + f"PAGE_DATA_JSON={_safe_json_dump(user_payload, content_max_chars)}"
    )
    # Inline the screenshot as a data URL so no separate upload step is needed.
    b64 = base64.b64encode(png_path.read_bytes()).decode("ascii")
    image_data_url = f"data:image/png;base64,{b64}"
    t0 = time.monotonic()
    client = OpenAI()
    resp = client.responses.create(
        model=model,
        instructions=instructions_text,
        input=[
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": prompt_text},
                    {"type": "input_image", "image_url": image_data_url, "detail": image_detail},
                ],
            }
        ],
        # Strict structured output: the model must match _response_schema().
        text={
            "format": {
                "type": "json_schema",
                "name": "ea_post_responses",
                "description": "Draft short and medium replies for each visible post on the page.",
                "schema": _response_schema(max_posts),
                "strict": True,
            },
            "verbosity": "low",
        },
        max_output_tokens=max_output_tokens,
    )
    took_ms = int((time.monotonic() - t0) * 1000)
    raw = resp.output_text or ""
    try:
        parsed = json.loads(raw)
    except Exception:
        # Preserve the raw model output for debugging instead of discarding it.
        parsed = {"error": "non_json_output", "raw": raw}
    if isinstance(parsed, dict) and "posts" in parsed:
        parsed = _sanitize_ai_payload(parsed, page_url=page_url, page_title=page_title)
    # Persist the (possibly sanitized) result next to the screenshot.
    ai_path = png_path.with_suffix(".ai.json")
    ai_path.write_text(json.dumps(parsed, indent=2, ensure_ascii=True) + "\n", encoding="utf-8")
    return {"ok": True, "ai": parsed, "ai_path": str(ai_path), "took_ms": took_ms}
class Handler(BaseHTTPRequestHandler):
    """
    Local HTTP bridge for the Chrome extension.

    Routes:
      GET  / or /health  -> service/status JSON
      OPTIONS *          -> CORS preflight (always allowed)
      POST /screenshot   -> decode a PNG data URL, save PNG + sidecar metadata
                            (and optionally extracted content), then run the
                            optional hook command and/or the OpenAI step.

    Configuration (out_dir, run_cmd, project_root, ai_*) is read from
    attributes set on the HTTPServer instance in main().
    """

    server_version = "LocalScreenshotBridge/0.1"

    def _send_json(self, status: int, payload: dict) -> None:
        """Send *payload* as JSON with permissive CORS headers."""
        body = json.dumps(payload, ensure_ascii=True).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        # Chrome extension fetch() to localhost will preflight; allow it.
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "POST, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        self.end_headers()
        self.wfile.write(body)

    def do_GET(self):  # noqa: N802
        """Health/status endpoint; anything but / or /health is a 404."""
        if self.path not in ("/", "/health"):
            self._send_json(404, {"ok": False, "error": "not_found"})
            return
        self._send_json(
            200,
            {
                "ok": True,
                "service": "local_screenshot_bridge",
                "out_dir": str(self.server.out_dir),  # type: ignore[attr-defined]
                "has_run_cmd": bool(getattr(self.server, "run_cmd", None)),  # type: ignore[attr-defined]
                "ai_enabled": bool(getattr(self.server, "ai_enabled", False)),  # type: ignore[attr-defined]
            },
        )

    def do_OPTIONS(self):  # noqa: N802
        """CORS preflight: no body, permissive headers."""
        self.send_response(204)
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "POST, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        self.end_headers()

    def do_POST(self):  # noqa: N802
        """Accept a screenshot payload and persist it; see class docstring."""
        if self.path != "/screenshot":
            self._send_json(404, {"ok": False, "error": "not_found"})
            return
        try:
            length = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            self._send_json(400, {"ok": False, "error": "bad_content_length"})
            return
        raw = self.rfile.read(length)
        try:
            req = json.loads(raw.decode("utf-8"))
        except Exception:
            self._send_json(400, {"ok": False, "error": "bad_json"})
            return
        # BUGFIX: a valid JSON array/scalar body used to crash `.get()` below
        # with an unhandled AttributeError; reject non-object payloads cleanly.
        if not isinstance(req, dict):
            self._send_json(400, {"ok": False, "error": "bad_json"})
            return
        data_url = req.get("data_url") or ""
        title = req.get("title") or ""
        page_url = req.get("url") or ""
        client_ts = req.get("ts") or ""
        content = req.get("content", None)
        extra_instructions = req.get("extra_instructions") or ""
        m = re.match(r"^data:image/png;base64,(.*)$", data_url)
        if not m:
            self._send_json(400, {"ok": False, "error": "expected_png_data_url"})
            return
        try:
            png_bytes = base64.b64decode(m.group(1), validate=True)
        except Exception:
            self._send_json(400, {"ok": False, "error": "bad_base64"})
            return
        # Filenames: UTC timestamp + slug of the page title.
        now = datetime.now(timezone.utc)
        stamp = now.strftime("%Y%m%dT%H%M%SZ")
        base = f"{stamp}-{_slug(title)}"
        out_dir: Path = self.server.out_dir  # type: ignore[attr-defined]
        out_dir.mkdir(parents=True, exist_ok=True)
        png_path = out_dir / f"{base}.png"
        meta_path = out_dir / f"{base}.json"
        content_path = out_dir / f"{base}.content.json"
        try:
            png_path.write_bytes(png_bytes)
            # Save extracted page content separately to keep the meta file small/handy.
            wrote_content = False
            if content is not None:
                try:
                    raw_content = json.dumps(content, ensure_ascii=True, indent=2) + "\n"
                    # Prevent pathological payloads from creating huge files.
                    if len(raw_content.encode("utf-8")) > 2_000_000:
                        content = {
                            "error": "content_too_large_truncated",
                            "note": "Original extracted content exceeded 2MB.",
                        }
                        raw_content = json.dumps(content, ensure_ascii=True, indent=2) + "\n"
                    content_path.write_text(raw_content, encoding="utf-8")
                    wrote_content = True
                except Exception:
                    # Don't fail the whole request if content writing fails.
                    wrote_content = False
            final_content_path = str(content_path) if wrote_content else None
            meta_path.write_text(
                json.dumps(
                    {
                        "title": title,
                        "url": page_url,
                        "client_ts": client_ts,
                        "saved_utc": now.isoformat(),
                        "png_path": str(png_path),
                        "content_path": final_content_path,
                        "extra_instructions": extra_instructions,
                    },
                    indent=2,
                    ensure_ascii=True,
                )
                + "\n",
                encoding="utf-8",
            )
        except Exception as e:
            self._send_json(500, {"ok": False, "error": "write_failed", "detail": str(e)})
            return
        # In-memory copy of the metadata, handed to the AI step below.
        meta_obj = {
            "title": title,
            "url": page_url,
            "client_ts": client_ts,
            "saved_utc": now.isoformat(),
            "png_path": str(png_path),
            "content_path": final_content_path,
            "extra_instructions": extra_instructions,
        }
        run = getattr(self.server, "run_cmd", None)  # type: ignore[attr-defined]
        ran = None
        if run:
            try:
                # Pass content_path as a 3rd arg when available. This keeps hooks compatible with older 2-arg scripts.
                args = [str(png_path), str(meta_path)]
                if final_content_path:
                    args.append(final_content_path)
                proc = subprocess.run(
                    run + args,
                    cwd=str(self.server.project_root),  # type: ignore[attr-defined]
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True,
                )
                # Keep only the tail of the hook output so the response stays small.
                ran = {
                    "cmd": run,
                    "exit_code": proc.returncode,
                    "stdout": proc.stdout[-4000:],
                    "stderr": proc.stderr[-4000:],
                }
            except Exception as e:
                ran = {"cmd": run, "error": str(e)}
        ai_result = None
        if getattr(self.server, "ai_enabled", False):  # type: ignore[attr-defined]
            try:
                ai_result = _maybe_generate_ai(self.server, png_path, meta_obj, content)
            except Exception as e:
                # An AI failure must never lose an already-saved screenshot.
                ai_result = {"ok": False, "error": "ai_exception", "detail": str(e)}
        self._send_json(
            200,
            {
                "ok": True,
                "png_path": str(png_path),
                "meta_path": str(meta_path),
                "content_path": final_content_path,
                "ran": ran,
                "ai_result": ai_result,
            },
        )
def main(argv: list[str]) -> int:
    """
    CLI entry point: parse arguments, configure the HTTP server, serve forever.

    Returns 0 on clean shutdown (Ctrl-C). The listening socket is always
    closed on exit (fixes a leak where serve_forever's socket was never
    released, and the declared `-> int` could previously return None).
    """
    p = argparse.ArgumentParser(description="Receive screenshots from a Chrome extension and save into this project.")
    p.add_argument("--port", type=int, default=8765)
    p.add_argument("--bind", default="127.0.0.1", help="Bind address (default: 127.0.0.1)")
    p.add_argument("--out-dir", default="screenshots", help="Output directory relative to project root")
    p.add_argument("--ai", action="store_true", help="Run OpenAI to generate reply suggestions and return them to the extension")
    p.add_argument("--ai-model", default=os.getenv("AI_EA_MODEL", "gpt-5.2"))
    p.add_argument("--ai-max-posts", type=int, default=int(os.getenv("AI_EA_MAX_POSTS", "12")))
    p.add_argument("--ai-content-max-chars", type=int, default=int(os.getenv("AI_EA_CONTENT_MAX_CHARS", "120000")))
    p.add_argument("--ai-image-detail", default=os.getenv("AI_EA_IMAGE_DETAIL", "auto"))
    p.add_argument("--ai-max-output-tokens", type=int, default=int(os.getenv("AI_EA_MAX_OUTPUT_TOKENS", "1400")))
    p.add_argument(
        "--run",
        nargs="+",
        default=None,
        help="Optional command to run after saving. Args appended: <png_path> <meta_path> [content_path].",
    )
    args = p.parse_args(argv)

    # Project root is the parent of the tools/ directory this script lives in.
    project_root = Path(__file__).resolve().parents[1]
    out_dir = (project_root / args.out_dir).resolve()

    if args.ai:
        # Load OPENAI_API_KEY (and friends) early so a missing key surfaces at startup.
        _load_dotenv_if_present(project_root)
    instructions_path = project_root / "AI_EA_INSTRUCTIONS.MD"
    ai_instructions = instructions_path.read_text("utf-8") if instructions_path.exists() else ""

    httpd = HTTPServer((args.bind, args.port), Handler)
    # Handler reads its configuration off the server instance.
    httpd.project_root = project_root  # type: ignore[attr-defined]
    httpd.out_dir = out_dir  # type: ignore[attr-defined]
    httpd.run_cmd = args.run  # type: ignore[attr-defined]
    httpd.ai_enabled = bool(args.ai)  # type: ignore[attr-defined]
    httpd.ai_model = args.ai_model  # type: ignore[attr-defined]
    httpd.ai_max_posts = args.ai_max_posts  # type: ignore[attr-defined]
    httpd.ai_content_max_chars = args.ai_content_max_chars  # type: ignore[attr-defined]
    httpd.ai_image_detail = args.ai_image_detail  # type: ignore[attr-defined]
    httpd.ai_max_output_tokens = args.ai_max_output_tokens  # type: ignore[attr-defined]
    httpd.ai_instructions = ai_instructions  # type: ignore[attr-defined]

    print(f"Listening on http://{args.bind}:{args.port}/screenshot", file=sys.stderr)
    print(f"Saving screenshots to {out_dir}", file=sys.stderr)
    if args.ai:
        print(f"OpenAI enabled: model={args.ai_model} max_posts={args.ai_max_posts}", file=sys.stderr)
    if args.run:
        print(f"Will run: {' '.join(args.run)} <png_path> <meta_path> [content_path]", file=sys.stderr)
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        # Release the listening socket even on unexpected exits.
        httpd.server_close()
    return 0
# Script entry point: exit with main()'s return code (0 on clean shutdown).
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))