#!/usr/bin/env python3
|
|
import argparse
|
|
import base64
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
def _load_dotenv_if_present(project_root: Path) -> None:
|
|
"""
|
|
Minimal .env loader:
|
|
- supports KEY=VALUE
|
|
- ignores blank lines and lines starting with '#'
|
|
- does not support quotes/escapes; keep it simple
|
|
"""
|
|
if os.getenv("OPENAI_API_KEY"):
|
|
return
|
|
|
|
p = project_root / ".env"
|
|
if not p.exists():
|
|
return
|
|
|
|
try:
|
|
for line in p.read_text("utf-8").splitlines():
|
|
s = line.strip()
|
|
if not s or s.startswith("#") or "=" not in s:
|
|
continue
|
|
k, v = s.split("=", 1)
|
|
k = k.strip()
|
|
v = v.strip()
|
|
if k and v and k not in os.environ:
|
|
os.environ[k] = v
|
|
except Exception:
|
|
return
|
|
|
|
|
|
def _read_json(path: Path) -> dict:
|
|
return json.loads(path.read_text("utf-8"))
|
|
|
|
|
|
def _data_url_for_png(png_path: Path) -> str:
|
|
b64 = base64.b64encode(png_path.read_bytes()).decode("ascii")
|
|
return f"data:image/png;base64,{b64}"
|
|
|
|
|
|
def _truncate_text(s: str, max_chars: int) -> str:
|
|
if len(s) <= max_chars:
|
|
return s
|
|
return s[: max_chars - 1] + "\u2026"
|
|
|
|
|
|
def _safe_json_dump(obj: object, max_chars: int) -> str:
    """Serialize *obj* as compact ASCII JSON, truncated to *max_chars*.

    Key order is preserved (``sort_keys=False``) so the model sees the
    payload in authoring order.
    """
    compact = json.dumps(
        obj,
        ensure_ascii=True,
        separators=(",", ":"),
        sort_keys=False,
    )
    return _truncate_text(compact, max_chars)
|
|
|
|
def _ea_sanitize_text(text: object) -> str:
|
|
if text is None:
|
|
return ""
|
|
s = str(text)
|
|
if s == "":
|
|
return ""
|
|
|
|
s = s.replace("\r", "").replace("\t", " ")
|
|
|
|
replacements = {
|
|
"\u201c": '"',
|
|
"\u201d": '"',
|
|
"\u201e": '"',
|
|
"\u201f": '"',
|
|
"\u2018": "'",
|
|
"\u2019": "'",
|
|
"\u201a": "'",
|
|
"\u201b": "'",
|
|
"\u2014": "-",
|
|
"\u2013": "-",
|
|
"\u2212": "-",
|
|
"\u2022": "- ",
|
|
"\u2026": "...",
|
|
}
|
|
for k, v in replacements.items():
|
|
s = s.replace(k, v)
|
|
|
|
s = re.sub(
|
|
r"[\u00A0\u2000-\u200A\u202F\u205F\u3000\u1680\u180E\u2800\u3164\uFFA0]",
|
|
" ",
|
|
s,
|
|
)
|
|
s = s.replace("\u2028", "\n").replace("\u2029", "\n\n")
|
|
s = re.sub(
|
|
r"[\u200B\u200C\u200D\u200E\u200F\u202A-\u202E\u2060\u2061\u2066-\u2069\u206A-\u206F\u00AD\u034F\u115F\u1160\u17B4\u17B5\u180B-\u180D\uFE00-\uFE0F\uFEFF\u001C\u000C]",
|
|
"",
|
|
s,
|
|
)
|
|
s = s.replace("\u2062", "x").replace("\u2063", ",").replace("\u2064", "+")
|
|
s = re.sub(r"[ ]{2,}", " ", s)
|
|
s = re.sub(r"\n{3,}", "\n\n", s)
|
|
return s.lower()
|
|
|
|
|
|
def _sanitize_ai_payload(ai: dict, page_url: str, page_title: str) -> dict:
    """Return a sanitized copy of the model's JSON payload.

    Overwrites ``page_url``/``page_title`` with trusted values from the
    meta file, sanitizes ``notes``, drops non-dict entries from ``posts``,
    and sanitizes every text field of each surviving post.
    """
    out = dict(ai) if isinstance(ai, dict) else {}
    out["page_url"] = page_url
    out["page_title"] = page_title
    out["notes"] = _ea_sanitize_text(out.get("notes", ""))

    raw_posts = out.get("posts", [])
    if not isinstance(raw_posts, list):
        raw_posts = []

    # All per-post free-text fields get the same sanitization treatment.
    text_fields = (
        "post_text",
        "improved_short",
        "improved_medium",
        "critical_short",
        "critical_medium",
        "suggested_short",
        "suggested_medium",
    )
    sanitized = []
    for pos, post in enumerate(raw_posts):
        if not isinstance(post, dict):
            continue  # silently drop malformed entries
        # Missing index falls back to the list position.
        entry = {"index": int(post.get("index", pos))}
        for field in text_fields:
            entry[field] = _ea_sanitize_text(post.get(field, ""))
        sanitized.append(entry)
    out["posts"] = sanitized
    return out
|
|
|
|
|
|
def _response_schema(max_posts: int) -> dict:
|
|
# Keep schema simple; strict mode supports a subset of JSON Schema.
|
|
return {
|
|
"type": "object",
|
|
"additionalProperties": False,
|
|
"properties": {
|
|
"page_url": {"type": "string"},
|
|
"page_title": {"type": "string"},
|
|
"posts": {
|
|
"type": "array",
|
|
"maxItems": max_posts,
|
|
"items": {
|
|
"type": "object",
|
|
"additionalProperties": False,
|
|
"properties": {
|
|
"index": {"type": "integer"},
|
|
"post_text": {"type": "string"},
|
|
"improved_short": {"type": "string"},
|
|
"improved_medium": {"type": "string"},
|
|
"critical_short": {"type": "string"},
|
|
"critical_medium": {"type": "string"},
|
|
"suggested_short": {"type": "string"},
|
|
"suggested_medium": {"type": "string"},
|
|
},
|
|
"required": [
|
|
"index",
|
|
"post_text",
|
|
"improved_short",
|
|
"improved_medium",
|
|
"critical_short",
|
|
"critical_medium",
|
|
"suggested_short",
|
|
"suggested_medium",
|
|
],
|
|
},
|
|
},
|
|
"notes": {"type": "string"},
|
|
},
|
|
# OpenAI strict json_schema currently expects all top-level properties to be required.
|
|
"required": ["page_url", "page_title", "posts", "notes"],
|
|
}
|
|
|
|
|
|
def main(argv: list[str]) -> int:
    """CLI entry point: draft per-post replies for a captured page.

    Reads a screenshot PNG plus meta (and optionally extracted-content)
    JSON, sends both to the OpenAI Responses API with a strict JSON
    schema, sanitizes the result, and writes it to a ``.ai.json`` file
    whose path is printed to stdout.

    Returns:
        0 on success, 2 if the ``openai`` package is missing,
        3 if ``OPENAI_API_KEY`` is unset after .env loading.
    """
    p = argparse.ArgumentParser(
        description="Use OpenAI to draft short + medium responses per visible post on the page (screenshot + extracted content)."
    )
    p.add_argument("png_path", help="Path to saved screenshot PNG")
    p.add_argument("meta_path", help="Path to saved meta JSON")
    p.add_argument("content_path", nargs="?", default="", help="Optional path to saved extracted content JSON")
    p.add_argument("--model", default=os.getenv("AI_EA_MODEL", "gpt-5.2"))
    p.add_argument("--max-posts", type=int, default=int(os.getenv("AI_EA_MAX_POSTS", "12")))
    p.add_argument("--out", default="", help="Output path (default: alongside PNG, with .ai.json suffix)")
    p.add_argument("--content-max-chars", type=int, default=120_000, help="Max chars of content JSON sent to the model")
    p.add_argument("--image-detail", default="auto", choices=["low", "high", "auto"])
    args = p.parse_args(argv)

    # Project root is one directory above this script's location.
    project_root = Path(__file__).resolve().parents[1]
    _load_dotenv_if_present(project_root)

    # Import lazily so the script can give a friendly message instead of
    # an ImportError traceback when the dependency is absent.
    try:
        from openai import OpenAI  # type: ignore
    except Exception:
        print("Missing dependency: pip install openai", file=sys.stderr)
        return 2

    if not os.getenv("OPENAI_API_KEY"):
        print("OPENAI_API_KEY is not set (export it or put it in .env). Skipping.", file=sys.stderr)
        return 3

    png_path = Path(args.png_path).expanduser().resolve()
    meta_path = Path(args.meta_path).expanduser().resolve()
    content_path = Path(args.content_path).expanduser().resolve() if args.content_path else None

    meta = _read_json(meta_path)
    # Extracted content is optional; missing file silently degrades to None.
    content = _read_json(content_path) if (content_path and content_path.exists()) else None

    # System-level voice/tone instructions live in a project-root markdown
    # file; absence is tolerated (empty instructions).
    instructions_path = project_root / "AI_EA_INSTRUCTIONS.MD"
    system_instructions = instructions_path.read_text("utf-8") if instructions_path.exists() else ""

    page_url = str(meta.get("url") or "")
    page_title = str(meta.get("title") or "")
    extra_instructions = str(meta.get("extra_instructions") or "").strip()

    # Structured payload embedded into the prompt as JSON text.
    user_payload = {
        "page_url": page_url,
        "page_title": page_title,
        "meta": meta,
        "content": content,
        "task": {
            "goal": "Draft replies to each distinct post currently visible on the page.",
            "definition_of_post": "A single feed item / post / story / comment root visible on-screen right now. If it's a single-article page, treat the main article as one post.",
            "output_requirements": {
                #"ironic": "Lightly ironic, laughing at us humans (not cruel).",
                "improved": "Proofread whatever is given in extra_instructions [EXTRA_INSTRUCTIONS]!!! Proofreading-style improvements, preserving the original words as much as possible. Improving in medium version.",
                "critical": "Bold/critical: politely questions the premise or assumptions.",
                "suggested": "Best style you think fits (helpful/witty/clarifying/etc).",
                "short": "direct, useful, no fluff. if X(twitter): 1-2 sentences max, if Reddit: 3-6 sentences max.",
                "medium": "more context, still concise. if X(twitter): 3-6 sentences max, if Reddit: 6-12 sentences max.",
                "style": "Follow the system instructions for voice/tone. Apply EXTRA_INSTRUCTIONS to all responses. If unclear what the post says, be honest and ask a question instead of guessing.",
            },
        },
    }

    # The page data JSON is truncated to --content-max-chars to bound
    # prompt size.
    prompt_text = (
        "You will receive (1) a screenshot of the current viewport and (2) extracted visible page content.\n"
        "Identify each distinct post visible on the page and draft SIX reply options per post:\n"
        "- improved_short, improved_medium\n"
        "- critical_short, critical_medium\n"
        "- suggested_short, suggested_medium\n"
        "All six must follow the system instructions and EXTRA_INSTRUCTIONS.\n"
        "Do not invent facts not present in the screenshot/content.\n"
        "Return JSON matching the provided schema. Include all top-level keys: page_url, page_title, posts, notes.\n"
        "If a value is unknown, use an empty string.\n\n"
        + (f"EXTRA_INSTRUCTIONS={extra_instructions}\n\n" if extra_instructions else "")
        + f"PAGE_DATA_JSON={_safe_json_dump(user_payload, args.content_max_chars)}"
    )

    # Screenshot is provided as a base64 data URL image input.
    image_data_url = _data_url_for_png(png_path)

    client = OpenAI()

    # Responses API call with strict JSON-schema output formatting.
    resp = client.responses.create(
        model=args.model,
        instructions=system_instructions,
        input=[
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": prompt_text},
                    {"type": "input_image", "image_url": image_data_url, "detail": args.image_detail},
                ],
            }
        ],
        text={
            "format": {
                "type": "json_schema",
                "name": "ea_post_responses",
                "description": "Draft short and medium replies for each visible post on the page.",
                "schema": _response_schema(args.max_posts),
                "strict": True,
            },
            "verbosity": "low",
        },
        max_output_tokens=1400,
    )

    # Strict mode should guarantee JSON, but keep the raw text on failure
    # so the output file is still useful for debugging.
    raw = resp.output_text or ""
    try:
        parsed = json.loads(raw)
    except Exception:
        parsed = {"error": "non_json_output", "raw": raw}

    if isinstance(parsed, dict) and "posts" in parsed:
        parsed = _sanitize_ai_payload(parsed, page_url=page_url, page_title=page_title)

    # Default output path: screenshot path with its suffix swapped for
    # ".ai.json". The resulting path is printed for the caller to consume.
    out_path = Path(args.out) if args.out else png_path.with_suffix(".ai.json")
    out_path.write_text(json.dumps(parsed, indent=2, ensure_ascii=True) + "\n", encoding="utf-8")
    print(str(out_path))
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the CLI and propagate its return code as the process exit status.
    sys.exit(main(sys.argv[1:]))
|