Regenerate summaries at will

This commit is contained in:
Flatlogic Bot 2026-02-07 02:35:09 +00:00
parent b73da29ee4
commit 556b73ecb5
9 changed files with 242 additions and 122 deletions

View File

@ -1,37 +1,3 @@
"""
LocalAIApi lightweight Python client for the Flatlogic AI proxy.
Usage (inside the Django workspace):
from ai.local_ai_api import LocalAIApi
response = LocalAIApi.create_response({
"input": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Summarise this text in two sentences."},
],
"text": {"format": {"type": "json_object"}},
})
if response.get("success"):
data = LocalAIApi.decode_json_from_response(response)
# ...
# Typical successful payload (truncated):
# {
# "id": "resp_xxx",
# "status": "completed",
# "output": [
# {"type": "reasoning", "summary": []},
# {"type": "message", "content": [{"type": "output_text", "text": "Your final answer here."}]}
# ],
# "usage": { "input_tokens": 123, "output_tokens": 456 }
# }
The helper automatically injects the project UUID header and falls back to
reading executor/.env if environment variables are missing.
"""
from __future__ import annotations
import json
@ -52,10 +18,8 @@ __all__ = [
"decode_json_from_response",
]
_CONFIG_CACHE: Optional[Dict[str, Any]] = None
class LocalAIApi:
"""Static helpers mirroring the PHP implementation."""
@ -76,9 +40,7 @@ class LocalAIApi:
def decode_json_from_response(response: Dict[str, Any]) -> Optional[Dict[str, Any]]:
return decode_json_from_response(response)
def create_response(params: Dict[str, Any], options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""Signature compatible with the OpenAI Responses API."""
options = options or {}
payload = dict(params)
@ -111,9 +73,7 @@ def create_response(params: Dict[str, Any], options: Optional[Dict[str, Any]] =
return initial
def request(path: Optional[str], payload: Dict[str, Any], options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""Perform a raw request to the AI proxy."""
cfg = _config()
options = options or {}
@ -145,6 +105,7 @@ def request(path: Optional[str], payload: Dict[str, Any], options: Optional[Dict
"Content-Type": "application/json",
"Accept": "application/json",
cfg["project_header"]: project_uuid,
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
}
extra_headers = options.get("headers")
if isinstance(extra_headers, Iterable):
@ -156,9 +117,7 @@ def request(path: Optional[str], payload: Dict[str, Any], options: Optional[Dict
body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
return _http_request(url, "POST", body, headers, timeout, verify_tls)
def fetch_status(ai_request_id: Any, options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""Fetch status for a queued AI request."""
cfg = _config()
options = options or {}
@ -180,6 +139,7 @@ def fetch_status(ai_request_id: Any, options: Optional[Dict[str, Any]] = None) -
headers: Dict[str, str] = {
"Accept": "application/json",
cfg["project_header"]: project_uuid,
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
}
extra_headers = options.get("headers")
if isinstance(extra_headers, Iterable):
@ -190,9 +150,7 @@ def fetch_status(ai_request_id: Any, options: Optional[Dict[str, Any]] = None) -
return _http_request(url, "GET", None, headers, timeout, verify_tls)
def await_response(ai_request_id: Any, options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""Poll status endpoint until the request is complete or timed out."""
options = options or {}
timeout = int(options.get("timeout", 300))
interval = int(options.get("interval", 5))
@ -236,14 +194,10 @@ def await_response(ai_request_id: Any, options: Optional[Dict[str, Any]] = None)
}
time.sleep(interval)
def extract_text(response: Dict[str, Any]) -> str:
    """Return the plain text carried by a Responses payload.

    Public entry point that delegates to the private ``_extract_text`` helper
    and hands its result back unchanged.
    """
    plain_text = _extract_text(response)
    return plain_text
def decode_json_from_response(response: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""Attempt to decode JSON emitted by the model (handles markdown fences)."""
text = _extract_text(response)
if text == "":
return None
@ -270,7 +224,6 @@ def decode_json_from_response(response: Dict[str, Any]) -> Optional[Dict[str, An
return None
return None
def _extract_text(response: Dict[str, Any]) -> str:
payload = response.get("data") if response.get("success") else response.get("response")
if isinstance(payload, dict):
@ -294,9 +247,8 @@ def _extract_text(response: Dict[str, Any]) -> str:
return payload
return ""
def _config() -> Dict[str, Any]:
global _CONFIG_CACHE # noqa: PLW0603
global _CONFIG_CACHE
if _CONFIG_CACHE is not None:
return _CONFIG_CACHE
@ -320,7 +272,6 @@ def _config() -> Dict[str, Any]:
}
return _CONFIG_CACHE
def _build_url(path: str, base_url: str) -> str:
trimmed = path.strip()
if trimmed.startswith("http://") or trimmed.startswith("https://"):
@ -329,7 +280,6 @@ def _build_url(path: str, base_url: str) -> str:
return f"{base_url}{trimmed}"
return f"{base_url}/{trimmed}"
def _resolve_status_path(ai_request_id: Any, cfg: Dict[str, Any]) -> str:
base_path = (cfg.get("responses_path") or "").rstrip("/")
if not base_path:
@ -338,12 +288,8 @@ def _resolve_status_path(ai_request_id: Any, cfg: Dict[str, Any]) -> str:
base_path = f"{base_path}/ai-request"
return f"{base_path}/{ai_request_id}/status"
def _http_request(url: str, method: str, body: Optional[bytes], headers: Dict[str, str],
timeout: int, verify_tls: bool) -> Dict[str, Any]:
"""
Shared HTTP helper for GET/POST requests.
"""
req = urlrequest.Request(url, data=body, method=method.upper())
for name, value in headers.items():
req.add_header(name, value)
@ -361,7 +307,7 @@ def _http_request(url: str, method: str, body: Optional[bytes], headers: Dict[st
except urlerror.HTTPError as exc:
status = exc.getcode()
response_body = exc.read().decode("utf-8", errors="replace")
except Exception as exc: # pylint: disable=broad-except
except Exception as exc:
return {
"success": False,
"error": "request_failed",
@ -395,9 +341,7 @@ def _http_request(url: str, method: str, body: Optional[bytes], headers: Dict[st
"response": decoded if decoded is not None else response_body,
}
def _ensure_env_loaded() -> None:
"""Populate os.environ from executor/.env if variables are missing."""
if os.getenv("PROJECT_UUID") and os.getenv("PROJECT_ID"):
return

View File

@ -6,9 +6,18 @@ from ai.local_ai_api import LocalAIApi
from bs4 import BeautifulSoup
import html2text
import logging
from urllib.parse import urlparse
logger = logging.getLogger(__name__)
# Headers passed to every httpx.Client created for bookmark fetching; the only
# entry is a desktop Chrome User-Agent string.
# NOTE(review): presumably set so sites do not serve an anti-bot page to the
# default client User-Agent -- confirm the intent.
DEFAULT_HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
def get_base_url(url):
    """Return the ``scheme://netloc/`` root of *url*.

    Path, query string and fragment are dropped; the result always ends with
    a single trailing slash.
    """
    scheme, netloc = urlparse(url)[:2]
    return "{}://{}/".format(scheme, netloc)
@shared_task(bind=True, max_retries=3)
def process_bookmark(self, bookmark_id):
try:
@ -16,14 +25,43 @@ def process_bookmark(self, bookmark_id):
except Bookmark.DoesNotExist:
return
html_content = ""
status_code = None
content_type = None
used_backup = False
try:
with httpx.Client(follow_redirects=True, timeout=30.0) as client:
with httpx.Client(follow_redirects=True, timeout=20.0, headers=DEFAULT_HEADERS) as client:
response = client.get(bookmark.url)
response.raise_for_status()
html_content = response.text
status_code = response.status_code
content_type = response.headers.get('content-type')
# If content is too small, maybe it's a redirect or anti-bot page
if len(html_content) < 500:
raise ValueError("Content too small, likely failed to scrape meaningful data.")
except Exception as exc:
logger.error(f"Error fetching bookmark {bookmark_id}: {exc}")
raise self.retry(exc=exc, countdown=60)
logger.warning(f"Error fetching bookmark {bookmark_id} ({bookmark.url}): {exc}. Trying base domain backup.")
try:
base_url = get_base_url(bookmark.url)
if base_url.rstrip('/') != bookmark.url.rstrip('/'):
with httpx.Client(follow_redirects=True, timeout=20.0, headers=DEFAULT_HEADERS) as client:
response = client.get(base_url)
response.raise_for_status()
html_content = response.text
status_code = response.status_code
content_type = response.headers.get('content-type')
used_backup = True
else:
if not html_content:
raise exc
except Exception as base_exc:
logger.error(f"Error fetching base domain for bookmark {bookmark_id}: {base_exc}")
if not html_content:
html_content = f"<html><body><p>Failed to retrieve content from {bookmark.url} and its base domain.</p></body></html>"
status_code = status_code or 0
soup = BeautifulSoup(html_content, 'html.parser')
@ -46,8 +84,9 @@ def process_bookmark(self, bookmark_id):
'content_html': html_content,
'content_text': text_content,
'metadata': {
'status_code': response.status_code,
'content_type': response.headers.get('content-type'),
'status_code': status_code,
'content_type': content_type,
'used_backup': used_backup,
}
}
)
@ -62,30 +101,69 @@ def generate_summary(bookmark_id):
try:
bookmark = Bookmark.objects.get(id=bookmark_id)
extraction = bookmark.extraction
except (Bookmark.DoesNotExist, Extraction.DoesNotExist):
except Bookmark.DoesNotExist:
return
except Extraction.DoesNotExist:
# If extraction doesn't exist yet, we might want to wait or just return
# But in EAGER mode it should be there.
return
if not extraction.content_text:
content_to_summarize = extraction.content_text.strip()
used_backup = extraction.metadata.get('used_backup', False)
if not content_to_summarize or len(content_to_summarize) < 50:
Summary.objects.update_or_create(
bookmark=bookmark,
defaults={'content': f"Insufficient content extracted from {bookmark.url} to generate a meaningful AI summary."}
)
return
# Prepare prompt for AI
prompt = f"Summarize the following content from the webpage '{bookmark.title or bookmark.url}' in 2-3 concise sentences. Focus on the main points for a researcher.\n\nContent:\n{extraction.content_text[:4000]}"
if used_backup:
prompt = f"The specific page '{bookmark.url}' could not be reached. Summarize the main domain front page content instead to describe what this website is about.\n\nContent:\n{content_to_summarize[:4000]}"
else:
prompt = f"Summarize the following content from the webpage '{bookmark.title or bookmark.url}' in 2-3 concise sentences. Focus on the main points for a researcher.\n\nContent:\n{content_to_summarize[:4000]}"
response = LocalAIApi.create_response({
"input": [
{"role": "system", "content": "You are a helpful assistant that summarizes web content for researchers and knowledge workers. Be concise and professional."},
{"role": "user", "content": prompt},
],
})
try:
response = LocalAIApi.create_response({
"input": [
{"role": "system", "content": "You are a helpful assistant that summarizes web content for researchers and knowledge workers. Be concise and professional."},
{"role": "user", "content": prompt},
],
})
if response.get("success"):
summary_text = LocalAIApi.extract_text(response)
if summary_text:
summary_text = None
if response.get("success"):
summary_text = LocalAIApi.extract_text(response)
if summary_text and len(summary_text.strip()) > 10:
Summary.objects.update_or_create(
bookmark=bookmark,
defaults={'content': summary_text}
defaults={'content': summary_text.strip()}
)
return f"Generated summary for bookmark {bookmark_id}"
logger.error(f"Failed to generate summary for bookmark {bookmark_id}: {response.get('error')}")
return f"Failed to generate summary for bookmark {bookmark_id}"
else:
error_msg = response.get('error') or "Empty response from AI"
logger.error(f"Failed to generate summary for bookmark {bookmark_id}: {error_msg}")
# Create a fallback summary to stop the spinner
fallback_content = "AI summary could not be generated at this time. "
if used_backup:
fallback_content += "The original page was unreachable, and the home page content was insufficient for a summary."
elif bookmark.title:
fallback_content += f"The page appears to be titled '{bookmark.title}'."
else:
fallback_content += f"Please visit the link directly: {bookmark.url}"
Summary.objects.update_or_create(
bookmark=bookmark,
defaults={'content': fallback_content}
)
return f"Failed to generate summary for bookmark {bookmark_id}, created fallback."
except Exception as e:
logger.exception(f"Unexpected error in generate_summary for bookmark {bookmark_id}: {e}")
Summary.objects.update_or_create(
bookmark=bookmark,
defaults={'content': "An unexpected error occurred while generating the AI summary."}
)
return f"Error in generate_summary for bookmark {bookmark_id}"

View File

@ -16,20 +16,28 @@
<div class="d-flex justify-content-between align-items-start mb-3">
<h1 class="h2">{{ bookmark.title|default:bookmark.url }}</h1>
{% if bookmark.user == request.user %}
<div class="dropdown">
<button class="btn btn-outline-secondary btn-sm" type="button" data-bs-toggle="dropdown">
<i class="bi bi-three-dots"></i>
</button>
<ul class="dropdown-menu">
<li><a class="dropdown-item" href="{% url 'bookmark-edit' bookmark.pk %}">Edit</a></li>
<li><hr class="dropdown-divider"></li>
<li>
<form action="{% url 'bookmark-delete' bookmark.pk %}" method="post" onsubmit="return confirm('Are you sure?');">
{% csrf_token %}
<button type="submit" class="dropdown-item text-danger">Delete</button>
</form>
</li>
</ul>
<div class="d-flex gap-2">
<form action="{% url 'bookmark-regenerate' bookmark.pk %}" method="post">
{% csrf_token %}
<button type="submit" class="btn btn-outline-primary btn-sm" title="Regenerate Summary and Content">
<i class="bi bi-arrow-clockwise"></i> Regenerate
</button>
</form>
<div class="dropdown">
<button class="btn btn-outline-secondary btn-sm" type="button" data-bs-toggle="dropdown">
<i class="bi bi-three-dots"></i>
</button>
<ul class="dropdown-menu dropdown-menu-end">
<li><a class="dropdown-item" href="{% url 'bookmark-edit' bookmark.pk %}">Edit Metadata</a></li>
<li><hr class="dropdown-divider"></li>
<li>
<form action="{% url 'bookmark-delete' bookmark.pk %}" method="post" onsubmit="return confirm('Are you sure?');">
{% csrf_token %}
<button type="submit" class="dropdown-item text-danger">Delete Bookmark</button>
</form>
</li>
</ul>
</div>
</div>
{% endif %}
</div>
@ -47,19 +55,37 @@
</div>
{% endif %}
{% if bookmark.summary %}
<div class="mb-4">
<h5 class="text-uppercase small fw-bold text-muted mb-2">AI Summary</h5>
<div class="p-3 border rounded shadow-sm bg-white">
<div class="mb-4">
<div class="d-flex justify-content-between align-items-center mb-2">
<h5 class="text-uppercase small fw-bold text-muted mb-0">AI Summary</h5>
{% if bookmark.user == request.user and bookmark.summary %}
<button class="btn btn-link btn-sm p-0 text-decoration-none" onclick="toggleEdit('summary')">Edit</button>
{% endif %}
</div>
{% if bookmark.summary %}
<div id="summary-display" class="p-3 border rounded shadow-sm bg-white">
{{ bookmark.summary.content }}
</div>
</div>
{% else %}
<div class="alert alert-light border text-center small py-3">
<div class="spinner-border spinner-border-sm text-primary me-2" role="status"></div>
AI Summary is being generated...
</div>
{% endif %}
{% if bookmark.user == request.user %}
<div id="summary-edit" class="d-none">
<form action="{% url 'summary-update' bookmark.pk %}" method="post">
{% csrf_token %}
<textarea name="content" class="form-control mb-2" rows="4">{{ bookmark.summary.content }}</textarea>
<div class="d-flex gap-2">
<button type="submit" class="btn btn-primary btn-sm">Save</button>
<button type="button" class="btn btn-outline-secondary btn-sm" onclick="toggleEdit('summary')">Cancel</button>
</div>
</form>
</div>
{% endif %}
{% else %}
<div class="alert alert-light border text-center small py-3">
<div class="spinner-border spinner-border-sm text-primary me-2" role="status"></div>
AI Summary is being generated...
</div>
{% endif %}
</div>
<div class="mt-4">
{% for tag in bookmark.tags.all %}
@ -68,14 +94,37 @@
</div>
</div>
{% if bookmark.extraction %}
<div class="card p-4">
<h5 class="text-uppercase small fw-bold text-muted mb-3">Extracted Text Content</h5>
<div class="extraction-content text-muted small" style="max-height: 500px; overflow-y: auto;">
<div class="card p-4">
<div class="d-flex justify-content-between align-items-center mb-3">
<h5 class="text-uppercase small fw-bold text-muted mb-0">Extracted Text Content</h5>
{% if bookmark.user == request.user and bookmark.extraction %}
<button class="btn btn-link btn-sm p-0 text-decoration-none" onclick="toggleEdit('extraction')">Edit</button>
{% endif %}
</div>
{% if bookmark.extraction %}
<div id="extraction-display" class="extraction-content text-muted small" style="max-height: 500px; overflow-y: auto;">
{{ bookmark.extraction.content_text|linebreaks }}
</div>
</div>
{% endif %}
{% if bookmark.user == request.user %}
<div id="extraction-edit" class="d-none">
<form action="{% url 'extraction-update' bookmark.pk %}" method="post">
{% csrf_token %}
<textarea name="content_text" class="form-control mb-2" rows="15">{{ bookmark.extraction.content_text }}</textarea>
<div class="d-flex gap-2">
<button type="submit" class="btn btn-primary btn-sm">Save</button>
<button type="button" class="btn btn-outline-secondary btn-sm" onclick="toggleEdit('extraction')">Cancel</button>
</div>
</form>
</div>
{% endif %}
{% else %}
<div class="alert alert-light border text-center small py-3">
<div class="spinner-border spinner-border-sm text-primary me-2" role="status"></div>
Content is being extracted...
</div>
{% endif %}
</div>
</div>
<div class="col-md-4">
@ -121,6 +170,18 @@
{% block extra_js %}
<script>
function toggleEdit(type) {
    // Swap visibility between the read-only "<type>-display" element and the
    // "<type>-edit" form by moving the `d-none` class from one to the other.
    const viewEl = document.getElementById(type + '-display');
    const formEl = document.getElementById(type + '-edit');
    // When the display element is hidden the form is the one showing, so the
    // call should bring the display back; otherwise it should open the form.
    const formIsOpen = viewEl.classList.contains('d-none');
    viewEl.classList.toggle('d-none', !formIsOpen);
    formEl.classList.toggle('d-none', formIsOpen);
}
document.querySelectorAll('.share-toggle').forEach(button => {
button.addEventListener('click', async function() {
const url = this.getAttribute('data-url');

View File

@ -4,7 +4,8 @@ from core.api_views import BookmarkViewSet, TeamViewSet, ApiStatusView
from core.views import (
BookmarkListView, BookmarkCreateView, BookmarkDetailView,
BookmarkUpdateView, BookmarkDeleteView,
TeamListView, TeamDetailView, BookmarkShareToggleView
TeamListView, TeamDetailView, BookmarkShareToggleView,
BookmarkRegenerateView, SummaryUpdateView, ExtractionUpdateView
)
router = DefaultRouter()
@ -17,6 +18,9 @@ urlpatterns = [
path("bookmark/<int:pk>/", BookmarkDetailView.as_view(), name="bookmark-detail"),
path("bookmark/<int:pk>/edit/", BookmarkUpdateView.as_view(), name="bookmark-edit"),
path("bookmark/<int:pk>/delete/", BookmarkDeleteView.as_view(), name="bookmark-delete"),
path("bookmark/<int:pk>/regenerate/", BookmarkRegenerateView.as_view(), name="bookmark-regenerate"),
path("bookmark/<int:pk>/summary/update/", SummaryUpdateView.as_view(), name="summary-update"),
path("bookmark/<int:pk>/extraction/update/", ExtractionUpdateView.as_view(), name="extraction-update"),
path("bookmark/<int:pk>/share/<int:team_id>/", BookmarkShareToggleView.as_view(), name="bookmark-share-toggle"),
path("teams/", TeamListView.as_view(), name="team-list"),
@ -24,4 +28,4 @@ urlpatterns = [
path("api/status/", ApiStatusView.as_view(), name="api-status"),
path("api/", include(router.urls)),
]
]

View File

@ -2,10 +2,10 @@ from django.shortcuts import render, redirect, get_object_or_404
from django.views import View
from django.views.generic import ListView, CreateView, DetailView, UpdateView, DeleteView
from django.contrib.auth.mixins import LoginRequiredMixin
from django.urls import reverse_lazy
from django.urls import reverse_lazy, reverse
from django.db.models import Q
from django.http import JsonResponse
from .models import Bookmark, Team, Extraction, BookmarkShare
from django.http import JsonResponse, HttpResponseRedirect
from .models import Bookmark, Team, Extraction, BookmarkShare, Summary
from .tasks import process_bookmark
class BookmarkListView(LoginRequiredMixin, ListView):
@ -52,9 +52,6 @@ class BookmarkCreateView(LoginRequiredMixin, CreateView):
form.instance.user = self.request.user
response = super().form_valid(form)
# Handle tags if provided in a separate field or as a comma-separated string
# For simplicity, we'll assume the model's TaggableManager handles it if added to fields,
# but here we might need to handle it manually if we use a custom field.
# Let's add 'tags' to fields in the actual form.
tags = self.request.POST.get('tags_input')
if tags:
form.instance.tags.add(*[t.strip() for t in tags.split(',')])
@ -66,7 +63,9 @@ class BookmarkUpdateView(LoginRequiredMixin, UpdateView):
model = Bookmark
fields = ['url', 'title', 'notes', 'is_favorite']
template_name = 'core/bookmark_form.html'
success_url = reverse_lazy('home')
def get_success_url(self):
return reverse('bookmark-detail', kwargs={'pk': self.object.pk})
def get_queryset(self):
return Bookmark.objects.filter(user=self.request.user)
@ -91,6 +90,40 @@ class BookmarkDetailView(LoginRequiredMixin, DetailView):
Q(shares__team__in=user_teams)
).distinct()
class BookmarkRegenerateView(LoginRequiredMixin, View):
    """POST endpoint that discards a bookmark's derived data and re-queues processing."""

    def post(self, request, pk):
        """Delete the stored summary/extraction, enqueue ``process_bookmark``, then redirect.

        The bookmark is looked up by ``pk`` restricted to ``request.user``;
        ``get_object_or_404`` raises a 404 when no such row exists.
        """
        target = get_object_or_404(Bookmark, user=request.user, pk=pk)
        # Removing the related rows first forces regeneration and lets the
        # detail page show its loading states in the meantime.
        for relation in ('summary', 'extraction'):
            if hasattr(target, relation):
                getattr(target, relation).delete()
        process_bookmark.delay(target.id)
        detail_url = reverse('bookmark-detail', args=[pk])
        return HttpResponseRedirect(detail_url)
class SummaryUpdateView(LoginRequiredMixin, View):
    """POST endpoint that overwrites a bookmark's summary with submitted text."""

    def post(self, request, pk):
        """Store the posted ``content`` as the bookmark's summary and redirect to its detail page.

        The bookmark must belong to ``request.user`` (404 otherwise). A missing
        or empty ``content`` field is ignored and nothing is written.
        """
        owned = get_object_or_404(Bookmark, user=request.user, pk=pk)
        back_to_detail = HttpResponseRedirect(reverse('bookmark-detail', args=[pk]))
        new_text = request.POST.get('content')
        if not new_text:
            return back_to_detail
        Summary.objects.update_or_create(
            defaults={'content': new_text},
            bookmark=owned,
        )
        return back_to_detail
class ExtractionUpdateView(LoginRequiredMixin, View):
    """POST endpoint that overwrites the extracted text stored for a bookmark."""

    def post(self, request, pk):
        """Save the posted ``content_text`` on the bookmark's extraction, then redirect.

        Only a bookmark belonging to ``request.user`` is accepted (404
        otherwise). When ``content_text`` is absent or empty the stored
        extraction is left as it is.
        """
        owned = get_object_or_404(Bookmark, user=request.user, pk=pk)
        submitted = request.POST.get('content_text')
        if submitted:
            lookup = {'bookmark': owned}
            changes = {'content_text': submitted}
            Extraction.objects.update_or_create(defaults=changes, **lookup)
        destination = reverse('bookmark-detail', args=[pk])
        return HttpResponseRedirect(destination)
class TeamListView(LoginRequiredMixin, ListView):
model = Team
template_name = 'core/team_list.html'
@ -130,4 +163,4 @@ class BookmarkShareToggleView(LoginRequiredMixin, View):
else:
shared = True
return JsonResponse({'shared': shared})
return JsonResponse({'shared': shared})