38191-vm/core/tasks.py
2026-02-05 16:44:24 +00:00

91 lines
3.0 KiB
Python

import httpx
from celery import shared_task
from django.utils import timezone
from core.models import Bookmark, Extraction, Summary
from ai.local_ai_api import LocalAIApi
from bs4 import BeautifulSoup
import html2text
import logging
# Module-level logger, named after this module through ``__name__``; both
# tasks below report fetch and summarisation failures through it.
logger = logging.getLogger(__name__)
@shared_task(bind=True, max_retries=3)
def process_bookmark(self, bookmark_id):
    """Fetch a bookmark's page, store its extraction and queue a summary.

    Steps:
      1. Load the ``Bookmark`` whose id is ``bookmark_id``.
      2. GET ``bookmark.url`` (redirects followed, 30 s timeout).
      3. If the bookmark has no title yet, take it from the page's
         ``<title>`` element.
      4. Convert the HTML to text with html2text and upsert the bookmark's
         ``Extraction`` row (raw HTML, text, status code, content type).
      5. Enqueue ``generate_summary`` for the same bookmark.

    Args:
        bookmark_id: Primary key of the ``Bookmark`` to process.

    Returns:
        ``None`` if the bookmark does not exist, otherwise the string
        ``"Processed bookmark <id>"``.

    Raises:
        The exception produced by ``self.retry`` whenever the fetch fails
        (network error, timeout, non-2xx status, ...). Retries are scheduled
        60 s apart, at most ``max_retries`` times; per Celery's documented
        behaviour, once retries are exhausted the original exception passed
        as ``exc`` is raised instead.
    """
    try:
        bookmark = Bookmark.objects.get(id=bookmark_id)
    except Bookmark.DoesNotExist:
        # The bookmark was deleted before the task ran; nothing to do.
        return None

    try:
        with httpx.Client(follow_redirects=True, timeout=30.0) as client:
            response = client.get(bookmark.url)
            response.raise_for_status()
            html_content = response.text
    except Exception as exc:
        # Deliberately broad: any fetch failure is logged and handed to
        # Celery's retry machinery rather than crashing the worker.
        logger.error("Error fetching bookmark %s: %s", bookmark_id, exc)
        raise self.retry(exc=exc, countdown=60)

    # Simple title extraction if not already set. The document is only
    # parsed when a title is actually needed.
    if not bookmark.title:
        title_tag = BeautifulSoup(html_content, 'html.parser').find('title')
        if title_tag is not None:
            # ``title_tag.string`` is None for an empty <title> or one that
            # contains nested markup, so ``.string.strip()`` would raise
            # AttributeError; ``get_text()`` always returns a str.
            # The 255 cap presumably mirrors the title field's max_length --
            # TODO confirm against the Bookmark model.
            title_text = title_tag.get_text().strip()[:255]
            if title_text:
                bookmark.title = title_text
                bookmark.save()

    # Plain-text rendering of the page: links are kept, images are dropped.
    converter = html2text.HTML2Text()
    converter.ignore_links = False
    converter.ignore_images = True
    text_content = converter.handle(html_content)

    # One Extraction per bookmark: refresh it when re-processing.
    Extraction.objects.update_or_create(
        bookmark=bookmark,
        defaults={
            'content_html': html_content,
            'content_text': text_content,
            'metadata': {
                'status_code': response.status_code,
                'content_type': response.headers.get('content-type'),
            },
        },
    )

    # AI summary generation runs as its own task so a slow or failing model
    # call does not hold up (or retry) the fetch.
    generate_summary.delay(bookmark_id)
    return f"Processed bookmark {bookmark_id}"
@shared_task
def generate_summary(bookmark_id):
    """Generate and store a short AI summary of a bookmark's extracted text.

    Loads the bookmark and its related extraction, sends the first 4000
    characters of the extracted text to ``LocalAIApi`` with a summarisation
    prompt, and upserts the resulting text into the bookmark's ``Summary``.

    Args:
        bookmark_id: Primary key of the ``Bookmark`` to summarise.

    Returns:
        ``None`` when the bookmark or its extraction is missing, or the
        extraction has no text. Otherwise a status string:
        ``"Generated summary for bookmark <id>"`` when a summary was stored,
        ``"Failed to generate summary for bookmark <id>"`` when the API
        reported failure or returned no usable text.
    """
    try:
        bookmark = Bookmark.objects.get(id=bookmark_id)
        extraction = bookmark.extraction
    except (Bookmark.DoesNotExist, Extraction.DoesNotExist):
        return None

    if not extraction.content_text:
        return None

    # Only the first 4000 characters are sent, which bounds the prompt size.
    prompt = (
        "Summarize the following content from the webpage "
        f"'{bookmark.title or bookmark.url}' in 2-3 concise sentences. "
        "Focus on the main points for a researcher.\n\n"
        f"Content:\n{extraction.content_text[:4000]}"
    )
    response = LocalAIApi.create_response({
        "input": [
            {"role": "system", "content": "You are a helpful assistant that summarizes web content for researchers and knowledge workers. Be concise and professional."},
            {"role": "user", "content": prompt},
        ],
    })

    failure_message = f"Failed to generate summary for bookmark {bookmark_id}"

    if not response.get("success"):
        logger.error(
            "Failed to generate summary for bookmark %s: %s",
            bookmark_id,
            response.get('error'),
        )
        return failure_message

    summary_text = LocalAIApi.extract_text(response)
    if not summary_text:
        # The call succeeded but produced nothing to store. Logging
        # ``response.get('error')`` here would just print "None", so this
        # case gets its own message.
        logger.error(
            "Failed to generate summary for bookmark %s: "
            "AI response contained no text",
            bookmark_id,
        )
        return failure_message

    # One Summary per bookmark: overwrite any earlier summary.
    Summary.objects.update_or_create(
        bookmark=bookmark,
        defaults={'content': summary_text},
    )
    return f"Generated summary for bookmark {bookmark_id}"