"""Celery tasks that fetch a bookmarked page, store its extraction, and summarize it."""
import httpx
from celery import shared_task
from django.utils import timezone
from core.models import Bookmark, Extraction, Summary
from ai.local_ai_api import LocalAIApi
from bs4 import BeautifulSoup
import html2text
import logging

logger = logging.getLogger(__name__)


@shared_task(bind=True, max_retries=3)
def process_bookmark(self, bookmark_id):
    """Fetch a bookmark's URL, store the extracted content, and queue a summary.

    Steps:
      1. Load the ``Bookmark``; return ``None`` silently if it does not exist.
      2. GET ``bookmark.url`` (redirects followed, 30 s timeout). Any failure,
         including a non-2xx status, is logged and the task is retried after
         60 seconds (at most ``max_retries=3`` times).
      3. If the bookmark has no title, take it from the page's ``<title>``.
      4. Create or update the bookmark's ``Extraction`` with the raw HTML, the
         html2text rendering and a little response metadata.
      5. Enqueue ``generate_summary`` for the same bookmark.

    Args:
        bookmark_id: Primary key of the ``Bookmark`` to process.

    Returns:
        A status string on success, or ``None`` if the bookmark is missing.
    """
    try:
        bookmark = Bookmark.objects.get(id=bookmark_id)
    except Bookmark.DoesNotExist:
        return

    # NOTE(review): bookmark.url is fetched as-is. If URLs can come from
    # untrusted users, confirm they are validated elsewhere (SSRF).
    try:
        with httpx.Client(follow_redirects=True, timeout=30.0) as client:
            response = client.get(bookmark.url)
            response.raise_for_status()
            html_content = response.text
    except Exception as exc:
        logger.error("Error fetching bookmark %s: %s", bookmark_id, exc)
        raise self.retry(exc=exc, countdown=60)

    # Simple title extraction if not already set. The page is only parsed
    # when a title is actually needed.
    if not bookmark.title:
        title_tag = BeautifulSoup(html_content, 'html.parser').find('title')
        # get_text() is used instead of .string because .string is None for
        # an empty <title> or one with several child nodes, and calling
        # .strip() on it would crash the task.
        page_title = title_tag.get_text(strip=True) if title_tag else ''
        if page_title:
            # 255 is presumably the max_length of Bookmark.title -- TODO confirm.
            bookmark.title = page_title[:255]
            bookmark.save()

    # Convert the HTML to text with html2text, keeping links, dropping images.
    converter = html2text.HTML2Text()
    converter.ignore_links = False
    converter.ignore_images = True
    text_content = converter.handle(html_content)

    Extraction.objects.update_or_create(
        bookmark=bookmark,
        defaults={
            'content_html': html_content,
            'content_text': text_content,
            'metadata': {
                'status_code': response.status_code,
                'content_type': response.headers.get('content-type'),
            },
        },
    )

    # AI summary generation runs as a separate task.
    generate_summary.delay(bookmark_id)

    return f"Processed bookmark {bookmark_id}"


@shared_task
def generate_summary(bookmark_id):
    """Ask the local AI API for a short summary of a bookmark's extracted text.

    Returns ``None`` without doing anything when the bookmark or its
    extraction does not exist, or when the extraction has no text. Otherwise
    the first 4000 characters of the text are sent to
    ``LocalAIApi.create_response`` and, if the response reports success and
    yields non-empty text, the bookmark's ``Summary`` is created or updated.

    Args:
        bookmark_id: Primary key of the ``Bookmark`` to summarize.

    Returns:
        A status string describing success or failure, or ``None`` when
        there was nothing to summarize.
    """
    try:
        bookmark = Bookmark.objects.get(id=bookmark_id)
        extraction = bookmark.extraction
    except (Bookmark.DoesNotExist, Extraction.DoesNotExist):
        return

    if not extraction.content_text:
        return

    # Prepare prompt for AI; the content is capped at 4000 characters.
    prompt = (
        f"Summarize the following content from the webpage '{bookmark.title or bookmark.url}' in 2-3 concise sentences. "
        f"Focus on the main points for a researcher.\n\nContent:\n{extraction.content_text[:4000]}"
    )

    response = LocalAIApi.create_response({
        "input": [
            {"role": "system", "content": "You are a helpful assistant that summarizes web content for researchers and knowledge workers. Be concise and professional."},
            {"role": "user", "content": prompt},
        ],
    })

    if response.get("success"):
        summary_text = LocalAIApi.extract_text(response)
        if summary_text:
            Summary.objects.update_or_create(
                bookmark=bookmark,
                defaults={'content': summary_text}
            )
            return f"Generated summary for bookmark {bookmark_id}"

    # Reached both when the API reports failure and when it succeeds but
    # yields no text (in which case the logged error may be None).
    logger.error("Failed to generate summary for bookmark %s: %s", bookmark_id, response.get('error'))
    return f"Failed to generate summary for bookmark {bookmark_id}"