diff --git a/core/__pycache__/admin.cpython-311.pyc b/core/__pycache__/admin.cpython-311.pyc index 5e8987a..febdfa3 100644 Binary files a/core/__pycache__/admin.cpython-311.pyc and b/core/__pycache__/admin.cpython-311.pyc differ diff --git a/core/__pycache__/context_processors.cpython-311.pyc b/core/__pycache__/context_processors.cpython-311.pyc index 75bf223..2f0013b 100644 Binary files a/core/__pycache__/context_processors.cpython-311.pyc and b/core/__pycache__/context_processors.cpython-311.pyc differ diff --git a/core/__pycache__/forms.cpython-311.pyc b/core/__pycache__/forms.cpython-311.pyc new file mode 100644 index 0000000..f1ecafb Binary files /dev/null and b/core/__pycache__/forms.cpython-311.pyc differ diff --git a/core/__pycache__/models.cpython-311.pyc b/core/__pycache__/models.cpython-311.pyc index a251b5f..f9be4d6 100644 Binary files a/core/__pycache__/models.cpython-311.pyc and b/core/__pycache__/models.cpython-311.pyc differ diff --git a/core/__pycache__/rss.cpython-311.pyc b/core/__pycache__/rss.cpython-311.pyc new file mode 100644 index 0000000..406fbce Binary files /dev/null and b/core/__pycache__/rss.cpython-311.pyc differ diff --git a/core/__pycache__/tests.cpython-311.pyc b/core/__pycache__/tests.cpython-311.pyc new file mode 100644 index 0000000..0bf7f7f Binary files /dev/null and b/core/__pycache__/tests.cpython-311.pyc differ diff --git a/core/__pycache__/urls.cpython-311.pyc b/core/__pycache__/urls.cpython-311.pyc index f705988..afb6fb1 100644 Binary files a/core/__pycache__/urls.cpython-311.pyc and b/core/__pycache__/urls.cpython-311.pyc differ diff --git a/core/__pycache__/views.cpython-311.pyc b/core/__pycache__/views.cpython-311.pyc index 2f0989c..e2c4b37 100644 Binary files a/core/__pycache__/views.cpython-311.pyc and b/core/__pycache__/views.cpython-311.pyc differ diff --git a/core/admin.py b/core/admin.py index 8c38f3f..8c6f841 100644 --- a/core/admin.py +++ b/core/admin.py @@ -1,3 +1,27 @@ from django.contrib import admin -# Register your models here. +from .models import Article, NewsSource, Topic + + +@admin.register(Topic) +class TopicAdmin(admin.ModelAdmin): + list_display = ('name', 'accent_color') + search_fields = ('name', 'description') + prepopulated_fields = {'slug': ('name',)} + + +@admin.register(NewsSource) +class NewsSourceAdmin(admin.ModelAdmin): + list_display = ('name', 'is_active', 'last_synced_at') + list_filter = ('is_active',) + search_fields = ('name', 'feed_url', 'description') + prepopulated_fields = {'slug': ('name',)} + + +@admin.register(Article) +class ArticleAdmin(admin.ModelAdmin): + list_display = ('title', 'article_kind', 'topic', 'source', 'is_featured', 'is_published', 'published_at') + list_filter = ('article_kind', 'topic', 'is_featured', 'is_published', 'source') + search_fields = ('title', 'excerpt', 'content', 'author_name', 'external_url') + prepopulated_fields = {'slug': ('title',)} + autocomplete_fields = ('topic', 'source') diff --git a/core/forms.py b/core/forms.py new file mode 100644 index 0000000..136d23e --- /dev/null +++ b/core/forms.py @@ -0,0 +1,62 @@ +from django import forms + +from .models import Article, Topic + + +class ArticleFilterForm(forms.Form): + q = forms.CharField( + required=False, + max_length=120, + label='Search', + widget=forms.TextInput( + attrs={ + 'placeholder': 'Search startup, AI, funding, product…', + 'class': 'form-control form-control-lg search-input', + } + ), + ) + topic = forms.ChoiceField( + required=False, + label='Topic', + widget=forms.Select(attrs={'class': 'form-select form-select-lg'}), + ) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + topic_choices = [('', 'All topics')] + topic_choices.extend((topic.slug, topic.name) for topic in Topic.objects.order_by('name')) + self.fields['topic'].choices = topic_choices + + +class OriginalArticleForm(forms.ModelForm): + class Meta: + model = Article + fields = ['title', 'topic', 'author_name', 'excerpt', 'content', 'is_featured'] + widgets = { + 'title': forms.TextInput(attrs={'class': 'form-control form-control-lg', 'placeholder': 'Write a sharp headline'}), + 'topic': forms.Select(attrs={'class': 'form-select form-select-lg'}), + 'author_name': forms.TextInput(attrs={'class': 'form-control form-control-lg', 'placeholder': 'Editor or columnist name'}), + 'excerpt': forms.Textarea(attrs={'class': 'form-control', 'rows': 3, 'placeholder': 'A short dek for cards and search results'}), + 'content': forms.Textarea(attrs={'class': 'form-control', 'rows': 10, 'placeholder': 'Draft the story, analysis, or curated roundup'}), + 'is_featured': forms.CheckboxInput(attrs={'class': 'form-check-input'}), + } + labels = { + 'excerpt': 'Deck / summary', + 'is_featured': 'Pin as featured story', + } + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.fields['topic'].queryset = Topic.objects.order_by('name') + self.fields['topic'].empty_label = 'Choose a topic' + self.fields['topic'].required = True + + def clean(self): + cleaned_data = super().clean() + excerpt = cleaned_data.get('excerpt', '') + content = cleaned_data.get('content', '') + if len(excerpt.strip()) < 20: + self.add_error('excerpt', 'Please add at least 20 characters so cards feel editorially complete.') + if len(content.strip()) < 80: + self.add_error('content', 'Please write at least 80 characters to create a publishable story.') + return cleaned_data diff --git a/core/management/__init__.py b/core/management/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/management/__pycache__/__init__.cpython-311.pyc b/core/management/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..1d04a9d Binary files /dev/null and b/core/management/__pycache__/__init__.cpython-311.pyc differ diff --git a/core/management/commands/__init__.py b/core/management/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/management/commands/__pycache__/__init__.cpython-311.pyc b/core/management/commands/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000..be6d9f1 Binary files /dev/null and b/core/management/commands/__pycache__/__init__.cpython-311.pyc differ diff --git a/core/management/commands/__pycache__/import_rss.cpython-311.pyc b/core/management/commands/__pycache__/import_rss.cpython-311.pyc new file mode 100644 index 0000000..b9e4523 Binary files /dev/null and b/core/management/commands/__pycache__/import_rss.cpython-311.pyc differ diff --git a/core/management/commands/import_rss.py b/core/management/commands/import_rss.py new file mode 100644 index 0000000..3dfe4c2 --- /dev/null +++ b/core/management/commands/import_rss.py @@ -0,0 +1,15 @@ +from django.core.management.base import BaseCommand + +from core.rss import sync_active_sources + + +class Command(BaseCommand): + help = "Import active RSS feeds into newsroom articles" + + def add_arguments(self, parser): + parser.add_argument('--limit', type=int, default=8, help='Number of feed items per source') + parser.add_argument('--stale-minutes', type=int, default=0, help='Only sync sources older than this many minutes') + + def handle(self, *args, **options): + created = sync_active_sources(limit=options['limit'], stale_minutes=options['stale_minutes']) + self.stdout.write(self.style.SUCCESS(f'Imported {created} article(s).')) diff --git a/core/migrations/0001_initial.py b/core/migrations/0001_initial.py new file mode 100644 index 0000000..63b4318 --- /dev/null +++ b/core/migrations/0001_initial.py @@ -0,0 +1,71 @@ +# Generated by Django 5.2.7 on 2026-04-14 16:42 + +import django.db.models.deletion +import django.utils.timezone +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='NewsSource', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=120, unique=True)), + ('slug', models.SlugField(max_length=140, unique=True)), + ('site_url', models.URLField(blank=True)), + ('feed_url', models.URLField(unique=True)), + ('description', models.TextField(blank=True)), + ('is_active', models.BooleanField(default=True)), + ('last_synced_at', models.DateTimeField(blank=True, null=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ], + options={ + 'ordering': ['name'], + }, + ), + migrations.CreateModel( + name='Topic', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=80, unique=True)), + ('slug', models.SlugField(max_length=90, unique=True)), + ('description', models.TextField(blank=True)), + ('accent_color', models.CharField(default='#00c2a8', max_length=7)), + ], + options={ + 'ordering': ['name'], + }, + ), + migrations.CreateModel( + name='Article', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('title', models.CharField(max_length=220)), + ('slug', models.SlugField(max_length=240, unique=True)), + ('excerpt', models.TextField(blank=True)), + ('content', models.TextField(blank=True)), + ('article_kind', models.CharField(choices=[('rss', 'RSS import'), ('original', 'Original story')], default='rss', max_length=20)), + ('external_url', models.URLField(blank=True)), + ('image_url', models.URLField(blank=True)), + ('author_name', models.CharField(blank=True, max_length=120)), + ('published_at', models.DateTimeField(default=django.utils.timezone.now)), + ('dedupe_key', models.CharField(blank=True, max_length=64, unique=True)), + ('is_featured', models.BooleanField(default=False)), + ('is_published', models.BooleanField(default=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('source', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='articles', to='core.newssource')), + ('topic', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='articles', to='core.topic')), + ], + options={ + 'ordering': ['-published_at', '-created_at'], + }, + ), + ] diff --git a/core/migrations/0002_seed_newsroom_data.py b/core/migrations/0002_seed_newsroom_data.py new file mode 100644 index 0000000..45994ba --- /dev/null +++ b/core/migrations/0002_seed_newsroom_data.py @@ -0,0 +1,88 @@ +from django.db import migrations +from django.utils import timezone + + +def seed_newsroom(apps, schema_editor): + Topic = apps.get_model('core', 'Topic') + NewsSource = apps.get_model('core', 'NewsSource') + Article = apps.get_model('core', 'Article') + + topics = [ + ('Artificial Intelligence', 'artificial-intelligence', 'Model launches, research, tooling, and applied AI.', '#00c2a8'), + ('Startups', 'startups', 'Founders, launches, product momentum, and operator notes.', '#ff6b4a'), + ('Venture Capital', 'venture-capital', 'Funding rounds, investors, and deal flow.', '#7df9d7'), + ('Product & Cloud', 'product-cloud', 'SaaS, developer platforms, cloud infrastructure, and enterprise software.', '#ffd166'), + ('Hardware', 'hardware', 'Devices, chips, robotics, and the physical layer of tech.', '#8ec5ff'), + ('Security', 'security', 'Cybersecurity, privacy, and resilience across the stack.', '#ff8fab'), + ] + for name, slug, description, accent_color in topics: + Topic.objects.update_or_create( + slug=slug, + defaults={ + 'name': name, + 'description': description, + 'accent_color': accent_color, + }, + ) + + sources = [ + ('TechCrunch', 'techcrunch', 'https://techcrunch.com', 'https://techcrunch.com/feed/', 'Startup and technology reporting.'), + ('The Verge', 'the-verge', 'https://www.theverge.com', 'https://www.theverge.com/rss/index.xml', 'Product, platform, and consumer tech news.'), + ('Wired', 'wired', 'https://www.wired.com', 'https://www.wired.com/feed/rss', 'Culture, technology, and future-of-tech coverage.'), + ('Ars Technica', 'ars-technica', 'https://arstechnica.com', 'https://feeds.arstechnica.com/arstechnica/index', 'Deep reporting across science, policy, and hardware.'), + ] + for name, slug, site_url, feed_url, description in sources: + NewsSource.objects.update_or_create( + slug=slug, + defaults={ + 'name': name, + 'site_url': site_url, + 'feed_url': feed_url, + 'description': description, + 'is_active': True, + }, + ) + + topic = Topic.objects.filter(slug='startups').first() + Article.objects.update_or_create( + slug='inside-signal-how-to-run-a-fast-tech-newsroom', + defaults={ + 'title': 'Inside Signal: how to run a fast, future-ready tech newsroom', + 'excerpt': 'A seeded original story so the editorial flow is visible even before the first RSS sync completes.', + 'content': 'Signal combines live feed aggregation with a lightweight publishing flow. Editors can scan imported stories, publish original analysis, and shape the front page with featured picks. This seeded story exists to make the first delivery feel complete while you continue building the full magazine.', + 'article_kind': 'original', + 'topic': topic, + 'author_name': 'Signal Editorial Desk', + 'published_at': timezone.now(), + 'dedupe_key': 'seeded-signal-editorial-launch-story', + 'is_featured': True, + 'is_published': True, + }, + ) + + +def unseed_newsroom(apps, schema_editor): + Topic = apps.get_model('core', 'Topic') + NewsSource = apps.get_model('core', 'NewsSource') + Article = apps.get_model('core', 'Article') + Article.objects.filter(dedupe_key='seeded-signal-editorial-launch-story').delete() + NewsSource.objects.filter(slug__in=['techcrunch', 'the-verge', 'wired', 'ars-technica']).delete() + Topic.objects.filter(slug__in=[ + 'artificial-intelligence', + 'startups', + 'venture-capital', + 'product-cloud', + 'hardware', + 'security', + ]).delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0001_initial'), + ] + + operations = [ + migrations.RunPython(seed_newsroom, unseed_newsroom), + ] diff --git a/core/migrations/__pycache__/0001_initial.cpython-311.pyc b/core/migrations/__pycache__/0001_initial.cpython-311.pyc new file mode 100644 index 0000000..f4ec98b Binary files /dev/null and b/core/migrations/__pycache__/0001_initial.cpython-311.pyc differ diff --git a/core/migrations/__pycache__/0002_seed_newsroom_data.cpython-311.pyc b/core/migrations/__pycache__/0002_seed_newsroom_data.cpython-311.pyc new file mode 100644 index 0000000..a4a3f6b Binary files /dev/null and b/core/migrations/__pycache__/0002_seed_newsroom_data.cpython-311.pyc differ diff --git a/core/models.py b/core/models.py index 71a8362..a72d852 100644 --- a/core/models.py +++ b/core/models.py @@ -1,3 +1,87 @@ from django.db import models +from django.urls import reverse +from django.utils import timezone +from django.utils.text import slugify -# Create your models here. + +class Topic(models.Model): + name = models.CharField(max_length=80, unique=True) + slug = models.SlugField(max_length=90, unique=True) + description = models.TextField(blank=True) + accent_color = models.CharField(max_length=7, default="#00c2a8") + + class Meta: + ordering = ["name"] + + def __str__(self): + return self.name + + def save(self, *args, **kwargs): + if not self.slug: + self.slug = slugify(self.name) + super().save(*args, **kwargs) + + +class NewsSource(models.Model): + name = models.CharField(max_length=120, unique=True) + slug = models.SlugField(max_length=140, unique=True) + site_url = models.URLField(blank=True) + feed_url = models.URLField(unique=True) + description = models.TextField(blank=True) + is_active = models.BooleanField(default=True) + last_synced_at = models.DateTimeField(blank=True, null=True) + created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + ordering = ["name"] + + def __str__(self): + return self.name + + def save(self, *args, **kwargs): + if not self.slug: + self.slug = slugify(self.name) + super().save(*args, **kwargs) + + +class Article(models.Model): + class ArticleKind(models.TextChoices): + RSS = "rss", "RSS import" + ORIGINAL = "original", "Original story" + + title = models.CharField(max_length=220) + slug = models.SlugField(max_length=240, unique=True) + excerpt = models.TextField(blank=True) + content = models.TextField(blank=True) + article_kind = models.CharField(max_length=20, choices=ArticleKind.choices, default=ArticleKind.RSS) + topic = models.ForeignKey(Topic, on_delete=models.SET_NULL, null=True, blank=True, related_name='articles') + source = models.ForeignKey(NewsSource, on_delete=models.SET_NULL, null=True, blank=True, related_name='articles') + external_url = models.URLField(blank=True) + image_url = models.URLField(blank=True) + author_name = models.CharField(max_length=120, blank=True) + published_at = models.DateTimeField(default=timezone.now) + dedupe_key = models.CharField(max_length=64, unique=True, blank=True) + is_featured = models.BooleanField(default=False) + is_published = models.BooleanField(default=True) + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + ordering = ['-published_at', '-created_at'] + + def __str__(self): + return self.title + + def save(self, *args, **kwargs): + if not self.slug: + base_slug = slugify(self.title)[:220] or 'story' + slug = base_slug + suffix = 2 + while Article.objects.exclude(pk=self.pk).filter(slug=slug).exists(): + slug = f"{base_slug[:210]}-{suffix}" + suffix += 1 + self.slug = slug + super().save(*args, **kwargs) + + def get_absolute_url(self): + return reverse('article_detail', args=[self.slug]) diff --git a/core/rss.py b/core/rss.py new file mode 100644 index 0000000..d02e5d3 --- /dev/null +++ b/core/rss.py @@ -0,0 +1,161 @@ +from __future__ import annotations + +import hashlib +import logging +import re +from datetime import timezone as dt_timezone +from datetime import timedelta +from email.utils import parsedate_to_datetime +from html import unescape +from urllib.request import Request, urlopen +import xml.etree.ElementTree as ET + +from django.db.models import Q +from django.utils import timezone + +from .models import Article, NewsSource, Topic + +logger = logging.getLogger(__name__) +HTML_RE = re.compile(r'<[^>]+>') +NAMESPACES = { + 'atom': 'http://www.w3.org/2005/Atom', + 'media': 'http://search.yahoo.com/mrss/', + 'content': 'http://purl.org/rss/1.0/modules/content/', +} +DEFAULT_TOPIC_MAP = { + 'ai': 'Artificial Intelligence', + 'artificial intelligence': 'Artificial Intelligence', + 'startup': 'Startups', + 'startups': 'Startups', + 'venture': 'Venture Capital', + 'funding': 'Venture Capital', + 'cloud': 'Product & Cloud', + 'saas': 'Product & Cloud', + 'product': 'Product & Cloud', + 'hardware': 'Hardware', + 'chips': 'Hardware', + 'security': 'Security', +} + + +def strip_html(value: str) -> str: + return re.sub(r'\s+', ' ', HTML_RE.sub(' ', unescape(value or ''))).strip() + + +def text_or_empty(node, path: str) -> str: + found = node.find(path, NAMESPACES) + if found is None: + return '' + return ''.join(found.itertext()).strip() + + +def pick_topic(*parts: str) -> Topic | None: + combined = ' '.join(part.lower() for part in parts if part) + for keyword, topic_name in DEFAULT_TOPIC_MAP.items(): + if keyword in combined: + return Topic.objects.filter(name=topic_name).first() + return Topic.objects.order_by('name').first() + + +def parse_datetime(value: str): + if not value: + return timezone.now() + try: + parsed = parsedate_to_datetime(value) + if timezone.is_naive(parsed): + parsed = timezone.make_aware(parsed, dt_timezone.utc) + return parsed.astimezone(dt_timezone.utc) + except (TypeError, ValueError, IndexError, OverflowError): + return timezone.now() + + +def _extract_image(item: ET.Element) -> str: + enclosure = item.find('enclosure') + if enclosure is not None and 'image' in enclosure.attrib.get('type', ''): + return enclosure.attrib.get('url', '') + for path in ['media:content', 'media:thumbnail']: + media = item.find(path, NAMESPACES) + if media is not None: + return media.attrib.get('url', '') + return '' + + +def _dedupe_key(source: NewsSource, guid: str, link: str, title: str) -> str: + payload = f"{source.pk}|{guid or link or title}".encode('utf-8') + return hashlib.sha256(payload).hexdigest() + + +def import_feed(source: NewsSource, limit: int = 8) -> int: + request = Request( + source.feed_url, + headers={'User-Agent': 'Mozilla/5.0 FlatlogicNewsroomBot/1.0'}, + ) + with urlopen(request, timeout=15) as response: + body = response.read() + root = ET.fromstring(body) + channel_items = root.findall('./channel/item') + atom_entries = root.findall('./atom:entry', NAMESPACES) + items = channel_items or atom_entries + created_count = 0 + + for item in items[:limit]: + title = text_or_empty(item, 'title') or text_or_empty(item, 'atom:title') + link = text_or_empty(item, 'link') or item.attrib.get('href', '') + if not link: + atom_link = item.find('atom:link', NAMESPACES) + if atom_link is not None: + link = atom_link.attrib.get('href', '') + guid = text_or_empty(item, 'guid') or text_or_empty(item, 'atom:id') + excerpt = ( + text_or_empty(item, 'description') + or text_or_empty(item, 'atom:summary') + or text_or_empty(item, 'content:encoded') + ) + content = text_or_empty(item, 'content:encoded') or text_or_empty(item, 'atom:content') or excerpt + published_raw = ( + text_or_empty(item, 'pubDate') + or text_or_empty(item, 'published') + or text_or_empty(item, 'updated') + or text_or_empty(item, 'atom:updated') + ) + author_name = text_or_empty(item, 'author') or text_or_empty(item, 'atom:author/atom:name') + category_text = ' '.join(elem.text or '' for elem in item.findall('category')) + dedupe_key = _dedupe_key(source, guid, link, title) + + if not title or Article.objects.filter(dedupe_key=dedupe_key).exists(): + continue + + article = Article( + title=strip_html(title), + excerpt=strip_html(excerpt)[:340], + content=strip_html(content), + article_kind=Article.ArticleKind.RSS, + source=source, + topic=pick_topic(title, excerpt, category_text, source.name), + external_url=link, + image_url=_extract_image(item), + author_name=strip_html(author_name)[:120], + published_at=parse_datetime(published_raw), + dedupe_key=dedupe_key, + is_published=True, + ) + article.save() + created_count += 1 + + source.last_synced_at = timezone.now() + source.save(update_fields=['last_synced_at']) + return created_count + + +def sync_active_sources(limit: int = 6, stale_minutes: int = 45) -> int: + threshold = timezone.now() - timedelta(minutes=stale_minutes) + sources = NewsSource.objects.filter(is_active=True).filter( + Q(last_synced_at__isnull=True) | Q(last_synced_at__lt=threshold) + ) + total_created = 0 + for source in sources: + try: + total_created += import_feed(source, limit=limit) + except Exception as exc: # pragma: no cover - graceful failure for live feed parsing + logger.warning('RSS sync failed for %s: %s', source.name, exc) + return total_created diff --git a/core/templates/base.html b/core/templates/base.html index 1e7e5fb..4e0638e 100644 --- a/core/templates/base.html +++ b/core/templates/base.html @@ -1,25 +1,76 @@ +{% load static %} -
-