38583-vm/core/views.py
Flatlogic Bot 87854aee02 v1
2026-02-18 22:51:16 +00:00

200 lines
6.7 KiB
Python

from html.parser import HTMLParser
from io import BytesIO
import textwrap
from django.contrib import messages
from django.http import HttpResponse
from django.shortcuts import get_object_or_404, redirect, render
from django.utils import timezone
from django.utils.text import slugify
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from .forms import BundleUploadForm
from .models import HtmlBundle, HtmlDocument, HtmlExport
class _HtmlTextExtractor(HTMLParser):
block_tags = {
"p",
"div",
"br",
"li",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"section",
"article",
"header",
"footer",
}
def __init__(self):
super().__init__()
self.parts = []
def handle_starttag(self, tag, attrs):
if tag in self.block_tags:
self.parts.append("\n")
def handle_endtag(self, tag):
if tag in self.block_tags:
self.parts.append("\n")
def handle_data(self, data):
if data.strip():
self.parts.append(data)
def _extract_text_from_html(html_bytes: bytes) -> str:
    """Return the visible text of an HTML payload, one non-empty line per row.

    Args:
        html_bytes: Raw bytes of an HTML file. They are decoded as UTF-8;
            undecodable bytes become U+FFFD rather than raising.

    Returns:
        The extracted text with every line stripped of surrounding
        whitespace and blank lines removed, joined with ``"\\n"``.
    """
    # "utf-8-sig" behaves like "utf-8" but also drops a leading byte-order
    # mark, which would otherwise survive as an invisible U+FEFF character.
    html_text = html_bytes.decode("utf-8-sig", errors="replace")
    parser = _HtmlTextExtractor()
    parser.feed(html_text)
    # feed() may hold back trailing data (for example text ending in a bare
    # "&"); close() forces the parser to flush whatever is still buffered.
    parser.close()
    stripped_lines = (line.strip() for line in "".join(parser.parts).splitlines())
    return "\n".join(line for line in stripped_lines if line)
def _build_pdf(bundle: HtmlBundle) -> BytesIO:
    """Render the text of every document in *bundle* into one in-memory PDF.

    Each document's ``content_text`` is split into lines, each line is
    wrapped to 95 characters and drawn top-to-bottom on US-letter pages with
    a 72pt margin. A new page is started whenever the cursor drops below the
    bottom margin.

    Args:
        bundle: The bundle whose ``documents`` are rendered, in the order the
            related manager returns them.
            NOTE(review): that order depends on the model's default ordering,
            which is not visible here - confirm it follows ``order``.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the finished PDF.
    """
    font_name, font_size = "Helvetica", 11
    margin = 72
    line_height = 14
    _, page_height = letter
    top_of_page = page_height - margin

    buffer = BytesIO()
    pdf = canvas.Canvas(buffer, pagesize=letter)
    # Set the font before the first line is drawn so page one matches the
    # following pages instead of falling back to the canvas default.
    pdf.setFont(font_name, font_size)
    y = top_of_page

    for document in bundle.documents.all():
        text = document.content_text or ""
        # ``or [""]`` keeps an empty document / empty paragraph as one blank
        # line so the vertical spacing below is still applied.
        for paragraph in text.splitlines() or [""]:
            for line in textwrap.wrap(paragraph, width=95) or [""]:
                if y < margin:
                    pdf.showPage()
                    # Starting a new page discards the canvas font state.
                    pdf.setFont(font_name, font_size)
                    y = top_of_page
                pdf.drawString(margin, y, line)
                y -= line_height
            y -= 6  # extra gap between paragraphs
        y -= 12  # extra gap between documents

    pdf.save()
    buffer.seek(0)
    return buffer
def home(request):
    """Landing page: accept an HTML upload and list recent bundles/exports.

    On POST the non-file fields are validated through ``BundleUploadForm``
    and the uploaded files are read from ``request.FILES["files"]``. When the
    form is valid and every file name ends in ``.html``/``.htm``, a bundle
    with one ``HtmlDocument`` per file (numbered from 1 in upload order) is
    created and the user is redirected to its detail page. Otherwise the page
    is re-rendered with the collected ``file_errors``.
    """
    # Imported locally so this view does not depend on the module's import
    # block being changed.
    from django.db import transaction

    file_errors = []
    form = BundleUploadForm()

    if request.method == "POST":
        form = BundleUploadForm(request.POST)
        files = request.FILES.getlist("files")
        if form.is_valid():
            invalid_files = [
                f.name for f in files if not f.name.lower().endswith((".html", ".htm"))
            ]
            if not files:
                file_errors.append("No files were received. Please ensure you have selected files and try again.")
            elif invalid_files:
                # Only the first three offenders are listed to keep the message short.
                file_errors.append(
                    f"Only .html or .htm files are supported. Invalid: {', '.join(invalid_files[:3])}"
                )
            else:
                # ``or ""`` tolerates a title that was cleaned to None.
                title = (form.cleaned_data.get("title") or "").strip()
                # Create the bundle and all of its documents atomically so a
                # failure part-way through does not leave a half-filled bundle.
                with transaction.atomic():
                    bundle = HtmlBundle.objects.create(title=title)
                    for index, uploaded in enumerate(files, start=1):
                        content_text = _extract_text_from_html(uploaded.read())
                        HtmlDocument.objects.create(
                            bundle=bundle,
                            original_name=uploaded.name,
                            order=index,
                            content_text=content_text,
                        )
                messages.success(request, f"Bundle '{bundle.title or 'Untitled'}' created with {len(files)} files.")
                return redirect("bundle_detail", bundle_id=bundle.id)

    bundles = HtmlBundle.objects.order_by("-created_at")[:5]
    exports = HtmlExport.objects.select_related("bundle").order_by("-created_at")[:5]
    context = {
        "page_title": "HTML Bundle to PDF",
        "page_description": "Upload multiple HTML files, arrange the order, and export a single PDF instantly.",
        "form": form,
        "file_errors": file_errors,
        "bundles": bundles,
        "exports": exports,
    }
    return render(request, "core/index.html", context)
def bundle_list(request):
    """Render the page listing every bundle, newest first."""
    newest_first = HtmlBundle.objects.order_by("-created_at")
    return render(
        request,
        "core/bundle_list.html",
        {
            "page_title": "All Bundles",
            "page_description": "Browse recent HTML bundles and download combined PDFs.",
            "bundles": newest_first,
        },
    )
def bundle_detail(request, bundle_id: int):
    """Show one bundle and handle re-ordering of its documents.

    A POST whose ``action`` field equals ``update_order`` reads an
    ``order_<document id>`` value for each document. Missing fields are
    skipped; a value that is not an integer >= 1 aborts the update with an
    error message. Valid values are saved in one bulk update, after which the
    user is redirected back to this page. Any other request renders the
    detail template. Responds with 404 when the bundle does not exist.
    """
    bundle = get_object_or_404(HtmlBundle, pk=bundle_id)
    documents = bundle.documents.all()
    exports = bundle.exports.order_by("-created_at")

    wants_reorder = request.method == "POST" and request.POST.get("action") == "update_order"
    if not wants_reorder:
        return render(
            request,
            "core/bundle_detail.html",
            {
                "page_title": bundle.title or "Untitled bundle",
                "page_description": "Review your bundle and generate a combined PDF.",
                "bundle": bundle,
                "documents": documents,
                "exports": exports,
            },
        )

    changed = []
    for doc in documents:
        submitted = request.POST.get(f"order_{doc.id}")
        if submitted is None:
            # No value posted for this document: leave its position alone.
            continue
        try:
            position = int(submitted)
        except ValueError:
            position = None
        if position is None or position < 1:
            messages.error(request, "Order values must be positive numbers.")
            return redirect("bundle_detail", bundle_id=bundle.id)
        doc.order = position
        changed.append(doc)

    if changed:
        HtmlDocument.objects.bulk_update(changed, ["order"])
        messages.success(request, "Order updated.")
    return redirect("bundle_detail", bundle_id=bundle.id)
def bundle_download(request, bundle_id: int):
    """Build the combined PDF for a bundle, record the export and serve it.

    The download name is ``<slugified title or "bundle">-<YYYYMMDD-HHMM>.pdf``.
    Responds with 404 when the bundle does not exist.
    """
    bundle = get_object_or_404(HtmlBundle, pk=bundle_id)
    slug = slugify(bundle.title) or "bundle"
    timestamp = timezone.now().strftime("%Y%m%d-%H%M")
    file_name = f"{slug}-{timestamp}.pdf"

    # Render first and record afterwards: if PDF generation raises, no
    # HtmlExport row is left behind for a file that was never produced.
    pdf_buffer = _build_pdf(bundle)
    HtmlExport.objects.create(bundle=bundle, file_name=file_name)

    response = HttpResponse(pdf_buffer.getvalue(), content_type="application/pdf")
    response["Content-Disposition"] = f'attachment; filename="{file_name}"'
    return response
def export_download(request, export_id: int):
    """Serve the PDF for a recorded export under its stored file name.

    The PDF is rebuilt from the export's bundle on every call. Responds with
    404 when the export does not exist.
    """
    export = get_object_or_404(HtmlExport, pk=export_id)
    pdf_bytes = _build_pdf(export.bundle).getvalue()
    disposition = f'attachment; filename="{export.file_name}"'
    response = HttpResponse(pdf_bytes, content_type="application/pdf")
    response["Content-Disposition"] = disposition
    return response