200 lines
6.7 KiB
Python
200 lines
6.7 KiB
Python
from html.parser import HTMLParser
|
|
from io import BytesIO
|
|
import textwrap
|
|
|
|
from django.contrib import messages
|
|
from django.http import HttpResponse
|
|
from django.shortcuts import get_object_or_404, redirect, render
|
|
from django.utils import timezone
|
|
from django.utils.text import slugify
|
|
|
|
from reportlab.lib.pagesizes import letter
|
|
from reportlab.pdfgen import canvas
|
|
|
|
from .forms import BundleUploadForm
|
|
from .models import HtmlBundle, HtmlDocument, HtmlExport
|
|
|
|
|
|
class _HtmlTextExtractor(HTMLParser):
    """Collect the readable text of an HTML document as a list of fragments.

    Fragments accumulate in ``parts``. Joining them with ``""`` gives the
    text with a newline at every block-level tag boundary. The content of
    ``<script>`` and ``<style>`` elements is skipped because it is code or
    styling, not text a reader would see.
    """

    # Tags whose opening and closing each force a line break in the output.
    block_tags = {
        "p",
        "div",
        "br",
        "li",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "section",
        "article",
        "header",
        "footer",
    }

    # Elements whose character data must never reach ``parts``.
    ignored_tags = {"script", "style"}

    def __init__(self):
        super().__init__()
        self.parts = []
        # Count of currently open ignored elements; data is dropped while > 0.
        self._ignored_depth = 0

    def handle_starttag(self, tag, attrs):
        """Start skipping on an ignored tag, or break the line on a block tag."""
        if tag in self.ignored_tags:
            self._ignored_depth += 1
        elif tag in self.block_tags:
            self.parts.append("\n")

    def handle_endtag(self, tag):
        """Stop skipping on an ignored tag, or break the line on a block tag."""
        if tag in self.ignored_tags:
            # A stray closing tag must not push the counter below zero.
            self._ignored_depth = max(0, self._ignored_depth - 1)
        elif tag in self.block_tags:
            self.parts.append("\n")

    def handle_data(self, data):
        """Record character data unless it sits inside an ignored element."""
        if self._ignored_depth:
            return
        if data.strip():
            self.parts.append(data)
        elif data and self.parts and not self.parts[-1][-1:].isspace():
            # Whitespace-only data between inline tags separates words
            # ("<b>A</b> <i>B</i>"); keep one space so they do not fuse.
            self.parts.append(" ")
|
|
|
|
|
|
def _extract_text_from_html(html_bytes: bytes) -> str:
    """Return the text of an HTML document with blank lines removed.

    Args:
        html_bytes: Raw document bytes. They are decoded as UTF-8;
            undecodable sequences are replaced instead of raising.

    Returns:
        The text gathered by ``_HtmlTextExtractor``, with every line stripped
        of surrounding whitespace and empty lines dropped, joined by newlines.
        Input without any text yields ``""``.
    """
    html_text = html_bytes.decode("utf-8", errors="replace")
    parser = _HtmlTextExtractor()
    parser.feed(html_text)
    # feed() can hold back trailing text while it waits to see whether a
    # character reference is still incomplete (e.g. a document ending in
    # "R&D"); close() forces that buffered remainder through the handlers.
    parser.close()
    stripped_lines = (line.strip() for line in "".join(parser.parts).splitlines())
    return "\n".join(line for line in stripped_lines if line)
|
|
|
|
|
|
def _build_pdf(bundle: HtmlBundle) -> BytesIO:
    """Render the text of every document in *bundle* into a single PDF.

    Each document's ``content_text`` is split into lines, every line is
    wrapped at 95 characters and drawn top-down on ``letter``-sized pages
    with a 72-point margin. A new page is started whenever the cursor drops
    below the bottom margin.

    Documents are drawn in the order ``bundle.documents.all()`` yields them.
    NOTE(review): presumably the model's default ordering follows the
    ``order`` field - confirm against the model definition.

    Args:
        bundle: The bundle whose documents are rendered.

    Returns:
        An in-memory buffer holding the finished PDF, rewound to position 0.
    """
    font_name = "Helvetica"
    font_size = 11
    margin = 72
    line_height = 14
    paragraph_gap = 6
    document_gap = 12

    buffer = BytesIO()
    pdf = canvas.Canvas(buffer, pagesize=letter)
    _, page_height = letter
    top_of_page = page_height - margin

    # Select the font for the first page as well. Previously it was only set
    # after showPage(), so page one was drawn with the canvas's initial font
    # and could differ in size from every following page.
    pdf.setFont(font_name, font_size)
    y = top_of_page

    for document in bundle.documents.all():
        text = document.content_text or ""
        # An empty document still contributes one blank paragraph.
        for paragraph in text.splitlines() or [""]:
            # textwrap.wrap returns [] for blank input; keep a blank line.
            for line in textwrap.wrap(paragraph, width=95) or [""]:
                if y < margin:
                    pdf.showPage()
                    y = top_of_page
                    # The font has to be selected again on each new page.
                    pdf.setFont(font_name, font_size)
                pdf.drawString(margin, y, line)
                y -= line_height
            y -= paragraph_gap
        y -= document_gap

    pdf.save()
    buffer.seek(0)
    return buffer
|
|
|
|
|
|
def home(request):
    """Render the landing page and process bundle uploads.

    On POST the upload form is validated and the files sent under the
    ``files`` key are checked: at least one file is required and every name
    must end in ``.html`` or ``.htm`` (case-insensitive). A valid upload
    creates one ``HtmlBundle`` plus one ``HtmlDocument`` per file, numbered
    from 1 in upload order, queues a success message and redirects to the
    bundle's detail page.

    Every other outcome renders ``core/index.html`` with the form, the
    collected file errors, and the five newest bundles and exports.
    """
    upload_errors = []

    if request.method == "POST":
        form = BundleUploadForm(request.POST)
        uploads = request.FILES.getlist("files")

        if form.is_valid():
            rejected_names = [
                upload.name
                for upload in uploads
                if not upload.name.lower().endswith((".html", ".htm"))
            ]
            if not uploads:
                upload_errors.append(
                    "No files were received. Please ensure you have selected files and try again."
                )
            elif rejected_names:
                # Only the first three offending names are shown.
                upload_errors.append(
                    f"Only .html or .htm files are supported. Invalid: {', '.join(rejected_names[:3])}"
                )
            else:
                bundle = HtmlBundle.objects.create(
                    title=form.cleaned_data.get("title", "").strip()
                )
                for position, upload in enumerate(uploads, 1):
                    extracted = _extract_text_from_html(upload.read())
                    HtmlDocument.objects.create(
                        bundle=bundle,
                        original_name=upload.name,
                        order=position,
                        content_text=extracted,
                    )
                messages.success(
                    request,
                    f"Bundle '{bundle.title or 'Untitled'}' created with {len(uploads)} files.",
                )
                return redirect("bundle_detail", bundle_id=bundle.id)
    else:
        form = BundleUploadForm()

    recent_bundles = HtmlBundle.objects.order_by("-created_at")[:5]
    recent_exports = HtmlExport.objects.select_related("bundle").order_by("-created_at")[:5]
    return render(
        request,
        "core/index.html",
        {
            "page_title": "HTML Bundle to PDF",
            "page_description": "Upload multiple HTML files, arrange the order, and export a single PDF instantly.",
            "form": form,
            "file_errors": upload_errors,
            "bundles": recent_bundles,
            "exports": recent_exports,
        },
    )
|
|
|
|
|
|
def bundle_list(request):
    """Render ``core/bundle_list.html`` with all bundles, newest first."""
    return render(
        request,
        "core/bundle_list.html",
        {
            "page_title": "All Bundles",
            "page_description": "Browse recent HTML bundles and download combined PDFs.",
            "bundles": HtmlBundle.objects.order_by("-created_at"),
        },
    )
|
|
|
|
|
|
def bundle_detail(request, bundle_id: int):
    """Show one bundle and, on request, save a new document order.

    A POST whose ``action`` field is ``update_order`` reads one
    ``order_<document id>`` field per document. Documents without a submitted
    field are left alone. If any submitted value is not an integer of at
    least 1, an error message is queued and nothing is saved. Otherwise the
    new positions are written in one bulk update. Both outcomes redirect back
    to this page.

    Any other request renders ``core/bundle_detail.html``. A missing bundle
    results in a 404.
    """
    bundle = get_object_or_404(HtmlBundle, pk=bundle_id)
    documents = bundle.documents.all()
    exports = bundle.exports.order_by("-created_at")

    is_reorder = request.method == "POST" and request.POST.get("action") == "update_order"
    if is_reorder:
        reordered = []
        for doc in documents:
            submitted = request.POST.get(f"order_{doc.id}")
            if submitted is None:
                continue
            try:
                position = int(submitted)
            except ValueError:
                # Unparseable input takes the same rejection path as a
                # value below 1.
                position = 0
            if position < 1:
                messages.error(request, "Order values must be positive numbers.")
                return redirect("bundle_detail", bundle_id=bundle.id)
            doc.order = position
            reordered.append(doc)

        if reordered:
            HtmlDocument.objects.bulk_update(reordered, ["order"])
        messages.success(request, "Order updated.")
        return redirect("bundle_detail", bundle_id=bundle.id)

    return render(
        request,
        "core/bundle_detail.html",
        {
            "page_title": bundle.title or "Untitled bundle",
            "page_description": "Review your bundle and generate a combined PDF.",
            "bundle": bundle,
            "documents": documents,
            "exports": exports,
        },
    )
|
|
|
|
|
|
def bundle_download(request, bundle_id: int):
    """Build the bundle's combined PDF, record the export and send the file.

    The file name is ``<slug>-<timestamp>.pdf``: the slug comes from the
    bundle title (falling back to ``bundle`` when the slug is empty) and the
    timestamp is ``timezone.now()`` formatted as ``YYYYMMDD-HHMM``.

    Args:
        request: The incoming HTTP request.
        bundle_id: Primary key of the bundle to export; unknown ids give a 404.

    Returns:
        An ``application/pdf`` response served as an attachment.
    """
    bundle = get_object_or_404(HtmlBundle, pk=bundle_id)
    slug = slugify(bundle.title) or "bundle"
    timestamp = timezone.now().strftime("%Y%m%d-%H%M")
    file_name = f"{slug}-{timestamp}.pdf"

    # Build the PDF before recording the export: if rendering raises, no
    # export row is left behind for a download that never happened.
    pdf_bytes = _build_pdf(bundle).getvalue()
    HtmlExport.objects.create(bundle=bundle, file_name=file_name)

    response = HttpResponse(pdf_bytes, content_type="application/pdf")
    response["Content-Disposition"] = f'attachment; filename="{file_name}"'
    return response
|
|
|
|
|
|
def export_download(request, export_id: int):
    """Serve a past export again under its recorded file name.

    The PDF is built afresh from the export's bundle on every call, so it
    reflects the bundle's current documents. An unknown export id gives a 404.
    """
    export_record = get_object_or_404(HtmlExport, pk=export_id)
    pdf_bytes = _build_pdf(export_record.bundle).getvalue()

    download = HttpResponse(pdf_bytes, content_type="application/pdf")
    download["Content-Disposition"] = f'attachment; filename="{export_record.file_name}"'
    return download
|