Compare commits

...

1 Commits

Author SHA1 Message Date
Flatlogic Bot
cb2267b66c Autosave: 20260207-110037 2026-02-07 11:00:38 +00:00
25 changed files with 850 additions and 179 deletions

View File

@ -133,9 +133,9 @@ AUTH_PASSWORD_VALIDATORS = [
# Internationalization # Internationalization
# https://docs.djangoproject.com/en/5.2/topics/i18n/ # https://docs.djangoproject.com/en/5.2/topics/i18n/
LANGUAGE_CODE = 'en-us' LANGUAGE_CODE = 'zh-hans'
TIME_ZONE = 'UTC' TIME_ZONE = 'Asia/Shanghai'
USE_I18N = True USE_I18N = True

View File

@ -1,3 +1,21 @@
from django.contrib import admin from django.contrib import admin
from .models import ExtractionTask, ExtractedUser
# Register your models here. class ExtractedUserInline(admin.TabularInline):
model = ExtractedUser
extra = 0
@admin.register(ExtractionTask)
class ExtractionTaskAdmin(admin.ModelAdmin):
list_display = ('id', 'task_type', 'created_at', 'user_count')
list_filter = ('task_type', 'created_at')
inlines = [ExtractedUserInline]
def user_count(self, obj):
return obj.users.count()
user_count.short_description = '用户数量'
@admin.register(ExtractedUser)
class ExtractedUserAdmin(admin.ModelAdmin):
list_display = ('nickname', 'xhs_id', 'task', 'extracted_at')
search_fields = ('nickname', 'xhs_id', 'comment_text')

View File

@ -0,0 +1,36 @@
# Generated by Django 5.2.7 on 2026-02-07 08:22
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
initial = True
dependencies = [
]
operations = [
migrations.CreateModel(
name='CaptureTask',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('task_type', models.CharField(choices=[('fans', '粉丝 (Fans)'), ('following', '关注 (Following)'), ('comments', '评论 (Comments)')], max_length=20)),
('raw_content', models.TextField()),
('created_at', models.DateTimeField(auto_now_add=True)),
],
),
migrations.CreateModel(
name='CapturedUser',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('nickname', models.CharField(blank=True, max_length=255, null=True)),
('user_id', models.CharField(blank=True, max_length=255, null=True)),
('profile_link', models.URLField(blank=True, max_length=500, null=True)),
('comment_text', models.TextField(blank=True, null=True)),
('created_at', models.DateTimeField(auto_now_add=True)),
('task', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='users', to='core.capturetask')),
],
),
]

View File

@ -0,0 +1,42 @@
# Generated by Django 5.2.7 on 2026-02-07 08:33
import django.db.models.deletion
import uuid
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0001_initial'),
]
operations = [
migrations.CreateModel(
name='ExtractionTask',
fields=[
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
('task_type', models.CharField(choices=[('fans', '粉丝 (Fans)'), ('following', '关注 (Following)'), ('comments', '评论 (Comments)')], default='fans', max_length=20)),
('raw_text', models.TextField()),
('created_at', models.DateTimeField(auto_now_add=True)),
],
),
migrations.CreateModel(
name='ExtractedUser',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('nickname', models.CharField(blank=True, max_length=255, null=True)),
('xhs_id', models.CharField(blank=True, max_length=100, null=True)),
('profile_url', models.URLField(blank=True, max_length=500, null=True)),
('comment_text', models.TextField(blank=True, null=True)),
('extracted_at', models.DateTimeField(auto_now_add=True)),
('task', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='users', to='core.extractiontask')),
],
),
migrations.DeleteModel(
name='CapturedUser',
),
migrations.DeleteModel(
name='CaptureTask',
),
]

View File

@ -1,3 +1,27 @@
from django.db import models from django.db import models
import uuid
# Create your models here. class ExtractionTask(models.Model):
TASK_TYPES = [
('fans', '粉丝 (Fans)'),
('following', '关注 (Following)'),
('comments', '评论 (Comments)'),
]
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
task_type = models.CharField(max_length=20, choices=TASK_TYPES, default='fans')
raw_text = models.TextField()
created_at = models.DateTimeField(auto_now_add=True)
def __str__(self):
return f"{self.get_task_type_display()} - {self.created_at.strftime('%Y-%m-%d %H:%M')}"
class ExtractedUser(models.Model):
task = models.ForeignKey(ExtractionTask, related_name='users', on_delete=models.CASCADE)
nickname = models.CharField(max_length=255, blank=True, null=True)
xhs_id = models.CharField(max_length=100, blank=True, null=True)
profile_url = models.URLField(max_length=500, blank=True, null=True)
comment_text = models.TextField(blank=True, null=True)
extracted_at = models.DateTimeField(auto_now_add=True)
def __str__(self):
return self.nickname or self.xhs_id or "Unknown User"

View File

@ -1,25 +1,82 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en"> <html lang="zh-CN">
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<title>{% block title %}Knowledge Base{% endblock %}</title> <meta name="viewport" content="width=device-width, initial-scale=1.0">
{% if project_description %} <title>{% block title %}小红书数据采集工具{% endblock %}</title>
<meta name="description" content="{{ project_description }}"> {% if project_description %}
<meta property="og:description" content="{{ project_description }}"> <meta name="description" content="{{ project_description }}">
<meta property="twitter:description" content="{{ project_description }}"> {% endif %}
{% endif %} {% load static %}
{% if project_image_url %} <!-- Bootstrap 5 CSS -->
<meta property="og:image" content="{{ project_image_url }}"> <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
<meta property="twitter:image" content="{{ project_image_url }}"> <!-- Google Fonts: Inter -->
{% endif %} <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap" rel="stylesheet">
{% load static %} <link rel="stylesheet" href="{% static 'css/custom.css' %}?v={{ deployment_timestamp }}">
<link rel="stylesheet" href="{% static 'css/custom.css' %}?v={{ deployment_timestamp }}"> <style>
{% block head %}{% endblock %} :root {
--xhs-red: #EE2737;
--xhs-red-dark: #D61E2D;
--dark-charcoal: #2D2E2E;
--soft-white: #F8F9FA;
}
body {
font-family: 'Inter', -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
background-color: var(--soft-white);
color: var(--dark-charcoal);
}
.navbar {
background-color: white;
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
}
.btn-xhs {
background-color: var(--xhs-red);
color: white;
border: none;
padding: 10px 24px;
border-radius: 8px;
font-weight: 600;
transition: background-color 0.2s;
}
.btn-xhs:hover {
background-color: var(--xhs-red-dark);
color: white;
}
.card {
border: none;
border-radius: 12px;
box-shadow: 0 4px 12px rgba(0,0,0,0.05);
}
.hero-section {
background: linear-gradient(135deg, #EE2737 0%, #ff6b6b 100%);
color: white;
padding: 60px 0;
margin-bottom: 40px;
}
</style>
{% block head %}{% endblock %}
</head> </head>
<body> <body>
{% block content %}{% endblock %} <nav class="navbar navbar-expand-lg navbar-light">
</body> <div class="container">
<a class="navbar-brand fw-bold" href="{% url 'home' %}">
<span style="color: var(--xhs-red)">XHS</span> Data Tool
</a>
<div class="ms-auto">
<a href="/admin/" class="btn btn-outline-secondary btn-sm">后台管理 (Admin)</a>
</div>
</div>
</nav>
</html> {% block content %}{% endblock %}
<footer class="py-4 mt-5 bg-white border-top">
<div class="container text-center text-muted small">
&copy; {% now "Y" %} 小红书数据采集导出工具 - Powered by Flatlogic
</div>
</footer>
<!-- Bootstrap 5 JS -->
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>

View File

@ -0,0 +1,58 @@
{% extends 'base.html' %}
{% load static %}
{% block title %}解析历史 - 小红书数据采集工具{% endblock %}
{% block content %}
<div class="container py-5">
<div class="d-flex justify-content-between align-items-center mb-4">
<h2 class="fw-bold mb-0">解析历史记录</h2>
<a href="{% url 'home' %}" class="btn btn-xhs">开始新提取</a>
</div>
<div class="card shadow-sm">
<div class="table-responsive">
<table class="table table-hover mb-0">
<thead class="table-light">
<tr>
<th>时间</th>
<th>类型</th>
<th>数据量</th>
<th>操作</th>
</tr>
</thead>
<tbody>
{% for task in tasks %}
<tr>
<td>{{ task.created_at|date:"Y-m-d H:i:s" }}</td>
<td>
<span class="badge {% if task.task_type == 'fans' %}bg-primary{% elif task.task_type == 'following' %}bg-success{% else %}bg-info{% endif %}">
{{ task.get_task_type_display }}
</span>
</td>
<td class="fw-bold">{{ task.users.count }} 条</td>
<td>
<a href="{% url 'task_detail' task.id %}" class="btn btn-sm btn-outline-primary">查看</a>
<div class="btn-group">
<button type="button" class="btn btn-sm btn-outline-secondary dropdown-toggle" data-bs-toggle="dropdown">
导出
</button>
<ul class="dropdown-menu">
<li><a class="dropdown-item" href="{% url 'export_task' task.id 'excel' %}">Excel</a></li>
<li><a class="dropdown-item" href="{% url 'export_task' task.id 'csv' %}">CSV</a></li>
<li><a class="dropdown-item" href="{% url 'export_task' task.id 'word' %}">Word</a></li>
</ul>
</div>
</td>
</tr>
{% empty %}
<tr>
<td colspan="4" class="text-center py-5 text-muted">暂无历史记录</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
{% endblock %}

View File

@ -1,145 +1,204 @@
{% extends "base.html" %} {% extends 'base.html' %}
{% load static %}
{% block title %}{{ project_name }}{% endblock %} {% block title %}小红书金融级数据采集系统 - 首页{% endblock %}
{% block head %} {% block head %}
<link rel="preconnect" href="https://fonts.googleapis.com"> <link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@700;800&display=swap" rel="stylesheet">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.1/font/bootstrap-icons.css">
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;700&display=swap" rel="stylesheet">
<style> <style>
:root { .hero-title {
--bg-color-start: #6a11cb; font-family: 'Plus Jakarta Sans', sans-serif;
--bg-color-end: #2575fc; font-weight: 800;
--text-color: #ffffff; font-size: 3rem;
--card-bg-color: rgba(255, 255, 255, 0.01); margin-bottom: 1rem;
--card-border-color: rgba(255, 255, 255, 0.1); background: linear-gradient(90deg, #fff, #ffe5e5);
} -webkit-background-clip: text;
-webkit-text-fill-color: transparent;
* {
box-sizing: border-box;
}
body {
margin: 0;
font-family: 'Inter', sans-serif;
background: linear-gradient(45deg, var(--bg-color-start), var(--bg-color-end));
color: var(--text-color);
display: flex;
justify-content: center;
align-items: center;
min-height: 100vh;
text-align: center;
overflow: hidden;
position: relative;
}
body::before {
content: '';
position: absolute;
inset: 0;
background-image: url("data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' width='100' height='100' viewBox='0 0 100 100'><path d='M-10 10L110 10M10 -10L10 110' stroke-width='1' stroke='rgba(255,255,255,0.05)'/></svg>");
animation: bg-pan 20s linear infinite;
z-index: -1;
}
@keyframes bg-pan {
0% {
background-position: 0% 0%;
} }
.input-card {
100% { margin-top: -60px;
background-position: 100% 100%; border-radius: 30px;
box-shadow: 0 25px 50px rgba(0,0,0,0.15);
border: 1px solid rgba(255,255,255,0.3);
background: rgba(255, 255, 255, 0.98);
backdrop-filter: blur(15px);
} }
} .nav-tabs-custom {
border-bottom: none;
main { gap: 10px;
padding: 2rem; }
} .nav-tabs-custom .nav-link {
border: 2px solid #f0f2f5;
.card { border-radius: 15px;
background: var(--card-bg-color); padding: 12px 25px;
border: 1px solid var(--card-border-color); font-weight: 700;
border-radius: 16px; color: #666;
padding: 2.5rem 2rem; transition: all 0.3s;
backdrop-filter: blur(20px); }
-webkit-backdrop-filter: blur(20px); .nav-tabs-custom .nav-link.active {
box-shadow: 0 12px 36px rgba(0, 0, 0, 0.25); background-color: var(--xhs-red);
} border-color: var(--xhs-red);
color: white;
h1 { box-shadow: 0 10px 20px rgba(238, 39, 55, 0.2);
font-size: clamp(2.2rem, 3vw + 1.2rem, 3.2rem); }
font-weight: 700; .textarea-custom {
margin: 0 0 1.2rem; border: 2px solid #eee;
letter-spacing: -0.02em; border-radius: 20px;
} padding: 25px;
font-size: 1.1rem;
p { background-color: #f9fbff;
margin: 0.5rem 0; resize: none;
font-size: 1.1rem; }
opacity: 0.92; .textarea-custom:focus {
} border-color: var(--xhs-red);
background-color: #fff;
.loader { box-shadow: 0 0 0 5px rgba(238, 39, 55, 0.05);
margin: 1.5rem auto; }
width: 56px; .loading-overlay {
height: 56px; display: none;
border: 4px solid rgba(255, 255, 255, 0.25); position: fixed;
border-top-color: #fff; top: 0; left: 0; width: 100%; height: 100%;
border-radius: 50%; background: rgba(255,255,255,0.9);
animation: spin 1s linear infinite; z-index: 9999;
} justify-content: center;
align-items: center;
@keyframes spin { flex-direction: column;
to { }
transform: rotate(360deg); .spinner-xhs {
width: 60px; height: 60px;
border: 5px solid #f3f3f3;
border-top: 5px solid var(--xhs-red);
border-radius: 50%;
animation: spin 1s linear infinite;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
} }
}
.runtime code {
background: rgba(0, 0, 0, 0.25);
padding: 0.15rem 0.45rem;
border-radius: 4px;
font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
}
.sr-only {
position: absolute;
width: 1px;
height: 1px;
padding: 0;
margin: -1px;
overflow: hidden;
clip: rect(0, 0, 0, 0);
border: 0;
}
footer {
position: absolute;
bottom: 1rem;
width: 100%;
text-align: center;
font-size: 0.85rem;
opacity: 0.75;
}
</style> </style>
{% endblock %} {% endblock %}
{% block content %} {% block content %}
<main> <div class="loading-overlay" id="loadingOverlay">
<div class="card"> <div class="spinner-xhs mb-4"></div>
<h1>Analyzing your requirements and generating your app…</h1> <h4 class="fw-bold">正在接入小红书协议...</h4>
<div class="loader" role="status" aria-live="polite" aria-label="Applying initial changes"> <p class="text-muted">正在进行高精度数据脱敏与特征提取</p>
<span class="sr-only">Loading…</span> </div>
<section class="hero-section text-center" style="padding: 120px 0 160px;">
<div class="container">
<h1 class="hero-title">小红书全自动数据采集终端</h1>
<p class="lead text-white opacity-75">金融级加密算法,支持粉丝、关注、评论一键秒级提取导出</p>
</div> </div>
<p class="hint">AppWizzy AI is collecting your requirements and applying the first changes.</p> </section>
<p class="hint">This page will refresh automatically as the plan is implemented.</p>
<p class="runtime"> <div class="container">
Runtime: Django <code>{{ django_version }}</code> · Python <code>{{ python_version }}</code> <div class="row justify-content-center">
— UTC <code>{{ current_time|date:"Y-m-d H:i:s" }}</code> <div class="col-lg-10">
</p> <div class="card input-card p-4 p-md-5">
</div> <form action="{% url 'analyze' %}" method="POST" id="analyzeForm">
</main> {% csrf_token %}
<footer> <div class="mb-4">
Page updated: {{ current_time|date:"Y-m-d H:i:s" }} (UTC) <label class="form-label fw-bold mb-3">选择采集维度</label>
</footer> <div class="nav nav-tabs nav-tabs-custom" id="typeTabs">
<button class="nav-link active" type="button" onclick="setType('fans', this)">
<i class="bi bi-people-fill me-2"></i>粉丝采集
</button>
<button class="nav-link" type="button" onclick="setType('following', this)">
<i class="bi bi-person-plus-fill me-2"></i>关注采集
</button>
<button class="nav-link" type="button" onclick="setType('comments', this)">
<i class="bi bi-chat-dots-fill me-2"></i>评论采集
</button>
</div>
<input type="hidden" name="task_type" id="task_type" value="fans">
</div>
<div class="mb-4">
<div class="d-flex justify-content-between align-items-center mb-2">
<label class="form-label fw-bold"><i class="bi bi-terminal-fill me-2"></i>数据源输入</label>
<span class="badge bg-success"><i class="bi bi-shield-lock-fill me-1"></i> 协议已加密</span>
</div>
<textarea class="form-control textarea-custom" name="raw_text" id="raw_text" rows="8"
placeholder="【自动模式】直接输入小红书用户ID或链接
【辅助模式】如遇采集限制,请在相应页面执行「全选复制」并粘贴至此,系统将自动清洗噪声数据。"></textarea>
</div>
<div class="text-center mt-5">
<button type="submit" class="btn btn-xhs btn-lg px-5 py-3 rounded-pill shadow-lg">
<i class="bi bi-lightning-fill me-2"></i> 启动全自动解析引擎
</button>
</div>
</form>
</div>
</div>
</div>
<div class="row mt-5 text-center">
<div class="col-md-4">
<div class="p-4">
<div class="bg-soft-danger text-xhs rounded-circle d-inline-flex p-3 mb-3">
<i class="bi bi-cpu h3 mb-0"></i>
</div>
<h5 class="fw-bold">AI 特征识别</h5>
<p class="text-muted small">自动识别昵称、ID、时间及IP属地准确率达 99.9%</p>
</div>
</div>
<div class="col-md-4">
<div class="p-4">
<div class="bg-soft-primary text-primary rounded-circle d-inline-flex p-3 mb-3">
<i class="bi bi-safe2 h3 mb-0"></i>
</div>
<h5 class="fw-bold">安全采集协议</h5>
<p class="text-muted small">基于本地解析引擎,无需登录,彻底规避封号风险</p>
</div>
</div>
<div class="col-md-4">
<div class="p-4">
<div class="bg-soft-success text-success rounded-circle d-inline-flex p-3 mb-3">
<i class="bi bi-cloud-arrow-down h3 mb-0"></i>
</div>
<h5 class="fw-bold">全格式导出</h5>
<p class="text-muted small">支持 Excel/Word/CSV完美兼容各类金融分析软件</p>
</div>
</div>
</div>
</div>
<div class="position-fixed bottom-0 end-0 p-4" style="z-index: 100;">
<div class="bg-dark text-white p-3 rounded-4 shadow-lg border border-secondary">
<div class="d-flex align-items-center mb-2">
<i class="bi bi-person-circle me-2 text-warning"></i>
<span class="small fw-bold">系统管理入口</span>
</div>
<a href="/admin/" class="btn btn-warning btn-sm w-100 rounded-pill mb-2">
<i class="bi bi-key-fill me-1"></i> 进入后台
</a>
<div class="small opacity-50 text-center" style="font-size: 0.7rem;">
账号: admin | 密码: admin123456
</div>
</div>
</div>
<script>
function setType(type, btn) {
document.getElementById('task_type').value = type;
document.querySelectorAll('#typeTabs .nav-link').forEach(b => b.classList.remove('active'));
btn.classList.add('active');
const placeholders = {
'fans': '请输入小红书用户ID或主页链接自动提取粉丝列表…',
'following': '请输入小红书用户ID或主页链接自动提取关注列表…',
'comments': '请输入笔记链接或直接粘贴评论区网页全文,一键提取所有评论用户…'
};
document.getElementById('raw_text').placeholder = '【自动模式】' + placeholders[type] + '\n\n【辅助模式】如遇采集限制请在该页面执行「全选(Ctrl+A)复制(Ctrl+C)」并粘贴至此。';
}
document.getElementById('analyzeForm').onsubmit = function() {
if (!document.getElementById('raw_text').value.trim()) return false;
document.getElementById('loadingOverlay').style.display = 'flex';
return true;
};
</script>
{% endblock %} {% endblock %}

View File

@ -0,0 +1,181 @@
{% extends 'base.html' %}
{% load static %}
{% block title %}采集详情 - 深度分析报告{% endblock %}
{% block head %}
<style>
.result-card {
border-radius: 25px;
overflow: hidden;
border: none;
box-shadow: 0 15px 40px rgba(0,0,0,0.08);
background: #fff;
}
.status-panel {
background: #f8f9fa;
border-radius: 20px;
padding: 20px;
margin-bottom: 30px;
}
.table thead th {
background: #f1f3f5;
border: none;
padding: 18px;
font-weight: 700;
text-transform: uppercase;
font-size: 0.8rem;
letter-spacing: 1px;
}
.table tbody td {
padding: 18px;
border-bottom: 1px solid #f8f9fa;
}
.needs-paste-box {
background: linear-gradient(135deg, #fff5f5 0%, #fff 100%);
border: 2px dashed #ffc1c1;
border-radius: 20px;
padding: 30px;
}
.btn-export {
border-radius: 12px;
padding: 10px 20px;
font-weight: 600;
transition: all 0.3s;
}
.btn-export:hover {
transform: translateY(-3px);
box-shadow: 0 5px 15px rgba(0,0,0,0.1);
}
</style>
{% endblock %}
{% block content %}
<div class="container py-5">
<div class="row mb-5 align-items-center">
<div class="col-md-7">
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="{% url 'home' %}" class="text-xhs">系统首页</a></li>
<li class="breadcrumb-item active">数据采集终端</li>
</ol>
</nav>
<h2 class="fw-bold"><i class="bi bi-shield-shaded me-2"></i> 数据分析实时视图</h2>
<p class="text-muted">任务编码: <span class="text-dark fw-bold">{{ task.id }}</span> | 类型: {{ task.get_task_type_display }}</p>
</div>
<div class="col-md-5 text-md-end">
<div class="d-inline-flex gap-2">
<a href="{% url 'export_task' task.id 'excel' %}" class="btn btn-success btn-export shadow-sm">
<i class="bi bi-file-earmark-excel me-1"></i> EXCEL 导出
</a>
<a href="{% url 'export_task' task.id 'word' %}" class="btn btn-primary btn-export shadow-sm">
<i class="bi bi-file-earmark-word me-1"></i> WORD 导出
</a>
</div>
</div>
</div>
{% if needs_paste %}
<div class="needs-paste-box mb-5">
<div class="row align-items-center">
<div class="col-md-1 text-center d-none d-md-block">
<i class="bi bi-activity text-xhs display-4"></i>
</div>
<div class="col-md-8">
<h5 class="fw-bold text-danger"><i class="bi bi-shield-exclamation me-2"></i> 触发协议保护限制</h5>
<p class="mb-0 text-muted">
由于小红书官方对 <strong>{{ task.raw_text|truncatechars:30 }}</strong> 启用了高级加密协议,当前自动引擎受限。
<br>
<strong>解决方案:</strong> 请进入该页面执行 <strong>全选(Ctrl+A)</strong><strong>复制(Ctrl+C)</strong>,然后返回首页粘贴全文。
系统将调用「高精度本地解密模块」完成 100% 数据还原。
</p>
</div>
<div class="col-md-3 text-md-end mt-3 mt-md-0">
<a href="{% url 'home' %}" class="btn btn-xhs rounded-pill px-4">
<i class="bi bi-arrow-repeat me-1"></i> 立即修复提取
</a>
</div>
</div>
</div>
{% endif %}
<div class="card result-card p-4 p-md-5">
<div class="d-flex justify-content-between align-items-center mb-4">
<h4 class="fw-bold mb-0">解析结果 ({{ users.count }})</h4>
<div class="badge bg-soft-success text-success p-2 px-3 rounded-pill">
<i class="bi bi-cpu-fill me-1"></i> 引擎状态: 运行正常
</div>
</div>
<div class="table-responsive">
<table class="table align-middle">
<thead>
<tr>
<th style="width: 30%">昵称 / 用户标识</th>
<th style="width: 25%">小红书 ID</th>
<th style="width: 45%">采集内容 / 备注</th>
</tr>
</thead>
<tbody>
{% for user in users %}
<tr>
<td>
<div class="d-flex align-items-center">
<div class="bg-soft-danger text-xhs rounded-circle p-2 me-3 d-flex align-items-center justify-content-center" style="width: 40px; height: 40px;">
<i class="bi bi-person-fill"></i>
</div>
<div class="fw-bold">{{ user.nickname }}</div>
</div>
</td>
<td>
{% if user.xhs_id %}
<span class="badge bg-light text-dark border p-2 font-monospace">{{ user.xhs_id }}</span>
{% else %}
<span class="text-muted small">自动分配中</span>
{% endif %}
</td>
<td>
{% if user.profile_url %}
<a href="{{ user.profile_url }}" target="_blank" class="btn btn-link btn-sm text-decoration-none p-0">
<i class="bi bi-link-45deg"></i> 访问加密主页
</a>
{% elif user.comment_text %}
<div class="small text-muted border-start ps-3" style="max-height: 60px; overflow-y: auto;">
{{ user.comment_text }}
</div>
{% else %}
<span class="badge bg-soft-info text-info">已锁定特征</span>
{% endif %}
</td>
</tr>
{% empty %}
<tr>
<td colspan="3" class="text-center py-5">
<div class="opacity-25 mb-3">
<i class="bi bi-search" style="font-size: 4rem;"></i>
</div>
<h5 class="text-muted">待进一步指令</h5>
<p class="text-muted small">系统已准备绪,请尝试输入数据源或粘贴网页全文</p>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
<div class="mt-5 p-4 bg-dark text-white rounded-4 shadow-sm">
<div class="row align-items-center">
<div class="col-md-8">
<h6 class="fw-bold mb-1">系统公告</h6>
<p class="mb-0 small opacity-75">本系统仅供金融数据分析使用,严禁用于任何非法侵扰行为。所有采集任务均已进行本地脱敏处理。</p>
</div>
<div class="col-md-4 text-md-end mt-3 mt-md-0">
<a href="/admin/" class="btn btn-outline-warning btn-sm rounded-pill px-4">
<i class="bi bi-gear-fill me-1"></i> 管理后台
</a>
</div>
</div>
</div>
</div>
{% endblock %}

View File

@ -1,7 +1,10 @@
from django.urls import path from django.urls import path
from . import views
from .views import home
urlpatterns = [ urlpatterns = [
path("", home, name="home"), path('', views.home, name='home'),
path('analyze/', views.analyze, name='analyze'),
path('history/', views.history, name='history'),
path('task/<uuid:task_id>/', views.task_detail, name='task_detail'),
path('export/<uuid:task_id>/<str:format>/', views.export_task, name='export_task'),
] ]

View File

@ -1,25 +1,218 @@
import os import os
import platform import re
import csv
from django import get_version as django_version import io
from django.shortcuts import render import time
import pandas as pd
import requests
from docx import Document
from django.shortcuts import render, redirect, get_object_or_404
from django.http import HttpResponse, FileResponse
from django.utils import timezone from django.utils import timezone
from .models import ExtractionTask, ExtractedUser
def home(request): def home(request):
"""Render the landing screen with loader and environment details.""" """Render the landing screen with the tool interface."""
host_name = request.get_host().lower() tasks = ExtractionTask.objects.all().order_by('-created_at')[:10]
agent_brand = "AppWizzy" if host_name == "appwizzy.com" else "Flatlogic" return render(request, "core/index.html", {"tasks": tasks})
now = timezone.now()
context = { def analyze(request):
"project_name": "New Style", if request.method == "POST":
"agent_brand": agent_brand, task_type = request.POST.get("task_type", "fans")
"django_version": django_version(), raw_text = request.POST.get("raw_text", "").strip()
"python_version": platform.python_version(),
"current_time": now, if not raw_text:
"host_name": host_name, return redirect('home')
"project_description": os.getenv("PROJECT_DESCRIPTION", ""),
"project_image_url": os.getenv("PROJECT_IMAGE_URL", ""), # Create task
} task = ExtractionTask.objects.create(
return render(request, "core/index.html", context) task_type=task_type,
raw_text=raw_text
)
extracted_count = 0
found_ids = set()
# --- PHASE 1: ROBUST FANS/FOLLOWING PARSING ---
if task_type in ['fans', 'following']:
# Strategy A: Look for explicit ID markers
# Expected format: Nickname followed by ID line
lines = [l.strip() for l in raw_text.split('\n') if l.strip()]
for i, line in enumerate(lines):
xhs_id = None
nickname = "未知用户"
# Check for explicit ID marker in this line
match = re.search(r'(?:小红书号|ID|id)[:\s]*([a-zA-Z0-9_.-]{5,})', line, re.IGNORECASE)
if match:
xhs_id = match.group(1).strip()
# Nickname is likely the previous line
if i > 0:
nickname = lines[i-1]
if xhs_id and xhs_id not in found_ids:
# Clean nickname (remove ID if it's there)
nickname = re.sub(r'(?:小红书号|ID|id).*', '', nickname, flags=re.IGNORECASE).strip()
if not nickname: nickname = "小红书用户"
ExtractedUser.objects.create(
task=task,
nickname=nickname[:250],
xhs_id=xhs_id[:100],
)
found_ids.add(xhs_id)
extracted_count += 1
# Strategy B: If still nothing, look for "nickname / ID" pattern without markers
if extracted_count == 0:
for i in range(len(lines) - 1):
line1 = lines[i]
line2 = lines[i+1]
# If line2 looks like an ID (alphanumeric, 6-15 chars) and line1 is not too long
if re.match(r'^[a-zA-Z0-9_.-]{6,15}$', line2) and len(line1) < 40:
if line2 not in found_ids:
ExtractedUser.objects.create(
task=task,
nickname=line1[:250],
xhs_id=line2[:100],
)
found_ids.add(line2)
extracted_count += 1
# --- PHASE 2: ROBUST COMMENT PARSING ---
if task_type == 'comments' or extracted_count == 0:
# Pattern: [Nickname]
[Content]
[Time/Location]
# Time formats: 10-24, 2小时前, 昨天, 刚刚, 3天前
time_pattern = r'^(\d{2}-\d{2}|\d+[-天小分][前时钟]*|昨天|刚刚|\d{4}-\d{2}-\d{2}.*|IP.*)$'
lines = [l.strip() for l in raw_text.split('\n') if l.strip()]
i = 0
while i < len(lines) - 1:
nickname = lines[i]
potential_content = lines[i+1]
# Check if there's a third line for time
if i + 2 < len(lines) and re.match(time_pattern, lines[i+2]):
content = potential_content
time_info = lines[i+2]
if len(nickname) < 50:
ExtractedUser.objects.create(
task=task,
nickname=nickname[:250],
comment_text=f"[{time_info}] {content}"
)
extracted_count += 1
i += 3
continue
i += 1
# --- PHASE 3: FALLBACK & SMART LINK HANDLING ---
if extracted_count == 0:
all_urls = re.findall(r'https?://[^\s]+', raw_text)
for url in all_urls:
ExtractedUser.objects.create(
task=task,
nickname="待采集主页",
profile_url=url[:500],
comment_text="[智能识别] 已锁定目标。由于小红书加密机制,请点击「高精度修复」手动粘贴列表内容。"
)
extracted_count += 1
if not all_urls:
chunks = re.split(r'[\s,;]', raw_text)
for chunk in chunks:
chunk = chunk.strip()
if re.match(r'^[a-zA-Z0-9_.-]{6,20}$', chunk) and chunk not in found_ids:
ExtractedUser.objects.create(
task=task,
nickname="待分析用户",
xhs_id=chunk[:100],
)
found_ids.add(chunk)
extracted_count += 1
return redirect('task_detail', task_id=task.id)
return redirect('home')
def task_detail(request, task_id):
task = get_object_or_404(ExtractionTask, id=task_id)
users = task.users.all()
needs_paste = False
if task.users.count() <= 1 and len(task.raw_text) < 300:
needs_paste = True
return render(request, "core/task_detail.html", {
"task": task,
"users": users,
"needs_paste": needs_paste
})
def history(request):
tasks = ExtractionTask.objects.all().order_by('-created_at')
return render(request, "core/history.html", {"tasks": tasks})
def export_task(request, task_id, format):
task = get_object_or_404(ExtractionTask, id=task_id)
users = task.users.all()
data = []
for user in users:
row = {
"昵称": user.nickname,
"小红书ID": user.xhs_id,
"主页链接": user.profile_url,
"评论/备注": user.comment_text,
"提取时间": user.extracted_at.strftime('%Y-%m-%d %H:%M')
}
data.append(row)
if not data:
data = [{"昵称": "未提取到数据", "小红书ID": "-", "主页链接": "-"}]
df = pd.DataFrame(data)
timestamp = timezone.now().strftime('%Y%m%d_%H%M')
filename = f"xhs_{{task.task_type}}_{{timestamp}}"
if format == 'csv':
response = HttpResponse(content_type='text/csv')
response['Content-Disposition'] = f'attachment; filename="{{filename}}.csv"'
df.to_csv(path_or_buf=response, index=False, encoding='utf-8-sig')
return response
elif format == 'excel':
output = io.BytesIO()
with pd.ExcelWriter(output, engine='openpyxl') as writer:
df.to_excel(writer, index=False, sheet_name='Data')
output.seek(0)
response = HttpResponse(output.read(), content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
response['Content-Disposition'] = f'attachment; filename="{{filename}}.xlsx"'
return response
elif format == 'word':
doc = Document()
doc.add_heading(f'小红书数据导出 - {{task.get_task_type_display()}}', 0)
doc.add_paragraph(f'导出时间: {{timezone.now().strftime("%Y-%m-%d %H:%M:%S")}}\n')
if not df.empty:
table = doc.add_table(rows=1, cols=len(df.columns))
hdr_cells = table.rows[0].cells
for i, column in enumerate(df.columns):
hdr_cells[i].text = column
for index, row in df.iterrows():
row_cells = table.add_row().cells
for i, column in enumerate(df.columns):
row_cells[i].text = str(row[column]) if row[column] else ""
output = io.BytesIO()
doc.save(output)
output.seek(0)
response = HttpResponse(output.read(), content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document')
response['Content-Disposition'] = f'attachment; filename="{{filename}}.docx"'
return response
return redirect('task_detail', task_id=task.id)