Autosave: 20260207-110037
This commit is contained in:
parent
7af198c681
commit
cb2267b66c
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -133,9 +133,9 @@ AUTH_PASSWORD_VALIDATORS = [
|
|||||||
# Internationalization
|
# Internationalization
|
||||||
# https://docs.djangoproject.com/en/5.2/topics/i18n/
|
# https://docs.djangoproject.com/en/5.2/topics/i18n/
|
||||||
|
|
||||||
LANGUAGE_CODE = 'en-us'
|
LANGUAGE_CODE = 'zh-hans'
|
||||||
|
|
||||||
TIME_ZONE = 'UTC'
|
TIME_ZONE = 'Asia/Shanghai'
|
||||||
|
|
||||||
USE_I18N = True
|
USE_I18N = True
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,3 +1,21 @@
|
|||||||
from django.contrib import admin
|
from django.contrib import admin
|
||||||
|
from .models import ExtractionTask, ExtractedUser
|
||||||
|
|
||||||
# Register your models here.
|
class ExtractedUserInline(admin.TabularInline):
    """Tabular inline that edits ExtractedUser rows from another model's admin page."""

    model = ExtractedUser
    # Render only rows that already exist; no blank extra forms are appended.
    extra = 0
|
||||||
|
|
||||||
|
@admin.register(ExtractionTask)
class ExtractionTaskAdmin(admin.ModelAdmin):
    """Admin changelist/detail configuration for extraction tasks.

    The changelist shows a per-task count of related ``ExtractedUser`` rows.
    The count is computed once for the whole page with an annotation instead
    of issuing a separate ``COUNT`` query for every row.
    """

    list_display = ('id', 'task_type', 'created_at', 'user_count')
    list_filter = ('task_type', 'created_at')
    inlines = [ExtractedUserInline]

    def get_queryset(self, request):
        """Return the base queryset annotated with ``_user_count``."""
        # Local import keeps this block self-contained; the module only
        # imports ``admin`` and the two models at file level.
        from django.db.models import Count

        return super().get_queryset(request).annotate(_user_count=Count('users'))

    @admin.display(description='用户数量', ordering='_user_count')
    def user_count(self, obj):
        """Return how many extracted users belong to ``obj``."""
        annotated = getattr(obj, '_user_count', None)
        if annotated is not None:
            return annotated
        # Fallback for instances that did not come from get_queryset().
        return obj.users.count()
|
||||||
|
|
||||||
|
@admin.register(ExtractedUser)
class ExtractedUserAdmin(admin.ModelAdmin):
    """Admin changelist configuration for individual extracted users."""

    # Columns shown in the changelist.
    list_display = ('nickname', 'xhs_id', 'task', 'extracted_at')
    # Fields matched by the admin search box.
    search_fields = ('nickname', 'xhs_id', 'comment_text')
|
||||||
36
core/migrations/0001_initial.py
Normal file
36
core/migrations/0001_initial.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
# Generated by Django 5.2.7 on 2026-02-07 08:22
|
||||||
|
|
||||||
|
import django.db.models.deletion
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Initial schema: creates the CaptureTask and CapturedUser models."""

    initial = True

    dependencies = [
    ]

    operations = [
        # A submitted task: its type, the raw submitted content and a creation timestamp.
        migrations.CreateModel(
            name='CaptureTask',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('task_type', models.CharField(choices=[('fans', '粉丝 (Fans)'), ('following', '关注 (Following)'), ('comments', '评论 (Comments)')], max_length=20)),
                ('raw_content', models.TextField()),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
        ),
        # One user row per task; rows are deleted together with their task (CASCADE).
        migrations.CreateModel(
            name='CapturedUser',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('nickname', models.CharField(blank=True, max_length=255, null=True)),
                ('user_id', models.CharField(blank=True, max_length=255, null=True)),
                ('profile_link', models.URLField(blank=True, max_length=500, null=True)),
                ('comment_text', models.TextField(blank=True, null=True)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('task', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='users', to='core.capturetask')),
            ],
        ),
    ]
|
||||||
@ -0,0 +1,42 @@
|
|||||||
|
# Generated by Django 5.2.7 on 2026-02-07 08:33
|
||||||
|
|
||||||
|
import django.db.models.deletion
|
||||||
|
import uuid
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Replace CaptureTask/CapturedUser with ExtractionTask/ExtractedUser.

    The new models are created and the old ones are deleted; this migration
    contains no operation that copies rows from the old tables to the new ones.
    """

    dependencies = [
        ('core', '0001_initial'),
    ]

    operations = [
        # New task model keyed by a UUID primary key instead of an auto-increment id.
        migrations.CreateModel(
            name='ExtractionTask',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('task_type', models.CharField(choices=[('fans', '粉丝 (Fans)'), ('following', '关注 (Following)'), ('comments', '评论 (Comments)')], default='fans', max_length=20)),
                ('raw_text', models.TextField()),
                ('created_at', models.DateTimeField(auto_now_add=True)),
            ],
        ),
        # New per-user model; rows are deleted together with their task (CASCADE).
        migrations.CreateModel(
            name='ExtractedUser',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('nickname', models.CharField(blank=True, max_length=255, null=True)),
                ('xhs_id', models.CharField(blank=True, max_length=100, null=True)),
                ('profile_url', models.URLField(blank=True, max_length=500, null=True)),
                ('comment_text', models.TextField(blank=True, null=True)),
                ('extracted_at', models.DateTimeField(auto_now_add=True)),
                ('task', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='users', to='core.extractiontask')),
            ],
        ),
        # The model holding the foreign key is removed before the model it points to.
        migrations.DeleteModel(
            name='CapturedUser',
        ),
        migrations.DeleteModel(
            name='CaptureTask',
        ),
    ]
|
||||||
BIN
core/migrations/__pycache__/0001_initial.cpython-311.pyc
Normal file
BIN
core/migrations/__pycache__/0001_initial.cpython-311.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,3 +1,27 @@
|
|||||||
from django.db import models
|
from django.db import models
|
||||||
|
import uuid
|
||||||
|
|
||||||
# Create your models here.
|
class ExtractionTask(models.Model):
    """A single extraction job: its type, the submitted raw text and creation time."""

    # (stored value, human-readable label) pairs for ``task_type``.
    TASK_TYPES = [
        ('fans', '粉丝 (Fans)'),
        ('following', '关注 (Following)'),
        ('comments', '评论 (Comments)'),
    ]

    # Random UUID primary key, generated on instantiation and not editable in forms.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    task_type = models.CharField(max_length=20, choices=TASK_TYPES, default='fans')
    raw_text = models.TextField()
    # Filled in automatically on first save; ``None`` until then.
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        """Return ``"<type label> - <YYYY-MM-DD HH:MM>"`` for this task.

        An instance that has not been saved yet has no ``created_at``; it is
        rendered with an ``(unsaved)`` marker instead of raising
        ``AttributeError``. Aware timestamps are converted to the active time
        zone so the text matches what templates display.
        """
        from django.utils import timezone

        label = self.get_task_type_display()
        stamp = self.created_at
        if stamp is None:
            return f"{label} - (unsaved)"
        # localtime() rejects naive datetimes, so convert only aware values.
        if timezone.is_aware(stamp):
            stamp = timezone.localtime(stamp)
        return f"{label} - {stamp.strftime('%Y-%m-%d %H:%M')}"
|
||||||
|
|
||||||
|
class ExtractedUser(models.Model):
    """One user record produced by an extraction task."""

    # Owning task; deleting the task deletes its users. Reverse accessor: ``task.users``.
    task = models.ForeignKey(ExtractionTask, related_name='users', on_delete=models.CASCADE)
    nickname = models.CharField(max_length=255, blank=True, null=True)
    xhs_id = models.CharField(max_length=100, blank=True, null=True)
    profile_url = models.URLField(max_length=500, blank=True, null=True)
    comment_text = models.TextField(blank=True, null=True)
    # Set automatically when the row is first saved.
    extracted_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        """Return the nickname, else the xhs_id, else a fixed placeholder."""
        for candidate in (self.nickname, self.xhs_id):
            if candidate:
                return candidate
        return "Unknown User"
|
||||||
|
|||||||
@ -1,25 +1,82 @@
|
|||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="en">
|
<html lang="zh-CN">
|
||||||
|
|
||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
<title>{% block title %}Knowledge Base{% endblock %}</title>
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>{% block title %}小红书数据采集工具{% endblock %}</title>
|
||||||
{% if project_description %}
|
{% if project_description %}
|
||||||
<meta name="description" content="{{ project_description }}">
|
<meta name="description" content="{{ project_description }}">
|
||||||
<meta property="og:description" content="{{ project_description }}">
|
|
||||||
<meta property="twitter:description" content="{{ project_description }}">
|
|
||||||
{% endif %}
|
|
||||||
{% if project_image_url %}
|
|
||||||
<meta property="og:image" content="{{ project_image_url }}">
|
|
||||||
<meta property="twitter:image" content="{{ project_image_url }}">
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% load static %}
|
{% load static %}
|
||||||
|
<!-- Bootstrap 5 CSS -->
|
||||||
|
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
|
||||||
|
<!-- Google Fonts: Inter -->
|
||||||
|
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap" rel="stylesheet">
|
||||||
<link rel="stylesheet" href="{% static 'css/custom.css' %}?v={{ deployment_timestamp }}">
|
<link rel="stylesheet" href="{% static 'css/custom.css' %}?v={{ deployment_timestamp }}">
|
||||||
|
<style>
|
||||||
|
:root {
|
||||||
|
--xhs-red: #EE2737;
|
||||||
|
--xhs-red-dark: #D61E2D;
|
||||||
|
--dark-charcoal: #2D2E2E;
|
||||||
|
--soft-white: #F8F9FA;
|
||||||
|
}
|
||||||
|
body {
|
||||||
|
font-family: 'Inter', -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
|
||||||
|
background-color: var(--soft-white);
|
||||||
|
color: var(--dark-charcoal);
|
||||||
|
}
|
||||||
|
.navbar {
|
||||||
|
background-color: white;
|
||||||
|
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
|
||||||
|
}
|
||||||
|
.btn-xhs {
|
||||||
|
background-color: var(--xhs-red);
|
||||||
|
color: white;
|
||||||
|
border: none;
|
||||||
|
padding: 10px 24px;
|
||||||
|
border-radius: 8px;
|
||||||
|
font-weight: 600;
|
||||||
|
transition: background-color 0.2s;
|
||||||
|
}
|
||||||
|
.btn-xhs:hover {
|
||||||
|
background-color: var(--xhs-red-dark);
|
||||||
|
color: white;
|
||||||
|
}
|
||||||
|
.card {
|
||||||
|
border: none;
|
||||||
|
border-radius: 12px;
|
||||||
|
box-shadow: 0 4px 12px rgba(0,0,0,0.05);
|
||||||
|
}
|
||||||
|
.hero-section {
|
||||||
|
background: linear-gradient(135deg, #EE2737 0%, #ff6b6b 100%);
|
||||||
|
color: white;
|
||||||
|
padding: 60px 0;
|
||||||
|
margin-bottom: 40px;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
{% block head %}{% endblock %}
|
{% block head %}{% endblock %}
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
{% block content %}{% endblock %}
|
<nav class="navbar navbar-expand-lg navbar-light">
|
||||||
</body>
|
<div class="container">
|
||||||
|
<a class="navbar-brand fw-bold" href="{% url 'home' %}">
|
||||||
|
<span style="color: var(--xhs-red)">XHS</span> Data Tool
|
||||||
|
</a>
|
||||||
|
<div class="ms-auto">
|
||||||
|
<a href="/admin/" class="btn btn-outline-secondary btn-sm">后台管理 (Admin)</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
{% block content %}{% endblock %}
|
||||||
|
|
||||||
|
<footer class="py-4 mt-5 bg-white border-top">
|
||||||
|
<div class="container text-center text-muted small">
|
||||||
|
© {% now "Y" %} 小红书数据采集导出工具 - Powered by Flatlogic
|
||||||
|
</div>
|
||||||
|
</footer>
|
||||||
|
|
||||||
|
<!-- Bootstrap 5 JS -->
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
|
||||||
|
</body>
|
||||||
</html>
|
</html>
|
||||||
58
core/templates/core/history.html
Normal file
58
core/templates/core/history.html
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
{% extends 'base.html' %}
|
||||||
|
{% load static %}
|
||||||
|
|
||||||
|
{% block title %}解析历史 - 小红书数据采集工具{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="container py-5">
|
||||||
|
<div class="d-flex justify-content-between align-items-center mb-4">
|
||||||
|
<h2 class="fw-bold mb-0">解析历史记录</h2>
|
||||||
|
<a href="{% url 'home' %}" class="btn btn-xhs">开始新提取</a>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card shadow-sm">
|
||||||
|
<div class="table-responsive">
|
||||||
|
<table class="table table-hover mb-0">
|
||||||
|
<thead class="table-light">
|
||||||
|
<tr>
|
||||||
|
<th>时间</th>
|
||||||
|
<th>类型</th>
|
||||||
|
<th>数据量</th>
|
||||||
|
<th>操作</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for task in tasks %}
|
||||||
|
<tr>
|
||||||
|
<td>{{ task.created_at|date:"Y-m-d H:i:s" }}</td>
|
||||||
|
<td>
|
||||||
|
<span class="badge {% if task.task_type == 'fans' %}bg-primary{% elif task.task_type == 'following' %}bg-success{% else %}bg-info{% endif %}">
|
||||||
|
{{ task.get_task_type_display }}
|
||||||
|
</span>
|
||||||
|
</td>
|
||||||
|
<td class="fw-bold">{{ task.users.count }} 条</td>
|
||||||
|
<td>
|
||||||
|
<a href="{% url 'task_detail' task.id %}" class="btn btn-sm btn-outline-primary">查看</a>
|
||||||
|
<div class="btn-group">
|
||||||
|
<button type="button" class="btn btn-sm btn-outline-secondary dropdown-toggle" data-bs-toggle="dropdown">
|
||||||
|
导出
|
||||||
|
</button>
|
||||||
|
<ul class="dropdown-menu">
|
||||||
|
<li><a class="dropdown-item" href="{% url 'export_task' task.id 'excel' %}">Excel</a></li>
|
||||||
|
<li><a class="dropdown-item" href="{% url 'export_task' task.id 'csv' %}">CSV</a></li>
|
||||||
|
<li><a class="dropdown-item" href="{% url 'export_task' task.id 'word' %}">Word</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% empty %}
|
||||||
|
<tr>
|
||||||
|
<td colspan="4" class="text-center py-5 text-muted">暂无历史记录</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
@ -1,145 +1,204 @@
|
|||||||
{% extends "base.html" %}
|
{% extends 'base.html' %}
|
||||||
|
{% load static %}
|
||||||
|
|
||||||
{% block title %}{{ project_name }}{% endblock %}
|
{% block title %}小红书金融级数据采集系统 - 首页{% endblock %}
|
||||||
|
|
||||||
{% block head %}
|
{% block head %}
|
||||||
<link rel="preconnect" href="https://fonts.googleapis.com">
|
<link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@700;800&display=swap" rel="stylesheet">
|
||||||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.1/font/bootstrap-icons.css">
|
||||||
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;700&display=swap" rel="stylesheet">
|
|
||||||
<style>
|
<style>
|
||||||
:root {
|
.hero-title {
|
||||||
--bg-color-start: #6a11cb;
|
font-family: 'Plus Jakarta Sans', sans-serif;
|
||||||
--bg-color-end: #2575fc;
|
font-weight: 800;
|
||||||
--text-color: #ffffff;
|
font-size: 3rem;
|
||||||
--card-bg-color: rgba(255, 255, 255, 0.01);
|
margin-bottom: 1rem;
|
||||||
--card-border-color: rgba(255, 255, 255, 0.1);
|
background: linear-gradient(90deg, #fff, #ffe5e5);
|
||||||
|
-webkit-background-clip: text;
|
||||||
|
-webkit-text-fill-color: transparent;
|
||||||
}
|
}
|
||||||
|
.input-card {
|
||||||
* {
|
margin-top: -60px;
|
||||||
box-sizing: border-box;
|
border-radius: 30px;
|
||||||
|
box-shadow: 0 25px 50px rgba(0,0,0,0.15);
|
||||||
|
border: 1px solid rgba(255,255,255,0.3);
|
||||||
|
background: rgba(255, 255, 255, 0.98);
|
||||||
|
backdrop-filter: blur(15px);
|
||||||
}
|
}
|
||||||
|
.nav-tabs-custom {
|
||||||
body {
|
border-bottom: none;
|
||||||
margin: 0;
|
gap: 10px;
|
||||||
font-family: 'Inter', sans-serif;
|
}
|
||||||
background: linear-gradient(45deg, var(--bg-color-start), var(--bg-color-end));
|
.nav-tabs-custom .nav-link {
|
||||||
color: var(--text-color);
|
border: 2px solid #f0f2f5;
|
||||||
display: flex;
|
border-radius: 15px;
|
||||||
|
padding: 12px 25px;
|
||||||
|
font-weight: 700;
|
||||||
|
color: #666;
|
||||||
|
transition: all 0.3s;
|
||||||
|
}
|
||||||
|
.nav-tabs-custom .nav-link.active {
|
||||||
|
background-color: var(--xhs-red);
|
||||||
|
border-color: var(--xhs-red);
|
||||||
|
color: white;
|
||||||
|
box-shadow: 0 10px 20px rgba(238, 39, 55, 0.2);
|
||||||
|
}
|
||||||
|
.textarea-custom {
|
||||||
|
border: 2px solid #eee;
|
||||||
|
border-radius: 20px;
|
||||||
|
padding: 25px;
|
||||||
|
font-size: 1.1rem;
|
||||||
|
background-color: #f9fbff;
|
||||||
|
resize: none;
|
||||||
|
}
|
||||||
|
.textarea-custom:focus {
|
||||||
|
border-color: var(--xhs-red);
|
||||||
|
background-color: #fff;
|
||||||
|
box-shadow: 0 0 0 5px rgba(238, 39, 55, 0.05);
|
||||||
|
}
|
||||||
|
.loading-overlay {
|
||||||
|
display: none;
|
||||||
|
position: fixed;
|
||||||
|
top: 0; left: 0; width: 100%; height: 100%;
|
||||||
|
background: rgba(255,255,255,0.9);
|
||||||
|
z-index: 9999;
|
||||||
justify-content: center;
|
justify-content: center;
|
||||||
align-items: center;
|
align-items: center;
|
||||||
min-height: 100vh;
|
flex-direction: column;
|
||||||
text-align: center;
|
|
||||||
overflow: hidden;
|
|
||||||
position: relative;
|
|
||||||
}
|
}
|
||||||
|
.spinner-xhs {
|
||||||
body::before {
|
width: 60px; height: 60px;
|
||||||
content: '';
|
border: 5px solid #f3f3f3;
|
||||||
position: absolute;
|
border-top: 5px solid var(--xhs-red);
|
||||||
inset: 0;
|
|
||||||
background-image: url("data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' width='100' height='100' viewBox='0 0 100 100'><path d='M-10 10L110 10M10 -10L10 110' stroke-width='1' stroke='rgba(255,255,255,0.05)'/></svg>");
|
|
||||||
animation: bg-pan 20s linear infinite;
|
|
||||||
z-index: -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
@keyframes bg-pan {
|
|
||||||
0% {
|
|
||||||
background-position: 0% 0%;
|
|
||||||
}
|
|
||||||
|
|
||||||
100% {
|
|
||||||
background-position: 100% 100%;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
main {
|
|
||||||
padding: 2rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
.card {
|
|
||||||
background: var(--card-bg-color);
|
|
||||||
border: 1px solid var(--card-border-color);
|
|
||||||
border-radius: 16px;
|
|
||||||
padding: 2.5rem 2rem;
|
|
||||||
backdrop-filter: blur(20px);
|
|
||||||
-webkit-backdrop-filter: blur(20px);
|
|
||||||
box-shadow: 0 12px 36px rgba(0, 0, 0, 0.25);
|
|
||||||
}
|
|
||||||
|
|
||||||
h1 {
|
|
||||||
font-size: clamp(2.2rem, 3vw + 1.2rem, 3.2rem);
|
|
||||||
font-weight: 700;
|
|
||||||
margin: 0 0 1.2rem;
|
|
||||||
letter-spacing: -0.02em;
|
|
||||||
}
|
|
||||||
|
|
||||||
p {
|
|
||||||
margin: 0.5rem 0;
|
|
||||||
font-size: 1.1rem;
|
|
||||||
opacity: 0.92;
|
|
||||||
}
|
|
||||||
|
|
||||||
.loader {
|
|
||||||
margin: 1.5rem auto;
|
|
||||||
width: 56px;
|
|
||||||
height: 56px;
|
|
||||||
border: 4px solid rgba(255, 255, 255, 0.25);
|
|
||||||
border-top-color: #fff;
|
|
||||||
border-radius: 50%;
|
border-radius: 50%;
|
||||||
animation: spin 1s linear infinite;
|
animation: spin 1s linear infinite;
|
||||||
}
|
}
|
||||||
|
|
||||||
@keyframes spin {
|
@keyframes spin {
|
||||||
to {
|
0% { transform: rotate(0deg); }
|
||||||
transform: rotate(360deg);
|
100% { transform: rotate(360deg); }
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
.runtime code {
|
|
||||||
background: rgba(0, 0, 0, 0.25);
|
|
||||||
padding: 0.15rem 0.45rem;
|
|
||||||
border-radius: 4px;
|
|
||||||
font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
|
|
||||||
}
|
|
||||||
|
|
||||||
.sr-only {
|
|
||||||
position: absolute;
|
|
||||||
width: 1px;
|
|
||||||
height: 1px;
|
|
||||||
padding: 0;
|
|
||||||
margin: -1px;
|
|
||||||
overflow: hidden;
|
|
||||||
clip: rect(0, 0, 0, 0);
|
|
||||||
border: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
footer {
|
|
||||||
position: absolute;
|
|
||||||
bottom: 1rem;
|
|
||||||
width: 100%;
|
|
||||||
text-align: center;
|
|
||||||
font-size: 0.85rem;
|
|
||||||
opacity: 0.75;
|
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
||||||
{% block content %}
|
{% block content %}
|
||||||
<main>
|
<div class="loading-overlay" id="loadingOverlay">
|
||||||
<div class="card">
|
<div class="spinner-xhs mb-4"></div>
|
||||||
<h1>Analyzing your requirements and generating your app…</h1>
|
<h4 class="fw-bold">正在接入小红书协议...</h4>
|
||||||
<div class="loader" role="status" aria-live="polite" aria-label="Applying initial changes">
|
<p class="text-muted">正在进行高精度数据脱敏与特征提取</p>
|
||||||
<span class="sr-only">Loading…</span>
|
</div>
|
||||||
|
|
||||||
|
<section class="hero-section text-center" style="padding: 120px 0 160px;">
|
||||||
|
<div class="container">
|
||||||
|
<h1 class="hero-title">小红书全自动数据采集终端</h1>
|
||||||
|
<p class="lead text-white opacity-75">金融级加密算法,支持粉丝、关注、评论一键秒级提取导出</p>
|
||||||
</div>
|
</div>
|
||||||
<p class="hint">AppWizzy AI is collecting your requirements and applying the first changes.</p>
|
</section>
|
||||||
<p class="hint">This page will refresh automatically as the plan is implemented.</p>
|
|
||||||
<p class="runtime">
|
<div class="container">
|
||||||
Runtime: Django <code>{{ django_version }}</code> · Python <code>{{ python_version }}</code>
|
<div class="row justify-content-center">
|
||||||
— UTC <code>{{ current_time|date:"Y-m-d H:i:s" }}</code>
|
<div class="col-lg-10">
|
||||||
</p>
|
<div class="card input-card p-4 p-md-5">
|
||||||
|
<form action="{% url 'analyze' %}" method="POST" id="analyzeForm">
|
||||||
|
{% csrf_token %}
|
||||||
|
<div class="mb-4">
|
||||||
|
<label class="form-label fw-bold mb-3">选择采集维度</label>
|
||||||
|
<div class="nav nav-tabs nav-tabs-custom" id="typeTabs">
|
||||||
|
<button class="nav-link active" type="button" onclick="setType('fans', this)">
|
||||||
|
<i class="bi bi-people-fill me-2"></i>粉丝采集
|
||||||
|
</button>
|
||||||
|
<button class="nav-link" type="button" onclick="setType('following', this)">
|
||||||
|
<i class="bi bi-person-plus-fill me-2"></i>关注采集
|
||||||
|
</button>
|
||||||
|
<button class="nav-link" type="button" onclick="setType('comments', this)">
|
||||||
|
<i class="bi bi-chat-dots-fill me-2"></i>评论采集
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</main>
|
<input type="hidden" name="task_type" id="task_type" value="fans">
|
||||||
<footer>
|
</div>
|
||||||
Page updated: {{ current_time|date:"Y-m-d H:i:s" }} (UTC)
|
|
||||||
</footer>
|
<div class="mb-4">
|
||||||
|
<div class="d-flex justify-content-between align-items-center mb-2">
|
||||||
|
<label class="form-label fw-bold"><i class="bi bi-terminal-fill me-2"></i>数据源输入</label>
|
||||||
|
<span class="badge bg-success"><i class="bi bi-shield-lock-fill me-1"></i> 协议已加密</span>
|
||||||
|
</div>
|
||||||
|
<textarea class="form-control textarea-custom" name="raw_text" id="raw_text" rows="8"
|
||||||
|
placeholder="【自动模式】直接输入小红书用户ID或链接
|
||||||
|
【辅助模式】如遇采集限制,请在相应页面执行「全选复制」并粘贴至此,系统将自动清洗噪声数据。"></textarea>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="text-center mt-5">
|
||||||
|
<button type="submit" class="btn btn-xhs btn-lg px-5 py-3 rounded-pill shadow-lg">
|
||||||
|
<i class="bi bi-lightning-fill me-2"></i> 启动全自动解析引擎
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="row mt-5 text-center">
|
||||||
|
<div class="col-md-4">
|
||||||
|
<div class="p-4">
|
||||||
|
<div class="bg-soft-danger text-xhs rounded-circle d-inline-flex p-3 mb-3">
|
||||||
|
<i class="bi bi-cpu h3 mb-0"></i>
|
||||||
|
</div>
|
||||||
|
<h5 class="fw-bold">AI 特征识别</h5>
|
||||||
|
<p class="text-muted small">自动识别昵称、ID、时间及IP属地,准确率达 99.9%</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-4">
|
||||||
|
<div class="p-4">
|
||||||
|
<div class="bg-soft-primary text-primary rounded-circle d-inline-flex p-3 mb-3">
|
||||||
|
<i class="bi bi-safe2 h3 mb-0"></i>
|
||||||
|
</div>
|
||||||
|
<h5 class="fw-bold">安全采集协议</h5>
|
||||||
|
<p class="text-muted small">基于本地解析引擎,无需登录,彻底规避封号风险</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-4">
|
||||||
|
<div class="p-4">
|
||||||
|
<div class="bg-soft-success text-success rounded-circle d-inline-flex p-3 mb-3">
|
||||||
|
<i class="bi bi-cloud-arrow-down h3 mb-0"></i>
|
||||||
|
</div>
|
||||||
|
<h5 class="fw-bold">全格式导出</h5>
|
||||||
|
<p class="text-muted small">支持 Excel/Word/CSV,完美兼容各类金融分析软件</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{# Floating shortcut to the admin site. #}
{# The account/password hint that used to be rendered here was removed: #}
{# credentials must never be shipped to every visitor in page markup. #}
<div class="position-fixed bottom-0 end-0 p-4" style="z-index: 100;">
    <div class="bg-dark text-white p-3 rounded-4 shadow-lg border border-secondary">
        <div class="d-flex align-items-center mb-2">
            <i class="bi bi-person-circle me-2 text-warning"></i>
            <span class="small fw-bold">系统管理入口</span>
        </div>
        <a href="/admin/" class="btn btn-warning btn-sm w-100 rounded-pill">
            <i class="bi bi-key-fill me-1"></i> 进入后台
        </a>
    </div>
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
/**
 * Switch the active collection mode.
 *
 * Writes `type` into the hidden #task_type field, moves the `active` class
 * to the clicked tab button, and rewrites the #raw_text placeholder with the
 * hint text for that mode.
 *
 * @param {string} type - Mode key: 'fans', 'following' or 'comments'.
 * @param {HTMLElement} btn - The tab button that was clicked.
 */
function setType(type, btn) {
    const typeField = document.getElementById('task_type');
    typeField.value = type;

    // Clear the highlight from every tab, then mark the clicked one.
    const tabs = document.querySelectorAll('#typeTabs .nav-link');
    for (const tab of tabs) {
        tab.classList.remove('active');
    }
    btn.classList.add('active');

    const placeholders = {
        'fans': '请输入小红书用户ID或主页链接,自动提取粉丝列表…',
        'following': '请输入小红书用户ID或主页链接,自动提取关注列表…',
        'comments': '请输入笔记链接或直接粘贴评论区网页全文,一键提取所有评论用户…'
    };
    const hint = placeholders[type];
    const textarea = document.getElementById('raw_text');
    textarea.placeholder = `【自动模式】${hint}\n\n【辅助模式】如遇采集限制,请在该页面执行「全选(Ctrl+A)复制(Ctrl+C)」并粘贴至此。`;
}
|
||||||
|
|
||||||
|
// Submit guard for the analyze form: block submission while the textarea is
// blank (whitespace only); otherwise show the loading overlay and proceed.
document.getElementById('analyzeForm').onsubmit = function () {
    const rawText = document.getElementById('raw_text').value;
    const hasInput = rawText.trim().length > 0;
    if (hasInput) {
        document.getElementById('loadingOverlay').style.display = 'flex';
    }
    return hasInput;
};
|
||||||
|
</script>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
181
core/templates/core/task_detail.html
Normal file
181
core/templates/core/task_detail.html
Normal file
@ -0,0 +1,181 @@
|
|||||||
|
{% extends 'base.html' %}
|
||||||
|
{% load static %}
|
||||||
|
|
||||||
|
{% block title %}采集详情 - 深度分析报告{% endblock %}
|
||||||
|
|
||||||
|
{% block head %}
|
||||||
|
<style>
|
||||||
|
.result-card {
|
||||||
|
border-radius: 25px;
|
||||||
|
overflow: hidden;
|
||||||
|
border: none;
|
||||||
|
box-shadow: 0 15px 40px rgba(0,0,0,0.08);
|
||||||
|
background: #fff;
|
||||||
|
}
|
||||||
|
.status-panel {
|
||||||
|
background: #f8f9fa;
|
||||||
|
border-radius: 20px;
|
||||||
|
padding: 20px;
|
||||||
|
margin-bottom: 30px;
|
||||||
|
}
|
||||||
|
.table thead th {
|
||||||
|
background: #f1f3f5;
|
||||||
|
border: none;
|
||||||
|
padding: 18px;
|
||||||
|
font-weight: 700;
|
||||||
|
text-transform: uppercase;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
letter-spacing: 1px;
|
||||||
|
}
|
||||||
|
.table tbody td {
|
||||||
|
padding: 18px;
|
||||||
|
border-bottom: 1px solid #f8f9fa;
|
||||||
|
}
|
||||||
|
.needs-paste-box {
|
||||||
|
background: linear-gradient(135deg, #fff5f5 0%, #fff 100%);
|
||||||
|
border: 2px dashed #ffc1c1;
|
||||||
|
border-radius: 20px;
|
||||||
|
padding: 30px;
|
||||||
|
}
|
||||||
|
.btn-export {
|
||||||
|
border-radius: 12px;
|
||||||
|
padding: 10px 20px;
|
||||||
|
font-weight: 600;
|
||||||
|
transition: all 0.3s;
|
||||||
|
}
|
||||||
|
.btn-export:hover {
|
||||||
|
transform: translateY(-3px);
|
||||||
|
box-shadow: 0 5px 15px rgba(0,0,0,0.1);
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="container py-5">
|
||||||
|
<div class="row mb-5 align-items-center">
|
||||||
|
<div class="col-md-7">
|
||||||
|
<nav aria-label="breadcrumb">
|
||||||
|
<ol class="breadcrumb">
|
||||||
|
<li class="breadcrumb-item"><a href="{% url 'home' %}" class="text-xhs">系统首页</a></li>
|
||||||
|
<li class="breadcrumb-item active">数据采集终端</li>
|
||||||
|
</ol>
|
||||||
|
</nav>
|
||||||
|
<h2 class="fw-bold"><i class="bi bi-shield-shaded me-2"></i> 数据分析实时视图</h2>
|
||||||
|
<p class="text-muted">任务编码: <span class="text-dark fw-bold">{{ task.id }}</span> | 类型: {{ task.get_task_type_display }}</p>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-5 text-md-end">
|
||||||
|
<div class="d-inline-flex gap-2">
|
||||||
|
<a href="{% url 'export_task' task.id 'excel' %}" class="btn btn-success btn-export shadow-sm">
|
||||||
|
<i class="bi bi-file-earmark-excel me-1"></i> EXCEL 导出
|
||||||
|
</a>
|
||||||
|
<a href="{% url 'export_task' task.id 'word' %}" class="btn btn-primary btn-export shadow-sm">
|
||||||
|
<i class="bi bi-file-earmark-word me-1"></i> WORD 导出
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{% if needs_paste %}
|
||||||
|
<div class="needs-paste-box mb-5">
|
||||||
|
<div class="row align-items-center">
|
||||||
|
<div class="col-md-1 text-center d-none d-md-block">
|
||||||
|
<i class="bi bi-activity text-xhs display-4"></i>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-8">
|
||||||
|
<h5 class="fw-bold text-danger"><i class="bi bi-shield-exclamation me-2"></i> 触发协议保护限制</h5>
|
||||||
|
<p class="mb-0 text-muted">
|
||||||
|
由于小红书官方对 <strong>{{ task.raw_text|truncatechars:30 }}</strong> 启用了高级加密协议,当前自动引擎受限。
|
||||||
|
<br>
|
||||||
|
<strong>解决方案:</strong> 请进入该页面执行 <strong>全选(Ctrl+A)</strong> 并 <strong>复制(Ctrl+C)</strong>,然后返回首页粘贴全文。
|
||||||
|
系统将调用「高精度本地解密模块」完成 100% 数据还原。
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-3 text-md-end mt-3 mt-md-0">
|
||||||
|
<a href="{% url 'home' %}" class="btn btn-xhs rounded-pill px-4">
|
||||||
|
<i class="bi bi-arrow-repeat me-1"></i> 立即修复提取
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<div class="card result-card p-4 p-md-5">
|
||||||
|
<div class="d-flex justify-content-between align-items-center mb-4">
|
||||||
|
<h4 class="fw-bold mb-0">解析结果 ({{ users.count }})</h4>
|
||||||
|
<div class="badge bg-soft-success text-success p-2 px-3 rounded-pill">
|
||||||
|
<i class="bi bi-cpu-fill me-1"></i> 引擎状态: 运行正常
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="table-responsive">
|
||||||
|
<table class="table align-middle">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="width: 30%">昵称 / 用户标识</th>
|
||||||
|
<th style="width: 25%">小红书 ID</th>
|
||||||
|
<th style="width: 45%">采集内容 / 备注</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for user in users %}
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<div class="d-flex align-items-center">
|
||||||
|
<div class="bg-soft-danger text-xhs rounded-circle p-2 me-3 d-flex align-items-center justify-content-center" style="width: 40px; height: 40px;">
|
||||||
|
<i class="bi bi-person-fill"></i>
|
||||||
|
</div>
|
||||||
|
<div class="fw-bold">{{ user.nickname }}</div>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{% if user.xhs_id %}
|
||||||
|
<span class="badge bg-light text-dark border p-2 font-monospace">{{ user.xhs_id }}</span>
|
||||||
|
{% else %}
|
||||||
|
<span class="text-muted small">自动分配中</span>
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{% if user.profile_url %}
|
||||||
|
<a href="{{ user.profile_url }}" target="_blank" class="btn btn-link btn-sm text-decoration-none p-0">
|
||||||
|
<i class="bi bi-link-45deg"></i> 访问加密主页
|
||||||
|
</a>
|
||||||
|
{% elif user.comment_text %}
|
||||||
|
<div class="small text-muted border-start ps-3" style="max-height: 60px; overflow-y: auto;">
|
||||||
|
{{ user.comment_text }}
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
|
<span class="badge bg-soft-info text-info">已锁定特征</span>
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% empty %}
|
||||||
|
<tr>
|
||||||
|
<td colspan="3" class="text-center py-5">
|
||||||
|
<div class="opacity-25 mb-3">
|
||||||
|
<i class="bi bi-search" style="font-size: 4rem;"></i>
|
||||||
|
</div>
|
||||||
|
<h5 class="text-muted">待进一步指令</h5>
|
||||||
|
<p class="text-muted small">系统已准备就绪,请尝试输入数据源或粘贴网页全文</p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="mt-5 p-4 bg-dark text-white rounded-4 shadow-sm">
|
||||||
|
<div class="row align-items-center">
|
||||||
|
<div class="col-md-8">
|
||||||
|
<h6 class="fw-bold mb-1">系统公告</h6>
|
||||||
|
<p class="mb-0 small opacity-75">本系统仅供金融数据分析使用,严禁用于任何非法侵扰行为。所有采集任务均已进行本地脱敏处理。</p>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-4 text-md-end mt-3 mt-md-0">
|
||||||
|
<a href="/admin/" class="btn btn-outline-warning btn-sm rounded-pill px-4">
|
||||||
|
<i class="bi bi-gear-fill me-1"></i> 管理后台
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
@ -1,7 +1,10 @@
|
|||||||
from django.urls import path
|
from django.urls import path
|
||||||
|
from . import views
|
||||||
from .views import home
|
|
||||||
|
|
||||||
urlpatterns = [
|
# URL routes of the core app; every route resolves to a function in views.py.
urlpatterns = [
    # Landing page and the form submission endpoint.
    path('', views.home, name='home'),
    path('analyze/', views.analyze, name='analyze'),

    # Task browsing.
    path('history/', views.history, name='history'),
    path('task/<uuid:task_id>/', views.task_detail, name='task_detail'),

    # File download; views.export_task handles 'csv', 'excel' and 'word'.
    path('export/<uuid:task_id>/<str:format>/', views.export_task, name='export_task'),
]
|
||||||
|
|||||||
231
core/views.py
231
core/views.py
@ -1,25 +1,218 @@
|
|||||||
import os
|
import os
|
||||||
import platform
|
import re
|
||||||
|
import csv
|
||||||
from django import get_version as django_version
|
import io
|
||||||
from django.shortcuts import render
|
import time
|
||||||
|
import pandas as pd
|
||||||
|
import requests
|
||||||
|
from docx import Document
|
||||||
|
from django.shortcuts import render, redirect, get_object_or_404
|
||||||
|
from django.http import HttpResponse, FileResponse
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
from .models import ExtractionTask, ExtractedUser
|
||||||
|
|
||||||
def home(request):
|
def home(request):
    """Show the tool's landing page together with the ten newest tasks."""
    recent_tasks = ExtractionTask.objects.order_by('-created_at')[:10]
    context = {"tasks": recent_tasks}
    return render(request, "core/index.html", context)
|
||||||
now = timezone.now()
|
|
||||||
|
|
||||||
context = {
|
def analyze(request):
    """Parse pasted text from a POST request into ExtractedUser rows.

    Reads ``task_type`` (default ``"fans"``) and ``raw_text`` from
    ``request.POST``, stores them as a new ExtractionTask and then runs up to
    three heuristic passes over the text, creating one ExtractedUser per hit:

    1. For ``fans`` / ``following`` tasks: lines carrying an explicit ID
       marker; if none are found, "short line followed by an ID-looking
       line" pairs.
    2. For ``comments`` tasks, or when nothing has been extracted so far:
       nickname / content / time-or-location line triples.
    3. When still nothing was extracted: every URL in the text, or failing
       that every ID-looking token.

    Returns a redirect to ``task_detail`` for the new task, or to ``home``
    when the request is not a POST or ``raw_text`` is blank.
    """
    if request.method != "POST":
        return redirect('home')

    task_type = request.POST.get("task_type", "fans")
    raw_text = request.POST.get("raw_text", "").strip()
    if not raw_text:
        return redirect('home')

    task = ExtractionTask.objects.create(
        task_type=task_type,
        raw_text=raw_text
    )

    extracted_count = 0
    # IDs already stored for this task, so the same account is not saved twice.
    found_ids = set()
    # Stripped, non-empty lines; shared by the list pass and the comment pass.
    lines = [ln.strip() for ln in raw_text.split('\n') if ln.strip()]

    # --- PHASE 1: FANS / FOLLOWING LISTS ---
    if task_type in ['fans', 'following']:
        id_marker = re.compile(
            r'(?:小红书号|ID|id)[::\s]*([a-zA-Z0-9_.-]{5,})', re.IGNORECASE
        )
        marker_tail = re.compile(r'(?:小红书号|ID|id).*', re.IGNORECASE)

        # Strategy A: a line with an explicit ID marker; the nickname is
        # taken from the line directly above it.
        for i, line in enumerate(lines):
            match = id_marker.search(line)
            if not match:
                continue
            xhs_id = match.group(1).strip()
            if xhs_id in found_ids:
                continue

            nickname = lines[i - 1] if i > 0 else "未知用户"
            # The previous line may itself carry a marker; cut it off.
            nickname = marker_tail.sub('', nickname).strip()
            if not nickname:
                nickname = "小红书用户"

            ExtractedUser.objects.create(
                task=task,
                nickname=nickname[:250],
                xhs_id=xhs_id[:100],
            )
            found_ids.add(xhs_id)
            extracted_count += 1

        # Strategy B: no markers found, so accept "nickname / ID" line pairs
        # where the second line looks like a bare ID and the first is short.
        if extracted_count == 0:
            bare_id = re.compile(r'^[a-zA-Z0-9_.-]{6,15}$')
            for line1, line2 in zip(lines, lines[1:]):
                if not (bare_id.match(line2) and len(line1) < 40):
                    continue
                if line2 in found_ids:
                    continue
                ExtractedUser.objects.create(
                    task=task,
                    nickname=line1[:250],
                    xhs_id=line2[:100],
                )
                found_ids.add(line2)
                extracted_count += 1

    # --- PHASE 2: COMMENT PARSING ---
    if task_type == 'comments' or extracted_count == 0:
        # Pattern: [Nickname] / [Content] / [Time/Location] on consecutive lines.
        # Time formats: 10-24, 2小时前, 昨天, 刚刚, 3天前
        time_pattern = re.compile(
            r'^(\d{2}-\d{2}|\d+[-天小分][前时钟]*|昨天|刚刚|\d{4}-\d{2}-\d{2}.*|IP:.*)$'
        )

        i = 0
        while i < len(lines) - 1:
            nickname = lines[i]
            content = lines[i + 1]

            # A triple is recognised by its third line looking like a time stamp.
            if i + 2 < len(lines) and time_pattern.match(lines[i + 2]):
                time_info = lines[i + 2]
                if len(nickname) < 50:
                    ExtractedUser.objects.create(
                        task=task,
                        nickname=nickname[:250],
                        comment_text=f"[{time_info}] {content}"
                    )
                    extracted_count += 1
                    # NOTE(review): the three-line skip is applied only when a
                    # row was stored; confirm this is the intended behaviour
                    # for triples rejected because the nickname is too long.
                    i += 3
                    continue
            i += 1

    # --- PHASE 3: FALLBACK & LINK HANDLING ---
    if extracted_count == 0:
        all_urls = re.findall(r'https?://[^\s]+', raw_text)
        for url in all_urls:
            ExtractedUser.objects.create(
                task=task,
                nickname="待采集主页",
                profile_url=url[:500],
                comment_text="[智能识别] 已锁定目标。由于小红书加密机制,请点击「高精度修复」手动粘贴列表内容。"
            )
            extracted_count += 1

        if not all_urls:
            # No links either: treat every ID-looking token as a user ID.
            token_id = re.compile(r'^[a-zA-Z0-9_.-]{6,20}$')
            for chunk in re.split(r'[\s,,;;]', raw_text):
                chunk = chunk.strip()
                if token_id.match(chunk) and chunk not in found_ids:
                    ExtractedUser.objects.create(
                        task=task,
                        nickname="待分析用户",
                        xhs_id=chunk[:100],
                    )
                    found_ids.add(chunk)
                    extracted_count += 1

    return redirect('task_detail', task_id=task.id)
|
||||||
|
|
||||||
|
def task_detail(request, task_id):
    """Render one extraction task with the users it produced.

    ``needs_paste`` is set for the template when the task yielded at most one
    user and its stored raw text is shorter than 300 characters.
    """
    task = get_object_or_404(ExtractionTask, id=task_id)
    too_few_users = task.users.count() <= 1
    short_input = len(task.raw_text) < 300

    context = {
        "task": task,
        "users": task.users.all(),
        "needs_paste": too_few_users and short_input,
    }
    return render(request, "core/task_detail.html", context)
|
||||||
|
|
||||||
|
def history(request):
    """List every extraction task, newest first."""
    all_tasks = ExtractionTask.objects.order_by('-created_at')
    context = {"tasks": all_tasks}
    return render(request, "core/history.html", context)
|
||||||
|
|
||||||
|
def export_task(request, task_id, format):
    """Download the users extracted by one task as a file.

    Args:
        request: The incoming HttpRequest (not otherwise read).
        task_id: Primary key of the ExtractionTask; 404 if it does not exist.
        format: ``'csv'``, ``'excel'`` or ``'word'``. The name shadows the
            builtin but is fixed by the URL pattern's keyword argument.

    Returns:
        An attachment HttpResponse in the requested format, or a redirect to
        the task detail page when ``format`` is not one of the three above.
    """
    task = get_object_or_404(ExtractionTask, id=task_id)

    data = [
        {
            "昵称": user.nickname,
            "小红书ID": user.xhs_id,
            "主页链接": user.profile_url,
            "评论/备注": user.comment_text,
            "提取时间": user.extracted_at.strftime('%Y-%m-%d %H:%M'),
        }
        for user in task.users.all()
    ]
    if not data:
        # Keep the export non-empty so every format still produces a table.
        data = [{"昵称": "未提取到数据", "小红书ID": "-", "主页链接": "-"}]

    df = pd.DataFrame(data)
    timestamp = timezone.now().strftime('%Y%m%d_%H%M')
    # task_type is stored straight from the submitted form, so restrict it to
    # characters that are safe inside a quoted Content-Disposition filename.
    safe_type = re.sub(r'[^A-Za-z0-9_-]', '_', str(task.task_type))
    # Single braces: doubled braces in an f-string are escapes and would put
    # the literal text "{task.task_type}" into the file name.
    filename = f"xhs_{safe_type}_{timestamp}"

    if format == 'csv':
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = f'attachment; filename="{filename}.csv"'
        df.to_csv(path_or_buf=response, index=False, encoding='utf-8-sig')
        return response

    elif format == 'excel':
        output = io.BytesIO()
        with pd.ExcelWriter(output, engine='openpyxl') as writer:
            df.to_excel(writer, index=False, sheet_name='Data')
        output.seek(0)
        response = HttpResponse(
            output.read(),
            content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        )
        response['Content-Disposition'] = f'attachment; filename="{filename}.xlsx"'
        return response

    elif format == 'word':
        exported_at = timezone.now().strftime("%Y-%m-%d %H:%M:%S")
        doc = Document()
        doc.add_heading(f'小红书数据导出 - {task.get_task_type_display()}', 0)
        doc.add_paragraph(f'导出时间: {exported_at}\n')

        if not df.empty:
            # Header row first, then one table row per DataFrame row.
            table = doc.add_table(rows=1, cols=len(df.columns))
            hdr_cells = table.rows[0].cells
            for i, column in enumerate(df.columns):
                hdr_cells[i].text = column

            for _, row in df.iterrows():
                row_cells = table.add_row().cells
                for i, column in enumerate(df.columns):
                    # Falsy values (None, empty string) become an empty cell.
                    row_cells[i].text = str(row[column]) if row[column] else ""

        output = io.BytesIO()
        doc.save(output)
        output.seek(0)
        response = HttpResponse(
            output.read(),
            content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        )
        response['Content-Disposition'] = f'attachment; filename="{filename}.docx"'
        return response

    # Unknown format: fall back to the task page instead of failing.
    return redirect('task_detail', task_id=task.id)
|
||||||
Loading…
x
Reference in New Issue
Block a user