39484-vm/backend/src/services/copyrightStudio.js
Flatlogic Bot c21ed70146 1.0
2026-04-05 14:13:54 +00:00

483 lines
13 KiB
JavaScript

const crypto = require('crypto');
const db = require('../db/models');
const Reveal_requestsDBApi = require('../db/api/reveal_requests');
const Reveal_resultsDBApi = require('../db/api/reveal_results');
// Reveal modes accepted by CopyrightStudioService.validatePayload.
const INPUT_TYPES = ['text', 'url', 'file'];
/**
 * Builds an Error that carries a 400 `code` property.
 *
 * @param {string} message - Text placed on the error's `message`.
 * @returns {Error} A new Error with `code` set to 400.
 */
function createValidationError(message) {
  return Object.assign(new Error(message), { code: 400 });
}
/**
 * Reduces a value to a lowercase, space-separated comparison string.
 *
 * Falsy input becomes ''. The text is lowercased, every `http://` / `https://`
 * and `www.` occurrence is removed, runs of `.`, `_` and `-` become a single
 * space, whitespace runs collapse to one space, and the ends are trimmed.
 *
 * @param {*} value - Anything with a `toString()`; falsy values are treated as ''.
 * @returns {string} The normalized text.
 */
function normalizeValue(value) {
  const lowered = (value || '').toString().toLowerCase();
  const withoutScheme = lowered.replace(/https?:\/\//g, '');
  const withoutWww = withoutScheme.replace(/www\./g, '');
  const separatorsAsSpaces = withoutWww.replace(/[._-]+/g, ' ');
  return separatorsAsSpaces.replace(/\s+/g, ' ').trim();
}
/**
 * Splits a value into its distinct alphanumeric tokens.
 *
 * The value is normalized first, then split on anything that is not a-z or
 * 0-9. Tokens of two characters or fewer are dropped and duplicates are
 * removed, keeping first-seen order.
 *
 * @param {*} value - Value to tokenize.
 * @returns {string[]} Unique tokens longer than two characters.
 */
function tokenize(value) {
  const uniqueTokens = new Set();
  for (const piece of normalizeValue(value).split(/[^a-z0-9]+/)) {
    if (piece.length > 2) {
      uniqueTokens.add(piece);
    }
  }
  return [...uniqueTokens];
}
/**
 * Returns the normalized file name with its last dot-suffix removed.
 *
 * A name without any dot is normalized as-is; otherwise everything after the
 * final dot is discarded before normalizing.
 *
 * @param {string} name - File name; falsy values yield ''.
 * @returns {string} Normalized stem.
 */
function getFileStem(name) {
  const segments = (name || '').split('.');
  const hasExtension = segments.length > 1;
  return normalizeValue(hasExtension ? segments.slice(0, -1).join('.') : name);
}
/**
 * Converts a list of records into plain objects by calling
 * `get({ plain: true })` on each one.
 *
 * @param {Array<{get: Function}>} records - Records exposing a `get` method.
 * @returns {Array<*>} Whatever each record's `get({ plain: true })` returns.
 */
function toPlainArray(records) {
  const asPlainObject = (instance) => instance.get({ plain: true });
  return records.map(asPlainObject);
}
/**
 * Joins the request's title, text, URL and uploaded file names into one
 * ' | '-separated summary string.
 *
 * File names are space-joined into a single segment; empty/falsy segments are
 * skipped and the final string is trimmed.
 *
 * @param {object} data - Request payload (request_title, input_text, input_url, uploaded_files).
 * @returns {string} The combined summary, '' when nothing is present.
 */
function buildSourceSummary(data) {
  const files = Array.isArray(data.uploaded_files) ? data.uploaded_files : [];
  const fileNames = files.map(({ name }) => name).join(' ');
  const segments = [data.request_title, data.input_text, data.input_url, fileNames];
  const present = segments.filter((segment) => Boolean(segment));
  return present.join(' | ').trim();
}
/**
 * Derives a SHA-1 hex digest from the request's identifying fields.
 *
 * The digest covers the JSON serialization of title, input type, text, URL
 * (each defaulting to '') and, per uploaded file, only `name` and
 * `sizeInBytes`.
 *
 * @param {object} data - Request payload.
 * @returns {string} 40-character lowercase hex digest.
 */
function createFingerprint(data) {
  const files = Array.isArray(data.uploaded_files) ? data.uploaded_files : [];
  // Key order is part of the serialized form, so it must stay fixed.
  const serialized = JSON.stringify({
    request_title: data.request_title || '',
    input_type: data.input_type || '',
    input_text: data.input_text || '',
    input_url: data.input_url || '',
    uploaded_files: files.map(({ name, sizeInBytes }) => ({ name, sizeInBytes })),
  });
  const hasher = crypto.createHash('sha1');
  hasher.update(serialized);
  return hasher.digest('hex');
}
/**
 * Scores how strongly a registered work matches a reveal request.
 *
 * Points are added per signal and the total is capped at 99:
 * - title: 52 exact, 28 when one title contains the other, otherwise 6 per
 *   shared token (max 20);
 * - content hash equality: 32;
 * - fingerprint equality: 26;
 * - source URL: 24 exact, 12 when one contains the other;
 * - author name found inside the request text: 10;
 * - request tokens found in the description: 3 each (max 14);
 * - uploaded file stems: 18 exact, 10 when one stem contains the other.
 *
 * @param {object} work - Plain work record. Reads title, author_name,
 *   external_source_url, description, content_hash, fingerprint and
 *   original_files[].name.
 * @param {object} payload - Pre-normalized request data: normalizedTitle,
 *   normalizedUrl, titleTokens, searchTokens, sourceText, computedHash,
 *   computedFingerprint, fileStems.
 * @returns {{work: object, score: number, reasons: string[]}} The work, its
 *   capped score and the de-duplicated list of matched signals.
 */
function scoreWork(work, payload) {
  const reasons = [];
  let score = 0;
  const normalizedWorkTitle = normalizeValue(work.title);
  const normalizedAuthorName = normalizeValue(work.author_name);
  const normalizedExternalUrl = normalizeValue(work.external_source_url);
  const normalizedDescription = normalizeValue(work.description);

  if (payload.normalizedTitle && normalizedWorkTitle) {
    if (payload.normalizedTitle === normalizedWorkTitle) {
      score += 52;
      reasons.push('Exact title match');
    } else if (
      normalizedWorkTitle.includes(payload.normalizedTitle) ||
      payload.normalizedTitle.includes(normalizedWorkTitle)
    ) {
      score += 28;
      reasons.push('Strong title similarity');
    } else {
      // Tokenize the work title once instead of once per request token.
      const workTitleTokens = new Set(tokenize(work.title));
      const sharedTitleTokens = payload.titleTokens.filter((token) =>
        workTitleTokens.has(token),
      ).length;
      if (sharedTitleTokens > 0) {
        score += Math.min(20, sharedTitleTokens * 6);
        reasons.push('Title keywords overlap');
      }
    }
  }

  if (payload.computedHash && work.content_hash && payload.computedHash === work.content_hash) {
    score += 32;
    reasons.push('Registered content hash match');
  }

  if (
    payload.computedFingerprint &&
    work.fingerprint &&
    payload.computedFingerprint === work.fingerprint
  ) {
    score += 26;
    reasons.push('Fingerprint signature match');
  }

  if (payload.normalizedUrl && normalizedExternalUrl) {
    if (payload.normalizedUrl === normalizedExternalUrl) {
      score += 24;
      reasons.push('Source URL match');
    } else if (
      normalizedExternalUrl.includes(payload.normalizedUrl) ||
      payload.normalizedUrl.includes(normalizedExternalUrl)
    ) {
      score += 12;
      reasons.push('Related source URL');
    }
  }

  if (normalizedAuthorName && payload.sourceText.includes(normalizedAuthorName)) {
    score += 10;
    reasons.push('Author signature appears in the request');
  }

  if (normalizedDescription && payload.searchTokens.length) {
    const descriptionHits = payload.searchTokens.filter((token) =>
      normalizedDescription.includes(token),
    ).length;
    if (descriptionHits > 0) {
      score += Math.min(14, descriptionHits * 3);
      reasons.push('Description keywords align');
    }
  }

  // Empty stems (dotfiles such as ".env", or files without a name) carry no
  // signal. They must be dropped because `anything.includes('')` is always
  // true, which would otherwise award a partial match to unrelated files.
  const workFileStems = (work.original_files || [])
    .map((file) => getFileStem(file.name))
    .filter(Boolean);
  const requestFileStems = payload.fileStems.filter(Boolean);

  if (requestFileStems.length && workFileStems.length) {
    // `some` yields a boolean; the matched stem itself is never needed.
    const hasExactStem = requestFileStems.some((stem) => workFileStems.includes(stem));
    if (hasExactStem) {
      score += 18;
      reasons.push('Uploaded filename matches a registered asset');
    } else {
      const hasPartialStem = requestFileStems.some((stem) =>
        workFileStems.some((workStem) => workStem.includes(stem) || stem.includes(workStem)),
      );
      if (hasPartialStem) {
        score += 10;
        reasons.push('Uploaded filename resembles a registered asset');
      }
    }
  }

  return {
    work,
    score: Math.min(score, 99),
    reasons: Array.from(new Set(reasons)),
  };
}
/**
 * Maps a numeric score onto a result category.
 *
 * @param {number} score - Candidate score.
 * @returns {'match'|'possible_match'|'no_match'} 'match' at 75 and above,
 *   'possible_match' at 42 and above, otherwise 'no_match'.
 */
function classifyResult(score) {
  // Ordered from the highest threshold down; the first satisfied tier wins.
  const tiers = [
    [75, 'match'],
    [42, 'possible_match'],
  ];
  const tier = tiers.find(([minimum]) => score >= minimum);
  return tier ? tier[1] : 'no_match';
}
/**
 * Produces the human-readable note stored with a reveal result.
 *
 * @param {{work: {title?: string}}|undefined} topCandidate - Best-scoring
 *   candidate, or a falsy value when there is none.
 * @param {string} resultType - 'match', 'possible_match' or anything else.
 * @returns {string} Note text; the work title falls back to 'Untitled work'.
 */
function buildResultNotes(topCandidate, resultType) {
  if (!topCandidate) {
    return 'No registered work was close enough to verify ownership. Try refining the title, text, or uploaded evidence.';
  }
  switch (resultType) {
    case 'match':
      return `High-confidence reveal: ${topCandidate.work.title || 'Untitled work'} is the strongest ownership match.`;
    case 'possible_match':
      return `Possible reveal found for ${topCandidate.work.title || 'Untitled work'}. Review the evidence before relying on it.`;
    default:
      return 'No strong ownership match was found. You can still review the closest candidate below.';
  }
}
/**
 * Shapes a scored candidate into the summary object returned to callers.
 *
 * @param {{work: object, score: number, reasons: string[]}} candidate - Scored candidate.
 * @returns {object} Selected work fields plus `confidence_score` (score / 100,
 *   rounded to two decimals) and `match_reasons`.
 */
function formatCandidate(candidate) {
  const { work, score, reasons } = candidate;
  const confidence = Number((score / 100).toFixed(2));
  return {
    id: work.id,
    title: work.title,
    author_name: work.author_name,
    work_type: work.work_type,
    visibility: work.visibility,
    registered_at: work.registered_at,
    confidence_score: confidence,
    match_reasons: reasons,
  };
}
module.exports = class CopyrightStudioService {
static validatePayload(data) {
const inputType = data.input_type || 'text';
if (!INPUT_TYPES.includes(inputType)) {
throw createValidationError('Choose a valid reveal mode.');
}
if (!data.request_title || !data.request_title.trim()) {
throw createValidationError('Give this reveal request a title so it can be tracked.');
}
if (inputType === 'text' && !data.input_text?.trim()) {
throw createValidationError('Paste some text to compare against registered works.');
}
if (inputType === 'url' && !data.input_url?.trim()) {
throw createValidationError('Enter a source URL to inspect.');
}
if (inputType === 'file' && !(Array.isArray(data.uploaded_files) && data.uploaded_files.length)) {
throw createValidationError('Upload at least one file to run a reveal.');
}
}
static async runReveal(data, currentUser) {
this.validatePayload(data);
const sourceSummary = buildSourceSummary(data);
const computedHash = crypto.createHash('sha256').update(sourceSummary).digest('hex');
const computedFingerprint = createFingerprint(data);
const normalizedTitle = normalizeValue(data.request_title);
const normalizedUrl = normalizeValue(data.input_url);
const fileStems = (Array.isArray(data.uploaded_files) ? data.uploaded_files : []).map((file) =>
getFileStem(file.name),
);
const payload = {
normalizedTitle,
normalizedUrl,
titleTokens: tokenize(data.request_title),
searchTokens: tokenize(sourceSummary),
sourceText: normalizeValue(sourceSummary),
computedHash,
computedFingerprint,
fileStems,
};
const works = await db.works.findAll({
attributes: [
'id',
'title',
'author_name',
'work_type',
'description',
'external_source_url',
'license_terms',
'content_hash',
'fingerprint',
'registered_at',
'visibility',
],
include: [
{
model: db.file,
as: 'original_files',
attributes: ['id', 'name', 'sizeInBytes', 'publicUrl', 'privateUrl'],
},
],
order: [
['registered_at', 'DESC'],
['createdAt', 'DESC'],
],
limit: 100,
});
const rankedCandidates = toPlainArray(works)
.map((work) => scoreWork(work, payload))
.sort((left, right) => right.score - left.score)
.slice(0, 3);
const topCandidate = rankedCandidates[0];
const resultType = classifyResult(topCandidate?.score || 0);
const confidenceScore = Number((((topCandidate?.score || 8) / 100)).toFixed(2));
const startedAt = new Date();
const completedAt = new Date();
const notes = buildResultNotes(topCandidate, resultType);
const matchedFields = (topCandidate?.reasons || ['No close match signals were detected']).join(', ');
const transaction = await db.sequelize.transaction();
try {
const revealRequest = await Reveal_requestsDBApi.create(
{
request_title: data.request_title,
input_type: data.input_type,
input_text: data.input_text || null,
input_url: data.input_url || null,
computed_hash: computedHash,
computed_fingerprint: computedFingerprint,
status: 'completed',
started_at: startedAt,
completed_at: completedAt,
uploaded_files: Array.isArray(data.uploaded_files) ? data.uploaded_files : [],
requested_by: currentUser.id,
},
{
currentUser,
transaction,
},
);
const revealResult = await Reveal_resultsDBApi.create(
{
result_type: resultType,
confidence_score: confidenceScore,
matched_fields: matchedFields,
notes,
generated_at: completedAt,
request: revealRequest.id,
matched_work: resultType === 'no_match' ? null : topCandidate?.work?.id || null,
created_by_user: currentUser.id,
evidence_files: Array.isArray(data.uploaded_files) ? data.uploaded_files : [],
},
{
currentUser,
transaction,
},
);
await transaction.commit();
const detail = await this.getResultDetail(revealResult.id);
return {
request: detail.request,
result: detail.result,
candidates: rankedCandidates.map(formatCandidate),
};
} catch (error) {
await transaction.rollback();
throw error;
}
}
static async getStudioFeed(currentUser) {
const [recentResults, featuredWorks, totalWorks, currentUserReveals] = await Promise.all([
db.reveal_results.findAll({
where: {
created_by_userId: currentUser.id,
},
include: [
{
model: db.reveal_requests,
as: 'request',
},
{
model: db.works,
as: 'matched_work',
},
],
order: [
['generated_at', 'DESC'],
['createdAt', 'DESC'],
],
limit: 6,
}),
db.works.findAll({
attributes: [
'id',
'title',
'author_name',
'work_type',
'registered_at',
'visibility',
'license_terms',
],
order: [
['registered_at', 'DESC'],
['createdAt', 'DESC'],
],
limit: 4,
}),
db.works.count(),
db.reveal_results.count({
where: {
created_by_userId: currentUser.id,
},
}),
]);
return {
stats: {
totalWorks,
currentUserReveals,
matchableWorks: featuredWorks.filter((work) => !!work.license_terms).length,
},
recentResults: toPlainArray(recentResults),
featuredWorks: toPlainArray(featuredWorks),
};
}
static async getResultDetail(id) {
const result = await db.reveal_results.findOne({
where: { id },
include: [
{
model: db.reveal_requests,
as: 'request',
include: [
{
model: db.file,
as: 'uploaded_files',
attributes: ['id', 'name', 'publicUrl', 'privateUrl', 'sizeInBytes'],
},
{
model: db.users,
as: 'requested_by',
attributes: ['id', 'firstName', 'lastName', 'email'],
},
],
},
{
model: db.works,
as: 'matched_work',
include: [
{
model: db.file,
as: 'original_files',
attributes: ['id', 'name', 'publicUrl', 'privateUrl', 'sizeInBytes'],
},
{
model: db.users,
as: 'owner',
attributes: ['id', 'firstName', 'lastName', 'email'],
},
],
},
{
model: db.file,
as: 'evidence_files',
attributes: ['id', 'name', 'publicUrl', 'privateUrl', 'sizeInBytes'],
},
{
model: db.users,
as: 'created_by_user',
attributes: ['id', 'firstName', 'lastName', 'email'],
},
],
});
if (!result) {
const error = new Error('Reveal result not found.');
error.code = 404;
throw error;
}
return {
result: result.get({ plain: true }),
request: result.request ? result.request.get({ plain: true }) : null,
};
}
};