// 483 lines, 13 KiB, JavaScript
const crypto = require('crypto');
|
|
const db = require('../db/models');
|
|
const Reveal_requestsDBApi = require('../db/api/reveal_requests');
|
|
const Reveal_resultsDBApi = require('../db/api/reveal_results');
|
|
|
|
// Reveal modes accepted in `data.input_type`; validatePayload rejects any other value.
const INPUT_TYPES = ['text', 'url', 'file'];
|
|
|
|
/**
 * Creates an Error tagged as a client-side validation failure.
 *
 * @param {string} message - Human-readable description of what is wrong.
 * @returns {Error} Error instance whose `code` property is 400.
 */
function createValidationError(message) {
  return Object.assign(new Error(message), { code: 400 });
}
|
|
|
|
/**
 * Reduces a value to a lowercase, whitespace-collapsed comparison key.
 *
 * Falsy input becomes an empty string. URL schemes (`http://`, `https://`) and
 * `www.` are stripped, runs of `.`, `_` and `-` become a single space, and
 * surrounding whitespace is trimmed.
 *
 * @param {*} value - Anything with a `toString()`; falsy values are treated as ''.
 * @returns {string} The normalised text.
 */
function normalizeValue(value) {
  // Applied strictly in this order: later patterns rely on earlier removals.
  const rewrites = [
    [/https?:\/\//g, ''],
    [/www\./g, ''],
    [/[._-]+/g, ' '],
    [/\s+/g, ' '],
  ];

  let text = (value || '').toString().toLowerCase();
  for (const [pattern, replacement] of rewrites) {
    text = text.replace(pattern, replacement);
  }

  return text.trim();
}
|
|
|
|
/**
 * Splits a value into unique search tokens.
 *
 * The value is normalised first, split on every run of characters outside
 * `a-z0-9`, and tokens of two characters or fewer are discarded. Tokens are
 * returned in order of first appearance.
 *
 * @param {*} value - Raw text (or anything `normalizeValue` accepts).
 * @returns {string[]} De-duplicated tokens longer than two characters.
 */
function tokenize(value) {
  const unique = new Set();

  for (const piece of normalizeValue(value).split(/[^a-z0-9]+/)) {
    if (piece.length > 2) {
      unique.add(piece);
    }
  }

  return [...unique];
}
|
|
|
|
/**
 * Returns the normalised file name without its final extension.
 *
 * Only the last `.segment` is treated as the extension ("archive.tar.gz"
 * yields the stem of "archive.tar"). A name whose only dot is the leading one
 * (".gitignore") has no extension, so the whole name is kept instead of
 * collapsing to an empty stem. Non-string names are coerced with `String`
 * rather than crashing on `.split`.
 *
 * @param {*} name - File name; null/undefined are treated as ''.
 * @returns {string} Normalised stem, possibly '' when the name is empty.
 */
function getFileStem(name) {
  const fileName = String(name ?? '');
  const lastDot = fileName.lastIndexOf('.');

  // lastDot === 0 is a dotfile and -1 means no dot at all: neither has an extension to drop.
  const stem = lastDot > 0 ? fileName.slice(0, lastDot) : fileName;

  return normalizeValue(stem);
}
|
|
|
|
/**
 * Converts a list of model instances into plain objects.
 *
 * @param {Array<{get: Function}>} records - Items exposing `get({ plain: true })`.
 * @returns {Array<*>} Whatever each record's `get({ plain: true })` returns, in order.
 */
function toPlainArray(records) {
  const toPlain = (entry) => {
    const options = { plain: true };
    return entry.get(options);
  };

  return records.map(toPlain);
}
|
|
|
|
/**
 * Builds one text summary of everything the requester supplied.
 *
 * The title, pasted text, URL and the space-joined uploaded file names are
 * concatenated with ' | '; empty parts are skipped and the result is trimmed.
 *
 * @param {object} data - Reveal request payload (`request_title`, `input_text`,
 *   `input_url`, `uploaded_files`). A non-array `uploaded_files` is ignored.
 * @returns {string} The combined summary, '' when nothing was supplied.
 */
function buildSourceSummary(data) {
  const files = Array.isArray(data.uploaded_files) ? data.uploaded_files : [];
  const fileNames = files.map(({ name }) => name).join(' ');

  const segments = [];
  for (const part of [data.request_title, data.input_text, data.input_url, fileNames]) {
    if (part) {
      segments.push(part);
    }
  }

  return segments.join(' | ').trim();
}
|
|
|
|
/**
 * Derives a SHA-1 hex fingerprint from the identifying fields of a request.
 *
 * The digest covers the JSON serialisation of the title, input type, text,
 * URL (each defaulting to '') and the `name` / `sizeInBytes` of every uploaded
 * file, in that key order.
 *
 * @param {object} data - Reveal request payload.
 * @returns {string} 40-character lowercase hex digest.
 */
function createFingerprint(data) {
  const files = Array.isArray(data.uploaded_files) ? data.uploaded_files : [];
  const textField = (key) => data[key] || '';

  // Key order matters: it determines the serialised string and therefore the digest.
  const serialized = JSON.stringify({
    request_title: textField('request_title'),
    input_type: textField('input_type'),
    input_text: textField('input_text'),
    input_url: textField('input_url'),
    uploaded_files: files.map(({ name, sizeInBytes }) => ({ name, sizeInBytes })),
  });

  const hasher = crypto.createHash('sha1');
  hasher.update(serialized);
  return hasher.digest('hex');
}
|
|
|
|
/**
 * Scores how strongly one registered work matches a reveal request.
 *
 * Signals and weights:
 * - title: 52 exact, 28 when one normalised title contains the other,
 *   otherwise 6 per shared keyword capped at 20;
 * - content hash equality: 32;
 * - fingerprint equality: 26;
 * - source URL: 24 exact, 12 when one normalised URL contains the other;
 * - normalised author name appearing in the request text: 10;
 * - request tokens found in the description: 3 each, capped at 14;
 * - file-name stems: 18 for an exact stem match, 10 for a partial one.
 *
 * @param {object} work - Plain work record (`title`, `author_name`,
 *   `external_source_url`, `description`, `content_hash`, `fingerprint`,
 *   `original_files`).
 * @param {object} payload - Pre-computed request data: `normalizedTitle`,
 *   `normalizedUrl`, `titleTokens`, `searchTokens`, `sourceText`,
 *   `computedHash`, `computedFingerprint`, `fileStems`.
 * @returns {{work: object, score: number, reasons: string[]}} The work, its
 *   score capped at 99, and one reason per signal that fired.
 */
function scoreWork(work, payload) {
  const reasons = [];
  let score = 0;

  const normalizedWorkTitle = normalizeValue(work.title);
  const normalizedAuthorName = normalizeValue(work.author_name);
  const normalizedExternalUrl = normalizeValue(work.external_source_url);
  const normalizedDescription = normalizeValue(work.description);

  // Empty stems (missing names, extension-only names) are dropped on both
  // sides: '' is a substring of every string, so keeping them would award the
  // "resembles a registered asset" bonus to any work that has files.
  const workFileStems = (work.original_files || [])
    .map((file) => getFileStem(file.name))
    .filter(Boolean);
  const requestFileStems = payload.fileStems.filter(Boolean);

  if (payload.normalizedTitle && normalizedWorkTitle) {
    if (payload.normalizedTitle === normalizedWorkTitle) {
      score += 52;
      reasons.push('Exact title match');
    } else if (
      normalizedWorkTitle.includes(payload.normalizedTitle) ||
      payload.normalizedTitle.includes(normalizedWorkTitle)
    ) {
      score += 28;
      reasons.push('Strong title similarity');
    } else {
      // Tokenise the work title once instead of once per request token.
      const workTitleTokens = new Set(tokenize(work.title));
      const sharedTitleTokens = payload.titleTokens.filter((token) =>
        workTitleTokens.has(token),
      ).length;

      if (sharedTitleTokens > 0) {
        score += Math.min(20, sharedTitleTokens * 6);
        reasons.push('Title keywords overlap');
      }
    }
  }

  if (payload.computedHash && work.content_hash && payload.computedHash === work.content_hash) {
    score += 32;
    reasons.push('Registered content hash match');
  }

  if (
    payload.computedFingerprint &&
    work.fingerprint &&
    payload.computedFingerprint === work.fingerprint
  ) {
    score += 26;
    reasons.push('Fingerprint signature match');
  }

  if (payload.normalizedUrl && normalizedExternalUrl) {
    if (payload.normalizedUrl === normalizedExternalUrl) {
      score += 24;
      reasons.push('Source URL match');
    } else if (
      normalizedExternalUrl.includes(payload.normalizedUrl) ||
      payload.normalizedUrl.includes(normalizedExternalUrl)
    ) {
      score += 12;
      reasons.push('Related source URL');
    }
  }

  if (normalizedAuthorName && payload.sourceText.includes(normalizedAuthorName)) {
    score += 10;
    reasons.push('Author signature appears in the request');
  }

  if (normalizedDescription && payload.searchTokens.length) {
    const descriptionHits = payload.searchTokens.filter((token) =>
      normalizedDescription.includes(token),
    ).length;

    if (descriptionHits > 0) {
      score += Math.min(14, descriptionHits * 3);
      reasons.push('Description keywords align');
    }
  }

  if (requestFileStems.length && workFileStems.length) {
    const workStemSet = new Set(workFileStems);
    const hasExactStemMatch = requestFileStems.some((stem) => workStemSet.has(stem));

    if (hasExactStemMatch) {
      score += 18;
      reasons.push('Uploaded filename matches a registered asset');
    } else {
      const hasPartialStemMatch = requestFileStems.some((stem) =>
        workFileStems.some((workStem) => workStem.includes(stem) || stem.includes(workStem)),
      );

      if (hasPartialStemMatch) {
        score += 10;
        reasons.push('Uploaded filename resembles a registered asset');
      }
    }
  }

  return {
    work,
    // Capped at 99 so the score never reads as absolute certainty.
    score: Math.min(score, 99),
    // Each signal pushes at most one distinct reason, so no de-duplication is needed.
    reasons,
  };
}
|
|
|
|
/**
 * Maps a numeric match score to a result category.
 *
 * @param {number} score - Match score as produced by the scoring step.
 * @returns {'match'|'possible_match'|'no_match'} 'match' from 75 upwards,
 *   'possible_match' from 42 upwards, otherwise 'no_match'.
 */
function classifyResult(score) {
  // Ordered from the highest threshold down; the first tier reached wins.
  const tiers = [
    [75, 'match'],
    [42, 'possible_match'],
  ];

  const reached = tiers.find(([minimum]) => score >= minimum);
  return reached ? reached[1] : 'no_match';
}
|
|
|
|
/**
 * Produces the human-readable note stored with a reveal result.
 *
 * @param {{work: {title?: string}}|undefined} topCandidate - Best-scoring
 *   candidate, or a falsy value when there were no candidates at all.
 * @param {string} resultType - 'match', 'possible_match' or anything else.
 * @returns {string} A sentence describing the outcome.
 */
function buildResultNotes(topCandidate, resultType) {
  if (!topCandidate) {
    return 'No registered work was close enough to verify ownership. Try refining the title, text, or uploaded evidence.';
  }

  // Resolved lazily: the title is only read for the two match categories.
  const workLabel = () => topCandidate.work.title || 'Untitled work';

  switch (resultType) {
    case 'match':
      return `High-confidence reveal: ${workLabel()} is the strongest ownership match.`;
    case 'possible_match':
      return `Possible reveal found for ${workLabel()}. Review the evidence before relying on it.`;
    default:
      return 'No strong ownership match was found. You can still review the closest candidate below.';
  }
}
|
|
|
|
/**
 * Shapes a scored candidate for the API response.
 *
 * @param {{work: object, score: number, reasons: string[]}} candidate - A
 *   scored candidate.
 * @returns {object} Selected work fields plus `confidence_score` (the score
 *   divided by 100, rounded to two decimals) and `match_reasons`.
 */
function formatCandidate(candidate) {
  const { work, score, reasons } = candidate;
  const confidence = Number((score / 100).toFixed(2));

  return {
    id: work.id,
    title: work.title,
    author_name: work.author_name,
    work_type: work.work_type,
    visibility: work.visibility,
    registered_at: work.registered_at,
    confidence_score: confidence,
    match_reasons: reasons,
  };
}
|
|
|
|
module.exports = class CopyrightStudioService {
|
|
static validatePayload(data) {
|
|
const inputType = data.input_type || 'text';
|
|
|
|
if (!INPUT_TYPES.includes(inputType)) {
|
|
throw createValidationError('Choose a valid reveal mode.');
|
|
}
|
|
|
|
if (!data.request_title || !data.request_title.trim()) {
|
|
throw createValidationError('Give this reveal request a title so it can be tracked.');
|
|
}
|
|
|
|
if (inputType === 'text' && !data.input_text?.trim()) {
|
|
throw createValidationError('Paste some text to compare against registered works.');
|
|
}
|
|
|
|
if (inputType === 'url' && !data.input_url?.trim()) {
|
|
throw createValidationError('Enter a source URL to inspect.');
|
|
}
|
|
|
|
if (inputType === 'file' && !(Array.isArray(data.uploaded_files) && data.uploaded_files.length)) {
|
|
throw createValidationError('Upload at least one file to run a reveal.');
|
|
}
|
|
}
|
|
|
|
static async runReveal(data, currentUser) {
|
|
this.validatePayload(data);
|
|
|
|
const sourceSummary = buildSourceSummary(data);
|
|
const computedHash = crypto.createHash('sha256').update(sourceSummary).digest('hex');
|
|
const computedFingerprint = createFingerprint(data);
|
|
const normalizedTitle = normalizeValue(data.request_title);
|
|
const normalizedUrl = normalizeValue(data.input_url);
|
|
const fileStems = (Array.isArray(data.uploaded_files) ? data.uploaded_files : []).map((file) =>
|
|
getFileStem(file.name),
|
|
);
|
|
|
|
const payload = {
|
|
normalizedTitle,
|
|
normalizedUrl,
|
|
titleTokens: tokenize(data.request_title),
|
|
searchTokens: tokenize(sourceSummary),
|
|
sourceText: normalizeValue(sourceSummary),
|
|
computedHash,
|
|
computedFingerprint,
|
|
fileStems,
|
|
};
|
|
|
|
const works = await db.works.findAll({
|
|
attributes: [
|
|
'id',
|
|
'title',
|
|
'author_name',
|
|
'work_type',
|
|
'description',
|
|
'external_source_url',
|
|
'license_terms',
|
|
'content_hash',
|
|
'fingerprint',
|
|
'registered_at',
|
|
'visibility',
|
|
],
|
|
include: [
|
|
{
|
|
model: db.file,
|
|
as: 'original_files',
|
|
attributes: ['id', 'name', 'sizeInBytes', 'publicUrl', 'privateUrl'],
|
|
},
|
|
],
|
|
order: [
|
|
['registered_at', 'DESC'],
|
|
['createdAt', 'DESC'],
|
|
],
|
|
limit: 100,
|
|
});
|
|
|
|
const rankedCandidates = toPlainArray(works)
|
|
.map((work) => scoreWork(work, payload))
|
|
.sort((left, right) => right.score - left.score)
|
|
.slice(0, 3);
|
|
|
|
const topCandidate = rankedCandidates[0];
|
|
const resultType = classifyResult(topCandidate?.score || 0);
|
|
const confidenceScore = Number((((topCandidate?.score || 8) / 100)).toFixed(2));
|
|
const startedAt = new Date();
|
|
const completedAt = new Date();
|
|
const notes = buildResultNotes(topCandidate, resultType);
|
|
const matchedFields = (topCandidate?.reasons || ['No close match signals were detected']).join(', ');
|
|
|
|
const transaction = await db.sequelize.transaction();
|
|
|
|
try {
|
|
const revealRequest = await Reveal_requestsDBApi.create(
|
|
{
|
|
request_title: data.request_title,
|
|
input_type: data.input_type,
|
|
input_text: data.input_text || null,
|
|
input_url: data.input_url || null,
|
|
computed_hash: computedHash,
|
|
computed_fingerprint: computedFingerprint,
|
|
status: 'completed',
|
|
started_at: startedAt,
|
|
completed_at: completedAt,
|
|
uploaded_files: Array.isArray(data.uploaded_files) ? data.uploaded_files : [],
|
|
requested_by: currentUser.id,
|
|
},
|
|
{
|
|
currentUser,
|
|
transaction,
|
|
},
|
|
);
|
|
|
|
const revealResult = await Reveal_resultsDBApi.create(
|
|
{
|
|
result_type: resultType,
|
|
confidence_score: confidenceScore,
|
|
matched_fields: matchedFields,
|
|
notes,
|
|
generated_at: completedAt,
|
|
request: revealRequest.id,
|
|
matched_work: resultType === 'no_match' ? null : topCandidate?.work?.id || null,
|
|
created_by_user: currentUser.id,
|
|
evidence_files: Array.isArray(data.uploaded_files) ? data.uploaded_files : [],
|
|
},
|
|
{
|
|
currentUser,
|
|
transaction,
|
|
},
|
|
);
|
|
|
|
await transaction.commit();
|
|
|
|
const detail = await this.getResultDetail(revealResult.id);
|
|
|
|
return {
|
|
request: detail.request,
|
|
result: detail.result,
|
|
candidates: rankedCandidates.map(formatCandidate),
|
|
};
|
|
} catch (error) {
|
|
await transaction.rollback();
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
static async getStudioFeed(currentUser) {
|
|
const [recentResults, featuredWorks, totalWorks, currentUserReveals] = await Promise.all([
|
|
db.reveal_results.findAll({
|
|
where: {
|
|
created_by_userId: currentUser.id,
|
|
},
|
|
include: [
|
|
{
|
|
model: db.reveal_requests,
|
|
as: 'request',
|
|
},
|
|
{
|
|
model: db.works,
|
|
as: 'matched_work',
|
|
},
|
|
],
|
|
order: [
|
|
['generated_at', 'DESC'],
|
|
['createdAt', 'DESC'],
|
|
],
|
|
limit: 6,
|
|
}),
|
|
db.works.findAll({
|
|
attributes: [
|
|
'id',
|
|
'title',
|
|
'author_name',
|
|
'work_type',
|
|
'registered_at',
|
|
'visibility',
|
|
'license_terms',
|
|
],
|
|
order: [
|
|
['registered_at', 'DESC'],
|
|
['createdAt', 'DESC'],
|
|
],
|
|
limit: 4,
|
|
}),
|
|
db.works.count(),
|
|
db.reveal_results.count({
|
|
where: {
|
|
created_by_userId: currentUser.id,
|
|
},
|
|
}),
|
|
]);
|
|
|
|
return {
|
|
stats: {
|
|
totalWorks,
|
|
currentUserReveals,
|
|
matchableWorks: featuredWorks.filter((work) => !!work.license_terms).length,
|
|
},
|
|
recentResults: toPlainArray(recentResults),
|
|
featuredWorks: toPlainArray(featuredWorks),
|
|
};
|
|
}
|
|
|
|
static async getResultDetail(id) {
|
|
const result = await db.reveal_results.findOne({
|
|
where: { id },
|
|
include: [
|
|
{
|
|
model: db.reveal_requests,
|
|
as: 'request',
|
|
include: [
|
|
{
|
|
model: db.file,
|
|
as: 'uploaded_files',
|
|
attributes: ['id', 'name', 'publicUrl', 'privateUrl', 'sizeInBytes'],
|
|
},
|
|
{
|
|
model: db.users,
|
|
as: 'requested_by',
|
|
attributes: ['id', 'firstName', 'lastName', 'email'],
|
|
},
|
|
],
|
|
},
|
|
{
|
|
model: db.works,
|
|
as: 'matched_work',
|
|
include: [
|
|
{
|
|
model: db.file,
|
|
as: 'original_files',
|
|
attributes: ['id', 'name', 'publicUrl', 'privateUrl', 'sizeInBytes'],
|
|
},
|
|
{
|
|
model: db.users,
|
|
as: 'owner',
|
|
attributes: ['id', 'firstName', 'lastName', 'email'],
|
|
},
|
|
],
|
|
},
|
|
{
|
|
model: db.file,
|
|
as: 'evidence_files',
|
|
attributes: ['id', 'name', 'publicUrl', 'privateUrl', 'sizeInBytes'],
|
|
},
|
|
{
|
|
model: db.users,
|
|
as: 'created_by_user',
|
|
attributes: ['id', 'firstName', 'lastName', 'email'],
|
|
},
|
|
],
|
|
});
|
|
|
|
if (!result) {
|
|
const error = new Error('Reveal result not found.');
|
|
error.code = 404;
|
|
throw error;
|
|
}
|
|
|
|
return {
|
|
result: result.get({ plain: true }),
|
|
request: result.request ? result.request.get({ plain: true }) : null,
|
|
};
|
|
}
|
|
};
|