// 40252-vm/backend/src/services/threat_detections.js

const crypto = require('crypto');
const stream = require('stream');

const csv = require('csv-parser');

const db = require('../db/models');
const Threat_detectionsDBApi = require('../db/api/threat_detections');
const processFile = require("../middlewares/upload");
const ValidationError = require('./notifications/errors/validation');


/**
 * Wording heuristics for phishing-style text.
 *
 * Every rule has a human-readable `label`, a `weight` that is summed into the
 * risk score when the rule matches, and a case-insensitive `pattern`.
 */
const PHISHING_PATTERNS = [
  {
    label: 'Urgent pressure language',
    weight: 14,
    pattern: /\b(urgent|immediately|final notice|act now|within 24 hours|account will be closed)\b/i,
  },
  {
    label: 'Credential or password request',
    weight: 18,
    pattern: /\b(password|credentials|verify your account|confirm your identity|login to continue|security update)\b/i,
  },
  {
    label: 'Payment or gift-card lure',
    weight: 15,
    pattern: /\b(wire transfer|gift card|bitcoin|crypto wallet|invoice attached|payment failed)\b/i,
  },
  {
    label: 'Attachment execution lure',
    weight: 16,
    pattern: /\b(enable macros|open the attachment|download invoice|run the file|security patch)\b/i,
  },
  {
    label: 'Brand impersonation wording',
    weight: 11,
    pattern: /\b(microsoft|google|paypal|docusign|dropbox|office 365|banking portal)\b/i,
  },
];

/**
 * Heuristics for suspicious URLs.
 *
 * Every rule has a human-readable `label`, a `weight` that is summed into the
 * risk score when the rule matches, and a case-insensitive `pattern`.
 */
const URL_PATTERNS = [
  {
    label: 'URL contains an IP address host',
    weight: 20,
    pattern: /https?:\/\/\d{1,3}(\.\d{1,3}){3}/i,
  },
  {
    label: 'Punycode or homograph indicator',
    weight: 16,
    pattern: /xn--/i,
  },
  {
    label: 'Suspicious top-level domain',
    weight: 12,
    // The analyzed text is several fields joined with newlines, so the `m`
    // flag is required for `$` to match at the end of each field rather than
    // only at the very end of the joined text. `?` and `#` are accepted as
    // terminators so a query string or fragment does not hide the TLD.
    pattern: /\.(zip|mov|top|click|work|rest|country|gq|tk|ml)([\/?#]|$)/im,
  },
  {
    label: 'URL hides destination with @ symbol',
    weight: 18,
    pattern: /https?:\/\/[^\s]+@/i,
  },
  {
    label: 'Known URL shortener',
    weight: 10,
    pattern: /\b(bit\.ly|tinyurl\.com|t\.co|goo\.gl|ow\.ly|is\.gd)\b/i,
  },
];

/**
 * Heuristics for malware-related file names, process behavior and network
 * indicators.
 *
 * Every rule has a human-readable `label`, a `weight` that is summed into the
 * risk score when the rule matches, and a case-insensitive `pattern`.
 */
const MALWARE_PATTERNS = [
  {
    label: 'Executable or script file extension',
    weight: 22,
    pattern: /\.(exe|scr|bat|cmd|js|vbs|ps1|jar|dll|hta|iso)\b/i,
  },
  {
    label: 'Office macro-enabled file extension',
    weight: 14,
    pattern: /\.(docm|xlsm|pptm)\b/i,
  },
  {
    label: 'Suspicious process or shell behavior',
    weight: 20,
    pattern: /\b(powershell|cmd\.exe|rundll32|regsvr32|wscript|cscript|encodedcommand)\b/i,
  },
  {
    label: 'Persistence or credential-access behavior',
    weight: 18,
    pattern: /\b(run key|startup folder|credential dump|mimikatz|lsass|keylogger)\b/i,
  },
  {
    label: 'Command-and-control network indicator',
    weight: 17,
    pattern: /\b(beacon|tor exit|dns tunnel|c2|command and control|port 4444|port 1337)\b/i,
  },
];

/**
 * Signals that lower the risk score.
 *
 * Same shape as the other rule tables, but every `weight` is negative so a
 * match subtracts from the summed score.
 */
const SAFE_PATTERNS = [
  {
    label: 'Uses HTTPS URL',
    weight: -4,
    pattern: /https:\/\//i,
  },
  {
    label: 'Mentions SPF/DKIM/DMARC pass',
    weight: -8,
    pattern: /\b(spf pass|dkim pass|dmarc pass)\b/i,
  },
  {
    label: 'Known institutional domain signal',
    weight: -6,
    pattern: /\b(\.edu|\.gov)\b/i,
  },
];

/**
 * Restrict a number to the inclusive range [min, max].
 *
 * @param {number} value - Number to restrict.
 * @param {number} min - Lower bound; wins over `max` if the bounds are inverted.
 * @param {number} max - Upper bound.
 * @returns {number} The bounded value.
 */
function clamp(value, min, max) {
  const cappedAbove = Math.min(max, value);
  return Math.max(min, cappedAbove);
}

/**
 * Mask e-mail addresses, http(s) URLs and long digit runs in a piece of text
 * and cap the result at 600 characters.
 *
 * @param {*} value - Text to redact; any falsy value yields an empty string.
 * @returns {string} The redacted, truncated text.
 */
function redactSensitiveText(value) {
  if (!value) {
    return '';
  }

  // Applied in order: an e-mail embedded in a URL is masked first, and the
  // URL rule then swallows the whole remaining URL.
  const redactionRules = [
    [/[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}/gi, '[email]'],
    [/https?:\/\/\S+/gi, '[url]'],
    // 12-19 consecutive digits as a standalone token.
    [/\b\d{12,19}\b/g, '[number]'],
  ];

  let redacted = String(value);
  for (const [matcher, placeholder] of redactionRules) {
    redacted = redacted.replace(matcher, placeholder);
  }

  return redacted.slice(0, 600);
}

/**
 * Append an indicator for every rule whose pattern matches `source`.
 *
 * @param {string} source - Text to test.
 * @param {Array<{label: string, weight: number, pattern: RegExp}>} patterns - Rules to evaluate.
 * @param {Array<{label: string, weight: number}>} indicators - Output array, mutated in place.
 * @returns {void}
 */
function addMatches(source, patterns, indicators) {
  const matchedRules = patterns.filter(({ pattern }) => pattern.test(source));

  for (const { label, weight } of matchedRules) {
    // Only label and weight are copied; the regex itself is not carried along.
    indicators.push({ label, weight });
  }
}

/**
 * Score a submission against the heuristic rule tables and derive a threat
 * type, severity, verdict, confidence and summary from the score.
 *
 * @param {object} data - Submission; reads `title`, `content_text`, `url`,
 *   `fileName`, `sha256`, `network_activity` and `submission_type`.
 * @returns {{threatType: string, severity: string, verdict: string,
 *   riskScore: number, confidence: number, summary: string,
 *   indicators: Array<{label: string, weight: number}>,
 *   allIndicators: Array<{label: string, weight: number}>}}
 *   `indicators` holds at most six positive-weight matches, heaviest first;
 *   `allIndicators` holds every match, including negative-weight ones.
 */
function assessThreat(data) {
  // All non-empty fields are analyzed as one newline-joined text.
  const signalFields = [
    data.title,
    data.content_text,
    data.url,
    data.fileName,
    data.sha256,
    data.network_activity,
  ];
  const source = signalFields.filter((field) => Boolean(field)).join('\n');

  const indicators = [];
  const ruleTables = [PHISHING_PATTERNS, URL_PATTERNS, MALWARE_PATTERNS, SAFE_PATTERNS];
  for (const table of ruleTables) {
    addMatches(source, table, indicators);
  }

  // Base score of 12 plus the weight of every matched rule.
  let weightTotal = 0;
  for (const { weight } of indicators) {
    weightTotal += weight;
  }
  let rawScore = 12 + weightTotal;

  // Per-type bump applied on top of the rule weights.
  const submissionType = data.submission_type;
  switch (submissionType) {
    case 'url':
      rawScore += 4;
      break;
    case 'file':
      rawScore += 6;
      break;
    case 'network_traffic':
      rawScore += 8;
      break;
    default:
      break;
  }

  if (source.length > 2500) {
    rawScore += 3;
  }

  const riskScore = clamp(Math.round(rawScore), 0, 100);

  const topIndicators = indicators
    .filter(({ weight }) => weight > 0)
    .sort((left, right) => right.weight - left.weight)
    .slice(0, 6);

  // Below 35 everything is benign; above it the label follows the submission type.
  let threatType = 'benign';
  if (riskScore >= 35) {
    if (submissionType === 'file' || submissionType === 'network_traffic') {
      threatType = 'malware';
    } else if (submissionType === 'url') {
      threatType = 'suspicious_url';
    } else {
      threatType = 'phishing';
    }
  }

  // First band whose floor the score reaches wins; otherwise the fallback.
  const pickBand = (bands, fallback) => {
    const hit = bands.find(([floor]) => riskScore >= floor);
    return hit ? hit[1] : fallback;
  };

  const severity = pickBand(
    [[90, 'critical'], [72, 'high'], [45, 'medium'], [20, 'low']],
    'info',
  );

  const verdict = pickBand(
    [[75, 'block'], [45, 'warn'], [20, 'needs_review']],
    'allow',
  );

  // Confidence grows with the number of top indicators and with the distance
  // of the score from 50, bounded to [0.56, 0.96].
  const rawConfidence = 0.56 + topIndicators.length * 0.055 + Math.abs(riskScore - 50) / 250;
  const confidence = clamp(Number(rawConfidence.toFixed(2)), 0.56, 0.96);

  let summary = 'No strong malicious indicators were found; keep monitoring and verify sender/source context.';
  if (topIndicators.length) {
    const labelList = topIndicators.map(({ label }) => label).join(', ');
    summary = `${severity.toUpperCase()} risk ${threatType.replace('_', ' ')} signal: ${labelList}.`;
  }

  return {
    threatType,
    severity,
    verdict,
    riskScore,
    confidence,
    summary,
    indicators: topIndicators,
    allIndicators: indicators,
  };
}

/**
 * Reject analysis payloads that cannot be scored.
 *
 * @param {object} data - Incoming analysis request body.
 * @returns {void}
 * @throws {ValidationError} When the payload is missing or not an object, the
 *   `submission_type` is not one of the supported types, none of the signal
 *   fields contains non-whitespace text, or `content_text` exceeds 20,000
 *   characters.
 */
function validateAnalysisPayload(data) {
  if (!data || typeof data !== 'object') {
    throw new ValidationError('analysisPayloadMissing', 'Analysis payload is required.');
  }

  const supportedTypes = new Set(['email', 'message', 'url', 'file', 'network_traffic']);
  if (!supportedTypes.has(data.submission_type)) {
    throw new ValidationError('analysisTypeInvalid', 'Choose email, message, URL, file, or network traffic.');
  }

  // `title` is deliberately not in this list: a title alone is not a signal.
  const signalFieldNames = ['content_text', 'url', 'fileName', 'sha256', 'network_activity'];
  const isBlank = (fieldName) => String(data[fieldName] || '').trim() === '';
  if (signalFieldNames.every(isBlank)) {
    throw new ValidationError('analysisSignalMissing', 'Add text, a URL, a file name/hash, or network behavior to analyze.');
  }

  const textLength = String(data.content_text || '').length;
  if (textLength > 20000) {
    throw new ValidationError('analysisTextTooLong', 'Analysis text must be 20,000 characters or less.');
  }
}


module.exports = class Threat_detectionsService {
  static async create(data, currentUser) {
    const transaction = await db.sequelize.transaction();
    try {
      await Threat_detectionsDBApi.create(
        data,
        {
          currentUser,
          transaction,
        },
      );

      await transaction.commit();
    } catch (error) {
      await transaction.rollback();
      throw error;
    }
  }

  static async bulkImport(req, res) {
    const transaction = await db.sequelize.transaction();

    try {
      await processFile(req, res);
      const bufferStream = new stream.PassThrough();
      const results = [];

      await bufferStream.end(Buffer.from(req.file.buffer, "utf-8")); // convert Buffer to Stream

      await new Promise((resolve, reject) => {
        bufferStream
          .pipe(csv())
          .on('data', (data) => results.push(data))
          .on('end', async () => {
            console.log('CSV results', results);
            resolve();
          })
          .on('error', (error) => reject(error));
      })

      await Threat_detectionsDBApi.bulkImport(results, {
          transaction,
          ignoreDuplicates: true,
          validate: true,
          currentUser: req.currentUser
      });

      await transaction.commit();
    } catch (error) {
      await transaction.rollback();
      throw error;
    }
  }

  static async update(data, id, currentUser) {
    const transaction = await db.sequelize.transaction();
    try {
      let threat_detections = await Threat_detectionsDBApi.findBy(
        {id},
        {transaction},
      );

      if (!threat_detections) {
        throw new ValidationError(
          'threat_detectionsNotFound',
        );
      }

      const updatedThreat_detections = await Threat_detectionsDBApi.update(
        id,
        data,
        {
          currentUser,
          transaction,
        },
      );

      await transaction.commit();
      return updatedThreat_detections;

    } catch (error) {
      await transaction.rollback();
      throw error;
    }
  }

  static async deleteByIds(ids, currentUser) {
    const transaction = await db.sequelize.transaction();

    try {
      await Threat_detectionsDBApi.deleteByIds(ids, {
        currentUser,
        transaction,
      });

      await transaction.commit();
    } catch (error) {
      await transaction.rollback();
      throw error;
    }
  }

  static async remove(id, currentUser) {
    const transaction = await db.sequelize.transaction();

    try {
      await Threat_detectionsDBApi.remove(
        id,
        {
          currentUser,
          transaction,
        },
      );

      await transaction.commit();
    } catch (error) {
      await transaction.rollback();
      throw error;
    }
  }


  static async analyze(data, currentUser) {
    validateAnalysisPayload(data);

    const transaction = await db.sequelize.transaction();

    try {
      const now = new Date();
      const assessment = assessThreat(data);
      const organizationId = currentUser?.organizationsId
        || currentUser?.organizations?.id
        || currentUser?.organization?.id
        || null;
      const rawSignal = [
        data.title,
        data.content_text,
        data.url,
        data.fileName,
        data.sha256,
        data.network_activity,
      ].filter(Boolean).join('\n');
      const signalHash = crypto.createHash('sha256').update(rawSignal).digest('hex');
      const privacyMode = data.privacy_mode !== false;
      const storedText = privacyMode ? redactSensitiveText(data.content_text || data.network_activity || '') : String(data.content_text || data.network_activity || '').slice(0, 20000);

      const submission = await db.analysis_submissions.create({
        submission_type: data.submission_type,
        title: String(data.title || data.fileName || data.url || 'Untitled analysis').slice(0, 180),
        content_text: storedText,
        url: data.submission_type === 'url' ? String(data.url || '').slice(0, 2000) : null,
        sha256: String(data.sha256 || signalHash).slice(0, 128),
        processing_location: 'local',
        status: 'completed',
        submitted_at: now,
        completed_at: now,
        submitted_byId: currentUser?.id || null,
        organizationsId: organizationId,
        createdById: currentUser?.id || null,
        updatedById: currentUser?.id || null,
      }, { transaction });

      const detection = await db.threat_detections.create({
        threat_type: assessment.threatType,
        severity: assessment.severity,
        risk_score: assessment.riskScore,
        verdict: assessment.verdict,
        is_false_positive: false,
        is_false_negative: false,
        summary: assessment.summary,
        detected_at: now,
        submissionId: submission.id,
        organizationsId: organizationId,
        createdById: currentUser?.id || null,
        updatedById: currentUser?.id || null,
      }, { transaction });

      const explanationText = assessment.indicators.length
        ? `The analyzer flagged this item because it matched ${assessment.indicators.length} high-signal indicator(s). Review the indicators before blocking business-critical traffic.`
        : 'The analyzer did not find strong malicious signals. Treat this as a low-risk result, not a guarantee of safety.';

      const explanation = await db.explanations.create({
        explanation_type: assessment.indicators.length ? 'rule_match' : 'feature_importance',
        explanation_text: explanationText,
        top_indicators: JSON.stringify(assessment.indicators),
        confidence: assessment.confidence,
        generated_at: now,
        detectionId: detection.id,
        organizationsId: organizationId,
        createdById: currentUser?.id || null,
        updatedById: currentUser?.id || null,
      }, { transaction });

      await transaction.commit();

      return {
        submission: submission.get({ plain: true }),
        detection: detection.get({ plain: true }),
        explanation: explanation.get({ plain: true }),
        indicators: assessment.indicators,
        privacy: {
          mode: privacyMode ? 'redacted_local_processing' : 'full_text_stored_by_request',
          sha256: signalHash,
        },
      };
    } catch (error) {
      await transaction.rollback();
      throw error;
    }
  }

  static async recentAssistantFindings(currentUser, limit = 8) {
    const globalAccess = currentUser?.app_role?.globalAccess;
    const organizationId = currentUser?.organizationsId
      || currentUser?.organizations?.id
      || currentUser?.organization?.id
      || null;
    const where = {};

    if (!globalAccess) {
      where[db.Sequelize.Op.or] = [
        { createdById: currentUser?.id || null },
      ];

      if (organizationId) {
        where[db.Sequelize.Op.or].push({ organizationsId: organizationId });
      }
    }

    const rows = await db.threat_detections.findAll({
      where,
      include: [
        { model: db.analysis_submissions, as: 'submission' },
        { model: db.explanations, as: 'explanations_detection' },
      ],
      order: [['createdAt', 'desc']],
      limit: Math.min(Number(limit) || 8, 25),
    });

    return rows.map((row) => row.get({ plain: true }));
  }


};