<?php
session_start();
header('Content-Type: application/json');

require_once __DIR__ . '/../db/config.php';

// --- Rate Limiting ---
try {
    $db = db();
    $ip_address = $_SERVER['REMOTE_ADDR'];

    // Create table if it doesn't exist (simple migration)
    $db->exec("CREATE TABLE IF NOT EXISTS rate_limits (
        id INT AUTO_INCREMENT PRIMARY KEY,
        ip_address VARCHAR(45) NOT NULL,
        request_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )");

    // Clean up old records
    $db->prepare("DELETE FROM rate_limits WHERE request_timestamp < NOW() - INTERVAL 1 DAY")->execute();

    // Check minute limit (5 requests)
    $stmt_minute = $db->prepare("SELECT COUNT(*) FROM rate_limits WHERE ip_address = ? AND request_timestamp > NOW() - INTERVAL 1 MINUTE");
    $stmt_minute->execute([$ip_address]);
    if ($stmt_minute->fetchColumn() >= 5) {
        http_response_code(429); // Too Many Requests
        echo json_encode(['error' => 'Too many requests. Please wait a minute before trying again.']);
        exit;
    }

    // Check daily limit (100 requests)
    $stmt_day = $db->prepare("SELECT COUNT(*) FROM rate_limits WHERE ip_address = ?");
    $stmt_day->execute([$ip_address]);
    if ($stmt_day->fetchColumn() >= 100) {
        http_response_code(429);
        echo json_encode(['error' => 'You have reached your daily limit of 100 summaries.']);
        exit;
    }

} catch (PDOException $e) {
    http_response_code(500);
    // Do not expose detailed error to client
    echo json_encode(['error' => 'Could not connect to the database for rate limiting.']);
    exit;
}


// --- Summarization Logic ---
$url = isset($_POST['url']) ? trim($_POST['url']) : '';

if (empty($url) || !filter_var($url, FILTER_VALIDATE_URL)) {
    http_response_code(400);
    echo json_encode(['error' => 'Invalid or missing URL.']);
    exit;
}

$url_hash = md5($url);

// Check session for cached summary (valid for 30 minutes)
if (isset($_SESSION['summaries'][$url_hash]) && (time() - $_SESSION['summaries'][$url_hash]['timestamp'] < 1800)) {
    echo json_encode(['summary' => $_SESSION['summaries'][$url_hash]['summary']]);
    exit;
}

// If not cached, proceed to fetch and summarize
// Log the request for rate limiting
$db->prepare("INSERT INTO rate_limits (ip_address) VALUES (?)")->execute([$ip_address]);


// 1. Scrape the article content using cURL
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_USERAGENT, 'ArtickleSummarizer/1.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)');
curl_setopt($ch, CURLOPT_TIMEOUT, 15);
$html = curl_exec($ch);
$http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);

if ($http_code < 200 || $http_code >= 300 || empty($html)) {
    http_response_code(500);
    echo json_encode(['error' => 'Failed to fetch article content. The site may be down or blocking requests.']);
    exit;
}

// 2. Extract main content (simple heuristic)
libxml_use_internal_errors(true);
$doc = new DOMDocument();
$doc->loadHTML($html);
libxml_clear_errors();
$paragraphs = $doc->getElementsByTagName('p');
$text_content = "";
foreach ($paragraphs as $p) {
    $text_content .= $p->nodeValue . "\n\n";
}

if (trim($text_content) === "") {
    http_response_code(500);
    echo json_encode(['error' => 'Could not extract readable content from the article.']);
    exit;
}

// 3. Summarize (Placeholder for in-house model)
// This is a placeholder function. In a real scenario, this would call an internal summarization model.
// For now, it returns the first 5 paragraphs.
$sentences = explode("\n\n", $text_content);
$summary = implode("\n\n", array_slice($sentences, 0, 5));

// 4. Cache the summary in the session
$_SESSION['summaries'][$url_hash] = [
    'summary' => $summary,
    'timestamp' => time()
];

echo json_encode(['summary' => $summary]);