diff --git a/api/search.php b/api/search.php
new file mode 100644
index 0000000..9a17ae7
--- /dev/null
+++ b/api/search.php
@@ -0,0 +1,27 @@
+<?php
+header('Content-Type: application/json');
+$query = isset($_GET['q']) ? trim($_GET['q']) : '';
+
+if (empty($query)) {
+    http_response_code(400);
+    echo json_encode(['error' => 'Query parameter is missing.']);
+    exit;
+}
+
+$url = 'https://api.duckduckgo.com/?q=' . urlencode($query) . '&format=json';
+
+$ch = curl_init();
+curl_setopt($ch, CURLOPT_URL, $url);
+curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
+curl_setopt($ch, CURLOPT_USERAGENT, 'ArtickleSearch/1.0');
+$output = curl_exec($ch);
+$http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
+curl_close($ch);
+
+if ($http_code !== 200) {
+    http_response_code(500);
+    echo json_encode(['error' => 'Failed to fetch data from the external API.']);
+    exit;
+}
+
+echo $output;
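Note: this endpoint relays DuckDuckGo's Instant Answer JSON unchanged, so consumers see fields such as RelatedTopics directly. A minimal sketch of a caller, assuming a hypothetical deployment at localhost (the host and query are placeholders, not part of the patch):

    <?php
    // Hypothetical consumer of api/search.php; adjust the base URL to your setup.
    // DuckDuckGo nests some entries under "Topics" groups, so guard for items
    // that lack a direct FirstURL/Text pair.
    $json = file_get_contents('http://localhost/api/search.php?q=' . urlencode('php pdo'));
    $data = json_decode($json, true);

    foreach ($data['RelatedTopics'] ?? [] as $topic) {
        if (isset($topic['FirstURL'], $topic['Text'])) {
            echo $topic['Text'], ' -> ', $topic['FirstURL'], PHP_EOL;
        }
    }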
diff --git a/api/summarize.php b/api/summarize.php
new file mode 100644
index 0000000..96906ab
--- /dev/null
+++ b/api/summarize.php
@@ -0,0 +1,117 @@
+<?php
+session_start();
+header('Content-Type: application/json');
+
+$ip_address = $_SERVER['REMOTE_ADDR'];
+
+// --- Rate Limiting ---
+try {
+    // Connection settings are placeholders; substitute real credentials.
+    $db = new PDO('mysql:host=localhost;dbname=artickle', 'DB_USER', 'DB_PASSWORD');
+    $db->exec("CREATE TABLE IF NOT EXISTS rate_limits (
+        id INT AUTO_INCREMENT PRIMARY KEY,
+        ip_address VARCHAR(45) NOT NULL,
+        request_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+    )");
+
+    // Clean up old records
+    $db->prepare("DELETE FROM rate_limits WHERE request_timestamp < NOW() - INTERVAL 1 DAY")->execute();
+
+    // Check minute limit (5 requests)
+    $stmt_minute = $db->prepare("SELECT COUNT(*) FROM rate_limits WHERE ip_address = ? AND request_timestamp > NOW() - INTERVAL 1 MINUTE");
+    $stmt_minute->execute([$ip_address]);
+    if ($stmt_minute->fetchColumn() >= 5) {
+        http_response_code(429); // Too Many Requests
+        echo json_encode(['error' => 'Too many requests. Please wait a minute before trying again.']);
+        exit;
+    }
+
+    // Check daily limit (100 requests; the cleanup above bounds this count to one day)
+    $stmt_day = $db->prepare("SELECT COUNT(*) FROM rate_limits WHERE ip_address = ?");
+    $stmt_day->execute([$ip_address]);
+    if ($stmt_day->fetchColumn() >= 100) {
+        http_response_code(429);
+        echo json_encode(['error' => 'You have reached your daily limit of 100 summaries.']);
+        exit;
+    }
+
+} catch (PDOException $e) {
+    http_response_code(500);
+    // Do not expose detailed error to client
+    echo json_encode(['error' => 'Could not connect to the database for rate limiting.']);
+    exit;
+}
+
+// --- Summarization Logic ---
+$url = isset($_POST['url']) ? trim($_POST['url']) : '';
+
+if (empty($url) || !filter_var($url, FILTER_VALIDATE_URL)) {
+    http_response_code(400);
+    echo json_encode(['error' => 'Invalid or missing URL.']);
+    exit;
+}
+
+$url_hash = md5($url);
+
+// Check session for cached summary (valid for 30 minutes)
+if (isset($_SESSION['summaries'][$url_hash]) && (time() - $_SESSION['summaries'][$url_hash]['timestamp'] < 1800)) {
+    echo json_encode(['summary' => $_SESSION['summaries'][$url_hash]['summary']]);
+    exit;
+}
+
+// If not cached, proceed to fetch and summarize.
+// Log the request for rate limiting.
+$db->prepare("INSERT INTO rate_limits (ip_address) VALUES (?)")->execute([$ip_address]);
+
+// 1. Scrape the article content using cURL
+$ch = curl_init();
+curl_setopt($ch, CURLOPT_URL, $url);
+curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
+curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
+curl_setopt($ch, CURLOPT_USERAGENT, 'ArtickleSummarizer/1.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)');
+curl_setopt($ch, CURLOPT_TIMEOUT, 15);
+$html = curl_exec($ch);
+$http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
+curl_close($ch);
+
+if ($http_code < 200 || $http_code >= 300 || empty($html)) {
+    http_response_code(500);
+    echo json_encode(['error' => 'Failed to fetch article content. The site may be down or blocking requests.']);
+    exit;
+}
+
+// 2. Extract main content (simple heuristic: collect all <p> text)
+libxml_use_internal_errors(true);
+$doc = new DOMDocument();
+$doc->loadHTML($html);
+libxml_clear_errors();
+
+$paragraphs = $doc->getElementsByTagName('p');
+$text_content = "";
+foreach ($paragraphs as $p) {
+    $text_content .= $p->nodeValue . "\n\n";
+}
+
+if (trim($text_content) === "") {
+    http_response_code(500);
+    echo json_encode(['error' => 'Could not extract readable content from the article.']);
+    exit;
+}
+
+// 3. Summarize (placeholder for in-house model)
+// This is a placeholder. In a real scenario, it would call an internal
+// summarization model. For now, it returns the first 5 paragraphs.
+$chunks = explode("\n\n", $text_content);
+$summary = implode("\n\n", array_slice($chunks, 0, 5));
+
+// 4. Cache the summary in the session
+$_SESSION['summaries'][$url_hash] = [
+    'summary' => $summary,
+    'timestamp' => time()
+];
+
+echo json_encode(['summary' => $summary]);
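One subtlety in the limiter above: the daily check counts every stored row for an IP, so it only approximates a 24-hour window because the cleanup DELETE trims the table to one day of history. If that cleanup were ever moved or skipped, an explicit window would keep the cap correct. A sketch using the same table, `$db`, and `$ip_address` as in the patch:

    // Sketch: daily cap with an explicit 24-hour window, rather than relying
    // on the cleanup DELETE to bound the count. Assumes $db and $ip_address
    // are set up exactly as in api/summarize.php above.
    $stmt_day = $db->prepare(
        "SELECT COUNT(*) FROM rate_limits
         WHERE ip_address = ?
           AND request_timestamp > NOW() - INTERVAL 1 DAY"
    );
    $stmt_day->execute([$ip_address]);
    if ($stmt_day->fetchColumn() >= 100) {
        http_response_code(429);
        echo json_encode(['error' => 'You have reached your daily limit of 100 summaries.']);
        exit;
    }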
diff --git a/assets/css/custom.css b/assets/css/custom.css
new file mode 100644
index 0000000..880e9c0
--- /dev/null
+++ b/assets/css/custom.css
@@ -0,0 +1,22 @@
+body {
+    font-family: 'Inter', sans-serif;
+}
+
+.input-group-lg .form-control, .input-group-lg .btn {
+    height: calc(2.5rem + 10px);
+    padding: 0.5rem 1rem;
+    font-size: 1.25rem;
+}
+
+.result-card {
+    transition: transform 0.2s ease-in-out;
+}
+
+.result-card:hover {
+    transform: translateY(-5px);
+}
+
+#search-button i {
+    width: 24px;
+    height: 24px;
+}
diff --git a/assets/js/main.js b/assets/js/main.js
new file mode 100644
index 0000000..971c05b
--- /dev/null
+++ b/assets/js/main.js
@@ -0,0 +1,99 @@
+document.addEventListener('DOMContentLoaded', function () {
+    const searchForm = document.getElementById('search-form');
+    const searchInput = document.getElementById('search-input');
+    const resultsContainer = document.getElementById('results-container');
+    const loadingIndicator = document.getElementById('loading-indicator');
+    const summaryModalEl = document.getElementById('summary-modal');
+    const summaryModal = new bootstrap.Modal(summaryModalEl);
+    const summaryModalBody = document.getElementById('summary-modal-body');
+    const originalArticleLink = document.getElementById('original-article-link');
+
+    searchForm.addEventListener('submit', function (e) {
+        e.preventDefault();
+        const query = searchInput.value.trim();
+        if (query) {
+            fetchResults(query);
+        }
+    });
+
+    function fetchResults(query) {
+        resultsContainer.innerHTML = '';
+        loadingIndicator.style.display = 'block';
+
+        fetch(`/api/search.php?q=${encodeURIComponent(query)}`)
+            .then(response => response.json())
+            .then(data => {
+                displayResults(data.RelatedTopics);
+            })
+            .catch(error => {
+                console.error('Error fetching search results:', error);
+                resultsContainer.innerHTML = '<div class="alert alert-danger">Failed to fetch search results. Please try again.</div>';
+            })
+            .finally(() => {
+                loadingIndicator.style.display = 'none';
+            });
+    }
+
+    function displayResults(topics) {
+        if (!topics || topics.length === 0) {
+            resultsContainer.innerHTML = '<div class="alert alert-info">No results found.</div>';
+            return;
+        }
+
+        topics.forEach(item => {
+            // Skip nested topic groups that lack a direct result link
+            if (!item.FirstURL || !item.Result) {
+                return;
+            }
+
+            const card = document.createElement('div');
+            card.className = 'card result-card mb-3';
+            card.innerHTML = `
+                <div class="card-body">
+                    <div class="card-text">${item.Result}</div>
+                    <button class="btn btn-primary btn-sm mt-2 summarize-btn" data-url="${item.FirstURL}">Summarize</button>
+                </div>`;
+            resultsContainer.appendChild(card);
+        });
+    }
+
+    resultsContainer.addEventListener('click', function (e) {
+        const btn = e.target.closest('.summarize-btn');
+        if (!btn) {
+            return;
+        }
+
+        const url = btn.dataset.url;
+        originalArticleLink.href = url;
+        summaryModalBody.innerHTML = '<div class="text-center"><div class="spinner-border" role="status"></div></div>';
+        summaryModal.show();
+
+        const formData = new FormData();
+        formData.append('url', url);
+
+        fetch('/api/summarize.php', { method: 'POST', body: formData })
+            .then(response => response.json())
+            .then(data => {
+                if (data.error) {
+                    summaryModalBody.innerHTML = `<div class="alert alert-danger">${data.error}</div>`;
+                } else {
+                    summaryModalBody.textContent = data.summary;
+                }
+            })
+            .catch(error => {
+                console.error('Error fetching summary:', error);
+                summaryModalBody.innerHTML = '<div class="alert alert-danger">Failed to summarize the article.</div>';
+            });
+    });
+});
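To exercise the summarize endpoint end to end, a minimal PHP smoke test might look like the following sketch. The base URL and article URL are placeholders, and the cookie jar is only there so the session-backed 30-minute summary cache can take effect across repeated runs:

    <?php
    // Hypothetical smoke test for api/summarize.php; adjust URLs to your setup.
    $ch = curl_init('http://localhost/api/summarize.php');
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, ['url' => 'https://example.com/some-article']);
    // Persist the PHP session cookie so the 30-minute summary cache is reused
    // on a second run instead of re-scraping the article.
    curl_setopt($ch, CURLOPT_COOKIEJAR, '/tmp/artickle_cookies.txt');
    curl_setopt($ch, CURLOPT_COOKIEFILE, '/tmp/artickle_cookies.txt');
    $response = curl_exec($ch);
    curl_close($ch);

    echo $response, PHP_EOL; // Expect {"summary":"..."} or {"error":"..."}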