159 lines
5.8 KiB
PHP
159 lines
5.8 KiB
PHP
<?php
|
|
require_once __DIR__ . '/db/config.php';
|
|
|
|
// Set a longer execution time, as this can be a long-running script.
|
|
ini_set('max_execution_time', 300); // 5 minutes
|
|
|
|
// --- Helper Functions ---
|
|
|
|
/**
|
|
* Updates the status of a submission.
|
|
*/
|
|
function update_status($pdo, $id, $status, $message = null) {
|
|
$sql = "UPDATE submissions SET status = ? WHERE id = ?";
|
|
if ($message) {
|
|
// For now, we don't have a column for error messages, but we could add one.
|
|
// For this version, we'll just log it to the server's error log.
|
|
error_log("Submission ID: $id, Status: $status, Message: $message");
|
|
}
|
|
$stmt = $pdo->prepare($sql);
|
|
$stmt->execute([$status, $id]);
|
|
}
|
|
|
|
/**
|
|
* A simple function to extract a domain from a URL.
|
|
*/
|
|
function get_domain($url) {
|
|
$pieces = parse_url($url);
|
|
if (isset($pieces['host'])) {
|
|
return preg_replace('/^www\./', '', $pieces['host']);
|
|
}
|
|
return null;
|
|
}
|
|
|
|
// --- Main Worker Logic ---
|
|
|
|
$pdo = db();
|
|
|
|
// 1. Find submissions that are 'Pending' or those that have been 'Processing' for a while (e.g., > 15 mins)
|
|
// This helps recover from script crashes.
|
|
$stmt = $pdo->query("SELECT * FROM submissions WHERE status = 'Pending' OR (status = 'Processing' AND updated_at < NOW() - INTERVAL 15 MINUTE)");
|
|
$pending_submissions = $stmt->fetchAll();
|
|
|
|
if (empty($pending_submissions)) {
|
|
// No work to do, just redirect back.
|
|
header('Location: analyst_dashboard.php');
|
|
exit;
|
|
}
|
|
|
|
foreach ($pending_submissions as $sub) {
|
|
$id = $sub['id'];
|
|
$name = $sub['name'];
|
|
|
|
// 2. Mark as 'Processing'
|
|
update_status($pdo, $id, 'Processing');
|
|
|
|
// 3. Simulate fetching data (This is where the Gemini API calls will go)
|
|
// For now, we'll use a mock implementation that just populates some data.
|
|
try {
|
|
// --- REAL DATA IMPLEMENTATION ---
|
|
$linkedin_query = "{$name}" site:linkedin.com/in/;
|
|
$twitter_query = "{$name}" site:twitter.com;
|
|
$company_query = "{$name}" company;
|
|
|
|
// For this example, we'll call the tool synchronously.
|
|
// In a real-world high-volume application, you might use a job queue.
|
|
$linkedin_results_json = shell_exec("gemini-tool google_web_search --query '{$linkedin_query}'");
|
|
$twitter_results_json = shell_exec("gemini-tool google_web_search --query '{$twitter_query}'");
|
|
$company_results_json = shell_exec("gemini-tool google_web_search --query '{$company_query}'");
|
|
|
|
$linkedin_url = null;
|
|
if ($linkedin_results_json) {
|
|
$results = json_decode($linkedin_results_json, true);
|
|
if (!empty($results['web_search_result']['results'])) {
|
|
// Find the first result that looks like a profile
|
|
foreach ($results['web_search_result']['results'] as $res) {
|
|
if (preg_match('/linkedin\.com\/in\//', $res['url'])) {
|
|
$linkedin_url = $res['url'];
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
$twitter_url = null;
|
|
if ($twitter_results_json) {
|
|
$results = json_decode($twitter_results_json, true);
|
|
if (!empty($results['web_search_result']['results'])) {
|
|
foreach ($results['web_search_result']['results'] as $res) {
|
|
if (preg_match('/twitter\.com\/[^\/]+$/', $res['url'])) {
|
|
$twitter_url = $res['url'];
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
$company = null;
|
|
if ($company_results_json) {
|
|
$results = json_decode($company_results_json, true);
|
|
if (!empty($results['web_search_result']['results'])) {
|
|
// For simplicity, we'll take the title of the first result as the company name.
|
|
// This is a very rough heuristic and could be improved.
|
|
$company = $results['web_search_result']['results'][0]['title'];
|
|
}
|
|
}
|
|
|
|
$industry = null;
|
|
if ($company) {
|
|
$industry_query = ""{$company}" industry";
|
|
$industry_results_json = shell_exec("gemini-tool google_web_search --query '{$industry_query}'");
|
|
if ($industry_results_json) {
|
|
$results = json_decode($industry_results_json, true);
|
|
if (!empty($results['web_search_result']['results'])) {
|
|
$industry = $results['web_search_result']['results'][0]['title'];
|
|
}
|
|
}
|
|
}
|
|
|
|
$geo_location = null;
|
|
if ($company) {
|
|
$location_query = ""{$company}" headquarters location";
|
|
$location_results_json = shell_exec("gemini-tool google_web_search --query '{$location_query}'");
|
|
if ($location_results_json) {
|
|
$results = json_decode($location_results_json, true);
|
|
if (!empty($results['web_search_result']['results'])) {
|
|
$geo_location = $results['web_search_result']['results'][0]['title'];
|
|
}
|
|
}
|
|
}
|
|
|
|
$update_data = [
|
|
'linkedin_url' => $linkedin_url,
|
|
'twitter_url' => $twitter_url,
|
|
'company' => $company,
|
|
'industry' => $industry,
|
|
'geo_location' => $geo_location,
|
|
'status' => 'Completed'
|
|
];
|
|
|
|
$sql_parts = [];
|
|
foreach ($update_data as $key => $value) {
|
|
$sql_parts[] = "`$key` = :$key";
|
|
}
|
|
$sql = "UPDATE submissions SET " . implode(', ', $sql_parts) . " WHERE id = :id";
|
|
|
|
$stmt = $pdo->prepare($sql);
|
|
$update_data['id'] = $id;
|
|
$stmt->execute($update_data);
|
|
|
|
} catch (Exception $e) {
|
|
// If something goes wrong, mark as 'Error'
|
|
update_status($pdo, $id, 'Error', $e->getMessage());
|
|
}
|
|
}
|
|
|
|
// 4. Redirect back to the dashboard
|
|
header('Location: analyst_dashboard.php?refreshed=all');
|
|
exit;
|