63 lines
2.2 KiB
PHP
63 lines
2.2 KiB
PHP
<?php
|
|
/**
 * Download a web page and extract link-preview metadata from it.
 *
 * Values are taken from Open Graph tags first, then Twitter Card tags, then
 * the generic <title> element / "description" meta tag.
 *
 * NOTE(review): only the URL scheme is restricted (http/https). Requests to
 * internal or private hosts are not blocked here; callers passing untrusted
 * URLs should validate the host themselves.
 *
 * @param string $url Absolute http(s) URL of the page to inspect.
 *
 * @return array|null Array with the string keys 'title', 'description',
 *                    'image', 'url' (the URL passed in) and 'site_name', or
 *                    null when the page cannot be fetched (non-200 response,
 *                    transport error, missing extension) or yields neither a
 *                    title nor a description.
 */
function fetchOpenGraphData($url) {
    if (!is_string($url) || trim($url) === '') {
        return null;
    }

    // Check both required extensions before spending a network round trip.
    if (!function_exists('curl_init') || !class_exists('DOMDocument')) {
        return null;
    }

    $ch = curl_init();
    if ($ch === false) {
        return null;
    }

    curl_setopt_array($ch, [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        // Never let the initial URL or a redirect reach file://, gopher://, etc.
        CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_TIMEOUT => 10,
        // Browser-like identity: some sites serve no metadata to unknown clients.
        CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        CURLOPT_HTTPHEADER => [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language: en-US,en;q=0.9',
            'Cache-Control: no-cache',
            'Pragma: no-cache',
            'Upgrade-Insecure-Requests: 1',
        ],
    ]);

    $html = curl_exec($ch);
    $info = curl_getinfo($ch);
    curl_close($ch);

    if (!is_string($html) || $html === '') {
        return null;
    }
    if (!is_array($info) || !isset($info['http_code']) || (int) $info['http_code'] !== 200) {
        return null;
    }

    // Prefer the charset announced in the Content-Type header; default to UTF-8.
    $charset = 'UTF-8';
    if (!empty($info['content_type'])
        && preg_match('/charset\s*=\s*["\']?([A-Za-z0-9._:-]+)/i', $info['content_type'], $m)
    ) {
        $charset = $m[1];
    }

    return parseOpenGraphHtml($html, $url, $charset);
}

/**
 * Extract preview metadata from an HTML string (no network access).
 *
 * @param string $html    Raw HTML markup.
 * @param string $url     URL the markup came from; copied into the 'url' key.
 * @param string $charset Encoding hint used when the markup declares none.
 *
 * @return array|null Same shape as fetchOpenGraphData(); null when the markup
 *                    cannot be parsed or has neither a title nor a description.
 */
function parseOpenGraphHtml($html, $url, $charset = 'UTF-8') {
    if (!preg_match('/^[A-Za-z0-9._:-]+$/', (string) $charset)) {
        $charset = 'UTF-8';
    }

    // Collect parser warnings internally instead of silencing them with "@",
    // then restore whatever error mode the caller had configured.
    $previous = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    // Without a declared charset libxml is expected to fall back to
    // ISO-8859-1, which mangles UTF-8 text; the pseudo XML declaration
    // supplies a default encoding for such documents.
    $loaded = $doc->loadHTML('<?xml encoding="' . $charset . '">' . $html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if (!$loaded) {
        return null;
    }

    $data = [
        'title' => '',
        'description' => '',
        'image' => '',
        'url' => $url,
        'site_name' => '',
    ];

    // Lowest-priority title source: the document's <title> element.
    $titles = $doc->getElementsByTagName('title');
    if ($titles->length > 0) {
        $data['title'] = trim((string) $titles->item(0)->nodeValue);
    }

    // Index meta tags by their property/name key. The first non-empty value
    // wins, so an empty or repeated tag cannot erase a value already found.
    $found = [];
    foreach ($doc->getElementsByTagName('meta') as $meta) {
        $content = trim($meta->getAttribute('content'));
        if ($content === '') {
            continue;
        }
        foreach ([$meta->getAttribute('property'), $meta->getAttribute('name')] as $key) {
            $key = strtolower(trim($key));
            if ($key !== '' && !isset($found[$key])) {
                $found[$key] = $content;
            }
        }
    }

    // Per field, the first key present wins: Open Graph, then Twitter, then generic.
    $preference = [
        'title' => ['og:title', 'twitter:title'],
        'description' => ['og:description', 'twitter:description', 'description'],
        'image' => ['og:image', 'twitter:image'],
        'site_name' => ['og:site_name'],
    ];
    foreach ($preference as $field => $keys) {
        foreach ($keys as $key) {
            if (isset($found[$key])) {
                $data[$field] = $found[$key];
                break;
            }
        }
    }

    // A preview with neither a title nor a description is not worth returning.
    if ($data['title'] === '' && $data['description'] === '') {
        return null;
    }

    return $data;
}
|
|
|
|
/**
 * Find every http:// or https:// URL in a piece of free text or HTML.
 *
 * A match ends at whitespace or at one of the characters < > ", none of which
 * may appear unencoded in a URL. Trailing sentence punctuation and unbalanced
 * closing brackets are stripped, so "(see https://example.com/a)." yields
 * "https://example.com/a", while balanced brackets such as
 * ".../PHP_(disambiguation)" are kept.
 *
 * @param string $text Text to scan.
 *
 * @return array List of URL strings in order of appearance (duplicates kept);
 *               empty when nothing matches or the input is not a string.
 */
function extractUrls($text) {
    if (!is_string($text) || $text === '') {
        return [];
    }

    // preg_match_all() returns 0 for "no match" and false on a PCRE error.
    if (!preg_match_all('~https?://[^\s<>"]+~', $text, $matches)) {
        return [];
    }

    // Closing bracket => its opening counterpart.
    $closers = [')' => '(', ']' => '[', '}' => '{'];

    $urls = [];
    foreach ($matches[0] as $url) {
        // Peel off trailing punctuation until the URL stops changing.
        do {
            $before = $url;
            $url = rtrim($url, '.,;:!?\'');
            $last = substr($url, -1);
            // A closing bracket with no opening partner belongs to the
            // surrounding text, not to the URL.
            if (isset($closers[$last])
                && substr_count($url, $closers[$last]) < substr_count($url, $last)
            ) {
                $url = substr($url, 0, -1);
            }
        } while ($url !== $before);

        $urls[] = $url;
    }

    return $urls;
}
|