loadHTML($html);
    $metas = $doc->getElementsByTagName('meta');
    $data = [
        'title' => '',
        'description' => '',
        'image' => '',
        'url' => $url,
        'site_name' => ''
    ];

    // Try title tag if og:title is missing
    $titles = $doc->getElementsByTagName('title');
    if ($titles->length > 0) {
        $data['title'] = $titles->item(0)->nodeValue;
    }

    foreach ($metas as $meta) {
        $property = $meta->getAttribute('property');
        $name = $meta->getAttribute('name');
        $content = $meta->getAttribute('content');

        if ($property === 'og:title' || $name === 'twitter:title') $data['title'] = $content;
        if ($property === 'og:description' || $name === 'description' || $name === 'twitter:description') $data['description'] = $content;
        if ($property === 'og:image' || $name === 'twitter:image') $data['image'] = $content;
        if ($property === 'og:site_name') $data['site_name'] = $content;
    }

    // Filter out empty results
    if (empty($data['title']) && empty($data['description'])) return null;

    return $data;
}

/**
 * Extract every http:// or https:// URL that appears in a piece of text.
 *
 * A URL starts at the scheme and runs until the next whitespace character,
 * angle bracket or double quote, so URLs embedded in HTML attributes or
 * followed directly by a tag are cut at the right place. Punctuation that
 * belongs to the surrounding sentence rather than to the URL (trailing
 * ".", ",", ";", ":", "!", "?", "'" and unbalanced ")") is trimmed off.
 *
 * @param string|null $text Text to scan; null or an empty string yields no URLs.
 * @return string[] URLs in order of appearance; duplicates are kept.
 */
function extractUrls($text)
{
    if ($text === null || $text === '') {
        return [];
    }

    // `<`, `>` and `"` can never appear unencoded in a URL, so they end a match.
    $pattern = '/https?:\/\/[^\s<>"]+/';

    // preg_match_all() returns false on a PCRE error and 0 when nothing matched;
    // in both cases there is nothing to return.
    if (!preg_match_all($pattern, $text, $matches)) {
        return [];
    }

    $urls = [];
    foreach ($matches[0] as $url) {
        // Peel sentence punctuation off the end until the URL stops changing.
        do {
            $previous = $url;
            $url = rtrim($url, '.,;:!?\'');

            // A closing parenthesis is only part of the URL when it has a
            // matching opener inside it (e.g. ".../wiki/PHP_(language)").
            if (substr($url, -1) === ')' && substr_count($url, ')') > substr_count($url, '(')) {
                $url = substr($url, 0, -1);
            }
        } while ($url !== $previous);

        // Skip matches that consisted of nothing but a scheme and punctuation.
        if ($url !== 'http://' && $url !== 'https://') {
            $urls[] = $url;
        }
    }

    return $urls;
}