V1.4.2
This commit is contained in:
parent
3565a88085
commit
a4cb3a5abc
BIN
assets/pasted-20260419-201602-db15dcc6.png
Normal file
BIN
assets/pasted-20260419-201602-db15dcc6.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 404 KiB |
268
index-en.php
268
index-en.php
@ -84,34 +84,98 @@ function index_vanilla_uex_normalize_search_text(string $value): string
|
|||||||
return trim((string) preg_replace('/\s+/u', ' ', $value));
|
return trim((string) preg_replace('/\s+/u', ' ', $value));
|
||||||
}
|
}
|
||||||
|
|
||||||
function index_vanilla_uex_title_matches_query(string $title, string $queryName): bool
|
function index_vanilla_uex_normalize_compact_search_text(string $value): string
|
||||||
{
|
{
|
||||||
$normalizedTitle = index_vanilla_uex_normalize_search_text($title);
|
$value = function_exists('mb_strtolower')
|
||||||
$normalizedQuery = index_vanilla_uex_normalize_search_text($queryName);
|
? mb_strtolower($value, 'UTF-8')
|
||||||
|
: strtolower($value);
|
||||||
|
|
||||||
if ($normalizedTitle === '' || $normalizedQuery === '') {
|
return trim((string) preg_replace('/[^[:alnum:]]+/u', '', $value));
|
||||||
return false;
|
}
|
||||||
|
|
||||||
|
function index_vanilla_uex_tokenize_search_text(string $value): array
|
||||||
|
{
|
||||||
|
$normalizedValue = index_vanilla_uex_normalize_search_text($value);
|
||||||
|
if ($normalizedValue === '') {
|
||||||
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strpos($normalizedTitle, $normalizedQuery) !== false) {
|
$tokens = array_values(array_filter(explode(' ', $normalizedValue), static function (string $token): bool {
|
||||||
return true;
|
return preg_match('/\d/u', $token) || strlen($token) >= 2;
|
||||||
}
|
|
||||||
|
|
||||||
$queryTokens = array_values(array_filter(explode(' ', $normalizedQuery), static function (string $token): bool {
|
|
||||||
return preg_match('/\d/', $token) || strlen($token) >= 3;
|
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
return array_values(array_unique($tokens));
|
||||||
|
}
|
||||||
|
|
||||||
|
function index_vanilla_uex_extract_wts_subject(string $title): string
|
||||||
|
{
|
||||||
|
$normalizedTitle = index_vanilla_uex_normalize_search_text($title);
|
||||||
|
if ($normalizedTitle === '') {
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!preg_match('/\bwts\b/u', $normalizedTitle, $matches, PREG_OFFSET_CAPTURE)) {
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
|
$matchText = (string) ($matches[0][0] ?? 'wts');
|
||||||
|
$matchOffset = (int) ($matches[0][1] ?? 0);
|
||||||
|
$subject = trim(substr($normalizedTitle, $matchOffset + strlen($matchText)));
|
||||||
|
|
||||||
|
while ($subject !== '') {
|
||||||
|
$updated = trim((string) preg_replace('/^(?:wts|wtb|lts|ltb|sell|selling|trade|trading|for|offer|offering|looking|lf|want|wanted)\b[\s\-:]*/u', '', $subject));
|
||||||
|
if ($updated === $subject) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
$subject = $updated;
|
||||||
|
}
|
||||||
|
|
||||||
|
$subject = trim((string) preg_replace('/^(?:[a-z]{1,4}\s+)?\d+(?:\s+\d+)*(?:\s*[\-:])\s*/u', '', $subject));
|
||||||
|
return $subject;
|
||||||
|
}
|
||||||
|
|
||||||
|
function index_vanilla_uex_title_has_wts_marker(string $title): bool
|
||||||
|
{
|
||||||
|
return index_vanilla_uex_extract_wts_subject($title) !== '';
|
||||||
|
}
|
||||||
|
|
||||||
|
function index_vanilla_uex_title_match_score(string $title, string $queryName): int
|
||||||
|
{
|
||||||
|
$normalizedTitle = index_vanilla_uex_extract_wts_subject($title);
|
||||||
|
$normalizedQuery = index_vanilla_uex_normalize_search_text($queryName);
|
||||||
|
$compactTitle = index_vanilla_uex_normalize_compact_search_text($normalizedTitle);
|
||||||
|
$compactQuery = index_vanilla_uex_normalize_compact_search_text($normalizedQuery);
|
||||||
|
|
||||||
|
if ($normalizedTitle === '' || $normalizedQuery === '' || $compactTitle === '' || $compactQuery === '') {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($normalizedTitle === $normalizedQuery || $compactTitle === $compactQuery) {
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strpos($normalizedTitle, $normalizedQuery) !== false || strpos($compactTitle, $compactQuery) !== false) {
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
$queryTokens = index_vanilla_uex_tokenize_search_text($normalizedQuery);
|
||||||
if ($queryTokens === []) {
|
if ($queryTokens === []) {
|
||||||
return false;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ($queryTokens as $token) {
|
foreach ($queryTokens as $token) {
|
||||||
if (strpos($normalizedTitle, $token) === false) {
|
if (strpos($normalizedTitle, $token) === false) {
|
||||||
return false;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
function index_vanilla_uex_title_matches_query(string $title, string $queryName): bool
|
||||||
|
{
|
||||||
|
return index_vanilla_uex_title_match_score($title, $queryName) > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
function index_vanilla_uex_extract_price_value(string $rawPrice): ?int
|
function index_vanilla_uex_extract_price_value(string $rawPrice): ?int
|
||||||
@ -168,20 +232,21 @@ function index_vanilla_uex_extract_price_value(string $rawPrice): ?int
|
|||||||
|
|
||||||
function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryName, int $sampleLimit = 10): array
|
function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryName, int $sampleLimit = 10): array
|
||||||
{
|
{
|
||||||
$values = [];
|
$candidates = [];
|
||||||
$chunks = preg_split('/<div\s+class="search-row\b[^>]*>/i', $html) ?: [];
|
$chunks = preg_split('/<div\s+class="search-row\b[^>]*>/i', $html) ?: [];
|
||||||
|
|
||||||
foreach ($chunks as $chunk) {
|
foreach ($chunks as $chunk) {
|
||||||
if (count($values) >= $sampleLimit) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!preg_match('/<a\b[^>]*class="text-bold"[^>]*>(.*?)<\/a>/is', $chunk, $titleMatches)) {
|
if (!preg_match('/<a\b[^>]*class="text-bold"[^>]*>(.*?)<\/a>/is', $chunk, $titleMatches)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
$title = index_vanilla_uex_normalize_whitespace((string) ($titleMatches[1] ?? ''));
|
$title = index_vanilla_uex_normalize_whitespace((string) ($titleMatches[1] ?? ''));
|
||||||
if ($title === '' || !preg_match('/^WTS\b/i', $title) || !index_vanilla_uex_title_matches_query($title, $queryName)) {
|
if ($title === '' || !index_vanilla_uex_title_has_wts_marker($title)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
$matchScore = index_vanilla_uex_title_match_score($title, $queryName);
|
||||||
|
if ($matchScore <= 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -194,7 +259,34 @@ function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryN
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
$values[] = $priceValue;
|
$candidates[] = [
|
||||||
|
'price' => $priceValue,
|
||||||
|
'score' => $matchScore,
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($candidates === []) {
|
||||||
|
return [
|
||||||
|
'has_estimate' => false,
|
||||||
|
'average' => null,
|
||||||
|
'formatted' => '—',
|
||||||
|
'sample_count' => 0,
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
$bestScore = max(array_column($candidates, 'score'));
|
||||||
|
$minimumAcceptedScore = $bestScore >= 3 ? 2 : $bestScore;
|
||||||
|
$values = [];
|
||||||
|
|
||||||
|
foreach ($candidates as $candidate) {
|
||||||
|
if (($candidate['score'] ?? 0) < $minimumAcceptedScore) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
$values[] = (int) $candidate['price'];
|
||||||
|
if (count($values) >= $sampleLimit) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($values === []) {
|
if ($values === []) {
|
||||||
@ -216,6 +308,44 @@ function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryN
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function index_vanilla_uex_create_handle(string $url, string $userAgent, int $connectTimeout, int $timeout)
|
||||||
|
{
|
||||||
|
$handle = curl_init();
|
||||||
|
curl_setopt_array($handle, [
|
||||||
|
CURLOPT_URL => $url,
|
||||||
|
CURLOPT_RETURNTRANSFER => true,
|
||||||
|
CURLOPT_FOLLOWLOCATION => true,
|
||||||
|
CURLOPT_CONNECTTIMEOUT => $connectTimeout,
|
||||||
|
CURLOPT_TIMEOUT => $timeout,
|
||||||
|
CURLOPT_USERAGENT => $userAgent,
|
||||||
|
CURLOPT_ENCODING => '',
|
||||||
|
CURLOPT_SSL_VERIFYPEER => true,
|
||||||
|
CURLOPT_SSL_VERIFYHOST => 2,
|
||||||
|
CURLOPT_HTTPHEADER => [
|
||||||
|
'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||||
|
'Accept-Language: fr-FR,fr;q=0.9,en;q=0.8',
|
||||||
|
'Cache-Control: no-cache',
|
||||||
|
],
|
||||||
|
]);
|
||||||
|
|
||||||
|
return $handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
function index_vanilla_uex_fetch_single_body(string $url, string $userAgent, int $connectTimeout = 8, int $timeout = 18): ?string
|
||||||
|
{
|
||||||
|
$handle = index_vanilla_uex_create_handle($url, $userAgent, $connectTimeout, $timeout);
|
||||||
|
$body = curl_exec($handle);
|
||||||
|
$error = curl_error($handle);
|
||||||
|
$httpCode = (int) curl_getinfo($handle, CURLINFO_RESPONSE_CODE);
|
||||||
|
curl_close($handle);
|
||||||
|
|
||||||
|
if (!is_string($body) || $error !== '' || $httpCode < 200 || $httpCode >= 300 || trim($body) === '') {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return $body;
|
||||||
|
}
|
||||||
|
|
||||||
function index_vanilla_uex_fetch_estimates(array $names, int $sampleLimit = 10): array
|
function index_vanilla_uex_fetch_estimates(array $names, int $sampleLimit = 10): array
|
||||||
{
|
{
|
||||||
$results = [];
|
$results = [];
|
||||||
@ -234,65 +364,63 @@ function index_vanilla_uex_fetch_estimates(array $names, int $sampleLimit = 10):
|
|||||||
return $results;
|
return $results;
|
||||||
}
|
}
|
||||||
|
|
||||||
$multiHandle = curl_multi_init();
|
|
||||||
$handles = [];
|
|
||||||
$userAgent = 'Mozilla/5.0 (compatible; FlatLogicVanillaDb/1.0; +https://uexcorp.space/)';
|
$userAgent = 'Mozilla/5.0 (compatible; FlatLogicVanillaDb/1.0; +https://uexcorp.space/)';
|
||||||
|
$batchSize = 4;
|
||||||
|
|
||||||
foreach ($uniqueNames as $name) {
|
foreach (array_chunk(array_values($uniqueNames), $batchSize) as $nameBatch) {
|
||||||
$url = 'https://uexcorp.space/search?q=' . rawurlencode($name);
|
$multiHandle = curl_multi_init();
|
||||||
$handle = curl_init();
|
$handles = [];
|
||||||
curl_setopt_array($handle, [
|
|
||||||
CURLOPT_URL => $url,
|
|
||||||
CURLOPT_RETURNTRANSFER => true,
|
|
||||||
CURLOPT_FOLLOWLOCATION => true,
|
|
||||||
CURLOPT_CONNECTTIMEOUT => 4,
|
|
||||||
CURLOPT_TIMEOUT => 8,
|
|
||||||
CURLOPT_USERAGENT => $userAgent,
|
|
||||||
CURLOPT_SSL_VERIFYPEER => true,
|
|
||||||
CURLOPT_SSL_VERIFYHOST => 2,
|
|
||||||
CURLOPT_HTTPHEADER => [
|
|
||||||
'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
|
||||||
'Accept-Language: fr-FR,fr;q=0.9,en;q=0.8',
|
|
||||||
'Cache-Control: no-cache',
|
|
||||||
],
|
|
||||||
]);
|
|
||||||
|
|
||||||
curl_multi_add_handle($multiHandle, $handle);
|
foreach ($nameBatch as $name) {
|
||||||
$handles[$name] = $handle;
|
$url = 'https://uexcorp.space/search?q=' . rawurlencode($name);
|
||||||
}
|
$handle = index_vanilla_uex_create_handle($url, $userAgent, 6, 12);
|
||||||
|
curl_multi_add_handle($multiHandle, $handle);
|
||||||
$running = null;
|
$handles[$name] = [
|
||||||
do {
|
'handle' => $handle,
|
||||||
$status = curl_multi_exec($multiHandle, $running);
|
'url' => $url,
|
||||||
if ($running) {
|
|
||||||
curl_multi_select($multiHandle, 1.0);
|
|
||||||
}
|
|
||||||
} while ($running && $status === CURLM_OK);
|
|
||||||
|
|
||||||
foreach ($handles as $name => $handle) {
|
|
||||||
$error = curl_error($handle);
|
|
||||||
$httpCode = (int) curl_getinfo($handle, CURLINFO_RESPONSE_CODE);
|
|
||||||
$body = (string) curl_multi_getcontent($handle);
|
|
||||||
|
|
||||||
if ($error !== '' || $httpCode < 200 || $httpCode >= 300 || trim($body) === '') {
|
|
||||||
$results[$name] = [
|
|
||||||
'has_estimate' => false,
|
|
||||||
'average' => null,
|
|
||||||
'formatted' => 'Indisponible',
|
|
||||||
'sample_count' => 0,
|
|
||||||
'error' => true,
|
|
||||||
];
|
];
|
||||||
} else {
|
}
|
||||||
|
|
||||||
|
$running = null;
|
||||||
|
do {
|
||||||
|
$status = curl_multi_exec($multiHandle, $running);
|
||||||
|
if ($running) {
|
||||||
|
curl_multi_select($multiHandle, 1.0);
|
||||||
|
}
|
||||||
|
} while ($running && $status === CURLM_OK);
|
||||||
|
|
||||||
|
foreach ($handles as $name => $payload) {
|
||||||
|
$handle = $payload['handle'];
|
||||||
|
$url = $payload['url'];
|
||||||
|
$error = curl_error($handle);
|
||||||
|
$httpCode = (int) curl_getinfo($handle, CURLINFO_RESPONSE_CODE);
|
||||||
|
$body = (string) curl_multi_getcontent($handle);
|
||||||
|
|
||||||
|
curl_multi_remove_handle($multiHandle, $handle);
|
||||||
|
curl_close($handle);
|
||||||
|
|
||||||
|
if ($error !== '' || $httpCode < 200 || $httpCode >= 300 || trim($body) === '') {
|
||||||
|
$body = index_vanilla_uex_fetch_single_body($url, $userAgent) ?? '';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (trim($body) === '') {
|
||||||
|
$results[$name] = [
|
||||||
|
'has_estimate' => false,
|
||||||
|
'average' => null,
|
||||||
|
'formatted' => 'Indisponible',
|
||||||
|
'sample_count' => 0,
|
||||||
|
'error' => true,
|
||||||
|
];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
$results[$name] = index_vanilla_uex_parse_estimate_from_html($body, $name, $sampleLimit);
|
$results[$name] = index_vanilla_uex_parse_estimate_from_html($body, $name, $sampleLimit);
|
||||||
$results[$name]['error'] = false;
|
$results[$name]['error'] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_multi_remove_handle($multiHandle, $handle);
|
curl_multi_close($multiHandle);
|
||||||
curl_close($handle);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_multi_close($multiHandle);
|
|
||||||
|
|
||||||
return $results;
|
return $results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
268
index.php
268
index.php
@ -85,34 +85,98 @@ function index_vanilla_uex_normalize_search_text(string $value): string
|
|||||||
return trim((string) preg_replace('/\s+/u', ' ', $value));
|
return trim((string) preg_replace('/\s+/u', ' ', $value));
|
||||||
}
|
}
|
||||||
|
|
||||||
function index_vanilla_uex_title_matches_query(string $title, string $queryName): bool
|
function index_vanilla_uex_normalize_compact_search_text(string $value): string
|
||||||
{
|
{
|
||||||
$normalizedTitle = index_vanilla_uex_normalize_search_text($title);
|
$value = function_exists('mb_strtolower')
|
||||||
$normalizedQuery = index_vanilla_uex_normalize_search_text($queryName);
|
? mb_strtolower($value, 'UTF-8')
|
||||||
|
: strtolower($value);
|
||||||
|
|
||||||
if ($normalizedTitle === '' || $normalizedQuery === '') {
|
return trim((string) preg_replace('/[^[:alnum:]]+/u', '', $value));
|
||||||
return false;
|
}
|
||||||
|
|
||||||
|
function index_vanilla_uex_tokenize_search_text(string $value): array
|
||||||
|
{
|
||||||
|
$normalizedValue = index_vanilla_uex_normalize_search_text($value);
|
||||||
|
if ($normalizedValue === '') {
|
||||||
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strpos($normalizedTitle, $normalizedQuery) !== false) {
|
$tokens = array_values(array_filter(explode(' ', $normalizedValue), static function (string $token): bool {
|
||||||
return true;
|
return preg_match('/\d/u', $token) || strlen($token) >= 2;
|
||||||
}
|
|
||||||
|
|
||||||
$queryTokens = array_values(array_filter(explode(' ', $normalizedQuery), static function (string $token): bool {
|
|
||||||
return preg_match('/\d/', $token) || strlen($token) >= 3;
|
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
return array_values(array_unique($tokens));
|
||||||
|
}
|
||||||
|
|
||||||
|
function index_vanilla_uex_extract_wts_subject(string $title): string
|
||||||
|
{
|
||||||
|
$normalizedTitle = index_vanilla_uex_normalize_search_text($title);
|
||||||
|
if ($normalizedTitle === '') {
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!preg_match('/\bwts\b/u', $normalizedTitle, $matches, PREG_OFFSET_CAPTURE)) {
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
|
$matchText = (string) ($matches[0][0] ?? 'wts');
|
||||||
|
$matchOffset = (int) ($matches[0][1] ?? 0);
|
||||||
|
$subject = trim(substr($normalizedTitle, $matchOffset + strlen($matchText)));
|
||||||
|
|
||||||
|
while ($subject !== '') {
|
||||||
|
$updated = trim((string) preg_replace('/^(?:wts|wtb|lts|ltb|sell|selling|trade|trading|for|offer|offering|looking|lf|want|wanted)\b[\s\-:]*/u', '', $subject));
|
||||||
|
if ($updated === $subject) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
$subject = $updated;
|
||||||
|
}
|
||||||
|
|
||||||
|
$subject = trim((string) preg_replace('/^(?:[a-z]{1,4}\s+)?\d+(?:\s+\d+)*(?:\s*[\-:])\s*/u', '', $subject));
|
||||||
|
return $subject;
|
||||||
|
}
|
||||||
|
|
||||||
|
function index_vanilla_uex_title_has_wts_marker(string $title): bool
|
||||||
|
{
|
||||||
|
return index_vanilla_uex_extract_wts_subject($title) !== '';
|
||||||
|
}
|
||||||
|
|
||||||
|
function index_vanilla_uex_title_match_score(string $title, string $queryName): int
|
||||||
|
{
|
||||||
|
$normalizedTitle = index_vanilla_uex_extract_wts_subject($title);
|
||||||
|
$normalizedQuery = index_vanilla_uex_normalize_search_text($queryName);
|
||||||
|
$compactTitle = index_vanilla_uex_normalize_compact_search_text($normalizedTitle);
|
||||||
|
$compactQuery = index_vanilla_uex_normalize_compact_search_text($normalizedQuery);
|
||||||
|
|
||||||
|
if ($normalizedTitle === '' || $normalizedQuery === '' || $compactTitle === '' || $compactQuery === '') {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($normalizedTitle === $normalizedQuery || $compactTitle === $compactQuery) {
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strpos($normalizedTitle, $normalizedQuery) !== false || strpos($compactTitle, $compactQuery) !== false) {
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
$queryTokens = index_vanilla_uex_tokenize_search_text($normalizedQuery);
|
||||||
if ($queryTokens === []) {
|
if ($queryTokens === []) {
|
||||||
return false;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ($queryTokens as $token) {
|
foreach ($queryTokens as $token) {
|
||||||
if (strpos($normalizedTitle, $token) === false) {
|
if (strpos($normalizedTitle, $token) === false) {
|
||||||
return false;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
function index_vanilla_uex_title_matches_query(string $title, string $queryName): bool
|
||||||
|
{
|
||||||
|
return index_vanilla_uex_title_match_score($title, $queryName) > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
function index_vanilla_uex_extract_price_value(string $rawPrice): ?int
|
function index_vanilla_uex_extract_price_value(string $rawPrice): ?int
|
||||||
@ -169,20 +233,21 @@ function index_vanilla_uex_extract_price_value(string $rawPrice): ?int
|
|||||||
|
|
||||||
function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryName, int $sampleLimit = 10): array
|
function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryName, int $sampleLimit = 10): array
|
||||||
{
|
{
|
||||||
$values = [];
|
$candidates = [];
|
||||||
$chunks = preg_split('/<div\s+class="search-row\b[^>]*>/i', $html) ?: [];
|
$chunks = preg_split('/<div\s+class="search-row\b[^>]*>/i', $html) ?: [];
|
||||||
|
|
||||||
foreach ($chunks as $chunk) {
|
foreach ($chunks as $chunk) {
|
||||||
if (count($values) >= $sampleLimit) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!preg_match('/<a\b[^>]*class="text-bold"[^>]*>(.*?)<\/a>/is', $chunk, $titleMatches)) {
|
if (!preg_match('/<a\b[^>]*class="text-bold"[^>]*>(.*?)<\/a>/is', $chunk, $titleMatches)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
$title = index_vanilla_uex_normalize_whitespace((string) ($titleMatches[1] ?? ''));
|
$title = index_vanilla_uex_normalize_whitespace((string) ($titleMatches[1] ?? ''));
|
||||||
if ($title === '' || !preg_match('/^WTS\b/i', $title) || !index_vanilla_uex_title_matches_query($title, $queryName)) {
|
if ($title === '' || !index_vanilla_uex_title_has_wts_marker($title)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
$matchScore = index_vanilla_uex_title_match_score($title, $queryName);
|
||||||
|
if ($matchScore <= 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -195,7 +260,34 @@ function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryN
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
$values[] = $priceValue;
|
$candidates[] = [
|
||||||
|
'price' => $priceValue,
|
||||||
|
'score' => $matchScore,
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($candidates === []) {
|
||||||
|
return [
|
||||||
|
'has_estimate' => false,
|
||||||
|
'average' => null,
|
||||||
|
'formatted' => '—',
|
||||||
|
'sample_count' => 0,
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
$bestScore = max(array_column($candidates, 'score'));
|
||||||
|
$minimumAcceptedScore = $bestScore >= 3 ? 2 : $bestScore;
|
||||||
|
$values = [];
|
||||||
|
|
||||||
|
foreach ($candidates as $candidate) {
|
||||||
|
if (($candidate['score'] ?? 0) < $minimumAcceptedScore) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
$values[] = (int) $candidate['price'];
|
||||||
|
if (count($values) >= $sampleLimit) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($values === []) {
|
if ($values === []) {
|
||||||
@ -217,6 +309,44 @@ function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryN
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function index_vanilla_uex_create_handle(string $url, string $userAgent, int $connectTimeout, int $timeout)
|
||||||
|
{
|
||||||
|
$handle = curl_init();
|
||||||
|
curl_setopt_array($handle, [
|
||||||
|
CURLOPT_URL => $url,
|
||||||
|
CURLOPT_RETURNTRANSFER => true,
|
||||||
|
CURLOPT_FOLLOWLOCATION => true,
|
||||||
|
CURLOPT_CONNECTTIMEOUT => $connectTimeout,
|
||||||
|
CURLOPT_TIMEOUT => $timeout,
|
||||||
|
CURLOPT_USERAGENT => $userAgent,
|
||||||
|
CURLOPT_ENCODING => '',
|
||||||
|
CURLOPT_SSL_VERIFYPEER => true,
|
||||||
|
CURLOPT_SSL_VERIFYHOST => 2,
|
||||||
|
CURLOPT_HTTPHEADER => [
|
||||||
|
'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||||
|
'Accept-Language: fr-FR,fr;q=0.9,en;q=0.8',
|
||||||
|
'Cache-Control: no-cache',
|
||||||
|
],
|
||||||
|
]);
|
||||||
|
|
||||||
|
return $handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
function index_vanilla_uex_fetch_single_body(string $url, string $userAgent, int $connectTimeout = 8, int $timeout = 18): ?string
|
||||||
|
{
|
||||||
|
$handle = index_vanilla_uex_create_handle($url, $userAgent, $connectTimeout, $timeout);
|
||||||
|
$body = curl_exec($handle);
|
||||||
|
$error = curl_error($handle);
|
||||||
|
$httpCode = (int) curl_getinfo($handle, CURLINFO_RESPONSE_CODE);
|
||||||
|
curl_close($handle);
|
||||||
|
|
||||||
|
if (!is_string($body) || $error !== '' || $httpCode < 200 || $httpCode >= 300 || trim($body) === '') {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return $body;
|
||||||
|
}
|
||||||
|
|
||||||
function index_vanilla_uex_fetch_estimates(array $names, int $sampleLimit = 10): array
|
function index_vanilla_uex_fetch_estimates(array $names, int $sampleLimit = 10): array
|
||||||
{
|
{
|
||||||
$results = [];
|
$results = [];
|
||||||
@ -235,65 +365,63 @@ function index_vanilla_uex_fetch_estimates(array $names, int $sampleLimit = 10):
|
|||||||
return $results;
|
return $results;
|
||||||
}
|
}
|
||||||
|
|
||||||
$multiHandle = curl_multi_init();
|
|
||||||
$handles = [];
|
|
||||||
$userAgent = 'Mozilla/5.0 (compatible; FlatLogicVanillaDb/1.0; +https://uexcorp.space/)';
|
$userAgent = 'Mozilla/5.0 (compatible; FlatLogicVanillaDb/1.0; +https://uexcorp.space/)';
|
||||||
|
$batchSize = 4;
|
||||||
|
|
||||||
foreach ($uniqueNames as $name) {
|
foreach (array_chunk(array_values($uniqueNames), $batchSize) as $nameBatch) {
|
||||||
$url = 'https://uexcorp.space/search?q=' . rawurlencode($name);
|
$multiHandle = curl_multi_init();
|
||||||
$handle = curl_init();
|
$handles = [];
|
||||||
curl_setopt_array($handle, [
|
|
||||||
CURLOPT_URL => $url,
|
|
||||||
CURLOPT_RETURNTRANSFER => true,
|
|
||||||
CURLOPT_FOLLOWLOCATION => true,
|
|
||||||
CURLOPT_CONNECTTIMEOUT => 4,
|
|
||||||
CURLOPT_TIMEOUT => 8,
|
|
||||||
CURLOPT_USERAGENT => $userAgent,
|
|
||||||
CURLOPT_SSL_VERIFYPEER => true,
|
|
||||||
CURLOPT_SSL_VERIFYHOST => 2,
|
|
||||||
CURLOPT_HTTPHEADER => [
|
|
||||||
'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
|
||||||
'Accept-Language: fr-FR,fr;q=0.9,en;q=0.8',
|
|
||||||
'Cache-Control: no-cache',
|
|
||||||
],
|
|
||||||
]);
|
|
||||||
|
|
||||||
curl_multi_add_handle($multiHandle, $handle);
|
foreach ($nameBatch as $name) {
|
||||||
$handles[$name] = $handle;
|
$url = 'https://uexcorp.space/search?q=' . rawurlencode($name);
|
||||||
}
|
$handle = index_vanilla_uex_create_handle($url, $userAgent, 6, 12);
|
||||||
|
curl_multi_add_handle($multiHandle, $handle);
|
||||||
$running = null;
|
$handles[$name] = [
|
||||||
do {
|
'handle' => $handle,
|
||||||
$status = curl_multi_exec($multiHandle, $running);
|
'url' => $url,
|
||||||
if ($running) {
|
|
||||||
curl_multi_select($multiHandle, 1.0);
|
|
||||||
}
|
|
||||||
} while ($running && $status === CURLM_OK);
|
|
||||||
|
|
||||||
foreach ($handles as $name => $handle) {
|
|
||||||
$error = curl_error($handle);
|
|
||||||
$httpCode = (int) curl_getinfo($handle, CURLINFO_RESPONSE_CODE);
|
|
||||||
$body = (string) curl_multi_getcontent($handle);
|
|
||||||
|
|
||||||
if ($error !== '' || $httpCode < 200 || $httpCode >= 300 || trim($body) === '') {
|
|
||||||
$results[$name] = [
|
|
||||||
'has_estimate' => false,
|
|
||||||
'average' => null,
|
|
||||||
'formatted' => 'Indisponible',
|
|
||||||
'sample_count' => 0,
|
|
||||||
'error' => true,
|
|
||||||
];
|
];
|
||||||
} else {
|
}
|
||||||
|
|
||||||
|
$running = null;
|
||||||
|
do {
|
||||||
|
$status = curl_multi_exec($multiHandle, $running);
|
||||||
|
if ($running) {
|
||||||
|
curl_multi_select($multiHandle, 1.0);
|
||||||
|
}
|
||||||
|
} while ($running && $status === CURLM_OK);
|
||||||
|
|
||||||
|
foreach ($handles as $name => $payload) {
|
||||||
|
$handle = $payload['handle'];
|
||||||
|
$url = $payload['url'];
|
||||||
|
$error = curl_error($handle);
|
||||||
|
$httpCode = (int) curl_getinfo($handle, CURLINFO_RESPONSE_CODE);
|
||||||
|
$body = (string) curl_multi_getcontent($handle);
|
||||||
|
|
||||||
|
curl_multi_remove_handle($multiHandle, $handle);
|
||||||
|
curl_close($handle);
|
||||||
|
|
||||||
|
if ($error !== '' || $httpCode < 200 || $httpCode >= 300 || trim($body) === '') {
|
||||||
|
$body = index_vanilla_uex_fetch_single_body($url, $userAgent) ?? '';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (trim($body) === '') {
|
||||||
|
$results[$name] = [
|
||||||
|
'has_estimate' => false,
|
||||||
|
'average' => null,
|
||||||
|
'formatted' => 'Indisponible',
|
||||||
|
'sample_count' => 0,
|
||||||
|
'error' => true,
|
||||||
|
];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
$results[$name] = index_vanilla_uex_parse_estimate_from_html($body, $name, $sampleLimit);
|
$results[$name] = index_vanilla_uex_parse_estimate_from_html($body, $name, $sampleLimit);
|
||||||
$results[$name]['error'] = false;
|
$results[$name]['error'] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_multi_remove_handle($multiHandle, $handle);
|
curl_multi_close($multiHandle);
|
||||||
curl_close($handle);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_multi_close($multiHandle);
|
|
||||||
|
|
||||||
return $results;
|
return $results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user