This commit is contained in:
Flatlogic Bot 2026-04-19 21:05:13 +00:00
parent 3565a88085
commit a4cb3a5abc
3 changed files with 396 additions and 140 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 404 KiB

View File

@ -84,34 +84,98 @@ function index_vanilla_uex_normalize_search_text(string $value): string
return trim((string) preg_replace('/\s+/u', ' ', $value)); return trim((string) preg_replace('/\s+/u', ' ', $value));
} }
function index_vanilla_uex_title_matches_query(string $title, string $queryName): bool function index_vanilla_uex_normalize_compact_search_text(string $value): string
{ {
$normalizedTitle = index_vanilla_uex_normalize_search_text($title); $value = function_exists('mb_strtolower')
$normalizedQuery = index_vanilla_uex_normalize_search_text($queryName); ? mb_strtolower($value, 'UTF-8')
: strtolower($value);
if ($normalizedTitle === '' || $normalizedQuery === '') { return trim((string) preg_replace('/[^[:alnum:]]+/u', '', $value));
return false; }
function index_vanilla_uex_tokenize_search_text(string $value): array
{
$normalizedValue = index_vanilla_uex_normalize_search_text($value);
if ($normalizedValue === '') {
return [];
} }
if (strpos($normalizedTitle, $normalizedQuery) !== false) { $tokens = array_values(array_filter(explode(' ', $normalizedValue), static function (string $token): bool {
return true; return preg_match('/\d/u', $token) || strlen($token) >= 2;
}
$queryTokens = array_values(array_filter(explode(' ', $normalizedQuery), static function (string $token): bool {
return preg_match('/\d/', $token) || strlen($token) >= 3;
})); }));
return array_values(array_unique($tokens));
}
/**
 * Extracts the advertised subject that follows a "wts" marker in a listing title.
 *
 * The title is first passed through index_vanilla_uex_normalize_search_text().
 * Everything up to and including the first standalone "wts" word is dropped,
 * then leading trade keywords (wts, wtb, sell, offer, ...) are removed
 * repeatedly until none remains, and finally one leading numeric prefix that
 * is terminated by "-" or ":" is stripped.
 *
 * NOTE(review): every pattern here is lowercase-only, so the normalizer is
 * presumably expected to lowercase its input — confirm against its definition.
 *
 * @param string $title Raw listing title.
 *
 * @return string The remaining subject text, or '' when the normalized title
 *                is empty, contains no "wts" word, or nothing is left after
 *                stripping.
 */
function index_vanilla_uex_extract_wts_subject(string $title): string
{
    $haystack = index_vanilla_uex_normalize_search_text($title);
    $found = [];

    if ($haystack === '' || !preg_match('/\bwts\b/u', $haystack, $found, PREG_OFFSET_CAPTURE)) {
        return '';
    }

    // PREG_OFFSET_CAPTURE yields byte offsets, which is what substr()/strlen() work with.
    $marker = (string) ($found[0][0] ?? 'wts');
    $markerStart = (int) ($found[0][1] ?? 0);
    $rest = trim(substr($haystack, $markerStart + strlen($marker)));

    // Peel leading trade keywords one at a time until a pass changes nothing.
    $leadingKeyword = '/^(?:wts|wtb|lts|ltb|sell|selling|trade|trading|for|offer|offering|looking|lf|want|wanted)\b[\s\-:]*/u';
    $previous = null;
    while ($rest !== '' && $rest !== $previous) {
        $previous = $rest;
        $rest = trim((string) preg_replace($leadingKeyword, '', $rest));
    }

    // Drop one leading "<optional short word> <digits ...> -|:" prefix, if present.
    return trim((string) preg_replace('/^(?:[a-z]{1,4}\s+)?\d+(?:\s+\d+)*(?:\s*[\-:])\s*/u', '', $rest));
}
/**
 * Tells whether a listing title yields a non-empty subject after its "wts" marker.
 *
 * A title with no "wts" word, or with nothing left once the marker and its
 * prefixes are stripped, is reported as false.
 *
 * @param string $title Raw listing title.
 *
 * @return bool True when index_vanilla_uex_extract_wts_subject() returns a non-empty string.
 */
function index_vanilla_uex_title_has_wts_marker(string $title): bool
{
    $subject = index_vanilla_uex_extract_wts_subject($title);

    return $subject !== '';
}
function index_vanilla_uex_title_match_score(string $title, string $queryName): int
{
$normalizedTitle = index_vanilla_uex_extract_wts_subject($title);
$normalizedQuery = index_vanilla_uex_normalize_search_text($queryName);
$compactTitle = index_vanilla_uex_normalize_compact_search_text($normalizedTitle);
$compactQuery = index_vanilla_uex_normalize_compact_search_text($normalizedQuery);
if ($normalizedTitle === '' || $normalizedQuery === '' || $compactTitle === '' || $compactQuery === '') {
return 0;
}
if ($normalizedTitle === $normalizedQuery || $compactTitle === $compactQuery) {
return 3;
}
if (strpos($normalizedTitle, $normalizedQuery) !== false || strpos($compactTitle, $compactQuery) !== false) {
return 2;
}
$queryTokens = index_vanilla_uex_tokenize_search_text($normalizedQuery);
if ($queryTokens === []) { if ($queryTokens === []) {
return false; return 0;
} }
foreach ($queryTokens as $token) { foreach ($queryTokens as $token) {
if (strpos($normalizedTitle, $token) === false) { if (strpos($normalizedTitle, $token) === false) {
return false; return 0;
} }
} }
return true; return 1;
}
function index_vanilla_uex_title_matches_query(string $title, string $queryName): bool
{
return index_vanilla_uex_title_match_score($title, $queryName) > 0;
} }
function index_vanilla_uex_extract_price_value(string $rawPrice): ?int function index_vanilla_uex_extract_price_value(string $rawPrice): ?int
@ -168,20 +232,21 @@ function index_vanilla_uex_extract_price_value(string $rawPrice): ?int
function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryName, int $sampleLimit = 10): array function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryName, int $sampleLimit = 10): array
{ {
$values = []; $candidates = [];
$chunks = preg_split('/<div\s+class="search-row\b[^>]*>/i', $html) ?: []; $chunks = preg_split('/<div\s+class="search-row\b[^>]*>/i', $html) ?: [];
foreach ($chunks as $chunk) { foreach ($chunks as $chunk) {
if (count($values) >= $sampleLimit) {
break;
}
if (!preg_match('/<a\b[^>]*class="text-bold"[^>]*>(.*?)<\/a>/is', $chunk, $titleMatches)) { if (!preg_match('/<a\b[^>]*class="text-bold"[^>]*>(.*?)<\/a>/is', $chunk, $titleMatches)) {
continue; continue;
} }
$title = index_vanilla_uex_normalize_whitespace((string) ($titleMatches[1] ?? '')); $title = index_vanilla_uex_normalize_whitespace((string) ($titleMatches[1] ?? ''));
if ($title === '' || !preg_match('/^WTS\b/i', $title) || !index_vanilla_uex_title_matches_query($title, $queryName)) { if ($title === '' || !index_vanilla_uex_title_has_wts_marker($title)) {
continue;
}
$matchScore = index_vanilla_uex_title_match_score($title, $queryName);
if ($matchScore <= 0) {
continue; continue;
} }
@ -194,7 +259,34 @@ function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryN
continue; continue;
} }
$values[] = $priceValue; $candidates[] = [
'price' => $priceValue,
'score' => $matchScore,
];
}
if ($candidates === []) {
return [
'has_estimate' => false,
'average' => null,
'formatted' => '—',
'sample_count' => 0,
];
}
$bestScore = max(array_column($candidates, 'score'));
$minimumAcceptedScore = $bestScore >= 3 ? 2 : $bestScore;
$values = [];
foreach ($candidates as $candidate) {
if (($candidate['score'] ?? 0) < $minimumAcceptedScore) {
continue;
}
$values[] = (int) $candidate['price'];
if (count($values) >= $sampleLimit) {
break;
}
} }
if ($values === []) { if ($values === []) {
@ -216,6 +308,44 @@ function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryN
]; ];
} }
/**
 * Creates a cURL easy handle configured for fetching an HTML page.
 *
 * The handle returns the body as a string, follows redirects, accepts any
 * content encoding cURL supports (empty CURLOPT_ENCODING), verifies the TLS
 * peer and host name, and sends browser-like Accept / Accept-Language /
 * Cache-Control headers. No request is performed here.
 *
 * @param string $url            URL to request.
 * @param string $userAgent      Value for the User-Agent header.
 * @param int    $connectTimeout Connection timeout, in seconds.
 * @param int    $timeout        Overall transfer timeout, in seconds.
 *
 * @return mixed The value returned by curl_init(), after options were applied.
 */
function index_vanilla_uex_create_handle(string $url, string $userAgent, int $connectTimeout, int $timeout)
{
    $requestHeaders = [
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language: fr-FR,fr;q=0.9,en;q=0.8',
        'Cache-Control: no-cache',
    ];

    $options = [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_CONNECTTIMEOUT => $connectTimeout,
        CURLOPT_TIMEOUT => $timeout,
        CURLOPT_USERAGENT => $userAgent,
        CURLOPT_ENCODING => '',
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_HTTPHEADER => $requestHeaders,
    ];

    $curl = curl_init();
    curl_setopt_array($curl, $options);

    return $curl;
}
/**
 * Performs one blocking GET request and returns the response body.
 *
 * The handle is built by index_vanilla_uex_create_handle() and closed before
 * returning, whatever the outcome.
 *
 * @param string $url            URL to request.
 * @param string $userAgent      Value for the User-Agent header.
 * @param int    $connectTimeout Connection timeout, in seconds (default 8).
 * @param int    $timeout        Overall transfer timeout, in seconds (default 18).
 *
 * @return string|null The body, or null when cURL did not return a string,
 *                     reported an error, the status is outside 200-299, or
 *                     the body is blank after trimming.
 */
function index_vanilla_uex_fetch_single_body(string $url, string $userAgent, int $connectTimeout = 8, int $timeout = 18): ?string
{
    $curl = index_vanilla_uex_create_handle($url, $userAgent, $connectTimeout, $timeout);

    $response = curl_exec($curl);
    $transportError = curl_error($curl);
    $status = (int) curl_getinfo($curl, CURLINFO_RESPONSE_CODE);
    curl_close($curl);

    // Usable only if every check passes; trim() is reached only for string responses.
    $usable = is_string($response)
        && $transportError === ''
        && $status >= 200
        && $status < 300
        && trim($response) !== '';

    return $usable ? $response : null;
}
function index_vanilla_uex_fetch_estimates(array $names, int $sampleLimit = 10): array function index_vanilla_uex_fetch_estimates(array $names, int $sampleLimit = 10): array
{ {
$results = []; $results = [];
@ -234,65 +364,63 @@ function index_vanilla_uex_fetch_estimates(array $names, int $sampleLimit = 10):
return $results; return $results;
} }
$multiHandle = curl_multi_init();
$handles = [];
$userAgent = 'Mozilla/5.0 (compatible; FlatLogicVanillaDb/1.0; +https://uexcorp.space/)'; $userAgent = 'Mozilla/5.0 (compatible; FlatLogicVanillaDb/1.0; +https://uexcorp.space/)';
$batchSize = 4;
foreach ($uniqueNames as $name) { foreach (array_chunk(array_values($uniqueNames), $batchSize) as $nameBatch) {
$url = 'https://uexcorp.space/search?q=' . rawurlencode($name); $multiHandle = curl_multi_init();
$handle = curl_init(); $handles = [];
curl_setopt_array($handle, [
CURLOPT_URL => $url,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_CONNECTTIMEOUT => 4,
CURLOPT_TIMEOUT => 8,
CURLOPT_USERAGENT => $userAgent,
CURLOPT_SSL_VERIFYPEER => true,
CURLOPT_SSL_VERIFYHOST => 2,
CURLOPT_HTTPHEADER => [
'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language: fr-FR,fr;q=0.9,en;q=0.8',
'Cache-Control: no-cache',
],
]);
curl_multi_add_handle($multiHandle, $handle); foreach ($nameBatch as $name) {
$handles[$name] = $handle; $url = 'https://uexcorp.space/search?q=' . rawurlencode($name);
} $handle = index_vanilla_uex_create_handle($url, $userAgent, 6, 12);
curl_multi_add_handle($multiHandle, $handle);
$running = null; $handles[$name] = [
do { 'handle' => $handle,
$status = curl_multi_exec($multiHandle, $running); 'url' => $url,
if ($running) {
curl_multi_select($multiHandle, 1.0);
}
} while ($running && $status === CURLM_OK);
foreach ($handles as $name => $handle) {
$error = curl_error($handle);
$httpCode = (int) curl_getinfo($handle, CURLINFO_RESPONSE_CODE);
$body = (string) curl_multi_getcontent($handle);
if ($error !== '' || $httpCode < 200 || $httpCode >= 300 || trim($body) === '') {
$results[$name] = [
'has_estimate' => false,
'average' => null,
'formatted' => 'Indisponible',
'sample_count' => 0,
'error' => true,
]; ];
} else { }
$running = null;
do {
$status = curl_multi_exec($multiHandle, $running);
if ($running) {
curl_multi_select($multiHandle, 1.0);
}
} while ($running && $status === CURLM_OK);
foreach ($handles as $name => $payload) {
$handle = $payload['handle'];
$url = $payload['url'];
$error = curl_error($handle);
$httpCode = (int) curl_getinfo($handle, CURLINFO_RESPONSE_CODE);
$body = (string) curl_multi_getcontent($handle);
curl_multi_remove_handle($multiHandle, $handle);
curl_close($handle);
if ($error !== '' || $httpCode < 200 || $httpCode >= 300 || trim($body) === '') {
$body = index_vanilla_uex_fetch_single_body($url, $userAgent) ?? '';
}
if (trim($body) === '') {
$results[$name] = [
'has_estimate' => false,
'average' => null,
'formatted' => 'Indisponible',
'sample_count' => 0,
'error' => true,
];
continue;
}
$results[$name] = index_vanilla_uex_parse_estimate_from_html($body, $name, $sampleLimit); $results[$name] = index_vanilla_uex_parse_estimate_from_html($body, $name, $sampleLimit);
$results[$name]['error'] = false; $results[$name]['error'] = false;
} }
curl_multi_remove_handle($multiHandle, $handle); curl_multi_close($multiHandle);
curl_close($handle);
} }
curl_multi_close($multiHandle);
return $results; return $results;
} }

268
index.php
View File

@ -85,34 +85,98 @@ function index_vanilla_uex_normalize_search_text(string $value): string
return trim((string) preg_replace('/\s+/u', ' ', $value)); return trim((string) preg_replace('/\s+/u', ' ', $value));
} }
function index_vanilla_uex_title_matches_query(string $title, string $queryName): bool function index_vanilla_uex_normalize_compact_search_text(string $value): string
{ {
$normalizedTitle = index_vanilla_uex_normalize_search_text($title); $value = function_exists('mb_strtolower')
$normalizedQuery = index_vanilla_uex_normalize_search_text($queryName); ? mb_strtolower($value, 'UTF-8')
: strtolower($value);
if ($normalizedTitle === '' || $normalizedQuery === '') { return trim((string) preg_replace('/[^[:alnum:]]+/u', '', $value));
return false; }
function index_vanilla_uex_tokenize_search_text(string $value): array
{
$normalizedValue = index_vanilla_uex_normalize_search_text($value);
if ($normalizedValue === '') {
return [];
} }
if (strpos($normalizedTitle, $normalizedQuery) !== false) { $tokens = array_values(array_filter(explode(' ', $normalizedValue), static function (string $token): bool {
return true; return preg_match('/\d/u', $token) || strlen($token) >= 2;
}
$queryTokens = array_values(array_filter(explode(' ', $normalizedQuery), static function (string $token): bool {
return preg_match('/\d/', $token) || strlen($token) >= 3;
})); }));
return array_values(array_unique($tokens));
}
/**
 * Extracts the advertised subject that follows a "wts" marker in a listing title.
 *
 * The title is first passed through index_vanilla_uex_normalize_search_text().
 * Everything up to and including the first standalone "wts" word is dropped,
 * then leading trade keywords (wts, wtb, sell, offer, ...) are removed
 * repeatedly until none remains, and finally one leading numeric prefix that
 * is terminated by "-" or ":" is stripped.
 *
 * NOTE(review): every pattern here is lowercase-only, so the normalizer is
 * presumably expected to lowercase its input — confirm against its definition.
 *
 * @param string $title Raw listing title.
 *
 * @return string The remaining subject text, or '' when the normalized title
 *                is empty, contains no "wts" word, or nothing is left after
 *                stripping.
 */
function index_vanilla_uex_extract_wts_subject(string $title): string
{
    $haystack = index_vanilla_uex_normalize_search_text($title);
    $found = [];

    if ($haystack === '' || !preg_match('/\bwts\b/u', $haystack, $found, PREG_OFFSET_CAPTURE)) {
        return '';
    }

    // PREG_OFFSET_CAPTURE yields byte offsets, which is what substr()/strlen() work with.
    $marker = (string) ($found[0][0] ?? 'wts');
    $markerStart = (int) ($found[0][1] ?? 0);
    $rest = trim(substr($haystack, $markerStart + strlen($marker)));

    // Peel leading trade keywords one at a time until a pass changes nothing.
    $leadingKeyword = '/^(?:wts|wtb|lts|ltb|sell|selling|trade|trading|for|offer|offering|looking|lf|want|wanted)\b[\s\-:]*/u';
    $previous = null;
    while ($rest !== '' && $rest !== $previous) {
        $previous = $rest;
        $rest = trim((string) preg_replace($leadingKeyword, '', $rest));
    }

    // Drop one leading "<optional short word> <digits ...> -|:" prefix, if present.
    return trim((string) preg_replace('/^(?:[a-z]{1,4}\s+)?\d+(?:\s+\d+)*(?:\s*[\-:])\s*/u', '', $rest));
}
/**
 * Tells whether a listing title yields a non-empty subject after its "wts" marker.
 *
 * A title with no "wts" word, or with nothing left once the marker and its
 * prefixes are stripped, is reported as false.
 *
 * @param string $title Raw listing title.
 *
 * @return bool True when index_vanilla_uex_extract_wts_subject() returns a non-empty string.
 */
function index_vanilla_uex_title_has_wts_marker(string $title): bool
{
    $subject = index_vanilla_uex_extract_wts_subject($title);

    return $subject !== '';
}
function index_vanilla_uex_title_match_score(string $title, string $queryName): int
{
$normalizedTitle = index_vanilla_uex_extract_wts_subject($title);
$normalizedQuery = index_vanilla_uex_normalize_search_text($queryName);
$compactTitle = index_vanilla_uex_normalize_compact_search_text($normalizedTitle);
$compactQuery = index_vanilla_uex_normalize_compact_search_text($normalizedQuery);
if ($normalizedTitle === '' || $normalizedQuery === '' || $compactTitle === '' || $compactQuery === '') {
return 0;
}
if ($normalizedTitle === $normalizedQuery || $compactTitle === $compactQuery) {
return 3;
}
if (strpos($normalizedTitle, $normalizedQuery) !== false || strpos($compactTitle, $compactQuery) !== false) {
return 2;
}
$queryTokens = index_vanilla_uex_tokenize_search_text($normalizedQuery);
if ($queryTokens === []) { if ($queryTokens === []) {
return false; return 0;
} }
foreach ($queryTokens as $token) { foreach ($queryTokens as $token) {
if (strpos($normalizedTitle, $token) === false) { if (strpos($normalizedTitle, $token) === false) {
return false; return 0;
} }
} }
return true; return 1;
}
function index_vanilla_uex_title_matches_query(string $title, string $queryName): bool
{
return index_vanilla_uex_title_match_score($title, $queryName) > 0;
} }
function index_vanilla_uex_extract_price_value(string $rawPrice): ?int function index_vanilla_uex_extract_price_value(string $rawPrice): ?int
@ -169,20 +233,21 @@ function index_vanilla_uex_extract_price_value(string $rawPrice): ?int
function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryName, int $sampleLimit = 10): array function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryName, int $sampleLimit = 10): array
{ {
$values = []; $candidates = [];
$chunks = preg_split('/<div\s+class="search-row\b[^>]*>/i', $html) ?: []; $chunks = preg_split('/<div\s+class="search-row\b[^>]*>/i', $html) ?: [];
foreach ($chunks as $chunk) { foreach ($chunks as $chunk) {
if (count($values) >= $sampleLimit) {
break;
}
if (!preg_match('/<a\b[^>]*class="text-bold"[^>]*>(.*?)<\/a>/is', $chunk, $titleMatches)) { if (!preg_match('/<a\b[^>]*class="text-bold"[^>]*>(.*?)<\/a>/is', $chunk, $titleMatches)) {
continue; continue;
} }
$title = index_vanilla_uex_normalize_whitespace((string) ($titleMatches[1] ?? '')); $title = index_vanilla_uex_normalize_whitespace((string) ($titleMatches[1] ?? ''));
if ($title === '' || !preg_match('/^WTS\b/i', $title) || !index_vanilla_uex_title_matches_query($title, $queryName)) { if ($title === '' || !index_vanilla_uex_title_has_wts_marker($title)) {
continue;
}
$matchScore = index_vanilla_uex_title_match_score($title, $queryName);
if ($matchScore <= 0) {
continue; continue;
} }
@ -195,7 +260,34 @@ function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryN
continue; continue;
} }
$values[] = $priceValue; $candidates[] = [
'price' => $priceValue,
'score' => $matchScore,
];
}
if ($candidates === []) {
return [
'has_estimate' => false,
'average' => null,
'formatted' => '—',
'sample_count' => 0,
];
}
$bestScore = max(array_column($candidates, 'score'));
$minimumAcceptedScore = $bestScore >= 3 ? 2 : $bestScore;
$values = [];
foreach ($candidates as $candidate) {
if (($candidate['score'] ?? 0) < $minimumAcceptedScore) {
continue;
}
$values[] = (int) $candidate['price'];
if (count($values) >= $sampleLimit) {
break;
}
} }
if ($values === []) { if ($values === []) {
@ -217,6 +309,44 @@ function index_vanilla_uex_parse_estimate_from_html(string $html, string $queryN
]; ];
} }
/**
 * Creates a cURL easy handle configured for fetching an HTML page.
 *
 * The handle returns the body as a string, follows redirects, accepts any
 * content encoding cURL supports (empty CURLOPT_ENCODING), verifies the TLS
 * peer and host name, and sends browser-like Accept / Accept-Language /
 * Cache-Control headers. No request is performed here.
 *
 * @param string $url            URL to request.
 * @param string $userAgent      Value for the User-Agent header.
 * @param int    $connectTimeout Connection timeout, in seconds.
 * @param int    $timeout        Overall transfer timeout, in seconds.
 *
 * @return mixed The value returned by curl_init(), after options were applied.
 */
function index_vanilla_uex_create_handle(string $url, string $userAgent, int $connectTimeout, int $timeout)
{
    $requestHeaders = [
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language: fr-FR,fr;q=0.9,en;q=0.8',
        'Cache-Control: no-cache',
    ];

    $options = [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_CONNECTTIMEOUT => $connectTimeout,
        CURLOPT_TIMEOUT => $timeout,
        CURLOPT_USERAGENT => $userAgent,
        CURLOPT_ENCODING => '',
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_HTTPHEADER => $requestHeaders,
    ];

    $curl = curl_init();
    curl_setopt_array($curl, $options);

    return $curl;
}
/**
 * Performs one blocking GET request and returns the response body.
 *
 * The handle is built by index_vanilla_uex_create_handle() and closed before
 * returning, whatever the outcome.
 *
 * @param string $url            URL to request.
 * @param string $userAgent      Value for the User-Agent header.
 * @param int    $connectTimeout Connection timeout, in seconds (default 8).
 * @param int    $timeout        Overall transfer timeout, in seconds (default 18).
 *
 * @return string|null The body, or null when cURL did not return a string,
 *                     reported an error, the status is outside 200-299, or
 *                     the body is blank after trimming.
 */
function index_vanilla_uex_fetch_single_body(string $url, string $userAgent, int $connectTimeout = 8, int $timeout = 18): ?string
{
    $curl = index_vanilla_uex_create_handle($url, $userAgent, $connectTimeout, $timeout);

    $response = curl_exec($curl);
    $transportError = curl_error($curl);
    $status = (int) curl_getinfo($curl, CURLINFO_RESPONSE_CODE);
    curl_close($curl);

    // Usable only if every check passes; trim() is reached only for string responses.
    $usable = is_string($response)
        && $transportError === ''
        && $status >= 200
        && $status < 300
        && trim($response) !== '';

    return $usable ? $response : null;
}
function index_vanilla_uex_fetch_estimates(array $names, int $sampleLimit = 10): array function index_vanilla_uex_fetch_estimates(array $names, int $sampleLimit = 10): array
{ {
$results = []; $results = [];
@ -235,65 +365,63 @@ function index_vanilla_uex_fetch_estimates(array $names, int $sampleLimit = 10):
return $results; return $results;
} }
$multiHandle = curl_multi_init();
$handles = [];
$userAgent = 'Mozilla/5.0 (compatible; FlatLogicVanillaDb/1.0; +https://uexcorp.space/)'; $userAgent = 'Mozilla/5.0 (compatible; FlatLogicVanillaDb/1.0; +https://uexcorp.space/)';
$batchSize = 4;
foreach ($uniqueNames as $name) { foreach (array_chunk(array_values($uniqueNames), $batchSize) as $nameBatch) {
$url = 'https://uexcorp.space/search?q=' . rawurlencode($name); $multiHandle = curl_multi_init();
$handle = curl_init(); $handles = [];
curl_setopt_array($handle, [
CURLOPT_URL => $url,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_CONNECTTIMEOUT => 4,
CURLOPT_TIMEOUT => 8,
CURLOPT_USERAGENT => $userAgent,
CURLOPT_SSL_VERIFYPEER => true,
CURLOPT_SSL_VERIFYHOST => 2,
CURLOPT_HTTPHEADER => [
'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language: fr-FR,fr;q=0.9,en;q=0.8',
'Cache-Control: no-cache',
],
]);
curl_multi_add_handle($multiHandle, $handle); foreach ($nameBatch as $name) {
$handles[$name] = $handle; $url = 'https://uexcorp.space/search?q=' . rawurlencode($name);
} $handle = index_vanilla_uex_create_handle($url, $userAgent, 6, 12);
curl_multi_add_handle($multiHandle, $handle);
$running = null; $handles[$name] = [
do { 'handle' => $handle,
$status = curl_multi_exec($multiHandle, $running); 'url' => $url,
if ($running) {
curl_multi_select($multiHandle, 1.0);
}
} while ($running && $status === CURLM_OK);
foreach ($handles as $name => $handle) {
$error = curl_error($handle);
$httpCode = (int) curl_getinfo($handle, CURLINFO_RESPONSE_CODE);
$body = (string) curl_multi_getcontent($handle);
if ($error !== '' || $httpCode < 200 || $httpCode >= 300 || trim($body) === '') {
$results[$name] = [
'has_estimate' => false,
'average' => null,
'formatted' => 'Indisponible',
'sample_count' => 0,
'error' => true,
]; ];
} else { }
$running = null;
do {
$status = curl_multi_exec($multiHandle, $running);
if ($running) {
curl_multi_select($multiHandle, 1.0);
}
} while ($running && $status === CURLM_OK);
foreach ($handles as $name => $payload) {
$handle = $payload['handle'];
$url = $payload['url'];
$error = curl_error($handle);
$httpCode = (int) curl_getinfo($handle, CURLINFO_RESPONSE_CODE);
$body = (string) curl_multi_getcontent($handle);
curl_multi_remove_handle($multiHandle, $handle);
curl_close($handle);
if ($error !== '' || $httpCode < 200 || $httpCode >= 300 || trim($body) === '') {
$body = index_vanilla_uex_fetch_single_body($url, $userAgent) ?? '';
}
if (trim($body) === '') {
$results[$name] = [
'has_estimate' => false,
'average' => null,
'formatted' => 'Indisponible',
'sample_count' => 0,
'error' => true,
];
continue;
}
$results[$name] = index_vanilla_uex_parse_estimate_from_html($body, $name, $sampleLimit); $results[$name] = index_vanilla_uex_parse_estimate_from_html($body, $name, $sampleLimit);
$results[$name]['error'] = false; $results[$name]['error'] = false;
} }
curl_multi_remove_handle($multiHandle, $handle); curl_multi_close($multiHandle);
curl_close($handle);
} }
curl_multi_close($multiHandle);
return $results; return $results;
} }