"growing-out-of-the-shadows", "icon"=>"βš›οΈ", "title"=>"Growing Out of the Shadows"], ["id"=>"sheep", "icon"=>"πŸ‘", "title"=>"Sheep / Laegna Counters"], ["id"=>"infinity", "icon"=>"♾️", "title"=>"Infinity"], ["id"=>"natura", "icon"=>"πŸŒ€", "title"=>"Natura"], ["id"=>"sunrise", "icon"=>"πŸ”†", "title"=>"Sunrise"], ["id"=>"bigbang", "icon"=>"πŸ’₯", "title"=>"Bigbang"], ["id"=>"yggdrasill", "icon"=>"🌳", "title"=>"Yggdrasill"], ["id"=>"spiritrise", "icon"=>"πŸ”·", "title"=>"Spiritrise"], ["id"=>"laelab", "icon"=>"πŸ”¬", "title"=>"LaeLab"], ["id"=>"geneticar", "icon"=>"🧬", "title"=>"Geneticar"], ["id"=>"handheldcal", "icon"=>"πŸ–©", "title"=>"HandheldCal"], ["id"=>"puzzled", "icon"=>"🧩", "title"=>"Puzzled"], ["id"=>"chakra", "icon"=>"✴️", "title"=>"Chakra"], ["id"=>"wheelsgoround", "icon"=>"βš™οΈ", "title"=>"Wheels Go Round"], ["id"=>"laemedics", "icon"=>"🧘", "title"=>"LaeMedics"], ["id"=>"coffeeandcigarettes", "icon"=>"β˜•", "title"=>"Coffee and Cigarettes"], ]; $html = fetch_source($source); if ($html === null) { http_response_code(502); echo json_encode(["success"=>false, "error"=>"Could not fetch source page"], JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE); exit; } $positions = []; foreach ($ids as $item) { $pattern = '~\bid\s*=\s*["\x27]' . preg_quote($item["id"], '~') . '["\x27]~i'; if (preg_match($pattern, $html, $m, PREG_OFFSET_CAPTURE)) { $item["pos"] = $m[0][1]; $positions[] = $item; } } usort($positions, fn($a, $b) => $a["pos"] <=> $b["pos"]); $sections = []; $totalLinks = 0; $utilityChrome = []; foreach ($positions as $idx => $item) { $start = (int)$item["pos"]; $end = isset($positions[$idx + 1]) ? (int)$positions[$idx + 1]["pos"] : strlen($html); $chunk = substr($html, $start, $end - $start); $ownedLinks = []; if ($item["id"] === "coffeeandcigarettes") { // Coffee is terminal prose. Links after it are footer/fixed/absolute utility chrome, // not branch links owned by the Coffee icon. 
$utilityChrome = parse_visible_links($chunk, $source); } else { $ownedLinks = parse_visible_links($chunk, $source); $totalLinks += count($ownedLinks); } $layout = $sectionLayout[$item["id"]] ?? null; $sections[] = [ "id" => $item["id"], "icon" => $item["icon"], "title" => $item["title"], "source_anchor" => "#" . $item["id"], "owns_until" => $item["id"] === "coffeeandcigarettes" ? "utility chrome" : (isset($positions[$idx + 1]) ? "#" . $positions[$idx + 1]["id"] : "end"), "layout" => $layout, "excerpt" => unicode_limit(clean_text($chunk), 420), "link_count" => count($ownedLinks), "links" => $ownedLinks, ]; } $shadowBonus = [ ["icon"=>"🎭", "title"=>"Dancing Shadows", "href"=>"https://material-psychic-gam-8mo3.bolt.host/"], ["icon"=>"πŸ“˜", "title"=>"Book of Shadows 2", "href"=>"https://app-bxfrqbbqegap.appmedo.com/"], ["icon"=>"πŸ“•", "title"=>"Bulk Load Book of the Dead 3", "href"=>"https://app-by9gm7mu9ssh.appmedo.com/"], ]; echo json_encode([ "success" => true, "source" => $source, "scanned_at" => gmdate("Y-m-d H:i:s") . " UTC", "model" => "CSS-aware: first 100vh console and fixed/absolute utility chrome are separated; document-flow icon owners keep links until the next meaningful icon/title; Coffee is terminal prose and owns no branch links.", "layout_zones" => $layoutZones ?? [], "section_count" => count($sections), "link_count" => $totalLinks, "utility_count" => count($utilityChrome), "utility_chrome" => $utilityChrome, "shadow_bonus" => $shadowBonus, "sections" => $sections, ], JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE); function fetch_source(string $url): ?string { $context = stream_context_create([ "http" => [ "timeout" => 18, "header" => "User-Agent: LandScaper structural scanner\r\n", ], ]); $html = @file_get_contents($url, false, $context); return is_string($html) && $html !== "" ? 
$html : null; }

/**
 * Collect the visible, followable links found in an HTML fragment.
 *
 * Every <a ...>...</a> element in $chunk is examined in document order. An
 * anchor is dropped when is_hidden_anchor() flags its attributes, when it has
 * no href or no visible label, or when skip_reason() reports a non-empty
 * reason for its resolved URL.
 *
 * @param string $chunk HTML fragment to scan.
 * @param string $base  URL that relative hrefs are resolved against.
 * @return array List of link records, each with the keys text, href, host,
 *               category, category_label, context, layout_hint and safety.
 */
function parse_visible_links(string $chunk, string $base): array
{
    // Group 1 is the raw attribute text of the opening tag, group 2 the inner
    // markup. Single quotes keep the backslashes literal for PCRE.
    $found = preg_match_all(
        '~<a\b([^>]*)>(.*?)</a>~is',
        $chunk,
        $matches,
        PREG_SET_ORDER | PREG_OFFSET_CAPTURE
    );
    if (!$found) {
        // Covers both "no anchors" (0) and a PCRE failure (false).
        return [];
    }

    $links = [];
    foreach ($matches as $match) {
        // With PREG_OFFSET_CAPTURE every group is a [text, byteOffset] pair.
        $attrs = $match[1][0] ?? "";
        $inner = $match[2][0] ?? "";
        $offset = (int)($match[0][1] ?? 0);

        if (is_hidden_anchor($attrs)) {
            continue;
        }

        $href = attr_value($attrs, "href");
        $label = visible_label($attrs, $inner);
        if ($href === "" || $label === "") {
            continue;
        }

        $absolute = absolute_url($href, $base);
        if (skip_reason($href, $absolute) !== "") {
            continue;
        }

        $host = strtolower((string)(parse_url($absolute, PHP_URL_HOST) ?: ""));
        $category = categorize_link($absolute, $label);

        $links[] = [
            "text" => unicode_limit($label, 150),
            "href" => $absolute,
            "host" => $host,
            "category" => $category,
            "category_label" => category_label($category),
            "context" => context_hint($chunk, $offset),
            "layout_hint" => layout_hint($attrs, $chunk, $offset),
            "safety" => safety_class($host),
        ];
    }

    return $links;
}

/**
 * Describe the most recent block-level opening tag that precedes a link.
 *
 * Only the (at most) 1800 bytes of $chunk directly before $offset are
 * searched. The last opening tag found there is reported as "name#id.class",
 * whether or not that element has already been closed again.
 *
 * @param string $chunk  HTML fragment the link was found in.
 * @param int    $offset Byte offset of the link inside $chunk.
 * @return string Tag description, or 'flow text' when no candidate tag is found.
 */
function context_hint(string $chunk, int $offset): string
{
    $window = substr($chunk, max(0, $offset - 1800), min(1800, $offset));
    if (!preg_match_all('~<(article|section|blockquote|div|p|big|center)\b([^>]*)>~i', $window, $tags, PREG_SET_ORDER)) {
        return 'flow text';
    }

    $tag = end($tags);
    $name = strtolower($tag[1] ?? 'flow');
    $attrs = $tag[2] ?? '';

    $id = attr_value($attrs, 'id');
    $class = attr_value($attrs, 'class');

    $label = $name;
    if ($id !== '') {
        $label .= '#' . $id;
    }
    if ($class !== '') {
        // Several class names are joined with dots, CSS-selector style.
        $label .= '.' . preg_replace('/\s+/', '.', trim($class));
    }

    return $label;
}

// layout_hint() continues on the next source line; its opening is kept unchanged.
function layout_hint(string $attrs, string $chunk, int $offset): string { $style = strtolower(attr_value($attrs, 'style')); $class = strtolower(attr_value($attrs, 'class')); $prefix = strtolower(substr($chunk, max(0, $offset - 1300), min(1300, $offset))); $haystack = $style . ' ' . $class . ' ' . 
$prefix; if (str_contains($haystack, 'position: fixed') || str_contains($haystack, 'position:fixed')) return 'fixed chrome'; if (str_contains($haystack, 'position: absolute') || str_contains($haystack, 'position:absolute') || str_contains($haystack, 'calc(100vh')) return 'absolute-positioned chrome'; if (str_contains($haystack, 'position: relative') || str_contains($haystack, 'position:relative')) return 'relative positioned block'; if (str_contains($haystack, ']*>.*?~is", " ", $inner) ?? $inner; $text = html_entity_decode(strip_tags($cleanInner), ENT_QUOTES | ENT_HTML5, "UTF-8"); $text = trim(preg_replace("/\s+/u", " ", $text) ?? ""); if ($text !== "") return $text; foreach (["aria-label", "title", "alt"] as $attr) { $candidate = attr_value($attrs, $attr); if ($candidate !== "") return trim(preg_replace("/\s+/u", " ", $candidate) ?? ""); } return ""; }

// NOTE(review): the line above is reproduced unchanged. It holds the end of
// layout_hint() and the end of what is presumably visible_label(); the text
// that should sit between them looks truncated. is_hidden_anchor() and
// attr_value() are called in this file but not defined in the visible text.
// Confirm against the original file before relying on this region.

/**
 * Decide whether a link should be left out of the scan result.
 *
 * @param string $href     The href exactly as written in the markup.
 * @param string $absolute The same link after resolution to an absolute URL.
 * @return string "" to keep the link, otherwise "fragment", "scheme" or "asset".
 */
function skip_reason(string $href, string $absolute): string
{
    $hrefLower = strtolower(trim($href));

    // Empty hrefs and same-page anchors lead nowhere new.
    if ($hrefLower === "" || $hrefLower[0] === "#") {
        return "fragment";
    }

    foreach (["javascript:", "mailto:", "tel:", "data:", "blob:"] as $scheme) {
        if (str_starts_with($hrefLower, $scheme)) {
            return "scheme";
        }
    }

    // Whatever is left must have resolved to plain http(s).
    if (!preg_match("~^https?://~i", $absolute)) {
        return "scheme";
    }

    // Static assets (scripts, stylesheets, images, fonts) are not branches.
    $path = (string)(parse_url($absolute, PHP_URL_PATH) ?: "");
    if (preg_match("~\.(?:js|mjs|css|map|png|jpe?g|gif|svg|webp|ico|woff2?|ttf|eot)(?:$|[?#])~i", $path)) {
        return "asset";
    }

    return "";
}

/**
 * Resolve an href found in the page against the URL the page was fetched from.
 *
 * Handled forms: absolute http(s) URLs (returned as-is), protocol-relative
 * "//host/..." (prefixed with "https:"), other explicit schemes (returned
 * as-is so skip_reason() can reject them), root-relative "/path", query-only
 * "?x" and fragment-only "#x" references, and plain relative paths. Spaces in
 * the relative forms are encoded as %20. Dot segments ("./", "../") are not
 * collapsed.
 *
 * @param string $href Link target as written in the markup (entities allowed).
 * @param string $base URL of the document containing the link.
 * @return string The resolved URL.
 */
function absolute_url(string $href, string $base): string
{
    $href = html_entity_decode(trim($href), ENT_QUOTES | ENT_HTML5, "UTF-8");

    if (preg_match("~^https?://~i", $href)) {
        return $href;
    }
    if (str_starts_with($href, "//")) {
        return "https:" . $href;
    }
    // Any other explicit scheme (ftp:, file:, ...) is not a relative path and
    // must not be glued onto the base directory.
    if (preg_match("~^[a-z][a-z0-9+.-]*:~i", $href)) {
        return $href;
    }

    $parts = parse_url($base);
    if (!is_array($parts)) {
        // parse_url() returns false for seriously malformed URLs.
        $parts = [];
    }
    $scheme = $parts["scheme"] ?? "https";
    $host = $parts["host"] ?? "spireason.neocities.org";
    // Keep a non-default port so links on e.g. host:8080 stay reachable.
    $origin = $scheme . "://" . $host . (isset($parts["port"]) ? ":" . $parts["port"] : "");

    $href = str_replace(" ", "%20", $href);

    if (str_starts_with($href, "/")) {
        return $origin . $href;
    }

    $basePath = $parts["path"] ?? "/";
    if ($basePath === "" || $basePath[0] !== "/") {
        $basePath = "/" . $basePath;
    }

    // "?query" and "#fragment" references keep the full path of the base document.
    if ($href !== "" && ($href[0] === "?" || $href[0] === "#")) {
        return $origin . $basePath . $href;
    }

    // Directory of the base document: everything up to and including the last
    // slash. dirname() would drop the final segment of ".../dir/" bases.
    $dir = substr($basePath, 0, strrpos($basePath, "/") + 1);

    return $origin . $dir . $href;
}

/**
 * Reduce an HTML fragment to a single line of plain text for use as an excerpt.
 *
 * Script and style elements are removed together with their contents, the
 * remaining tags are stripped and entities decoded. Several scrubbing passes
 * then remove leftovers that are not prose: a dangling id="..."> from a
 * fragment that starts inside a tag, CSS rule blocks, CSS-like declarations,
 * comments and empty @media blocks.
 *
 * @param string $html HTML fragment.
 * @return string Whitespace-normalised text; "" if the final normalisation fails.
 */
function clean_text(string $html): string
{
    // The closing tag is matched through the back-reference so the whole
    // element, not only its opening tag, is dropped.
    $html = preg_replace('~<(script|style)\b[^>]*>.*?</\1\s*>~is', " ", $html) ?? $html;

    $text = html_entity_decode(strip_tags($html), ENT_QUOTES | ENT_HTML5, "UTF-8");

    // Each pass falls back to its input if PCRE fails (e.g. on invalid UTF-8).
    $text = preg_replace("~\bid=\"[^\"]+\"\s*>~u", " ", $text) ?? $text;
    $text = preg_replace("~[#.][A-Za-z0-9_-][^{}]{0,220}\\{[^{}]*\\}~u", " ", $text) ?? $text;
    $text = preg_replace("~\b(?:font|color|background|padding|margin|position|display|border|box-shadow|width|height|transform|opacity|z-index|transition)[^.;]{0,180}[.;]~iu", " ", $text) ?? $text;
    $text = preg_replace("~/\\*.*?\\*/~s", " ", $text) ?? $text;
    $text = preg_replace("~@media[^{}]*\\{\\s*\\}~u", " ", $text) ?? $text;

    return trim(preg_replace("/\s+/u", " ", $text) ?? "");
}

// categorize_link() continues on the next source line; its opening is kept unchanged.
function categorize_link(string $url, string $label): string { $host = strtolower((string)(parse_url($url, PHP_URL_HOST) ?: "")); $path = strtolower((string)(parse_url($url, PHP_URL_PATH) ?: "")); $haystack = strtolower($url . " " . 
$label);
    // Remainder of categorize_link(): the first matching rule wins.
    if (str_ends_with($path, ".pdf")) return "pdf";
    if (str_ends_with($path, ".zip")) return "archive";
    if (str_ends_with($path, ".json") || str_contains($path, "numberdatabase")) return "dataset";
    if (str_ends_with($path, ".py") || str_contains($haystack, "script") || str_contains($haystack, "github")) return "source";
    if (str_contains($haystack, "frequency") || str_contains($haystack, "frequential") || str_contains($haystack, "octave") || str_contains($haystack, "calculator") || str_contains($haystack, "counter")) return "tool";
    if (str_contains($haystack, "bot") || str_contains($host, "perplexity.ai")) return "bot";
    // NOTE(review): $haystack contains the lower-cased URL and therefore the
    // host, so any host containing "github" has already returned "source"
    // above; this branch looks unreachable. Confirm the intended order.
    if (str_contains($host, "github")) return "github";
    if (str_contains($path, "apples") || str_contains($haystack, "app") || str_contains($host, "lovable.app") || str_contains($host, "bolt.host") || str_contains($host, "appmedo.com")) return "applet";
    if (str_contains($host, "prezi") || str_contains($host, "docs.google")) return "presentation";
    if (str_contains($host, "youtube") || str_contains($host, "notion")) return "media";
    if ($host !== "spireason.neocities.org") return "external";
    return "onsite";
}

/**
 * Human-readable label for a link category.
 *
 * @param string $category Category key as produced by categorize_link().
 * @return string The fixed label for known keys; for any other key the key
 *                itself with dashes turned into spaces and the first letter
 *                upper-cased.
 */
function category_label(string $category): string
{
    return match ($category) {
        "pdf" => "PDF text",
        "archive" => "Archive",
        "dataset" => "Dataset",
        "source" => "Source file",
        "tool" => "Tool",
        "bot" => "External bot/system",
        "github" => "Repository",
        "applet" => "Applet",
        "presentation" => "Presentation",
        "media" => "Media",
        "external" => "External system",
        "onsite" => "On-site branch",
        default => ucfirst(str_replace("-", " ", $category)),
    };
}

/**
 * Classify a host name by how much it is trusted.
 *
 * @param string $host Host name; compared as given, without case folding.
 * @return string "source" for the scanned site itself, "known-external" for a
 *                listed host or any subdomain of one, otherwise "review".
 */
function safety_class(string $host): string
{
    if ($host === "spireason.neocities.org") {
        return "source";
    }

    $trusted = [
        "laegna.notaku.site",
        "prezi.com",
        "www.perplexity.ai",
        "huggingface.co",
        "assorted-canopy-961.notion.site",
        "www.youtube.com",
        "youtube.com",
        "github.com",
        "tambetvali.github.io",
        "archive.org",
    ];

    foreach ($trusted as $candidate) {
        $isExact = $host === $candidate;
        // The leading dot keeps "evilgithub.com" from passing as "github.com".
        $isSubdomain = str_ends_with($host, "." . $candidate);
        if ($isExact || $isSubdomain) {
            return "known-external";
        }
    }

    return "review";
}

/**
 * Shorten text to at most $limit characters, counting UTF-8 code points.
 *
 * Text longer than $limit is cut to $limit - 1 characters (never fewer than
 * zero) and an ellipsis is appended. Text that fits, is empty, or cannot be
 * split by the UTF-8 regex is returned unchanged.
 *
 * @param string $text  Text to shorten.
 * @param int    $limit Maximum length in characters, ellipsis included.
 * @return string The possibly shortened text.
 */
function unicode_limit(string $text, int $limit): string
{
    // One match per code point; yields 0 for "" and false for invalid UTF-8.
    $matched = preg_match_all("/./us", $text, $codepoints);
    if (!$matched) {
        return $text;
    }

    $units = $codepoints[0];
    if (count($units) <= $limit) {
        return $text;
    }

    $keep = max(0, $limit - 1);

    return implode("", array_slice($units, 0, $keep)) . "…";
}