From 7957a709851b3b73a9a1dc308399ce2ccaf1c7d4 Mon Sep 17 00:00:00 2001 From: Flatlogic Bot Date: Wed, 15 Apr 2026 01:46:21 +0000 Subject: [PATCH] Revert to version d67465d --- backend/.env | 3 - backend/src/services/firecrawl.js | 325 +--- backend/src/services/siteEntitlements.js | 4 +- backend/src/services/sites.js | 440 +---- frontend/src/components/Logo/index.tsx | 11 +- frontend/src/helpers/siteEntitlements.ts | 2 +- frontend/src/pages/profile.tsx | 9 +- frontend/src/pages/sites/analyzer.tsx | 2102 +++++----------------- 8 files changed, 491 insertions(+), 2405 deletions(-) diff --git a/backend/.env b/backend/.env index c8a42c3..ae9b07a 100644 --- a/backend/.env +++ b/backend/.env @@ -12,6 +12,3 @@ EMAIL_USER=AKIAVEW7G4PQUBGM52OF EMAIL_PASS=BLnD4hKGb6YkSz3gaQrf8fnyLi3C3/EdjOOsLEDTDPTz SECRET_KEY=HUEyqESqgQ1yTwzVlO6wprC9Kf1J1xuA PEXELS_KEY=Vc99rnmOhHhJAbgGQoKLZtsaIVfkeownoQNbTj78VemUjKh08ZYRbf18 -FIRECRAWL_API_KEY=fc-409763513f6c458c9d1d09e460346b17 -FIRECRAWL_BASE_URL=https://api.firecrawl.dev/v2 -FIRECRAWL_ENABLED=true diff --git a/backend/src/services/firecrawl.js b/backend/src/services/firecrawl.js index 75e712c..8a5fe02 100644 --- a/backend/src/services/firecrawl.js +++ b/backend/src/services/firecrawl.js @@ -1,49 +1,4 @@ -const fs = require('fs'); -const path = require('path'); -const axios = require('axios'); - -const FIRECRAWL_DEFAULT_BASE_URL = 'https://api.firecrawl.dev/v2'; -const FIRECRAWL_DEFAULT_POLL_INTERVAL_MS = 2000; -const FIRECRAWL_DEFAULT_TIMEOUT_MS = 45000; - -const BACKEND_ENV_PATH = path.join(__dirname, '..', '..', '.env'); - -function readBackendEnvFile() { - try { - const raw = fs.readFileSync(BACKEND_ENV_PATH, 'utf8'); - - return raw.split(/\r?\n/).reduce((accumulator, line) => { - const trimmedLine = line.trim(); - - if (!trimmedLine || trimmedLine.startsWith('#')) { - return accumulator; - } - - const separatorIndex = trimmedLine.indexOf('='); - - if (separatorIndex === -1) { - return accumulator; - } - - const key = trimmedLine.slice(0, separatorIndex).trim(); - const value = trimmedLine.slice(separatorIndex + 1).trim(); - - accumulator[key] = value.replace(/^"|"$/g, '').replace(/^'|'$/g, ''); - return accumulator; - }, {}); - } catch (error) { - return {}; - } -} - -function getEnvValue(name) { - if (process.env[name] !== undefined && process.env[name] !== null && process.env[name] !== '') { - return process.env[name]; - } - - return readBackendEnvFile()[name]; -} - +const FIRECRAWL_DEFAULT_BASE_URL = 'https://api.firecrawl.dev/v1'; function toBoolean(value, defaultValue = false) { if (value === undefined || value === null || value === '') { @@ -67,34 +22,12 @@ function toBoolean(value, defaultValue = false) { return defaultValue; } -function toPositiveInteger(value, defaultValue) { - const parsed = Number(value); - - if (Number.isInteger(parsed) && parsed > 0) { - return parsed; - } - - return defaultValue; -} - -function normalizeBaseUrl(baseUrl) { - return String(baseUrl || FIRECRAWL_DEFAULT_BASE_URL) - .trim() - .replace(/\/+$/, ''); -} - function getFirecrawlRuntime() { - const apiKey = String(getEnvValue('FIRECRAWL_API_KEY') || '').trim(); - const baseUrl = normalizeBaseUrl(getEnvValue('FIRECRAWL_BASE_URL')); - const enabled = toBoolean(getEnvValue('FIRECRAWL_ENABLED'), true); - const pollIntervalMs = toPositiveInteger( - getEnvValue('FIRECRAWL_POLL_INTERVAL_MS'), - FIRECRAWL_DEFAULT_POLL_INTERVAL_MS, - ); - const timeoutMs = toPositiveInteger( - getEnvValue('FIRECRAWL_TIMEOUT_MS'), - FIRECRAWL_DEFAULT_TIMEOUT_MS, - ); + const apiKey = String(process.env.FIRECRAWL_API_KEY || '').trim(); + const baseUrl = String( + process.env.FIRECRAWL_BASE_URL || FIRECRAWL_DEFAULT_BASE_URL, + ).trim(); + const enabled = toBoolean(process.env.FIRECRAWL_ENABLED, true); return { provider: 'firecrawl', @@ -102,255 +35,29 @@ function getFirecrawlRuntime() { enabled, configured: Boolean(apiKey), hasApiKey: Boolean(apiKey), - apiKey, - pollIntervalMs, - timeoutMs, - mode: enabled && apiKey ? 'active' : 'scaffold_only', + mode: 'scaffold_only', }; } -function buildFirecrawlMessage(runtime, entitlements, requestedPages) { - if (!entitlements?.canAdvancedCrawl) { - return 'Firecrawl is reserved for paid Advanced Crawl users. This request will stay on the built-in crawler.'; - } - - if (!runtime.enabled) { - return 'Firecrawl is configured in code, but FIRECRAWL_ENABLED is turned off. Paid users will stay on the built-in crawler until it is enabled.'; - } - - if (!runtime.configured) { - return 'Firecrawl is enabled for paid users, but FIRECRAWL_API_KEY is missing. Falling back to the built-in crawler until the key is configured.'; - } - - return requestedPages > 1 - ? 'Paid Advanced Crawl users are routed through Firecrawl for sitemap-aware, JavaScript-rendered multi-page crawling.' - : 'Paid Advanced Crawl users are routed through Firecrawl for sitemap-aware, JavaScript-rendered crawling.'; -} - function getFirecrawlScaffold({ requestedPages, entitlements } = {}) { const runtime = getFirecrawlRuntime(); - const availableForCurrentUser = Boolean(entitlements?.canAdvancedCrawl); - const shouldUseFirecrawl = Boolean( - availableForCurrentUser - && runtime.enabled - && runtime.configured, - ); + const wantsAdvancedCrawl = Number(requestedPages || 1) > 1; + const advancedCrawlUnlocked = Boolean(entitlements?.canAdvancedCrawl); + const shouldUseFirecrawlLater = runtime.enabled && (wantsAdvancedCrawl || advancedCrawlUnlocked); return { - provider: 'firecrawl', - baseUrl: runtime.baseUrl, - enabled: runtime.enabled, - configured: runtime.configured, - hasApiKey: runtime.hasApiKey, - mode: shouldUseFirecrawl ? 'active' : runtime.mode, - status: shouldUseFirecrawl ? 'active_for_paid_users' : 'scaffold_only', + ...runtime, + status: runtime.configured ? 'ready_for_activation' : 'awaiting_api_key', wouldHandleJavascript: true, wouldHandleSitemapDiscovery: true, - availableForCurrentUser, - shouldUseFirecrawl, - usePaidOnly: true, - message: buildFirecrawlMessage(runtime, entitlements, requestedPages), - }; -} - -function sleep(milliseconds) { - return new Promise((resolve) => { - setTimeout(resolve, milliseconds); - }); -} - -function isAbsoluteUrl(value) { - return /^https?:\/\//i.test(String(value || '')); -} - -function buildApiUrl(runtime, pathOrUrl) { - const value = String(pathOrUrl || '').trim(); - - if (!value) { - return runtime.baseUrl; - } - - if (isAbsoluteUrl(value)) { - return value; - } - - if (value.startsWith('/')) { - return `${runtime.baseUrl}${value}`; - } - - return `${runtime.baseUrl}/${value}`; -} - -function summarizeFirecrawlPayload(payload) { - if (!payload) { - return 'Unknown Firecrawl API error.'; - } - - if (typeof payload === 'string') { - return payload; - } - - if (typeof payload?.error === 'string' && payload.error.trim()) { - return payload.error; - } - - if (typeof payload?.message === 'string' && payload.message.trim()) { - return payload.message; - } - - return 'Unexpected Firecrawl API response.'; -} - -async function firecrawlRequest(runtime, method, pathOrUrl, options = {}) { - try { - const response = await axios({ - method, - url: buildApiUrl(runtime, pathOrUrl), - timeout: options.timeout || runtime.timeoutMs, - data: options.data, - headers: { - Authorization: `Bearer ${runtime.apiKey}`, - 'Content-Type': 'application/json', - ...(options.headers || {}), - }, - }); - - return response.data; - } catch (error) { - if (axios.isAxiosError(error)) { - const payload = error.response?.data; - const detail = summarizeFirecrawlPayload(payload); - const status = error.response?.status; - const wrappedError = new Error( - status - ? `Firecrawl request failed with status ${status}: ${detail}` - : `Firecrawl request failed: ${detail}`, - ); - - wrappedError.code = status || 502; - wrappedError.response = payload; - throw wrappedError; - } - - throw error; - } -} - -async function collectPagedStatus(runtime, initialStatus) { - const documents = Array.isArray(initialStatus?.data) - ? [...initialStatus.data] - : []; - let nextUrl = initialStatus?.next || null; - - while (nextUrl) { - const nextStatus = await firecrawlRequest(runtime, 'get', nextUrl); - - if (Array.isArray(nextStatus?.data) && nextStatus.data.length > 0) { - documents.push(...nextStatus.data); - } - - nextUrl = nextStatus?.next || null; - } - - return { - ...initialStatus, - data: documents, - next: null, - }; -} - -async function waitForCrawlCompletion(runtime, crawlId) { - const deadline = Date.now() + runtime.timeoutMs; - - while (Date.now() <= deadline) { - const status = await firecrawlRequest(runtime, 'get', `/crawl/${encodeURIComponent(crawlId)}`); - - if (status?.status === 'completed' || status?.status === 'failed') { - return collectPagedStatus(runtime, status); - } - - await sleep(runtime.pollIntervalMs); - } - - const timeoutError = new Error( - `Firecrawl crawl timed out after ${Math.round(runtime.timeoutMs / 1000)} seconds.`, - ); - timeoutError.code = 504; - throw timeoutError; -} - -async function getCrawlErrors(runtime, crawlId) { - try { - return await firecrawlRequest(runtime, 'get', `/crawl/${encodeURIComponent(crawlId)}/errors`); - } catch (error) { - console.error('Failed to fetch Firecrawl crawl errors:', error); - return { - errors: [], - robotsBlocked: [], - }; - } -} - -async function crawlSiteWithFirecrawl(url, requestedPages) { - const runtime = getFirecrawlRuntime(); - - if (!runtime.enabled) { - const error = new Error('Firecrawl is disabled in this environment.'); - error.code = 503; - throw error; - } - - if (!runtime.configured) { - const error = new Error('Firecrawl API key is not configured.'); - error.code = 503; - throw error; - } - - const started = await firecrawlRequest(runtime, 'post', '/crawl', { - data: { - url, - limit: requestedPages, - sitemap: 'include', - crawlEntireDomain: true, - allowExternalLinks: false, - allowSubdomains: false, - ignoreQueryParameters: true, - scrapeOptions: { - formats: ['html'], - }, - }, - }); - - const crawlId = started?.id; - - if (!crawlId) { - const error = new Error('Firecrawl did not return a crawl job ID.'); - error.code = 502; - error.response = started; - throw error; - } - - const status = await waitForCrawlCompletion(runtime, crawlId); - const crawlErrors = await getCrawlErrors(runtime, crawlId); - - return { - crawlId, - provider: 'firecrawl', - status: status?.status || 'unknown', - total: status?.total || 0, - completed: status?.completed || 0, - creditsUsed: status?.creditsUsed || 0, - expiresAt: status?.expiresAt || null, - data: Array.isArray(status?.data) ? status.data : [], - errors: Array.isArray(crawlErrors?.errors) ? crawlErrors.errors : [], - robotsBlocked: Array.isArray(crawlErrors?.robotsBlocked) - ? crawlErrors.robotsBlocked - : [], + shouldUseFirecrawlLater, + message: runtime.configured + ? 'Firecrawl scaffold is wired and ready for the next activation step, but this analyzer still uses the built-in crawler today.' + : 'Firecrawl scaffold is wired, but FIRECRAWL_API_KEY is not set yet. The analyzer still uses the built-in crawler for now.', }; } module.exports = { getFirecrawlRuntime, getFirecrawlScaffold, - crawlSiteWithFirecrawl, }; diff --git a/backend/src/services/siteEntitlements.js b/backend/src/services/siteEntitlements.js index 47e7c53..ce3ec26 100644 --- a/backend/src/services/siteEntitlements.js +++ b/backend/src/services/siteEntitlements.js @@ -1,6 +1,6 @@ const ValidationError = require('./notifications/errors/validation'); -const BASIC_MAX_PAGES_PER_CRAWL = 25; +const BASIC_MAX_PAGES_PER_CRAWL = 1; const ADVANCED_MAX_PAGES_PER_CRAWL = 25; const ADVANCED_CRAWL_PERMISSION = 'USE_ADVANCED_CRAWL'; const PLATFORM_OUTPUT_PERMISSION = 'USE_PLATFORM_OUTPUT'; @@ -68,7 +68,7 @@ function ensureRequestedPagesAllowed(requestedPages, currentUser) { if (requestedPages > entitlements.maxPagesPerCrawl) { const error = new Error( - `This analyzer supports up to ${entitlements.maxPagesPerCrawl} page${entitlements.maxPagesPerCrawl === 1 ? '' : 's'} per crawl. Reduce the requested page count to continue.`, + `Your current plan allows up to ${entitlements.maxPagesPerCrawl} page${entitlements.maxPagesPerCrawl === 1 ? '' : 's'} per crawl. Upgrade to Advanced Crawl to analyze ${requestedPages} pages.`, ); error.code = 403; throw error; diff --git a/backend/src/services/sites.js b/backend/src/services/sites.js index ac36a79..9ee0055 100644 --- a/backend/src/services/sites.js +++ b/backend/src/services/sites.js @@ -8,7 +8,7 @@ const { ensureRequestedPagesAllowed, ensurePlatformOutputAllowed, } = require('./siteEntitlements'); -const { getFirecrawlScaffold, crawlSiteWithFirecrawl } = require('./firecrawl'); +const { getFirecrawlScaffold } = require('./firecrawl'); const REQUEST_TIMEOUT = 15000; const PREVIEW_LIMIT = 5; @@ -271,152 +271,6 @@ function normalizeAllowedHostnames(allowedHostnames) { return new Set(); } -function normalizeTargetPathname(pathname) { - const trimmedPathname = String(pathname || '').trim(); - - if (!trimmedPathname || trimmedPathname === '/') { - return '/'; - } - - return `/${trimmedPathname.replace(/^\/+/, '').replace(/\/+$/, '')}`; -} - -function buildCrawlTarget(rawTarget, baseUrl, label) { - const trimmedTarget = String(rawTarget || '').trim(); - - if (!trimmedTarget) { - return null; - } - - let parsedTarget; - - try { - if (/^https?:\/\//i.test(trimmedTarget)) { - parsedTarget = new URL(trimmedTarget); - } else if (trimmedTarget.startsWith('/')) { - parsedTarget = new URL(trimmedTarget, baseUrl); - } else { - parsedTarget = new URL(`/${trimmedTarget.replace(/^\/+/, '')}`, baseUrl); - } - } catch (error) { - const targetError = new Error(`Invalid ${label} target: ${trimmedTarget}`); - targetError.code = 400; - throw targetError; - } - - if (!['http:', 'https:'].includes(parsedTarget.protocol)) { - const targetError = new Error(`Invalid ${label} target: ${trimmedTarget}`); - targetError.code = 400; - throw targetError; - } - - const baseHostname = new URL(baseUrl).hostname.toLowerCase(); - - if (parsedTarget.hostname.toLowerCase() !== baseHostname) { - const targetError = new Error( - `${label} targets must stay on the same website as the analyzed URL.`, - ); - targetError.code = 400; - throw targetError; - } - - parsedTarget.hash = ''; - parsedTarget.search = ''; - - const path = normalizeTargetPathname(parsedTarget.pathname); - const url = normalizeUrl(parsedTarget.toString()); - - return { - input: trimmedTarget, - label: /^https?:\/\//i.test(trimmedTarget) ? url : path, - path, - url, - }; -} - -function parseCrawlTargets(rawTargets, baseUrl, label) { - const targetValues = Array.isArray(rawTargets) - ? rawTargets - : String(rawTargets || '').split(/\r?\n/); - const dedupedTargets = new Map(); - - targetValues - .map((targetValue) => String(targetValue || '').trim()) - .filter(Boolean) - .forEach((targetValue) => { - const normalizedTarget = buildCrawlTarget(targetValue, baseUrl, label); - - dedupedTargets.set(normalizedTarget.url, normalizedTarget); - }); - - return Array.from(dedupedTargets.values()); -} - -function normalizeCrawlTargets(data, baseUrl) { - return { - includeTargets: parseCrawlTargets(data?.includeTargets, baseUrl, 'include'), - excludeTargets: parseCrawlTargets(data?.excludeTargets, baseUrl, 'exclude'), - }; -} - -function isUrlMatchingTarget(candidateUrl, target) { - if (!candidateUrl || !target?.path) { - return false; - } - - let parsedUrl; - - try { - parsedUrl = new URL(normalizeUrl(candidateUrl)); - } catch (error) { - return false; - } - - const candidatePath = normalizeTargetPathname(parsedUrl.pathname); - - if (target.path === '/') { - return true; - } - - return candidatePath === target.path || candidatePath.startsWith(`${target.path}/`); -} - -function matchesAnyCrawlTarget(candidateUrl, targets = []) { - return targets.some((target) => isUrlMatchingTarget(candidateUrl, target)); -} - -function isUrlAllowedByCrawlTargets(candidateUrl, crawlTargets = {}) { - const includeTargets = crawlTargets.includeTargets || []; - const excludeTargets = crawlTargets.excludeTargets || []; - - if (includeTargets.length > 0 && !matchesAnyCrawlTarget(candidateUrl, includeTargets)) { - return false; - } - - if (excludeTargets.length > 0 && matchesAnyCrawlTarget(candidateUrl, excludeTargets)) { - return false; - } - - return true; -} - -function buildSeedUrls(baseUrl, crawlTargets = {}) { - const seedUrls = new Set([baseUrl]); - - (crawlTargets.includeTargets || []).forEach((target) => { - seedUrls.add(target.url); - }); - - return Array.from(seedUrls); -} - -function summarizeCrawlTargets(crawlTargets = {}) { - return { - includeTargets: (crawlTargets.includeTargets || []).map((target) => target.label), - excludeTargets: (crawlTargets.excludeTargets || []).map((target) => target.label), - }; -} - function normalizeCrawlUrl(rawUrl, parentUrl, allowedHostnames) { if (!rawUrl || typeof rawUrl !== 'string') { return null; @@ -549,184 +403,12 @@ async function fetchAnalyzedPage(pageUrl, allowedHostnames) { }; } -function analyzeFetchedPage({ - requestedUrl, - analyzedUrl, - html, - statusCode, - headers = {}, - allowedHostnames, - discoveredLinks = null, - pageTitle = null, -}) { - const normalizedAnalyzedUrl = normalizeUrl(analyzedUrl || requestedUrl); - const normalizedAllowedHostnames = normalizeAllowedHostnames(allowedHostnames); - const analyzedHostname = new URL(normalizedAnalyzedUrl).hostname.toLowerCase(); - normalizedAllowedHostnames.add(analyzedHostname); - - if (allowedHostnames instanceof Set) { - allowedHostnames.add(analyzedHostname); - } - - const resolvedHtml = typeof html === 'string' ? html : ''; - const resolvedPageTitle = pageTitle || extractPageTitle(resolvedHtml); - const platform = detectPlatform(resolvedHtml, headers, normalizedAnalyzedUrl); - const schema = extractSchemaSummary(resolvedHtml); - const pageSignals = inferPageSignals( - resolvedHtml, - normalizedAnalyzedUrl, - resolvedPageTitle, - platform, - ); - const normalizedLinks = Array.isArray(discoveredLinks) - ? Array.from( - new Set( - discoveredLinks - .map((linkUrl) => normalizeCrawlUrl(linkUrl, normalizedAnalyzedUrl, normalizedAllowedHostnames)) - .filter(Boolean), - ), - ) - : extractInternalLinks( - resolvedHtml, - normalizedAnalyzedUrl, - normalizedAllowedHostnames, - ); - - return { - requestedUrl: requestedUrl || normalizedAnalyzedUrl, - analyzedUrl: normalizedAnalyzedUrl, - pageTitle: resolvedPageTitle, - statusCode: statusCode || null, - html: resolvedHtml, - platform, - schema, - pageSignals, - discoveredLinks: normalizedLinks, - }; -} - -function transformFirecrawlDocument(document, allowedHostnames) { - const metadata = document?.metadata || {}; - const sourceUrl = - metadata.sourceURL - || metadata.sourceUrl - || metadata.url - || document?.url - || document?.sourceURL - || document?.sourceUrl; - - if (!sourceUrl) { - return null; - } - - const html = - typeof document?.html === 'string' - ? document.html - : typeof document?.rawHtml === 'string' - ? document.rawHtml - : typeof document?.content === 'string' - ? document.content - : ''; - - return analyzeFetchedPage({ - requestedUrl: sourceUrl, - analyzedUrl: sourceUrl, - html, - statusCode: document?.metadata?.statusCode || 200, - headers: {}, - allowedHostnames, - discoveredLinks: Array.isArray(document?.links) ? document.links : null, - pageTitle: metadata.title || null, - }); -} - -async function crawlPagesWithFirecrawl(baseUrl, requestedPages, crawlTargets = {}) { +async function crawlPages(baseUrl, requestedPages) { const normalizedBaseUrl = normalizeUrl(baseUrl); const allowedHostnames = new Set([new URL(normalizedBaseUrl).hostname.toLowerCase()]); - const firecrawlResult = await crawlSiteWithFirecrawl(normalizedBaseUrl, requestedPages); - const pages = []; - const analyzedUrls = new Set(); - - (firecrawlResult.data || []).forEach((document) => { - try { - const page = transformFirecrawlDocument(document, allowedHostnames); - - if ( - !page - || analyzedUrls.has(page.analyzedUrl) - || !isUrlAllowedByCrawlTargets(page.analyzedUrl, crawlTargets) - ) { - return; - } - - analyzedUrls.add(page.analyzedUrl); - pages.push(page); - } catch (error) { - console.error('Failed to transform Firecrawl document:', error); - } - }); - - const failedPages = []; - - (firecrawlResult.errors || []).forEach((entry) => { - const failedUrl = normalizeCrawlUrl( - entry?.path || entry?.url || entry?.sourceURL || normalizedBaseUrl, - normalizedBaseUrl, - allowedHostnames, - ) || normalizedBaseUrl; - - if (!isUrlAllowedByCrawlTargets(failedUrl, crawlTargets)) { - return; - } - - failedPages.push({ - url: failedUrl, - error: entry?.error || entry?.message || 'Firecrawl could not fetch this page.', - }); - }); - - (firecrawlResult.robotsBlocked || []).forEach((entry) => { - const blockedUrl = normalizeCrawlUrl( - entry?.path || entry?.url || normalizedBaseUrl, - normalizedBaseUrl, - allowedHostnames, - ) || normalizedBaseUrl; - - if (!isUrlAllowedByCrawlTargets(blockedUrl, crawlTargets)) { - return; - } - - failedPages.push({ - url: blockedUrl, - error: 'Blocked by robots.txt during Firecrawl crawl.', - }); - }); - - return { - provider: 'firecrawl', - pages, - failedPages, - discoveredInternalPages: Math.max((firecrawlResult.total || pages.length) - 1, 0), - firecrawlJob: { - crawlId: firecrawlResult.crawlId, - status: firecrawlResult.status, - total: firecrawlResult.total, - completed: firecrawlResult.completed, - creditsUsed: firecrawlResult.creditsUsed, - expiresAt: firecrawlResult.expiresAt, - failedPages: failedPages.length, - }, - }; -} - -async function crawlPages(baseUrl, requestedPages, crawlTargets = {}) { - const normalizedBaseUrl = normalizeUrl(baseUrl); - const allowedHostnames = new Set([new URL(normalizedBaseUrl).hostname.toLowerCase()]); - const seedUrls = buildSeedUrls(normalizedBaseUrl, crawlTargets); - const seedUrlSet = new Set(seedUrls); const visitedUrls = new Set(); - const queuedUrls = new Set(seedUrls); - const pendingUrls = [...seedUrls]; + const queuedUrls = new Set([normalizedBaseUrl]); + const pendingUrls = [normalizedBaseUrl]; const pages = []; const failedPages = []; let discoveredInternalPages = 0; @@ -738,29 +420,15 @@ async function crawlPages(baseUrl, requestedPages, crawlTargets = {}) { continue; } - const isBootstrapSeed = seedUrlSet.has(nextUrl) && nextUrl === normalizedBaseUrl; - - if (!isBootstrapSeed && !isUrlAllowedByCrawlTargets(nextUrl, crawlTargets)) { - visitedUrls.add(nextUrl); - continue; - } - visitedUrls.add(nextUrl); try { const page = await fetchAnalyzedPage(nextUrl, allowedHostnames); visitedUrls.add(page.analyzedUrl); queuedUrls.add(page.analyzedUrl); - - if (isUrlAllowedByCrawlTargets(page.analyzedUrl, crawlTargets)) { - pages.push(page); - } + pages.push(page); page.discoveredLinks.forEach((linkUrl) => { - if (!isUrlAllowedByCrawlTargets(linkUrl, crawlTargets)) { - return; - } - if (!visitedUrls.has(linkUrl) && !queuedUrls.has(linkUrl)) { queuedUrls.add(linkUrl); pendingUrls.push(linkUrl); @@ -855,29 +523,30 @@ function buildCrawlNotice({ requestedPages, actualPagesAnalyzed, failedPages, - crawlTargetSummary, + discoveredInternalPages, + firecrawl, }) { - const parts = []; - - if (requestedPages > 1) { - parts.push( - `The crawl analyzed ${actualPagesAnalyzed} of ${requestedPages} requested page${requestedPages === 1 ? '' : 's'}.`, - ); + if (requestedPages <= 1) { + return null; } - if (actualPagesAnalyzed < requestedPages) { - parts.push('Fewer matching crawlable pages were found than requested.'); - } + const parts = [ + `Advanced crawl analyzed ${actualPagesAnalyzed} of ${requestedPages} requested page${requestedPages === 1 ? '' : 's'}.`, + ]; - if ((crawlTargetSummary?.includeTargets || []).length > 0 || (crawlTargetSummary?.excludeTargets || []).length > 0) { - parts.push('Custom include/exclude targeting was applied to this report.'); + if (discoveredInternalPages + 1 < requestedPages) { + parts.push('Fewer crawlable internal HTML pages were discovered than requested.'); } if (failedPages > 0) { parts.push(`${failedPages} page${failedPages === 1 ? '' : 's'} could not be fetched during the crawl.`); } - return parts.length > 0 ? parts.join(' ') : null; + if (firecrawl?.message) { + parts.push(firecrawl.message); + } + + return parts.join(' '); } function buildAggregateAnalysis({ @@ -888,14 +557,11 @@ function buildAggregateAnalysis({ discoveredInternalPages, failedPages, firecrawl, - crawlTargets, - provider = 'internal', }) { const homepage = pageAnalyses[0]; const finishedAt = new Date(); const aggregateSchema = buildAggregateSchema(pageAnalyses); const aggregateSignals = buildAggregateSignals(pageAnalyses); - const crawlTargetSummary = summarizeCrawlTargets(crawlTargets); return { requestedUrl: normalizedUrl, @@ -915,9 +581,7 @@ function buildAggregateAnalysis({ allowedPages: entitlements.maxPagesPerCrawl, actualPagesAnalyzed: pageAnalyses.length, advancedCrawlEnabled: entitlements.canAdvancedCrawl, - provider, - includeTargets: crawlTargetSummary.includeTargets, - excludeTargets: crawlTargetSummary.excludeTargets, + provider: 'internal', }, crawlSummary: { pagesWithStructuredData: pageAnalyses.filter((page) => page.schema?.hasStructuredData).length, @@ -943,13 +607,14 @@ function buildAggregateAnalysis({ requestedPages, actualPagesAnalyzed: pageAnalyses.length, failedPages: failedPages.length, - crawlTargetSummary, + discoveredInternalPages, + firecrawl, }), finishedAt, }; } -function buildFailureAnalysis(normalizedUrl, error, firecrawl, provider = 'internal') { +function buildFailureAnalysis(normalizedUrl, error, firecrawl) { const isAxiosError = axios.isAxiosError(error); return { @@ -968,9 +633,6 @@ function buildFailureAnalysis(normalizedUrl, error, firecrawl, provider = 'inter rdfa: { count: 0, detected: false }, }, firecrawl, - crawlPlan: { - provider, - }, error: isAxiosError ? error.response ? `Request failed with status ${error.response.status}` @@ -1461,8 +1123,7 @@ module.exports = class SitesService { const requestedPages = parseRequestedPages(data?.requestedPages); const entitlements = ensureRequestedPagesAllowed(requestedPages, currentUser); const normalizedUrl = normalizeUrl(data?.url || data?.base_url); - const crawlTargets = normalizeCrawlTargets(data, normalizedUrl); - let firecrawl = getFirecrawlScaffold({ requestedPages, entitlements }); + const firecrawl = getFirecrawlScaffold({ requestedPages, entitlements }); const requestedName = typeof data?.name === 'string' && data.name.trim() ? data.name.trim() @@ -1529,50 +1190,12 @@ module.exports = class SitesService { } try { - let crawlResult; - - if (firecrawl.shouldUseFirecrawl) { - try { - crawlResult = await crawlPagesWithFirecrawl(normalizedUrl, requestedPages, crawlTargets); - firecrawl = { - ...firecrawl, - currentProvider: 'firecrawl', - crawlId: crawlResult.firecrawlJob?.crawlId || null, - crawlStatus: crawlResult.firecrawlJob?.status || null, - creditsUsed: crawlResult.firecrawlJob?.creditsUsed || 0, - message: crawlResult.firecrawlJob?.status === 'failed' - ? 'Firecrawl ran for this paid request, but the crawl reported failures. Partial results are shown when available.' - : 'Firecrawl handled this paid request with sitemap-aware, JavaScript-rendered crawling.', - }; - } catch (error) { - console.error('Firecrawl crawl failed, falling back to internal crawl:', error); - firecrawl = { - ...firecrawl, - currentProvider: 'internal', - status: 'fallback_internal_after_error', - shouldUseFirecrawl: false, - fallbackReason: error.message, - message: `Firecrawl was selected for this paid request but failed to run (${error.message}). The analyzer fell back to the built-in crawler.`, - }; - crawlResult = await crawlPages(normalizedUrl, requestedPages, crawlTargets); - } - } else { - crawlResult = await crawlPages(normalizedUrl, requestedPages, crawlTargets); - firecrawl = { - ...firecrawl, - currentProvider: 'internal', - }; - } - + const crawlResult = await crawlPages(normalizedUrl, requestedPages); const pageAnalyses = crawlResult.pages; if (pageAnalyses.length === 0) { const firstFailure = crawlResult.failedPages[0]; - const error = new Error( - crawlTargets.includeTargets.length > 0 || crawlTargets.excludeTargets.length > 0 - ? 'No pages matched the include/exclude targeting rules you entered.' - : firstFailure?.error || 'Site analysis failed.', - ); + const error = new Error(firstFailure?.error || 'Site analysis failed.'); error.code = 400; throw error; } @@ -1585,8 +1208,6 @@ module.exports = class SitesService { discoveredInternalPages: crawlResult.discoveredInternalPages, failedPages: crawlResult.failedPages, firecrawl, - crawlTargets, - provider: crawlResult.provider || 'internal', }); const homepage = pageAnalyses[0]; const finishedAt = analysis.finishedAt; @@ -1670,12 +1291,7 @@ module.exports = class SitesService { } catch (error) { console.error('Site analysis failed:', error); - const failureAnalysis = buildFailureAnalysis( - normalizedUrl, - error, - firecrawl, - firecrawl?.currentProvider || 'internal', - ); + const failureAnalysis = buildFailureAnalysis(normalizedUrl, error, firecrawl); const failedAt = new Date(); const failureTransaction = await db.sequelize.transaction(); let failedSite; @@ -1735,7 +1351,7 @@ module.exports = class SitesService { allowedPages: entitlements.maxPagesPerCrawl, actualPagesAnalyzed: 0, advancedCrawlEnabled: entitlements.canAdvancedCrawl, - provider: failureAnalysis.crawlPlan?.provider || 'internal', + provider: 'internal', }, entitlements, }, diff --git a/frontend/src/components/Logo/index.tsx b/frontend/src/components/Logo/index.tsx index 30747d0..a582e29 100644 --- a/frontend/src/components/Logo/index.tsx +++ b/frontend/src/components/Logo/index.tsx @@ -1,4 +1,3 @@ -import Image from 'next/image' import React from 'react' type Props = { @@ -7,12 +6,10 @@ type Props = { export default function Logo({ className = '' }: Props) { return ( - Flatlogic logo + alt={'Flatlogic logo'}> + ) } diff --git a/frontend/src/helpers/siteEntitlements.ts b/frontend/src/helpers/siteEntitlements.ts index 1877880..c0d14f9 100644 --- a/frontend/src/helpers/siteEntitlements.ts +++ b/frontend/src/helpers/siteEntitlements.ts @@ -1,6 +1,6 @@ import { hasPermission } from './userPermissions'; -export const BASIC_MAX_PAGES_PER_CRAWL = 25; +export const BASIC_MAX_PAGES_PER_CRAWL = 1; export const ADVANCED_MAX_PAGES_PER_CRAWL = 25; export const ADVANCED_CRAWL_PERMISSION = 'USE_ADVANCED_CRAWL'; export const PLATFORM_OUTPUT_PERMISSION = 'USE_PLATFORM_OUTPUT'; diff --git a/frontend/src/pages/profile.tsx b/frontend/src/pages/profile.tsx index 000ed48..f5eb7cf 100644 --- a/frontend/src/pages/profile.tsx +++ b/frontend/src/pages/profile.tsx @@ -3,7 +3,6 @@ import { mdiUpload, } from '@mdi/js'; import Head from 'next/head'; -import Image from 'next/image'; import React, { ReactElement, useEffect, useState } from 'react'; import { ToastContainer, toast } from 'react-toastify'; import DatePicker from 'react-datepicker'; @@ -85,13 +84,7 @@ const EditUsers = () => { {currentUser?.avatar[0]?.publicUrl &&
- Avatar + Avatar
} void; - children: React.ReactNode; -}; - -type ResultsTabButtonProps = { - label: string; - iconPath: string; - count?: number | string; - isActive: boolean; - onClick: () => void; -}; - -type PageFilterChipProps = { - label: string; - count: number; - iconPath: string; - isActive: boolean; - onClick: () => void; -}; - -type DeliverySummaryCardProps = { - label: string; - value: string | number; - helper: string; - iconPath: string; - toneClassName?: string; -}; - -type DeliveryActionCardProps = { - title: string; - description: string; - iconPath: string; - badge?: React.ReactNode; - children: React.ReactNode; -}; - const PLATFORM_OPTIONS = [ { value: 'wordpress', label: 'WordPress' }, { value: 'shopify', label: 'Shopify' }, @@ -189,267 +132,10 @@ const PLATFORM_OPTIONS = [ const initialReport: ReportResponse | null = null; -const parseTargetLines = (value: string) => value - .split(/\r?\n/) - .map((entry) => entry.trim()) - .filter(Boolean); - -const recommendationPriorityOrder = ['critical', 'high', 'medium', 'low', 'other'] as const; -type RecommendationPriorityId = (typeof recommendationPriorityOrder)[number]; - -type RecommendationPriorityMeta = { - id: RecommendationPriorityId; - label: string; - sortOrder: number; - iconPath: string; - badgeClassName: string; - sectionTitle: string; - sectionDescription: string; - accentClassName: string; -}; - -const recommendationPriorityMetaMap: Record = { - critical: { - id: 'critical', - label: 'Critical', - sortOrder: 0, - iconPath: icon.mdiAlertCircleOutline, - badgeClassName: 'bg-rose-600 text-white dark:bg-rose-500 dark:text-white', - sectionTitle: 'Critical fixes first', - sectionDescription: 'Resolve these before anything else because they are the most urgent structured data gaps.', - accentClassName: 'border-rose-200 bg-rose-50/80 dark:border-rose-500/30 dark:bg-rose-500/10', - }, - high: { - id: 'high', - label: 'High', - sortOrder: 1, - iconPath: icon.mdiAlertOutline, - badgeClassName: 'bg-amber-500 text-white dark:bg-amber-400 dark:text-slate-950', - sectionTitle: 'High priority', - sectionDescription: 'These recommendations should be tackled early because they likely affect key pages or important schema coverage.', - accentClassName: 'border-amber-200 bg-amber-50/80 dark:border-amber-500/30 dark:bg-amber-500/10', - }, - medium: { - id: 'medium', - label: 'Medium', - sortOrder: 2, - iconPath: icon.mdiArrowDownCircleOutline, - badgeClassName: 'bg-sky-600 text-white dark:bg-sky-500 dark:text-white', - sectionTitle: 'Next up', - sectionDescription: 'Address these after the urgent items to improve broader coverage and quality.', - accentClassName: 'border-sky-200 bg-sky-50/80 dark:border-sky-500/30 dark:bg-sky-500/10', - }, - low: { - id: 'low', - label: 'Low', - sortOrder: 3, - iconPath: icon.mdiCheckCircleOutline, - badgeClassName: 'bg-emerald-600 text-white dark:bg-emerald-500 dark:text-white', - sectionTitle: 'Quick wins', - sectionDescription: 'Useful polish items that can be handled once higher-impact fixes are moving.', - accentClassName: 'border-emerald-200 bg-emerald-50/80 dark:border-emerald-500/30 dark:bg-emerald-500/10', - }, - other: { - id: 'other', - label: 'Unprioritized', - sortOrder: 4, - iconPath: icon.mdiLightbulbOutline, - badgeClassName: 'bg-slate-800 text-white dark:bg-slate-200 dark:text-slate-950', - sectionTitle: 'More opportunities', - sectionDescription: 'These are useful follow-up ideas that were not assigned a stronger priority label.', - accentClassName: 'border-slate-200 bg-slate-50/80 dark:border-slate-700 dark:bg-slate-900/40', - }, -}; - -const normalizeRecommendationPriority = (priority?: string): RecommendationPriorityId => { - const normalizedPriority = priority?.trim().toLowerCase(); - - if (!normalizedPriority) { - return 'other'; - } - - if (normalizedPriority.includes('critical') || normalizedPriority === 'p0') { - return 'critical'; - } - - if (normalizedPriority.includes('high') || normalizedPriority === 'p1') { - return 'high'; - } - - if (normalizedPriority.includes('medium') || normalizedPriority.includes('med') || normalizedPriority === 'p2') { - return 'medium'; - } - - if (normalizedPriority.includes('low') || normalizedPriority === 'p3') { - return 'low'; - } - - return 'other'; -}; - -const getRecommendationPriorityMeta = (priority?: string) => recommendationPriorityMetaMap[normalizeRecommendationPriority(priority)]; - -const isFixFirstRecommendation = (recommendation: Recommendation) => { - const priorityId = normalizeRecommendationPriority(recommendation.priority); - return priorityId === 'critical' || priorityId === 'high'; -}; - -const getRecommendationScopeSortOrder = (pageScope?: string) => { - const normalizedScope = pageScope?.trim().toLowerCase() || ''; - - if (!normalizedScope) { - return 4; - } - - if (normalizedScope.includes('site') || normalizedScope.includes('global') || normalizedScope.includes('all')) { - return 0; - } - - if (normalizedScope.includes('home')) { - return 1; - } - - if (normalizedScope.includes('template') || normalizedScope.includes('category') || normalizedScope.includes('collection') || normalizedScope.includes('product')) { - return 2; - } - - if (normalizedScope.includes('page')) { - return 3; - } - - return 4; -}; - -const SetupAccordionSection = ({ - title, - description, - iconPath, - badge, - isOpen, - onToggle, - children, -}: SetupAccordionSectionProps) => ( -
- - - {isOpen && ( -
- {children} -
- )} -
-); - -const ResultsTabButton = ({ label, iconPath, count, isActive, onClick }: ResultsTabButtonProps) => ( - -); - -const PageFilterChip = ({ label, count, iconPath, isActive, onClick }: PageFilterChipProps) => ( - -); - -const DeliverySummaryCard = ({ - label, - value, - helper, - iconPath, - toneClassName = 'border-slate-200 bg-white dark:border-slate-700 dark:bg-slate-950/40', -}: DeliverySummaryCardProps) => ( -
-
-
-
{label}
-
{value}
-
- - - -
-

{helper}

-
-); - -const DeliveryActionCard = ({ title, description, iconPath, badge, children }: DeliveryActionCardProps) => ( -
-
-
- - - -
-
{title}
-

{description}

-
-
- {badge} -
-
{children}
-
-); - const SchemaAnalyzerPage = () => { const { currentUser } = useAppSelector((state) => state.auth); const [url, setUrl] = React.useState(''); const [requestedPages, setRequestedPages] = React.useState(1); - const [includeTargets, setIncludeTargets] = React.useState(''); - const [excludeTargets, setExcludeTargets] = React.useState(''); const [selectedPlatform, setSelectedPlatform] = React.useState('wordpress'); const [emailTo, setEmailTo] = React.useState(currentUser?.email || ''); const [report, setReport] = React.useState(initialReport); @@ -458,21 +144,6 @@ const SchemaAnalyzerPage = () => { const [emailingId, setEmailingId] = React.useState(null); const [exportingId, setExportingId] = React.useState(null); const [isCheckingPlatformOutput, setIsCheckingPlatformOutput] = React.useState(false); - const [openSections, setOpenSections] = React.useState>({ - targeting: false, - options: false, - limits: false, - }); - const [activeResultsTab, setActiveResultsTab] = React.useState('overview'); - const [activePageFilter, setActivePageFilter] = React.useState('all'); - const [activeRecommendationFilter, setActiveRecommendationFilter] = React.useState('all'); - const [isFailedPagesExpanded, setIsFailedPagesExpanded] = React.useState(false); - const [expandedRecommendationIds, setExpandedRecommendationIds] = React.useState>({}); - const resultsRef = React.useRef(null); - - const scrollToResults = React.useCallback(() => { - resultsRef.current?.scrollIntoView({ behavior: 'smooth', block: 'start' }); - }, []); React.useEffect(() => { if (currentUser?.email) { @@ -480,24 +151,6 @@ const SchemaAnalyzerPage = () => { } }, [currentUser?.email]); - React.useEffect(() => { - if (!report?.analysis) { - return undefined; - } - - setActiveResultsTab('overview'); - setActivePageFilter('all'); - setActiveRecommendationFilter('all'); - setIsFailedPagesExpanded(false); - setExpandedRecommendationIds({}); - - const timeoutId = window.setTimeout(() => { - scrollToResults(); - }, 150); - - return () => window.clearTimeout(timeoutId); - }, [report?.analysis?.analyzedUrl, report?.analysis?.fetchedAt, scrollToResults]); - const notify = React.useCallback((type: 'success' | 'error' | 'info', message: string) => { toast(message, { type, position: 'bottom-center' }); }, []); @@ -507,289 +160,18 @@ const SchemaAnalyzerPage = () => { [currentUser], ); const entitlements = report?.entitlements || report?.analysis?.entitlements || fallbackEntitlements; - const maxPagesPerCrawl = entitlements?.maxPagesPerCrawl || fallbackEntitlements.maxPagesPerCrawl || 25; + const maxPagesPerCrawl = entitlements?.maxPagesPerCrawl || 1; const recommendations = report?.recommendations || []; const exportableRecommendations = recommendations.filter( (recommendation) => recommendation.suggested_schema, ); - const sortedRecommendations = React.useMemo(() => ( - [...recommendations].sort((leftRecommendation, rightRecommendation) => { - const leftPriority = getRecommendationPriorityMeta(leftRecommendation.priority); - const rightPriority = getRecommendationPriorityMeta(rightRecommendation.priority); - - if (leftPriority.sortOrder !== rightPriority.sortOrder) { - return leftPriority.sortOrder - rightPriority.sortOrder; - } - - const leftScopeOrder = getRecommendationScopeSortOrder(leftRecommendation.page_scope); - const rightScopeOrder = getRecommendationScopeSortOrder(rightRecommendation.page_scope); - if (leftScopeOrder !== rightScopeOrder) { - return leftScopeOrder - rightScopeOrder; - } - - const leftHasCode = Number(Boolean(leftRecommendation.suggested_schema)); - const rightHasCode = Number(Boolean(rightRecommendation.suggested_schema)); - if (leftHasCode !== rightHasCode) { - return rightHasCode - leftHasCode; - } - - return leftRecommendation.title.localeCompare(rightRecommendation.title); - }) - ), [recommendations]); - const crawlPlan = report?.analysis?.crawlPlan; const isRequestedPagesOverLimit = requestedPages > maxPagesPerCrawl; - const draftIncludeTargets = React.useMemo(() => parseTargetLines(includeTargets), [includeTargets]); - const draftExcludeTargets = React.useMemo(() => parseTargetLines(excludeTargets), [excludeTargets]); - const appliedIncludeTargets = crawlPlan?.includeTargets || draftIncludeTargets; - const appliedExcludeTargets = crawlPlan?.excludeTargets || draftExcludeTargets; - const analyzedPages = report?.analysis?.pages || []; - const failedPages = report?.analysis?.failedPages || []; - const jsonLdTypes = report?.analysis?.schema?.jsonLd?.types || []; - const invalidJsonLdBlocks = report?.analysis?.schema?.jsonLd?.invalidBlocks || []; - const hasTargetingRules = appliedIncludeTargets.length > 0 || appliedExcludeTargets.length > 0; - const selectedPlatformLabel = PLATFORM_OPTIONS.find( - (platformOption) => platformOption.value === selectedPlatform, - )?.label || 'Custom / Other'; - const analyzedTimestamp = report?.analysis?.fetchedAt - ? new Date(report.analysis.fetchedAt).toLocaleString() - : null; - const hasEmailRecipient = emailTo.trim().length > 0; - const hasUrl = url.trim().length > 0; - const targetingSummary = hasTargetingRules - ? `${appliedIncludeTargets.length} include · ${appliedExcludeTargets.length} exclude` - : 'No targeting rules'; - const recommendationQuickFilters = [ - { - id: 'all' as const, - label: 'All', - count: recommendations.length, - iconPath: icon.mdiViewListOutline, - }, - { - id: 'fixFirst' as const, - label: 'Fix first', - count: sortedRecommendations.filter((recommendation) => isFixFirstRecommendation(recommendation)).length, - iconPath: icon.mdiAlertCircleOutline, - }, - { - id: 'codeReady' as const, - label: 'Code ready', - count: sortedRecommendations.filter((recommendation) => recommendation.suggested_schema).length, - iconPath: icon.mdiCodeBraces, - }, - { - id: 'needsCode' as const, - label: 'Needs code', - count: sortedRecommendations.filter((recommendation) => !recommendation.suggested_schema).length, - iconPath: icon.mdiLightbulbOutline, - }, - ]; - const pageFilterOptions = [ - { - id: 'all' as const, - label: 'All', - count: analyzedPages.length + failedPages.length, - iconPath: icon.mdiViewListOutline, - }, - { - id: 'withSchema' as const, - label: 'With schema', - count: analyzedPages.filter((page) => page.hasStructuredData).length, - iconPath: icon.mdiCheckCircleOutline, - }, - { - id: 'missingSchema' as const, - label: 'Missing schema', - count: analyzedPages.filter((page) => !page.hasStructuredData).length, - iconPath: icon.mdiAlertCircleOutline, - }, - { - id: 'failed' as const, - label: 'Failed', - count: failedPages.length, - iconPath: icon.mdiCloseCircleOutline, - }, - ]; - const filteredRecommendations = React.useMemo(() => { - if (activeRecommendationFilter === 'fixFirst') { - return sortedRecommendations.filter((recommendation) => isFixFirstRecommendation(recommendation)); - } - - if (activeRecommendationFilter === 'codeReady') { - return sortedRecommendations.filter((recommendation) => recommendation.suggested_schema); - } - - if (activeRecommendationFilter === 'needsCode') { - return sortedRecommendations.filter((recommendation) => !recommendation.suggested_schema); - } - - return sortedRecommendations; - }, [activeRecommendationFilter, sortedRecommendations]); - const recommendationGroups = React.useMemo(() => recommendationPriorityOrder - .map((priorityId) => ({ - meta: recommendationPriorityMetaMap[priorityId], - recommendations: filteredRecommendations.filter( - (recommendation) => normalizeRecommendationPriority(recommendation.priority) === priorityId, - ), - })) - .filter((group) => group.recommendations.length > 0), [filteredRecommendations]); - const activeRecommendationFilterLabel = recommendationQuickFilters.find( - (filterOption) => filterOption.id === activeRecommendationFilter, - )?.label || 'All'; - const recommendationEmptyStateMessage = activeRecommendationFilter === 'fixFirst' - ? 'No high-priority recommendations are waiting in this report.' - : activeRecommendationFilter === 'codeReady' - ? 'No recommendations with generated code are available yet.' - : activeRecommendationFilter === 'needsCode' - ? 'Every visible recommendation already has a code snippet attached.' - : 'No recommendations were generated for this page yet.'; - - const filteredAnalyzedPages = React.useMemo(() => { - if (activePageFilter === 'withSchema') { - return analyzedPages.filter((page) => page.hasStructuredData); - } - - if (activePageFilter === 'missingSchema') { - return analyzedPages.filter((page) => !page.hasStructuredData); - } - - if (activePageFilter === 'failed') { - return []; - } - - return analyzedPages; - }, [activePageFilter, analyzedPages]); - const shouldShowFailedSection = failedPages.length > 0 && (activePageFilter === 'all' || activePageFilter === 'failed'); - const emptyPagesStateMessage = activePageFilter === 'failed' - ? 'No failed internal pages were recorded for this analysis run.' - : activePageFilter === 'withSchema' - ? 'No analyzed pages with structured data match this filter yet.' - : activePageFilter === 'missingSchema' - ? 'No analyzed pages are missing structured data for this run.' - : 'No page-level results are available yet for this analysis run.'; - const deliverySummaryCards = [ - { - label: 'Code-ready fixes', - value: exportableRecommendations.length, - helper: exportableRecommendations.length > 0 - ? `${exportableRecommendations.length} recommendation${exportableRecommendations.length === 1 ? '' : 's'} can be exported right now.` - : 'No code-ready recommendations yet. Use the Recommendations tab to refine the handoff.', - iconPath: icon.mdiCodeBraces, - toneClassName: exportableRecommendations.length > 0 - ? 'border-emerald-200 bg-emerald-50/80 dark:border-emerald-500/30 dark:bg-emerald-500/10' - : 'border-slate-200 bg-white dark:border-slate-700 dark:bg-slate-950/40', - }, - { - label: 'Email recipient', - value: hasEmailRecipient ? 'Ready' : 'Missing', - helper: hasEmailRecipient ? emailTo.trim() : 'Add a developer email before sending the handoff.', - iconPath: hasEmailRecipient ? icon.mdiEmailOutline : icon.mdiAlertCircleOutline, - toneClassName: hasEmailRecipient - ? 'border-sky-200 bg-sky-50/80 dark:border-sky-500/30 dark:bg-sky-500/10' - : 'border-amber-200 bg-amber-50/80 dark:border-amber-500/30 dark:bg-amber-500/10', - }, - { - label: 'Platform output', - value: entitlements?.canPlatformOutput ? 'Unlocked' : 'Premium', - helper: entitlements?.canPlatformOutput - ? `${selectedPlatformLabel} output can be checked in Step 4.` - : 'Premium is required for Step 4 platform-specific output.', - iconPath: entitlements?.canPlatformOutput ? icon.mdiCheckCircleOutline : icon.mdiLockOutline, - toneClassName: entitlements?.canPlatformOutput - ? 'border-violet-200 bg-violet-50/80 dark:border-violet-500/30 dark:bg-violet-500/10' - : 'border-slate-200 bg-white dark:border-slate-700 dark:bg-slate-950/40', - }, - ]; - const deliveryChecklist = [ - { - id: 'recipient', - label: 'Recipient email', - value: hasEmailRecipient ? emailTo.trim() : 'Add an email to send the handoff.', - isReady: hasEmailRecipient, - }, - { - id: 'export', - label: 'Export package', - value: exportableRecommendations.length > 0 - ? `${exportableRecommendations.length} code-ready recommendation${exportableRecommendations.length === 1 ? '' : 's'} available.` - : 'Export all still works, but no generated code is attached yet.', - isReady: Boolean(report?.site?.id), - }, - { - id: 'platform', - label: 'Step 4 output', - value: entitlements?.canPlatformOutput - ? `${selectedPlatformLabel} output is available for this workspace.` - : `${selectedPlatformLabel} output requires Premium access.`, - isReady: Boolean(entitlements?.canPlatformOutput), - }, - ]; - const overviewStats = [ - { - label: 'Pages analyzed', - value: crawlPlan?.actualPagesAnalyzed || analyzedPages.length || 0, - helper: 'Crawl total', - iconPath: icon.mdiFileDocumentOutline, - }, - { - label: 'Recommendations', - value: recommendations.length, - helper: 'Next actions', - iconPath: icon.mdiLightbulbOutline, - }, - { - label: 'Structured data', - value: report?.analysis?.crawlSummary?.pagesWithStructuredData ?? (report?.analysis?.schema?.hasStructuredData ? 1 : 0), - helper: 'Pages with schema', - iconPath: icon.mdiCheckCircleOutline, - }, - { - label: 'JSON-LD blocks', - value: report?.analysis?.schema?.jsonLd?.count || 0, - helper: 'Detected snippets', - iconPath: icon.mdiCodeJson, - }, - { - label: 'Failed fetches', - value: report?.analysis?.crawlSummary?.failedPages ?? failedPages.length, - helper: 'Needs follow-up', - iconPath: icon.mdiAlertCircleOutline, - }, - { - label: 'Invalid blocks', - value: invalidJsonLdBlocks.length, - helper: 'Needs cleanup', - iconPath: icon.mdiAlertOutline, - }, - ]; - - React.useEffect(() => { - if ((draftIncludeTargets.length > 0 || draftExcludeTargets.length > 0) && !openSections.targeting) { - setOpenSections((currentSections) => ({ - ...currentSections, - targeting: true, - })); - } - }, [draftExcludeTargets.length, draftIncludeTargets.length, openSections.targeting]); - - React.useEffect(() => { - if (activePageFilter === 'failed' && failedPages.length > 0) { - setIsFailedPagesExpanded(true); - } - }, [activePageFilter, failedPages.length]); - - const toggleSection = (section: SetupSectionId) => { - setOpenSections((currentSections) => ({ - ...currentSections, - [section]: !currentSections[section], - })); - }; - - const toggleRecommendationCode = (recommendationId: string) => { - setExpandedRecommendationIds((currentIds) => ({ - ...currentIds, - [recommendationId]: !currentIds[recommendationId], - })); + const firecrawlStatus = report?.analysis?.firecrawl || { + provider: 'firecrawl', + configured: false, + wouldHandleJavascript: true, + wouldHandleSitemapDiscovery: true, + message: 'Firecrawl scaffold is wired in code, but this environment still needs a FIRECRAWL_API_KEY before activation.', }; const handleAnalyze = async () => { @@ -801,7 +183,7 @@ const SchemaAnalyzerPage = () => { if (isRequestedPagesOverLimit) { notify( 'error', - `This analyzer supports up to ${maxPagesPerCrawl} page${maxPagesPerCrawl === 1 ? '' : 's'} per crawl. Reduce the page count to continue.`, + `Your current plan allows up to ${maxPagesPerCrawl} page${maxPagesPerCrawl === 1 ? '' : 's'} per crawl. Upgrade to Advanced Crawl to go beyond that limit.`, ); return; } @@ -811,8 +193,6 @@ const SchemaAnalyzerPage = () => { const response = await axios.post('/sites/analyze', { url: url.trim(), requestedPages, - includeTargets, - excludeTargets, }); setReport(response.data); @@ -972,6 +352,8 @@ const SchemaAnalyzerPage = () => { } }; + const crawlPlan = report?.analysis?.crawlPlan; + return ( <> @@ -986,39 +368,39 @@ const SchemaAnalyzerPage = () => { {''} - -
+ +

Analyze a customer site

-

- Enter a domain or full URL, choose how many pages to review, and optionally focus the report on the - folders, categories, or pages that matter most. This setup keeps the page cleaner on mobile while still - supporting up to {maxPagesPerCrawl} pages per crawl. +

+ Enter a domain or full URL. The app will detect the platform, crawl up to your allowed page limit, + inspect structured data across the discovered pages, generate rules-based schema recommendations, + and prepare developer-ready code snippets.

-
-
- - setUrl(event.target.value)} - onKeyDown={(event) => { - if (event.key === 'Enter') { - event.preventDefault(); - handleAnalyze().catch(() => null); - } - }} - /> - +
+ + setUrl(event.target.value)} + onKeyDown={(event) => { + if (event.key === 'Enter') { + event.preventDefault(); + handleAnalyze().catch(() => null); + } + }} + /> + +
-
+
{ /> -
-
Quick setup
-
Pick a page count here, then use Target pages below if you want a more focused report.
-
+ + +
+ + {isRequestedPagesOverLimit && ( +
+ You requested {requestedPages} pages, but this account is capped at {maxPagesPerCrawl}. Upgrade to + Advanced Crawl to raise that limit. +
+ )} + + + { + handleAnalyze().catch(() => null); + }} + /> + { + const combined = exportableRecommendations + .map((recommendation) => recommendation.suggested_schema) + .filter(Boolean) + .join('\n\n'); + navigator.clipboard + .writeText(combined) + .then(() => notify('success', 'All schema code copied to clipboard.')) + .catch((error) => { + console.error('Copy all code failed:', error); + notify('error', 'Unable to copy the combined code.'); + }); + }} + /> + { + handlePlatformOutputCheck().catch(() => null); + }} + /> +
- {isRequestedPagesOverLimit && ( -
- You requested {requestedPages} pages, but this analyzer is capped at {maxPagesPerCrawl}. Reduce the page count to continue. +
+
+
+
+

Paywall status

+

+ Advanced Crawl is now enforced and active. Premium still reserves Step 4 platform output. Firecrawl is scaffolded for sitemap + JS-rendered crawling, but not activated yet. +

+
+ + {entitlements?.canPlatformOutput ? 'Premium access' : 'Basic access'} + +
+ +
+
+ Advanced crawl entitlement + + {entitlements?.canAdvancedCrawl ? 'Unlocked' : 'Locked'} + +
+
+ Max pages per crawl + {maxPagesPerCrawl} +
+
+ Platform-specific Step 4 output + + {entitlements?.canPlatformOutput ? 'Reserved' : 'Premium only'} + +
+
- )} -
- toggleSection('targeting')} - > -
- -