From fa68f426aad4e2f099791b4d389788ec0a6c5e25 Mon Sep 17 00:00:00 2001 From: Flatlogic Bot Date: Tue, 14 Apr 2026 16:46:48 +0000 Subject: [PATCH] Autosave: 20260414-164650 --- ...162000-add-site-entitlement-permissions.js | 96 +++ ...inistrator-site-entitlement-permissions.js | 130 +++++ backend/src/services/firecrawl.js | 63 ++ backend/src/services/siteEntitlements.js | 104 ++++ backend/src/services/sites.js | 551 ++++++++++++++++-- frontend/src/helpers/siteEntitlements.ts | 23 + frontend/src/pages/sites/analyzer.tsx | 516 +++++++++++++--- 7 files changed, 1346 insertions(+), 137 deletions(-) create mode 100644 backend/src/db/seeders/20260414162000-add-site-entitlement-permissions.js create mode 100644 backend/src/db/seeders/20260414162500-apply-administrator-site-entitlement-permissions.js create mode 100644 backend/src/services/firecrawl.js create mode 100644 backend/src/services/siteEntitlements.js create mode 100644 frontend/src/helpers/siteEntitlements.ts diff --git a/backend/src/db/seeders/20260414162000-add-site-entitlement-permissions.js b/backend/src/db/seeders/20260414162000-add-site-entitlement-permissions.js new file mode 100644 index 0000000..a45f8c6 --- /dev/null +++ b/backend/src/db/seeders/20260414162000-add-site-entitlement-permissions.js @@ -0,0 +1,96 @@ +const { v4: uuid } = require('uuid'); + +const PERMISSION_NAMES = [ + 'USE_ADVANCED_CRAWL', + 'USE_PLATFORM_OUTPUT', +]; + +module.exports = { + async up(queryInterface) { + const transaction = await queryInterface.sequelize.transaction(); + + try { + const createdAt = new Date(); + const updatedAt = createdAt; + + const existingPermissions = await queryInterface.sequelize.query( + 'SELECT id, name FROM permissions WHERE name IN (:permissionNames);', + { + replacements: { permissionNames: PERMISSION_NAMES }, + transaction, + type: queryInterface.sequelize.QueryTypes.SELECT, + }, + ); + + const existingPermissionNames = new Set( + existingPermissions.map((permission) => permission.name), + ); + + const missingPermissions = PERMISSION_NAMES + .filter((permissionName) => !existingPermissionNames.has(permissionName)) + .map((permissionName) => ({ + id: uuid(), + name: permissionName, + createdAt, + updatedAt, + })); + + if (missingPermissions.length > 0) { + await queryInterface.bulkInsert('permissions', missingPermissions, { + transaction, + }); + } + + await transaction.commit(); + } catch (error) { + await transaction.rollback(); + throw error; + } + }, + + async down(queryInterface) { + const transaction = await queryInterface.sequelize.transaction(); + + try { + const permissions = await queryInterface.sequelize.query( + 'SELECT id FROM permissions WHERE name IN (:permissionNames);', + { + replacements: { permissionNames: PERMISSION_NAMES }, + transaction, + type: queryInterface.sequelize.QueryTypes.SELECT, + }, + ); + + if (permissions.length > 0) { + await queryInterface.bulkDelete( + 'rolesPermissionsPermissions', + { + permissionId: permissions.map((permission) => permission.id), + }, + { transaction }, + ); + + await queryInterface.bulkDelete( + 'usersCustom_permissionsPermissions', + { + permissionId: permissions.map((permission) => permission.id), + }, + { transaction }, + ); + + await queryInterface.bulkDelete( + 'permissions', + { + id: permissions.map((permission) => permission.id), + }, + { transaction }, + ); + } + + await transaction.commit(); + } catch (error) { + await transaction.rollback(); + throw error; + } + }, +}; diff --git a/backend/src/db/seeders/20260414162500-apply-administrator-site-entitlement-permissions.js b/backend/src/db/seeders/20260414162500-apply-administrator-site-entitlement-permissions.js new file mode 100644 index 0000000..d89d57a --- /dev/null +++ b/backend/src/db/seeders/20260414162500-apply-administrator-site-entitlement-permissions.js @@ -0,0 +1,130 @@ +const ROLE_NAME = 'Administrator'; +const PERMISSION_NAMES = [ + 'USE_ADVANCED_CRAWL', + 'USE_PLATFORM_OUTPUT', +]; + +module.exports = { + async up(queryInterface) { + const transaction = await queryInterface.sequelize.transaction(); + + try { + const createdAt = new Date(); + const updatedAt = createdAt; + + const role = await queryInterface.sequelize.query( + 'SELECT id FROM roles WHERE name = :roleName LIMIT 1;', + { + replacements: { roleName: ROLE_NAME }, + transaction, + type: queryInterface.sequelize.QueryTypes.SELECT, + }, + ); + + if (!role[0]) { + await transaction.commit(); + return; + } + + const permissions = await queryInterface.sequelize.query( + 'SELECT id FROM permissions WHERE name IN (:permissionNames);', + { + replacements: { permissionNames: PERMISSION_NAMES }, + transaction, + type: queryInterface.sequelize.QueryTypes.SELECT, + }, + ); + + if (permissions.length === 0) { + await transaction.commit(); + return; + } + + const existingPairs = await queryInterface.sequelize.query( + `SELECT "permissionId" + FROM "rolesPermissionsPermissions" + WHERE "roles_permissionsId" = :roleId + AND "permissionId" IN (:permissionIds);`, + { + replacements: { + roleId: role[0].id, + permissionIds: permissions.map((permission) => permission.id), + }, + transaction, + type: queryInterface.sequelize.QueryTypes.SELECT, + }, + ); + + const existingPermissionIds = new Set( + existingPairs.map((pair) => pair.permissionId), + ); + + const missingPairs = permissions + .filter((permission) => !existingPermissionIds.has(permission.id)) + .map((permission) => ({ + createdAt, + updatedAt, + roles_permissionsId: role[0].id, + permissionId: permission.id, + })); + + if (missingPairs.length > 0) { + await queryInterface.bulkInsert( + 'rolesPermissionsPermissions', + missingPairs, + { transaction }, + ); + } + + await transaction.commit(); + } catch (error) { + await transaction.rollback(); + throw error; + } + }, + + async down(queryInterface) { + const transaction = await queryInterface.sequelize.transaction(); + + try { + const role = await queryInterface.sequelize.query( + 'SELECT id FROM roles WHERE name = :roleName LIMIT 1;', + { + replacements: { roleName: ROLE_NAME }, + transaction, + type: queryInterface.sequelize.QueryTypes.SELECT, + }, + ); + + if (!role[0]) { + await transaction.commit(); + return; + } + + const permissions = await queryInterface.sequelize.query( + 'SELECT id FROM permissions WHERE name IN (:permissionNames);', + { + replacements: { permissionNames: PERMISSION_NAMES }, + transaction, + type: queryInterface.sequelize.QueryTypes.SELECT, + }, + ); + + if (permissions.length > 0) { + await queryInterface.bulkDelete( + 'rolesPermissionsPermissions', + { + roles_permissionsId: role[0].id, + permissionId: permissions.map((permission) => permission.id), + }, + { transaction }, + ); + } + + await transaction.commit(); + } catch (error) { + await transaction.rollback(); + throw error; + } + }, +}; diff --git a/backend/src/services/firecrawl.js b/backend/src/services/firecrawl.js new file mode 100644 index 0000000..8a5fe02 --- /dev/null +++ b/backend/src/services/firecrawl.js @@ -0,0 +1,63 @@ +const FIRECRAWL_DEFAULT_BASE_URL = 'https://api.firecrawl.dev/v1'; + +function toBoolean(value, defaultValue = false) { + if (value === undefined || value === null || value === '') { + return defaultValue; + } + + if (typeof value === 'boolean') { + return value; + } + + const normalizedValue = String(value).trim().toLowerCase(); + + if (['1', 'true', 'yes', 'on'].includes(normalizedValue)) { + return true; + } + + if (['0', 'false', 'no', 'off'].includes(normalizedValue)) { + return false; + } + + return defaultValue; +} + +function getFirecrawlRuntime() { + const apiKey = String(process.env.FIRECRAWL_API_KEY || '').trim(); + const baseUrl = String( + process.env.FIRECRAWL_BASE_URL || FIRECRAWL_DEFAULT_BASE_URL, + ).trim(); + const enabled = toBoolean(process.env.FIRECRAWL_ENABLED, true); + + return { + provider: 'firecrawl', + baseUrl, + enabled, + configured: Boolean(apiKey), + hasApiKey: Boolean(apiKey), + mode: 'scaffold_only', + }; +} + +function getFirecrawlScaffold({ requestedPages, entitlements } = {}) { + const runtime = getFirecrawlRuntime(); + const wantsAdvancedCrawl = Number(requestedPages || 1) > 1; + const advancedCrawlUnlocked = Boolean(entitlements?.canAdvancedCrawl); + const shouldUseFirecrawlLater = runtime.enabled && (wantsAdvancedCrawl || advancedCrawlUnlocked); + + return { + ...runtime, + status: runtime.configured ? 'ready_for_activation' : 'awaiting_api_key', + wouldHandleJavascript: true, + wouldHandleSitemapDiscovery: true, + shouldUseFirecrawlLater, + message: runtime.configured + ? 'Firecrawl scaffold is wired and ready for the next activation step, but this analyzer still uses the built-in crawler today.' + : 'Firecrawl scaffold is wired, but FIRECRAWL_API_KEY is not set yet. The analyzer still uses the built-in crawler for now.', + }; +} + +module.exports = { + getFirecrawlRuntime, + getFirecrawlScaffold, +}; diff --git a/backend/src/services/siteEntitlements.js b/backend/src/services/siteEntitlements.js new file mode 100644 index 0000000..ce3ec26 --- /dev/null +++ b/backend/src/services/siteEntitlements.js @@ -0,0 +1,104 @@ +const ValidationError = require('./notifications/errors/validation'); + +const BASIC_MAX_PAGES_PER_CRAWL = 1; +const ADVANCED_MAX_PAGES_PER_CRAWL = 25; +const ADVANCED_CRAWL_PERMISSION = 'USE_ADVANCED_CRAWL'; +const PLATFORM_OUTPUT_PERMISSION = 'USE_PLATFORM_OUTPUT'; + +function getPermissionNames(currentUser) { + return new Set([ + ...((currentUser?.custom_permissions || []).map((permission) => permission.name)), + ...((currentUser?.app_role_permissions || []).map((permission) => permission.name)), + ]); +} + +function isAdministrator(currentUser) { + return currentUser?.app_role?.name === 'Administrator'; +} + +function hasEntitlement(currentUser, permissionName) { + if (!permissionName) { + return false; + } + + if (isAdministrator(currentUser)) { + return true; + } + + return getPermissionNames(currentUser).has(permissionName); +} + +function getSiteEntitlements(currentUser) { + const canAdvancedCrawl = hasEntitlement(currentUser, ADVANCED_CRAWL_PERMISSION); + const canPlatformOutput = hasEntitlement(currentUser, PLATFORM_OUTPUT_PERMISSION); + + return { + canAdvancedCrawl, + canPlatformOutput, + maxPagesPerCrawl: canAdvancedCrawl + ? ADVANCED_MAX_PAGES_PER_CRAWL + : BASIC_MAX_PAGES_PER_CRAWL, + permissions: { + advancedCrawl: ADVANCED_CRAWL_PERMISSION, + platformOutput: PLATFORM_OUTPUT_PERMISSION, + }, + }; +} + +function parseRequestedPages(rawRequestedPages) { + if ( + rawRequestedPages === undefined + || rawRequestedPages === null + || rawRequestedPages === '' + ) { + return 1; + } + + const parsed = Number(rawRequestedPages); + + if (!Number.isInteger(parsed) || parsed < 1) { + throw new ValidationError('errors.validation.message'); + } + + return parsed; +} + +function ensureRequestedPagesAllowed(requestedPages, currentUser) { + const entitlements = getSiteEntitlements(currentUser); + + if (requestedPages > entitlements.maxPagesPerCrawl) { + const error = new Error( + `Your current plan allows up to ${entitlements.maxPagesPerCrawl} page${entitlements.maxPagesPerCrawl === 1 ? '' : 's'} per crawl. Upgrade to Advanced Crawl to analyze ${requestedPages} pages.`, + ); + error.code = 403; + throw error; + } + + return entitlements; +} + +function ensurePlatformOutputAllowed(currentUser, platform) { + const entitlements = getSiteEntitlements(currentUser); + + if (!entitlements.canPlatformOutput) { + const error = new Error( + `Platform-specific schema output${platform ? ` for ${platform}` : ''} is part of the Premium plan. Upgrade to unlock Step 4 code generation.`, + ); + error.code = 403; + throw error; + } + + return entitlements; +} + +module.exports = { + BASIC_MAX_PAGES_PER_CRAWL, + ADVANCED_MAX_PAGES_PER_CRAWL, + ADVANCED_CRAWL_PERMISSION, + PLATFORM_OUTPUT_PERMISSION, + getSiteEntitlements, + hasEntitlement, + parseRequestedPages, + ensureRequestedPagesAllowed, + ensurePlatformOutputAllowed, +}; diff --git a/backend/src/services/sites.js b/backend/src/services/sites.js index 626e5e1..9ee0055 100644 --- a/backend/src/services/sites.js +++ b/backend/src/services/sites.js @@ -2,9 +2,18 @@ const axios = require('axios'); const db = require('../db/models'); const ValidationError = require('./notifications/errors/validation'); const EmailSender = require('./email'); +const { + getSiteEntitlements, + parseRequestedPages, + ensureRequestedPagesAllowed, + ensurePlatformOutputAllowed, +} = require('./siteEntitlements'); +const { getFirecrawlScaffold } = require('./firecrawl'); const REQUEST_TIMEOUT = 15000; const PREVIEW_LIMIT = 5; +const PAGE_PREVIEW_LIMIT = 10; +const NON_HTML_FILE_PATTERN = /\.(?:7z|avi|bmp|css|csv|docx?|eot|gif|ico|jpe?g|js|json|map|mov|mp3|mp4|pdf|png|pptx?|rar|svg|tar|tgz|txt|wav|webm|webp|woff2?|xlsx?|xml|zip)$/i; function normalizeUrl(rawUrl) { if (!rawUrl || typeof rawUrl !== 'string') { @@ -229,7 +238,383 @@ function detectPlatform(html, headers, analyzedUrl) { }; } -function buildFailureAnalysis(normalizedUrl, error) { +function isHtmlLikeResponse(response) { + const contentType = String(response?.headers?.['content-type'] || '').toLowerCase(); + + if (!contentType) { + return true; + } + + return ( + contentType.includes('text/html') + || contentType.includes('application/xhtml+xml') + ); +} + +function normalizeAllowedHostnames(allowedHostnames) { + if (allowedHostnames instanceof Set) { + return new Set( + Array.from(allowedHostnames).map((hostname) => String(hostname).toLowerCase()), + ); + } + + if (Array.isArray(allowedHostnames)) { + return new Set( + allowedHostnames.map((hostname) => String(hostname).toLowerCase()), + ); + } + + if (allowedHostnames) { + return new Set([String(allowedHostnames).toLowerCase()]); + } + + return new Set(); +} + +function normalizeCrawlUrl(rawUrl, parentUrl, allowedHostnames) { + if (!rawUrl || typeof rawUrl !== 'string') { + return null; + } + + const trimmedUrl = rawUrl.trim(); + + if ( + !trimmedUrl + || trimmedUrl.startsWith('#') + || /^mailto:/i.test(trimmedUrl) + || /^tel:/i.test(trimmedUrl) + || /^javascript:/i.test(trimmedUrl) + || /^data:/i.test(trimmedUrl) + ) { + return null; + } + + let parsedUrl; + + try { + parsedUrl = new URL(trimmedUrl, parentUrl); + } catch (error) { + return null; + } + + if (!['http:', 'https:'].includes(parsedUrl.protocol)) { + return null; + } + + const allowedHostnamesSet = normalizeAllowedHostnames(allowedHostnames); + + if ( + allowedHostnamesSet.size > 0 + && !allowedHostnamesSet.has(parsedUrl.hostname.toLowerCase()) + ) { + return null; + } + + if (NON_HTML_FILE_PATTERN.test(parsedUrl.pathname)) { + return null; + } + + parsedUrl.hash = ''; + + return normalizeUrl(parsedUrl.toString()); +} + +function extractInternalLinks(html, pageUrl, allowedHostnames) { + const matches = [ + ...String(html || '').matchAll(/]*href=(?:"([^"]+)"|'([^']+)'|([^\s>]+))/gi), + ]; + + return Array.from( + new Set( + matches + .map((match) => match[1] || match[2] || match[3] || '') + .map((href) => normalizeCrawlUrl(href, pageUrl, allowedHostnames)) + .filter(Boolean), + ), + ); +} + +function summarizeCrawlError(error) { + if (axios.isAxiosError(error)) { + if (error.response) { + return `Request failed with status ${error.response.status}`; + } + + return error.message; + } + + return error.message || 'Unknown crawl error'; +} + +async function fetchAnalyzedPage(pageUrl, allowedHostnames) { + const response = await axios.get(pageUrl, { + timeout: REQUEST_TIMEOUT, + maxRedirects: 5, + responseType: 'text', + headers: { + Accept: + 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'User-Agent': + 'Mozilla/5.0 (compatible; SchemaCrawlerBot/1.0; +https://flatlogic.com)', + }, + }); + + if (!isHtmlLikeResponse(response)) { + const error = new Error('Skipped non-HTML response during crawl.'); + error.code = 415; + throw error; + } + + const analyzedUrl = + response.request?.res?.responseUrl || response.config?.url || pageUrl; + const normalizedAnalyzedUrl = normalizeUrl(analyzedUrl); + const normalizedAllowedHostnames = normalizeAllowedHostnames(allowedHostnames); + const analyzedHostname = new URL(normalizedAnalyzedUrl).hostname.toLowerCase(); + normalizedAllowedHostnames.add(analyzedHostname); + + if (allowedHostnames instanceof Set) { + allowedHostnames.add(analyzedHostname); + } + const html = typeof response.data === 'string' ? response.data : ''; + const pageTitle = extractPageTitle(html); + const platform = detectPlatform(html, response.headers, normalizedAnalyzedUrl); + const schema = extractSchemaSummary(html); + const pageSignals = inferPageSignals( + html, + normalizedAnalyzedUrl, + pageTitle, + platform, + ); + + return { + requestedUrl: pageUrl, + analyzedUrl: normalizedAnalyzedUrl, + pageTitle, + statusCode: response.status, + html, + platform, + schema, + pageSignals, + discoveredLinks: extractInternalLinks( + html, + normalizedAnalyzedUrl, + normalizedAllowedHostnames, + ), + }; +} + +async function crawlPages(baseUrl, requestedPages) { + const normalizedBaseUrl = normalizeUrl(baseUrl); + const allowedHostnames = new Set([new URL(normalizedBaseUrl).hostname.toLowerCase()]); + const visitedUrls = new Set(); + const queuedUrls = new Set([normalizedBaseUrl]); + const pendingUrls = [normalizedBaseUrl]; + const pages = []; + const failedPages = []; + let discoveredInternalPages = 0; + + while (pendingUrls.length > 0 && pages.length < requestedPages) { + const nextUrl = pendingUrls.shift(); + + if (!nextUrl || visitedUrls.has(nextUrl)) { + continue; + } + + visitedUrls.add(nextUrl); + + try { + const page = await fetchAnalyzedPage(nextUrl, allowedHostnames); + visitedUrls.add(page.analyzedUrl); + queuedUrls.add(page.analyzedUrl); + pages.push(page); + + page.discoveredLinks.forEach((linkUrl) => { + if (!visitedUrls.has(linkUrl) && !queuedUrls.has(linkUrl)) { + queuedUrls.add(linkUrl); + pendingUrls.push(linkUrl); + discoveredInternalPages += 1; + } + }); + } catch (error) { + failedPages.push({ + url: nextUrl, + error: summarizeCrawlError(error), + }); + } + } + + return { + pages, + failedPages, + discoveredInternalPages, + }; +} + +function buildAggregateSchema(pageAnalyses) { + const jsonLdTypes = new Set(); + const invalidBlocks = []; + let jsonLdCount = 0; + let microdataCount = 0; + let rdfaCount = 0; + + pageAnalyses.forEach((page) => { + const schema = page.schema || {}; + + jsonLdCount += schema.jsonLd?.count || 0; + microdataCount += schema.microdata?.count || 0; + rdfaCount += schema.rdfa?.count || 0; + + (schema.jsonLd?.types || []).forEach((typeName) => jsonLdTypes.add(typeName)); + (schema.jsonLd?.invalidBlocks || []).forEach((block) => { + invalidBlocks.push({ + ...block, + url: page.analyzedUrl, + }); + }); + }); + + return { + hasStructuredData: pageAnalyses.some((page) => page.schema?.hasStructuredData), + jsonLd: { + count: jsonLdCount, + types: Array.from(jsonLdTypes), + invalidBlocks, + }, + microdata: { + count: microdataCount, + detected: microdataCount > 0, + }, + rdfa: { + count: rdfaCount, + detected: rdfaCount > 0, + }, + }; +} + +function buildAggregateSignals(pageAnalyses) { + return pageAnalyses.reduce((accumulator, page) => { + const pageSignals = page.pageSignals || {}; + + return { + hasFaqHints: accumulator.hasFaqHints || Boolean(pageSignals.hasFaqHints), + hasBlogHints: accumulator.hasBlogHints || Boolean(pageSignals.hasBlogHints), + hasProductHints: accumulator.hasProductHints || Boolean(pageSignals.hasProductHints), + hasLocalBusinessHints: + accumulator.hasLocalBusinessHints || Boolean(pageSignals.hasLocalBusinessHints), + faqPages: accumulator.faqPages + (pageSignals.hasFaqHints ? 1 : 0), + blogPages: accumulator.blogPages + (pageSignals.hasBlogHints ? 1 : 0), + productPages: accumulator.productPages + (pageSignals.hasProductHints ? 1 : 0), + localBusinessPages: + accumulator.localBusinessPages + (pageSignals.hasLocalBusinessHints ? 1 : 0), + }; + }, { + hasFaqHints: false, + hasBlogHints: false, + hasProductHints: false, + hasLocalBusinessHints: false, + faqPages: 0, + blogPages: 0, + productPages: 0, + localBusinessPages: 0, + }); +} + +function buildCrawlNotice({ + requestedPages, + actualPagesAnalyzed, + failedPages, + discoveredInternalPages, + firecrawl, +}) { + if (requestedPages <= 1) { + return null; + } + + const parts = [ + `Advanced crawl analyzed ${actualPagesAnalyzed} of ${requestedPages} requested page${requestedPages === 1 ? '' : 's'}.`, + ]; + + if (discoveredInternalPages + 1 < requestedPages) { + parts.push('Fewer crawlable internal HTML pages were discovered than requested.'); + } + + if (failedPages > 0) { + parts.push(`${failedPages} page${failedPages === 1 ? '' : 's'} could not be fetched during the crawl.`); + } + + if (firecrawl?.message) { + parts.push(firecrawl.message); + } + + return parts.join(' '); +} + +function buildAggregateAnalysis({ + normalizedUrl, + pageAnalyses, + requestedPages, + entitlements, + discoveredInternalPages, + failedPages, + firecrawl, +}) { + const homepage = pageAnalyses[0]; + const finishedAt = new Date(); + const aggregateSchema = buildAggregateSchema(pageAnalyses); + const aggregateSignals = buildAggregateSignals(pageAnalyses); + + return { + requestedUrl: normalizedUrl, + analyzedUrl: homepage?.analyzedUrl || normalizedUrl, + pageTitle: homepage?.pageTitle || null, + fetchedAt: finishedAt.toISOString(), + statusCode: homepage?.statusCode || null, + platform: homepage?.platform || { + detected: 'unknown', + label: 'Unknown', + matchedSignals: [], + }, + schema: aggregateSchema, + recommendationCount: 0, + crawlPlan: { + requestedPages, + allowedPages: entitlements.maxPagesPerCrawl, + actualPagesAnalyzed: pageAnalyses.length, + advancedCrawlEnabled: entitlements.canAdvancedCrawl, + provider: 'internal', + }, + crawlSummary: { + pagesWithStructuredData: pageAnalyses.filter((page) => page.schema?.hasStructuredData).length, + pagesWithoutStructuredData: pageAnalyses.filter((page) => !page.schema?.hasStructuredData).length, + pagesWithInvalidJsonLd: pageAnalyses.filter( + (page) => (page.schema?.jsonLd?.invalidBlocks || []).length > 0, + ).length, + failedPages: failedPages.length, + discoveredInternalPages, + }, + pages: pageAnalyses.slice(0, PAGE_PREVIEW_LIMIT).map((page) => ({ + url: page.analyzedUrl, + title: page.pageTitle, + statusCode: page.statusCode, + hasStructuredData: Boolean(page.schema?.hasStructuredData), + jsonLdTypes: page.schema?.jsonLd?.types || [], + })), + failedPages: failedPages.slice(0, PAGE_PREVIEW_LIMIT), + aggregateSignals, + entitlements, + firecrawl, + notice: buildCrawlNotice({ + requestedPages, + actualPagesAnalyzed: pageAnalyses.length, + failedPages: failedPages.length, + discoveredInternalPages, + firecrawl, + }), + finishedAt, + }; +} + +function buildFailureAnalysis(normalizedUrl, error, firecrawl) { const isAxiosError = axios.isAxiosError(error); return { @@ -247,6 +632,7 @@ function buildFailureAnalysis(normalizedUrl, error) { microdata: { count: 0, detected: false }, rdfa: { count: 0, detected: false }, }, + firecrawl, error: isAxiosError ? error.response ? `Request failed with status ${error.response.status}` @@ -445,22 +831,25 @@ function buildRecommendationCode({ baseUrl, siteName, schemaType, pageScope }) { }); } -function buildRecommendations({ baseUrl, siteName, analysis, html }) { +function buildRecommendations({ baseUrl, siteName, analysis, html, pageAnalyses = [] }) { const recommendationList = []; const schemaTypes = analysis?.schema?.jsonLd?.types || []; - const pageSignals = inferPageSignals( - html, - analysis?.analyzedUrl, - analysis?.pageTitle, - analysis?.platform || {}, - ); + const aggregateSignals = analysis?.aggregateSignals || {}; + const pageSignals = pageAnalyses.length > 0 + ? aggregateSignals + : inferPageSignals( + html, + analysis?.analyzedUrl, + analysis?.pageTitle, + analysis?.platform || {}, + ); if ((analysis?.schema?.jsonLd?.invalidBlocks || []).length > 0) { recommendationList.push({ - title: 'Fix invalid JSON-LD blocks already on the homepage', + title: 'Fix invalid JSON-LD blocks already on the analyzed pages', recommendation_type: 'fix_existing', schema_type: 'JSON-LD', - page_scope: 'homepage', + page_scope: pageAnalyses.length > 1 ? 'crawl-wide' : 'homepage', priority: 'high', reason: 'At least one JSON-LD block could not be parsed. Invalid structured data can prevent search engines from using your markup.', @@ -524,7 +913,9 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) { page_scope: 'internal-pages', priority: 'medium', reason: - 'Breadcrumb schema helps search engines understand content hierarchy and page relationships.', + pageAnalyses.length > 1 + ? 'Multiple internal pages were analyzed without BreadcrumbList schema, so search engines may miss content hierarchy and page relationships.' + : 'Breadcrumb schema helps search engines understand content hierarchy and page relationships.', expected_impact: 'Can improve result presentation and site structure understanding.', suggested_schema: buildRecommendationCode({ @@ -537,8 +928,8 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) { } if ( - pageSignals.hasProductHints && - !hasSchemaType(schemaTypes, ['Product']) + pageSignals.hasProductHints + && !hasSchemaType(schemaTypes, ['Product']) ) { recommendationList.push({ title: 'Add Product schema on product detail pages', @@ -547,7 +938,9 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) { page_scope: 'product-pages', priority: 'high', reason: - 'The site shows product/ecommerce signals, but no Product schema was detected on the analyzed page.', + pageAnalyses.length > 1 + ? `Product or ecommerce signals appeared across ${pageSignals.productPages || 1} analyzed page${(pageSignals.productPages || 1) === 1 ? '' : 's'}, but no Product schema was detected.` + : 'The site shows product/ecommerce signals, but no Product schema was detected on the analyzed page.', expected_impact: 'Improves eligibility for product-rich search experiences and helps AI systems interpret commercial details.', suggested_schema: buildRecommendationCode({ @@ -560,8 +953,8 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) { } if ( - pageSignals.hasBlogHints && - !hasSchemaType(schemaTypes, ['Article', 'BlogPosting', 'NewsArticle']) + pageSignals.hasBlogHints + && !hasSchemaType(schemaTypes, ['Article', 'BlogPosting', 'NewsArticle']) ) { recommendationList.push({ title: 'Add BlogPosting schema on editorial content', @@ -570,7 +963,9 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) { page_scope: 'article-pages', priority: 'medium', reason: - 'The site appears to publish editorial content, but article-level schema was not detected.', + pageAnalyses.length > 1 + ? `Editorial or blog signals appeared across ${pageSignals.blogPages || 1} analyzed page${(pageSignals.blogPages || 1) === 1 ? '' : 's'}, but article-level schema was not detected.` + : 'The site appears to publish editorial content, but article-level schema was not detected.', expected_impact: 'Clarifies content ownership, publication dates, and headline structure for search engines and answer engines.', suggested_schema: buildRecommendationCode({ @@ -583,8 +978,8 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) { } if ( - pageSignals.hasFaqHints && - !hasSchemaType(schemaTypes, ['FAQPage']) + pageSignals.hasFaqHints + && !hasSchemaType(schemaTypes, ['FAQPage']) ) { recommendationList.push({ title: 'Add FAQPage schema where FAQs are published', @@ -593,7 +988,9 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) { page_scope: 'faq-pages', priority: 'medium', reason: - 'FAQ content signals were found, but the page is not marked up as an FAQPage.', + pageAnalyses.length > 1 + ? `FAQ-style content signals appeared across ${pageSignals.faqPages || 1} analyzed page${(pageSignals.faqPages || 1) === 1 ? '' : 's'}, but FAQPage schema was not detected.` + : 'FAQ content signals were found, but the page is not marked up as an FAQPage.', expected_impact: 'Makes question-and-answer content easier to parse and reuse in search and AI contexts.', suggested_schema: buildRecommendationCode({ @@ -678,6 +1075,7 @@ async function buildStoredReport(siteId, currentUser) { crawl: crawl ? crawl.get({ plain: true }) : null, analysis: crawl ? parseSummary(crawl.summary) : null, recommendations: recommendations.map((item) => item.get({ plain: true })), + entitlements: getSiteEntitlements(currentUser), }; } @@ -722,7 +1120,10 @@ module.exports = class SitesService { static async analyzeHomepage(data, currentUser) { ensureCurrentUser(currentUser); + const requestedPages = parseRequestedPages(data?.requestedPages); + const entitlements = ensureRequestedPagesAllowed(requestedPages, currentUser); const normalizedUrl = normalizeUrl(data?.url || data?.base_url); + const firecrawl = getFirecrawlScaffold({ requestedPages, entitlements }); const requestedName = typeof data?.name === 'string' && data.name.trim() ? data.name.trim() @@ -789,46 +1190,37 @@ module.exports = class SitesService { } try { - const response = await axios.get(normalizedUrl, { - timeout: REQUEST_TIMEOUT, - maxRedirects: 5, - responseType: 'text', - headers: { - Accept: - 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'User-Agent': - 'Mozilla/5.0 (compatible; SchemaCrawlerBot/1.0; +https://flatlogic.com)', - }, - }); + const crawlResult = await crawlPages(normalizedUrl, requestedPages); + const pageAnalyses = crawlResult.pages; - const analyzedUrl = - response.request?.res?.responseUrl || response.config?.url || normalizedUrl; - const html = typeof response.data === 'string' ? response.data : ''; - const pageTitle = extractPageTitle(html); - const platform = detectPlatform(html, response.headers, analyzedUrl); - const schema = extractSchemaSummary(html); - const finishedAt = new Date(); + if (pageAnalyses.length === 0) { + const firstFailure = crawlResult.failedPages[0]; + const error = new Error(firstFailure?.error || 'Site analysis failed.'); + error.code = 400; + throw error; + } + + const analysis = buildAggregateAnalysis({ + normalizedUrl, + pageAnalyses, + requestedPages, + entitlements, + discoveredInternalPages: crawlResult.discoveredInternalPages, + failedPages: crawlResult.failedPages, + firecrawl, + }); + const homepage = pageAnalyses[0]; + const finishedAt = analysis.finishedAt; const recommendations = buildRecommendations({ baseUrl: normalizedUrl, - siteName: requestedName || pageTitle || defaultName, - analysis: { - analyzedUrl, - pageTitle, - platform, - schema, - }, - html, + siteName: requestedName || homepage.pageTitle || defaultName, + analysis, + html: homepage.html, + pageAnalyses, }); - const analysis = { - requestedUrl: normalizedUrl, - analyzedUrl, - pageTitle, - fetchedAt: finishedAt.toISOString(), - statusCode: response.status, - platform, - schema, - recommendationCount: recommendations.length, - }; + + analysis.recommendationCount = recommendations.length; + delete analysis.finishedAt; const finalizeTransaction = await db.sequelize.transaction(); let updatedSite; @@ -845,8 +1237,8 @@ module.exports = class SitesService { await updatedSite.update( { - name: updatedSite.name || requestedName || pageTitle || defaultName, - detected_platform: platform.detected, + name: updatedSite.name || requestedName || homepage.pageTitle || defaultName, + detected_platform: homepage.platform.detected, crawl_status: 'completed', last_crawled_at: finishedAt, updatedById: currentUser.id, @@ -860,7 +1252,7 @@ module.exports = class SitesService { { status: 'completed', finished_at: finishedAt, - pages_scanned: 1, + pages_scanned: pageAnalyses.length, summary: JSON.stringify(analysis), updatedById: currentUser.id, }, @@ -894,11 +1286,12 @@ module.exports = class SitesService { crawl: updatedCrawl.get({ plain: true }), analysis, recommendations: storedRecommendations.map((item) => item.get({ plain: true })), + entitlements, }; } catch (error) { console.error('Site analysis failed:', error); - const failureAnalysis = buildFailureAnalysis(normalizedUrl, error); + const failureAnalysis = buildFailureAnalysis(normalizedUrl, error, firecrawl); const failedAt = new Date(); const failureTransaction = await db.sequelize.transaction(); let failedSite; @@ -943,13 +1336,28 @@ module.exports = class SitesService { throw updateError; } - if (error instanceof ValidationError || axios.isAxiosError(error)) { + if ( + error instanceof ValidationError + || axios.isAxiosError(error) + || [400, 403, 404, 415].includes(error.code) + ) { return { site: failedSite.get({ plain: true }), crawl: failedCrawl.get({ plain: true }), - analysis: failureAnalysis, + analysis: { + ...failureAnalysis, + crawlPlan: { + requestedPages, + allowedPages: entitlements.maxPagesPerCrawl, + actualPagesAnalyzed: 0, + advancedCrawlEnabled: entitlements.canAdvancedCrawl, + provider: 'internal', + }, + entitlements, + }, recommendations: [], error: failureAnalysis.error, + entitlements, }; } @@ -964,6 +1372,25 @@ module.exports = class SitesService { static async exportCode(data, currentUser) { ensureCurrentUser(currentUser); + const outputMode = typeof data?.outputMode === 'string' + ? data.outputMode.trim().toLowerCase() + : 'generic'; + const platform = typeof data?.platform === 'string' + ? data.platform.trim() + : ''; + + if (outputMode === 'platform') { + ensurePlatformOutputAllowed(currentUser, platform); + + const error = new Error( + 'Premium platform-specific schema output' + + (platform ? ' for ' + platform : '') + + ' is enabled for this user, but the Step 4 generator is not connected yet.', + ); + error.code = 400; + throw error; + } + const { recommendationId, siteId } = data || {}; if (recommendationId) { diff --git a/frontend/src/helpers/siteEntitlements.ts b/frontend/src/helpers/siteEntitlements.ts new file mode 100644 index 0000000..c0d14f9 --- /dev/null +++ b/frontend/src/helpers/siteEntitlements.ts @@ -0,0 +1,23 @@ +import { hasPermission } from './userPermissions'; + +export const BASIC_MAX_PAGES_PER_CRAWL = 1; +export const ADVANCED_MAX_PAGES_PER_CRAWL = 25; +export const ADVANCED_CRAWL_PERMISSION = 'USE_ADVANCED_CRAWL'; +export const PLATFORM_OUTPUT_PERMISSION = 'USE_PLATFORM_OUTPUT'; + +export function getSiteEntitlements(user: any) { + const canAdvancedCrawl = hasPermission(user, ADVANCED_CRAWL_PERMISSION); + const canPlatformOutput = hasPermission(user, PLATFORM_OUTPUT_PERMISSION); + + return { + canAdvancedCrawl, + canPlatformOutput, + maxPagesPerCrawl: canAdvancedCrawl + ? ADVANCED_MAX_PAGES_PER_CRAWL + : BASIC_MAX_PAGES_PER_CRAWL, + permissions: { + advancedCrawl: ADVANCED_CRAWL_PERMISSION, + platformOutput: PLATFORM_OUTPUT_PERMISSION, + }, + }; +} diff --git a/frontend/src/pages/sites/analyzer.tsx b/frontend/src/pages/sites/analyzer.tsx index 0055a43..7d3bbf3 100644 --- a/frontend/src/pages/sites/analyzer.tsx +++ b/frontend/src/pages/sites/analyzer.tsx @@ -11,14 +11,64 @@ import LayoutAuthenticated from '../../layouts/Authenticated'; import SectionMain from '../../components/SectionMain'; import SectionTitleLineWithButton from '../../components/SectionTitleLineWithButton'; import { getPageTitle } from '../../config'; +import { getSiteEntitlements } from '../../helpers/siteEntitlements'; import { useAppSelector } from '../../stores/hooks'; +type Entitlements = { + canAdvancedCrawl?: boolean; + canPlatformOutput?: boolean; + maxPagesPerCrawl?: number; + permissions?: { + advancedCrawl?: string; + platformOutput?: string; + }; +}; + type AnalysisPayload = { requestedUrl?: string; analyzedUrl?: string; pageTitle?: string | null; fetchedAt?: string; statusCode?: number; + recommendationCount?: number; + notice?: string; + crawlPlan?: { + requestedPages?: number; + allowedPages?: number; + actualPagesAnalyzed?: number; + advancedCrawlEnabled?: boolean; + provider?: string; + }; + crawlSummary?: { + pagesWithStructuredData?: number; + pagesWithoutStructuredData?: number; + pagesWithInvalidJsonLd?: number; + failedPages?: number; + discoveredInternalPages?: number; + }; + pages?: { + url?: string; + title?: string | null; + statusCode?: number | null; + hasStructuredData?: boolean; + jsonLdTypes?: string[]; + }[]; + failedPages?: { + url?: string; + error?: string; + }[]; + entitlements?: Entitlements; + firecrawl?: { + provider?: string; + enabled?: boolean; + configured?: boolean; + mode?: string; + status?: string; + wouldHandleJavascript?: boolean; + wouldHandleSitemapDiscovery?: boolean; + shouldUseFirecrawlLater?: boolean; + message?: string; + }; platform?: { detected?: string; label?: string; @@ -69,20 +119,31 @@ type ReportResponse = { }; analysis?: AnalysisPayload | null; recommendations?: Recommendation[]; + entitlements?: Entitlements; error?: string; }; +const PLATFORM_OPTIONS = [ + { value: 'wordpress', label: 'WordPress' }, + { value: 'shopify', label: 'Shopify' }, + { value: 'webflow', label: 'Webflow' }, + { value: 'custom', label: 'Custom / Other' }, +]; + const initialReport: ReportResponse | null = null; const SchemaAnalyzerPage = () => { const { currentUser } = useAppSelector((state) => state.auth); const [url, setUrl] = React.useState(''); + const [requestedPages, setRequestedPages] = React.useState(1); + const [selectedPlatform, setSelectedPlatform] = React.useState('wordpress'); const [emailTo, setEmailTo] = React.useState(currentUser?.email || ''); const [report, setReport] = React.useState(initialReport); const [isAnalyzing, setIsAnalyzing] = React.useState(false); const [isExportingAll, setIsExportingAll] = React.useState(false); const [emailingId, setEmailingId] = React.useState(null); const [exportingId, setExportingId] = React.useState(null); + const [isCheckingPlatformOutput, setIsCheckingPlatformOutput] = React.useState(false); React.useEffect(() => { if (currentUser?.email) { @@ -94,10 +155,24 @@ const SchemaAnalyzerPage = () => { toast(message, { type, position: 'bottom-center' }); }, []); + const fallbackEntitlements = React.useMemo( + () => getSiteEntitlements(currentUser), + [currentUser], + ); + const entitlements = report?.entitlements || report?.analysis?.entitlements || fallbackEntitlements; + const maxPagesPerCrawl = entitlements?.maxPagesPerCrawl || 1; const recommendations = report?.recommendations || []; const exportableRecommendations = recommendations.filter( (recommendation) => recommendation.suggested_schema, ); + const isRequestedPagesOverLimit = requestedPages > maxPagesPerCrawl; + const firecrawlStatus = report?.analysis?.firecrawl || { + provider: 'firecrawl', + configured: false, + wouldHandleJavascript: true, + wouldHandleSitemapDiscovery: true, + message: 'Firecrawl scaffold is wired in code, but this environment still needs a FIRECRAWL_API_KEY before activation.', + }; const handleAnalyze = async () => { if (!url.trim()) { @@ -105,15 +180,26 @@ const SchemaAnalyzerPage = () => { return; } + if (isRequestedPagesOverLimit) { + notify( + 'error', + `Your current plan allows up to ${maxPagesPerCrawl} page${maxPagesPerCrawl === 1 ? '' : 's'} per crawl. Upgrade to Advanced Crawl to go beyond that limit.`, + ); + return; + } + try { setIsAnalyzing(true); const response = await axios.post('/sites/analyze', { url: url.trim(), + requestedPages, }); setReport(response.data); if (response.data.error) { notify('error', response.data.error); + } else if (response.data.analysis?.notice) { + notify('info', response.data.analysis.notice); } else { notify('success', 'Site analyzed successfully.'); } @@ -152,7 +238,7 @@ const SchemaAnalyzerPage = () => { }; const parseFilename = (contentDisposition?: string) => { - const match = contentDisposition?.match(/filename="?([^\";]+)"?/i); + const match = contentDisposition?.match(/filename="?([^";]+)"?/i); return match?.[1] || 'schema-export.txt'; }; @@ -238,6 +324,36 @@ const SchemaAnalyzerPage = () => { } }; + const handlePlatformOutputCheck = async () => { + if (!report?.site?.id) { + notify('error', 'Analyze a site first.'); + return; + } + + if (!entitlements?.canPlatformOutput) { + notify('info', 'Premium unlocks Step 4 platform-specific schema output.'); + return; + } + + try { + setIsCheckingPlatformOutput(true); + await axios.post('/sites/export', { + siteId: report.site.id, + outputMode: 'platform', + platform: selectedPlatform, + }, { + responseType: 'blob', + }); + } catch (error: any) { + console.error('Platform output check failed:', error); + notify('info', await extractBlobError(error)); + } finally { + setIsCheckingPlatformOutput(false); + } + }; + + const crawlPlan = report?.analysis?.crawlPlan; + return ( <> @@ -257,8 +373,9 @@ const SchemaAnalyzerPage = () => {

Analyze a customer site

- Enter a domain or full URL. The app will detect the platform, inspect homepage structured data, - generate rules-based schema recommendations, and prepare developer-ready code snippets. + Enter a domain or full URL. The app will detect the platform, crawl up to your allowed page limit, + inspect structured data across the discovered pages, generate rules-based schema recommendations, + and prepare developer-ready code snippets.

@@ -283,12 +400,60 @@ const SchemaAnalyzerPage = () => {
- +
+ + { + const nextValue = Number(event.target.value); + setRequestedPages(Number.isInteger(nextValue) && nextValue > 0 ? nextValue : 1); + }} + /> + + + + + +
+ + {isRequestedPagesOverLimit && ( +
+ You requested {requestedPages} pages, but this account is capped at {maxPagesPerCrawl}. Upgrade to + Advanced Crawl to raise that limit. +
+ )} + + { handleAnalyze().catch(() => null); }} @@ -313,45 +478,139 @@ const SchemaAnalyzerPage = () => { }); }} /> + { + handlePlatformOutputCheck().catch(() => null); + }} + />
-
-

Delivery actions

-

- Export a developer handoff file or email the latest recommendations directly. -

-
- - setEmailTo(event.target.value)} - /> - +
+
+
+
+

Paywall status

+

+ Advanced Crawl is now enforced and active. Premium still reserves Step 4 platform output. Firecrawl is scaffolded for sitemap + JS-rendered crawling, but not activated yet. +

+
+ + {entitlements?.canPlatformOutput ? 'Premium access' : 'Basic access'} + +
+ +
+
+ Advanced crawl entitlement + + {entitlements?.canAdvancedCrawl ? 'Unlocked' : 'Locked'} + +
+
+ Max pages per crawl + {maxPagesPerCrawl} +
+
+ Platform-specific Step 4 output + + {entitlements?.canPlatformOutput ? 'Reserved' : 'Premium only'} + +
+
+
+ +
+
+
+

Firecrawl scaffold

+

+ Sitemap discovery and JS-rendered crawl are planned through Firecrawl. This environment is currently using the built-in crawler only. +

+
+ + {firecrawlStatus?.configured ? 'Key detected' : 'API key needed'} + +
+ +
+
+ Current crawl provider + + {report?.analysis?.crawlPlan?.provider || 'internal'} + +
+
+ Sitemap crawl path + + {firecrawlStatus?.wouldHandleSitemapDiscovery ? 'Scaffolded' : 'Not scaffolded'} + +
+
+ JS-rendered crawl path + + {firecrawlStatus?.wouldHandleJavascript ? 'Scaffolded' : 'Not scaffolded'} + +
+
+ + {firecrawlStatus?.message && ( +
+ {firecrawlStatus.message} +
+ )} +
+ +
+

Delivery actions

+

+ Export a developer handoff file or email the latest recommendations directly. +

+
+ + setEmailTo(event.target.value)} + /> + +
+ + { + handleExportAll().catch(() => null); + }} + /> + { + handleEmailCode().catch(() => null); + }} + /> +
- - { - handleExportAll().catch(() => null); - }} - /> - { - handleEmailCode().catch(() => null); - }} - /> -
@@ -359,51 +618,158 @@ const SchemaAnalyzerPage = () => { {report?.analysis && (
-

Analysis snapshot

-
+
+
+

Site findings

+

+ High-level crawl and structured-data summary from the latest analysis run. +

+
+
+ {report.analysis.platform?.label || 'Unknown platform'} +
+
+ +
-
Platform
-
- {report.analysis.platform?.label || 'Unknown'} -
-
- {report.analysis.platform?.matchedSignals?.join(', ') || 'No strong platform signals found.'} +
Analyzed URL
+
+ {report.analysis.analyzedUrl || report.site?.base_url || '—'}
-
Structured data
-
- {report.analysis.schema?.hasStructuredData ? 'Detected' : 'Not detected'} +
Page title
+
+ {report.analysis.pageTitle || 'No title found'}
-
- JSON-LD: {report.analysis.schema?.jsonLd?.count || 0} • Microdata: {report.analysis.schema?.microdata?.count || 0} • RDFa: {report.analysis.schema?.rdfa?.count || 0} +
+
+
Pages with structured data
+
+ {report.analysis.crawlSummary?.pagesWithStructuredData ?? (report.analysis.schema?.hasStructuredData ? 1 : 0)} +
+
+
+
JSON-LD blocks found
+
+ {report.analysis.schema?.jsonLd?.count || 0}
-
-
- Requested URL:{' '} - {report.analysis.requestedUrl || '—'} -
-
- Analyzed URL:{' '} - {report.analysis.analyzedUrl || '—'} -
-
- Page title:{' '} - {report.analysis.pageTitle || '—'} -
-
- JSON-LD types found:{' '} - {(report.analysis.schema?.jsonLd?.types || []).join(', ') || 'None'} -
- {report.analysis.error && ( -
- {report.analysis.error} + {crawlPlan && ( +
+
Crawl summary
+
+ Requested pages: {crawlPlan.requestedPages || 1} + Plan limit: {crawlPlan.allowedPages || maxPagesPerCrawl} + Pages analyzed: {crawlPlan.actualPagesAnalyzed || 0}
- )} -
+ {report.analysis.notice &&
{report.analysis.notice}
} +
+ )} + + {report.analysis.crawlSummary && ( +
+
+
Pages without structured data
+
+ {report.analysis.crawlSummary.pagesWithoutStructuredData ?? 0} +
+
+
+
Failed page fetches
+
+ {report.analysis.crawlSummary.failedPages ?? 0} +
+
+
+ )} + + {(report.analysis.pages || []).length > 0 && ( +
+
Analyzed pages
+
+ {(report.analysis.pages || []).map((page) => ( +
+
{page.url}
+
+ {page.title || 'Untitled page'} +
+
+ + Status {page.statusCode || '—'} + + + {page.hasStructuredData ? 'Structured data found' : 'No structured data'} + + {(page.jsonLdTypes || []).slice(0, 3).map((typeName) => ( + + {typeName} + + ))} +
+
+ ))} +
+
+ )} + + {(report.analysis.failedPages || []).length > 0 && ( +
+
Some internal pages could not be fetched
+
    + {(report.analysis.failedPages || []).map((page) => ( +
  • + {page.url}: {page.error} +
  • + ))} +
+
+ )} + + {(report.analysis.schema?.jsonLd?.types || []).length > 0 && ( +
+
Detected JSON-LD types
+
+ {(report.analysis.schema?.jsonLd?.types || []).map((typeName) => ( + + {typeName} + + ))} +
+
+ )} + + {(report.analysis.schema?.jsonLd?.invalidBlocks || []).length > 0 && ( +
+
Invalid JSON-LD detected
+
    + {(report.analysis.schema?.jsonLd?.invalidBlocks || []).map((block) => ( +
  • + Block {block.index + 1}: {block.message} +
  • + ))} +
+
+ )} + + {report.analysis.error && ( +
+ {report.analysis.error} +
+ )}