Autosave: 20260414-164650

This commit is contained in:
Flatlogic Bot 2026-04-14 16:46:48 +00:00
parent b9a2103bb3
commit fa68f426aa
7 changed files with 1346 additions and 137 deletions

View File

@ -0,0 +1,96 @@
const { v4: uuid } = require('uuid');
// Names of the permission rows this migration inserts in `up` (when missing)
// and deletes again in `down`.
const PERMISSION_NAMES = [
'USE_ADVANCED_CRAWL',
'USE_PLATFORM_OUTPUT',
];
module.exports = {
async up(queryInterface) {
const transaction = await queryInterface.sequelize.transaction();
try {
const createdAt = new Date();
const updatedAt = createdAt;
const existingPermissions = await queryInterface.sequelize.query(
'SELECT id, name FROM permissions WHERE name IN (:permissionNames);',
{
replacements: { permissionNames: PERMISSION_NAMES },
transaction,
type: queryInterface.sequelize.QueryTypes.SELECT,
},
);
const existingPermissionNames = new Set(
existingPermissions.map((permission) => permission.name),
);
const missingPermissions = PERMISSION_NAMES
.filter((permissionName) => !existingPermissionNames.has(permissionName))
.map((permissionName) => ({
id: uuid(),
name: permissionName,
createdAt,
updatedAt,
}));
if (missingPermissions.length > 0) {
await queryInterface.bulkInsert('permissions', missingPermissions, {
transaction,
});
}
await transaction.commit();
} catch (error) {
await transaction.rollback();
throw error;
}
},
async down(queryInterface) {
const transaction = await queryInterface.sequelize.transaction();
try {
const permissions = await queryInterface.sequelize.query(
'SELECT id FROM permissions WHERE name IN (:permissionNames);',
{
replacements: { permissionNames: PERMISSION_NAMES },
transaction,
type: queryInterface.sequelize.QueryTypes.SELECT,
},
);
if (permissions.length > 0) {
await queryInterface.bulkDelete(
'rolesPermissionsPermissions',
{
permissionId: permissions.map((permission) => permission.id),
},
{ transaction },
);
await queryInterface.bulkDelete(
'usersCustom_permissionsPermissions',
{
permissionId: permissions.map((permission) => permission.id),
},
{ transaction },
);
await queryInterface.bulkDelete(
'permissions',
{
id: permissions.map((permission) => permission.id),
},
{ transaction },
);
}
await transaction.commit();
} catch (error) {
await transaction.rollback();
throw error;
}
},
};

View File

@ -0,0 +1,130 @@
// Role that this migration links to the permissions listed below.
const ROLE_NAME = 'Administrator';
// Permission names looked up in `permissions` and attached to (or detached
// from) the role through the rolesPermissionsPermissions join table.
const PERMISSION_NAMES = [
'USE_ADVANCED_CRAWL',
'USE_PLATFORM_OUTPUT',
];
module.exports = {
async up(queryInterface) {
const transaction = await queryInterface.sequelize.transaction();
try {
const createdAt = new Date();
const updatedAt = createdAt;
const role = await queryInterface.sequelize.query(
'SELECT id FROM roles WHERE name = :roleName LIMIT 1;',
{
replacements: { roleName: ROLE_NAME },
transaction,
type: queryInterface.sequelize.QueryTypes.SELECT,
},
);
if (!role[0]) {
await transaction.commit();
return;
}
const permissions = await queryInterface.sequelize.query(
'SELECT id FROM permissions WHERE name IN (:permissionNames);',
{
replacements: { permissionNames: PERMISSION_NAMES },
transaction,
type: queryInterface.sequelize.QueryTypes.SELECT,
},
);
if (permissions.length === 0) {
await transaction.commit();
return;
}
const existingPairs = await queryInterface.sequelize.query(
`SELECT "permissionId"
FROM "rolesPermissionsPermissions"
WHERE "roles_permissionsId" = :roleId
AND "permissionId" IN (:permissionIds);`,
{
replacements: {
roleId: role[0].id,
permissionIds: permissions.map((permission) => permission.id),
},
transaction,
type: queryInterface.sequelize.QueryTypes.SELECT,
},
);
const existingPermissionIds = new Set(
existingPairs.map((pair) => pair.permissionId),
);
const missingPairs = permissions
.filter((permission) => !existingPermissionIds.has(permission.id))
.map((permission) => ({
createdAt,
updatedAt,
roles_permissionsId: role[0].id,
permissionId: permission.id,
}));
if (missingPairs.length > 0) {
await queryInterface.bulkInsert(
'rolesPermissionsPermissions',
missingPairs,
{ transaction },
);
}
await transaction.commit();
} catch (error) {
await transaction.rollback();
throw error;
}
},
async down(queryInterface) {
const transaction = await queryInterface.sequelize.transaction();
try {
const role = await queryInterface.sequelize.query(
'SELECT id FROM roles WHERE name = :roleName LIMIT 1;',
{
replacements: { roleName: ROLE_NAME },
transaction,
type: queryInterface.sequelize.QueryTypes.SELECT,
},
);
if (!role[0]) {
await transaction.commit();
return;
}
const permissions = await queryInterface.sequelize.query(
'SELECT id FROM permissions WHERE name IN (:permissionNames);',
{
replacements: { permissionNames: PERMISSION_NAMES },
transaction,
type: queryInterface.sequelize.QueryTypes.SELECT,
},
);
if (permissions.length > 0) {
await queryInterface.bulkDelete(
'rolesPermissionsPermissions',
{
roles_permissionsId: role[0].id,
permissionId: permissions.map((permission) => permission.id),
},
{ transaction },
);
}
await transaction.commit();
} catch (error) {
await transaction.rollback();
throw error;
}
},
};

View File

@ -0,0 +1,63 @@
// Base URL reported by getFirecrawlRuntime when FIRECRAWL_BASE_URL is not set.
const FIRECRAWL_DEFAULT_BASE_URL = 'https://api.firecrawl.dev/v1';
/**
 * Interprets a loosely typed flag (for example an environment variable) as a boolean.
 *
 * Booleans are returned as-is. `undefined`, `null`, the empty string and any
 * unrecognised text yield `defaultValue`. Matching ignores case and
 * surrounding whitespace.
 *
 * @param {*} value - Raw flag value.
 * @param {boolean} [defaultValue=false] - Result for missing or unrecognised input.
 * @returns {boolean}
 */
function toBoolean(value, defaultValue = false) {
  if (typeof value === 'boolean') {
    return value;
  }
  if (value == null || value === '') {
    return defaultValue;
  }

  switch (String(value).trim().toLowerCase()) {
    case '1':
    case 'true':
    case 'yes':
    case 'on':
      return true;
    case '0':
    case 'false':
    case 'no':
    case 'off':
      return false;
    default:
      return defaultValue;
  }
}
/**
 * Reads the Firecrawl configuration from the process environment.
 *
 * - FIRECRAWL_API_KEY: trimmed; only its presence is reported, never its value.
 * - FIRECRAWL_BASE_URL: trimmed; when unset, empty or whitespace-only the
 *   default base URL is used instead. Trimming happens before the fallback so
 *   a blank value can no longer produce an empty `baseUrl`.
 * - FIRECRAWL_ENABLED: parsed with toBoolean, defaulting to enabled.
 *
 * @returns {{provider: string, baseUrl: string, enabled: boolean,
 *   configured: boolean, hasApiKey: boolean, mode: string}}
 */
function getFirecrawlRuntime() {
  const apiKey = String(process.env.FIRECRAWL_API_KEY || '').trim();
  const configuredBaseUrl = String(process.env.FIRECRAWL_BASE_URL || '').trim();
  const baseUrl = configuredBaseUrl || FIRECRAWL_DEFAULT_BASE_URL;
  const enabled = toBoolean(process.env.FIRECRAWL_ENABLED, true);
  const hasApiKey = Boolean(apiKey);

  return {
    provider: 'firecrawl',
    baseUrl,
    enabled,
    // `configured` and `hasApiKey` both report whether an API key is present.
    configured: hasApiKey,
    hasApiKey,
    mode: 'scaffold_only',
  };
}
/**
 * Describes how Firecrawl would take part in a crawl once it is activated.
 * The result extends the runtime configuration with a status, capability
 * flags and a human-readable message; no request to Firecrawl is made here.
 *
 * @param {object} [options]
 * @param {number|string} [options.requestedPages] - Pages requested for the crawl (treated as 1 when falsy).
 * @param {object} [options.entitlements] - Caller entitlements; only `canAdvancedCrawl` is read.
 * @returns {object} Runtime fields plus `status`, `wouldHandleJavascript`,
 *   `wouldHandleSitemapDiscovery`, `shouldUseFirecrawlLater` and `message`.
 */
function getFirecrawlScaffold({ requestedPages, entitlements } = {}) {
  const runtime = getFirecrawlRuntime();
  const isMultiPageRequest = Number(requestedPages || 1) > 1;
  const hasAdvancedCrawl = Boolean(entitlements?.canAdvancedCrawl);

  let status = 'awaiting_api_key';
  let message = 'Firecrawl scaffold is wired, but FIRECRAWL_API_KEY is not set yet. The analyzer still uses the built-in crawler for now.';
  if (runtime.configured) {
    status = 'ready_for_activation';
    message = 'Firecrawl scaffold is wired and ready for the next activation step, but this analyzer still uses the built-in crawler today.';
  }

  return {
    ...runtime,
    status,
    wouldHandleJavascript: true,
    wouldHandleSitemapDiscovery: true,
    // Requires Firecrawl to be enabled and either a multi-page request or an unlocked advanced crawl.
    shouldUseFirecrawlLater: runtime.enabled && (isMultiPageRequest || hasAdvancedCrawl),
    message,
  };
}
module.exports = {
getFirecrawlRuntime,
getFirecrawlScaffold,
};

View File

@ -0,0 +1,104 @@
const ValidationError = require('./notifications/errors/validation');
// Page cap per crawl for users without the advanced-crawl entitlement.
const BASIC_MAX_PAGES_PER_CRAWL = 1;
// Page cap per crawl for users with the advanced-crawl entitlement.
const ADVANCED_MAX_PAGES_PER_CRAWL = 25;
// Permission name that unlocks the higher page cap.
const ADVANCED_CRAWL_PERMISSION = 'USE_ADVANCED_CRAWL';
// Permission name that unlocks platform-specific schema output.
const PLATFORM_OUTPUT_PERMISSION = 'USE_PLATFORM_OUTPUT';
/**
 * Collects the names of all permissions attached to a user, combining the
 * user's custom permissions with those of the user's role.
 *
 * @param {object|null|undefined} currentUser - User with optional
 *   `custom_permissions` and `app_role_permissions` arrays of `{ name }` objects.
 * @returns {Set<string>} Unique permission names (empty when the user has none).
 */
function getPermissionNames(currentUser) {
  const toNames = (permissions) => (permissions || []).map(({ name }) => name);

  const customNames = toNames(currentUser?.custom_permissions);
  const roleNames = toNames(currentUser?.app_role_permissions);

  return new Set(customNames.concat(roleNames));
}
/**
 * Tells whether the user's role is named exactly 'Administrator'.
 *
 * @param {object|null|undefined} currentUser - User with an optional `app_role.name`.
 * @returns {boolean}
 */
function isAdministrator(currentUser) {
  const roleName = currentUser?.app_role?.name;
  return roleName === 'Administrator';
}
/**
 * Checks whether a user holds a named entitlement.
 *
 * An empty permission name is never granted. Administrators are granted
 * every non-empty entitlement; other users need the permission by name.
 *
 * @param {object|null|undefined} currentUser - User to check.
 * @param {string} permissionName - Permission name to look for.
 * @returns {boolean}
 */
function hasEntitlement(currentUser, permissionName) {
  if (!permissionName) {
    return false;
  }
  return isAdministrator(currentUser)
    || getPermissionNames(currentUser).has(permissionName);
}
/**
 * Builds the entitlement summary for a user: which premium features are
 * unlocked, the resulting page cap per crawl, and the permission names
 * behind each feature.
 *
 * @param {object|null|undefined} currentUser - User to evaluate.
 * @returns {{canAdvancedCrawl: boolean, canPlatformOutput: boolean,
 *   maxPagesPerCrawl: number,
 *   permissions: {advancedCrawl: string, platformOutput: string}}}
 */
function getSiteEntitlements(currentUser) {
  const canAdvancedCrawl = hasEntitlement(currentUser, ADVANCED_CRAWL_PERMISSION);
  const canPlatformOutput = hasEntitlement(currentUser, PLATFORM_OUTPUT_PERMISSION);

  let maxPagesPerCrawl = BASIC_MAX_PAGES_PER_CRAWL;
  if (canAdvancedCrawl) {
    maxPagesPerCrawl = ADVANCED_MAX_PAGES_PER_CRAWL;
  }

  const permissions = {
    advancedCrawl: ADVANCED_CRAWL_PERMISSION,
    platformOutput: PLATFORM_OUTPUT_PERMISSION,
  };

  return {
    canAdvancedCrawl,
    canPlatformOutput,
    maxPagesPerCrawl,
    permissions,
  };
}
/**
 * Parses the requested page count of a crawl.
 *
 * A missing value (`undefined`, `null` or the empty string) means one page.
 * Anything else must convert to an integer of at least 1.
 *
 * @param {*} rawRequestedPages - Raw value from the request payload.
 * @returns {number} The page count.
 * @throws {ValidationError} When the value is not an integer >= 1.
 */
function parseRequestedPages(rawRequestedPages) {
  const isMissing = rawRequestedPages === undefined
    || rawRequestedPages === null
    || rawRequestedPages === '';
  if (isMissing) {
    return 1;
  }

  const pageCount = Number(rawRequestedPages);
  if (Number.isInteger(pageCount) && pageCount >= 1) {
    return pageCount;
  }

  throw new ValidationError('errors.validation.message');
}
/**
 * Verifies that the requested page count fits within the user's crawl limit.
 *
 * @param {number} requestedPages - Parsed page count for the crawl.
 * @param {object|null|undefined} currentUser - User making the request.
 * @returns {object} The user's entitlements, as returned by getSiteEntitlements.
 * @throws {Error} With `code` 403 when the request exceeds the user's limit.
 */
function ensureRequestedPagesAllowed(requestedPages, currentUser) {
  const entitlements = getSiteEntitlements(currentUser);
  const limit = entitlements.maxPagesPerCrawl;

  if (!(requestedPages > limit)) {
    return entitlements;
  }

  const pluralSuffix = limit === 1 ? '' : 's';
  throw Object.assign(
    new Error(
      `Your current plan allows up to ${limit} page${pluralSuffix} per crawl. Upgrade to Advanced Crawl to analyze ${requestedPages} pages.`,
    ),
    { code: 403 },
  );
}
/**
 * Verifies that the user may use platform-specific schema output.
 *
 * @param {object|null|undefined} currentUser - User making the request.
 * @param {string} [platform] - Platform name; only used in the error message.
 * @returns {object} The user's entitlements, as returned by getSiteEntitlements.
 * @throws {Error} With `code` 403 when platform output is not unlocked.
 */
function ensurePlatformOutputAllowed(currentUser, platform) {
  const entitlements = getSiteEntitlements(currentUser);
  if (entitlements.canPlatformOutput) {
    return entitlements;
  }

  const platformSuffix = platform ? ` for ${platform}` : '';
  throw Object.assign(
    new Error(
      `Platform-specific schema output${platformSuffix} is part of the Premium plan. Upgrade to unlock Step 4 code generation.`,
    ),
    { code: 403 },
  );
}
module.exports = {
BASIC_MAX_PAGES_PER_CRAWL,
ADVANCED_MAX_PAGES_PER_CRAWL,
ADVANCED_CRAWL_PERMISSION,
PLATFORM_OUTPUT_PERMISSION,
getSiteEntitlements,
hasEntitlement,
parseRequestedPages,
ensureRequestedPagesAllowed,
ensurePlatformOutputAllowed,
};

View File

@ -2,9 +2,18 @@ const axios = require('axios');
const db = require('../db/models');
const ValidationError = require('./notifications/errors/validation');
const EmailSender = require('./email');
const {
getSiteEntitlements,
parseRequestedPages,
ensureRequestedPagesAllowed,
ensurePlatformOutputAllowed,
} = require('./siteEntitlements');
const { getFirecrawlScaffold } = require('./firecrawl');
// Timeout passed to axios for each page fetch (axios timeouts are in milliseconds).
const REQUEST_TIMEOUT = 15000;
// NOTE(review): usage of PREVIEW_LIMIT is outside this view — confirm what it caps.
const PREVIEW_LIMIT = 5;
// Maximum number of analyzed / failed page entries kept in the aggregate analysis.
const PAGE_PREVIEW_LIMIT = 10;
// URL paths ending in one of these extensions are dropped from the crawl queue
// by normalizeCrawlUrl (matching ignores case).
const NON_HTML_FILE_PATTERN = /\.(?:7z|avi|bmp|css|csv|docx?|eot|gif|ico|jpe?g|js|json|map|mov|mp3|mp4|pdf|png|pptx?|rar|svg|tar|tgz|txt|wav|webm|webp|woff2?|xlsx?|xml|zip)$/i;
function normalizeUrl(rawUrl) {
if (!rawUrl || typeof rawUrl !== 'string') {
@ -229,7 +238,383 @@ function detectPlatform(html, headers, analyzedUrl) {
};
}
function buildFailureAnalysis(normalizedUrl, error) {
/**
 * Tells whether an HTTP response should be treated as an HTML document.
 * A response without a content-type header counts as HTML.
 *
 * @param {object|null|undefined} response - Response with an optional `headers['content-type']`.
 * @returns {boolean}
 */
function isHtmlLikeResponse(response) {
  const rawContentType = response?.headers?.['content-type'];
  const contentType = String(rawContentType || '').toLowerCase();

  if (contentType === '') {
    return true;
  }

  const htmlMediaTypes = ['text/html', 'application/xhtml+xml'];
  return htmlMediaTypes.some((mediaType) => contentType.includes(mediaType));
}
/**
 * Converts a hostname allow-list into a new Set of lower-cased strings.
 *
 * @param {Set<string>|string[]|string|null|undefined} allowedHostnames -
 *   A Set, an array, a single hostname, or a falsy value for "no list".
 * @returns {Set<string>} A fresh Set; the input is never modified.
 */
function normalizeAllowedHostnames(allowedHostnames) {
  const toLowerCaseHostname = (hostname) => String(hostname).toLowerCase();

  let hostnames;
  if (allowedHostnames instanceof Set) {
    hostnames = Array.from(allowedHostnames);
  } else if (Array.isArray(allowedHostnames)) {
    hostnames = allowedHostnames;
  } else {
    // A single truthy value is treated as a one-element list.
    hostnames = allowedHostnames ? [allowedHostnames] : [];
  }

  return new Set(hostnames.map((hostname) => toLowerCaseHostname(hostname)));
}
/**
 * Turns a link found on a page into a normalized, crawlable URL.
 *
 * Returns null when the link is not a non-empty string, is a fragment-only,
 * mailto:, tel:, javascript: or data: link, cannot be parsed, is not
 * http(s), points at a host outside a non-empty allow-list, or has a path
 * matching NON_HTML_FILE_PATTERN. The fragment is removed before the URL is
 * handed to normalizeUrl.
 *
 * @param {*} rawUrl - Link value, possibly relative.
 * @param {string} parentUrl - URL of the page the link was found on (base for relative links).
 * @param {Set<string>|string[]|string} [allowedHostnames] - Hosts the crawl may visit; empty means any.
 * @returns {string|null} Whatever normalizeUrl returns for the cleaned URL, or null.
 */
function normalizeCrawlUrl(rawUrl, parentUrl, allowedHostnames) {
  if (typeof rawUrl !== 'string' || rawUrl === '') {
    return null;
  }

  const candidate = rawUrl.trim();
  const nonCrawlablePrefix = /^(?:#|mailto:|tel:|javascript:|data:)/i;
  if (candidate === '' || nonCrawlablePrefix.test(candidate)) {
    return null;
  }

  let target;
  try {
    target = new URL(candidate, parentUrl);
  } catch (error) {
    return null;
  }

  if (target.protocol !== 'http:' && target.protocol !== 'https:') {
    return null;
  }

  // An empty allow-list places no restriction on the host.
  const crawlScope = normalizeAllowedHostnames(allowedHostnames);
  const isOutOfScope = crawlScope.size > 0
    && !crawlScope.has(target.hostname.toLowerCase());
  if (isOutOfScope) {
    return null;
  }

  if (NON_HTML_FILE_PATTERN.test(target.pathname)) {
    return null;
  }

  target.hash = '';
  return normalizeUrl(target.toString());
}
/**
 * Collects the unique crawlable links found in the `<a href>` attributes of
 * an HTML string, in order of first appearance.
 *
 * @param {string} html - Page markup (a falsy value is treated as empty).
 * @param {string} pageUrl - URL of the page, used to resolve relative links.
 * @param {Set<string>|string[]|string} [allowedHostnames] - Hosts the crawl may visit.
 * @returns {string[]} Normalized URLs accepted by normalizeCrawlUrl, without duplicates.
 */
function extractInternalLinks(html, pageUrl, allowedHostnames) {
  // Captures a double-quoted, single-quoted or unquoted href value.
  const anchorHrefPattern = /<a\s[^>]*href=(?:"([^"]+)"|'([^']+)'|([^\s>]+))/gi;
  const anchorMatches = [...String(html || '').matchAll(anchorHrefPattern)];

  const uniqueLinks = new Set();
  for (const [, doubleQuoted, singleQuoted, unquoted] of anchorMatches) {
    const href = doubleQuoted || singleQuoted || unquoted || '';
    const link = normalizeCrawlUrl(href, pageUrl, allowedHostnames);
    if (link) {
      uniqueLinks.add(link);
    }
  }

  return [...uniqueLinks];
}
/**
 * Produces a short description of a page-fetch failure.
 *
 * For axios errors with a response this is the HTTP status; for axios
 * errors without one it is the error message. Other errors yield their
 * message, or 'Unknown crawl error' when the message is empty.
 *
 * @param {Error} error - Error raised while fetching or analyzing a page.
 * @returns {string}
 */
function summarizeCrawlError(error) {
  if (!axios.isAxiosError(error)) {
    return error.message || 'Unknown crawl error';
  }

  return error.response
    ? `Request failed with status ${error.response.status}`
    : error.message;
}
/**
 * Downloads one page and runs the per-page analysis on it.
 *
 * The hostname of the final (post-redirect) URL is added to the crawl scope
 * used for link extraction and, when `allowedHostnames` is a Set, to that
 * Set as well.
 * NOTE(review): this means a redirect to a different host widens the scope
 * of the whole crawl — confirm that is intended beyond www/apex redirects.
 *
 * @param {string} pageUrl - URL to fetch.
 * @param {Set<string>|string[]|string} [allowedHostnames] - Hosts the crawl may visit; a Set is updated in place.
 * @returns {Promise<object>} Page record with `requestedUrl`, `analyzedUrl`,
 *   `pageTitle`, `statusCode`, `html`, `platform`, `schema`, `pageSignals`
 *   and `discoveredLinks`.
 * @throws {Error} With `code` 415 when the response is not HTML; axios
 *   errors from the request propagate unchanged.
 */
async function fetchAnalyzedPage(pageUrl, allowedHostnames) {
  const requestConfig = {
    timeout: REQUEST_TIMEOUT,
    maxRedirects: 5,
    responseType: 'text',
    headers: {
      Accept:
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      'User-Agent':
        'Mozilla/5.0 (compatible; SchemaCrawlerBot/1.0; +https://flatlogic.com)',
    },
  };
  const response = await axios.get(pageUrl, requestConfig);

  if (!isHtmlLikeResponse(response)) {
    throw Object.assign(
      new Error('Skipped non-HTML response during crawl.'),
      { code: 415 },
    );
  }

  // Prefer the URL the request ended on after redirects.
  const finalUrl = normalizeUrl(
    response.request?.res?.responseUrl || response.config?.url || pageUrl,
  );

  const crawlScope = normalizeAllowedHostnames(allowedHostnames);
  const finalHostname = new URL(finalUrl).hostname.toLowerCase();
  crawlScope.add(finalHostname);
  if (allowedHostnames instanceof Set) {
    allowedHostnames.add(finalHostname);
  }

  const html = typeof response.data === 'string' ? response.data : '';
  const pageTitle = extractPageTitle(html);
  const platform = detectPlatform(html, response.headers, finalUrl);
  const schema = extractSchemaSummary(html);
  const pageSignals = inferPageSignals(html, finalUrl, pageTitle, platform);
  const discoveredLinks = extractInternalLinks(html, finalUrl, crawlScope);

  return {
    requestedUrl: pageUrl,
    analyzedUrl: finalUrl,
    pageTitle,
    statusCode: response.status,
    html,
    platform,
    schema,
    pageSignals,
    discoveredLinks,
  };
}
/**
 * Crawls a site breadth-first, one page at a time, starting at `baseUrl`,
 * until `requestedPages` pages were analyzed or no queued URLs remain.
 *
 * Pages that fail to fetch or analyze are recorded in `failedPages` and do
 * not count towards `requestedPages`.
 *
 * @param {string} baseUrl - Start URL of the crawl.
 * @param {number} requestedPages - Maximum number of successfully analyzed pages.
 * @returns {Promise<{pages: object[], failedPages: {url: string, error: string}[],
 *   discoveredInternalPages: number}>} `discoveredInternalPages` counts the
 *   distinct links queued during the crawl (the start URL is not included).
 */
async function crawlPages(baseUrl, requestedPages) {
  const startUrl = normalizeUrl(baseUrl);
  const allowedHostnames = new Set([new URL(startUrl).hostname.toLowerCase()]);

  const visited = new Set();
  const enqueued = new Set([startUrl]);
  const queue = [startUrl];

  const pages = [];
  const failedPages = [];
  let discoveredInternalPages = 0;

  while (queue.length > 0 && pages.length < requestedPages) {
    const currentUrl = queue.shift();
    if (!currentUrl || visited.has(currentUrl)) {
      continue;
    }
    visited.add(currentUrl);

    try {
      const page = await fetchAnalyzedPage(currentUrl, allowedHostnames);

      // The page may have been reached through a redirect; mark its final URL as seen too.
      visited.add(page.analyzedUrl);
      enqueued.add(page.analyzedUrl);
      pages.push(page);

      for (const linkUrl of page.discoveredLinks) {
        const isKnown = visited.has(linkUrl) || enqueued.has(linkUrl);
        if (!isKnown) {
          enqueued.add(linkUrl);
          queue.push(linkUrl);
          discoveredInternalPages += 1;
        }
      }
    } catch (error) {
      failedPages.push({
        url: currentUrl,
        error: summarizeCrawlError(error),
      });
    }
  }

  return { pages, failedPages, discoveredInternalPages };
}
/**
 * Merges the per-page structured-data summaries of a crawl into one summary.
 *
 * Counts are summed, JSON-LD type names are de-duplicated in order of first
 * appearance, and every invalid JSON-LD block is tagged with the URL of the
 * page it came from.
 *
 * @param {object[]} pageAnalyses - Page records with optional `schema` and an `analyzedUrl`.
 * @returns {{hasStructuredData: boolean,
 *   jsonLd: {count: number, types: string[], invalidBlocks: object[]},
 *   microdata: {count: number, detected: boolean},
 *   rdfa: {count: number, detected: boolean}}}
 */
function buildAggregateSchema(pageAnalyses) {
  const totals = { jsonLd: 0, microdata: 0, rdfa: 0 };
  const uniqueJsonLdTypes = new Set();
  const invalidBlocks = [];

  pageAnalyses.forEach((page) => {
    const { jsonLd, microdata, rdfa } = page.schema || {};

    totals.jsonLd += jsonLd?.count || 0;
    totals.microdata += microdata?.count || 0;
    totals.rdfa += rdfa?.count || 0;

    for (const typeName of jsonLd?.types || []) {
      uniqueJsonLdTypes.add(typeName);
    }
    for (const block of jsonLd?.invalidBlocks || []) {
      invalidBlocks.push({ ...block, url: page.analyzedUrl });
    }
  });

  const hasStructuredData = pageAnalyses.some(
    (page) => page.schema?.hasStructuredData,
  );

  return {
    hasStructuredData,
    jsonLd: {
      count: totals.jsonLd,
      types: [...uniqueJsonLdTypes],
      invalidBlocks,
    },
    microdata: {
      count: totals.microdata,
      detected: totals.microdata > 0,
    },
    rdfa: {
      count: totals.rdfa,
      detected: totals.rdfa > 0,
    },
  };
}
/**
 * Summarises the content hints (FAQ, blog, product, local business) of all
 * analyzed pages: whether any page showed the hint, and how many pages did.
 *
 * @param {object[]} pageAnalyses - Page records with an optional `pageSignals` object.
 * @returns {{hasFaqHints: boolean, hasBlogHints: boolean, hasProductHints: boolean,
 *   hasLocalBusinessHints: boolean, faqPages: number, blogPages: number,
 *   productPages: number, localBusinessPages: number}}
 */
function buildAggregateSignals(pageAnalyses) {
  const summary = {
    hasFaqHints: false,
    hasBlogHints: false,
    hasProductHints: false,
    hasLocalBusinessHints: false,
    faqPages: 0,
    blogPages: 0,
    productPages: 0,
    localBusinessPages: 0,
  };

  // Each hint flag is paired with the counter of pages that showed it.
  const hintCounters = [
    ['hasFaqHints', 'faqPages'],
    ['hasBlogHints', 'blogPages'],
    ['hasProductHints', 'productPages'],
    ['hasLocalBusinessHints', 'localBusinessPages'],
  ];

  pageAnalyses.forEach((page) => {
    const signals = page.pageSignals || {};
    for (const [flagName, counterName] of hintCounters) {
      if (signals[flagName]) {
        summary[flagName] = true;
        summary[counterName] += 1;
      }
    }
  });

  return summary;
}
/**
 * Builds the human-readable notice shown for multi-page crawls.
 *
 * @param {object} options
 * @param {number} options.requestedPages - Pages the user asked for.
 * @param {number} options.actualPagesAnalyzed - Pages that were analyzed.
 * @param {number} options.failedPages - Number of pages that could not be fetched.
 * @param {number} options.discoveredInternalPages - Links queued during the crawl.
 * @param {object} [options.firecrawl] - Firecrawl scaffold; its `message` is appended when set.
 * @returns {string|null} The notice, or null for single-page requests.
 */
function buildCrawlNotice({
  requestedPages,
  actualPagesAnalyzed,
  failedPages,
  discoveredInternalPages,
  firecrawl,
}) {
  if (requestedPages <= 1) {
    return null;
  }

  // The start page itself is not part of discoveredInternalPages, hence the + 1.
  const foundFewerThanRequested = discoveredInternalPages + 1 < requestedPages;

  const sentences = [
    `Advanced crawl analyzed ${actualPagesAnalyzed} of ${requestedPages} requested page${requestedPages === 1 ? '' : 's'}.`,
    foundFewerThanRequested
      && 'Fewer crawlable internal HTML pages were discovered than requested.',
    failedPages > 0
      && `${failedPages} page${failedPages === 1 ? '' : 's'} could not be fetched during the crawl.`,
    firecrawl?.message,
  ];

  return sentences.filter(Boolean).join(' ');
}
/**
 * Assembles the crawl-wide analysis report from the per-page results.
 *
 * The first analyzed page supplies the headline fields (URL, title, status,
 * platform); schema and content signals are aggregated over all pages. The
 * `pages` and `failedPages` lists are capped at PAGE_PREVIEW_LIMIT entries.
 *
 * @param {object} options
 * @param {string} options.normalizedUrl - URL the user asked to analyze.
 * @param {object[]} options.pageAnalyses - Successfully analyzed pages, first page first.
 * @param {number} options.requestedPages - Pages the user asked for.
 * @param {object} options.entitlements - Caller entitlements (`maxPagesPerCrawl`, `canAdvancedCrawl` are read).
 * @param {number} options.discoveredInternalPages - Links queued during the crawl.
 * @param {object[]} options.failedPages - Pages that could not be fetched.
 * @param {object} [options.firecrawl] - Firecrawl scaffold, passed through to the report.
 * @returns {object} The analysis report; `recommendationCount` starts at 0
 *   and `finishedAt` is the Date from which `fetchedAt` was derived.
 */
function buildAggregateAnalysis({
  normalizedUrl,
  pageAnalyses,
  requestedPages,
  entitlements,
  discoveredInternalPages,
  failedPages,
  firecrawl,
}) {
  const [firstPage] = pageAnalyses;
  const finishedAt = new Date();
  const schema = buildAggregateSchema(pageAnalyses);
  const aggregateSignals = buildAggregateSignals(pageAnalyses);

  const unknownPlatform = {
    detected: 'unknown',
    label: 'Unknown',
    matchedSignals: [],
  };

  const hasStructuredData = (page) => page.schema?.hasStructuredData;
  const hasInvalidJsonLd = (page) => (page.schema?.jsonLd?.invalidBlocks || []).length > 0;

  const crawlPlan = {
    requestedPages,
    allowedPages: entitlements.maxPagesPerCrawl,
    actualPagesAnalyzed: pageAnalyses.length,
    advancedCrawlEnabled: entitlements.canAdvancedCrawl,
    provider: 'internal',
  };

  const crawlSummary = {
    pagesWithStructuredData: pageAnalyses.filter((page) => hasStructuredData(page)).length,
    pagesWithoutStructuredData: pageAnalyses.filter((page) => !hasStructuredData(page)).length,
    pagesWithInvalidJsonLd: pageAnalyses.filter((page) => hasInvalidJsonLd(page)).length,
    failedPages: failedPages.length,
    discoveredInternalPages,
  };

  const pagePreviews = pageAnalyses
    .slice(0, PAGE_PREVIEW_LIMIT)
    .map((page) => ({
      url: page.analyzedUrl,
      title: page.pageTitle,
      statusCode: page.statusCode,
      hasStructuredData: Boolean(page.schema?.hasStructuredData),
      jsonLdTypes: page.schema?.jsonLd?.types || [],
    }));

  const notice = buildCrawlNotice({
    requestedPages,
    actualPagesAnalyzed: pageAnalyses.length,
    failedPages: failedPages.length,
    discoveredInternalPages,
    firecrawl,
  });

  return {
    requestedUrl: normalizedUrl,
    analyzedUrl: firstPage?.analyzedUrl || normalizedUrl,
    pageTitle: firstPage?.pageTitle || null,
    fetchedAt: finishedAt.toISOString(),
    statusCode: firstPage?.statusCode || null,
    platform: firstPage?.platform || unknownPlatform,
    schema,
    // Starts at zero; the count is not computed in this function.
    recommendationCount: 0,
    crawlPlan,
    crawlSummary,
    pages: pagePreviews,
    failedPages: failedPages.slice(0, PAGE_PREVIEW_LIMIT),
    aggregateSignals,
    entitlements,
    firecrawl,
    notice,
    finishedAt,
  };
}
function buildFailureAnalysis(normalizedUrl, error, firecrawl) {
const isAxiosError = axios.isAxiosError(error);
return {
@ -247,6 +632,7 @@ function buildFailureAnalysis(normalizedUrl, error) {
microdata: { count: 0, detected: false },
rdfa: { count: 0, detected: false },
},
firecrawl,
error: isAxiosError
? error.response
? `Request failed with status ${error.response.status}`
@ -445,22 +831,25 @@ function buildRecommendationCode({ baseUrl, siteName, schemaType, pageScope }) {
});
}
function buildRecommendations({ baseUrl, siteName, analysis, html }) {
function buildRecommendations({ baseUrl, siteName, analysis, html, pageAnalyses = [] }) {
const recommendationList = [];
const schemaTypes = analysis?.schema?.jsonLd?.types || [];
const pageSignals = inferPageSignals(
html,
analysis?.analyzedUrl,
analysis?.pageTitle,
analysis?.platform || {},
);
const aggregateSignals = analysis?.aggregateSignals || {};
const pageSignals = pageAnalyses.length > 0
? aggregateSignals
: inferPageSignals(
html,
analysis?.analyzedUrl,
analysis?.pageTitle,
analysis?.platform || {},
);
if ((analysis?.schema?.jsonLd?.invalidBlocks || []).length > 0) {
recommendationList.push({
title: 'Fix invalid JSON-LD blocks already on the homepage',
title: 'Fix invalid JSON-LD blocks already on the analyzed pages',
recommendation_type: 'fix_existing',
schema_type: 'JSON-LD',
page_scope: 'homepage',
page_scope: pageAnalyses.length > 1 ? 'crawl-wide' : 'homepage',
priority: 'high',
reason:
'At least one JSON-LD block could not be parsed. Invalid structured data can prevent search engines from using your markup.',
@ -524,7 +913,9 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) {
page_scope: 'internal-pages',
priority: 'medium',
reason:
'Breadcrumb schema helps search engines understand content hierarchy and page relationships.',
pageAnalyses.length > 1
? 'Multiple internal pages were analyzed without BreadcrumbList schema, so search engines may miss content hierarchy and page relationships.'
: 'Breadcrumb schema helps search engines understand content hierarchy and page relationships.',
expected_impact:
'Can improve result presentation and site structure understanding.',
suggested_schema: buildRecommendationCode({
@ -537,8 +928,8 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) {
}
if (
pageSignals.hasProductHints &&
!hasSchemaType(schemaTypes, ['Product'])
pageSignals.hasProductHints
&& !hasSchemaType(schemaTypes, ['Product'])
) {
recommendationList.push({
title: 'Add Product schema on product detail pages',
@ -547,7 +938,9 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) {
page_scope: 'product-pages',
priority: 'high',
reason:
'The site shows product/ecommerce signals, but no Product schema was detected on the analyzed page.',
pageAnalyses.length > 1
? `Product or ecommerce signals appeared across ${pageSignals.productPages || 1} analyzed page${(pageSignals.productPages || 1) === 1 ? '' : 's'}, but no Product schema was detected.`
: 'The site shows product/ecommerce signals, but no Product schema was detected on the analyzed page.',
expected_impact:
'Improves eligibility for product-rich search experiences and helps AI systems interpret commercial details.',
suggested_schema: buildRecommendationCode({
@ -560,8 +953,8 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) {
}
if (
pageSignals.hasBlogHints &&
!hasSchemaType(schemaTypes, ['Article', 'BlogPosting', 'NewsArticle'])
pageSignals.hasBlogHints
&& !hasSchemaType(schemaTypes, ['Article', 'BlogPosting', 'NewsArticle'])
) {
recommendationList.push({
title: 'Add BlogPosting schema on editorial content',
@ -570,7 +963,9 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) {
page_scope: 'article-pages',
priority: 'medium',
reason:
'The site appears to publish editorial content, but article-level schema was not detected.',
pageAnalyses.length > 1
? `Editorial or blog signals appeared across ${pageSignals.blogPages || 1} analyzed page${(pageSignals.blogPages || 1) === 1 ? '' : 's'}, but article-level schema was not detected.`
: 'The site appears to publish editorial content, but article-level schema was not detected.',
expected_impact:
'Clarifies content ownership, publication dates, and headline structure for search engines and answer engines.',
suggested_schema: buildRecommendationCode({
@ -583,8 +978,8 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) {
}
if (
pageSignals.hasFaqHints &&
!hasSchemaType(schemaTypes, ['FAQPage'])
pageSignals.hasFaqHints
&& !hasSchemaType(schemaTypes, ['FAQPage'])
) {
recommendationList.push({
title: 'Add FAQPage schema where FAQs are published',
@ -593,7 +988,9 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) {
page_scope: 'faq-pages',
priority: 'medium',
reason:
'FAQ content signals were found, but the page is not marked up as an FAQPage.',
pageAnalyses.length > 1
? `FAQ-style content signals appeared across ${pageSignals.faqPages || 1} analyzed page${(pageSignals.faqPages || 1) === 1 ? '' : 's'}, but FAQPage schema was not detected.`
: 'FAQ content signals were found, but the page is not marked up as an FAQPage.',
expected_impact:
'Makes question-and-answer content easier to parse and reuse in search and AI contexts.',
suggested_schema: buildRecommendationCode({
@ -678,6 +1075,7 @@ async function buildStoredReport(siteId, currentUser) {
crawl: crawl ? crawl.get({ plain: true }) : null,
analysis: crawl ? parseSummary(crawl.summary) : null,
recommendations: recommendations.map((item) => item.get({ plain: true })),
entitlements: getSiteEntitlements(currentUser),
};
}
@ -722,7 +1120,10 @@ module.exports = class SitesService {
static async analyzeHomepage(data, currentUser) {
ensureCurrentUser(currentUser);
const requestedPages = parseRequestedPages(data?.requestedPages);
const entitlements = ensureRequestedPagesAllowed(requestedPages, currentUser);
const normalizedUrl = normalizeUrl(data?.url || data?.base_url);
const firecrawl = getFirecrawlScaffold({ requestedPages, entitlements });
const requestedName =
typeof data?.name === 'string' && data.name.trim()
? data.name.trim()
@ -789,46 +1190,37 @@ module.exports = class SitesService {
}
try {
const response = await axios.get(normalizedUrl, {
timeout: REQUEST_TIMEOUT,
maxRedirects: 5,
responseType: 'text',
headers: {
Accept:
'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'User-Agent':
'Mozilla/5.0 (compatible; SchemaCrawlerBot/1.0; +https://flatlogic.com)',
},
});
const crawlResult = await crawlPages(normalizedUrl, requestedPages);
const pageAnalyses = crawlResult.pages;
const analyzedUrl =
response.request?.res?.responseUrl || response.config?.url || normalizedUrl;
const html = typeof response.data === 'string' ? response.data : '';
const pageTitle = extractPageTitle(html);
const platform = detectPlatform(html, response.headers, analyzedUrl);
const schema = extractSchemaSummary(html);
const finishedAt = new Date();
if (pageAnalyses.length === 0) {
const firstFailure = crawlResult.failedPages[0];
const error = new Error(firstFailure?.error || 'Site analysis failed.');
error.code = 400;
throw error;
}
const analysis = buildAggregateAnalysis({
normalizedUrl,
pageAnalyses,
requestedPages,
entitlements,
discoveredInternalPages: crawlResult.discoveredInternalPages,
failedPages: crawlResult.failedPages,
firecrawl,
});
const homepage = pageAnalyses[0];
const finishedAt = analysis.finishedAt;
const recommendations = buildRecommendations({
baseUrl: normalizedUrl,
siteName: requestedName || pageTitle || defaultName,
analysis: {
analyzedUrl,
pageTitle,
platform,
schema,
},
html,
siteName: requestedName || homepage.pageTitle || defaultName,
analysis,
html: homepage.html,
pageAnalyses,
});
const analysis = {
requestedUrl: normalizedUrl,
analyzedUrl,
pageTitle,
fetchedAt: finishedAt.toISOString(),
statusCode: response.status,
platform,
schema,
recommendationCount: recommendations.length,
};
analysis.recommendationCount = recommendations.length;
delete analysis.finishedAt;
const finalizeTransaction = await db.sequelize.transaction();
let updatedSite;
@ -845,8 +1237,8 @@ module.exports = class SitesService {
await updatedSite.update(
{
name: updatedSite.name || requestedName || pageTitle || defaultName,
detected_platform: platform.detected,
name: updatedSite.name || requestedName || homepage.pageTitle || defaultName,
detected_platform: homepage.platform.detected,
crawl_status: 'completed',
last_crawled_at: finishedAt,
updatedById: currentUser.id,
@ -860,7 +1252,7 @@ module.exports = class SitesService {
{
status: 'completed',
finished_at: finishedAt,
pages_scanned: 1,
pages_scanned: pageAnalyses.length,
summary: JSON.stringify(analysis),
updatedById: currentUser.id,
},
@ -894,11 +1286,12 @@ module.exports = class SitesService {
crawl: updatedCrawl.get({ plain: true }),
analysis,
recommendations: storedRecommendations.map((item) => item.get({ plain: true })),
entitlements,
};
} catch (error) {
console.error('Site analysis failed:', error);
const failureAnalysis = buildFailureAnalysis(normalizedUrl, error);
const failureAnalysis = buildFailureAnalysis(normalizedUrl, error, firecrawl);
const failedAt = new Date();
const failureTransaction = await db.sequelize.transaction();
let failedSite;
@ -943,13 +1336,28 @@ module.exports = class SitesService {
throw updateError;
}
if (error instanceof ValidationError || axios.isAxiosError(error)) {
if (
error instanceof ValidationError
|| axios.isAxiosError(error)
|| [400, 403, 404, 415].includes(error.code)
) {
return {
site: failedSite.get({ plain: true }),
crawl: failedCrawl.get({ plain: true }),
analysis: failureAnalysis,
analysis: {
...failureAnalysis,
crawlPlan: {
requestedPages,
allowedPages: entitlements.maxPagesPerCrawl,
actualPagesAnalyzed: 0,
advancedCrawlEnabled: entitlements.canAdvancedCrawl,
provider: 'internal',
},
entitlements,
},
recommendations: [],
error: failureAnalysis.error,
entitlements,
};
}
@ -964,6 +1372,25 @@ module.exports = class SitesService {
static async exportCode(data, currentUser) {
ensureCurrentUser(currentUser);
const outputMode = typeof data?.outputMode === 'string'
? data.outputMode.trim().toLowerCase()
: 'generic';
const platform = typeof data?.platform === 'string'
? data.platform.trim()
: '';
if (outputMode === 'platform') {
ensurePlatformOutputAllowed(currentUser, platform);
const error = new Error(
'Premium platform-specific schema output'
+ (platform ? ' for ' + platform : '')
+ ' is enabled for this user, but the Step 4 generator is not connected yet.',
);
error.code = 400;
throw error;
}
const { recommendationId, siteId } = data || {};
if (recommendationId) {

View File

@ -0,0 +1,23 @@
import { hasPermission } from './userPermissions';
// Page cap per crawl for users without the advanced-crawl permission.
export const BASIC_MAX_PAGES_PER_CRAWL = 1;
// Page cap per crawl for users with the advanced-crawl permission.
export const ADVANCED_MAX_PAGES_PER_CRAWL = 25;
// Permission name that unlocks the higher page cap.
export const ADVANCED_CRAWL_PERMISSION = 'USE_ADVANCED_CRAWL';
// Permission name that unlocks platform-specific output.
export const PLATFORM_OUTPUT_PERMISSION = 'USE_PLATFORM_OUTPUT';
/**
 * Derives the site-analysis entitlements of a user from their permissions:
 * which premium features are unlocked, the resulting page cap per crawl,
 * and the permission names behind each feature.
 *
 * @param user - Current user object, passed to `hasPermission` as-is.
 * @returns Entitlement flags, the page cap per crawl and the permission names.
 */
export function getSiteEntitlements(user: any) {
  const permissions = {
    advancedCrawl: ADVANCED_CRAWL_PERMISSION,
    platformOutput: PLATFORM_OUTPUT_PERMISSION,
  };

  const canAdvancedCrawl = hasPermission(user, ADVANCED_CRAWL_PERMISSION);
  const canPlatformOutput = hasPermission(user, PLATFORM_OUTPUT_PERMISSION);

  let maxPagesPerCrawl = BASIC_MAX_PAGES_PER_CRAWL;
  if (canAdvancedCrawl) {
    maxPagesPerCrawl = ADVANCED_MAX_PAGES_PER_CRAWL;
  }

  return {
    canAdvancedCrawl,
    canPlatformOutput,
    maxPagesPerCrawl,
    permissions,
  };
}

View File

@ -11,14 +11,64 @@ import LayoutAuthenticated from '../../layouts/Authenticated';
import SectionMain from '../../components/SectionMain';
import SectionTitleLineWithButton from '../../components/SectionTitleLineWithButton';
import { getPageTitle } from '../../config';
import { getSiteEntitlements } from '../../helpers/siteEntitlements';
import { useAppSelector } from '../../stores/hooks';
// Entitlement summary as used by this page. The fields match the object
// returned by getSiteEntitlements; all are optional here.
type Entitlements = {
// Whether multi-page (advanced) crawls are unlocked.
canAdvancedCrawl?: boolean;
// Whether platform-specific output is unlocked.
canPlatformOutput?: boolean;
// Highest page count the user may request for one crawl.
maxPagesPerCrawl?: number;
// Permission names behind each entitlement.
permissions?: {
advancedCrawl?: string;
platformOutput?: string;
};
};
type AnalysisPayload = {
requestedUrl?: string;
analyzedUrl?: string;
pageTitle?: string | null;
fetchedAt?: string;
statusCode?: number;
recommendationCount?: number;
notice?: string;
crawlPlan?: {
requestedPages?: number;
allowedPages?: number;
actualPagesAnalyzed?: number;
advancedCrawlEnabled?: boolean;
provider?: string;
};
crawlSummary?: {
pagesWithStructuredData?: number;
pagesWithoutStructuredData?: number;
pagesWithInvalidJsonLd?: number;
failedPages?: number;
discoveredInternalPages?: number;
};
pages?: {
url?: string;
title?: string | null;
statusCode?: number | null;
hasStructuredData?: boolean;
jsonLdTypes?: string[];
}[];
failedPages?: {
url?: string;
error?: string;
}[];
entitlements?: Entitlements;
firecrawl?: {
provider?: string;
enabled?: boolean;
configured?: boolean;
mode?: string;
status?: string;
wouldHandleJavascript?: boolean;
wouldHandleSitemapDiscovery?: boolean;
shouldUseFirecrawlLater?: boolean;
message?: string;
};
platform?: {
detected?: string;
label?: string;
@ -69,20 +119,31 @@ type ReportResponse = {
};
analysis?: AnalysisPayload | null;
recommendations?: Recommendation[];
entitlements?: Entitlements;
error?: string;
};
// Choices rendered in the "Step 4 target platform" select. `value` is what is
// stored in state and sent with the export request; `label` is the text shown
// to the user.
const PLATFORM_OPTIONS = [
  {
    value: 'wordpress',
    label: 'WordPress',
  },
  {
    value: 'shopify',
    label: 'Shopify',
  },
  {
    value: 'webflow',
    label: 'Webflow',
  },
  {
    value: 'custom',
    label: 'Custom / Other',
  },
];
const initialReport: ReportResponse | null = null;
const SchemaAnalyzerPage = () => {
const { currentUser } = useAppSelector((state) => state.auth);
const [url, setUrl] = React.useState('');
const [requestedPages, setRequestedPages] = React.useState(1);
const [selectedPlatform, setSelectedPlatform] = React.useState('wordpress');
const [emailTo, setEmailTo] = React.useState(currentUser?.email || '');
const [report, setReport] = React.useState<ReportResponse | null>(initialReport);
const [isAnalyzing, setIsAnalyzing] = React.useState(false);
const [isExportingAll, setIsExportingAll] = React.useState(false);
const [emailingId, setEmailingId] = React.useState<string | null>(null);
const [exportingId, setExportingId] = React.useState<string | null>(null);
const [isCheckingPlatformOutput, setIsCheckingPlatformOutput] = React.useState(false);
React.useEffect(() => {
if (currentUser?.email) {
@ -94,10 +155,24 @@ const SchemaAnalyzerPage = () => {
toast(message, { type, position: 'bottom-center' });
}, []);
// Client-side entitlements computed from the signed-in user; recomputed only
// when `currentUser` changes.
const fallbackEntitlements = React.useMemo(
() => getSiteEntitlements(currentUser),
[currentUser],
);
// Resolution order: entitlements on the report, then those nested in the
// analysis payload, then the client-side fallback.
const entitlements = report?.entitlements || report?.analysis?.entitlements || fallbackEntitlements;
// A missing (or falsy) limit is treated as a single page.
const maxPagesPerCrawl = entitlements?.maxPagesPerCrawl || 1;
const recommendations = report?.recommendations || [];
// Only recommendations that carry a suggested schema are kept here.
const exportableRecommendations = recommendations.filter(
(recommendation) => recommendation.suggested_schema,
);
const isRequestedPagesOverLimit = requestedPages > maxPagesPerCrawl;
// Firecrawl status from the latest analysis; when none is available a
// placeholder describing an unconfigured scaffold is used instead.
const firecrawlStatus = report?.analysis?.firecrawl || {
provider: 'firecrawl',
configured: false,
wouldHandleJavascript: true,
wouldHandleSitemapDiscovery: true,
message: 'Firecrawl scaffold is wired in code, but this environment still needs a FIRECRAWL_API_KEY before activation.',
};
const handleAnalyze = async () => {
if (!url.trim()) {
@ -105,15 +180,26 @@ const SchemaAnalyzerPage = () => {
return;
}
if (isRequestedPagesOverLimit) {
notify(
'error',
`Your current plan allows up to ${maxPagesPerCrawl} page${maxPagesPerCrawl === 1 ? '' : 's'} per crawl. Upgrade to Advanced Crawl to go beyond that limit.`,
);
return;
}
try {
setIsAnalyzing(true);
const response = await axios.post<ReportResponse>('/sites/analyze', {
url: url.trim(),
requestedPages,
});
setReport(response.data);
if (response.data.error) {
notify('error', response.data.error);
} else if (response.data.analysis?.notice) {
notify('info', response.data.analysis.notice);
} else {
notify('success', 'Site analyzed successfully.');
}
@ -152,7 +238,7 @@ const SchemaAnalyzerPage = () => {
};
/**
 * Extracts the file name from a Content-Disposition style header value.
 *
 * @param contentDisposition - raw header value; may be undefined.
 * @returns the captured file name, or 'schema-export.txt' when the value is
 *   missing or carries no `filename=` parameter.
 */
const parseFilename = (contentDisposition?: string) => {
  // Single declaration: `match` must not be declared twice in this scope.
  // Captures everything after `filename=` (optionally quoted) up to the next
  // double quote or semicolon; the match is case-insensitive.
  const match = contentDisposition?.match(/filename="?([^";]+)"?/i);
  return match?.[1] || 'schema-export.txt';
};
@ -238,6 +324,36 @@ const SchemaAnalyzerPage = () => {
}
};
/**
 * Requests platform-specific export output for the analysed site.
 *
 * Guards, in order: a site must already have been analysed, and the current
 * entitlements must allow platform output. The request is then posted to
 * '/sites/export' as a blob request. The response body is not used here; a
 * failure is logged and surfaced as an info toast with the message produced
 * by `extractBlobError`. The "checking" flag is always cleared afterwards.
 */
const handlePlatformOutputCheck = async () => {
  const siteId = report?.site?.id;
  if (!siteId) {
    notify('error', 'Analyze a site first.');
    return;
  }

  if (!entitlements?.canPlatformOutput) {
    notify('info', 'Premium unlocks Step 4 platform-specific schema output.');
    return;
  }

  try {
    setIsCheckingPlatformOutput(true);

    const exportRequest = {
      siteId,
      outputMode: 'platform',
      platform: selectedPlatform,
    };
    const requestConfig = { responseType: 'blob' as const };

    await axios.post('/sites/export', exportRequest, requestConfig);
  } catch (error: any) {
    console.error('Platform output check failed:', error);
    const failureMessage = await extractBlobError(error);
    notify('info', failureMessage);
  } finally {
    // Re-enable the button whether the request succeeded or failed.
    setIsCheckingPlatformOutput(false);
  }
};
const crawlPlan = report?.analysis?.crawlPlan;
return (
<>
<Head>
@ -257,8 +373,9 @@ const SchemaAnalyzerPage = () => {
<div>
<h2 className='text-xl font-semibold text-slate-900 dark:text-white'>Analyze a customer site</h2>
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
Enter a domain or full URL. The app will detect the platform, inspect homepage structured data,
generate rules-based schema recommendations, and prepare developer-ready code snippets.
Enter a domain or full URL. The app will detect the platform, crawl up to your allowed page limit,
inspect structured data across the discovered pages, generate rules-based schema recommendations,
and prepare developer-ready code snippets.
</p>
<div className='mt-6'>
@ -283,12 +400,60 @@ const SchemaAnalyzerPage = () => {
</FormField>
</div>
<BaseButtons type='justify-start' className='mt-2'>
<div className='grid gap-4 md:grid-cols-2'>
<FormField
label='Pages to analyze'
labelFor='schema-requested-pages'
help={`Current plan limit: ${maxPagesPerCrawl} page${maxPagesPerCrawl === 1 ? '' : 's'} per crawl`}
>
<input
id='schema-requested-pages'
name='schema-requested-pages'
type='number'
min={1}
value={requestedPages}
onChange={(event) => {
const nextValue = Number(event.target.value);
setRequestedPages(Number.isInteger(nextValue) && nextValue > 0 ? nextValue : 1);
}}
/>
</FormField>
<FormField
label='Step 4 target platform'
labelFor='schema-platform-output'
help={entitlements?.canPlatformOutput
? 'Premium access detected. Step 4 platform output is reserved for the next phase.'
: 'Premium-only feature: platform-specific code output.'}
>
<select
id='schema-platform-output'
name='schema-platform-output'
value={selectedPlatform}
onChange={(event) => setSelectedPlatform(event.target.value)}
>
{PLATFORM_OPTIONS.map((platformOption) => (
<option key={platformOption.value} value={platformOption.value}>
{platformOption.label}
</option>
))}
</select>
</FormField>
</div>
{isRequestedPagesOverLimit && (
<div className='mt-4 rounded-2xl border border-amber-300 bg-amber-50 p-3 text-sm text-amber-800 dark:border-amber-500/40 dark:bg-amber-500/10 dark:text-amber-100'>
You requested {requestedPages} pages, but this account is capped at {maxPagesPerCrawl}. Upgrade to
Advanced Crawl to raise that limit.
</div>
)}
<BaseButtons type='justify-start' className='mt-4'>
<BaseButton
color='info'
icon={icon.mdiMagnify}
label={isAnalyzing ? 'Analyzing…' : 'Analyze site'}
disabled={isAnalyzing}
disabled={isAnalyzing || isRequestedPagesOverLimit}
onClick={() => {
handleAnalyze().catch(() => null);
}}
@ -313,45 +478,139 @@ const SchemaAnalyzerPage = () => {
});
}}
/>
<BaseButton
color={entitlements?.canPlatformOutput ? 'success' : 'warning'}
outline={!entitlements?.canPlatformOutput}
icon={entitlements?.canPlatformOutput ? icon.mdiCodeBraces : icon.mdiLockOutline}
label={isCheckingPlatformOutput
? 'Checking…'
: entitlements?.canPlatformOutput
? 'Check Step 4 output'
: 'Premium Step 4'}
disabled={!report?.site?.id || isCheckingPlatformOutput}
onClick={() => {
handlePlatformOutputCheck().catch(() => null);
}}
/>
</BaseButtons>
</div>
<div className='rounded-2xl border border-slate-200 bg-slate-50 p-5 dark:border-slate-700 dark:bg-slate-900/40'>
<h3 className='text-base font-semibold text-slate-900 dark:text-white'>Delivery actions</h3>
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
Export a developer handoff file or email the latest recommendations directly.
</p>
<div className='mt-4'>
<FormField label='Developer email' labelFor='schema-email-recipient'>
<input
id='schema-email-recipient'
name='schema-email-recipient'
placeholder='developer@example.com'
value={emailTo}
onChange={(event) => setEmailTo(event.target.value)}
/>
</FormField>
<div className='space-y-4'>
<div className='rounded-2xl border border-slate-200 bg-slate-50 p-5 dark:border-slate-700 dark:bg-slate-900/40'>
<div className='flex items-start justify-between gap-4'>
<div>
<h3 className='text-base font-semibold text-slate-900 dark:text-white'>Paywall status</h3>
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
Advanced Crawl is now enforced and active. Premium still reserves Step 4 platform output. Firecrawl is scaffolded for sitemap + JS-rendered crawling, but not activated yet.
</p>
</div>
<span className={`rounded-full px-3 py-1 text-xs font-semibold ${entitlements?.canPlatformOutput
? 'bg-emerald-100 text-emerald-700 dark:bg-emerald-500/10 dark:text-emerald-200'
: 'bg-slate-200 text-slate-700 dark:bg-slate-700 dark:text-slate-100'}`}>
{entitlements?.canPlatformOutput ? 'Premium access' : 'Basic access'}
</span>
</div>
<div className='mt-4 space-y-3 text-sm'>
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
<span className='text-slate-600 dark:text-slate-300'>Advanced crawl entitlement</span>
<span className={`font-semibold ${entitlements?.canAdvancedCrawl ? 'text-emerald-600 dark:text-emerald-300' : 'text-amber-700 dark:text-amber-300'}`}>
{entitlements?.canAdvancedCrawl ? 'Unlocked' : 'Locked'}
</span>
</div>
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
<span className='text-slate-600 dark:text-slate-300'>Max pages per crawl</span>
<span className='font-semibold text-slate-900 dark:text-white'>{maxPagesPerCrawl}</span>
</div>
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
<span className='text-slate-600 dark:text-slate-300'>Platform-specific Step 4 output</span>
<span className={`font-semibold ${entitlements?.canPlatformOutput ? 'text-emerald-600 dark:text-emerald-300' : 'text-amber-700 dark:text-amber-300'}`}>
{entitlements?.canPlatformOutput ? 'Reserved' : 'Premium only'}
</span>
</div>
</div>
</div>
<div className='rounded-2xl border border-slate-200 bg-slate-50 p-5 dark:border-slate-700 dark:bg-slate-900/40'>
<div className='flex items-start justify-between gap-4'>
<div>
<h3 className='text-base font-semibold text-slate-900 dark:text-white'>Firecrawl scaffold</h3>
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
Sitemap discovery and JS-rendered crawl are planned through Firecrawl. This environment is currently using the built-in crawler only.
</p>
</div>
<span className={`rounded-full px-3 py-1 text-xs font-semibold ${firecrawlStatus?.configured
? 'bg-emerald-100 text-emerald-700 dark:bg-emerald-500/10 dark:text-emerald-200'
: 'bg-amber-100 text-amber-700 dark:bg-amber-500/10 dark:text-amber-200'}`}>
{firecrawlStatus?.configured ? 'Key detected' : 'API key needed'}
</span>
</div>
<div className='mt-4 space-y-3 text-sm'>
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
<span className='text-slate-600 dark:text-slate-300'>Current crawl provider</span>
<span className='font-semibold text-slate-900 dark:text-white'>
{report?.analysis?.crawlPlan?.provider || 'internal'}
</span>
</div>
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
<span className='text-slate-600 dark:text-slate-300'>Sitemap crawl path</span>
<span className='font-semibold text-slate-900 dark:text-white'>
{firecrawlStatus?.wouldHandleSitemapDiscovery ? 'Scaffolded' : 'Not scaffolded'}
</span>
</div>
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
<span className='text-slate-600 dark:text-slate-300'>JS-rendered crawl path</span>
<span className='font-semibold text-slate-900 dark:text-white'>
{firecrawlStatus?.wouldHandleJavascript ? 'Scaffolded' : 'Not scaffolded'}
</span>
</div>
</div>
{firecrawlStatus?.message && (
<div className='mt-4 rounded-xl border border-sky-200 bg-sky-50 px-3 py-3 text-sm text-sky-800 dark:border-sky-500/30 dark:bg-sky-500/10 dark:text-sky-100'>
{firecrawlStatus.message}
</div>
)}
</div>
<div className='rounded-2xl border border-slate-200 bg-slate-50 p-5 dark:border-slate-700 dark:bg-slate-900/40'>
<h3 className='text-base font-semibold text-slate-900 dark:text-white'>Delivery actions</h3>
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
Export a developer handoff file or email the latest recommendations directly.
</p>
<div className='mt-4'>
<FormField label='Developer email' labelFor='schema-email-recipient'>
<input
id='schema-email-recipient'
name='schema-email-recipient'
placeholder='developer@example.com'
value={emailTo}
onChange={(event) => setEmailTo(event.target.value)}
/>
</FormField>
</div>
<BaseButtons type='justify-start'>
<BaseButton
color='success'
icon={icon.mdiDownload}
label={isExportingAll ? 'Exporting…' : 'Export all'}
disabled={!report?.site?.id || isExportingAll}
onClick={() => {
handleExportAll().catch(() => null);
}}
/>
<BaseButton
color='warning'
icon={icon.mdiEmailOutline}
label={emailingId === 'all' ? 'Emailing…' : 'Email all'}
disabled={!report?.site?.id || emailingId === 'all'}
onClick={() => {
handleEmailCode().catch(() => null);
}}
/>
</BaseButtons>
</div>
<BaseButtons type='justify-start'>
<BaseButton
color='success'
icon={icon.mdiDownload}
label={isExportingAll ? 'Exporting…' : 'Export all'}
disabled={!report?.site?.id || isExportingAll}
onClick={() => {
handleExportAll().catch(() => null);
}}
/>
<BaseButton
color='warning'
icon={icon.mdiEmailOutline}
label={emailingId === 'all' ? 'Emailing…' : 'Email all'}
disabled={!report?.site?.id || emailingId === 'all'}
onClick={() => {
handleEmailCode().catch(() => null);
}}
/>
</BaseButtons>
</div>
</div>
</CardBox>
@ -359,51 +618,158 @@ const SchemaAnalyzerPage = () => {
{report?.analysis && (
<div className='grid gap-6 xl:grid-cols-[0.9fr,1.1fr]'>
<CardBox className='h-full'>
<h3 className='text-lg font-semibold text-slate-900 dark:text-white'>Analysis snapshot</h3>
<div className='mt-4 grid gap-3 sm:grid-cols-2'>
<div className='flex flex-col gap-2 sm:flex-row sm:items-start sm:justify-between'>
<div>
<h3 className='text-lg font-semibold text-slate-900 dark:text-white'>Site findings</h3>
<p className='mt-1 text-sm text-slate-500 dark:text-slate-300'>
High-level crawl and structured-data summary from the latest analysis run.
</p>
</div>
<div className='rounded-full bg-slate-100 px-3 py-1 text-xs font-semibold uppercase tracking-wide text-slate-700 dark:bg-slate-800 dark:text-slate-200'>
{report.analysis.platform?.label || 'Unknown platform'}
</div>
</div>
<div className='mt-5 grid gap-4 md:grid-cols-2'>
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
<div className='text-xs uppercase tracking-wide text-slate-500'>Platform</div>
<div className='mt-2 text-lg font-semibold text-slate-900 dark:text-white'>
{report.analysis.platform?.label || 'Unknown'}
</div>
<div className='mt-2 text-xs text-slate-500'>
{report.analysis.platform?.matchedSignals?.join(', ') || 'No strong platform signals found.'}
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Analyzed URL</div>
<div className='mt-2 break-all text-sm text-slate-900 dark:text-white'>
{report.analysis.analyzedUrl || report.site?.base_url || '—'}
</div>
</div>
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
<div className='text-xs uppercase tracking-wide text-slate-500'>Structured data</div>
<div className='mt-2 text-lg font-semibold text-slate-900 dark:text-white'>
{report.analysis.schema?.hasStructuredData ? 'Detected' : 'Not detected'}
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Page title</div>
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
{report.analysis.pageTitle || 'No title found'}
</div>
<div className='mt-2 text-xs text-slate-500'>
JSON-LD: {report.analysis.schema?.jsonLd?.count || 0} Microdata: {report.analysis.schema?.microdata?.count || 0} RDFa: {report.analysis.schema?.rdfa?.count || 0}
</div>
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Pages with structured data</div>
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
{report.analysis.crawlSummary?.pagesWithStructuredData ?? (report.analysis.schema?.hasStructuredData ? 1 : 0)}
</div>
</div>
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>JSON-LD blocks found</div>
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
{report.analysis.schema?.jsonLd?.count || 0}
</div>
</div>
</div>
<div className='mt-5 space-y-3 text-sm text-slate-600 dark:text-slate-300'>
<div>
<span className='font-semibold text-slate-900 dark:text-white'>Requested URL:</span>{' '}
{report.analysis.requestedUrl || '—'}
</div>
<div>
<span className='font-semibold text-slate-900 dark:text-white'>Analyzed URL:</span>{' '}
{report.analysis.analyzedUrl || '—'}
</div>
<div>
<span className='font-semibold text-slate-900 dark:text-white'>Page title:</span>{' '}
{report.analysis.pageTitle || '—'}
</div>
<div>
<span className='font-semibold text-slate-900 dark:text-white'>JSON-LD types found:</span>{' '}
{(report.analysis.schema?.jsonLd?.types || []).join(', ') || 'None'}
</div>
{report.analysis.error && (
<div className='rounded-xl border border-rose-200 bg-rose-50 p-3 text-rose-700 dark:border-rose-500/40 dark:bg-rose-500/10 dark:text-rose-200'>
{report.analysis.error}
{crawlPlan && (
<div className='mt-5 rounded-2xl border border-sky-200 bg-sky-50 p-4 text-sm text-sky-900 dark:border-sky-500/30 dark:bg-sky-500/10 dark:text-sky-100'>
<div className='font-semibold'>Crawl summary</div>
<div className='mt-2 flex flex-col gap-1'>
<span>Requested pages: {crawlPlan.requestedPages || 1}</span>
<span>Plan limit: {crawlPlan.allowedPages || maxPagesPerCrawl}</span>
<span>Pages analyzed: {crawlPlan.actualPagesAnalyzed || 0}</span>
</div>
)}
</div>
{report.analysis.notice && <div className='mt-3'>{report.analysis.notice}</div>}
</div>
)}
{report.analysis.crawlSummary && (
<div className='mt-5 grid gap-4 md:grid-cols-2'>
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Pages without structured data</div>
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
{report.analysis.crawlSummary.pagesWithoutStructuredData ?? 0}
</div>
</div>
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Failed page fetches</div>
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
{report.analysis.crawlSummary.failedPages ?? 0}
</div>
</div>
</div>
)}
{(report.analysis.pages || []).length > 0 && (
<div className='mt-5'>
<div className='mb-2 text-xs font-semibold uppercase tracking-wide text-slate-500'>Analyzed pages</div>
<div className='space-y-3'>
{(report.analysis.pages || []).map((page) => (
<div
key={page.url}
className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'
>
<div className='break-all text-sm font-medium text-slate-900 dark:text-white'>{page.url}</div>
<div className='mt-1 text-sm text-slate-500 dark:text-slate-300'>
{page.title || 'Untitled page'}
</div>
<div className='mt-2 flex flex-wrap gap-2 text-xs'>
<span className='rounded-full bg-slate-100 px-3 py-1 font-semibold text-slate-700 dark:bg-slate-800 dark:text-slate-200'>
Status {page.statusCode || '—'}
</span>
<span className={`rounded-full px-3 py-1 font-semibold ${page.hasStructuredData
? 'bg-emerald-100 text-emerald-700 dark:bg-emerald-500/10 dark:text-emerald-200'
: 'bg-amber-100 text-amber-700 dark:bg-amber-500/10 dark:text-amber-200'}`}>
{page.hasStructuredData ? 'Structured data found' : 'No structured data'}
</span>
{(page.jsonLdTypes || []).slice(0, 3).map((typeName) => (
<span
key={`${page.url}-${typeName}`}
className='rounded-full bg-sky-100 px-3 py-1 font-semibold text-sky-700 dark:bg-sky-500/10 dark:text-sky-200'
>
{typeName}
</span>
))}
</div>
</div>
))}
</div>
</div>
)}
{(report.analysis.failedPages || []).length > 0 && (
<div className='mt-5 rounded-2xl border border-amber-300 bg-amber-50 p-4 text-sm text-amber-900 dark:border-amber-500/30 dark:bg-amber-500/10 dark:text-amber-100'>
<div className='font-semibold'>Some internal pages could not be fetched</div>
<ul className='mt-2 list-disc space-y-1 pl-5'>
{(report.analysis.failedPages || []).map((page) => (
<li key={`${page.url}-${page.error}`}>
<span className='font-medium'>{page.url}</span>: {page.error}
</li>
))}
</ul>
</div>
)}
{(report.analysis.schema?.jsonLd?.types || []).length > 0 && (
<div className='mt-5'>
<div className='mb-2 text-xs font-semibold uppercase tracking-wide text-slate-500'>Detected JSON-LD types</div>
<div className='flex flex-wrap gap-2'>
{(report.analysis.schema?.jsonLd?.types || []).map((typeName) => (
<span
key={typeName}
className='rounded-full bg-emerald-100 px-3 py-1 text-xs font-semibold text-emerald-700 dark:bg-emerald-500/10 dark:text-emerald-200'
>
{typeName}
</span>
))}
</div>
</div>
)}
{(report.analysis.schema?.jsonLd?.invalidBlocks || []).length > 0 && (
<div className='mt-5 rounded-2xl border border-amber-300 bg-amber-50 p-4 text-sm text-amber-900 dark:border-amber-500/30 dark:bg-amber-500/10 dark:text-amber-100'>
<div className='font-semibold'>Invalid JSON-LD detected</div>
<ul className='mt-2 list-disc space-y-1 pl-5'>
{(report.analysis.schema?.jsonLd?.invalidBlocks || []).map((block) => (
<li key={`${block.index}-${block.message}`}>
Block {block.index + 1}: {block.message}
</li>
))}
</ul>
</div>
)}
{report.analysis.error && (
<div className='mt-5 rounded-xl border border-rose-200 bg-rose-50 p-3 text-rose-700 dark:border-rose-500/40 dark:bg-rose-500/10 dark:text-rose-200'>
{report.analysis.error}
</div>
)}
</CardBox>
<CardBox className='h-full'>