Autosave: 20260414-164650
This commit is contained in:
parent
b9a2103bb3
commit
fa68f426aa
@ -0,0 +1,96 @@
|
||||
const { v4: uuid } = require('uuid');
|
||||
|
||||
// Names of the permissions this migration inserts into (and removes from)
// the `permissions` table.
const PERMISSION_NAMES = ['USE_ADVANCED_CRAWL', 'USE_PLATFORM_OUTPUT'];
|
||||
|
||||
module.exports = {
|
||||
async up(queryInterface) {
|
||||
const transaction = await queryInterface.sequelize.transaction();
|
||||
|
||||
try {
|
||||
const createdAt = new Date();
|
||||
const updatedAt = createdAt;
|
||||
|
||||
const existingPermissions = await queryInterface.sequelize.query(
|
||||
'SELECT id, name FROM permissions WHERE name IN (:permissionNames);',
|
||||
{
|
||||
replacements: { permissionNames: PERMISSION_NAMES },
|
||||
transaction,
|
||||
type: queryInterface.sequelize.QueryTypes.SELECT,
|
||||
},
|
||||
);
|
||||
|
||||
const existingPermissionNames = new Set(
|
||||
existingPermissions.map((permission) => permission.name),
|
||||
);
|
||||
|
||||
const missingPermissions = PERMISSION_NAMES
|
||||
.filter((permissionName) => !existingPermissionNames.has(permissionName))
|
||||
.map((permissionName) => ({
|
||||
id: uuid(),
|
||||
name: permissionName,
|
||||
createdAt,
|
||||
updatedAt,
|
||||
}));
|
||||
|
||||
if (missingPermissions.length > 0) {
|
||||
await queryInterface.bulkInsert('permissions', missingPermissions, {
|
||||
transaction,
|
||||
});
|
||||
}
|
||||
|
||||
await transaction.commit();
|
||||
} catch (error) {
|
||||
await transaction.rollback();
|
||||
throw error;
|
||||
}
|
||||
},
|
||||
|
||||
async down(queryInterface) {
|
||||
const transaction = await queryInterface.sequelize.transaction();
|
||||
|
||||
try {
|
||||
const permissions = await queryInterface.sequelize.query(
|
||||
'SELECT id FROM permissions WHERE name IN (:permissionNames);',
|
||||
{
|
||||
replacements: { permissionNames: PERMISSION_NAMES },
|
||||
transaction,
|
||||
type: queryInterface.sequelize.QueryTypes.SELECT,
|
||||
},
|
||||
);
|
||||
|
||||
if (permissions.length > 0) {
|
||||
await queryInterface.bulkDelete(
|
||||
'rolesPermissionsPermissions',
|
||||
{
|
||||
permissionId: permissions.map((permission) => permission.id),
|
||||
},
|
||||
{ transaction },
|
||||
);
|
||||
|
||||
await queryInterface.bulkDelete(
|
||||
'usersCustom_permissionsPermissions',
|
||||
{
|
||||
permissionId: permissions.map((permission) => permission.id),
|
||||
},
|
||||
{ transaction },
|
||||
);
|
||||
|
||||
await queryInterface.bulkDelete(
|
||||
'permissions',
|
||||
{
|
||||
id: permissions.map((permission) => permission.id),
|
||||
},
|
||||
{ transaction },
|
||||
);
|
||||
}
|
||||
|
||||
await transaction.commit();
|
||||
} catch (error) {
|
||||
await transaction.rollback();
|
||||
throw error;
|
||||
}
|
||||
},
|
||||
};
|
||||
@ -0,0 +1,130 @@
|
||||
// Role whose permission links this migration creates and removes.
const ROLE_NAME = 'Administrator';

// Permissions that get linked to ROLE_NAME.
const PERMISSION_NAMES = ['USE_ADVANCED_CRAWL', 'USE_PLATFORM_OUTPUT'];
|
||||
|
||||
module.exports = {
|
||||
async up(queryInterface) {
|
||||
const transaction = await queryInterface.sequelize.transaction();
|
||||
|
||||
try {
|
||||
const createdAt = new Date();
|
||||
const updatedAt = createdAt;
|
||||
|
||||
const role = await queryInterface.sequelize.query(
|
||||
'SELECT id FROM roles WHERE name = :roleName LIMIT 1;',
|
||||
{
|
||||
replacements: { roleName: ROLE_NAME },
|
||||
transaction,
|
||||
type: queryInterface.sequelize.QueryTypes.SELECT,
|
||||
},
|
||||
);
|
||||
|
||||
if (!role[0]) {
|
||||
await transaction.commit();
|
||||
return;
|
||||
}
|
||||
|
||||
const permissions = await queryInterface.sequelize.query(
|
||||
'SELECT id FROM permissions WHERE name IN (:permissionNames);',
|
||||
{
|
||||
replacements: { permissionNames: PERMISSION_NAMES },
|
||||
transaction,
|
||||
type: queryInterface.sequelize.QueryTypes.SELECT,
|
||||
},
|
||||
);
|
||||
|
||||
if (permissions.length === 0) {
|
||||
await transaction.commit();
|
||||
return;
|
||||
}
|
||||
|
||||
const existingPairs = await queryInterface.sequelize.query(
|
||||
`SELECT "permissionId"
|
||||
FROM "rolesPermissionsPermissions"
|
||||
WHERE "roles_permissionsId" = :roleId
|
||||
AND "permissionId" IN (:permissionIds);`,
|
||||
{
|
||||
replacements: {
|
||||
roleId: role[0].id,
|
||||
permissionIds: permissions.map((permission) => permission.id),
|
||||
},
|
||||
transaction,
|
||||
type: queryInterface.sequelize.QueryTypes.SELECT,
|
||||
},
|
||||
);
|
||||
|
||||
const existingPermissionIds = new Set(
|
||||
existingPairs.map((pair) => pair.permissionId),
|
||||
);
|
||||
|
||||
const missingPairs = permissions
|
||||
.filter((permission) => !existingPermissionIds.has(permission.id))
|
||||
.map((permission) => ({
|
||||
createdAt,
|
||||
updatedAt,
|
||||
roles_permissionsId: role[0].id,
|
||||
permissionId: permission.id,
|
||||
}));
|
||||
|
||||
if (missingPairs.length > 0) {
|
||||
await queryInterface.bulkInsert(
|
||||
'rolesPermissionsPermissions',
|
||||
missingPairs,
|
||||
{ transaction },
|
||||
);
|
||||
}
|
||||
|
||||
await transaction.commit();
|
||||
} catch (error) {
|
||||
await transaction.rollback();
|
||||
throw error;
|
||||
}
|
||||
},
|
||||
|
||||
async down(queryInterface) {
|
||||
const transaction = await queryInterface.sequelize.transaction();
|
||||
|
||||
try {
|
||||
const role = await queryInterface.sequelize.query(
|
||||
'SELECT id FROM roles WHERE name = :roleName LIMIT 1;',
|
||||
{
|
||||
replacements: { roleName: ROLE_NAME },
|
||||
transaction,
|
||||
type: queryInterface.sequelize.QueryTypes.SELECT,
|
||||
},
|
||||
);
|
||||
|
||||
if (!role[0]) {
|
||||
await transaction.commit();
|
||||
return;
|
||||
}
|
||||
|
||||
const permissions = await queryInterface.sequelize.query(
|
||||
'SELECT id FROM permissions WHERE name IN (:permissionNames);',
|
||||
{
|
||||
replacements: { permissionNames: PERMISSION_NAMES },
|
||||
transaction,
|
||||
type: queryInterface.sequelize.QueryTypes.SELECT,
|
||||
},
|
||||
);
|
||||
|
||||
if (permissions.length > 0) {
|
||||
await queryInterface.bulkDelete(
|
||||
'rolesPermissionsPermissions',
|
||||
{
|
||||
roles_permissionsId: role[0].id,
|
||||
permissionId: permissions.map((permission) => permission.id),
|
||||
},
|
||||
{ transaction },
|
||||
);
|
||||
}
|
||||
|
||||
await transaction.commit();
|
||||
} catch (error) {
|
||||
await transaction.rollback();
|
||||
throw error;
|
||||
}
|
||||
},
|
||||
};
|
||||
63
backend/src/services/firecrawl.js
Normal file
63
backend/src/services/firecrawl.js
Normal file
@ -0,0 +1,63 @@
|
||||
// Base URL used when FIRECRAWL_BASE_URL is not set in the environment.
const FIRECRAWL_DEFAULT_BASE_URL = 'https://api.firecrawl.dev/v1';
|
||||
|
||||
/**
 * Interprets a loosely-typed flag (for example an environment variable) as a
 * boolean.
 *
 * @param {*} value - Raw value to interpret.
 * @param {boolean} [defaultValue=false] - Returned when `value` is missing,
 *   empty, or not one of the recognised spellings.
 * @returns {boolean} `value` itself when it is already a boolean; `true` for
 *   1/true/yes/on and `false` for 0/false/no/off (case-insensitive, trimmed);
 *   otherwise `defaultValue`.
 */
function toBoolean(value, defaultValue = false) {
  if (typeof value === 'boolean') {
    return value;
  }

  if (value == null || value === '') {
    return defaultValue;
  }

  switch (String(value).trim().toLowerCase()) {
    case '1':
    case 'true':
    case 'yes':
    case 'on':
      return true;
    case '0':
    case 'false':
    case 'no':
    case 'off':
      return false;
    default:
      return defaultValue;
  }
}
|
||||
|
||||
/**
 * Reads the Firecrawl integration settings from the environment.
 *
 * Environment variables read:
 * - FIRECRAWL_API_KEY: trimmed; only its presence is reported, never its value.
 * - FIRECRAWL_BASE_URL: trimmed; falls back to FIRECRAWL_DEFAULT_BASE_URL when
 *   unset, empty, or whitespace-only.
 * - FIRECRAWL_ENABLED: parsed with toBoolean, defaulting to true.
 *
 * @returns {{provider: string, baseUrl: string, enabled: boolean,
 *   configured: boolean, hasApiKey: boolean, mode: string}}
 */
function getFirecrawlRuntime() {
  const apiKey = String(process.env.FIRECRAWL_API_KEY || '').trim();

  // Trim before falling back: a whitespace-only FIRECRAWL_BASE_URL is truthy,
  // so applying the default first would leave an empty base URL after trim.
  const baseUrl =
    String(process.env.FIRECRAWL_BASE_URL || '').trim() || FIRECRAWL_DEFAULT_BASE_URL;

  const enabled = toBoolean(process.env.FIRECRAWL_ENABLED, true);
  const hasApiKey = Boolean(apiKey);

  return {
    provider: 'firecrawl',
    baseUrl,
    enabled,
    configured: hasApiKey,
    hasApiKey,
    mode: 'scaffold_only',
  };
}
|
||||
|
||||
/**
 * Describes how the (not yet activated) Firecrawl integration would apply to
 * a crawl request. No Firecrawl call is made here.
 *
 * @param {object} [options]
 * @param {number|string} [options.requestedPages] - Pages requested; treated
 *   as 1 when falsy.
 * @param {object} [options.entitlements] - Only `canAdvancedCrawl` is read.
 * @returns {object} The runtime settings from getFirecrawlRuntime plus
 *   `status`, capability flags, `shouldUseFirecrawlLater` and a
 *   human-readable `message`.
 */
function getFirecrawlScaffold({ requestedPages, entitlements } = {}) {
  const runtime = getFirecrawlRuntime();
  const { configured, enabled } = runtime;

  const isMultiPageRequest = Number(requestedPages || 1) > 1;
  const hasAdvancedCrawl = Boolean(entitlements?.canAdvancedCrawl);

  let status = 'awaiting_api_key';
  let message = 'Firecrawl scaffold is wired, but FIRECRAWL_API_KEY is not set yet. The analyzer still uses the built-in crawler for now.';

  if (configured) {
    status = 'ready_for_activation';
    message = 'Firecrawl scaffold is wired and ready for the next activation step, but this analyzer still uses the built-in crawler today.';
  }

  return {
    ...runtime,
    status,
    wouldHandleJavascript: true,
    wouldHandleSitemapDiscovery: true,
    shouldUseFirecrawlLater: enabled && (isMultiPageRequest || hasAdvancedCrawl),
    message,
  };
}
|
||||
|
||||
module.exports = {
|
||||
getFirecrawlRuntime,
|
||||
getFirecrawlScaffold,
|
||||
};
|
||||
104
backend/src/services/siteEntitlements.js
Normal file
104
backend/src/services/siteEntitlements.js
Normal file
@ -0,0 +1,104 @@
|
||||
const ValidationError = require('./notifications/errors/validation');
|
||||
|
||||
// Page limits per crawl, without and with the advanced-crawl entitlement.
const BASIC_MAX_PAGES_PER_CRAWL = 1;
const ADVANCED_MAX_PAGES_PER_CRAWL = 25;

// Permission names that unlock the corresponding entitlement.
const ADVANCED_CRAWL_PERMISSION = 'USE_ADVANCED_CRAWL';
const PLATFORM_OUTPUT_PERMISSION = 'USE_PLATFORM_OUTPUT';
|
||||
|
||||
/**
 * Collects the names of all permissions attached to a user, from both
 * `custom_permissions` and `app_role_permissions`.
 *
 * @param {object} [currentUser] - User record; either list may be absent.
 * @returns {Set<string>} De-duplicated permission names.
 */
function getPermissionNames(currentUser) {
  const nameOf = (permission) => permission.name;

  const customNames = (currentUser?.custom_permissions || []).map(nameOf);
  const roleNames = (currentUser?.app_role_permissions || []).map(nameOf);

  return new Set(customNames.concat(roleNames));
}
|
||||
|
||||
/**
 * Tells whether the user's application role is named exactly "Administrator".
 *
 * @param {object} [currentUser] - User record; `app_role` may be absent.
 * @returns {boolean}
 */
function isAdministrator(currentUser) {
  const roleName = currentUser?.app_role?.name;

  return roleName === 'Administrator';
}
|
||||
|
||||
/**
 * Checks whether a user holds a given permission.
 *
 * @param {object} [currentUser] - User record.
 * @param {string} permissionName - Permission to look for.
 * @returns {boolean} `false` for an empty permission name; `true` for any
 *   Administrator; otherwise whether the user's permission names include it.
 */
function hasEntitlement(currentUser, permissionName) {
  if (!permissionName) {
    return false;
  }

  // Administrators hold every entitlement without an explicit grant.
  return (
    isAdministrator(currentUser)
    || getPermissionNames(currentUser).has(permissionName)
  );
}
|
||||
|
||||
/**
 * Summarises what a user may do with site analysis.
 *
 * @param {object} [currentUser] - User record passed through to hasEntitlement.
 * @returns {{canAdvancedCrawl: boolean, canPlatformOutput: boolean,
 *   maxPagesPerCrawl: number,
 *   permissions: {advancedCrawl: string, platformOutput: string}}}
 *   Capability flags, the page limit they imply, and the permission names
 *   that grant each capability.
 */
function getSiteEntitlements(currentUser) {
  const [canAdvancedCrawl, canPlatformOutput] = [
    ADVANCED_CRAWL_PERMISSION,
    PLATFORM_OUTPUT_PERMISSION,
  ].map((permissionName) => hasEntitlement(currentUser, permissionName));

  let maxPagesPerCrawl = BASIC_MAX_PAGES_PER_CRAWL;
  if (canAdvancedCrawl) {
    maxPagesPerCrawl = ADVANCED_MAX_PAGES_PER_CRAWL;
  }

  return {
    canAdvancedCrawl,
    canPlatformOutput,
    maxPagesPerCrawl,
    permissions: {
      advancedCrawl: ADVANCED_CRAWL_PERMISSION,
      platformOutput: PLATFORM_OUTPUT_PERMISSION,
    },
  };
}
|
||||
|
||||
/**
 * Converts the raw `requestedPages` input into a positive integer.
 *
 * @param {*} rawRequestedPages - Value as received from the caller.
 * @returns {number} 1 when the input is undefined, null or an empty string;
 *   otherwise the numeric value.
 * @throws {ValidationError} When the value does not convert to an integer
 *   greater than or equal to 1.
 */
function parseRequestedPages(rawRequestedPages) {
  const isMissing = rawRequestedPages == null || rawRequestedPages === '';

  if (isMissing) {
    return 1;
  }

  const pageCount = Number(rawRequestedPages);

  if (Number.isInteger(pageCount) && pageCount >= 1) {
    return pageCount;
  }

  throw new ValidationError('errors.validation.message');
}
|
||||
|
||||
/**
 * Verifies that the user may crawl the requested number of pages.
 *
 * @param {number} requestedPages - Page count, already parsed to a number.
 * @param {object} [currentUser] - User record passed to getSiteEntitlements.
 * @returns {object} The user's entitlements when the request is within limits.
 * @throws {Error} With `code` 403 when `requestedPages` exceeds
 *   `maxPagesPerCrawl`. Users without Advanced Crawl are pointed at the
 *   upgrade; users who already have it are told the cap instead, because an
 *   upgrade prompt would be wrong for them.
 */
function ensureRequestedPagesAllowed(requestedPages, currentUser) {
  const entitlements = getSiteEntitlements(currentUser);
  const { maxPagesPerCrawl, canAdvancedCrawl } = entitlements;

  if (requestedPages > maxPagesPerCrawl) {
    const limit = `${maxPagesPerCrawl} page${maxPagesPerCrawl === 1 ? '' : 's'}`;

    const message = canAdvancedCrawl
      ? `Advanced Crawl allows up to ${limit} per crawl. Request ${maxPagesPerCrawl} or fewer pages instead of ${requestedPages}.`
      : `Your current plan allows up to ${limit} per crawl. Upgrade to Advanced Crawl to analyze ${requestedPages} pages.`;

    const error = new Error(message);
    error.code = 403;
    throw error;
  }

  return entitlements;
}
|
||||
|
||||
/**
 * Verifies that the user may generate platform-specific schema output.
 *
 * @param {object} [currentUser] - User record passed to getSiteEntitlements.
 * @param {string} [platform] - Platform name, mentioned in the error message
 *   when provided.
 * @returns {object} The user's entitlements when platform output is allowed.
 * @throws {Error} With `code` 403 when `canPlatformOutput` is false.
 */
function ensurePlatformOutputAllowed(currentUser, platform) {
  const entitlements = getSiteEntitlements(currentUser);

  if (entitlements.canPlatformOutput) {
    return entitlements;
  }

  const platformSuffix = platform ? ` for ${platform}` : '';

  throw Object.assign(
    new Error(
      `Platform-specific schema output${platformSuffix} is part of the Premium plan. Upgrade to unlock Step 4 code generation.`,
    ),
    { code: 403 },
  );
}
|
||||
|
||||
module.exports = {
|
||||
BASIC_MAX_PAGES_PER_CRAWL,
|
||||
ADVANCED_MAX_PAGES_PER_CRAWL,
|
||||
ADVANCED_CRAWL_PERMISSION,
|
||||
PLATFORM_OUTPUT_PERMISSION,
|
||||
getSiteEntitlements,
|
||||
hasEntitlement,
|
||||
parseRequestedPages,
|
||||
ensureRequestedPagesAllowed,
|
||||
ensurePlatformOutputAllowed,
|
||||
};
|
||||
@ -2,9 +2,18 @@ const axios = require('axios');
|
||||
const db = require('../db/models');
|
||||
const ValidationError = require('./notifications/errors/validation');
|
||||
const EmailSender = require('./email');
|
||||
const {
|
||||
getSiteEntitlements,
|
||||
parseRequestedPages,
|
||||
ensureRequestedPagesAllowed,
|
||||
ensurePlatformOutputAllowed,
|
||||
} = require('./siteEntitlements');
|
||||
const { getFirecrawlScaffold } = require('./firecrawl');
|
||||
|
||||
// Passed as the axios `timeout` option for each page request.
const REQUEST_TIMEOUT = 15000;

const PREVIEW_LIMIT = 5;

// Maximum number of page / failed-page entries included in an analysis summary.
const PAGE_PREVIEW_LIMIT = 10;

// URL paths ending in one of these extensions are not queued for crawling.
const NON_HTML_FILE_PATTERN = /\.(?:7z|avi|bmp|css|csv|docx?|eot|gif|ico|jpe?g|js|json|map|mov|mp3|mp4|pdf|png|pptx?|rar|svg|tar|tgz|txt|wav|webm|webp|woff2?|xlsx?|xml|zip)$/i;
|
||||
|
||||
function normalizeUrl(rawUrl) {
|
||||
if (!rawUrl || typeof rawUrl !== 'string') {
|
||||
@ -229,7 +238,383 @@ function detectPlatform(html, headers, analyzedUrl) {
|
||||
};
|
||||
}
|
||||
|
||||
function buildFailureAnalysis(normalizedUrl, error) {
|
||||
/**
 * Decides from the Content-Type header whether a response should be treated
 * as an HTML document.
 *
 * @param {object} [response] - HTTP response; only `headers['content-type']`
 *   is read.
 * @returns {boolean} `true` when the header is missing or empty, or when it
 *   mentions `text/html` or `application/xhtml+xml` (case-insensitive).
 */
function isHtmlLikeResponse(response) {
  const rawContentType = response?.headers?.['content-type'];
  const contentType = String(rawContentType || '').toLowerCase();

  // A response without a declared type is given the benefit of the doubt.
  if (contentType === '') {
    return true;
  }

  const htmlMimeTypes = ['text/html', 'application/xhtml+xml'];

  return htmlMimeTypes.some((mimeType) => contentType.includes(mimeType));
}
|
||||
|
||||
/**
 * Turns the various accepted forms of an allowed-hostname list into a new
 * Set of lower-cased strings.
 *
 * @param {Set|Array|*} allowedHostnames - A Set, an array, a single truthy
 *   value, or nothing.
 * @returns {Set<string>} A fresh Set; the input is never returned or mutated.
 *   Empty when the input is falsy.
 */
function normalizeAllowedHostnames(allowedHostnames) {
  let hostnames;

  if (allowedHostnames instanceof Set) {
    hostnames = Array.from(allowedHostnames);
  } else if (Array.isArray(allowedHostnames)) {
    hostnames = allowedHostnames;
  } else if (allowedHostnames) {
    hostnames = [allowedHostnames];
  } else {
    hostnames = [];
  }

  return new Set(hostnames.map((hostname) => String(hostname).toLowerCase()));
}
|
||||
|
||||
/**
 * Resolves a link found on a page into a crawlable, normalized URL.
 *
 * @param {*} rawUrl - The href as found in the page.
 * @param {string} parentUrl - URL of the page the link was found on; used as
 *   the base for relative links.
 * @param {Set|Array|string} [allowedHostnames] - Hostnames the crawl may
 *   visit. When empty, no hostname restriction is applied.
 * @returns {string|null} The result of normalizeUrl on the resolved URL with
 *   its fragment removed, or `null` when the link is empty, fragment-only,
 *   uses a mailto/tel/javascript/data scheme, cannot be parsed, is not
 *   http(s), points at a hostname outside the allowed set, or matches
 *   NON_HTML_FILE_PATTERN.
 */
function normalizeCrawlUrl(rawUrl, parentUrl, allowedHostnames) {
  if (typeof rawUrl !== 'string') {
    return null;
  }

  const candidate = rawUrl.trim();
  const isNotNavigable =
    candidate === ''
    || candidate.startsWith('#')
    || /^(?:mailto|tel|javascript|data):/i.test(candidate);

  if (isNotNavigable) {
    return null;
  }

  let resolved;

  try {
    resolved = new URL(candidate, parentUrl);
  } catch {
    return null;
  }

  if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') {
    return null;
  }

  const allowed = normalizeAllowedHostnames(allowedHostnames);
  const isOutOfScope =
    allowed.size > 0 && !allowed.has(resolved.hostname.toLowerCase());

  if (isOutOfScope || NON_HTML_FILE_PATTERN.test(resolved.pathname)) {
    return null;
  }

  // Fragments never identify a different document, so drop them.
  resolved.hash = '';

  return normalizeUrl(resolved.toString());
}
|
||||
|
||||
/**
 * Finds the crawlable links in a page's anchor tags.
 *
 * Hrefs are located with a regular expression (double-quoted, single-quoted
 * or unquoted values), not an HTML parser.
 *
 * @param {string} html - Page markup; non-string input is coerced.
 * @param {string} pageUrl - URL of the page, used to resolve relative links.
 * @param {Set|Array|string} [allowedHostnames] - Passed to normalizeCrawlUrl.
 * @returns {string[]} Unique normalized URLs in order of first appearance.
 */
function extractInternalLinks(html, pageUrl, allowedHostnames) {
  const anchorHrefPattern = /<a\s[^>]*href=(?:"([^"]+)"|'([^']+)'|([^\s>]+))/gi;
  const uniqueLinks = new Set();

  for (const [, doubleQuoted, singleQuoted, unquoted] of String(html || '').matchAll(anchorHrefPattern)) {
    const href = doubleQuoted || singleQuoted || unquoted || '';
    const link = normalizeCrawlUrl(href, pageUrl, allowedHostnames);

    if (link) {
      uniqueLinks.add(link);
    }
  }

  return Array.from(uniqueLinks);
}
|
||||
|
||||
/**
 * Produces a short description of a failed page fetch.
 *
 * @param {Error} error - The error raised while fetching a page.
 * @returns {string} For axios errors with a response, the HTTP status; for
 *   other axios errors, the error message; for anything else, the error
 *   message or 'Unknown crawl error' when it has none.
 */
function summarizeCrawlError(error) {
  if (!axios.isAxiosError(error)) {
    return error.message || 'Unknown crawl error';
  }

  const { response, message } = error;

  return response ? `Request failed with status ${response.status}` : message;
}
|
||||
|
||||
/**
 * Downloads one page and runs the per-page analysis on it.
 *
 * Side effect: when `allowedHostnames` is a Set, the hostname of the final
 * (post-redirect) URL is added to it, so the caller's crawl scope grows to
 * include wherever this request ended up.
 *
 * @param {string} pageUrl - URL to fetch.
 * @param {Set|Array|string} [allowedHostnames] - Hostnames links may point to.
 * @returns {Promise<object>} The requested and final URLs, page title, HTTP
 *   status, raw HTML, detected platform, schema summary, page signals and the
 *   links discovered on the page.
 * @throws {Error} With `code` 415 when the response is not HTML-like; any
 *   error raised by the HTTP request is propagated unchanged.
 */
async function fetchAnalyzedPage(pageUrl, allowedHostnames) {
  const requestOptions = {
    timeout: REQUEST_TIMEOUT,
    maxRedirects: 5,
    responseType: 'text',
    headers: {
      Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      'User-Agent': 'Mozilla/5.0 (compatible; SchemaCrawlerBot/1.0; +https://flatlogic.com)',
    },
  };

  const response = await axios.get(pageUrl, requestOptions);

  if (!isHtmlLikeResponse(response)) {
    throw Object.assign(new Error('Skipped non-HTML response during crawl.'), {
      code: 415,
    });
  }

  // Prefer the URL the request actually ended on after redirects.
  const finalUrl =
    response.request?.res?.responseUrl || response.config?.url || pageUrl;
  const analyzedUrl = normalizeUrl(finalUrl);

  // Links on this page may point at the allowed hosts plus the final host.
  const linkScope = normalizeAllowedHostnames(allowedHostnames);
  const finalHostname = new URL(analyzedUrl).hostname.toLowerCase();
  linkScope.add(finalHostname);

  if (allowedHostnames instanceof Set) {
    allowedHostnames.add(finalHostname);
  }

  const html = typeof response.data === 'string' ? response.data : '';
  const pageTitle = extractPageTitle(html);
  const platform = detectPlatform(html, response.headers, analyzedUrl);
  const schema = extractSchemaSummary(html);
  const pageSignals = inferPageSignals(html, analyzedUrl, pageTitle, platform);
  const discoveredLinks = extractInternalLinks(html, analyzedUrl, linkScope);

  return {
    requestedUrl: pageUrl,
    analyzedUrl,
    pageTitle,
    statusCode: response.status,
    html,
    platform,
    schema,
    pageSignals,
    discoveredLinks,
  };
}
|
||||
|
||||
/**
 * Crawls a site breadth-first from `baseUrl`, one page at a time, until
 * `requestedPages` pages have been fetched successfully or no queued URLs
 * remain.
 *
 * Failed fetches are recorded but do not count toward `requestedPages`, so
 * the loop keeps drawing from the queue after a failure.
 *
 * @param {string} baseUrl - Starting URL.
 * @param {number} requestedPages - Number of successfully fetched pages to
 *   stop at.
 * @returns {Promise<{pages: object[], failedPages: {url: string, error: string}[],
 *   discoveredInternalPages: number}>} Fetched pages in crawl order, the
 *   failures, and how many distinct new links were queued.
 */
async function crawlPages(baseUrl, requestedPages) {
  const startUrl = normalizeUrl(baseUrl);
  const allowedHostnames = new Set([new URL(startUrl).hostname.toLowerCase()]);

  const visited = new Set();
  const enqueued = new Set([startUrl]);
  const queue = [startUrl];

  const pages = [];
  const failedPages = [];
  let discoveredInternalPages = 0;

  while (queue.length > 0 && pages.length < requestedPages) {
    const currentUrl = queue.shift();
    const shouldFetch = Boolean(currentUrl) && !visited.has(currentUrl);

    if (shouldFetch) {
      visited.add(currentUrl);

      try {
        const page = await fetchAnalyzedPage(currentUrl, allowedHostnames);

        // The final URL may differ from the requested one after redirects;
        // mark it as seen so it is not fetched again.
        visited.add(page.analyzedUrl);
        enqueued.add(page.analyzedUrl);
        pages.push(page);

        for (const linkUrl of page.discoveredLinks) {
          const isNewLink = !visited.has(linkUrl) && !enqueued.has(linkUrl);

          if (isNewLink) {
            enqueued.add(linkUrl);
            queue.push(linkUrl);
            discoveredInternalPages += 1;
          }
        }
      } catch (error) {
        failedPages.push({
          url: currentUrl,
          error: summarizeCrawlError(error),
        });
      }
    }
  }

  return { pages, failedPages, discoveredInternalPages };
}
|
||||
|
||||
/**
 * Merges the per-page schema summaries of a crawl into one summary.
 *
 * @param {object[]} pageAnalyses - Page results; each may carry a `schema`
 *   with `jsonLd`, `microdata` and `rdfa` sections.
 * @returns {object} Summed counts per format, the distinct JSON-LD types in
 *   order of first appearance, every invalid JSON-LD block tagged with the
 *   `url` of the page it came from, and `hasStructuredData`, which is true
 *   when any page reports structured data.
 */
function buildAggregateSchema(pageAnalyses) {
  const counts = { jsonLd: 0, microdata: 0, rdfa: 0 };
  const distinctTypes = new Set();
  const invalidBlocks = [];

  for (const page of pageAnalyses) {
    const { jsonLd, microdata, rdfa } = page.schema || {};

    counts.jsonLd += jsonLd?.count || 0;
    counts.microdata += microdata?.count || 0;
    counts.rdfa += rdfa?.count || 0;

    for (const typeName of jsonLd?.types || []) {
      distinctTypes.add(typeName);
    }

    for (const block of jsonLd?.invalidBlocks || []) {
      invalidBlocks.push({ ...block, url: page.analyzedUrl });
    }
  }

  const hasStructuredData = pageAnalyses.some(
    (page) => page.schema?.hasStructuredData,
  );

  return {
    hasStructuredData,
    jsonLd: {
      count: counts.jsonLd,
      types: [...distinctTypes],
      invalidBlocks,
    },
    microdata: {
      count: counts.microdata,
      detected: counts.microdata > 0,
    },
    rdfa: {
      count: counts.rdfa,
      detected: counts.rdfa > 0,
    },
  };
}
|
||||
|
||||
/**
 * Rolls the per-page content hints of a crawl up into site-wide totals.
 *
 * @param {object[]} pageAnalyses - Page results; each may carry `pageSignals`.
 * @returns {object} For each of FAQ, blog, product and local-business hints:
 *   a boolean telling whether any page showed the hint, and a count of the
 *   pages that did.
 */
function buildAggregateSignals(pageAnalyses) {
  const totals = {
    hasFaqHints: false,
    hasBlogHints: false,
    hasProductHints: false,
    hasLocalBusinessHints: false,
    faqPages: 0,
    blogPages: 0,
    productPages: 0,
    localBusinessPages: 0,
  };

  // Each hint flag is paired with the counter it feeds.
  const trackedHints = [
    ['hasFaqHints', 'faqPages'],
    ['hasBlogHints', 'blogPages'],
    ['hasProductHints', 'productPages'],
    ['hasLocalBusinessHints', 'localBusinessPages'],
  ];

  for (const page of pageAnalyses) {
    const pageSignals = page.pageSignals || {};

    for (const [flagName, counterName] of trackedHints) {
      if (pageSignals[flagName]) {
        totals[flagName] = true;
        totals[counterName] += 1;
      }
    }
  }

  return totals;
}
|
||||
|
||||
/**
 * Builds the human-readable note shown for multi-page crawls.
 *
 * @param {object} details
 * @param {number} details.requestedPages - Pages the user asked for.
 * @param {number} details.actualPagesAnalyzed - Pages fetched successfully.
 * @param {number} details.failedPages - Number of pages that failed.
 * @param {number} details.discoveredInternalPages - Links queued in the crawl.
 * @param {object} [details.firecrawl] - Its `message`, when present, is
 *   appended.
 * @returns {string|null} Sentences joined by single spaces, or `null` when
 *   one page or fewer was requested.
 */
function buildCrawlNotice({
  requestedPages,
  actualPagesAnalyzed,
  failedPages,
  discoveredInternalPages,
  firecrawl,
}) {
  if (requestedPages <= 1) {
    return null;
  }

  // The start page itself counts as one available page, hence the + 1.
  const foundTooFewPages = discoveredInternalPages + 1 < requestedPages;

  const sentences = [
    `Advanced crawl analyzed ${actualPagesAnalyzed} of ${requestedPages} requested page${requestedPages === 1 ? '' : 's'}.`,
    foundTooFewPages
      && 'Fewer crawlable internal HTML pages were discovered than requested.',
    failedPages > 0
      && `${failedPages} page${failedPages === 1 ? '' : 's'} could not be fetched during the crawl.`,
    firecrawl?.message,
  ];

  return sentences.filter(Boolean).join(' ');
}
|
||||
|
||||
/**
 * Assembles the stored analysis summary for a finished crawl.
 *
 * The first entry of `pageAnalyses` is treated as the homepage and supplies
 * the top-level URL, title, status code and platform.
 *
 * @param {object} input
 * @param {string} input.normalizedUrl - URL the user asked to analyze.
 * @param {object[]} input.pageAnalyses - Successfully fetched pages.
 * @param {number} input.requestedPages - Pages the user asked for.
 * @param {object} input.entitlements - The user's entitlements.
 * @param {number} input.discoveredInternalPages - Links queued in the crawl.
 * @param {object[]} input.failedPages - Pages that could not be fetched.
 * @param {object} input.firecrawl - Firecrawl scaffold info, passed through.
 * @returns {object} The analysis summary. `finishedAt` is a Date and
 *   `fetchedAt` is its ISO string. `pages` and `failedPages` are capped at
 *   PAGE_PREVIEW_LIMIT entries.
 */
function buildAggregateAnalysis({
  normalizedUrl,
  pageAnalyses,
  requestedPages,
  entitlements,
  discoveredInternalPages,
  failedPages,
  firecrawl,
}) {
  const [homepage] = pageAnalyses;
  const finishedAt = new Date();
  const schema = buildAggregateSchema(pageAnalyses);
  const aggregateSignals = buildAggregateSignals(pageAnalyses);

  const countPages = (predicate) => pageAnalyses.filter(predicate).length;
  const hasStructuredData = (page) => Boolean(page.schema?.hasStructuredData);
  const hasInvalidJsonLd = (page) => (page.schema?.jsonLd?.invalidBlocks || []).length > 0;

  const unknownPlatform = {
    detected: 'unknown',
    label: 'Unknown',
    matchedSignals: [],
  };

  const crawlPlan = {
    requestedPages,
    allowedPages: entitlements.maxPagesPerCrawl,
    actualPagesAnalyzed: pageAnalyses.length,
    advancedCrawlEnabled: entitlements.canAdvancedCrawl,
    provider: 'internal',
  };

  const crawlSummary = {
    pagesWithStructuredData: countPages(hasStructuredData),
    pagesWithoutStructuredData: countPages((page) => !hasStructuredData(page)),
    pagesWithInvalidJsonLd: countPages(hasInvalidJsonLd),
    failedPages: failedPages.length,
    discoveredInternalPages,
  };

  const pagePreviews = pageAnalyses.slice(0, PAGE_PREVIEW_LIMIT).map((page) => ({
    url: page.analyzedUrl,
    title: page.pageTitle,
    statusCode: page.statusCode,
    hasStructuredData: hasStructuredData(page),
    jsonLdTypes: page.schema?.jsonLd?.types || [],
  }));

  const notice = buildCrawlNotice({
    requestedPages,
    actualPagesAnalyzed: pageAnalyses.length,
    failedPages: failedPages.length,
    discoveredInternalPages,
    firecrawl,
  });

  return {
    requestedUrl: normalizedUrl,
    analyzedUrl: homepage?.analyzedUrl || normalizedUrl,
    pageTitle: homepage?.pageTitle || null,
    fetchedAt: finishedAt.toISOString(),
    statusCode: homepage?.statusCode || null,
    platform: homepage?.platform || unknownPlatform,
    schema,
    recommendationCount: 0,
    crawlPlan,
    crawlSummary,
    pages: pagePreviews,
    failedPages: failedPages.slice(0, PAGE_PREVIEW_LIMIT),
    aggregateSignals,
    entitlements,
    firecrawl,
    notice,
    finishedAt,
  };
}
|
||||
|
||||
function buildFailureAnalysis(normalizedUrl, error, firecrawl) {
|
||||
const isAxiosError = axios.isAxiosError(error);
|
||||
|
||||
return {
|
||||
@ -247,6 +632,7 @@ function buildFailureAnalysis(normalizedUrl, error) {
|
||||
microdata: { count: 0, detected: false },
|
||||
rdfa: { count: 0, detected: false },
|
||||
},
|
||||
firecrawl,
|
||||
error: isAxiosError
|
||||
? error.response
|
||||
? `Request failed with status ${error.response.status}`
|
||||
@ -445,22 +831,25 @@ function buildRecommendationCode({ baseUrl, siteName, schemaType, pageScope }) {
|
||||
});
|
||||
}
|
||||
|
||||
function buildRecommendations({ baseUrl, siteName, analysis, html }) {
|
||||
function buildRecommendations({ baseUrl, siteName, analysis, html, pageAnalyses = [] }) {
|
||||
const recommendationList = [];
|
||||
const schemaTypes = analysis?.schema?.jsonLd?.types || [];
|
||||
const pageSignals = inferPageSignals(
|
||||
html,
|
||||
analysis?.analyzedUrl,
|
||||
analysis?.pageTitle,
|
||||
analysis?.platform || {},
|
||||
);
|
||||
const aggregateSignals = analysis?.aggregateSignals || {};
|
||||
const pageSignals = pageAnalyses.length > 0
|
||||
? aggregateSignals
|
||||
: inferPageSignals(
|
||||
html,
|
||||
analysis?.analyzedUrl,
|
||||
analysis?.pageTitle,
|
||||
analysis?.platform || {},
|
||||
);
|
||||
|
||||
if ((analysis?.schema?.jsonLd?.invalidBlocks || []).length > 0) {
|
||||
recommendationList.push({
|
||||
title: 'Fix invalid JSON-LD blocks already on the homepage',
|
||||
title: 'Fix invalid JSON-LD blocks already on the analyzed pages',
|
||||
recommendation_type: 'fix_existing',
|
||||
schema_type: 'JSON-LD',
|
||||
page_scope: 'homepage',
|
||||
page_scope: pageAnalyses.length > 1 ? 'crawl-wide' : 'homepage',
|
||||
priority: 'high',
|
||||
reason:
|
||||
'At least one JSON-LD block could not be parsed. Invalid structured data can prevent search engines from using your markup.',
|
||||
@ -524,7 +913,9 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) {
|
||||
page_scope: 'internal-pages',
|
||||
priority: 'medium',
|
||||
reason:
|
||||
'Breadcrumb schema helps search engines understand content hierarchy and page relationships.',
|
||||
pageAnalyses.length > 1
|
||||
? 'Multiple internal pages were analyzed without BreadcrumbList schema, so search engines may miss content hierarchy and page relationships.'
|
||||
: 'Breadcrumb schema helps search engines understand content hierarchy and page relationships.',
|
||||
expected_impact:
|
||||
'Can improve result presentation and site structure understanding.',
|
||||
suggested_schema: buildRecommendationCode({
|
||||
@ -537,8 +928,8 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) {
|
||||
}
|
||||
|
||||
if (
|
||||
pageSignals.hasProductHints &&
|
||||
!hasSchemaType(schemaTypes, ['Product'])
|
||||
pageSignals.hasProductHints
|
||||
&& !hasSchemaType(schemaTypes, ['Product'])
|
||||
) {
|
||||
recommendationList.push({
|
||||
title: 'Add Product schema on product detail pages',
|
||||
@ -547,7 +938,9 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) {
|
||||
page_scope: 'product-pages',
|
||||
priority: 'high',
|
||||
reason:
|
||||
'The site shows product/ecommerce signals, but no Product schema was detected on the analyzed page.',
|
||||
pageAnalyses.length > 1
|
||||
? `Product or ecommerce signals appeared across ${pageSignals.productPages || 1} analyzed page${(pageSignals.productPages || 1) === 1 ? '' : 's'}, but no Product schema was detected.`
|
||||
: 'The site shows product/ecommerce signals, but no Product schema was detected on the analyzed page.',
|
||||
expected_impact:
|
||||
'Improves eligibility for product-rich search experiences and helps AI systems interpret commercial details.',
|
||||
suggested_schema: buildRecommendationCode({
|
||||
@ -560,8 +953,8 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) {
|
||||
}
|
||||
|
||||
if (
|
||||
pageSignals.hasBlogHints &&
|
||||
!hasSchemaType(schemaTypes, ['Article', 'BlogPosting', 'NewsArticle'])
|
||||
pageSignals.hasBlogHints
|
||||
&& !hasSchemaType(schemaTypes, ['Article', 'BlogPosting', 'NewsArticle'])
|
||||
) {
|
||||
recommendationList.push({
|
||||
title: 'Add BlogPosting schema on editorial content',
|
||||
@ -570,7 +963,9 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) {
|
||||
page_scope: 'article-pages',
|
||||
priority: 'medium',
|
||||
reason:
|
||||
'The site appears to publish editorial content, but article-level schema was not detected.',
|
||||
pageAnalyses.length > 1
|
||||
? `Editorial or blog signals appeared across ${pageSignals.blogPages || 1} analyzed page${(pageSignals.blogPages || 1) === 1 ? '' : 's'}, but article-level schema was not detected.`
|
||||
: 'The site appears to publish editorial content, but article-level schema was not detected.',
|
||||
expected_impact:
|
||||
'Clarifies content ownership, publication dates, and headline structure for search engines and answer engines.',
|
||||
suggested_schema: buildRecommendationCode({
|
||||
@ -583,8 +978,8 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) {
|
||||
}
|
||||
|
||||
if (
|
||||
pageSignals.hasFaqHints &&
|
||||
!hasSchemaType(schemaTypes, ['FAQPage'])
|
||||
pageSignals.hasFaqHints
|
||||
&& !hasSchemaType(schemaTypes, ['FAQPage'])
|
||||
) {
|
||||
recommendationList.push({
|
||||
title: 'Add FAQPage schema where FAQs are published',
|
||||
@ -593,7 +988,9 @@ function buildRecommendations({ baseUrl, siteName, analysis, html }) {
|
||||
page_scope: 'faq-pages',
|
||||
priority: 'medium',
|
||||
reason:
|
||||
'FAQ content signals were found, but the page is not marked up as an FAQPage.',
|
||||
pageAnalyses.length > 1
|
||||
? `FAQ-style content signals appeared across ${pageSignals.faqPages || 1} analyzed page${(pageSignals.faqPages || 1) === 1 ? '' : 's'}, but FAQPage schema was not detected.`
|
||||
: 'FAQ content signals were found, but the page is not marked up as an FAQPage.',
|
||||
expected_impact:
|
||||
'Makes question-and-answer content easier to parse and reuse in search and AI contexts.',
|
||||
suggested_schema: buildRecommendationCode({
|
||||
@ -678,6 +1075,7 @@ async function buildStoredReport(siteId, currentUser) {
|
||||
crawl: crawl ? crawl.get({ plain: true }) : null,
|
||||
analysis: crawl ? parseSummary(crawl.summary) : null,
|
||||
recommendations: recommendations.map((item) => item.get({ plain: true })),
|
||||
entitlements: getSiteEntitlements(currentUser),
|
||||
};
|
||||
}
|
||||
|
||||
@ -722,7 +1120,10 @@ module.exports = class SitesService {
|
||||
static async analyzeHomepage(data, currentUser) {
|
||||
ensureCurrentUser(currentUser);
|
||||
|
||||
const requestedPages = parseRequestedPages(data?.requestedPages);
|
||||
const entitlements = ensureRequestedPagesAllowed(requestedPages, currentUser);
|
||||
const normalizedUrl = normalizeUrl(data?.url || data?.base_url);
|
||||
const firecrawl = getFirecrawlScaffold({ requestedPages, entitlements });
|
||||
const requestedName =
|
||||
typeof data?.name === 'string' && data.name.trim()
|
||||
? data.name.trim()
|
||||
@ -789,46 +1190,37 @@ module.exports = class SitesService {
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await axios.get(normalizedUrl, {
|
||||
timeout: REQUEST_TIMEOUT,
|
||||
maxRedirects: 5,
|
||||
responseType: 'text',
|
||||
headers: {
|
||||
Accept:
|
||||
'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
'User-Agent':
|
||||
'Mozilla/5.0 (compatible; SchemaCrawlerBot/1.0; +https://flatlogic.com)',
|
||||
},
|
||||
});
|
||||
const crawlResult = await crawlPages(normalizedUrl, requestedPages);
|
||||
const pageAnalyses = crawlResult.pages;
|
||||
|
||||
const analyzedUrl =
|
||||
response.request?.res?.responseUrl || response.config?.url || normalizedUrl;
|
||||
const html = typeof response.data === 'string' ? response.data : '';
|
||||
const pageTitle = extractPageTitle(html);
|
||||
const platform = detectPlatform(html, response.headers, analyzedUrl);
|
||||
const schema = extractSchemaSummary(html);
|
||||
const finishedAt = new Date();
|
||||
if (pageAnalyses.length === 0) {
|
||||
const firstFailure = crawlResult.failedPages[0];
|
||||
const error = new Error(firstFailure?.error || 'Site analysis failed.');
|
||||
error.code = 400;
|
||||
throw error;
|
||||
}
|
||||
|
||||
const analysis = buildAggregateAnalysis({
|
||||
normalizedUrl,
|
||||
pageAnalyses,
|
||||
requestedPages,
|
||||
entitlements,
|
||||
discoveredInternalPages: crawlResult.discoveredInternalPages,
|
||||
failedPages: crawlResult.failedPages,
|
||||
firecrawl,
|
||||
});
|
||||
const homepage = pageAnalyses[0];
|
||||
const finishedAt = analysis.finishedAt;
|
||||
const recommendations = buildRecommendations({
|
||||
baseUrl: normalizedUrl,
|
||||
siteName: requestedName || pageTitle || defaultName,
|
||||
analysis: {
|
||||
analyzedUrl,
|
||||
pageTitle,
|
||||
platform,
|
||||
schema,
|
||||
},
|
||||
html,
|
||||
siteName: requestedName || homepage.pageTitle || defaultName,
|
||||
analysis,
|
||||
html: homepage.html,
|
||||
pageAnalyses,
|
||||
});
|
||||
const analysis = {
|
||||
requestedUrl: normalizedUrl,
|
||||
analyzedUrl,
|
||||
pageTitle,
|
||||
fetchedAt: finishedAt.toISOString(),
|
||||
statusCode: response.status,
|
||||
platform,
|
||||
schema,
|
||||
recommendationCount: recommendations.length,
|
||||
};
|
||||
|
||||
analysis.recommendationCount = recommendations.length;
|
||||
delete analysis.finishedAt;
|
||||
|
||||
const finalizeTransaction = await db.sequelize.transaction();
|
||||
let updatedSite;
|
||||
@ -845,8 +1237,8 @@ module.exports = class SitesService {
|
||||
|
||||
await updatedSite.update(
|
||||
{
|
||||
name: updatedSite.name || requestedName || pageTitle || defaultName,
|
||||
detected_platform: platform.detected,
|
||||
name: updatedSite.name || requestedName || homepage.pageTitle || defaultName,
|
||||
detected_platform: homepage.platform.detected,
|
||||
crawl_status: 'completed',
|
||||
last_crawled_at: finishedAt,
|
||||
updatedById: currentUser.id,
|
||||
@ -860,7 +1252,7 @@ module.exports = class SitesService {
|
||||
{
|
||||
status: 'completed',
|
||||
finished_at: finishedAt,
|
||||
pages_scanned: 1,
|
||||
pages_scanned: pageAnalyses.length,
|
||||
summary: JSON.stringify(analysis),
|
||||
updatedById: currentUser.id,
|
||||
},
|
||||
@ -894,11 +1286,12 @@ module.exports = class SitesService {
|
||||
crawl: updatedCrawl.get({ plain: true }),
|
||||
analysis,
|
||||
recommendations: storedRecommendations.map((item) => item.get({ plain: true })),
|
||||
entitlements,
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Site analysis failed:', error);
|
||||
|
||||
const failureAnalysis = buildFailureAnalysis(normalizedUrl, error);
|
||||
const failureAnalysis = buildFailureAnalysis(normalizedUrl, error, firecrawl);
|
||||
const failedAt = new Date();
|
||||
const failureTransaction = await db.sequelize.transaction();
|
||||
let failedSite;
|
||||
@ -943,13 +1336,28 @@ module.exports = class SitesService {
|
||||
throw updateError;
|
||||
}
|
||||
|
||||
if (error instanceof ValidationError || axios.isAxiosError(error)) {
|
||||
if (
|
||||
error instanceof ValidationError
|
||||
|| axios.isAxiosError(error)
|
||||
|| [400, 403, 404, 415].includes(error.code)
|
||||
) {
|
||||
return {
|
||||
site: failedSite.get({ plain: true }),
|
||||
crawl: failedCrawl.get({ plain: true }),
|
||||
analysis: failureAnalysis,
|
||||
analysis: {
|
||||
...failureAnalysis,
|
||||
crawlPlan: {
|
||||
requestedPages,
|
||||
allowedPages: entitlements.maxPagesPerCrawl,
|
||||
actualPagesAnalyzed: 0,
|
||||
advancedCrawlEnabled: entitlements.canAdvancedCrawl,
|
||||
provider: 'internal',
|
||||
},
|
||||
entitlements,
|
||||
},
|
||||
recommendations: [],
|
||||
error: failureAnalysis.error,
|
||||
entitlements,
|
||||
};
|
||||
}
|
||||
|
||||
@ -964,6 +1372,25 @@ module.exports = class SitesService {
|
||||
static async exportCode(data, currentUser) {
|
||||
ensureCurrentUser(currentUser);
|
||||
|
||||
const outputMode = typeof data?.outputMode === 'string'
|
||||
? data.outputMode.trim().toLowerCase()
|
||||
: 'generic';
|
||||
const platform = typeof data?.platform === 'string'
|
||||
? data.platform.trim()
|
||||
: '';
|
||||
|
||||
if (outputMode === 'platform') {
|
||||
ensurePlatformOutputAllowed(currentUser, platform);
|
||||
|
||||
const error = new Error(
|
||||
'Premium platform-specific schema output'
|
||||
+ (platform ? ' for ' + platform : '')
|
||||
+ ' is enabled for this user, but the Step 4 generator is not connected yet.',
|
||||
);
|
||||
error.code = 400;
|
||||
throw error;
|
||||
}
|
||||
|
||||
const { recommendationId, siteId } = data || {};
|
||||
|
||||
if (recommendationId) {
|
||||
|
||||
23
frontend/src/helpers/siteEntitlements.ts
Normal file
23
frontend/src/helpers/siteEntitlements.ts
Normal file
@ -0,0 +1,23 @@
|
||||
import { hasPermission } from './userPermissions';
|
||||
|
||||
export const BASIC_MAX_PAGES_PER_CRAWL = 1;
|
||||
export const ADVANCED_MAX_PAGES_PER_CRAWL = 25;
|
||||
export const ADVANCED_CRAWL_PERMISSION = 'USE_ADVANCED_CRAWL';
|
||||
export const PLATFORM_OUTPUT_PERMISSION = 'USE_PLATFORM_OUTPUT';
|
||||
|
||||
/**
 * Builds the entitlement summary the schema analyzer UI uses for a user.
 *
 * The result is derived purely from two permission checks performed through
 * `hasPermission`:
 *   - `canAdvancedCrawl`  – result of the ADVANCED_CRAWL_PERMISSION check.
 *   - `canPlatformOutput` – result of the PLATFORM_OUTPUT_PERMISSION check.
 *   - `maxPagesPerCrawl`  – ADVANCED_MAX_PAGES_PER_CRAWL when advanced crawl
 *                           is granted, otherwise BASIC_MAX_PAGES_PER_CRAWL.
 *   - `permissions`       – the permission names that gate each feature, so
 *                           callers can display or compare them.
 *
 * @param user - The user object handed to `hasPermission`.
 *               NOTE(review): how a missing/empty user is treated depends on
 *               `hasPermission`, which is not visible here — confirm.
 * @returns The entitlement object described above.
 */
export function getSiteEntitlements(user: any) {
  const advancedCrawlGranted = hasPermission(user, ADVANCED_CRAWL_PERMISSION);
  const platformOutputGranted = hasPermission(user, PLATFORM_OUTPUT_PERMISSION);

  // Start from the basic cap and only raise it for advanced-crawl users.
  let pageLimit = BASIC_MAX_PAGES_PER_CRAWL;
  if (advancedCrawlGranted) {
    pageLimit = ADVANCED_MAX_PAGES_PER_CRAWL;
  }

  const gatingPermissions = {
    advancedCrawl: ADVANCED_CRAWL_PERMISSION,
    platformOutput: PLATFORM_OUTPUT_PERMISSION,
  };

  return {
    canAdvancedCrawl: advancedCrawlGranted,
    canPlatformOutput: platformOutputGranted,
    maxPagesPerCrawl: pageLimit,
    permissions: gatingPermissions,
  };
}
|
||||
@ -11,14 +11,64 @@ import LayoutAuthenticated from '../../layouts/Authenticated';
|
||||
import SectionMain from '../../components/SectionMain';
|
||||
import SectionTitleLineWithButton from '../../components/SectionTitleLineWithButton';
|
||||
import { getPageTitle } from '../../config';
|
||||
import { getSiteEntitlements } from '../../helpers/siteEntitlements';
|
||||
import { useAppSelector } from '../../stores/hooks';
|
||||
|
||||
type Entitlements = {
|
||||
canAdvancedCrawl?: boolean;
|
||||
canPlatformOutput?: boolean;
|
||||
maxPagesPerCrawl?: number;
|
||||
permissions?: {
|
||||
advancedCrawl?: string;
|
||||
platformOutput?: string;
|
||||
};
|
||||
};
|
||||
|
||||
type AnalysisPayload = {
|
||||
requestedUrl?: string;
|
||||
analyzedUrl?: string;
|
||||
pageTitle?: string | null;
|
||||
fetchedAt?: string;
|
||||
statusCode?: number;
|
||||
recommendationCount?: number;
|
||||
notice?: string;
|
||||
crawlPlan?: {
|
||||
requestedPages?: number;
|
||||
allowedPages?: number;
|
||||
actualPagesAnalyzed?: number;
|
||||
advancedCrawlEnabled?: boolean;
|
||||
provider?: string;
|
||||
};
|
||||
crawlSummary?: {
|
||||
pagesWithStructuredData?: number;
|
||||
pagesWithoutStructuredData?: number;
|
||||
pagesWithInvalidJsonLd?: number;
|
||||
failedPages?: number;
|
||||
discoveredInternalPages?: number;
|
||||
};
|
||||
pages?: {
|
||||
url?: string;
|
||||
title?: string | null;
|
||||
statusCode?: number | null;
|
||||
hasStructuredData?: boolean;
|
||||
jsonLdTypes?: string[];
|
||||
}[];
|
||||
failedPages?: {
|
||||
url?: string;
|
||||
error?: string;
|
||||
}[];
|
||||
entitlements?: Entitlements;
|
||||
firecrawl?: {
|
||||
provider?: string;
|
||||
enabled?: boolean;
|
||||
configured?: boolean;
|
||||
mode?: string;
|
||||
status?: string;
|
||||
wouldHandleJavascript?: boolean;
|
||||
wouldHandleSitemapDiscovery?: boolean;
|
||||
shouldUseFirecrawlLater?: boolean;
|
||||
message?: string;
|
||||
};
|
||||
platform?: {
|
||||
detected?: string;
|
||||
label?: string;
|
||||
@ -69,20 +119,31 @@ type ReportResponse = {
|
||||
};
|
||||
analysis?: AnalysisPayload | null;
|
||||
recommendations?: Recommendation[];
|
||||
entitlements?: Entitlements;
|
||||
error?: string;
|
||||
};
|
||||
|
||||
const PLATFORM_OPTIONS = [
|
||||
{ value: 'wordpress', label: 'WordPress' },
|
||||
{ value: 'shopify', label: 'Shopify' },
|
||||
{ value: 'webflow', label: 'Webflow' },
|
||||
{ value: 'custom', label: 'Custom / Other' },
|
||||
];
|
||||
|
||||
const initialReport: ReportResponse | null = null;
|
||||
|
||||
const SchemaAnalyzerPage = () => {
|
||||
const { currentUser } = useAppSelector((state) => state.auth);
|
||||
const [url, setUrl] = React.useState('');
|
||||
const [requestedPages, setRequestedPages] = React.useState(1);
|
||||
const [selectedPlatform, setSelectedPlatform] = React.useState('wordpress');
|
||||
const [emailTo, setEmailTo] = React.useState(currentUser?.email || '');
|
||||
const [report, setReport] = React.useState<ReportResponse | null>(initialReport);
|
||||
const [isAnalyzing, setIsAnalyzing] = React.useState(false);
|
||||
const [isExportingAll, setIsExportingAll] = React.useState(false);
|
||||
const [emailingId, setEmailingId] = React.useState<string | null>(null);
|
||||
const [exportingId, setExportingId] = React.useState<string | null>(null);
|
||||
const [isCheckingPlatformOutput, setIsCheckingPlatformOutput] = React.useState(false);
|
||||
|
||||
React.useEffect(() => {
|
||||
if (currentUser?.email) {
|
||||
@ -94,10 +155,24 @@ const SchemaAnalyzerPage = () => {
|
||||
toast(message, { type, position: 'bottom-center' });
|
||||
}, []);
|
||||
|
||||
const fallbackEntitlements = React.useMemo(
|
||||
() => getSiteEntitlements(currentUser),
|
||||
[currentUser],
|
||||
);
|
||||
const entitlements = report?.entitlements || report?.analysis?.entitlements || fallbackEntitlements;
|
||||
const maxPagesPerCrawl = entitlements?.maxPagesPerCrawl || 1;
|
||||
const recommendations = report?.recommendations || [];
|
||||
const exportableRecommendations = recommendations.filter(
|
||||
(recommendation) => recommendation.suggested_schema,
|
||||
);
|
||||
const isRequestedPagesOverLimit = requestedPages > maxPagesPerCrawl;
|
||||
const firecrawlStatus = report?.analysis?.firecrawl || {
|
||||
provider: 'firecrawl',
|
||||
configured: false,
|
||||
wouldHandleJavascript: true,
|
||||
wouldHandleSitemapDiscovery: true,
|
||||
message: 'Firecrawl scaffold is wired in code, but this environment still needs a FIRECRAWL_API_KEY before activation.',
|
||||
};
|
||||
|
||||
const handleAnalyze = async () => {
|
||||
if (!url.trim()) {
|
||||
@ -105,15 +180,26 @@ const SchemaAnalyzerPage = () => {
|
||||
return;
|
||||
}
|
||||
|
||||
if (isRequestedPagesOverLimit) {
|
||||
notify(
|
||||
'error',
|
||||
`Your current plan allows up to ${maxPagesPerCrawl} page${maxPagesPerCrawl === 1 ? '' : 's'} per crawl. Upgrade to Advanced Crawl to go beyond that limit.`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
setIsAnalyzing(true);
|
||||
const response = await axios.post<ReportResponse>('/sites/analyze', {
|
||||
url: url.trim(),
|
||||
requestedPages,
|
||||
});
|
||||
setReport(response.data);
|
||||
|
||||
if (response.data.error) {
|
||||
notify('error', response.data.error);
|
||||
} else if (response.data.analysis?.notice) {
|
||||
notify('info', response.data.analysis.notice);
|
||||
} else {
|
||||
notify('success', 'Site analyzed successfully.');
|
||||
}
|
||||
@ -152,7 +238,7 @@ const SchemaAnalyzerPage = () => {
|
||||
};
|
||||
|
||||
const parseFilename = (contentDisposition?: string) => {
|
||||
const match = contentDisposition?.match(/filename="?([^\";]+)"?/i);
|
||||
const match = contentDisposition?.match(/filename="?([^";]+)"?/i);
|
||||
return match?.[1] || 'schema-export.txt';
|
||||
};
|
||||
|
||||
@ -238,6 +324,36 @@ const SchemaAnalyzerPage = () => {
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Probes the platform-specific ("Step 4") export path for the current report.
 *
 * Flow:
 *   1. Requires an analyzed site (`report.site.id`); otherwise shows an error
 *      toast and stops.
 *   2. Requires the `canPlatformOutput` entitlement; otherwise shows an info
 *      toast about the premium feature and stops.
 *   3. POSTs to `/sites/export` with `outputMode: 'platform'` and the selected
 *      platform, requesting a blob response.
 *
 * A successful response is intentionally not consumed here. Any failure is
 * logged and its message (decoded via `extractBlobError`) is shown as an
 * info toast.
 * NOTE(review): the backend export hunk in this change appears to always
 * reject platform mode with a 400 explaining the generator is not connected,
 * so the info toast looks like the expected outcome for now — confirm that
 * `/sites/export` maps to that handler.
 *
 * The "checking" flag is always reset once the request settles.
 */
const handlePlatformOutputCheck = async () => {
  const siteId = report?.site?.id;

  if (!siteId) {
    notify('error', 'Analyze a site first.');
    return;
  }

  const platformOutputUnlocked = Boolean(entitlements?.canPlatformOutput);

  if (!platformOutputUnlocked) {
    notify('info', 'Premium unlocks Step 4 platform-specific schema output.');
    return;
  }

  try {
    setIsCheckingPlatformOutput(true);

    const exportRequest = {
      siteId,
      outputMode: 'platform',
      platform: selectedPlatform,
    };

    // Blob response type matches the other export calls, so error bodies
    // must be decoded with extractBlobError below.
    await axios.post('/sites/export', exportRequest, { responseType: 'blob' });
  } catch (error: any) {
    console.error('Platform output check failed:', error);

    const failureMessage = await extractBlobError(error);
    notify('info', failureMessage);
  } finally {
    setIsCheckingPlatformOutput(false);
  }
};
|
||||
|
||||
const crawlPlan = report?.analysis?.crawlPlan;
|
||||
|
||||
return (
|
||||
<>
|
||||
<Head>
|
||||
@ -257,8 +373,9 @@ const SchemaAnalyzerPage = () => {
|
||||
<div>
|
||||
<h2 className='text-xl font-semibold text-slate-900 dark:text-white'>Analyze a customer site</h2>
|
||||
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
|
||||
Enter a domain or full URL. The app will detect the platform, inspect homepage structured data,
|
||||
generate rules-based schema recommendations, and prepare developer-ready code snippets.
|
||||
Enter a domain or full URL. The app will detect the platform, crawl up to your allowed page limit,
|
||||
inspect structured data across the discovered pages, generate rules-based schema recommendations,
|
||||
and prepare developer-ready code snippets.
|
||||
</p>
|
||||
|
||||
<div className='mt-6'>
|
||||
@ -283,12 +400,60 @@ const SchemaAnalyzerPage = () => {
|
||||
</FormField>
|
||||
</div>
|
||||
|
||||
<BaseButtons type='justify-start' className='mt-2'>
|
||||
<div className='grid gap-4 md:grid-cols-2'>
|
||||
<FormField
|
||||
label='Pages to analyze'
|
||||
labelFor='schema-requested-pages'
|
||||
help={`Current plan limit: ${maxPagesPerCrawl} page${maxPagesPerCrawl === 1 ? '' : 's'} per crawl`}
|
||||
>
|
||||
<input
|
||||
id='schema-requested-pages'
|
||||
name='schema-requested-pages'
|
||||
type='number'
|
||||
min={1}
|
||||
value={requestedPages}
|
||||
onChange={(event) => {
|
||||
const nextValue = Number(event.target.value);
|
||||
setRequestedPages(Number.isInteger(nextValue) && nextValue > 0 ? nextValue : 1);
|
||||
}}
|
||||
/>
|
||||
</FormField>
|
||||
|
||||
<FormField
|
||||
label='Step 4 target platform'
|
||||
labelFor='schema-platform-output'
|
||||
help={entitlements?.canPlatformOutput
|
||||
? 'Premium access detected. Step 4 platform output is reserved for the next phase.'
|
||||
: 'Premium-only feature: platform-specific code output.'}
|
||||
>
|
||||
<select
|
||||
id='schema-platform-output'
|
||||
name='schema-platform-output'
|
||||
value={selectedPlatform}
|
||||
onChange={(event) => setSelectedPlatform(event.target.value)}
|
||||
>
|
||||
{PLATFORM_OPTIONS.map((platformOption) => (
|
||||
<option key={platformOption.value} value={platformOption.value}>
|
||||
{platformOption.label}
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
</FormField>
|
||||
</div>
|
||||
|
||||
{isRequestedPagesOverLimit && (
|
||||
<div className='mt-4 rounded-2xl border border-amber-300 bg-amber-50 p-3 text-sm text-amber-800 dark:border-amber-500/40 dark:bg-amber-500/10 dark:text-amber-100'>
|
||||
You requested {requestedPages} pages, but this account is capped at {maxPagesPerCrawl}. Upgrade to
|
||||
Advanced Crawl to raise that limit.
|
||||
</div>
|
||||
)}
|
||||
|
||||
<BaseButtons type='justify-start' className='mt-4'>
|
||||
<BaseButton
|
||||
color='info'
|
||||
icon={icon.mdiMagnify}
|
||||
label={isAnalyzing ? 'Analyzing…' : 'Analyze site'}
|
||||
disabled={isAnalyzing}
|
||||
disabled={isAnalyzing || isRequestedPagesOverLimit}
|
||||
onClick={() => {
|
||||
handleAnalyze().catch(() => null);
|
||||
}}
|
||||
@ -313,45 +478,139 @@ const SchemaAnalyzerPage = () => {
|
||||
});
|
||||
}}
|
||||
/>
|
||||
<BaseButton
|
||||
color={entitlements?.canPlatformOutput ? 'success' : 'warning'}
|
||||
outline={!entitlements?.canPlatformOutput}
|
||||
icon={entitlements?.canPlatformOutput ? icon.mdiCodeBraces : icon.mdiLockOutline}
|
||||
label={isCheckingPlatformOutput
|
||||
? 'Checking…'
|
||||
: entitlements?.canPlatformOutput
|
||||
? 'Check Step 4 output'
|
||||
: 'Premium Step 4'}
|
||||
disabled={!report?.site?.id || isCheckingPlatformOutput}
|
||||
onClick={() => {
|
||||
handlePlatformOutputCheck().catch(() => null);
|
||||
}}
|
||||
/>
|
||||
</BaseButtons>
|
||||
</div>
|
||||
|
||||
<div className='rounded-2xl border border-slate-200 bg-slate-50 p-5 dark:border-slate-700 dark:bg-slate-900/40'>
|
||||
<h3 className='text-base font-semibold text-slate-900 dark:text-white'>Delivery actions</h3>
|
||||
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
|
||||
Export a developer handoff file or email the latest recommendations directly.
|
||||
</p>
|
||||
<div className='mt-4'>
|
||||
<FormField label='Developer email' labelFor='schema-email-recipient'>
|
||||
<input
|
||||
id='schema-email-recipient'
|
||||
name='schema-email-recipient'
|
||||
placeholder='developer@example.com'
|
||||
value={emailTo}
|
||||
onChange={(event) => setEmailTo(event.target.value)}
|
||||
/>
|
||||
</FormField>
|
||||
<div className='space-y-4'>
|
||||
<div className='rounded-2xl border border-slate-200 bg-slate-50 p-5 dark:border-slate-700 dark:bg-slate-900/40'>
|
||||
<div className='flex items-start justify-between gap-4'>
|
||||
<div>
|
||||
<h3 className='text-base font-semibold text-slate-900 dark:text-white'>Paywall status</h3>
|
||||
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
|
||||
Advanced Crawl is now enforced and active. Premium still reserves Step 4 platform output. Firecrawl is scaffolded for sitemap + JS-rendered crawling, but not activated yet.
|
||||
</p>
|
||||
</div>
|
||||
<span className={`rounded-full px-3 py-1 text-xs font-semibold ${entitlements?.canPlatformOutput
|
||||
? 'bg-emerald-100 text-emerald-700 dark:bg-emerald-500/10 dark:text-emerald-200'
|
||||
: 'bg-slate-200 text-slate-700 dark:bg-slate-700 dark:text-slate-100'}`}>
|
||||
{entitlements?.canPlatformOutput ? 'Premium access' : 'Basic access'}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div className='mt-4 space-y-3 text-sm'>
|
||||
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
|
||||
<span className='text-slate-600 dark:text-slate-300'>Advanced crawl entitlement</span>
|
||||
<span className={`font-semibold ${entitlements?.canAdvancedCrawl ? 'text-emerald-600 dark:text-emerald-300' : 'text-amber-700 dark:text-amber-300'}`}>
|
||||
{entitlements?.canAdvancedCrawl ? 'Unlocked' : 'Locked'}
|
||||
</span>
|
||||
</div>
|
||||
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
|
||||
<span className='text-slate-600 dark:text-slate-300'>Max pages per crawl</span>
|
||||
<span className='font-semibold text-slate-900 dark:text-white'>{maxPagesPerCrawl}</span>
|
||||
</div>
|
||||
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
|
||||
<span className='text-slate-600 dark:text-slate-300'>Platform-specific Step 4 output</span>
|
||||
<span className={`font-semibold ${entitlements?.canPlatformOutput ? 'text-emerald-600 dark:text-emerald-300' : 'text-amber-700 dark:text-amber-300'}`}>
|
||||
{entitlements?.canPlatformOutput ? 'Reserved' : 'Premium only'}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className='rounded-2xl border border-slate-200 bg-slate-50 p-5 dark:border-slate-700 dark:bg-slate-900/40'>
|
||||
<div className='flex items-start justify-between gap-4'>
|
||||
<div>
|
||||
<h3 className='text-base font-semibold text-slate-900 dark:text-white'>Firecrawl scaffold</h3>
|
||||
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
|
||||
Sitemap discovery and JS-rendered crawl are planned through Firecrawl. This environment is currently using the built-in crawler only.
|
||||
</p>
|
||||
</div>
|
||||
<span className={`rounded-full px-3 py-1 text-xs font-semibold ${firecrawlStatus?.configured
|
||||
? 'bg-emerald-100 text-emerald-700 dark:bg-emerald-500/10 dark:text-emerald-200'
|
||||
: 'bg-amber-100 text-amber-700 dark:bg-amber-500/10 dark:text-amber-200'}`}>
|
||||
{firecrawlStatus?.configured ? 'Key detected' : 'API key needed'}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div className='mt-4 space-y-3 text-sm'>
|
||||
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
|
||||
<span className='text-slate-600 dark:text-slate-300'>Current crawl provider</span>
|
||||
<span className='font-semibold text-slate-900 dark:text-white'>
|
||||
{report?.analysis?.crawlPlan?.provider || 'internal'}
|
||||
</span>
|
||||
</div>
|
||||
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
|
||||
<span className='text-slate-600 dark:text-slate-300'>Sitemap crawl path</span>
|
||||
<span className='font-semibold text-slate-900 dark:text-white'>
|
||||
{firecrawlStatus?.wouldHandleSitemapDiscovery ? 'Scaffolded' : 'Not scaffolded'}
|
||||
</span>
|
||||
</div>
|
||||
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
|
||||
<span className='text-slate-600 dark:text-slate-300'>JS-rendered crawl path</span>
|
||||
<span className='font-semibold text-slate-900 dark:text-white'>
|
||||
{firecrawlStatus?.wouldHandleJavascript ? 'Scaffolded' : 'Not scaffolded'}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{firecrawlStatus?.message && (
|
||||
<div className='mt-4 rounded-xl border border-sky-200 bg-sky-50 px-3 py-3 text-sm text-sky-800 dark:border-sky-500/30 dark:bg-sky-500/10 dark:text-sky-100'>
|
||||
{firecrawlStatus.message}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className='rounded-2xl border border-slate-200 bg-slate-50 p-5 dark:border-slate-700 dark:bg-slate-900/40'>
|
||||
<h3 className='text-base font-semibold text-slate-900 dark:text-white'>Delivery actions</h3>
|
||||
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
|
||||
Export a developer handoff file or email the latest recommendations directly.
|
||||
</p>
|
||||
<div className='mt-4'>
|
||||
<FormField label='Developer email' labelFor='schema-email-recipient'>
|
||||
<input
|
||||
id='schema-email-recipient'
|
||||
name='schema-email-recipient'
|
||||
placeholder='developer@example.com'
|
||||
value={emailTo}
|
||||
onChange={(event) => setEmailTo(event.target.value)}
|
||||
/>
|
||||
</FormField>
|
||||
</div>
|
||||
<BaseButtons type='justify-start'>
|
||||
<BaseButton
|
||||
color='success'
|
||||
icon={icon.mdiDownload}
|
||||
label={isExportingAll ? 'Exporting…' : 'Export all'}
|
||||
disabled={!report?.site?.id || isExportingAll}
|
||||
onClick={() => {
|
||||
handleExportAll().catch(() => null);
|
||||
}}
|
||||
/>
|
||||
<BaseButton
|
||||
color='warning'
|
||||
icon={icon.mdiEmailOutline}
|
||||
label={emailingId === 'all' ? 'Emailing…' : 'Email all'}
|
||||
disabled={!report?.site?.id || emailingId === 'all'}
|
||||
onClick={() => {
|
||||
handleEmailCode().catch(() => null);
|
||||
}}
|
||||
/>
|
||||
</BaseButtons>
|
||||
</div>
|
||||
<BaseButtons type='justify-start'>
|
||||
<BaseButton
|
||||
color='success'
|
||||
icon={icon.mdiDownload}
|
||||
label={isExportingAll ? 'Exporting…' : 'Export all'}
|
||||
disabled={!report?.site?.id || isExportingAll}
|
||||
onClick={() => {
|
||||
handleExportAll().catch(() => null);
|
||||
}}
|
||||
/>
|
||||
<BaseButton
|
||||
color='warning'
|
||||
icon={icon.mdiEmailOutline}
|
||||
label={emailingId === 'all' ? 'Emailing…' : 'Email all'}
|
||||
disabled={!report?.site?.id || emailingId === 'all'}
|
||||
onClick={() => {
|
||||
handleEmailCode().catch(() => null);
|
||||
}}
|
||||
/>
|
||||
</BaseButtons>
|
||||
</div>
|
||||
</div>
|
||||
</CardBox>
|
||||
@ -359,51 +618,158 @@ const SchemaAnalyzerPage = () => {
|
||||
{report?.analysis && (
|
||||
<div className='grid gap-6 xl:grid-cols-[0.9fr,1.1fr]'>
|
||||
<CardBox className='h-full'>
|
||||
<h3 className='text-lg font-semibold text-slate-900 dark:text-white'>Analysis snapshot</h3>
|
||||
<div className='mt-4 grid gap-3 sm:grid-cols-2'>
|
||||
<div className='flex flex-col gap-2 sm:flex-row sm:items-start sm:justify-between'>
|
||||
<div>
|
||||
<h3 className='text-lg font-semibold text-slate-900 dark:text-white'>Site findings</h3>
|
||||
<p className='mt-1 text-sm text-slate-500 dark:text-slate-300'>
|
||||
High-level crawl and structured-data summary from the latest analysis run.
|
||||
</p>
|
||||
</div>
|
||||
<div className='rounded-full bg-slate-100 px-3 py-1 text-xs font-semibold uppercase tracking-wide text-slate-700 dark:bg-slate-800 dark:text-slate-200'>
|
||||
{report.analysis.platform?.label || 'Unknown platform'}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className='mt-5 grid gap-4 md:grid-cols-2'>
|
||||
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
||||
<div className='text-xs uppercase tracking-wide text-slate-500'>Platform</div>
|
||||
<div className='mt-2 text-lg font-semibold text-slate-900 dark:text-white'>
|
||||
{report.analysis.platform?.label || 'Unknown'}
|
||||
</div>
|
||||
<div className='mt-2 text-xs text-slate-500'>
|
||||
{report.analysis.platform?.matchedSignals?.join(', ') || 'No strong platform signals found.'}
|
||||
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Analyzed URL</div>
|
||||
<div className='mt-2 break-all text-sm text-slate-900 dark:text-white'>
|
||||
{report.analysis.analyzedUrl || report.site?.base_url || '—'}
|
||||
</div>
|
||||
</div>
|
||||
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
||||
<div className='text-xs uppercase tracking-wide text-slate-500'>Structured data</div>
|
||||
<div className='mt-2 text-lg font-semibold text-slate-900 dark:text-white'>
|
||||
{report.analysis.schema?.hasStructuredData ? 'Detected' : 'Not detected'}
|
||||
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Page title</div>
|
||||
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
|
||||
{report.analysis.pageTitle || 'No title found'}
|
||||
</div>
|
||||
<div className='mt-2 text-xs text-slate-500'>
|
||||
JSON-LD: {report.analysis.schema?.jsonLd?.count || 0} • Microdata: {report.analysis.schema?.microdata?.count || 0} • RDFa: {report.analysis.schema?.rdfa?.count || 0}
|
||||
</div>
|
||||
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
||||
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Pages with structured data</div>
|
||||
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
|
||||
{report.analysis.crawlSummary?.pagesWithStructuredData ?? (report.analysis.schema?.hasStructuredData ? 1 : 0)}
|
||||
</div>
|
||||
</div>
|
||||
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
||||
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>JSON-LD blocks found</div>
|
||||
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
|
||||
{report.analysis.schema?.jsonLd?.count || 0}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className='mt-5 space-y-3 text-sm text-slate-600 dark:text-slate-300'>
|
||||
<div>
|
||||
<span className='font-semibold text-slate-900 dark:text-white'>Requested URL:</span>{' '}
|
||||
{report.analysis.requestedUrl || '—'}
|
||||
</div>
|
||||
<div>
|
||||
<span className='font-semibold text-slate-900 dark:text-white'>Analyzed URL:</span>{' '}
|
||||
{report.analysis.analyzedUrl || '—'}
|
||||
</div>
|
||||
<div>
|
||||
<span className='font-semibold text-slate-900 dark:text-white'>Page title:</span>{' '}
|
||||
{report.analysis.pageTitle || '—'}
|
||||
</div>
|
||||
<div>
|
||||
<span className='font-semibold text-slate-900 dark:text-white'>JSON-LD types found:</span>{' '}
|
||||
{(report.analysis.schema?.jsonLd?.types || []).join(', ') || 'None'}
|
||||
</div>
|
||||
{report.analysis.error && (
|
||||
<div className='rounded-xl border border-rose-200 bg-rose-50 p-3 text-rose-700 dark:border-rose-500/40 dark:bg-rose-500/10 dark:text-rose-200'>
|
||||
{report.analysis.error}
|
||||
{crawlPlan && (
|
||||
<div className='mt-5 rounded-2xl border border-sky-200 bg-sky-50 p-4 text-sm text-sky-900 dark:border-sky-500/30 dark:bg-sky-500/10 dark:text-sky-100'>
|
||||
<div className='font-semibold'>Crawl summary</div>
|
||||
<div className='mt-2 flex flex-col gap-1'>
|
||||
<span>Requested pages: {crawlPlan.requestedPages || 1}</span>
|
||||
<span>Plan limit: {crawlPlan.allowedPages || maxPagesPerCrawl}</span>
|
||||
<span>Pages analyzed: {crawlPlan.actualPagesAnalyzed || 0}</span>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
{report.analysis.notice && <div className='mt-3'>{report.analysis.notice}</div>}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{report.analysis.crawlSummary && (
|
||||
<div className='mt-5 grid gap-4 md:grid-cols-2'>
|
||||
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
||||
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Pages without structured data</div>
|
||||
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
|
||||
{report.analysis.crawlSummary.pagesWithoutStructuredData ?? 0}
|
||||
</div>
|
||||
</div>
|
||||
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
||||
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Failed page fetches</div>
|
||||
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
|
||||
{report.analysis.crawlSummary.failedPages ?? 0}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{(report.analysis.pages || []).length > 0 && (
|
||||
<div className='mt-5'>
|
||||
<div className='mb-2 text-xs font-semibold uppercase tracking-wide text-slate-500'>Analyzed pages</div>
|
||||
<div className='space-y-3'>
|
||||
{(report.analysis.pages || []).map((page) => (
|
||||
<div
|
||||
key={page.url}
|
||||
className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'
|
||||
>
|
||||
<div className='break-all text-sm font-medium text-slate-900 dark:text-white'>{page.url}</div>
|
||||
<div className='mt-1 text-sm text-slate-500 dark:text-slate-300'>
|
||||
{page.title || 'Untitled page'}
|
||||
</div>
|
||||
<div className='mt-2 flex flex-wrap gap-2 text-xs'>
|
||||
<span className='rounded-full bg-slate-100 px-3 py-1 font-semibold text-slate-700 dark:bg-slate-800 dark:text-slate-200'>
|
||||
Status {page.statusCode || '—'}
|
||||
</span>
|
||||
<span className={`rounded-full px-3 py-1 font-semibold ${page.hasStructuredData
|
||||
? 'bg-emerald-100 text-emerald-700 dark:bg-emerald-500/10 dark:text-emerald-200'
|
||||
: 'bg-amber-100 text-amber-700 dark:bg-amber-500/10 dark:text-amber-200'}`}>
|
||||
{page.hasStructuredData ? 'Structured data found' : 'No structured data'}
|
||||
</span>
|
||||
{(page.jsonLdTypes || []).slice(0, 3).map((typeName) => (
|
||||
<span
|
||||
key={`${page.url}-${typeName}`}
|
||||
className='rounded-full bg-sky-100 px-3 py-1 font-semibold text-sky-700 dark:bg-sky-500/10 dark:text-sky-200'
|
||||
>
|
||||
{typeName}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{(report.analysis.failedPages || []).length > 0 && (
|
||||
<div className='mt-5 rounded-2xl border border-amber-300 bg-amber-50 p-4 text-sm text-amber-900 dark:border-amber-500/30 dark:bg-amber-500/10 dark:text-amber-100'>
|
||||
<div className='font-semibold'>Some internal pages could not be fetched</div>
|
||||
<ul className='mt-2 list-disc space-y-1 pl-5'>
|
||||
{(report.analysis.failedPages || []).map((page) => (
|
||||
<li key={`${page.url}-${page.error}`}>
|
||||
<span className='font-medium'>{page.url}</span>: {page.error}
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{(report.analysis.schema?.jsonLd?.types || []).length > 0 && (
|
||||
<div className='mt-5'>
|
||||
<div className='mb-2 text-xs font-semibold uppercase tracking-wide text-slate-500'>Detected JSON-LD types</div>
|
||||
<div className='flex flex-wrap gap-2'>
|
||||
{(report.analysis.schema?.jsonLd?.types || []).map((typeName) => (
|
||||
<span
|
||||
key={typeName}
|
||||
className='rounded-full bg-emerald-100 px-3 py-1 text-xs font-semibold text-emerald-700 dark:bg-emerald-500/10 dark:text-emerald-200'
|
||||
>
|
||||
{typeName}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{(report.analysis.schema?.jsonLd?.invalidBlocks || []).length > 0 && (
|
||||
<div className='mt-5 rounded-2xl border border-amber-300 bg-amber-50 p-4 text-sm text-amber-900 dark:border-amber-500/30 dark:bg-amber-500/10 dark:text-amber-100'>
|
||||
<div className='font-semibold'>Invalid JSON-LD detected</div>
|
||||
<ul className='mt-2 list-disc space-y-1 pl-5'>
|
||||
{(report.analysis.schema?.jsonLd?.invalidBlocks || []).map((block) => (
|
||||
<li key={`${block.index}-${block.message}`}>
|
||||
Block {block.index + 1}: {block.message}
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{report.analysis.error && (
|
||||
<div className='mt-5 rounded-xl border border-rose-200 bg-rose-50 p-3 text-rose-700 dark:border-rose-500/40 dark:bg-rose-500/10 dark:text-rose-200'>
|
||||
{report.analysis.error}
|
||||
</div>
|
||||
)}
|
||||
</CardBox>
|
||||
|
||||
<CardBox className='h-full'>
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user