Autosave: 20260414-175631
This commit is contained in:
parent
fa68f426aa
commit
48292888fb
@ -12,3 +12,6 @@ EMAIL_USER=AKIAVEW7G4PQUBGM52OF
|
|||||||
EMAIL_PASS=BLnD4hKGb6YkSz3gaQrf8fnyLi3C3/EdjOOsLEDTDPTz
|
EMAIL_PASS=BLnD4hKGb6YkSz3gaQrf8fnyLi3C3/EdjOOsLEDTDPTz
|
||||||
SECRET_KEY=HUEyqESqgQ1yTwzVlO6wprC9Kf1J1xuA
|
SECRET_KEY=HUEyqESqgQ1yTwzVlO6wprC9Kf1J1xuA
|
||||||
PEXELS_KEY=Vc99rnmOhHhJAbgGQoKLZtsaIVfkeownoQNbTj78VemUjKh08ZYRbf18
|
PEXELS_KEY=Vc99rnmOhHhJAbgGQoKLZtsaIVfkeownoQNbTj78VemUjKh08ZYRbf18
|
||||||
|
FIRECRAWL_API_KEY=fc-409763513f6c458c9d1d09e460346b17
|
||||||
|
FIRECRAWL_BASE_URL=https://api.firecrawl.dev/v2
|
||||||
|
FIRECRAWL_ENABLED=true
|
||||||
|
|||||||
@ -1,4 +1,49 @@
|
|||||||
const FIRECRAWL_DEFAULT_BASE_URL = 'https://api.firecrawl.dev/v1';
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
const axios = require('axios');
|
||||||
|
|
||||||
|
const FIRECRAWL_DEFAULT_BASE_URL = 'https://api.firecrawl.dev/v2';
|
||||||
|
const FIRECRAWL_DEFAULT_POLL_INTERVAL_MS = 2000;
|
||||||
|
const FIRECRAWL_DEFAULT_TIMEOUT_MS = 45000;
|
||||||
|
|
||||||
|
const BACKEND_ENV_PATH = path.join(__dirname, '..', '..', '.env');
|
||||||
|
|
||||||
|
/**
 * Reads the backend `.env` file at BACKEND_ENV_PATH and parses it into a plain
 * key/value object.
 *
 * Parsing rules:
 * - blank lines, lines starting with `#`, and lines without `=` are skipped;
 * - the key is the trimmed text before the first `=`; lines with an empty key
 *   are skipped;
 * - the value is the trimmed text after the first `=`; one pair of matching
 *   surrounding quotes (`"…"` or `'…'`) is removed. Unbalanced quotes are kept
 *   as-is so a value is never silently truncated.
 *
 * @returns {Object<string, string>} Parsed entries, or `{}` when the file
 *   cannot be read.
 */
function readBackendEnvFile() {
  let raw;

  try {
    raw = fs.readFileSync(BACKEND_ENV_PATH, 'utf8');
  } catch (error) {
    // A missing file is an expected situation and stays silent; any other
    // read failure is reported, but the lookup still degrades to "no values".
    if (error?.code !== 'ENOENT') {
      console.error('Failed to read backend .env file:', error);
    }

    return {};
  }

  const entries = {};

  for (const line of raw.split(/\r?\n/)) {
    const trimmedLine = line.trim();

    if (!trimmedLine || trimmedLine.startsWith('#')) {
      continue;
    }

    const separatorIndex = trimmedLine.indexOf('=');

    if (separatorIndex === -1) {
      continue;
    }

    const key = trimmedLine.slice(0, separatorIndex).trim();

    if (!key) {
      continue;
    }

    let value = trimmedLine.slice(separatorIndex + 1).trim();
    const firstCharacter = value[0];
    const isQuoted =
      value.length >= 2
      && (firstCharacter === '"' || firstCharacter === "'")
      && value.endsWith(firstCharacter);

    if (isQuoted) {
      value = value.slice(1, -1);
    }

    entries[key] = value;
  }

  return entries;
}
|
||||||
|
|
||||||
|
/**
 * Looks up a configuration value by name.
 *
 * `process.env[name]` wins when it is set to something other than
 * `undefined`, `null`, or the empty string; otherwise the value parsed by
 * `readBackendEnvFile()` is returned.
 *
 * @param {string} name - Variable name to look up.
 * @returns {string|undefined} The resolved value, or `undefined` when neither
 *   source provides one.
 */
function getEnvValue(name) {
  const fromProcess = process.env[name];
  const isMissing = fromProcess === undefined || fromProcess === null || fromProcess === '';

  return isMissing ? readBackendEnvFile()[name] : fromProcess;
}
|
||||||
|
|
||||||
|
|
||||||
function toBoolean(value, defaultValue = false) {
|
function toBoolean(value, defaultValue = false) {
|
||||||
if (value === undefined || value === null || value === '') {
|
if (value === undefined || value === null || value === '') {
|
||||||
@ -22,12 +67,34 @@ function toBoolean(value, defaultValue = false) {
|
|||||||
return defaultValue;
|
return defaultValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Coerces `value` with `Number()` and returns it when the result is an
 * integer greater than zero; otherwise returns `defaultValue`.
 *
 * @param {*} value - Raw value, typically a string from the environment.
 * @param {*} defaultValue - Returned when `value` is not a positive integer.
 * @returns {*} The parsed positive integer or `defaultValue`.
 */
function toPositiveInteger(value, defaultValue) {
  const candidate = Number(value);
  const isUsable = Number.isInteger(candidate) && candidate > 0;

  return isUsable ? candidate : defaultValue;
}
|
||||||
|
|
||||||
|
/**
 * Normalizes a Firecrawl base URL: trims whitespace and removes trailing
 * slashes, falling back to FIRECRAWL_DEFAULT_BASE_URL when nothing usable is
 * left.
 *
 * The fallback is applied AFTER trimming, so a whitespace-only or slash-only
 * setting resolves to the default instead of an empty base URL.
 *
 * @param {string} [baseUrl] - Configured base URL, if any.
 * @returns {string} Base URL without surrounding whitespace or trailing slashes.
 */
function normalizeBaseUrl(baseUrl) {
  const stripTrailingSlashes = (text) => String(text).trim().replace(/\/+$/, '');
  const configured = stripTrailingSlashes(baseUrl || '');

  return configured || stripTrailingSlashes(FIRECRAWL_DEFAULT_BASE_URL);
}
|
||||||
|
|
||||||
function getFirecrawlRuntime() {
|
function getFirecrawlRuntime() {
|
||||||
const apiKey = String(process.env.FIRECRAWL_API_KEY || '').trim();
|
const apiKey = String(getEnvValue('FIRECRAWL_API_KEY') || '').trim();
|
||||||
const baseUrl = String(
|
const baseUrl = normalizeBaseUrl(getEnvValue('FIRECRAWL_BASE_URL'));
|
||||||
process.env.FIRECRAWL_BASE_URL || FIRECRAWL_DEFAULT_BASE_URL,
|
const enabled = toBoolean(getEnvValue('FIRECRAWL_ENABLED'), true);
|
||||||
).trim();
|
const pollIntervalMs = toPositiveInteger(
|
||||||
const enabled = toBoolean(process.env.FIRECRAWL_ENABLED, true);
|
getEnvValue('FIRECRAWL_POLL_INTERVAL_MS'),
|
||||||
|
FIRECRAWL_DEFAULT_POLL_INTERVAL_MS,
|
||||||
|
);
|
||||||
|
const timeoutMs = toPositiveInteger(
|
||||||
|
getEnvValue('FIRECRAWL_TIMEOUT_MS'),
|
||||||
|
FIRECRAWL_DEFAULT_TIMEOUT_MS,
|
||||||
|
);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
provider: 'firecrawl',
|
provider: 'firecrawl',
|
||||||
@ -35,29 +102,255 @@ function getFirecrawlRuntime() {
|
|||||||
enabled,
|
enabled,
|
||||||
configured: Boolean(apiKey),
|
configured: Boolean(apiKey),
|
||||||
hasApiKey: Boolean(apiKey),
|
hasApiKey: Boolean(apiKey),
|
||||||
mode: 'scaffold_only',
|
apiKey,
|
||||||
|
pollIntervalMs,
|
||||||
|
timeoutMs,
|
||||||
|
mode: enabled && apiKey ? 'active' : 'scaffold_only',
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Picks the user-facing explanation of how Firecrawl applies to a request.
 *
 * Checks run in this order: the user's `canAdvancedCrawl` entitlement, the
 * runtime `enabled` flag, the runtime `configured` flag, and finally whether
 * more than one page was requested.
 *
 * @param {{enabled: boolean, configured: boolean}} runtime - Firecrawl runtime flags.
 * @param {{canAdvancedCrawl?: boolean}|null|undefined} entitlements - Current user's entitlements.
 * @param {number} requestedPages - Number of pages requested for the crawl.
 * @returns {string} The message for the first condition that applies.
 */
function buildFirecrawlMessage(runtime, entitlements, requestedPages) {
  let message;

  if (!entitlements?.canAdvancedCrawl) {
    message = 'Firecrawl is reserved for paid Advanced Crawl users. This request will stay on the built-in crawler.';
  } else if (!runtime.enabled) {
    message = 'Firecrawl is configured in code, but FIRECRAWL_ENABLED is turned off. Paid users will stay on the built-in crawler until it is enabled.';
  } else if (!runtime.configured) {
    message = 'Firecrawl is enabled for paid users, but FIRECRAWL_API_KEY is missing. Falling back to the built-in crawler until the key is configured.';
  } else if (requestedPages > 1) {
    message = 'Paid Advanced Crawl users are routed through Firecrawl for sitemap-aware, JavaScript-rendered multi-page crawling.';
  } else {
    message = 'Paid Advanced Crawl users are routed through Firecrawl for sitemap-aware, JavaScript-rendered crawling.';
  }

  return message;
}
|
||||||
|
|
||||||
function getFirecrawlScaffold({ requestedPages, entitlements } = {}) {
|
function getFirecrawlScaffold({ requestedPages, entitlements } = {}) {
|
||||||
const runtime = getFirecrawlRuntime();
|
const runtime = getFirecrawlRuntime();
|
||||||
const wantsAdvancedCrawl = Number(requestedPages || 1) > 1;
|
const availableForCurrentUser = Boolean(entitlements?.canAdvancedCrawl);
|
||||||
const advancedCrawlUnlocked = Boolean(entitlements?.canAdvancedCrawl);
|
const shouldUseFirecrawl = Boolean(
|
||||||
const shouldUseFirecrawlLater = runtime.enabled && (wantsAdvancedCrawl || advancedCrawlUnlocked);
|
availableForCurrentUser
|
||||||
|
&& runtime.enabled
|
||||||
|
&& runtime.configured,
|
||||||
|
);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...runtime,
|
provider: 'firecrawl',
|
||||||
status: runtime.configured ? 'ready_for_activation' : 'awaiting_api_key',
|
baseUrl: runtime.baseUrl,
|
||||||
|
enabled: runtime.enabled,
|
||||||
|
configured: runtime.configured,
|
||||||
|
hasApiKey: runtime.hasApiKey,
|
||||||
|
mode: shouldUseFirecrawl ? 'active' : runtime.mode,
|
||||||
|
status: shouldUseFirecrawl ? 'active_for_paid_users' : 'scaffold_only',
|
||||||
wouldHandleJavascript: true,
|
wouldHandleJavascript: true,
|
||||||
wouldHandleSitemapDiscovery: true,
|
wouldHandleSitemapDiscovery: true,
|
||||||
shouldUseFirecrawlLater,
|
availableForCurrentUser,
|
||||||
message: runtime.configured
|
shouldUseFirecrawl,
|
||||||
? 'Firecrawl scaffold is wired and ready for the next activation step, but this analyzer still uses the built-in crawler today.'
|
usePaidOnly: true,
|
||||||
: 'Firecrawl scaffold is wired, but FIRECRAWL_API_KEY is not set yet. The analyzer still uses the built-in crawler for now.',
|
message: buildFirecrawlMessage(runtime, entitlements, requestedPages),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Returns a promise that resolves (with no value) once a `setTimeout` of
 * `milliseconds` fires.
 *
 * @param {number} milliseconds - Delay passed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(milliseconds) {
  const schedule = (resolve) => {
    setTimeout(resolve, milliseconds);
  };

  return new Promise(schedule);
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `value`, once stringified, starts with `http://` or
 * `https://` (case-insensitive). Falsy values are treated as the empty string.
 *
 * @param {*} value - Candidate URL or path.
 * @returns {boolean}
 */
function isAbsoluteUrl(value) {
  const text = String(value || '');
  const httpSchemePrefix = /^https?:\/\//i;

  return httpSchemePrefix.test(text);
}
|
||||||
|
|
||||||
|
/**
 * Resolves the URL for a Firecrawl API call.
 *
 * - empty / falsy input → `runtime.baseUrl`;
 * - input that `isAbsoluteUrl` accepts → returned unchanged (after trimming);
 * - anything else → appended to `runtime.baseUrl` with exactly one `/`
 *   inserted when the input does not already start with one.
 *
 * @param {{baseUrl: string}} runtime - Firecrawl runtime configuration.
 * @param {string} [pathOrUrl] - API path or absolute URL.
 * @returns {string} The URL to request.
 */
function buildApiUrl(runtime, pathOrUrl) {
  const target = String(pathOrUrl || '').trim();

  if (target === '') {
    return runtime.baseUrl;
  }

  if (isAbsoluteUrl(target)) {
    return target;
  }

  const separator = target.startsWith('/') ? '' : '/';

  return `${runtime.baseUrl}${separator}${target}`;
}
|
||||||
|
|
||||||
|
/**
 * Extracts a human-readable error description from a Firecrawl response body.
 *
 * @param {*} payload - Response body (string, object, or nothing).
 * @returns {string} The payload itself when it is a non-empty string; else the
 *   first of `payload.error` / `payload.message` that is a string with
 *   non-whitespace content; else a generic fallback message.
 */
function summarizeFirecrawlPayload(payload) {
  if (!payload) {
    return 'Unknown Firecrawl API error.';
  }

  if (typeof payload === 'string') {
    return payload;
  }

  // `error` takes precedence over `message`.
  for (const field of ['error', 'message']) {
    const text = payload[field];

    if (typeof text === 'string' && text.trim()) {
      return text;
    }
  }

  return 'Unexpected Firecrawl API response.';
}
|
||||||
|
|
||||||
|
/**
 * Sends one authenticated request to the Firecrawl API through axios and
 * returns the response body.
 *
 * The request carries `Authorization: Bearer <runtime.apiKey>` and a JSON
 * content type; `options.headers` are merged last and may override both.
 *
 * @param {{apiKey: string, timeoutMs: number}} runtime - Firecrawl runtime configuration.
 * @param {string} method - HTTP method handed to axios.
 * @param {string} pathOrUrl - API path or absolute URL, resolved by `buildApiUrl`.
 * @param {{timeout?: number, data?: *, headers?: Object}} [options] - Per-call overrides.
 * @returns {Promise<*>} `response.data` from axios.
 * @throws {Error} For axios errors: a wrapped error whose `code` is the HTTP
 *   status (or 502 when there is none) and whose `response` is the error
 *   payload. Any other error is rethrown unchanged.
 */
async function firecrawlRequest(runtime, method, pathOrUrl, options = {}) {
  try {
    const reply = await axios({
      method,
      url: buildApiUrl(runtime, pathOrUrl),
      timeout: options.timeout || runtime.timeoutMs,
      data: options.data,
      headers: {
        Authorization: `Bearer ${runtime.apiKey}`,
        'Content-Type': 'application/json',
        ...(options.headers || {}),
      },
    });

    return reply.data;
  } catch (error) {
    // Only axios failures are translated; everything else passes through.
    if (!axios.isAxiosError(error)) {
      throw error;
    }

    const payload = error.response?.data;
    const detail = summarizeFirecrawlPayload(payload);
    const status = error.response?.status;
    const headline = status
      ? `Firecrawl request failed with status ${status}`
      : 'Firecrawl request failed';

    throw Object.assign(new Error(`${headline}: ${detail}`), {
      code: status || 502,
      response: payload,
    });
  }
}
|
||||||
|
|
||||||
|
/**
 * Follows the `next` links of a paginated crawl-status response and merges
 * every page's `data` array into one list.
 *
 * Each `next` URL is requested at most once: if a response points back to a
 * URL that was already fetched, paging stops and the documents collected so
 * far are returned, so a repeating `next` link cannot loop forever.
 *
 * @param {Object} runtime - Firecrawl runtime configuration, passed through to
 *   `firecrawlRequest`.
 * @param {{data?: Array, next?: string}|null|undefined} initialStatus - First
 *   status response.
 * @returns {Promise<Object>} A copy of `initialStatus` with `data` replaced by
 *   the merged documents and `next` set to `null`.
 * @throws Whatever `firecrawlRequest` throws for a failed page request.
 */
async function collectPagedStatus(runtime, initialStatus) {
  const documents = Array.isArray(initialStatus?.data)
    ? [...initialStatus.data]
    : [];
  const requestedUrls = new Set();
  let nextUrl = initialStatus?.next || null;

  while (nextUrl && !requestedUrls.has(nextUrl)) {
    requestedUrls.add(nextUrl);

    const nextStatus = await firecrawlRequest(runtime, 'get', nextUrl);

    if (Array.isArray(nextStatus?.data)) {
      // Append one at a time: spreading a very large page into push() can
      // exceed the engine's argument-count limit.
      for (const document of nextStatus.data) {
        documents.push(document);
      }
    }

    nextUrl = nextStatus?.next || null;
  }

  return {
    ...initialStatus,
    data: documents,
    next: null,
  };
}
|
||||||
|
|
||||||
|
/**
 * Polls `GET /crawl/<crawlId>` until the job reports `completed` or `failed`,
 * then returns the status with all result pages merged by
 * `collectPagedStatus`.
 *
 * Polling waits `runtime.pollIntervalMs` between attempts and gives up once
 * `runtime.timeoutMs` has elapsed since the call started.
 *
 * @param {{timeoutMs: number, pollIntervalMs: number}} runtime - Firecrawl runtime configuration.
 * @param {string} crawlId - Crawl job identifier (URI-encoded into the path).
 * @returns {Promise<Object>} The final, fully paged status.
 * @throws {Error} With `code` 504 when no terminal status is seen in time.
 */
async function waitForCrawlCompletion(runtime, crawlId) {
  const deadline = Date.now() + runtime.timeoutMs;
  const terminalStates = ['completed', 'failed'];

  for (;;) {
    if (Date.now() > deadline) {
      break;
    }

    const status = await firecrawlRequest(runtime, 'get', `/crawl/${encodeURIComponent(crawlId)}`);

    if (terminalStates.includes(status?.status)) {
      return collectPagedStatus(runtime, status);
    }

    await sleep(runtime.pollIntervalMs);
  }

  const seconds = Math.round(runtime.timeoutMs / 1000);
  const timeoutError = new Error(`Firecrawl crawl timed out after ${seconds} seconds.`);

  timeoutError.code = 504;
  throw timeoutError;
}
|
||||||
|
|
||||||
|
/**
 * Fetches `GET /crawl/<crawlId>/errors` on a best-effort basis.
 *
 * A failed lookup is logged with `console.error` and replaced by an empty
 * result, so it never rejects.
 *
 * @param {Object} runtime - Firecrawl runtime configuration.
 * @param {string} crawlId - Crawl job identifier (URI-encoded into the path).
 * @returns {Promise<Object>} The API response, or
 *   `{ errors: [], robotsBlocked: [] }` when the request fails.
 */
async function getCrawlErrors(runtime, crawlId) {
  let report;

  try {
    report = await firecrawlRequest(runtime, 'get', `/crawl/${encodeURIComponent(crawlId)}/errors`);
  } catch (error) {
    console.error('Failed to fetch Firecrawl crawl errors:', error);

    report = {
      errors: [],
      robotsBlocked: [],
    };
  }

  return report;
}
|
||||||
|
|
||||||
|
async function crawlSiteWithFirecrawl(url, requestedPages) {
|
||||||
|
const runtime = getFirecrawlRuntime();
|
||||||
|
|
||||||
|
if (!runtime.enabled) {
|
||||||
|
const error = new Error('Firecrawl is disabled in this environment.');
|
||||||
|
error.code = 503;
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!runtime.configured) {
|
||||||
|
const error = new Error('Firecrawl API key is not configured.');
|
||||||
|
error.code = 503;
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
|
||||||
|
const started = await firecrawlRequest(runtime, 'post', '/crawl', {
|
||||||
|
data: {
|
||||||
|
url,
|
||||||
|
limit: requestedPages,
|
||||||
|
sitemap: 'include',
|
||||||
|
crawlEntireDomain: true,
|
||||||
|
allowExternalLinks: false,
|
||||||
|
allowSubdomains: false,
|
||||||
|
ignoreQueryParameters: true,
|
||||||
|
scrapeOptions: {
|
||||||
|
formats: ['html'],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const crawlId = started?.id;
|
||||||
|
|
||||||
|
if (!crawlId) {
|
||||||
|
const error = new Error('Firecrawl did not return a crawl job ID.');
|
||||||
|
error.code = 502;
|
||||||
|
error.response = started;
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
|
||||||
|
const status = await waitForCrawlCompletion(runtime, crawlId);
|
||||||
|
const crawlErrors = await getCrawlErrors(runtime, crawlId);
|
||||||
|
|
||||||
|
return {
|
||||||
|
crawlId,
|
||||||
|
provider: 'firecrawl',
|
||||||
|
status: status?.status || 'unknown',
|
||||||
|
total: status?.total || 0,
|
||||||
|
completed: status?.completed || 0,
|
||||||
|
creditsUsed: status?.creditsUsed || 0,
|
||||||
|
expiresAt: status?.expiresAt || null,
|
||||||
|
data: Array.isArray(status?.data) ? status.data : [],
|
||||||
|
errors: Array.isArray(crawlErrors?.errors) ? crawlErrors.errors : [],
|
||||||
|
robotsBlocked: Array.isArray(crawlErrors?.robotsBlocked)
|
||||||
|
? crawlErrors.robotsBlocked
|
||||||
|
: [],
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
getFirecrawlRuntime,
|
getFirecrawlRuntime,
|
||||||
getFirecrawlScaffold,
|
getFirecrawlScaffold,
|
||||||
|
crawlSiteWithFirecrawl,
|
||||||
};
|
};
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
const ValidationError = require('./notifications/errors/validation');
|
const ValidationError = require('./notifications/errors/validation');
|
||||||
|
|
||||||
const BASIC_MAX_PAGES_PER_CRAWL = 1;
|
const BASIC_MAX_PAGES_PER_CRAWL = 25;
|
||||||
const ADVANCED_MAX_PAGES_PER_CRAWL = 25;
|
const ADVANCED_MAX_PAGES_PER_CRAWL = 25;
|
||||||
const ADVANCED_CRAWL_PERMISSION = 'USE_ADVANCED_CRAWL';
|
const ADVANCED_CRAWL_PERMISSION = 'USE_ADVANCED_CRAWL';
|
||||||
const PLATFORM_OUTPUT_PERMISSION = 'USE_PLATFORM_OUTPUT';
|
const PLATFORM_OUTPUT_PERMISSION = 'USE_PLATFORM_OUTPUT';
|
||||||
@ -68,7 +68,7 @@ function ensureRequestedPagesAllowed(requestedPages, currentUser) {
|
|||||||
|
|
||||||
if (requestedPages > entitlements.maxPagesPerCrawl) {
|
if (requestedPages > entitlements.maxPagesPerCrawl) {
|
||||||
const error = new Error(
|
const error = new Error(
|
||||||
`Your current plan allows up to ${entitlements.maxPagesPerCrawl} page${entitlements.maxPagesPerCrawl === 1 ? '' : 's'} per crawl. Upgrade to Advanced Crawl to analyze ${requestedPages} pages.`,
|
`This analyzer supports up to ${entitlements.maxPagesPerCrawl} page${entitlements.maxPagesPerCrawl === 1 ? '' : 's'} per crawl. Reduce the requested page count to continue.`,
|
||||||
);
|
);
|
||||||
error.code = 403;
|
error.code = 403;
|
||||||
throw error;
|
throw error;
|
||||||
|
|||||||
@ -8,7 +8,7 @@ const {
|
|||||||
ensureRequestedPagesAllowed,
|
ensureRequestedPagesAllowed,
|
||||||
ensurePlatformOutputAllowed,
|
ensurePlatformOutputAllowed,
|
||||||
} = require('./siteEntitlements');
|
} = require('./siteEntitlements');
|
||||||
const { getFirecrawlScaffold } = require('./firecrawl');
|
const { getFirecrawlScaffold, crawlSiteWithFirecrawl } = require('./firecrawl');
|
||||||
|
|
||||||
const REQUEST_TIMEOUT = 15000;
|
const REQUEST_TIMEOUT = 15000;
|
||||||
const PREVIEW_LIMIT = 5;
|
const PREVIEW_LIMIT = 5;
|
||||||
@ -271,6 +271,152 @@ function normalizeAllowedHostnames(allowedHostnames) {
|
|||||||
return new Set();
|
return new Set();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Canonicalizes a URL pathname for target matching: trims whitespace and
 * collapses leading/trailing slashes so the result has exactly one leading
 * slash and no trailing slash. Empty input and `/` both yield `/`.
 *
 * @param {string} [pathname] - Raw pathname.
 * @returns {string} Canonical pathname.
 */
function normalizeTargetPathname(pathname) {
  const raw = String(pathname || '').trim();

  if (raw === '' || raw === '/') {
    return '/';
  }

  const withoutEdgeSlashes = raw.replace(/^\/+/, '').replace(/\/+$/, '');

  return `/${withoutEdgeSlashes}`;
}
|
||||||
|
|
||||||
|
/**
 * Turns one user-supplied include/exclude entry into a normalized crawl
 * target on the same hostname as `baseUrl`.
 *
 * Accepted inputs are absolute `http(s)` URLs, paths starting with `/`, and
 * bare paths (treated as if prefixed with `/`). Query string and fragment are
 * dropped before normalization.
 *
 * @param {string} rawTarget - Entry as typed by the user.
 * @param {string} baseUrl - URL being analyzed; relative entries resolve against it.
 * @param {string} label - `'include'` or `'exclude'`, used in error messages.
 * @returns {{input: string, label: string, path: string, url: string}|null}
 *   The target, or `null` for a blank entry. `label` is the normalized URL
 *   when the entry was an absolute URL, otherwise the normalized path.
 * @throws {Error} With `code` 400 when the entry cannot be parsed, is not
 *   http(s), or points at a different hostname than `baseUrl`.
 */
function buildCrawlTarget(rawTarget, baseUrl, label) {
  const targetText = String(rawTarget || '').trim();

  if (targetText === '') {
    return null;
  }

  const reject = (message) => {
    const targetError = new Error(message);

    targetError.code = 400;
    throw targetError;
  };
  const invalidTargetMessage = `Invalid ${label} target: ${targetText}`;
  const isFullUrl = /^https?:\/\//i.test(targetText);

  let resolved = null;

  try {
    if (isFullUrl) {
      resolved = new URL(targetText);
    } else {
      const rootedPath = targetText.startsWith('/')
        ? targetText
        : `/${targetText.replace(/^\/+/, '')}`;

      resolved = new URL(rootedPath, baseUrl);
    }
  } catch (error) {
    resolved = null;
  }

  if (resolved === null) {
    reject(invalidTargetMessage);
  }

  if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') {
    reject(invalidTargetMessage);
  }

  const expectedHostname = new URL(baseUrl).hostname.toLowerCase();

  if (resolved.hostname.toLowerCase() !== expectedHostname) {
    reject(`${label} targets must stay on the same website as the analyzed URL.`);
  }

  // Matching is path-based, so the fragment and query string are discarded.
  resolved.hash = '';
  resolved.search = '';

  const path = normalizeTargetPathname(resolved.pathname);
  const url = normalizeUrl(resolved.toString());

  return {
    input: targetText,
    label: isFullUrl ? url : path,
    path,
    url,
  };
}
|
||||||
|
|
||||||
|
/**
 * Parses a list of include/exclude entries into normalized crawl targets.
 *
 * `rawTargets` may be an array of entries or a newline-separated string.
 * Blank entries are dropped. Targets are de-duplicated by normalized URL: the
 * first occurrence fixes the position, the last occurrence supplies the value.
 *
 * @param {Array|string} rawTargets - Entries to parse.
 * @param {string} baseUrl - URL being analyzed, forwarded to `buildCrawlTarget`.
 * @param {string} label - `'include'` or `'exclude'`, forwarded to `buildCrawlTarget`.
 * @returns {Array<Object>} Unique targets.
 * @throws Whatever `buildCrawlTarget` throws for an invalid entry.
 */
function parseCrawlTargets(rawTargets, baseUrl, label) {
  const candidates = Array.isArray(rawTargets)
    ? rawTargets
    : String(rawTargets || '').split(/\r?\n/);
  const targetsByUrl = new Map();

  for (const candidate of candidates) {
    const entry = String(candidate || '').trim();

    if (!entry) {
      continue;
    }

    const target = buildCrawlTarget(entry, baseUrl, label);

    targetsByUrl.set(target.url, target);
  }

  return [...targetsByUrl.values()];
}
|
||||||
|
|
||||||
|
/**
 * Parses the `includeTargets` and `excludeTargets` fields of a request
 * payload into normalized crawl targets (include list first).
 *
 * @param {{includeTargets?: Array|string, excludeTargets?: Array|string}|null|undefined} data - Request payload.
 * @param {string} baseUrl - URL being analyzed.
 * @returns {{includeTargets: Array<Object>, excludeTargets: Array<Object>}}
 */
function normalizeCrawlTargets(data, baseUrl) {
  const includeTargets = parseCrawlTargets(data?.includeTargets, baseUrl, 'include');
  const excludeTargets = parseCrawlTargets(data?.excludeTargets, baseUrl, 'exclude');

  return { includeTargets, excludeTargets };
}
|
||||||
|
|
||||||
|
/**
 * Tests whether a URL falls under a crawl target's path.
 *
 * A target whose path is `/` matches every parseable URL. Otherwise the URL's
 * normalized pathname must equal the target path or sit below it
 * (`<target.path>/…`). Only pathnames are compared.
 *
 * @param {string} candidateUrl - URL to test.
 * @param {{path: string}|null|undefined} target - Normalized crawl target.
 * @returns {boolean} `false` for a missing URL, a missing target path, or a
 *   URL that cannot be normalized/parsed.
 */
function isUrlMatchingTarget(candidateUrl, target) {
  const targetPath = target?.path;

  if (!candidateUrl || !targetPath) {
    return false;
  }

  let pathname;

  try {
    pathname = new URL(normalizeUrl(candidateUrl)).pathname;
  } catch (error) {
    return false;
  }

  const candidatePath = normalizeTargetPathname(pathname);

  if (targetPath === '/') {
    return true;
  }

  const isSamePath = candidatePath === targetPath;
  const isNestedPath = candidatePath.startsWith(`${targetPath}/`);

  return isSamePath || isNestedPath;
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `candidateUrl` matches at least one of the given targets,
 * as decided by `isUrlMatchingTarget`.
 *
 * @param {string} candidateUrl - URL to test.
 * @param {Array<Object>} [targets] - Normalized crawl targets.
 * @returns {boolean} `false` for an empty list.
 */
function matchesAnyCrawlTarget(candidateUrl, targets = []) {
  const matchesCandidate = (target) => isUrlMatchingTarget(candidateUrl, target);

  return targets.some(matchesCandidate);
}
|
||||||
|
|
||||||
|
/**
 * Applies include/exclude targeting to a URL.
 *
 * With a non-empty include list the URL must match one of its targets. A URL
 * matching any exclude target is rejected. With both lists empty every URL is
 * allowed.
 *
 * @param {string} candidateUrl - URL to test.
 * @param {{includeTargets?: Array<Object>, excludeTargets?: Array<Object>}} [crawlTargets] - Normalized targets.
 * @returns {boolean} Whether the URL may be crawled/reported.
 */
function isUrlAllowedByCrawlTargets(candidateUrl, crawlTargets = {}) {
  const includes = crawlTargets.includeTargets || [];
  const excludes = crawlTargets.excludeTargets || [];

  const missesIncludeList = includes.length > 0 && !matchesAnyCrawlTarget(candidateUrl, includes);

  if (missesIncludeList) {
    return false;
  }

  const hitsExcludeList = excludes.length > 0 && matchesAnyCrawlTarget(candidateUrl, excludes);

  return !hitsExcludeList;
}
|
||||||
|
|
||||||
|
/**
 * Builds the list of URLs a crawl starts from: the base URL followed by the
 * URL of every include target, without duplicates.
 *
 * @param {string} baseUrl - URL being analyzed; always the first seed.
 * @param {{includeTargets?: Array<{url: string}>}} [crawlTargets] - Normalized targets.
 * @returns {Array<string>} Unique seed URLs in insertion order.
 */
function buildSeedUrls(baseUrl, crawlTargets = {}) {
  const includeTargets = crawlTargets.includeTargets || [];
  const uniqueSeeds = includeTargets.reduce(
    (seeds, target) => seeds.add(target.url),
    new Set([baseUrl]),
  );

  return [...uniqueSeeds];
}
|
||||||
|
|
||||||
|
/**
 * Reduces normalized crawl targets to their display labels.
 *
 * @param {{includeTargets?: Array<{label: string}>, excludeTargets?: Array<{label: string}>}} [crawlTargets] - Normalized targets.
 * @returns {{includeTargets: Array<string>, excludeTargets: Array<string>}}
 *   Labels per list; a missing list becomes an empty array.
 */
function summarizeCrawlTargets(crawlTargets = {}) {
  const labelsOf = (targets) => (targets || []).map((target) => target.label);

  return {
    includeTargets: labelsOf(crawlTargets.includeTargets),
    excludeTargets: labelsOf(crawlTargets.excludeTargets),
  };
}
|
||||||
|
|
||||||
function normalizeCrawlUrl(rawUrl, parentUrl, allowedHostnames) {
|
function normalizeCrawlUrl(rawUrl, parentUrl, allowedHostnames) {
|
||||||
if (!rawUrl || typeof rawUrl !== 'string') {
|
if (!rawUrl || typeof rawUrl !== 'string') {
|
||||||
return null;
|
return null;
|
||||||
@ -403,12 +549,184 @@ async function fetchAnalyzedPage(pageUrl, allowedHostnames) {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async function crawlPages(baseUrl, requestedPages) {
|
/**
 * Builds the per-page analysis record for HTML that has already been fetched.
 *
 * The analyzed page's hostname is added to the working set of allowed
 * hostnames, and — when the caller passed a `Set` — to the caller's set too,
 * so later pages see it as an allowed host.
 *
 * @param {Object} page
 * @param {string} [page.requestedUrl] - URL that was asked for.
 * @param {string} [page.analyzedUrl] - URL actually analyzed; defaults to `requestedUrl`.
 * @param {string} [page.html] - Page markup; non-string values are treated as `''`.
 * @param {number} [page.statusCode] - HTTP status; falsy values become `null`.
 * @param {Object} [page.headers] - Response headers handed to `detectPlatform`.
 * @param {Set<string>|*} [page.allowedHostnames] - Hostnames links may point to.
 * @param {Array<string>|null} [page.discoveredLinks] - Pre-extracted links; when
 *   not an array, links are extracted from the HTML instead.
 * @param {string|null} [page.pageTitle] - Known title; when falsy it is
 *   extracted from the HTML.
 * @returns {Object} `{ requestedUrl, analyzedUrl, pageTitle, statusCode, html,
 *   platform, schema, pageSignals, discoveredLinks }`.
 */
function analyzeFetchedPage({
  requestedUrl,
  analyzedUrl,
  html,
  statusCode,
  headers = {},
  allowedHostnames,
  discoveredLinks = null,
  pageTitle = null,
}) {
  const finalUrl = normalizeUrl(analyzedUrl || requestedUrl);
  const hostnames = normalizeAllowedHostnames(allowedHostnames);
  const finalHostname = new URL(finalUrl).hostname.toLowerCase();

  hostnames.add(finalHostname);

  // Deliberately updates the caller's set as well as the working copy.
  if (allowedHostnames instanceof Set) {
    allowedHostnames.add(finalHostname);
  }

  const markup = typeof html === 'string' ? html : '';
  const title = pageTitle || extractPageTitle(markup);
  const platform = detectPlatform(markup, headers, finalUrl);
  const schema = extractSchemaSummary(markup);
  const pageSignals = inferPageSignals(markup, finalUrl, title, platform);

  let links;

  if (Array.isArray(discoveredLinks)) {
    const uniqueLinks = new Set();

    discoveredLinks.forEach((linkUrl) => {
      const normalizedLink = normalizeCrawlUrl(linkUrl, finalUrl, hostnames);

      if (normalizedLink) {
        uniqueLinks.add(normalizedLink);
      }
    });

    links = Array.from(uniqueLinks);
  } else {
    links = extractInternalLinks(markup, finalUrl, hostnames);
  }

  return {
    requestedUrl: requestedUrl || finalUrl,
    analyzedUrl: finalUrl,
    pageTitle: title,
    statusCode: statusCode || null,
    html: markup,
    platform,
    schema,
    pageSignals,
    discoveredLinks: links,
  };
}
|
||||||
|
|
||||||
|
/**
 * Converts one Firecrawl result document into the analyzer's page record by
 * delegating to `analyzeFetchedPage`.
 *
 * - URL: first truthy of `metadata.sourceURL`, `metadata.sourceUrl`,
 *   `metadata.url`, `document.url`, `document.sourceURL`, `document.sourceUrl`.
 * - HTML: first of `document.html`, `document.rawHtml`, `document.content`
 *   that is a string; otherwise `''`.
 * - Status: `metadata.statusCode`, or 200 when it is missing/falsy.
 *
 * @param {Object} document - Firecrawl document.
 * @param {Set<string>} allowedHostnames - Hostnames links may point to.
 * @returns {Object|null} The analyzed page, or `null` when the document
 *   carries no source URL.
 */
function transformFirecrawlDocument(document, allowedHostnames) {
  const metadata = document?.metadata || {};
  const urlCandidates = [
    metadata.sourceURL,
    metadata.sourceUrl,
    metadata.url,
    document?.url,
    document?.sourceURL,
    document?.sourceUrl,
  ];
  const sourceUrl = urlCandidates.find(Boolean);

  if (!sourceUrl) {
    return null;
  }

  const markupCandidates = [document?.html, document?.rawHtml, document?.content];
  const html = markupCandidates.find((candidate) => typeof candidate === 'string') ?? '';
  const links = Array.isArray(document?.links) ? document.links : null;

  return analyzeFetchedPage({
    requestedUrl: sourceUrl,
    analyzedUrl: sourceUrl,
    html,
    statusCode: metadata.statusCode || 200,
    headers: {},
    allowedHostnames,
    discoveredLinks: links,
    pageTitle: metadata.title || null,
  });
}
|
||||||
|
|
||||||
|
async function crawlPagesWithFirecrawl(baseUrl, requestedPages, crawlTargets = {}) {
|
||||||
const normalizedBaseUrl = normalizeUrl(baseUrl);
|
const normalizedBaseUrl = normalizeUrl(baseUrl);
|
||||||
const allowedHostnames = new Set([new URL(normalizedBaseUrl).hostname.toLowerCase()]);
|
const allowedHostnames = new Set([new URL(normalizedBaseUrl).hostname.toLowerCase()]);
|
||||||
|
const firecrawlResult = await crawlSiteWithFirecrawl(normalizedBaseUrl, requestedPages);
|
||||||
|
const pages = [];
|
||||||
|
const analyzedUrls = new Set();
|
||||||
|
|
||||||
|
(firecrawlResult.data || []).forEach((document) => {
|
||||||
|
try {
|
||||||
|
const page = transformFirecrawlDocument(document, allowedHostnames);
|
||||||
|
|
||||||
|
if (
|
||||||
|
!page
|
||||||
|
|| analyzedUrls.has(page.analyzedUrl)
|
||||||
|
|| !isUrlAllowedByCrawlTargets(page.analyzedUrl, crawlTargets)
|
||||||
|
) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
analyzedUrls.add(page.analyzedUrl);
|
||||||
|
pages.push(page);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Failed to transform Firecrawl document:', error);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
const failedPages = [];
|
||||||
|
|
||||||
|
(firecrawlResult.errors || []).forEach((entry) => {
|
||||||
|
const failedUrl = normalizeCrawlUrl(
|
||||||
|
entry?.path || entry?.url || entry?.sourceURL || normalizedBaseUrl,
|
||||||
|
normalizedBaseUrl,
|
||||||
|
allowedHostnames,
|
||||||
|
) || normalizedBaseUrl;
|
||||||
|
|
||||||
|
if (!isUrlAllowedByCrawlTargets(failedUrl, crawlTargets)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
failedPages.push({
|
||||||
|
url: failedUrl,
|
||||||
|
error: entry?.error || entry?.message || 'Firecrawl could not fetch this page.',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
(firecrawlResult.robotsBlocked || []).forEach((entry) => {
|
||||||
|
const blockedUrl = normalizeCrawlUrl(
|
||||||
|
entry?.path || entry?.url || normalizedBaseUrl,
|
||||||
|
normalizedBaseUrl,
|
||||||
|
allowedHostnames,
|
||||||
|
) || normalizedBaseUrl;
|
||||||
|
|
||||||
|
if (!isUrlAllowedByCrawlTargets(blockedUrl, crawlTargets)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
failedPages.push({
|
||||||
|
url: blockedUrl,
|
||||||
|
error: 'Blocked by robots.txt during Firecrawl crawl.',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
provider: 'firecrawl',
|
||||||
|
pages,
|
||||||
|
failedPages,
|
||||||
|
discoveredInternalPages: Math.max((firecrawlResult.total || pages.length) - 1, 0),
|
||||||
|
firecrawlJob: {
|
||||||
|
crawlId: firecrawlResult.crawlId,
|
||||||
|
status: firecrawlResult.status,
|
||||||
|
total: firecrawlResult.total,
|
||||||
|
completed: firecrawlResult.completed,
|
||||||
|
creditsUsed: firecrawlResult.creditsUsed,
|
||||||
|
expiresAt: firecrawlResult.expiresAt,
|
||||||
|
failedPages: failedPages.length,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async function crawlPages(baseUrl, requestedPages, crawlTargets = {}) {
|
||||||
|
const normalizedBaseUrl = normalizeUrl(baseUrl);
|
||||||
|
const allowedHostnames = new Set([new URL(normalizedBaseUrl).hostname.toLowerCase()]);
|
||||||
|
const seedUrls = buildSeedUrls(normalizedBaseUrl, crawlTargets);
|
||||||
|
const seedUrlSet = new Set(seedUrls);
|
||||||
const visitedUrls = new Set();
|
const visitedUrls = new Set();
|
||||||
const queuedUrls = new Set([normalizedBaseUrl]);
|
const queuedUrls = new Set(seedUrls);
|
||||||
const pendingUrls = [normalizedBaseUrl];
|
const pendingUrls = [...seedUrls];
|
||||||
const pages = [];
|
const pages = [];
|
||||||
const failedPages = [];
|
const failedPages = [];
|
||||||
let discoveredInternalPages = 0;
|
let discoveredInternalPages = 0;
|
||||||
@ -420,15 +738,29 @@ async function crawlPages(baseUrl, requestedPages) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const isBootstrapSeed = seedUrlSet.has(nextUrl) && nextUrl === normalizedBaseUrl;
|
||||||
|
|
||||||
|
if (!isBootstrapSeed && !isUrlAllowedByCrawlTargets(nextUrl, crawlTargets)) {
|
||||||
|
visitedUrls.add(nextUrl);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
visitedUrls.add(nextUrl);
|
visitedUrls.add(nextUrl);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const page = await fetchAnalyzedPage(nextUrl, allowedHostnames);
|
const page = await fetchAnalyzedPage(nextUrl, allowedHostnames);
|
||||||
visitedUrls.add(page.analyzedUrl);
|
visitedUrls.add(page.analyzedUrl);
|
||||||
queuedUrls.add(page.analyzedUrl);
|
queuedUrls.add(page.analyzedUrl);
|
||||||
|
|
||||||
|
if (isUrlAllowedByCrawlTargets(page.analyzedUrl, crawlTargets)) {
|
||||||
pages.push(page);
|
pages.push(page);
|
||||||
|
}
|
||||||
|
|
||||||
page.discoveredLinks.forEach((linkUrl) => {
|
page.discoveredLinks.forEach((linkUrl) => {
|
||||||
|
if (!isUrlAllowedByCrawlTargets(linkUrl, crawlTargets)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!visitedUrls.has(linkUrl) && !queuedUrls.has(linkUrl)) {
|
if (!visitedUrls.has(linkUrl) && !queuedUrls.has(linkUrl)) {
|
||||||
queuedUrls.add(linkUrl);
|
queuedUrls.add(linkUrl);
|
||||||
pendingUrls.push(linkUrl);
|
pendingUrls.push(linkUrl);
|
||||||
@ -523,30 +855,29 @@ function buildCrawlNotice({
|
|||||||
requestedPages,
|
requestedPages,
|
||||||
actualPagesAnalyzed,
|
actualPagesAnalyzed,
|
||||||
failedPages,
|
failedPages,
|
||||||
discoveredInternalPages,
|
crawlTargetSummary,
|
||||||
firecrawl,
|
|
||||||
}) {
|
}) {
|
||||||
if (requestedPages <= 1) {
|
const parts = [];
|
||||||
return null;
|
|
||||||
|
if (requestedPages > 1) {
|
||||||
|
parts.push(
|
||||||
|
`The crawl analyzed ${actualPagesAnalyzed} of ${requestedPages} requested page${requestedPages === 1 ? '' : 's'}.`,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
const parts = [
|
if (actualPagesAnalyzed < requestedPages) {
|
||||||
`Advanced crawl analyzed ${actualPagesAnalyzed} of ${requestedPages} requested page${requestedPages === 1 ? '' : 's'}.`,
|
parts.push('Fewer matching crawlable pages were found than requested.');
|
||||||
];
|
}
|
||||||
|
|
||||||
if (discoveredInternalPages + 1 < requestedPages) {
|
if ((crawlTargetSummary?.includeTargets || []).length > 0 || (crawlTargetSummary?.excludeTargets || []).length > 0) {
|
||||||
parts.push('Fewer crawlable internal HTML pages were discovered than requested.');
|
parts.push('Custom include/exclude targeting was applied to this report.');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (failedPages > 0) {
|
if (failedPages > 0) {
|
||||||
parts.push(`${failedPages} page${failedPages === 1 ? '' : 's'} could not be fetched during the crawl.`);
|
parts.push(`${failedPages} page${failedPages === 1 ? '' : 's'} could not be fetched during the crawl.`);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (firecrawl?.message) {
|
return parts.length > 0 ? parts.join(' ') : null;
|
||||||
parts.push(firecrawl.message);
|
|
||||||
}
|
|
||||||
|
|
||||||
return parts.join(' ');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildAggregateAnalysis({
|
function buildAggregateAnalysis({
|
||||||
@ -557,11 +888,14 @@ function buildAggregateAnalysis({
|
|||||||
discoveredInternalPages,
|
discoveredInternalPages,
|
||||||
failedPages,
|
failedPages,
|
||||||
firecrawl,
|
firecrawl,
|
||||||
|
crawlTargets,
|
||||||
|
provider = 'internal',
|
||||||
}) {
|
}) {
|
||||||
const homepage = pageAnalyses[0];
|
const homepage = pageAnalyses[0];
|
||||||
const finishedAt = new Date();
|
const finishedAt = new Date();
|
||||||
const aggregateSchema = buildAggregateSchema(pageAnalyses);
|
const aggregateSchema = buildAggregateSchema(pageAnalyses);
|
||||||
const aggregateSignals = buildAggregateSignals(pageAnalyses);
|
const aggregateSignals = buildAggregateSignals(pageAnalyses);
|
||||||
|
const crawlTargetSummary = summarizeCrawlTargets(crawlTargets);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
requestedUrl: normalizedUrl,
|
requestedUrl: normalizedUrl,
|
||||||
@ -581,7 +915,9 @@ function buildAggregateAnalysis({
|
|||||||
allowedPages: entitlements.maxPagesPerCrawl,
|
allowedPages: entitlements.maxPagesPerCrawl,
|
||||||
actualPagesAnalyzed: pageAnalyses.length,
|
actualPagesAnalyzed: pageAnalyses.length,
|
||||||
advancedCrawlEnabled: entitlements.canAdvancedCrawl,
|
advancedCrawlEnabled: entitlements.canAdvancedCrawl,
|
||||||
provider: 'internal',
|
provider,
|
||||||
|
includeTargets: crawlTargetSummary.includeTargets,
|
||||||
|
excludeTargets: crawlTargetSummary.excludeTargets,
|
||||||
},
|
},
|
||||||
crawlSummary: {
|
crawlSummary: {
|
||||||
pagesWithStructuredData: pageAnalyses.filter((page) => page.schema?.hasStructuredData).length,
|
pagesWithStructuredData: pageAnalyses.filter((page) => page.schema?.hasStructuredData).length,
|
||||||
@ -607,14 +943,13 @@ function buildAggregateAnalysis({
|
|||||||
requestedPages,
|
requestedPages,
|
||||||
actualPagesAnalyzed: pageAnalyses.length,
|
actualPagesAnalyzed: pageAnalyses.length,
|
||||||
failedPages: failedPages.length,
|
failedPages: failedPages.length,
|
||||||
discoveredInternalPages,
|
crawlTargetSummary,
|
||||||
firecrawl,
|
|
||||||
}),
|
}),
|
||||||
finishedAt,
|
finishedAt,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildFailureAnalysis(normalizedUrl, error, firecrawl) {
|
function buildFailureAnalysis(normalizedUrl, error, firecrawl, provider = 'internal') {
|
||||||
const isAxiosError = axios.isAxiosError(error);
|
const isAxiosError = axios.isAxiosError(error);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@ -633,6 +968,9 @@ function buildFailureAnalysis(normalizedUrl, error, firecrawl) {
|
|||||||
rdfa: { count: 0, detected: false },
|
rdfa: { count: 0, detected: false },
|
||||||
},
|
},
|
||||||
firecrawl,
|
firecrawl,
|
||||||
|
crawlPlan: {
|
||||||
|
provider,
|
||||||
|
},
|
||||||
error: isAxiosError
|
error: isAxiosError
|
||||||
? error.response
|
? error.response
|
||||||
? `Request failed with status ${error.response.status}`
|
? `Request failed with status ${error.response.status}`
|
||||||
@ -1123,7 +1461,8 @@ module.exports = class SitesService {
|
|||||||
const requestedPages = parseRequestedPages(data?.requestedPages);
|
const requestedPages = parseRequestedPages(data?.requestedPages);
|
||||||
const entitlements = ensureRequestedPagesAllowed(requestedPages, currentUser);
|
const entitlements = ensureRequestedPagesAllowed(requestedPages, currentUser);
|
||||||
const normalizedUrl = normalizeUrl(data?.url || data?.base_url);
|
const normalizedUrl = normalizeUrl(data?.url || data?.base_url);
|
||||||
const firecrawl = getFirecrawlScaffold({ requestedPages, entitlements });
|
const crawlTargets = normalizeCrawlTargets(data, normalizedUrl);
|
||||||
|
let firecrawl = getFirecrawlScaffold({ requestedPages, entitlements });
|
||||||
const requestedName =
|
const requestedName =
|
||||||
typeof data?.name === 'string' && data.name.trim()
|
typeof data?.name === 'string' && data.name.trim()
|
||||||
? data.name.trim()
|
? data.name.trim()
|
||||||
@ -1190,12 +1529,50 @@ module.exports = class SitesService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const crawlResult = await crawlPages(normalizedUrl, requestedPages);
|
let crawlResult;
|
||||||
|
|
||||||
|
if (firecrawl.shouldUseFirecrawl) {
|
||||||
|
try {
|
||||||
|
crawlResult = await crawlPagesWithFirecrawl(normalizedUrl, requestedPages, crawlTargets);
|
||||||
|
firecrawl = {
|
||||||
|
...firecrawl,
|
||||||
|
currentProvider: 'firecrawl',
|
||||||
|
crawlId: crawlResult.firecrawlJob?.crawlId || null,
|
||||||
|
crawlStatus: crawlResult.firecrawlJob?.status || null,
|
||||||
|
creditsUsed: crawlResult.firecrawlJob?.creditsUsed || 0,
|
||||||
|
message: crawlResult.firecrawlJob?.status === 'failed'
|
||||||
|
? 'Firecrawl ran for this paid request, but the crawl reported failures. Partial results are shown when available.'
|
||||||
|
: 'Firecrawl handled this paid request with sitemap-aware, JavaScript-rendered crawling.',
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Firecrawl crawl failed, falling back to internal crawl:', error);
|
||||||
|
firecrawl = {
|
||||||
|
...firecrawl,
|
||||||
|
currentProvider: 'internal',
|
||||||
|
status: 'fallback_internal_after_error',
|
||||||
|
shouldUseFirecrawl: false,
|
||||||
|
fallbackReason: error.message,
|
||||||
|
message: `Firecrawl was selected for this paid request but failed to run (${error.message}). The analyzer fell back to the built-in crawler.`,
|
||||||
|
};
|
||||||
|
crawlResult = await crawlPages(normalizedUrl, requestedPages, crawlTargets);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
crawlResult = await crawlPages(normalizedUrl, requestedPages, crawlTargets);
|
||||||
|
firecrawl = {
|
||||||
|
...firecrawl,
|
||||||
|
currentProvider: 'internal',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
const pageAnalyses = crawlResult.pages;
|
const pageAnalyses = crawlResult.pages;
|
||||||
|
|
||||||
if (pageAnalyses.length === 0) {
|
if (pageAnalyses.length === 0) {
|
||||||
const firstFailure = crawlResult.failedPages[0];
|
const firstFailure = crawlResult.failedPages[0];
|
||||||
const error = new Error(firstFailure?.error || 'Site analysis failed.');
|
const error = new Error(
|
||||||
|
crawlTargets.includeTargets.length > 0 || crawlTargets.excludeTargets.length > 0
|
||||||
|
? 'No pages matched the include/exclude targeting rules you entered.'
|
||||||
|
: firstFailure?.error || 'Site analysis failed.',
|
||||||
|
);
|
||||||
error.code = 400;
|
error.code = 400;
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
@ -1208,6 +1585,8 @@ module.exports = class SitesService {
|
|||||||
discoveredInternalPages: crawlResult.discoveredInternalPages,
|
discoveredInternalPages: crawlResult.discoveredInternalPages,
|
||||||
failedPages: crawlResult.failedPages,
|
failedPages: crawlResult.failedPages,
|
||||||
firecrawl,
|
firecrawl,
|
||||||
|
crawlTargets,
|
||||||
|
provider: crawlResult.provider || 'internal',
|
||||||
});
|
});
|
||||||
const homepage = pageAnalyses[0];
|
const homepage = pageAnalyses[0];
|
||||||
const finishedAt = analysis.finishedAt;
|
const finishedAt = analysis.finishedAt;
|
||||||
@ -1291,7 +1670,12 @@ module.exports = class SitesService {
|
|||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Site analysis failed:', error);
|
console.error('Site analysis failed:', error);
|
||||||
|
|
||||||
const failureAnalysis = buildFailureAnalysis(normalizedUrl, error, firecrawl);
|
const failureAnalysis = buildFailureAnalysis(
|
||||||
|
normalizedUrl,
|
||||||
|
error,
|
||||||
|
firecrawl,
|
||||||
|
firecrawl?.currentProvider || 'internal',
|
||||||
|
);
|
||||||
const failedAt = new Date();
|
const failedAt = new Date();
|
||||||
const failureTransaction = await db.sequelize.transaction();
|
const failureTransaction = await db.sequelize.transaction();
|
||||||
let failedSite;
|
let failedSite;
|
||||||
@ -1351,7 +1735,7 @@ module.exports = class SitesService {
|
|||||||
allowedPages: entitlements.maxPagesPerCrawl,
|
allowedPages: entitlements.maxPagesPerCrawl,
|
||||||
actualPagesAnalyzed: 0,
|
actualPagesAnalyzed: 0,
|
||||||
advancedCrawlEnabled: entitlements.canAdvancedCrawl,
|
advancedCrawlEnabled: entitlements.canAdvancedCrawl,
|
||||||
provider: 'internal',
|
provider: failureAnalysis.crawlPlan?.provider || 'internal',
|
||||||
},
|
},
|
||||||
entitlements,
|
entitlements,
|
||||||
},
|
},
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
import { hasPermission } from './userPermissions';
|
import { hasPermission } from './userPermissions';
|
||||||
|
|
||||||
export const BASIC_MAX_PAGES_PER_CRAWL = 1;
|
export const BASIC_MAX_PAGES_PER_CRAWL = 25;
|
||||||
export const ADVANCED_MAX_PAGES_PER_CRAWL = 25;
|
export const ADVANCED_MAX_PAGES_PER_CRAWL = 25;
|
||||||
export const ADVANCED_CRAWL_PERMISSION = 'USE_ADVANCED_CRAWL';
|
export const ADVANCED_CRAWL_PERMISSION = 'USE_ADVANCED_CRAWL';
|
||||||
export const PLATFORM_OUTPUT_PERMISSION = 'USE_PLATFORM_OUTPUT';
|
export const PLATFORM_OUTPUT_PERMISSION = 'USE_PLATFORM_OUTPUT';
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import React, { ReactElement } from 'react';
|
|||||||
import { ToastContainer, toast } from 'react-toastify';
|
import { ToastContainer, toast } from 'react-toastify';
|
||||||
import BaseButton from '../../components/BaseButton';
|
import BaseButton from '../../components/BaseButton';
|
||||||
import BaseButtons from '../../components/BaseButtons';
|
import BaseButtons from '../../components/BaseButtons';
|
||||||
|
import BaseIcon from '../../components/BaseIcon';
|
||||||
import CardBox from '../../components/CardBox';
|
import CardBox from '../../components/CardBox';
|
||||||
import FormField from '../../components/FormField';
|
import FormField from '../../components/FormField';
|
||||||
import LayoutAuthenticated from '../../layouts/Authenticated';
|
import LayoutAuthenticated from '../../layouts/Authenticated';
|
||||||
@ -38,6 +39,8 @@ type AnalysisPayload = {
|
|||||||
actualPagesAnalyzed?: number;
|
actualPagesAnalyzed?: number;
|
||||||
advancedCrawlEnabled?: boolean;
|
advancedCrawlEnabled?: boolean;
|
||||||
provider?: string;
|
provider?: string;
|
||||||
|
includeTargets?: string[];
|
||||||
|
excludeTargets?: string[];
|
||||||
};
|
};
|
||||||
crawlSummary?: {
|
crawlSummary?: {
|
||||||
pagesWithStructuredData?: number;
|
pagesWithStructuredData?: number;
|
||||||
@ -66,7 +69,14 @@ type AnalysisPayload = {
|
|||||||
status?: string;
|
status?: string;
|
||||||
wouldHandleJavascript?: boolean;
|
wouldHandleJavascript?: boolean;
|
||||||
wouldHandleSitemapDiscovery?: boolean;
|
wouldHandleSitemapDiscovery?: boolean;
|
||||||
shouldUseFirecrawlLater?: boolean;
|
availableForCurrentUser?: boolean;
|
||||||
|
shouldUseFirecrawl?: boolean;
|
||||||
|
usePaidOnly?: boolean;
|
||||||
|
currentProvider?: string;
|
||||||
|
crawlId?: string | null;
|
||||||
|
crawlStatus?: string | null;
|
||||||
|
creditsUsed?: number;
|
||||||
|
fallbackReason?: string;
|
||||||
message?: string;
|
message?: string;
|
||||||
};
|
};
|
||||||
platform?: {
|
platform?: {
|
||||||
@ -123,6 +133,25 @@ type ReportResponse = {
|
|||||||
error?: string;
|
error?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
type SetupSectionId = 'targeting' | 'options' | 'limits';
|
||||||
|
type ResultsTabId = 'overview' | 'pages' | 'recommendations' | 'delivery';
|
||||||
|
|
||||||
|
type SetupAccordionSectionProps = {
|
||||||
|
title: string;
|
||||||
|
description: string;
|
||||||
|
badge?: React.ReactNode;
|
||||||
|
isOpen: boolean;
|
||||||
|
onToggle: () => void;
|
||||||
|
children: React.ReactNode;
|
||||||
|
};
|
||||||
|
|
||||||
|
type ResultsTabButtonProps = {
|
||||||
|
label: string;
|
||||||
|
count?: number | string;
|
||||||
|
isActive: boolean;
|
||||||
|
onClick: () => void;
|
||||||
|
};
|
||||||
|
|
||||||
const PLATFORM_OPTIONS = [
|
const PLATFORM_OPTIONS = [
|
||||||
{ value: 'wordpress', label: 'WordPress' },
|
{ value: 'wordpress', label: 'WordPress' },
|
||||||
{ value: 'shopify', label: 'Shopify' },
|
{ value: 'shopify', label: 'Shopify' },
|
||||||
@ -132,10 +161,77 @@ const PLATFORM_OPTIONS = [
|
|||||||
|
|
||||||
const initialReport: ReportResponse | null = null;
|
const initialReport: ReportResponse | null = null;
|
||||||
|
|
||||||
|
const parseTargetLines = (value: string) => value
|
||||||
|
.split(/\r?\n/)
|
||||||
|
.map((entry) => entry.trim())
|
||||||
|
.filter(Boolean);
|
||||||
|
|
||||||
|
const SetupAccordionSection = ({
|
||||||
|
title,
|
||||||
|
description,
|
||||||
|
badge,
|
||||||
|
isOpen,
|
||||||
|
onToggle,
|
||||||
|
children,
|
||||||
|
}: SetupAccordionSectionProps) => (
|
||||||
|
<div className='overflow-hidden rounded-2xl border border-slate-200 bg-slate-50 dark:border-slate-700 dark:bg-slate-900/40'>
|
||||||
|
<button
|
||||||
|
type='button'
|
||||||
|
className='flex w-full items-start justify-between gap-4 px-5 py-4 text-left'
|
||||||
|
onClick={onToggle}
|
||||||
|
aria-expanded={isOpen}
|
||||||
|
>
|
||||||
|
<div>
|
||||||
|
<div className='flex flex-wrap items-center gap-3'>
|
||||||
|
<h3 className='text-base font-semibold text-slate-900 dark:text-white'>{title}</h3>
|
||||||
|
{badge && (
|
||||||
|
<span className='rounded-full bg-white px-3 py-1 text-xs font-semibold text-slate-600 dark:bg-slate-950/50 dark:text-slate-200'>
|
||||||
|
{badge}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<p className='mt-2 pr-6 text-sm leading-6 text-slate-500 dark:text-slate-300'>{description}</p>
|
||||||
|
</div>
|
||||||
|
<BaseIcon
|
||||||
|
path={isOpen ? icon.mdiChevronUp : icon.mdiChevronDown}
|
||||||
|
className='mt-1 text-slate-500 dark:text-slate-300'
|
||||||
|
/>
|
||||||
|
</button>
|
||||||
|
|
||||||
|
{isOpen && (
|
||||||
|
<div className='border-t border-slate-200 px-5 py-5 dark:border-slate-700'>
|
||||||
|
{children}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
|
||||||
|
const ResultsTabButton = ({ label, count, isActive, onClick }: ResultsTabButtonProps) => (
|
||||||
|
<button
|
||||||
|
type='button'
|
||||||
|
onClick={onClick}
|
||||||
|
className={`inline-flex items-center justify-center gap-2 rounded-xl px-4 py-3 text-sm font-semibold transition-colors ${isActive
|
||||||
|
? 'bg-white text-slate-900 shadow-sm dark:bg-slate-950 dark:text-white'
|
||||||
|
: 'text-slate-500 hover:text-slate-900 dark:text-slate-300 dark:hover:text-white'}`}
|
||||||
|
>
|
||||||
|
<span>{label}</span>
|
||||||
|
{count !== undefined && count !== null && (
|
||||||
|
<span className={`rounded-full px-2 py-0.5 text-xs ${isActive
|
||||||
|
? 'bg-slate-100 text-slate-700 dark:bg-slate-800 dark:text-slate-100'
|
||||||
|
: 'bg-slate-200 text-slate-600 dark:bg-slate-700 dark:text-slate-200'}`}
|
||||||
|
>
|
||||||
|
{count}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</button>
|
||||||
|
);
|
||||||
|
|
||||||
const SchemaAnalyzerPage = () => {
|
const SchemaAnalyzerPage = () => {
|
||||||
const { currentUser } = useAppSelector((state) => state.auth);
|
const { currentUser } = useAppSelector((state) => state.auth);
|
||||||
const [url, setUrl] = React.useState('');
|
const [url, setUrl] = React.useState('');
|
||||||
const [requestedPages, setRequestedPages] = React.useState(1);
|
const [requestedPages, setRequestedPages] = React.useState(1);
|
||||||
|
const [includeTargets, setIncludeTargets] = React.useState('');
|
||||||
|
const [excludeTargets, setExcludeTargets] = React.useState('');
|
||||||
const [selectedPlatform, setSelectedPlatform] = React.useState('wordpress');
|
const [selectedPlatform, setSelectedPlatform] = React.useState('wordpress');
|
||||||
const [emailTo, setEmailTo] = React.useState(currentUser?.email || '');
|
const [emailTo, setEmailTo] = React.useState(currentUser?.email || '');
|
||||||
const [report, setReport] = React.useState<ReportResponse | null>(initialReport);
|
const [report, setReport] = React.useState<ReportResponse | null>(initialReport);
|
||||||
@ -144,6 +240,12 @@ const SchemaAnalyzerPage = () => {
|
|||||||
const [emailingId, setEmailingId] = React.useState<string | null>(null);
|
const [emailingId, setEmailingId] = React.useState<string | null>(null);
|
||||||
const [exportingId, setExportingId] = React.useState<string | null>(null);
|
const [exportingId, setExportingId] = React.useState<string | null>(null);
|
||||||
const [isCheckingPlatformOutput, setIsCheckingPlatformOutput] = React.useState(false);
|
const [isCheckingPlatformOutput, setIsCheckingPlatformOutput] = React.useState(false);
|
||||||
|
const [openSections, setOpenSections] = React.useState<Record<SetupSectionId, boolean>>({
|
||||||
|
targeting: true,
|
||||||
|
options: false,
|
||||||
|
limits: false,
|
||||||
|
});
|
||||||
|
const [activeResultsTab, setActiveResultsTab] = React.useState<ResultsTabId>('overview');
|
||||||
|
|
||||||
React.useEffect(() => {
|
React.useEffect(() => {
|
||||||
if (currentUser?.email) {
|
if (currentUser?.email) {
|
||||||
@ -151,6 +253,12 @@ const SchemaAnalyzerPage = () => {
|
|||||||
}
|
}
|
||||||
}, [currentUser?.email]);
|
}, [currentUser?.email]);
|
||||||
|
|
||||||
|
React.useEffect(() => {
|
||||||
|
if (report?.analysis) {
|
||||||
|
setActiveResultsTab('overview');
|
||||||
|
}
|
||||||
|
}, [report?.analysis?.analyzedUrl, report?.analysis?.fetchedAt]);
|
||||||
|
|
||||||
const notify = React.useCallback((type: 'success' | 'error' | 'info', message: string) => {
|
const notify = React.useCallback((type: 'success' | 'error' | 'info', message: string) => {
|
||||||
toast(message, { type, position: 'bottom-center' });
|
toast(message, { type, position: 'bottom-center' });
|
||||||
}, []);
|
}, []);
|
||||||
@ -160,18 +268,34 @@ const SchemaAnalyzerPage = () => {
|
|||||||
[currentUser],
|
[currentUser],
|
||||||
);
|
);
|
||||||
const entitlements = report?.entitlements || report?.analysis?.entitlements || fallbackEntitlements;
|
const entitlements = report?.entitlements || report?.analysis?.entitlements || fallbackEntitlements;
|
||||||
const maxPagesPerCrawl = entitlements?.maxPagesPerCrawl || 1;
|
const maxPagesPerCrawl = entitlements?.maxPagesPerCrawl || fallbackEntitlements.maxPagesPerCrawl || 25;
|
||||||
const recommendations = report?.recommendations || [];
|
const recommendations = report?.recommendations || [];
|
||||||
const exportableRecommendations = recommendations.filter(
|
const exportableRecommendations = recommendations.filter(
|
||||||
(recommendation) => recommendation.suggested_schema,
|
(recommendation) => recommendation.suggested_schema,
|
||||||
);
|
);
|
||||||
|
const crawlPlan = report?.analysis?.crawlPlan;
|
||||||
const isRequestedPagesOverLimit = requestedPages > maxPagesPerCrawl;
|
const isRequestedPagesOverLimit = requestedPages > maxPagesPerCrawl;
|
||||||
const firecrawlStatus = report?.analysis?.firecrawl || {
|
const draftIncludeTargets = React.useMemo(() => parseTargetLines(includeTargets), [includeTargets]);
|
||||||
provider: 'firecrawl',
|
const draftExcludeTargets = React.useMemo(() => parseTargetLines(excludeTargets), [excludeTargets]);
|
||||||
configured: false,
|
const appliedIncludeTargets = crawlPlan?.includeTargets || draftIncludeTargets;
|
||||||
wouldHandleJavascript: true,
|
const appliedExcludeTargets = crawlPlan?.excludeTargets || draftExcludeTargets;
|
||||||
wouldHandleSitemapDiscovery: true,
|
const analyzedPages = report?.analysis?.pages || [];
|
||||||
message: 'Firecrawl scaffold is wired in code, but this environment still needs a FIRECRAWL_API_KEY before activation.',
|
const failedPages = report?.analysis?.failedPages || [];
|
||||||
|
const jsonLdTypes = report?.analysis?.schema?.jsonLd?.types || [];
|
||||||
|
const invalidJsonLdBlocks = report?.analysis?.schema?.jsonLd?.invalidBlocks || [];
|
||||||
|
const hasTargetingRules = appliedIncludeTargets.length > 0 || appliedExcludeTargets.length > 0;
|
||||||
|
const selectedPlatformLabel = PLATFORM_OPTIONS.find(
|
||||||
|
(platformOption) => platformOption.value === selectedPlatform,
|
||||||
|
)?.label || 'Custom / Other';
|
||||||
|
const analyzedTimestamp = report?.analysis?.fetchedAt
|
||||||
|
? new Date(report.analysis.fetchedAt).toLocaleString()
|
||||||
|
: null;
|
||||||
|
|
||||||
|
const toggleSection = (section: SetupSectionId) => {
|
||||||
|
setOpenSections((currentSections) => ({
|
||||||
|
...currentSections,
|
||||||
|
[section]: !currentSections[section],
|
||||||
|
}));
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleAnalyze = async () => {
|
const handleAnalyze = async () => {
|
||||||
@ -183,7 +307,7 @@ const SchemaAnalyzerPage = () => {
|
|||||||
if (isRequestedPagesOverLimit) {
|
if (isRequestedPagesOverLimit) {
|
||||||
notify(
|
notify(
|
||||||
'error',
|
'error',
|
||||||
`Your current plan allows up to ${maxPagesPerCrawl} page${maxPagesPerCrawl === 1 ? '' : 's'} per crawl. Upgrade to Advanced Crawl to go beyond that limit.`,
|
`This analyzer supports up to ${maxPagesPerCrawl} page${maxPagesPerCrawl === 1 ? '' : 's'} per crawl. Reduce the page count to continue.`,
|
||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -193,6 +317,8 @@ const SchemaAnalyzerPage = () => {
|
|||||||
const response = await axios.post<ReportResponse>('/sites/analyze', {
|
const response = await axios.post<ReportResponse>('/sites/analyze', {
|
||||||
url: url.trim(),
|
url: url.trim(),
|
||||||
requestedPages,
|
requestedPages,
|
||||||
|
includeTargets,
|
||||||
|
excludeTargets,
|
||||||
});
|
});
|
||||||
setReport(response.data);
|
setReport(response.data);
|
||||||
|
|
||||||
@ -352,8 +478,6 @@ const SchemaAnalyzerPage = () => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const crawlPlan = report?.analysis?.crawlPlan;
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
<Head>
|
<Head>
|
||||||
@ -369,16 +493,17 @@ const SchemaAnalyzerPage = () => {
|
|||||||
</SectionTitleLineWithButton>
|
</SectionTitleLineWithButton>
|
||||||
|
|
||||||
<CardBox className='mb-6'>
|
<CardBox className='mb-6'>
|
||||||
<div className='grid gap-6 lg:grid-cols-[1.2fr,0.8fr]'>
|
<div className='space-y-6'>
|
||||||
<div>
|
<div>
|
||||||
<h2 className='text-xl font-semibold text-slate-900 dark:text-white'>Analyze a customer site</h2>
|
<h2 className='text-xl font-semibold text-slate-900 dark:text-white'>Analyze a customer site</h2>
|
||||||
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
|
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
|
||||||
Enter a domain or full URL. The app will detect the platform, crawl up to your allowed page limit,
|
Enter a domain or full URL, choose how many pages to review, and optionally focus the report on the
|
||||||
inspect structured data across the discovered pages, generate rules-based schema recommendations,
|
folders, categories, or pages that matter most. This setup keeps the page cleaner on mobile while still
|
||||||
and prepare developer-ready code snippets.
|
supporting up to {maxPagesPerCrawl} pages per crawl.
|
||||||
</p>
|
</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div className='mt-6'>
|
<div className='grid gap-4 lg:grid-cols-[minmax(0,1.35fr),minmax(240px,0.65fr)]'>
|
||||||
<FormField
|
<FormField
|
||||||
label='Website URL'
|
label='Website URL'
|
||||||
labelFor='schema-site-url'
|
labelFor='schema-site-url'
|
||||||
@ -398,9 +523,8 @@ const SchemaAnalyzerPage = () => {
|
|||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
</FormField>
|
</FormField>
|
||||||
</div>
|
|
||||||
|
|
||||||
<div className='grid gap-4 md:grid-cols-2'>
|
<div className='flex flex-col gap-4'>
|
||||||
<FormField
|
<FormField
|
||||||
label='Pages to analyze'
|
label='Pages to analyze'
|
||||||
labelFor='schema-requested-pages'
|
labelFor='schema-requested-pages'
|
||||||
@ -419,6 +543,128 @@ const SchemaAnalyzerPage = () => {
|
|||||||
/>
|
/>
|
||||||
</FormField>
|
</FormField>
|
||||||
|
|
||||||
|
<BaseButtons type='justify-start' className='mb-0' mb='mb-0'>
|
||||||
|
<BaseButton
|
||||||
|
color='info'
|
||||||
|
icon={icon.mdiMagnify}
|
||||||
|
label={isAnalyzing ? 'Analyzing…' : 'Analyze site'}
|
||||||
|
disabled={isAnalyzing || isRequestedPagesOverLimit}
|
||||||
|
onClick={() => {
|
||||||
|
handleAnalyze().catch(() => null);
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</BaseButtons>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{isRequestedPagesOverLimit && (
|
||||||
|
<div className='rounded-2xl border border-amber-300 bg-amber-50 p-3 text-sm text-amber-800 dark:border-amber-500/40 dark:bg-amber-500/10 dark:text-amber-100'>
|
||||||
|
You requested {requestedPages} pages, but this analyzer is capped at {maxPagesPerCrawl}. Reduce the page count to continue.
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
<div className='space-y-3'>
|
||||||
|
<SetupAccordionSection
|
||||||
|
title='Target pages'
|
||||||
|
description='Include only the folders or categories you want reviewed, and exclude pages you do not want reflected in the final report.'
|
||||||
|
badge={hasTargetingRules ? `${appliedIncludeTargets.length} include · ${appliedExcludeTargets.length} exclude` : 'Optional'}
|
||||||
|
isOpen={openSections.targeting}
|
||||||
|
onToggle={() => toggleSection('targeting')}
|
||||||
|
>
|
||||||
|
<div className='grid gap-4 md:grid-cols-2'>
|
||||||
|
<FormField
|
||||||
|
label='Include only these pages or folders'
|
||||||
|
labelFor='schema-include-targets'
|
||||||
|
help='Optional. Enter one full URL or path per line, for example /blog or /services/seo.'
|
||||||
|
hasTextareaHeight
|
||||||
|
>
|
||||||
|
<textarea
|
||||||
|
id='schema-include-targets'
|
||||||
|
name='schema-include-targets'
|
||||||
|
rows={4}
|
||||||
|
placeholder={`/blog
|
||||||
|
/services
|
||||||
|
https://example.com/pricing`}
|
||||||
|
value={includeTargets}
|
||||||
|
onChange={(event) => setIncludeTargets(event.target.value)}
|
||||||
|
/>
|
||||||
|
</FormField>
|
||||||
|
|
||||||
|
<FormField
|
||||||
|
label='Exclude these pages from the report'
|
||||||
|
labelFor='schema-exclude-targets'
|
||||||
|
help='Optional. Enter one full URL or path per line to leave pages or sections out of the final report.'
|
||||||
|
hasTextareaHeight
|
||||||
|
>
|
||||||
|
<textarea
|
||||||
|
id='schema-exclude-targets'
|
||||||
|
name='schema-exclude-targets'
|
||||||
|
rows={4}
|
||||||
|
placeholder={`/tag
|
||||||
|
/cart
|
||||||
|
/thank-you`}
|
||||||
|
value={excludeTargets}
|
||||||
|
onChange={(event) => setExcludeTargets(event.target.value)}
|
||||||
|
/>
|
||||||
|
</FormField>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div className='grid gap-3 lg:grid-cols-2'>
|
||||||
|
<div className='rounded-xl border border-slate-200 bg-white px-4 py-3 text-sm dark:border-slate-700 dark:bg-slate-950/40'>
|
||||||
|
<div className='font-semibold text-slate-900 dark:text-white'>Accepted formats</div>
|
||||||
|
<div className='mt-1 text-slate-500 dark:text-slate-300'>Full URLs or path rules like /blog, /pricing, or /category/shoes.</div>
|
||||||
|
</div>
|
||||||
|
<div className='rounded-xl border border-slate-200 bg-white px-4 py-3 text-sm dark:border-slate-700 dark:bg-slate-950/40'>
|
||||||
|
<div className='font-semibold text-slate-900 dark:text-white'>Report behavior</div>
|
||||||
|
<div className='mt-1 text-slate-500 dark:text-slate-300'>Excluded pages stay out of the analyzed page set and the final recommendations.</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{hasTargetingRules && (
|
||||||
|
<div className='mt-4 space-y-3'>
|
||||||
|
{appliedIncludeTargets.length > 0 && (
|
||||||
|
<div>
|
||||||
|
<div className='mb-2 text-xs font-semibold uppercase tracking-wide text-slate-500'>Include targets</div>
|
||||||
|
<div className='flex flex-wrap gap-2'>
|
||||||
|
{appliedIncludeTargets.map((target) => (
|
||||||
|
<span
|
||||||
|
key={`include-${target}`}
|
||||||
|
className='rounded-full bg-emerald-100 px-3 py-1 text-xs font-semibold text-emerald-700 dark:bg-emerald-500/10 dark:text-emerald-200'
|
||||||
|
>
|
||||||
|
{target}
|
||||||
|
</span>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{appliedExcludeTargets.length > 0 && (
|
||||||
|
<div>
|
||||||
|
<div className='mb-2 text-xs font-semibold uppercase tracking-wide text-slate-500'>Exclude targets</div>
|
||||||
|
<div className='flex flex-wrap gap-2'>
|
||||||
|
{appliedExcludeTargets.map((target) => (
|
||||||
|
<span
|
||||||
|
key={`exclude-${target}`}
|
||||||
|
className='rounded-full bg-amber-100 px-3 py-1 text-xs font-semibold text-amber-700 dark:bg-amber-500/10 dark:text-amber-200'
|
||||||
|
>
|
||||||
|
{target}
|
||||||
|
</span>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</SetupAccordionSection>
|
||||||
|
|
||||||
|
<SetupAccordionSection
|
||||||
|
title='Report options'
|
||||||
|
description='Choose the target platform for Step 4 output. This does not change what pages are reviewed; it only prepares the preferred output format for a later export step.'
|
||||||
|
badge={selectedPlatformLabel}
|
||||||
|
isOpen={openSections.options}
|
||||||
|
onToggle={() => toggleSection('options')}
|
||||||
|
>
|
||||||
|
<div className='grid gap-4 lg:grid-cols-[minmax(0,1fr),minmax(220px,0.7fr)]'>
|
||||||
<FormField
|
<FormField
|
||||||
label='Step 4 target platform'
|
label='Step 4 target platform'
|
||||||
labelFor='schema-platform-output'
|
labelFor='schema-platform-output'
|
||||||
@ -439,198 +685,101 @@ const SchemaAnalyzerPage = () => {
|
|||||||
))}
|
))}
|
||||||
</select>
|
</select>
|
||||||
</FormField>
|
</FormField>
|
||||||
</div>
|
|
||||||
|
|
||||||
{isRequestedPagesOverLimit && (
|
<div className='rounded-xl border border-slate-200 bg-white p-4 text-sm dark:border-slate-700 dark:bg-slate-950/40'>
|
||||||
<div className='mt-4 rounded-2xl border border-amber-300 bg-amber-50 p-3 text-sm text-amber-800 dark:border-amber-500/40 dark:bg-amber-500/10 dark:text-amber-100'>
|
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Selected output target</div>
|
||||||
You requested {requestedPages} pages, but this account is capped at {maxPagesPerCrawl}. Upgrade to
|
<div className='mt-2 text-base font-semibold text-slate-900 dark:text-white'>{selectedPlatformLabel}</div>
|
||||||
Advanced Crawl to raise that limit.
|
<p className='mt-2 text-slate-500 dark:text-slate-300'>
|
||||||
</div>
|
Keep this aligned with the CMS or platform your developer will implement against.
|
||||||
)}
|
|
||||||
|
|
||||||
<BaseButtons type='justify-start' className='mt-4'>
|
|
||||||
<BaseButton
|
|
||||||
color='info'
|
|
||||||
icon={icon.mdiMagnify}
|
|
||||||
label={isAnalyzing ? 'Analyzing…' : 'Analyze site'}
|
|
||||||
disabled={isAnalyzing || isRequestedPagesOverLimit}
|
|
||||||
onClick={() => {
|
|
||||||
handleAnalyze().catch(() => null);
|
|
||||||
}}
|
|
||||||
/>
|
|
||||||
<BaseButton
|
|
||||||
color='whiteDark'
|
|
||||||
outline
|
|
||||||
icon={icon.mdiContentCopy}
|
|
||||||
label='Copy all code'
|
|
||||||
disabled={exportableRecommendations.length === 0}
|
|
||||||
onClick={() => {
|
|
||||||
const combined = exportableRecommendations
|
|
||||||
.map((recommendation) => recommendation.suggested_schema)
|
|
||||||
.filter(Boolean)
|
|
||||||
.join('\n\n');
|
|
||||||
navigator.clipboard
|
|
||||||
.writeText(combined)
|
|
||||||
.then(() => notify('success', 'All schema code copied to clipboard.'))
|
|
||||||
.catch((error) => {
|
|
||||||
console.error('Copy all code failed:', error);
|
|
||||||
notify('error', 'Unable to copy the combined code.');
|
|
||||||
});
|
|
||||||
}}
|
|
||||||
/>
|
|
||||||
<BaseButton
|
|
||||||
color={entitlements?.canPlatformOutput ? 'success' : 'warning'}
|
|
||||||
outline={!entitlements?.canPlatformOutput}
|
|
||||||
icon={entitlements?.canPlatformOutput ? icon.mdiCodeBraces : icon.mdiLockOutline}
|
|
||||||
label={isCheckingPlatformOutput
|
|
||||||
? 'Checking…'
|
|
||||||
: entitlements?.canPlatformOutput
|
|
||||||
? 'Check Step 4 output'
|
|
||||||
: 'Premium Step 4'}
|
|
||||||
disabled={!report?.site?.id || isCheckingPlatformOutput}
|
|
||||||
onClick={() => {
|
|
||||||
handlePlatformOutputCheck().catch(() => null);
|
|
||||||
}}
|
|
||||||
/>
|
|
||||||
</BaseButtons>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div className='space-y-4'>
|
|
||||||
<div className='rounded-2xl border border-slate-200 bg-slate-50 p-5 dark:border-slate-700 dark:bg-slate-900/40'>
|
|
||||||
<div className='flex items-start justify-between gap-4'>
|
|
||||||
<div>
|
|
||||||
<h3 className='text-base font-semibold text-slate-900 dark:text-white'>Paywall status</h3>
|
|
||||||
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
|
|
||||||
Advanced Crawl is now enforced and active. Premium still reserves Step 4 platform output. Firecrawl is scaffolded for sitemap + JS-rendered crawling, but not activated yet.
|
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
<span className={`rounded-full px-3 py-1 text-xs font-semibold ${entitlements?.canPlatformOutput
|
|
||||||
? 'bg-emerald-100 text-emerald-700 dark:bg-emerald-500/10 dark:text-emerald-200'
|
|
||||||
: 'bg-slate-200 text-slate-700 dark:bg-slate-700 dark:text-slate-100'}`}>
|
|
||||||
{entitlements?.canPlatformOutput ? 'Premium access' : 'Basic access'}
|
|
||||||
</span>
|
|
||||||
</div>
|
</div>
|
||||||
|
</SetupAccordionSection>
|
||||||
|
|
||||||
<div className='mt-4 space-y-3 text-sm'>
|
<SetupAccordionSection
|
||||||
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
|
title='Plan details'
|
||||||
<span className='text-slate-600 dark:text-slate-300'>Advanced crawl entitlement</span>
|
description='A simple summary of what this analyzer includes for the current user.'
|
||||||
<span className={`font-semibold ${entitlements?.canAdvancedCrawl ? 'text-emerald-600 dark:text-emerald-300' : 'text-amber-700 dark:text-amber-300'}`}>
|
badge={`${maxPagesPerCrawl} pages per crawl`}
|
||||||
{entitlements?.canAdvancedCrawl ? 'Unlocked' : 'Locked'}
|
isOpen={openSections.limits}
|
||||||
</span>
|
onToggle={() => toggleSection('limits')}
|
||||||
|
>
|
||||||
|
<div className='grid gap-3 md:grid-cols-3'>
|
||||||
|
<div className='rounded-xl border border-slate-200 bg-white px-4 py-3 text-sm dark:border-slate-700 dark:bg-slate-950/40'>
|
||||||
|
<div className='text-slate-500 dark:text-slate-300'>Access level</div>
|
||||||
|
<div className='mt-1 font-semibold text-slate-900 dark:text-white'>
|
||||||
|
{entitlements?.canPlatformOutput ? 'Premium' : 'Basic'}
|
||||||
</div>
|
</div>
|
||||||
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
|
|
||||||
<span className='text-slate-600 dark:text-slate-300'>Max pages per crawl</span>
|
|
||||||
<span className='font-semibold text-slate-900 dark:text-white'>{maxPagesPerCrawl}</span>
|
|
||||||
</div>
|
</div>
|
||||||
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
|
<div className='rounded-xl border border-slate-200 bg-white px-4 py-3 text-sm dark:border-slate-700 dark:bg-slate-950/40'>
|
||||||
<span className='text-slate-600 dark:text-slate-300'>Platform-specific Step 4 output</span>
|
<div className='text-slate-500 dark:text-slate-300'>Pages allowed</div>
|
||||||
<span className={`font-semibold ${entitlements?.canPlatformOutput ? 'text-emerald-600 dark:text-emerald-300' : 'text-amber-700 dark:text-amber-300'}`}>
|
<div className='mt-1 font-semibold text-slate-900 dark:text-white'>
|
||||||
{entitlements?.canPlatformOutput ? 'Reserved' : 'Premium only'}
|
Up to {maxPagesPerCrawl} pages per crawl
|
||||||
</span>
|
</div>
|
||||||
|
</div>
|
||||||
|
<div className='rounded-xl border border-slate-200 bg-white px-4 py-3 text-sm dark:border-slate-700 dark:bg-slate-950/40'>
|
||||||
|
<div className='text-slate-500 dark:text-slate-300'>Step 4 output</div>
|
||||||
|
<div className='mt-1 font-semibold text-slate-900 dark:text-white'>
|
||||||
|
{entitlements?.canPlatformOutput ? 'Available to check' : 'Premium only'}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
</SetupAccordionSection>
|
||||||
<div className='rounded-2xl border border-slate-200 bg-slate-50 p-5 dark:border-slate-700 dark:bg-slate-900/40'>
|
|
||||||
<div className='flex items-start justify-between gap-4'>
|
|
||||||
<div>
|
|
||||||
<h3 className='text-base font-semibold text-slate-900 dark:text-white'>Firecrawl scaffold</h3>
|
|
||||||
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
|
|
||||||
Sitemap discovery and JS-rendered crawl are planned through Firecrawl. This environment is currently using the built-in crawler only.
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
<span className={`rounded-full px-3 py-1 text-xs font-semibold ${firecrawlStatus?.configured
|
|
||||||
? 'bg-emerald-100 text-emerald-700 dark:bg-emerald-500/10 dark:text-emerald-200'
|
|
||||||
: 'bg-amber-100 text-amber-700 dark:bg-amber-500/10 dark:text-amber-200'}`}>
|
|
||||||
{firecrawlStatus?.configured ? 'Key detected' : 'API key needed'}
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div className='mt-4 space-y-3 text-sm'>
|
|
||||||
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
|
|
||||||
<span className='text-slate-600 dark:text-slate-300'>Current crawl provider</span>
|
|
||||||
<span className='font-semibold text-slate-900 dark:text-white'>
|
|
||||||
{report?.analysis?.crawlPlan?.provider || 'internal'}
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
|
|
||||||
<span className='text-slate-600 dark:text-slate-300'>Sitemap crawl path</span>
|
|
||||||
<span className='font-semibold text-slate-900 dark:text-white'>
|
|
||||||
{firecrawlStatus?.wouldHandleSitemapDiscovery ? 'Scaffolded' : 'Not scaffolded'}
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
<div className='flex items-center justify-between rounded-xl border border-slate-200 bg-white px-3 py-2 dark:border-slate-700 dark:bg-slate-950/40'>
|
|
||||||
<span className='text-slate-600 dark:text-slate-300'>JS-rendered crawl path</span>
|
|
||||||
<span className='font-semibold text-slate-900 dark:text-white'>
|
|
||||||
{firecrawlStatus?.wouldHandleJavascript ? 'Scaffolded' : 'Not scaffolded'}
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{firecrawlStatus?.message && (
|
|
||||||
<div className='mt-4 rounded-xl border border-sky-200 bg-sky-50 px-3 py-3 text-sm text-sky-800 dark:border-sky-500/30 dark:bg-sky-500/10 dark:text-sky-100'>
|
|
||||||
{firecrawlStatus.message}
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div className='rounded-2xl border border-slate-200 bg-slate-50 p-5 dark:border-slate-700 dark:bg-slate-900/40'>
|
|
||||||
<h3 className='text-base font-semibold text-slate-900 dark:text-white'>Delivery actions</h3>
|
|
||||||
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
|
|
||||||
Export a developer handoff file or email the latest recommendations directly.
|
|
||||||
</p>
|
|
||||||
<div className='mt-4'>
|
|
||||||
<FormField label='Developer email' labelFor='schema-email-recipient'>
|
|
||||||
<input
|
|
||||||
id='schema-email-recipient'
|
|
||||||
name='schema-email-recipient'
|
|
||||||
placeholder='developer@example.com'
|
|
||||||
value={emailTo}
|
|
||||||
onChange={(event) => setEmailTo(event.target.value)}
|
|
||||||
/>
|
|
||||||
</FormField>
|
|
||||||
</div>
|
|
||||||
<BaseButtons type='justify-start'>
|
|
||||||
<BaseButton
|
|
||||||
color='success'
|
|
||||||
icon={icon.mdiDownload}
|
|
||||||
label={isExportingAll ? 'Exporting…' : 'Export all'}
|
|
||||||
disabled={!report?.site?.id || isExportingAll}
|
|
||||||
onClick={() => {
|
|
||||||
handleExportAll().catch(() => null);
|
|
||||||
}}
|
|
||||||
/>
|
|
||||||
<BaseButton
|
|
||||||
color='warning'
|
|
||||||
icon={icon.mdiEmailOutline}
|
|
||||||
label={emailingId === 'all' ? 'Emailing…' : 'Email all'}
|
|
||||||
disabled={!report?.site?.id || emailingId === 'all'}
|
|
||||||
onClick={() => {
|
|
||||||
handleEmailCode().catch(() => null);
|
|
||||||
}}
|
|
||||||
/>
|
|
||||||
</BaseButtons>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</CardBox>
|
</CardBox>
|
||||||
|
|
||||||
{report?.analysis && (
|
{report?.analysis && (
|
||||||
<div className='grid gap-6 xl:grid-cols-[0.9fr,1.1fr]'>
|
<CardBox>
|
||||||
<CardBox className='h-full'>
|
<div className='flex flex-col gap-3 lg:flex-row lg:items-start lg:justify-between'>
|
||||||
<div className='flex flex-col gap-2 sm:flex-row sm:items-start sm:justify-between'>
|
|
||||||
<div>
|
<div>
|
||||||
<h3 className='text-lg font-semibold text-slate-900 dark:text-white'>Site findings</h3>
|
<h3 className='text-lg font-semibold text-slate-900 dark:text-white'>Analysis results</h3>
|
||||||
<p className='mt-1 text-sm text-slate-500 dark:text-slate-300'>
|
<p className='mt-1 text-sm text-slate-500 dark:text-slate-300'>
|
||||||
High-level crawl and structured-data summary from the latest analysis run.
|
Review the latest crawl summary, page-level findings, prioritized recommendations, and delivery actions from one mobile-friendly workspace.
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
<div className='rounded-full bg-slate-100 px-3 py-1 text-xs font-semibold uppercase tracking-wide text-slate-700 dark:bg-slate-800 dark:text-slate-200'>
|
<div className='flex flex-wrap items-center gap-2'>
|
||||||
|
<span className='rounded-full bg-slate-100 px-3 py-1 text-xs font-semibold uppercase tracking-wide text-slate-700 dark:bg-slate-800 dark:text-slate-200'>
|
||||||
{report.analysis.platform?.label || 'Unknown platform'}
|
{report.analysis.platform?.label || 'Unknown platform'}
|
||||||
|
</span>
|
||||||
|
{analyzedTimestamp && (
|
||||||
|
<span className='rounded-full bg-sky-50 px-3 py-1 text-xs font-semibold text-sky-700 dark:bg-sky-500/10 dark:text-sky-200'>
|
||||||
|
{analyzedTimestamp}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className='mt-5 grid gap-4 md:grid-cols-2'>
|
<div className='mt-5 overflow-x-auto'>
|
||||||
|
<div className='inline-flex min-w-full gap-2 rounded-2xl bg-slate-100 p-1 dark:bg-slate-800/70'>
|
||||||
|
<ResultsTabButton
|
||||||
|
label='Overview'
|
||||||
|
isActive={activeResultsTab === 'overview'}
|
||||||
|
onClick={() => setActiveResultsTab('overview')}
|
||||||
|
/>
|
||||||
|
<ResultsTabButton
|
||||||
|
label='Pages'
|
||||||
|
count={analyzedPages.length + failedPages.length}
|
||||||
|
isActive={activeResultsTab === 'pages'}
|
||||||
|
onClick={() => setActiveResultsTab('pages')}
|
||||||
|
/>
|
||||||
|
<ResultsTabButton
|
||||||
|
label='Recommendations'
|
||||||
|
count={recommendations.length}
|
||||||
|
isActive={activeResultsTab === 'recommendations'}
|
||||||
|
onClick={() => setActiveResultsTab('recommendations')}
|
||||||
|
/>
|
||||||
|
<ResultsTabButton
|
||||||
|
label='Delivery'
|
||||||
|
isActive={activeResultsTab === 'delivery'}
|
||||||
|
onClick={() => setActiveResultsTab('delivery')}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div className='mt-6'>
|
||||||
|
{activeResultsTab === 'overview' && (
|
||||||
|
<div className='space-y-5'>
|
||||||
|
<div className='grid gap-4 md:grid-cols-2 xl:grid-cols-3'>
|
||||||
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
||||||
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Analyzed URL</div>
|
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Analyzed URL</div>
|
||||||
<div className='mt-2 break-all text-sm text-slate-900 dark:text-white'>
|
<div className='mt-2 break-all text-sm text-slate-900 dark:text-white'>
|
||||||
@ -643,6 +792,18 @@ const SchemaAnalyzerPage = () => {
|
|||||||
{report.analysis.pageTitle || 'No title found'}
|
{report.analysis.pageTitle || 'No title found'}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
||||||
|
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Recommendations</div>
|
||||||
|
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
|
||||||
|
{recommendations.length} total
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
||||||
|
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Pages analyzed</div>
|
||||||
|
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
|
||||||
|
{crawlPlan?.actualPagesAnalyzed || analyzedPages.length || 0}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
||||||
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Pages with structured data</div>
|
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Pages with structured data</div>
|
||||||
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
|
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
|
||||||
@ -658,19 +819,20 @@ const SchemaAnalyzerPage = () => {
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
{crawlPlan && (
|
{crawlPlan && (
|
||||||
<div className='mt-5 rounded-2xl border border-sky-200 bg-sky-50 p-4 text-sm text-sky-900 dark:border-sky-500/30 dark:bg-sky-500/10 dark:text-sky-100'>
|
<div className='rounded-2xl border border-sky-200 bg-sky-50 p-4 text-sm text-sky-900 dark:border-sky-500/30 dark:bg-sky-500/10 dark:text-sky-100'>
|
||||||
<div className='font-semibold'>Crawl summary</div>
|
<div className='font-semibold'>Crawl summary</div>
|
||||||
<div className='mt-2 flex flex-col gap-1'>
|
<div className='mt-2 grid gap-2 md:grid-cols-2'>
|
||||||
<span>Requested pages: {crawlPlan.requestedPages || 1}</span>
|
<span>Requested pages: {crawlPlan.requestedPages || 1}</span>
|
||||||
<span>Plan limit: {crawlPlan.allowedPages || maxPagesPerCrawl}</span>
|
<span>Plan limit: {crawlPlan.allowedPages || maxPagesPerCrawl}</span>
|
||||||
<span>Pages analyzed: {crawlPlan.actualPagesAnalyzed || 0}</span>
|
<span>Pages analyzed: {crawlPlan.actualPagesAnalyzed || 0}</span>
|
||||||
|
<span>Failed page fetches: {report.analysis.crawlSummary?.failedPages ?? failedPages.length}</span>
|
||||||
</div>
|
</div>
|
||||||
{report.analysis.notice && <div className='mt-3'>{report.analysis.notice}</div>}
|
{report.analysis.notice && <div className='mt-3'>{report.analysis.notice}</div>}
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{report.analysis.crawlSummary && (
|
{report.analysis.crawlSummary && (
|
||||||
<div className='mt-5 grid gap-4 md:grid-cols-2'>
|
<div className='grid gap-4 md:grid-cols-2'>
|
||||||
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
||||||
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Pages without structured data</div>
|
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Pages without structured data</div>
|
||||||
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
|
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
|
||||||
@ -678,19 +840,102 @@ const SchemaAnalyzerPage = () => {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
||||||
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Failed page fetches</div>
|
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Invalid JSON-LD blocks</div>
|
||||||
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
|
<div className='mt-2 text-sm text-slate-900 dark:text-white'>
|
||||||
{report.analysis.crawlSummary.failedPages ?? 0}
|
{invalidJsonLdBlocks.length}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{(report.analysis.pages || []).length > 0 && (
|
{hasTargetingRules && (
|
||||||
<div className='mt-5'>
|
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
||||||
<div className='mb-2 text-xs font-semibold uppercase tracking-wide text-slate-500'>Analyzed pages</div>
|
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Applied targeting</div>
|
||||||
|
<div className='mt-3 space-y-3'>
|
||||||
|
{appliedIncludeTargets.length > 0 && (
|
||||||
|
<div>
|
||||||
|
<div className='mb-2 text-sm font-semibold text-slate-900 dark:text-white'>Included</div>
|
||||||
|
<div className='flex flex-wrap gap-2'>
|
||||||
|
{appliedIncludeTargets.map((target) => (
|
||||||
|
<span
|
||||||
|
key={`overview-include-${target}`}
|
||||||
|
className='rounded-full bg-emerald-100 px-3 py-1 text-xs font-semibold text-emerald-700 dark:bg-emerald-500/10 dark:text-emerald-200'
|
||||||
|
>
|
||||||
|
{target}
|
||||||
|
</span>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{appliedExcludeTargets.length > 0 && (
|
||||||
|
<div>
|
||||||
|
<div className='mb-2 text-sm font-semibold text-slate-900 dark:text-white'>Excluded</div>
|
||||||
|
<div className='flex flex-wrap gap-2'>
|
||||||
|
{appliedExcludeTargets.map((target) => (
|
||||||
|
<span
|
||||||
|
key={`overview-exclude-${target}`}
|
||||||
|
className='rounded-full bg-amber-100 px-3 py-1 text-xs font-semibold text-amber-700 dark:bg-amber-500/10 dark:text-amber-200'
|
||||||
|
>
|
||||||
|
{target}
|
||||||
|
</span>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{jsonLdTypes.length > 0 && (
|
||||||
|
<div>
|
||||||
|
<div className='mb-2 text-xs font-semibold uppercase tracking-wide text-slate-500'>Detected JSON-LD types</div>
|
||||||
|
<div className='flex flex-wrap gap-2'>
|
||||||
|
{jsonLdTypes.map((typeName) => (
|
||||||
|
<span
|
||||||
|
key={typeName}
|
||||||
|
className='rounded-full bg-emerald-100 px-3 py-1 text-xs font-semibold text-emerald-700 dark:bg-emerald-500/10 dark:text-emerald-200'
|
||||||
|
>
|
||||||
|
{typeName}
|
||||||
|
</span>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{invalidJsonLdBlocks.length > 0 && (
|
||||||
|
<div className='rounded-2xl border border-amber-300 bg-amber-50 p-4 text-sm text-amber-900 dark:border-amber-500/30 dark:bg-amber-500/10 dark:text-amber-100'>
|
||||||
|
<div className='font-semibold'>Invalid JSON-LD detected</div>
|
||||||
|
<ul className='mt-2 list-disc space-y-1 pl-5'>
|
||||||
|
{invalidJsonLdBlocks.map((block) => (
|
||||||
|
<li key={`${block.index}-${block.message}`}>
|
||||||
|
Block {block.index + 1}: {block.message}
|
||||||
|
</li>
|
||||||
|
))}
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{report.analysis.error && (
|
||||||
|
<div className='rounded-xl border border-rose-200 bg-rose-50 p-3 text-rose-700 dark:border-rose-500/40 dark:bg-rose-500/10 dark:text-rose-200'>
|
||||||
|
{report.analysis.error}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{activeResultsTab === 'pages' && (
|
||||||
|
<div className='space-y-5'>
|
||||||
|
{analyzedPages.length === 0 && failedPages.length === 0 && (
|
||||||
|
<div className='rounded-2xl border border-dashed border-slate-300 p-6 text-sm text-slate-500 dark:border-slate-700 dark:text-slate-300'>
|
||||||
|
No page-level results are available yet for this analysis run.
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{analyzedPages.length > 0 && (
|
||||||
|
<div>
|
||||||
|
<div className='mb-3 text-xs font-semibold uppercase tracking-wide text-slate-500'>Analyzed pages</div>
|
||||||
<div className='space-y-3'>
|
<div className='space-y-3'>
|
||||||
{(report.analysis.pages || []).map((page) => (
|
{analyzedPages.map((page) => (
|
||||||
<div
|
<div
|
||||||
key={page.url}
|
key={page.url}
|
||||||
className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'
|
className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'
|
||||||
@ -723,11 +968,11 @@ const SchemaAnalyzerPage = () => {
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{(report.analysis.failedPages || []).length > 0 && (
|
{failedPages.length > 0 && (
|
||||||
<div className='mt-5 rounded-2xl border border-amber-300 bg-amber-50 p-4 text-sm text-amber-900 dark:border-amber-500/30 dark:bg-amber-500/10 dark:text-amber-100'>
|
<div className='rounded-2xl border border-amber-300 bg-amber-50 p-4 text-sm text-amber-900 dark:border-amber-500/30 dark:bg-amber-500/10 dark:text-amber-100'>
|
||||||
<div className='font-semibold'>Some internal pages could not be fetched</div>
|
<div className='font-semibold'>Some internal pages could not be fetched</div>
|
||||||
<ul className='mt-2 list-disc space-y-1 pl-5'>
|
<ul className='mt-2 list-disc space-y-1 pl-5'>
|
||||||
{(report.analysis.failedPages || []).map((page) => (
|
{failedPages.map((page) => (
|
||||||
<li key={`${page.url}-${page.error}`}>
|
<li key={`${page.url}-${page.error}`}>
|
||||||
<span className='font-medium'>{page.url}</span>: {page.error}
|
<span className='font-medium'>{page.url}</span>: {page.error}
|
||||||
</li>
|
</li>
|
||||||
@ -735,57 +980,39 @@ const SchemaAnalyzerPage = () => {
|
|||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{(report.analysis.schema?.jsonLd?.types || []).length > 0 && (
|
|
||||||
<div className='mt-5'>
|
|
||||||
<div className='mb-2 text-xs font-semibold uppercase tracking-wide text-slate-500'>Detected JSON-LD types</div>
|
|
||||||
<div className='flex flex-wrap gap-2'>
|
|
||||||
{(report.analysis.schema?.jsonLd?.types || []).map((typeName) => (
|
|
||||||
<span
|
|
||||||
key={typeName}
|
|
||||||
className='rounded-full bg-emerald-100 px-3 py-1 text-xs font-semibold text-emerald-700 dark:bg-emerald-500/10 dark:text-emerald-200'
|
|
||||||
>
|
|
||||||
{typeName}
|
|
||||||
</span>
|
|
||||||
))}
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{(report.analysis.schema?.jsonLd?.invalidBlocks || []).length > 0 && (
|
{activeResultsTab === 'recommendations' && (
|
||||||
<div className='mt-5 rounded-2xl border border-amber-300 bg-amber-50 p-4 text-sm text-amber-900 dark:border-amber-500/30 dark:bg-amber-500/10 dark:text-amber-100'>
|
<div className='space-y-5'>
|
||||||
<div className='font-semibold'>Invalid JSON-LD detected</div>
|
<div className='flex flex-col gap-3 sm:flex-row sm:items-center sm:justify-between'>
|
||||||
<ul className='mt-2 list-disc space-y-1 pl-5'>
|
|
||||||
{(report.analysis.schema?.jsonLd?.invalidBlocks || []).map((block) => (
|
|
||||||
<li key={`${block.index}-${block.message}`}>
|
|
||||||
Block {block.index + 1}: {block.message}
|
|
||||||
</li>
|
|
||||||
))}
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
|
|
||||||
{report.analysis.error && (
|
|
||||||
<div className='mt-5 rounded-xl border border-rose-200 bg-rose-50 p-3 text-rose-700 dark:border-rose-500/40 dark:bg-rose-500/10 dark:text-rose-200'>
|
|
||||||
{report.analysis.error}
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</CardBox>
|
|
||||||
|
|
||||||
<CardBox className='h-full'>
|
|
||||||
<div className='flex flex-col gap-2 sm:flex-row sm:items-end sm:justify-between'>
|
|
||||||
<div>
|
|
||||||
<h3 className='text-lg font-semibold text-slate-900 dark:text-white'>Recommendations</h3>
|
|
||||||
<p className='mt-1 text-sm text-slate-500 dark:text-slate-300'>
|
|
||||||
Prioritized next actions with ready-to-copy schema where possible.
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
<div className='text-sm text-slate-500 dark:text-slate-300'>
|
<div className='text-sm text-slate-500 dark:text-slate-300'>
|
||||||
{recommendations.length} recommendation{recommendations.length === 1 ? '' : 's'}
|
{recommendations.length} recommendation{recommendations.length === 1 ? '' : 's'} generated from the latest analysis.
|
||||||
</div>
|
</div>
|
||||||
|
<BaseButtons type='justify-start sm:justify-end' className='mb-0' mb='mb-0'>
|
||||||
|
<BaseButton
|
||||||
|
color='whiteDark'
|
||||||
|
outline
|
||||||
|
icon={icon.mdiContentCopy}
|
||||||
|
label='Copy all code'
|
||||||
|
disabled={exportableRecommendations.length === 0}
|
||||||
|
onClick={() => {
|
||||||
|
const combined = exportableRecommendations
|
||||||
|
.map((recommendation) => recommendation.suggested_schema)
|
||||||
|
.filter(Boolean)
|
||||||
|
.join('\n\n');
|
||||||
|
navigator.clipboard
|
||||||
|
.writeText(combined)
|
||||||
|
.then(() => notify('success', 'All schema code copied to clipboard.'))
|
||||||
|
.catch((error) => {
|
||||||
|
console.error('Copy all code failed:', error);
|
||||||
|
notify('error', 'Unable to copy the combined code.');
|
||||||
|
});
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</BaseButtons>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className='mt-5 space-y-4'>
|
|
||||||
{recommendations.length === 0 && (
|
{recommendations.length === 0 && (
|
||||||
<div className='rounded-2xl border border-dashed border-slate-300 p-6 text-sm text-slate-500 dark:border-slate-700 dark:text-slate-300'>
|
<div className='rounded-2xl border border-dashed border-slate-300 p-6 text-sm text-slate-500 dark:border-slate-700 dark:text-slate-300'>
|
||||||
No recommendations were generated for this page yet.
|
No recommendations were generated for this page yet.
|
||||||
@ -871,8 +1098,78 @@ const SchemaAnalyzerPage = () => {
|
|||||||
</div>
|
</div>
|
||||||
))}
|
))}
|
||||||
</div>
|
</div>
|
||||||
</CardBox>
|
)}
|
||||||
|
|
||||||
|
{activeResultsTab === 'delivery' && (
|
||||||
|
<div className='space-y-5'>
|
||||||
|
<div className='rounded-2xl border border-slate-200 bg-slate-50 p-5 dark:border-slate-700 dark:bg-slate-900/40'>
|
||||||
|
<h4 className='text-base font-semibold text-slate-900 dark:text-white'>Delivery actions</h4>
|
||||||
|
<p className='mt-2 text-sm leading-6 text-slate-500 dark:text-slate-300'>
|
||||||
|
Export a developer handoff file, email the latest recommendations, or check Step 4 output for the selected platform.
|
||||||
|
</p>
|
||||||
|
<div className='mt-4'>
|
||||||
|
<FormField label='Developer email' labelFor='schema-email-recipient'>
|
||||||
|
<input
|
||||||
|
id='schema-email-recipient'
|
||||||
|
name='schema-email-recipient'
|
||||||
|
placeholder='developer@example.com'
|
||||||
|
value={emailTo}
|
||||||
|
onChange={(event) => setEmailTo(event.target.value)}
|
||||||
|
/>
|
||||||
|
</FormField>
|
||||||
</div>
|
</div>
|
||||||
|
<BaseButtons type='justify-start'>
|
||||||
|
<BaseButton
|
||||||
|
color='success'
|
||||||
|
icon={icon.mdiDownload}
|
||||||
|
label={isExportingAll ? 'Exporting…' : 'Export all'}
|
||||||
|
disabled={!report?.site?.id || isExportingAll}
|
||||||
|
onClick={() => {
|
||||||
|
handleExportAll().catch(() => null);
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
<BaseButton
|
||||||
|
color='warning'
|
||||||
|
icon={icon.mdiEmailOutline}
|
||||||
|
label={emailingId === 'all' ? 'Emailing…' : 'Email all'}
|
||||||
|
disabled={!report?.site?.id || emailingId === 'all'}
|
||||||
|
onClick={() => {
|
||||||
|
handleEmailCode().catch(() => null);
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
<BaseButton
|
||||||
|
color={entitlements?.canPlatformOutput ? 'info' : 'whiteDark'}
|
||||||
|
outline={!entitlements?.canPlatformOutput}
|
||||||
|
icon={entitlements?.canPlatformOutput ? icon.mdiCodeBraces : icon.mdiLockOutline}
|
||||||
|
label={isCheckingPlatformOutput
|
||||||
|
? 'Checking…'
|
||||||
|
: entitlements?.canPlatformOutput
|
||||||
|
? 'Check Step 4 output'
|
||||||
|
: 'Premium Step 4'}
|
||||||
|
disabled={!report?.site?.id || isCheckingPlatformOutput}
|
||||||
|
onClick={() => {
|
||||||
|
handlePlatformOutputCheck().catch(() => null);
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</BaseButtons>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div className='grid gap-4 md:grid-cols-2'>
|
||||||
|
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
||||||
|
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Selected Step 4 platform</div>
|
||||||
|
<div className='mt-2 text-sm font-semibold text-slate-900 dark:text-white'>{selectedPlatformLabel}</div>
|
||||||
|
</div>
|
||||||
|
<div className='rounded-2xl border border-slate-200 p-4 dark:border-slate-700'>
|
||||||
|
<div className='text-xs font-semibold uppercase tracking-wide text-slate-500'>Access</div>
|
||||||
|
<div className='mt-2 text-sm font-semibold text-slate-900 dark:text-white'>
|
||||||
|
{entitlements?.canPlatformOutput ? 'Premium access detected' : 'Premium required for Step 4 output'}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</CardBox>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
<ToastContainer />
|
<ToastContainer />
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user