diff --git a/src/config.ts b/src/config.ts
index 7da28001..787744ce 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -20,7 +20,12 @@ export const configSchema = z.object({
    * @default ""
    */
   match: z.string().or(z.array(z.string())),
-
+  /**
+   * Glob pattern (or array of patterns) for links on a page that should be excluded from crawling
+   * @example "https://www.builder.io/c/docs/**"
+   * @default undefined (treated as an empty list, so nothing is excluded)
+   */
+  exclude: z.string().or(z.array(z.string())).optional(),
   /**
    * Selector to grab the inner text from
    * @example ".docs-builder-container"
diff --git a/src/core.ts b/src/core.ts
index 43b74312..06e0697a 100644
--- a/src/core.ts
+++ b/src/core.ts
@@ -92,6 +92,8 @@ export async function crawl(config: Config) {
         await enqueueLinks({
           globs:
             typeof config.match === "string" ? [config.match] : config.match,
+          exclude:
+            typeof config.exclude === "string" ? [config.exclude] : config.exclude ?? [],
         });
       },
       // Comment this option to scrape the full website.