From 16443ed9501624de40d921b8e47e4c35f15bf6b4 Mon Sep 17 00:00:00 2001 From: Patrik Schmittat Date: Thu, 28 Dec 2023 21:53:52 +0100 Subject: [PATCH] feat: add exclude pattern for links in config --- src/config.ts | 7 ++++++- src/core.ts | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/config.ts b/src/config.ts index 7da28001..787744ce 100644 --- a/src/config.ts +++ b/src/config.ts @@ -20,7 +20,12 @@ export const configSchema = z.object({ * @default "" */ match: z.string().or(z.array(z.string())), - + /** + * Pattern to match against for links on a page to exclude from crawling + * @example "https://www.builder.io/c/docs/**" + * @default "" + */ + exclude: z.string().or(z.array(z.string())).optional(), /** * Selector to grab the inner text from * @example ".docs-builder-container" diff --git a/src/core.ts b/src/core.ts index 43b74312..06e0697a 100644 --- a/src/core.ts +++ b/src/core.ts @@ -92,6 +92,8 @@ export async function crawl(config: Config) { await enqueueLinks({ globs: typeof config.match === "string" ? [config.match] : config.match, + exclude: + typeof config.exclude === "string" ? [config.exclude] : config.exclude ?? [], }); }, // Comment this option to scrape the full website.