Skip to content

Commit

Permalink
Merge pull request #116 from little873/multiple-cookies
Browse files: browse the repository at this point in the history
Support for multiple cookies
  • Loading branch information
steve8708 authored Dec 25, 2023
2 parents 3ec5fee + 581cc77 commit c34bde5
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 15 deletions.
16 changes: 12 additions & 4 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,18 @@ export const configSchema = z.object({
outputFileName: z.string(),
/** Optional cookie(s) to be set: either a single { name, value } object or an array of them. E.g. for Cookie Consent */
cookie: z
.object({
name: z.string(),
value: z.string(),
})
.union([
z.object({
name: z.string(),
value: z.string(),
}),
z.array(
z.object({
name: z.string(),
value: z.string(),
}),
),
])
.optional(),
/** Optional function to run for each page found */
onVisitPage: z
Expand Down
24 changes: 13 additions & 11 deletions src/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,6 @@ export async function crawl(config: Config) {
const crawler = new PlaywrightCrawler({
// Use the requestHandler to process each of the crawled pages.
async requestHandler({ request, page, enqueueLinks, log, pushData }) {
if (config.cookie) {
// Set the cookie for the specific URL
const cookie = {
name: config.cookie.name,
value: config.cookie.value,
url: request.loadedUrl,
};
await page.context().addCookies([cookie]);
}

const title = await page.title();
pageCounter++;
log.info(
Expand Down Expand Up @@ -108,12 +98,24 @@ export async function crawl(config: Config) {
// headless: false,
preNavigationHooks: [
// Add any configured cookies to the browser context and abort requests for certain resource types
async ({ page, log }) => {
async ({ request, page, log }) => {
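// If there are no resource exclusions, return.
// NOTE(review): this early return also skips the cookie setup further down, so
// cookies are only added when resource exclusions are configured — confirm this is intended.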
const RESOURCE_EXCLUSTIONS = config.resourceExclusions ?? [];
if (RESOURCE_EXCLUSTIONS.length === 0) {
return;
}
if (config.cookie) {
const cookies = (
Array.isArray(config.cookie) ? config.cookie : [config.cookie]
).map((cookie) => {
return {
name: cookie.name,
value: cookie.value,
url: request.loadedUrl,
};
});
await page.context().addCookies(cookies);
}
await page.route(`**\/*.{${RESOURCE_EXCLUSTIONS.join()}}`, (route) =>
route.abort("aborted"),
);
Expand Down

0 comments on commit c34bde5

Please sign in to comment.