From d547e0726c648bba82331015e331448f68344865 Mon Sep 17 00:00:00 2001 From: blopeu Date: Fri, 11 Oct 2024 14:32:43 -0500 Subject: [PATCH] feature/add_timeout_AsyncPlaywrightCrawlerStrategy add timeout --- crawl4ai/async_crawler_strategy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py index 987925f8..f4254f4a 100644 --- a/crawl4ai/async_crawler_strategy.py +++ b/crawl4ai/async_crawler_strategy.py @@ -51,6 +51,7 @@ def __init__(self, use_cached_html=False, js_code=None, **kwargs): self.session_ttl = 1800 self.js_code = js_code self.verbose = kwargs.get("verbose", False) + self.timeout = int(kwargs.get("timeout", 60000)) self.playwright = None self.browser = None self.hooks = { @@ -248,7 +249,7 @@ async def crawl(self, url: str, **kwargs) -> AsyncCrawlResponse: if not kwargs.get("js_only", False): await self.execute_hook('before_goto', page) - response = await page.goto(url, wait_until="domcontentloaded", timeout=60000) + response = await page.goto(url, wait_until="domcontentloaded", timeout=self.timeout) await self.execute_hook('after_goto', page) # Get status code and headers