Skip to content

Commit

Permalink
Fixed NoneType error if the Domain object doesn't exist or last…
Browse files Browse the repository at this point in the history
…_crawled is Null when checking robots.txt compliance.
  • Loading branch information
quintindunn committed Jun 29, 2024
1 parent 4fec1d9 commit b365431
Showing 1 changed file with 10 additions and 9 deletions.
19 changes: 10 additions & 9 deletions src/crawler/robots.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,16 @@ def does_page_follow_robots_rules(
request_delay = parser.request_rate(crawler_options.ua)

now = datetime.datetime.now()
if crawl_delay and (now - domain.last_crawled).total_seconds() < int(
crawl_delay
):
raise WaitBeforeRetryException()

if request_delay and (now - domain.last_crawled).total_seconds() < int(
request_delay.seconds
):
raise WaitBeforeRetryException()
if domain and domain.last_crawled:
if crawl_delay and (now - domain.last_crawled).total_seconds() < int(
crawl_delay
):
raise WaitBeforeRetryException()

if request_delay and (now - domain.last_crawled).total_seconds() < int(
request_delay.seconds
):
raise WaitBeforeRetryException()
except ValueError:
pass

Expand Down

0 comments on commit b365431

Please sign in to comment.