Skip to content
This repository has been archived by the owner on Feb 2, 2025. It is now read-only.

Commit

Permalink
Black formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
quintindunn committed Jun 29, 2024
1 parent 15d49d8 commit 85aaa35
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/crawler/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def get_protocol_and_domain_from_url(url: str):

protocol, _url = url.split("//", 1) # https:, example.com/test

- domain = re.split(r'[?/#]', _url, maxsplit=1)[0]
+ domain = re.split(r"[?/#]", _url, maxsplit=1)[0]
return protocol, domain


Expand Down
8 changes: 3 additions & 5 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,11 @@ def load_data():
seeds, to_crawl = load_data()
seed_url = random.choice(seeds)

- url_manager = URLManager(
-     seed_url=seed_url, crawled=None, to_crawl=to_crawl
- )
+ url_manager = URLManager(seed_url=seed_url, crawled=None, to_crawl=to_crawl)

# If instance count > 1 multithreading will be used.
if INSTANCE_COUNT > 1:
- for instance in range(1, INSTANCE_COUNT+1):
+ for instance in range(1, INSTANCE_COUNT + 1):
logger.info(f"Starting crawler instance #{instance}")
launch_instance(url_man=url_manager)

Expand All @@ -80,4 +78,4 @@ def load_data():
try:
run_instance(url_man=url_manager)
except KeyboardInterrupt:
- dump_to_crawl(url_man=url_manager)
+ dump_to_crawl(url_man=url_manager)
2 changes: 1 addition & 1 deletion src/tests/url_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_protocol_domain(self):
"http://example.com/path;param?query#fragment": ["http:", "example.com"],
"https://example.com#fragment": ["https:", "example.com"],
"http://192.168.0.1/path": ["http:", "192.168.0.1"],
"http://[::1]/path": ["http:", "[::1]"]
"http://[::1]/path": ["http:", "[::1]"],
}

for url, expected in urls.items():
Expand Down

0 comments on commit 85aaa35

Please sign in to comment.