Skip to content

Commit

Permalink
Removed tqdm logging and used prints
Browse files (browse the repository at this point in the history)
  • Loading branch information
AlbertSuarez committed Aug 20, 2019
1 parent 1f6665b commit 75b2e6a
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 12 deletions.
6 changes: 2 additions & 4 deletions requirements.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
requests==2.22.0
beautifulsoup4==4.7.1
tqdm==4.32.2
beautifulsoup4==4.8.0
Unidecode==1.1.1
boxsdk==2.5.0
stem==1.7.1
Expand All @@ -12,14 +11,13 @@ certifi==2019.6.16
cffi==1.12.3
chardet==3.0.4
cryptography==2.7
dropbox==9.4.0
idna==2.8
pycparser==2.19
PyJWT==1.7.1
pyOpenSSL==19.0.0
PySocks==1.7.0
requests-toolbelt==0.9.1
six==1.12.0
soupsieve==1.9.2
soupsieve==1.9.3
urllib3==1.25.3
wrapt==1.11.2
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
requests[socks,security]
beautifulsoup4
tqdm
Unidecode
boxsdk[jwt]
stem
fake_useragent
fake_useragent
21 changes: 15 additions & 6 deletions src/__main__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import os

from tqdm import tqdm

from src import *
from src import azlyrics, csv_parser, box_sdk

Expand All @@ -11,18 +9,28 @@ def scrape():
Processes the main function of the scraper.
:return: All AZLyrics scraped.
"""
for artist_letter in tqdm(AZ_LYRICS_ARTIST_LETTER_LIST, total=len(AZ_LYRICS_ARTIST_LETTER_LIST)):
for artist_letter in AZ_LYRICS_ARTIST_LETTER_LIST:
# Logging stuff
print(f'[1] Processing [{artist_letter}] letter...')

# Downloads file if it is available on Box folder.
csv_file_name = f'{CSV_FILE}_{artist_letter}.csv'
print(f'[1] Searching for {csv_file_name} in Box folder...')
file_id = box_sdk.search_file(BOX_FOLDER_APP_ID, csv_file_name.split('/')[-1])
if file_id:
print(f'[1] ---> File found with id [{file_id}]!')
box_sdk.download_file(file_id, csv_file_name)

# Iterates over all artists with the given letter.
print('[1] Scraping artists URLs...')
artist_url_list = azlyrics.get_artist_url_list(artist_letter)
for artist_name, artist_url in tqdm(artist_url_list, total=len(artist_url_list)):
print(f'[1] ---> {len(artist_url_list)} artists found with letter [{artist_letter}]')
for artist_name, artist_url in artist_url_list:
print(f'[2] Scraping song URLs for {artist_name}...')
song_url_list = azlyrics.get_song_url_list(artist_url)
for song_name, song_url in tqdm(song_url_list, total=len(song_url_list)):
print(f'[2] ---> {len(artist_url_list)} artists found with letter [{artist_letter}]')
for song_name, song_url in song_url_list:
print(f'[3] Scraping lyrics for song: [{song_name}]')
if not csv_parser.exists_song(artist_letter, artist_url, song_url):
song_lyrics = azlyrics.get_song_lyrics(song_url)
csv_parser.append_to_csv(artist_name, artist_url, song_name, song_url, song_lyrics, artist_letter)
Expand All @@ -33,7 +41,8 @@ def scrape():
file_id = box_sdk.upload_file(BOX_FOLDER_APP_ID, csv_file_name)

# Removes the local version of the CSV for saving storage.
os.remove(csv_file_name)
if os.path.isfile(csv_file_name):
os.remove(csv_file_name)


if __name__ == '__main__':
Expand Down

0 comments on commit 75b2e6a

Please sign in to comment.