Skip to content

Commit

Permalink
Made code more robust adding try/except
Browse files Browse the repository at this point in the history
  • Loading branch information
AlbertSuarez committed Jul 7, 2019
1 parent e87d436 commit b589994
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 34 deletions.
59 changes: 34 additions & 25 deletions src/azlyrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,45 +31,54 @@ def _get_html(url):
def get_artist_url_list(artist_letter):
    """Collect the artists linked from the index page of one letter.

    The page ``{AZ_LYRICS_BASE_URL}/{artist_letter}.html`` is fetched and every
    link found inside a ``div`` with class ``artist-col`` is turned into a
    ``(artist_name, artist_url)`` tuple.

    :param artist_letter: Name of the index page, inserted before ``.html``.
    :return: List of ``(artist_name, artist_url)`` tuples. Empty when the page
        could not be fetched; possibly partial when an error occurs midway
        (the error is printed, never raised).
    """
    artist_url_list = []

    try:
        artist_letter_url = f'{AZ_LYRICS_BASE_URL}/{artist_letter}.html'
        html_content = _get_html(artist_letter_url)
        if html_content:
            soup = BeautifulSoup(html_content, 'html.parser')

            for column in soup.find_all('div', {'class': 'artist-col'}):
                for a in column.find_all('a'):
                    href = a.get('href')
                    if not href:
                        # An anchor without a target is not an artist link.
                        # Skipping it keeps one odd tag from raising KeyError
                        # and discarding the rest of the page.
                        continue
                    artist_name = string_cleaner.clean_name(a.text)
                    artist_url = string_cleaner.clean_url('{}/{}'.format(AZ_LYRICS_BASE_URL, href))
                    artist_url_list.append((artist_name, artist_url))
    except Exception as e:
        # Best-effort scraping: report the problem and return what was gathered.
        print(f'Error while getting artists from letter {artist_letter}: {e}')

    return artist_url_list


def get_song_url_list(artist_url):
    """Collect the songs linked from an artist page.

    Every link inside the ``div`` with id ``listAlbum`` is turned into a
    ``(song_name, song_url)`` tuple; ``../`` segments are removed from the
    link target before it is appended to ``AZ_LYRICS_BASE_URL``.

    :param artist_url: URL of the artist page to scrape.
    :return: List of ``(song_name, song_url)`` tuples. Empty when the page
        could not be fetched or has no song list; possibly partial when an
        error occurs midway (the error is printed, never raised).
    """
    song_url_list = []

    try:
        html_content = _get_html(artist_url)
        if html_content:
            soup = BeautifulSoup(html_content, 'html.parser')

            list_album_div = soup.find('div', {'id': 'listAlbum'})
            if list_album_div is None:
                # Say what is actually missing instead of surfacing an
                # AttributeError raised on None.
                print(f'Error while getting songs from artist {artist_url}: song list not found')
                return song_url_list

            for a in list_album_div.find_all('a'):
                href = a.get('href')
                if not href:
                    # Anchors without a target are not song links.
                    continue
                song_name = string_cleaner.clean_name(a.text)
                # Use a dedicated name: rebinding ``artist_url`` here would make
                # the error message below report a song URL instead.
                song_url = string_cleaner.clean_url('{}/{}'.format(AZ_LYRICS_BASE_URL, href.replace('../', '')))
                song_url_list.append((song_name, song_url))
    except Exception as e:
        # Best-effort scraping: report the problem and return what was gathered.
        print(f'Error while getting songs from artist {artist_url}: {e}')

    return song_url_list


def get_song_lyrics(song_url):
    """Extract the lyrics text from a song page.

    The lyrics are taken to be the longest text among the ``div`` elements
    that carry no ``class`` attribute, passed through
    ``string_cleaner.clean_lyrics``.

    :param song_url: URL of the song page to scrape.
    :return: The cleaned lyrics, or ``''`` when the page could not be fetched
        or any step fails (the error is printed, never raised).
    """
    song_lyrics = ''

    try:
        html_content = _get_html(song_url)
        if html_content:
            soup = BeautifulSoup(html_content, 'html.parser')
            div_list = [div.text for div in soup.find_all('div', {'class': None})]
            # Keep the raw text in its own variable so that a failure while
            # cleaning cannot leak uncleaned page text to the caller.
            raw_lyrics = max(div_list, key=len)
            song_lyrics = string_cleaner.clean_lyrics(raw_lyrics)
    except Exception as e:
        print(f'Error while getting lyrics from song {song_url}: {e}')

    return song_lyrics
19 changes: 10 additions & 9 deletions src/csv_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@ def exists_song(csv_letter, artist_url, song_url):


def append_to_csv(artist_name, artist_url, song_name, song_url, song_lyrics, csv_letter):
    """Append one song row to the CSV file of the given letter.

    The target file is ``{CSV_FILE}_{csv_letter}.csv``. A header line is
    written first when the file does not exist yet. Every row is written as a
    leading newline followed by five double-quoted, comma-separated fields.
    Nothing is written when ``song_lyrics`` is empty.

    :param artist_name: Artist name for the row.
    :param artist_url: Artist page URL for the row.
    :param song_name: Song title for the row.
    :param song_url: Song page URL for the row.
    :param song_lyrics: Lyrics text; a falsy value skips the write entirely.
    :param csv_letter: Suffix selecting which CSV file receives the row.
    """
    if not song_lyrics:
        return

    def _quote(value):
        # Double any embedded quote so the field stays a single CSV value;
        # an unescaped '"' would end the field early and corrupt the row.
        return '"{}"'.format(str(value).replace('"', '""'))

    csv_file_name = f'{CSV_FILE}_{csv_letter}.csv'
    exists_file = os.path.isfile(csv_file_name)
    row = ','.join(
        _quote(field)
        for field in (artist_name, artist_url, song_name, song_url, song_lyrics)
    )
    with open(csv_file_name, 'a') as file:
        if not exists_file:
            file.write(
                f'"{CSV_HEADER_ARTIST_NAME}","{CSV_HEADER_ARTIST_URL}",'
                f'"{CSV_HEADER_SONG_NAME}","{CSV_HEADER_SONG_URL}","{CSV_HEADER_LYRICS}"'
            )
        file.write(f'\n{row}')

0 comments on commit b589994

Please sign in to comment.