From fb87b00dbe03b0ba51cb6d2dc96325017c964ec6 Mon Sep 17 00:00:00 2001
From: Albert Suarez <alsumo95@gmail.com>
Date: Fri, 9 Aug 2019 14:55:26 +0200
Subject: [PATCH] Documented code

---
 src/__init__.py       |  5 +++++
 src/__main__.py       |  4 ++++
 src/azlyrics.py       | 20 ++++++++++++++++++++
 src/box_sdk.py        | 36 +++++++++++++++++++++++++++++++++++-
 src/csv_parser.py     | 17 +++++++++++++++++
 src/string_cleaner.py | 15 +++++++++++++++
 6 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/src/__init__.py b/src/__init__.py
index 8f1f46c..0720a91 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -1,9 +1,11 @@
+# AZLyrics website
 AZ_LYRICS_BASE_URL = 'https://www.azlyrics.com'
 AZ_LYRICS_ARTIST_LETTER_LIST = [
     'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
     'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '19'
 ]
 
+# Scraping
 BASE = 'Mozilla/5.0'
 SCRAPE_RTD_MINIMUM = 4
 SCRAPE_RTD_MAXIMUM = 6
@@ -24,6 +26,7 @@
 SCRAPE_RETRIES_AMOUNT = 3
 SCRAPE_SLEEP_TIME_BETWEEN_RETRIES = 10
 
+# CSV
 CSV_FILE = 'data/azlyrics_lyrics'
 CSV_HEADER_ARTIST_NAME = 'ARTIST_NAME'
 CSV_HEADER_ARTIST_URL = 'ARTIST_URL'
@@ -31,6 +34,7 @@
 CSV_HEADER_SONG_URL = 'SONG_URL'
 CSV_HEADER_LYRICS = 'LYRICS'
 
+# String cleaning
 STR_CLEAN_TIMES = 3
 STR_CLEAN_DICT = {
     '\n\n': '\n',
@@ -56,6 +60,7 @@
     ':.': ':'
 }
 
+# Box integration
 BOX_CONFIG_FILE_PATH = 'data/jwt_config.json'
 BOX_RETRIES = 3
 BOX_RTM = 3
diff --git a/src/__main__.py b/src/__main__.py
index 9e8e2cc..25d563d 100644
--- a/src/__main__.py
+++ b/src/__main__.py
@@ -7,6 +7,10 @@
 
 
 def scrape():
+    """
+    Runs the scraper's main loop, iterating over every AZLyrics artist letter.
+    :return: All AZLyrics scraped.
+    """
     for artist_letter in tqdm(AZ_LYRICS_ARTIST_LETTER_LIST, total=len(AZ_LYRICS_ARTIST_LETTER_LIST)):
         # Downloads file if it is available on Box folder.
         csv_file_name = f'{CSV_FILE}_{artist_letter}.csv'
diff --git a/src/azlyrics.py b/src/azlyrics.py
index eea33e7..a475c63 100644
--- a/src/azlyrics.py
+++ b/src/azlyrics.py
@@ -9,6 +9,11 @@
 
 
 def _get_html(url):
+    """
+    Retrieves the HTML content of an Internet-accessible URL.
+    :param url: URL to retrieve.
+    :return: HTML content formatted as String, None if there was an error.
+    """
     time.sleep(random.uniform(SCRAPE_RTD_MINIMUM, SCRAPE_RTD_MAXIMUM))  # RTD
     for i in range(0, SCRAPE_RETRIES_AMOUNT):
         try:
@@ -29,6 +34,11 @@ def _get_html(url):
 
 
 def get_artist_url_list(artist_letter):
+    """
+    Retrieves the AZLyrics website URLs of all the artists given their first character.
+    :param artist_letter: First character of an artist.
+    :return: List of pairs containing the artist name and its AZLyrics URL.
+    """
     artist_url_list = []
 
     try:
@@ -50,6 +60,11 @@ def get_artist_url_list(artist_letter):
 
 
 def get_song_url_list(artist_url):
+    """
+    Retrieves the AZLyrics website URLs of all the songs listed at an artist's AZLyrics URL.
+    :param artist_url: AZLyrics URL of a given artist.
+    :return: List of pairs containing the song name and its AZLyrics URL.
+    """
     song_url_list = []
 
     try:
@@ -69,6 +84,11 @@ def get_song_url_list(artist_url):
 
 
 def get_song_lyrics(song_url):
+    """
+    Retrieves and cleans the lyrics of a song given its AZLyrics URL.
+    :param song_url: AZLyrics URL from a given song.
+    :return: Cleaned and formatted song lyrics.
+    """
     song_lyrics = ''
 
     try:
diff --git a/src/box_sdk.py b/src/box_sdk.py
index cd8e7c5..d49cca1 100644
--- a/src/box_sdk.py
+++ b/src/box_sdk.py
@@ -7,6 +7,11 @@
 
 
 def create_folder(folder_name):
+    """
+    Creates a folder in the root folder given its name.
+    :param folder_name: Folder name to create.
+    :return: Folder identifier if the creation was successful, None otherwise.
+    """
     box_client = Client(JWTAuth.from_settings_file(BOX_CONFIG_FILE_PATH))
     for i in range(0, BOX_RETRIES):
         try:
@@ -20,6 +25,11 @@ def create_folder(folder_name):
 
 
 def create_shared_link(folder_id):
+    """
+    Creates an Internet-accessible shared link for a folder given its identifier.
+    :param folder_id: Folder identifier.
+    :return: Shared link if the creation was successful, None otherwise.
+    """
     box_client = Client(JWTAuth.from_settings_file(BOX_CONFIG_FILE_PATH))
     for i in range(0, BOX_RETRIES):
         try:
@@ -37,6 +47,12 @@ def create_shared_link(folder_id):
 
 
 def search_file(folder_id, file_name):
+    """
+    Finds a file in a folder given the folder identifier and the file name.
+    :param folder_id: Folder identifier.
+    :param file_name: File name.
+    :return: File identifier if the file exists, None otherwise.
+    """
     box_client = Client(JWTAuth.from_settings_file(BOX_CONFIG_FILE_PATH))
     for i in range(0, BOX_RETRIES):
         try:
@@ -52,6 +68,12 @@ def search_file(folder_id, file_name):
 
 
 def upload_file(folder_id, file_path):
+    """
+    Uploads a file (that must not already exist in the Box folder) into a folder given the file path.
+    :param folder_id: Folder identifier.
+    :param file_path: File path.
+    :return: File identifier if the upload was successful, None otherwise.
+    """
     box_client = Client(JWTAuth.from_settings_file(BOX_CONFIG_FILE_PATH))
     for i in range(0, BOX_RETRIES):
         try:
@@ -65,6 +87,12 @@ def upload_file(folder_id, file_path):
 
 
 def update_file(file_id, file_path):
+    """
+    Updates a file (that must already exist in the Box folder) given its identifier.
+    :param file_id: File identifier.
+    :param file_path: File path.
+    :return: File identifier if the update was successful, None otherwise.
+    """
     box_client = Client(JWTAuth.from_settings_file(BOX_CONFIG_FILE_PATH))
     for i in range(0, BOX_RETRIES):
         try:
@@ -77,6 +105,12 @@ def update_file(file_id, file_path):
 
 
 def download_file(file_id, file_path):
+    """
+    Downloads a Box file given its identifier to a specific path.
+    :param file_id: File identifier.
+    :param file_path: File path.
+    :return: True if the download was successful, False otherwise.
+    """
     box_client = Client(JWTAuth.from_settings_file(BOX_CONFIG_FILE_PATH))
     for i in range(0, BOX_RETRIES):
         try:
@@ -87,4 +121,4 @@ def download_file(file_id, file_path):
             time.sleep(BOX_RTM)
             if i == BOX_RETRIES - 1:
                 print(f'Error calling Box API downloading the file [{file_id}] to file [{file_path}]: {e}')
-    return None
+    return False
diff --git a/src/csv_parser.py b/src/csv_parser.py
index e429112..6d97271 100644
--- a/src/csv_parser.py
+++ b/src/csv_parser.py
@@ -5,6 +5,13 @@
 
 
 def exists_song(csv_letter, artist_url, song_url):
+    """
+    Checks whether a song exists in a CSV given the artist and song URLs.
+    :param csv_letter: Letter identifying which CSV file to check.
+    :param artist_url: Artist AZLyrics URL.
+    :param song_url: Song AZLyrics URL.
+    :return: True if the song exists in the CSV, False otherwise.
+    """
     csv_file_name = f'{CSV_FILE}_{csv_letter}.csv'
     exists_file = os.path.isfile(csv_file_name)
     if exists_file:
@@ -17,6 +24,16 @@ def exists_song(csv_letter, artist_url, song_url):
 
 
 def append_to_csv(artist_name, artist_url, song_name, song_url, song_lyrics, csv_letter):
+    """
+    Appends song information to the end of a CSV, whether it already exists or not.
+    :param artist_name: Artist name.
+    :param artist_url: Artist AZLyrics URL.
+    :param song_name: Song name.
+    :param song_url: Song AZLyrics URL.
+    :param song_lyrics: Song lyrics.
+    :param csv_letter: Letter identifying the CSV file to append to.
+    :return: Song information appended.
+    """
     if song_lyrics:
         csv_file_name = f'{CSV_FILE}_{csv_letter}.csv'
         exists_file = os.path.isfile(csv_file_name)
diff --git a/src/string_cleaner.py b/src/string_cleaner.py
index 8f00ca9..c3e9171 100644
--- a/src/string_cleaner.py
+++ b/src/string_cleaner.py
@@ -5,12 +5,22 @@
 
 
 def clean_url(url_str):
+    """
+    Cleans a given URL.
+    :param url_str: String formatted URL.
+    :return: Cleaned string formatted URL.
+    """
     url_str = url_str.lower()
     url_str = url_str.strip()
     return url_str
 
 
 def clean_name(name_str):
+    """
+    Cleans a given name (song or artist).
+    :param name_str: String formatted name.
+    :return: Cleaned string formatted name.
+    """
     name_str = name_str.lower()
     name_str = name_str.strip()
     name_str = unidecode.unidecode(name_str)
@@ -18,6 +28,11 @@ def clean_name(name_str):
 
 
 def clean_lyrics(lyrics_str):
+    """
+    Cleans a given string containing song lyrics.
+    :param lyrics_str: String formatted lyrics.
+    :return: Cleaned string formatted lyrics.
+    """
     lyrics_str = lyrics_str.lower()
     lyrics_str = lyrics_str.strip()
     lyrics_str = unidecode.unidecode(lyrics_str)