diff --git a/README.md b/README.md index 76503df..04dbe47 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Instantiates the Player object. The **init** method finds the matching player on ### Player.get_player_stats() -Pulls player stats (career totals, and season-by-season summary) and presents as a Python object. +Pulls the player's bio (height, weight, date of birth, debut date, and last-game date) and stores it in the Player attribute `metadata`, then pulls the player's stats (career totals and a season-by-season summary), which are returned as a Python object. This function returns a PlayerStats object with attributes: - season_stats_total (Pandas dataframe) diff --git a/pyAFL/players/models.py b/pyAFL/players/models.py index d8b348d..6e578f1 100644 --- a/pyAFL/players/models.py +++ b/pyAFL/players/models.py @@ -1,6 +1,7 @@ import re import pandas as pd +from datetime import datetime, timedelta from bs4 import BeautifulSoup from pyAFL import config @@ -16,11 +17,14 @@ class Player(object): ---------- name : str first name of the person - stats : object - PlayerStats object + url : str + url to the player's information page + metadata : dictionary + player bio information Methods ------- + get_player_stats : returns PlayerStats object ... 
""" @@ -42,6 +46,7 @@ def __init__(self, name: str, url: str = None, team: str = None): self.name = name.title() # Convert to title case for URL string matching self.name = self.name.replace("\n", "").strip() + self.metadata = {} if url: self.url = url else: @@ -82,6 +87,44 @@ def _get_player_url(self): ) return url_list[0].attrs.get("href") + + def _get_bio_info(self, b_tags): + for bio in b_tags: + + if re.sub(r"[\n\t\s]*", "", bio.get_text())=="Born:": + date_born = re.sub(r"[\n\t\s]*", "", bio.next_sibling.replace(" (","")) + if not date_born: self.metadata["born"] = None; continue + + timestamp = datetime.strptime(date_born, '%d-%b-%Y').strftime('%d-%b-%Y') + self.metadata["born"] = timestamp + + if re.sub(r"[\n\t\s]*", "", bio.get_text())=="Debut:": + debut = bio.next_sibling.strip() # Ex:18y 218d + if not debut or self.metadata["born"] == None: self.metadata["debut"] = None; continue + + debut = debut.split(" ") + timestamp = (datetime.strptime(self.metadata["born"], '%d-%b-%Y') + timedelta(int(debut[0][:-1]) * 365 + int(debut[1][:-1]))).strftime('%d-%b-%Y') + self.metadata["debut"] = timestamp + + if re.sub(r"[\n\t\s]*", "", bio.get_text())=="Last:": + last = bio.next_sibling.replace(")","").strip() + if not last or self.metadata["born"] == None: self.metadata["last"] = None; continue + + last = last.split(" ") + timestamp = (datetime.strptime(self.metadata["born"], '%d-%b-%Y') + timedelta(int(last[0][:-1]) * 365 + int(last[1][:-1]))).strftime('%d-%b-%Y') + self.metadata["last"] = timestamp + + if re.sub(r"[\n\t\s]*", "", bio.get_text())=="Height:": + height = re.sub("[^0-9]", "",bio.next_sibling) + if not height: self.metadata["height"] = None; continue + + self.metadata["height"] = height + + if re.sub(r"[\n\t\s]*", "", bio.get_text())=="Weight:": + weight = re.sub("[^0-9]", "",bio.next_sibling) + if not weight: self.metadata["weight"] = None; continue + + self.metadata["weight"] = weight def get_player_stats(self): """ @@ -99,6 +142,8 @@ def 
def test_unvavailable_player_bio(self):
    """
    Bio fields with no value on the page are stored as ``None``.

    The fixture has an empty date of birth, so ``debut`` and ``last`` must
    also be ``None`` even though an age at debut is present: both are
    derived from the birth date. ``height`` is empty and ``weight`` is
    present, so only ``weight`` gets a value.
    """
    player = Player("Nathan Brown")

    # NOTE(review): the <b> markup is reconstructed from the labels and
    # from the find_all('b') call below; the surrounding wrapper tags are
    # a guess. Confirm against the real fixture. The values sit on their
    # own indented lines so each one is the text sibling of its label.
    html_content = """
    <html>
      <body>
        <div>
          <b>Born:</b>
          (
          <b>Debut:</b>
          18y 218d
          <b>Last:</b>
          )
          <b>Height:</b>

          <b>Weight:</b>
          74 kg
        </div>
      </body>
    </html>
    """

    soup = BeautifulSoup(html_content, "html.parser")
    player._get_bio_info(soup.find_all("b"))

    assert player.metadata["born"] is None
    assert player.metadata["debut"] is None
    assert player.metadata["last"] is None
    assert player.metadata["height"] is None
    assert player.metadata["weight"] == "74"