Skip to content

Commit

Permalink
Player Bio #8 Retrieve bio information after calling get_player_stats…
Browse files Browse the repository at this point in the history
…() (#26)

* Read in player bio after getting stats

* Added new function to read in player bio information

* Added error handling when bio info is unavailable
  • Loading branch information
adrperez5 authored Apr 29, 2024
1 parent 355240e commit d333737
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 4 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ Instantiates the Player object. The **init** method finds the matching player on

### Player.get_player_stats()

Pulls player stats (career totals, and season-by-season summary) and presents as a Python object.
Pulls the player's bio (height, weight, date of birth, debut date, and date of last game) and stores it in the Player attribute `metadata`, then pulls the player's stats (career totals and a season-by-season summary) and presents them as a Python object.
This function returns a PlayerStats object with attributes:

- season_stats_total (Pandas dataframe)
Expand Down
49 changes: 47 additions & 2 deletions pyAFL/players/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re

import pandas as pd
from datetime import datetime, timedelta
from bs4 import BeautifulSoup

from pyAFL import config
Expand All @@ -16,11 +17,14 @@ class Player(object):
----------
name : str
first name of the person
stats : object
PlayerStats object
url : str
url to the player's information page
metadata : dictionary
player bio information
Methods
-------
get_player_stats : returns PlayerStats object
...
"""

Expand All @@ -42,6 +46,7 @@ def __init__(self, name: str, url: str = None, team: str = None):

self.name = name.title() # Convert to title case for URL string matching
self.name = self.name.replace("\n", "").strip()
self.metadata = {}
if url:
self.url = url
else:
Expand Down Expand Up @@ -82,6 +87,44 @@ def _get_player_url(self):
)

return url_list[0].attrs.get("href")

def _get_bio_info(self, b_tags):
    """
    Fill ``self.metadata`` with the player's bio information.

    Every element of ``b_tags`` is treated as a label ("Born:", "Debut:",
    "Last:", "Height:" or "Weight:") whose value is the text node directly
    following it. Values are stored under the keys ``born``, ``debut``,
    ``last``, ``height`` and ``weight``. A value that is missing or cannot
    be parsed is stored as ``None``. Tags with any other label are ignored.

    Parameters
    ----------
    b_tags : iterable
        Tag-like objects exposing ``get_text()`` and ``next_sibling``
        (e.g. the result of ``soup.find_all('b')``).

    Returns
    -------
    None
        Results are written to ``self.metadata``.
    """
    date_format = "%d-%b-%Y"

    def value_after(tag):
        # The value lives in the text node after the label. When the value is
        # missing, the next sibling may be None or another tag instead of text.
        sibling = tag.next_sibling
        return sibling if isinstance(sibling, str) else ""

    def date_at_age(age_text):
        # Ages look like "18y 218d" and are offsets from the date of birth, so
        # they are unusable until "born" has been parsed successfully.
        born = self.metadata.get("born")
        age = re.search(r"(\d+)y\s*(\d+)d", age_text)
        if born is None or age is None:
            return None
        years, days = int(age.group(1)), int(age.group(2))
        try:
            # A year is counted as 365 days (leap days are ignored), so the
            # result can fall a few days before the true calendar date.
            moment = datetime.strptime(born, date_format) + timedelta(
                days=years * 365 + days
            )
        except (ValueError, OverflowError):
            return None
        return moment.strftime(date_format)

    for bio in b_tags:
        label = re.sub(r"\s+", "", bio.get_text())

        if label == "Born:":
            # The birth date is followed by an opening "(" that belongs to the
            # debut/last group; drop it with or without a leading space.
            raw_date = re.sub(r"\s+", "", value_after(bio).replace("(", ""))
            try:
                born = datetime.strptime(raw_date, date_format).strftime(date_format)
            except ValueError:
                # Empty or malformed date: treat as unavailable.
                born = None
            self.metadata["born"] = born
        elif label == "Debut:":
            self.metadata["debut"] = date_at_age(value_after(bio))
        elif label == "Last:":
            self.metadata["last"] = date_at_age(value_after(bio))
        elif label == "Height:":
            # Keep only the digits (the unit suffix is discarded).
            self.metadata["height"] = re.sub(r"[^0-9]", "", value_after(bio)) or None
        elif label == "Weight:":
            self.metadata["weight"] = re.sub(r"[^0-9]", "", value_after(bio)) or None

def get_player_stats(self):
"""
Expand All @@ -99,6 +142,8 @@ def get_player_stats(self):

soup = BeautifulSoup(self._stat_html, "html.parser")

self._get_bio_info(soup.find_all('b'))

all_dfs = pd.read_html(self._stat_html)
season_dfs = pd.read_html(self._stat_html, match=r"[A-Za-z]* - [0-9]{4}")

Expand Down
45 changes: 45 additions & 0 deletions pyAFL/players/tests/test_models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest

from bs4 import BeautifulSoup
from pyAFL.base.exceptions import LookupError
from pyAFL.players.models import Player, PlayerStats

Expand Down Expand Up @@ -37,3 +38,47 @@ def test_player_classmethod_get_player_stats(self):
player = Player("Nick Riewoldt")

assert isinstance(player.get_player_stats(), PlayerStats)

player = Player("Stuart Magee")
player.get_player_stats()

assert(player.metadata["born"] == "13-Oct-1943")
assert(player.metadata["debut"] == "14-May-1962")
assert(player.metadata["last"] == "22-Aug-1975")

def test_unvavailable_player_bio(self):
    """
    Check that bio fields missing from the page are stored as ``None``.

    The markup below has no birth date and no height. Debut and last dates
    are derived from the birth date, so they must be ``None`` as well, even
    though a debut age is present. Weight is the only field that can be read.
    """
    player = Player("Nathan Brown")

    # The whitespace in front of "(" is significant: _get_bio_info removes
    # the literal " (" that follows the birth date.
    html_content = """
    <html>
        <body>
            <center>
                <b>Born:</b>
                (
                <b>Debut:</b>
                18y 218d
                <b>Last:</b>
                )
                <b>Height:</b>
                <b>Weight:</b>
                74 kg
            </center>
        </body>
    </html>
    """

    soup = BeautifulSoup(html_content, "html.parser")

    player._get_bio_info(soup.find_all("b"))

    assert player.metadata["born"] is None
    assert player.metadata["debut"] is None
    assert player.metadata["height"] is None
    assert player.metadata["weight"] == "74"
    assert player.metadata["last"] is None
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="pyAFL",
version="0.4.2",
version="0.4.3",
description="Python data fetching library for the Australian Football League",
long_description="pyAFL is a AFL (Australian Football League) data fetching libary. It scrapes data from https://afltables.com/ and converts results to structured Python objects for easier analytics.",
url="https://github.com/RamParameswaran/pyAFL",
Expand Down

0 comments on commit d333737

Please sign in to comment.