Skip to content

Commit

Permalink
feat: add percent change to analytics package (#4335)
Browse files Browse the repository at this point in the history
* feat: added percent change column and formatting (#4331)

* chore: updated ga package version (#4331)

* fix: made argument names consistent (#4331)
  • Loading branch information
jpaten authored Jan 17, 2025
1 parent d2ac666 commit eef1cee
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 29 deletions.
119 changes: 93 additions & 26 deletions analytics/analytics_package/analytics/sheets_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@
import gspread_formatting
from enum import Enum
from googleapiclient.discovery import build
import numpy as np

# Font size, in points, used when converting character counts into column widths
FONT_SIZE_PTS = 10
# Divisor applied in the same column-width computation
# NOTE(review): presumably the points/pixels conversion factor - confirm the intended direction
PTS_PIXELS_RATIO = 4/3
# Extra characters of padding added to each column when no "buffer_chars" option is given
DEFAULT_BUFFER_CHARS = 2
# Hex color strings
# NOTE(review): not referenced in the visible part of this module; the conditional
# formatting rules build gspread_formatting.Color(0,1,0) / Color(1,0,0) directly
GREEN_COLOR = "#00FF00"
RED_COLOR = "#FF0000"


class FILE_OVERRIDE_BEHAVIORS(Enum):
OVERRIDE_IF_IN_SAME_PLACE = 1
Expand All @@ -12,9 +20,17 @@ class WORKSHEET_OVERRIDE_BEHAVIORS(Enum):
OVERRIDE = 1
EXIT = 2

FONT_SIZE_PTS = 10
PTS_PIXELS_RATIO = 4/3
DEFAULT_BUFFER_CHARS = 2
class COLUMN_FORMAT_OPTIONS(Enum):
    """Per-column formatting choices, used as the values of column_formatting_options in fill_worksheet_with_df"""
    # No column specific formatting is applied
    DEFAULT = 1
    # The column is given a percent number format ('0.0%')
    PERCENT_UNCOLORED = 2
    # Percent number format plus conditional text color: green for values >= 0, red for values <= 0
    PERCENT_COLORED = 3

# Sheet-level formatting used by fill_worksheet_with_df when no sheet_formatting_options are passed
DEFAULT_SHEET_FORMATTING_OPTIONS = {
    # Render the header row (row 1) in bold
    "bold_header": True,
    # Horizontally center the header row
    "center_header": True,
    # Freeze the first row of the worksheet
    "freeze_header": True,
    # "justify": size each column from its longest cell/header text
    # "buffer_chars": extra characters of padding added to that width
    "column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS}
}

def extract_credentials(authentication_response):
"""Extracts the credentials from the tuple from api.authenticate"""
Expand Down Expand Up @@ -141,17 +157,14 @@ def create_sheet_in_folder(authentication_response, sheet_name, parent_folder_na
# Open new file
return gc.open_by_key(spread_id)


def fill_worksheet_with_df(
sheet,
df,
worksheet_name,
overlapBehavior,
options={
"bold_header": True,
"center_header": True,
"freeze_header": True,
"column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS}
}
sheet_formatting_options=DEFAULT_SHEET_FORMATTING_OPTIONS,
column_formatting_options={}
):
"""
Fill a worksheet with the contents of a DataFrame.
Expand All @@ -162,8 +175,10 @@ def fill_worksheet_with_df(
:param df: the DataFrame to fill the worksheet with
:param worksheet_name: the name of the worksheet to fill. Cannot be "Sheet1"
:param overlapBehavior: the behavior to take if the worksheet already exists.
:param options: the formatting options for the worksheet.
:param sheet_formatting_options: the formatting options for the worksheet.
Should be a dictionary with optional elements "bold_header", "center_header", "freeze_header", and "column_widths", optional
:param column_formatting_options: the column formatting options for the worksheet.
Should be a dictionary with dataframe columns as keys and instances of COLUMN_FORMAT_OPTIONS as values, optional
"""
# Sheet1 is special since it's created by default, so it's not allowed
assert worksheet_name != "Sheet1"
Expand All @@ -179,19 +194,19 @@ def fill_worksheet_with_df(
)

# Add data to worksheet
worksheet.update([df.columns.values.tolist()] + df.values.tolist())
worksheet.update([df.columns.values.tolist()] + df.fillna("NA").values.tolist())

# Format worksheet
# Justify Column Widths
if "column_widths" not in options or options["column_widths"]["justify"]:
if "column_widths" not in sheet_formatting_options or sheet_formatting_options["column_widths"]["justify"]:
text_widths = df.astype(str).columns.map(
lambda column_name: df[column_name].astype(str).str.len().max()
)
header_widths = df.columns.str.len()
buffer_chars = (
DEFAULT_BUFFER_CHARS
if ("column_widths" not in options or "buffer_chars" not in options["column_widths"])
else options["column_widths"]["buffer_chars"]
if ("column_widths" not in sheet_formatting_options or "buffer_chars" not in sheet_formatting_options["column_widths"])
else sheet_formatting_options["column_widths"]["buffer_chars"]
)
column_widths = [
round((max(len_tuple) + buffer_chars) * FONT_SIZE_PTS * 1/PTS_PIXELS_RATIO)
Expand All @@ -202,26 +217,71 @@ def fill_worksheet_with_df(
]
gspread_formatting.set_column_widths(worksheet, zip(column_positions, column_widths))
# Freeze Header
if "freeze_header" not in options or options["freeze_header"]:
if "freeze_header" not in sheet_formatting_options or sheet_formatting_options["freeze_header"]:
gspread_formatting.set_frozen(worksheet, rows=1)
format_options = gspread_formatting.CellFormat()
base_format_options = gspread_formatting.CellFormat()
# Bold Header
if "bold_header" not in options or options["bold_header"]:
format_options += gspread_formatting.CellFormat(textFormat=gspread_formatting.TextFormat(bold=True))
if "bold_header" not in sheet_formatting_options or sheet_formatting_options["bold_header"]:
base_format_options += gspread_formatting.CellFormat(textFormat=gspread_formatting.TextFormat(bold=True))
# Center Header
if "center_header" not in options or options["center_header"]:
format_options += gspread_formatting.CellFormat(horizontalAlignment="CENTER")
if "center_header" not in sheet_formatting_options or sheet_formatting_options["center_header"]:
base_format_options += gspread_formatting.CellFormat(horizontalAlignment="CENTER")
# Handle column specific formatting
for column in column_formatting_options:
if column not in df.columns:
raise KeyError("Formatting column is not in the dataframe")
# Skip if the column is set to default
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.DEFAULT:
continue
# Get the column position
column_position_numeric = df.columns.get_loc(column) + 1
column_range_top = gspread.utils.rowcol_to_a1(1, column_position_numeric)
column_range_bottom = gspread.utils.rowcol_to_a1(df.index.size + 1, column_position_numeric)
column_range = f"{column_range_top}:{column_range_bottom}"
column_worksheet_range = gspread_formatting.GridRange.from_a1_range(column_range, worksheet)
# Get conditional formatting rules
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.PERCENT_COLORED:
green_rule = gspread_formatting.ConditionalFormatRule(
ranges=[column_worksheet_range],
booleanRule=gspread_formatting.BooleanRule(
condition=gspread_formatting.BooleanCondition('NUMBER_GREATER_THAN_EQ', ['0']),
format=gspread_formatting.CellFormat(
textFormat=gspread_formatting.TextFormat(foregroundColor=gspread_formatting.Color(0,1,0)))
)
)
red_rule = gspread_formatting.ConditionalFormatRule(
ranges=[column_worksheet_range],
booleanRule=gspread_formatting.BooleanRule(
condition=gspread_formatting.BooleanCondition('NUMBER_LESS_THAN_EQ', ['0']),
format=gspread_formatting.CellFormat(
textFormat=gspread_formatting.TextFormat(foregroundColor=gspread_formatting.Color(1,0,0)))
)
)
# Apply conditional formatting rules
conditional_formatting_rules = gspread_formatting.get_conditional_format_rules(worksheet)
conditional_formatting_rules.append(green_rule)
conditional_formatting_rules.append(red_rule)
conditional_formatting_rules.save()
if column_formatting_options[column] in (COLUMN_FORMAT_OPTIONS.PERCENT_COLORED, COLUMN_FORMAT_OPTIONS.PERCENT_UNCOLORED):
# Apply percent format rule
gspread_formatting.format_cell_range(
worksheet,
column_range,
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='PERCENT', pattern='0.0%'))
)

# Apply base formatting options
gspread_formatting.format_cell_range(
worksheet,
f"A1:{gspread.utils.rowcol_to_a1(1, len(df.columns))}",
format_options
base_format_options
)

# Delete Sheet1 if it has been created by default
if "Sheet1" in [i.title for i in sheet.worksheets()]:
sheet.del_worksheet(sheet.worksheet("Sheet1"))

def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, options={}):
def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}):
"""
Fill a sheet with the contents of a dictionary of DataFrames.
The keys of the dictionary are the names of the worksheets, and the values contain the data to be placed in the sheet.
Expand All @@ -230,8 +290,12 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, options={}):
:param sheet: the gspread.Spreadsheet object
:param df_dict: the dictionary of DataFrames to fill the worksheets with
:param overlapBehavior: the behavior to take if any of the worksheets already exist
:param options: the formatting options for the worksheets.
Should be a dictionary with optional elements "bold_header", "center_header", "freeze_header", and "column_widths", optional
:param sheet_formatting_options: the formatting options for the worksheets.
Should be a 2 level dictionary with outer keys being names of worksheets and inner keys being some of
"bold_header", "center_header", "freeze_header", and "column_widths", optional
:param column_formatting_options: the column formatting options for the worksheets.
Should be a 2 level dictionary with outer keys being names of worksheets and inner keys being column names.
The inner values should be instances of COLUMN_FORMAT_OPTIONS, optional
"""
if overlapBehavior == WORKSHEET_OVERRIDE_BEHAVIORS.EXIT:
for worksheet_name in df_dict.keys():
Expand All @@ -241,5 +305,8 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, options={}):
except gspread.exceptions.WorksheetNotFound:
pass
for worksheet_name, df in df_dict.items():
fill_worksheet_with_df(sheet, df, worksheet_name, overlapBehavior, options=options)

fill_worksheet_with_df(
sheet, df, worksheet_name, overlapBehavior,
sheet_formatting_options=sheet_formatting_options.get(worksheet_name, DEFAULT_SHEET_FORMATTING_OPTIONS),
column_formatting_options=column_formatting_options.get(worksheet_name, {})
)
39 changes: 37 additions & 2 deletions analytics/analytics_package/analytics/sheets_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def get_rename_dict(dimensions):
zip([dimension["id"] for dimension in dimensions], [dimension["alias"] for dimension in dimensions])
)

def get_outbound_sheets_df(analytics_params):
def get_outbound_links_df(analytics_params):
"""
Get a DF with outbound links from the Analytics API. Merges the builtin and custom events for outbound links.
Expand Down Expand Up @@ -95,4 +95,39 @@ def get_outbound_sheets_df(analytics_params):
"hostname": "Hostname",
}
)[["Page Path", "Hostname", "Outbound Link", "Total Clicks", "Total Users"]]
return df_all_links.copy().reset_index(drop=True)

return df_all_links.copy().reset_index(drop=True)

def get_outbound_links_change(analytics_params, start_current, end_current, start_previous, end_previous):
    """
    Get a DF with outbound links from the Analytics API and a comparison for the prior month.
    Rows are matched between the two periods on ("Page Path", "Outbound Link", "Hostname").
    Links missing from the current period are reported with 0 clicks and 0 users.
    The percent change columns hold fractions (current / previous - 1, so 0.25 means +25%)
    and are NaN when the link is missing from the previous period or its previous value is 0.

    :param analytics_params: the parameters for the Analytics API, including authentication and property ids
    :param start_current: the start date for the current month in the format "YYYY-MM-DD"
    :param end_current: the end date for the current month
    :param start_previous: the start date for the previous month
    :param end_previous: the end date for the previous month
    :return: a DataFrame with the index columns, "Total Clicks", "Total Users",
        "Total Clicks Percent Change" and "Total Users Percent Change",
        sorted by "Total Clicks" then "Total Users", descending
    """
    index_columns = ["Page Path", "Outbound Link", "Hostname"]
    metric_columns = ["Total Clicks", "Total Users"]
    # These dicts are intentionally never printed or logged: analytics_params carries authentication
    analytics_params_current = {
        **analytics_params,
        "start_date": start_current,
        "end_date": end_current,
    }
    analytics_params_previous = {
        **analytics_params,
        "start_date": start_previous,
        "end_date": end_previous,
    }
    df_current = get_outbound_links_df(analytics_params_current).set_index(index_columns)
    df_previous = get_outbound_links_df(analytics_params_previous).set_index(index_columns)
    # Align both periods on the union of links so every link seen in either period gets a row
    combined_index = df_current.index.union(df_previous.index)
    # A link absent from the current period counts as 0; one absent from the previous period stays NaN
    df_current_reindexed = df_current.reindex(combined_index).fillna(0)
    df_previous_reindexed = df_previous.reindex(combined_index)
    for metric in metric_columns:
        previous_values = df_previous_reindexed[metric]
        # Mask zero denominators so the result is NaN rather than +/-inf
        previous_nonzero = previous_values.where(previous_values != 0)
        df_current_reindexed[f"{metric} Percent Change"] = (df_current_reindexed[metric] / previous_nonzero) - 1
    return df_current_reindexed.sort_values(metric_columns, ascending=False, kind="stable").reset_index()

2 changes: 1 addition & 1 deletion analytics/analytics_package/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="analytics",
version="3.1.0",
version="3.2.0",
packages=["analytics"],
install_requires=["matplotlib", "pandas", "numpy", "google-auth-oauthlib", "google-api-python-client", "gspread", "gspread-formatting"],
)

0 comments on commit eef1cee

Please sign in to comment.