Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add percent change to analytics package #4335

Merged
merged 3 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 93 additions & 26 deletions analytics/analytics_package/analytics/sheets_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@
import gspread_formatting
from enum import Enum
from googleapiclient.discovery import build
import numpy as np

FONT_SIZE_PTS = 10
PTS_PIXELS_RATIO = 4/3
DEFAULT_BUFFER_CHARS = 2
GREEN_COLOR = "#00FF00"
RED_COLOR = "#FF0000"


class FILE_OVERRIDE_BEHAVIORS(Enum):
OVERRIDE_IF_IN_SAME_PLACE = 1
Expand All @@ -12,9 +20,17 @@ class WORKSHEET_OVERRIDE_BEHAVIORS(Enum):
OVERRIDE = 1
EXIT = 2

FONT_SIZE_PTS = 10
PTS_PIXELS_RATIO = 4/3
DEFAULT_BUFFER_CHARS = 2
class COLUMN_FORMAT_OPTIONS(Enum):
    """Per-column formatting choices accepted by fill_worksheet_with_df."""
    # No column-specific formatting; the column is skipped
    DEFAULT = 1
    # Cells get a percent number format with pattern '0.0%'
    PERCENT_UNCOLORED = 2
    # Percent number format plus green/red conditional text coloring around 0
    PERCENT_COLORED = 3

# Sheet-level formatting used when a caller supplies no options of their own:
# bold, centered, frozen header row and column widths justified to the content
# with DEFAULT_BUFFER_CHARS extra characters of padding.
# NOTE: this dict is shared as a default argument value, so it should be treated as read-only.
DEFAULT_SHEET_FORMATTING_OPTIONS = {
"bold_header": True,
"center_header": True,
"freeze_header": True,
"column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS}
}

def extract_credentials(authentication_response):
"""Extracts the credentials from the tuple from api.authenticate"""
Expand Down Expand Up @@ -141,17 +157,14 @@ def create_sheet_in_folder(authentication_response, sheet_name, parent_folder_na
# Open new file
return gc.open_by_key(spread_id)


def fill_worksheet_with_df(
sheet,
df,
worksheet_name,
overlapBehavior,
options={
"bold_header": True,
"center_header": True,
"freeze_header": True,
"column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS}
}
sheet_formatting_options=DEFAULT_SHEET_FORMATTING_OPTIONS,
column_formatting_options={}
):
"""
Fill a worksheet with the contents of a DataFrame.
Expand All @@ -162,8 +175,10 @@ def fill_worksheet_with_df(
:param df: the DataFrame to fill the worksheet with
:param worksheet_name: the name of the worksheet to fill. Cannot be "Sheet1"
:param overlapBehavior: the behavior to take if the worksheet already exists.
:param options: the formatting options for the worksheet.
:param sheet_formatting_options: the formatting options for the worksheet.
Should be a dictionary with optional elements "bold_header", "center_header", "freeze_header", and "column_widths", optional
:param column_formatting_options: the column formatting options for the worksheet.
Should be a dictionary with dataframe columns as keys and instances of COLUMN_FORMAT_OPTIONS as values, optional
"""
# Sheet1 is special since it's created by default, so it's not allowed
assert worksheet_name != "Sheet1"
Expand All @@ -179,19 +194,19 @@ def fill_worksheet_with_df(
)

# Add data to worksheet
worksheet.update([df.columns.values.tolist()] + df.values.tolist())
worksheet.update([df.columns.values.tolist()] + df.fillna("NA").values.tolist())

# Format worksheet
# Justify Column Widths
if "column_widths" not in options or options["column_widths"]["justify"]:
if "column_widths" not in sheet_formatting_options or sheet_formatting_options["column_widths"]["justify"]:
text_widths = df.astype(str).columns.map(
lambda column_name: df[column_name].astype(str).str.len().max()
)
header_widths = df.columns.str.len()
buffer_chars = (
DEFAULT_BUFFER_CHARS
if ("column_widths" not in options or "buffer_chars" not in options["column_widths"])
else options["column_widths"]["buffer_chars"]
if ("column_widths" not in sheet_formatting_options or "buffer_chars" not in sheet_formatting_options["column_widths"])
else sheet_formatting_options["column_widths"]["buffer_chars"]
)
column_widths = [
round((max(len_tuple) + buffer_chars) * FONT_SIZE_PTS * 1/PTS_PIXELS_RATIO)
Expand All @@ -202,26 +217,71 @@ def fill_worksheet_with_df(
]
gspread_formatting.set_column_widths(worksheet, zip(column_positions, column_widths))
# Freeze Header
if "freeze_header" not in options or options["freeze_header"]:
if "freeze_header" not in sheet_formatting_options or sheet_formatting_options["freeze_header"]:
gspread_formatting.set_frozen(worksheet, rows=1)
format_options = gspread_formatting.CellFormat()
base_format_options = gspread_formatting.CellFormat()
# Bold Header
if "bold_header" not in options or options["bold_header"]:
format_options += gspread_formatting.CellFormat(textFormat=gspread_formatting.TextFormat(bold=True))
if "bold_header" not in sheet_formatting_options or sheet_formatting_options["bold_header"]:
base_format_options += gspread_formatting.CellFormat(textFormat=gspread_formatting.TextFormat(bold=True))
# Center Header
if "center_header" not in options or options["center_header"]:
format_options += gspread_formatting.CellFormat(horizontalAlignment="CENTER")
if "center_header" not in sheet_formatting_options or sheet_formatting_options["center_header"]:
base_format_options += gspread_formatting.CellFormat(horizontalAlignment="CENTER")
# Handle column specific formatting
for column in column_formatting_options:
if column not in df.columns:
raise KeyError("Formatting column is not in the dataframe")
# Skip if the column is set to default
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.DEFAULT:
continue
# Get the column position
column_position_numeric = df.columns.get_loc(column) + 1
column_range_top = gspread.utils.rowcol_to_a1(1, column_position_numeric)
column_range_bottom = gspread.utils.rowcol_to_a1(df.index.size + 1, column_position_numeric)
column_range = f"{column_range_top}:{column_range_bottom}"
column_worksheet_range = gspread_formatting.GridRange.from_a1_range(column_range, worksheet)
# Get conditional formatting rules
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.PERCENT_COLORED:
green_rule = gspread_formatting.ConditionalFormatRule(
ranges=[column_worksheet_range],
booleanRule=gspread_formatting.BooleanRule(
condition=gspread_formatting.BooleanCondition('NUMBER_GREATER_THAN_EQ', ['0']),
format=gspread_formatting.CellFormat(
textFormat=gspread_formatting.TextFormat(foregroundColor=gspread_formatting.Color(0,1,0)))
)
)
red_rule = gspread_formatting.ConditionalFormatRule(
ranges=[column_worksheet_range],
booleanRule=gspread_formatting.BooleanRule(
condition=gspread_formatting.BooleanCondition('NUMBER_LESS_THAN_EQ', ['0']),
format=gspread_formatting.CellFormat(
textFormat=gspread_formatting.TextFormat(foregroundColor=gspread_formatting.Color(1,0,0)))
)
)
# Apply conditional formatting rules
conditional_formatting_rules = gspread_formatting.get_conditional_format_rules(worksheet)
conditional_formatting_rules.append(green_rule)
conditional_formatting_rules.append(red_rule)
conditional_formatting_rules.save()
if column_formatting_options[column] in (COLUMN_FORMAT_OPTIONS.PERCENT_COLORED, COLUMN_FORMAT_OPTIONS.PERCENT_UNCOLORED):
# Apply percent format rule
gspread_formatting.format_cell_range(
worksheet,
column_range,
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='PERCENT', pattern='0.0%'))
)

# Apply base formatting options
gspread_formatting.format_cell_range(
worksheet,
f"A1:{gspread.utils.rowcol_to_a1(1, len(df.columns))}",
format_options
base_format_options
)

# Delete Sheet1 if it has been created by default
if "Sheet1" in [i.title for i in sheet.worksheets()]:
sheet.del_worksheet(sheet.worksheet("Sheet1"))

def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, options={}):
def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}):
"""
Fill a sheet with the contents of a dictionary of DataFrames.
The keys of the dictionary are the names of the worksheets, and the values contain the data to be placed in the sheet.
Expand All @@ -230,8 +290,12 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, options={}):
:param sheet: the gspread.Spreadsheet object
:param df_dict: the dictionary of DataFrames to fill the worksheets with
:param overlapBehavior: the behavior to take if any of the worksheets already exist
:param options: the formatting options for the worksheets.
Should be a dictionary with optional elements "bold_header", "center_header", "freeze_header", and "column_widths", optional
:param sheet_formatting_options: the formatting options for the worksheets.
Should be a 2 level dictionary with outer keys being names of worksheets and inner keys being some of
"bold_header", "center_header", "freeze_header", and "column_widths", optional
:param column_formatting_options: the column formatting options for the worksheets.
Should be a 2 level dictionary with outer keys being names of worksheets and inner keys being column names.
The inner values should be instances of COLUMN_FORMAT_OPTIONS, optional
"""
if overlapBehavior == WORKSHEET_OVERRIDE_BEHAVIORS.EXIT:
for worksheet_name in df_dict.keys():
Expand All @@ -241,5 +305,8 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, options={}):
except gspread.exceptions.WorksheetNotFound:
pass
for worksheet_name, df in df_dict.items():
fill_worksheet_with_df(sheet, df, worksheet_name, overlapBehavior, options=options)

fill_worksheet_with_df(
sheet, df, worksheet_name, overlapBehavior,
sheet_formatting_options=sheet_formatting_options.get(worksheet_name, DEFAULT_SHEET_FORMATTING_OPTIONS),
column_formatting_options=column_formatting_options.get(worksheet_name, {})
)
39 changes: 37 additions & 2 deletions analytics/analytics_package/analytics/sheets_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def get_rename_dict(dimensions):
zip([dimension["id"] for dimension in dimensions], [dimension["alias"] for dimension in dimensions])
)

def get_outbound_sheets_df(analytics_params):
def get_outbound_links_df(analytics_params):
"""
Get a DF with outbound links from the Analytics API. Merges the builtin and custom events for outbound links.

Expand Down Expand Up @@ -95,4 +95,39 @@ def get_outbound_sheets_df(analytics_params):
"hostname": "Hostname",
}
)[["Page Path", "Hostname", "Outbound Link", "Total Clicks", "Total Users"]]
return df_all_links.copy().reset_index(drop=True)

return df_all_links.copy().reset_index(drop=True)

def get_outbound_links_change(analytics_params, start_current, end_current, start_previous, end_previous):
    """
    Get a DF with outbound links from the Analytics API and a comparison with a previous period.

    The result has one row per (Page Path, Outbound Link, Hostname) combination seen in either
    period. "Total Clicks" and "Total Users" hold the current-period values (0 when the link only
    appeared in the previous period). "Total Clicks Percent Change" and "Total Users Percent Change"
    hold (current / previous) - 1 as a fraction, and are NaN when there is no non-zero previous value
    to compare against.

    :param analytics_params: the parameters for the Analytics API, including authentication and property ids
    :param start_current: the start date for the current period in the format "YYYY-MM-DD"
    :param end_current: the end date for the current period
    :param start_previous: the start date for the previous period
    :param end_previous: the end date for the previous period
    :return: a DataFrame sorted by "Total Clicks" then "Total Users", both descending
    """
    index_columns = ["Page Path", "Outbound Link", "Hostname"]
    metric_columns = ["Total Clicks", "Total Users"]

    def get_period_df(start_date, end_date):
        # Same request as the caller's, restricted to a single date range.
        # The params are deliberately not printed/logged since they carry authentication.
        period_params = {
            **analytics_params,
            "start_date": start_date,
            "end_date": end_date,
        }
        return get_outbound_links_df(period_params).set_index(index_columns)

    df_current = get_period_df(start_current, end_current)
    df_previous = get_period_df(start_previous, end_previous)

    # Align both periods on the union of links so the division below is row-for-row
    combined_index = df_current.index.union(df_previous.index)
    # Links missing from the current period count as 0; links missing from the
    # previous period stay NaN so their percent change is NaN rather than a number
    df_current_reindexed = df_current.reindex(combined_index).fillna(0)
    df_previous_reindexed = df_previous.reindex(combined_index)

    for metric in metric_columns:
        previous_values = df_previous_reindexed[metric]
        # A previous value of 0 has no meaningful percent change; mask it to NaN
        # instead of producing +/-inf, which cannot be serialized for upload
        comparable_previous = previous_values.where(previous_values != 0)
        df_current_reindexed[f"{metric} Percent Change"] = (
            df_current_reindexed[metric] / comparable_previous
        ) - 1

    return df_current_reindexed.sort_values(
        metric_columns, ascending=False, kind="stable"
    ).reset_index()

2 changes: 1 addition & 1 deletion analytics/analytics_package/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="analytics",
version="3.1.0",
version="3.2.0",
packages=["analytics"],
install_requires=["matplotlib", "pandas", "numpy", "google-auth-oauthlib", "google-api-python-client", "gspread", "gspread-formatting"],
)
Loading