Skip to content

Commit

Permalink
feat: added pageviews analytics functions (#4336) (#4339)
Browse files Browse the repository at this point in the history
* feat: added pageviews analytics functions (#4336)

* fix: updated outbound sheets link to breaking change in get_flat_data_df (#4336)

* chore: made analytics percent change calculation consistent in sheets-elements (#4336)

* chore: bumped ga package version (#4336)

* fix: no longer double adjusting for time period length (#4336)

* fix: corrected formula for changes (#4336)

* chore: remove logging (#4336)
  • Loading branch information
jpaten authored Jan 23, 2025
1 parent eef1cee commit 6d65fae
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 11 deletions.
1 change: 1 addition & 0 deletions analytics/analytics_package/analytics/fields.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Metric names
METRIC_EVENT_COUNT = 'eventCount'
METRIC_TOTAL_USERS = 'totalUsers'
METRIC_PAGE_VIEW = 'screenPageViews'

# DIMENSIONS
DIMENSION_PAGE_PATH = {
Expand Down
124 changes: 114 additions & 10 deletions analytics/analytics_package/analytics/sheets_elements.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import numpy as np
import pandas as pd
from .charts import get_data_df
from .fields import *
from urllib.parse import urlparse
import datetime as dt

def get_flat_data_df(analytics_params, metrics, dimensions, remove_matches=None):
def get_flat_data_df(metrics, dimensions, remove_matches=None, **other_params):
"""
Get a df from the Analytics API with a flat structure (no multiindex).
Expand All @@ -22,7 +24,7 @@ def get_flat_data_df(analytics_params, metrics, dimensions, remove_matches=None)
df = get_data_df(
metrics,
[dimension["id"] for dimension in dimensions],
**analytics_params,
**other_params,
)
if remove_matches is not None:
for i, match in enumerate([dimension["remove_matches"] for dimension in dimensions]):
Expand All @@ -46,20 +48,20 @@ def get_outbound_links_df(analytics_params):
pd.set_option('future.no_silent_downcasting', True)
# Get the builtin "Click" event
df_builtin_links = get_flat_data_df(
analytics_params,
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
[DIMENSION_PAGE_PATH, DIMENSION_BUILTIN_URL, DIMENSION_EVENT_NAME],
remove_matches=[None, r"\s*", None]
remove_matches=[None, r"\s*", None],
**analytics_params,
).groupby(
[DIMENSION_PAGE_PATH["alias"], DIMENSION_BUILTIN_URL["alias"]]
).sum().reset_index()

# Get the custom "outbound_link_click" event
df_custom_links = get_flat_data_df(
analytics_params,
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
[DIMENSION_EVENT_NAME, DIMENSION_CUSTOM_URL, DIMENSION_PAGE_PATH],
remove_matches=[DIMENSION_EVENT_NAME["remove_matches"], r"\(not set\)", None],
**analytics_params,
).groupby(
[DIMENSION_PAGE_PATH["alias"], DIMENSION_CUSTOM_URL["alias"]]
).sum().reset_index()
Expand Down Expand Up @@ -117,17 +119,119 @@ def get_outbound_links_change(analytics_params, start_current, end_current, star
"start_date": start_previous,
"end_date": end_previous,
}
print(analytics_params_month_2)
df_current = get_outbound_links_df(analytics_params_month_1).set_index(
["Page Path", "Outbound Link", "Hostname"]
)
df_previous = get_outbound_links_df(analytics_params_month_2).set_index(
["Page Path", "Outbound Link", "Hostname"]
)
total_clicks_percent_change = get_change(
df_current["Total Clicks"],
df_previous["Total Clicks"],
start_current,
end_current,
start_previous,
end_previous
)
total_users_percent_change = get_change(
df_current["Total Users"],
df_previous["Total Users"],
start_current,
end_current,
start_previous,
end_previous
)
df_reindexed = df_current.reindex(total_clicks_percent_change.index).fillna(0)
df_reindexed["Total Clicks Percent Change"] = total_clicks_percent_change
df_reindexed["Total Users Percent Change"] = total_users_percent_change
return df_reindexed.sort_values(["Total Clicks", "Total Users"], ascending=False, kind="stable").reset_index()

def get_page_views_df(analytics_params):
    """
    Fetch page-level view and user totals from the Analytics API.

    The request is restricted to the "page_view" event via the dimension filter.

    :param analytics_params: the parameters for the Analytics API, including authentication and property ids
    :return: a DataFrame with the columns "Page Path", "Total Views" and "Total Users"
    """
    requested_metrics = [METRIC_EVENT_COUNT, METRIC_TOTAL_USERS, METRIC_PAGE_VIEW]
    requested_dimensions = [DIMENSION_PAGE_PATH, DIMENSION_EVENT_NAME]
    df_raw = get_flat_data_df(
        requested_metrics,
        requested_dimensions,
        dimension_filter="eventName==page_view",
        **analytics_params,
    )
    # Map the API field names onto the human-readable sheet headers
    column_labels = {
        DIMENSION_PAGE_PATH["alias"]: "Page Path",
        METRIC_PAGE_VIEW: "Total Views",
        METRIC_TOTAL_USERS: "Total Users",
    }
    df_labelled = df_raw.rename(columns=column_labels)
    # Keep only the reported columns; copy so callers do not mutate a view of the response
    return df_labelled[["Page Path", "Total Views", "Total Users"]].copy()

def get_page_views_change(analytics_params, start_current, end_current, start_previous, end_previous):
    """
    Get a DF with page views from the Analytics API and a comparison for the prior month

    :param analytics_params: the parameters for the Analytics API, including authentication and property ids
    :param start_current: the start date for the current month in the format "YYYY-MM-DD"
    :param end_current: the end date for the current month
    :param start_previous: the start date for the previous month
    :param end_previous: the end date for the previous month
    :return: a DataFrame with the columns "Page Path", "Total Views", "Total Users",
        "Total Views Percent Change" and "Total Users Percent Change", sorted by
        "Total Views" then "Total Users" in descending order
    """
    analytics_params_current = {
        **analytics_params,
        "start_date": start_current,
        "end_date": end_current,
    }
    analytics_params_previous = {
        **analytics_params,
        "start_date": start_previous,
        "end_date": end_previous,
    }
    df_current = get_page_views_df(analytics_params_current).set_index("Page Path")
    df_previous = get_page_views_df(analytics_params_previous).set_index("Page Path")

    # get_change aligns both series on the union of their indexes itself,
    # so the raw per-period columns are passed straight through.
    views_percent_change = get_change(
        df_current["Total Views"],
        df_previous["Total Views"],
        start_current,
        end_current,
        start_previous,
        end_previous,
    )
    users_percent_change = get_change(
        df_current["Total Users"],
        df_previous["Total Users"],
        start_current,
        end_current,
        start_previous,
        end_previous,
    )
    # Pages present only in the previous period have no current row; add them with zero totals
    df_reindexed = df_current.reindex(views_percent_change.index).fillna(0)
    df_reindexed["Total Views Percent Change"] = views_percent_change
    df_reindexed["Total Users Percent Change"] = users_percent_change
    return df_reindexed.sort_values(["Total Views", "Total Users"], ascending=False, kind="stable").reset_index()

def get_change(series_current, series_previous, start_current, end_current, start_previous, end_previous, combined_index=None):
    """
    Get the percent change between two series, accounting for different numbers of days in the month.

    The previous series is scaled by current_length / previous_length before the
    comparison, so periods of different lengths are compared on an equal footing.

    :param series_current: the series representing the current month
    :param series_previous: the series representing the prior month
    :param start_current: the start date for the current month in the format "YYYY-MM-DD"
    :param end_current: the end date for the current month
    :param start_previous: the start date for the prior month
    :param end_previous: the end date for the prior month
    :param combined_index: optional index to align both series on; defaults to the union of their indexes
    :return: a Series with the change between the two series, expressed as a fraction
        (current / adjusted previous - 1); NaN where the previous value is missing or zero
    :raises ValueError: if the series have different index names, or if either period
        does not contain at least one day
    """
    # Both series must share index names, otherwise aligning them is meaningless
    if series_current.index.names != series_previous.index.names:
        raise ValueError(
            f"Index names differ: {list(series_current.index.names)} != {list(series_previous.index.names)}"
        )
    # Period lengths are inclusive of both the start and the end day
    current_length = float((dt.datetime.fromisoformat(end_current) - dt.datetime.fromisoformat(start_current)).days + 1)
    previous_length = float((dt.datetime.fromisoformat(end_previous) - dt.datetime.fromisoformat(start_previous)).days + 1)
    if current_length <= 0 or previous_length <= 0:
        raise ValueError("Each period must end on or after its start date")
    # Align both series on the same index, honouring a caller-supplied one
    if combined_index is None:
        combined_index = series_current.index.union(series_previous.index)
    # Entries missing from the current period count as zero
    series_current_reindexed = series_current.reindex(combined_index).fillna(0)
    # Adjust the values from the prior series to account for the different number of days in the month;
    # entries missing from the prior period stay NaN so their change is undefined rather than infinite
    series_previous_reindexed = series_previous.reindex(combined_index) * current_length / previous_length
    ratio = series_current_reindexed / series_previous_reindexed
    # Division by a zero prior value yields +/-inf; report those as undefined
    return (ratio - 1).replace([np.inf, -np.inf], np.nan)
2 changes: 1 addition & 1 deletion analytics/analytics_package/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Package metadata for the analytics helpers.
setup(
    name="analytics",
    # A keyword may be passed only once; keep the bumped release number
    version="3.3.0",
    packages=["analytics"],
    install_requires=["matplotlib", "pandas", "numpy", "google-auth-oauthlib", "google-api-python-client", "gspread", "gspread-formatting"],
)

0 comments on commit 6d65fae

Please sign in to comment.