Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: added pageviews analytics functions (#4336) #4339

Merged
merged 7 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions analytics/analytics_package/analytics/fields.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Metric names
# String identifiers that callers pass in the `metrics` list of an Analytics API query.
# NOTE(review): presumably these match the metric names defined by the Analytics API itself -- confirm.
METRIC_EVENT_COUNT = 'eventCount'
METRIC_TOTAL_USERS = 'totalUsers'
METRIC_PAGE_VIEW = 'screenPageViews'

# DIMENSIONS
DIMENSION_PAGE_PATH = {
Expand Down
124 changes: 114 additions & 10 deletions analytics/analytics_package/analytics/sheets_elements.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import numpy as np
import pandas as pd
from .charts import get_data_df
from .fields import *
from urllib.parse import urlparse
import datetime as dt

def get_flat_data_df(analytics_params, metrics, dimensions, remove_matches=None):
def get_flat_data_df(metrics, dimensions, remove_matches=None, **other_params):
"""
Get a df from the Analytics API with a flat structure (no multiindex).

Expand All @@ -22,7 +24,7 @@ def get_flat_data_df(analytics_params, metrics, dimensions, remove_matches=None)
df = get_data_df(
metrics,
[dimension["id"] for dimension in dimensions],
**analytics_params,
**other_params,
)
if remove_matches is not None:
for i, match in enumerate([dimension["remove_matches"] for dimension in dimensions]):
Expand All @@ -46,20 +48,20 @@ def get_outbound_links_df(analytics_params):
pd.set_option('future.no_silent_downcasting', True)
# Get the builtin "Click" event
df_builtin_links = get_flat_data_df(
analytics_params,
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
[DIMENSION_PAGE_PATH, DIMENSION_BUILTIN_URL, DIMENSION_EVENT_NAME],
remove_matches=[None, r"\s*", None]
remove_matches=[None, r"\s*", None],
**analytics_params,
).groupby(
[DIMENSION_PAGE_PATH["alias"], DIMENSION_BUILTIN_URL["alias"]]
).sum().reset_index()

# Get the custom "outbound_link_click" event
df_custom_links = get_flat_data_df(
analytics_params,
[METRIC_EVENT_COUNT, METRIC_TOTAL_USERS],
[DIMENSION_EVENT_NAME, DIMENSION_CUSTOM_URL, DIMENSION_PAGE_PATH],
remove_matches=[DIMENSION_EVENT_NAME["remove_matches"], r"\(not set\)", None],
**analytics_params,
).groupby(
[DIMENSION_PAGE_PATH["alias"], DIMENSION_CUSTOM_URL["alias"]]
).sum().reset_index()
Expand Down Expand Up @@ -117,17 +119,119 @@ def get_outbound_links_change(analytics_params, start_current, end_current, star
"start_date": start_previous,
"end_date": end_previous,
}
print(analytics_params_month_2)
df_current = get_outbound_links_df(analytics_params_month_1).set_index(
["Page Path", "Outbound Link", "Hostname"]
)
df_previous = get_outbound_links_df(analytics_params_month_2).set_index(
["Page Path", "Outbound Link", "Hostname"]
)
total_clicks_percent_change = get_change(
df_current["Total Clicks"],
df_previous["Total Clicks"],
start_current,
end_current,
start_previous,
end_previous
)
total_users_percent_change = get_change(
df_current["Total Users"],
df_previous["Total Users"],
start_current,
end_current,
start_previous,
end_previous
)
df_reindexed = df_current.reindex(total_clicks_percent_change.index).fillna(0)
df_reindexed["Total Clicks Percent Change"] = total_clicks_percent_change
df_reindexed["Total Users Percent Change"] = total_users_percent_change
return df_reindexed.sort_values(["Total Clicks", "Total Users"], ascending=False, kind="stable").reset_index()

def get_page_views_df(analytics_params):
    """
    Fetch per-page view and user totals from the Analytics API.

    The query is restricted to the "page_view" event and the result is reduced
    to three columns with human-readable names.

    :param analytics_params: the parameters for the Analytics API, including authentication and property ids
    :return: a DataFrame with the columns "Page Path", "Total Views" and "Total Users"
    """
    requested_metrics = [METRIC_EVENT_COUNT, METRIC_TOTAL_USERS, METRIC_PAGE_VIEW]
    requested_dimensions = [DIMENSION_PAGE_PATH, DIMENSION_EVENT_NAME]
    raw_df = get_flat_data_df(
        requested_metrics,
        requested_dimensions,
        dimension_filter="eventName==page_view",
        **analytics_params,
    )
    # Map the API field names onto the labels used in the output
    column_labels = {
        DIMENSION_PAGE_PATH["alias"]: "Page Path",
        METRIC_PAGE_VIEW: "Total Views",
        METRIC_TOTAL_USERS: "Total Users",
    }
    labelled_df = raw_df.rename(columns=column_labels)
    # Keep only the labelled columns; copy so the result is independent of the source frame
    return labelled_df[["Page Path", "Total Views", "Total Users"]].copy()

def get_page_views_change(analytics_params, start_current, end_current, start_previous, end_previous):
    """
    Get a DF with page views from the Analytics API and a comparison for the prior month.

    :param analytics_params: the parameters for the Analytics API, including authentication and property ids
    :param start_current: the start date for the current month in the format "YYYY-MM-DD"
    :param end_current: the end date for the current month
    :param start_previous: the start date for the previous month
    :param end_previous: the end date for the previous month
    :return: a DataFrame with one row per page path seen in either period, holding the
        current period's "Total Views" and "Total Users" (0 for pages only seen in the
        previous period) plus "Total Views Percent Change" and "Total Users Percent Change",
        sorted by views and then users in descending order
    """
    analytics_params_current = {
        **analytics_params,
        "start_date": start_current,
        "end_date": end_current,
    }
    analytics_params_previous = {
        **analytics_params,
        "start_date": start_previous,
        "end_date": end_previous,
    }
    df_current = get_page_views_df(analytics_params_current).set_index("Page Path")
    df_previous = get_page_views_df(analytics_params_previous).set_index("Page Path")
    # Align both periods on every page path that appears in either of them.
    # Pages missing from the current period count as 0; pages missing from the
    # previous period stay NaN so their percent change is NaN rather than infinite.
    combined_index = df_current.index.union(df_previous.index)
    df_current_reindexed = df_current.reindex(combined_index).fillna(0)
    df_previous_reindexed = df_previous.reindex(combined_index)
    # The percent changes are computed by get_change from the two aligned series
    # and the period dates.
    views_percent_change = get_change(
        df_current_reindexed["Total Views"],
        df_previous_reindexed["Total Views"],
        start_current,
        end_current,
        start_previous,
        end_previous,
    )
    users_percent_change = get_change(
        df_current_reindexed["Total Users"],
        df_previous_reindexed["Total Users"],
        start_current,
        end_current,
        start_previous,
        end_previous,
    )
    # Rebuild the current-period frame on the index of the computed change series
    df_reindexed = df_current.reindex(views_percent_change.index).fillna(0)
    df_reindexed["Total Views Percent Change"] = views_percent_change
    df_reindexed["Total Users Percent Change"] = users_percent_change
    return df_reindexed.sort_values(["Total Views", "Total Users"], ascending=False, kind="stable").reset_index()

def get_change(series_current, series_previous, start_current, end_current, start_previous, end_previous, combined_index=None):
    """
    Get the percent change between two series, accounting for different numbers of days in the month.

    The previous series is scaled by (days in current period / days in previous period)
    before the comparison, so periods of different lengths are compared fairly.

    :param series_current: the series representing the current month
    :param series_previous: the series representing the prior month
    :param start_current: the start date for the current month in the format "YYYY-MM-DD"
    :param end_current: the end date for the current month
    :param start_previous: the start date for the prior month
    :param end_previous: the end date for the prior month
    :param combined_index: optional index to align both series on; defaults to the union
        of the two series' indexes
    :return: a Series with the fractional change between the two series (e.g. 1.0 means
        doubled); entries with no usable previous value (missing or zero) are NaN
    :raises ValueError: if the series have different index names, or if either date
        range ends before it starts
    """
    # Both series must be keyed the same way for the alignment below to be meaningful
    if series_current.index.names != series_previous.index.names:
        raise ValueError("series_current and series_previous must have the same index names")
    # Period lengths in days, inclusive of both the start and the end date
    current_length = float((dt.datetime.fromisoformat(end_current) - dt.datetime.fromisoformat(start_current)).days + 1)
    previous_length = float((dt.datetime.fromisoformat(end_previous) - dt.datetime.fromisoformat(start_previous)).days + 1)
    # A reversed range would give a zero or negative length and a meaningless scale factor
    if current_length <= 0 or previous_length <= 0:
        raise ValueError("each date range must end on or after its start date")
    # Reindex both series to have the same index
    if combined_index is None:
        combined_index = series_current.index.union(series_previous.index)
    # Entries missing from the current period count as 0 (a 100% drop)
    series_current_reindexed = series_current.reindex(combined_index).fillna(0)
    # Adjust the values from the prior series to account for the different number of days in the month;
    # entries missing from the prior period stay NaN so their change is NaN
    series_previous_reindexed = series_previous.reindex(combined_index) * current_length / previous_length
    change = (series_current_reindexed / series_previous_reindexed) - 1
    # Division by a zero previous value yields +/-inf; report those as "no change available"
    return change.replace({np.inf: np.nan, -np.inf: np.nan})
2 changes: 1 addition & 1 deletion analytics/analytics_package/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Package metadata for the "analytics" distribution.
# Only one `version` keyword may be given; repeating a keyword argument is a SyntaxError.
setup(
    name="analytics",
    version="3.3.0",
    packages=["analytics"],
    install_requires=["matplotlib", "pandas", "numpy", "google-auth-oauthlib", "google-api-python-client", "gspread", "gspread-formatting"],
)
Loading