From 1aec654b188fb8973e8b154c37d078606692e8b3 Mon Sep 17 00:00:00 2001 From: jpaten Date: Fri, 24 Jan 2025 15:28:24 -0800 Subject: [PATCH 1/5] fix: renamed rowsCount to rowCount (#4351) --- analytics/analytics_package/analytics/api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analytics/analytics_package/analytics/api.py b/analytics/analytics_package/analytics/api.py index dd3f459ec..a358da933 100644 --- a/analytics/analytics_package/analytics/api.py +++ b/analytics/analytics_package/analytics/api.py @@ -160,7 +160,7 @@ def get_metrics_by_dimensions_v4_style(service, metrics, dimensions, property, s while has_more: result = service.properties().runReport(property=property, body=params).execute() if rows_left is None: - rows_left = result.get("rowsCount", 0) + rows_left = result.get("rowCount", 0) page_row_count = len(result["rows"]) if "rows" in result else 0 has_more = page_row_count > 0 if has_more: @@ -172,7 +172,7 @@ def get_metrics_by_dimensions_v4_style(service, metrics, dimensions, property, s offset += max_results params["offset"] = offset - df = v4_results_to_df(results, dimensions, metrics) + df = v4_results_to_df(results, dimensions, metrics) return df From 372145c4f11306d30a7b8f4f173a721413b8c9e1 Mon Sep 17 00:00:00 2001 From: jpaten Date: Fri, 24 Jan 2025 15:33:45 -0800 Subject: [PATCH 2/5] fix: switched ga result filtering from results to dimension filters (#4351) --- .../analytics_package/analytics/fields.py | 9 +++++---- .../analytics/sheets_elements.py | 20 ++++++++----------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/analytics/analytics_package/analytics/fields.py b/analytics/analytics_package/analytics/fields.py index 8498e6600..3b31a5272 100644 --- a/analytics/analytics_package/analytics/fields.py +++ b/analytics/analytics_package/analytics/fields.py @@ -3,24 +3,25 @@ METRIC_TOTAL_USERS = 'totalUsers' METRIC_PAGE_VIEW = 'screenPageViews' +# Event Names +EVENT_BUILTIN_CLICK = "click" +EVENT_CUSTOM_CLICK = "outbound_link_clicked" +EVENT_PAGE_VIEW = "page_view" + # DIMENSIONS DIMENSION_PAGE_PATH = { 'id': 'pagePath', 'alias': 'page_path', - 'remove_matches': None, } DIMENSION_BUILTIN_URL = { 'id': 'linkUrl', 'alias': 'builtin_url', - 'remove_matches': r"\s*", } DIMENSION_EVENT_NAME = { 'id': 'eventName', 'alias': 'event_name', - 'remove_matches': None, } DIMENSION_CUSTOM_URL = { 'id': 'customEvent:click_url', 'alias': 'outbound_url', - 'remove_matches': r"\(not set\)", } diff --git a/analytics/analytics_package/analytics/sheets_elements.py b/analytics/analytics_package/analytics/sheets_elements.py index b35fd9e77..98ad9df10 100644 --- a/analytics/analytics_package/analytics/sheets_elements.py +++ b/analytics/analytics_package/analytics/sheets_elements.py @@ -5,7 +5,7 @@ from urllib.parse import urlparse import datetime as dt -def get_flat_data_df(metrics, dimensions, remove_matches=None, **other_params): +def get_flat_data_df(metrics, dimensions, **other_params): """ Get a df from the Analytics API with a flat structure (no multiindex). @@ -18,18 +18,11 @@ def get_flat_data_df(metrics, dimensions, remove_matches=None, **other_params): :return: a DataFrame with the data from the Analytics API """ - if remove_matches is not None: - assert len(remove_matches) == len(dimensions) - df = get_data_df( metrics, [dimension["id"] for dimension in dimensions], **other_params, ) - if remove_matches is not None: - for i, match in enumerate([dimension["remove_matches"] for dimension in dimensions]): - if match is not None: - df = df.loc[~df.index.get_level_values(i).str.fullmatch(match)] return df.reset_index().rename(columns=get_rename_dict(dimensions)).copy() def get_rename_dict(dimensions): @@ -41,30 +34,33 @@ def get_rename_dict(dimensions): def get_outbound_links_df(analytics_params): """ Get a DF with outbound links from the Analytics API. Merges the builtin and custom events for outbound links. + analytics_params cannot currently include a dimension_filter :param analytics_params: the parameters for the Analytics API, including authentication and property ids :return: a DataFrame with the outbound links from the Analytics API """ pd.set_option('future.no_silent_downcasting', True) + assert "dimension_filter" not in analytics_params # Get the builtin "Click" event df_builtin_links = get_flat_data_df( [METRIC_EVENT_COUNT, METRIC_TOTAL_USERS], [DIMENSION_PAGE_PATH, DIMENSION_BUILTIN_URL, DIMENSION_EVENT_NAME], - remove_matches=[None, r"\s*", None], + dimension_filter=f"eventName=={EVENT_BUILTIN_CLICK}", **analytics_params, ).groupby( [DIMENSION_PAGE_PATH["alias"], DIMENSION_BUILTIN_URL["alias"]] ).sum().reset_index() - + df_builtin_links.sort_values(METRIC_EVENT_COUNT, ascending=False).to_csv("test_builtin_links.csv") # Get the custom "outbound_link_click" event df_custom_links = get_flat_data_df( [METRIC_EVENT_COUNT, METRIC_TOTAL_USERS], [DIMENSION_EVENT_NAME, DIMENSION_CUSTOM_URL, DIMENSION_PAGE_PATH], - remove_matches=[DIMENSION_EVENT_NAME["remove_matches"], r"\(not set\)", None], + dimension_filter=f"eventName=={EVENT_CUSTOM_CLICK}", **analytics_params, ).groupby( [DIMENSION_PAGE_PATH["alias"], DIMENSION_CUSTOM_URL["alias"]] ).sum().reset_index() + df_custom_links.sort_values(METRIC_EVENT_COUNT, ascending=False).to_csv("test_custom_links.csv") # Concatenate the two dataframes, avoiding duplicates # Keep the link from the builtin event, unless the link contains a #fragment, in which case keep the link from the custom event df_builtin_links["builtin"] = True @@ -156,7 +152,7 @@ def get_page_views_df(analytics_params): df_response = get_flat_data_df( [METRIC_EVENT_COUNT, METRIC_TOTAL_USERS, METRIC_PAGE_VIEW], [DIMENSION_PAGE_PATH, DIMENSION_EVENT_NAME], - dimension_filter="eventName==page_view", + dimension_filter=f"eventName=={EVENT_PAGE_VIEW}", **analytics_params, ).rename( columns={ From 9bbc8501c78bbfef0893991bd87c3e812395dbb2 Mon Sep 17 00:00:00 2001 From: jpaten Date: Fri, 24 Jan 2025 15:37:28 -0800 Subject: [PATCH 3/5] chore: bumped analytics package version (#4351) --- analytics/analytics_package/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analytics/analytics_package/setup.py b/analytics/analytics_package/setup.py index 876189326..80cf2b71d 100644 --- a/analytics/analytics_package/setup.py +++ b/analytics/analytics_package/setup.py @@ -2,7 +2,7 @@ setup( name="analytics", - version="3.3.0", + version="3.3.1", packages=["analytics"], install_requires=["matplotlib", "pandas", "numpy", "google-auth-oauthlib", "google-api-python-client", "gspread", "gspread-formatting"], ) \ No newline at end of file From 5e6c0b9ac0b296727702468644a0a52baa00be04 Mon Sep 17 00:00:00 2001 From: jpaten Date: Fri, 24 Jan 2025 15:41:59 -0800 Subject: [PATCH 4/5] chore: updated analytics package docstrings (#4351) --- analytics/analytics_package/analytics/sheets_elements.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/analytics/analytics_package/analytics/sheets_elements.py b/analytics/analytics_package/analytics/sheets_elements.py index 98ad9df10..52a5fa54f 100644 --- a/analytics/analytics_package/analytics/sheets_elements.py +++ b/analytics/analytics_package/analytics/sheets_elements.py @@ -12,10 +12,6 @@ def get_flat_data_df(metrics, dimensions, **other_params): :param analytics_params: the parameters for the Analytics API, including authentication and property ids :param metrics: the metrics to get :param dimensions: the dimensions to get - :param remove_matches: a list of regex patterns or None elements to remove from each dimension. - Each regex or None element should correspond with an element of dimensions and remove_matches must be the same length as dimensions. - If the value is None, no patterns are removed, defaults to None. - :return: a DataFrame with the data from the Analytics API """ df = get_data_df( @@ -149,11 +145,12 @@ def get_page_views_df(analytics_params): :param analytics_params: the parameters for the Analytics API, including authentication and property ids :return: a DataFrame with the page views from the Analytics API """ + assert "dimension_filter" not in analytics_params df_response = get_flat_data_df( [METRIC_EVENT_COUNT, METRIC_TOTAL_USERS, METRIC_PAGE_VIEW], [DIMENSION_PAGE_PATH, DIMENSION_EVENT_NAME], - dimension_filter=f"eventName=={EVENT_PAGE_VIEW}", **analytics_params, + dimension_filter=f"eventName=={EVENT_PAGE_VIEW}", ).rename( columns={ DIMENSION_PAGE_PATH["alias"]: "Page Path", From a8208c2af26b27dae34340986fa1798d81e54fbd Mon Sep 17 00:00:00 2001 From: hunterckx <118154470+hunterckx@users.noreply.github.com> Date: Fri, 24 Jan 2025 16:53:13 -0800 Subject: [PATCH 5/5] chore: remove test csv outputs (#4351) --- analytics/analytics_package/analytics/sheets_elements.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/analytics/analytics_package/analytics/sheets_elements.py b/analytics/analytics_package/analytics/sheets_elements.py index 52a5fa54f..5a99e6d12 100644 --- a/analytics/analytics_package/analytics/sheets_elements.py +++ b/analytics/analytics_package/analytics/sheets_elements.py @@ -46,7 +46,6 @@ def get_outbound_links_df(analytics_params): ).groupby( [DIMENSION_PAGE_PATH["alias"], DIMENSION_BUILTIN_URL["alias"]] ).sum().reset_index() - df_builtin_links.sort_values(METRIC_EVENT_COUNT, ascending=False).to_csv("test_builtin_links.csv") # Get the custom "outbound_link_click" event df_custom_links = get_flat_data_df( [METRIC_EVENT_COUNT, METRIC_TOTAL_USERS], @@ -56,7 +55,6 @@ def get_outbound_links_df(analytics_params): ).groupby( [DIMENSION_PAGE_PATH["alias"], DIMENSION_CUSTOM_URL["alias"]] ).sum().reset_index() - df_custom_links.sort_values(METRIC_EVENT_COUNT, ascending=False).to_csv("test_custom_links.csv") # Concatenate the two dataframes, avoiding duplicates # Keep the link from the builtin event, unless the link contains a #fragment, in which case keep the link from the custom event df_builtin_links["builtin"] = True