diff --git a/analytics/analytics_package/analytics/sheets_api.py b/analytics/analytics_package/analytics/sheets_api.py index 9dfbeef64..dffca9701 100644 --- a/analytics/analytics_package/analytics/sheets_api.py +++ b/analytics/analytics_package/analytics/sheets_api.py @@ -2,6 +2,14 @@ import gspread_formatting from enum import Enum from googleapiclient.discovery import build +import numpy as np + +FONT_SIZE_PTS = 10 +PTS_PIXELS_RATIO = 4/3 +DEFAULT_BUFFER_CHARS = 2 +GREEN_COLOR = "#00FF00" +RED_COLOR = "#FF0000" + class FILE_OVERRIDE_BEHAVIORS(Enum): OVERRIDE_IF_IN_SAME_PLACE = 1 @@ -12,9 +20,17 @@ class WORKSHEET_OVERRIDE_BEHAVIORS(Enum): OVERRIDE = 1 EXIT = 2 -FONT_SIZE_PTS = 10 -PTS_PIXELS_RATIO = 4/3 -DEFAULT_BUFFER_CHARS = 2 +class COLUMN_FORMAT_OPTIONS(Enum): + DEFAULT = 1 + PERCENT_UNCOLORED = 2 + PERCENT_COLORED = 3 + +DEFAULT_SHEET_FORMATTING_OPTIONS = { + "bold_header": True, + "center_header": True, + "freeze_header": True, + "column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS} +} def extract_credentials(authentication_response): """Extracts the credentials from the tuple from api.authenticate""" @@ -141,17 +157,14 @@ def create_sheet_in_folder(authentication_response, sheet_name, parent_folder_na # Open new file return gc.open_by_key(spread_id) + def fill_worksheet_with_df( sheet, df, worksheet_name, overlapBehavior, - options={ - "bold_header": True, - "center_header": True, - "freeze_header": True, - "column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS} - } + sheet_formatting_options=DEFAULT_SHEET_FORMATTING_OPTIONS, + column_formatting_options={} ): """ Fill a worksheet with the contents of a DataFrame. @@ -162,8 +175,10 @@ def fill_worksheet_with_df( :param df: the DataFrame to fill the worksheet with :param worksheet_name: the name of the worksheet to fill. Cannot be "Sheet1" :param overlapBehavior: the behavior to take if the worksheet already exists. 
- :param options: the formatting options for the worksheet. + :param sheet_formatting_options: the formatting options for the worksheet. Should be a dictionary with optional elements "bold_header", "center_header", "freeze_header", and "column_widths", optional + :param column_formatting_options: the column formatting options for the worksheet. + Should be a dictionary with dataframe columns as keys and instances of COLUMN_FORMAT_OPTIONS as values, optional """ # Sheet1 is special since it's created by default, so it's not allowed assert worksheet_name != "Sheet1" @@ -179,19 +194,19 @@ def fill_worksheet_with_df( ) # Add data to worksheet - worksheet.update([df.columns.values.tolist()] + df.values.tolist()) + worksheet.update([df.columns.values.tolist()] + df.fillna("NA").values.tolist()) # Format worksheet # Justify Column Widths - if "column_widths" not in options or options["column_widths"]["justify"]: + if "column_widths" not in sheet_formatting_options or sheet_formatting_options["column_widths"]["justify"]: text_widths = df.astype(str).columns.map( lambda column_name: df[column_name].astype(str).str.len().max() ) header_widths = df.columns.str.len() buffer_chars = ( DEFAULT_BUFFER_CHARS - if ("column_widths" not in options or "buffer_chars" not in options["column_widths"]) - else options["column_widths"]["buffer_chars"] + if ("column_widths" not in sheet_formatting_options or "buffer_chars" not in sheet_formatting_options["column_widths"]) + else sheet_formatting_options["column_widths"]["buffer_chars"] ) column_widths = [ round((max(len_tuple) + buffer_chars) * FONT_SIZE_PTS * 1/PTS_PIXELS_RATIO) @@ -202,26 +217,71 @@ def fill_worksheet_with_df( ] gspread_formatting.set_column_widths(worksheet, zip(column_positions, column_widths)) # Freeze Header - if "freeze_header" not in options or options["freeze_header"]: + if "freeze_header" not in sheet_formatting_options or sheet_formatting_options["freeze_header"]: gspread_formatting.set_frozen(worksheet, rows=1) - 
format_options = gspread_formatting.CellFormat() + base_format_options = gspread_formatting.CellFormat() # Bold Header - if "bold_header" not in options or options["bold_header"]: - format_options += gspread_formatting.CellFormat(textFormat=gspread_formatting.TextFormat(bold=True)) + if "bold_header" not in sheet_formatting_options or sheet_formatting_options["bold_header"]: + base_format_options += gspread_formatting.CellFormat(textFormat=gspread_formatting.TextFormat(bold=True)) # Center Header - if "center_header" not in options or options["center_header"]: - format_options += gspread_formatting.CellFormat(horizontalAlignment="CENTER") + if "center_header" not in sheet_formatting_options or sheet_formatting_options["center_header"]: + base_format_options += gspread_formatting.CellFormat(horizontalAlignment="CENTER") + # Handle column specific formatting + for column in column_formatting_options: + if column not in df.columns: + raise KeyError("Formatting column is not in the dataframe") + # Skip if the column is set to default + if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.DEFAULT: + continue + # Get the column position + column_position_numeric = df.columns.get_loc(column) + 1 + column_range_top = gspread.utils.rowcol_to_a1(1, column_position_numeric) + column_range_bottom = gspread.utils.rowcol_to_a1(df.index.size + 1, column_position_numeric) + column_range = f"{column_range_top}:{column_range_bottom}" + column_worksheet_range = gspread_formatting.GridRange.from_a1_range(column_range, worksheet) + # Get conditional formatting rules + if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.PERCENT_COLORED: + green_rule = gspread_formatting.ConditionalFormatRule( + ranges=[column_worksheet_range], + booleanRule=gspread_formatting.BooleanRule( + condition=gspread_formatting.BooleanCondition('NUMBER_GREATER_THAN_EQ', ['0']), + format=gspread_formatting.CellFormat( + 
textFormat=gspread_formatting.TextFormat(foregroundColor=gspread_formatting.Color(0,1,0))) + ) + ) + red_rule = gspread_formatting.ConditionalFormatRule( + ranges=[column_worksheet_range], + booleanRule=gspread_formatting.BooleanRule( + condition=gspread_formatting.BooleanCondition('NUMBER_LESS_THAN_EQ', ['0']), + format=gspread_formatting.CellFormat( + textFormat=gspread_formatting.TextFormat(foregroundColor=gspread_formatting.Color(1,0,0))) + ) + ) + # Apply conditional formatting rules + conditional_formatting_rules = gspread_formatting.get_conditional_format_rules(worksheet) + conditional_formatting_rules.append(green_rule) + conditional_formatting_rules.append(red_rule) + conditional_formatting_rules.save() + if column_formatting_options[column] in (COLUMN_FORMAT_OPTIONS.PERCENT_COLORED, COLUMN_FORMAT_OPTIONS.PERCENT_UNCOLORED): + # Apply percent format rule + gspread_formatting.format_cell_range( + worksheet, + column_range, + gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='PERCENT', pattern='0.0%')) + ) + + # Apply base formatting options gspread_formatting.format_cell_range( worksheet, f"A1:{gspread.utils.rowcol_to_a1(1, len(df.columns))}", - format_options + base_format_options ) # Delete Sheet1 if it has been created by default if "Sheet1" in [i.title for i in sheet.worksheets()]: sheet.del_worksheet(sheet.worksheet("Sheet1")) -def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, options={}): +def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}): """ Fill a sheet with the contents of a dictionary of DataFrames. The keys of the dictionary are the names of the worksheets, and the values contain the data to be placed in the sheet. 
@@ -230,8 +290,12 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, options={}): :param sheet: the gspread.Spreadsheet object :param df_dict: the dictionary of DataFrames to fill the worksheets with :param overlapBehavior: the behavior to take if any of the worksheets already exist - :param options: the formatting options for the worksheets. - Should be a dictionary with optional elements "bold_header", "center_header", "freeze_header", and "column_widths", optional + :param sheet_formatting_options: the formatting options for the worksheets. + Should be a 2 level dictionary with outer keys being names of worksheets and inner keys being some of + "bold_header", "center_header", "freeze_header", and "column_widths", optional + :param column_formatting_options: the column formatting options for the worksheets. + Should be a 2 level dictionary with outer keys being names of worksheets and inner keys being column names. + The inner values should be instances of COLUMN_FORMAT_OPTIONS, optional """ if overlapBehavior == WORKSHEET_OVERRIDE_BEHAVIORS.EXIT: for worksheet_name in df_dict.keys(): @@ -241,5 +305,8 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, options={}): except gspread.exceptions.WorksheetNotFound: pass for worksheet_name, df in df_dict.items(): - fill_worksheet_with_df(sheet, df, worksheet_name, overlapBehavior, options=options) - + fill_worksheet_with_df( + sheet, df, worksheet_name, overlapBehavior, + sheet_formatting_options=sheet_formatting_options.get(worksheet_name, DEFAULT_SHEET_FORMATTING_OPTIONS), + column_formatting_options=column_formatting_options.get(worksheet_name, {}) + ) \ No newline at end of file diff --git a/analytics/analytics_package/analytics/sheets_elements.py b/analytics/analytics_package/analytics/sheets_elements.py index 828d9303c..34ee0bce2 100644 --- a/analytics/analytics_package/analytics/sheets_elements.py +++ b/analytics/analytics_package/analytics/sheets_elements.py @@ -36,7
+36,7 @@ def get_rename_dict(dimensions): zip([dimension["id"] for dimension in dimensions], [dimension["alias"] for dimension in dimensions]) ) -def get_outbound_sheets_df(analytics_params): +def get_outbound_links_df(analytics_params): """ Get a DF with outbound links from the Analytics API. Merges the builtin and custom events for outbound links. @@ -95,4 +95,39 @@ def get_outbound_sheets_df(analytics_params): "hostname": "Hostname", } )[["Page Path", "Hostname", "Outbound Link", "Total Clicks", "Total Users"]] - return df_all_links.copy().reset_index(drop=True) \ No newline at end of file + + return df_all_links.copy().reset_index(drop=True) + +def get_outbound_links_change(analytics_params, start_current, end_current, start_previous, end_previous): + """ + Get a DF with outbound links from the Analytics API and a comparison for the prior month + :param analytics_params: the parameters for the Analytics API, including authentication and property ids + :param start_current: the start date for the current month in the format "YYYY-MM-DD" + :param end_current: the end date for the current month + :param start_previous: the start date for the previous month + :param end_previous: the end date for the previous month + """ + analytics_params_month_1 = { + **analytics_params, + "start_date": start_current, + "end_date": end_current, + } + analytics_params_month_2 = { + **analytics_params, + "start_date": start_previous, + "end_date": end_previous, + } + print(analytics_params_month_2) + df_current = get_outbound_links_df(analytics_params_month_1).set_index( + ["Page Path", "Outbound Link", "Hostname"] + ) + df_previous = get_outbound_links_df(analytics_params_month_2).set_index( + ["Page Path", "Outbound Link", "Hostname"] + ) + combined_index = df_current.index.union(df_previous.index) + df_current_reindexed = df_current.reindex(combined_index).fillna(0) + df_previous_reindexed = df_previous.reindex(combined_index) + df_current_reindexed["Total Clicks Percent 
Change"] = (df_current_reindexed["Total Clicks"] / df_previous_reindexed["Total Clicks"]) - 1 + df_current_reindexed["Total Users Percent Change"] = (df_current_reindexed["Total Users"] / df_previous_reindexed["Total Users"]) - 1 + return df_current_reindexed.sort_values(["Total Clicks", "Total Users"], ascending=False, kind="stable").reset_index() + diff --git a/analytics/analytics_package/setup.py b/analytics/analytics_package/setup.py index 76ac7ad07..df8259c4e 100644 --- a/analytics/analytics_package/setup.py +++ b/analytics/analytics_package/setup.py @@ -2,7 +2,7 @@ setup( name="analytics", - version="3.1.0", + version="3.2.0", packages=["analytics"], install_requires=["matplotlib", "pandas", "numpy", "google-auth-oauthlib", "google-api-python-client", "gspread", "gspread-formatting"], ) \ No newline at end of file