Skip to content

Commit

Permalink
Push changes to base TELL package
Browse files Browse the repository at this point in the history
  • Loading branch information
cdburley committed Jan 29, 2025
1 parent da5e8f7 commit 384a288
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 88 deletions.
28 changes: 15 additions & 13 deletions notebooks/tell_data_preprocessing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,9 @@
"outputs": [],
"source": [
"# Execute the function to pre-process the raw EIA-930 data using parallel processing streams:\n",
"tell.process_eia_930_data(data_input_dir = tell_data_dir,\n",
" n_jobs = -1)\n"
"tell.process_eia_930_data(eia_930_data_input_dir = '/Users/burl878/Documents/Code/code_repos/tell/tell/tell_data/tell_raw_data/EIA_930/Balancing_Authority',\n",
" data_output_dir = '/Users/burl878/Documents/Code/code_repos/tell/tell/tell_data/tell_quickstarter_data/outputs/historical_ba_load',\n",
" n_jobs = -2)\n"
]
},
{
Expand Down Expand Up @@ -202,8 +203,10 @@
"source": [
"# Execute the function to pre-process the historical observed population data for all years from 2015 to 2023:\n",
"tell.process_ba_population_data(start_year = 2015,\n",
" end_year = 2020,\n",
" data_input_dir = tell_data_dir)\n"
" end_year = 2023,\n",
" ba_mapping_input_dir = '/Users/burl878/Documents/Code/code_repos/tell/tell/tell_data/tell_quickstarter_data/outputs/ba_service_territory', \n",
" population_input_dir = '/Users/burl878/Documents/Code/code_repos/tell/tell/tell_data/tell_raw_data/Population',\n",
" data_output_dir = '/Users/burl878/Documents/Code/code_repos/tell/tell/tell_data/tell_quickstarter_data/outputs/historical_population')\n"
]
},
{
Expand Down Expand Up @@ -262,20 +265,19 @@
"outputs": [],
"source": [
"# Compile the historical load, population, and meteorology data into a single set of .csv files:\n",
"tell.compile_data(start_year = 2015,\n",
" end_year = 2019,\n",
" data_input_dir = tell_data_dir)\n"
"tell.compile_data(start_year = 2016,\n",
" end_year = 2023,\n",
" load_input_dir = '/Users/burl878/Documents/Code/code_repos/tell/tell/tell_data/tell_quickstarter_data/outputs/historical_ba_load',\n",
" ba_weather_input_dir = '/Users/burl878/Documents/Code/code_repos/tell/tell/tell_data/sample_forcing_data/historical_weather',\n",
" population_input_dir = '/Users/burl878/Documents/Code/code_repos/tell/tell/tell_data/tell_quickstarter_data/outputs/historical_population',\n",
" data_output_dir = '/Users/burl878/Documents/Code/code_repos/tell/tell/tell_data/tell_quickstarter_data/outputs/compiled_historical_data')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b20187e-1e0d-4a4b-b0b4-6f332fd6ec41",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"id": "df7b4ef4-2dc7-464e-8e0e-d165bb916db4",
"metadata": {},
"outputs": [],
"source": []
}
Expand Down
40 changes: 22 additions & 18 deletions tell/data_process_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from .package_data import get_ba_abbreviations


def compile_data(start_year: int, end_year: int, data_input_dir: str):
def compile_data(start_year: int, end_year: int, load_input_dir: str, ba_weather_input_dir: str,
population_input_dir: str, data_output_dir: str):
"""Merge the load, population, and climate data into a single .csv file for each BA
:param start_year: Year to start process; four digit year (e.g., 1990)
Expand All @@ -15,43 +16,46 @@ def compile_data(start_year: int, end_year: int, data_input_dir: str):
:param end_year: Year to end process; four digit year (e.g., 1990)
:type end_year: int
:param data_input_dir: Top-level data directory for TELL
:type data_input_dir: str
:param load_input_dir: Path to where the pre-processed BA hourly loads are located
:type load_input_dir: str
:param ba_weather_input_dir: Path to where the pre-processed BA weather data is located
:type ba_weather_input_dir: str
:param population_input_dir: Path to where the pre-processed BA population data is located
:type population_input_dir: str
:param data_output_dir: Place to store the output files
:type data_output_dir: str
"""

# Get a list of BA abbreviations to process:
ba_name = get_ba_abbreviations()

# Set the input directories for each variable:
load_dir = os.path.join(data_input_dir, r'tell_quickstarter_data', r'outputs', r'historical_ba_load')
population_dir = os.path.join(data_input_dir, r'tell_quickstarter_data', r'outputs', r'historical_population')
weather_dir = os.path.join(data_input_dir, r'sample_forcing_data', r'historical_weather')

# Set the output directory based on the "data_input_dir" variable:
output_dir = os.path.join(data_input_dir, r'tell_quickstarter_data', r'outputs', r'compiled_historical_data')
if not os.path.exists(output_dir):
os.makedirs(output_dir)
if not os.path.exists(data_output_dir):
os.makedirs(data_output_dir)

# Loop over the list of BAs to process:
for i in ba_name:

# Check to make sure all of the requisite data exist for that BA:
all_data_present = False
if os.path.isfile(os.path.join(load_dir, f"{i}_hourly_load_data.csv")) is True:
if os.path.isfile(os.path.join(population_dir, f"{i}_hourly_population_data.csv")) is True:
if os.path.isfile(os.path.join(weather_dir, f"{i}_WRF_Hourly_Mean_Meteorology_2019.csv")) is True:
if os.path.isfile(os.path.join(load_input_dir, f"{i}_hourly_load_data.csv")) is True:
if os.path.isfile(os.path.join(population_input_dir, f"{i}_hourly_population_data.csv")) is True:
if os.path.isfile(os.path.join(ba_weather_input_dir, f"{i}_WRF_Hourly_Mean_Meteorology_2019.csv")) is True:
all_data_present = True

if all_data_present is True:
# Read in the historical load and population data for that BA:
load_df = pd.read_csv(os.path.join(load_dir, f"{i}_hourly_load_data.csv"))
population_df = pd.read_csv(os.path.join(population_dir, f"{i}_hourly_population_data.csv"))
load_df = pd.read_csv(os.path.join(load_input_dir, f"{i}_hourly_load_data.csv"))
population_df = pd.read_csv(os.path.join(population_input_dir, f"{i}_hourly_population_data.csv"))

# Loop over the range of years defined by the 'start_year' and 'end_year' variables:
for year in range(start_year, end_year + 1):
# Read in the annual historical weather for that BA:
temp_weather_df = pd.read_csv(os.path.join(weather_dir, f"{i}_WRF_Hourly_Mean_Meteorology_{year}.csv"))
temp_weather_df = pd.read_csv(os.path.join(ba_weather_input_dir, f"{i}_WRF_Hourly_Mean_Meteorology_{year}.csv"))

# Convert the time stamp to a datetime variable and then extract the year, month, day, and hour variables:
temp_weather_df['Time_UTC'] = pd.to_datetime(temp_weather_df['Time_UTC'])
Expand Down Expand Up @@ -79,7 +83,7 @@ def compile_data(start_year: int, end_year: int, data_input_dir: str):
merged_second['Total_Population'] = merged_second['Total_Population'].round(2)

# Write the merged dataframe to a .csv file
merged_second.to_csv(os.path.join(output_dir, f"{i}_historical_data.csv"), index=False, header=True)
merged_second.to_csv(os.path.join(data_output_dir, f"{i}_historical_data.csv"), index=False, header=True)

# Clean up the variables and move to the next BA in the loop:
del temp_weather_df, weather_df, load_df, population_df, merged_first, merged_second, all_data_present
46 changes: 23 additions & 23 deletions tell/data_process_eia_930.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
from .package_data import get_ba_abbreviations


def list_EIA_930_files(data_input_dir: str) -> list:
def list_EIA_930_files(eia_930_data_input_dir: str) -> list:
"""Make a list of all the file names for the EIA-930 hourly load dataset
:param data_input_dir: Top-level data directory for TELL
:type data_input_dir: str
:param eia_930_data_input_dir: Path to where the raw EIA-930 data are stored
:type eia_930_data_input_dir: str
:return: list
Expand All @@ -24,30 +24,27 @@ def list_EIA_930_files(data_input_dir: str) -> list:

# Loop over the list and find the path for each BA in the list:
for i in ba_name:
path_to_check = os.path.join(data_input_dir, r'tell_raw_data', r'EIA_930', r'Balancing_Authority', f'{i}.xlsx')
path_to_check = os.path.join(eia_930_data_input_dir, f'{i}.xlsx')
path_list.append(path_to_check)

# Return the list:
return path_list


def eia_data_subset(file_string: str, data_input_dir: str):
def eia_data_subset(file_string: str, data_output_dir: str):
"""Extract only the columns TELL needs from the EIA-930 Excel files
:param file_string: File name of EIA-930 hourly load data by BA
:type file_string: str
:param data_input_dir: Top-level data directory for TELL
:type data_input_dir: str
:param data_output_dir: Place to store the output files
:type data_output_dir: str
"""

# Set the output directory based on the "data_input_dir" variable:
output_dir = os.path.join(data_input_dir, r'tell_quickstarter_data', r'outputs', r'historical_ba_load')

# If the output directory doesn't exist then create it:
if not os.path.exists(output_dir):
os.makedirs(output_dir)
if not os.path.exists(data_output_dir):
os.makedirs(data_output_dir)

# Read in the data from the "Published Hourly Data" sheet:
df = pd.read_excel(file_string, sheet_name='Published Hourly Data')
Expand All @@ -59,27 +56,30 @@ def eia_data_subset(file_string: str, data_input_dir: str):
df['Hour'] = df['UTC time'].dt.strftime('%H')

# Only keep the columns that are needed:
col_names = ['Year', 'Month', 'Day', 'Hour', 'DF', 'Adjusted D', 'Adjusted NG', 'Adjusted TI']
col_names = ['Year', 'Month', 'Day', 'Hour', 'Demand forecast', 'Adjusted demand', 'Adjusted net generation', 'Adjusted total interchange']
df = df[col_names].copy()

# Rename the columns to add the units to each variable:
df.rename(columns={"DF": "Forecast_Demand_MWh",
"Adjusted D": "Adjusted_Demand_MWh",
"Adjusted NG": "Adjusted_Generation_MWh",
"Adjusted TI": "Adjusted_Interchange_MWh"}, inplace=True)
df.rename(columns={"Demand forecast": "Forecast_Demand_MWh",
"Adjusted demand": "Adjusted_Demand_MWh",
"Adjusted net generation": "Adjusted_Generation_MWh",
"Adjusted total interchange": "Adjusted_Interchange_MWh"}, inplace=True)

# Extract the BA name from the "file_string" variable:
BA_name = os.path.splitext(os.path.basename(file_string))[0]

# Write the output to a .csv file:
df.to_csv(os.path.join(output_dir, f'{BA_name}_hourly_load_data.csv'), index=False, header=True)
df.to_csv(os.path.join(data_output_dir, f'{BA_name}_hourly_load_data.csv'), index=False, header=True)


def process_eia_930_data(data_input_dir: str, n_jobs: int):
def process_eia_930_data(eia_930_data_input_dir: str, data_output_dir: str, n_jobs: int):
"""Read in list of EIA 930 files, subset the data, and save the output as a .csv file
:param data_input_dir: Top-level data directory for TELL
:type data_input_dir: str
:param eia_930_data_input_dir: Path to where the raw EIA-930 data are stored
:type eia_930_data_input_dir: str
:param data_output_dir: Place to store the output files
:type data_output_dir: str
:param n_jobs: The maximum number of concurrently running jobs, such as the number of Python
worker processes when backend=”multiprocessing” or the size of the thread-pool
Expand All @@ -94,12 +94,12 @@ def process_eia_930_data(data_input_dir: str, n_jobs: int):
"""

# Create the list of EIA-930 Excel files:
list_of_files = list_EIA_930_files(data_input_dir)
list_of_files = list_EIA_930_files(eia_930_data_input_dir)

# Process each file in the list in parallel:
Parallel(n_jobs=n_jobs)(
delayed(eia_data_subset)(
file_string=i,
data_input_dir=data_input_dir
data_output_dir=data_output_dir
) for i in list_of_files
)
Loading

0 comments on commit 384a288

Please sign in to comment.