-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbreatheright_correlation_analysis.py
46 lines (35 loc) · 1.49 KB
/
breatheright_correlation_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from breatheright_data_preparation import data_folder, merged_ozone_file_name
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
def do_breatheright_correlation_analysis():
    """Correlate ozone statistics with lung disease mortality and save the results.

    Reads the merged ozone / lung disease CSV from ``data_folder``, computes
    the pairwise correlation matrix (``DataFrame.corr`` defaults) between
    the mortality columns and the ozone summary columns, then writes two
    files into ``data_folder``:

    * ``correlation_matrix_ozone_lung_disease_5years.csv`` - the matrix itself
    * ``correlation_heatmap_ozone_lung_disease.png`` - an annotated heatmap

    The first rows and the column index of the input are printed to stdout
    as a quick sanity check.

    Raises:
        FileNotFoundError: if the merged CSV does not exist.
        KeyError: if any of the expected columns is missing from the CSV.
    """
    # Read in the merged CSV file with ozone and lung disease data.
    merged_ozone_df = pd.read_csv(f"{data_folder}/{merged_ozone_file_name}")
    print(merged_ozone_df.head())
    print(merged_ozone_df.columns)

    # Drop the duplicated name columns (the _x/_y suffixed pairs); they are
    # not needed for the analysis.
    merged_ozone_df = merged_ozone_df.drop(
        columns=[
            "County Name_x", 'State Name_x',
            'County Name_y', 'State Name_y',
        ]
    )

    # Prefix the generic statistic names so they are unambiguous next to the
    # mortality_* columns in the matrix and on the heatmap axes.
    merged_ozone_df = merged_ozone_df.rename(
        columns={
            'Max': 'ozone_max',
            'Min': 'ozone_min',
            'Mean': 'ozone_mean',
            'Median': 'ozone_median',
            'Std': 'ozone_std',
        }
    )

    # Calculate correlations between the mortality and ozone columns.
    correlation_matrix = merged_ozone_df[[
        'mortality_average', 'mortality_min', 'mortality_max',
        'ozone_max', 'ozone_min', 'ozone_mean', 'ozone_median',
        'ozone_std',
    ]].corr()

    # Save correlation matrix to CSV.
    correlation_matrix.to_csv(
        f'{data_folder}/correlation_matrix_ozone_lung_disease_5years.csv'
    )

    # Plot the correlation heatmap. pyplot keeps every figure it creates
    # alive until it is explicitly closed, so close it even if plotting or
    # saving fails; otherwise repeated calls accumulate open figures.
    fig = plt.figure(figsize=(10, 8))
    try:
        # Fix the colour scale to the full [-1, 1] correlation range,
        # centred on 0, so colours are comparable between runs.
        sns.heatmap(
            correlation_matrix,
            annot=True,
            cmap='coolwarm',
            vmin=-1,
            vmax=1,
            center=0,
        )
        plt.title('Correlation Heatmap')
        plt.savefig(f'{data_folder}/correlation_heatmap_ozone_lung_disease.png')
    finally:
        plt.close(fig)
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    do_breatheright_correlation_analysis()