-
Notifications
You must be signed in to change notification settings - Fork 7
/
data_input.py
77 lines (58 loc) · 3.37 KB
/
data_input.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import pandas as pd
import glob
### Import files from each year in a separate dataframe
class DataInput:
    """Read per-location ``.dat`` files and merge them with clear-sky frames.

    Files are looked up relative to the current working directory:

    * training set: ``data/<test_location>/Exp_1_train/*.dat``
    * test set: ``data/<test_location>/Exp_1_test/<test_year>/*.dat``

    Args:
        test_location: Name of the location directory under ``data/``.
        test_year: Name of the sub-directory of ``Exp_1_test`` to read. It is
            converted with ``str()``, so an integer year works as well.
        run_train: When truthy, ``load_n_merge`` loads the training set in
            addition to the test set.
        cs_test: DataFrame merged with the test data. It must contain the
            columns named in ``_MERGE_KEYS``.
        cs_2010and2011: DataFrame merged with the training data; only used
            when ``run_train`` is truthy. Same key columns required.
    """

    # Names assigned to the columns of the header-less data files, in order.
    _COLUMNS = (
        'year', 'jday', 'month', 'day', 'hour', 'min', 'dt', 'zen',
        'dw_solar', 'dw_solar_QC', 'uw_solar', 'uw_solar_QC',
        'direct_n', 'direct_n_QC', 'diffuse', 'diffuse_QC',
        'dw_ir', 'dw_ir_QC', 'dw_casetemp', 'dw_casetemp_QC',
        'dw_dometemp', 'dw_dometemp_QC', 'uw_ir', 'uw_ir_QC',
        'uw_casetemp', 'uw_casetemp_QC', 'uw_dometemp', 'uw_dometemp_QC',
        'uvb', 'uvb_QC', 'par', 'par_QC', 'netsolar', 'netsolar_QC',
        'netir', 'netir_QC', 'totalnet', 'totalnet_QC', 'temp', 'temp_QC',
        'rh', 'rh_QC', 'windspd', 'windspd_QC', 'winddir', 'winddir_QC',
        'pressure', 'pressure_QC',
    )

    # Columns on which the measurements and the clear-sky frames are joined.
    _MERGE_KEYS = ('year', 'month', 'day', 'hour', 'min')

    def __init__(self, test_location, test_year, run_train, cs_test, cs_2010and2011):
        self.test_location = test_location
        self.test_year = test_year
        self.run_train = run_train
        self.cs_test = cs_test
        self.cs_2010and2011 = cs_2010and2011

    def load_n_merge(self):
        """Load the data files and merge them with the clear-sky frames.

        Returns:
            ``(df_train, df_test)`` when ``run_train`` is truthy, otherwise
            only ``df_test``. Each frame is the result of ``pd.merge`` (with
            its default join type) between the concatenated files and the
            matching clear-sky frame on ``_MERGE_KEYS``.

        Raises:
            ValueError: If a directory that has to be read contains no
                ``.dat`` file.
        """
        if self.run_train:
            df_train = self._load_train_set()
            df_test = self._load_test_set('data')
            return df_train, df_test
        # The './' prefix only affects the printed path; it is kept so the
        # console output of the test-only mode stays exactly as it was.
        return self._load_test_set('./data')

    def _read_dat_files(self, path):
        """Concatenate every ``*.dat`` file found in *path*, in sorted order."""
        all_files = sorted(glob.glob(path + "/*.dat"))
        if not all_files:
            # pd.concat would fail with "No objects to concatenate", which
            # does not say where the files were expected.
            raise ValueError("No .dat files found in '{}'".format(path))
        frames = (
            pd.read_csv(
                f,
                skipinitialspace=True,
                quotechar='"',
                skiprows=2,  # the first two lines of each file are not data
                delimiter=' ',
                index_col=False,
                header=None,
                names=list(self._COLUMNS),
            )
            for f in all_files
        )
        return pd.concat(frames, ignore_index=True)

    def _load_train_set(self):
        """Read the training files and merge them with ``cs_2010and2011``."""
        path = 'data/' + self.test_location + '/Exp_1_train'
        print("train_path:", path)
        df_big_train = self._read_dat_files(path)
        print(df_big_train.shape)
        df_train = pd.merge(df_big_train, self.cs_2010and2011,
                            on=list(self._MERGE_KEYS))
        print("loaded training set\n")
        print("df_train.shape=", df_train.shape)
        return df_train

    def _load_test_set(self, data_root):
        """Read the test files below *data_root* and merge with ``cs_test``."""
        path = (data_root + '/' + self.test_location + '/Exp_1_test/'
                + str(self.test_year))
        print(path)
        df_big_test = self._read_dat_files(path)
        df_test = pd.merge(df_big_test, self.cs_test,
                           on=list(self._MERGE_KEYS))
        print('df_test.shape:', df_test.shape)
        print("loaded test set\n")
        print('df_big_test.shape:', df_big_test.shape)
        return df_test