-
Notifications
You must be signed in to change notification settings - Fork 42
/
libDataLoaders.py
188 lines (142 loc) · 6.13 KB
/
libDataLoaders.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#
# Library/Module: functions for loading various datasets (libDataLoaders.py)
# Copyright (C) 2013-2015 Stephen Makonin. All Right Reserved.
#
import pandas
def AMPds_r2013(filename, ids, precision, denoised=False, verbose=True):
    """Load the AMPds Release 2013 dataset as an integer-valued DataFrame.

    Args:
        filename: Path or file-like object passed to ``pandas.read_csv``.
        ids: List of load column names to keep. It may contain ``'UNE'``;
            that entry is ignored for column selection. The list is copied
            and never modified.
        precision: Factor every column is multiplied by before the values
            are truncated to ``int``.
        denoised: When true, the aggregate column ``'WHE'`` is replaced by
            the sum of the kept load columns.
        verbose: When true, progress messages are printed.

    Returns:
        A DataFrame indexed by ``'TimeStamp'`` whose columns are ``'WHE'``,
        the kept loads, and a computed ``'UNE'`` column (aggregate minus the
        sum of the kept loads, clamped at zero).
    """
    timestamp_col = 'TimeStamp'
    agg_meter_col = 'WHE'
    unmetered_col = 'UNE'

    if verbose:
        print('Loading AMPds R1 dataset at %s...' % filename)
    df = pandas.read_csv(filename)

    if verbose:
        print('\tSetting timestamp column %s as index.' % timestamp_col)
    df = df.set_index(timestamp_col)

    if verbose:
        print('\tModfity data with precision %f then convert to int...' % precision)
    # NOTE: astype(int) truncates toward zero; it does not round.
    for col in df.columns:
        df[col] = (df[col] * precision).astype(int)

    # Work on a copy so the caller's list is left untouched.
    cols = [load for load in ids if load != unmetered_col]
    if len(cols) != len(ids) and verbose:
        print('\tNoise will modelled as %s.' % unmetered_col)

    if verbose:
        print('\tKeeping only columns %s.' % str(cols))
    # .copy() detaches the selection so the column assignments below do not
    # trigger pandas' chained-assignment warning.
    df = df[[agg_meter_col] + cols].copy()

    if denoised:
        if verbose:
            print('\tDenoising aggregate meter column %s.' % agg_meter_col)
        df[agg_meter_col] = df[cols].sum(axis=1)

    if verbose:
        print('\tCalculating unmetered column %s.' % unmetered_col)
    df[unmetered_col] = df[agg_meter_col] - df[cols].sum(axis=1)
    # Clamp only the unmetered column. Indexing .loc with the row mask alone
    # would overwrite every column of those rows (aggregate and loads) with 0.
    df.loc[df[unmetered_col] < 0, unmetered_col] = 0

    return df
def AMPds_v2(filename, ids, precision, denoised=False, verbose=True):
    """Load the AMPds Version 2 dataset as an integer-valued DataFrame.

    Args:
        filename: Path or file-like object passed to ``pandas.read_csv``.
        ids: List of load column names to keep. It may contain ``'UNE'``;
            that entry is ignored for column selection. The list is copied
            and never modified.
        precision: Factor every column is multiplied by before the values
            are truncated to ``int``.
        denoised: When true, the aggregate column ``'WHE'`` is replaced by
            the sum of the kept load columns.
        verbose: When true, progress messages are printed.

    Returns:
        A DataFrame indexed by ``'UNIX_TS'`` whose columns are ``'WHE'``,
        the kept loads, and a computed ``'UNE'`` column (aggregate minus the
        sum of the kept loads, clamped at zero).
    """
    timestamp_col = 'UNIX_TS'
    agg_meter_col = 'WHE'
    unmetered_col = 'UNE'

    if verbose:
        # This is the Version 2 (R2) loader; the message previously said R1.
        print('Loading AMPds R2 dataset at %s...' % filename)
    df = pandas.read_csv(filename)

    if verbose:
        print('\tSetting timestamp column %s as index.' % timestamp_col)
    df = df.set_index(timestamp_col)

    if verbose:
        print('\tModfity data with precision %f then convert to int...' % precision)
    # NOTE: astype(int) truncates toward zero; it does not round.
    for col in df.columns:
        df[col] = (df[col] * precision).astype(int)

    # Work on a copy so the caller's list is left untouched.
    cols = [load for load in ids if load != unmetered_col]
    if len(cols) != len(ids) and verbose:
        print('\tNoise will modelled as %s.' % unmetered_col)

    if verbose:
        print('\tKeeping only columns %s.' % str(cols))
    # .copy() detaches the selection so the column assignments below do not
    # trigger pandas' chained-assignment warning.
    df = df[[agg_meter_col] + cols].copy()

    if denoised:
        if verbose:
            print('\tDenoising aggregate meter column %s.' % agg_meter_col)
        df[agg_meter_col] = df[cols].sum(axis=1)

    if verbose:
        print('\tCalculating unmetered column %s.' % unmetered_col)
    df[unmetered_col] = df[agg_meter_col] - df[cols].sum(axis=1)
    # Clamp only the unmetered column. Indexing .loc with the row mask alone
    # would overwrite every column of those rows (aggregate and loads) with 0.
    df.loc[df[unmetered_col] < 0, unmetered_col] = 0

    return df
def RAE_power(filename, ids, precision, denoised=False, verbose=True):
    """Load an RAE dataset file as an integer-valued DataFrame.

    Args:
        filename: Path or file-like object passed to ``pandas.read_csv``.
        ids: List of load identifiers, interpreted as follows:
            * an entry containing ``'-'`` drops the column named by the entry
              minus its first character, and the entry itself is removed
              from ``ids`` IN PLACE (the caller's list is mutated);
            * an entry containing ``'+'`` creates a new column, named by the
              entry, that is the sum of the ``'+'``-separated columns, which
              are then dropped;
            * ``'noise'`` is ignored for column selection.
        precision: Factor every kept column is multiplied by before the
            values are truncated to ``int``.
        denoised: When true, the aggregate column ``'mains'`` is replaced by
            the sum of the kept load columns.
        verbose: When true, progress messages are printed.

    Returns:
        A DataFrame indexed by ``'unix_ts'`` whose columns are ``'mains'``,
        the kept loads, and a computed ``'noise'`` column (aggregate minus
        the sum of the kept loads, clamped at zero).
    """
    timestamp_col = 'unix_ts'
    agg_meter_col = 'mains'
    unmetered_col = 'noise'

    if verbose:
        print('Loading RAE dataset file %s...' % filename)
    df = pandas.read_csv(filename)

    if verbose:
        print('\tSetting timestamp column %s as index.' % timestamp_col)
    df = df.set_index(timestamp_col)

    if verbose:
        print('\tRemoving loads...')
    # Columns '21' and '22' are always discarded.
    # NOTE(review): the reason is not visible here; confirm against the
    # dataset documentation.
    del df['21']
    del df['22']

    # Drop loads flagged with '-' and strip those flags from the caller's
    # list. The in-place mutation is kept from the original implementation,
    # since callers may rely on `ids` being cleaned up.
    flagged = [load_id for load_id in ids if '-' in load_id]
    for load_id in flagged:
        df.drop(load_id[1:], inplace=True, axis=1)
    for load_id in flagged:
        ids.remove(load_id)

    # The aggregate is rebuilt from every remaining column except the first
    # two. NOTE(review): presumably those two are not sub-meter readings;
    # confirm against the CSV layout.
    headers = list(df.columns.values)[2:]
    df[agg_meter_col] = df[headers].sum(axis=1)

    if verbose:
        print('\tCombining L1 and L2 for double-pole loads...')
    for load_id in ids:
        if '+' in load_id:
            sub_ids = load_id.split('+')
            df[load_id] = 0
            for sub_id in sub_ids:
                df[load_id] += df[sub_id]
            df.drop(sub_ids, inplace=True, axis=1)

    cols = [load_id for load_id in ids if load_id != unmetered_col]
    if len(cols) != len(ids) and verbose:
        print('\tNoise will modelled as %s.' % unmetered_col)

    if verbose:
        print('\tKeeping only columns %s.' % str(cols))
    # .copy() detaches the selection so the column assignments below do not
    # trigger pandas' chained-assignment warning.
    df = df[[agg_meter_col] + cols].copy()

    if denoised:
        if verbose:
            print('\tDenoising aggregate meter column %s.' % agg_meter_col)
        df[agg_meter_col] = df[cols].sum(axis=1)

    if verbose:
        print('\tModfity data with precision %f then convert to int...' % precision)
    # NOTE: astype(int) truncates toward zero; it does not round.
    for col in df.columns:
        df[col] = (df[col] * precision).astype(int)

    if verbose:
        print('\tCalculating unmetered column %s.' % unmetered_col)
    df[unmetered_col] = df[agg_meter_col] - df[cols].sum(axis=1)
    # Clamp only the unmetered column. Indexing .loc with the row mask alone
    # would overwrite every column of those rows (aggregate and loads) with 0.
    df.loc[df[unmetered_col] < 0, unmetered_col] = 0

    return df
def REDD_lo(filename, ids, precision, denoised=False, verbose=True):
    """Load the REDD low-resolution dataset.

    Args:
        filename: Path or file-like object passed to ``pandas.read_csv``.
        ids: List of load column names to keep. It may contain ``'DIFF'``;
            that entry is ignored for column selection. The list is copied
            and never modified.
        precision: Accepted so the signature matches the other loaders; this
            loader does not scale or convert the values.
        denoised: When true, the aggregate column ``'MAIN'`` is replaced by
            the sum of the kept load columns.
        verbose: When true, progress messages are printed.

    Returns:
        A DataFrame indexed by ``'TimeStamp'`` whose columns are ``'MAIN'``,
        the kept loads, and a computed ``'DIFF'`` column (aggregate minus
        the sum of the kept loads, clamped at zero).
    """
    timestamp_col = 'TimeStamp'
    agg_meter_col = 'MAIN'
    unmetered_col = 'DIFF'

    if verbose:
        print('Loading REDD Low Res dataset at %s...' % filename)
    df = pandas.read_csv(filename)

    if verbose:
        print('\tSetting timestamp column %s as index.' % timestamp_col)
    df = df.set_index(timestamp_col)

    # Work on a copy so the caller's list is left untouched.
    cols = [load for load in ids if load != unmetered_col]
    if len(cols) != len(ids) and verbose:
        print('\tNoise will modelled as %s.' % unmetered_col)

    if verbose:
        print('\tKeeping only columns %s.' % str(cols))
    # .copy() detaches the selection so the column assignments below do not
    # trigger pandas' chained-assignment warning.
    df = df[[agg_meter_col] + cols].copy()

    if denoised:
        if verbose:
            print('\tDenoising aggregate meter column %s.' % agg_meter_col)
        df[agg_meter_col] = df[cols].sum(axis=1)

    if verbose:
        print('\tCalculating unmetered column %s.' % unmetered_col)
    df[unmetered_col] = df[agg_meter_col] - df[cols].sum(axis=1)
    # Clamp only the unmetered column. Indexing .loc with the row mask alone
    # would overwrite every column of those rows (aggregate and loads) with 0.
    df.loc[df[unmetered_col] < 0, unmetered_col] = 0

    return df
def dataset_loader(filename, ids, precision, denoised=False, verbose=True):
    """Pick the dataset-specific loader from a keyword in ``filename``.

    The keywords are tested in this order, and the first match wins:
    ``'AMPdsR1'``, ``'AMPdsR2'``, ``'RAE'``, ``'REDD'``.

    Args:
        filename: Dataset path; also used to choose the loader.
        ids: Load identifiers, forwarded to the chosen loader.
        precision: Scaling factor, forwarded to the chosen loader.
        denoised: Forwarded to the chosen loader.
        verbose: Forwarded to the chosen loader.

    Returns:
        Whatever the selected loader returns.

    Raises:
        SystemExit: With exit code 1, after printing an error message, when
            no keyword matches ``filename``.
    """
    if 'AMPdsR1' in filename:
        return AMPds_r2013(filename, ids, precision, denoised, verbose)
    if 'AMPdsR2' in filename:
        return AMPds_v2(filename, ids, precision, denoised, verbose)
    if 'RAE' in filename:
        return RAE_power(filename, ids, precision, denoised, verbose)
    if 'REDD' in filename:
        return REDD_lo(filename, ids, precision, denoised, verbose)

    print("ERROR: Do not know how to load dataset!")
    # The bare `exit` helper is injected by the `site` module for interactive
    # use and may be missing (e.g. `python -S`, frozen builds). Raising
    # SystemExit directly has the same effect without that dependency.
    raise SystemExit(1)