-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrandom_walk_itemized.py
113 lines (85 loc) · 3.82 KB
/
random_walk_itemized.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import os
from scipy.stats import pearsonr
from datetime import datetime
import pickle
#Forecast horizon: each quantity is "predicted" by the value observed
#FORECAST_SHIFT rows earlier within the same item's history.
FORECAST_SHIFT = 7
#Number of trailing observations per item reserved for evaluation.
SIZE_TEST_SET = 20
'''
#############################################################################################
########### LOADING FROM DISK #############
#############################################################################################
'''
#Load data
#Pickled DataFrame produced by the upstream preparation step; assumed to hold
#ITEM_ID, REQUESTED_DELIVERY_DATE and REQUESTED_QUANTITY columns and to be
#ordered chronologically within each item -- TODO confirm against the producer.
with open('generated_data/data_prepared_itemized.pickle', 'rb') as f:
    orders = pickle.load(f)
print('Dataset is loaded from disk')
#Create the test set: keep only the last SIZE_TEST_SET rows of every item.
#groupby().tail() is the documented idiom for this and replaces the deprecated
#groupby().apply(lambda x: x[-SIZE_TEST_SET:]) pattern, which also built a
#MultiIndex with a duplicated ITEM_ID level. The index is dropped before the
#results are saved, so downstream metrics are unaffected.
orders = orders.groupby('ITEM_ID').tail(SIZE_TEST_SET)
'''
#############################################################################################
########### RANDOM WALK #############
#############################################################################################
'''
#Assemble the result frame: the naive (random-walk) forecast starts out as a
#copy of the observed quantity and is shifted forward in time per item below.
result_columns = {
    'DATE': orders['REQUESTED_DELIVERY_DATE'],
    'ITEM_ID': orders['ITEM_ID'],
    'QTY_ORIGINAL': orders['REQUESTED_QUANTITY'],
    'QTY_PREDICTED': orders['REQUESTED_QUANTITY'],
}
df_result = pd.DataFrame(result_columns)
#Random walk: within each item, the prediction for a row is the quantity that
#was observed FORECAST_SHIFT rows before it.
shifted_qty = df_result.groupby('ITEM_ID')['QTY_PREDICTED'].shift(FORECAST_SHIFT)
df_result['QTY_PREDICTED'] = shifted_qty
#The first FORECAST_SHIFT rows of every item have no earlier value to copy and
#therefore hold NaN predictions; discard them.
df_result.dropna(how='any', inplace=True)
'''
#############################################################################################
########### EVALUATION #############
#############################################################################################
'''
#SINGLE VALUES
#Point metrics comparing the naive forecast against the observed quantities.
actual = df_result['QTY_ORIGINAL']
predicted = df_result['QTY_PREDICTED']
residual = actual - predicted
loss = {}
#Pearson r between truth and forecast (pearsonr returns (r, p-value)).
loss['CORR'] = pearsonr(actual, predicted)[0]
loss['MAE'] = abs(residual).mean()
loss['RMSE'] = np.sqrt((residual ** 2).mean())
#NOTE(review): MAPE divides by the actuals, so zero quantities would yield
#inf/NaN here -- confirm upstream data contains no zero REQUESTED_QUANTITY.
loss['MAPE'] = abs(residual / actual).mean() * 100
#Calc MASE
#MASE scales the forecast error by the mean absolute one-step change of the
#training series, i.e. the in-sample error of the one-step naive forecast.
import h5py
with h5py.File('results_final/ORIGINAL_TRAIN_Y.h5', 'r') as h5f:
    train_y = pd.Series(h5f['ORIGINAL_TRAIN_Y'][:])
n = train_y.shape[0]
#Mean absolute first difference of the training set (n-1 valid steps).
d = abs(train_y.diff()).sum() / (n - 1)
errors = np.abs(df_result['QTY_ORIGINAL'] - df_result['QTY_PREDICTED'])
loss['MASE'] = errors.mean() / d
print('CORR: ', loss['CORR'])
print('MAE: ', loss['MAE'])
print('RMSE: ', loss['RMSE'])
print('MAPE: ', loss['MAPE'])
print('MASE: ', loss['MASE'])
#Plotting
#Flatten the index to a simple running position so the x-axis is row order,
#then plot a 100-row window as a quick visual sanity check of forecast vs
#actual. Assumes the combined test set has more than 600 rows -- TODO confirm.
df_result.reset_index(inplace=True, drop=True)
df_result.iloc[600:700].plot()
'''
###############################################################################
################## SAVE RESULTS #################
###############################################################################
'''
#Results folder is named after the losses so runs are self-describing and the
#directory listing sorts by MASE.
unique_folder = 'losses_{:06.4f}_{}_{}_{}'.format(loss['MASE'], int(loss['MAE']), int(loss['RMSE']), int(loss['MAPE']))
path_folder = os.path.join('generated_by_randomwalk_itemized', unique_folder)
#makedirs also creates the parent directory on a fresh checkout, where plain
#os.mkdir would raise FileNotFoundError; with the default exist_ok=False it
#still raises FileExistsError on a duplicate run, like mkdir did.
os.makedirs(path_folder)
path_losses = os.path.join(path_folder, 'losses.h5')
path_predictions = os.path.join(path_folder, 'predictions.h5')
#Save losses and predictions (key= keyword: the positional key argument of
#DataFrame.to_hdf is deprecated).
df_loss = pd.DataFrame(loss, index=[0])
df_loss.to_hdf(path_losses, key='losses', mode='w')
df_result.to_hdf(path_predictions, key='predictions', mode='w')
#Save source code
#Snapshot this script next to its results for reproducibility.
path_source_code = os.path.join(path_folder, 'source_code.txt')
with open(__file__) as sc:
    source_code = sc.read()
with open(path_source_code, 'w') as text_file:
    text_file.write(source_code)
print('Results are saved to disk')