# -*- coding: utf-8 -*-
"""train.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1tj0k--0Rv1k0KLOR8k0lZYuHIk9q_RY9
# Script Description: train.py
This script loads the corresponding training dataloader from the /app/application_files/dataloaders folder and uses it to build and train the model with the parameters (model architecture, batch size, learning rate and epochs) set in the config.ini. It then stores the trained model in the /models folder and a plot of the training loss over the epochs in /app/application_files/loss_plots. Both the model and the plot are stored inside the subfolder matching the architecture used for training, e.g. the /models/VGG and /app/application_files/loss_plots/VGG folders if the VGG model architecture was set in the config.ini file.
Depending on the value of the *'use_validation_model'* option set in the config.ini file, the script behaves differently:
> * *'use_validation_model'* option set to ***True***:
The script loads the train_loader.pth file from /app/application_files/dataloaders (generated when the data_loading.py script was executed with the *'use_validation_model'* option set to ***True*** in the config.ini file to make the split) as the training dataloader. After training, the model is saved inside the /models folder, in the subfolder matching the architecture set in the config.ini file, as **validation_model.pth**. The training loss plot is likewise saved as validation_model_train_loss.png in the corresponding architecture folder.
> * *'use_validation_model'* option set to ***False***:
The script loads the all_data_loader.pth file from /app/application_files/dataloaders (generated when the data_loading.py script was executed with the *'use_validation_model'* option set to ***False*** in the config.ini file to skip the split) as the training dataloader. After training, the model is saved inside the /models folder, in the subfolder matching the architecture set in the config.ini file, as **final_model.pth**. The training loss plot is likewise saved as final_model_train_loss.png in the corresponding architecture folder.
---
The training parameters are set according to the values of the following constants in the config.ini file (a sketch of the file follows the list):
* ***MODEL_ARCHITECTURE*** : This constant specifies the model architecture used to build and train the model. Only three values are valid [*RESNET, VGG, MOBILENET*], selecting one of the three available architectures.
* ***LR*** : This constant specifies the learning rate to be used in the model training.
* ***EPOCHS*** : This constant specifies the number of epochs to be used in the model training.
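For reference, a minimal config.ini sketch showing the sections and keys this script reads; the values shown are illustrative assumptions, not project defaults:
```ini
[CONSTANTS]
MODEL_ARCHITECTURE = VGG
LR = 0.001
EPOCHS = 100
BATCH_SIZE = 256
SIZE = 96

[OPTIONS]
show_images_during_training = True
reproducibility = True
use_validation_model = True
```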
## Importing required libraries
"""
import numpy as np
import matplotlib
matplotlib.style.use('ggplot')
import matplotlib.pyplot as plt
import torch
import random
import os
import sys
import configparser
print('\n--> train.py execution starts...\n')
"""## Setting GPU as processing device instead of CPU"""
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
"""## Initializating config parser and reading config.ini"""
config = configparser.ConfigParser()
config.read('./app/application_files/config.ini')
"""## Loading paths, constants and options from configuration file"""
# learning parameters
LR = float(config['CONSTANTS']['LR'])
EPOCHS = int(config['CONSTANTS']['EPOCHS'])
# Batch size
BATCH_SIZE = int(config['CONSTANTS']['BATCH_SIZE'])
#Model architecture
MODEL_ARCHITECTURE = config['CONSTANTS']['MODEL_ARCHITECTURE']
# Size of images
SIZE = int(config['CONSTANTS']['SIZE'])
# Decide if you want to see an example image during training to visualize the model development:
show_images_during_training = config['OPTIONS']['show_images_during_training'] == "True"
# True if we want to reproduce the same results setting fixed seeds
reproducibility = config['OPTIONS']['reproducibility'] == "True"
# If use_validation_model=True, this script will generate a validation_model.pth file (in the models folder) which contains the model trained using the train_loader.pth dataloader file generated by the data_loading.py
# script using the corresponding training data after applying the specified split and applying augmentation. This generated model will be used by the test.py script to evaluate the model over the test dataset.
# If use_validation_model=False, this script will generate a final_model.pth file (in the models folder) which contains the model trained using the all_data_loader.pth dataloader file generated by the data_loading.py
# script using the whole training data provided in the csv and applying augmentation. This generated final model will be the one used by the application to detect the facial landmarks in the video stream.
use_validation_model = config['OPTIONS']['use_validation_model'] == "True"
"""## Importing own-built architecture library where our model architectures are defined and own-built library where our customised FacialKeyPointDataset class is defined since is required to load the dataloaders"""
import architecture
# Adding the path (where the customised library is located) to the system path so it can be imported
library_path = os.getcwd() + '/app/application_files'
sys.path.append(library_path)
import customized_dataset_augmentation_library
"""## Setting fixed seeds and configurations for reproducibility if option activated"""
if reproducibility:
    torch.backends.cudnn.deterministic = True
    random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed(1)
    np.random.seed(1)
"""# 1) Loading Training Dataloader"""
print('Loading training dataloader ...')
if use_validation_model:
    try:
        training_loader = torch.load('./app/application_files/dataloaders/train_loader.pth')
    except FileNotFoundError:
        raise RuntimeError('No train_loader.pth in /app/application_files/dataloaders folder. It has not been generated yet. Run the data_loading.py script with the corresponding config settings to generate it.')
else:
    try:
        training_loader = torch.load('./app/application_files/dataloaders/all_data_loader.pth')
    except FileNotFoundError:
        raise RuntimeError('No all_data_loader.pth in /app/application_files/dataloaders folder. It has not been generated yet. Run the data_loading.py script with the corresponding config settings to generate it.')
print('training dataloader loaded\n')
"""# 2) Training the validation model and final model (this last one if option activated)"""
def train(model, train_dataloader):
    # Set the model in training mode.
    model.train()
    # for each epoch we save the loss of our model over the training data
    train_loss = []
    # calculate the expected number of batches (informational only; `counter` below tracks the actual number)
    num_batches = int(len(train_dataloader.dataset)/train_dataloader.batch_size)
    for epoch in range(EPOCHS):
        # We will need the following two variables to compute the loss after each epoch
        train_running_loss = 0.0
        counter = 0
        for data in train_dataloader:
            counter += 1
            # extract the images and keypoints for the given batch of training data
            images, keypoints = data['image'].to(DEVICE), data['keypoints'].to(DEVICE)
            # flatten the keypoints (original: torch.Size([256, 15, 2]))
            # new size: torch.Size([256, 30])
            keypoints = keypoints.view(keypoints.size(0), -1)
            # set all gradients to zero before using the model
            optimizer.zero_grad()
            outputs = model(images)
            # compute the loss -> single scalar value
            loss = criterion(outputs, keypoints)
            train_running_loss += loss.item()
            loss.backward()
            optimizer.step()
            # storing last batch and outputs for the "Actual vs Predicted" keypoints comparison plotting
            last_batch = data
            last_batch_outputs = outputs.clone().detach()
        # We have computed the MSE loss for each individual batch. In train_running_loss
        # we have the sum over all these MSE losses; to get the final mean, we have
        # to divide the result by the number of batches (-> counter); see the formula note after this function
        train_epoch_loss = train_running_loss/counter
        train_loss.append(train_epoch_loss)
        # Getting image, actual keypoints and predicted output keypoints for the first sample of the last batch used for the "Actual vs Predicted" keypoints comparison plotting
        first_sample_image_in_last_train_batch, first_sample_actual_keypoints_in_last_train_batch, first_sample_predicted_keypoints_in_last_train_batch = last_batch['image'][0], last_batch['keypoints'][0], last_batch_outputs[0].cpu()
        if (epoch+1) % 25 == 0 or epoch == 0:
            print(f"\nEpoch {epoch+1} of {EPOCHS}")
            print(f"Train Loss: {train_running_loss/counter:.4f}")
            if show_images_during_training:
                print('Actual (red) vs Predicted (blue) keypoints comparison plot for the first sample of the last training batch:')
                plt.clf()  # clear the previous plot
                plt.imshow(first_sample_image_in_last_train_batch.reshape(SIZE, SIZE), cmap='gray')
                plt.plot(first_sample_actual_keypoints_in_last_train_batch[:, 0], first_sample_actual_keypoints_in_last_train_batch[:, 1], 'r.')
                plt.plot(first_sample_predicted_keypoints_in_last_train_batch[::2], first_sample_predicted_keypoints_in_last_train_batch[1::2], 'b.')
                plt.show()
    return train_loss
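"""For reference, the per-epoch loss computed above is simply the mean of the per-batch MSE losses:
$$\mathcal{L}_{\text{epoch}} = \frac{1}{B}\sum_{b=1}^{B} \text{MSE}\left(\hat{y}_b, y_b\right)$$
where $B$ is the number of batches processed (the `counter` variable) and $\hat{y}_b$, $y_b$ are the predicted and actual flattened keypoints of batch $b$."""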
import torch.nn as nn
#MODEL TRAINING
print('MODEL TRAINING STARTS:\n')
print('- Model architecture used: '+MODEL_ARCHITECTURE+'\n')
print('- Training settings:')
print(' * Learning rate: '+str(LR))
print(' * Epochs: '+ str(EPOCHS) +'\n')
if MODEL_ARCHITECTURE == "RESNET":
    model = architecture.FaceKeypointModelResNet().to(DEVICE)
elif MODEL_ARCHITECTURE == "MOBILENET":
    model = architecture.FaceKeypointModelMobileNet().to(DEVICE)
elif MODEL_ARCHITECTURE == "VGG":
    model = architecture.FaceKeypointModelVGG().to(DEVICE)
else:
    raise RuntimeError('No valid value for model architecture in config.ini file')
# we need a loss function which is good for regression, like MSELoss
criterion = nn.MSELoss()
# define optimizer:
# (Adam is an alternative to the classic SGD
# (https://pytorch.org/docs/stable/optim.html))
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
train_loss = train(model, training_loader)
print('\nMODEL TRAINING DONE')
"""# 3) Saving trained model and loss plot"""
# Create the models directory if it doesn't exist
model_dirName = 'models/' + MODEL_ARCHITECTURE
try:
    os.makedirs(model_dirName)
    print("Directory ", model_dirName, " Created ")
except FileExistsError:
    print("Directory ", model_dirName, " already exists")
# Create the loss_plots directory if it doesn't exist
loss_plot_dirName = 'app/application_files/loss_plots/' + MODEL_ARCHITECTURE
try:
    os.makedirs(loss_plot_dirName)
    print("Directory ", loss_plot_dirName, " Created ")
except FileExistsError:
    print("Directory ", loss_plot_dirName, " already exists")
# loss plot
plt.figure(figsize=(10, 7))
plt.plot(train_loss, color='orange', label='train loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
fig = plt.gcf()
plt.show()
if use_validation_model:
    # VALIDATION MODEL
    # saving loss plot
    fig.savefig(loss_plot_dirName + '/validation_model_train_loss.png')
    # saving model
    torch.save({
        'epoch': EPOCHS,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': criterion,
    }, model_dirName + '/validation_model.pth')
    print('VALIDATION MODEL AND LOSS PLOT SAVED')
else:
    # FINAL MODEL
    # saving loss plot
    fig.savefig(loss_plot_dirName + '/final_model_train_loss.png')
    # saving model
    torch.save({
        'epoch': EPOCHS,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': criterion,
    }, model_dirName + '/final_model.pth')
    print('FINAL MODEL AND LOSS PLOT SAVED')
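"""## Note: restoring the saved checkpoint (hypothetical downstream usage)
A minimal sketch of how a downstream script such as test.py might restore the checkpoint dictionary saved above. The key names mirror the torch.save calls in this script, but the exact consuming code in test.py is an assumption, so the snippet is kept commented out and is not executed here:"""
# checkpoint = torch.load(model_dirName + '/validation_model.pth', map_location=DEVICE)  # or final_model.pth
# model.load_state_dict(checkpoint['model_state_dict'])          # restore the trained weights
# optimizer.load_state_dict(checkpoint['optimizer_state_dict'])  # only needed to resume training
# model.eval()  # switch to evaluation mode before running inference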
print('\n--> train.py execution finished')