"""
This script runs a bidirectional LSTM (Long Short-Term Memory) neural network
and either trains a new model from scratch, or loads a previously trained model,
then evaluates that model on the testing set.
Usage:
- Run this script with `python3 lstm.py` to evaluate the trained model on the testing set.
- To retrain the model, run this script with the --train_from_scratch command line argument,
and optionally specify the following hyperparameters:
--use_og_data_only: If set, only trains on the original data without any augmented data.
--use_2_classes: If set, uses 2 classes rather than 3 (collapses class 0 and 1 into one class)
--n_epochs: Integer representing how many epochs to train the model for
--batch_size: Integer representing how large each batch should be
--learning_rate: Float representing the desired learning rate
--hidden_size: Integer representing the desired dimensions of the hidden layer(s) of the NN
--num_layers: Integer representing the number of layers
For further explanations of these flags, consult the README or `utils.py`.
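
Example (the flag values below are illustrative, not recommended settings):
    python3 lstm.py --train_from_scratch --n_epochs 10 --batch_size 32 --learning_rate 0.01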
"""
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

from utils import train_model, evaluate_model, load_data_tensors, parse_command_line_args

SAVE_PATH = 'models/lstm.pt'


class LSTM(nn.Module):
    """
    Implementation of a Long Short-Term Memory neural network.
    Set the init parameter `bidirectional` to True to use a bidirectional LSTM (a BLSTM).
    """

    def __init__(self, vocab_size, hidden_size, output_size, num_layers, bidirectional=False):
        """
        Initializes the LSTM with the specified hyperparameters.
        """
        super().__init__()
        self.bidirectional = bidirectional
        self.emb = nn.Embedding(vocab_size, hidden_size)
        self.rec = nn.LSTM(hidden_size, hidden_size,
                           num_layers=num_layers, bidirectional=bidirectional)
        # When bidirectional, the final hidden state concatenates both directions,
        # so the next layer's input dims must be doubled
        linear_size = hidden_size * 2 if bidirectional else hidden_size
        self.lin = nn.Linear(linear_size, output_size)

    def forward(self, input_seq):
        """
        Executes the forward pass of the neural network. This function is called
        automatically when we run `model(...)`. Returns raw class scores (logits);
        `nn.CrossEntropyLoss` applies the softmax internally, so no activation
        function is needed here.
        """
        embeds = self.emb(input_seq)
        output_seq, (h_last, c_last) = self.rec(embeds)
        # h_last is stacked as (num_layers * num_directions, batch, hidden_size),
        # so the final layer's hidden state(s) sit at the end of the first dimension
        if self.bidirectional:
            h_direc_1 = h_last[-2, :, :]
            h_direc_2 = h_last[-1, :, :]
            h = torch.cat((h_direc_1, h_direc_2), dim=1)
        else:
            h = h_last[-1, :, :]
        return self.lin(h)
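
# Illustrative shape check (hypothetical values, not part of the training pipeline):
# given a batch of token-index sequences shaped (seq_len, batch), the model
# returns one raw score per class for each sequence in the batch, e.g.:
#
#   model = LSTM(vocab_size=1000, hidden_size=64, output_size=3, num_layers=2)
#   dummy = torch.randint(0, 1000, (20, 8))  # seq_len=20, batch=8
#   scores = model(dummy)                    # shape: (8, 3)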
if __name__ == "__main__":
args = parse_command_line_args()
# If there's an available GPU, let's use it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_train, labels_train, tok_to_ix = load_data_tensors(
args.use_og_data_only)
if args.retrain:
"""
Initialize model with the specified hyperparameters and architecture
"""
hidden_size = args.hidden_size
num_layers = 2 if args.num_layers < 0 else args.num_layers
vocab_size = len(tok_to_ix)
output_size = len(np.unique(labels_train))
model = LSTM(
hidden_size=hidden_size,
num_layers=num_layers,
vocab_size=vocab_size,
output_size=output_size,
bidirectional=False)
model = model.to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=args.learning_rate)
model = train_model(
model=model,
loss_func=loss_func,
optimizer=optimizer,
data_train=data_train,
labels_train=labels_train,
n_epochs=args.n_epochs,
batch_size=args.batch_size,
save_path=SAVE_PATH,
device=device)
else:
"""
File was not run with --train_from_scratch, so simply load the model from its saved path
"""
model = torch.load(SAVE_PATH)
"""
Whether we're training or just loading the pretrained model, we finish by
evaluating the model on the testing set.
"""
evaluate_model(
model=model,
tok_to_ix=tok_to_ix,
use_og_data_only=args.use_og_data_only,
bs=args.batch_size,
device=device)