# mlp.py
import torch.nn as nn

from attention import MultiHeadAttention


class MLP(nn.Module):
    """Position-wise feed-forward network: expand to 4x the embedding size, then project back."""

    def __init__(self, n_embd):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.ReLU(),
            nn.Linear(4 * n_embd, n_embd),
        )

    def forward(self, x):
        return self.net(x)


class Block(nn.Module):
    """Transformer block: multi-head self-attention followed by an MLP, each wrapped in a pre-norm residual connection."""

    def __init__(self, n_embd, context_length, num_heads):
        super().__init__()
        head_size = n_embd // num_heads
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)
        self.attention = MultiHeadAttention(n_embd, context_length, num_heads, head_size)
        self.mlp = MLP(n_embd)

    def forward(self, x):
        # LayerNorm is applied before each sub-layer (pre-norm), and the
        # sub-layer output is added back to its input (residual connection).
        attn = x + self.attention(self.ln1(x))
        out = attn + self.mlp(self.ln2(attn))
        return out
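

# Minimal usage sketch (illustrative, not part of the original file): builds a
# single Block and runs a forward pass on random embeddings. It assumes that
# MultiHeadAttention(n_embd, context_length, num_heads, head_size) takes a
# (batch, time, n_embd) tensor and returns a tensor of the same shape.
if __name__ == "__main__":
    import torch

    n_embd, context_length, num_heads = 64, 32, 4
    block = Block(n_embd, context_length, num_heads)

    x = torch.randn(2, context_length, n_embd)  # (batch, time, channels)
    out = block(x)
    print(out.shape)  # expected: torch.Size([2, 32, 64])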