-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathmain.py
53 lines (35 loc) · 1.13 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import word2vec
import fasttxt
import numpy as np
from gensim.matutils import unitvec
def test(model, positive, negative, test_words):
    """Print the candidate word that best matches an analogy query.

    The query vector is the unit-normalised mean of the vectors of the
    ``positive`` words and the negated vectors of the ``negative`` words.
    Each word in ``test_words`` that is not itself part of the query is
    scored by the dot product of its unit-normalised vector with the query
    vector (i.e. cosine similarity). The best-scoring word is printed as a
    one-element list; an empty list is printed when no candidate remains.

    Args:
        model: Object indexable by word (``model[word]``) that yields the
            word's vector.
        positive: Words whose vectors are added to the query.
        negative: Words whose vectors are subtracted from the query.
        test_words: Iterable of candidate words to score.

    Raises:
        ValueError: If both ``positive`` and ``negative`` are empty, since
            no query vector can be formed.
    """
    weighted = [1.0 * np.array(model[word]) for word in positive]
    weighted += [-1.0 * np.array(model[word]) for word in negative]
    if not weighted:
        raise ValueError("at least one positive or negative word is required")

    # Average the signed vectors and normalise, so that the dot products
    # below are cosine similarities.
    query = unitvec(np.array(weighted).mean(axis=0))

    # Build the exclusion set once instead of concatenating and scanning the
    # two lists for every candidate word.
    excluded = set(positive) | set(negative)

    scores = {}
    for word in test_words:
        if word in excluded:
            continue
        candidate = unitvec(np.array(model[word]))
        scores[word] = np.dot(candidate, query)

    # Only the top entry is reported, so a single pass with max() replaces a
    # full sort; ties resolve to the first-scored word either way.
    best = [max(scores, key=scores.get)] if scores else []
    print(best)
# Set to True to (re)train both embedding models before evaluating them.
TRAIN = False

if TRAIN:
    for banner, backend in (
        ("Training Word2vec", word2vec),
        ("Training Fasttext", fasttxt),
    ):
        print(banner)
        backend.train()

# Analogy query shared by both evaluations: the positive words are added and
# the negative word is subtracted inside test().
positive_words = ["מלכה", "גבר"]
negative_words = ["מלך"]

# Evaluate each backend in turn. The third field names the model attribute
# that is handed to test() as the collection of candidate words.
for banner, backend, vocab_attr in (
    ("Testing Word2vec", word2vec, "vocab"),
    ("Testing Fasttext", fasttxt, "words"),
):
    print(banner)
    model = backend.getModel()
    test(model, positive_words, negative_words, getattr(model, vocab_attr))