-
Notifications
You must be signed in to change notification settings - Fork 0
/
GPT_run_other_sets.py
117 lines (103 loc) · 3.79 KB
/
GPT_run_other_sets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import json
from time import sleep
import openai
from loguru import logger
from tqdm import tqdm
from GPT_prompter import Prompter
def getSentiment(text):
    """Classify ``text`` as 'positive', 'negative' or 'neutral' via the chat prompter.

    Sends ``text`` together with ``Prompter.prompts[15]`` to
    ``Prompter.get_response_chat`` and keeps retrying until a response is
    obtained: a rate-limit error waits 10 seconds before the next attempt, any
    other exception is logged and the request is retried immediately.

    Only the last line of the response (lower-cased) is inspected. If it
    mentions exactly one of the three labels, that label is returned. If it
    mentions none or more than one, the response is logged as invalid and
    'neutral' is returned.

    Args:
        text: The text to classify.

    Returns:
        One of 'positive', 'negative' or 'neutral'.
    """
    while True:
        # The outer handler also covers anything raised while the rate-limit
        # handler itself runs, so every failure ends in another attempt.
        # NOTE(review): non-rate-limit errors are retried forever with no
        # delay -- confirm that this is intended.
        try:
            try:
                resp = Prompter.get_response_chat(Prompter.prompts[15], text)
                break
            except openai.error.RateLimitError:
                logger.warning('Rate limit error, sleeping for 10 seconds')
                sleep(10)
        except Exception as e:
            logger.exception(e)
            logger.error(f'Failed to get response for {text}')

    last_line = resp.split("\n")[-1].lower()
    mentioned = [
        label
        for label in ('negative', 'neutral', 'positive')
        if label in last_line
    ]
    if len(mentioned) == 1:
        return mentioned[0]

    # loguru fills "{}" placeholders from the positional arguments; without a
    # placeholder the offending response would be silently dropped from the log.
    logger.info('Invalid response: {}', resp)
    return 'neutral'
# Measure getSentiment accuracy on the LV-twitter-sentiment-corpus tweets.
with open('LV-twitter-sentiment-corpus/tweet_corpus.json', 'r', encoding='utf8') as f:
    dataset = json.load(f)

# Corpus label codes mapped to the labels getSentiment returns.
label_names = {"POZ": 'positive', "NEG": 'negative', "NEU": 'neutral'}

# One boolean per tweet: does the gold label match the model's answer?
# The gold label is looked up first, so an unknown code fails before any
# request is sent.
outcomes = [
    label_names[tweet['sentiment']] == getSentiment(tweet['text'])
    for tweet in tqdm(dataset)
]
correct = sum(outcomes)
total = len(outcomes)
print(f'LV-twitter-sentiment-corpus accuracy: {correct / total}')
# Measure getSentiment accuracy on the Latvian Twitter Eater Corpus test split.
with open('Latvian-Twitter-Eater-Corpus/sub-corpora/sentiment-analysis/ltec-sentiment-annotated-test.json', 'r', encoding='utf8') as f:
    dataset = json.load(f)

# Corpus label codes mapped to the labels getSentiment returns.
label_names = {"pos": 'positive', "neg": 'negative', "neu": 'neutral'}

# One boolean per tweet: does the gold label match the model's answer?
# The gold label is looked up first, so an unknown code fails before any
# request is sent.
outcomes = [
    label_names[tweet['sentiment']] == getSentiment(tweet['tweet_text'])
    for tweet in tqdm(dataset)
]
correct = sum(outcomes)
total = len(outcomes)
print(f'Latvian Twitter Eater Corpus accuracy: {correct / total}')
# Measure getSentiment accuracy on the sikzinu_analize (viksna) data set.
with open('sikzinu_analize/viksna.json', 'r', encoding='utf8') as f:
    dataset = json.load(f)
correct = 0
total = 0
for i, d in enumerate(tqdm(dataset['data'])):
    # Gold label is the sign of POS - NEG; a tie counts as neutral.
    score = d['POS'] - d['NEG']
    sentiment = 'positive' if score > 0 else 'negative' if score < 0 else 'neutral'
    gpt_sentiment = getSentiment(d['text'])
    # Items flagged NOT_LV are excluded from the accuracy figure.
    if not d['NOT_LV']:
        if sentiment == gpt_sentiment:
            correct += 1
        total += 1
    if i % 100 == 0:
        # total is still 0 until the first non-NOT_LV item has been scored
        # (e.g. when item 0 is flagged), so guard the division.
        accuracy = correct / total if total else float('nan')
        print(f'sikzinu_analize accuracy: {accuracy}')
        # NOTE(review): gpt_sentiment is never stored in `dataset`, so this
        # file contains only the loaded data -- confirm whether the
        # predictions were meant to be saved here.
        with open('sikzinu_analize/viksna_results.json', 'w', encoding='utf8') as f:
            json.dump(dataset, f, ensure_ascii=False, indent=4)
# No scored items (empty data or everything flagged NOT_LV) reports nan
# instead of raising ZeroDivisionError.
accuracy = correct / total if total else float('nan')
print(f'sikzinu_analize accuracy: {accuracy}')
# with open('om/data/psgs_norm.arff', 'r', encoding='utf8') as f:
# for i in range(8):
# f.readline()
# dataset = [[i.strip(' \'\n') for i in j.split(',')] for j in f.readlines()]
# Measure getSentiment accuracy on the OM data set. The JSON file doubles as a
# cache: each row is a list where row[1] is the text, row[2] the gold label
# code and, once queried, row[3] the stored getSentiment prediction.
with open('om/data/psgs_norm.json', 'r', encoding='utf8') as f:
    dataset = json.load(f)

# Data set label codes mapped to the labels getSentiment returns.
label_names = {"POZ": 'positive', "NEG": 'negative', "NEU": 'neutral'}
last_index = len(dataset) - 1
correct = 0
total = 0
for i, d in enumerate(tqdm(dataset)):
    sentiment = label_names[d[2]]
    if len(d) == 3:
        # No cached prediction yet: query the model and remember the answer.
        gpt_sentiment = getSentiment(d[1])
        d.append(gpt_sentiment)
    else:
        gpt_sentiment = d[3]
    if sentiment == gpt_sentiment:
        correct += 1
    total += 1
    if i % 100 == 0:
        print(f'om accuracy: {correct / total}')
    # Checkpoint every 100 rows and once more on the final row; otherwise the
    # predictions made after the last multiple of 100 would never be written
    # back and would be re-queried on the next run.
    if i % 100 == 0 or i == last_index:
        with open('om/data/psgs_norm.json', 'w', encoding='utf8') as f:
            json.dump(dataset, f, ensure_ascii=False, indent=4)
# An empty data set reports nan instead of raising ZeroDivisionError.
accuracy = correct / total if total else float('nan')
print(f'OM accuracy: {accuracy}')