# summary_generation.py
# using SBS (Summation-Based Selection)
"""Generate extractive summaries for scraped blog posts.

Every ``*.txt`` blog in the current directory is split into sentences with
NLTK's Punkt tokenizer; each sentence is scored with ``rep_score_sentence``.
Sentences scoring at least ``factor`` times the blog's mean score are written
to ``<blog>_summary.txt``; all raw scores go to ``sbs_score.txt``.
"""
from representative_score import rep_score_sentence
import config
import os
import time
import argparse

import nltk.data


def _parse_args():
    """Return parsed CLI options (``--factor`` scales the selection threshold)."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f',
        '--factor',
        # was Decimal(args.factor) * threshold, which raises TypeError
        # (Decimal cannot be multiplied by a float threshold)
        type=float,
        default=1.0,
        help='factor of threshold for summary',
    )
    return parser.parse_args()


def _summarize_blog(blog, factor, tokenizer, score_log):
    """Score one blog's sentences and write ``<blog>_summary.txt``.

    Scores are reset for each blog; the original accumulated ``overall_sbs``
    and ``sentence_sbs`` across files, skewing every threshold after the
    first blog and re-emitting earlier blogs' sentences.
    """
    with open(blog) as src:  # original leaked this handle by rebinding `file`
        sentences = tokenizer.tokenize(src.read())
    if not sentences:
        # empty blog: emit an empty summary instead of ZeroDivisionError
        open(blog[:-4] + '_summary.txt', 'w+').close()
        return
    sentence_sbs = dict()
    overall_sbs = 0
    for sentence in sentences:
        print("Old sentence : " + sentence)
        sbs_score = rep_score_sentence(sentence, config.tau)
        score_log.write(str(sbs_score) + '\n')
        overall_sbs = overall_sbs + sbs_score
        sentence_sbs[sentence] = sbs_score
    threshold = overall_sbs / len(sentences)
    # keep sentences whose score reaches the scaled per-blog mean
    with open(blog[:-4] + '_summary.txt', 'w+') as summary:
        for sentence, score in sentence_sbs.items():
            if score >= factor * threshold:
                summary.write(sentence)


def main():
    args = _parse_args()
    start_time = time.time()
    # fetching scraped txt blogs; skip our own outputs so re-runs
    # don't summarize previous summaries or the score log
    blogs = [
        f for f in os.listdir()
        if f.endswith(".txt")
        and not f.endswith("_summary.txt")
        and f != "sbs_score.txt"
    ]
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    # one shared score log for the whole run; the original truncated
    # and re-opened it inside the loop and only closed the last handle
    with open("sbs_score.txt", "w+") as score_log:
        for blog in blogs:
            _summarize_blog(blog, args.factor, tokenizer, score_log)
    end_time = time.time()
    print("Execution time : " + str(end_time - start_time))


if __name__ == '__main__':
    main()