-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsearch.py
168 lines (121 loc) · 4.48 KB
/
search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
"""Script to search the GUTINDEX file by e-text number, title or author.
Usage:
------
> From the command line, run:
`python search.py {mode: etext/title/author} {search parameter}`
Known bugs:
-----------
> If there are fewer than 2 spaces before the e-text number, the search fails.
This is in relation to the fact that some entries in the file end with a
number (eg. "... Vol. 2"), and there is no other (known) way to
differentiate an e-text number from that one.
> When searching by author, if the author's full name breaks into a new line,
or it is not the first name in a list of authors' names, the search fails.
> When searching by title, if the title spans more than one line, the search
fails.
"""
import re # For regex
import sys # For parsing command line arguments
def search_by_etext_no(file_to_search, eno):
    """Return the index entry whose e-text number is ``eno``.

    The file is scanned line by line for a line that ends with at least two
    whitespace characters followed by ``eno`` (optionally followed by the
    ``C`` suffix that the entry-header pattern also accepts).

    Args:
        file_to_search: Path of the UTF-8 encoded index file.
        eno: The e-text number to look for (string or integer). It is
            matched literally, never interpreted as a regular expression.

    Returns:
        The text before the number on the matching line (stripped, with a
        newline appended), followed by every subsequent line up to the next
        blank line, the next entry line, or the end of the file.
        ``None`` if no line matches.
    """
    # Escape the number so characters such as "." cannot act as regex
    # metacharacters, and accept the optional "C" suffix so that an entry
    # numbered e.g. "12345C" is found when searching for "12345".
    wanted = re.compile(
        r'^(.*)\s{2,}\b' + re.escape(str(eno)) + r'C?\b$')
    # Any line ending in two or more spaces plus a number starts a new entry.
    entry_header = re.compile(r'^.*\s{2,}\b\d+C?\b$')

    with open(file_to_search, encoding='utf-8') as f:
        for line in f:
            match = wanted.match(line)
            if not match:
                continue
            data = [match.group(1).strip() + '\n']
            # Keep consuming the same file iterator to pick up the
            # continuation lines that belong to this entry.
            for extra in f:
                if extra == '\n' or entry_header.match(extra):
                    break
                data.append(extra)
            return ''.join(data)
    return None
def search_by_title(file_to_search, title):
    """Return the first index entry whose header line contains ``title``.

    A header line is one that ends with two or more whitespace characters
    followed by a number (optionally suffixed with ``C``). Only the header
    line itself is checked for ``title``; the comparison is a plain,
    case-sensitive substring test.

    Args:
        file_to_search: Path of the UTF-8 encoded index file.
        title: Text that must occur in the entry's header line.

    Returns:
        The header line plus every following line up to the next blank line,
        the next header line, or the end of the file; ``None`` if no header
        line contains ``title``.
    """
    entry_header = re.compile(r'(.)*(\s){2,}\b(\d)+(C)?\b$')

    with open(file_to_search, encoding='utf-8') as index:
        for header in index:
            if entry_header.match(header) is None or title not in header:
                continue
            collected = [header]
            # Continue on the same iterator to gather continuation lines.
            for follow_up in index:
                if follow_up == '\n' or entry_header.match(follow_up):
                    break
                collected.append(follow_up)
            return ''.join(collected)
    return None
def search_by_author(file_to_search, author):
    """Return every index entry that credits ``author``.

    An entry starts at a header line (a line ending with two or more
    whitespace characters followed by a number, optionally suffixed with
    ``C``) and runs up to, but not including, the next header line or the
    end of the file. An entry is selected when any of its lines contains
    ``author`` directly preceded by one of the words " by ", " mennessä ",
    " par " or " di ".

    Args:
        file_to_search: Path of the UTF-8 encoded index file.
        author: The author's name as it appears after the "by" word.

    Returns:
        All lines of all selected entries concatenated in file order
        (blank lines that follow an entry are part of it), or ``None`` when
        no entry is selected.
    """
    entry_header = re.compile(r'(.)*(\s){2,}\b(\d)+(C)?\b$')
    needles = [prefix + author
               for prefix in [' by ', ' mennessä ', ' par ', ' di ']]

    selected = []        # lines of every entry that matched so far
    current = []         # lines of the entry currently being read
    current_hit = False  # whether `current` mentions the author

    with open(file_to_search, encoding='utf-8') as f:
        for line in f:
            if entry_header.match(line):
                # A new entry begins: keep the finished one if it matched.
                if current_hit:
                    selected.extend(current)
                current = [line]
                current_hit = False
            elif current:
                current.append(line)
            else:
                # Text before the first entry belongs to no entry.
                continue
            if not current_hit:
                current_hit = any(needle in line for needle in needles)

    # The last entry in the file has no following header to flush it.
    if current_hit:
        selected.extend(current)

    if selected:
        return ''.join(selected)
    return None
if __name__ == '__main__':
    # Command-line entry point:
    #   python search.py {mode: etext/title/author} {search parameter}
    #
    # sys.argv[0] is the script name, so a mode plus at least one search word
    # means at least three items. Checking for fewer than two let a bare
    # mode through, which raised IndexError for e-text searches and searched
    # for the empty string in title/author mode.
    if len(sys.argv) < 3:
        print('Invalid number of arguments; expected two or more.\n')
    else:
        file = 'GUTINDEX.ALL'
        mode = sys.argv[1].lower()

        # Pick the search function and its query for the requested mode.
        if mode in ['etext', 'e-text', 'etextno', 'etext_no', 'e-text_no']:
            search, query = search_by_etext_no, sys.argv[2]
        elif mode == 'title':
            # Titles may contain spaces, so use all remaining arguments.
            search, query = search_by_title, ' '.join(sys.argv[2:])
        elif mode == 'author':
            # Author names may contain spaces as well.
            search, query = search_by_author, ' '.join(sys.argv[2:])
        else:
            search, query = None, None

        if search is None:
            print('Invalid mode.\n')
        else:
            info = search(file, query)
            if info:
                print(info)
            else:
                print('Not found.\n')