-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclue_parser.py
192 lines (161 loc) · 6.67 KB
/
clue_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# -*- coding: utf-8 -*-
"""
This is the top-level non-GUI module.
This is the module that should be imported if one wishes to avoid the GUI and dive straight
into the source-code functionality.
A new ClueParser instance can be initialised as follows:
cp = ClueParser()
After this, clues can be submitted for solving by typing (for example):
cp.parseClue('Zoroastrian pairs dancing. (5)')
The parser employs certain heuristics to speed up computation:
-Minimum solution length is 3 letters (not counting hyphens/apostrophes).
-Solution is not the same as any of the clue tokens.
-Solution is a single (possibly hyphenated) word.
-Definitions are not longer than three words.
"""
# Python libraries
import inspect # for parsing through the contents of python modules
import pdb # live debugging module
# Dictionary libraries
import wordnet # custom wrapper around NLTK WordNet
# Other CCS modules
from clue import Clue
from solution import Solution
from exceptions import BruteForceWithoutKnownLettersException
import wordplay
import log # module for giving runtime feedback to the user
__author__ = 'Jarek Glowacki'
logger = log.getLogger(__name__, streamLevel=log.INFO)
DEFINITION_MAX_LEN = 3
class ClueParser(object):
def __init__(self, gui_thread=None):
self.gui_thread = gui_thread
# Create an instance of each existing wordplay type, based on what's
# been defined in the wordplay module.
self.wordplays = {mem.__typ__: mem() for _, mem in inspect.getmembers(wordplay, inspect.isclass) \
if issubclass(mem, wordplay.Wordplay) and mem is not wordplay.Wordplay}
logger.debug('%s instance initialised.' % self.__class__.__name__)
def parseClue(self, clue, length=None, typ=None, known_letters=None, brute_force=False, **kwargs):
"""
This is the core method that handles input clues, interprets them, then passes them off to other
modules to dissect. It then re-assimilates all solutions, sorts them, then returns them to the user.
"""
logger.info('Parsing clue: %s' % clue)
# Convert to Clue object if it is not already.
if not isinstance(clue, Clue):
clue = Clue(clue, length, typ, known_letters)
logger.debug('Converted clue to Clue object: %s' % clue)
# Loop through all possible definitions.
solutions = []
raw_solns = set() # for bruteforcing later
raw_definitions = [] # for bruteforcing later
for defpos in filter(lambda x: x!=0, range(-DEFINITION_MAX_LEN, DEFINITION_MAX_LEN+1)):
# Generate a list of possible solutions for chosen definition.
# Defpos signifies how many words in the definition.
# A negative value indicates to take words from end of string.
definition = '_'.join(clue.tokens[0 if defpos > 0 else defpos : defpos if defpos > 0 else None])
if not wordnet.exists(definition):
continue
raw_definitions.append([defpos,definition])
# Generate possible wordplay interpretations. (Convert to a tree structure with keywords at nodes.)
wpTokens = clue.tokens[0 if defpos < 0 else defpos:defpos if defpos < 0 else None]
interpretations = self.interpret(clue, wpTokens)
# Consider each interpretation.
for kwpos, typs in interpretations.items():
for typ, subplay in typs.items():
if self.gui_thread:
if self.gui_thread.halt():
return solutions
self.gui_thread.updateStatus(typ=typ)
for soln in self.wordplays[typ].check(clue, subplay, wordplays=self.wordplays, gui=self.gui_thread, **kwargs):
if self.gui_thread and self.gui_thread.halt():
return solutions
certainty = wordnet.calcSimilarity(soln.solution, definition) * soln.certainty
if certainty > 0:
solutions.append(
Solution(clue,
soln.solution,
defpos,
[(kwpos + (defpos if defpos > 0 else 0)) if kwpos >= 0 else -1, typ,
' '.join(soln.applied_to)],
certainty
)
)
raw_solns.add(soln.solution)
# Sort solutions by certainty score.
solutions = sorted(solutions, key=lambda s: s.certainty, reverse=True)
# Consider brute-forcing solutions.
if brute_force:
if known_letters is None:
raise BruteForceWithoutKnownLettersException
if self.gui_thread:
if self.gui_thread.halt():
return solutions
self.gui_thread.updateStatus(typ='brute-forcing')
# Get a brute-forced list of solutions.
brute_solns = set(wordnet.getWordsWithPattern(clue.regex)) - raw_solns
brute_forced_solutions = []
for soln in brute_solns:
if self.gui_thread and self.gui_thread.halt():
return solutions
soln_certainty = max([(0,None)]+[(wordnet.calcSimilarity(soln, d[1]), d[0]) for d in raw_definitions], key=lambda x:x[0])
brute_forced_solutions.append(
Solution(clue,
soln,
soln_certainty[1] if soln_certainty[0] > 0 else None,
[-1, 'brute-forced', '---'],
soln_certainty[0]
)
)
solutions.extend(sorted(brute_forced_solutions, key=lambda s: s.certainty, reverse=True))
if solutions:
logger.info('%i solution(s) found. Best solution is \'%s\' (certainty: %f)' % (len(solutions), solutions[0].solution, solutions[0].certainty))
else:
logger.info('Unsuccessful in resolving clue.')
return solutions
def interpret(self, clue, wp_tokens):
"""
Generates a list of possible interpretations for the wordplay part.
"""
kwords = {}
for pos, word in enumerate(wp_tokens):
word = wordnet.stemmer.stem(word)
try:
if word in self.wordplays[clue.typ].keywords:
kwords[pos] = [clue.typ]
except KeyError:
# No wordplay type provided; attempt to fit each wordplay.
for typ, wp in self.wordplays.items():
if word in wp.keywords:
kwords.setdefault(pos, []).append(typ)
# Assume only single-keyword wordplays.
interpretations = {}
for pos, typs in kwords.items():
interpretations[pos] = {}
for typ in typs:
interpretations[pos][typ] = {}
# Consider token sets before and after the keyword.
interpretations[pos][typ] = {'<': wp_tokens[:pos], '>': wp_tokens[pos + 1:]}
# Add wordplays which do not use keywords
interpretations[-1] = {}
if clue.typ is not None:
if not self.wordplays[clue.typ].usesKeywords:
interpretations[-1][clue.typ] = wp_tokens
else:
for typ, wp in self.wordplays.items():
if not wp.usesKeywords:
interpretations[-1][typ] = wp_tokens
return interpretations
def recompileDictionaries(self, **kwargs):
"""
Recompiles all of the custom wordplay dictionaries.
"""
for wp in self.wordplays.values():
if wp.hasCustomDict:
wp.recompileDictionary(**kwargs)
def recompileWordlist(self, **kwargs):
"""
Recompiles the wordlist, followed by all of the custom wordplay dictionaries.
"""
wordnet.recompileWordList(**kwargs)
self.recompileDictionaries(**kwargs)