Skip to content
This repository has been archived by the owner on Aug 26, 2024. It is now read-only.

Commit

Permalink
To maintain backwards compatibility, force_ascii now defaults to True, as before
Browse files: browse the repository at this point in the history
  • Loading branch information
Adam Cohen committed May 3, 2013
1 parent 3b43c37 commit a52d149
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 17 deletions.
26 changes: 13 additions & 13 deletions fuzzywuzzy/fuzz.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def partial_ratio(s1, s2):
# find all alphanumeric tokens in the string
# sort those tokens and take ratio of resulting joined strings
# controls for unordered string elements
def _token_sort(s1, s2, partial=True, force_ascii=False):
def _token_sort(s1, s2, partial=True, force_ascii=True):

if s1 is None: raise TypeError("s1 is None")
if s2 is None: raise TypeError("s2 is None")
Expand All @@ -107,10 +107,10 @@ def _token_sort(s1, s2, partial=True, force_ascii=False):
else:
return ratio(sorted1, sorted2)

def token_sort_ratio(s1, s2, force_ascii=False):
def token_sort_ratio(s1, s2, force_ascii=True):
return _token_sort(s1, s2, partial=False, force_ascii=force_ascii)

def partial_token_sort_ratio(s1, s2, force_ascii=False):
def partial_token_sort_ratio(s1, s2, force_ascii=True):
return _token_sort(s1, s2, partial=True, force_ascii=force_ascii)

# Token Set
Expand All @@ -119,7 +119,7 @@ def partial_token_sort_ratio(s1, s2, force_ascii=False):
# <sorted_intersection><sorted_remainder>
# take ratios of those two strings
# controls for unordered partial matches
def _token_set(s1, s2, partial=True, force_ascii=False):
def _token_set(s1, s2, partial=True, force_ascii=True):

if s1 is None: raise TypeError("s1 is None")
if s2 is None: raise TypeError("s2 is None")
Expand Down Expand Up @@ -157,10 +157,10 @@ def _token_set(s1, s2, partial=True, force_ascii=False):
]
return max(pairwise)

def token_set_ratio(s1, s2, force_ascii=False):
def token_set_ratio(s1, s2, force_ascii=True):
return _token_set(s1, s2, partial=False, force_ascii=force_ascii)

def partial_token_set_ratio(s1, s2, force_ascii=False):
def partial_token_set_ratio(s1, s2, force_ascii=True):
return _token_set(s1, s2, partial=True, force_ascii=force_ascii)

# TODO: numerics
Expand All @@ -170,7 +170,7 @@ def partial_token_set_ratio(s1, s2, force_ascii=False):
###################

# q is for quick
def QRatio(s1, s2, force_ascii=False):
def QRatio(s1, s2, force_ascii=True):

p1 = full_process(s1, force_ascii=force_ascii)
p2 = full_process(s2, force_ascii=force_ascii)
Expand All @@ -181,7 +181,7 @@ def QRatio(s1, s2, force_ascii=False):
return ratio(p1, p2)

# w is for weighted
def WRatio(s1, s2, force_ascii=False):
def WRatio(s1, s2, force_ascii=True):

p1 = full_process(s1, force_ascii=force_ascii)
p2 = full_process(s2, force_ascii=force_ascii)
Expand All @@ -204,14 +204,14 @@ def WRatio(s1, s2, force_ascii=False):
if len_ratio > 8: partial_scale = .6

if try_partial:
partial = partial_ratio(p1, p2) * partial_scale
ptsor = partial_token_sort_ratio(p1, p2) * unbase_scale * partial_scale
ptser = partial_token_set_ratio(p1, p2) * unbase_scale * partial_scale
partial = partial_ratio(p1, p2) * partial_scale
ptsor = partial_token_sort_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale * partial_scale
ptser = partial_token_set_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale * partial_scale

return int(max(base, partial, ptsor, ptser))
else:
tsor = token_sort_ratio(p1, p2) * unbase_scale
tser = token_set_ratio(p1, p2) * unbase_scale
tsor = token_sort_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale
tser = token_set_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale

return int(max(base, tsor, tser))

8 changes: 4 additions & 4 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,13 +159,13 @@ def testWRatioUnicodeString(self):
# Cyrillic.
s1 = u"\u043f\u0441\u0438\u0445\u043e\u043b\u043e\u0433"
s2 = u"\u043f\u0441\u0438\u0445\u043e\u0442\u0435\u0440\u0430\u043f\u0435\u0432\u0442"
score = fuzz.WRatio(s1, s2)
score = fuzz.WRatio(s1, s2, force_ascii=False)
self.assertNotEqual(0, score)

# Chinese.
s1 = u"\u6211\u4e86\u89e3\u6570\u5b66"
s2 = u"\u6211\u5b66\u6570\u5b66"
score = fuzz.WRatio(s1, s2)
score = fuzz.WRatio(s1, s2, force_ascii=False)
self.assertNotEqual(0, score)

def testQRatioUnicodeString(self):
Expand All @@ -177,13 +177,13 @@ def testQRatioUnicodeString(self):
# Cyrillic.
s1 = u"\u043f\u0441\u0438\u0445\u043e\u043b\u043e\u0433"
s2 = u"\u043f\u0441\u0438\u0445\u043e\u0442\u0435\u0440\u0430\u043f\u0435\u0432\u0442"
score = fuzz.QRatio(s1, s2)
score = fuzz.QRatio(s1, s2, force_ascii=False)
self.assertNotEqual(0, score)

# Chinese.
s1 = u"\u6211\u4e86\u89e3\u6570\u5b66"
s2 = u"\u6211\u5b66\u6570\u5b66"
score = fuzz.QRatio(s1, s2)
score = fuzz.QRatio(s1, s2, force_ascii=False)
self.assertNotEqual(0, score)

def testQratioForceAscii(self):
Expand Down

0 comments on commit a52d149

Please sign in to comment.