Skip to content

Commit

Permalink
Merge pull request #3699 from Cheukting/py311_regex
Browse files Browse the repository at this point in the history
  • Loading branch information
Zac-HD authored Jul 19, 2023
2 parents 36bb4ca + 635c512 commit 95a7706
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 2 deletions.
7 changes: 7 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
RELEASE_TYPE: minor

:func:`~hypothesis.strategies.from_regex` now supports the atomic grouping
(``(?>...)``) and possessive quantifier (``*+``, ``++``, ``?+``, ``{m,n}+``)
syntax `added in Python 3.11 <https://docs.python.org/3/whatsnew/3.11.html#re>`__.

Thanks to Cheuk Ting Ho for implementing this!
14 changes: 12 additions & 2 deletions hypothesis-python/src/hypothesis/strategies/_internal/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,16 @@
# Import the regex parser internals from their Python 3.11+ location first
# (re._constants / re._parser), falling back to the older top-level modules.
try: # pragma: no cover
import re._constants as sre
import re._parser as sre_parse

# Opcodes for atomic groups and possessive quantifiers, read from the
# constants module that ships with Python 3.11+.
ATOMIC_GROUP = sre.ATOMIC_GROUP
POSSESSIVE_REPEAT = sre.POSSESSIVE_REPEAT
except ImportError: # Python < 3.11
import sre_constants as sre
import sre_parse

# Placeholder sentinels: each object() is a fresh, unique instance, so
# comparisons against these names presumably never match an opcode produced
# by the older parser -- the module-level names still exist either way.
ATOMIC_GROUP = object()
POSSESSIVE_REPEAT = object()

from hypothesis import reject, strategies as st
from hypothesis.internal.charmap import as_general_categories, categories
from hypothesis.internal.compat import int_to_byte
Expand Down Expand Up @@ -474,7 +480,7 @@ def recurse(codes):
# Regex 'a|b|c' (branch)
return st.one_of([recurse(branch) for branch in value[1]])

elif code in [sre.MIN_REPEAT, sre.MAX_REPEAT]:
elif code in [sre.MIN_REPEAT, sre.MAX_REPEAT, POSSESSIVE_REPEAT]:
# Regexes 'a?', 'a*', 'a+' and their non-greedy and possessive variants
# (repeaters)
at_least, at_most, subregex = value
Expand All @@ -494,8 +500,12 @@ def recurse(codes):
recurse(value[1]),
recurse(value[2]) if value[2] else st.just(empty),
)
elif code == ATOMIC_GROUP: # pragma: no cover # new in Python 3.11
return _strategy(value, context, is_unicode)

else:
# Currently there are no known code points other than those handled here.
# This branch is just future-proofing.
raise NotImplementedError(f"Unknown code point: {code!r}")
raise NotImplementedError(
f"Unknown code point: {code!r}. Please open an issue."
)
19 changes: 19 additions & 0 deletions hypothesis-python/tests/nocover/test_regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@

import re
import string
import sys
from functools import reduce

import pytest

from hypothesis import assume, given, reject, strategies as st
from hypothesis.strategies._internal.regex import base_regex_strategy

Expand Down Expand Up @@ -86,6 +89,22 @@ def test_fuzz_stuff(data):
assert regex.search(ex)


@pytest.mark.skipif(sys.version_info < (3, 11), reason="new syntax")
@given(st.data())
def test_regex_atomic_group(data):
    """Examples drawn for an atomic-group pattern must match that pattern.

    Skipped on interpreters older than Python 3.11, where the ``(?>...)``
    syntax was introduced.
    """
    atomic_pattern = "a(?>bc|b)c"
    example = data.draw(st.from_regex(atomic_pattern))
    match = re.search(atomic_pattern, example)
    assert match is not None


@pytest.mark.skipif(sys.version_info < (3, 11), reason="new syntax")
@given(st.data())
def test_regex_possessive(data):
    """Examples drawn for a possessive-quantifier pattern must match it.

    Skipped on interpreters older than Python 3.11, where the ``*+``
    syntax was introduced.
    """
    possessive_pattern = '"[^"]*+"'
    example = data.draw(st.from_regex(possessive_pattern))
    match = re.search(possessive_pattern, example)
    assert match is not None


# Some preliminaries, to establish what's happening:
I_WITH_DOT = "\u0130"
assert I_WITH_DOT.swapcase() == "i\u0307" # note: string of length two!
Expand Down

0 comments on commit 95a7706

Please sign in to comment.