-
Notifications
You must be signed in to change notification settings - Fork 2.8k
/
Copy pathabout_regex.py
140 lines (120 loc) · 5.05 KB
/
about_regex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from runner.koan import *
import re
class AboutRegex(Koan):
    """
    These koans are based on Ben's book: Regular Expressions in 10
    minutes. I found this book very useful, so I decided to write
    a koan file in order to practice everything it taught me.
    http://www.forta.com/books/0672325667/

    Every ``__`` placeholder and every ``change_this_search_string``
    value is intentionally left for the student to edit until the
    corresponding assertion passes.
    """

    def test_matching_literal_text(self):
        """
        Lesson 1 Matching Literal String
        """
        string = (
            "Hello, my name is Felix and these koans are based "
            "on Ben's book: Regular Expressions in 10 minutes."
        )
        m = re.search(__, string)
        self.assertTrue(
            m and m.group(0) and
            m.group(0) == 'Felix',
            "I want my name")

    def test_matching_literal_text_how_many(self):
        """
        Lesson 1 -- How many matches?

        The default behaviour of most regular expression engines is
        to return just the first match. In python you have the
        following options:

            match()    --> Determine if the RE matches at the
                           beginning of the string.
            search()   --> Scan through a string, looking for any
                           location where this RE matches.
            findall()  --> Find all substrings where the RE
                           matches, and return them as a list.
            finditer() --> Find all substrings where the RE
                           matches, and return them as an iterator.
        """
        string = (
            "Hello, my name is Felix and these koans are based "
            "on Ben's book: Regular Expressions in 10 minutes. "
            "Repeat My name is Felix"
        )
        m = re.match('Felix', string)  # TIP: match may not be the best option

        # I want to know how many times my name appears
        self.assertEqual(m, __)

    def test_matching_literal_text_not_case_sensitivity(self):
        """
        Lesson 1 -- Matching Literal String non case sensitivity.

        Most regex implementations also support matches that are not
        case sensitive. In python you can use re.IGNORECASE, in
        Javascript you can specify the optional i flag. In Ben's
        book you can see more languages.
        """
        string = (
            "Hello, my name is Felix or felix and this koan "
            "is based on Ben's book: Regular Expressions in 10 minutes."
        )

        self.assertEqual(re.findall("felix", string), __)
        self.assertEqual(re.findall("felix", string, re.IGNORECASE), __)

    def test_matching_any_character(self):
        """
        Lesson 1: Matching any character

        `.` matches any character: alphabetic characters, digits,
        and punctuation.
        """
        string = (
            "pecks.xlx\n"
            "orders1.xls\n"
            "apec1.xls\n"
            "na1.xls\n"
            "na2.xls\n"
            "sa1.xls"
        )

        # I want to find the three regional files: na1, na2 and sa1
        change_this_search_string = 'a..xlx'
        # assertEqual, not the assertEquals alias: the alias is deprecated
        # and no longer exists in unittest on Python 3.12+.
        self.assertEqual(
            len(re.findall(change_this_search_string, string)),
            3)

    def test_matching_set_character(self):
        """
        Lesson 2 -- Matching sets of characters

        A set of characters is defined using the metacharacters
        `[` and `]`. Everything between them is part of the set, and
        any single one of the set members will match.
        """
        string = (
            "sales.xlx\n"
            "sales1.xls\n"
            "orders3.xls\n"
            "apac1.xls\n"
            "sales2.xls\n"
            "na1.xls\n"
            "na2.xls\n"
            "sa1.xls\n"
            "ca1.xls"
        )

        # I want to find all files for North America (na) or South
        # America (sa), but not (ca).
        # TIP: the pattern .a. worked in the previous test, but here it
        # matches more than you want.
        change_this_search_string = '[nsc]a[2-9].xls'
        # assertEqual, not the assertEquals alias: the alias is deprecated
        # and no longer exists in unittest on Python 3.12+.
        self.assertEqual(
            len(re.findall(change_this_search_string, string)),
            3)

    def test_anything_but_matching(self):
        """
        Lesson 2 -- Using character set ranges

        Occasionally, you'll have a list of characters that you don't
        want to match. Character sets can be negated using the ^
        metacharacter.
        """
        string = (
            "sales.xlx\n"
            "sales1.xls\n"
            "orders3.xls\n"
            "apac1.xls\n"
            "sales2.xls\n"
            "sales3.xls\n"
            "europe2.xls\n"
            "sam.xls\n"
            "na1.xls\n"
            "na2.xls\n"
            "sa1.xls\n"
            "ca1.xls"
        )

        # I want to find the name 'sam'
        change_this_search_string = '[^nc]am'
        # assertEqual, not the assertEquals alias: the alias is deprecated
        # and no longer exists in unittest on Python 3.12+.
        self.assertEqual(
            re.findall(change_this_search_string, string),
            ['sam.xls'])