# koans/about_regex.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from runner.koan import *

import re


class AboutRegex(Koan):
    """
        These koans are based on Ben's book: Regular Expressions in 10
        minutes. I found this book very useful, so I decided to write
        a koan file in order to practice everything it taught me.
        http://www.forta.com/books/0672325667/
    """

    def test_matching_literal_text(self):
        """
            Lesson 1 -- Matching literal text

            A pattern made only of ordinary characters matches exactly
            that text. Fill in the blank so the search finds 'Felix'.
        """
        text = (
            "Hello, my name is Felix and these koans are based "
            "on Ben's book: Regular Expressions in 10 minutes."
        )
        m = re.search(__, text)
        # search() yields None when nothing matches, so guard before
        # reading the matched text.
        found = m and m.group(0)
        self.assertTrue(found and found == 'Felix', "I want my name")

    def test_matching_literal_text_how_many(self):
        """
            Lesson 1 -- How many matches?

            Most regular expression engines return only the first match
            by default. Python gives you these choices:

                match()    -->  Check whether the RE matches at the
                                beginning of the string.
                search()   -->  Scan through the string for a location
                                where the RE matches.
                findall()  -->  Collect every substring the RE matches
                                and return them as a list.
                finditer() -->  Collect every substring the RE matches
                                and return them as an iterator.
        """
        text = (
            "Hello, my name is Felix and these koans are based "
            "on Ben's book: Regular Expressions in 10 minutes. "
            "Repeat My name is Felix"
        )
        # TIP: match may not be the best option
        m = re.match('Felix', text)

        # I want to know how many times my name appears
        self.assertEqual(m, __)

    def test_matching_literal_text_not_case_sensitivity(self):
        """
            Lesson 1 -- Matching literal text regardless of case.

            Most regex implementations can also match without regard to
            case. Python exposes this through re.IGNORECASE, while
            Javascript uses the optional i flag. Ben's book covers
            further languages.
        """
        text = (
            "Hello, my name is Felix or felix and this koan "
            "is based on Ben's book: Regular Expressions in 10 minutes."
        )

        # First the plain search, then the same search ignoring case.
        case_sensitive = re.findall("felix", text)
        self.assertEqual(case_sensitive, __)

        case_insensitive = re.findall("felix", text, re.IGNORECASE)
        self.assertEqual(case_insensitive, __)

    def test_matching_any_character(self):
        """
            Lesson 1 -- Matching any character

            `.` matches any character: alphabetic characters, digits,
            and punctuation.

            Edit `change_this_search_string` until the pattern yields
            exactly three matches in the file list below.
        """
        text = (
            "pecks.xlx\n"
            "orders1.xls\n"
            "apec1.xls\n"
            "na1.xls\n"
            "na2.xls\n"
            "sa1.xls"
        )

        # I want to find the three regional .xls files.
        # NOTE(review): presumably na1.xls, na2.xls and sa1.xls -- the
        # goal is inferred from the expected count and the names above;
        # confirm against the book's example.
        change_this_search_string = 'a..xlx'
        matches = re.findall(change_this_search_string, text)
        # assertEqual, not the deprecated assertEquals alias, to agree
        # with the other koans in this class.
        self.assertEqual(len(matches), 3)

    def test_matching_set_character(self):
        """
            Lesson 2 -- Matching sets of characters

            A set of characters is defined using the metacharacters
            `[` and `]`. Everything between them is part of the set, and
            any single one of the set members will match.

            Edit `change_this_search_string` until the pattern yields
            exactly three matches in the file list below.
        """
        text = (
            "sales.xlx\n"
            "sales1.xls\n"
            "orders3.xls\n"
            "apac1.xls\n"
            "sales2.xls\n"
            "na1.xls\n"
            "na2.xls\n"
            "sa1.xls\n"
            "ca1.xls"
        )

        # I want to find all files for North America (na) or South
        # America (sa), but not (ca).
        # TIP: the pattern .a. works in the previous koan, but here it
        # matches more than you want.
        change_this_search_string = '[nsc]a[2-9].xls'
        matches = re.findall(change_this_search_string, text)
        # assertEqual, not the deprecated assertEquals alias, to agree
        # with the other koans in this class.
        self.assertEqual(len(matches), 3)

    def test_anything_but_matching(self):
        """
            Lesson 2 -- "Anything but" matching (negated character sets)

            Occasionally, you'll have a list of characters that you don't
            want to match. Character sets can be negated using the ^
            metacharacter.

            Edit `change_this_search_string` until findall() returns
            exactly ['sam.xls'] for the file list below.
        """
        text = (
            "sales.xlx\n"
            "sales1.xls\n"
            "orders3.xls\n"
            "apac1.xls\n"
            "sales2.xls\n"
            "sales3.xls\n"
            "europe2.xls\n"
            "sam.xls\n"
            "na1.xls\n"
            "na2.xls\n"
            "sa1.xls\n"
            "ca1.xls"
        )

        # I want to find the name 'sam'
        change_this_search_string = '[^nc]am'
        matches = re.findall(change_this_search_string, text)
        # assertEqual, not the deprecated assertEquals alias, to agree
        # with the other koans in this class.
        self.assertEqual(matches, ['sam.xls'])