diff --git a/docs/.buildinfo b/docs/.buildinfo index 756f522..616a937 100644 --- a/docs/.buildinfo +++ b/docs/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 1b2c69900a20a5e6d404547532dec06c +config: 76d7be4743f134d1ecb28043ff0666d5 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/_modules/index.html b/docs/_modules/index.html index 76d757a..91b8010 100644 --- a/docs/_modules/index.html +++ b/docs/_modules/index.html @@ -317,7 +317,7 @@
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021 + © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/_modules/pycantonese/jyutping/characters.html b/docs/_modules/pycantonese/jyutping/characters.html
index 1a97900..d5d07d2 100644
--- a/docs/_modules/pycantonese/jyutping/characters.html
+++ b/docs/_modules/pycantonese/jyutping/characters.html
@@ -438,7 +438,7 @@ Source code for pycantonese.jyutping.characters
<
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/_modules/pycantonese/jyutping/parse_jyutping.html b/docs/_modules/pycantonese/jyutping/parse_jyutping.html
index ee38da4..b23191d 100644
--- a/docs/_modules/pycantonese/jyutping/parse_jyutping.html
+++ b/docs/_modules/pycantonese/jyutping/parse_jyutping.html
@@ -498,7 +498,7 @@ Source code for pycantonese.jyutping.parse_jyutping
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/_modules/pycantonese/jyutping/tipa.html b/docs/_modules/pycantonese/jyutping/tipa.html
index 6ec32a3..851b3c0 100644
--- a/docs/_modules/pycantonese/jyutping/tipa.html
+++ b/docs/_modules/pycantonese/jyutping/tipa.html
@@ -447,7 +447,7 @@ Source code for pycantonese.jyutping.tipa
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/_modules/pycantonese/jyutping/yale.html b/docs/_modules/pycantonese/jyutping/yale.html
index 1d5fa37..5aae92b 100644
--- a/docs/_modules/pycantonese/jyutping/yale.html
+++ b/docs/_modules/pycantonese/jyutping/yale.html
@@ -625,7 +625,7 @@ Source code for pycantonese.jyutping.yale
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/_modules/pycantonese/pos_tagging/hkcancor_to_ud.html b/docs/_modules/pycantonese/pos_tagging/hkcancor_to_ud.html
index 23c23e1..1061e7b 100644
--- a/docs/_modules/pycantonese/pos_tagging/hkcancor_to_ud.html
+++ b/docs/_modules/pycantonese/pos_tagging/hkcancor_to_ud.html
@@ -492,7 +492,7 @@ Source code for pycantonese.pos_tagging.hkcancor_to_ud
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/_modules/pycantonese/pos_tagging/tagger.html b/docs/_modules/pycantonese/pos_tagging/tagger.html
index 75608cf..bc74c88 100644
--- a/docs/_modules/pycantonese/pos_tagging/tagger.html
+++ b/docs/_modules/pycantonese/pos_tagging/tagger.html
@@ -660,7 +660,7 @@ Source code for pycantonese.pos_tagging.tagger
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/_modules/pycantonese/stop_words.html b/docs/_modules/pycantonese/stop_words.html
index bd6bd51..7b5a136 100644
--- a/docs/_modules/pycantonese/stop_words.html
+++ b/docs/_modules/pycantonese/stop_words.html
@@ -465,7 +465,7 @@ Source code for pycantonese.stop_words
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/_modules/pycantonese/word_segmentation.html b/docs/_modules/pycantonese/word_segmentation.html
index 10353fb..8502916 100644
--- a/docs/_modules/pycantonese/word_segmentation.html
+++ b/docs/_modules/pycantonese/word_segmentation.html
@@ -426,7 +426,7 @@ Source code for pycantonese.word_segmentation
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/_modules/pylangacq/chat.html b/docs/_modules/pylangacq/chat.html
index 61c7b44..9554e0c 100644
--- a/docs/_modules/pylangacq/chat.html
+++ b/docs/_modules/pylangacq/chat.html
@@ -322,6 +322,7 @@ Source code for pylangacq.chat
from pylangacq.measures import _CLITIC, _get_ipsyn, _get_mlum, _get_mluw, _get_ttr
from pylangacq.objects import Gra, Token, Utterance
+from pylangacq._clean_utterance import _clean_utterance
_ENCODING = "utf-8"
@@ -1651,252 +1652,6 @@ Source code for pylangacq.chat
)
-def _clean_utterance(utterance, phon=False):
- """Filter away the CHAT-style annotations in ``utterance``.
-
- Parameters
- ----------
- utterance : str
- The utterance as a str
- phon : bool, optional
- whether we are handling PhonBank data; defaults to ``False``.
- If ``True``, words like "xxx" and "yyy" won't be removed.
-
- Returns
- -------
- str
- """
- # Function tested with the following CHILDES datasets:
- # 1) Brent, Brown, HSLLD, Kuczaj, MacWhinney, Valian in Eng-NA-MOR
- # 2) YipMatthews in Biling
- # 3) LeeWongLeung in EastAsian/Cantonese
- # 4) CromptonPater, Goad, Inkelas, and Providence in PhonBank English
-
- # *** At the end of each step, apply remove_extra_spaces(). ***
-
- # Step 1: Remove unwanted scope elements (only the very certain cases)
- # [= whatever] for explanations
- # [x how_many_times] for collapses
- # [+ whatever] for actions etc
- # [* whatever] for error coding
- # [=? whatever] for uncertain transcriptions
- # [=! whatever] for actions etc
- # [% whatever] for random noises?
- # [- language_name] for using a non-dominant language
- # [^ whatever] for complex local events
- # whatever for audio/video time stamps? the character is 0x15
- # [<] and [>] for overlapping, including [<1], [>2] etc with numbers
- # (2.), (3.5) etc for pauses
- # [%act: whatever] for actions etc
-
- # [?] for best guess
- # ‹ and › used in conjunction with [?]
- # [!] for stressing
-
- # "[*] [/" replaced by "[/"
- # "] [*]" replaced by "]"
-
- # print('utterance:', utterance, type(utterance))
-
- utterance = re.sub(r"\[= [^\[]+?\]", "", utterance)
- utterance = re.sub(r"\[x \d+?\]", "", utterance)
- utterance = re.sub(r"\[\+ [^\[]+?\]", "", utterance)
- utterance = re.sub(r"\[\* [^\[]+?\]", "", utterance)
- utterance = re.sub(r"\[=\? [^\[]+?\]", "", utterance)
- utterance = re.sub(r"\[=! [^\[]+?\]", "", utterance)
- utterance = re.sub(r"\[% [^\[]+?\]", "", utterance)
- utterance = re.sub(r"\[- [^\[]+?\]", "", utterance)
- utterance = re.sub(r"\[\^ [^\[]+?\]", "", utterance)
- utterance = re.sub(r"[^]+?", "", utterance)
- utterance = re.sub(r"\[<\d?\]", "", utterance)
- utterance = re.sub(r"\[>\d?\]", "", utterance)
- utterance = re.sub(r"\(\d+?\.?\d*?\)", "", utterance)
- utterance = re.sub(r"\[%act: [^\[]+?\]", "", utterance)
-
- utterance = re.sub(r"\[\?\]", "", utterance)
- utterance = re.sub(r"\[\!\]", "", utterance)
- utterance = re.sub(r"‹", "", utterance)
- utterance = re.sub(r"›", "", utterance)
-
- utterance = re.sub(r"\[\*\] \[/", "[/", utterance)
- utterance = re.sub(r"\] \[\*\]", "]", utterance)
-
- utterance = _remove_extra_spaces(utterance)
- # print('step 1:', utterance)
-
- # Step 2: Pad elements with spaces to avoid human transcription errors
- # If utterance has these delimiters: [ ]
- # then pad them with extra spaces to avoid errors in transcriptions
- # like "movement[?]" (--> "movement [?]")
- #
- # If utterance has:
- # < > (left and right angle brackets), excluding "+<" (lazy overlap)
- # “ (beginning quote)
- # ” (ending quote)
- # , (comma)
- # ? (question mark)
- # . (period) <-- commented out at the moment
- # (.) (short pause)
- # then pad them with extra spaces.
-
- utterance = re.sub(r"<", " <", utterance)
- utterance = re.sub(r"\+ <", "+<", utterance)
- utterance = re.sub(r">", "> ", utterance)
- utterance = re.sub(r"\[", " [", utterance)
- utterance = re.sub(r"\]", "] ", utterance)
- utterance = re.sub(r"“", " “ ", utterance)
- utterance = re.sub(r"”", " ” ", utterance)
- utterance = re.sub(r",", " , ", utterance) # works together with next line
- utterance = re.sub(r"\+ ,", "+,", utterance)
- utterance = re.sub(r"[^\[\./!]\?", " ? ", utterance)
- # utterance = re.sub('[^\(\[\.\+]\.', ' . ', utterance)
- utterance = re.sub(r"\(\.\)", " (.) ", utterance)
- utterance = _remove_extra_spaces(utterance)
- # print('step 2:', utterance)
-
- # Step 3:
- # Handle [/], [//], [///], [/?] for repetitions/reformulation
- # [: xx] or [:: xx] for errors
- #
- # Discard "xx [/]", "<xx yy> [/]", "xx [//]", "<xx yy> [//]".
- # For "zz [: xx]" or "<yy zz> [:: xx]", keep "xx" and discard the rest.
- #
- # Strategies:
- # 1. Get all matching index pairs for angle brackets < and >.
- # 2. Delete the unwanted material inside and including these brackets
- # plus their signaling annotations (= "[:", "[::", "[/]", "[//]").
- # 3. Delete the unwanted words on the left of the signaling annotations.
-
- angle_brackets_l2r_pairs = {} # left-to-right
- for index_ in _find_indices(utterance, "<"):
- counter = 1
- for i in range(index_ + 1, len(utterance)):
- if utterance[i] == "<":
- counter += 1
- elif utterance[i] == ">":
- counter -= 1
-
- if counter == 0:
- angle_brackets_l2r_pairs[index_] = i
- break
- angle_brackets_r2l_pairs = {v: k for k, v in angle_brackets_l2r_pairs.items()}
-
- index_pairs = [] # characters bounded by index pairs to be removed
-
- # remove ' [///]'
- triple_slash_right_indices = _find_indices(utterance, r"> \[///\]")
- index_pairs += [(begin + 1, begin + 6) for begin in triple_slash_right_indices]
-
- # remove ' [//]'
- double_overlap_right_indices = _find_indices(utterance, r"> \[//\]")
- index_pairs += [(begin + 1, begin + 5) for begin in double_overlap_right_indices]
-
- # remove ' [/]'
- single_overlap_right_indices = _find_indices(utterance, r"> \[/\]")
- index_pairs += [(begin + 1, begin + 4) for begin in single_overlap_right_indices]
-
- # remove ' [/?]'
- slash_question_indices = _find_indices(utterance, r"> \[/\?\]")
- index_pairs += [(begin + 1, begin + 4) for begin in slash_question_indices]
-
- # remove ' [/-]'
- slash_dash_indices = _find_indices(utterance, r"> \[/\-\]")
- index_pairs += [(begin + 1, begin + 4) for begin in slash_dash_indices]
-
- # remove ' [::'
- double_error_right_indices = _find_indices(utterance, r"> \[::")
- index_pairs += [(begin + 1, begin + 4) for begin in double_error_right_indices]
-
- # remove ' [:'
- single_error_right_indices = _find_indices(utterance, r"> \[: ")
- index_pairs += [(begin + 1, begin + 3) for begin in single_error_right_indices]
-
- right_indices = (
- double_overlap_right_indices
- + single_overlap_right_indices
- + double_error_right_indices
- + single_error_right_indices
- + triple_slash_right_indices
- + slash_question_indices
- + slash_dash_indices
- )
-
- index_pairs = index_pairs + [
- (angle_brackets_r2l_pairs[right], right) for right in sorted(right_indices)
- ]
- indices_to_ignore = set()
- for left, right in index_pairs:
- for i in range(left, right + 1):
- indices_to_ignore.add(i)
-
- new_utterance = ""
- for i in range(len(utterance)):
- if i not in indices_to_ignore:
- new_utterance += utterance[i]
- utterance = new_utterance
-
- utterance = re.sub(r"\S+? \[/\]", "", utterance)
- utterance = re.sub(r"\S+? \[//\]", "", utterance)
- utterance = re.sub(r"\S+? \[///\]", "", utterance)
- utterance = re.sub(r"\S+? \[/\?\]", "", utterance)
- utterance = re.sub(r"\S+? \[/\-\]", "", utterance)
-
- utterance = re.sub(r"\S+? \[::", "", utterance)
- utterance = re.sub(r"\S+? \[:", "", utterance)
-
- utterance = _remove_extra_spaces(utterance)
- # print('step 3:', utterance)
-
- # Step 4: Remove unwanted symbols
- utterance = re.sub(r"“", "", utterance)
- utterance = re.sub(r"”", "", utterance)
-
- utterance = _remove_extra_spaces(utterance)
-
- # Step 5: Split utterance by spaces and determine whether to keep items.
-
- escape_prefixes = {
- "[?",
- "[/",
- "[<",
- "[>",
- "[:",
- "[!",
- "[*",
- '+"',
- "+,",
- "<&",
- }
- escape_words = {"0", "++", "+<", "+^", "(.)", "(..)", "(...)", ":", ";"}
- keep_prefixes = {'+"/', "+,/", '+".'}
-
- if not phon:
- escape_words.update({"xxx", "yyy", "www", "xxx:", "yyy:"})
- escape_prefixes.update({"&"})
- else:
- escape_words.update({","})
- escape_prefixes.update({"0"})
-
- words = utterance.split()
- new_words = []
-
- for word in words:
- word = re.sub(r"\A<", "", word) # remove beginning <
- word = re.sub(r">\Z", "", word) # remove final >
- word = re.sub(r"\]\Z", "", word) # remove final ]
-
- not_an_escape_word = word not in escape_words
- no_escape_prefix = not any(word.startswith(e) for e in escape_prefixes)
- has_keep_prefix = any(word.startswith(k) for k in keep_prefixes)
-
- if (not_an_escape_word and no_escape_prefix) or has_keep_prefix:
- new_words.append(word)
-
- # print('step 5:', remove_extra_spaces(' '.join(new_words)))
-
- return _remove_extra_spaces(" ".join(new_words))
-
-
def _clean_word(word):
"""Clean the word.
@@ -1925,38 +1680,6 @@ Source code for pylangacq.chat
return new_word
-def _remove_extra_spaces(inputstr):
- """Remove extra spaces in *inputstr* so that there are only single spaces.
-
- Parameters
- ----------
- inputstr : str
-
- Returns
- -------
- str
- """
- while " " in inputstr:
- inputstr = inputstr.replace(" ", " ")
- return inputstr.strip()
-
-
-def _find_indices(longstr, substring):
- """Find all indices of non-overlapping ``substring`` in ``longstr``.
-
- Parameters
- ----------
- longstr : str
- substring : str
-
- Returns
- -------
- list of int
- List of indices of the long string for where substring occurs
- """
- return [m.start() for m in re.finditer(substring, longstr)]
-
-
class _HTTPSession(requests.Session):
def __init__(
self, max_retries: int = 10, backoff_factor: float = 0.1, timeout: int = 60
@@ -1991,7 +1714,7 @@ Source code for pylangacq.chat
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/_sources/data.rst.txt b/docs/_sources/data.rst.txt
index a180e5d..b22ce79 100644
--- a/docs/_sources/data.rst.txt
+++ b/docs/_sources/data.rst.txt
@@ -65,6 +65,34 @@ the CC BY-NC-SA 3.0 license.
As of March 2021, the following Cantonese-related datasets are
available from CHILDES and TalkBank (in alphabetical order):
+.. invisible-code-block: python
+
+ >>> import os
+
+.. skip: start if(os.getenv("CI") == "true", reason="certain CHILDES data pulls fail in some but not all python versions for unknown reasons")
+
+* `Child Heritage Chinese Corpus `_
+
+ .. code-block:: python
+
+ >>> url = "https://childes.talkbank.org/data/Biling/CHCC.zip"
+ >>> corpus = pycantonese.read_chat(url)
+ >>> corpus.n_files()
+ 190
+ >>> len(corpus.words())
+ 533877
+
+* `Guthrie Bilingual Corpus `_
+
+ .. code-block:: python
+
+ >>> url = "https://childes.talkbank.org/data/Biling/Guthrie.zip"
+ >>> corpus = pycantonese.read_chat(url)
+ >>> corpus.n_files()
+ 36
+ >>> len(corpus.words())
+ 70438
+
* `HKU-70 Corpus `_
.. code-block:: python
@@ -76,12 +104,6 @@ available from CHILDES and TalkBank (in alphabetical order):
>>> len(corpus.words())
178270
-.. invisible-code-block: python
-
- >>> import os
-
-.. skip: start if(os.getenv("CI") == "true", reason="certain CHILDES data pulls fail in some but not all python versions for unknown reasons")
-
* `Lee-Wong-Leung Corpus `_
.. code-block:: python
diff --git a/docs/api.html b/docs/api.html
index 1d61373..3ade092 100644
--- a/docs/api.html
+++ b/docs/api.html
@@ -1601,7 +1601,7 @@
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/changelog.html b/docs/changelog.html
index 9da0ba6..02268f4 100644
--- a/docs/changelog.html
+++ b/docs/changelog.html
@@ -641,7 +641,7 @@ [0.1] - 2014-12-17
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/data.html b/docs/data.html
index 7ed409e..0e3b2aa 100644
--- a/docs/data.html
+++ b/docs/data.html
@@ -350,6 +350,30 @@ CHILDES and TalkBank DataChild Heritage Chinese Corpus
+
+>>> url = "https://childes.talkbank.org/data/Biling/CHCC.zip"
+>>> corpus = pycantonese.read_chat(url)
+>>> corpus.n_files()
+190
+>>> len(corpus.words())
+533877
+
+
+
+
+
+
+>>> url = "https://childes.talkbank.org/data/Biling/Guthrie.zip"
+>>> corpus = pycantonese.read_chat(url)
+>>> corpus.n_files()
+36
+>>> len(corpus.words())
+70438
+
+
+
+
>>> url = "https://childes.talkbank.org/data/Chinese/Cantonese/HKU.zip"
@@ -362,8 +386,6 @@ CHILDES and TalkBank DataLee-Wong-Leung Corpus
>>> url = "https://childes.talkbank.org/data/Chinese/Cantonese/LeeWongLeung.zip"
@@ -456,7 +478,7 @@ Custom Data
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/generated/pycantonese.CHATReader.html b/docs/generated/pycantonese.CHATReader.html
index 1a4b003..1582949 100644
--- a/docs/generated/pycantonese.CHATReader.html
+++ b/docs/generated/pycantonese.CHATReader.html
@@ -593,7 +593,7 @@ pycantonese.CHATReader
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/generated/pycantonese.CHATReader.search.html b/docs/generated/pycantonese.CHATReader.search.html
index e89d5df..31d4017 100644
--- a/docs/generated/pycantonese.CHATReader.search.html
+++ b/docs/generated/pycantonese.CHATReader.search.html
@@ -400,7 +400,7 @@ pycantonese.CHATReader.search
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/generated/pycantonese.characters_to_jyutping.html b/docs/generated/pycantonese.characters_to_jyutping.html
index 552c60a..ef3fd98 100644
--- a/docs/generated/pycantonese.characters_to_jyutping.html
+++ b/docs/generated/pycantonese.characters_to_jyutping.html
@@ -363,7 +363,7 @@ pycantonese.characters_to_jyutping
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/generated/pycantonese.hkcancor.html b/docs/generated/pycantonese.hkcancor.html
index 9992063..66e3640 100644
--- a/docs/generated/pycantonese.hkcancor.html
+++ b/docs/generated/pycantonese.hkcancor.html
@@ -337,7 +337,7 @@ pycantonese.hkcancor
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/generated/pycantonese.jyutping_to_tipa.html b/docs/generated/pycantonese.jyutping_to_tipa.html
index 826f446..d991c11 100644
--- a/docs/generated/pycantonese.jyutping_to_tipa.html
+++ b/docs/generated/pycantonese.jyutping_to_tipa.html
@@ -358,7 +358,7 @@ pycantonese.jyutping_to_tipa
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/generated/pycantonese.jyutping_to_yale.html b/docs/generated/pycantonese.jyutping_to_yale.html
index 070a363..86d13f8 100644
--- a/docs/generated/pycantonese.jyutping_to_yale.html
+++ b/docs/generated/pycantonese.jyutping_to_yale.html
@@ -374,7 +374,7 @@ pycantonese.jyutping_to_yale
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/generated/pycantonese.parse_jyutping.html b/docs/generated/pycantonese.parse_jyutping.html
index 0ff328a..32b9944 100644
--- a/docs/generated/pycantonese.parse_jyutping.html
+++ b/docs/generated/pycantonese.parse_jyutping.html
@@ -357,7 +357,7 @@ pycantonese.parse_jyutping
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/generated/pycantonese.pos_tag.html b/docs/generated/pycantonese.pos_tag.html
index 0cb6fb5..19b3e99 100644
--- a/docs/generated/pycantonese.pos_tag.html
+++ b/docs/generated/pycantonese.pos_tag.html
@@ -382,7 +382,7 @@ pycantonese.pos_tag
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/generated/pycantonese.pos_tagging.hkcancor_to_ud.html b/docs/generated/pycantonese.pos_tagging.hkcancor_to_ud.html
index 329753e..d11c80c 100644
--- a/docs/generated/pycantonese.pos_tagging.hkcancor_to_ud.html
+++ b/docs/generated/pycantonese.pos_tagging.hkcancor_to_ud.html
@@ -363,7 +363,7 @@ pycantonese.pos_tagging.hkcancor_to_ud
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/generated/pycantonese.read_chat.html b/docs/generated/pycantonese.read_chat.html
index 9e46ef2..87e5f98 100644
--- a/docs/generated/pycantonese.read_chat.html
+++ b/docs/generated/pycantonese.read_chat.html
@@ -366,7 +366,7 @@ pycantonese.read_chat
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/generated/pycantonese.segment.html b/docs/generated/pycantonese.segment.html
index 2c2af0b..7727813 100644
--- a/docs/generated/pycantonese.segment.html
+++ b/docs/generated/pycantonese.segment.html
@@ -370,7 +370,7 @@ pycantonese.segment
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/generated/pycantonese.stop_words.html b/docs/generated/pycantonese.stop_words.html
index 54c4400..ca1e2bb 100644
--- a/docs/generated/pycantonese.stop_words.html
+++ b/docs/generated/pycantonese.stop_words.html
@@ -364,7 +364,7 @@ pycantonese.stop_words
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021
+ © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021 + © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/genindex.html b/docs/genindex.html index 2dafb2e..6c87a91 100644 --- a/docs/genindex.html +++ b/docs/genindex.html @@ -570,7 +570,7 @@- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021 + © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021 + © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/jyutping.html b/docs/jyutping.html index 03a6ef3..1680b6f 100644 --- a/docs/jyutping.html +++ b/docs/jyutping.html @@ -471,7 +471,7 @@- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021 + © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/papers.html b/docs/papers.html index 2765416..adc0675 100644 --- a/docs/papers.html +++ b/docs/papers.html @@ -329,7 +329,7 @@- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021 + © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021 + © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021 + © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/docs/search.html b/docs/search.html index f45ddde..1618b22 100644 --- a/docs/search.html +++ b/docs/search.html @@ -321,7 +321,7 @@- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021 + © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021 + © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021 + © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
- © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 21, 2021 + © Copyright 2014-2021, Jackson L. Lee | Documentation last updated on March 23, 2021
diff --git a/setup.py b/setup.py index 2877dc6..cf4e385 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ def main(): setup_requires="setuptools>=39", install_requires=[ "dataclasses ; python_version < '3.7'", - "pylangacq==0.13.0", + "pylangacq==0.13.1", "wordseg==0.0.2", ], package_data={