From 15edbf4c130b717894994b4ce9d119746360e129 Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Sun, 27 Jan 2019 19:47:34 -0500 Subject: [PATCH 1/9] spelling: Remembrances (translation??) --- test_topics.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_topics.json b/test_topics.json index f0657a3..d17f765 100644 --- a/test_topics.json +++ b/test_topics.json @@ -695,7 +695,7 @@ 0.7636483311653137 ], [ - "Siddur Sefard, Additional Prayers , Six Rememberances 10", + "Siddur Sefard, Additional Prayers , Six Remembrances 10", 0.7589909434318542 ], [ From 90f88b471a9c9feddab8028cd49edc2a9ad5d636 Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Sun, 27 Jan 2019 19:48:00 -0500 Subject: [PATCH 2/9] spelling: Scriptures (translation??) --- high_conf_links.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/high_conf_links.json b/high_conf_links.json index ac3fb22..1ede433 100644 --- a/high_conf_links.json +++ b/high_conf_links.json @@ -563561,7 +563561,7 @@ ], [ "Psalms 99:6", - "Divrei Emet, Divrei Emet on Torah, A Collection on Sciptures 5", + "Divrei Emet, Divrei Emet on Torah, A Collection on Scriptures 5", 102.61833333333334 ], [ From 5ee3620c240e4305569f00dddc0c9804f8d3c5e3 Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Sun, 27 Jan 2019 19:38:13 -0500 Subject: [PATCH 3/9] spelling: appropriate --- Hebrew Spellcheck.ipynb | 2 +- hebrew_spellcheck.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Hebrew Spellcheck.ipynb b/Hebrew Spellcheck.ipynb index 49e068c..082b3c4 100644 --- a/Hebrew Spellcheck.ipynb +++ b/Hebrew Spellcheck.ipynb @@ -68,7 +68,7 @@ "\n", "def determine_beginning_stopword(beginning_stopword):\n", " \"\"\"\n", - " Appends the approproate beginning stopword. There are three possibilites, \n", + " Appends the appropriate beginning stopword. There are three possibilites, \n", " therefore the correct option needs to be determine\n", " :param beginning_stopword: The modified word\n", " :return: Appropriate beginning stopword\n", diff --git a/hebrew_spellcheck.py b/hebrew_spellcheck.py index 42c709c..525e7cf 100644 --- a/hebrew_spellcheck.py +++ b/hebrew_spellcheck.py @@ -47,7 +47,7 @@ def we_havent_added_prefix_or_suffix(word): def determine_beginning_stopword(beginning_stopword): """ - Appends the approproate beginning stopword. There are three possibilites, + Appends the appropriate beginning stopword. There are three possibilites, therefore the correct option needs to be determine :param beginning_stopword: The modified word :return: Appropriate beginning stopword From b119f0f76714756fb6082c2a43d074009fef7632 Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Sun, 27 Jan 2019 19:41:46 -0500 Subject: [PATCH 4/9] spelling: forbidden (translation??) --- high_conf_links.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/high_conf_links.json b/high_conf_links.json index 1ede433..5fc6493 100644 --- a/high_conf_links.json +++ b/high_conf_links.json @@ -55691,7 +55691,7 @@ ], [ "Daniel 7:9", - "Likutei Halachot, Yoreh Deah, Laws of Fobidden Fabric Blends 3:2:1", + "Likutei Halachot, Yoreh Deah, Laws of Forbidden Fabric Blends 3:2:1", 146.76882352941178 ], [ @@ -279761,7 +279761,7 @@ ], [ "Chagigah 13a:9", - "Likutei Halachot, Yoreh Deah, Laws of Fobidden Fabric Blends 4:3:1", + "Likutei Halachot, Yoreh Deah, Laws of Forbidden Fabric Blends 4:3:1", 163.4555 ], [ @@ -389716,7 +389716,7 @@ ], [ "Berakhot 6a:5", - "Likutei Halachot, Yoreh Deah, Laws of Fobidden Fabric Blends 2:2:1", + "Likutei Halachot, Yoreh Deah, Laws of Forbidden Fabric Blends 2:2:1", 83.0 ], [ @@ -391401,7 +391401,7 @@ ], [ "Proverbs 25:2", - "Likutei Halachot, Yoreh Deah, Laws of Fobidden Fabric Blends 3:5:1", + "Likutei Halachot, Yoreh Deah, Laws of Forbidden Fabric Blends 3:5:1", 66.27947368421053 ], [ From 80f25cd448c6f0672c2c1f9a15179505e2b5f86a Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Sun, 27 Jan 2019 19:45:51 -0500 Subject: [PATCH 5/9] spelling: occurrences --- RAKE Hebrew.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RAKE Hebrew.ipynb b/RAKE Hebrew.ipynb index a5c3922..06c4424 100644 --- a/RAKE Hebrew.ipynb +++ b/RAKE Hebrew.ipynb @@ -1078,7 +1078,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# This just gets a list of every word and the number of occurences within the entire corpus. Used to help build a stopword list" + "# This just gets a list of every word and the number of occurrences within the entire corpus. Used to help build a stopword list" ] }, { From 11b63e05537417a6b4a2f90b462e57c730d49003 Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Sun, 27 Jan 2019 19:46:40 -0500 Subject: [PATCH 6/9] spelling: possibilities --- Hebrew Spellcheck.ipynb | 2 +- hebrew_spellcheck.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Hebrew Spellcheck.ipynb b/Hebrew Spellcheck.ipynb index 082b3c4..c89e2ca 100644 --- a/Hebrew Spellcheck.ipynb +++ b/Hebrew Spellcheck.ipynb @@ -68,7 +68,7 @@ "\n", "def determine_beginning_stopword(beginning_stopword):\n", " \"\"\"\n", - " Appends the appropriate beginning stopword. There are three possibilites, \n", + " Appends the appropriate beginning stopword. There are three possibilities, \n", " therefore the correct option needs to be determine\n", " :param beginning_stopword: The modified word\n", " :return: Appropriate beginning stopword\n", diff --git a/hebrew_spellcheck.py b/hebrew_spellcheck.py index 525e7cf..a50cdcb 100644 --- a/hebrew_spellcheck.py +++ b/hebrew_spellcheck.py @@ -47,7 +47,7 @@ def we_havent_added_prefix_or_suffix(word): def determine_beginning_stopword(beginning_stopword): """ - Appends the appropriate beginning stopword. There are three possibilites, + Appends the appropriate beginning stopword. There are three possibilities, therefore the correct option needs to be determine :param beginning_stopword: The modified word :return: Appropriate beginning stopword From 6734fcf1195830e1e72fb6d1396e250fe0731f8c Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Sun, 27 Jan 2019 19:46:51 -0500 Subject: [PATCH 7/9] spelling: punctuation --- Doc2Vec.ipynb | 2 +- create_docs_for_doc2vec.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc2Vec.ipynb b/Doc2Vec.ipynb index 4b0160b..7dfb530 100644 --- a/Doc2Vec.ipynb +++ b/Doc2Vec.ipynb @@ -222,7 +222,7 @@ "\n", "def remove_punctuation(data):\n", " \"\"\"\n", - " Removes various punctation from Hebrew text.\n", + " Removes various punctuation from Hebrew text.\n", " :param data: String of Hebrew Text\n", " :return: String without punctuation\n", " \"\"\"\n", diff --git a/create_docs_for_doc2vec.py b/create_docs_for_doc2vec.py index 93d9fb8..f1608b5 100644 --- a/create_docs_for_doc2vec.py +++ b/create_docs_for_doc2vec.py @@ -168,7 +168,7 @@ def remove_dicta_prefix(string, marker): def remove_punctuation(data): """ - Removes various punctation from Hebrew text. + Removes various punctuation from Hebrew text. :param data: String of Hebrew Text :return: String without punctuation """ From 9fb49f67fbb531f4162538d63314bc52f75b84ca Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Sun, 27 Jan 2019 19:48:42 -0500 Subject: [PATCH 8/9] spelling: sefaria --- Doc2Vec.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc2Vec.ipynb b/Doc2Vec.ipynb index 7dfb530..7a3348b 100644 --- a/Doc2Vec.ipynb +++ b/Doc2Vec.ipynb @@ -281,7 +281,7 @@ "\n", "def get_segments(filename):\n", " \"\"\"\n", - " Combs through the entire Sefaris Hebrew Library and cleans the text for Doc2Vec.\n", + " Combs through the entire Sefaria Hebrew Library and cleans the text for Doc2Vec.\n", " Creates a dict:\n", " Key: Ref\n", " Value: The text of that ref cleaned and ready for Doc2Vec\n", From 806bcf9fed445cef69999076cbedf7ec9efbf27b Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Sun, 27 Jan 2019 19:49:31 -0500 Subject: [PATCH 9/9] spelling: semantical --- create_docs_for_doc2vec.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/create_docs_for_doc2vec.py b/create_docs_for_doc2vec.py index f1608b5..7ab9497 100644 --- a/create_docs_for_doc2vec.py +++ b/create_docs_for_doc2vec.py @@ -236,9 +236,9 @@ def extract_reference(data): return data.split(u'~')[0] -def concatenate_sematically_linked_segments(topic_ranged_refs, ranged_to_segment): +def concatenate_semantically_linked_segments(topic_ranged_refs, ranged_to_segment): """ - Combines multiple Sefaria Segments into one larger segment based on sematical meaning + Combines multiple Sefaria Segments into one larger segment based on semantical meaning :param topic_ranged_refs: List of ranged trefs that define the semantic separation :param ranged_to_segment: Nested dict. First layer points from Ranged Refs to all sub-seg-refs. The nested dict points from the sub_seg_ref to the text of said sub_seg_ref :return: Dict containing semantically define ranged refs corresponding to their concatenated text @@ -294,8 +294,8 @@ def get_segments(filename): else: all_data[ref] = data - all_data.update(concatenate_sematically_linked_segments(tanakh_topic_ranged_refs, tanakh_ranged_to_segment)) - all_data.update(concatenate_sematically_linked_segments(talmud_topic_ranged_refs, talmud_ranged_to_segment)) + all_data.update(concatenate_semantically_linked_segments(tanakh_topic_ranged_refs, tanakh_ranged_to_segment)) + all_data.update(concatenate_semantically_linked_segments(talmud_topic_ranged_refs, talmud_ranged_to_segment)) return all_data