diff --git a/build/ci/production-values.yaml b/build/ci/production-values.yaml index e77469a237..61b2315f87 100644 --- a/build/ci/production-values.yaml +++ b/build/ci/production-values.yaml @@ -151,7 +151,7 @@ cronJobs: reindexElasticSearch: enabled: true SEARCH_HOST_ES6: "contextus-es-default-0.elasticsearch.svc" - SEARCH_HOST_ES8: "elasticsearch-8-es-default-0.elasticsearch.svc" + SEARCH_HOST_ES8: "elasticsearch-8-es-default.elasticsearch.svc" topicsIndexing: enabled: true trello: diff --git a/sefaria/model/schema.py b/sefaria/model/schema.py index 82ad383ca3..928b549420 100644 --- a/sefaria/model/schema.py +++ b/sefaria/model/schema.py @@ -243,7 +243,7 @@ def remove_shared_term(self, term): class Term(abst.AbstractMongoRecord, AbstractTitledObject): """ A Term is a shared title node. It can be referenced and used by many different Index nodes. - Examples: Noah, Perek HaChovel, Even HaEzer + Examples: Noah, HaChovel Terms that use the same TermScheme can be ordered. """ collection = 'term' @@ -588,7 +588,7 @@ def traverse_tree(self, callback, **kwargs): """ callback(self, **kwargs) for child in self.children: - child.traverse_to_string(callback, **kwargs) + child.traverse_tree(callback, **kwargs) def traverse_to_string(self, callback, depth=0, **kwargs): st = callback(self, depth, **kwargs) diff --git a/sefaria/model/text.py b/sefaria/model/text.py index 0de7c248d2..cf25f95c6f 100644 --- a/sefaria/model/text.py +++ b/sefaria/model/text.py @@ -997,7 +997,7 @@ def sub_content_with_ref(self, ref=None, value=None): def sub_content(self, key_list=None, indx_list=None, value=None): """ - Get's or sets values deep within the content of this version. + Gets or sets values deep within the content of this version. This returns the result by reference, NOT by value. 
http://stackoverflow.com/questions/27339165/slice-nested-list-at-variable-depth :param key_list: The node keys to traverse to get to the content node @@ -3946,8 +3946,8 @@ def all_context_refs(self, include_self = True, include_book = False): def context_ref(self, level=1): """ - :return: :class:`Ref` that is more general than this :class:`Ref`. :param level: how many levels to 'zoom out' from the most specific possible :class:`Ref` + :return: :class:`Ref` that is more general than this :class:`Ref`. :: @@ -5034,7 +5034,7 @@ def rebuild_toc(self, skip_toc_tree=False): While building these ToC data structures, this function also builds the equivalent JSON structures as an API optimization. - @param: skip_toc_tree boolean + :param skip_toc_tree: Boolean """ if not skip_toc_tree: self._toc_tree = self.get_toc_tree(rebuild=True) @@ -5128,7 +5128,7 @@ def get_topic_toc(self, rebuild=False): def get_topic_toc_json(self, rebuild=False): """ Returns JSON representation of Topics ToC. - @param: rebuild boolean + :param rebuild: Boolean """ if rebuild or not self._topic_toc_json: if not rebuild: @@ -5142,9 +5142,9 @@ def get_topic_toc_json(self, rebuild=False): def get_topic_toc_json_recursive(self, topic=None, explored=None, with_descriptions=False): """ Returns JSON representation of Topics ToC - @param: topic Topic - @param: explored Set - @param: with_descriptions boolean + :param topic: Topic + :param explored: Set + :param with_descriptions: Boolean """ from .topic import Topic, TopicSet, IntraTopicLinkSet explored = explored or set() @@ -5211,7 +5211,7 @@ def build_topic_toc_category_mapping(self) -> dict: def get_topic_toc_category_mapping(self, rebuild=False) -> dict: """ Returns the category mapping as a dictionary for the topics ToC. Loads on Library startup. 
- @param: rebuild boolean + :param rebuild: Boolean """ if rebuild or not self._topic_toc_category_mapping: if not rebuild: @@ -5264,7 +5264,7 @@ def root_title_sorter(t): def get_topic_link_type(self, link_type): """ Returns a TopicLinkType with a slug of link_type (parameter) if not already present - @param: link_type String + :param link_type: String """ from .topic import TopicLinkTypeSet if not self._topic_link_types: @@ -5277,7 +5277,7 @@ def get_topic_link_type(self, link_type): def get_topic_data_source(self, data_source): """ Returns a TopicDataSource with the data_source (parameter) slug if not already present - @param: data_source String + :param data_source: String """ from .topic import TopicDataSourceSet if not self._topic_data_sources: @@ -5381,7 +5381,7 @@ def lexicon_auto_completer(self, lexicon): is not present, it assumes the need to rebuild the lexicon_auto_completer and calls the build function with appropriate logger warnings before returning the desired result - @param: lexicon String + :param lexicon: String """ try: return self._lexicon_auto_completer[lexicon] @@ -5641,6 +5641,12 @@ def get_term_dict(self, lang="en"): return term_dict def build_term_mappings(self): + """ + Build simple and full term mappings + A full term mapping has the term name as the key, and the term as the value. + A simple term mapping has the term name as the key, and a dictionary containing the English and Hebrew + primary titles for the terms as the value. + """ self._simple_term_mapping = {} self._full_term_mapping = {} for term in TermSet(): @@ -5676,6 +5682,11 @@ def get_simple_term_mapping_json(self, rebuild=False): return self._simple_term_mapping_json def get_term(self, term_name): + """ + Returns the full term, if mapping not present, builds the full term mapping. 
+ :param term_name: String + :returns: full Term (Mongo Record) + """ if not self._full_term_mapping: self.build_term_mappings() return self._full_term_mapping.get(term_name) if term_name in self._full_term_mapping else Term().load({"name": term_name}) @@ -5683,15 +5694,34 @@ def get_term(self, term_name): def get_topic(self, slug): + """ + Returns a dictionary containing the keys "en" and "he". + The "en" field has a value of the topic's English primary title, and the "he" field has a + value of the topic's Hebrew primary title. + :param slug: String + :returns: Dictionary of primary titles for the given slug + """ return self._topic_mapping[slug] def get_topic_mapping(self, rebuild=False): + """ + Returns the topic mapping, rebuilding it first if it is missing or empty, or if rebuild is True. + :param rebuild: Boolean (optional, default set to False) + """ tm = self._topic_mapping if not tm or rebuild: tm = self._build_topic_mapping() return tm def _build_topic_mapping(self): + """ + Builds the topic mapping. The topic mapping is a dictionary in which each key + is the slug of a topic. + Each slug maps to another dictionary with the keys "en" and "he". + The "en" field has a value of the topic's English primary title, and the "he" field has a + value of the topic's Hebrew primary title. 
+ :returns: the complete topic mapping (Dictionary keyed by topic slug) + """ from .topic import Topic, TopicSet self._topic_mapping = {t.slug: {"en": t.get_primary_title("en"), "he": t.get_primary_title("he")} for t in TopicSet()} return self._topic_mapping @@ -5736,6 +5766,9 @@ def get_index_forest(self): return root_nodes def all_index_records(self): + """ + Returns a list of all index records + """ return [self._index_map[k] for k in list(self._index_title_maps["en"].keys())] def get_title_node_dict(self, lang="en"): @@ -5776,9 +5809,9 @@ def citing_title_list(self, lang="en"): def full_title_list(self, lang="en", with_terms=False): """ - :return: list of strings of all possible titles :param lang: "he" or "en" :param with_terms: if True, includes shared titles ('terms') + :return: list of strings of all possible titles """ key = lang key += "_terms" if with_terms else "" @@ -5802,6 +5835,11 @@ def build_text_titles_json(self, lang="en"): return title_list def get_text_titles_json(self, lang="en", rebuild=False): + """ + Returns the JSON text title list + :param lang: String (optional, default set to 'en') + :param rebuild: Boolean (optional, default set to False) + """ if rebuild or not self._full_title_list_jsons.get(lang): if not rebuild: self._full_title_list_jsons[lang] = scache.get_shared_cache_elem('books_'+lang+'_json') @@ -5815,6 +5853,9 @@ def get_text_titles_json(self, lang="en", rebuild=False): return self._full_title_list_jsons[lang] def reset_text_titles_cache(self): + """ + Resets the text titles for all languages by clearing the existing titles from the cache. + """ for lang in self.langs: scache.delete_shared_cache_elem('books_' + lang) scache.delete_shared_cache_elem('books_' + lang + '_json') @@ -6060,6 +6101,20 @@ def get_multi_title_regex_string(self, titles, lang, for_js=False, anchored=Fals # do we want to move this to the schema node? We'd still have to pass the title... 
def get_regex_string(self, title, lang, for_js=False, anchored=False, capture_title=False, parentheses=False): + """ + Given a book title, this function returns a regex for a Ref. + This works for references not in Sefaria format (e.g. "See Genesis 2 3" as opposed to "Genesis 2:3"), + as well as for references in Sefaria format. + If the language is 'en', it calls the full_regex() function which returns the regex, whereas for 'he' we + limit the regex creation to content inside parentheses to limit false positives (e.g. the phrase שבת לא תעשה + could be caught by mistake as Shabbat 31). + :param title: String + :param lang: 'en' or 'he' + :param for_js: Boolean (default set to False, optional) + :param anchored: Boolean (default set to False, optional) + :param capture_title: Boolean (default set to False, optional) + :param parentheses: Boolean (default set to False, optional) + """ node = self.get_schema_node(title, lang) assert isinstance(node, JaggedArrayNode) # Assumes that node is a JaggedArrayNode @@ -6212,6 +6267,12 @@ def repl(match): return re.sub(fr"{dummy_char}+", repl, dummy_text) def category_id_dict(self, toc=None, cat_head="", code_head=""): + """Returns a dict of unique category ids based on the ToC, with the + values being the category IDs. 
+ :param toc: ToC object (optional, default is None) + :param cat_head: String, (optional, default is "" - an empty string) + :param code_head: String, (optional, default is "" - an empty string) + """ if toc is None: if not self._category_id_dict: self._category_id_dict = self.category_id_dict(self.get_toc()) @@ -6235,6 +6296,12 @@ def category_id_dict(self, toc=None, cat_head="", code_head=""): return d def simplify_toc(self, lang=None, toc_node=None, path=None): + """ + Simplifies the table of contents (ToC) + :param lang: 'en' or 'he', default is None (optional) + :param toc_node: ToC Node, default is None (optional) + :param path: Node Path, default is None (optional) + """ is_root = toc_node is None and path is None toc_node = toc_node if toc_node else self.get_toc() path = path if path else [] diff --git a/sefaria/model/topic.py b/sefaria/model/topic.py index 493b0d42fb..fdc833c4d1 100644 --- a/sefaria/model/topic.py +++ b/sefaria/model/topic.py @@ -279,6 +279,9 @@ def set_slug(self, new_slug) -> None: def merge(self, other: Union['Topic', str]) -> None: """ + Merge `other` into `self`. This means that all data from `other` will be merged into self. + Data from self takes precedence in the event of conflict. + Links to `other` will be changed to point to `self` and `other` will be deleted. :param other: Topic or old slug to migrate from :return: None """ diff --git a/sefaria/tracker.py b/sefaria/tracker.py index 0caf7ba5b4..7e68968ea7 100644 --- a/sefaria/tracker.py +++ b/sefaria/tracker.py @@ -46,7 +46,7 @@ def modify_bulk_text(user: int, version: model.Version, text_map: dict, vsource= """ user: user ID of user making modification version: version object of text being modified - text_map: dict with segment ref keys and text values. Each key/value pair represents a segment that should be modified. Segments that don't have changes will be ignored. + text_map: dict with segment ref keys and text values. 
Each key/value pair represents a segment that should be modified. Segments that don't have changes will be ignored. The key should be the tref, and the value the text, ex: {'Mishnah Berakhot 1:1': 'Text of the Mishnah goes here'} vsource: optional parameter to set the version source of the version. not sure why this is here. I copied it from modify_text. """ def populate_change_map(old_text, en_tref, he_tref, _):