From 0960547fdc7e3c90a04bfd651646672f30df16ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Thu, 31 Oct 2024 17:22:21 +0100 Subject: [PATCH] feat: wait for meilisearch index creation to succeed In `search.meilisearch.create_indexes`, we were not waiting for the index creation tasks to complete. This was causing a potential race condition, where the `create_indexes` function would fail because it took a few seconds for the index creation to succeed. See the relevant conversation here: https://github.com/openedx/edx-platform/pull/35743#issuecomment-2450115310 --- edxsearch/__init__.py | 2 +- search/meilisearch.py | 95 +++++++++++++++++++++++--------- search/tests/test_meilisearch.py | 85 ++++++++++++++++++++-------- 3 files changed, 130 insertions(+), 52 deletions(-) diff --git a/edxsearch/__init__.py b/edxsearch/__init__.py index d00a3e59..71727cf1 100644 --- a/edxsearch/__init__.py +++ b/edxsearch/__init__.py @@ -1,3 +1,3 @@ """ Container module for testing / demoing search """ -__version__ = '4.1.0' +__version__ = '4.1.1' diff --git a/search/meilisearch.py b/search/meilisearch.py index 75636526..9e09e426 100644 --- a/search/meilisearch.py +++ b/search/meilisearch.py @@ -115,9 +115,20 @@ class MeilisearchEngine(SearchEngine): compliant with edx-search's ElasticSearchEngine. """ - def __init__(self, index=None): + def __init__(self, index=None) -> None: super().__init__(index=index) - self.meilisearch_index = get_meilisearch_index(self.index_name) + self._meilisearch_index: t.Optional[meilisearch.index.Index] = None + + @property + def meilisearch_index(self) -> meilisearch.index.Index: + """ + Lazy load meilisearch index. + """ + if self._meilisearch_index is None: + meilisearch_index_name = get_meilisearch_index_name(self.index_name) + meilisearch_client = get_meilisearch_client() + self._meilisearch_index = meilisearch_client.index(meilisearch_index_name) + return self._meilisearch_index @property def meilisearch_index_name(self): @@ -211,7 +222,7 @@ def print_failed_meilisearch_tasks(count: int = 10): print(result) -def create_indexes(index_filterables: dict[str, list[str]] = None): +def create_indexes(index_filterables: t.Optional[dict[str, list[str]]] = None): """ This is an initialization function that creates indexes and makes sure that they support the right facetting. @@ -225,38 +236,68 @@ def create_indexes(index_filterables: dict[str, list[str]] = None): client = get_meilisearch_client() for index_name, filterables in index_filterables.items(): meilisearch_index_name = get_meilisearch_index_name(index_name) - try: - index = client.get_index(meilisearch_index_name) - except meilisearch.errors.MeilisearchApiError as e: - if e.code != "index_not_found": - raise - client.create_index( - meilisearch_index_name, {"primaryKey": PRIMARY_KEY_FIELD_NAME} - ) - # Get the index again - index = client.get_index(meilisearch_index_name) + index = get_or_create_meilisearch_index(client, meilisearch_index_name) + update_index_filterables(client, index, filterables) - # Update filterables if there are some new elements - if filterables: - existing_filterables = set(index.get_filterable_attributes()) - if not set(filterables).issubset(existing_filterables): - all_filterables = list(existing_filterables.union(filterables)) - index.update_filterable_attributes(all_filterables) +def get_or_create_meilisearch_index( + client: meilisearch.Client, index_name: str +) -> meilisearch.index.Index: + """ + Get an index. If it does not exist, create it. -def get_meilisearch_index(index_name: str): + This will fail with a RuntimeError if we fail to create the index. It will fail with + a MeilisearchApiError in other failure cases. """ - Return a meilisearch index. + try: + return client.get_index(index_name) + except meilisearch.errors.MeilisearchApiError as e: + if e.code != "index_not_found": + raise + task_info = client.create_index( + index_name, {"primaryKey": PRIMARY_KEY_FIELD_NAME} + ) + wait_for_task_to_succeed(client, task_info) + # Get the index again + return client.get_index(index_name) - Note that the index may not exist, and it will be created on first insertion. - ideally, the initialisation function `create_indexes` should be run first. + +def update_index_filterables( + client: meilisearch.Client, index: meilisearch.index.Index, filterables: list[str] +) -> None: """ - meilisearch_client = get_meilisearch_client() - meilisearch_index_name = get_meilisearch_index_name(index_name) - return meilisearch_client.index(meilisearch_index_name) + Make sure that the filterable fields of an index include the given list of fields. + + If existing fields are present, they are preserved. + """ + if not filterables: + return + existing_filterables = set(index.get_filterable_attributes()) + if set(filterables).issubset(existing_filterables): + # all filterables fields are already present + return + all_filterables = list(existing_filterables.union(filterables)) + task_info = index.update_filterable_attributes(all_filterables) + wait_for_task_to_succeed(client, task_info) + + +def wait_for_task_to_succeed( + client: meilisearch.Client, + task_info: meilisearch.task.TaskInfo, + timeout_in_ms: int = 5000, +) -> None: + """ + Wait for a Meilisearch task to succeed. If it does not, raise RuntimeError. + """ + task = client.wait_for_task(task_info.task_uid, timeout_in_ms=timeout_in_ms) + if task.status != "succeeded": + raise RuntimeError(f"Failed meilisearch task: {task}") def get_meilisearch_client(): + """ + Return a Meilisearch client with the right settings. + """ return meilisearch.Client(MEILISEARCH_URL, api_key=MEILISEARCH_API_KEY) @@ -332,7 +373,7 @@ def get_search_params( Return a dictionary of parameters that should be passed to the Meilisearch client `.search()` method. """ - params = {"showRankingScore": True} + params: dict[str, t.Any] = {"showRankingScore": True} # Aggregation if aggregation_terms: diff --git a/search/tests/test_meilisearch.py b/search/tests/test_meilisearch.py index dc0e9f77..0ad84405 100644 --- a/search/tests/test_meilisearch.py +++ b/search/tests/test_meilisearch.py @@ -3,11 +3,13 @@ """ from datetime import datetime -from unittest.mock import Mock +from unittest.mock import Mock, patch import django.test from django.utils import timezone +import meilisearch import pytest +from requests import Response from search.utils import DateRange, ValueRange import search.meilisearch @@ -294,20 +296,22 @@ def test_engine_index(self): def test_engine_search(self): engine = search.meilisearch.MeilisearchEngine(index="my_index") - engine.meilisearch_index.search = Mock(return_value={ - "hits": [ - { - "pk": "f381d4f1914235c9532576c0861d09b484ade634", - "id": "course-v1:OpenedX+DemoX+DemoCourse", - "_rankingScore": 0.865, - }, - ], - "query": "demo", - "processingTimeMs": 0, - "limit": 20, - "offset": 0, - "estimatedTotalHits": 1 - }) + engine.meilisearch_index.search = Mock( + return_value={ + "hits": [ + { + "pk": "f381d4f1914235c9532576c0861d09b484ade634", + "id": "course-v1:OpenedX+DemoX+DemoCourse", + "_rankingScore": 0.865, + }, + ], + "query": "demo", + "processingTimeMs": 0, + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + } + ) results = engine.search( query_string="abc", @@ -321,15 +325,19 @@ def test_engine_search(self): log_search_params=True, ) - engine.meilisearch_index.search.assert_called_with("abc", { - "showRankingScore": True, - "facets": ["org", "course"], - "filter": [ - 'course = "course-v1:testorg+test1+alpha"', - 'org = "testorg"', 'key = "value" OR key NOT EXISTS', - 'NOT _pk = "81fe8bfe87576c3ecb22426f8e57847382917acf"', - ] - }) + engine.meilisearch_index.search.assert_called_with( + "abc", + { + "showRankingScore": True, + "facets": ["org", "course"], + "filter": [ + 'course = "course-v1:testorg+test1+alpha"', + 'org = "testorg"', + 'key = "value" OR key NOT EXISTS', + 'NOT _pk = "81fe8bfe87576c3ecb22426f8e57847382917acf"', + ], + }, + ) assert results == { "aggs": {}, "max_score": 0.865, @@ -357,3 +365,32 @@ def test_engine_remove(self): doc_pk = "81fe8bfe87576c3ecb22426f8e57847382917acf" engine.remove(doc_ids=[doc_id]) engine.meilisearch_index.delete_documents.assert_called_with([doc_pk]) + + +class UtilitiesTests(django.test.TestCase): + """ + Tests associated to the utility functions of the meilisearch engine. + """ + + @patch.object(search.meilisearch, "wait_for_task_to_succeed") + def test_create_index(self, mock_wait_for_task_to_succeed) -> None: + class GetIndexMock: + number_of_calls = 0 + + def get_index(self, index_name): + """Mocked client.get_index method""" + self.number_of_calls += 1 + if self.number_of_calls == 1: + error = meilisearch.errors.MeilisearchApiError("", Response()) + error.code = "index_not_found" + raise error + if self.number_of_calls == 2: + return f"index created: {index_name}" + # We shouldn't be there + assert False + + client = Mock() + client.get_index = Mock(side_effect=GetIndexMock().get_index) + result = search.meilisearch.get_or_create_meilisearch_index(client, "my_index") + assert result == "index created: my_index" + mock_wait_for_task_to_succeed.assert_called_once()