From 426762086eb3a8d6ccc79bf5515cd807221f3a90 Mon Sep 17 00:00:00 2001 From: jamesrichards Date: Tue, 25 Jun 2024 09:53:54 +0000 Subject: [PATCH 1/3] Fixing poetry lock --- poetry.lock | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 186c3c0ad..a21b27e5d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -5265,6 +5265,7 @@ description = "Nvidia JIT LTO Library" optional = false python-versions = ">=3" files = [ + {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_aarch64.whl", hash = "sha256:004186d5ea6a57758fd6d57052a123c73a4815adf365eb8dd6a85c9eaa7535ff"}, {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d9714f27c1d0f0895cd8915c07a87a1d0029a0aa36acaf9156952ec2a8a12189"}, {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-win_amd64.whl", hash = "sha256:c3401dc8543b52d3a8158007a0c1ab4e9c768fcbd24153a48c86972102197ddd"}, ] @@ -10218,4 +10219,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "763770ffacae7e6e577a3d3f48ad37e347c689a016c0168f013a53351fef40ce" +content-hash = "280917a2721b3b3a10cb845896bf1416c21f67499184e484949e6725a9d9d47e" From 901aa153d5eb84d61f3e500005efc96ea5c5ae8d Mon Sep 17 00:00:00 2001 From: jamesrichards Date: Tue, 25 Jun 2024 11:27:25 +0000 Subject: [PATCH 2/3] Creating chunk index in worker to avoid race condition with multiple workers --- worker/src/app.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/worker/src/app.py b/worker/src/app.py index aa93409d0..729d485fc 100755 --- a/worker/src/app.py +++ b/worker/src/app.py @@ -34,6 +34,7 @@ @asynccontextmanager async def lifespan(context: ContextRepo): + es_index_name = f"{env.elastic_root_index}-chunk" es = env.elasticsearch_client() s3_client = env.s3_client() # embeddings = AzureOpenAIEmbeddings( @@ -46,13 +47,13 @@ async def lifespan(context: ContextRepo): # ) embeddings = SentenceTransformerEmbeddings(model_name=env.embedding_model) elasticsearch_store = ElasticsearchStore( - index_name=f"{env.elastic_root_index}-chunk", + index_name=es_index_name, embedding=embeddings, es_connection=es, query_field="text", - vector_query_field=env.embedding_document_field_name, + vector_query_field=env.embedding_document_field_name ) - + es.indices.create(index=es_index_name) context.set_global("vectorstore", elasticsearch_store) context.set_global("s3_client", s3_client) yield From 409e9483717f0033ae418af1cad13bc14a49196f Mon Sep 17 00:00:00 2001 From: jamesrichards Date: Tue, 25 Jun 2024 12:18:28 +0000 Subject: [PATCH 3/3] Ignoring already existing index in worker startup --- worker/src/app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/worker/src/app.py b/worker/src/app.py index 729d485fc..d84daed47 100755 --- a/worker/src/app.py +++ b/worker/src/app.py @@ -53,7 +53,8 @@ async def lifespan(context: ContextRepo): query_field="text", vector_query_field=env.embedding_document_field_name ) - es.indices.create(index=es_index_name) + + es.indices.create(index=es_index_name, ignore=[400]) context.set_global("vectorstore", elasticsearch_store) context.set_global("s3_client", s3_client) yield