From 426762086eb3a8d6ccc79bf5515cd807221f3a90 Mon Sep 17 00:00:00 2001
From: jamesrichards <james.richards@cabinetoffice.gov.uk>
Date: Tue, 25 Jun 2024 09:53:54 +0000
Subject: [PATCH 1/3] Fix poetry.lock: add nvidia-nvjitlink-cu12 aarch64 wheel
 hash and update content-hash

---
 poetry.lock | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/poetry.lock b/poetry.lock
index 186c3c0ad..a21b27e5d 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -5265,6 +5265,7 @@ description = "Nvidia JIT LTO Library"
 optional = false
 python-versions = ">=3"
 files = [
+    {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_aarch64.whl", hash = "sha256:004186d5ea6a57758fd6d57052a123c73a4815adf365eb8dd6a85c9eaa7535ff"},
     {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d9714f27c1d0f0895cd8915c07a87a1d0029a0aa36acaf9156952ec2a8a12189"},
     {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-win_amd64.whl", hash = "sha256:c3401dc8543b52d3a8158007a0c1ab4e9c768fcbd24153a48c86972102197ddd"},
 ]
@@ -10218,4 +10219,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "763770ffacae7e6e577a3d3f48ad37e347c689a016c0168f013a53351fef40ce"
+content-hash = "280917a2721b3b3a10cb845896bf1416c21f67499184e484949e6725a9d9d47e"

From 901aa153d5eb84d61f3e500005efc96ea5c5ae8d Mon Sep 17 00:00:00 2001
From: jamesrichards <james.richards@cabinetoffice.gov.uk>
Date: Tue, 25 Jun 2024 11:27:25 +0000
Subject: [PATCH 2/3] Creating chunk index in worker to avoid race condition
 with multiple workers

---
 worker/src/app.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/worker/src/app.py b/worker/src/app.py
index aa93409d0..729d485fc 100755
--- a/worker/src/app.py
+++ b/worker/src/app.py
@@ -34,6 +34,7 @@
 
 @asynccontextmanager
 async def lifespan(context: ContextRepo):
+    es_index_name = f"{env.elastic_root_index}-chunk"
     es = env.elasticsearch_client()
     s3_client = env.s3_client()
     # embeddings = AzureOpenAIEmbeddings(
@@ -46,13 +47,13 @@ async def lifespan(context: ContextRepo):
     # )
     embeddings = SentenceTransformerEmbeddings(model_name=env.embedding_model)
     elasticsearch_store = ElasticsearchStore(
-        index_name=f"{env.elastic_root_index}-chunk",
+        index_name=es_index_name,
         embedding=embeddings,
         es_connection=es,
         query_field="text",
-        vector_query_field=env.embedding_document_field_name,
+        vector_query_field=env.embedding_document_field_name
     )
-
+    es.indices.create(index=es_index_name)
     context.set_global("vectorstore", elasticsearch_store)
     context.set_global("s3_client", s3_client)
     yield

From 409e9483717f0033ae418af1cad13bc14a49196f Mon Sep 17 00:00:00 2001
From: jamesrichards <james.richards@cabinetoffice.gov.uk>
Date: Tue, 25 Jun 2024 12:18:28 +0000
Subject: [PATCH 3/3] Ignore HTTP 400 (index already exists) when creating the
 chunk index on worker startup

---
 worker/src/app.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/worker/src/app.py b/worker/src/app.py
index 729d485fc..d84daed47 100755
--- a/worker/src/app.py
+++ b/worker/src/app.py
@@ -53,7 +53,8 @@ async def lifespan(context: ContextRepo):
         query_field="text",
         vector_query_field=env.embedding_document_field_name
     )
-    es.indices.create(index=es_index_name)
+    
+    es.indices.create(index=es_index_name, ignore=[400])
     context.set_global("vectorstore", elasticsearch_store)
     context.set_global("s3_client", s3_client)
     yield