weaviate · Merk0ff · Nov 18, 2024
diff --git a/config.py b/config.py
@@ -2,6 +2,11 @@
 from typing import List
 
 TRUST_REMOTE_CODE = os.getenv("TRUST_REMOTE_CODE", False)
+ST_LOCAL_FILES_ONLY = os.getenv("ST_LOCAL_FILES_ONLY", "False").lower() in (
+    "true",
+    "1",
+    "t",
+)
 
 
 def get_allowed_tokens() -> List[str] | None:

diff --git a/vectorizer.py b/vectorizer.py
@@ -20,6 +20,8 @@
     T5Tokenizer,
 )
 
+from config import ST_LOCAL_FILES_ONLY
+
 
 # limit transformer batch size to limit parallel inference, otherwise we run
 # into memory problems
@@ -96,6 +98,7 @@ def __init__(
             cache_folder=model_path,
             device=self.get_device(),
             trust_remote_code=trust_remote_code,
+            local_files_only=ST_LOCAL_FILES_ONLY,
         )
         self.model.eval()  # make sure we're in inference mode, not training
 
@@ -258,7 +261,6 @@ def vectorize(self, text: str, config: VectorInputConfig):
 
 
 class HFModel:
-
     def __init__(self, cuda_support: bool, cuda_core: str, trust_remote_code: bool):
         super().__init__()
         self.model = None
@@ -331,7 +333,6 @@ def pool_sum(self, embeddings, attention_mask):
 
 
 class DPRModel(HFModel):
-
     def __init__(
         self,
         architecture: str,
@@ -364,7 +365,6 @@ def pool_embedding(self, batch_results, tokens, config: VectorInputConfig):
 
 
 class T5Model(HFModel):
-
     def __init__(self, cuda_support: bool, cuda_core: str, trust_remote_code: bool):
         super().__init__(cuda_support, cuda_core)
         self.model = None
@@ -406,7 +406,6 @@ def get_batch_results(self, tokens, text):
 
 
 class ModelFactory:
-
     @staticmethod
     def model(
         model_type,