add resources to input downloader in the ollama plugin (#2754)

* add resources to input downloader in the ollama plugin
  Signed-off-by: Samhita Alla <[email protected]>
* remove gpu
  Signed-off-by: Samhita Alla <[email protected]>
* make cpu configurable
  Signed-off-by: Samhita Alla <[email protected]>
* set cpu to 2
  Signed-off-by: Samhita Alla <[email protected]>

---------

Signed-off-by: Samhita Alla <[email protected]>
flyteorg · Sep 18, 2024 · 11c3a18
1 parent fb55841
commit 11c3a18
Show file tree

Hide file tree

Showing 2 changed files with 20 additions and 0 deletions.
diff --git a/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py b/plugins/flytekit-inference/flytekitplugins/inference/ollama/serve.py
@@ -31,6 +31,8 @@ def __init__(
         cpu: int = 1,
         gpu: int = 1,
         mem: str = "15Gi",
+        download_inputs_mem: str = "500Mi",
+        download_inputs_cpu: int = 2,
     ):
         """Initialize Ollama class for managing a Kubernetes pod template.
 
@@ -40,6 +42,8 @@ def __init__(
         :param cpu: The number of CPU cores requested for the container. Default is 1.
         :param gpu: The number of GPUs requested for the container. Default is 1.
         :param mem: The amount of memory requested for the container, specified as a string. Default is "15Gi".
+        :param download_inputs_mem: The amount of memory requested for downloading inputs, specified as a string. Default is "500Mi".
+        :param download_inputs_cpu: The number of CPU cores requested for downloading inputs. Default is 2.
         """
         self._model_name = model.name
         self._model_mem = model.mem
@@ -52,6 +56,8 @@ def __init__(
             cpu=cpu,
             gpu=gpu,
             mem=mem,
+            download_inputs_mem=download_inputs_mem,
+            download_inputs_cpu=download_inputs_cpu,
             download_inputs=(True if self._model_modelfile and "{inputs" in self._model_modelfile else False),
         )
 

diff --git a/plugins/flytekit-inference/flytekitplugins/inference/sidecar_template.py b/plugins/flytekit-inference/flytekitplugins/inference/sidecar_template.py
@@ -15,6 +15,8 @@ def __init__(
         mem: str = "1Gi",
         env: Optional[dict[str, str]] = None,
         download_inputs: bool = False,
+        download_inputs_mem: str = "500Mi",
+        download_inputs_cpu: int = 2,
     ):
         from kubernetes.client.models import (
             V1Container,
@@ -34,6 +36,8 @@ def __init__(
         self._cpu = cpu
         self._gpu = gpu
         self._mem = mem
+        self._download_inputs_mem = download_inputs_mem
+        self._download_inputs_cpu = download_inputs_cpu
         self._env = env
         self._download_inputs = download_inputs
 
@@ -138,6 +142,16 @@ def __init__(
                         V1VolumeMount(name="shared-data", mount_path="/shared"),
                         V1VolumeMount(name="tmp", mount_path="/tmp"),
                     ],
+                    resources=V1ResourceRequirements(
+                        requests={
+                            "cpu": self._download_inputs_cpu,
+                            "memory": self._download_inputs_mem,
+                        },
+                        limits={
+                            "cpu": self._download_inputs_cpu,
+                            "memory": self._download_inputs_mem,
+                        },
+                    ),
                 ),
             )