chore: update dependencies and requirements across examples
- Update the package list in the `checks.yml` workflow job to exclude `mnist_classifier`
- Add a step to check if the `Dockerfile` exists in the `checks.yml` workflow job
- Modify the Docker image version in the `databricks_agent` and `databricks_plugin` examples
- Change the import path in the `decks.py` example
- Add `tabulate` to the requirements in the `development_lifecycle` example
- Update the base image version in the `k8s_dask_plugin` and `k8s_spark_plugin` examples
- Remove unnecessary imports in the `structured_dataset.py` example
- Update the base image version in the `mnist_classifier` example
- Add `scipy` requirement in the `nlp_processing` example
- Add `marshmallow_enum` requirement in the `snowflake_agent` and `snowflake_plugin` examples
- Update the `whylogs` version and requirements in the `whylogs_plugin` example

Signed-off-by: jason.lai <[email protected]>
jasonlai1218 committed May 14, 2024
2 parents 71b63d8 + 374093a commit 46edb27
Showing 15 changed files with 52 additions and 26 deletions.
33 changes: 22 additions & 11 deletions .github/workflows/checks.yml
@@ -53,7 +53,7 @@ jobs:
# TODO: Register and update the examples below. (onnx_plugin, feast_integration, etc)
echo "PACKAGES=$(find examples -maxdepth 1 -type d -exec basename '{}' \; \
| grep -v -e 'testing' -e 'examples' \
-| grep -v -e 'airflow_plugin' -e 'forecasting_sales' -e 'onnx_plugin' -e 'feast_integration' -e 'modin_plugin' -e 'sagemaker_inference_agent' \
+| grep -v -e 'airflow_plugin' -e 'forecasting_sales' -e 'onnx_plugin' -e 'feast_integration' -e 'modin_plugin' -e 'sagemaker_inference_agent' -e 'mnist_classifier' \
| sort \
| jq --raw-input . \
| jq --slurp . \
@@ -90,26 +90,37 @@ jobs:
pip install uv
uv venv
source .venv/bin/activate
-uv pip install flytekit flytekitplugins-envd
if [ -f requirements.in ]; then uv pip install -r requirements.in; fi
uv pip install "flytekit>=1.12.1b0" flytekitplugins-envd
pip freeze
+- name: Set up Docker Buildx
+uses: docker/setup-buildx-action@v3
- name: Login to GitHub Container Registry
if: ${{ github.event_name != 'pull_request' }}
uses: docker/login-action@v1
with:
registry: ghcr.io
username: ${{ secrets.FLYTE_BOT_USERNAME }}
password: ${{ secrets.FLYTE_BOT_PAT }}
-- name: Build and push default image
+- name: Check if dockerfile exists
working-directory: examples/${{ matrix.example }}
+id: dockerfile
run: |
-if [ -f Dockerfile ]; then
-tag1=ghcr.io/flyteorg/flytecookbook:${{ matrix.example }}-${{ github.sha }}
-tag2=ghcr.io/flyteorg/flytecookbook:latest
-docker build -t "$tag1" -t "$tag2" .
-if ${{ github.event_name != 'pull_request' }}; then
-docker push ghcr.io/flyteorg/flytecookbook --all-tags
-fi
+if [ -f Dockerfile ]
+then
+echo "exist=true" >> "$GITHUB_OUTPUT"
+else
+echo "exist=false" >> "$GITHUB_OUTPUT"
+fi
+- name: Build and push default image
+if: ${{ steps.dockerfile.outputs.exist == 'true' }}
+uses: docker/build-push-action@v5
+with:
+context: examples/${{ matrix.example }}
+push: ${{ github.event_name != 'pull_request' }}
+tags: ghcr.io/flyteorg/flytecookbook:${{ matrix.example }}-${{ github.sha }},ghcr.io/flyteorg/flytecookbook:latest
+cache-from: type=gha
+cache-to: type=gha,mode=max
- name: Pyflyte package
working-directory: examples/${{ matrix.example }}
run: |
@@ -287,7 +298,7 @@ jobs:
pip install uv
uv venv
source .venv/bin/activate
-uv pip install --upgrade pip flytekit flytekitplugins-deck-standard torch
+uv pip install "flytekit>=1.12.1b0" flytekitplugins-deck-standard torch tabulate
pip freeze
- name: Checkout flytesnacks
uses: actions/checkout@v3
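The net effect in `checks.yml`: `mnist_classifier` joins the exclusion list that builds the example matrix, a pinned `flytekit>=1.12.1b0` replaces the unpinned install, and the Docker build now runs through `docker/build-push-action@v5` only when the example actually ships a `Dockerfile`. A rough Python equivalent of the matrix computation (an approximation: the shell `grep -v` matches substrings, while this sketch matches exact names):

```python
import json
from pathlib import Path

# Examples excluded from the CI matrix (now including mnist_classifier).
EXCLUDED = {
    "testing", "examples", "airflow_plugin", "forecasting_sales",
    "onnx_plugin", "feast_integration", "modin_plugin",
    "sagemaker_inference_agent", "mnist_classifier",
}

# Sorted JSON array of example names, mirroring find | grep -v | sort | jq.
packages = sorted(
    p.name
    for p in Path("examples").iterdir()
    if p.is_dir() and p.name not in EXCLUDED
)
print(json.dumps(packages))
```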
structured_dataset.py
@@ -16,7 +16,6 @@
    StructuredDatasetEncoder,
    StructuredDatasetTransformerEngine,
)
-from tabulate import tabulate
from typing_extensions import Annotated


@@ -203,6 +202,8 @@ class CompanyField:

@task(container_image=image)
def create_parquet_file() -> StructuredDataset:
+    from tabulate import tabulate
+
    df = pd.json_normalize(data, max_level=0)
    print("original dataframe: \n", tabulate(df, headers="keys", tablefmt="psql"))

@@ -211,48 +212,62 @@ def create_parquet_file() -> StructuredDataset:

@task(container_image=image)
def print_table_by_arg(sd: MyArgDataset) -> pd.DataFrame:
+    from tabulate import tabulate
+
    t = sd.open(pd.DataFrame).all()
    print("MyArgDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql"))
    return t


@task(container_image=image)
def print_table_by_dict(sd: MyDictDataset) -> pd.DataFrame:
+    from tabulate import tabulate
+
    t = sd.open(pd.DataFrame).all()
    print("MyDictDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql"))
    return t


@task(container_image=image)
def print_table_by_list_dict(sd: MyDictListDataset) -> pd.DataFrame:
+    from tabulate import tabulate
+
    t = sd.open(pd.DataFrame).all()
    print("MyDictListDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql"))
    return t


@task(container_image=image)
def print_table_by_top_dataclass(sd: MyTopDataClassDataset) -> pd.DataFrame:
+    from tabulate import tabulate
+
    t = sd.open(pd.DataFrame).all()
    print("MyTopDataClassDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql"))
    return t


@task(container_image=image)
def print_table_by_top_dict(sd: MyTopDictDataset) -> pd.DataFrame:
+    from tabulate import tabulate
+
    t = sd.open(pd.DataFrame).all()
    print("MyTopDictDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql"))
    return t


@task(container_image=image)
def print_table_by_second_dataclass(sd: MySecondDataClassDataset) -> pd.DataFrame:
+    from tabulate import tabulate
+
    t = sd.open(pd.DataFrame).all()
    print("MySecondDataClassDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql"))
    return t


@task(container_image=image)
def print_table_by_nested_dataclass(sd: MyNestedDataClassDataset) -> pd.DataFrame:
+    from tabulate import tabulate
+
    t = sd.open(pd.DataFrame).all()
    print("MyNestedDataClassDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql"))
    return t
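The pattern repeated across these tasks: `tabulate` moves from a module-level import into each task body, so the module stays importable (for example at registration time) in environments where `tabulate` is only installed in the task's container image. A minimal sketch of the idiom; the image name and package set are illustrative, not taken from the example:

```python
import pandas as pd
from flytekit import ImageSpec, task

# Illustrative image; tabulate only needs to exist inside the container.
image = ImageSpec(registry="ghcr.io/flyteorg", packages=["pandas", "tabulate"])


@task(container_image=image)
def show(df: pd.DataFrame) -> None:
    # Deferred import: resolved at run time inside the container, so
    # registering this module does not require tabulate locally.
    from tabulate import tabulate

    print(tabulate(df, headers="keys", tablefmt="psql"))
```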
2 changes: 1 addition & 1 deletion examples/databricks_agent/Dockerfile
@@ -1,4 +1,4 @@
-FROM databricksruntime/standard:12.2-LTS
+FROM databricksruntime/standard:14.3-LTS
LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks

ENV VENV /opt/venv
2 changes: 1 addition & 1 deletion examples/databricks_plugin/Dockerfile
@@ -1,4 +1,4 @@
-FROM databricksruntime/standard:12.2-LTS
+FROM databricksruntime/standard:14.3-LTS
LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks

ENV VENV /opt/venv
decks.py
@@ -1,6 +1,6 @@
import flytekit
from flytekit import ImageSpec, task
-from flytekitplugins.deck.renderer import MarkdownRenderer
+from flytekit.deck.renderer import MarkdownRenderer
from sklearn.decomposition import PCA

# Create a new deck named `pca` and render Markdown content along with a
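`MarkdownRenderer` now comes from flytekit core (`flytekit.deck.renderer`) instead of the plugin package. A hedged sketch of the usage this enables, assuming a flytekit version where decks are switched on per task via `enable_deck`:

```python
import flytekit
from flytekit import task
from flytekit.deck.renderer import MarkdownRenderer


@task(enable_deck=True)
def pca_report() -> None:
    # Render Markdown into a named deck, mirroring the example's `pca` deck.
    md = "# PCA\nExplained variance of the first components."
    flytekit.Deck("pca", MarkdownRenderer().to_html(md))
```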
1 change: 1 addition & 0 deletions examples/development_lifecycle/requirements.in
@@ -3,3 +3,4 @@ flytekitplugins-deck-standard
flytekitplugins-envd
plotly
scikit-learn
+tabulate
2 changes: 1 addition & 1 deletion examples/k8s_dask_plugin/Dockerfile
@@ -1,7 +1,7 @@
# ######################
# NOTE: For CI/CD only #
########################
-FROM ubuntu:focal
+FROM ubuntu:latest
LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks

WORKDIR /root
2 changes: 1 addition & 1 deletion examples/k8s_dask_plugin/k8s_dask_plugin/dask_example.py
@@ -12,7 +12,7 @@
# %% [markdown]
# Create an `ImageSpec` to encompass all the dependencies needed for the Dask task.
# %%
-custom_image = ImageSpec(name="flyte-dask-plugin", registry="ghcr.io/flyteorg", packages=["flytekitplugins-dask"])
+custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-dask"])

# %% [markdown]
# :::{important}
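With the explicit `name` dropped, `ImageSpec` falls back to its default image name; the tag is still derived from the spec's contents, so the build stays reproducible. A small sketch (the default-name behavior is an assumption about flytekit, not stated in this diff):

```python
from flytekit import ImageSpec

custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-dask"])

# No explicit name: flytekit supplies a default and derives the tag
# from the spec's contents (assumed behavior).
print(custom_image.image_name())
```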
2 changes: 1 addition & 1 deletion examples/k8s_spark_plugin/Dockerfile
@@ -1,7 +1,7 @@
# ######################
# NOTE: For CI/CD only #
########################
-FROM apache/spark-py:3.3.1
+FROM apache/spark-py:v3.4.0
LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks

WORKDIR /root
examples/k8s_spark_plugin (Python example)
@@ -14,11 +14,7 @@
from flytekit import ImageSpec, Resources, kwtypes, task, workflow
from flytekit.types.structured.structured_dataset import StructuredDataset
from flytekitplugins.spark import Spark
-
-try:
-    from typing import Annotated
-except ImportError:
-    from typing_extensions import Annotated
+from typing_extensions import Annotated

# %% [markdown]
# Create an `ImageSpec` to automate the retrieval of a prebuilt Spark image.
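The `try`/`except` import dance is gone: `typing_extensions.Annotated` works on every supported Python (it re-exports `typing.Annotated` where available), so one unconditional import suffices. For instance, a column-typed dataset in the style of this example (the `kwtypes` columns here are illustrative):

```python
from typing_extensions import Annotated  # re-exports typing.Annotated on 3.9+

from flytekit import kwtypes
from flytekit.types.structured.structured_dataset import StructuredDataset

# Illustrative column annotation in the style of the Spark example.
ColumnedDataset = Annotated[StructuredDataset, kwtypes(name=str, age=int)]
```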
2 changes: 1 addition & 1 deletion examples/mnist_classifier/Dockerfile
@@ -1,4 +1,4 @@
-FROM pytorch/pytorch:1.11.0-cuda11.3-cudnn8-runtime
+FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks

WORKDIR /root
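The base image jumps from PyTorch 1.11/CUDA 11.3 to 2.3.0/CUDA 12.1. A quick sanity check to run inside the new image; the expected values follow from the tag, so adjust them if the base changes:

```python
import torch

print(torch.__version__)          # expected: 2.3.0 per the image tag
print(torch.version.cuda)         # expected: 12.1 per the image tag
print(torch.cuda.is_available())  # True only when a GPU is attached
```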
3 changes: 2 additions & 1 deletion examples/nlp_processing/requirements.in
@@ -1,4 +1,4 @@
-flytekit>=0.32.3
+flytekit
wheel
matplotlib
flytekitplugins-deck-standard
@@ -8,3 +8,4 @@ nltk
plotly
pyemd
scikit-learn
+scipy==1.10.1
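Pinning `scipy==1.10.1` holds on to APIs that newer SciPy releases dropped. One plausible motivation (an assumption, not stated in the commit) is `scipy.linalg.triu`, which some older NLP dependencies still import:

```python
import numpy as np
from scipy.linalg import triu  # present in 1.10.x; removed in newer SciPy

# The upper-triangular helper that older NLP libraries still call.
print(triu(np.arange(9).reshape(3, 3)))
```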
1 change: 1 addition & 0 deletions examples/snowflake_agent/requirements.in
@@ -1,3 +1,4 @@
flytekitplugins-snowflake==1.7.0
flytekitplugins-envd==1.7.0
flytekit==1.7.1b1
+marshmallow_enum
1 change: 1 addition & 0 deletions examples/snowflake_plugin/requirements.in
@@ -1,3 +1,4 @@
flytekitplugins-snowflake==1.7.0
flytekit==1.7.1b1
flytekitplugins-envd==1.7.0
+marshmallow_enum
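`marshmallow_enum` is presumably required by the pinned flytekit 1.7.x line, whose dataclass serialization goes through `dataclasses_json`/marshmallow; enum-typed fields are what pull in the extra. A sketch of the kind of type that exercises it (the dataclass is hypothetical):

```python
from dataclasses import dataclass
from enum import Enum

from dataclasses_json import dataclass_json


class Warehouse(Enum):
    SMALL = "small"
    LARGE = "large"


@dataclass_json
@dataclass
class QueryConfig:
    warehouse: Warehouse


# Building the marshmallow schema for the enum field is what needs
# marshmallow_enum on this dependency line (assumption).
schema = QueryConfig.schema()
```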
2 changes: 1 addition & 1 deletion examples/whylogs_plugin/requirements.in
@@ -4,6 +4,6 @@ matplotlib
flytekitplugins-deck-standard
flytekitplugins-whylogs>=1.1.1b0
scikit-learn
-whylogs[s3]
+whylogs[s3]==1.3.30
whylogs[mlflow]
whylogs[whylabs]

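The `whylogs[s3]` extra is now pinned to 1.3.30, holding the example to a known-good 1.x API. A minimal profiling sketch against that API (assumed stable at this pin):

```python
import pandas as pd
import whylogs as why

df = pd.DataFrame({"feature": [1.0, 2.0, 3.0]})

# Profile the dataframe and materialize the summary (whylogs 1.x API).
profile = why.log(df).profile()
print(profile.view().to_pandas())
```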