chore: update dependencies and requirements across examples
- Update the package list in the `checks.yml` workflow job to exclude `mnist_classifier`
- Add a step to check if the `Dockerfile` exists in the `checks.yml` workflow job
- Modify the Docker image version in the `databricks_agent` and `databricks_plugin` examples
- Change the import path in the `decks.py` example
- Add `tabulate` to the requirements in the `development_lifecycle` example
- Update the base image version in the `k8s_dask_plugin` and `k8s_spark_plugin` examples
- Remove unnecessary imports in the `structured_dataset.py` example
- Update the base image version in the `mnist_classifier` example
- Add `scipy` requirement in the `nlp_processing` example
- Add `marshmallow_enum` requirement in the `snowflake_agent` and `snowflake_plugin` examples
- Update the `whylogs` version and requirements in the `whylogs_plugin` example

Signed-off-by: jason.lai <[email protected]>
jasonlai1218 committed May 14, 2024
2 parents 71b63d8 + 374093a commit 46edb27
Showing 15 changed files with 52 additions and 26 deletions.
33 changes: 22 additions & 11 deletions .github/workflows/checks.yml
@@ -53,7 +53,7 @@ jobs:
# TODO: Register and update the examples below. (onnx_plugin, feast_integration, etc)
echo "PACKAGES=$(find examples -maxdepth 1 -type d -exec basename '{}' \; \
| grep -v -e 'testing' -e 'examples' \
-| grep -v -e 'airflow_plugin' -e 'forecasting_sales' -e 'onnx_plugin' -e 'feast_integration' -e 'modin_plugin' -e 'sagemaker_inference_agent' \
+| grep -v -e 'airflow_plugin' -e 'forecasting_sales' -e 'onnx_plugin' -e 'feast_integration' -e 'modin_plugin' -e 'sagemaker_inference_agent' -e 'mnist_classifier' \
| sort \
| jq --raw-input . \
| jq --slurp . \
@@ -90,26 +90,37 @@ jobs:
pip install uv
uv venv
source .venv/bin/activate
-uv pip install flytekit flytekitplugins-envd
if [ -f requirements.in ]; then uv pip install -r requirements.in; fi
uv pip install "flytekit>=1.12.1b0" flytekitplugins-envd
pip freeze
+- name: Set up Docker Buildx
+uses: docker/setup-buildx-action@v3
- name: Login to GitHub Container Registry
if: ${{ github.event_name != 'pull_request' }}
uses: docker/login-action@v1
with:
registry: ghcr.io
username: ${{ secrets.FLYTE_BOT_USERNAME }}
password: ${{ secrets.FLYTE_BOT_PAT }}
-- name: Build and push default image
+- name: Check if dockerfile exists
working-directory: examples/${{ matrix.example }}
+id: dockerfile
run: |
-if [ -f Dockerfile ]; then
-tag1=ghcr.io/flyteorg/flytecookbook:${{ matrix.example }}-${{ github.sha }}
-tag2=ghcr.io/flyteorg/flytecookbook:latest
-docker build -t "$tag1" -t "$tag2" .
-if ${{ github.event_name != 'pull_request' }}; then
-docker push ghcr.io/flyteorg/flytecookbook --all-tags
-fi
+if [ -f Dockerfile ]
+then
+echo "exist=true" >> "$GITHUB_OUTPUT"
+else
+echo "exist=false" >> "$GITHUB_OUTPUT"
+fi
+- name: Build and push default image
+if: ${{ steps.dockerfile.outputs.exist == 'true' }}
+uses: docker/build-push-action@v5
+with:
+context: examples/${{ matrix.example }}
+push: ${{ github.event_name != 'pull_request' }}
+tags: ghcr.io/flyteorg/flytecookbook:${{ matrix.example }}-${{ github.sha }},ghcr.io/flyteorg/flytecookbook:latest
+cache-from: type=gha
+cache-to: type=gha,mode=max
- name: Pyflyte package
working-directory: examples/${{ matrix.example }}
run: |
@@ -287,7 +298,7 @@ jobs:
pip install uv
uv venv
source .venv/bin/activate
-uv pip install --upgrade pip flytekit flytekitplugins-deck-standard torch
+uv pip install "flytekit>=1.12.1b0" flytekitplugins-deck-standard torch tabulate
pip freeze
- name: Checkout flytesnacks
uses: actions/checkout@v3
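The net effect in `checks.yml`: `mnist_classifier` joins the exclusion list that builds the example matrix, a pinned `flytekit>=1.12.1b0` replaces the unpinned install, and the Docker build now runs through `docker/build-push-action@v5` only when the example actually ships a `Dockerfile`. A rough Python equivalent of the matrix computation (an approximation: the shell `grep -v` matches substrings, while this sketch matches exact names):

```python
import json
from pathlib import Path

# Examples excluded from the CI matrix (now including mnist_classifier).
EXCLUDED = {
    "testing", "examples", "airflow_plugin", "forecasting_sales",
    "onnx_plugin", "feast_integration", "modin_plugin",
    "sagemaker_inference_agent", "mnist_classifier",
}

# Sorted JSON array of example names, mirroring find | grep -v | sort | jq.
packages = sorted(
    p.name
    for p in Path("examples").iterdir()
    if p.is_dir() and p.name not in EXCLUDED
)
print(json.dumps(packages))
```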
structured_dataset.py
@@ -16,7 +16,6 @@
    StructuredDatasetEncoder,
    StructuredDatasetTransformerEngine,
)
-from tabulate import tabulate
from typing_extensions import Annotated


@@ -203,6 +202,8 @@ class CompanyField:

@task(container_image=image)
def create_parquet_file() -> StructuredDataset:
+    from tabulate import tabulate
+
    df = pd.json_normalize(data, max_level=0)
    print("original dataframe: \n", tabulate(df, headers="keys", tablefmt="psql"))

@@ -211,48 +212,62 @@ def create_parquet_file() -> StructuredDataset:

@task(container_image=image)
def print_table_by_arg(sd: MyArgDataset) -> pd.DataFrame:
+    from tabulate import tabulate
+
    t = sd.open(pd.DataFrame).all()
    print("MyArgDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql"))
    return t


@task(container_image=image)
def print_table_by_dict(sd: MyDictDataset) -> pd.DataFrame:
+    from tabulate import tabulate
+
    t = sd.open(pd.DataFrame).all()
    print("MyDictDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql"))
    return t


@task(container_image=image)
def print_table_by_list_dict(sd: MyDictListDataset) -> pd.DataFrame:
+    from tabulate import tabulate
+
    t = sd.open(pd.DataFrame).all()
    print("MyDictListDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql"))
    return t


@task(container_image=image)
def print_table_by_top_dataclass(sd: MyTopDataClassDataset) -> pd.DataFrame:
+    from tabulate import tabulate
+
    t = sd.open(pd.DataFrame).all()
    print("MyTopDataClassDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql"))
    return t


@task(container_image=image)
def print_table_by_top_dict(sd: MyTopDictDataset) -> pd.DataFrame:
+    from tabulate import tabulate
+
    t = sd.open(pd.DataFrame).all()
    print("MyTopDictDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql"))
    return t


@task(container_image=image)
def print_table_by_second_dataclass(sd: MySecondDataClassDataset) -> pd.DataFrame:
+    from tabulate import tabulate
+
    t = sd.open(pd.DataFrame).all()
    print("MySecondDataClassDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql"))
    return t


@task(container_image=image)
def print_table_by_nested_dataclass(sd: MyNestedDataClassDataset) -> pd.DataFrame:
+    from tabulate import tabulate
+
    t = sd.open(pd.DataFrame).all()
    print("MyNestedDataClassDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql"))
    return t
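The pattern repeated across these tasks: `tabulate` moves from a module-level import into each task body, so the module stays importable (for example at registration time) in environments where `tabulate` is only installed in the task's container image. A minimal sketch of the idiom; the image name and package set are illustrative, not taken from the example:

```python
import pandas as pd
from flytekit import ImageSpec, task

# Illustrative image; tabulate only needs to exist inside the container.
image = ImageSpec(registry="ghcr.io/flyteorg", packages=["pandas", "tabulate"])


@task(container_image=image)
def show(df: pd.DataFrame) -> None:
    # Deferred import: resolved at run time inside the container, so
    # registering this module does not require tabulate locally.
    from tabulate import tabulate

    print(tabulate(df, headers="keys", tablefmt="psql"))
```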
2 changes: 1 addition & 1 deletion examples/databricks_agent/Dockerfile
@@ -1,4 +1,4 @@
-FROM databricksruntime/standard:12.2-LTS
+FROM databricksruntime/standard:14.3-LTS
LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks

ENV VENV /opt/venv
2 changes: 1 addition & 1 deletion examples/databricks_plugin/Dockerfile
@@ -1,4 +1,4 @@
-FROM databricksruntime/standard:12.2-LTS
+FROM databricksruntime/standard:14.3-LTS
LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks

ENV VENV /opt/venv
decks.py
@@ -1,6 +1,6 @@
import flytekit
from flytekit import ImageSpec, task
-from flytekitplugins.deck.renderer import MarkdownRenderer
+from flytekit.deck.renderer import MarkdownRenderer
from sklearn.decomposition import PCA

# Create a new deck named `pca` and render Markdown content along with a
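`MarkdownRenderer` now comes from flytekit core (`flytekit.deck.renderer`) instead of the plugin package. A hedged sketch of the usage this enables, assuming a flytekit version where decks are switched on per task via `enable_deck`:

```python
import flytekit
from flytekit import task
from flytekit.deck.renderer import MarkdownRenderer


@task(enable_deck=True)
def pca_report() -> None:
    # Render Markdown into a named deck, mirroring the example's `pca` deck.
    md = "# PCA\nExplained variance of the first components."
    flytekit.Deck("pca", MarkdownRenderer().to_html(md))
```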
1 change: 1 addition & 0 deletions examples/development_lifecycle/requirements.in
@@ -3,3 +3,4 @@ flytekitplugins-deck-standard
flytekitplugins-envd
plotly
scikit-learn
+tabulate
2 changes: 1 addition & 1 deletion examples/k8s_dask_plugin/Dockerfile
@@ -1,7 +1,7 @@
# ######################
# NOTE: For CI/CD only #
########################
-FROM ubuntu:focal
+FROM ubuntu:latest
LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks

WORKDIR /root
2 changes: 1 addition & 1 deletion examples/k8s_dask_plugin/k8s_dask_plugin/dask_example.py
@@ -12,7 +12,7 @@
# %% [markdown]
# Create an `ImageSpec` to encompass all the dependencies needed for the Dask task.
# %%
-custom_image = ImageSpec(name="flyte-dask-plugin", registry="ghcr.io/flyteorg", packages=["flytekitplugins-dask"])
+custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-dask"])

# %% [markdown]
# :::{important}
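With the explicit `name` dropped, `ImageSpec` falls back to its default image name; the tag is still derived from the spec's contents, so the build stays reproducible. A small sketch (the default-name behavior is an assumption about flytekit, not stated in this diff):

```python
from flytekit import ImageSpec

custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-dask"])

# No explicit name: flytekit supplies a default and derives the tag
# from the spec's contents (assumed behavior).
print(custom_image.image_name())
```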
2 changes: 1 addition & 1 deletion examples/k8s_spark_plugin/Dockerfile
@@ -1,7 +1,7 @@
# ######################
# NOTE: For CI/CD only #
########################
-FROM apache/spark-py:3.3.1
+FROM apache/spark-py:v3.4.0
LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks

WORKDIR /root
examples/k8s_spark_plugin (Python example)
@@ -14,11 +14,7 @@
from flytekit import ImageSpec, Resources, kwtypes, task, workflow
from flytekit.types.structured.structured_dataset import StructuredDataset
from flytekitplugins.spark import Spark
-
-try:
-    from typing import Annotated
-except ImportError:
-    from typing_extensions import Annotated
+from typing_extensions import Annotated

# %% [markdown]
# Create an `ImageSpec` to automate the retrieval of a prebuilt Spark image.
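The `try`/`except` import dance is gone: `typing_extensions.Annotated` works on every supported Python (it re-exports `typing.Annotated` where available), so one unconditional import suffices. For instance, a column-typed dataset in the style of this example (the `kwtypes` columns here are illustrative):

```python
from typing_extensions import Annotated  # re-exports typing.Annotated on 3.9+

from flytekit import kwtypes
from flytekit.types.structured.structured_dataset import StructuredDataset

# Illustrative column annotation in the style of the Spark example.
ColumnedDataset = Annotated[StructuredDataset, kwtypes(name=str, age=int)]
```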
2 changes: 1 addition & 1 deletion examples/mnist_classifier/Dockerfile
@@ -1,4 +1,4 @@
-FROM pytorch/pytorch:1.11.0-cuda11.3-cudnn8-runtime
+FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks

WORKDIR /root
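The base image jumps from PyTorch 1.11/CUDA 11.3 to 2.3.0/CUDA 12.1. A quick sanity check to run inside the new image; the expected values follow from the tag, so adjust them if the base changes:

```python
import torch

print(torch.__version__)          # expected: 2.3.0 per the image tag
print(torch.version.cuda)         # expected: 12.1 per the image tag
print(torch.cuda.is_available())  # True only when a GPU is attached
```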
3 changes: 2 additions & 1 deletion examples/nlp_processing/requirements.in
@@ -1,4 +1,4 @@
-flytekit>=0.32.3
+flytekit
wheel
matplotlib
flytekitplugins-deck-standard
@@ -8,3 +8,4 @@ nltk
plotly
pyemd
scikit-learn
+scipy==1.10.1
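Pinning `scipy==1.10.1` holds on to APIs that newer SciPy releases dropped. One plausible motivation (an assumption, not stated in the commit) is `scipy.linalg.triu`, which some older NLP dependencies still import:

```python
import numpy as np
from scipy.linalg import triu  # present in 1.10.x; removed in newer SciPy

# The upper-triangular helper that older NLP libraries still call.
print(triu(np.arange(9).reshape(3, 3)))
```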
1 change: 1 addition & 0 deletions examples/snowflake_agent/requirements.in
@@ -1,3 +1,4 @@
flytekitplugins-snowflake==1.7.0
flytekitplugins-envd==1.7.0
flytekit==1.7.1b1
+marshmallow_enum
1 change: 1 addition & 0 deletions examples/snowflake_plugin/requirements.in
@@ -1,3 +1,4 @@
flytekitplugins-snowflake==1.7.0
flytekit==1.7.1b1
flytekitplugins-envd==1.7.0
+marshmallow_enum
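`marshmallow_enum` is presumably required by the pinned flytekit 1.7.x line, whose dataclass serialization goes through `dataclasses_json`/marshmallow; enum-typed fields are what pull in the extra. A sketch of the kind of type that exercises it (the dataclass is hypothetical):

```python
from dataclasses import dataclass
from enum import Enum

from dataclasses_json import dataclass_json


class Warehouse(Enum):
    SMALL = "small"
    LARGE = "large"


@dataclass_json
@dataclass
class QueryConfig:
    warehouse: Warehouse


# Building the marshmallow schema for the enum field is what needs
# marshmallow_enum on this dependency line (assumption).
schema = QueryConfig.schema()
```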
2 changes: 1 addition & 1 deletion examples/whylogs_plugin/requirements.in
@@ -4,6 +4,6 @@ matplotlib
flytekitplugins-deck-standard
flytekitplugins-whylogs>=1.1.1b0
scikit-learn
-whylogs[s3]
+whylogs[s3]==1.3.30
whylogs[mlflow]
whylogs[whylabs]

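The `whylogs[s3]` extra is now pinned to 1.3.30, holding the example to a known-good 1.x API. A minimal profiling sketch against that API (assumed stable at this pin):

```python
import pandas as pd
import whylogs as why

df = pd.DataFrame({"feature": [1.0, 2.0, 3.0]})

# Profile the dataframe and materialize the summary (whylogs 1.x API).
profile = why.log(df).profile()
print(profile.view().to_pandas())
```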