Skip to content

Commit

Permalink
Additional serialization fixes (flyteorg#408)
Browse files Browse the repository at this point in the history
* requirements update

Signed-off-by: Haytham Abuelfutuh <[email protected]>

* Set resources differently for SANDBOX vs prod

Signed-off-by: Haytham Abuelfutuh <[email protected]>

* bump

Signed-off-by: Haytham Abuelfutuh <[email protected]>

* use lower resources for sandbox

Signed-off-by: Haytham Abuelfutuh <[email protected]>

* bump

Signed-off-by: Haytham Abuelfutuh <[email protected]>

* bump

Signed-off-by: Haytham Abuelfutuh <[email protected]>

* register without serialize

Signed-off-by: Haytham Abuelfutuh <[email protected]>

* register without serialize

Signed-off-by: Haytham Abuelfutuh <[email protected]>

* bump

Signed-off-by: Haytham Abuelfutuh <[email protected]>

* bump

Signed-off-by: Haytham Abuelfutuh <[email protected]>

* Update requirements

Signed-off-by: Haytham Abuelfutuh <[email protected]>

* wip

Signed-off-by: Haytham Abuelfutuh <[email protected]>

* Update eda requirements

Signed-off-by: Haytham Abuelfutuh <[email protected]>

* Cleanup

Signed-off-by: Haytham Abuelfutuh <[email protected]>

* format

Signed-off-by: Haytham Abuelfutuh <[email protected]>
  • Loading branch information
EngHabu authored Sep 15, 2021
1 parent 7f876e2 commit 6a3c5dd
Show file tree
Hide file tree
Showing 35 changed files with 277 additions and 257 deletions.
1 change: 1 addition & 0 deletions cookbook/case_studies/feature_engineering/eda/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ RUN ${VENV}/bin/pip install -r /root/requirements.txt

# Copy the makefile targets to expose on the container. This makes it easier to register.
COPY in_container.mk /root/Makefile
COPY eda/sandbox.config /root

# Copy the actual code
COPY eda/ /root/eda/
Expand Down
26 changes: 13 additions & 13 deletions cookbook/case_studies/feature_engineering/eda/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with python 3.8
# To update, run:
#
# /Library/Developer/CommandLineTools/usr/bin/make requirements.txt
# /Applications/Xcode.app/Contents/Developer/usr/bin/make requirements.txt
#
ansiwrap==0.8.4
# via papermill
Expand All @@ -16,13 +16,13 @@ attrs==21.2.0
# scantree
backcall==0.2.0
# via ipython
black==21.8b0
black==21.9b0
# via papermill
bleach==4.1.0
# via nbconvert
certifi==2021.5.30
# via requests
charset-normalizer==2.0.4
charset-normalizer==2.0.5
# via requests
click==7.1.2
# via
Expand All @@ -33,11 +33,11 @@ croniter==1.0.15
# via flytekit
cycler==0.10.0
# via matplotlib
dataclasses-json==0.5.5
dataclasses-json==0.5.6
# via flytekit
debugpy==1.4.2.post1
debugpy==1.4.3
# via ipykernel
decorator==5.0.9
decorator==5.1.0
# via
# ipython
# retry
Expand All @@ -60,19 +60,19 @@ entrypoints==0.3
# papermill
flyteidl==0.20.2
# via flytekit
flytekit==0.22.1
flytekit==0.22.2
# via
# -r ../../../common/requirements-common.in
# flytekitplugins-papermill
flytekitplugins-papermill==0.22.1
flytekitplugins-papermill==0.22.2
# via -r requirements.in
grpcio==1.40.0
# via flytekit
idna==3.2
# via requests
importlib-metadata==4.8.1
# via keyring
ipykernel==6.4.0
ipykernel==6.4.1
# via flytekitplugins-papermill
ipython==7.27.0
# via ipykernel
Expand All @@ -99,7 +99,7 @@ jupyter-core==4.7.1
# nbformat
jupyterlab-pygments==0.1.2
# via nbconvert
keyring==23.1.0
keyring==23.2.1
# via flytekit
kiwisolver==1.3.2
# via matplotlib
Expand Down Expand Up @@ -156,11 +156,11 @@ numpy==1.21.2
# seaborn
packaging==21.0
# via bleach
pandas==1.3.2
pandas==1.3.3
# via
# flytekit
# seaborn
pandocfilters==1.4.3
pandocfilters==1.5.0
# via nbconvert
papermill==2.3.3
# via flytekitplugins-papermill
Expand Down Expand Up @@ -231,7 +231,7 @@ requests==2.26.0
# flytekit
# papermill
# responses
responses==0.13.4
responses==0.14.0
# via flytekit
retry==0.9.2
# via flytekit
Expand Down
3 changes: 3 additions & 0 deletions cookbook/case_studies/feature_engineering/eda/sandbox.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[sdk]
workflow_packages=eda
python_venv=flytekit_venv
2 changes: 1 addition & 1 deletion cookbook/case_studies/feature_engineering/in_container.mk
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ serialize: $(SERIALIZED_PB_OUTPUT_DIR)
pyflyte --config /root/sandbox.config serialize workflows -f $(SERIALIZED_PB_OUTPUT_DIR)

.PHONY: register
register: serialize
register:
flyte-cli register-files -h ${FLYTE_HOST} ${INSECURE_FLAG} -p ${PROJECT} -d development -v ${VERSION} --kubernetes-service-account ${SERVICE_ACCOUNT} --output-location-prefix ${OUTPUT_DATA_PREFIX} $(SERIALIZED_PB_OUTPUT_DIR)/*

.PHONY: fast_serialize
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@
# This file is autogenerated by pip-compile with python 3.8
# To update, run:
#
# /Library/Developer/CommandLineTools/usr/bin/make requirements.txt
# /Applications/Xcode.app/Contents/Developer/usr/bin/make requirements.txt
#
attrs==21.2.0
# via scantree
certifi==2021.5.30
# via requests
charset-normalizer==2.0.4
charset-normalizer==2.0.5
# via requests
click==7.1.2
# via flytekit
croniter==1.0.15
# via flytekit
dataclasses-json==0.5.5
dataclasses-json==0.5.6
# via flytekit
decorator==5.0.9
decorator==5.1.0
# via retry
deprecated==1.2.13
# via flytekit
Expand All @@ -30,7 +30,7 @@ docstring-parser==0.10
# via flytekit
flyteidl==0.20.2
# via flytekit
flytekit==0.22.1
flytekit==0.22.2
# via -r requirements.in
grpcio==1.40.0
# via flytekit
Expand All @@ -40,7 +40,7 @@ importlib-metadata==4.8.1
# via keyring
joblib==1.0.1
# via scikit-learn
keyring==23.1.0
keyring==23.2.1
# via flytekit
marshmallow==3.13.0
# via
Expand All @@ -62,7 +62,7 @@ numpy==1.21.2
# pyarrow
# scikit-learn
# scipy
pandas==1.3.2
pandas==1.3.3
# via flytekit
pathspec==0.9.0
# via scantree
Expand Down Expand Up @@ -93,7 +93,7 @@ requests==2.26.0
# via
# flytekit
# responses
responses==0.13.4
responses==0.14.0
# via flytekit
retry==0.9.2
# via flytekit
Expand Down
2 changes: 1 addition & 1 deletion cookbook/case_studies/in_container.mk
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ serialize: $(SERIALIZED_PB_OUTPUT_DIR)
pyflyte --config /root/sandbox.config serialize workflows -f $(SERIALIZED_PB_OUTPUT_DIR)

.PHONY: register
register: serialize
register:
flyte-cli register-files -h ${FLYTE_HOST} ${INSECURE_FLAG} -p ${PROJECT} -d development -v ${VERSION} --kubernetes-service-account ${SERVICE_ACCOUNT} --output-location-prefix ${OUTPUT_DATA_PREFIX} $(SERIALIZED_PB_OUTPUT_DIR)/*

.PHONY: fast_serialize
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,23 @@
# This file is autogenerated by pip-compile with python 3.8
# To update, run:
#
# /Library/Developer/CommandLineTools/usr/bin/make requirements.txt
# /Applications/Xcode.app/Contents/Developer/usr/bin/make requirements.txt
#
attrs==21.2.0
# via scantree
certifi==2021.5.30
# via requests
charset-normalizer==2.0.4
charset-normalizer==2.0.5
# via requests
click==7.1.2
# via flytekit
croniter==1.0.15
# via flytekit
cycler==0.10.0
# via matplotlib
dataclasses-json==0.5.5
dataclasses-json==0.5.6
# via flytekit
decorator==5.0.9
decorator==5.1.0
# via retry
deprecated==1.2.13
# via flytekit
Expand All @@ -32,7 +32,7 @@ docstring-parser==0.10
# via flytekit
flyteidl==0.20.2
# via flytekit
flytekit==0.22.1
flytekit==0.22.2
# via -r ../../../common/requirements-common.in
grpcio==1.40.0
# via flytekit
Expand All @@ -44,7 +44,7 @@ joblib==1.0.1
# via
# -r requirements.in
# scikit-learn
keyring==23.1.0
keyring==23.2.1
# via flytekit
kiwisolver==1.3.2
# via matplotlib
Expand Down Expand Up @@ -73,7 +73,7 @@ numpy==1.21.2
# scikit-learn
# scipy
# xgboost
pandas==1.3.2
pandas==1.3.3
# via flytekit
pathspec==0.9.0
# via scantree
Expand Down Expand Up @@ -109,7 +109,7 @@ requests==2.26.0
# via
# flytekit
# responses
responses==0.13.4
responses==0.14.0
# via flytekit
retry==0.9.2
# via flytekit
Expand Down
2 changes: 1 addition & 1 deletion cookbook/case_studies/ml_training/in_container.mk
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ serialize: $(SERIALIZED_PB_OUTPUT_DIR)
pyflyte --config /root/sandbox.config serialize workflows -f $(SERIALIZED_PB_OUTPUT_DIR)

.PHONY: register
register: serialize
register:
flyte-cli register-files -h ${FLYTE_HOST} ${INSECURE_FLAG} -p ${PROJECT} -d development -v ${VERSION} --kubernetes-service-account ${SERVICE_ACCOUNT} --output-location-prefix ${OUTPUT_DATA_PREFIX} $(SERIALIZED_PB_OUTPUT_DIR)/*

.PHONY: fast_serialize
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
NUM_BATCHES_TO_LOG = 10
LOG_IMAGES_PER_BATCH = 32


# %%
# If running remotely, copy your ``wandb`` API key to the Dockerfile under the environment variable ``WANDB_API_KEY``.
# This function logs into ``wandb`` and initializes the project. If you built your Docker image with the
Expand All @@ -48,6 +49,7 @@ def wandb_setup():
wandb.login()
wandb.init(project="mnist-single-node-single-gpu", entity=os.environ.get("WANDB_USERNAME", "my-user-name"))


# %%
# Creating the Network
# ====================
Expand Down Expand Up @@ -159,7 +161,6 @@ def log_test_predictions(images, labels, outputs, predicted, my_table, log_count
# We log ``accuracy``, ``test_loss``, and a ``wandb`` `table <https://docs.wandb.ai/guides/data-vis/log-tables>`__.
# The ``wandb`` table can help in depicting the model's performance in a structured format.
def test(model, device, test_loader):

# ``wandb`` tabular columns
columns = ["id", "image", "guess", "truth"]
for digit in range(10):
Expand Down Expand Up @@ -253,13 +254,28 @@ class Hyperparameters(object):
model_state=PythonPickledFile,
)

# %%
# Set memory, gpu and storage depending on whether we are registering against the sandbox or not.
# ``SANDBOX`` counts as enabled only when it is set to a non-empty value. ``os.getenv`` returns
# ``None`` for an unset variable and ``None != ""`` is true, so comparing against ``""`` alone
# would select the sandbox sizes whenever the variable is missing.
sandbox_env = os.getenv("SANDBOX")
print(f"SANDBOX ENV: '{sandbox_env}'")

if sandbox_env:
    # Sandbox: small footprint and no GPU.
    mem = "100Mi"
    gpu = "0"
    storage = "500Mi"
else:
    # Not a sandbox registration: full-size resources, including one GPU.
    mem = "3Gi"
    gpu = "1"
    storage = "1Gi"


@task(
retries=2,
cache=True,
cache_version="1.0",
requests=Resources(gpu="1", mem="3Gi", storage="1Gi"),
limits=Resources(gpu="1", mem="3Gi", storage="1Gi"),
requests=Resources(gpu=gpu, mem=mem, storage=storage),
limits=Resources(gpu=gpu, mem=mem, storage=storage),
)
def pytorch_mnist_task(hp: Hyperparameters) -> TrainingOutputs:
wandb_setup()
Expand Down
Loading

0 comments on commit 6a3c5dd

Please sign in to comment.