From 16eae65a28d86a498c4022bff96e27840c68e1a1 Mon Sep 17 00:00:00 2001 From: Yicheng-Lu-llll <51814063+Yicheng-Lu-llll@users.noreply.github.com> Date: Thu, 18 May 2023 19:20:49 -0500 Subject: [PATCH] Add an example of how to use BatchSize (#980) * Update flyte_pickle.py Signed-off-by: Yicheng-Lu-llll <51814063+Yicheng-Lu-llll@users.noreply.github.com> * Update flyte_pickle.py Signed-off-by: Yicheng-Lu-llll <51814063+Yicheng-Lu-llll@users.noreply.github.com> * update flytekit version Signed-off-by: Yicheng-Lu-llll <51814063+Yicheng-Lu-llll@users.noreply.github.com> * Update requirements.txt Signed-off-by: Yicheng-Lu-llll <51814063+Yicheng-Lu-llll@users.noreply.github.com> * nit Signed-off-by: Yicheng-Lu-llll <51814063+Yicheng-Lu-llll@users.noreply.github.com> --------- Signed-off-by: Yicheng-Lu-llll <51814063+Yicheng-Lu-llll@users.noreply.github.com> Co-authored-by: Kevin Su --- cookbook/core/requirements.txt | 4 ++-- cookbook/core/type_system/flyte_pickle.py | 28 +++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/cookbook/core/requirements.txt b/cookbook/core/requirements.txt index ae1255b3e..96d364179 100644 --- a/cookbook/core/requirements.txt +++ b/cookbook/core/requirements.txt @@ -74,9 +74,9 @@ docstring-parser==0.15 # via flytekit executing==1.2.0 # via stack-data -flyteidl==1.3.11 +flyteidl==1.3.18 # via flytekit -flytekit==1.4.1 +flytekit==1.5.0 # via # -r ../common/requirements-common.in # flytekitplugins-deck-standard diff --git a/cookbook/core/type_system/flyte_pickle.py b/cookbook/core/type_system/flyte_pickle.py index 5458b4ebe..3eafdd1b6 100644 --- a/cookbook/core/type_system/flyte_pickle.py +++ b/cookbook/core/type_system/flyte_pickle.py @@ -57,3 +57,31 @@ def welcome(name: str) -> People: the custom object (People) will be marshalled to and from python pickle. """ welcome(name="Foo") + + +# %% +# By default, if the list subtype is unrecognized, a single pickle file is generated. 
+# To improve serialization and deserialization performance for lists with millions of items or with large items,
+# users can specify a batch size, so that each batch is processed as a separate pickle file.
+# The example below shows how to set the batch size by annotating the list type with ``BatchSize``.
+from flytekit.types.pickle.pickle import BatchSize
+from typing import List
+from typing_extensions import Annotated
+
+@task
+def greet_all(names: List[str]) -> Annotated[List[People],BatchSize(2)]:
+    return [People(name) for name in names]
+
+
+@workflow
+def welcome_all(names: List[str]) -> Annotated[List[People],BatchSize(2)]:
+    return greet_all(names=names)
+
+
+if __name__ == "__main__":
+    """
+    In this example, two pickle files will be generated:
+    - One containing two People objects
+    - One containing one People object
+    """
+    welcome_all(names=["f","o","o"])