diff --git a/cookbook/core/flyte_basics/files.py b/cookbook/core/flyte_basics/files.py index cfb5549094..dc6a43500a 100644 --- a/cookbook/core/flyte_basics/files.py +++ b/cookbook/core/flyte_basics/files.py @@ -2,68 +2,88 @@ Working With Files ------------------- -Files are one of the most fundamental things that users of Python work with, and they are fully supported by Flyte. +Files are one of the most fundamental entities that users of Python work with, and they are fully supported by Flyte. In the IDL, they are known as `Blob `__ literals -and are backed by the `blob type `__ +which are backed by the `blob type `__. -Note that the type message includes an optional ``format`` field which is a text-field used to denote the file extension. +Let's assume our mission here is pretty simple. We take in a couple of links, download the pictures, rotate them, and return the rotated images. """ +# %% +# First, let's import the libraries. import os -import urllib.request import cv2 import flytekit from flytekit import task, workflow -from flytekit.types.file import FlyteFile +from flytekit.types.file import JPEGImageFile +from flytekit import FlyteContext # %% -# Let's assume our mission here is pretty simple. We want to take each of these links, download the picture, rotate it -# and return the file. -default_images = [ - "https://upload.wikimedia.org/wikipedia/commons/a/a8/Fractal_pyramid.jpg", - "https://upload.wikimedia.org/wikipedia/commons/thumb/a/ad/Julian_fractal.jpg/256px-Julian_fractal.jpg", -] +# ``JPEGImageFile`` is a pre-formatted FlyteFile type. It is equivalent to ``FlyteFile[typing.TypeVar("jpeg")]``. +# +# .. note:: +# The ``FlyteFile`` literal can be scoped with a string, which gets inserted into the format of the Blob type ("jpeg" is the string in +# ``FlyteFile[typing.TypeVar("jpeg")]``). The format is entirely optional, and if not specified, defaults to ``""``. # %% -# Note the signature of the return type of this task is a ``FlyteFile``. 
Files do not have a native object in Python
-# so we had to write one ourselves. There does exist the ``os.PathLike`` protocol, but nothing implements it.
-#
-# When this task finishes, the flytekit engine will detect the ``FlyteFile`` instance being returned, find a location
-# in Flyte's object store (usually S3), upload the file to that location, and create a Blob literal pointing to it.
-#
-# Note that the ``FlyteFile`` literal is scoped with a string, which gets inserted into the format of the Blob type.
-# The ``[]`` are entirely optional, and if you don't specify it, the format will just be an ``""``.
+# Next, we write a task that accepts a ``JPEGImageFile`` as an input and returns the rotated image as an output,
+# which is again a ``JPEGImageFile``.
+# Files do not have a native object in Python, so we had to write one ourselves.
+# There does exist the ``os.PathLike`` protocol, but nothing implements it.
 @task
-def rotate(image_location: str) -> FlyteFile:
+def rotate(image_location: JPEGImageFile) -> JPEGImageFile:
     """
-    Download the given image, rotate it by 180 degrees
+    Download the given image, read it in grayscale, rotate it by 180 degrees, and return it as a new JPEG file.
     """
     working_dir = flytekit.current_context().working_directory
-    local_image = os.path.join(working_dir, "incoming.jpg")
-    urllib.request.urlretrieve(image_location, local_image)
-    img = cv2.imread(local_image, 0)
+    image_location.download()
+    print(image_location.path)
+    img = cv2.imread(image_location.path, 0)
     if img is None:
         raise Exception("Failed to read image")
     (h, w) = img.shape[:2]
     center = (w / 2, h / 2)
     mat = cv2.getRotationMatrix2D(center, 180, 1)
     res = cv2.warpAffine(img, mat, (w, h))
-    out_path = os.path.join(working_dir, "rotated.jpg")
+    out_path = os.path.join(  # splitext drops only the final extension, so dotted names stay distinct
+        working_dir,
+        f"rotated-{os.path.splitext(os.path.basename(image_location.path))[0]}.jpg",
+    )
     cv2.imwrite(out_path, res)
-    return FlyteFile["jpg"](path=out_path)
+    return JPEGImageFile(path=out_path)
+
+# %%
+# When an image URL is sent to the task, the Flytekit engine translates it into a ``FlyteFile`` object on the local
drive
+# (but doesn't download it).
+# Calling the ``download`` method triggers the download.
+# The object's ``path`` attribute is then what the task passes to OpenCV (``cv2.imread``) to read the file.
+#
+# When this task finishes, the Flytekit engine detects the returned ``FlyteFile`` instance, finds a location
+# in Flyte's object store (usually S3), uploads the file to that location, and creates a Blob literal pointing to it.
+#
+# .. tip::
+#
+#    The ``rotate`` task works with ``FlyteFile``, too. However, ``JPEGImageFile`` also attaches the content format to the type.
+# %%
+# We now define the workflow.
 @workflow
-def rotate_one_workflow(in_image: str) -> FlyteFile:
+def rotate_one_workflow(in_image: JPEGImageFile) -> JPEGImageFile:
     return rotate(image_location=in_image)
 # %%
-# Execute it
+# Finally, let's execute it!
 if __name__ == "__main__":
+    default_images = [
+        "https://media.sketchfab.com/models/e13940161fb64746a4f6753f76abe886/thumbnails/b7e1ba951ffb46a4ad584ba8ae400d17/e9de09ac9c7941f1924bd384e74a5e2e.jpeg",
+        "https://upload.wikimedia.org/wikipedia/en/7/7e/Julia_0.4_0.6.png",
+    ]
     print(f"Running {__file__} main...")
-    print(
-        f"Running rotate_one_workflow(in_image=default_images[0]) {rotate_one_workflow(in_image=default_images[0])}"
-    )
+    for index, each_image in enumerate(default_images):
+        print(
+            f"Running rotate_one_workflow(in_image=default_images[{index}]) {rotate_one_workflow(in_image=each_image)}"
+        )