From e6468318b30935a5afde059ad3defeb7d825de2a Mon Sep 17 00:00:00 2001 From: Sean Lin Date: Thu, 22 Jul 2021 17:03:54 -0700 Subject: [PATCH] Add support for @workflow docstring input/output variable description (#562) Signed-off-by: Sean Lin --- flytekit/clients/friendly.py | 2 +- flytekit/core/base_task.py | 3 +- flytekit/core/docstring.py | 27 ++++++ flytekit/core/interface.py | 30 +++---- flytekit/core/python_auto_container.py | 3 + flytekit/core/python_function_task.py | 3 +- flytekit/core/workflow.py | 11 ++- tests/flytekit/unit/core/test_docstring.py | 95 ++++++++++++++++++++++ tests/flytekit/unit/core/test_interface.py | 78 +----------------- tests/flytekit/unit/core/test_workflows.py | 16 +++- 10 files changed, 168 insertions(+), 100 deletions(-) create mode 100644 flytekit/core/docstring.py create mode 100644 tests/flytekit/unit/core/test_docstring.py diff --git a/flytekit/clients/friendly.py b/flytekit/clients/friendly.py index daac8ee088..e016d55485 100644 --- a/flytekit/clients/friendly.py +++ b/flytekit/clients/friendly.py @@ -293,7 +293,7 @@ def list_workflows_paginated(self, identifier, limit=100, token=None, filters=No def get_workflow(self, id): """ - This returns a single task for a given ID. + This returns a single workflow for a given ID. :param flytekit.models.core.identifier.Identifier id: The ID representing a given task. :raises: TODO diff --git a/flytekit/core/base_task.py b/flytekit/core/base_task.py index 0bcc52e14e..7aa03d41fc 100644 --- a/flytekit/core/base_task.py +++ b/flytekit/core/base_task.py @@ -33,6 +33,7 @@ FlyteEntities, SerializationSettings, ) +from flytekit.core.docstring import Docstring from flytekit.core.interface import Interface, transform_interface_to_typed_interface from flytekit.core.promise import ( Promise, @@ -372,7 +373,7 @@ def __init__( task_config: T, interface: Optional[Interface] = None, environment: Optional[Dict[str, str]] = None, - docstring: str = None, + docstring: Optional[Docstring] = None, **kwargs, ): """ diff --git a/flytekit/core/docstring.py b/flytekit/core/docstring.py new file mode 100644 index 0000000000..420f26f8f5 --- /dev/null +++ b/flytekit/core/docstring.py @@ -0,0 +1,27 @@ +from typing import Callable, Dict, Optional + +from docstring_parser import parse + + +class Docstring(object): + def __init__(self, docstring: str = None, callable_: Callable = None): + if docstring is not None: + self._parsed_docstring = parse(docstring) + else: + self._parsed_docstring = parse(callable_.__doc__) + + @property + def input_descriptions(self) -> Dict[str, str]: + return {p.arg_name: p.description for p in self._parsed_docstring.params} + + @property + def output_descriptions(self) -> Dict[str, str]: + return {p.return_name: p.description for p in self._parsed_docstring.many_returns} + + @property + def short_description(self) -> Optional[str]: + return self._parsed_docstring.short_description + + @property + def long_description(self) -> Optional[str]: + return self._parsed_docstring.long_description diff --git a/flytekit/core/interface.py b/flytekit/core/interface.py index c2c6082dd4..64beca8e6a 100644 --- a/flytekit/core/interface.py +++ b/flytekit/core/interface.py @@ -7,10 +7,9 @@ from collections import OrderedDict from typing import Any, Dict, Generator, List, Optional, Tuple, Type, TypeVar, Union -from docstring_parser import parse - from flytekit.common.exceptions.user import FlyteValidationException from flytekit.core import context_manager +from flytekit.core.docstring import Docstring from flytekit.core.type_engine import TypeEngine from flytekit.loggers import logger from flytekit.models import interface as _interface_models @@ -180,18 +179,22 @@ def transform_inputs_to_parameters( def transform_interface_to_typed_interface( interface: typing.Optional[Interface], - docstring: str = None, + docstring: Optional[Docstring] = None, ) -> typing.Optional[_interface_models.TypedInterface]: """ Transform the given simple python native interface to FlyteIDL's interface """ if interface is None: return None - input_descriptions, output_description = get_variable_descriptions(docstring) + + if docstring is None: + input_descriptions = output_descriptions = {} + else: + input_descriptions = docstring.input_descriptions + output_descriptions = remap_shared_output_descriptions(docstring.output_descriptions, interface.outputs) + inputs_map = transform_variable_map(interface.inputs, input_descriptions) - outputs_map = transform_variable_map( - interface.outputs, remap_shared_output_descriptions(output_description, interface.outputs) - ) + outputs_map = transform_variable_map(interface.outputs, output_descriptions) return _interface_models.TypedInterface(inputs_map, outputs_map) @@ -354,19 +357,6 @@ def t(a: int, b: str) -> Dict[str, int]: ... return {default_output_name(): return_annotation} -def get_variable_descriptions(docstring: str) -> Tuple[Dict[str, str], Optional[str]]: - """ - Takes a Python docstring, either from `function.__doc__` or `inpect.getdoc(function)`, and returns the descriptions of the input paramenters and the output values. - - :param docstring: Python docstring in Sphinx reStructuredText style, Numpydoc style, or Google style. - :return: Dict of input parameter names mapping to their descriptions, and dict of output names mapping to their descriptions. - """ - parsed_docstring = parse(docstring) - return {p.arg_name: p.description for p in parsed_docstring.params}, { - p.return_name: p.description for p in parsed_docstring.many_returns - } - - def remap_shared_output_descriptions(output_descriptions: Dict[str, str], outputs: Dict[str, Type]) -> Dict[str, str]: """ Deals with mixed styles of return value descriptions used in docstrings. If the docstring contains a single entry of return value description, that output description is shared by each output variable. diff --git a/flytekit/core/python_auto_container.py b/flytekit/core/python_auto_container.py index 38331d59bc..bd737e9e92 100644 --- a/flytekit/core/python_auto_container.py +++ b/flytekit/core/python_auto_container.py @@ -7,6 +7,7 @@ from flytekit.common.tasks.raw_container import _get_container_definition from flytekit.core.base_task import PythonTask, TaskResolverMixin from flytekit.core.context_manager import FlyteContextManager, ImageConfig, SerializationSettings +from flytekit.core.docstring import Docstring from flytekit.core.resources import Resources, ResourceSpec from flytekit.core.tracked_abc import FlyteTrackedABC from flytekit.core.tracker import TrackedInstance @@ -37,6 +38,7 @@ def __init__( environment: Optional[Dict[str, str]] = None, task_resolver: Optional[TaskResolverMixin] = None, secret_requests: Optional[List[Secret]] = None, + docstring: Optional[Docstring] = None, **kwargs, ): """ @@ -73,6 +75,7 @@ def __init__( name=name, task_config=task_config, security_ctx=sec_ctx, + docstring=docstring, **kwargs, ) self._container_image = container_image diff --git a/flytekit/core/python_function_task.py b/flytekit/core/python_function_task.py index c2c5f58001..9e823d7219 100644 --- a/flytekit/core/python_function_task.py +++ b/flytekit/core/python_function_task.py @@ -23,6 +23,7 @@ from flytekit.common.exceptions import scopes as exception_scopes from flytekit.core.base_task import Task, TaskResolverMixin from flytekit.core.context_manager import ExecutionState, FastSerializationSettings, FlyteContext, FlyteContextManager +from flytekit.core.docstring import Docstring from flytekit.core.interface import transform_signature_to_interface from flytekit.core.python_auto_container import PythonAutoContainerTask, default_task_resolver from flytekit.core.tracker import isnested, istestfunction @@ -121,7 +122,7 @@ def __init__( interface=mutated_interface, task_config=task_config, task_resolver=task_resolver, - docstring=task_function.__doc__, + docstring=Docstring(callable_=task_function), **kwargs, ) diff --git a/flytekit/core/workflow.py b/flytekit/core/workflow.py index 72eacdd3eb..9ec0e3fd22 100644 --- a/flytekit/core/workflow.py +++ b/flytekit/core/workflow.py @@ -19,6 +19,7 @@ FlyteContextManager, FlyteEntities, ) +from flytekit.core.docstring import Docstring from flytekit.core.interface import ( Interface, transform_inputs_to_parameters, @@ -173,13 +174,14 @@ def __init__( workflow_metadata: WorkflowMetadata, workflow_metadata_defaults: WorkflowMetadataDefaults, python_interface: Interface, + docstring: Optional[Docstring] = None, **kwargs, ): self._name = name self._workflow_metadata = workflow_metadata self._workflow_metadata_defaults = workflow_metadata_defaults self._python_interface = python_interface - self._interface = transform_interface_to_typed_interface(python_interface) + self._interface = transform_interface_to_typed_interface(python_interface, docstring) self._inputs = {} self._unbound_inputs = set() self._nodes = [] @@ -640,6 +642,7 @@ def __init__( workflow_function: Callable, metadata: Optional[WorkflowMetadata], default_metadata: Optional[WorkflowMetadataDefaults], + docstring: Docstring = None, ): name = f"{workflow_function.__module__}.{workflow_function.__name__}" self._workflow_function = workflow_function @@ -654,6 +657,7 @@ def __init__( workflow_metadata=metadata, workflow_metadata_defaults=default_metadata, python_interface=native_interface, + docstring=docstring, ) @property @@ -794,7 +798,10 @@ def wrapper(fn): workflow_metadata_defaults = WorkflowMetadataDefaults(interruptible) workflow_instance = PythonFunctionWorkflow( - fn, metadata=workflow_metadata, default_metadata=workflow_metadata_defaults + fn, + metadata=workflow_metadata, + default_metadata=workflow_metadata_defaults, + docstring=Docstring(callable_=fn), ) workflow_instance.compile() return workflow_instance diff --git a/tests/flytekit/unit/core/test_docstring.py b/tests/flytekit/unit/core/test_docstring.py new file mode 100644 index 0000000000..de0a398af5 --- /dev/null +++ b/tests/flytekit/unit/core/test_docstring.py @@ -0,0 +1,95 @@ +import typing + +from flytekit.core.docstring import Docstring + + +def test_get_variable_descriptions(): + # sphinx style + def z(a: int, b: str) -> typing.Tuple[int, str]: + """ + function z + + longer description here + + :param a: foo + :param b: bar + :return: ramen + """ + ... + + docstring = Docstring(callable_=z) + input_descriptions = docstring.input_descriptions + output_descriptions = docstring.output_descriptions + assert input_descriptions["a"] == "foo" + assert input_descriptions["b"] == "bar" + assert len(output_descriptions) == 1 + assert next(iter(output_descriptions.items()))[1] == "ramen" + assert docstring.short_description == "function z" + assert docstring.long_description == "longer description here" + + # numpy style + def z(a: int, b: str) -> typing.Tuple[int, str]: + """ + function z + + longer description here + + Parameters + ---------- + a : int + foo + b : str + bar + + Returns + ------- + out : tuple + ramen + """ + ... + + docstring = Docstring(callable_=z) + input_descriptions = docstring.input_descriptions + output_descriptions = docstring.output_descriptions + assert input_descriptions["a"] == "foo" + assert input_descriptions["b"] == "bar" + assert len(output_descriptions) == 1 + assert next(iter(output_descriptions.items()))[1] == "ramen" + assert docstring.short_description == "function z" + assert docstring.long_description == "longer description here" + + # google style + def z(a: int, b: str) -> typing.Tuple[int, str]: + """function z + + longer description here + + Args: + a(int): foo + b(str): bar + Returns: + str: ramen + """ + ... + + docstring = Docstring(callable_=z) + input_descriptions = docstring.input_descriptions + output_descriptions = docstring.output_descriptions + assert input_descriptions["a"] == "foo" + assert input_descriptions["b"] == "bar" + assert len(output_descriptions) == 1 + assert next(iter(output_descriptions.items()))[1] == "ramen" + assert docstring.short_description == "function z" + assert docstring.long_description == "longer description here" + + # empty doc + def z(a: int, b: str) -> typing.Tuple[int, str]: + ... + + docstring = Docstring(callable_=z) + input_descriptions = docstring.input_descriptions + output_descriptions = docstring.output_descriptions + assert len(input_descriptions) == 0 + assert len(output_descriptions) == 0 + assert docstring.short_description is None + assert docstring.long_description is None diff --git a/tests/flytekit/unit/core/test_interface.py b/tests/flytekit/unit/core/test_interface.py index a14f52dfac..1fb47f39de 100644 --- a/tests/flytekit/unit/core/test_interface.py +++ b/tests/flytekit/unit/core/test_interface.py @@ -4,9 +4,9 @@ from typing import Dict, List from flytekit.core import context_manager +from flytekit.core.docstring import Docstring from flytekit.core.interface import ( extract_return_annotation, - get_variable_descriptions, transform_inputs_to_parameters, transform_interface_to_typed_interface, transform_signature_to_interface, @@ -192,7 +192,7 @@ def z(a: int, b: str) -> typing.Tuple[int, str]: ... our_interface = transform_signature_to_interface(inspect.signature(z)) - typed_interface = transform_interface_to_typed_interface(our_interface, z.__doc__) + typed_interface = transform_interface_to_typed_interface(our_interface, Docstring(callable_=z)) assert typed_interface.inputs.get("a").description == "foo" assert typed_interface.inputs.get("b").description == "bar" assert typed_interface.outputs.get("o1").description == "ramen" @@ -217,7 +217,7 @@ def z(a: int, b: str) -> typing.Tuple[int, str]: ... our_interface = transform_signature_to_interface(inspect.signature(z)) - typed_interface = transform_interface_to_typed_interface(our_interface, z.__doc__) + typed_interface = transform_interface_to_typed_interface(our_interface, Docstring(callable_=z)) assert typed_interface.inputs.get("a").description == "foo" assert typed_interface.inputs.get("b").description == "bar" assert typed_interface.outputs.get("o0").description == "ramen" @@ -245,78 +245,8 @@ def z(a: int, b: str) -> typing.NamedTuple("NT", x_str=str, y_int=int): ... our_interface = transform_signature_to_interface(inspect.signature(z)) - typed_interface = transform_interface_to_typed_interface(our_interface, z.__doc__) + typed_interface = transform_interface_to_typed_interface(our_interface, Docstring(callable_=z)) assert typed_interface.inputs.get("a").description == "foo" assert typed_interface.inputs.get("b").description == "bar" assert typed_interface.outputs.get("x_str").description == "description for x_str" assert typed_interface.outputs.get("y_int").description == "description for y_int" - - -def test_get_variable_descriptions(): - # sphinx style - def z(a: int, b: str) -> typing.Tuple[int, str]: - """ - function z - - :param a: foo - :param b: bar - :return: ramen - """ - ... - - input_descriptions, output_descriptions = get_variable_descriptions(z.__doc__) - assert input_descriptions["a"] == "foo" - assert input_descriptions["b"] == "bar" - assert len(output_descriptions) == 1 - assert next(iter(output_descriptions.items()))[1] == "ramen" - - # numpy style - def z(a: int, b: str) -> typing.Tuple[int, str]: - """ - function z - - Parameters - ---------- - a : int - foo - b : str - bar - - Returns - ------- - out : tuple - ramen - """ - ... - - input_descriptions, output_descriptions = get_variable_descriptions(z.__doc__) - assert input_descriptions["a"] == "foo" - assert input_descriptions["b"] == "bar" - assert len(output_descriptions) == 1 - assert next(iter(output_descriptions.items()))[1] == "ramen" - - # google style - def z(a: int, b: str) -> typing.Tuple[int, str]: - """function z - - Args: - a(int): foo - b(str): bar - Returns: - str: ramen - """ - ... - - input_descriptions, output_descriptions = get_variable_descriptions(z.__doc__) - assert input_descriptions["a"] == "foo" - assert input_descriptions["b"] == "bar" - assert len(output_descriptions) == 1 - assert next(iter(output_descriptions.items()))[1] == "ramen" - - # empty doc - def z(a: int, b: str) -> typing.Tuple[int, str]: - ... - - input_descriptions, output_descriptions = get_variable_descriptions(z.__doc__) - assert len(input_descriptions) == 0 - assert len(output_descriptions) == 0 diff --git a/tests/flytekit/unit/core/test_workflows.py b/tests/flytekit/unit/core/test_workflows.py index 865861243e..544b0ab74d 100644 --- a/tests/flytekit/unit/core/test_workflows.py +++ b/tests/flytekit/unit/core/test_workflows.py @@ -208,8 +208,12 @@ def simple_wf() -> int: @workflow def my_wf_example(a: int) -> (int, int): - """ + """example + Workflows can have inputs and return outputs of simple or complex types. + + :param a: input a + :return: outputs """ x = add_5(a=a) @@ -242,3 +246,13 @@ def test_all_node_types(): assert len(sub_wf.nodes) == 1 assert sub_wf.nodes[0].id == "n0" assert sub_wf.nodes[0].task_node.reference_id.name == "test_workflows.add_5" + + +def test_wf_docstring(): + model_wf = get_serializable(OrderedDict(), serialization_settings, my_wf_example) + + assert len(model_wf.template.interface.outputs) == 2 + assert model_wf.template.interface.outputs["o0"].description == "outputs" + assert model_wf.template.interface.outputs["o1"].description == "outputs" + assert len(model_wf.template.interface.inputs) == 1 + assert model_wf.template.interface.inputs["a"].description == "input a"