From 84b755f9e05700d05d96511ff495b5ee777b6110 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Thu, 9 Mar 2023 03:20:43 +0800 Subject: [PATCH] Structured Dataset with generic format should be castable to Flyte Schema (#536) * Structured Dataset with generic format should be castable to Schema Signed-off-by: Kevin Su * Structured Dataset with generic format should be castable to Schema Signed-off-by: Kevin Su * update Signed-off-by: Kevin Su --------- Signed-off-by: Kevin Su --- .../pkg/compiler/validators/typing.go | 5 +++-- .../pkg/compiler/validators/typing_test.go | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/flytepropeller/pkg/compiler/validators/typing.go b/flytepropeller/pkg/compiler/validators/typing.go index 9f9c1321c..09268359d 100644 --- a/flytepropeller/pkg/compiler/validators/typing.go +++ b/flytepropeller/pkg/compiler/validators/typing.go @@ -115,7 +115,7 @@ func (t schemaTypeChecker) CastsFrom(upstreamType *flyte.LiteralType) bool { } // Flyte Schema can only be serialized to parquet - if !strings.EqualFold(structuredDatasetType.Format, "parquet") { + if len(structuredDatasetType.Format) != 0 && !strings.EqualFold(structuredDatasetType.Format, "parquet") { return false } @@ -147,7 +147,8 @@ func (t structuredDatasetChecker) CastsFrom(upstreamType *flyte.LiteralType) boo } if schemaType != nil { // Flyte Schema can only be serialized to parquet - if !strings.EqualFold(t.literalType.GetStructuredDatasetType().Format, "parquet") { + format := t.literalType.GetStructuredDatasetType().Format + if len(format) != 0 && !strings.EqualFold(format, "parquet") { return false } return structuredDatasetCastFromSchema(schemaType, t.literalType.GetStructuredDatasetType()) diff --git a/flytepropeller/pkg/compiler/validators/typing_test.go b/flytepropeller/pkg/compiler/validators/typing_test.go index a4b6a7a3a..8344339f0 100644 --- a/flytepropeller/pkg/compiler/validators/typing_test.go +++ b/flytepropeller/pkg/compiler/validators/typing_test.go @@ -579,6 +579,14 @@ func TestSchemaCasting(t *testing.T) { }, }, } + genericStructuredDataset := &core.LiteralType{ + Type: &core.LiteralType_StructuredDatasetType{ + StructuredDatasetType: &core.StructuredDatasetType{ + Columns: []*core.StructuredDatasetType_DatasetColumn{}, + Format: "", + }, + }, + } subsetIntegerSchema := &core.LiteralType{ Type: &core.LiteralType_Schema{ Schema: &core.SchemaType{ @@ -657,6 +665,16 @@ func TestSchemaCasting(t *testing.T) { assert.True(t, castable, "Schema(a=Integer, b=Float) should be castable to Schema(a=Integer)") }) + t.Run("GenericToSubsetTypedSchema", func(t *testing.T) { + castable := AreTypesCastable(genericStructuredDataset, subsetIntegerSchema) + assert.True(t, castable, "StructuredDataset() with generic format should be castable to Schema(a=Integer)") + }) + + t.Run("SubsetTypedSchemaToGeneric", func(t *testing.T) { + castable := AreTypesCastable(subsetIntegerSchema, genericStructuredDataset) + assert.True(t, castable, "Schema(a=Integer) should be castable to StructuredDataset() with generic format") + }) + t.Run("SupersetStructuredToSubsetTypedSchema", func(t *testing.T) { castable := AreTypesCastable(supersetStructuredDataset, subsetIntegerSchema) assert.True(t, castable, "StructuredDataset(a=Integer, b=Float) should be castable to Schema(a=Integer)")