Skip to content

Commit

Permalink
refactor(json-schema) added support for nested array json schemas
Browse files Browse the repository at this point in the history
  • Loading branch information
AvaniSiddhapuraAPT committed Feb 28, 2024
1 parent 0e498c0 commit 48c097e
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -417,31 +417,37 @@ def _field_from_complex_type(
inner_field_path,
)
elif datahub_field_type == ArrayTypeClass:
field_path = field_path.expand_type("array", schema)
# default items schema is string
items_schema = schema.get("items", {"type": "string"})
items_type = JsonSchemaTranslator._get_type_from_schema(items_schema)
# Extracting description for array field
description = JsonSchemaTranslator._get_description_from_any_schema(schema)
field_path._set_parent_type_if_not_exists(
DataHubType(type=ArrayTypeClass, nested_type=items_type)
)
yield from JsonSchemaTranslator.get_fields(
items_type, items_schema, required=False, base_field_path=field_path
)
# Adding a field for the array itself with the extracted description
field_path = field_path.expand_type(discriminated_type, schema)
yield SchemaField(
fieldPath=field_path.as_string(),
type=type_override or SchemaFieldDataTypeClass(type=ArrayTypeClass()),
nativeDataType=native_type_override or "array",
description=description,
nativeDataType=native_type_override
or JsonSchemaTranslator._get_discriminated_type_from_schema(schema),
description=JsonSchemaTranslator._get_description_from_any_schema(
schema
),
nullable=nullable,
jsonProps=JsonSchemaTranslator._get_jsonprops_for_any_schema(
schema, required=required
),
isPartOfKey=field_path.is_key_schema,
)

items_schema = schema.get("items", {"type": "string"})
items_type = JsonSchemaTranslator._get_type_from_schema(items_schema)
field_name = items_schema.get("title", None)
if not field_name:
field_name = items_type
inner_field_path = field_path.clone_plus(
FieldElement(type=[], name=field_name, schema_types=[])
)
yield from JsonSchemaTranslator.get_fields(
items_type,
items_schema,
required=False,
base_field_path=inner_field_path,
)

elif datahub_field_type == MapTypeClass:
field_path = field_path.expand_type("map", schema)
# When additionalProperties is used alone, without properties, the object essentially functions as a map<string, T>, where T is the type described in the additionalProperties sub-schema.
Expand Down
28 changes: 14 additions & 14 deletions metadata-ingestion/tests/unit/schema/test_json_schema_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,12 +160,12 @@ def test_json_schema_with_recursion():
"type": NumberTypeClass,
},
{
"path": "[version=2.0].[type=TreeNode].[type=array].[type=TreeNode].children",
"path": "[version=2.0].[type=TreeNode].[type=array].children",
"type": ArrayTypeClass,
},
{
"path": "[version=2.0].[type=TreeNode].[type=array].children",
"type": ArrayTypeClass,
"path": "[version=2.0].[type=TreeNode].[type=array].children.[type=TreeNode].TreeNode",
"type": RecordTypeClass,
},
]
assert_field_paths_match(fields, expected_field_paths)
Expand Down Expand Up @@ -377,10 +377,10 @@ def test_nested_arrays():

fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
expected_field_paths: List[str] = [
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar",
"[version=2.0].[type=NestedArray].[type=array].[type=array].[type=Foo].ar.[type=integer].a",
"[version=2.0].[type=NestedArray].[type=array].[type=array].ar",
"[version=2.0].[type=NestedArray].[type=array].ar",
"[version=2.0].[type=NestedArray].[type=array].ar.[type=array].array",
"[version=2.0].[type=NestedArray].[type=array].ar.[type=array].array.[type=Foo].Foo",
"[version=2.0].[type=NestedArray].[type=array].ar.[type=array].array.[type=Foo].Foo.[type=integer].a",
]
assert_field_paths_match(fields, expected_field_paths)
assert isinstance(fields[0].type.type, ArrayTypeClass)
Expand Down Expand Up @@ -510,10 +510,10 @@ def test_needs_disambiguation_nested_union_of_records_with_same_field_name():
"[version=2.0].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f",
"[version=2.0].[type=ABFooUnion].[type=union].[type=B].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a.[type=integer].f",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].[type=array].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].a",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array.[type=Foo].Foo",
"[version=2.0].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array.[type=Foo].Foo.[type=integer].f",
]
assert_field_paths_match(fields, expected_field_paths)

Expand Down Expand Up @@ -588,10 +588,10 @@ def test_key_schema_handling():
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=A].a.[type=string].f",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=B].a.[type=string].f",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array].[type=Foo].a.[type=number].f",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].[type=array].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].a",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array.[type=Foo].Foo",
"[version=2.0].[key=True].[type=ABFooUnion].[type=union].[type=array].a.[type=array].array.[type=Foo].Foo.[type=number].f",
]
assert_field_paths_match(fields, expected_field_paths)
for f in fields:
Expand Down Expand Up @@ -676,8 +676,8 @@ def test_simple_array():

fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
expected_field_paths: List[str] = [
"[version=2.0].[type=ObjectWithArray].[type=array].[type=string].ar",
"[version=2.0].[type=ObjectWithArray].[type=array].ar",
"[version=2.0].[type=ObjectWithArray].[type=array].ar.[type=string].string",
]
assert_field_paths_match(fields, expected_field_paths)
assert isinstance(fields[0].type.type, ArrayTypeClass)
Expand Down Expand Up @@ -875,8 +875,8 @@ def test_description_extraction():
}
fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
expected_field_paths: List[str] = [
"[version=2.0].[type=object].[type=array].[type=string].bar",
"[version=2.0].[type=object].[type=array].bar",
"[version=2.0].[type=object].[type=array].bar.[type=string].string",
]
assert_field_paths_match(fields, expected_field_paths)
assert_fields_are_valid(fields)
Expand Down

0 comments on commit 48c097e

Please sign in to comment.