From 9a23603066a2e50656a33e54292e81359f4c2dd5 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 29 Jun 2021 23:04:00 -0700 Subject: [PATCH 1/2] feat(ingest): basic support for complex hive types --- metadata-ingestion/setup.py | 2 +- .../datahub/ingestion/source/sql_common.py | 2 +- .../tests/integration/hive/docker-compose.yml | 56 +++ .../tests/integration/hive/hadoop-hive.env | 30 ++ .../integration/hive/hive_mces_golden.json | 409 ++++++++++++++++++ .../tests/integration/hive/hive_setup.sql | 36 ++ .../tests/integration/hive/hive_to_file.yml | 12 + .../tests/integration/hive/test_hive.py | 37 ++ 8 files changed, 582 insertions(+), 2 deletions(-) create mode 100644 metadata-ingestion/tests/integration/hive/docker-compose.yml create mode 100644 metadata-ingestion/tests/integration/hive/hadoop-hive.env create mode 100644 metadata-ingestion/tests/integration/hive/hive_mces_golden.json create mode 100644 metadata-ingestion/tests/integration/hive/hive_setup.sql create mode 100644 metadata-ingestion/tests/integration/hive/hive_to_file.yml create mode 100644 metadata-ingestion/tests/integration/hive/test_hive.py diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 0b59e79baf037..2d6a9d06e2542 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -83,7 +83,7 @@ def get_long_description(): | { # Acryl Data maintains a fork of PyHive, which adds support for table comments # and column comments, and also releases HTTP and HTTPS transport schemes. 
- "acryl-pyhive[hive]>=0.6.9" + "acryl-pyhive[hive]>=0.6.10" }, "ldap": {"python-ldap>=2.4"}, "looker": {"looker-sdk==21.6.0"}, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql_common.py index 823bb72407e49..ab01eb8781a92 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql_common.py @@ -206,8 +206,8 @@ def get_schema_metadata( for column in columns: field = SchemaField( fieldPath=column["name"], - nativeDataType=repr(column["type"]), type=get_column_type(sql_report, dataset_name, column["type"]), + nativeDataType=column.get("full_type", repr(column["type"])), description=column.get("comment", None), nullable=column["nullable"], recursive=False, diff --git a/metadata-ingestion/tests/integration/hive/docker-compose.yml b/metadata-ingestion/tests/integration/hive/docker-compose.yml new file mode 100644 index 0000000000000..6821d6dc679d8 --- /dev/null +++ b/metadata-ingestion/tests/integration/hive/docker-compose.yml @@ -0,0 +1,56 @@ +# Adapted from https://github.com/big-data-europe/docker-hive. 
+ +version: "3" + +services: + namenode: + image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8 + volumes: + - namenode:/hadoop/dfs/name + environment: + - CLUSTER_NAME=test + env_file: + - ./hadoop-hive.env + ports: + - "50070:50070" + datanode: + image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8 + volumes: + - datanode:/hadoop/dfs/data + env_file: + - ./hadoop-hive.env + environment: + SERVICE_PRECONDITION: "namenode:50070" + ports: + - "50075:50075" + hive-server: + image: bde2020/hive:2.3.2-postgresql-metastore + container_name: "testhiveserver2" + env_file: + - ./hadoop-hive.env + environment: + HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore" + SERVICE_PRECONDITION: "hive-metastore:9083" + ports: + - "10000:10000" + volumes: + - ./hive_setup.sql:/hive_setup.sql + hive-metastore: + image: bde2020/hive:2.3.2-postgresql-metastore + env_file: + - ./hadoop-hive.env + command: /opt/hive/bin/hive --service metastore + environment: + SERVICE_PRECONDITION: "namenode:50070 datanode:50075 hive-metastore-postgresql:5432" + ports: + - "9083:9083" + hive-metastore-postgresql: + image: bde2020/hive-metastore-postgresql:2.3.0 +# presto-coordinator: +# image: shawnzhu/prestodb:0.181 +# ports: +# - "8080:8080" + +volumes: + namenode: + datanode: diff --git a/metadata-ingestion/tests/integration/hive/hadoop-hive.env b/metadata-ingestion/tests/integration/hive/hadoop-hive.env new file mode 100644 index 0000000000000..3da87a94c57e7 --- /dev/null +++ b/metadata-ingestion/tests/integration/hive/hadoop-hive.env @@ -0,0 +1,30 @@ +HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore +HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver +HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive +HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive +HIVE_SITE_CONF_datanucleus_autoCreateSchema=false +HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083 
+HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false + +CORE_CONF_fs_defaultFS=hdfs://namenode:8020 +CORE_CONF_hadoop_http_staticuser_user=root +CORE_CONF_hadoop_proxyuser_hue_hosts=* +CORE_CONF_hadoop_proxyuser_hue_groups=* + +HDFS_CONF_dfs_webhdfs_enabled=true +HDFS_CONF_dfs_permissions_enabled=false + +YARN_CONF_yarn_log___aggregation___enable=true +YARN_CONF_yarn_resourcemanager_recovery_enabled=true +YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore +YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate +YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs +YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ +YARN_CONF_yarn_timeline___service_enabled=true +YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true +YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true +YARN_CONF_yarn_resourcemanager_hostname=resourcemanager +YARN_CONF_yarn_timeline___service_hostname=historyserver +YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 +YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 +YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031 diff --git a/metadata-ingestion/tests/integration/hive/hive_mces_golden.json b/metadata-ingestion/tests/integration/hive/hive_mces_golden.json new file mode 100644 index 0000000000000..3d5e2f25fcbb6 --- /dev/null +++ b/metadata-ingestion/tests/integration/hive/hive_mces_golden.json @@ -0,0 +1,409 @@ +[ +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,default._test_table_underscore,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "Database:": "default", + "Owner:": "root", + "CreateTime:": "Wed Jun 30 00:48:37 UTC 2021", + 
"LastAccessTime:": "UNKNOWN", + "Retention:": "0", + "Location:": "hdfs://namenode:8020/user/hive/warehouse/_test_table_underscore", + "Table Type:": "MANAGED_TABLE", + "Table Parameters: COLUMN_STATS_ACCURATE": "{\\\"BASIC_STATS\\\":\\\"true\\\"}", + "Table Parameters: numFiles": "0", + "Table Parameters: numRows": "0", + "Table Parameters: rawDataSize": "0", + "Table Parameters: totalSize": "0", + "Table Parameters: transient_lastDdlTime": "1625014117", + "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", + "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "Compressed:": "No", + "Num Buckets:": "-1", + "Bucket Columns:": "[]", + "Sort Columns:": "[]", + "Storage Desc Params: serialization.format": "1" + }, + "externalUrl": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "default._test_table_underscore", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 1615443388097, + "actor": "urn:li:corpuser:etl", + "impersonator": null + }, + "lastModified": { + "time": 1615443388097, + "actor": "urn:li:corpuser:etl", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "foo", + "jsonPath": null, + "nullable": true, + "description": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "globalTags": null, + "glossaryTerms": null + }, + { + "fieldPath": "bar", + "jsonPath": null, + "nullable": true, + "description": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": null, + 
"glossaryTerms": null + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null + } + } + ] + } + }, + "proposedDelta": null +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,default.array_struct_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "Database:": "default", + "Owner:": "root", + "CreateTime:": "Wed Jun 30 00:48:37 UTC 2021", + "LastAccessTime:": "UNKNOWN", + "Retention:": "0", + "Location:": "hdfs://namenode:8020/user/hive/warehouse/array_struct_test", + "Table Type:": "MANAGED_TABLE", + "Table Parameters: COLUMN_STATS_ACCURATE": "{\\\"BASIC_STATS\\\":\\\"true\\\"}", + "Table Parameters: numFiles": "1", + "Table Parameters: numRows": "1", + "Table Parameters: rawDataSize": "32", + "Table Parameters: totalSize": "33", + "Table Parameters: transient_lastDdlTime": "1625014122", + "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", + "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "Compressed:": "No", + "Num Buckets:": "-1", + "Bucket Columns:": "[]", + "Sort Columns:": "[]", + "Storage Desc Params: serialization.format": "1" + }, + "externalUrl": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "default.array_struct_test", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 1615443388097, + "actor": "urn:li:corpuser:etl", + "impersonator": null + }, + "lastModified": { + "time": 1615443388097, + "actor": "urn:li:corpuser:etl", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ 
+ { + "fieldPath": "property_id", + "jsonPath": null, + "nullable": true, + "description": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "globalTags": null, + "glossaryTerms": null + }, + { + "fieldPath": "service", + "jsonPath": null, + "nullable": true, + "description": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "array<struct<type:string,provider:array<int>>>", + "recursive": false, + "globalTags": null, + "glossaryTerms": null + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null + } + } + ] + } + }, + "proposedDelta": null +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,default.pokes,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "Database:": "default", + "Owner:": "root", + "CreateTime:": "Wed Jun 30 00:48:35 UTC 2021", + "LastAccessTime:": "UNKNOWN", + "Retention:": "0", + "Location:": "hdfs://namenode:8020/user/hive/warehouse/pokes", + "Table Type:": "MANAGED_TABLE", + "Table Parameters: numFiles": "1", + "Table Parameters: numRows": "0", + "Table Parameters: rawDataSize": "0", + "Table Parameters: totalSize": "5812", + "Table Parameters: transient_lastDdlTime": "1625014117", + "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", + "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "Compressed:": "No", + "Num Buckets:": "-1", + "Bucket Columns:": "[]", + "Sort Columns:": "[]", + "Storage Desc Params: serialization.format": "1" + }, + "externalUrl": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "default.pokes", + "platform":
"urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 1615443388097, + "actor": "urn:li:corpuser:etl", + "impersonator": null + }, + "lastModified": { + "time": 1615443388097, + "actor": "urn:li:corpuser:etl", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "foo", + "jsonPath": null, + "nullable": true, + "description": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "globalTags": null, + "glossaryTerms": null + }, + { + "fieldPath": "bar", + "jsonPath": null, + "nullable": true, + "description": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": null, + "glossaryTerms": null + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null + } + } + ] + } + }, + "proposedDelta": null +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,default.struct_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "Database:": "default", + "Owner:": "root", + "CreateTime:": "Wed Jun 30 00:48:37 UTC 2021", + "LastAccessTime:": "UNKNOWN", + "Retention:": "0", + "Location:": "hdfs://namenode:8020/user/hive/warehouse/struct_test", + "Table Type:": "MANAGED_TABLE", + "Table Parameters: COLUMN_STATS_ACCURATE": "{\\\"BASIC_STATS\\\":\\\"true\\\"}", + "Table Parameters: numFiles": "0", + "Table Parameters: numRows": "0", + "Table Parameters: rawDataSize": "0", + "Table Parameters: totalSize": "0", + "Table Parameters: transient_lastDdlTime": "1625014117", + "SerDe Library:":
"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", + "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "Compressed:": "No", + "Num Buckets:": "-1", + "Bucket Columns:": "[]", + "Sort Columns:": "[]", + "Storage Desc Params: serialization.format": "1" + }, + "externalUrl": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "default.struct_test", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 1615443388097, + "actor": "urn:li:corpuser:etl", + "impersonator": null + }, + "lastModified": { + "time": 1615443388097, + "actor": "urn:li:corpuser:etl", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "jsonPath": null, + "nullable": true, + "description": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "globalTags": null, + "glossaryTerms": null + }, + { + "fieldPath": "service", + "jsonPath": null, + "nullable": true, + "description": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "struct<type:string,provider:array<int>>", + "recursive": false, + "globalTags": null, + "glossaryTerms": null + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null + } + } + ] + } + }, + "proposedDelta": null +} +] diff --git a/metadata-ingestion/tests/integration/hive/hive_setup.sql b/metadata-ingestion/tests/integration/hive/hive_setup.sql new file mode 100644 index 0000000000000..01d63eb7e5c71 --- /dev/null +++ b/metadata-ingestion/tests/integration/hive/hive_setup.sql @@ -0,0 +1,36 @@ + +-- Setup a "pokes" example table.
+CREATE TABLE pokes (foo INT, bar STRING); +LOAD DATA LOCAL INPATH '/opt/hive/examples/files/kv1.txt' OVERWRITE INTO TABLE pokes; + +-- Setup a table with a special character. +CREATE TABLE `_test_table_underscore` (foo INT, bar STRING); + +-- Create tables with struct and array types. +-- From https://stackoverflow.com/questions/57491644/correct-usage-of-a-struct-in-hive. +CREATE TABLE struct_test +( + property_id INT, + service STRUCT< + type: STRING + ,provider: ARRAY<INT> + > +); + +CREATE TABLE array_struct_test +( + property_id INT, + service array<STRUCT< + type: STRING + ,provider: ARRAY<INT> + >> +); + +WITH +test_data as ( + SELECT 989 property_id, array(NAMED_STRUCT('type','Cleaning','provider', ARRAY(587, 887)), + NAMED_STRUCT('type','Pricing','provider', ARRAY(932)) + ) AS service +) +INSERT INTO TABLE array_struct_test +select * from test_data; diff --git a/metadata-ingestion/tests/integration/hive/hive_to_file.yml b/metadata-ingestion/tests/integration/hive/hive_to_file.yml new file mode 100644 index 0000000000000..879c45bb87ba7 --- /dev/null +++ b/metadata-ingestion/tests/integration/hive/hive_to_file.yml @@ -0,0 +1,12 @@ +run_id: hive-test + +source: + type: hive + config: + scheme: "hive" + host_port: localhost:10000 + +sink: + type: file + config: + filename: "./hive_mces.json" diff --git a/metadata-ingestion/tests/integration/hive/test_hive.py b/metadata-ingestion/tests/integration/hive/test_hive.py new file mode 100644 index 0000000000000..f09b417c76683 --- /dev/null +++ b/metadata-ingestion/tests/integration/hive/test_hive.py @@ -0,0 +1,37 @@ +import subprocess + +import pytest +from click.testing import CliRunner + +from datahub.entrypoints import datahub +from tests.test_helpers import fs_helpers, mce_helpers +from tests.test_helpers.docker_helpers import wait_for_port + + +@pytest.mark.slow +def test_hive_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time): + test_resources_dir = pytestconfig.rootpath / "tests/integration/hive" + + with docker_compose_runner( +
test_resources_dir / "docker-compose.yml", "hive" + ) as docker_services: + wait_for_port(docker_services, "testhiveserver2", 10000, timeout=120) + + # Set up the container. + command = "docker exec testhiveserver2 /opt/hive/bin/beeline -u jdbc:hive2://localhost:10000 -f /hive_setup.sql" + subprocess.run(command, shell=True, check=True) + + # Run the metadata ingestion pipeline. + runner = CliRunner() + with fs_helpers.isolated_filesystem(tmp_path): + config_file = (test_resources_dir / "hive_to_file.yml").resolve() + result = runner.invoke(datahub, ["ingest", "-c", f"{config_file}"]) + assert result.exit_code == 0 + + output = mce_helpers.load_json_file("hive_mces.json") + + # Verify the output. + golden = mce_helpers.load_json_file( + str(test_resources_dir / "hive_mces_golden.json") + ) + mce_helpers.assert_mces_equal(output, golden) From b4ab800a503cae93ddafee7662f0ec27921e5742 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 30 Jun 2021 17:06:00 -0700 Subject: [PATCH 2/2] fix test --- .../tests/integration/hive/test_hive.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/tests/integration/hive/test_hive.py b/metadata-ingestion/tests/integration/hive/test_hive.py index f09b417c76683..26f0ea4d48a2e 100644 --- a/metadata-ingestion/tests/integration/hive/test_hive.py +++ b/metadata-ingestion/tests/integration/hive/test_hive.py @@ -28,10 +28,14 @@ def test_hive_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time): result = runner.invoke(datahub, ["ingest", "-c", f"{config_file}"]) assert result.exit_code == 0 - output = mce_helpers.load_json_file("hive_mces.json") - # Verify the output. 
- golden = mce_helpers.load_json_file( - str(test_resources_dir / "hive_mces_golden.json") + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / "hive_mces.json", + golden_path=test_resources_dir / "hive_mces_golden.json", + ignore_paths=[ + # example: root[1]['proposedSnapshot']['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot']['aspects'][0]['com.linkedin.pegasus2avro.dataset.DatasetProperties']['customProperties']['CreateTime:'] + # example: root[2]['proposedSnapshot']['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot']['aspects'][0]['com.linkedin.pegasus2avro.dataset.DatasetProperties']['customProperties']['Table Parameters: transient_lastDdlTime'] + r"root\[\d+\]\['proposedSnapshot'\]\['com\.linkedin\.pegasus2avro\.metadata\.snapshot\.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com\.linkedin\.pegasus2avro\.dataset\.DatasetProperties'\]\['customProperties'\]\['.*Time.*'\]" + ], ) - mce_helpers.assert_mces_equal(output, golden)