From 1a57f3e6586c6b9a4448c2fb5b06e75a1983cc3d Mon Sep 17 00:00:00 2001
From: "Bharti, Aakash"
Date: Tue, 28 May 2024 22:37:10 +0530
Subject: [PATCH 1/2] Add-support-for-ingesting-schemas-from-schema-registry

---
 build.gradle | 3 +-
 .../linkedin/datahub/graphql/Constants.java | 1 +
 .../datahub/graphql/GmsGraphQLEngine.java | 16 +-
 .../datahub/graphql/GraphQLEngine.java | 5 +-
 .../datahub/graphql/SubTypesResolver.java | 7 +-
 .../graphql/WeaklyTypedAspectsResolver.java | 7 +-
 .../concurrency/GraphQLConcurrencyUtils.java | 35 +
 .../GraphQLWorkerPoolThreadFactory.java | 29 +
 .../DataHubFieldComplexityCalculator.java | 74 ++
 .../datahub/graphql/resolvers/MeResolver.java | 7 +-
 .../assertion/AssertionRunEventResolver.java | 17 +-
 .../assertion/DeleteAssertionResolver.java | 7 +-
 .../assertion/EntityAssertionsResolver.java | 7 +-
 .../auth/CreateAccessTokenResolver.java | 7 +-
 .../resolvers/auth/DebugAccessResolver.java | 7 +-
 .../auth/GetAccessTokenMetadataResolver.java | 7 +-
 .../auth/GetAccessTokenResolver.java | 7 +-
 .../auth/ListAccessTokensResolver.java | 7 +-
 .../auth/RevokeAccessTokenResolver.java | 7 +-
 .../resolvers/browse/BrowsePathsResolver.java | 7 +-
 .../resolvers/browse/BrowseResolver.java | 7 +-
 .../browse/EntityBrowsePathsResolver.java | 7 +-
 .../AddBusinessAttributeResolver.java | 7 +-
 .../CreateBusinessAttributeResolver.java | 7 +-
 .../DeleteBusinessAttributeResolver.java | 7 +-
 .../ListBusinessAttributesResolver.java | 7 +-
 .../RemoveBusinessAttributeResolver.java | 7 +-
 .../UpdateBusinessAttributeResolver.java | 7 +-
 .../resolvers/chart/BrowseV2Resolver.java | 7 +-
 .../container/ContainerEntitiesResolver.java | 7 +-
 .../container/ParentContainersResolver.java | 7 +-
 .../DashboardStatsSummaryResolver.java | 7 +-
 .../DashboardUsageStatsResolver.java | 7 +-
 .../BatchSetDataProductResolver.java | 7 +-
 .../CreateDataProductResolver.java | 7 +-
 .../DeleteDataProductResolver.java | 7 +-
 .../ListDataProductAssetsResolver.java | 7 +-
 .../UpdateDataProductResolver.java | 7 +-
 .../dataset/DatasetHealthResolver.java | 7 +-
 .../dataset/DatasetStatsSummaryResolver.java | 7 +-
 .../dataset/DatasetUsageStatsResolver.java | 7 +-
 .../dataset/IsAssignedToMeResolver.java | 7 +-
 .../UpdateDeprecationResolver.java | 7 +-
 .../domain/CreateDomainResolver.java | 7 +-
 .../domain/DeleteDomainResolver.java | 7 +-
 .../domain/DomainEntitiesResolver.java | 7 +-
 .../resolvers/domain/ListDomainsResolver.java | 7 +-
 .../domain/ParentDomainsResolver.java | 7 +-
 .../resolvers/domain/SetDomainResolver.java | 7 +-
 .../resolvers/domain/UnsetDomainResolver.java | 7 +-
 .../resolvers/embed/UpdateEmbedResolver.java | 7 +-
 .../entity/EntityExistsResolver.java | 7 +-
 .../entity/EntityPrivilegesResolver.java | 7 +-
 .../form/BatchAssignFormResolver.java | 7 +-
 .../form/BatchRemoveFormResolver.java | 7 +-
 .../CreateDynamicFormAssignmentResolver.java | 7 +-
 .../form/IsFormAssignedToMeResolver.java | 7 +-
 .../form/SubmitFormPromptResolver.java | 7 +-
 .../resolvers/form/VerifyFormResolver.java | 7 +-
 .../glossary/AddRelatedTermsResolver.java | 7 +-
 .../glossary/CreateGlossaryNodeResolver.java | 7 +-
 .../glossary/CreateGlossaryTermResolver.java | 7 +-
 .../DeleteGlossaryEntityResolver.java | 7 +-
 .../GetRootGlossaryNodesResolver.java | 7 +-
 .../GetRootGlossaryTermsResolver.java | 7 +-
 .../glossary/ParentNodesResolver.java | 7 +-
 .../glossary/RemoveRelatedTermsResolver.java | 7 +-
 .../group/AddGroupMembersResolver.java | 7 +-
 .../resolvers/group/CreateGroupResolver.java | 7 +-
 .../resolvers/group/EntityCountsResolver.java | 7 +-
.../resolvers/group/ListGroupsResolver.java | 7 +- .../group/RemoveGroupMembersResolver.java | 7 +- .../resolvers/group/RemoveGroupResolver.java | 7 +- .../health/EntityHealthResolver.java | 7 +- .../incident/EntityIncidentsResolver.java | 7 +- .../incident/RaiseIncidentResolver.java | 7 +- .../UpdateIncidentStatusResolver.java | 7 +- ...ncelIngestionExecutionRequestResolver.java | 7 +- ...eateIngestionExecutionRequestResolver.java | 7 +- .../CreateTestConnectionRequestResolver.java | 7 +- .../GetIngestionExecutionRequestResolver.java | 7 +- ...estionSourceExecutionRequestsResolver.java | 7 +- .../execution/RollbackIngestionResolver.java | 13 +- .../ingest/secret/CreateSecretResolver.java | 7 +- .../ingest/secret/DeleteSecretResolver.java | 7 +- .../secret/GetSecretValuesResolver.java | 7 +- .../ingest/secret/ListSecretsResolver.java | 7 +- .../ingest/secret/UpdateSecretResolver.java | 7 +- .../source/DeleteIngestionSourceResolver.java | 7 +- .../source/GetIngestionSourceResolver.java | 7 +- .../source/ListIngestionSourcesResolver.java | 7 +- .../source/UpsertIngestionSourceResolver.java | 7 +- .../resolvers/jobs/DataJobRunsResolver.java | 7 +- .../resolvers/jobs/EntityRunsResolver.java | 7 +- .../lineage/UpdateLineageResolver.java | 7 +- .../load/EntityLineageResultResolver.java | 7 +- .../EntityRelationshipsResultResolver.java | 7 +- .../load/TimeSeriesAspectResolver.java | 7 +- .../resolvers/mutate/AddLinkResolver.java | 7 +- .../resolvers/mutate/AddOwnerResolver.java | 7 +- .../resolvers/mutate/AddOwnersResolver.java | 7 +- .../resolvers/mutate/AddTagResolver.java | 7 +- .../resolvers/mutate/AddTagsResolver.java | 7 +- .../resolvers/mutate/AddTermResolver.java | 7 +- .../resolvers/mutate/AddTermsResolver.java | 7 +- .../mutate/BatchAddOwnersResolver.java | 7 +- .../mutate/BatchAddTagsResolver.java | 7 +- .../mutate/BatchAddTermsResolver.java | 7 +- .../mutate/BatchRemoveOwnersResolver.java | 7 +- .../mutate/BatchRemoveTagsResolver.java | 7 +- .../mutate/BatchRemoveTermsResolver.java | 7 +- .../mutate/BatchSetDomainResolver.java | 7 +- .../BatchUpdateDeprecationResolver.java | 7 +- .../BatchUpdateSoftDeletedResolver.java | 7 +- .../resolvers/mutate/MoveDomainResolver.java | 7 +- .../mutate/MutableTypeBatchResolver.java | 7 +- .../resolvers/mutate/MutableTypeResolver.java | 7 +- .../resolvers/mutate/RemoveLinkResolver.java | 7 +- .../resolvers/mutate/RemoveOwnerResolver.java | 7 +- .../resolvers/mutate/RemoveTagResolver.java | 7 +- .../resolvers/mutate/RemoveTermResolver.java | 7 +- .../mutate/UpdateDescriptionResolver.java | 91 +- .../resolvers/mutate/UpdateNameResolver.java | 7 +- .../mutate/UpdateParentNodeResolver.java | 7 +- .../mutate/UpdateUserSettingResolver.java | 7 +- .../operation/ReportOperationResolver.java | 7 +- .../CreateOwnershipTypeResolver.java | 7 +- .../DeleteOwnershipTypeResolver.java | 7 +- .../ownership/ListOwnershipTypesResolver.java | 7 +- .../UpdateOwnershipTypeResolver.java | 7 +- .../policy/DeletePolicyResolver.java | 7 +- .../policy/GetGrantedPrivilegesResolver.java | 7 +- .../policy/UpsertPolicyResolver.java | 7 +- .../resolvers/post/CreatePostResolver.java | 7 +- .../resolvers/post/DeletePostResolver.java | 7 +- .../resolvers/post/ListPostsResolver.java | 7 +- .../resolvers/post/UpdatePostResolver.java | 7 +- .../resolvers/query/CreateQueryResolver.java | 7 +- .../resolvers/query/DeleteQueryResolver.java | 7 +- .../resolvers/query/ListQueriesResolver.java | 7 +- .../resolvers/query/UpdateQueryResolver.java | 7 +- .../ListRecommendationsResolver.java | 7 +- 
.../resolvers/role/AcceptRoleResolver.java | 7 +- .../role/BatchAssignRoleResolver.java | 7 +- .../role/CreateInviteTokenResolver.java | 7 +- .../role/GetInviteTokenResolver.java | 7 +- .../resolvers/role/ListRolesResolver.java | 7 +- .../AggregateAcrossEntitiesResolver.java | 7 +- .../search/AutoCompleteResolver.java | 7 +- .../resolvers/search/AutocompleteUtils.java | 7 +- .../search/GetQuickFiltersResolver.java | 7 +- .../search/ScrollAcrossEntitiesResolver.java | 7 +- .../search/ScrollAcrossLineageResolver.java | 7 +- .../search/SearchAcrossEntitiesResolver.java | 7 +- .../search/SearchAcrossLineageResolver.java | 7 +- .../resolvers/search/SearchResolver.java | 7 +- .../UpdateCorpUserViewsSettingsResolver.java | 7 +- .../view/GlobalViewsSettingsResolver.java | 7 +- .../UpdateGlobalViewsSettingsResolver.java | 7 +- .../step/BatchGetStepStatesResolver.java | 7 +- .../step/BatchUpdateStepStatesResolver.java | 7 +- .../UpsertStructuredPropertiesResolver.java | 7 +- .../resolvers/tag/CreateTagResolver.java | 7 +- .../resolvers/tag/DeleteTagResolver.java | 7 +- .../resolvers/tag/SetTagColorResolver.java | 7 +- .../resolvers/test/CreateTestResolver.java | 7 +- .../resolvers/test/DeleteTestResolver.java | 7 +- .../resolvers/test/ListTestsResolver.java | 7 +- .../resolvers/test/TestResultsResolver.java | 7 +- .../resolvers/test/UpdateTestResolver.java | 7 +- .../timeline/GetSchemaBlameResolver.java | 7 +- .../GetSchemaVersionListResolver.java | 7 +- .../CreateNativeUserResetTokenResolver.java | 7 +- .../resolvers/user/ListUsersResolver.java | 7 +- .../resolvers/user/RemoveUserResolver.java | 7 +- .../user/UpdateUserStatusResolver.java | 7 +- .../resolvers/view/CreateViewResolver.java | 7 +- .../resolvers/view/DeleteViewResolver.java | 7 +- .../view/ListGlobalViewsResolver.java | 7 +- .../resolvers/view/ListMyViewsResolver.java | 7 +- .../resolvers/view/UpdateViewResolver.java | 7 +- .../types/assertion/AssertionMapper.java | 163 +++- .../types/assertion/AssertionType.java | 4 +- .../types/assertion/FieldAssertionMapper.java | 92 ++ .../assertion/FreshnessAssertionMapper.java | 59 ++ .../types/assertion/SqlAssertionMapper.java | 27 + .../assertion/VolumeAssertionMapper.java | 115 +++ .../dataset/mappers/SchemaFieldMapper.java | 2 +- .../CreateERModelRelationshipResolver.java | 7 +- .../UpdateERModelRelationshipResolver.java | 7 +- .../src/main/resources/assertions.graphql | 896 ++++++++++++++++++ .../src/main/resources/entity.graphql | 25 + .../src/main/resources/incident.graphql | 35 + .../AssertionRunEventResolverTest.java | 1 + .../types/assertion/AssertionMapperTest.java | 346 +++++++ .../types/assertion/AssertionTypeTest.java | 24 + .../assertion/FieldAssertionMapperTest.java | 100 ++ .../FreshnessAssertionMapperTest.java | 82 ++ .../assertion/SqlAssertionMapperTest.java | 78 ++ .../assertion/VolumeAssertionMapperTest.java | 207 ++++ docs-website/graphql/generateGraphQLSchema.sh | 1 + docs/townhalls.md | 25 +- .../aspect/hooks/FieldPathMutator.java | 142 +++ .../builder/UpstreamLineagePatchBuilder.java | 89 +- .../SearchableRefFieldSpecExtractor.java | 1 + .../annotation/SearchableRefAnnotation.java | 17 + .../aspect/hooks/FieldPathMutatorTest.java | 249 +++++ .../java/com/linkedin/metadata/Constants.java | 1 + metadata-dao-impl/kafka-producer/build.gradle | 1 + .../dao/producer/KafkaProducerThrottle.java | 246 +++++ .../producer/KafkaProducerThrottleTest.java | 363 +++++++ .../library/assertions_configuration.yml | 76 ++ .../api/entities/assertion/__init__.py | 0 
.../api/entities/assertion/assertion.py | 57 ++ .../assertion/assertion_config_spec.py | 41 + .../entities/assertion/assertion_operator.py | 304 ++++++ .../entities/assertion/assertion_trigger.py | 52 + .../entities/assertion/compiler_interface.py | 81 ++ .../entities/assertion/datahub_assertion.py | 35 + .../api/entities/assertion/field_assertion.py | 158 +++ .../api/entities/assertion/field_metric.py | 21 + .../datahub/api/entities/assertion/filter.py | 13 + .../entities/assertion/freshness_assertion.py | 124 +++ .../api/entities/assertion/sql_assertion.py | 91 ++ .../entities/assertion/volume_assertion.py | 98 ++ .../datahub/cli/specific/assertions_cli.py | 151 +++ .../src/datahub/cli/state_cli.py | 1 + .../configuration/connection_resolver.py | 40 + metadata-ingestion/src/datahub/entrypoints.py | 2 + .../datahub/ingestion/api/global_context.py | 23 + .../datahub/ingestion/api/source_helpers.py | 13 +- .../src/datahub/ingestion/graph/client.py | 248 ++++- .../datahub/ingestion/graph/connections.py | 42 + .../src/datahub/ingestion/run/pipeline.py | 52 +- .../source/aws/sagemaker_processors/common.py | 2 - .../source/confluent_schema_registry.py | 84 +- .../ingestion/source/dbt/dbt_common.py | 9 +- .../src/datahub/ingestion/source/kafka.py | 83 +- .../source/kafka_schema_registry_base.py | 14 +- .../source/snowflake/snowflake_assertion.py | 127 +++ .../source/snowflake/snowflake_config.py | 6 + .../source/snowflake/snowflake_query.py | 22 + .../source/snowflake/snowflake_v2.py | 13 +- .../state/stale_entity_removal_handler.py | 28 +- .../ingestion/source_config/sql/snowflake.py | 3 + .../integrations/assertion/__init__.py | 0 .../datahub/integrations/assertion/common.py | 61 ++ .../integrations/assertion/registry.py | 8 + .../assertion/snowflake/__init__.py | 0 .../assertion/snowflake/compiler.py | 237 +++++ .../assertion/snowflake/dmf_generator.py | 22 + .../snowflake/field_metric_sql_generator.py | 154 +++ .../field_values_metric_sql_generator.py | 283 ++++++ .../metric_operator_sql_generator.py | 68 ++ .../snowflake/metric_sql_generator.py | 97 ++ .../dbt_enabled_with_schemas_mces_golden.json | 14 +- .../dbt_test_column_meta_mapping_golden.json | 17 +- .../dbt/dbt_test_events_golden.json | 17 +- ...bt_test_test_model_performance_golden.json | 25 +- ...th_complex_owner_patterns_mces_golden.json | 14 +- ...th_data_platform_instance_mces_golden.json | 14 +- ...h_non_incremental_lineage_mces_golden.json | 62 +- ..._target_platform_instance_mces_golden.json | 14 +- .../integration/kafka/kafka_mces_golden.json | 889 ++++++++++++++++- .../unit/api/entities/assertion/__init__.py | 0 .../assertion/test_assertion_config.yml | 76 ++ .../assertion/test_assertion_config_spec.py | 13 + .../tests/unit/cli/assertion/__init__.py | 0 .../unit/cli/assertion/dmf_associations.sql | 35 + .../unit/cli/assertion/dmf_definitions.sql | 71 ++ .../tests/unit/cli/assertion/test_compile.py | 42 + .../unit/config/test_connection_resolver.py | 68 ++ .../tests/unit/test_kafka_source.py | 71 +- .../config/DatahubOpenlineageConfig.java | 2 +- .../converter/OpenLineageToDataHub.java | 39 + .../openlineage/dataset/DatahubJob.java | 63 +- .../java/spark-lineage-beta/README.md | 14 +- .../datahub/spark/DatahubSparkListener.java | 12 +- .../datahub/spark/conf/SparkConfigParser.java | 2 +- .../agent/util/RemovePathPatternUtils.java | 2 +- .../plan/catalog/IcebergHandler.java | 192 ---- .../java/spark-lineage/README.md | 8 +- .../graph/neo4j/Neo4jGraphService.java | 223 +++-- .../request/AutocompleteRequestHandler.java | 8 +- 
.../query/request/SearchQueryBuilder.java | 21 +- .../query/request/SearchQueryBuilderTest.java | 12 + .../kafka/hook/UpdateIndicesHook.java | 3 +- metadata-jobs/mce-consumer/build.gradle | 1 + .../com/linkedin/assertion/AssertionInfo.pdl | 14 +- .../assertion/AssertionResultError.pdl | 10 +- .../linkedin/assertion/AssertionSource.pdl | 8 + .../assertion/AssertionStdOperator.pdl | 20 + .../assertion/AssertionStdParameter.pdl | 19 + .../linkedin/assertion/FieldAssertionInfo.pdl | 57 ++ .../assertion/FieldMetricAssertion.pdl | 39 + .../linkedin/assertion/FieldMetricType.pdl | 94 ++ .../com/linkedin/assertion/FieldTransform.pdl | 21 + .../assertion/FieldValuesAssertion.pdl | 83 ++ .../linkedin/assertion/FreshnessFieldSpec.pdl | 8 +- .../assertion/SchemaAssertionInfo.pdl | 33 +- .../assertion/VolumeAssertionInfo.pdl | 2 +- .../datacontract/DataQualityContract.pdl | 4 + .../linkedin/datacontract/SchemaContract.pdl | 4 + .../com/linkedin/incident/IncidentSource.pdl | 5 + .../com/linkedin/incident/IncidentType.pdl | 30 + .../src/main/resources/entity-registry.yml | 13 + .../GraphQLConcurrencyConfiguration.java | 12 + .../metadata/config/GraphQLConfiguration.java | 1 + .../config/MetadataChangeProposalConfig.java | 30 + .../src/main/resources/application.yaml | 29 + .../factory/config/ConfigurationProvider.java | 4 + .../factory/graphql/GraphQLEngineFactory.java | 40 +- .../kafka/DataHubKafkaProducerFactory.java | 2 +- .../kafka/KafkaEventConsumerFactory.java | 2 +- .../kafka/SimpleKafkaConsumerFactory.java | 2 +- .../KafkaProducerThrottleFactory.java | 93 ++ .../datahub/graphql/GraphQLController.java | 7 +- .../datahub/graphql/GraphiQLController.java | 4 +- ...maRegistryControllerTestConfiguration.java | 3 + 319 files changed, 9555 insertions(+), 1112 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/concurrency/GraphQLConcurrencyUtils.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/concurrency/GraphQLWorkerPoolThreadFactory.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/instrumentation/DataHubFieldComplexityCalculator.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/FieldAssertionMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/FreshnessAssertionMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/SqlAssertionMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/VolumeAssertionMapper.java create mode 100644 datahub-graphql-core/src/main/resources/assertions.graphql create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapperTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/FieldAssertionMapperTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/FreshnessAssertionMapperTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/SqlAssertionMapperTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/VolumeAssertionMapperTest.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/FieldPathMutator.java create mode 100644 
entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/FieldPathMutatorTest.java create mode 100644 metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/producer/KafkaProducerThrottle.java create mode 100644 metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/producer/KafkaProducerThrottleTest.java create mode 100644 metadata-ingestion/examples/library/assertions_configuration.yml create mode 100644 metadata-ingestion/src/datahub/api/entities/assertion/__init__.py create mode 100644 metadata-ingestion/src/datahub/api/entities/assertion/assertion.py create mode 100644 metadata-ingestion/src/datahub/api/entities/assertion/assertion_config_spec.py create mode 100644 metadata-ingestion/src/datahub/api/entities/assertion/assertion_operator.py create mode 100644 metadata-ingestion/src/datahub/api/entities/assertion/assertion_trigger.py create mode 100644 metadata-ingestion/src/datahub/api/entities/assertion/compiler_interface.py create mode 100644 metadata-ingestion/src/datahub/api/entities/assertion/datahub_assertion.py create mode 100644 metadata-ingestion/src/datahub/api/entities/assertion/field_assertion.py create mode 100644 metadata-ingestion/src/datahub/api/entities/assertion/field_metric.py create mode 100644 metadata-ingestion/src/datahub/api/entities/assertion/filter.py create mode 100644 metadata-ingestion/src/datahub/api/entities/assertion/freshness_assertion.py create mode 100644 metadata-ingestion/src/datahub/api/entities/assertion/sql_assertion.py create mode 100644 metadata-ingestion/src/datahub/api/entities/assertion/volume_assertion.py create mode 100644 metadata-ingestion/src/datahub/cli/specific/assertions_cli.py create mode 100644 metadata-ingestion/src/datahub/configuration/connection_resolver.py create mode 100644 metadata-ingestion/src/datahub/ingestion/api/global_context.py create mode 100644 metadata-ingestion/src/datahub/ingestion/graph/connections.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_assertion.py create mode 100644 metadata-ingestion/src/datahub/integrations/assertion/__init__.py create mode 100644 metadata-ingestion/src/datahub/integrations/assertion/common.py create mode 100644 metadata-ingestion/src/datahub/integrations/assertion/registry.py create mode 100644 metadata-ingestion/src/datahub/integrations/assertion/snowflake/__init__.py create mode 100644 metadata-ingestion/src/datahub/integrations/assertion/snowflake/compiler.py create mode 100644 metadata-ingestion/src/datahub/integrations/assertion/snowflake/dmf_generator.py create mode 100644 metadata-ingestion/src/datahub/integrations/assertion/snowflake/field_metric_sql_generator.py create mode 100644 metadata-ingestion/src/datahub/integrations/assertion/snowflake/field_values_metric_sql_generator.py create mode 100644 metadata-ingestion/src/datahub/integrations/assertion/snowflake/metric_operator_sql_generator.py create mode 100644 metadata-ingestion/src/datahub/integrations/assertion/snowflake/metric_sql_generator.py create mode 100644 metadata-ingestion/tests/unit/api/entities/assertion/__init__.py create mode 100644 metadata-ingestion/tests/unit/api/entities/assertion/test_assertion_config.yml create mode 100644 metadata-ingestion/tests/unit/api/entities/assertion/test_assertion_config_spec.py create mode 100644 metadata-ingestion/tests/unit/cli/assertion/__init__.py create mode 100644 metadata-ingestion/tests/unit/cli/assertion/dmf_associations.sql create mode 100644 
metadata-ingestion/tests/unit/cli/assertion/dmf_definitions.sql create mode 100644 metadata-ingestion/tests/unit/cli/assertion/test_compile.py create mode 100644 metadata-ingestion/tests/unit/config/test_connection_resolver.py delete mode 100644 metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark3/agent/lifecycle/plan/catalog/IcebergHandler.java create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FieldAssertionInfo.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FieldMetricAssertion.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FieldMetricType.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FieldTransform.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/FieldValuesAssertion.pdl create mode 100644 metadata-service/configuration/src/main/java/com/linkedin/metadata/config/GraphQLConcurrencyConfiguration.java create mode 100644 metadata-service/configuration/src/main/java/com/linkedin/metadata/config/MetadataChangeProposalConfig.java create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaProducerThrottleFactory.java diff --git a/build.gradle b/build.gradle index 5264c1c58313c7..c6e14081c6147e 100644 --- a/build.gradle +++ b/build.gradle @@ -54,7 +54,7 @@ buildscript { ext.hazelcastVersion = '5.3.6' ext.ebeanVersion = '12.16.1' ext.googleJavaFormatVersion = '1.18.1' - ext.openLineageVersion = '1.13.1' + ext.openLineageVersion = '1.14.0' ext.logbackClassicJava8 = '1.2.12' ext.docker_registry = 'acryldata' @@ -250,6 +250,7 @@ project.ext.externalDependency = [ 'springBootStarterValidation': "org.springframework.boot:spring-boot-starter-validation:$springBootVersion", 'springKafka': "org.springframework.kafka:spring-kafka:$springKafkaVersion", 'springActuator': "org.springframework.boot:spring-boot-starter-actuator:$springBootVersion", + 'springRetry': "org.springframework.retry:spring-retry:2.0.6", 'swaggerAnnotations': 'io.swagger.core.v3:swagger-annotations:2.2.15', 'swaggerCli': 'io.swagger.codegen.v3:swagger-codegen-cli:3.0.46', 'swaggerCore': 'io.swagger.core.v3:swagger-core:2.2.7', diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java index 0924dbc0c0a6d9..e55f1fd5ecf5bf 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java @@ -20,6 +20,7 @@ private Constants() {} public static final String LINEAGE_SCHEMA_FILE = "lineage.graphql"; public static final String PROPERTIES_SCHEMA_FILE = "properties.graphql"; public static final String FORMS_SCHEMA_FILE = "forms.graphql"; + public static final String ASSERTIONS_SCHEMA_FILE = "assertions.graphql"; public static final String INCIDENTS_SCHEMA_FILE = "incident.graphql"; public static final String CONNECTIONS_SCHEMA_FILE = "connection.graphql"; public static final String BROWSE_PATH_DELIMITER = "/"; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index c20fc7fef6ef68..50a73817678ee0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -22,6 +22,7 @@ import com.linkedin.datahub.graphql.analytics.resolver.GetMetadataAnalyticsResolver; import com.linkedin.datahub.graphql.analytics.resolver.IsAnalyticsEnabledResolver; import com.linkedin.datahub.graphql.analytics.service.AnalyticsService; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.featureflags.FeatureFlags; import com.linkedin.datahub.graphql.generated.AccessToken; import com.linkedin.datahub.graphql.generated.AccessTokenMetadata; @@ -117,7 +118,12 @@ import com.linkedin.datahub.graphql.resolvers.assertion.AssertionRunEventResolver; import com.linkedin.datahub.graphql.resolvers.assertion.DeleteAssertionResolver; import com.linkedin.datahub.graphql.resolvers.assertion.EntityAssertionsResolver; -import com.linkedin.datahub.graphql.resolvers.auth.*; +import com.linkedin.datahub.graphql.resolvers.auth.CreateAccessTokenResolver; +import com.linkedin.datahub.graphql.resolvers.auth.DebugAccessResolver; +import com.linkedin.datahub.graphql.resolvers.auth.GetAccessTokenMetadataResolver; +import com.linkedin.datahub.graphql.resolvers.auth.GetAccessTokenResolver; +import com.linkedin.datahub.graphql.resolvers.auth.ListAccessTokensResolver; +import com.linkedin.datahub.graphql.resolvers.auth.RevokeAccessTokenResolver; import com.linkedin.datahub.graphql.resolvers.browse.BrowsePathsResolver; import com.linkedin.datahub.graphql.resolvers.browse.BrowseResolver; import com.linkedin.datahub.graphql.resolvers.browse.EntityBrowsePathsResolver; @@ -394,7 +400,6 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.concurrent.CompletableFuture; import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -814,6 +819,7 @@ public GraphQLEngine.Builder builder() { .addSchema(fileBasedSchema(PROPERTIES_SCHEMA_FILE)) .addSchema(fileBasedSchema(FORMS_SCHEMA_FILE)) .addSchema(fileBasedSchema(CONNECTIONS_SCHEMA_FILE)) + .addSchema(fileBasedSchema(ASSERTIONS_SCHEMA_FILE)) .addSchema(fileBasedSchema(INCIDENTS_SCHEMA_FILE)); for (GmsGraphQLPlugin plugin : this.graphQLPlugins) { @@ -2900,7 +2906,7 @@ private DataLoader> createDataLoader( DataLoaderOptions.newOptions().setBatchLoaderContextProvider(contextProvider); return DataLoader.newDataLoader( (keys, context) -> - CompletableFuture.supplyAsync( + GraphQLConcurrencyUtils.supplyAsync( () -> { try { log.debug( @@ -2919,7 +2925,9 @@ private DataLoader> createDataLoader( String.format("Failed to retrieve entities of type %s", graphType.name()), e); } - }), + }, + graphType.getClass().getSimpleName(), + "batchLoad"), loaderOptions); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GraphQLEngine.java index c72f82a8e1bf6a..dd8eabd3ce06fd 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GraphQLEngine.java @@ -3,6 +3,7 @@ import static graphql.schema.idl.RuntimeWiring.*; import com.linkedin.datahub.graphql.exception.DataHubDataFetcherExceptionHandler; +import com.linkedin.datahub.graphql.instrumentation.DataHubFieldComplexityCalculator; import graphql.ExecutionInput; import graphql.ExecutionResult; import graphql.GraphQL; @@ -80,7 +81,9 @@ private GraphQLEngine( List instrumentations = new 
ArrayList<>(3); instrumentations.add(new TracingInstrumentation()); instrumentations.add(new MaxQueryDepthInstrumentation(graphQLQueryDepthLimit)); - instrumentations.add(new MaxQueryComplexityInstrumentation(graphQLQueryComplexityLimit)); + instrumentations.add( + new MaxQueryComplexityInstrumentation( + graphQLQueryComplexityLimit, new DataHubFieldComplexityCalculator())); ChainedInstrumentation chainedInstrumentation = new ChainedInstrumentation(instrumentations); _graphQL = new GraphQL.Builder(graphQLSchema) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/SubTypesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/SubTypesResolver.java index de0ff137e1de46..b0422ed4bde6a3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/SubTypesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/SubTypesResolver.java @@ -2,6 +2,7 @@ import com.linkedin.common.SubTypes; import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.client.EntityClient; @@ -26,7 +27,7 @@ public class SubTypesResolver implements DataFetcher @Override @Nullable public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final QueryContext context = environment.getContext(); SubTypes subType = null; @@ -50,6 +51,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) thro "Failed to fetch aspect " + _aspectName + " for urn " + urnStr + " ", e); } return subType; - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/WeaklyTypedAspectsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/WeaklyTypedAspectsResolver.java index 0dc17bc02ea100..b6599c38e6f425 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/WeaklyTypedAspectsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/WeaklyTypedAspectsResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.data.codec.JacksonDataCodec; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.AspectParams; import com.linkedin.datahub.graphql.generated.AspectRenderSpec; import com.linkedin.datahub.graphql.generated.Entity; @@ -48,7 +49,7 @@ private boolean shouldReturnAspect(AspectSpec aspectSpec, AspectParams params) { @Override public CompletableFuture> get(DataFetchingEnvironment environment) throws Exception { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { List results = new ArrayList<>(); @@ -111,6 +112,8 @@ public CompletableFuture> get(DataFetchingEnvironment environmen } }); return results; - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/concurrency/GraphQLConcurrencyUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/concurrency/GraphQLConcurrencyUtils.java new file mode 100644 index 00000000000000..25ed4face564dc --- /dev/null +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/concurrency/GraphQLConcurrencyUtils.java @@ -0,0 +1,35 @@ +package com.linkedin.datahub.graphql.concurrency; + +import com.codahale.metrics.MetricRegistry; +import com.linkedin.metadata.utils.metrics.MetricUtils; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.function.Supplier; + +public class GraphQLConcurrencyUtils { + private GraphQLConcurrencyUtils() {} + + private static ExecutorService graphQLExecutorService = null; + + public static ExecutorService getExecutorService() { + return GraphQLConcurrencyUtils.graphQLExecutorService; + } + + public static void setExecutorService(ExecutorService executorService) { + GraphQLConcurrencyUtils.graphQLExecutorService = executorService; + } + + public static CompletableFuture supplyAsync( + Supplier supplier, String caller, String task) { + MetricUtils.counter( + MetricRegistry.name( + GraphQLConcurrencyUtils.class.getSimpleName(), "supplyAsync", caller, task)) + .inc(); + if (GraphQLConcurrencyUtils.graphQLExecutorService == null) { + return CompletableFuture.supplyAsync(supplier); + } else { + return CompletableFuture.supplyAsync( + supplier, GraphQLConcurrencyUtils.graphQLExecutorService); + } + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/concurrency/GraphQLWorkerPoolThreadFactory.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/concurrency/GraphQLWorkerPoolThreadFactory.java new file mode 100644 index 00000000000000..bae492b85920b3 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/concurrency/GraphQLWorkerPoolThreadFactory.java @@ -0,0 +1,29 @@ +package com.linkedin.datahub.graphql.concurrency; + +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.atomic.AtomicLong; + +public class GraphQLWorkerPoolThreadFactory implements ThreadFactory { + + private static final AtomicLong THREAD_INIT_NUMBER = new AtomicLong(); + public static final String GRAPHQL_THREAD_POOL_GROUP_NAME = "graphQLThreadGroup"; + public static final ThreadGroup GRAPHQL_THREAD_POOL_GROUP = + new ThreadGroup(GRAPHQL_THREAD_POOL_GROUP_NAME); + + private static long nextThreadNum() { + return THREAD_INIT_NUMBER.getAndIncrement(); + } + + private long stackSize; + + public GraphQLWorkerPoolThreadFactory(long stackSize) { + this.stackSize = stackSize; + } + + @Override + public final Thread newThread(Runnable runnable) { + + return new Thread( + GRAPHQL_THREAD_POOL_GROUP, runnable, "GraphQLWorkerThread-" + nextThreadNum(), stackSize); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/instrumentation/DataHubFieldComplexityCalculator.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/instrumentation/DataHubFieldComplexityCalculator.java new file mode 100644 index 00000000000000..300f04cdaa0909 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/instrumentation/DataHubFieldComplexityCalculator.java @@ -0,0 +1,74 @@ +package com.linkedin.datahub.graphql.instrumentation; + +import graphql.analysis.FieldComplexityCalculator; +import graphql.analysis.FieldComplexityEnvironment; +import graphql.language.Field; +import graphql.language.FragmentSpread; +import graphql.language.Selection; +import graphql.language.SelectionSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class 
DataHubFieldComplexityCalculator implements FieldComplexityCalculator { + + private static final String COUNT_ARG = "count"; + private static final String INPUT_ARG = "input"; + private static final String SEARCH_RESULTS_FIELD = "searchResults"; + private static final String ENTITY_FIELD = "entity"; + private static final String SEARCH_RESULT_FIELDS_FIELD = "searchResultFields"; + private static final String GRAPHQL_QUERY_TYPE = "Query"; + + @SuppressWarnings("rawtypes") + @Override + public int calculate(FieldComplexityEnvironment environment, int childComplexity) { + int complexity = 1; + Map args = environment.getArguments(); + if (args.containsKey(INPUT_ARG)) { + Map input = (Map) args.get(INPUT_ARG); + if (input.containsKey(COUNT_ARG) && (Integer) input.get(COUNT_ARG) > 1) { + Integer count = (Integer) input.get(COUNT_ARG); + Field field = environment.getField(); + complexity += countRecursiveLineageComplexity(count, field); + } + } + if (GRAPHQL_QUERY_TYPE.equals(environment.getParentType().getName())) { + log.info( + "Query complexity for query: {} is {}", + environment.getField().getName(), + complexity + childComplexity); + } + return complexity + childComplexity; + } + + private int countRecursiveLineageComplexity(Integer count, Field field) { + List subFields = field.getSelectionSet().getSelections(); + Optional searchResultsFieldsField = + subFields.stream() + .filter(selection -> selection instanceof Field) + .map(selection -> (Field) selection) + .filter(subField -> SEARCH_RESULTS_FIELD.equals(subField.getName())) + .map(Field::getSelectionSet) + .map(SelectionSet::getSelections) + .flatMap(List::stream) + .filter(selection -> selection instanceof Field) + .map(selection -> (Field) selection) + .filter(subField -> ENTITY_FIELD.equals(subField.getName())) + .map(Field::getSelectionSet) + .map(SelectionSet::getSelections) + .flatMap(List::stream) + .filter(selection -> selection instanceof FragmentSpread) + .map(selection -> (FragmentSpread) selection) + .filter(subField -> SEARCH_RESULT_FIELDS_FIELD.equals(subField.getName())) + .findFirst(); + if (searchResultsFieldsField.isPresent()) { + // This fragment includes 2 lineage queries, we account for this additional complexity by + // multiplying + // by the count of entities attempting to be returned + return 2 * count; + } + return 0; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java index f7dbb73d148423..1f4ebbb88bf1a6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java @@ -11,6 +11,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.featureflags.FeatureFlags; import com.linkedin.datahub.graphql.generated.AuthenticatedUser; import com.linkedin.datahub.graphql.generated.CorpUser; @@ -49,7 +50,7 @@ public MeResolver(final EntityClient entityClient, final FeatureFlags featureFla @Override public CompletableFuture get(DataFetchingEnvironment environment) { final QueryContext context = environment.getContext(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { // 1. 
Get currently logged in user profile. @@ -100,7 +101,9 @@ public CompletableFuture get(DataFetchingEnvironment environm } catch (URISyntaxException | RemoteInvocationException e) { throw new RuntimeException("Failed to fetch authenticated user!", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } /** Returns true if the authenticated user has privileges to view analytics. */ diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java index b96191202087f3..18f8ad85668d8d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java @@ -4,6 +4,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Assertion; import com.linkedin.datahub.graphql.generated.AssertionResultType; import com.linkedin.datahub.graphql.generated.AssertionRunEvent; @@ -40,7 +41,7 @@ public AssertionRunEventResolver(final EntityClient client) { @Override public CompletableFuture get(DataFetchingEnvironment environment) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final QueryContext context = environment.getContext(); @@ -97,12 +98,24 @@ public CompletableFuture get(DataFetchingEnvironment e && AssertionResultType.SUCCESS.equals( runEvent.getResult().getType())) .count())); + result.setErrored( + Math.toIntExact( + runEvents.stream() + .filter( + runEvent -> + AssertionRunStatus.COMPLETE.equals(runEvent.getStatus()) + && runEvent.getResult() != null + && AssertionResultType.ERROR.equals( + runEvent.getResult().getType())) + .count())); result.setRunEvents(runEvents); return result; } catch (RemoteInvocationException e) { throw new RuntimeException("Failed to retrieve Assertion Run Events from GMS", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } @Nullable diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolver.java index d3a545b5adf033..1cf233221d4d33 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolver.java @@ -9,6 +9,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; @@ -38,7 +39,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); final Urn assertionUrn = Urn.createFromString(environment.getArgument("urn")); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { // 1. 
check the entity exists. If not, return false. @@ -75,7 +76,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } /** Determine whether the current user is allowed to remove an assertion. */ diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/EntityAssertionsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/EntityAssertionsResolver.java index 528b9bf46ded38..a56d1cdd2a02b0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/EntityAssertionsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/EntityAssertionsResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Assertion; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.EntityAssertionsResult; @@ -45,7 +46,7 @@ public EntityAssertionsResolver(final EntityClient entityClient, final GraphClie @Override public CompletableFuture get(DataFetchingEnvironment environment) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final QueryContext context = environment.getContext(); @@ -102,7 +103,9 @@ public CompletableFuture get(DataFetchingEnvironment env } catch (URISyntaxException | RemoteInvocationException e) { throw new RuntimeException("Failed to retrieve Assertion Run Events from GMS", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private boolean assertionExists( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/CreateAccessTokenResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/CreateAccessTokenResolver.java index 14a1b9a1f7a017..e17e3cb6fb64a2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/CreateAccessTokenResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/CreateAccessTokenResolver.java @@ -10,6 +10,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AccessToken; import com.linkedin.datahub.graphql.generated.AccessTokenMetadata; @@ -37,7 +38,7 @@ public CreateAccessTokenResolver(final StatefulTokenService statefulTokenService @Override public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final QueryContext context = environment.getContext(); final CreateAccessTokenInput input = @@ -97,7 +98,9 @@ public CompletableFuture get(final DataFetchingEnvironment environm } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } private boolean isAuthorizedToGenerateToken( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java index c56be893858353..44604e92c35ded 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java @@ -9,6 +9,7 @@ import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.DebugAccessResult; import com.linkedin.entity.EntityResponse; @@ -54,7 +55,7 @@ public DebugAccessResolver(EntityClient entityClient, GraphClient graphClient) { @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final QueryContext context = environment.getContext(); @@ -65,7 +66,9 @@ public CompletableFuture get(DataFetchingEnvironment environm final String userUrn = environment.getArgument("userUrn"); return populateDebugAccessResult(userUrn, context); - }); + }, + this.getClass().getSimpleName(), + "get"); } public DebugAccessResult populateDebugAccessResult(String userUrn, QueryContext context) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/GetAccessTokenMetadataResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/GetAccessTokenMetadataResolver.java index c3e14565e0e27f..186dfe658c2cfd 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/GetAccessTokenMetadataResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/GetAccessTokenMetadataResolver.java @@ -4,6 +4,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AccessTokenMetadata; import com.linkedin.datahub.graphql.types.auth.AccessTokenMetadataType; @@ -31,7 +32,7 @@ public GetAccessTokenMetadataResolver( @Override public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final QueryContext context = environment.getContext(); final String token = environment.getArgument("token"); @@ -54,6 +55,8 @@ public CompletableFuture get(final DataFetchingEnvironment } catch (Exception e) { throw new RuntimeException(e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/GetAccessTokenResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/GetAccessTokenResolver.java index 
aed6bd6cb98af0..4594fa5f891868 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/GetAccessTokenResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/GetAccessTokenResolver.java @@ -9,6 +9,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AccessToken; import com.linkedin.datahub.graphql.generated.AccessTokenType; @@ -33,7 +34,7 @@ public GetAccessTokenResolver(final StatelessTokenService tokenService) { @Override public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final QueryContext context = environment.getContext(); final GetAccessTokenInput input = @@ -57,7 +58,9 @@ public CompletableFuture get(final DataFetchingEnvironment environm } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } private boolean isAuthorizedToGenerateToken( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java index 83789ec488e64c..eaac2aedef03a7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AccessTokenMetadata; import com.linkedin.datahub.graphql.generated.EntityType; @@ -40,7 +41,7 @@ public ListAccessTokensResolver(final EntityClient entityClient) { @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final QueryContext context = environment.getContext(); final ListAccessTokenInput input = @@ -98,7 +99,9 @@ public CompletableFuture get(DataFetchingEnvironment envi } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } /** diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java index 79c8f4c15fe8cc..53ae6d4509e7df 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java @@ -9,6 +9,7 @@ import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.client.EntityClient; @@ -35,7 +36,7 @@ public RevokeAccessTokenResolver( @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final QueryContext context = environment.getContext(); final String tokenId = bindArgument(environment.getArgument("tokenId"), String.class); @@ -52,7 +53,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } private boolean isAuthorizedToRevokeToken(final QueryContext context, final String tokenId) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/browse/BrowsePathsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/browse/BrowsePathsResolver.java index 40c91b43850f76..a8636dc2880829 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/browse/BrowsePathsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/browse/BrowsePathsResolver.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.BrowsePath; import com.linkedin.datahub.graphql.generated.BrowsePathsInput; import com.linkedin.datahub.graphql.generated.EntityType; @@ -35,7 +36,7 @@ public CompletableFuture> get(DataFetchingEnvironment environme final BrowsePathsInput input = bindArgument(environment.getArgument("input"), BrowsePathsInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _logger.debug( @@ -60,6 +61,8 @@ public CompletableFuture> get(DataFetchingEnvironment environme + String.format("entity type %s, urn %s", input.getType(), input.getUrn()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseResolver.java index 287d0eef8aec8e..619e950bd106ff 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseResolver.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseResolver.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.BrowseInput; import com.linkedin.datahub.graphql.generated.BrowseResults; import com.linkedin.datahub.graphql.generated.EntityType; @@ -38,7 +39,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) final int start = input.getStart() != null ? input.getStart() : DEFAULT_START; final int count = input.getCount() != null ? input.getCount() : DEFAULT_COUNT; - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _logger.debug( @@ -64,6 +65,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) input.getType(), input.getPath(), input.getFilters(), start, count), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/browse/EntityBrowsePathsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/browse/EntityBrowsePathsResolver.java index 396d91c37d81c6..54faa567723366 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/browse/EntityBrowsePathsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/browse/EntityBrowsePathsResolver.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.browse; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.BrowsePath; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.types.BrowsableEntityType; @@ -24,7 +25,7 @@ public CompletableFuture> get(DataFetchingEnvironment environme final QueryContext context = environment.getContext(); final String urn = ((Entity) environment.getSource()).getUrn(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { return _browsableType.browsePaths(urn, context); @@ -32,6 +33,8 @@ public CompletableFuture> get(DataFetchingEnvironment environme throw new RuntimeException( String.format("Failed to retrieve browse paths for entity with urn %s", urn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/AddBusinessAttributeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/AddBusinessAttributeResolver.java index 4ff2e64fad0a4e..54812d3442c9c1 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/AddBusinessAttributeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/AddBusinessAttributeResolver.java @@ -10,6 +10,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.AddBusinessAttributeInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.metadata.entity.EntityService; @@ -39,7 +40,7 @@ public CompletableFuture 
get(DataFetchingEnvironment environment) throw final Urn businessAttributeUrn = UrnUtils.getUrn(input.getBusinessAttributeUrn()); final List resourceRefInputs = input.getResourceUrn(); validateBusinessAttribute(context.getOperationContext(), businessAttributeUrn); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { addBusinessAttributeToResource( @@ -60,7 +61,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw businessAttributeUrn, resourceRefInputs), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private void validateBusinessAttribute( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/CreateBusinessAttributeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/CreateBusinessAttributeResolver.java index 38397a535fb8f0..d9cb668cc051d8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/CreateBusinessAttributeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/CreateBusinessAttributeResolver.java @@ -12,6 +12,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -51,7 +52,7 @@ public CompletableFuture get(DataFetchingEnvironment environm throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final BusinessAttributeKey businessAttributeKey = new BusinessAttributeKey(); @@ -106,7 +107,9 @@ public CompletableFuture get(DataFetchingEnvironment environm String.format("Failed to create Business Attribute with name: %s", input.getName()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private BusinessAttributeInfo mapBusinessAttributeInfo( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/DeleteBusinessAttributeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/DeleteBusinessAttributeResolver.java index b11cad412cf6c4..bec37731a4ca03 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/DeleteBusinessAttributeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/DeleteBusinessAttributeResolver.java @@ -3,6 +3,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.entity.client.EntityClient; import graphql.schema.DataFetcher; @@ -29,7 +30,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("This urn does not exist: %s", businessAttributeUrn)); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _entityClient.deleteEntity(context.getOperationContext(), businessAttributeUrn); @@ -53,6 +54,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw "Failed to delete Business Attribute with urn %s", businessAttributeUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/ListBusinessAttributesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/ListBusinessAttributesResolver.java index 00ea5975d260e1..ebcdbe5e064942 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/ListBusinessAttributesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/ListBusinessAttributesResolver.java @@ -4,6 +4,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.BusinessAttribute; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.ListBusinessAttributesInput; @@ -46,7 +47,7 @@ public CompletableFuture get( final ListBusinessAttributesInput input = bindArgument(environment.getArgument("input"), ListBusinessAttributesInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final Integer start = input.getStart() == null ? DEFAULT_START : input.getStart(); final Integer count = input.getCount() == null ? 
DEFAULT_COUNT : input.getCount(); @@ -76,7 +77,9 @@ public CompletableFuture get( } catch (Exception e) { throw new RuntimeException("Failed to list Business Attributes", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private List mapUnresolvedBusinessAttributes(final List entityUrns) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/RemoveBusinessAttributeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/RemoveBusinessAttributeResolver.java index 369b1ae3d5bad3..d85282c921dffc 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/RemoveBusinessAttributeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/RemoveBusinessAttributeResolver.java @@ -8,6 +8,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.AddBusinessAttributeInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.metadata.entity.EntityService; @@ -36,7 +37,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final Urn businessAttributeUrn = UrnUtils.getUrn(input.getBusinessAttributeUrn()); final List resourceRefInputs = input.getResourceUrn(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { removeBusinessAttribute( @@ -55,7 +56,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw businessAttributeUrn, resourceRefInputs), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private void removeBusinessAttribute( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/UpdateBusinessAttributeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/UpdateBusinessAttributeResolver.java index 90d2f58d923004..16120a27261a48 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/UpdateBusinessAttributeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/UpdateBusinessAttributeResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -52,7 +53,7 @@ public CompletableFuture get(DataFetchingEnvironment environm throw new RuntimeException( String.format("This urn does not exist: %s", businessAttributeUrn)); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { Urn updatedBusinessAttributeUrn = @@ -69,7 +70,9 @@ public CompletableFuture get(DataFetchingEnvironment environm "Failed to update Business Attribute with urn %s", businessAttributeUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private Urn updateBusinessAttribute( diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java index d698e21c799a9b..18ee5f595ce582 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java @@ -7,6 +7,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.BrowseResultGroupV2; import com.linkedin.datahub.graphql.generated.BrowseResultMetadata; import com.linkedin.datahub.graphql.generated.BrowseResultsV2; @@ -58,7 +59,7 @@ public CompletableFuture get(DataFetchingEnvironment environmen // escape forward slash since it is a reserved character in Elasticsearch final String sanitizedQuery = ResolverUtils.escapeForwardSlash(query); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final DataHubViewInfo maybeResolvedView = @@ -91,7 +92,9 @@ public CompletableFuture get(DataFetchingEnvironment environmen } catch (Exception e) { throw new RuntimeException("Failed to execute browse V2", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } public static List getEntityNames(BrowseV2Input input) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java index f9cb75052dcc44..15927eef236cab 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java @@ -4,6 +4,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Container; import com.linkedin.datahub.graphql.generated.ContainerEntitiesInput; import com.linkedin.datahub.graphql.generated.SearchResults; @@ -67,7 +68,7 @@ public CompletableFuture get(final DataFetchingEnvironment enviro final int start = input.getStart() != null ? input.getStart() : 0; final int count = input.getCount() != null ? 
input.getCount() : 20; - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { @@ -100,6 +101,8 @@ public CompletableFuture get(final DataFetchingEnvironment enviro "Failed to resolve entities associated with container with urn %s", urn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ParentContainersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ParentContainersResolver.java index ae31b2d4b02a55..e7c0f6bb0729a8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ParentContainersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ParentContainersResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; import com.linkedin.datahub.graphql.generated.Container; import com.linkedin.datahub.graphql.generated.Entity; @@ -65,7 +66,7 @@ public CompletableFuture get(DataFetchingEnvironment env final String urn = ((Entity) environment.getSource()).getUrn(); final List containers = new ArrayList<>(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { aggregateParentContainers(containers, urn, context); @@ -79,6 +80,8 @@ public CompletableFuture get(DataFetchingEnvironment env } catch (DataHubGraphQLException e) { throw new RuntimeException("Failed to load all containers", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryResolver.java index 2e5fc0d6c156d6..46d5add9d3f99e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.CorpUser; import com.linkedin.datahub.graphql.generated.DashboardStatsSummary; import com.linkedin.datahub.graphql.generated.DashboardUsageMetrics; @@ -42,7 +43,7 @@ public CompletableFuture get(DataFetchingEnvironment envi final Urn resourceUrn = UrnUtils.getUrn(((Entity) environment.getSource()).getUrn()); final QueryContext context = environment.getContext(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { @@ -85,7 +86,9 @@ public CompletableFuture get(DataFetchingEnvironment envi e); return null; // Do not throw when loading usage summary fails. 
} - }); + }, + this.getClass().getSimpleName(), + "get"); } private int getDashboardViewCount(@Nullable QueryContext context, final Urn resourceUrn) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsResolver.java index c143f6065afe9c..7e4a9c8a803880 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsResolver.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.DashboardUsageAggregation; import com.linkedin.datahub.graphql.generated.DashboardUsageMetrics; import com.linkedin.datahub.graphql.generated.DashboardUsageQueryResult; @@ -55,7 +56,7 @@ public CompletableFuture get(DataFetchingEnvironment // Max number of aspects to return for absolute dashboard usage. final Integer maybeLimit = environment.getArgumentOrDefault("limit", null); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { DashboardUsageQueryResult usageQueryResult = new DashboardUsageQueryResult(); @@ -84,7 +85,9 @@ public CompletableFuture get(DataFetchingEnvironment context, dashboardUrn, maybeStartTimeMillis, maybeEndTimeMillis, maybeLimit); usageQueryResult.setMetrics(dashboardUsageMetrics); return usageQueryResult; - }); + }, + this.getClass().getSimpleName(), + "get"); } private List getDashboardUsageMetrics( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/BatchSetDataProductResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/BatchSetDataProductResolver.java index 522dc7d0f0a32d..f128b9d27f997d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/BatchSetDataProductResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/BatchSetDataProductResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.BatchSetDataProductInput; import com.linkedin.metadata.service.DataProductService; @@ -31,7 +32,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final String maybeDataProductUrn = input.getDataProductUrn(); final List resources = input.getResourceUrns(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { verifyResources(resources, context); verifyDataProduct(maybeDataProductUrn, context); @@ -51,7 +52,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private void verifyResources(List resources, QueryContext context) { diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/CreateDataProductResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/CreateDataProductResolver.java index 77dc2732e6693e..470267264f12f2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/CreateDataProductResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/CreateDataProductResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateDataProductInput; import com.linkedin.datahub.graphql.generated.DataProduct; @@ -34,7 +35,7 @@ public CompletableFuture get(final DataFetchingEnvironment environm final Authentication authentication = context.getAuthentication(); final Urn domainUrn = UrnUtils.getUrn(input.getDomainUrn()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!_dataProductService.verifyEntityExists(context.getOperationContext(), domainUrn)) { throw new IllegalArgumentException("The Domain provided dos not exist"); @@ -68,6 +69,8 @@ public CompletableFuture get(final DataFetchingEnvironment environm throw new RuntimeException( String.format("Failed to create a new DataProduct from input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/DeleteDataProductResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/DeleteDataProductResolver.java index 22e3a4c498f7e8..25c4529abf3ce7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/DeleteDataProductResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/DeleteDataProductResolver.java @@ -4,6 +4,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.domain.Domains; import com.linkedin.metadata.service.DataProductService; @@ -26,7 +27,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final Urn dataProductUrn = UrnUtils.getUrn(environment.getArgument("urn")); final Authentication authentication = context.getAuthentication(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!_dataProductService.verifyEntityExists( context.getOperationContext(), dataProductUrn)) { @@ -52,6 +53,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } catch (Exception e) { throw new RuntimeException("Failed to delete Data Product", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java index 457e99487ae50e..041de9f58db231 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java @@ -8,6 +8,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.DataProduct; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.SearchAcrossEntitiesInput; @@ -116,7 +117,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) final int start = input.getStart() != null ? input.getStart() : DEFAULT_START; final int count = input.getCount() != null ? input.getCount() : DEFAULT_COUNT; - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { // if no assets in data product properties, exit early before search and return empty // results @@ -178,6 +179,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) input.getTypes(), input.getQuery(), input.getOrFilters(), start, count), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/UpdateDataProductResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/UpdateDataProductResolver.java index e42ea88d9e3d2d..e49cdcfe94eb49 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/UpdateDataProductResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/UpdateDataProductResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.DataProduct; import com.linkedin.datahub.graphql.generated.UpdateDataProductInput; @@ -35,7 +36,7 @@ public CompletableFuture get(final DataFetchingEnvironment environm final Urn dataProductUrn = UrnUtils.getUrn(environment.getArgument("urn")); final Authentication authentication = context.getAuthentication(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!_dataProductService.verifyEntityExists( context.getOperationContext(), dataProductUrn)) { @@ -75,6 +76,8 @@ public CompletableFuture get(final DataFetchingEnvironment environm throw new RuntimeException( String.format("Failed to update DataProduct with urn %s", dataProductUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetHealthResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetHealthResolver.java index 1746c03f3dcf11..f38cf80f36ceb9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetHealthResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetHealthResolver.java @@ -7,6 +7,7 @@ import com.linkedin.data.template.StringArray; import 
com.linkedin.data.template.StringArrayArray; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Dataset; import com.linkedin.datahub.graphql.generated.Health; import com.linkedin.datahub.graphql.generated.HealthStatus; @@ -79,7 +80,7 @@ public DatasetHealthResolver( public CompletableFuture> get(final DataFetchingEnvironment environment) throws Exception { final Dataset parent = environment.getSource(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final CachedHealth cachedStatus = @@ -91,7 +92,9 @@ public CompletableFuture> get(final DataFetchingEnvironment environ } catch (Exception e) { throw new RuntimeException("Failed to resolve dataset's health status.", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } /** diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java index 298308c778015f..7d3603ec050e94 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java @@ -4,6 +4,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.CorpUser; import com.linkedin.datahub.graphql.generated.DatasetStatsSummary; import com.linkedin.datahub.graphql.generated.Entity; @@ -41,7 +42,7 @@ public CompletableFuture get(DataFetchingEnvironment enviro final QueryContext context = environment.getContext(); final Urn resourceUrn = UrnUtils.getUrn(((Entity) environment.getSource()).getUrn()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { if (!AuthorizationUtils.isViewDatasetUsageAuthorized(context, resourceUrn)) { @@ -80,7 +81,9 @@ public CompletableFuture get(DataFetchingEnvironment enviro e); return null; // Do not throw when loading usage summary fails. 
} - }); + }, + this.getClass().getSimpleName(), + "get"); } private List trimUsers(final List originalUsers) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java index 53d392baf4eb09..bed866db0fb3dc 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetUsageStatsResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.UsageQueryResult; import com.linkedin.datahub.graphql.types.usage.UsageQueryResultMapper; @@ -32,7 +33,7 @@ public CompletableFuture get(DataFetchingEnvironment environme final Urn resourceUrn = UrnUtils.getUrn(((Entity) environment.getSource()).getUrn()); final UsageTimeRange range = UsageTimeRange.valueOf(environment.getArgument("range")); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!isViewDatasetUsageAuthorized(context, resourceUrn)) { log.debug( @@ -52,6 +53,8 @@ public CompletableFuture get(DataFetchingEnvironment environme } return UsageQueryResultMapper.EMPTY; - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/IsAssignedToMeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/IsAssignedToMeResolver.java index e5781ec781c4e3..16a321d64f74eb 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/IsAssignedToMeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/IsAssignedToMeResolver.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.dataset; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.CorpUser; import com.linkedin.datahub.graphql.generated.Role; import com.linkedin.datahub.graphql.generated.RoleUser; @@ -20,7 +21,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); final Role role = environment.getSource(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final Set assignedUserUrns = @@ -35,6 +36,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throw new RuntimeException( "Failed to determine if current user is assigned to Role", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolver.java index d9d8b37cb304b0..c568ff6db3a27d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolver.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolver.java @@ -12,6 +12,7 @@ import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.UpdateDeprecationInput; import com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils; @@ -50,7 +51,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw bindArgument(environment.getArgument("input"), UpdateDeprecationInput.class); final Urn entityUrn = Urn.createFromString(input.getUrn()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!isAuthorizedToUpdateDeprecationForEntity(context, entityUrn)) { throw new AuthorizationException( @@ -84,7 +85,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw "Failed to update Deprecation for resource with entity urn %s", entityUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private boolean isAuthorizedToUpdateDeprecationForEntity( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolver.java index dbfe8d22709c62..ec2b0346288268 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolver.java @@ -10,6 +10,7 @@ import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -52,7 +53,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws final Urn parentDomain = input.getParentDomain() != null ? 
UrnUtils.getUrn(input.getParentDomain()) : null; - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!AuthorizationUtils.canCreateDomains(context)) { throw new AuthorizationException( @@ -115,7 +116,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws input.getId(), input.getName()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private DomainProperties mapDomainProperties( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DeleteDomainResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DeleteDomainResolver.java index fb0d75272dc3ef..eddb21303a7ee2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DeleteDomainResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DeleteDomainResolver.java @@ -3,6 +3,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.resolvers.mutate.util.DomainUtils; import com.linkedin.entity.client.EntityClient; @@ -27,7 +28,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final QueryContext context = environment.getContext(); final String domainUrn = environment.getArgument("urn"); final Urn urn = Urn.createFromString(domainUrn); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (AuthorizationUtils.canManageDomains(context) || AuthorizationUtils.canDeleteEntity(urn, context)) { @@ -65,6 +66,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java index dabce90aea21f4..9ca5de86034daa 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java @@ -4,6 +4,7 @@ import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.*; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.DomainEntitiesInput; import com.linkedin.datahub.graphql.generated.SearchResults; @@ -61,7 +62,7 @@ public CompletableFuture get(final DataFetchingEnvironment enviro final int start = input.getStart() != null ? input.getStart() : DEFAULT_START; final int count = input.getCount() != null ? 
input.getCount() : DEFAULT_COUNT; - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { @@ -103,6 +104,8 @@ public CompletableFuture get(final DataFetchingEnvironment enviro String.format("Failed to resolve entities associated with Domain with urn %s", urn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolver.java index fe4a7f23cfaab6..0c16470c642b71 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.ListDomainsInput; @@ -46,7 +47,7 @@ public CompletableFuture get(final DataFetchingEnvironment en final QueryContext context = environment.getContext(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final ListDomainsInput input = bindArgument(environment.getArgument("input"), ListDomainsInput.class); @@ -85,7 +86,9 @@ public CompletableFuture get(final DataFetchingEnvironment en } catch (Exception e) { throw new RuntimeException("Failed to list domains", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } // This method maps urns returned from the list endpoint into Partial Domain objects which will be diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ParentDomainsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ParentDomainsResolver.java index 3478cffb032b20..d02bcedd0eb1ac 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ParentDomainsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ParentDomainsResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.ParentDomainsResult; import com.linkedin.datahub.graphql.resolvers.mutate.util.DomainUtils; @@ -39,7 +40,7 @@ public CompletableFuture get(DataFetchingEnvironment enviro String.format("Failed to resolve parents for entity type %s", urn)); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { Entity parentDomain = DomainUtils.getParentDomain(urn, context, _entityClient); @@ -71,6 +72,8 @@ public CompletableFuture get(DataFetchingEnvironment enviro throw new RuntimeException( String.format("Failed to load parent domains for entity %s", urn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolver.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolver.java index c4b7fe3224e539..6ada447ca59ee8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.resolvers.mutate.util.DomainUtils; import com.linkedin.domain.Domains; @@ -40,7 +41,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final Urn entityUrn = Urn.createFromString(environment.getArgument("entityUrn")); final Urn domainUrn = Urn.createFromString(environment.getArgument("domainUrn")); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DomainUtils.isAuthorizedToUpdateDomainsForEntity( environment.getContext(), entityUrn)) { @@ -77,7 +78,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw entityUrn, domainUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } public static Boolean validateSetDomainInput( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolver.java index 0b5e3dad3b4c70..783cf250a7ca61 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.resolvers.mutate.util.DomainUtils; import com.linkedin.domain.Domains; @@ -39,7 +40,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final QueryContext context = environment.getContext(); final Urn entityUrn = Urn.createFromString(environment.getArgument("entityUrn")); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DomainUtils.isAuthorizedToUpdateDomainsForEntity( environment.getContext(), entityUrn)) { @@ -73,7 +74,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw String.format("Failed to unset Domains for resource with entity urn %s", entityUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } public static Boolean validateUnsetDomainInput( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/embed/UpdateEmbedResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/embed/UpdateEmbedResolver.java index 572e652555e6af..2a8944456d0c8e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/embed/UpdateEmbedResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/embed/UpdateEmbedResolver.java @@ -10,6 +10,7 @@ import 
com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.UpdateEmbedInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.EmbedUtils; @@ -39,7 +40,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw bindArgument(environment.getArgument("input"), UpdateEmbedInput.class); final Urn entityUrn = UrnUtils.getUrn(input.getUrn()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!EmbedUtils.isAuthorizedToUpdateEmbedForEntity(entityUrn, environment.getContext())) { throw new AuthorizationException( @@ -74,7 +75,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw "Failed to update Embed for to resource with entity urn %s", entityUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } /** diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityExistsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityExistsResolver.java index 8ea41463010387..57570b431dd712 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityExistsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityExistsResolver.java @@ -4,6 +4,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; @@ -32,7 +33,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) Objects.requireNonNull(entityUrnString, "Entity urn must not be null!"); final Urn entityUrn = Urn.createFromString(entityUrnString); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { return _entityService @@ -42,6 +43,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throw new RuntimeException( String.format("Failed to check whether entity %s exists", entityUrn.toString())); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityPrivilegesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityPrivilegesResolver.java index 82a9b6a939e6d2..b25f5598b44bc0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityPrivilegesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityPrivilegesResolver.java @@ -9,6 +9,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.EntityPrivileges; import com.linkedin.datahub.graphql.resolvers.mutate.util.EmbedUtils; @@ -37,7 +38,7 @@ public CompletableFuture get(DataFetchingEnvironment environme final String urnString = 
((Entity) environment.getSource()).getUrn(); final Urn urn = UrnUtils.getUrn(urnString); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { switch (urn.getEntityType()) { case Constants.GLOSSARY_TERM_ENTITY_NAME: @@ -60,7 +61,9 @@ public CompletableFuture get(DataFetchingEnvironment environme addCommonPrivileges(commonPrivileges, urn, context); return commonPrivileges; } - }); + }, + this.getClass().getSimpleName(), + "get"); } private EntityPrivileges getGlossaryTermPrivileges(Urn termUrn, QueryContext context) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/BatchAssignFormResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/BatchAssignFormResolver.java index 7eb27cf1edf039..03282934293ea5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/BatchAssignFormResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/BatchAssignFormResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.BatchAssignFormInput; import com.linkedin.metadata.service.FormService; import graphql.schema.DataFetcher; @@ -35,7 +36,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final List entityUrns = input.getEntityUrns(); final Authentication authentication = context.getAuthentication(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _formService.batchAssignFormToEntities( @@ -47,6 +48,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throw new RuntimeException( String.format("Failed to perform update against input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/BatchRemoveFormResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/BatchRemoveFormResolver.java index eb212b5ed0ea88..ac2a5fc1ec0ea6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/BatchRemoveFormResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/BatchRemoveFormResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.BatchAssignFormInput; import com.linkedin.metadata.service.FormService; import graphql.schema.DataFetcher; @@ -37,7 +38,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) // TODO: (PRD-1062) Add permission check once permission exists - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _formService.batchUnassignFormForEntities( @@ -49,6 +50,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throw new RuntimeException( String.format("Failed to perform update against input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java index 5f5d700732a3ab..b9d74f8af660e8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.CreateDynamicFormAssignmentInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.FormUtils; import com.linkedin.form.DynamicFormAssignment; @@ -34,7 +35,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final Urn formUrn = UrnUtils.getUrn(input.getFormUrn()); final DynamicFormAssignment formAssignment = FormUtils.mapDynamicFormAssignment(input); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _formService.createDynamicFormAssignment( @@ -44,6 +45,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throw new RuntimeException( String.format("Failed to perform update against input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/IsFormAssignedToMeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/IsFormAssignedToMeResolver.java index 9a90eee7f1bf29..29e5cd55f15e24 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/IsFormAssignedToMeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/IsFormAssignedToMeResolver.java @@ -3,6 +3,7 @@ import com.datahub.authentication.group.GroupService; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.CorpGroup; import com.linkedin.datahub.graphql.generated.CorpUser; import com.linkedin.datahub.graphql.generated.FormActorAssignment; @@ -31,7 +32,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) { final QueryContext context = environment.getContext(); final FormActorAssignment parent = environment.getSource(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { @@ -75,6 +76,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) { // Else the user is not directly assigned. 
return false; - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/SubmitFormPromptResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/SubmitFormPromptResolver.java index 63094ca9820d3e..9528bb125473e7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/SubmitFormPromptResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/SubmitFormPromptResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.FormPromptType; import com.linkedin.datahub.graphql.generated.SubmitFormPromptInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.FormUtils; @@ -36,7 +37,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final Urn formUrn = UrnUtils.getUrn(input.getFormUrn()); final String fieldPath = input.getFieldPath(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { if (input.getType().equals(FormPromptType.STRUCTURED_PROPERTY)) { @@ -84,6 +85,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throw new RuntimeException( String.format("Failed to perform update against input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/VerifyFormResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/VerifyFormResolver.java index 82efa476457eb2..76103d77fb32c5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/VerifyFormResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/VerifyFormResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.VerifyFormInput; import com.linkedin.metadata.service.FormService; @@ -40,7 +41,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final Authentication authentication = context.getAuthentication(); final Urn actorUrn = UrnUtils.getUrn(authentication.getActor().toUrnStr()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final List groupsForUser = @@ -58,6 +59,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throw new RuntimeException( String.format("Failed to perform update against input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolver.java index c27b2245528b50..1e99ea120354ee 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolver.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolver.java @@ -8,6 +8,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.RelatedTermsInput; import com.linkedin.datahub.graphql.generated.TermRelationshipType; @@ -43,7 +44,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw bindArgument(environment.getArgument("input"), RelatedTermsInput.class); final Urn urn = Urn.createFromString(input.getUrn()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final Urn parentUrn = GlossaryUtils.getParentUrn(urn, context, _entityClient); if (GlossaryUtils.canManageChildrenEntities(context, parentUrn, _entityClient)) { @@ -102,7 +103,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } public Boolean validateRelatedTermsInput( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryNodeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryNodeResolver.java index 71201600bcd870..75239ae8e7eeb6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryNodeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryNodeResolver.java @@ -9,6 +9,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateGlossaryEntityInput; import com.linkedin.datahub.graphql.generated.OwnerEntityType; @@ -43,7 +44,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws final Urn parentNode = input.getParentNode() != null ? UrnUtils.getUrn(input.getParentNode()) : null; - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (GlossaryUtils.canManageChildrenEntities(context, parentNode, _entityClient)) { try { @@ -87,7 +88,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } private GlossaryNodeInfo mapGlossaryNodeInfo(final CreateGlossaryEntityInput input) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryTermResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryTermResolver.java index c68ef0df4965e2..d524a07b541621 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryTermResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryTermResolver.java @@ -10,6 +10,7 @@ import com.linkedin.data.DataMap; import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateGlossaryEntityInput; import com.linkedin.datahub.graphql.generated.OwnerEntityType; @@ -58,7 +59,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws final Urn parentNode = input.getParentNode() != null ? UrnUtils.getUrn(input.getParentNode()) : null; - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (GlossaryUtils.canManageChildrenEntities(context, parentNode, _entityClient)) { // Ensure there isn't another glossary term with the same name at this level of the @@ -105,7 +106,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } private GlossaryTermInfo mapGlossaryTermInfo(final CreateGlossaryEntityInput input) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java index b8a2bd223bdab3..26f0c61de1b0f9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java @@ -2,6 +2,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.resolvers.mutate.util.GlossaryUtils; import com.linkedin.entity.client.EntityClient; @@ -30,7 +31,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final Urn entityUrn = Urn.createFromString(environment.getArgument("urn")); final Urn parentNodeUrn = GlossaryUtils.getParentUrn(entityUrn, context, _entityClient); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (GlossaryUtils.canManageChildrenEntities(context, parentNodeUrn, _entityClient)) { if (!_entityService.exists(context.getOperationContext(), entityUrn, true)) { @@ -65,6 +66,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolver.java index 700a38d50b317d..451abfdaf1c063 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolver.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.GetRootGlossaryEntitiesInput; import com.linkedin.datahub.graphql.generated.GetRootGlossaryNodesResult; @@ -42,7 +43,7 @@ public CompletableFuture get( final QueryContext context = environment.getContext(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final GetRootGlossaryEntitiesInput input = bindArgument(environment.getArgument("input"), GetRootGlossaryEntitiesInput.class); @@ -75,7 +76,9 @@ public CompletableFuture get( } catch (RemoteInvocationException e) { throw new RuntimeException("Failed to retrieve root glossary nodes from GMS", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private Filter buildGlossaryEntitiesFilter() { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolver.java index 9669d406344e71..7ca79b168819e6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolver.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.GetRootGlossaryEntitiesInput; import com.linkedin.datahub.graphql.generated.GetRootGlossaryTermsResult; @@ -42,7 +43,7 @@ public CompletableFuture get( final QueryContext context = environment.getContext(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final GetRootGlossaryEntitiesInput input = bindArgument(environment.getArgument("input"), GetRootGlossaryEntitiesInput.class); @@ -75,7 +76,9 @@ public CompletableFuture get( } catch (RemoteInvocationException e) { throw new RuntimeException("Failed to retrieve root glossary terms from GMS", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private Filter buildGlossaryEntitiesFilter() { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/ParentNodesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/ParentNodesResolver.java index 12edb6246de6bb..92c8aa7fd2d133 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/ParentNodesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/ParentNodesResolver.java @@ -9,6 +9,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.GlossaryNode; @@ -111,7 +112,7 @@ public CompletableFuture get(DataFetchingEnvironment environm final String urn = ((Entity) environment.getSource()).getUrn(); final List nodes = new ArrayList<>(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final String type = Urn.createFromString(urn).getEntityType(); @@ -144,6 +145,8 @@ public CompletableFuture get(DataFetchingEnvironment environm } catch (DataHubGraphQLException | URISyntaxException e) { throw new RuntimeException(("Failed to load parent nodes")); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolver.java index b9dcf8f9e52a27..59f820d7cbd36e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.RelatedTermsInput; import com.linkedin.datahub.graphql.generated.TermRelationshipType; @@ -39,7 +40,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw bindArgument(environment.getArgument("input"), RelatedTermsInput.class); final Urn urn = Urn.createFromString(input.getUrn()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final Urn parentUrn = GlossaryUtils.getParentUrn(urn, context, _entityClient); if (GlossaryUtils.canManageChildrenEntities(context, parentUrn, _entityClient)) { @@ -115,6 +116,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/AddGroupMembersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/AddGroupMembersResolver.java index 5813dcdfc9fce5..4a78547421518c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/AddGroupMembersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/AddGroupMembersResolver.java @@ -10,6 +10,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -51,7 +52,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) String.format("Failed to add members to group %s. Group does not exist.", groupUrnStr), DataHubGraphQLErrorCode.NOT_FOUND); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { Origin groupOrigin = _groupService.getGroupOrigin(context.getOperationContext(), groupUrn); @@ -85,6 +86,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throw new RuntimeException( String.format("Failed to add group members to group %s", groupUrnStr)); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/CreateGroupResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/CreateGroupResolver.java index 87572f3b20f6b0..7116d99bd0aba0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/CreateGroupResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/CreateGroupResolver.java @@ -6,6 +6,7 @@ import com.datahub.authentication.group.GroupService; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateGroupInput; import com.linkedin.metadata.key.CorpGroupKey; @@ -36,7 +37,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final CreateGroupInput input = bindArgument(environment.getArgument("input"), CreateGroupInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { // First, check if the group already exists. 
@@ -50,6 +51,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } catch (Exception e) { throw new RuntimeException("Failed to create group", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/EntityCountsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/EntityCountsResolver.java index 06a672f464f70a..ac195ca5d82520 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/EntityCountsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/EntityCountsResolver.java @@ -3,6 +3,7 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.EntityCountInput; import com.linkedin.datahub.graphql.generated.EntityCountResult; import com.linkedin.datahub.graphql.generated.EntityCountResults; @@ -39,7 +40,7 @@ public CompletableFuture get(final DataFetchingEnvironment e bindArgument(environment.getArgument("input"), EntityCountInput.class); final EntityCountResults results = new EntityCountResults(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { // First, get all counts @@ -67,6 +68,8 @@ public CompletableFuture get(final DataFetchingEnvironment e } catch (Exception e) { throw new RuntimeException("Failed to get entity counts", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/ListGroupsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/ListGroupsResolver.java index 671c92546e1a48..fce404a6baa16b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/ListGroupsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/ListGroupsResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CorpGroup; import com.linkedin.datahub.graphql.generated.EntityType; @@ -51,7 +52,7 @@ public CompletableFuture get(final DataFetchingEnvironment env final Integer count = input.getCount() == null ? DEFAULT_COUNT : input.getCount(); final String query = input.getQuery() == null ? DEFAULT_QUERY : input.getQuery(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { // First, get all group Urns. @@ -94,7 +95,9 @@ public CompletableFuture get(final DataFetchingEnvironment env } catch (Exception e) { throw new RuntimeException("Failed to list groups", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupMembersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupMembersResolver.java index d93e30f62da22f..55a26af603fbe0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupMembersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupMembersResolver.java @@ -10,6 +10,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -55,7 +56,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) DataHubGraphQLErrorCode.NOT_FOUND); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { Origin groupOrigin = _groupService.getGroupOrigin(context.getOperationContext(), groupUrn); @@ -82,6 +83,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } catch (Exception e) { throw new RuntimeException(e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupResolver.java index 8a54129afcf554..04196b01c108e1 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupResolver.java @@ -3,6 +3,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.entity.client.EntityClient; import graphql.schema.DataFetcher; @@ -27,7 +28,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) if (AuthorizationUtils.canManageUsersAndGroups(context)) { final String groupUrn = environment.getArgument("urn"); final Urn urn = Urn.createFromString(groupUrn); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _entityClient.deleteEntity(context.getOperationContext(), urn); @@ -51,7 +52,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throw new RuntimeException( String.format("Failed to perform delete against group with urn %s", groupUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/health/EntityHealthResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/health/EntityHealthResolver.java index 5db47f5bde2c9b..380b7c920ab2ff 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/health/EntityHealthResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/health/EntityHealthResolver.java @@ -5,6 +5,7 @@ import com.linkedin.data.template.StringArray; import com.linkedin.data.template.StringArrayArray; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.Health; import com.linkedin.datahub.graphql.generated.HealthStatus; @@ -86,7 +87,7 @@ public EntityHealthResolver( public CompletableFuture> get(final DataFetchingEnvironment environment) throws Exception { final Entity parent = environment.getSource(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final HealthStatuses statuses = @@ -95,7 +96,9 @@ public CompletableFuture> get(final DataFetchingEnvironment environ } catch (Exception e) { throw new RuntimeException("Failed to resolve asset's health status.", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } /** diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/EntityIncidentsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/EntityIncidentsResolver.java index 0aaf71303f04d1..2d4b24243073a5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/EntityIncidentsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/EntityIncidentsResolver.java @@ -2,6 +2,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.EntityIncidentsResult; import com.linkedin.datahub.graphql.generated.Incident; @@ -45,7 +46,7 @@ public EntityIncidentsResolver(final EntityClient entityClient) { @Override public CompletableFuture get(DataFetchingEnvironment environment) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final QueryContext context = environment.getContext(); @@ -103,7 +104,9 @@ public CompletableFuture get(DataFetchingEnvironment envi } catch (URISyntaxException | RemoteInvocationException e) { throw new RuntimeException("Failed to retrieve incidents from GMS", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private Filter buildIncidentsEntityFilter( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/RaiseIncidentResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/RaiseIncidentResolver.java index 180b3120191b8a..454ba693da95a7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/RaiseIncidentResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/RaiseIncidentResolver.java @@ -14,6 +14,7 @@ import 
com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.RaiseIncidentInput; import com.linkedin.entity.client.EntityClient; @@ -49,7 +50,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws bindArgument(environment.getArgument("input"), RaiseIncidentInput.class); final Urn resourceUrn = Urn.createFromString(input.getResourceUrn()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!isAuthorizedToCreateIncidentForResource(resourceUrn, context)) { throw new AuthorizationException( @@ -76,7 +77,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws log.error("Failed to create incident. {}", e.getMessage()); throw new RuntimeException("Failed to incident", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private IncidentInfo mapIncidentInfo(final RaiseIncidentInput input, final QueryContext context) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/UpdateIncidentStatusResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/UpdateIncidentStatusResolver.java index 774bcc55e43dcf..d51ceab31e60ec 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/UpdateIncidentStatusResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/UpdateIncidentStatusResolver.java @@ -13,6 +13,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -44,7 +45,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final Urn incidentUrn = Urn.createFromString(environment.getArgument("urn")); final UpdateIncidentStatusInput input = bindArgument(environment.getArgument("input"), UpdateIncidentStatusInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { // Check whether the incident exists. @@ -89,7 +90,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throw new DataHubGraphQLException( "Failed to update incident. 
Incident does not exist.", DataHubGraphQLErrorCode.NOT_FOUND); - }); + }, + this.getClass().getSimpleName(), + "get"); } private boolean isAuthorizedToUpdateIncident(final Urn resourceUrn, final QueryContext context) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CancelIngestionExecutionRequestResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CancelIngestionExecutionRequestResolver.java index d998fd118bd048..a373e4254d0597 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CancelIngestionExecutionRequestResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CancelIngestionExecutionRequestResolver.java @@ -10,6 +10,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -42,7 +43,7 @@ public CancelIngestionExecutionRequestResolver(final EntityClient entityClient) public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (IngestionAuthUtils.canManageIngestion(context)) { @@ -94,6 +95,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java index 39b8c65bdbd511..160624a4b0fe57 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java @@ -10,6 +10,7 @@ import com.linkedin.data.template.SetMode; import com.linkedin.data.template.StringMap; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -58,7 +59,7 @@ public CreateIngestionExecutionRequestResolver( public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (IngestionAuthUtils.canManageIngestion(context)) { @@ -153,7 +154,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } /** diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolver.java index de99044cb22ca7..aeb4dd4f948019 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolver.java @@ -8,6 +8,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.StringMap; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateTestConnectionRequestInput; import com.linkedin.datahub.graphql.resolvers.ingest.IngestionAuthUtils; @@ -48,7 +49,7 @@ public CreateTestConnectionRequestResolver( public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!IngestionAuthUtils.canManageIngestion(context)) { throw new AuthorizationException( @@ -97,6 +98,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) "Failed to create new test ingestion connection request %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/GetIngestionExecutionRequestResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/GetIngestionExecutionRequestResolver.java index ba7f0102a5edf8..474ab342256ac8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/GetIngestionExecutionRequestResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/GetIngestionExecutionRequestResolver.java @@ -3,6 +3,7 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -38,7 +39,7 @@ public CompletableFuture get(final DataFetchingEnvironment env if (IngestionAuthUtils.canManageIngestion(context)) { final String urnStr = environment.getArgument("urn"); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { // Fetch specific execution request @@ -62,7 +63,9 @@ public CompletableFuture get(final DataFetchingEnvironment env } catch (Exception e) { throw new RuntimeException("Failed to retrieve execution request", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java index ed70c7bd0d7716..4a3b75deddc452 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java @@ -4,6 +4,7 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.IngestionSource; import com.linkedin.datahub.graphql.generated.IngestionSourceExecutionRequests; import com.linkedin.datahub.graphql.resolvers.ingest.IngestionResolverUtils; @@ -54,7 +55,7 @@ public CompletableFuture get( final Integer count = environment.getArgument("count") != null ? environment.getArgument("count") : 10; - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { @@ -116,6 +117,8 @@ public CompletableFuture get( urn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/RollbackIngestionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/RollbackIngestionResolver.java index 44233bf3bed9eb..62977b6b92676e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/RollbackIngestionResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/RollbackIngestionResolver.java @@ -3,6 +3,7 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.RollbackIngestionInput; import com.linkedin.datahub.graphql.resolvers.ingest.IngestionAuthUtils; @@ -23,7 +24,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!IngestionAuthUtils.canManageIngestion(context)) { throw new AuthorizationException( @@ -36,12 +37,14 @@ public CompletableFuture get(final DataFetchingEnvironment environment) rollbackIngestion(runId, context); return true; - }); + }, + this.getClass().getSimpleName(), + "get"); } public CompletableFuture rollbackIngestion( final String runId, final QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _entityClient.rollbackIngestion( @@ -50,6 +53,8 @@ public CompletableFuture rollbackIngestion( } catch (Exception e) { throw new RuntimeException("Failed to rollback ingestion execution", e); } - }); + }, + this.getClass().getSimpleName(), + "rollbackIngestion"); } } diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/CreateSecretResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/CreateSecretResolver.java index 509f4dde93ed44..8bd8325bf16c01 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/CreateSecretResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/CreateSecretResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateSecretInput; import com.linkedin.datahub.graphql.resolvers.ingest.IngestionAuthUtils; @@ -41,7 +42,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final CreateSecretInput input = bindArgument(environment.getArgument("input"), CreateSecretInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (IngestionAuthUtils.canManageSecrets(context)) { @@ -79,6 +80,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java index 424c34a86c0c7b..f557b9889f604d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java @@ -2,6 +2,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.resolvers.ingest.IngestionAuthUtils; import com.linkedin.entity.client.EntityClient; @@ -24,7 +25,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) if (IngestionAuthUtils.canManageSecrets(context)) { final String secretUrn = environment.getArgument("urn"); final Urn urn = Urn.createFromString(secretUrn); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _entityClient.deleteEntity(context.getOperationContext(), urn); @@ -34,7 +35,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) String.format("Failed to perform delete against secret with urn %s", secretUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/GetSecretValuesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/GetSecretValuesResolver.java index 710a90d88facd0..100965d457a1de 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/GetSecretValuesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/GetSecretValuesResolver.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.GetSecretValuesInput; import com.linkedin.datahub.graphql.generated.SecretValue; @@ -50,7 +51,7 @@ public CompletableFuture> get(final DataFetchingEnvironment en final GetSecretValuesInput input = bindArgument(environment.getArgument("input"), GetSecretValuesInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { // Fetch secrets @@ -92,7 +93,9 @@ public CompletableFuture> get(final DataFetchingEnvironment en throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/ListSecretsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/ListSecretsResolver.java index 09423acc57ef47..106a2d0d1e18e2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/ListSecretsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/ListSecretsResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.data.template.GetMode; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.ListSecretsInput; import com.linkedin.datahub.graphql.generated.ListSecretsResult; @@ -60,7 +61,7 @@ public CompletableFuture get(final DataFetchingEnvironment en final Integer count = input.getCount() == null ? DEFAULT_COUNT : input.getCount(); final String query = input.getQuery() == null ? DEFAULT_QUERY : input.getQuery(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { // First, get all secrets @@ -105,7 +106,9 @@ public CompletableFuture get(final DataFetchingEnvironment en } catch (Exception e) { throw new RuntimeException("Failed to list secrets", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/UpdateSecretResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/UpdateSecretResolver.java index be8db572c5e775..e23dd800478d40 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/UpdateSecretResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/UpdateSecretResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.UpdateSecretInput; import com.linkedin.datahub.graphql.resolvers.ingest.IngestionAuthUtils; @@ -38,7 +39,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final UpdateSecretInput input = bindArgument(environment.getArgument("input"), UpdateSecretInput.class); final Urn secretUrn = Urn.createFromString(input.getUrn()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (IngestionAuthUtils.canManageSecrets(context)) { @@ -77,6 +78,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/DeleteIngestionSourceResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/DeleteIngestionSourceResolver.java index 80b20be669ad7b..4d693f2acab308 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/DeleteIngestionSourceResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/DeleteIngestionSourceResolver.java @@ -2,6 +2,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.resolvers.ingest.IngestionAuthUtils; import com.linkedin.entity.client.EntityClient; @@ -27,7 +28,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) if (IngestionAuthUtils.canManageIngestion(context)) { final String ingestionSourceUrn = environment.getArgument("urn"); final Urn urn = Urn.createFromString(ingestionSourceUrn); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _entityClient.deleteEntity(context.getOperationContext(), urn); @@ -39,7 +40,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) ingestionSourceUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/GetIngestionSourceResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/GetIngestionSourceResolver.java index 23a1e99cce4d46..3dcbf7b1f7b8e9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/GetIngestionSourceResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/GetIngestionSourceResolver.java @@ -3,6 +3,7 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -37,7 +38,7 @@ public CompletableFuture get(final DataFetchingEnvironment envi if (IngestionAuthUtils.canManageIngestion(context)) { final String urnStr = environment.getArgument("urn"); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final Urn urn = Urn.createFromString(urnStr); @@ -58,7 +59,9 @@ public CompletableFuture get(final DataFetchingEnvironment envi } catch (Exception e) { throw new RuntimeException("Failed to retrieve ingestion source", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java index 3d5e2e8ae1e0c3..8ead47aa65ceb0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.FacetFilterInput; import com.linkedin.datahub.graphql.generated.ListIngestionSourcesInput; @@ -56,7 +57,7 @@ public CompletableFuture get( final List filters = input.getFilters() == null ? Collections.emptyList() : input.getFilters(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { // First, get all ingestion sources Urns. @@ -108,7 +109,9 @@ public CompletableFuture get( } catch (Exception e) { throw new RuntimeException("Failed to list ingestion sources", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java index 5d595167d0eb0c..77fabd7167300e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringMap; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -45,7 +46,7 @@ public UpsertIngestionSourceResolver(final EntityClient entityClient) { public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (IngestionAuthUtils.canManageIngestion(context)) { @@ -94,7 +95,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } private DataHubIngestionSourceInfo mapIngestionSourceInfo( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java index 580bf22ad6a6c1..591712ef3f55ba 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java @@ -3,6 +3,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.DataProcessInstance; import com.linkedin.datahub.graphql.generated.DataProcessInstanceResult; import com.linkedin.datahub.graphql.generated.Entity; @@ -47,7 +48,7 @@ public DataJobRunsResolver(final EntityClient entityClient) { @Override public CompletableFuture get(DataFetchingEnvironment environment) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final QueryContext context = environment.getContext(); @@ -103,7 +104,9 @@ public CompletableFuture get(DataFetchingEnvironment } catch (URISyntaxException | RemoteInvocationException e) { throw new RuntimeException("Failed to retrieve incidents from GMS", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private Filter buildTaskRunsEntityFilter(final String entityUrn) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java index 
273361dd8a760d..163fc30fb6e6c6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java @@ -3,6 +3,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.DataProcessInstance; import com.linkedin.datahub.graphql.generated.DataProcessInstanceResult; import com.linkedin.datahub.graphql.generated.Entity; @@ -49,7 +50,7 @@ public EntityRunsResolver(final EntityClient entityClient) { @Override public CompletableFuture get(DataFetchingEnvironment environment) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final QueryContext context = environment.getContext(); @@ -109,7 +110,9 @@ public CompletableFuture get(DataFetchingEnvironment } catch (URISyntaxException | RemoteInvocationException e) { throw new RuntimeException("Failed to retrieve incidents from GMS", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private Filter buildTaskRunsEntityFilter( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/lineage/UpdateLineageResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/lineage/UpdateLineageResolver.java index 27ffe14ca065b2..d462fb0820aa03 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/lineage/UpdateLineageResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/lineage/UpdateLineageResolver.java @@ -10,6 +10,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.LineageEdge; import com.linkedin.datahub.graphql.generated.UpdateLineageInput; @@ -57,7 +58,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw downstreamUrns.addAll(downstreamToUpstreamsToAdd.keySet()); downstreamUrns.addAll(downstreamToUpstreamsToRemove.keySet()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final Set existingDownstreamUrns = _entityService.exists(context.getOperationContext(), downstreamUrns, true); @@ -168,7 +169,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw } return true; - }); + }, + this.getClass().getSimpleName(), + "get"); } private List filterOutDataJobUrns(@Nonnull final List urns) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java index d1d3e23fac75a2..51b00bbe7b799b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java @@ -10,6 +10,7 @@ import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; import 
com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.EntityLineageResult; import com.linkedin.datahub.graphql.generated.EntityType; @@ -68,7 +69,7 @@ public CompletableFuture get(DataFetchingEnvironment enviro com.linkedin.metadata.graph.LineageDirection.valueOf(lineageDirection.toString()); final Urn finalUrn = urn; - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { com.linkedin.metadata.graph.EntityLineageResult entityLineageResult = @@ -106,7 +107,9 @@ public CompletableFuture get(DataFetchingEnvironment enviro throw new RuntimeException( String.format("Failed to fetch lineage for %s", finalUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private EntityLineageResult mapEntityRelationships( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityRelationshipsResultResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityRelationshipsResultResolver.java index f3edbf8a3a7374..f775853dd59567 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityRelationshipsResultResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityRelationshipsResultResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.EntityRelationship; import com.linkedin.common.EntityRelationships; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.EntityRelationshipsResult; import com.linkedin.datahub.graphql.generated.RelationshipsInput; @@ -46,13 +47,15 @@ public CompletableFuture get(DataFetchingEnvironment final Integer count = input.getCount(); // Optional! 
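(The recurring change in the hunks above and below swaps CompletableFuture.supplyAsync(...) for GraphQLConcurrencyUtils.supplyAsync(supplier, callerClass, callerMethod). The utility itself is added elsewhere in this patch, in the new GraphQLConcurrencyUtils.java and GraphQLWorkerPoolThreadFactory.java files, and its body is not shown in this excerpt. The sketch below is only an illustration of the contract the call sites imply: a static helper that runs the supplier on a dedicated, named GraphQL worker pool instead of the common ForkJoinPool, keeping a caller label for attribution. The pool construction, class name "GraphQLConcurrencyUtilsSketch", and the way the label is used are assumptions for illustration, not the patch's actual code.)

package com.linkedin.datahub.graphql.concurrency;

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Supplier;

// Minimal sketch, under assumptions, of what the new call sites rely on:
// run resolver work on a dedicated worker pool and label it by caller/method.
public final class GraphQLConcurrencyUtilsSketch {

  // Assumption: a fixed-size pool with named daemon threads; the real patch
  // supplies its own GraphQLWorkerPoolThreadFactory for this purpose.
  private static final AtomicInteger THREAD_NUM = new AtomicInteger(1);
  private static final ExecutorService GRAPHQL_WORKER_POOL =
      Executors.newFixedThreadPool(
          Runtime.getRuntime().availableProcessors(),
          runnable -> {
            Thread thread =
                new Thread(runnable, "graphql-worker-" + THREAD_NUM.getAndIncrement());
            thread.setDaemon(true);
            return thread;
          });

  private GraphQLConcurrencyUtilsSketch() {}

  public static <T> CompletableFuture<T> supplyAsync(
      Supplier<T> supplier, String caller, String method) {
    // Hypothetical label, e.g. "AddOwnerResolver.get"; the real utility may
    // instead feed this into metrics or thread naming.
    final String label = caller + "." + method;
    return CompletableFuture.supplyAsync(
        () -> {
          try {
            return supplier.get();
          } catch (RuntimeException e) {
            // Keep the caller label on the failure so async stack traces stay attributable.
            throw new CompletionException(label + " failed", e);
          }
        },
        GRAPHQL_WORKER_POOL);
  }
}

// Illustrative call site, mirroring the pattern applied throughout this patch:
// return GraphQLConcurrencyUtils.supplyAsync(
//     () -> doResolve(environment), this.getClass().getSimpleName(), "get");

(End of sketch; the patch hunk for EntityRelationshipsResultResolver continues below.)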
final RelationshipDirection resolvedDirection = RelationshipDirection.valueOf(relationshipDirection.toString()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> mapEntityRelationships( context, fetchEntityRelationships( urn, relationshipTypes, resolvedDirection, start, count, context.getActorUrn()), - resolvedDirection)); + resolvedDirection), + this.getClass().getSimpleName(), + "get"); } private EntityRelationships fetchEntityRelationships( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java index f14032859c6288..1839fd3cc57055 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java @@ -5,6 +5,7 @@ import com.datahub.authorization.AuthUtil; import com.datahub.authorization.EntitySpec; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.FilterInput; import com.linkedin.datahub.graphql.generated.TimeSeriesAspect; @@ -86,7 +87,7 @@ private boolean isAuthorized(QueryContext context, String urn) { @Override public CompletableFuture> get(DataFetchingEnvironment environment) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final QueryContext context = environment.getContext(); // Fetch the urn, assuming the parent has an urn field. @@ -129,7 +130,9 @@ public CompletableFuture> get(DataFetchingEnvironment env } catch (RemoteInvocationException e) { throw new RuntimeException("Failed to retrieve aspects from GMS", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private Filter buildFilters(@Nullable FilterInput maybeFilters) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddLinkResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddLinkResolver.java index 7290de024c2d45..5cffcd9c35c005 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddLinkResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddLinkResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AddLinkInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.GlossaryUtils; @@ -40,7 +41,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw "Unauthorized to perform this action. 
Please contact your DataHub administrator."); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { LinkUtils.validateAddRemoveInput( context.getOperationContext(), linkUrl, targetUrn, _entityService); @@ -65,7 +66,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to add link to resource with input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } // Returns whether this is a glossary entity and whether you can edit this glossary entity with diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java index 5f3b290e562f9b..7c0f7b3757ee9c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.AddOwnerInput; import com.linkedin.datahub.graphql.generated.OwnerInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; @@ -42,7 +43,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw OwnerInput ownerInput = ownerInputBuilder.build(); OwnerUtils.validateAuthorizedToUpdateOwners(context, targetUrn); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { OwnerUtils.validateAddOwnerInput( context.getOperationContext(), ownerInput, ownerUrn, _entityService); @@ -64,6 +65,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to add owner to resource with input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java index 4ff6bdcff12695..ade4e7b744801f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.AddOwnersInput; import com.linkedin.datahub.graphql.generated.OwnerInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; @@ -32,7 +33,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw List owners = input.getOwners(); Urn targetUrn = Urn.createFromString(input.getResourceUrn()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { OwnerUtils.validateAuthorizedToUpdateOwners(environment.getContext(), targetUrn); @@ -55,6 +56,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to add owners 
to resource with input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTagResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTagResolver.java index 05e73c274a131f..4275f869b29086 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTagResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTagResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.generated.TagAssociationInput; @@ -37,7 +38,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw "Unauthorized to perform this action. Please contact your DataHub administrator."); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { LabelUtils.validateResourceAndLabel( context.getOperationContext(), @@ -74,6 +75,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTagsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTagsResolver.java index e35bd7c20f9ec7..b2fc05720064cd 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTagsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTagsResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AddTagsInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; @@ -35,7 +36,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw input.getTagUrns().stream().map(UrnUtils::getUrn).collect(Collectors.toList()); Urn targetUrn = Urn.createFromString(input.getResourceUrn()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!LabelUtils.isAuthorizedToUpdateTags(context, targetUrn, input.getSubResource())) { throw new AuthorizationException( @@ -71,6 +72,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTermResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTermResolver.java index 6c1f1b2f045a3a..ffdb394369d173 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTermResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTermResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.generated.TermAssociationInput; @@ -36,7 +37,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw "Unauthorized to perform this action. Please contact your DataHub administrator."); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { LabelUtils.validateResourceAndLabel( context.getOperationContext(), @@ -68,6 +69,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTermsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTermsResolver.java index feb6f77bcd2fc2..68ba5f977c2bf0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTermsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddTermsResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AddTermsInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; @@ -34,7 +35,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw input.getTermUrns().stream().map(UrnUtils::getUrn).collect(Collectors.toList()); Urn targetUrn = Urn.createFromString(input.getResourceUrn()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!LabelUtils.isAuthorizedToUpdateTerms(context, targetUrn, input.getSubResource())) { throw new AuthorizationException( @@ -71,6 +72,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java index c89558b5ec4a23..28daef1b110628 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import 
com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.BatchAddOwnersInput; import com.linkedin.datahub.graphql.generated.OwnerInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; @@ -35,7 +36,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final List resources = input.getResources(); final QueryContext context = environment.getContext(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { // First, validate the batch @@ -52,7 +53,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private void validateOwners(@Nonnull OperationContext opContext, List owners) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddTagsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddTagsResolver.java index 33b8b919283e33..7cd2483263479f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddTagsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddTagsResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.BatchAddTagsInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; @@ -40,7 +41,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw input.getTagUrns().stream().map(UrnUtils::getUrn).collect(Collectors.toList()); final List resources = input.getResources(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { // First, validate the batch @@ -62,7 +63,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } /** diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddTermsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddTermsResolver.java index c616eb93f3686e..2d092758b776b3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddTermsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddTermsResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.BatchAddTermsInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; @@ -40,7 +41,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw input.getTermUrns().stream().map(UrnUtils::getUrn).collect(Collectors.toList()); final List resources = input.getResources(); - 
return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { // First, validate the batch @@ -62,7 +63,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } /** diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java index 3037c3ea673378..5aaace4e21e9ca 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.BatchRemoveOwnersInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.LabelUtils; @@ -36,7 +37,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw : Urn.createFromString(input.getOwnershipTypeUrn()); final QueryContext context = environment.getContext(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { // First, validate the batch @@ -52,7 +53,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private void validateInputResources(List resources, QueryContext context) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveTagsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveTagsResolver.java index 1dab9a990af23c..6d90d07521f924 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveTagsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveTagsResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.BatchRemoveTagsInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; @@ -35,7 +36,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw input.getTagUrns().stream().map(UrnUtils::getUrn).collect(Collectors.toList()); final List resources = input.getResources(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { // First, validate the batch @@ -51,7 +52,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private void validateInputResources( diff 
--git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveTermsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveTermsResolver.java index 0c8a15882c0561..adc5e77681a3ad 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveTermsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveTermsResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.BatchRemoveTermsInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; @@ -35,7 +36,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw input.getTermUrns().stream().map(UrnUtils::getUrn).collect(Collectors.toList()); final List resources = input.getResources(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { // First, validate the batch @@ -51,7 +52,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private void validateInputResources( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchSetDomainResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchSetDomainResolver.java index e733e23d1faf30..abbeed29545e4a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchSetDomainResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchSetDomainResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.BatchSetDomainInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; @@ -36,7 +37,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final String maybeDomainUrn = input.getDomainUrn(); final List resources = input.getResources(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { // First, validate the domain @@ -53,7 +54,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private void validateDomain( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchUpdateDeprecationResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchUpdateDeprecationResolver.java index 1aee88f0dd9195..546694ede92697 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchUpdateDeprecationResolver.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchUpdateDeprecationResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.BatchUpdateDeprecationInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; @@ -33,7 +34,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw bindArgument(environment.getArgument("input"), BatchUpdateDeprecationInput.class); final List resources = input.getResources(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { // First, validate the resources @@ -54,7 +55,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private void validateInputResources(List resources, QueryContext context) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchUpdateSoftDeletedResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchUpdateSoftDeletedResolver.java index 97f5c9676bd38c..9f24af66a70fa3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchUpdateSoftDeletedResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchUpdateSoftDeletedResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.BatchUpdateSoftDeletedInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.DeleteUtils; @@ -30,7 +31,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final List urns = input.getUrns(); final boolean deleted = input.getDeleted(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { // First, validate the entities exist @@ -50,7 +51,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw "Failed to perform batch soft delete against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private void validateInputUrns(List urnStrs, QueryContext context) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MoveDomainResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MoveDomainResolver.java index 8f17ba43219048..70f7913f7b08cb 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MoveDomainResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MoveDomainResolver.java @@ -6,6 +6,7 @@ import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import 
com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -39,7 +40,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final Urn newParentDomainUrn = input.getParentDomain() != null ? UrnUtils.getUrn(input.getParentDomain()) : null; - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!AuthorizationUtils.canManageDomains(context)) { throw new AuthorizationException( @@ -106,6 +107,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw input.getResourceUrn(), input.getParentDomain()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeBatchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeBatchResolver.java index 064b532a792c19..d647374b8e1efc 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeBatchResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeBatchResolver.java @@ -3,6 +3,7 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; import com.codahale.metrics.Timer; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.types.BatchMutableType; import com.linkedin.metadata.utils.metrics.MetricUtils; @@ -35,7 +36,7 @@ public CompletableFuture> get(DataFetchingEnvironment environment) throw final B[] input = bindArgument(environment.getArgument("input"), _batchMutableType.batchInputClass()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { Timer.Context timer = MetricUtils.timer(this.getClass(), "batchMutate").time(); @@ -49,6 +50,8 @@ public CompletableFuture> get(DataFetchingEnvironment environment) throw } finally { timer.stop(); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeResolver.java index c62282c9065971..23d16ed7d4d88b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeResolver.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.types.MutableType; import graphql.schema.DataFetcher; @@ -31,7 +32,7 @@ public MutableTypeResolver(final MutableType mutableType) { public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { final String urn = environment.getArgument("urn"); final I input = bindArgument(environment.getArgument("input"), _mutableType.inputClass()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _logger.debug(String.format("Mutating entity. 
input: %s", input)); @@ -46,6 +47,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws Exce throw new RuntimeException( String.format("Failed to perform update against input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveLinkResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveLinkResolver.java index 8628487638bff4..e047a24a0adaad 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveLinkResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveLinkResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.RemoveLinkInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.LinkUtils; @@ -35,7 +36,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw "Unauthorized to perform this action. Please contact your DataHub administrator."); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { LinkUtils.validateAddRemoveInput( context.getOperationContext(), linkUrl, targetUrn, _entityService); @@ -56,6 +57,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw "Failed to remove link from resource with input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java index 74727c02a778dd..8d148848855729 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.RemoveOwnerInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; @@ -37,7 +38,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw OwnerUtils.validateAuthorizedToUpdateOwners(context, targetUrn); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { OwnerUtils.validateRemoveInput(context.getOperationContext(), targetUrn, _entityService); try { @@ -57,6 +58,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw "Failed to remove owner from resource with input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveTagResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveTagResolver.java index 6bb3766a885d08..c1ff3514ff6c47 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveTagResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveTagResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.generated.TagAssociationInput; @@ -36,7 +37,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw "Unauthorized to perform this action. Please contact your DataHub administrator."); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { LabelUtils.validateResourceAndLabel( context.getOperationContext(), @@ -73,6 +74,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveTermResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveTermResolver.java index e44da52ec7e4e0..e9f5489f23af0c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveTermResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveTermResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.generated.TermAssociationInput; @@ -36,7 +37,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw "Unauthorized to perform this action. 
Please contact your DataHub administrator."); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { LabelUtils.validateResourceAndLabel( context.getOperationContext(), @@ -74,6 +75,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateDescriptionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateDescriptionResolver.java index 460616513fb96b..d571dae542d5a7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateDescriptionResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateDescriptionResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.DescriptionUpdateInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.GlossaryUtils; @@ -74,7 +75,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw private CompletableFuture updateContainerDescription( Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DescriptionUtils.isAuthorizedToUpdateContainerDescription(context, targetUrn)) { throw new AuthorizationException( @@ -99,12 +100,14 @@ private CompletableFuture updateContainerDescription( throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "updateContainerDescription"); } private CompletableFuture updateDomainDescription( Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DescriptionUtils.isAuthorizedToUpdateDomainDescription(context, targetUrn)) { throw new AuthorizationException( @@ -128,7 +131,9 @@ private CompletableFuture updateDomainDescription( throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "updateDomainDescription"); } // If updating schema field description fails, try again on a sibling until there are no more @@ -179,7 +184,7 @@ private Boolean attemptUpdateDatasetSchemaFieldDescription( private CompletableFuture updateDatasetSchemaFieldDescription( Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DescriptionUtils.isAuthorizedToUpdateFieldDescription(context, targetUrn)) { throw new AuthorizationException( @@ -197,12 +202,14 @@ private CompletableFuture updateDatasetSchemaFieldDescription( return attemptUpdateDatasetSchemaFieldDescription( targetUrn, input, context, new HashSet<>(), siblingUrns); - }); + }, + this.getClass().getSimpleName(), + "updateDatasetSchemaFieldDescription"); } private CompletableFuture 
updateTagDescription( Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DescriptionUtils.isAuthorizedToUpdateDescription(context, targetUrn)) { throw new AuthorizationException( @@ -226,12 +233,14 @@ private CompletableFuture updateTagDescription( throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "updateTagDescription"); } private CompletableFuture updateGlossaryTermDescription( Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final Urn parentNodeUrn = GlossaryUtils.getParentUrn(targetUrn, context, _entityClient); if (!DescriptionUtils.isAuthorizedToUpdateDescription(context, targetUrn) @@ -257,12 +266,14 @@ private CompletableFuture updateGlossaryTermDescription( throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "updateGlossaryTermDescription"); } private CompletableFuture updateGlossaryNodeDescription( Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final Urn parentNodeUrn = GlossaryUtils.getParentUrn(targetUrn, context, _entityClient); if (!DescriptionUtils.isAuthorizedToUpdateDescription(context, targetUrn) @@ -288,12 +299,14 @@ private CompletableFuture updateGlossaryNodeDescription( throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "updateGlossaryNodeDescription"); } private CompletableFuture updateCorpGroupDescription( Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DescriptionUtils.isAuthorizedToUpdateDescription(context, targetUrn)) { throw new AuthorizationException( @@ -317,12 +330,14 @@ private CompletableFuture updateCorpGroupDescription( throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "updateCorpGroupDescription"); } private CompletableFuture updateNotebookDescription( Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DescriptionUtils.isAuthorizedToUpdateDescription(context, targetUrn)) { throw new AuthorizationException( @@ -346,12 +361,14 @@ private CompletableFuture updateNotebookDescription( throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "updateNotebookDescription"); } private CompletableFuture updateMlModelDescription( Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DescriptionUtils.isAuthorizedToUpdateDescription(context, targetUrn)) { throw new AuthorizationException( @@ -375,12 +392,14 @@ private CompletableFuture updateMlModelDescription( throw new RuntimeException( String.format("Failed to perform update against input %s", 
input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "updateMlModelDescription"); } private CompletableFuture updateMlModelGroupDescription( Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DescriptionUtils.isAuthorizedToUpdateDescription(context, targetUrn)) { throw new AuthorizationException( @@ -404,12 +423,14 @@ private CompletableFuture updateMlModelGroupDescription( throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "updateMlModelGroupDescription"); } private CompletableFuture updateMlFeatureDescription( Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DescriptionUtils.isAuthorizedToUpdateDescription(context, targetUrn)) { throw new AuthorizationException( @@ -433,12 +454,14 @@ private CompletableFuture updateMlFeatureDescription( throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "updateMlFeatureDescription"); } private CompletableFuture updateMlPrimaryKeyDescription( Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DescriptionUtils.isAuthorizedToUpdateDescription(context, targetUrn)) { throw new AuthorizationException( @@ -462,12 +485,14 @@ private CompletableFuture updateMlPrimaryKeyDescription( throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "updateMlPrimaryKeyDescription"); } private CompletableFuture updateMlFeatureTableDescription( Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DescriptionUtils.isAuthorizedToUpdateDescription(context, targetUrn)) { throw new AuthorizationException( @@ -491,12 +516,14 @@ private CompletableFuture updateMlFeatureTableDescription( throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "updateMlFeatureTableDescription"); } private CompletableFuture updateDataProductDescription( Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DescriptionUtils.isAuthorizedToUpdateDescription(context, targetUrn)) { throw new AuthorizationException( @@ -520,12 +547,14 @@ private CompletableFuture updateDataProductDescription( throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "updateDataProductDescription"); } private CompletableFuture updateBusinessAttributeDescription( Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { // check if user has the rights to update description for business attribute if (!DescriptionUtils.isAuthorizedToUpdateDescription(context, targetUrn)) { @@ -552,6 +581,8 @@ private CompletableFuture 
updateBusinessAttributeDescription( throw new RuntimeException( String.format("Failed to perform update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "updateBusinessAttributeDescription"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java index 6f9ceec34bbf26..1d90720fc69023 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java @@ -9,6 +9,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -49,7 +50,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw Urn targetUrn = Urn.createFromString(input.getUrn()); log.info("Updating name. input: {}", input); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!_entityService.exists(context.getOperationContext(), targetUrn, true)) { throw new IllegalArgumentException( @@ -74,7 +75,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw String.format( "Failed to update name. Unsupported resource type %s provided.", targetUrn)); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private Boolean updateGlossaryTermName( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateParentNodeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateParentNodeResolver.java index 55461647374b81..7f714bfd33bd5a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateParentNodeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateParentNodeResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.GlossaryNodeUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.UpdateParentNodeInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.GlossaryUtils; @@ -55,7 +56,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw } GlossaryNodeUrn finalParentNodeUrn = parentNodeUrn; - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { Urn currentParentUrn = GlossaryUtils.getParentUrn(targetUrn, context, _entityClient); // need to be able to manage current parent node and new parent node @@ -78,7 +79,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } private Boolean updateGlossaryTermParentNode( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolver.java index 966b3d1b6a5381..8a598f8d8bbdda 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.UpdateUserSettingInput; import com.linkedin.datahub.graphql.generated.UserSetting; import com.linkedin.datahub.graphql.resolvers.settings.user.UpdateCorpUserViewsSettingsResolver; @@ -38,7 +39,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final boolean value = input.getValue(); final Urn actor = UrnUtils.getUrn(context.getActorUrn()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { // In the future with more settings, we'll need to do a read-modify-write @@ -72,6 +73,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw "Failed to perform user settings update against input %s", input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/operation/ReportOperationResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/operation/ReportOperationResolver.java index fea323a0502804..6ef3222bc068f2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/operation/ReportOperationResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/operation/ReportOperationResolver.java @@ -17,6 +17,7 @@ import com.linkedin.data.template.StringMap; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -51,7 +52,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final ReportOperationInput input = bindArgument(environment.getArgument("input"), ReportOperationInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { Urn entityUrn = UrnUtils.getUrn(input.getUrn()); @@ -73,7 +74,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw log.error("Failed to report operation. 
{}", e.getMessage()); throw new RuntimeException("Failed to report operation", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private Operation mapOperation(final ReportOperationInput input, final QueryContext context) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/CreateOwnershipTypeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/CreateOwnershipTypeResolver.java index 25a88846cbe463..8d48e62c11cdf3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/CreateOwnershipTypeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/CreateOwnershipTypeResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateOwnershipTypeInput; import com.linkedin.datahub.graphql.generated.EntityType; @@ -37,7 +38,7 @@ public CompletableFuture get(DataFetchingEnvironment enviro "Unauthorized to perform this action. Please contact your DataHub administrator."); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final Urn urn = @@ -51,7 +52,9 @@ public CompletableFuture get(DataFetchingEnvironment enviro throw new RuntimeException( String.format("Failed to perform update against input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private OwnershipTypeEntity createOwnershipType( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/DeleteOwnershipTypeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/DeleteOwnershipTypeResolver.java index e221bf4d389609..8f76cd148e4a79 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/DeleteOwnershipTypeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/DeleteOwnershipTypeResolver.java @@ -4,6 +4,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.metadata.service.OwnershipTypeService; import graphql.schema.DataFetcher; @@ -34,7 +35,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw "Unauthorized to perform this action. 
Please contact your DataHub administrator."); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _ownershipTypeService.deleteOwnershipType( @@ -45,6 +46,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to delete ownership type with urn %s", ownershipTypeUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java index aec3848f966406..0f0bb299eda837 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java @@ -4,6 +4,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.FacetFilterInput; import com.linkedin.datahub.graphql.generated.ListOwnershipTypesInput; @@ -47,7 +48,7 @@ public CompletableFuture get(DataFetchingEnvironment e final ListOwnershipTypesInput input = bindArgument(environment.getArgument("input"), ListOwnershipTypesInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final Integer start = input.getStart() == null ? DEFAULT_START : input.getStart(); final Integer count = input.getCount() == null ? DEFAULT_COUNT : input.getCount(); @@ -80,7 +81,9 @@ public CompletableFuture get(DataFetchingEnvironment e } catch (Exception e) { throw new RuntimeException("Failed to list custom ownership types", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private List mapUnresolvedOwnershipTypes(List entityUrns) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/UpdateOwnershipTypeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/UpdateOwnershipTypeResolver.java index 2eafd35a281bc2..45a20669558314 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/UpdateOwnershipTypeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/UpdateOwnershipTypeResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.OwnershipTypeEntity; import com.linkedin.datahub.graphql.generated.UpdateOwnershipTypeInput; @@ -40,7 +41,7 @@ public CompletableFuture get(DataFetchingEnvironment enviro "Unauthorized to perform this action. 
Please contact your DataHub administrator."); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _ownershipTypeService.updateOwnershipType( @@ -57,7 +58,9 @@ public CompletableFuture get(DataFetchingEnvironment enviro throw new RuntimeException( String.format("Failed to perform update against View with urn %s", urn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private OwnershipTypeEntity getOwnershipType( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/DeletePolicyResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/DeletePolicyResolver.java index aaaac898f656b1..f7b717f865035f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/DeletePolicyResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/DeletePolicyResolver.java @@ -3,6 +3,7 @@ import com.datahub.authorization.AuthorizerChain; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.entity.client.EntityClient; import graphql.schema.DataFetcher; @@ -24,7 +25,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) if (PolicyAuthUtils.canManagePolicies(context)) { final String policyUrn = environment.getArgument("urn"); final Urn urn = Urn.createFromString(policyUrn); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _entityClient.deleteEntity(context.getOperationContext(), urn); @@ -39,7 +40,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) String.format("Failed to perform delete against policy with urn %s", policyUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java index 7bfd166b18a205..a9097fa68a07de 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/GetGrantedPrivilegesResolver.java @@ -6,6 +6,7 @@ import com.datahub.authorization.DataHubAuthorizer; import com.datahub.authorization.EntitySpec; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.GetGrantedPrivilegesInput; import com.linkedin.datahub.graphql.generated.Privileges; @@ -44,8 +45,10 @@ public CompletableFuture get(final DataFetchingEnvironment environme DataHubAuthorizer dataHubAuthorizer = ((AuthorizerChain) context.getAuthorizer()).getDefaultAuthorizer(); List privileges = dataHubAuthorizer.getGrantedPrivileges(actor, resourceSpec); - return CompletableFuture.supplyAsync( - () -> Privileges.builder().setPrivileges(privileges).build()); + return GraphQLConcurrencyUtils.supplyAsync( + () -> Privileges.builder().setPrivileges(privileges).build(), + this.getClass().getSimpleName(), + "get"); } throw new UnsupportedOperationException( String.format( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/UpsertPolicyResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/UpsertPolicyResolver.java index 8b4a404271f12c..5fd6f0917a25ef 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/UpsertPolicyResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/UpsertPolicyResolver.java @@ -6,6 +6,7 @@ import com.datahub.authorization.AuthorizerChain; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.PolicyUpdateInput; import com.linkedin.datahub.graphql.resolvers.policy.mappers.PolicyUpdateInputInfoMapper; @@ -65,7 +66,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) key, POLICY_ENTITY_NAME, POLICY_INFO_ASPECT_NAME, info); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { String urn = @@ -80,7 +81,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throw new RuntimeException( String.format("Failed to perform update against input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/CreatePostResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/CreatePostResolver.java index cae02f67f4f448..8b4253501dedc7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/CreatePostResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/CreatePostResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.Media; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreatePostInput; import com.linkedin.datahub.graphql.generated.PostContentType; @@ -54,7 +55,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) PostContent postContent = _postService.mapPostContent(contentType.toString(), title, description, link, media); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { return _postService.createPost( @@ -62,6 +63,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } catch (Exception e) { throw new RuntimeException("Failed to create a new post", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/DeletePostResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/DeletePostResolver.java index 884722d96aa8ab..572b78a3e2cf63 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/DeletePostResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/DeletePostResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -31,13 +32,15 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final Urn postUrn = UrnUtils.getUrn(environment.getArgument("urn")); final Authentication authentication = context.getAuthentication(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { return _postService.deletePost(context.getOperationContext(), postUrn); } catch (Exception e) { throw new RuntimeException("Failed to create a new post", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java index 4b348b91673632..12e4047c2dc4e5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java @@ -6,6 +6,7 @@ import com.datahub.authentication.Authentication; import 
com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.ListPostsInput; import com.linkedin.datahub.graphql.generated.ListPostsResult; import com.linkedin.datahub.graphql.types.post.PostMapper; @@ -45,7 +46,7 @@ public CompletableFuture get(final DataFetchingEnvironment envi final Integer count = input.getCount() == null ? DEFAULT_COUNT : input.getCount(); final String query = input.getQuery() == null ? DEFAULT_QUERY : input.getQuery(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final SortCriterion sortCriterion = @@ -87,6 +88,8 @@ public CompletableFuture get(final DataFetchingEnvironment envi } catch (Exception e) { throw new RuntimeException("Failed to list posts", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/UpdatePostResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/UpdatePostResolver.java index d6ff37f8e5d5fa..8f7eee74046841 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/UpdatePostResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/UpdatePostResolver.java @@ -8,6 +8,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.PostContentType; import com.linkedin.datahub.graphql.generated.PostType; @@ -58,7 +59,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) PostContent postContent = postService.mapPostContent(contentType.toString(), title, description, link, media); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { return postService.updatePost( @@ -66,6 +67,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } catch (Exception e) { throw new GraphQLException("Failed to update or edit post", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolver.java index 4e390599bc7db4..dde38566490142 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateQueryInput; import com.linkedin.datahub.graphql.generated.CreateQuerySubjectInput; @@ -39,7 +40,7 @@ public CompletableFuture get(final DataFetchingEnvironment environm bindArgument(environment.getArgument("input"), CreateQueryInput.class); final 
Authentication authentication = context.getAuthentication(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!AuthorizationUtils.canCreateQuery( input.getSubjects().stream() @@ -76,6 +77,8 @@ public CompletableFuture get(final DataFetchingEnvironment environm throw new RuntimeException( String.format("Failed to create a new Query from input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolver.java index 70b6822cdc94a3..0879b41dd27abd 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.metadata.service.QueryService; import com.linkedin.query.QuerySubject; @@ -31,7 +32,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final Urn queryUrn = UrnUtils.getUrn(environment.getArgument("urn")); final Authentication authentication = context.getAuthentication(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final QuerySubjects existingSubjects = _queryService.getQuerySubjects(context.getOperationContext(), queryUrn); @@ -53,6 +54,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } catch (Exception e) { throw new RuntimeException("Failed to delete Query", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java index 6fcc0fee763038..b0d84942b12572 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java @@ -6,6 +6,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.AndFilterInput; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.FacetFilterInput; @@ -56,7 +57,7 @@ public CompletableFuture get(final DataFetchingEnvironment en final Integer count = input.getCount() == null ? DEFAULT_COUNT : input.getCount(); final String query = input.getQuery() == null ? 
DEFAULT_QUERY : input.getQuery(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final SortCriterion sortCriterion = @@ -89,7 +90,9 @@ public CompletableFuture get(final DataFetchingEnvironment en } catch (Exception e) { throw new RuntimeException("Failed to list Queries", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } // This method maps urns returned from the list endpoint into Partial Query objects which will be diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolver.java index 4276185d435b06..29a02b2b0519bf 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -44,7 +45,7 @@ public CompletableFuture get(final DataFetchingEnvironment environm final Urn queryUrn = UrnUtils.getUrn(environment.getArgument("urn")); final Authentication authentication = context.getAuthentication(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final QuerySubjects existingSubjects = _queryService.getQuerySubjects(context.getOperationContext(), queryUrn); @@ -103,6 +104,8 @@ public CompletableFuture get(final DataFetchingEnvironment environm throw new RuntimeException( String.format("Failed to update Query from input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java index c13a6ce732b91a..202c78a62c9ae5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.ContentParams; import com.linkedin.datahub.graphql.generated.EntityProfileParams; import com.linkedin.datahub.graphql.generated.FacetFilter; @@ -54,7 +55,7 @@ public CompletableFuture get(DataFetchingEnvironment final ListRecommendationsInput input = bindArgument(environment.getArgument("input"), ListRecommendationsInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { log.debug("Listing recommendations for input {}", input); @@ -76,7 +77,9 @@ public CompletableFuture get(DataFetchingEnvironment log.error("Failed to get recommendations for 
input {}", input, e); return EMPTY_RECOMMENDATIONS; } - }); + }, + this.getClass().getSimpleName(), + "get"); } private com.linkedin.metadata.recommendation.RecommendationRequestContext mapRequestContext( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/AcceptRoleResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/AcceptRoleResolver.java index e1d3e7fe3d47cd..51b3569c3cc6ac 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/AcceptRoleResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/AcceptRoleResolver.java @@ -7,6 +7,7 @@ import com.datahub.authorization.role.RoleService; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.AcceptRoleInput; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -30,7 +31,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final String inviteTokenStr = input.getInviteToken(); final Authentication authentication = context.getAuthentication(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final Urn inviteTokenUrn = _inviteTokenService.getInviteTokenUrn(inviteTokenStr); @@ -53,6 +54,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to accept role using invite token %s", inviteTokenStr), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/BatchAssignRoleResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/BatchAssignRoleResolver.java index 8c14ffdb4c4033..efadc2288df97d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/BatchAssignRoleResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/BatchAssignRoleResolver.java @@ -7,6 +7,7 @@ import com.datahub.authorization.role.RoleService; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.BatchAssignRoleInput; import graphql.schema.DataFetcher; @@ -35,7 +36,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final List actors = input.getActors(); final Authentication authentication = context.getAuthentication(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final Urn roleUrn = roleUrnStr == null ? 
null : Urn.createFromString(roleUrnStr); @@ -45,6 +46,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to perform update against input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/CreateInviteTokenResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/CreateInviteTokenResolver.java index 3bf11b9febc638..934a9d66fe2091 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/CreateInviteTokenResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/CreateInviteTokenResolver.java @@ -6,6 +6,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.invite.InviteTokenService; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateInviteTokenInput; import com.linkedin.datahub.graphql.generated.InviteToken; @@ -34,7 +35,7 @@ public CompletableFuture get(final DataFetchingEnvironment environm final String roleUrnStr = input.getRoleUrn(); final Authentication authentication = context.getAuthentication(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { return new InviteToken( @@ -44,6 +45,8 @@ public CompletableFuture get(final DataFetchingEnvironment environm throw new RuntimeException( String.format("Failed to create invite token for role %s", roleUrnStr), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/GetInviteTokenResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/GetInviteTokenResolver.java index 039a1730e7e67a..20477e75699163 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/GetInviteTokenResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/GetInviteTokenResolver.java @@ -6,6 +6,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.invite.InviteTokenService; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.GetInviteTokenInput; import com.linkedin.datahub.graphql.generated.InviteToken; @@ -34,7 +35,7 @@ public CompletableFuture get(final DataFetchingEnvironment environm final String roleUrnStr = input.getRoleUrn(); final Authentication authentication = context.getAuthentication(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { return new InviteToken( @@ -44,6 +45,8 @@ public CompletableFuture get(final DataFetchingEnvironment environm throw new RuntimeException( String.format("Failed to get invite token for role %s", roleUrnStr), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/ListRolesResolver.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/ListRolesResolver.java index b3c5e5485c951c..813753289a7580 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/ListRolesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/role/ListRolesResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.DataHubRole; import com.linkedin.datahub.graphql.generated.ListRolesInput; import com.linkedin.datahub.graphql.generated.ListRolesResult; @@ -47,7 +48,7 @@ public CompletableFuture get(final DataFetchingEnvironment envi final Integer count = input.getCount() == null ? DEFAULT_COUNT : input.getCount(); final String query = input.getQuery() == null ? DEFAULT_QUERY : input.getQuery(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { // First, get all role Urns. @@ -80,7 +81,9 @@ public CompletableFuture get(final DataFetchingEnvironment envi } catch (Exception e) { throw new RuntimeException("Failed to list roles", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private static List mapEntitiesToRoles( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java index 783789c738de07..10a09b95bfd6ea 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.AggregateAcrossEntitiesInput; import com.linkedin.datahub.graphql.generated.AggregateResults; import com.linkedin.datahub.graphql.resolvers.ResolverUtils; @@ -52,7 +53,7 @@ public CompletableFuture get(DataFetchingEnvironment environme // escape forward slash since it is a reserved character in Elasticsearch final String sanitizedQuery = ResolverUtils.escapeForwardSlash(input.getQuery()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final DataHubViewInfo maybeResolvedView = (input.getViewUrn() != null) @@ -106,7 +107,9 @@ public CompletableFuture get(DataFetchingEnvironment environme input.getTypes(), input.getQuery(), input.getOrFilters()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } static AggregateResults mapAggregateResults( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java index 235f5f8d278995..13861c94ba3368 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java @@ -3,6 +3,7 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; import static 
org.apache.commons.lang3.StringUtils.isBlank; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.ValidationException; import com.linkedin.datahub.graphql.generated.AutoCompleteInput; import com.linkedin.datahub.graphql.generated.AutoCompleteResults; @@ -50,7 +51,7 @@ public CompletableFuture get(DataFetchingEnvironment enviro final Filter filter = ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); final int limit = input.getLimit() != null ? input.getLimit() : DEFAULT_LIMIT; - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _logger.debug( @@ -89,6 +90,8 @@ public CompletableFuture get(DataFetchingEnvironment enviro input.getLimit()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java index 9cd860781c0d68..c849e3ad3f68c7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.search; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.AutoCompleteMultipleInput; import com.linkedin.datahub.graphql.generated.AutoCompleteMultipleResults; import com.linkedin.datahub.graphql.generated.AutoCompleteResultForEntity; @@ -37,7 +38,7 @@ public static CompletableFuture batchGetAutocomplet entities.stream() .map( entity -> - CompletableFuture.supplyAsync( + GraphQLConcurrencyUtils.supplyAsync( () -> { final Filter filter = ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); @@ -72,7 +73,9 @@ public static CompletableFuture batchGetAutocomplet return new AutoCompleteResultForEntity( entity.type(), Collections.emptyList(), Collections.emptyList()); } - })) + }, + AutocompleteUtils.class.getSimpleName(), + "batchGetAutocompleteResults")) .collect(Collectors.toList()); return CompletableFuture.allOf(autoCompletesFuture.toArray(new CompletableFuture[0])) .thenApplyAsync( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java index 06f05cb73000ca..a61d9111321ca7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.GetQuickFiltersInput; import com.linkedin.datahub.graphql.generated.GetQuickFiltersResult; @@ -53,7 +54,7 @@ public CompletableFuture get(final DataFetchingEnvironmen final GetQuickFiltersInput input = bindArgument(environment.getArgument("input"), GetQuickFiltersInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final 
GetQuickFiltersResult result = new GetQuickFiltersResult(); final List quickFilters = new ArrayList<>(); @@ -73,7 +74,9 @@ public CompletableFuture get(final DataFetchingEnvironmen result.setQuickFilters(quickFilters); return result; - }); + }, + this.getClass().getSimpleName(), + "get"); } /** diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java index 036e1bb6717f7b..77eef1b9a25c69 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.ScrollAcrossEntitiesInput; import com.linkedin.datahub.graphql.generated.ScrollResults; @@ -61,7 +62,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) @Nullable final String scrollId = input.getScrollId(); final int count = input.getCount() != null ? input.getCount() : DEFAULT_COUNT; - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final DataHubViewInfo maybeResolvedView = (input.getViewUrn() != null) @@ -123,6 +124,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) input.getTypes(), input.getQuery(), input.getOrFilters(), scrollId, count), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java index f5ce7f82573554..addd217d687241 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.AndFilterInput; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.FacetFilterInput; @@ -91,7 +92,7 @@ public CompletableFuture get(DataFetchingEnvironment com.linkedin.metadata.graph.LineageDirection resolvedDirection = com.linkedin.metadata.graph.LineageDirection.valueOf(lineageDirection.toString()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { log.debug( @@ -156,6 +157,8 @@ public CompletableFuture get(DataFetchingEnvironment count), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java index 88e5dd0a94f0ba..a9da1c40554345 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.SearchAcrossEntitiesInput; import com.linkedin.datahub.graphql.generated.SearchResults; import com.linkedin.datahub.graphql.resolvers.ResolverUtils; @@ -47,7 +48,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) final int start = input.getStart() != null ? input.getStart() : DEFAULT_START; final int count = input.getCount() != null ? input.getCount() : DEFAULT_COUNT; - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final DataHubViewInfo maybeResolvedView = (input.getViewUrn() != null) @@ -112,6 +113,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) input.getTypes(), input.getQuery(), input.getOrFilters(), start, count), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java index 8df6c241f2965f..238f2375ee207d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java @@ -7,6 +7,7 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.AndFilterInput; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.FacetFilterInput; @@ -123,7 +124,7 @@ public CompletableFuture get(DataFetchingEnvironment com.linkedin.metadata.graph.LineageDirection resolvedDirection = com.linkedin.metadata.graph.LineageDirection.valueOf(lineageDirection.toString()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { log.debug( @@ -190,6 +191,8 @@ public CompletableFuture get(DataFetchingEnvironment } finally { log.debug("Returning from search across lineage resolver"); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java index ed9838b7074c71..35586ea29571c8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java @@ -5,6 +5,7 @@ import static com.linkedin.metadata.search.utils.SearchUtils.applyDefaultSearchFlags; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.SearchInput; import com.linkedin.datahub.graphql.generated.SearchResults; import 
com.linkedin.datahub.graphql.resolvers.ResolverUtils; @@ -65,7 +66,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) searchFlags = applyDefaultSearchFlags(null, sanitizedQuery, SEARCH_RESOLVER_DEFAULTS); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { log.debug( @@ -111,6 +112,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) searchFlags), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/user/UpdateCorpUserViewsSettingsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/user/UpdateCorpUserViewsSettingsResolver.java index dc38679d140912..3d9488a954af1d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/user/UpdateCorpUserViewsSettingsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/user/UpdateCorpUserViewsSettingsResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.UpdateCorpUserViewsSettingsInput; import com.linkedin.identity.CorpUserAppearanceSettings; import com.linkedin.identity.CorpUserSettings; @@ -32,7 +33,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final UpdateCorpUserViewsSettingsInput input = bindArgument(environment.getArgument("input"), UpdateCorpUserViewsSettingsInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { @@ -65,7 +66,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw input.toString()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private static void updateCorpUserSettings( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/view/GlobalViewsSettingsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/view/GlobalViewsSettingsResolver.java index a9736f15e79f7e..079799a269b01e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/view/GlobalViewsSettingsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/view/GlobalViewsSettingsResolver.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.settings.view; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.GlobalViewsSettings; import com.linkedin.metadata.service.SettingsService; import com.linkedin.settings.global.GlobalSettingsInfo; @@ -30,7 +31,7 @@ public GlobalViewsSettingsResolver(final SettingsService settingsService) { public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final GlobalSettingsInfo globalSettings = @@ -41,7 +42,9 @@ public CompletableFuture get(final DataFetchingEnvironment } catch (Exception e) { throw new RuntimeException("Failed to retrieve Global Views Settings", e); } - }); + }, + 
this.getClass().getSimpleName(), + "get"); } private static GlobalViewsSettings mapGlobalViewsSettings( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/view/UpdateGlobalViewsSettingsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/view/UpdateGlobalViewsSettingsResolver.java index d4b6db3f5ae993..9ef6f9474fa510 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/view/UpdateGlobalViewsSettingsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/settings/view/UpdateGlobalViewsSettingsResolver.java @@ -6,6 +6,7 @@ import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.UpdateGlobalViewsSettingsInput; import com.linkedin.metadata.service.SettingsService; @@ -37,7 +38,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final UpdateGlobalViewsSettingsInput input = bindArgument(environment.getArgument("input"), UpdateGlobalViewsSettingsInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (AuthorizationUtils.canManageGlobalViews(context)) { try { @@ -68,7 +69,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } private static void updateViewsSettings( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/step/BatchGetStepStatesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/step/BatchGetStepStatesResolver.java index 8a4b1c2ff0d380..afcc28e1fb2784 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/step/BatchGetStepStatesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/step/BatchGetStepStatesResolver.java @@ -8,6 +8,7 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.BatchGetStepStatesInput; import com.linkedin.datahub.graphql.generated.BatchGetStepStatesResult; import com.linkedin.datahub.graphql.generated.StepStateResult; @@ -46,7 +47,7 @@ public CompletableFuture get( final BatchGetStepStatesInput input = bindArgument(environment.getArgument("input"), BatchGetStepStatesInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { Map urnsToIdsMap; Set urns; @@ -84,7 +85,9 @@ public CompletableFuture get( final BatchGetStepStatesResult result = new BatchGetStepStatesResult(); result.setResults(results); return result; - }); + }, + this.getClass().getSimpleName(), + "get"); } @Nonnull diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/step/BatchUpdateStepStatesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/step/BatchUpdateStepStatesResolver.java index 
ea04f0fd2af34a..5b8481291f1054 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/step/BatchUpdateStepStatesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/step/BatchUpdateStepStatesResolver.java @@ -10,6 +10,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.StringMap; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.BatchUpdateStepStatesInput; import com.linkedin.datahub.graphql.generated.BatchUpdateStepStatesResult; import com.linkedin.datahub.graphql.generated.StepStateInput; @@ -47,7 +48,7 @@ public CompletableFuture get( final List states = input.getStates(); final String actorUrnStr = authentication.getActor().toUrnStr(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final Urn actorUrn = UrnUtils.getUrn(actorUrnStr); final AuditStamp auditStamp = @@ -62,7 +63,9 @@ public CompletableFuture get( final BatchUpdateStepStatesResult result = new BatchUpdateStepStatesResult(); result.setResults(results); return result; - }); + }, + this.getClass().getSimpleName(), + "get"); } private UpdateStepStateResult buildUpdateStepStateResult( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpsertStructuredPropertiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpsertStructuredPropertiesResolver.java index 95c3bebeeecf83..5f69512e5946b0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpsertStructuredPropertiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpsertStructuredPropertiesResolver.java @@ -9,6 +9,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.PropertyValueInput; import com.linkedin.datahub.graphql.generated.UpsertStructuredPropertiesInput; @@ -60,7 +61,7 @@ public CompletableFuture updateMap.put(param.getStructuredPropertyUrn(), param.getValues())); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { // check authorization first @@ -104,7 +105,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws final CreateTagInput input = bindArgument(environment.getArgument("input"), CreateTagInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!AuthorizationUtils.canCreateTags(context)) { throw new AuthorizationException( @@ -84,7 +85,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws "Failed to create Tag with id: %s, name: %s", input.getId(), input.getName()), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private TagProperties mapTagProperties(final CreateTagInput input) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/DeleteTagResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/DeleteTagResolver.java index 
b29ca8335247fa..2dabae55dd3e7b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/DeleteTagResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/DeleteTagResolver.java @@ -4,6 +4,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.entity.client.EntityClient; import graphql.schema.DataFetcher; @@ -28,7 +29,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final String tagUrn = environment.getArgument("urn"); final Urn urn = Urn.createFromString(tagUrn); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (AuthorizationUtils.canManageTags(context) || AuthorizationUtils.canDeleteEntity(UrnUtils.getUrn(tagUrn), context)) { @@ -57,6 +58,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/SetTagColorResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/SetTagColorResolver.java index d5e3c423f0d791..7a059ed9a1aeda 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/SetTagColorResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/SetTagColorResolver.java @@ -10,6 +10,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.authorization.PoliciesConfig; @@ -43,7 +44,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final Urn tagUrn = Urn.createFromString(environment.getArgument("urn")); final String colorHex = environment.getArgument("colorHex"); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { // If user is not authorized, then throw exception. 
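/*
 * Editorial note (not part of the patch): every resolver touched above follows the same
 * mechanical migration -- CompletableFuture.supplyAsync(supplier) becomes
 * GraphQLConcurrencyUtils.supplyAsync(supplier, callerName, taskName), where callerName is
 * the resolver's simple class name and taskName is the resolver method ("get"). The real
 * GraphQLConcurrencyUtils is introduced earlier in this change; the class below is only an
 * illustrative sketch of what such a helper could look like. The class name, executor
 * sizing, thread naming, and the timing log are assumptions for illustration, not the
 * actual implementation.
 */
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;

public final class IllustrativeGraphQLConcurrencyUtils {

  // Dedicated worker pool so GraphQL resolver work stays off ForkJoinPool.commonPool().
  private static final ExecutorService GRAPHQL_EXECUTOR =
      Executors.newFixedThreadPool(
          Math.max(4, Runtime.getRuntime().availableProcessors()), namedThreadFactory());

  private IllustrativeGraphQLConcurrencyUtils() {}

  /**
   * Drop-in replacement for CompletableFuture.supplyAsync: runs the supplier on the shared
   * GraphQL pool and records which resolver ("caller") and method ("task") submitted the
   * work, so slow resolvers can be attributed.
   */
  public static <T> CompletableFuture<T> supplyAsync(
      Supplier<T> supplier, String caller, String task) {
    final long submittedAt = System.nanoTime();
    return CompletableFuture.supplyAsync(
        () -> {
          long startedAt = System.nanoTime();
          try {
            return supplier.get();
          } finally {
            long finishedAt = System.nanoTime();
            // A real implementation would publish these timings to a metrics registry;
            // here we simply log queue time and execution time per caller/task.
            System.out.printf(
                "%s#%s queued=%dms ran=%dms%n",
                caller,
                task,
                (startedAt - submittedAt) / 1_000_000,
                (finishedAt - startedAt) / 1_000_000);
          }
        },
        GRAPHQL_EXECUTOR);
  }

  private static ThreadFactory namedThreadFactory() {
    AtomicLong counter = new AtomicLong();
    return runnable -> {
      Thread thread = new Thread(runnable, "graphql-worker-" + counter.incrementAndGet());
      thread.setDaemon(true);
      return thread;
    };
  }
}
// Usage mirrors the pattern in the hunks above, e.g. inside a resolver's get():
//   return IllustrativeGraphQLConcurrencyUtils.supplyAsync(
//       () -> doWork(), this.getClass().getSimpleName(), "get");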
@@ -86,7 +87,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new RuntimeException( String.format("Failed to set color for Tag with urn %s", tagUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } public static boolean isAuthorizedToSetTagColor(@Nonnull QueryContext context, Urn entityUrn) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/CreateTestResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/CreateTestResolver.java index 88e8f3e530410a..25fab6281f9483 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/CreateTestResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/CreateTestResolver.java @@ -8,6 +8,7 @@ import com.datahub.authentication.Authentication; import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateTestInput; import com.linkedin.entity.client.EntityClient; @@ -36,7 +37,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final CreateTestInput input = bindArgument(environment.getArgument("input"), CreateTestInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (canManageTests(context)) { @@ -71,7 +72,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } private static TestInfo mapCreateTestInput(final CreateTestInput input) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/DeleteTestResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/DeleteTestResolver.java index fab6bb3c412cd5..d9a7ed3f90ebee 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/DeleteTestResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/DeleteTestResolver.java @@ -4,6 +4,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.entity.client.EntityClient; import graphql.schema.DataFetcher; @@ -28,7 +29,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final QueryContext context = environment.getContext(); final String testUrn = environment.getArgument("urn"); final Urn urn = Urn.createFromString(testUrn); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (canManageTests(context)) { try { @@ -41,6 +42,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/ListTestsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/ListTestsResolver.java index 22c3b87712a347..9be0bcb0e18397 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/ListTestsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/ListTestsResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.ListTestsInput; @@ -43,7 +44,7 @@ public CompletableFuture get(final DataFetchingEnvironment envi final QueryContext context = environment.getContext(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (canManageTests(context) || canViewTests(context)) { final ListTestsInput input = @@ -78,7 +79,9 @@ public CompletableFuture get(final DataFetchingEnvironment envi } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } // This method maps urns returned from the list endpoint into Partial Test objects which will be diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/TestResultsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/TestResultsResolver.java index 0a834e8a665b17..df3566cf0efab9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/TestResultsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/TestResultsResolver.java @@ -3,6 +3,7 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.Test; import com.linkedin.datahub.graphql.generated.TestResult; @@ -35,7 +36,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) t final QueryContext context = environment.getContext(); final Urn entityUrn = Urn.createFromString(((Entity) environment.getSource()).getUrn()); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final com.linkedin.test.TestResults gmsTestResults = getTestResults(entityUrn, context); @@ -47,7 +48,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) t testResults.setPassing(mapTestResults(gmsTestResults.getPassing())); testResults.setFailing(mapTestResults(gmsTestResults.getFailing())); return testResults; - }); + }, + this.getClass().getSimpleName(), + "get"); } @Nullable diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/UpdateTestResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/UpdateTestResolver.java index 3c469b2d4f3f3f..20973170f5686a 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/UpdateTestResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/UpdateTestResolver.java @@ -9,6 +9,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.UpdateTestInput; import com.linkedin.entity.client.EntityClient; @@ -32,7 +33,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final QueryContext context = environment.getContext(); final Authentication authentication = context.getAuthentication(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (canManageTests(context)) { @@ -55,7 +56,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } private static TestInfo mapUpdateTestInput(final UpdateTestInput input) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/timeline/GetSchemaBlameResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/timeline/GetSchemaBlameResolver.java index ea234280ed6c26..fd1179f91f3bdb 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/timeline/GetSchemaBlameResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/timeline/GetSchemaBlameResolver.java @@ -3,6 +3,7 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.GetSchemaBlameInput; import com.linkedin.datahub.graphql.generated.GetSchemaBlameResult; import com.linkedin.datahub.graphql.types.timeline.mappers.SchemaBlameMapper; @@ -42,7 +43,7 @@ public CompletableFuture get(final DataFetchingEnvironment final long endTime = 0; final String version = input.getVersion() == null ? 
null : input.getVersion(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final Set changeCategorySet = @@ -63,6 +64,8 @@ public CompletableFuture get(final DataFetchingEnvironment log.error("Failed to list schema blame data", e); return null; } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/timeline/GetSchemaVersionListResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/timeline/GetSchemaVersionListResolver.java index 5063dbbf7ccf32..28f3c544f9d16f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/timeline/GetSchemaVersionListResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/timeline/GetSchemaVersionListResolver.java @@ -3,6 +3,7 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.GetSchemaVersionListInput; import com.linkedin.datahub.graphql.generated.GetSchemaVersionListResult; import com.linkedin.datahub.graphql.types.timeline.mappers.SchemaVersionListMapper; @@ -40,7 +41,7 @@ public CompletableFuture get( final long startTime = 0; final long endTime = 0; - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final Set changeCategorySet = new HashSet<>(); @@ -61,6 +62,8 @@ public CompletableFuture get( log.error("Failed to list schema blame data", e); return null; } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/CreateNativeUserResetTokenResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/CreateNativeUserResetTokenResolver.java index a8c0fb7d376ae9..5ffab37c3408ea 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/CreateNativeUserResetTokenResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/CreateNativeUserResetTokenResolver.java @@ -5,6 +5,7 @@ import com.datahub.authentication.user.NativeUserService; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateNativeUserResetTokenInput; import com.linkedin.datahub.graphql.generated.ResetToken; @@ -40,7 +41,7 @@ public CompletableFuture get(final DataFetchingEnvironment environme "Unauthorized to perform this action. 
Please contact your DataHub administrator."); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { String resetToken = @@ -52,6 +53,8 @@ public CompletableFuture get(final DataFetchingEnvironment environme String.format( "Failed to generate password reset token for user: %s", userUrnString)); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/ListUsersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/ListUsersResolver.java index 722b773d426321..2445ff3130ba93 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/ListUsersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/ListUsersResolver.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CorpUser; import com.linkedin.datahub.graphql.generated.ListUsersInput; @@ -51,7 +52,7 @@ public CompletableFuture get(final DataFetchingEnvironment envi final Integer count = input.getCount() == null ? DEFAULT_COUNT : input.getCount(); final String query = input.getQuery() == null ? DEFAULT_QUERY : input.getQuery(); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { // First, get all policy Urns. @@ -87,7 +88,9 @@ public CompletableFuture get(final DataFetchingEnvironment envi } catch (Exception e) { throw new RuntimeException("Failed to list users", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/RemoveUserResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/RemoveUserResolver.java index f212b43807dc13..7fa8bf7333f5fd 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/RemoveUserResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/RemoveUserResolver.java @@ -3,6 +3,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.entity.client.EntityClient; import graphql.schema.DataFetcher; @@ -27,7 +28,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) if (AuthorizationUtils.canManageUsersAndGroups(context)) { final String userUrn = environment.getArgument("urn"); final Urn urn = Urn.createFromString(userUrn); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _entityClient.deleteEntity(context.getOperationContext(), urn); @@ -51,7 +52,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throw new RuntimeException( String.format("Failed to perform delete against user with urn %s", userUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/UpdateUserStatusResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/UpdateUserStatusResolver.java index 478bc087115e1c..88130ade61a676 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/UpdateUserStatusResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/UpdateUserStatusResolver.java @@ -8,6 +8,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CorpUserStatus; import com.linkedin.entity.client.EntityClient; @@ -45,7 +46,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) .setTime(System.currentTimeMillis()) .setActor(Urn.createFromString(context.getActorUrn()))); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { final MetadataChangeProposal proposal = @@ -56,7 +57,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throw new RuntimeException( String.format("Failed to update user status for urn", userUrn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolver.java index 332e92ba7edd88..49c3467adb58bc 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolver.java @@ -4,6 +4,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateViewInput; import com.linkedin.datahub.graphql.generated.DataHubView; @@ -37,7 +38,7 @@ public CompletableFuture get(final DataFetchingEnvironment environm final CreateViewInput input = bindArgument(environment.getArgument("input"), CreateViewInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { if (ViewUtils.canCreateView( DataHubViewType.valueOf(input.getViewType().toString()), context)) { @@ -58,7 +59,9 @@ public CompletableFuture get(final DataFetchingEnvironment environm } throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); - }); + }, + this.getClass().getSimpleName(), + "get"); } private DataHubView createView(@Nonnull final Urn urn, @Nonnull final CreateViewInput input) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/DeleteViewResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/DeleteViewResolver.java index a10e0a5c7b962f..974b6f37203354 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/DeleteViewResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/DeleteViewResolver.java @@ -2,6 +2,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.metadata.service.ViewService; import graphql.schema.DataFetcher; @@ -27,7 +28,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final QueryContext context = environment.getContext(); final String urnStr = environment.getArgument("urn"); final Urn urn = Urn.createFromString(urnStr); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { if (ViewUtils.canUpdateView(_viewService, urn, context)) { @@ -43,6 +44,8 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throw new RuntimeException( String.format("Failed to perform delete against View with urn %s", urn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java index 80d33b84b4c763..28b8fe50b70d65 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.AndFilterInput; import com.linkedin.datahub.graphql.generated.DataHubView; import com.linkedin.datahub.graphql.generated.DataHubViewType; @@ -57,7 +58,7 @@ public CompletableFuture get(final DataFetchingEnvironment envi final ListGlobalViewsInput input = bindArgument(environment.getArgument("input"), ListGlobalViewsInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final Integer start = input.getStart() == null ? DEFAULT_START : input.getStart(); final Integer count = input.getCount() == null ? DEFAULT_COUNT : input.getCount(); @@ -88,7 +89,9 @@ public CompletableFuture get(final DataFetchingEnvironment envi } catch (Exception e) { throw new RuntimeException("Failed to list global Views", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } // This method maps urns returned from the list endpoint into Partial View objects which will be diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java index fd029f9d6d3b2c..218bedcd0beffb 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.AndFilterInput; import com.linkedin.datahub.graphql.generated.DataHubView; import com.linkedin.datahub.graphql.generated.EntityType; @@ -58,7 +59,7 @@ public CompletableFuture get(final DataFetchingEnvironment envi final ListMyViewsInput input = bindArgument(environment.getArgument("input"), ListMyViewsInput.class); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { final Integer start = input.getStart() == null ? DEFAULT_START : input.getStart(); final Integer count = input.getCount() == null ? 
DEFAULT_COUNT : input.getCount(); @@ -91,7 +92,9 @@ public CompletableFuture get(final DataFetchingEnvironment envi } catch (Exception e) { throw new RuntimeException("Failed to list Views", e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } // This method maps urns returned from the list endpoint into Partial View objects which will be diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolver.java index f05195655177c8..9ab5efe83105a0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolver.java @@ -5,6 +5,7 @@ import com.datahub.authentication.Authentication; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.DataHubView; import com.linkedin.datahub.graphql.generated.UpdateViewInput; @@ -38,7 +39,7 @@ public CompletableFuture get(final DataFetchingEnvironment environm bindArgument(environment.getArgument("input"), UpdateViewInput.class); final Urn urn = Urn.createFromString(urnStr); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { if (ViewUtils.canUpdateView(_viewService, urn, context)) { @@ -60,7 +61,9 @@ public CompletableFuture get(final DataFetchingEnvironment environm throw new RuntimeException( String.format("Failed to perform update against View with urn %s", urn), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } private DataHubView getView( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapper.java index ca13792b1e92b2..1e7fac2edbc9a9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapper.java @@ -2,6 +2,8 @@ import static com.linkedin.metadata.Constants.GLOBAL_TAGS_ASPECT_NAME; +import com.linkedin.assertion.AssertionAction; +import com.linkedin.assertion.AssertionActions; import com.linkedin.assertion.AssertionInfo; import com.linkedin.common.DataPlatformInstance; import com.linkedin.common.GlobalTags; @@ -10,24 +12,40 @@ import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.Assertion; +import com.linkedin.datahub.graphql.generated.AssertionActionType; +import com.linkedin.datahub.graphql.generated.AssertionSource; +import com.linkedin.datahub.graphql.generated.AssertionSourceType; import com.linkedin.datahub.graphql.generated.AssertionStdAggregation; import com.linkedin.datahub.graphql.generated.AssertionStdOperator; import com.linkedin.datahub.graphql.generated.AssertionStdParameter; import com.linkedin.datahub.graphql.generated.AssertionStdParameterType; import com.linkedin.datahub.graphql.generated.AssertionStdParameters; import com.linkedin.datahub.graphql.generated.AssertionType; +import com.linkedin.datahub.graphql.generated.AuditStamp; import 
com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.DatasetAssertionInfo; import com.linkedin.datahub.graphql.generated.DatasetAssertionScope; +import com.linkedin.datahub.graphql.generated.DateInterval; import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.FieldAssertionInfo; +import com.linkedin.datahub.graphql.generated.FixedIntervalSchedule; +import com.linkedin.datahub.graphql.generated.FreshnessAssertionInfo; +import com.linkedin.datahub.graphql.generated.SchemaAssertionCompatibility; +import com.linkedin.datahub.graphql.generated.SchemaAssertionField; +import com.linkedin.datahub.graphql.generated.SchemaAssertionInfo; import com.linkedin.datahub.graphql.generated.SchemaFieldRef; +import com.linkedin.datahub.graphql.generated.SqlAssertionInfo; +import com.linkedin.datahub.graphql.generated.VolumeAssertionInfo; import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; +import com.linkedin.datahub.graphql.types.dataset.mappers.SchemaFieldMapper; +import com.linkedin.datahub.graphql.types.dataset.mappers.SchemaMetadataMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.metadata.Constants; +import com.linkedin.schema.SchemaField; import java.util.Collections; import java.util.stream.Collectors; import javax.annotation.Nullable; @@ -48,6 +66,14 @@ public static Assertion map(@Nullable QueryContext context, final EntityResponse result.setInfo( mapAssertionInfo(context, new AssertionInfo(envelopedAssertionInfo.getValue().data()))); } + + final EnvelopedAspect envelopedAssertionActions = + aspects.get(Constants.ASSERTION_ACTIONS_ASPECT_NAME); + if (envelopedAssertionActions != null) { + result.setActions( + mapAssertionActions(new AssertionActions(envelopedAssertionActions.getValue().data()))); + } + final EnvelopedAspect envelopedPlatformInstance = aspects.get(Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME); if (envelopedPlatformInstance != null) { @@ -83,20 +109,93 @@ private static com.linkedin.datahub.graphql.generated.Status mapStatus(Status st return result; } - private static com.linkedin.datahub.graphql.generated.AssertionInfo mapAssertionInfo( + public static com.linkedin.datahub.graphql.generated.AssertionInfo mapAssertionInfo( @Nullable QueryContext context, final AssertionInfo gmsAssertionInfo) { final com.linkedin.datahub.graphql.generated.AssertionInfo assertionInfo = new com.linkedin.datahub.graphql.generated.AssertionInfo(); assertionInfo.setType(AssertionType.valueOf(gmsAssertionInfo.getType().name())); + + if (gmsAssertionInfo.hasLastUpdated()) { + assertionInfo.setLastUpdated( + new AuditStamp( + gmsAssertionInfo.getLastUpdated().getTime(), + gmsAssertionInfo.getLastUpdated().getActor().toString())); + } if (gmsAssertionInfo.hasDatasetAssertion()) { DatasetAssertionInfo datasetAssertion = mapDatasetAssertionInfo(context, gmsAssertionInfo.getDatasetAssertion()); assertionInfo.setDatasetAssertion(datasetAssertion); } - assertionInfo.setDescription(gmsAssertionInfo.getDescription()); + // Description + if (gmsAssertionInfo.hasDescription()) { + assertionInfo.setDescription(gmsAssertionInfo.getDescription()); + } + // FRESHNESS Assertions + if (gmsAssertionInfo.hasFreshnessAssertion()) { + 
FreshnessAssertionInfo freshnessAssertionInfo = + FreshnessAssertionMapper.mapFreshnessAssertionInfo( + context, gmsAssertionInfo.getFreshnessAssertion()); + assertionInfo.setFreshnessAssertion(freshnessAssertionInfo); + } + // VOLUME Assertions + if (gmsAssertionInfo.hasVolumeAssertion()) { + VolumeAssertionInfo volumeAssertionInfo = + VolumeAssertionMapper.mapVolumeAssertionInfo( + context, gmsAssertionInfo.getVolumeAssertion()); + assertionInfo.setVolumeAssertion(volumeAssertionInfo); + } + // SQL Assertions + if (gmsAssertionInfo.hasSqlAssertion()) { + SqlAssertionInfo sqlAssertionInfo = + SqlAssertionMapper.mapSqlAssertionInfo(gmsAssertionInfo.getSqlAssertion()); + assertionInfo.setSqlAssertion(sqlAssertionInfo); + } + // FIELD Assertions + if (gmsAssertionInfo.hasFieldAssertion()) { + FieldAssertionInfo fieldAssertionInfo = + FieldAssertionMapper.mapFieldAssertionInfo(context, gmsAssertionInfo.getFieldAssertion()); + assertionInfo.setFieldAssertion(fieldAssertionInfo); + } + // SCHEMA Assertions + if (gmsAssertionInfo.hasSchemaAssertion()) { + SchemaAssertionInfo schemaAssertionInfo = + mapSchemaAssertionInfo(context, gmsAssertionInfo.getSchemaAssertion()); + assertionInfo.setSchemaAssertion(schemaAssertionInfo); + } + // Source Type + if (gmsAssertionInfo.hasSource()) { + assertionInfo.setSource(mapSource(gmsAssertionInfo.getSource())); + } return assertionInfo; } + private static com.linkedin.datahub.graphql.generated.AssertionActions mapAssertionActions( + final AssertionActions gmsAssertionActions) { + final com.linkedin.datahub.graphql.generated.AssertionActions result = + new com.linkedin.datahub.graphql.generated.AssertionActions(); + if (gmsAssertionActions.hasOnFailure()) { + result.setOnFailure( + gmsAssertionActions.getOnFailure().stream() + .map(AssertionMapper::mapAssertionAction) + .collect(Collectors.toList())); + } + if (gmsAssertionActions.hasOnSuccess()) { + result.setOnSuccess( + gmsAssertionActions.getOnSuccess().stream() + .map(AssertionMapper::mapAssertionAction) + .collect(Collectors.toList())); + } + return result; + } + + private static com.linkedin.datahub.graphql.generated.AssertionAction mapAssertionAction( + final AssertionAction gmsAssertionAction) { + final com.linkedin.datahub.graphql.generated.AssertionAction result = + new com.linkedin.datahub.graphql.generated.AssertionAction(); + result.setType(AssertionActionType.valueOf(gmsAssertionAction.getType().toString())); + return result; + } + private static DatasetAssertionInfo mapDatasetAssertionInfo( @Nullable QueryContext context, final com.linkedin.assertion.DatasetAssertionInfo gmsDatasetAssertion) { @@ -152,7 +251,7 @@ private static SchemaFieldRef mapDatasetSchemaField(final Urn schemaFieldUrn) { return new SchemaFieldRef(schemaFieldUrn.toString(), schemaFieldUrn.getEntityKey().get(1)); } - private static AssertionStdParameters mapParameters( + protected static AssertionStdParameters mapParameters( final com.linkedin.assertion.AssertionStdParameters params) { final AssertionStdParameters result = new AssertionStdParameters(); if (params.hasValue()) { @@ -175,5 +274,61 @@ private static AssertionStdParameter mapParameter( return result; } - private AssertionMapper() {} + protected static FixedIntervalSchedule mapFixedIntervalSchedule( + com.linkedin.assertion.FixedIntervalSchedule gmsFixedIntervalSchedule) { + FixedIntervalSchedule fixedIntervalSchedule = new FixedIntervalSchedule(); + fixedIntervalSchedule.setUnit(DateInterval.valueOf(gmsFixedIntervalSchedule.getUnit().name())); + 
fixedIntervalSchedule.setMultiple(gmsFixedIntervalSchedule.getMultiple()); + return fixedIntervalSchedule; + } + + private static AssertionSource mapSource(final com.linkedin.assertion.AssertionSource gmsSource) { + AssertionSource result = new AssertionSource(); + result.setType(AssertionSourceType.valueOf(gmsSource.getType().toString())); + if (gmsSource.hasCreated()) { + result.setCreated( + new AuditStamp( + gmsSource.getCreated().getTime(), gmsSource.getCreated().getActor().toString())); + } + return result; + } + + protected static com.linkedin.datahub.graphql.generated.SchemaFieldSpec mapSchemaFieldSpec( + final com.linkedin.schema.SchemaFieldSpec gmsField) { + final com.linkedin.datahub.graphql.generated.SchemaFieldSpec result = + new com.linkedin.datahub.graphql.generated.SchemaFieldSpec(); + result.setPath(gmsField.getPath()); + result.setType(gmsField.getType()); + result.setNativeType(gmsField.getNativeType()); + return result; + } + + private static SchemaAssertionInfo mapSchemaAssertionInfo( + @Nullable final QueryContext context, + final com.linkedin.assertion.SchemaAssertionInfo gmsSchemaAssertionInfo) { + SchemaAssertionInfo result = new SchemaAssertionInfo(); + result.setCompatibility( + SchemaAssertionCompatibility.valueOf(gmsSchemaAssertionInfo.getCompatibility().name())); + result.setEntityUrn(gmsSchemaAssertionInfo.getEntity().toString()); + result.setSchema( + SchemaMetadataMapper.INSTANCE.apply( + context, gmsSchemaAssertionInfo.getSchema(), gmsSchemaAssertionInfo.getEntity(), 0L)); + result.setFields( + gmsSchemaAssertionInfo.getSchema().getFields().stream() + .map(AssertionMapper::mapSchemaField) + .collect(Collectors.toList())); + return result; + } + + private static SchemaAssertionField mapSchemaField(final SchemaField gmsField) { + SchemaAssertionField result = new SchemaAssertionField(); + result.setPath(gmsField.getFieldPath()); + result.setType(new SchemaFieldMapper().mapSchemaFieldDataType(gmsField.getType())); + if (gmsField.hasNativeDataType()) { + result.setNativeType(gmsField.getNativeDataType()); + } + return result; + } + + protected AssertionMapper() {} } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionType.java index 0cf74439132fe8..9c90478f03dc5f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionType.java @@ -28,8 +28,8 @@ public class AssertionType Constants.ASSERTION_KEY_ASPECT_NAME, Constants.ASSERTION_INFO_ASPECT_NAME, Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME, - Constants.GLOBAL_TAGS_ASPECT_NAME); - + Constants.GLOBAL_TAGS_ASPECT_NAME, + Constants.ASSERTION_ACTIONS_ASPECT_NAME); private final EntityClient _entityClient; public AssertionType(final EntityClient entityClient) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/FieldAssertionMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/FieldAssertionMapper.java new file mode 100644 index 00000000000000..82d041a464c3fb --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/FieldAssertionMapper.java @@ -0,0 +1,92 @@ +package com.linkedin.datahub.graphql.types.assertion; + +import com.linkedin.assertion.FieldAssertionInfo; +import 
com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.AssertionStdOperator; +import com.linkedin.datahub.graphql.generated.FieldAssertionType; +import com.linkedin.datahub.graphql.generated.FieldMetricType; +import com.linkedin.datahub.graphql.generated.FieldTransformType; +import com.linkedin.datahub.graphql.generated.FieldValuesFailThresholdType; +import com.linkedin.datahub.graphql.types.dataset.mappers.DatasetFilterMapper; +import javax.annotation.Nullable; + +public class FieldAssertionMapper extends AssertionMapper { + + public static com.linkedin.datahub.graphql.generated.FieldAssertionInfo mapFieldAssertionInfo( + @Nullable final QueryContext context, final FieldAssertionInfo gmsFieldAssertionInfo) { + final com.linkedin.datahub.graphql.generated.FieldAssertionInfo result = + new com.linkedin.datahub.graphql.generated.FieldAssertionInfo(); + result.setEntityUrn(gmsFieldAssertionInfo.getEntity().toString()); + result.setType(FieldAssertionType.valueOf(gmsFieldAssertionInfo.getType().name())); + if (gmsFieldAssertionInfo.hasFilter()) { + result.setFilter(DatasetFilterMapper.map(context, gmsFieldAssertionInfo.getFilter())); + } + if (gmsFieldAssertionInfo.hasFieldValuesAssertion()) { + result.setFieldValuesAssertion( + mapFieldValuesAssertion(gmsFieldAssertionInfo.getFieldValuesAssertion())); + } + if (gmsFieldAssertionInfo.hasFieldMetricAssertion()) { + result.setFieldMetricAssertion( + mapFieldMetricAssertion(gmsFieldAssertionInfo.getFieldMetricAssertion())); + } + return result; + } + + private static com.linkedin.datahub.graphql.generated.FieldValuesAssertion + mapFieldValuesAssertion( + final com.linkedin.assertion.FieldValuesAssertion gmsFieldValuesAssertion) { + final com.linkedin.datahub.graphql.generated.FieldValuesAssertion result = + new com.linkedin.datahub.graphql.generated.FieldValuesAssertion(); + result.setField(mapSchemaFieldSpec(gmsFieldValuesAssertion.getField())); + result.setOperator(AssertionStdOperator.valueOf(gmsFieldValuesAssertion.getOperator().name())); + result.setFailThreshold( + mapFieldValuesFailThreshold(gmsFieldValuesAssertion.getFailThreshold())); + result.setExcludeNulls(gmsFieldValuesAssertion.isExcludeNulls()); + + if (gmsFieldValuesAssertion.hasTransform()) { + result.setTransform(mapFieldTransform(gmsFieldValuesAssertion.getTransform())); + } + + if (gmsFieldValuesAssertion.hasParameters()) { + result.setParameters(mapParameters(gmsFieldValuesAssertion.getParameters())); + } + return result; + } + + private static com.linkedin.datahub.graphql.generated.FieldMetricAssertion + mapFieldMetricAssertion( + final com.linkedin.assertion.FieldMetricAssertion gmsFieldMetricAssertion) { + final com.linkedin.datahub.graphql.generated.FieldMetricAssertion result = + new com.linkedin.datahub.graphql.generated.FieldMetricAssertion(); + result.setField(mapSchemaFieldSpec(gmsFieldMetricAssertion.getField())); + result.setMetric(FieldMetricType.valueOf(gmsFieldMetricAssertion.getMetric().name())); + result.setOperator(AssertionStdOperator.valueOf(gmsFieldMetricAssertion.getOperator().name())); + + if (gmsFieldMetricAssertion.hasParameters()) { + result.setParameters(mapParameters(gmsFieldMetricAssertion.getParameters())); + } + + return result; + } + + private static com.linkedin.datahub.graphql.generated.FieldTransform mapFieldTransform( + final com.linkedin.assertion.FieldTransform gmsFieldTransform) { + final com.linkedin.datahub.graphql.generated.FieldTransform result = + new 
com.linkedin.datahub.graphql.generated.FieldTransform(); + result.setType(FieldTransformType.valueOf(gmsFieldTransform.getType().name())); + return result; + } + + private static com.linkedin.datahub.graphql.generated.FieldValuesFailThreshold + mapFieldValuesFailThreshold( + final com.linkedin.assertion.FieldValuesFailThreshold gmsFieldValuesFailThreshold) { + final com.linkedin.datahub.graphql.generated.FieldValuesFailThreshold result = + new com.linkedin.datahub.graphql.generated.FieldValuesFailThreshold(); + result.setType( + FieldValuesFailThresholdType.valueOf(gmsFieldValuesFailThreshold.getType().name())); + result.setValue(gmsFieldValuesFailThreshold.getValue()); + return result; + } + + private FieldAssertionMapper() {} +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/FreshnessAssertionMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/FreshnessAssertionMapper.java new file mode 100644 index 00000000000000..22e1c1d8bae9ea --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/FreshnessAssertionMapper.java @@ -0,0 +1,59 @@ +package com.linkedin.datahub.graphql.types.assertion; + +import com.linkedin.data.template.GetMode; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.FreshnessAssertionInfo; +import com.linkedin.datahub.graphql.generated.FreshnessAssertionSchedule; +import com.linkedin.datahub.graphql.generated.FreshnessAssertionScheduleType; +import com.linkedin.datahub.graphql.generated.FreshnessAssertionType; +import com.linkedin.datahub.graphql.generated.FreshnessCronSchedule; +import com.linkedin.datahub.graphql.types.dataset.mappers.DatasetFilterMapper; +import javax.annotation.Nullable; + +public class FreshnessAssertionMapper extends AssertionMapper { + + public static FreshnessAssertionInfo mapFreshnessAssertionInfo( + @Nullable final QueryContext context, + final com.linkedin.assertion.FreshnessAssertionInfo gmsFreshnessAssertionInfo) { + FreshnessAssertionInfo freshnessAssertionInfo = new FreshnessAssertionInfo(); + freshnessAssertionInfo.setEntityUrn(gmsFreshnessAssertionInfo.getEntity().toString()); + freshnessAssertionInfo.setType( + FreshnessAssertionType.valueOf(gmsFreshnessAssertionInfo.getType().name())); + if (gmsFreshnessAssertionInfo.hasSchedule()) { + freshnessAssertionInfo.setSchedule( + mapFreshnessAssertionSchedule(gmsFreshnessAssertionInfo.getSchedule())); + } + if (gmsFreshnessAssertionInfo.hasFilter()) { + freshnessAssertionInfo.setFilter( + DatasetFilterMapper.map(context, gmsFreshnessAssertionInfo.getFilter())); + } + return freshnessAssertionInfo; + } + + private static FreshnessCronSchedule mapFreshnessCronSchedule( + final com.linkedin.assertion.FreshnessCronSchedule gmsCronSchedule) { + FreshnessCronSchedule cronSchedule = new FreshnessCronSchedule(); + cronSchedule.setCron(gmsCronSchedule.getCron()); + cronSchedule.setTimezone(gmsCronSchedule.getTimezone()); + cronSchedule.setWindowStartOffsetMs(gmsCronSchedule.getWindowStartOffsetMs(GetMode.NULL)); + return cronSchedule; + } + + private static FreshnessAssertionSchedule mapFreshnessAssertionSchedule( + final com.linkedin.assertion.FreshnessAssertionSchedule gmsFreshnessAssertionSchedule) { + FreshnessAssertionSchedule freshnessAssertionSchedule = new FreshnessAssertionSchedule(); + freshnessAssertionSchedule.setType( + FreshnessAssertionScheduleType.valueOf(gmsFreshnessAssertionSchedule.getType().name())); + if 
(gmsFreshnessAssertionSchedule.hasCron()) { + freshnessAssertionSchedule.setCron( + mapFreshnessCronSchedule(gmsFreshnessAssertionSchedule.getCron())); + } + if (gmsFreshnessAssertionSchedule.hasFixedInterval()) { + freshnessAssertionSchedule.setFixedInterval( + mapFixedIntervalSchedule(gmsFreshnessAssertionSchedule.getFixedInterval())); + } + return freshnessAssertionSchedule; + } + + private FreshnessAssertionMapper() {} +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/SqlAssertionMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/SqlAssertionMapper.java new file mode 100644 index 00000000000000..e75d2221164d4d --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/SqlAssertionMapper.java @@ -0,0 +1,27 @@ +package com.linkedin.datahub.graphql.types.assertion; + +import com.linkedin.assertion.SqlAssertionInfo; +import com.linkedin.datahub.graphql.generated.AssertionStdOperator; +import com.linkedin.datahub.graphql.generated.AssertionValueChangeType; +import com.linkedin.datahub.graphql.generated.SqlAssertionType; + +public class SqlAssertionMapper extends AssertionMapper { + + public static com.linkedin.datahub.graphql.generated.SqlAssertionInfo mapSqlAssertionInfo( + final SqlAssertionInfo gmsSqlAssertionInfo) { + final com.linkedin.datahub.graphql.generated.SqlAssertionInfo result = + new com.linkedin.datahub.graphql.generated.SqlAssertionInfo(); + result.setEntityUrn(gmsSqlAssertionInfo.getEntity().toString()); + result.setType(SqlAssertionType.valueOf(gmsSqlAssertionInfo.getType().name())); + result.setStatement(gmsSqlAssertionInfo.getStatement()); + result.setOperator(AssertionStdOperator.valueOf(gmsSqlAssertionInfo.getOperator().name())); + result.setParameters(mapParameters(gmsSqlAssertionInfo.getParameters())); + if (gmsSqlAssertionInfo.hasChangeType()) { + result.setChangeType( + AssertionValueChangeType.valueOf(gmsSqlAssertionInfo.getChangeType().name())); + } + return result; + } + + private SqlAssertionMapper() {} +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/VolumeAssertionMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/VolumeAssertionMapper.java new file mode 100644 index 00000000000000..3d0294c45e5205 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/VolumeAssertionMapper.java @@ -0,0 +1,115 @@ +package com.linkedin.datahub.graphql.types.assertion; + +import com.linkedin.assertion.VolumeAssertionInfo; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.AssertionStdOperator; +import com.linkedin.datahub.graphql.generated.AssertionValueChangeType; +import com.linkedin.datahub.graphql.generated.IncrementingSegmentFieldTransformerType; +import com.linkedin.datahub.graphql.generated.VolumeAssertionType; +import com.linkedin.datahub.graphql.types.dataset.mappers.DatasetFilterMapper; +import javax.annotation.Nullable; + +public class VolumeAssertionMapper extends AssertionMapper { + + public static com.linkedin.datahub.graphql.generated.VolumeAssertionInfo mapVolumeAssertionInfo( + @Nullable final QueryContext context, final VolumeAssertionInfo gmsVolumeAssertionInfo) { + final com.linkedin.datahub.graphql.generated.VolumeAssertionInfo result = + new com.linkedin.datahub.graphql.generated.VolumeAssertionInfo(); + 
result.setEntityUrn(gmsVolumeAssertionInfo.getEntity().toString()); + result.setType(VolumeAssertionType.valueOf(gmsVolumeAssertionInfo.getType().name())); + if (gmsVolumeAssertionInfo.hasFilter()) { + result.setFilter(DatasetFilterMapper.map(context, gmsVolumeAssertionInfo.getFilter())); + } + if (gmsVolumeAssertionInfo.hasRowCountTotal()) { + result.setRowCountTotal(mapRowCountTotal(gmsVolumeAssertionInfo.getRowCountTotal())); + } + if (gmsVolumeAssertionInfo.hasRowCountChange()) { + result.setRowCountChange(mapRowCountChange(gmsVolumeAssertionInfo.getRowCountChange())); + } + if (gmsVolumeAssertionInfo.hasIncrementingSegmentRowCountTotal()) { + result.setIncrementingSegmentRowCountTotal( + mapIncrementingSegmentRowCountTotal( + gmsVolumeAssertionInfo.getIncrementingSegmentRowCountTotal())); + } + if (gmsVolumeAssertionInfo.hasIncrementingSegmentRowCountChange()) { + result.setIncrementingSegmentRowCountChange( + mapIncrementingSegmentRowCountChange( + gmsVolumeAssertionInfo.getIncrementingSegmentRowCountChange())); + } + return result; + } + + private static com.linkedin.datahub.graphql.generated.RowCountTotal mapRowCountTotal( + final com.linkedin.assertion.RowCountTotal gmsRowCountTotal) { + final com.linkedin.datahub.graphql.generated.RowCountTotal result = + new com.linkedin.datahub.graphql.generated.RowCountTotal(); + result.setOperator(AssertionStdOperator.valueOf(gmsRowCountTotal.getOperator().name())); + result.setParameters(mapParameters(gmsRowCountTotal.getParameters())); + return result; + } + + private static com.linkedin.datahub.graphql.generated.RowCountChange mapRowCountChange( + final com.linkedin.assertion.RowCountChange gmsRowCountChange) { + final com.linkedin.datahub.graphql.generated.RowCountChange result = + new com.linkedin.datahub.graphql.generated.RowCountChange(); + result.setOperator(AssertionStdOperator.valueOf(gmsRowCountChange.getOperator().name())); + result.setParameters(mapParameters(gmsRowCountChange.getParameters())); + result.setType(AssertionValueChangeType.valueOf(gmsRowCountChange.getType().name())); + return result; + } + + private static com.linkedin.datahub.graphql.generated.IncrementingSegmentRowCountTotal + mapIncrementingSegmentRowCountTotal( + final com.linkedin.assertion.IncrementingSegmentRowCountTotal + gmsIncrementingSegmentRowCountTotal) { + final com.linkedin.datahub.graphql.generated.IncrementingSegmentRowCountTotal result = + new com.linkedin.datahub.graphql.generated.IncrementingSegmentRowCountTotal(); + result.setOperator( + AssertionStdOperator.valueOf(gmsIncrementingSegmentRowCountTotal.getOperator().name())); + result.setParameters(mapParameters(gmsIncrementingSegmentRowCountTotal.getParameters())); + result.setSegment(mapIncrementingSegmentSpec(gmsIncrementingSegmentRowCountTotal.getSegment())); + return result; + } + + private static com.linkedin.datahub.graphql.generated.IncrementingSegmentRowCountChange + mapIncrementingSegmentRowCountChange( + final com.linkedin.assertion.IncrementingSegmentRowCountChange + gmsIncrementingSegmentRowCountChange) { + final com.linkedin.datahub.graphql.generated.IncrementingSegmentRowCountChange result = + new com.linkedin.datahub.graphql.generated.IncrementingSegmentRowCountChange(); + result.setOperator( + AssertionStdOperator.valueOf(gmsIncrementingSegmentRowCountChange.getOperator().name())); + result.setParameters(mapParameters(gmsIncrementingSegmentRowCountChange.getParameters())); + result.setSegment( + mapIncrementingSegmentSpec(gmsIncrementingSegmentRowCountChange.getSegment())); + 
result.setType( + AssertionValueChangeType.valueOf(gmsIncrementingSegmentRowCountChange.getType().name())); + return result; + } + + private static com.linkedin.datahub.graphql.generated.IncrementingSegmentSpec + mapIncrementingSegmentSpec(final com.linkedin.assertion.IncrementingSegmentSpec gmsSegment) { + final com.linkedin.datahub.graphql.generated.IncrementingSegmentSpec result = + new com.linkedin.datahub.graphql.generated.IncrementingSegmentSpec(); + result.setField(mapSchemaFieldSpec(gmsSegment.getField())); + if (gmsSegment.hasTransformer()) { + result.setTransformer(mapIncrementingSegmentFieldTransformer(gmsSegment.getTransformer())); + } + return result; + } + + private static com.linkedin.datahub.graphql.generated.IncrementingSegmentFieldTransformer + mapIncrementingSegmentFieldTransformer( + final com.linkedin.assertion.IncrementingSegmentFieldTransformer gmsTransformer) { + final com.linkedin.datahub.graphql.generated.IncrementingSegmentFieldTransformer result = + new com.linkedin.datahub.graphql.generated.IncrementingSegmentFieldTransformer(); + result.setType( + IncrementingSegmentFieldTransformerType.valueOf(gmsTransformer.getType().name())); + if (gmsTransformer.hasNativeType()) { + result.setNativeType(gmsTransformer.getNativeType()); + } + return result; + } + + private VolumeAssertionMapper() {} +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaFieldMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaFieldMapper.java index a2cc9d5a66edd9..3674186ac23fe6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaFieldMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaFieldMapper.java @@ -51,7 +51,7 @@ public SchemaField apply( return result; } - private SchemaFieldDataType mapSchemaFieldDataType( + public SchemaFieldDataType mapSchemaFieldDataType( @Nonnull final com.linkedin.schema.SchemaFieldDataType dataTypeUnion) { final com.linkedin.schema.SchemaFieldDataType.Type type = dataTypeUnion.getType(); if (type.isBytesType()) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/ermodelrelationship/CreateERModelRelationshipResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/ermodelrelationship/CreateERModelRelationshipResolver.java index cc1896027492f1..61896ed1a0659f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/ermodelrelationship/CreateERModelRelationshipResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/ermodelrelationship/CreateERModelRelationshipResolver.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.ERModelRelationshipUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.ERModelRelationship; import com.linkedin.datahub.graphql.generated.ERModelRelationshipPropertiesInput; @@ -83,7 +84,7 @@ public CompletableFuture get(DataFetchingEnvironment enviro throw new AuthorizationException( "Unauthorized to create erModelRelationship. 
Please contact your DataHub administrator."); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { log.debug("Create ERModelRelation input: {}", input); @@ -109,6 +110,8 @@ public CompletableFuture get(DataFetchingEnvironment enviro "Failed to create erModelRelationship to resource with input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/ermodelrelationship/UpdateERModelRelationshipResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/ermodelrelationship/UpdateERModelRelationshipResolver.java index b71a99522eba6a..5413352a394b49 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/ermodelrelationship/UpdateERModelRelationshipResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/ermodelrelationship/UpdateERModelRelationshipResolver.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.ERModelRelationshipUrn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.ERModelRelationshipUpdateInput; import com.linkedin.datahub.graphql.types.ermodelrelationship.mappers.ERModelRelationshipUpdateInputMapper; @@ -36,7 +37,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); } - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { try { log.debug("Create ERModelRelation input: {}", input); @@ -61,6 +62,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw "Failed to update erModelRelationship to resource with input %s", input), e); } - }); + }, + this.getClass().getSimpleName(), + "get"); } } diff --git a/datahub-graphql-core/src/main/resources/assertions.graphql b/datahub-graphql-core/src/main/resources/assertions.graphql new file mode 100644 index 00000000000000..0ed264b20fe27e --- /dev/null +++ b/datahub-graphql-core/src/main/resources/assertions.graphql @@ -0,0 +1,896 @@ +""" +Defines a schema field, each with a specified path and type. +""" +type SchemaAssertionField { + """ + The standard V1 path of the field within the schema. + """ + path: String! + + """ + The std type of the field + """ + type: SchemaFieldDataType! + + """ + Optional: The specific native or standard type of the field. + """ + nativeType: String +} + +""" +Defines the required compatibility level for the schema assertion to pass. +""" +enum SchemaAssertionCompatibility { + """ + The schema must be exactly the same as the expected schema. + """ + EXACT_MATCH + + """ + The schema must be a superset of the expected schema. + """ + SUPERSET + + """ + The schema must be a subset of the expected schema. + """ + SUBSET +} + +""" +The source of an assertion +""" +enum AssertionSourceType { + """ + The assertion was defined natively on DataHub by a user. + """ + NATIVE + """ + The assertion was defined and managed externally of DataHub. + """ + EXTERNAL + """ + The assertion was inferred, e.g. from offline AI / ML models. 
+ """ + INFERRED +} + +""" +The type of an Freshness assertion +""" +enum FreshnessAssertionType { + """ + An assertion defined against a Dataset Change Operation - insert, update, delete, etc + """ + DATASET_CHANGE + """ + An assertion defined against a Data Job run + """ + DATA_JOB_RUN +} + +extend type AssertionInfo { + """ + Information about an Freshness Assertion + """ + freshnessAssertion: FreshnessAssertionInfo + + """ + Information about an Volume Assertion + """ + volumeAssertion: VolumeAssertionInfo + + """ + Information about a SQL Assertion + """ + sqlAssertion: SqlAssertionInfo + + """ + Information about a Field Assertion + """ + fieldAssertion: FieldAssertionInfo + + """ + Schema assertion, e.g. defining the expected structure for an asset. + """ + schemaAssertion: SchemaAssertionInfo + + """ + The source or origin of the Assertion definition. + """ + source: AssertionSource + + """ + The time that the status last changed and the actor who changed it + """ + lastUpdated: AuditStamp +} + +extend type Assertion { + """ + The actions associated with the Assertion + """ + actions: AssertionActions +} + +""" +Some actions associated with an assertion +""" +type AssertionActions { + """ + Actions to be executed on successful assertion run. + """ + onSuccess: [AssertionAction!]! + + """ + Actions to be executed on failed assertion run. + """ + onFailure: [AssertionAction!]! +} + +""" +An action associated with an assertion +""" +type AssertionAction { + """ + The type of the actions + """ + type: AssertionActionType! +} + + +""" +The type of the Action +""" +enum AssertionActionType { + """ + Raise an incident. + """ + RAISE_INCIDENT + """ + Resolve open incidents related to the assertion. + """ + RESOLVE_INCIDENT +} + + +""" +Information about an Freshness assertion. +""" +type FreshnessAssertionInfo { + """ + The urn of the entity that the Freshness assertion is related to + """ + entityUrn: String! + + """ + The type of the Freshness Assertion + """ + type: FreshnessAssertionType! + + """ + Produce FAIL Assertion Result if the asset is not updated on the cadence and within the time range described by the schedule. + """ + schedule: FreshnessAssertionSchedule! + + """ + A filter applied when querying an external Dataset or Table + """ + filter: DatasetFilter +} + +""" +Attributes defining a single Freshness schedule. +""" +type FreshnessAssertionSchedule { + """ + The type of schedule + """ + type: FreshnessAssertionScheduleType! + + """ + A cron schedule. This is populated if the type is CRON. + """ + cron: FreshnessCronSchedule + + """ + A fixed interval schedule. This is populated if the type is FIXED_INTERVAL. + """ + fixedInterval: FixedIntervalSchedule +} + +""" +The type of an Freshness assertion +""" +enum FreshnessAssertionScheduleType { + """ + An schedule based on a CRON schedule representing the expected event times. + """ + CRON + + """ + A scheduled based on a recurring fixed schedule which is used to compute the expected operation window. E.g. "every 24 hours". + """ + FIXED_INTERVAL +} + +""" +A cron-formatted schedule +""" +type FreshnessCronSchedule { + """ + A cron-formatted execution interval, as a cron string, e.g. 1 * * * * + """ + cron: String! + + """ + Timezone in which the cron interval applies, e.g. America/Los Angeles + """ + timezone: String! 
+ + """ + An optional offset in milliseconds to SUBTRACT from the timestamp generated by the cron schedule + to generate the lower bounds of the "Freshness window", or the window of time in which an event must have occurred in order for the Freshness + to be considering passing. + If left empty, the start of the Freshness window will be the _end_ of the previously evaluated Freshness window. + """ + windowStartOffsetMs: Long +} + +""" +A fixed interval schedule. +""" +type FixedIntervalSchedule { + """ + Interval unit such as minute/hour/day etc. + """ + unit: DateInterval! + + """ + How many units. Defaults to 1. + """ + multiple: Int! +} + +""" +The source of an Assertion +""" +type AssertionSource { + """ + The source type + """ + type: AssertionSourceType! + """ + The time at which the assertion was initially created and the actor who created it + """ + created: AuditStamp +} + +""" +Information about the field to use in an assertion +""" +type SchemaFieldSpec { + """ + The field path + """ + path: String! + + """ + The DataHub standard schema field type. + """ + type: String! + + """ + The native field type + """ + nativeType: String! +} + +""" +An enum to represent a type of change in an assertion value, metric, or measurement. +""" +enum AssertionValueChangeType { + """ + A change that is defined in absolute terms. + """ + ABSOLUTE + + """ + A change that is defined in relative terms using percentage change + from the original value. + """ + PERCENTAGE +} + +""" +A type of volume (row count) assertion +""" +enum VolumeAssertionType { + """ + A volume assertion that is evaluated against the total row count of a dataset. + """ + ROW_COUNT_TOTAL + + """ + A volume assertion that is evaluated against an incremental row count of a dataset, + or a row count change. + """ + ROW_COUNT_CHANGE + + """ + A volume assertion that checks the latest "segment" in a table based on an incrementing + column to check whether it's row count falls into a particular range. + This can be used to monitor the row count of an incrementing date-partition column segment. + """ + INCREMENTING_SEGMENT_ROW_COUNT_TOTAL + + """ + A volume assertion that compares the row counts in neighboring "segments" or "partitions" + of an incrementing column. This can be used to track changes between subsequent date partition + in a table, for example. + """ + INCREMENTING_SEGMENT_ROW_COUNT_CHANGE +} + +""" +Attributes defining an ROW_COUNT_TOTAL volume assertion. +""" +type RowCountTotal { + """ + The operator you'd like to apply. + Note that only numeric operators are valid inputs: + GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, + BETWEEN. + """ + operator: AssertionStdOperator! + + """ + The parameters you'd like to provide as input to the operator. + Note that only numeric parameter types are valid inputs: NUMBER. + """ + parameters: AssertionStdParameters! +} + +""" +Attributes defining an ROW_COUNT_CHANGE volume assertion. +""" +type RowCountChange { + """ + The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage. + """ + type: AssertionValueChangeType! + + """ + The operator you'd like to apply. + Note that only numeric operators are valid inputs: + GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, + BETWEEN. + """ + operator: AssertionStdOperator! + + """ + The parameters you'd like to provide as input to the operator. + Note that only numeric parameter types are valid inputs: NUMBER. 
+ """ + parameters: AssertionStdParameters! +} + +""" +Attributes defining an INCREMENTING_SEGMENT_ROW_COUNT_TOTAL volume assertion. +""" +type IncrementingSegmentRowCountTotal { + """ + A specification of how the 'segment' can be derived using a column and an optional transformer function. + """ + segment: IncrementingSegmentSpec! + + """ + The operator you'd like to apply. + Note that only numeric operators are valid inputs: + GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, + BETWEEN. + """ + operator: AssertionStdOperator! + + """ + The parameters you'd like to provide as input to the operator. + Note that only numeric parameter types are valid inputs: NUMBER. + """ + parameters: AssertionStdParameters! +} + +""" +Attributes defining an INCREMENTING_SEGMENT_ROW_COUNT_CHANGE volume assertion. +""" +type IncrementingSegmentRowCountChange { + """ + A specification of how the 'segment' can be derived using a column and an optional transformer function. + """ + segment: IncrementingSegmentSpec! + + """ + The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage. + """ + type: AssertionValueChangeType! + + """ + The operator you'd like to apply to the row count value + Note that only numeric operators are valid inputs: + GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, + BETWEEN. + """ + operator: AssertionStdOperator! + + """ + The parameters you'd like to provide as input to the operator. + Note that only numeric parameter types are valid inputs: NUMBER. + """ + parameters: AssertionStdParameters! +} + +""" +Core attributes required to identify an incrementing segment in a table. This type is mainly useful +for tables that constantly increase with new rows being added on a particular cadence (e.g. fact or event tables). + +An incrementing segment represents a logical chunk of data which is INSERTED +into a dataset on a regular interval, along with the presence of a constantly-incrementing column +value such as an event time, date partition, or last modified column. + +An incrementing segment is principally identified by 2 key attributes combined: + +1. A field or column that represents the incrementing value. New rows that are inserted will be identified using this column. + Note that the value of this column may not by itself represent the "bucket" or the "segment" in which the row falls. + +2. [Optional] An transformer function that may be applied to the selected column value in order + to obtain the final "segment identifier" or "bucket identifier". Rows that have the same value after applying the transformation + will be grouped into the same segment, using which the final value (e.g. row count) will be determined. +""" +type IncrementingSegmentSpec { + """ + The field to use to generate segments. It must be constantly incrementing as new rows are inserted. + """ + field: SchemaFieldSpec! + + """ + Optional transformer function to apply to the field in order to obtain the final segment or bucket identifier. + If not provided, then no operator will be applied to the field. (identity function) + """ + transformer: IncrementingSegmentFieldTransformer +} + +""" +The definition of the transformer function that should be applied to a given field / column value in a dataset +in order to determine the segment or bucket that it belongs to, which in turn is used to evaluate +volume assertions. +""" +type IncrementingSegmentFieldTransformer { + """ + The 'standard' operator type. 
Note that not all source systems will support all operators. + """ + type: IncrementingSegmentFieldTransformerType! + + """ + The 'native' transformer type, useful as a back door if a custom transformer is required. + This field is required if the type is NATIVE. + """ + nativeType: String +} + +""" +The 'standard' transformer type. Note that not all source systems will support all operators. +""" +enum IncrementingSegmentFieldTransformerType { + """ + Rounds a timestamp (in milliseconds) down to the nearest minute. + """ + TIMESTAMP_MS_TO_MINUTE + + """ + Rounds a timestamp (in milliseconds) down to the nearest hour. + """ + TIMESTAMP_MS_TO_HOUR + + """ + Rounds a timestamp (in milliseconds) down to the start of the day. + """ + TIMESTAMP_MS_TO_DATE + + """ + Rounds a timestamp (in milliseconds) down to the start of the month + """ + TIMESTAMP_MS_TO_MONTH + + """ + Rounds a timestamp (in milliseconds) down to the start of the year + """ + TIMESTAMP_MS_TO_YEAR + + """ + Rounds a numeric value down to the nearest integer. + """ + FLOOR + + """ + Rounds a numeric value up to the nearest integer. + """ + CEILING + + """ + A backdoor to provide a native operator type specific to a given source system like + Snowflake, Redshift, BQ, etc. + """ + NATIVE +} + +""" +A definition of a Volume (row count) assertion. +""" +type VolumeAssertionInfo { + """ + The entity targeted by this Volume check. + """ + entityUrn: String! + + """ + The type of the volume assertion being monitored. + """ + type: VolumeAssertionType! + + """ + Produce FAILURE Assertion Result if the row count of the asset does not meet specific requirements. + Required if type is 'ROW_COUNT_TOTAL'. + """ + rowCountTotal: RowCountTotal + + """ + Produce FAILURE Assertion Result if the row count delta of the asset does not meet specific requirements. + Required if type is 'ROW_COUNT_CHANGE'. + """ + rowCountChange: RowCountChange + + """ + Produce FAILURE Assertion Result if the latest incrementing segment row count total of the asset + does not meet specific requirements. Required if type is 'INCREMENTING_SEGMENT_ROW_COUNT_TOTAL'. + """ + incrementingSegmentRowCountTotal: IncrementingSegmentRowCountTotal + + """ + Produce FAILURE Assertion Result if the incrementing segment row count delta of the asset + does not meet specific requirements. Required if type is 'INCREMENTING_SEGMENT_ROW_COUNT_CHANGE'. + """ + incrementingSegmentRowCountChange: IncrementingSegmentRowCountChange + + """ + A definition of the specific filters that should be applied when performing monitoring. + If not provided, there is no filter, and the full table is under consideration. + """ + filter: DatasetFilter +} + +""" +The type of the SQL assertion being monitored. +""" +enum SqlAssertionType { + """ + A SQL Metric Assertion, e.g. one based on a numeric value returned by an arbitrary SQL query. + """ + METRIC + + """ + A SQL assertion that is evaluated against the CHANGE in a metric assertion over time. + """ + METRIC_CHANGE +} + +""" +Attributes defining a SQL Assertion +""" +type SqlAssertionInfo { + """ + The type of the SQL assertion being monitored. + """ + type: SqlAssertionType! + + """ + The entity targeted by this SQL check. + """ + entityUrn: String! + + """ + The SQL statement to be executed when evaluating the assertion. + """ + statement: String! + + """ + The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage. + Required if the type is METRIC_CHANGE. 
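As an illustration of how the volume assertion pieces above fit together, the snippet below assembles an INCREMENTING_SEGMENT_ROW_COUNT_TOTAL assertion for a hypothetical event table, using the same com.linkedin.assertion setters that the new VolumeAssertionMapperTest exercises later in this patch; the dataset URN, field path, and threshold are invented for the example.

```java
import com.linkedin.assertion.AssertionStdOperator;
import com.linkedin.assertion.AssertionStdParameter;
import com.linkedin.assertion.AssertionStdParameterType;
import com.linkedin.assertion.AssertionStdParameters;
import com.linkedin.assertion.IncrementingSegmentFieldTransformer;
import com.linkedin.assertion.IncrementingSegmentFieldTransformerType;
import com.linkedin.assertion.IncrementingSegmentRowCountTotal;
import com.linkedin.assertion.IncrementingSegmentSpec;
import com.linkedin.assertion.VolumeAssertionInfo;
import com.linkedin.assertion.VolumeAssertionType;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.schema.SchemaFieldSpec;

public class VolumeAssertionExample {

  // Each daily segment of the (hypothetical) event table must contain at least 1000 rows.
  public static VolumeAssertionInfo dailyPartitionRowCount() {
    return new VolumeAssertionInfo()
        .setEntity(UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,events,PROD)"))
        .setType(VolumeAssertionType.INCREMENTING_SEGMENT_ROW_COUNT_TOTAL)
        .setIncrementingSegmentRowCountTotal(
            new IncrementingSegmentRowCountTotal()
                .setOperator(AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO)
                .setParameters(
                    new AssertionStdParameters()
                        .setValue(
                            new AssertionStdParameter()
                                .setType(AssertionStdParameterType.NUMBER)
                                .setValue("1000")))
                .setSegment(
                    new IncrementingSegmentSpec()
                        .setField(
                            new SchemaFieldSpec()
                                .setPath("event_time")
                                .setType("TIMESTAMP")
                                .setNativeType("timestamp"))
                        // Bucket rows by the calendar day of the event_time column.
                        .setTransformer(
                            new IncrementingSegmentFieldTransformer()
                                .setType(
                                    IncrementingSegmentFieldTransformerType
                                        .TIMESTAMP_MS_TO_DATE))));
  }
}
```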
+ """ + changeType: AssertionValueChangeType + + """ + The operator you'd like to apply to the result of the SQL query. + """ + operator: AssertionStdOperator! + + """ + The parameters you'd like to provide as input to the operator. + """ + parameters: AssertionStdParameters! +} + +""" +The type of a Field assertion +""" +enum FieldAssertionType { + """ + An assertion used to validate the values contained with a field / column given a set of rows. + """ + FIELD_VALUES + + """ + An assertion used to validate the value of a common field / column metric (e.g. aggregation) + such as null count + percentage, min, max, median, and more. + """ + FIELD_METRIC +} + +""" +The type of the Field Transform +""" +enum FieldTransformType { + """ + Obtain the length of a string field / column (applicable to string types) + """ + LENGTH +} + +""" +The type of failure threshold. +""" +enum FieldValuesFailThresholdType { + """ + The maximum number of column values (i.e. rows) that are allowed + to fail the defined expectations before the assertion officially fails. + """ + COUNT + + """ + The maximum percentage of rows that are allowed + to fail the defined column expectations before the assertion officially fails. + """ + PERCENTAGE +} + +""" +A standard metric that can be derived from the set of values +for a specific field / column of a dataset / table. +""" +enum FieldMetricType { + """ + The number of unique values found in the column value set + """ + UNIQUE_COUNT + + """ + The percentage of unique values to total rows for the dataset + """ + UNIQUE_PERCENTAGE + + """ + The number of null values found in the column value set + """ + NULL_COUNT + + """ + The percentage of null values to total rows for the dataset + """ + NULL_PERCENTAGE + + """ + The minimum value in the column set (applies to numeric columns) + """ + MIN + + """ + The maximum value in the column set (applies to numeric columns) + """ + MAX + + """ + The mean length found in the column set (applies to numeric columns) + """ + MEAN + + """ + The median length found in the column set (applies to numeric columns) + """ + MEDIAN + + """ + The stddev length found in the column set (applies to numeric columns) + """ + STDDEV + + """ + The number of negative values found in the value set (applies to numeric columns) + """ + NEGATIVE_COUNT + + """ + The percentage of negative values to total rows for the dataset (applies to numeric columns) + """ + NEGATIVE_PERCENTAGE + + """ + The number of zero values found in the value set (applies to numeric columns) + """ + ZERO_COUNT + + """ + The percentage of zero values to total rows for the dataset (applies to numeric columns) + """ + ZERO_PERCENTAGE + + """ + The minimum length found in the column set (applies to string columns) + """ + MIN_LENGTH + + """ + The maximum length found in the column set (applies to string columns) + """ + MAX_LENGTH + + """ + The number of empty string values found in the value set (applies to string columns). + Note: This is a completely different metric different from NULL_COUNT! + """ + EMPTY_COUNT + + """ + The percentage of empty string values to total rows for the dataset (applies to string columns). + Note: This is a completely different metric different from NULL_PERCENTAGE! + """ + EMPTY_PERCENTAGE +} + +""" +A definition of a Field (Column) assertion. +""" +type FieldAssertionInfo { + """ + The type of the field assertion being monitored. + """ + type: FieldAssertionType! + + """ + The entity targeted by this Field check. + """ + entityUrn: String! 
+ + """ + The definition of an assertion that validates individual values of a field / column for a set of rows. + """ + fieldValuesAssertion: FieldValuesAssertion + + """ + The definition of an assertion that validates a common metric obtained about a field / column for a set of rows. + """ + fieldMetricAssertion: FieldMetricAssertion + + """ + A definition of the specific filters that should be applied, when performing monitoring. + If not provided, there is no filter, and the full table is under consideration. + """ + filter: DatasetFilter +} + +""" +A definition of a Field Values assertion. +""" +type FieldValuesAssertion { + """ + The field under evaluation. + """ + field: SchemaFieldSpec! + + """ + An optional transform to apply to field values before evaluating the operator. + """ + transform: FieldTransform + + """ + The predicate to evaluate against a single value of the field. + Depending on the operator, parameters may be required + """ + operator: AssertionStdOperator! + + """ + Standard parameters required for the assertion. + """ + parameters: AssertionStdParameters + + """ + Additional customization about when the assertion should be officially considered failing. + """ + failThreshold: FieldValuesFailThreshold! + + """ + Whether to ignore or allow nulls when running the values assertion. + """ + excludeNulls: Boolean! +} + +""" +Definition of a transform applied to the values of a column / field. +""" +type FieldTransform { + """ + The type of the field transform. + """ + type: FieldTransformType! +} + +type FieldValuesFailThreshold { + """ + The type of failure threshold. + """ + type: FieldValuesFailThresholdType! + + """ + The value of the threshold, either representing a count or percentage. + """ + value: Long! +} + +""" +A definition of a Field Metric assertion. +""" +type FieldMetricAssertion { + """ + The field under evaluation + """ + field: SchemaFieldSpec! + + """ + The specific metric to assert against. + """ + metric: FieldMetricType! + + """ + The predicate to evaluate against the metric for the field / column. + """ + operator: AssertionStdOperator! + + """ + Standard parameters required for the assertion. + """ + parameters: AssertionStdParameters +} + +""" +Information about an Schema assertion +""" +type SchemaAssertionInfo { + """ + The entity targeted by this schema assertion. + """ + entityUrn: String! + + """ + A single field in the schema assertion. + """ + fields: [SchemaAssertionField!]! + + """ + A definition of the expected structure for the asset + Deprecated! Use the simpler 'fields' instead. + """ + schema: SchemaMetadata + + """ + The compatibility level required for the assertion to pass. + """ + compatibility: SchemaAssertionCompatibility! +} diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index de030f77b0b017..92d4a1723c0b61 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -7508,6 +7508,11 @@ type BatchSpec { The result type of an assertion, success or failure. """ enum AssertionResultType { + """ + The assertion has not yet been fully evaluated. + """ + INIT + """ The assertion succeeded. """ @@ -7517,6 +7522,11 @@ enum AssertionResultType { The assertion failed. """ FAILURE + + """ + The assertion errored. + """ + ERROR } """ @@ -7678,6 +7688,16 @@ enum AssertionStdOperator { """ NOT_IN + """ + Value being asserted is true. + """ + IS_TRUE + + """ + Value being asserted is false. 
+ """ + IS_FALSE + """ Other """ @@ -7824,6 +7844,11 @@ type AssertionRunEventsResult { """ succeeded: Int! + """ + The number of errored run events + """ + errored: Int! + """ The run events themselves """ diff --git a/datahub-graphql-core/src/main/resources/incident.graphql b/datahub-graphql-core/src/main/resources/incident.graphql index f7060b3ae8f67b..c2938543ed9494 100644 --- a/datahub-graphql-core/src/main/resources/incident.graphql +++ b/datahub-graphql-core/src/main/resources/incident.graphql @@ -136,6 +136,36 @@ enum IncidentState { A specific type of incident """ enum IncidentType { + """ + A Freshness Assertion has failed, triggering the incident. + Raised on assets where assertions are configured to generate incidents. + """ + FRESHNESS + + """ + A Volume Assertion has failed, triggering the incident. + Raised on assets where assertions are configured to generate incidents. + """ + VOLUME + + """ + A Field Assertion has failed, triggering the incident. + Raised on assets where assertions are configured to generate incidents. + """ + FIELD + + """ + A SQL Assertion has failed, triggering the incident. + Raised on assets where assertions are configured to generate incidents. + """ + SQL + + """ + A Schema has failed, triggering the incident. + Raised on assets where assertions are configured to generate incidents. + """ + DATA_SCHEMA + """ An operational incident, e.g. failure to materialize a dataset, or failure to execute a task / pipeline. """ @@ -174,6 +204,11 @@ enum IncidentSourceType { The incident was created manually, from either the API or the UI. """ MANUAL + + """ + An assertion has failed, triggering the incident. + """ + ASSERTION_FAILURE } """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java index 7323a62d94bfe2..c047a0d0a3f051 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java @@ -97,6 +97,7 @@ public void testGetSuccess() throws Exception { assertEquals(result.getTotal(), 1); assertEquals(result.getFailed(), 0); assertEquals(result.getSucceeded(), 1); + assertEquals(result.getErrored(), 0); com.linkedin.datahub.graphql.generated.AssertionRunEvent graphqlRunEvent = resolver.get(mockEnv).get().getRunEvents().get(0); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapperTest.java new file mode 100644 index 00000000000000..376af14af08f65 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapperTest.java @@ -0,0 +1,346 @@ +package com.linkedin.datahub.graphql.types.assertion; + +import static org.testng.Assert.assertEquals; + +import com.google.common.collect.ImmutableList; +import com.linkedin.assertion.AssertionInfo; +import com.linkedin.assertion.AssertionSource; +import com.linkedin.assertion.AssertionStdAggregation; +import com.linkedin.assertion.AssertionStdOperator; +import com.linkedin.assertion.AssertionStdParameter; +import com.linkedin.assertion.AssertionStdParameterType; +import com.linkedin.assertion.AssertionStdParameters; +import 
com.linkedin.assertion.AssertionType; +import com.linkedin.assertion.DatasetAssertionInfo; +import com.linkedin.assertion.DatasetAssertionScope; +import com.linkedin.assertion.FreshnessAssertionInfo; +import com.linkedin.assertion.FreshnessAssertionSchedule; +import com.linkedin.assertion.FreshnessAssertionScheduleType; +import com.linkedin.assertion.FreshnessAssertionType; +import com.linkedin.assertion.FreshnessCronSchedule; +import com.linkedin.assertion.SchemaAssertionCompatibility; +import com.linkedin.assertion.SchemaAssertionInfo; +import com.linkedin.common.GlobalTags; +import com.linkedin.common.TagAssociationArray; +import com.linkedin.common.UrnArray; +import com.linkedin.common.urn.TagUrn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.DataMap; +import com.linkedin.data.template.StringMap; +import com.linkedin.datahub.graphql.generated.Assertion; +import com.linkedin.datahub.graphql.generated.FixedIntervalSchedule; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.metadata.Constants; +import com.linkedin.schema.MySqlDDL; +import com.linkedin.schema.SchemaField; +import com.linkedin.schema.SchemaFieldArray; +import com.linkedin.schema.SchemaFieldDataType; +import com.linkedin.schema.SchemaMetadata; +import com.linkedin.schema.StringType; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class AssertionMapperTest { + + @Test + public void testMapDatasetAssertion() { + // Case 1: Without nullable fields + AssertionInfo input = createFreshnessAssertionInfoWithoutNullableFields(); + EntityResponse datasetAssertionEntityResponse = createAssertionInfoEntityResponse(input); + Assertion output = AssertionMapper.map(null, datasetAssertionEntityResponse); + verifyAssertionInfo(input, output); + + // Case 2: With nullable fields + input = createFreshnessAssertionInfoWithNullableFields(); + EntityResponse datasetAssertionEntityResponseWithNullables = + createAssertionInfoEntityResponse(input); + output = AssertionMapper.map(null, datasetAssertionEntityResponseWithNullables); + verifyAssertionInfo(input, output); + } + + @Test + public void testMapTags() throws Exception { + HashMap aspects = new HashMap<>(); + AssertionInfo info = createFreshnessAssertionInfoWithoutNullableFields(); + + EnvelopedAspect envelopedTagsAspect = new EnvelopedAspect(); + GlobalTags tags = new GlobalTags(); + tags.setTags( + new TagAssociationArray( + new TagAssociationArray( + Collections.singletonList( + new com.linkedin.common.TagAssociation() + .setTag(TagUrn.createFromString("urn:li:tag:test")))))); + envelopedTagsAspect.setValue(new Aspect(tags.data())); + + aspects.put(Constants.ASSERTION_INFO_ASPECT_NAME, createEnvelopedAspect(info.data())); + aspects.put(Constants.GLOBAL_TAGS_ASPECT_NAME, createEnvelopedAspect(tags.data())); + EntityResponse response = createEntityResponse(aspects); + + Assertion assertion = AssertionMapper.map(null, response); + assertEquals(assertion.getTags().getTags().size(), 1); + assertEquals( + assertion.getTags().getTags().get(0).getTag().getUrn().toString(), "urn:li:tag:test"); + } + + @Test + public void testMapFreshnessAssertion() { + // Case 1: Without nullable fields + AssertionInfo inputInfo = createFreshnessAssertionInfoWithoutNullableFields(); + + EntityResponse 
freshnessAssertionEntityResponse = createAssertionInfoEntityResponse(inputInfo); + Assertion output = AssertionMapper.map(null, freshnessAssertionEntityResponse); + verifyAssertionInfo(inputInfo, output); + + // Case 2: With nullable fields + inputInfo = createDatasetAssertionInfoWithNullableFields(); + EntityResponse freshnessAssertionEntityResponseWithNullables = + createAssertionInfoEntityResponse(inputInfo); + output = AssertionMapper.map(null, freshnessAssertionEntityResponseWithNullables); + verifyAssertionInfo(inputInfo, output); + } + + @Test + public void testMapDataSchemaAssertion() { + AssertionInfo input = createSchemaAssertion(); + EntityResponse schemaAssertionEntityResponse = createAssertionInfoEntityResponse(input); + Assertion output = AssertionMapper.map(null, schemaAssertionEntityResponse); + verifyAssertionInfo(input, output); + } + + private void verifyAssertionInfo(AssertionInfo input, Assertion output) { + Assert.assertNotNull(output); + Assert.assertNotNull(output.getInfo()); + Assert.assertEquals( + output.getInfo().getType().toString(), output.getInfo().getType().toString()); + + if (input.hasDatasetAssertion()) { + verifyDatasetAssertion(input.getDatasetAssertion(), output.getInfo().getDatasetAssertion()); + } + + if (input.hasFreshnessAssertion()) { + verifyFreshnessAssertion( + input.getFreshnessAssertion(), output.getInfo().getFreshnessAssertion()); + } + + if (input.hasSchemaAssertion()) { + verifySchemaAssertion(input.getSchemaAssertion(), output.getInfo().getSchemaAssertion()); + } + + if (input.hasSource()) { + verifySource(input.getSource(), output.getInfo().getSource()); + } + } + + private void verifyDatasetAssertion( + DatasetAssertionInfo input, + com.linkedin.datahub.graphql.generated.DatasetAssertionInfo output) { + Assert.assertEquals(output.getOperator().toString(), input.getOperator().toString()); + Assert.assertEquals(output.getOperator().toString(), input.getOperator().toString()); + Assert.assertEquals(output.getScope().toString(), input.getScope().toString()); + Assert.assertEquals(output.getDatasetUrn(), input.getDataset().toString()); + if (input.hasAggregation()) { + Assert.assertEquals(output.getAggregation().toString(), input.getAggregation().toString()); + } + if (input.hasNativeType()) { + Assert.assertEquals(output.getNativeType(), input.getNativeType().toString()); + } + if (input.hasLogic()) { + Assert.assertEquals(output.getLogic(), input.getLogic()); + } + if (input.hasFields()) { + Assert.assertTrue( + input.getFields().stream() + .allMatch( + field -> + output.getFields().stream() + .anyMatch(outField -> field.toString().equals(outField.getUrn())))); + } + } + + private void verifyFreshnessAssertion( + FreshnessAssertionInfo input, + com.linkedin.datahub.graphql.generated.FreshnessAssertionInfo output) { + Assert.assertEquals(output.getType().toString(), input.getType().toString()); + Assert.assertEquals(output.getEntityUrn(), input.getEntity().toString()); + if (input.hasSchedule()) { + verifyFreshnessSchedule(input.getSchedule(), output.getSchedule()); + } + } + + private void verifySchemaAssertion( + SchemaAssertionInfo input, + com.linkedin.datahub.graphql.generated.SchemaAssertionInfo output) { + Assert.assertEquals(output.getEntityUrn(), input.getEntity().toString()); + Assert.assertEquals(output.getCompatibility().toString(), input.getCompatibility().toString()); + Assert.assertEquals( + output.getSchema().getFields().size(), input.getSchema().getFields().size()); + } + + private void verifyCronSchedule( + 
FreshnessCronSchedule input, + com.linkedin.datahub.graphql.generated.FreshnessCronSchedule output) { + Assert.assertEquals(output.getCron(), input.getCron()); + Assert.assertEquals(output.getTimezone(), input.getTimezone()); + if (input.hasWindowStartOffsetMs()) { + Assert.assertEquals(output.getWindowStartOffsetMs(), input.getWindowStartOffsetMs()); + } + } + + private void verifyFreshnessSchedule( + FreshnessAssertionSchedule input, + com.linkedin.datahub.graphql.generated.FreshnessAssertionSchedule output) { + Assert.assertEquals(output.getType().toString(), input.getType().toString()); + if (input.hasCron()) { + verifyCronSchedule(input.getCron(), output.getCron()); + } + if (input.hasFixedInterval()) { + verifyFixedIntervalSchedule(input.getFixedInterval(), output.getFixedInterval()); + } + } + + private void verifyFixedIntervalSchedule( + com.linkedin.assertion.FixedIntervalSchedule input, FixedIntervalSchedule output) { + Assert.assertEquals(output.getMultiple(), (int) input.getMultiple()); + Assert.assertEquals(output.getUnit().toString(), input.getUnit().toString()); + } + + private void verifySource( + AssertionSource input, com.linkedin.datahub.graphql.generated.AssertionSource output) { + Assert.assertEquals(output.getType().toString(), input.getType().toString()); + } + + private EntityResponse createAssertionInfoEntityResponse(final AssertionInfo info) { + HashMap aspects = new HashMap<>(); + aspects.put(Constants.ASSERTION_INFO_ASPECT_NAME, createEnvelopedAspect(info.data())); + + return createEntityResponse(aspects); + } + + private EntityResponse createEntityResponse(Map aspects) { + EntityResponse entityResponse = new EntityResponse(); + entityResponse.setUrn(UrnUtils.getUrn("urn:li:assertion:1")); + entityResponse.setAspects(new EnvelopedAspectMap(new HashMap<>())); + aspects.forEach( + (aspectName, envelopedAspect) -> { + entityResponse.getAspects().put(aspectName, envelopedAspect); + }); + + return entityResponse; + } + + private EnvelopedAspect createEnvelopedAspect(DataMap dataMap) { + EnvelopedAspect envelopedAspect = new EnvelopedAspect(); + envelopedAspect.setValue(new Aspect(dataMap)); + return envelopedAspect; + } + + private AssertionInfo createDatasetAssertionInfoWithoutNullableFields() { + AssertionInfo info = new AssertionInfo(); + info.setType(com.linkedin.assertion.AssertionType.DATASET); + DatasetAssertionInfo datasetAssertionInfo = new DatasetAssertionInfo(); + datasetAssertionInfo.setDataset(UrnUtils.getUrn("urn:li:dataset:1")); + datasetAssertionInfo.setScope(DatasetAssertionScope.DATASET_COLUMN); + datasetAssertionInfo.setOperator(AssertionStdOperator.GREATER_THAN); + info.setDatasetAssertion(datasetAssertionInfo); + return info; + } + + private AssertionInfo createDatasetAssertionInfoWithNullableFields() { + AssertionInfo infoWithoutNullables = createDatasetAssertionInfoWithoutNullableFields(); + DatasetAssertionInfo baseInfo = infoWithoutNullables.getDatasetAssertion(); + baseInfo.setFields( + new UrnArray( + Arrays.asList( + UrnUtils.getUrn( + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,name,PROD),field)")))); + baseInfo.setAggregation(AssertionStdAggregation.SUM); + baseInfo.setParameters(createAssertionStdParameters()); + baseInfo.setNativeType("native_type"); + baseInfo.setNativeParameters(new StringMap(Collections.singletonMap("key", "value"))); + baseInfo.setLogic("sample_logic"); + infoWithoutNullables.setSource( + new AssertionSource().setType(com.linkedin.assertion.AssertionSourceType.INFERRED)); + return 
infoWithoutNullables; + } + + private AssertionInfo createFreshnessAssertionInfoWithoutNullableFields() { + AssertionInfo info = new AssertionInfo(); + info.setType(AssertionType.FRESHNESS); + FreshnessAssertionInfo freshnessAssertionInfo = new FreshnessAssertionInfo(); + freshnessAssertionInfo.setEntity( + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,name,PROD)")); + freshnessAssertionInfo.setType(FreshnessAssertionType.DATASET_CHANGE); + info.setFreshnessAssertion(freshnessAssertionInfo); + return info; + } + + private AssertionInfo createFreshnessAssertionInfoWithNullableFields() { + AssertionInfo infoWithoutNullables = createFreshnessAssertionInfoWithoutNullableFields(); + FreshnessAssertionInfo baseInfo = infoWithoutNullables.getFreshnessAssertion(); + baseInfo.setSchedule(createFreshnessAssertionSchedule()); + infoWithoutNullables.setSource( + new AssertionSource().setType(com.linkedin.assertion.AssertionSourceType.INFERRED)); + return infoWithoutNullables; + } + + private AssertionInfo createSchemaAssertion() { + AssertionInfo info = new AssertionInfo(); + info.setType(AssertionType.DATA_SCHEMA); + SchemaAssertionInfo schemaAssertionInfo = new SchemaAssertionInfo(); + schemaAssertionInfo.setEntity(UrnUtils.getUrn("urn:li:dataset:1")); + schemaAssertionInfo.setCompatibility(SchemaAssertionCompatibility.SUPERSET); + schemaAssertionInfo.setSchema( + new SchemaMetadata() + .setCluster("Test") + .setHash("Test") + .setPlatformSchema(SchemaMetadata.PlatformSchema.create(new MySqlDDL())) + .setFields( + new SchemaFieldArray( + ImmutableList.of( + new SchemaField() + .setType( + new SchemaFieldDataType() + .setType(SchemaFieldDataType.Type.create(new StringType()))) + .setNullable(false) + .setNativeDataType("string") + .setFieldPath("test"))))); + return info; + } + + private AssertionStdParameters createAssertionStdParameters() { + AssertionStdParameters parameters = new AssertionStdParameters(); + parameters.setValue(createAssertionStdParameter()); + parameters.setMinValue(createAssertionStdParameter()); + parameters.setMaxValue(createAssertionStdParameter()); + return parameters; + } + + private AssertionStdParameter createAssertionStdParameter() { + AssertionStdParameter parameter = new AssertionStdParameter(); + parameter.setType(AssertionStdParameterType.NUMBER); + parameter.setValue("100"); + return parameter; + } + + private FreshnessAssertionSchedule createFreshnessAssertionSchedule() { + FreshnessAssertionSchedule schedule = new FreshnessAssertionSchedule(); + schedule.setType(FreshnessAssertionScheduleType.CRON); + schedule.setCron(createCronSchedule()); + return schedule; + } + + private FreshnessCronSchedule createCronSchedule() { + FreshnessCronSchedule cronSchedule = new FreshnessCronSchedule(); + cronSchedule.setCron("0 0 * * *"); + cronSchedule.setTimezone("UTC"); + return cronSchedule; + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/AssertionTypeTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/AssertionTypeTest.java index dd2b676a941302..33774690b7c7a9 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/AssertionTypeTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/AssertionTypeTest.java @@ -7,6 +7,10 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import 
com.linkedin.assertion.AssertionAction; +import com.linkedin.assertion.AssertionActionArray; +import com.linkedin.assertion.AssertionActionType; +import com.linkedin.assertion.AssertionActions; import com.linkedin.assertion.AssertionInfo; import com.linkedin.assertion.AssertionType; import com.linkedin.common.DataPlatformInstance; @@ -48,6 +52,17 @@ public class AssertionTypeTest { new DataPlatformInstance() .setPlatform(new DataPlatformUrn("snowflake")) .setInstance(null, SetMode.IGNORE_NULL); + // Acryl SaaS Only + private static final AssertionActions TEST_ASSERTION_ACTIONS = + new AssertionActions() + .setOnSuccess( + new AssertionActionArray( + ImmutableList.of( + new AssertionAction().setType(AssertionActionType.RAISE_INCIDENT)))) + .setOnFailure( + new AssertionActionArray( + ImmutableList.of( + new AssertionAction().setType(AssertionActionType.RESOLVE_INCIDENT)))); private static final String TEST_ASSERTION_URN_2 = "urn:li:assertion:guid-2"; @@ -69,6 +84,9 @@ public void testBatchLoad() throws Exception { assertion1Aspects.put( Constants.ASSERTION_INFO_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(TEST_ASSERTION_INFO.data()))); + assertion1Aspects.put( + Constants.ASSERTION_ACTIONS_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_ASSERTION_ACTIONS.data()))); Mockito.when( client.batchGetV2( any(), @@ -112,6 +130,12 @@ public void testBatchLoad() throws Exception { assertEquals(assertion.getInfo().getType().toString(), AssertionType.DATASET.toString()); assertEquals(assertion.getInfo().getDatasetAssertion(), null); assertEquals(assertion.getPlatform().getUrn(), "urn:li:dataPlatform:snowflake"); + assertEquals( + assertion.getActions().getOnSuccess().get(0).getType(), + com.linkedin.datahub.graphql.generated.AssertionActionType.RAISE_INCIDENT); + assertEquals( + assertion.getActions().getOnFailure().get(0).getType(), + com.linkedin.datahub.graphql.generated.AssertionActionType.RESOLVE_INCIDENT); // Assert second element is null. 
assertNull(result.get(1)); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/FieldAssertionMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/FieldAssertionMapperTest.java new file mode 100644 index 00000000000000..7758aaa986fed3 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/FieldAssertionMapperTest.java @@ -0,0 +1,100 @@ +package com.linkedin.datahub.graphql.types.assertion; + +import com.linkedin.assertion.AssertionStdOperator; +import com.linkedin.assertion.FieldAssertionInfo; +import com.linkedin.assertion.FieldAssertionType; +import com.linkedin.assertion.FieldMetricAssertion; +import com.linkedin.assertion.FieldMetricType; +import com.linkedin.assertion.FieldTransform; +import com.linkedin.assertion.FieldTransformType; +import com.linkedin.assertion.FieldValuesAssertion; +import com.linkedin.assertion.FieldValuesFailThreshold; +import com.linkedin.assertion.FieldValuesFailThresholdType; +import com.linkedin.common.urn.Urn; +import com.linkedin.dataset.DatasetFilter; +import com.linkedin.dataset.DatasetFilterType; +import com.linkedin.schema.SchemaFieldSpec; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class FieldAssertionMapperTest { + @Test + public void testMapFieldValuesAssertionInfo() throws Exception { + FieldAssertionInfo fieldAssertionInfo = + new FieldAssertionInfo() + .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)")) + .setFilter( + new DatasetFilter().setType(DatasetFilterType.SQL).setSql("WHERE value > 5;")) + .setType(FieldAssertionType.FIELD_VALUES) + .setFieldValuesAssertion( + new FieldValuesAssertion() + .setExcludeNulls(true) + .setFailThreshold( + new FieldValuesFailThreshold() + .setType(FieldValuesFailThresholdType.PERCENTAGE) + .setValue(5L)) + .setField( + new SchemaFieldSpec() + .setPath("path") + .setType("STRING") + .setNativeType("VARCHAR")) + .setOperator(AssertionStdOperator.IS_TRUE) + .setTransform(new FieldTransform().setType(FieldTransformType.LENGTH))); + + com.linkedin.datahub.graphql.generated.FieldAssertionInfo result = + FieldAssertionMapper.mapFieldAssertionInfo(null, fieldAssertionInfo); + Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"); + Assert.assertEquals( + result.getType(), com.linkedin.datahub.graphql.generated.FieldAssertionType.FIELD_VALUES); + Assert.assertEquals( + result.getFilter().getType(), com.linkedin.datahub.graphql.generated.DatasetFilterType.SQL); + Assert.assertEquals(result.getFilter().getSql(), "WHERE value > 5;"); + Assert.assertEquals(result.getFieldValuesAssertion().getField().getPath(), "path"); + Assert.assertEquals(result.getFieldValuesAssertion().getField().getType(), "STRING"); + Assert.assertEquals(result.getFieldValuesAssertion().getField().getNativeType(), "VARCHAR"); + Assert.assertEquals( + result.getFieldValuesAssertion().getOperator(), + com.linkedin.datahub.graphql.generated.AssertionStdOperator.IS_TRUE); + Assert.assertEquals( + result.getFieldValuesAssertion().getTransform().getType(), + com.linkedin.datahub.graphql.generated.FieldTransformType.LENGTH); + Assert.assertEquals(result.getFieldValuesAssertion().getExcludeNulls(), true); + Assert.assertEquals( + result.getFieldValuesAssertion().getFailThreshold().getType(), + com.linkedin.datahub.graphql.generated.FieldValuesFailThresholdType.PERCENTAGE); + Assert.assertEquals( + 
result.getFieldValuesAssertion().getFailThreshold().getValue(), Long.valueOf(5L)); + } + + @Test + public void testMapFieldMetricAssertionInfo() throws Exception { + FieldAssertionInfo fieldAssertionInfo = + new FieldAssertionInfo() + .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)")) + .setType(FieldAssertionType.FIELD_METRIC) + .setFieldMetricAssertion( + new FieldMetricAssertion() + .setField( + new SchemaFieldSpec() + .setPath("path") + .setType("STRING") + .setNativeType("VARCHAR")) + .setOperator(AssertionStdOperator.IS_TRUE) + .setMetric(FieldMetricType.MEDIAN)); + + com.linkedin.datahub.graphql.generated.FieldAssertionInfo result = + FieldAssertionMapper.mapFieldAssertionInfo(null, fieldAssertionInfo); + Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"); + Assert.assertEquals( + result.getType(), com.linkedin.datahub.graphql.generated.FieldAssertionType.FIELD_METRIC); + Assert.assertEquals(result.getFieldMetricAssertion().getField().getPath(), "path"); + Assert.assertEquals(result.getFieldMetricAssertion().getField().getType(), "STRING"); + Assert.assertEquals(result.getFieldMetricAssertion().getField().getNativeType(), "VARCHAR"); + Assert.assertEquals( + result.getFieldMetricAssertion().getOperator(), + com.linkedin.datahub.graphql.generated.AssertionStdOperator.IS_TRUE); + Assert.assertEquals( + result.getFieldMetricAssertion().getMetric(), + com.linkedin.datahub.graphql.generated.FieldMetricType.MEDIAN); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/FreshnessAssertionMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/FreshnessAssertionMapperTest.java new file mode 100644 index 00000000000000..b69ed02bdfd626 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/FreshnessAssertionMapperTest.java @@ -0,0 +1,82 @@ +package com.linkedin.datahub.graphql.types.assertion; + +import com.linkedin.assertion.FixedIntervalSchedule; +import com.linkedin.assertion.FreshnessAssertionInfo; +import com.linkedin.assertion.FreshnessAssertionSchedule; +import com.linkedin.assertion.FreshnessAssertionScheduleType; +import com.linkedin.assertion.FreshnessAssertionType; +import com.linkedin.assertion.FreshnessCronSchedule; +import com.linkedin.common.urn.Urn; +import com.linkedin.dataset.DatasetFilter; +import com.linkedin.dataset.DatasetFilterType; +import com.linkedin.timeseries.CalendarInterval; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class FreshnessAssertionMapperTest { + @Test + public void testMapCronFreshnessAssertionInfo() throws Exception { + FreshnessAssertionInfo freshnessAssertionInfo = + new FreshnessAssertionInfo() + .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)")) + .setType(FreshnessAssertionType.DATASET_CHANGE) + .setFilter( + new DatasetFilter().setType(DatasetFilterType.SQL).setSql("WHERE value > 5;")) + .setSchedule( + new FreshnessAssertionSchedule() + .setType(FreshnessAssertionScheduleType.CRON) + .setCron( + new FreshnessCronSchedule() + .setCron("0 0 0 * * ? 
*") + .setTimezone("America/Los_Angeles") + .setWindowStartOffsetMs(10L))); + + com.linkedin.datahub.graphql.generated.FreshnessAssertionInfo result = + FreshnessAssertionMapper.mapFreshnessAssertionInfo(null, freshnessAssertionInfo); + Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"); + Assert.assertEquals( + result.getType(), + com.linkedin.datahub.graphql.generated.FreshnessAssertionType.DATASET_CHANGE); + Assert.assertEquals( + result.getFilter().getType(), com.linkedin.datahub.graphql.generated.DatasetFilterType.SQL); + Assert.assertEquals(result.getFilter().getSql(), "WHERE value > 5;"); + Assert.assertEquals( + result.getSchedule().getType(), + com.linkedin.datahub.graphql.generated.FreshnessAssertionScheduleType.CRON); + Assert.assertEquals(result.getSchedule().getCron().getCron(), "0 0 0 * * ? *"); + Assert.assertEquals(result.getSchedule().getCron().getTimezone(), "America/Los_Angeles"); + Assert.assertEquals(result.getSchedule().getCron().getWindowStartOffsetMs(), Long.valueOf(10L)); + } + + @Test + public void testMapFixedIntervalFreshnessAssertionInfo() throws Exception { + FreshnessAssertionInfo freshnessAssertionInfo = + new FreshnessAssertionInfo() + .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)")) + .setType(FreshnessAssertionType.DATASET_CHANGE) + .setFilter( + new DatasetFilter().setType(DatasetFilterType.SQL).setSql("WHERE value > 5;")) + .setSchedule( + new FreshnessAssertionSchedule() + .setType(FreshnessAssertionScheduleType.FIXED_INTERVAL) + .setFixedInterval( + new FixedIntervalSchedule().setUnit(CalendarInterval.DAY).setMultiple(10))); + + com.linkedin.datahub.graphql.generated.FreshnessAssertionInfo result = + FreshnessAssertionMapper.mapFreshnessAssertionInfo(null, freshnessAssertionInfo); + Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"); + Assert.assertEquals( + result.getType(), + com.linkedin.datahub.graphql.generated.FreshnessAssertionType.DATASET_CHANGE); + Assert.assertEquals( + result.getFilter().getType(), com.linkedin.datahub.graphql.generated.DatasetFilterType.SQL); + Assert.assertEquals(result.getFilter().getSql(), "WHERE value > 5;"); + Assert.assertEquals( + result.getSchedule().getType(), + com.linkedin.datahub.graphql.generated.FreshnessAssertionScheduleType.FIXED_INTERVAL); + Assert.assertEquals( + result.getSchedule().getFixedInterval().getUnit(), + com.linkedin.datahub.graphql.generated.DateInterval.DAY); + Assert.assertEquals(result.getSchedule().getFixedInterval().getMultiple(), 10); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/SqlAssertionMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/SqlAssertionMapperTest.java new file mode 100644 index 00000000000000..271362c9fd8468 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/SqlAssertionMapperTest.java @@ -0,0 +1,78 @@ +package com.linkedin.datahub.graphql.types.assertion; + +import com.linkedin.assertion.AssertionStdOperator; +import com.linkedin.assertion.AssertionStdParameter; +import com.linkedin.assertion.AssertionStdParameterType; +import com.linkedin.assertion.AssertionStdParameters; +import com.linkedin.assertion.AssertionValueChangeType; +import com.linkedin.assertion.SqlAssertionInfo; +import com.linkedin.assertion.SqlAssertionType; +import com.linkedin.common.urn.Urn; +import org.testng.Assert; +import 
org.testng.annotations.Test; + +public class SqlAssertionMapperTest { + @Test + public void testMapMetricSqlAssertionInfo() throws Exception { + SqlAssertionInfo sqlAssertionInfo = + new SqlAssertionInfo() + .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)")) + .setType(SqlAssertionType.METRIC) + .setStatement("SELECT COUNT(*) FROM foo.bar.baz") + .setOperator(AssertionStdOperator.GREATER_THAN) + .setParameters( + new AssertionStdParameters() + .setValue( + new AssertionStdParameter() + .setType(AssertionStdParameterType.NUMBER) + .setValue(("5")))); + + com.linkedin.datahub.graphql.generated.SqlAssertionInfo result = + SqlAssertionMapper.mapSqlAssertionInfo(sqlAssertionInfo); + Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"); + Assert.assertEquals( + result.getType(), com.linkedin.datahub.graphql.generated.SqlAssertionType.METRIC); + Assert.assertEquals(result.getStatement(), "SELECT COUNT(*) FROM foo.bar.baz"); + Assert.assertEquals( + result.getOperator(), + com.linkedin.datahub.graphql.generated.AssertionStdOperator.GREATER_THAN); + Assert.assertEquals( + result.getParameters().getValue().getType(), + com.linkedin.datahub.graphql.generated.AssertionStdParameterType.NUMBER); + Assert.assertEquals(result.getParameters().getValue().getValue(), "5"); + } + + @Test + public void testMapMetricChangeSqlAssertionInfo() throws Exception { + SqlAssertionInfo sqlAssertionInfo = + new SqlAssertionInfo() + .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)")) + .setType(SqlAssertionType.METRIC_CHANGE) + .setStatement("SELECT COUNT(*) FROM foo.bar.baz") + .setChangeType(AssertionValueChangeType.ABSOLUTE) + .setOperator(AssertionStdOperator.GREATER_THAN) + .setParameters( + new AssertionStdParameters() + .setValue( + new AssertionStdParameter() + .setType(AssertionStdParameterType.NUMBER) + .setValue(("5")))); + + com.linkedin.datahub.graphql.generated.SqlAssertionInfo result = + SqlAssertionMapper.mapSqlAssertionInfo(sqlAssertionInfo); + Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"); + Assert.assertEquals( + result.getType(), com.linkedin.datahub.graphql.generated.SqlAssertionType.METRIC_CHANGE); + Assert.assertEquals(result.getStatement(), "SELECT COUNT(*) FROM foo.bar.baz"); + Assert.assertEquals( + result.getOperator(), + com.linkedin.datahub.graphql.generated.AssertionStdOperator.GREATER_THAN); + Assert.assertEquals( + result.getParameters().getValue().getType(), + com.linkedin.datahub.graphql.generated.AssertionStdParameterType.NUMBER); + Assert.assertEquals(result.getParameters().getValue().getValue(), "5"); + Assert.assertEquals( + result.getChangeType(), + com.linkedin.datahub.graphql.generated.AssertionValueChangeType.ABSOLUTE); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/VolumeAssertionMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/VolumeAssertionMapperTest.java new file mode 100644 index 00000000000000..f23fadb6992078 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/assertion/VolumeAssertionMapperTest.java @@ -0,0 +1,207 @@ +package com.linkedin.datahub.graphql.types.assertion; + +import com.linkedin.assertion.AssertionStdOperator; +import com.linkedin.assertion.AssertionStdParameter; +import com.linkedin.assertion.AssertionStdParameterType; +import com.linkedin.assertion.AssertionStdParameters; +import 
com.linkedin.assertion.AssertionValueChangeType; +import com.linkedin.assertion.IncrementingSegmentFieldTransformer; +import com.linkedin.assertion.IncrementingSegmentFieldTransformerType; +import com.linkedin.assertion.IncrementingSegmentRowCountChange; +import com.linkedin.assertion.IncrementingSegmentRowCountTotal; +import com.linkedin.assertion.RowCountChange; +import com.linkedin.assertion.RowCountTotal; +import com.linkedin.assertion.VolumeAssertionInfo; +import com.linkedin.assertion.VolumeAssertionType; +import com.linkedin.common.urn.Urn; +import com.linkedin.dataset.DatasetFilter; +import com.linkedin.dataset.DatasetFilterType; +import com.linkedin.schema.SchemaFieldSpec; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class VolumeAssertionMapperTest { + @Test + public void testMapRowCountTotalVolumeAssertionInfo() throws Exception { + VolumeAssertionInfo volumeAssertionInfo = + new VolumeAssertionInfo() + .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)")) + .setType(VolumeAssertionType.ROW_COUNT_TOTAL) + .setFilter( + new DatasetFilter().setType(DatasetFilterType.SQL).setSql("WHERE value > 5;")) + .setRowCountTotal( + new RowCountTotal() + .setOperator(AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO) + .setParameters( + new AssertionStdParameters() + .setValue( + new AssertionStdParameter() + .setType(AssertionStdParameterType.NUMBER) + .setValue("10")))); + + com.linkedin.datahub.graphql.generated.VolumeAssertionInfo result = + VolumeAssertionMapper.mapVolumeAssertionInfo(null, volumeAssertionInfo); + Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"); + Assert.assertEquals( + result.getType(), + com.linkedin.datahub.graphql.generated.VolumeAssertionType.ROW_COUNT_TOTAL); + Assert.assertEquals( + result.getFilter().getType(), com.linkedin.datahub.graphql.generated.DatasetFilterType.SQL); + Assert.assertEquals(result.getFilter().getSql(), "WHERE value > 5;"); + Assert.assertEquals( + result.getRowCountTotal().getOperator(), + com.linkedin.datahub.graphql.generated.AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO); + Assert.assertEquals( + result.getRowCountTotal().getParameters().getValue().getType(), + com.linkedin.datahub.graphql.generated.AssertionStdParameterType.NUMBER); + Assert.assertEquals(result.getRowCountTotal().getParameters().getValue().getValue(), "10"); + } + + @Test + public void testMapRowCountChangeVolumeAssertionInfo() throws Exception { + VolumeAssertionInfo volumeAssertionInfo = + new VolumeAssertionInfo() + .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)")) + .setType(VolumeAssertionType.ROW_COUNT_CHANGE) + .setFilter( + new DatasetFilter().setType(DatasetFilterType.SQL).setSql("WHERE value > 5;")) + .setRowCountChange( + new RowCountChange() + .setOperator(AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO) + .setParameters( + new AssertionStdParameters() + .setValue( + new AssertionStdParameter() + .setType(AssertionStdParameterType.NUMBER) + .setValue("10"))) + .setType(AssertionValueChangeType.ABSOLUTE)); + + com.linkedin.datahub.graphql.generated.VolumeAssertionInfo result = + VolumeAssertionMapper.mapVolumeAssertionInfo(null, volumeAssertionInfo); + Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"); + Assert.assertEquals( + result.getType(), + com.linkedin.datahub.graphql.generated.VolumeAssertionType.ROW_COUNT_CHANGE); + Assert.assertEquals( + result.getFilter().getType(), 
com.linkedin.datahub.graphql.generated.DatasetFilterType.SQL); + Assert.assertEquals(result.getFilter().getSql(), "WHERE value > 5;"); + Assert.assertEquals( + result.getRowCountChange().getOperator(), + com.linkedin.datahub.graphql.generated.AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO); + Assert.assertEquals( + result.getRowCountChange().getParameters().getValue().getType(), + com.linkedin.datahub.graphql.generated.AssertionStdParameterType.NUMBER); + Assert.assertEquals(result.getRowCountChange().getParameters().getValue().getValue(), "10"); + Assert.assertEquals( + result.getRowCountChange().getType(), + com.linkedin.datahub.graphql.generated.AssertionValueChangeType.ABSOLUTE); + } + + @Test + public void testMapIncrementingSegmentRowCountTotalVolumeAssertionInfo() throws Exception { + VolumeAssertionInfo volumeAssertionInfo = + new VolumeAssertionInfo() + .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)")) + .setType(VolumeAssertionType.INCREMENTING_SEGMENT_ROW_COUNT_TOTAL) + .setIncrementingSegmentRowCountTotal( + new IncrementingSegmentRowCountTotal() + .setOperator(AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO) + .setParameters( + new AssertionStdParameters() + .setValue( + new AssertionStdParameter() + .setType(AssertionStdParameterType.NUMBER) + .setValue("10"))) + .setSegment( + new com.linkedin.assertion.IncrementingSegmentSpec() + .setField( + new SchemaFieldSpec() + .setPath("path") + .setNativeType("VARCHAR") + .setType("STRING")) + .setTransformer( + new IncrementingSegmentFieldTransformer() + .setType(IncrementingSegmentFieldTransformerType.CEILING) + .setNativeType("CEILING")))); + + com.linkedin.datahub.graphql.generated.VolumeAssertionInfo result = + VolumeAssertionMapper.mapVolumeAssertionInfo(null, volumeAssertionInfo); + Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"); + Assert.assertEquals( + result.getType(), + com.linkedin.datahub.graphql.generated.VolumeAssertionType + .INCREMENTING_SEGMENT_ROW_COUNT_TOTAL); + Assert.assertEquals( + result.getIncrementingSegmentRowCountTotal().getOperator(), + com.linkedin.datahub.graphql.generated.AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO); + Assert.assertEquals( + result.getIncrementingSegmentRowCountTotal().getParameters().getValue().getType(), + com.linkedin.datahub.graphql.generated.AssertionStdParameterType.NUMBER); + Assert.assertEquals( + result.getIncrementingSegmentRowCountTotal().getParameters().getValue().getValue(), "10"); + Assert.assertEquals( + result.getIncrementingSegmentRowCountTotal().getSegment().getField().getPath(), "path"); + Assert.assertEquals( + result.getIncrementingSegmentRowCountTotal().getSegment().getField().getNativeType(), + "VARCHAR"); + Assert.assertEquals( + result.getIncrementingSegmentRowCountTotal().getSegment().getField().getType(), "STRING"); + Assert.assertEquals( + result.getIncrementingSegmentRowCountTotal().getSegment().getTransformer().getType(), + com.linkedin.datahub.graphql.generated.IncrementingSegmentFieldTransformerType.CEILING); + Assert.assertEquals( + result.getIncrementingSegmentRowCountTotal().getSegment().getTransformer().getNativeType(), + "CEILING"); + } + + @Test + public void testMapIncrementingSegmentRowCountChangeVolumeAssertionInfo() throws Exception { + VolumeAssertionInfo volumeAssertionInfo = + new VolumeAssertionInfo() + .setEntity(new Urn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)")) + .setType(VolumeAssertionType.INCREMENTING_SEGMENT_ROW_COUNT_CHANGE) + 
.setIncrementingSegmentRowCountChange( + new IncrementingSegmentRowCountChange() + .setType(AssertionValueChangeType.ABSOLUTE) + .setOperator(AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO) + .setParameters( + new AssertionStdParameters() + .setValue( + new AssertionStdParameter() + .setType(AssertionStdParameterType.NUMBER) + .setValue("10"))) + .setSegment( + new com.linkedin.assertion.IncrementingSegmentSpec() + .setField( + new SchemaFieldSpec() + .setPath("path") + .setNativeType("VARCHAR") + .setType("STRING")))); + + com.linkedin.datahub.graphql.generated.VolumeAssertionInfo result = + VolumeAssertionMapper.mapVolumeAssertionInfo(null, volumeAssertionInfo); + Assert.assertEquals(result.getEntityUrn(), "urn:li:dataset:(urn:li:dataPlatform:foo,bar,baz)"); + Assert.assertEquals( + result.getType(), + com.linkedin.datahub.graphql.generated.VolumeAssertionType + .INCREMENTING_SEGMENT_ROW_COUNT_CHANGE); + Assert.assertEquals( + result.getIncrementingSegmentRowCountChange().getType(), + com.linkedin.datahub.graphql.generated.AssertionValueChangeType.ABSOLUTE); + Assert.assertEquals( + result.getIncrementingSegmentRowCountChange().getOperator(), + com.linkedin.datahub.graphql.generated.AssertionStdOperator.GREATER_THAN_OR_EQUAL_TO); + Assert.assertEquals( + result.getIncrementingSegmentRowCountChange().getParameters().getValue().getType(), + com.linkedin.datahub.graphql.generated.AssertionStdParameterType.NUMBER); + Assert.assertEquals( + result.getIncrementingSegmentRowCountChange().getParameters().getValue().getValue(), "10"); + Assert.assertEquals( + result.getIncrementingSegmentRowCountChange().getSegment().getField().getPath(), "path"); + Assert.assertEquals( + result.getIncrementingSegmentRowCountChange().getSegment().getField().getNativeType(), + "VARCHAR"); + Assert.assertEquals( + result.getIncrementingSegmentRowCountChange().getSegment().getField().getType(), "STRING"); + } +} diff --git a/docs-website/graphql/generateGraphQLSchema.sh b/docs-website/graphql/generateGraphQLSchema.sh index da14fbc337f903..a904a2e36d7c19 100755 --- a/docs-website/graphql/generateGraphQLSchema.sh +++ b/docs-website/graphql/generateGraphQLSchema.sh @@ -9,6 +9,7 @@ cat ../../datahub-graphql-core/src/main/resources/app.graphql >> combined.graphq cat ../../datahub-graphql-core/src/main/resources/auth.graphql >> combined.graphql cat ../../datahub-graphql-core/src/main/resources/constraints.graphql >> combined.graphql cat ../../datahub-graphql-core/src/main/resources/entity.graphql >> combined.graphql +cat ../../datahub-graphql-core/src/main/resources/assertions.graphql >> combined.graphql cat ../../datahub-graphql-core/src/main/resources/ingestion.graphql >> combined.graphql cat ../../datahub-graphql-core/src/main/resources/recommendation.graphql >> combined.graphql cat ../../datahub-graphql-core/src/main/resources/search.graphql >> combined.graphql diff --git a/docs/townhalls.md b/docs/townhalls.md index c80d198e5184c7..b98561c0f8dfdb 100644 --- a/docs/townhalls.md +++ b/docs/townhalls.md @@ -1,13 +1,24 @@ # DataHub Town Halls -We hold regular virtual town hall meetings to meet with DataHub community. -Currently it's held on the fourth Thursday of every month (with some exceptions such as holiday weekends). -It's the perfect venue to meet the team behind DataHub and other users, as well as to ask higher-level questions, such as roadmap and product direction. -From time to time we also use the opportunity to showcase upcoming features. 
+We hold virtual Town Hall meetings with the DataHub Community on the last Thursday of every month (with some exceptions due to holidays). -## Meeting Invite & Agenda +It's a great opportunity for the Community to come together to discuss project and roadmap updates, see live demos of upcoming features, and hear case studies from other Community Members. -You can join with [this link](https://zoom.datahubproject.io) or [RSVP](https://rsvp.datahubproject.io/) to get a calendar invite - this will always have the most up-to-date agenda for upcoming sessions. +## Upcoming Events + +Check out the DataHub Community Calendar to RSVP to upcoming events! + +
## Town Hall History @@ -15,5 +26,3 @@ See our Town Hall history for the recordings and summaries of the past town hall * [Town Hall Events (July 2023~)](https://www.linkedin.com/company/acryl-data/events/) * [Town Hall Events (~June 2023)](townhall-history.md) - - diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/FieldPathMutator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/FieldPathMutator.java new file mode 100644 index 00000000000000..cc6a338d41c22b --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/FieldPathMutator.java @@ -0,0 +1,142 @@ +package com.linkedin.metadata.aspect.hooks; + +import static com.linkedin.metadata.Constants.EDITABLE_SCHEMA_METADATA_ASPECT_NAME; +import static com.linkedin.metadata.Constants.SCHEMA_METADATA_ASPECT_NAME; + +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.ReadItem; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; +import com.linkedin.schema.EditableSchemaFieldInfo; +import com.linkedin.schema.EditableSchemaFieldInfoArray; +import com.linkedin.schema.EditableSchemaMetadata; +import com.linkedin.schema.SchemaField; +import com.linkedin.schema.SchemaFieldArray; +import com.linkedin.schema.SchemaMetadata; +import com.linkedin.util.Pair; +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Setter +@Getter +@Accessors(chain = true) +public class FieldPathMutator extends MutationHook { + @Nonnull private AspectPluginConfig config; + + @Override + protected Stream> writeMutation( + @Nonnull Collection changeMCPS, @Nonnull RetrieverContext retrieverContext) { + + List> results = new LinkedList<>(); + + for (ChangeMCP item : changeMCPS) { + if (changeTypeFilter(item) && aspectFilter(item)) { + if (item.getAspectName().equals(SCHEMA_METADATA_ASPECT_NAME)) { + results.add(Pair.of(item, processSchemaMetadataAspect(item))); + } else { + results.add(Pair.of(item, processEditableSchemaMetadataAspect(item))); + } + } else { + // no op + results.add(Pair.of(item, false)); + } + } + + return results.stream(); + } + + /* + TODO: After some time, this should no longer be required. Assuming at least 1 write has + occurred for all schema aspects. 
+ */ + @Override + protected Stream> readMutation( + @Nonnull Collection items, @Nonnull RetrieverContext retrieverContext) { + List> results = new LinkedList<>(); + + for (ReadItem item : items) { + if (aspectFilter(item)) { + if (item.getAspectName().equals(SCHEMA_METADATA_ASPECT_NAME)) { + results.add(Pair.of(item, processSchemaMetadataAspect(item))); + } else { + results.add(Pair.of(item, processEditableSchemaMetadataAspect(item))); + } + } else { + // no op + results.add(Pair.of(item, false)); + } + } + + return results.stream(); + } + + private static boolean changeTypeFilter(BatchItem item) { + return !ChangeType.DELETE.equals(item.getChangeType()) + && !ChangeType.PATCH.equals(item.getChangeType()); + } + + private static boolean aspectFilter(ReadItem item) { + return item.getAspectName().equals(SCHEMA_METADATA_ASPECT_NAME) + || item.getAspectName().equals(EDITABLE_SCHEMA_METADATA_ASPECT_NAME); + } + + private static boolean processEditableSchemaMetadataAspect(ReadItem item) { + boolean mutated = false; + final EditableSchemaMetadata schemaMetadata = item.getAspect(EditableSchemaMetadata.class); + EditableSchemaFieldInfoArray fields = schemaMetadata.getEditableSchemaFieldInfo(); + List replaceFields = + deduplicateFieldPaths(fields, EditableSchemaFieldInfo::getFieldPath); + if (!replaceFields.isEmpty()) { + schemaMetadata.setEditableSchemaFieldInfo(new EditableSchemaFieldInfoArray(replaceFields)); + mutated = true; + } + return mutated; + } + + private static boolean processSchemaMetadataAspect(ReadItem item) { + boolean mutated = false; + final SchemaMetadata schemaMetadata = item.getAspect(SchemaMetadata.class); + SchemaFieldArray fields = schemaMetadata.getFields(); + List replaceFields = deduplicateFieldPaths(fields, SchemaField::getFieldPath); + if (!replaceFields.isEmpty()) { + schemaMetadata.setFields(new SchemaFieldArray(replaceFields)); + mutated = true; + } + return mutated; + } + + private static List deduplicateFieldPaths( + Collection fields, Function fieldPathExtractor) { + + // preserve order + final LinkedHashMap> grouped = + fields.stream() + .collect( + Collectors.groupingBy(fieldPathExtractor, LinkedHashMap::new, Collectors.toList())); + + if (grouped.values().stream().anyMatch(v -> v.size() > 1)) { + log.warn( + "Duplicate field path(s) detected. 
Dropping duplicates: {}", + grouped.values().stream().filter(v -> v.size() > 1).collect(Collectors.toList())); + // return first + return grouped.values().stream().map(l -> l.get(0)).collect(Collectors.toList()); + } + + return Collections.emptyList(); + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/UpstreamLineagePatchBuilder.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/UpstreamLineagePatchBuilder.java index 40b6de68f7b56a..08182761aeb03f 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/UpstreamLineagePatchBuilder.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/UpstreamLineagePatchBuilder.java @@ -9,8 +9,6 @@ import com.linkedin.common.urn.DatasetUrn; import com.linkedin.common.urn.Urn; import com.linkedin.dataset.DatasetLineageType; -import com.linkedin.dataset.FineGrainedLineageDownstreamType; -import com.linkedin.dataset.FineGrainedLineageUpstreamType; import com.linkedin.metadata.aspect.patch.PatchOperationType; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -57,48 +55,6 @@ public UpstreamLineagePatchBuilder removeUpstream(@Nonnull DatasetUrn datasetUrn return this; } - /** - * Adds a field as a fine grained upstream - * - * @param schemaFieldUrn a schema field to be marked as upstream, format: - * urn:li:schemaField(DATASET_URN, COLUMN NAME) - * @param confidenceScore optional, confidence score for the lineage edge. Defaults to 1.0 for - * full confidence - * @param transformationOperation string operation type that describes the transformation - * operation happening in the lineage edge - * @param type the upstream lineage type, either Field or Field Set - * @return this builder - */ - public UpstreamLineagePatchBuilder addFineGrainedUpstreamField( - @Nonnull Urn schemaFieldUrn, - @Nullable Float confidenceScore, - @Nonnull String transformationOperation, - @Nullable FineGrainedLineageUpstreamType type) { - Float finalConfidenceScore = getConfidenceScoreOrDefault(confidenceScore); - String finalType; - if (type == null) { - // Default to set of fields if not explicitly a single field - finalType = FineGrainedLineageUpstreamType.FIELD_SET.toString(); - } else { - finalType = type.toString(); - } - - pathValues.add( - ImmutableTriple.of( - PatchOperationType.ADD.getValue(), - FINE_GRAINED_PATH_START - + transformationOperation - + "/" - + "upstreamType" - + "/" - + finalType - + "/" - + encodeValueUrn(schemaFieldUrn), - instance.numberNode(finalConfidenceScore))); - - return this; - } - /** * Adds a field as a fine grained upstream * @@ -135,54 +91,13 @@ public UpstreamLineagePatchBuilder addFineGrainedUpstreamField( FINE_GRAINED_PATH_START + transformationOperation + "/" - + downstreamSchemaField + + encodeValueUrn(downstreamSchemaField) + "/" + finalQueryUrn + "/" + encodeValueUrn(upstreamSchemaField), - instance.numberNode(finalConfidenceScore))); - - return this; - } - - /** - * Adds a field as a fine grained downstream - * - * @param schemaFieldUrn a schema field to be marked as downstream, format: - * urn:li:schemaField(DATASET_URN, COLUMN NAME) - * @param confidenceScore optional, confidence score for the lineage edge. 
Defaults to 1.0 for - * full confidence - * @param transformationOperation string operation type that describes the transformation - * operation happening in the lineage edge - * @param type the downstream lineage type, either Field or Field Set - * @return this builder - */ - public UpstreamLineagePatchBuilder addFineGrainedDownstreamField( - @Nonnull Urn schemaFieldUrn, - @Nullable Float confidenceScore, - @Nonnull String transformationOperation, - @Nullable FineGrainedLineageDownstreamType type) { - Float finalConfidenceScore = getConfidenceScoreOrDefault(confidenceScore); - String finalType; - if (type == null) { - // Default to set of fields if not explicitly a single field - finalType = FineGrainedLineageDownstreamType.FIELD_SET.toString(); - } else { - finalType = type.toString(); - } + fineGrainedLineageNode)); - pathValues.add( - ImmutableTriple.of( - PatchOperationType.ADD.getValue(), - FINE_GRAINED_PATH_START - + transformationOperation - + "/" - + "downstreamType" - + "/" - + finalType - + "/" - + encodeValueUrn(schemaFieldUrn), - instance.numberNode(finalConfidenceScore))); return this; } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableRefFieldSpecExtractor.java b/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableRefFieldSpecExtractor.java index 4f03df973467a9..021713d71e28ba 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableRefFieldSpecExtractor.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableRefFieldSpecExtractor.java @@ -115,6 +115,7 @@ private void extractSearchableRefAnnotation( new SearchableRefAnnotation( pathName, annotation.getFieldType(), + annotation.isQueryByDefault(), annotation.getBoostScore(), annotation.getDepth(), annotation.getRefType(), diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/annotation/SearchableRefAnnotation.java b/entity-registry/src/main/java/com/linkedin/metadata/models/annotation/SearchableRefAnnotation.java index e2ea94c84088b2..6f354cef4777f7 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/annotation/SearchableRefAnnotation.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/annotation/SearchableRefAnnotation.java @@ -32,6 +32,8 @@ public class SearchableRefAnnotation { String fieldName; // Type of the field. Defines how the field is indexed and matched SearchableAnnotation.FieldType fieldType; + // Whether we should match the field for the default search query + boolean queryByDefault; // Boost multiplier to the match score. 
Matches on fields with higher boost score ranks higher double boostScore; // defines what depth should be explored of reference object @@ -72,20 +74,35 @@ public static SearchableRefAnnotation fromPegasusAnnotationObject( + "Mandatory input field refType defining the Entity Type is not provided", ANNOTATION_NAME, context)); } + final Optional queryByDefault = + AnnotationUtils.getField(map, "queryByDefault", Boolean.class); final Optional depth = AnnotationUtils.getField(map, "depth", Integer.class); final Optional boostScore = AnnotationUtils.getField(map, "boostScore", Double.class); final List fieldNameAliases = getFieldNameAliases(map); final SearchableAnnotation.FieldType resolvedFieldType = getFieldType(fieldType, schemaDataType); + return new SearchableRefAnnotation( fieldName.orElse(schemaFieldName), resolvedFieldType, + getQueryByDefault(queryByDefault, resolvedFieldType), boostScore.orElse(1.0), depth.orElse(2), refType.get(), fieldNameAliases); } + private static Boolean getQueryByDefault( + Optional maybeQueryByDefault, SearchableAnnotation.FieldType fieldType) { + if (!maybeQueryByDefault.isPresent()) { + if (DEFAULT_QUERY_FIELD_TYPES.contains(fieldType)) { + return Boolean.TRUE; + } + return Boolean.FALSE; + } + return maybeQueryByDefault.get(); + } + private static SearchableAnnotation.FieldType getFieldType( Optional maybeFieldType, DataSchema.Type schemaDataType) { if (!maybeFieldType.isPresent()) { diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/FieldPathMutatorTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/FieldPathMutatorTest.java new file mode 100644 index 00000000000000..131d5f9a3d6079 --- /dev/null +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/FieldPathMutatorTest.java @@ -0,0 +1,249 @@ +package com.linkedin.metadata.aspect.hooks; + +import static com.linkedin.metadata.Constants.DOMAINS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.EDITABLE_SCHEMA_METADATA_ASPECT_NAME; +import static com.linkedin.metadata.Constants.SCHEMA_METADATA_ASPECT_NAME; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; + +import com.google.common.collect.ImmutableList; +import com.linkedin.common.UrnArray; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.domain.Domains; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.schema.EditableSchemaFieldInfo; +import com.linkedin.schema.EditableSchemaFieldInfoArray; +import com.linkedin.schema.EditableSchemaMetadata; +import com.linkedin.schema.SchemaField; +import com.linkedin.schema.SchemaFieldArray; +import com.linkedin.schema.SchemaFieldDataType; +import com.linkedin.schema.SchemaMetadata; +import com.linkedin.schema.StringType; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; +import com.linkedin.util.Pair; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import 
org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +public class FieldPathMutatorTest { + + private EntityRegistry entityRegistry; + private RetrieverContext mockRetrieverContext; + private DatasetUrn testDatasetUrn; + private final FieldPathMutator test = + new FieldPathMutator().setConfig(mock(AspectPluginConfig.class)); + + @BeforeTest + public void init() throws URISyntaxException { + testDatasetUrn = + DatasetUrn.createFromUrn( + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,test,PROD)")); + + entityRegistry = new TestEntityRegistry(); + AspectRetriever mockAspectRetriever = mock(AspectRetriever.class); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + GraphRetriever mockGraphRetriever = mock(GraphRetriever.class); + mockRetrieverContext = mock(RetrieverContext.class); + when(mockRetrieverContext.getAspectRetriever()).thenReturn(mockAspectRetriever); + when(mockRetrieverContext.getGraphRetriever()).thenReturn(mockGraphRetriever); + } + + @Test + public void testValidateIncorrectAspect() { + final Domains domains = + new Domains() + .setDomains(new UrnArray(ImmutableList.of(UrnUtils.getUrn("urn:li:domain:123")))); + assertEquals( + test.writeMutation( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(testDatasetUrn) + .entitySpec(entityRegistry.getEntitySpec(testDatasetUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testDatasetUrn.getEntityType()) + .getAspectSpec(DOMAINS_ASPECT_NAME)) + .recordTemplate(domains) + .build()), + mockRetrieverContext) + .filter(Pair::getSecond) + .count(), + 0); + } + + @Test + public void testValidateNonDuplicatedSchemaFieldPath() { + final SchemaMetadata schema = getMockSchemaMetadataAspect(false); + assertEquals( + test.writeMutation( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(testDatasetUrn) + .entitySpec(entityRegistry.getEntitySpec(testDatasetUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testDatasetUrn.getEntityType()) + .getAspectSpec(SCHEMA_METADATA_ASPECT_NAME)) + .recordTemplate(schema) + .build()), + mockRetrieverContext) + .filter(Pair::getSecond) + .count(), + 0); + } + + @Test + public void testValidateDuplicatedSchemaFieldPath() { + final SchemaMetadata schema = getMockSchemaMetadataAspect(true); + + List> result = + test.writeMutation( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(testDatasetUrn) + .entitySpec(entityRegistry.getEntitySpec(testDatasetUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testDatasetUrn.getEntityType()) + .getAspectSpec(SCHEMA_METADATA_ASPECT_NAME)) + .recordTemplate(schema) + .build()), + mockRetrieverContext) + .collect(Collectors.toList()); + + assertEquals(result.stream().filter(Pair::getSecond).count(), 1); + assertEquals(result.get(0).getFirst().getAspect(SchemaMetadata.class).getFields().size(), 1); + } + + @Test + public void testValidateDeleteDuplicatedSchemaFieldPath() { + final SchemaMetadata schema = getMockSchemaMetadataAspect(true); + + assertEquals( + test.writeMutation( + Set.of( + TestMCP.builder() + .changeType(ChangeType.DELETE) + .urn(testDatasetUrn) + .entitySpec(entityRegistry.getEntitySpec(testDatasetUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testDatasetUrn.getEntityType()) + .getAspectSpec(SCHEMA_METADATA_ASPECT_NAME)) + .recordTemplate(schema) + .build()), + mockRetrieverContext) + .filter(Pair::getSecond) + .count(), + 0); + } + + @Test + public void 
testValidateNonDuplicatedEditableSchemaFieldPath() { + final EditableSchemaMetadata schema = getMockEditableSchemaMetadataAspect(false); + assertEquals( + test.writeMutation( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(testDatasetUrn) + .entitySpec(entityRegistry.getEntitySpec(testDatasetUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testDatasetUrn.getEntityType()) + .getAspectSpec(EDITABLE_SCHEMA_METADATA_ASPECT_NAME)) + .recordTemplate(schema) + .build()), + mockRetrieverContext) + .filter(Pair::getSecond) + .count(), + 0); + } + + @Test + public void testValidateDuplicatedEditableSchemaFieldPath() { + final EditableSchemaMetadata schema = getMockEditableSchemaMetadataAspect(true); + + List> result = + test.writeMutation( + Set.of( + TestMCP.builder() + .changeType(ChangeType.UPSERT) + .urn(testDatasetUrn) + .entitySpec(entityRegistry.getEntitySpec(testDatasetUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testDatasetUrn.getEntityType()) + .getAspectSpec(EDITABLE_SCHEMA_METADATA_ASPECT_NAME)) + .recordTemplate(schema) + .build()), + mockRetrieverContext) + .collect(Collectors.toList()); + + assertEquals(result.stream().filter(Pair::getSecond).count(), 1); + assertEquals( + result + .get(0) + .getFirst() + .getAspect(EditableSchemaMetadata.class) + .getEditableSchemaFieldInfo() + .size(), + 1); + } + + private SchemaMetadata getMockSchemaMetadataAspect(boolean duplicateFields) { + List fields = new ArrayList<>(); + fields.add( + new SchemaField() + .setType( + new SchemaFieldDataType() + .setType(SchemaFieldDataType.Type.create(new StringType()))) + .setNullable(false) + .setNativeDataType("string") + .setFieldPath("test")); + + if (duplicateFields) { + fields.add( + new SchemaField() + .setType( + new SchemaFieldDataType() + .setType(SchemaFieldDataType.Type.create(new StringType()))) + .setNullable(false) + .setNativeDataType("string") + .setFieldPath("test")); + } + + return new SchemaMetadata() + .setPlatform(testDatasetUrn.getPlatformEntity()) + .setFields(new SchemaFieldArray(fields)); + } + + private EditableSchemaMetadata getMockEditableSchemaMetadataAspect(boolean duplicateFields) { + + List fields = new ArrayList<>(); + fields.add(new EditableSchemaFieldInfo().setFieldPath("test")); + + if (duplicateFields) { + fields.add(new EditableSchemaFieldInfo().setFieldPath("test")); + } + + return new EditableSchemaMetadata() + .setEditableSchemaFieldInfo(new EditableSchemaFieldInfoArray(fields)); + } +} diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 66ed48a428a216..79ae0fbeacd940 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -285,6 +285,7 @@ public class Constants { public static final String ASSERTION_INFO_ASPECT_NAME = "assertionInfo"; public static final String ASSERTION_RUN_EVENT_ASPECT_NAME = "assertionRunEvent"; public static final String ASSERTION_RUN_EVENT_STATUS_COMPLETE = "COMPLETE"; + public static final String ASSERTION_ACTIONS_ASPECT_NAME = "assertionActions"; // Tests public static final String TEST_ENTITY_NAME = "test"; diff --git a/metadata-dao-impl/kafka-producer/build.gradle b/metadata-dao-impl/kafka-producer/build.gradle index bc3415b2ccc8c1..2df15309810dba 100644 --- a/metadata-dao-impl/kafka-producer/build.gradle +++ b/metadata-dao-impl/kafka-producer/build.gradle @@ -18,6 +18,7 @@ dependencies { 
annotationProcessor externalDependency.lombok testImplementation externalDependency.mockito + testImplementation externalDependency.testng constraints { implementation(externalDependency.log4jCore) { diff --git a/metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/producer/KafkaProducerThrottle.java b/metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/producer/KafkaProducerThrottle.java new file mode 100644 index 00000000000000..8fbb34b1eacd6f --- /dev/null +++ b/metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/producer/KafkaProducerThrottle.java @@ -0,0 +1,246 @@ +package com.datahub.metadata.dao.producer; + +import com.codahale.metrics.Gauge; +import com.google.common.annotations.VisibleForTesting; +import com.linkedin.metadata.config.MetadataChangeProposalConfig; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.utils.metrics.MetricUtils; +import com.linkedin.util.Pair; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Builder; +import lombok.extern.slf4j.Slf4j; +import org.apache.kafka.clients.admin.Admin; +import org.apache.kafka.clients.admin.OffsetSpec; +import org.apache.kafka.clients.consumer.OffsetAndMetadata; +import org.apache.kafka.common.TopicPartition; +import org.springframework.util.backoff.BackOffExecution; +import org.springframework.util.backoff.ExponentialBackOff; + +@Slf4j +@Builder(toBuilder = true) +public class KafkaProducerThrottle { + @Nonnull private final EntityRegistry entityRegistry; + @Nonnull private final Admin kafkaAdmin; + @Nonnull private final MetadataChangeProposalConfig.ThrottlesConfig config; + @Nonnull private final String mclConsumerGroupId; + @Nonnull private final String versionedTopicName; + @Nonnull private final String timeseriesTopicName; + @Nonnull private final Consumer pauseConsumer; + + private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1); + private final Map medianLag = new ConcurrentHashMap<>(); + private final Map backoffMap = new ConcurrentHashMap<>(); + + /** Update lag information at a given rate */ + public KafkaProducerThrottle start() { + if ((config.getVersioned().isEnabled() || config.getTimeseries().isEnabled()) + && config.getUpdateIntervalMs() > 0) { + scheduler.scheduleAtFixedRate( + () -> { + refresh(); + try { + throttle(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }, + config.getUpdateIntervalMs(), + config.getUpdateIntervalMs(), + TimeUnit.MILLISECONDS); + } + return this; + } + + @VisibleForTesting + public void refresh() { + medianLag.putAll(getMedianLag()); + log.info("MCL medianLag: {}", medianLag); + } + + @VisibleForTesting + public void stop() { + scheduler.shutdown(); + } + + /** + * Get copy of the lag info + * + * @return median lag per mcl topic + */ + @VisibleForTesting + public Map getLag() { + return medianLag.entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + + @VisibleForTesting + public boolean isThrottled(MclType mclType) { + if (getThrottleConfig(mclType).isEnabled() && 
medianLag.containsKey(mclType)) { + return medianLag.get(mclType) > getThrottleConfig(mclType).getThreshold(); + } + return false; + } + + @VisibleForTesting + public long computeNextBackOff(MclType mclType) { + if (isThrottled(mclType)) { + BackOffExecution backOffExecution = + backoffMap.computeIfAbsent( + mclType, + k -> { + MetadataChangeProposalConfig.ThrottleConfig throttleConfig = + getThrottleConfig(mclType); + ExponentialBackOff backoff = + new ExponentialBackOff( + throttleConfig.getInitialIntervalMs(), throttleConfig.getMultiplier()); + backoff.setMaxAttempts(throttleConfig.getMaxAttempts()); + backoff.setMaxInterval(throttleConfig.getMaxIntervalMs()); + return backoff.start(); + }); + return backOffExecution.nextBackOff(); + } + return 0; + } + + @VisibleForTesting + public void throttle() throws InterruptedException { + for (MclType mclType : MclType.values()) { + if (isThrottled(mclType)) { + long backoffWaitMs = computeNextBackOff(mclType); + + if (backoffWaitMs > 0) { + log.warn( + "Throttled producer Topic: {} Duration: {} ms MedianLag: {}", + getTopicName(mclType), + backoffWaitMs, + medianLag.get(mclType)); + MetricUtils.gauge( + this.getClass(), + String.format("%s_throttled", getTopicName(mclType)), + () -> (Gauge) () -> 1); + MetricUtils.counter( + this.getClass(), String.format("%s_throttledCount", getTopicName(mclType))) + .inc(); + + log.info("Pausing MCE consumer for {} ms.", backoffWaitMs); + pauseConsumer.accept(true); + Thread.sleep(backoffWaitMs); + log.info("Resuming MCE consumer."); + pauseConsumer.accept(false); + + // if throttled for one topic, skip remaining + return; + } else { + // no throttle or exceeded configuration limits + log.info("MCE consumer throttle exponential backoff reset."); + backoffMap.remove(mclType); + MetricUtils.gauge( + this.getClass(), + String.format("%s_throttled", getTopicName(mclType)), + () -> (Gauge) () -> 0); + } + } else { + // not throttled, remove backoff tracking + log.info("MCE consumer throttle exponential backoff reset."); + backoffMap.remove(mclType); + MetricUtils.gauge( + this.getClass(), + String.format("%s_throttled", getTopicName(mclType)), + () -> (Gauge) () -> 0); + } + } + } + + private Map getMedianLag() { + try { + Map mclConsumerOffsets = + kafkaAdmin + .listConsumerGroupOffsets(mclConsumerGroupId) + .partitionsToOffsetAndMetadata() + .get() + .entrySet() + .stream() + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + Map latestOffsetRequest = + mclConsumerOffsets.keySet().stream() + .map(offsetAndMetadata -> Map.entry(offsetAndMetadata, OffsetSpec.latest())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + Map endOffsetValues = + kafkaAdmin.listOffsets(latestOffsetRequest).all().get().entrySet().stream() + .map(entry -> Map.entry(entry.getKey(), entry.getValue().offset())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + return Stream.of( + Pair.of(MclType.VERSIONED, versionedTopicName), + Pair.of(MclType.TIMESERIES, timeseriesTopicName)) + .map( + topic -> { + MclType mclType = topic.getFirst(); + String topicName = topic.getSecond(); + + Map topicOffsets = + mclConsumerOffsets.entrySet().stream() + .filter(entry -> entry.getKey().topic().equals(topicName)) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + List offsetValues = + topicOffsets.values().stream() + .map(OffsetAndMetadata::offset) + .map(Long::doubleValue) + .collect(Collectors.toList()); + long offsetMedian = getMedian(offsetValues).longValue(); + + 
List topicEndOffsetValues = + topicOffsets.keySet().stream() + .map(topicPart -> endOffsetValues.getOrDefault(topicPart, 0L)) + .map(Long::doubleValue) + .collect(Collectors.toList()); + long endOffsetMedian = getMedian(topicEndOffsetValues).longValue(); + return Map.entry(mclType, Math.max(0, endOffsetMedian - offsetMedian)); + }) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } catch (ExecutionException | InterruptedException e) { + log.error("Error fetching consumer group offsets.", e); + return Map.of(MclType.VERSIONED, 0L, MclType.TIMESERIES, 0L); + } + } + + private MetadataChangeProposalConfig.ThrottleConfig getThrottleConfig(MclType mclType) { + MetadataChangeProposalConfig.ThrottleConfig throttleConfig; + switch (mclType) { + case VERSIONED -> throttleConfig = config.getVersioned(); + case TIMESERIES -> throttleConfig = config.getTimeseries(); + default -> throw new IllegalStateException(); + } + return throttleConfig; + } + + private String getTopicName(MclType mclType) { + return MclType.TIMESERIES.equals(mclType) ? timeseriesTopicName : versionedTopicName; + } + + private static Double getMedian(Collection listValues) { + double[] values = listValues.stream().mapToDouble(d -> d).sorted().toArray(); + double median; + if (values.length % 2 == 0) + median = (values[values.length / 2] + values[values.length / 2 - 1]) / 2; + else median = values[values.length / 2]; + return median; + } + + public enum MclType { + TIMESERIES, + VERSIONED + } +} diff --git a/metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/producer/KafkaProducerThrottleTest.java b/metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/producer/KafkaProducerThrottleTest.java new file mode 100644 index 00000000000000..ce6104ee2ca7dc --- /dev/null +++ b/metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/producer/KafkaProducerThrottleTest.java @@ -0,0 +1,363 @@ +package com.datahub.metadata.dao.producer; + +import static org.mockito.ArgumentMatchers.anyMap; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoInteractions; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import com.linkedin.metadata.config.MetadataChangeProposalConfig; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.mxe.Topics; +import com.linkedin.util.Pair; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.ListConsumerGroupOffsetsResult; +import org.apache.kafka.clients.admin.ListOffsetsResult; +import org.apache.kafka.clients.consumer.OffsetAndMetadata; +import org.apache.kafka.common.KafkaFuture; +import org.apache.kafka.common.TopicPartition; +import org.testng.annotations.Test; + +public class KafkaProducerThrottleTest { + private static final List STANDARD_TOPICS = + 
List.of(Topics.METADATA_CHANGE_LOG_VERSIONED, Topics.METADATA_CHANGE_LOG_TIMESERIES); + private static final String STANDARD_MCL_CONSUMER_GROUP_ID = "generic-mae-consumer-job-client"; + + @Test + public void testLagCalculation() throws ExecutionException, InterruptedException { + // 3 partitions + // Consumer offsets: 1, 2, 3 + // End offsets: 2, 4, 6 + // Lag: 1, 2, 3 + // MedianLag: 2 + AdminClient mockAdmin = + mockKafka( + generateLag( + STANDARD_TOPICS, + topicPart -> (long) topicPart.partition() + 1, + topicPart -> ((long) topicPart.partition() + 1) * 2, + 3)); + + KafkaProducerThrottle test = + KafkaProducerThrottle.builder() + .config(noSchedulerConfig().getThrottle()) + .kafkaAdmin(mockAdmin) + .versionedTopicName(STANDARD_TOPICS.get(0)) + .timeseriesTopicName(STANDARD_TOPICS.get(1)) + .entityRegistry(mock(EntityRegistry.class)) + .mclConsumerGroupId(STANDARD_MCL_CONSUMER_GROUP_ID) + .pauseConsumer(mock(Consumer.class)) + .build(); + + // Refresh calculations + test.refresh(); + + assertEquals( + test.getLag(), + Map.of( + KafkaProducerThrottle.MclType.VERSIONED, 2L, + KafkaProducerThrottle.MclType.TIMESERIES, 2L)); + } + + @Test + public void testThrottle() throws ExecutionException, InterruptedException { + MetadataChangeProposalConfig.ThrottlesConfig noThrottleConfig = + noSchedulerConfig().getThrottle(); + noThrottleConfig + .getVersioned() + .setThreshold(10) + .setInitialIntervalMs(1) + .setMultiplier(1) + .setMaxAttempts(1) + .setMaxIntervalMs(1); + + MetadataChangeProposalConfig.ThrottlesConfig throttleConfig = noSchedulerConfig().getThrottle(); + throttleConfig + .getVersioned() + .setThreshold(1) + .setInitialIntervalMs(1) + .setMultiplier(1) + .setMaxAttempts(1) + .setMaxIntervalMs(1); + + // 3 partitions + // Consumer offsets: 1, 2, 3 + // End offsets: 2, 4, 6 + // Lag: 1, 2, 3 + // MedianLag: 2 + AdminClient mockAdmin = + mockKafka( + generateLag( + STANDARD_TOPICS, + topicPart -> (long) topicPart.partition() + 1, + topicPart -> ((long) topicPart.partition() + 1) * 2, + 3)); + + Consumer pauseFunction = mock(Consumer.class); + + KafkaProducerThrottle test = + KafkaProducerThrottle.builder() + .config(noThrottleConfig) + .kafkaAdmin(mockAdmin) + .versionedTopicName(STANDARD_TOPICS.get(0)) + .timeseriesTopicName(STANDARD_TOPICS.get(1)) + .entityRegistry(mock(EntityRegistry.class)) + .mclConsumerGroupId(STANDARD_MCL_CONSUMER_GROUP_ID) + .pauseConsumer(pauseFunction) + .build(); + + // Refresh calculations + test.refresh(); + assertEquals( + test.getLag(), + Map.of( + KafkaProducerThrottle.MclType.VERSIONED, 2L, + KafkaProducerThrottle.MclType.TIMESERIES, 2L)); + assertFalse( + test.isThrottled(KafkaProducerThrottle.MclType.VERSIONED), + "Expected not throttling, lag is below threshold"); + assertFalse(test.isThrottled(KafkaProducerThrottle.MclType.TIMESERIES)); + test.throttle(); + verifyNoInteractions(pauseFunction); + reset(pauseFunction); + + KafkaProducerThrottle test2 = test.toBuilder().config(throttleConfig).build(); + // Refresh calculations + test2.refresh(); + assertEquals( + test2.getLag(), + Map.of( + KafkaProducerThrottle.MclType.VERSIONED, 2L, + KafkaProducerThrottle.MclType.TIMESERIES, 2L)); + assertTrue( + test2.isThrottled(KafkaProducerThrottle.MclType.VERSIONED), + "Expected throttling, lag is above threshold."); + assertFalse( + test2.isThrottled(KafkaProducerThrottle.MclType.TIMESERIES), + "Expected not throttling. 
Timeseries is disabled"); + test2.throttle(); + + // verify 1ms pause and resume + verify(pauseFunction).accept(eq(true)); + verify(pauseFunction).accept(eq(false)); + verifyNoMoreInteractions(pauseFunction); + } + + @Test + public void testBackOff() throws ExecutionException, InterruptedException { + MetadataChangeProposalConfig.ThrottlesConfig throttleConfig = noSchedulerConfig().getThrottle(); + throttleConfig + .getVersioned() + .setThreshold(1) + .setInitialIntervalMs(1) + .setMultiplier(2) + .setMaxAttempts(5) + .setMaxIntervalMs(8); + + // 3 partitions + // Consumer offsets: 1, 2, 3 + // End offsets: 2, 4, 6 + // Lag: 1, 2, 3 + // MedianLag: 2 + AdminClient mockAdmin = + mockKafka( + generateLag( + STANDARD_TOPICS, + topicPart -> (long) topicPart.partition() + 1, + topicPart -> ((long) topicPart.partition() + 1) * 2, + 3)); + + KafkaProducerThrottle test = + KafkaProducerThrottle.builder() + .config(throttleConfig) + .kafkaAdmin(mockAdmin) + .versionedTopicName(STANDARD_TOPICS.get(0)) + .timeseriesTopicName(STANDARD_TOPICS.get(1)) + .entityRegistry(mock(EntityRegistry.class)) + .mclConsumerGroupId(STANDARD_MCL_CONSUMER_GROUP_ID) + .pauseConsumer(mock(Consumer.class)) + .build(); + + // Refresh calculations + test.refresh(); + assertEquals( + test.getLag(), + Map.of( + KafkaProducerThrottle.MclType.VERSIONED, 2L, + KafkaProducerThrottle.MclType.TIMESERIES, 2L)); + assertTrue( + test.isThrottled(KafkaProducerThrottle.MclType.VERSIONED), + "Expected throttling, lag is above threshold."); + assertFalse( + test.isThrottled(KafkaProducerThrottle.MclType.TIMESERIES), + "Expected no throttling. Timeseries is disabled"); + + assertEquals( + test.computeNextBackOff(KafkaProducerThrottle.MclType.TIMESERIES), + 0L, + "Expected no backoff. Timeseries is disabled."); + + assertEquals( + test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), 1L, "Expected initial 1"); + assertEquals( + test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), + 2L, + "Expected second 2^1"); + assertEquals( + test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), 4L, "Expected third 2^2"); + assertEquals( + test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), + 8L, + "Expected fourth 2^3"); + assertEquals( + test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), + 8L, + "Expected fifth max interval at 8"); + assertEquals( + test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), + -1L, + "Expected max attempts"); + } + + @Test + public void testScheduler() throws ExecutionException, InterruptedException { + MetadataChangeProposalConfig config = new MetadataChangeProposalConfig(); + MetadataChangeProposalConfig.ThrottlesConfig throttlesConfig = + new MetadataChangeProposalConfig.ThrottlesConfig() + .setUpdateIntervalMs(10); // configure fast update for test + throttlesConfig.setVersioned( + new MetadataChangeProposalConfig.ThrottleConfig() + .setEnabled(true) // enable 1 throttle config to activate + ); + throttlesConfig.setTimeseries( + new MetadataChangeProposalConfig.ThrottleConfig().setEnabled(false)); + config.setThrottle(throttlesConfig); + + // 1 lag, 1 partition + AdminClient mockAdmin = + mockKafka(generateLag(STANDARD_TOPICS, topicPart -> 1L, topicPart -> 2L, 1)); + + KafkaProducerThrottle test = + KafkaProducerThrottle.builder() + .config(throttlesConfig) + .kafkaAdmin(mockAdmin) + .versionedTopicName(STANDARD_TOPICS.get(0)) + .timeseriesTopicName(STANDARD_TOPICS.get(1)) + .entityRegistry(mock(EntityRegistry.class)) + 
.mclConsumerGroupId(STANDARD_MCL_CONSUMER_GROUP_ID) + .pauseConsumer(mock(Consumer.class)) + .build(); + + try { + test.start(); + Thread.sleep(50); + assertEquals( + test.getLag(), + Map.of( + KafkaProducerThrottle.MclType.VERSIONED, 1L, + KafkaProducerThrottle.MclType.TIMESERIES, 1L), + "Expected lag updated"); + } finally { + test.stop(); + } + } + + private static MetadataChangeProposalConfig noSchedulerConfig() { + MetadataChangeProposalConfig config = new MetadataChangeProposalConfig(); + MetadataChangeProposalConfig.ThrottlesConfig throttlesConfig = + new MetadataChangeProposalConfig.ThrottlesConfig() + .setUpdateIntervalMs(0); // no scheduler, manual update + throttlesConfig.setVersioned( + new MetadataChangeProposalConfig.ThrottleConfig() + .setEnabled(true) // enable 1 throttle config to activate + ); + throttlesConfig.setTimeseries( + new MetadataChangeProposalConfig.ThrottleConfig().setEnabled(false)); + config.setThrottle(throttlesConfig); + return config; + } + + private static Pair, Map> + generateLag( + Collection topicNames, + Function consumerOffset, + Function endOffset, + int partitions) { + + Set topicPartitions = + topicNames.stream() + .flatMap( + topicName -> + IntStream.range(0, partitions) + .mapToObj(partitionNum -> new TopicPartition(topicName, partitionNum))) + .collect(Collectors.toSet()); + + Map consumerOffsetMap = + topicPartitions.stream() + .map( + topicPartition -> + Map.entry( + topicPartition, + new OffsetAndMetadata(consumerOffset.apply(topicPartition)))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + Map endOffsetMap = + topicPartitions.stream() + .map(topicPartition -> Map.entry(topicPartition, endOffset.apply(topicPartition))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + return Pair.of(consumerOffsetMap, endOffsetMap); + } + + private static AdminClient mockKafka( + Pair, Map> offsetPair) + throws ExecutionException, InterruptedException { + + AdminClient mockKafkaAdmin = mock(AdminClient.class); + + // consumer offsets + ListConsumerGroupOffsetsResult mockConsumerOffsetsResult = + mock(ListConsumerGroupOffsetsResult.class); + KafkaFuture> mockConsumerFuture = + mock(KafkaFuture.class); + when(mockConsumerOffsetsResult.partitionsToOffsetAndMetadata()).thenReturn(mockConsumerFuture); + when(mockConsumerFuture.get()).thenReturn(offsetPair.getFirst()); + when(mockKafkaAdmin.listConsumerGroupOffsets(anyString())) + .thenReturn(mockConsumerOffsetsResult); + + // end offsets + ListOffsetsResult mockOffsetsResult = mock(ListOffsetsResult.class); + KafkaFuture> mockOffsetFuture = + mock(KafkaFuture.class); + Map resultMap = + offsetPair.getSecond().entrySet().stream() + .map( + entry -> { + ListOffsetsResult.ListOffsetsResultInfo mockInfo = + mock(ListOffsetsResult.ListOffsetsResultInfo.class); + when(mockInfo.offset()).thenReturn(entry.getValue()); + return Map.entry(entry.getKey(), mockInfo); + }) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + when(mockOffsetFuture.get()).thenReturn(resultMap); + when(mockOffsetsResult.all()).thenReturn(mockOffsetFuture); + when(mockKafkaAdmin.listOffsets(anyMap())).thenReturn(mockOffsetsResult); + + return mockKafkaAdmin; + } +} diff --git a/metadata-ingestion/examples/library/assertions_configuration.yml b/metadata-ingestion/examples/library/assertions_configuration.yml new file mode 100644 index 00000000000000..a44945a30f9a37 --- /dev/null +++ b/metadata-ingestion/examples/library/assertions_configuration.yml @@ -0,0 +1,76 @@ +version: 1 
+namespace: test-config-id-1 +assertions: + # Freshness Assertion + - entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.test_assertions_all_times,PROD) + type: freshness + lookback_interval: "1 hour" + last_modified_field: col_timestamp + schedule: + type: cron + cron: 0 * * * * + meta: + entity_qualified_name: TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES + entity_schema: + - col: col_date + native_type: DATE + # Volume Assertion + - type: volume + entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.test_assertions_all_times,PROD) + metric: row_count + condition: + type: less_than_or_equal_to + value: 1000 + schedule: + type: cron + cron: 0 * * * * + meta: + entity_qualified_name: TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES + entity_schema: + - col: col_date + native_type: DATE + # Field Metric Assertion + - type: field + entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.test_assertions_all_times,PROD) + field: col_date + metric: null_count + condition: + type: equal_to + value: 0 + schedule: + type: cron + cron: 0 * * * * + meta: + entity_qualified_name: TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES + entity_schema: + - col: col_date + native_type: DATE + # Field Value Assertion + - type: field + entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.purchase_event,PROD) + field: quantity + condition: + type: between + min: 0 + max: 10 + schedule: + type: on_table_change + meta: + entity_qualified_name: TEST_DB.PUBLIC.PURCHASE_EVENT + entity_schema: + - col: quantity + native_type: FLOAT + # Custom SQL Metric Assertion + - type: sql + entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.purchase_event,PROD) + statement: select mode(quantity) from test_db.public.purchase_event + condition: + type: equal_to + value: 5 + schedule: + type: on_table_change + meta: + entity_qualified_name: TEST_DB.PUBLIC.PURCHASE_EVENT + entity_schema: + - col: quantity + native_type: FLOAT diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/__init__.py b/metadata-ingestion/src/datahub/api/entities/assertion/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/assertion.py b/metadata-ingestion/src/datahub/api/entities/assertion/assertion.py new file mode 100644 index 00000000000000..e0975a1c0351c7 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/assertion/assertion.py @@ -0,0 +1,57 @@ +from abc import abstractmethod +from typing import Optional + +from datahub.api.entities.assertion.assertion_trigger import AssertionTrigger +from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel, v1_Field +from datahub.metadata.com.linkedin.pegasus2avro.assertion import AssertionInfo + + +class BaseAssertionProtocol(v1_ConfigModel): + @abstractmethod + def get_id(self) -> str: + pass + + @abstractmethod + def get_assertion_info_aspect( + self, + ) -> AssertionInfo: + pass + + @abstractmethod + def get_assertion_trigger( + self, + ) -> Optional[AssertionTrigger]: + pass + + +class BaseAssertion(v1_ConfigModel): + id_raw: Optional[str] = v1_Field( + default=None, + description="The raw id of the assertion." + "If provided, this is used when creating identifier for this assertion" + "along with assertion type and entity.", + ) + + id: Optional[str] = v1_Field( + default=None, + description="The id of the assertion." + "If provided, this is used as identifier for this assertion." 
+ "If provided, no other assertion fields are considered to create identifier.", + ) + + description: Optional[str] = None + + # Can contain metadata extracted from datahub. e.g. + # - entity qualified name + # - entity schema + meta: Optional[dict] = None + + +class BaseEntityAssertion(BaseAssertion): + entity: str = v1_Field( + description="The entity urn that the assertion is associated with" + ) + + trigger: Optional[AssertionTrigger] = v1_Field( + description="The trigger schedule for assertion", alias="schedule" + ) diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/assertion_config_spec.py b/metadata-ingestion/src/datahub/api/entities/assertion/assertion_config_spec.py new file mode 100644 index 00000000000000..08205cc621253f --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/assertion/assertion_config_spec.py @@ -0,0 +1,41 @@ +from typing import List, Optional + +from ruamel.yaml import YAML +from typing_extensions import Literal + +from datahub.api.entities.assertion.datahub_assertion import DataHubAssertion +from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel, v1_Field + + +class AssertionsConfigSpec(v1_ConfigModel): + """ + Declarative configuration specification for datahub assertions. + + This model is used as a simpler, Python-native representation to define assertions. + It can be easily parsed from a equivalent YAML file. + + Currently, this is converted into series of assertion MCPs that can be emitted to DataHub. + In future, this would invoke datahub GraphQL API to upsert assertions. + """ + + version: Literal[1] + + id: Optional[str] = v1_Field( + default=None, + alias="namespace", + description="Unique identifier of assertions configuration file", + ) + + assertions: List[DataHubAssertion] + + @classmethod + def from_yaml( + cls, + file: str, + ) -> "AssertionsConfigSpec": + with open(file) as fp: + yaml = YAML(typ="rt") # default, if not specfied, is 'rt' (round-trip) + orig_dictionary = yaml.load(fp) + parsed_spec = AssertionsConfigSpec.parse_obj(orig_dictionary) + # parsed_spec._original_yaml_dict = orig_dictionary + return parsed_spec diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/assertion_operator.py b/metadata-ingestion/src/datahub/api/entities/assertion/assertion_operator.py new file mode 100644 index 00000000000000..8704ed13cb6c30 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/assertion/assertion_operator.py @@ -0,0 +1,304 @@ +import json +from typing import List, Optional, Union + +from typing_extensions import Literal, Protocol + +from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel +from datahub.metadata.schema_classes import ( + AssertionStdOperatorClass, + AssertionStdParameterClass, + AssertionStdParametersClass, + AssertionStdParameterTypeClass, +) + + +class Operator(Protocol): + """Specification for an assertion operator. + + This class exists only for documentation (not used in typing checking). + """ + + operator: str + + def id(self) -> str: + ... + + def generate_parameters(self) -> AssertionStdParametersClass: + ... 
+ + +def _generate_assertion_std_parameter( + value: Union[str, int, float, list] +) -> AssertionStdParameterClass: + if isinstance(value, str): + return AssertionStdParameterClass( + value=value, type=AssertionStdParameterTypeClass.STRING + ) + elif isinstance(value, (int, float)): + return AssertionStdParameterClass( + value=str(value), type=AssertionStdParameterTypeClass.NUMBER + ) + elif isinstance(value, list): + return AssertionStdParameterClass( + value=json.dumps(value), type=AssertionStdParameterTypeClass.LIST + ) + else: + raise ValueError( + f"Unsupported assertion parameter {value} of type {type(value)}" + ) + + +Param = Union[str, int, float, List[Union[str, float, int]]] + + +def _generate_assertion_std_parameters( + value: Optional[Param] = None, + min_value: Optional[Param] = None, + max_value: Optional[Param] = None, +) -> AssertionStdParametersClass: + return AssertionStdParametersClass( + value=_generate_assertion_std_parameter(value) if value else None, + minValue=_generate_assertion_std_parameter(min_value) if min_value else None, + maxValue=_generate_assertion_std_parameter(max_value) if max_value else None, + ) + + +class EqualToOperator(v1_ConfigModel): + type: Literal["equal_to"] + value: Union[str, int, float] + + operator: str = AssertionStdOperatorClass.EQUAL_TO + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class NotEqualToOperator(v1_ConfigModel): + type: Literal["not_equal_to"] + value: Union[str, int, float] + + operator: str = AssertionStdOperatorClass.NOT_EQUAL_TO + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class BetweenOperator(v1_ConfigModel): + type: Literal["between"] + min: Union[int, float] + max: Union[int, float] + + operator: str = AssertionStdOperatorClass.BETWEEN + + def id(self) -> str: + return f"{self.type}-{self.min}-{self.max}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters( + min_value=self.min, max_value=self.max + ) + + +class LessThanOperator(v1_ConfigModel): + type: Literal["less_than"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.LESS_THAN + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class GreaterThanOperator(v1_ConfigModel): + type: Literal["greater_than"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.GREATER_THAN + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class LessThanOrEqualToOperator(v1_ConfigModel): + type: Literal["less_than_or_equal_to"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class GreaterThanOrEqualToOperator(v1_ConfigModel): + type: Literal["greater_than_or_equal_to"] + value: Union[int, float] + + operator: str = AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO + + def 
id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class InOperator(v1_ConfigModel): + type: Literal["in"] + value: List[Union[str, float, int]] + + operator: str = AssertionStdOperatorClass.IN + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class NotInOperator(v1_ConfigModel): + type: Literal["not_in"] + value: List[Union[str, float, int]] + + operator: str = AssertionStdOperatorClass.NOT_IN + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class IsNullOperator(v1_ConfigModel): + type: Literal["is_null"] + + operator: str = AssertionStdOperatorClass.NULL + + def id(self) -> str: + return f"{self.type}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters() + + +class NotNullOperator(v1_ConfigModel): + type: Literal["is_not_null"] + + operator: str = AssertionStdOperatorClass.NOT_NULL + + def id(self) -> str: + return f"{self.type}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters() + + +class IsTrueOperator(v1_ConfigModel): + type: Literal["is_true"] + + operator: str = AssertionStdOperatorClass.IS_TRUE + + def id(self) -> str: + return f"{self.type}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters() + + +class IsFalseOperator(v1_ConfigModel): + type: Literal["is_false"] + + operator: str = AssertionStdOperatorClass.IS_FALSE + + def id(self) -> str: + return f"{self.type}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters() + + +class ContainsOperator(v1_ConfigModel): + type: Literal["contains"] + value: str + + operator: str = AssertionStdOperatorClass.CONTAIN + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class EndsWithOperator(v1_ConfigModel): + type: Literal["ends_with"] + value: str + + operator: str = AssertionStdOperatorClass.END_WITH + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class StartsWithOperator(v1_ConfigModel): + type: Literal["starts_with"] + value: str + + operator: str = AssertionStdOperatorClass.START_WITH + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +class MatchesRegexOperator(v1_ConfigModel): + type: Literal["matches_regex"] + value: str + + operator: str = AssertionStdOperatorClass.REGEX_MATCH + + def id(self) -> str: + return f"{self.type}-{self.value}" + + def generate_parameters(self) -> AssertionStdParametersClass: + return _generate_assertion_std_parameters(value=self.value) + + +Operators = Union[ + InOperator, + NotInOperator, + EqualToOperator, + NotEqualToOperator, + BetweenOperator, + LessThanOperator, + LessThanOrEqualToOperator, + GreaterThanOperator, + 
GreaterThanOrEqualToOperator, + IsNullOperator, + NotNullOperator, + IsTrueOperator, + IsFalseOperator, + ContainsOperator, + EndsWithOperator, + StartsWithOperator, + MatchesRegexOperator, +] diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/assertion_trigger.py b/metadata-ingestion/src/datahub/api/entities/assertion/assertion_trigger.py new file mode 100644 index 00000000000000..d7809164847447 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/assertion/assertion_trigger.py @@ -0,0 +1,52 @@ +from datetime import timedelta +from typing import Union + +import humanfriendly +from typing_extensions import Literal + +from datahub.configuration.pydantic_migration_helpers import ( + v1_ConfigModel, + v1_Field, + v1_validator, +) + + +class CronTrigger(v1_ConfigModel): + type: Literal["cron"] + cron: str = v1_Field( + description="The cron expression to use. See https://crontab.guru/ for help." + ) + timezone: str = v1_Field( + "UTC", + description="The timezone to use for the cron schedule. Defaults to UTC.", + ) + + +class IntervalTrigger(v1_ConfigModel): + type: Literal["interval"] + interval: timedelta + + @v1_validator("interval", pre=True) + def lookback_interval_to_timedelta(cls, v): + if isinstance(v, str): + seconds = humanfriendly.parse_timespan(v) + return timedelta(seconds=seconds) + raise ValueError("Invalid value.") + + +class EntityChangeTrigger(v1_ConfigModel): + type: Literal["on_table_change"] + + +class ManualTrigger(v1_ConfigModel): + type: Literal["manual"] + + +class AssertionTrigger(v1_ConfigModel): + __root__: Union[ + CronTrigger, IntervalTrigger, EntityChangeTrigger, ManualTrigger + ] = v1_Field(discriminator="type") + + @property + def trigger(self): + return self.__root__ diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/compiler_interface.py b/metadata-ingestion/src/datahub/api/entities/assertion/compiler_interface.py new file mode 100644 index 00000000000000..27b43a58530b1e --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/assertion/compiler_interface.py @@ -0,0 +1,81 @@ +from abc import abstractmethod +from dataclasses import dataclass, field +from enum import Enum +from pathlib import Path +from typing import Dict, List, Literal + +from datahub.api.entities.assertion.assertion_config_spec import AssertionsConfigSpec +from datahub.ingestion.api.report import Report +from datahub.utilities.lossy_collections import LossyDict, LossyList + + +class StrEnum(str, Enum): + pass + + +class CompileResultArtifactType(StrEnum): + SQL_QUERIES = "SQL_QUERIES" + COMPILE_REPORT = "COMPILE_REPORT" + + +@dataclass +class CompileResultArtifact(Report): + name: str + type: CompileResultArtifactType + path: Path + description: str + + +@dataclass +class AssertionCompilationReport(Report): + """Additional details to debug compilation""" + + num_processed: int = 0 + num_compile_succeeded: int = 0 + num_compile_failed: int = 0 # Likely due to assertion not supported in platform + + warnings: LossyDict[str, LossyList[str]] = field(default_factory=LossyDict) + failures: LossyDict[str, LossyList[str]] = field(default_factory=LossyDict) + + artifacts: List[Path] = field(default_factory=list) + + def report_warning(self, key: str, reason: str) -> None: + warnings = self.warnings.get(key, LossyList()) + warnings.append(reason) + self.warnings[key] = warnings + + def report_failure(self, key: str, reason: str) -> None: + failures = self.failures.get(key, LossyList()) + failures.append(reason) + self.failures[key] = failures + + 
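As a hedged sketch of how a platform-specific compiler might populate the report defined above (the assertion URN and messages are hypothetical, and the compilation step is only a placeholder):

# Hypothetical usage of AssertionCompilationReport by a platform compiler.
report = AssertionCompilationReport()
report.num_processed += 1
try:
    compiled_sql = "SELECT count(*) FROM example_table"  # stand-in for real per-assertion compilation output
    report.num_compile_succeeded += 1
except NotImplementedError as e:
    report.num_compile_failed += 1
    report.report_failure("urn:li:assertion:example", str(e))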
+@dataclass +class AssertionCompilationResult: + """Results of compilation step , along with detailed report object""" + + platform: str + status: Literal["success", "failure"] + + report: AssertionCompilationReport = field( + default_factory=AssertionCompilationReport + ) + + artifacts: List[CompileResultArtifact] = field(default_factory=list) + + def add_artifact(self, artifact: CompileResultArtifact) -> None: + self.artifacts.append(artifact) + self.report.artifacts.append(artifact.path) + + +class AssertionCompiler: + @classmethod + @abstractmethod + def create(cls, output_dir: str, extras: Dict[str, str]) -> "AssertionCompiler": + pass + + @abstractmethod + def compile( + self, assertion_config_spec: AssertionsConfigSpec + ) -> AssertionCompilationResult: + pass diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/datahub_assertion.py b/metadata-ingestion/src/datahub/api/entities/assertion/datahub_assertion.py new file mode 100644 index 00000000000000..ed18b78418d768 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/assertion/datahub_assertion.py @@ -0,0 +1,35 @@ +from typing import Optional, Union + +from datahub.api.entities.assertion.assertion import BaseAssertionProtocol +from datahub.api.entities.assertion.assertion_trigger import AssertionTrigger +from datahub.api.entities.assertion.field_assertion import FieldAssertion +from datahub.api.entities.assertion.freshness_assertion import FreshnessAssertion +from datahub.api.entities.assertion.sql_assertion import SQLAssertion +from datahub.api.entities.assertion.volume_assertion import VolumeAssertion +from datahub.configuration.pydantic_migration_helpers import v1_Field +from datahub.metadata.com.linkedin.pegasus2avro.assertion import AssertionInfo + + +class DataHubAssertion(BaseAssertionProtocol): + __root__: Union[ + FreshnessAssertion, + VolumeAssertion, + SQLAssertion, + FieldAssertion, + # TODO: Add SchemaAssertion + ] = v1_Field(discriminator="type") + + @property + def assertion(self): + return self.__root__.assertion + + def get_assertion_info_aspect( + self, + ) -> AssertionInfo: + return self.__root__.get_assertion_info_aspect() + + def get_id(self) -> str: + return self.__root__.get_id() + + def get_assertion_trigger(self) -> Optional[AssertionTrigger]: + return self.__root__.get_assertion_trigger() diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/field_assertion.py b/metadata-ingestion/src/datahub/api/entities/assertion/field_assertion.py new file mode 100644 index 00000000000000..ae062c3a8e5cbd --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/assertion/field_assertion.py @@ -0,0 +1,158 @@ +from enum import Enum +from typing import Optional, Union + +from typing_extensions import Literal + +from datahub.api.entities.assertion.assertion import ( + BaseAssertionProtocol, + BaseEntityAssertion, +) +from datahub.api.entities.assertion.assertion_operator import Operators +from datahub.api.entities.assertion.assertion_trigger import AssertionTrigger +from datahub.api.entities.assertion.field_metric import FieldMetric +from datahub.api.entities.assertion.filter import DatasetFilter +from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel, v1_Field +from datahub.emitter.mce_builder import datahub_guid +from datahub.metadata.com.linkedin.pegasus2avro.assertion import ( + AssertionInfo, + AssertionType, + FieldAssertionInfo, + FieldAssertionType, +) +from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaFieldSpec +from 
datahub.metadata.schema_classes import ( + FieldMetricAssertionClass, + FieldTransformClass, + FieldTransformTypeClass, + FieldValuesAssertionClass, + FieldValuesFailThresholdClass, + FieldValuesFailThresholdTypeClass, +) + + +class FieldValuesFailThreshold(v1_ConfigModel): + type: Literal["count", "percentage"] = v1_Field(default="count") + value: int = v1_Field(default=0) + + def to_field_values_failure_threshold(self) -> FieldValuesFailThresholdClass: + return FieldValuesFailThresholdClass( + type=( + FieldValuesFailThresholdTypeClass.COUNT + if self.type == "count" + else FieldValuesFailThresholdTypeClass.PERCENTAGE + ), + value=self.value, + ) + + +class FieldTransform(Enum): + LENGTH = "length" + + +class FieldValuesAssertion(BaseEntityAssertion): + type: Literal["field"] + field: str + field_transform: Optional[FieldTransform] = v1_Field(default=None) + operator: Operators = v1_Field(discriminator="type", alias="condition") + filters: Optional[DatasetFilter] = v1_Field(default=None) + failure_threshold: FieldValuesFailThreshold = v1_Field( + default=FieldValuesFailThreshold() + ) + exclude_nulls: bool = v1_Field(default=True) + + def get_assertion_info( + self, + ) -> AssertionInfo: + return AssertionInfo( + description=self.description, + type=AssertionType.FIELD, + fieldAssertion=FieldAssertionInfo( + type=FieldAssertionType.FIELD_VALUES, + entity=self.entity, + fieldValuesAssertion=FieldValuesAssertionClass( + field=SchemaFieldSpec( + path=self.field, + type="", # Not required + nativeType="", # Not required + ), + operator=self.operator.operator, + parameters=self.operator.generate_parameters(), + failThreshold=self.failure_threshold.to_field_values_failure_threshold(), + excludeNulls=self.exclude_nulls, + transform=( + FieldTransformClass(type=FieldTransformTypeClass.LENGTH) + if self.field_transform == FieldTransform.LENGTH + else None + ), + ), + ), + ) + + def get_id(self) -> str: + guid_dict = { + "entity": self.entity, + "type": self.type, + "field": self.field, + "operator": str(self.operator.operator), + "id_raw": self.id_raw, + } + return self.id or datahub_guid(guid_dict) + + +class FieldMetricAssertion(BaseEntityAssertion): + type: Literal["field"] + field: str + operator: Operators = v1_Field(discriminator="type", alias="condition") + metric: FieldMetric + filters: Optional[DatasetFilter] = v1_Field(default=None) + + def get_assertion_info( + self, + ) -> AssertionInfo: + return AssertionInfo( + description=self.description, + type=AssertionType.FIELD, + fieldAssertion=FieldAssertionInfo( + type=FieldAssertionType.FIELD_METRIC, + entity=self.entity, + fieldMetricAssertion=FieldMetricAssertionClass( + field=SchemaFieldSpec( + path=self.field, + type="", # Not required + nativeType="", # Not required + ), + metric=self.metric.name, + operator=self.operator.operator, + parameters=self.operator.generate_parameters(), + ), + ), + ) + + def get_id(self) -> str: + guid_dict = { + "entity": self.entity, + "type": self.type, + "field": self.field, + "metric": self.metric.value, + "id_raw": self.id_raw, + } + return self.id or datahub_guid(guid_dict) + + +class FieldAssertion(BaseAssertionProtocol): + __root__: Union[FieldMetricAssertion, FieldValuesAssertion] + + @property + def assertion(self): + return self.__root__ + + def get_id(self) -> str: + return self.__root__.get_id() + + def get_assertion_info_aspect( + self, + ) -> AssertionInfo: + return self.__root__.get_assertion_info() + + def get_assertion_trigger(self) -> Optional[AssertionTrigger]: + return
self.__root__.trigger diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/field_metric.py b/metadata-ingestion/src/datahub/api/entities/assertion/field_metric.py new file mode 100644 index 00000000000000..7a236da2d562d3 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/assertion/field_metric.py @@ -0,0 +1,21 @@ +from enum import Enum + + +class FieldMetric(Enum): + UNIQUE_COUNT = "unique_count" + UNIQUE_PERCENTAGE = "unique_percentage" + NULL_COUNT = "null_count" + NULL_PERCENTAGE = "null_percentage" + MIN = "min" + MAX = "max" + MEAN = "mean" + MEDIAN = "median" + STDDEV = "stddev" + NEGATIVE_COUNT = "negative_count" + NEGATIVE_PERCENTAGE = "negative_percentage" + ZERO_COUNT = "zero_count" + ZERO_PERCENTAGE = "zero_percentage" + MIN_LENGTH = "min_length" + MAX_LENGTH = "max_length" + EMPTY_COUNT = "empty_count" + EMPTY_PERCENTAGE = "empty_percentage" diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/filter.py b/metadata-ingestion/src/datahub/api/entities/assertion/filter.py new file mode 100644 index 00000000000000..05d75b674d6af9 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/assertion/filter.py @@ -0,0 +1,13 @@ +from typing_extensions import Literal + +from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel + + +class SqlFilter(v1_ConfigModel): + type: Literal["sql"] + sql: str + + +DatasetFilter = SqlFilter +# class DatasetFilter(v1_ConfigModel): +# __root__: Union[SqlFilter] = v1_Field(discriminator="type") diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/freshness_assertion.py b/metadata-ingestion/src/datahub/api/entities/assertion/freshness_assertion.py new file mode 100644 index 00000000000000..f9e1df7d68f271 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/assertion/freshness_assertion.py @@ -0,0 +1,124 @@ +from datetime import timedelta +from enum import Enum +from typing import Optional, Union + +import humanfriendly +from typing_extensions import Literal + +from datahub.api.entities.assertion.assertion import ( + BaseAssertionProtocol, + BaseEntityAssertion, +) +from datahub.api.entities.assertion.assertion_trigger import AssertionTrigger +from datahub.api.entities.assertion.filter import DatasetFilter +from datahub.configuration.pydantic_migration_helpers import v1_Field, v1_validator +from datahub.emitter.mce_builder import datahub_guid +from datahub.metadata.com.linkedin.pegasus2avro.assertion import ( + AssertionInfo, + AssertionType, + FixedIntervalSchedule, + FreshnessAssertionInfo, + FreshnessAssertionSchedule, + FreshnessAssertionScheduleType, + FreshnessAssertionType, + FreshnessCronSchedule, +) +from datahub.metadata.com.linkedin.pegasus2avro.timeseries import CalendarInterval + + +class FreshnessSourceType(Enum): + LAST_MODIFIED_COLUMN = "last_modified_column" + + +class CronFreshnessAssertion(BaseEntityAssertion): + type: Literal["freshness"] + freshness_type: Literal["cron"] + cron: str = v1_Field( + description="The cron expression to use. See https://crontab.guru/ for help." + ) + timezone: str = v1_Field( + "UTC", + description="The timezone to use for the cron schedule. 
Defaults to UTC.", + ) + source_type: FreshnessSourceType = v1_Field( + default=FreshnessSourceType.LAST_MODIFIED_COLUMN + ) + last_modified_field: str + filters: Optional[DatasetFilter] = v1_Field(default=None) + + def get_assertion_info( + self, + ) -> AssertionInfo: + return AssertionInfo( + description=self.description, + type=AssertionType.FRESHNESS, + freshnessAssertion=FreshnessAssertionInfo( + type=FreshnessAssertionType.DATASET_CHANGE, + entity=self.entity, + schedule=FreshnessAssertionSchedule( + type=FreshnessAssertionScheduleType.CRON, + cron=FreshnessCronSchedule(cron=self.cron, timezone=self.timezone), + ), + ), + ) + + +class FixedIntervalFreshnessAssertion(BaseEntityAssertion): + type: Literal["freshness"] + freshness_type: Literal["interval"] = v1_Field(default="interval") + lookback_interval: timedelta + filters: Optional[DatasetFilter] = v1_Field(default=None) + source_type: FreshnessSourceType = v1_Field( + default=FreshnessSourceType.LAST_MODIFIED_COLUMN + ) + last_modified_field: str + + @v1_validator("lookback_interval", pre=True) + def lookback_interval_to_timedelta(cls, v): + if isinstance(v, str): + seconds = humanfriendly.parse_timespan(v) + return timedelta(seconds=seconds) + raise ValueError("Invalid value.") + + def get_assertion_info( + self, + ) -> AssertionInfo: + return AssertionInfo( + description=self.description, + type=AssertionType.FRESHNESS, + freshnessAssertion=FreshnessAssertionInfo( + type=FreshnessAssertionType.DATASET_CHANGE, + entity=self.entity, + schedule=FreshnessAssertionSchedule( + type=FreshnessAssertionScheduleType.FIXED_INTERVAL, + fixedInterval=FixedIntervalSchedule( + unit=CalendarInterval.SECOND, + multiple=self.lookback_interval.seconds, + ), + ), + ), + ) + + +class FreshnessAssertion(BaseAssertionProtocol): + __root__: Union[FixedIntervalFreshnessAssertion, CronFreshnessAssertion] + + @property + def assertion(self): + return self.__root__ + + def get_id(self) -> str: + guid_dict = { + "entity": self.__root__.entity, + "type": self.__root__.type, + "id_raw": self.__root__.id_raw, + } + return self.__root__.id or datahub_guid(guid_dict) + + def get_assertion_info_aspect( + self, + ) -> AssertionInfo: + return self.__root__.get_assertion_info() + + def get_assertion_trigger(self) -> Optional[AssertionTrigger]: + return self.__root__.trigger diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/sql_assertion.py b/metadata-ingestion/src/datahub/api/entities/assertion/sql_assertion.py new file mode 100644 index 00000000000000..3d12cfde428f4e --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/assertion/sql_assertion.py @@ -0,0 +1,91 @@ +from typing import Optional, Union + +from typing_extensions import Literal + +from datahub.api.entities.assertion.assertion import ( + BaseAssertionProtocol, + BaseEntityAssertion, +) +from datahub.api.entities.assertion.assertion_operator import Operators +from datahub.api.entities.assertion.assertion_trigger import AssertionTrigger +from datahub.configuration.pydantic_migration_helpers import v1_Field +from datahub.emitter.mce_builder import datahub_guid +from datahub.metadata.com.linkedin.pegasus2avro.assertion import ( + AssertionInfo, + AssertionType, + AssertionValueChangeType, + SqlAssertionInfo, + SqlAssertionType, +) + + +class SqlMetricAssertion(BaseEntityAssertion): + type: Literal["sql"] + statement: str + operator: Operators = v1_Field(discriminator="type", alias="condition") + + def get_assertion_info( + self, + ) -> AssertionInfo: + return AssertionInfo( + 
description=self.description, + type=AssertionType.SQL, + sqlAssertion=SqlAssertionInfo( + type=SqlAssertionType.METRIC, + entity=self.entity, + statement=self.statement, + operator=self.operator.operator, + parameters=self.operator.generate_parameters(), + ), + ) + + +class SqlMetricChangeAssertion(BaseEntityAssertion): + type: Literal["sql"] + statement: str + change_type: Literal["absolute", "percentage"] + operator: Operators = v1_Field(discriminator="type", alias="condition") + + def get_assertion_info( + self, + ) -> AssertionInfo: + return AssertionInfo( + description=self.description, + type=AssertionType.SQL, + sqlAssertion=SqlAssertionInfo( + type=SqlAssertionType.METRIC_CHANGE, + entity=self.entity, + statement=self.statement, + changeType=( + AssertionValueChangeType.ABSOLUTE + if self.change_type == "absolute" + else AssertionValueChangeType.PERCENTAGE + ), + operator=self.operator.operator, + parameters=self.operator.generate_parameters(), + ), + ) + + +class SQLAssertion(BaseAssertionProtocol): + __root__: Union[SqlMetricAssertion, SqlMetricChangeAssertion] = v1_Field() + + @property + def assertion(self): + return self.__root__ + + def get_id(self) -> str: + guid_dict = { + "entity": self.__root__.entity, + "type": self.__root__.type, + "id_raw": self.__root__.id_raw, + } + return self.__root__.id or datahub_guid(guid_dict) + + def get_assertion_info_aspect( + self, + ) -> AssertionInfo: + return self.__root__.get_assertion_info() + + def get_assertion_trigger(self) -> Optional[AssertionTrigger]: + return self.__root__.trigger diff --git a/metadata-ingestion/src/datahub/api/entities/assertion/volume_assertion.py b/metadata-ingestion/src/datahub/api/entities/assertion/volume_assertion.py new file mode 100644 index 00000000000000..da6a125874aa72 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/entities/assertion/volume_assertion.py @@ -0,0 +1,98 @@ +from typing import Optional, Union + +from typing_extensions import Literal + +from datahub.api.entities.assertion.assertion import ( + BaseAssertionProtocol, + BaseEntityAssertion, +) +from datahub.api.entities.assertion.assertion_operator import Operators +from datahub.api.entities.assertion.assertion_trigger import AssertionTrigger +from datahub.api.entities.assertion.filter import DatasetFilter +from datahub.configuration.pydantic_migration_helpers import v1_Field +from datahub.emitter.mce_builder import datahub_guid +from datahub.metadata.com.linkedin.pegasus2avro.assertion import ( + AssertionInfo, + AssertionType, + AssertionValueChangeType, + RowCountChange, + RowCountTotal, + VolumeAssertionInfo, + VolumeAssertionType, +) + + +class RowCountTotalVolumeAssertion(BaseEntityAssertion): + type: Literal["volume"] + metric: Literal["row_count"] = v1_Field(default="row_count") + operator: Operators = v1_Field(discriminator="type", alias="condition") + filters: Optional[DatasetFilter] = v1_Field(default=None) + + def get_assertion_info( + self, + ) -> AssertionInfo: + return AssertionInfo( + description=self.description, + type=AssertionType.VOLUME, + volumeAssertion=VolumeAssertionInfo( + type=VolumeAssertionType.ROW_COUNT_TOTAL, + entity=self.entity, + rowCountTotal=RowCountTotal( + operator=self.operator.operator, + parameters=self.operator.generate_parameters(), + ), + ), + ) + + +class RowCountChangeVolumeAssertion(BaseEntityAssertion): + type: Literal["volume"] + metric: Literal["row_count"] = v1_Field(default="row_count") + change_type: Literal["absolute", "percentage"] + operator: Operators =
v1_Field(discriminator="type", alias="condition") + filters: Optional[DatasetFilter] = v1_Field(default=None) + + def get_assertion_info( + self, + ) -> AssertionInfo: + return AssertionInfo( + description=self.description, + type=AssertionType.VOLUME, + volumeAssertion=VolumeAssertionInfo( + type=VolumeAssertionType.ROW_COUNT_CHANGE, + entity=self.entity, + rowCountChange=RowCountChange( + type=( + AssertionValueChangeType.ABSOLUTE + if self.change_type == "absolute" + else AssertionValueChangeType.PERCENTAGE + ), + operator=self.operator.operator, + parameters=self.operator.generate_parameters(), + ), + ), + ) + + +class VolumeAssertion(BaseAssertionProtocol): + __root__: Union[RowCountTotalVolumeAssertion, RowCountChangeVolumeAssertion] + + @property + def assertion(self): + return self.__root__ + + def get_id(self) -> str: + guid_dict = { + "entity": self.__root__.entity, + "type": self.__root__.type, + "id_raw": self.__root__.id_raw, + } + return self.__root__.id or datahub_guid(guid_dict) + + def get_assertion_info_aspect( + self, + ) -> AssertionInfo: + return self.__root__.get_assertion_info() + + def get_assertion_trigger(self) -> Optional[AssertionTrigger]: + return self.__root__.trigger diff --git a/metadata-ingestion/src/datahub/cli/specific/assertions_cli.py b/metadata-ingestion/src/datahub/cli/specific/assertions_cli.py new file mode 100644 index 00000000000000..dad724bfe11157 --- /dev/null +++ b/metadata-ingestion/src/datahub/cli/specific/assertions_cli.py @@ -0,0 +1,151 @@ +import logging +import os +from pathlib import Path +from typing import Dict, List, Optional + +import click +from click_default_group import DefaultGroup + +from datahub.api.entities.assertion.assertion_config_spec import AssertionsConfigSpec +from datahub.api.entities.assertion.compiler_interface import ( + AssertionCompilationResult, + CompileResultArtifact, + CompileResultArtifactType, +) +from datahub.emitter.mce_builder import make_assertion_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.graph.client import get_default_graph +from datahub.integrations.assertion.registry import ASSERTION_PLATFORMS +from datahub.telemetry import telemetry +from datahub.upgrade import upgrade + +logger = logging.getLogger(__name__) + +REPORT_FILE_NAME = "compile_report.json" + + +@click.group(cls=DefaultGroup, default="upsert") +def assertions() -> None: + """A group of commands to interact with the Assertion entity in DataHub.""" + pass + + +@assertions.command() +@click.option("-f", "--file", required=True, type=click.Path(exists=True)) +@upgrade.check_upgrade +@telemetry.with_telemetry() +def upsert(file: str) -> None: + """Upsert (create or update) a set of assertions in DataHub.""" + + assertions_spec: AssertionsConfigSpec = AssertionsConfigSpec.from_yaml(file) + + with get_default_graph() as graph: + for assertion_spec in assertions_spec.assertions: + try: + mcp = MetadataChangeProposalWrapper( + entityUrn=make_assertion_urn(assertion_spec.get_id()), + aspect=assertion_spec.get_assertion_info_aspect(), + ) + graph.emit_mcp(mcp) + # TODO: Validate uniqueness of assertion ids. Report if duplicates found. + # TODO: Use upsert graphql endpoints here instead of graph.emit_mcp.
+ click.secho(f"Update succeeded for urn {mcp.entityUrn}.", fg="green") + except Exception as e: + logger.exception(e) + click.secho( + f"Update failed for {mcp.entityUrn}: {e}", + fg="red", + ) + + +@assertions.command() +@click.option("-f", "--file", required=True, type=click.Path(exists=True)) +@click.option("-p", "--platform", required=True, type=str) +@click.option("-o", "--output-to", required=False, type=click.Path(exists=True)) +@click.option( + "-x", + "--extras", + required=False, + multiple=True, + default=[], + help="Platform-specific extra key-value inputs in form key=value", +) +@upgrade.check_upgrade +@telemetry.with_telemetry() +def compile( + file: str, platform: str, output_to: Optional[str], extras: List[str] +) -> None: + """Compile a set of assertions for input assertion platform. + Note that this does not run any code or execute any queries on assertion platform + and only creates artifacts specific to assertion platform that can be executed manually. + In future, we may introduce separate command to automatically apply these compiled changes + in assertion platform. Currently, generated result artifacts are stored in target folder + unless another folder is specified using option `--output-to `. + """ + + if platform not in ASSERTION_PLATFORMS: + click.secho( + f"Platform {platform} is not supported.", + fg="red", + ) + + if output_to is None: + output_to = f"{os.getcwd()}/target" + + if not os.path.isdir(output_to): + os.mkdir(output_to) + + assertions_spec: AssertionsConfigSpec = AssertionsConfigSpec.from_yaml(file) + + try: + compiler = ASSERTION_PLATFORMS[platform].create( + output_dir=output_to, extras=extras_list_to_dict(extras) + ) + result = compiler.compile(assertions_spec) + + write_report_file(output_to, result) + click.secho("Compile report:", bold=True) + click.echo(result.report.as_string()) + if result.status == "failure": + click.secho("Failure", fg="yellow", bold=True) + else: + click.secho("Success", fg="green", bold=True) + except Exception as e: + logger.exception(e) + click.secho( + f"Compile failed: {e}", + fg="red", + ) + + +def write_report_file(output_to: str, result: AssertionCompilationResult) -> None: + report_path = Path(output_to) / REPORT_FILE_NAME + with (report_path).open("w") as f: + result.add_artifact( + CompileResultArtifact( + name=REPORT_FILE_NAME, + path=report_path, + type=CompileResultArtifactType.COMPILE_REPORT, + description="Detailed report about compile status", + ) + ) + f.write(result.report.as_json()) + + +def extras_list_to_dict(extras: List[str]) -> Dict[str, str]: + extra_properties: Dict[str, str] = dict() + for x in extras: + parts = x.split("=") + assert ( + len(parts) == 2 + ), f"Invalid value for extras {x}, should be in format key=value" + extra_properties[parts[0]] = parts[1] + return extra_properties + + +# TODO: support for +# Immediate: +# 1. delete assertions (from datahub) +# Later: +# 3. execute compiled assertions on assertion platform (Later, requires connection details to platform), +# 4. cleanup assertions from assertion platform (generate artifacts. 
optionally execute) diff --git a/metadata-ingestion/src/datahub/cli/state_cli.py b/metadata-ingestion/src/datahub/cli/state_cli.py index 547bac35883e7f..29c35be9d1982e 100644 --- a/metadata-ingestion/src/datahub/cli/state_cli.py +++ b/metadata-ingestion/src/datahub/cli/state_cli.py @@ -34,4 +34,5 @@ def inspect(pipeline_name: str, platform: str) -> None: click.secho("No ingestion state found.", fg="red") exit(1) + logger.info(f"Found ingestion state with {len(checkpoint.state.urns)} URNs.") click.echo(json.dumps(checkpoint.state.urns, indent=2)) diff --git a/metadata-ingestion/src/datahub/configuration/connection_resolver.py b/metadata-ingestion/src/datahub/configuration/connection_resolver.py new file mode 100644 index 00000000000000..a82698cd38cd71 --- /dev/null +++ b/metadata-ingestion/src/datahub/configuration/connection_resolver.py @@ -0,0 +1,40 @@ +from typing import Type + +import pydantic + +from datahub.ingestion.api.global_context import get_graph_context + + +def auto_connection_resolver( + connection_field: str = "connection", +) -> classmethod: + def _resolve_connection(cls: Type, values: dict) -> dict: + if connection_field in values: + connection_urn = values.pop(connection_field) + + graph = get_graph_context() + if not graph: + raise ValueError( + "Fetching connection details from the backend requires a DataHub graph client." + ) + + conn = graph.get_connection_json(connection_urn) + if conn is None: + raise ValueError( + f"Connection {connection_urn} not found using {graph}." + ) + + # TODO: Should this do some additional validation against the config model? + + # Update the config, but don't overwrite existing values. + for key, value in conn.items(): + if key not in values: + values[key] = value + + return values + + # Hack: Pydantic maintains unique list of validators by referring its __name__. + # https://github.com/pydantic/pydantic/blob/v1.10.9/pydantic/main.py#L264 + # This hack ensures that multiple validators do not overwrite each other. 
+ _resolve_connection.__name__ = f"{_resolve_connection.__name__}_{connection_field}" + return pydantic.root_validator(pre=True, allow_reuse=True)(_resolve_connection) diff --git a/metadata-ingestion/src/datahub/entrypoints.py b/metadata-ingestion/src/datahub/entrypoints.py index 7c5d84b93726d8..49042db7b9299f 100644 --- a/metadata-ingestion/src/datahub/entrypoints.py +++ b/metadata-ingestion/src/datahub/entrypoints.py @@ -25,6 +25,7 @@ from datahub.cli.ingest_cli import ingest from datahub.cli.migrate import migrate from datahub.cli.put_cli import put +from datahub.cli.specific.assertions_cli import assertions from datahub.cli.specific.datacontract_cli import datacontract from datahub.cli.specific.dataproduct_cli import dataproduct from datahub.cli.specific.dataset_cli import dataset @@ -164,6 +165,7 @@ def init(use_password: bool = False) -> None: datahub.add_command(properties) datahub.add_command(forms) datahub.add_command(datacontract) +datahub.add_command(assertions) try: from datahub.cli.lite_cli import lite diff --git a/metadata-ingestion/src/datahub/ingestion/api/global_context.py b/metadata-ingestion/src/datahub/ingestion/api/global_context.py new file mode 100644 index 00000000000000..96eeae9ccf4333 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/api/global_context.py @@ -0,0 +1,23 @@ +import contextlib +import contextvars +from typing import Iterator, Optional + +from datahub.ingestion.graph.client import DataHubGraph + +_graph_context = contextvars.ContextVar[Optional[DataHubGraph]]("datahub_graph_context") + + +def get_graph_context() -> Optional[DataHubGraph]: + try: + return _graph_context.get() + except LookupError: + return None + + +@contextlib.contextmanager +def set_graph_context(graph: Optional[DataHubGraph]) -> Iterator[None]: + token = _graph_context.set(graph) + try: + yield + finally: + _graph_context.reset(token) diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py index 91402fa3c62b25..7226258515155f 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py @@ -125,17 +125,16 @@ def auto_status_aspect( else: raise ValueError(f"Unexpected type {type(wu.metadata)}") - if not isinstance( - wu.metadata, MetadataChangeEventClass - ) and not entity_supports_aspect(wu.metadata.entityType, StatusClass): + yield wu + + for urn in sorted(all_urns - status_urns - skip_urns): + entity_type = guess_entity_type(urn) + if not entity_supports_aspect(entity_type, StatusClass): # If any entity does not support aspect 'status' then skip that entity from adding status aspect. # Example like dataProcessInstance doesn't suppport status aspect. 
# If not skipped gives error: java.lang.RuntimeException: Unknown aspect status for entity dataProcessInstance - skip_urns.add(urn) - - yield wu + continue - for urn in sorted(all_urns - status_urns - skip_urns): yield MetadataChangeProposalWrapper( entityUrn=urn, aspect=StatusClass(removed=False), diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index d96b11f6b405c8..7621c6d363e3d2 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -23,6 +23,7 @@ from avro.schema import RecordSchema from deprecated import deprecated +from pydantic import BaseModel from requests.models import HTTPError from datahub.cli.cli_utils import get_url_and_token @@ -32,6 +33,10 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter from datahub.emitter.serialization_helper import post_json_transform +from datahub.ingestion.graph.connections import ( + connections_gql, + get_id_from_connection_urn, +) from datahub.ingestion.graph.filters import ( RemovedStatusFilter, SearchFilterRule, @@ -599,6 +604,83 @@ def get_domain_urn_by_name(self, domain_name: str) -> Optional[str]: entities.append(x["entity"]) return entities[0] if entities_yielded else None + def get_connection_json(self, urn: str) -> Optional[dict]: + """Retrieve a connection config. + + This is only supported with Acryl Cloud. + + Args: + urn: The urn of the connection. + + Returns: + The connection config as a dictionary, or None if the connection was not found. + """ + + # TODO: This should be capable of resolving secrets. + + res = self.execute_graphql( + query=connections_gql, + operation_name="GetConnection", + variables={"urn": urn}, + ) + + if not res["connection"]: + return None + + connection_type = res["connection"]["details"]["type"] + if connection_type != "JSON": + logger.error( + f"Expected connection details type to be 'JSON', but got {connection_type}" + ) + return None + + blob = res["connection"]["details"]["json"]["blob"] + obj = json.loads(blob) + + name = res["connection"]["details"].get("name") + logger.info(f"Loaded connection {name or urn}") + + return obj + + def set_connection_json( + self, + urn: str, + *, + platform_urn: str, + config: Union[ConfigModel, BaseModel, dict], + name: Optional[str] = None, + ) -> None: + """Set a connection config. + + This is only supported with Acryl Cloud. + + Args: + urn: The urn of the connection. + platform_urn: The urn of the platform. + config: The connection config as a dictionary or a ConfigModel. + name: The name of the connection. + """ + + if isinstance(config, (ConfigModel, BaseModel)): + blob = config.json() + else: + blob = json.dumps(config) + + id = get_id_from_connection_urn(urn) + + res = self.execute_graphql( + query=connections_gql, + operation_name="SetConnection", + variables={ + "id": id, + "platformUrn": platform_urn, + "name": name, + "blob": blob, + }, + ) + + assert res["upsertConnection"]["urn"] == urn + @deprecated( reason='Use get_urns_by_filter(entity_types=["container"], ...) 
instead' ) @@ -900,7 +982,7 @@ def execute_graphql( body["operationName"] = operation_name logger.debug( - f"Executing graphql query: {query} with variables: {json.dumps(variables)}" + f"Executing {operation_name or ''} graphql query: {query} with variables: {json.dumps(variables)}" ) result = self._post_generic(url, body) if result.get("errors"): @@ -1199,6 +1281,170 @@ def create_tag(self, tag_name: str) -> str: # return urn return res["createTag"] + def _assertion_result_shared(self) -> str: + fragment: str = """ + fragment assertionResult on AssertionResult { + type + rowCount + missingCount + unexpectedCount + actualAggValue + externalUrl + nativeResults { + value + } + error { + type + properties { + value + } + } + } + """ + return fragment + + def _run_assertion_result_shared(self) -> str: + fragment: str = """ + fragment runAssertionResult on RunAssertionResult { + assertion { + urn + } + result { + ... assertionResult + } + } + """ + return fragment + + def _run_assertion_build_params( + self, params: Optional[Dict[str, str]] = {} + ) -> List[Any]: + if params is None: + return [] + + results = [] + for key, value in params.items(): + result = { + "key": key, + "value": value, + } + results.append(result) + + return results + + def run_assertion( + self, + urn: str, + save_result: bool = True, + parameters: Optional[Dict[str, str]] = {}, + async_flag: bool = False, + ) -> Dict: + params = self._run_assertion_build_params(parameters) + graph_query: str = """ + %s + mutation runAssertion($assertionUrn: String!, $saveResult: Boolean, $parameters: [StringMapEntryInput!], $async: Boolean!) { + runAssertion(urn: $assertionUrn, saveResult: $saveResult, parameters: $parameters, async: $async) { + ... assertionResult + } + } + """ % ( + self._assertion_result_shared() + ) + + variables = { + "assertionUrn": urn, + "saveResult": save_result, + "parameters": params, + "async": async_flag, + } + + res = self.execute_graphql( + query=graph_query, + variables=variables, + ) + + return res["runAssertion"] + + def run_assertions( + self, + urns: List[str], + save_result: bool = True, + parameters: Optional[Dict[str, str]] = {}, + async_flag: bool = False, + ) -> Dict: + params = self._run_assertion_build_params(parameters) + graph_query: str = """ + %s + %s + mutation runAssertions($assertionUrns: [String!]!, $saveResult: Boolean, $parameters: [StringMapEntryInput!], $async: Boolean!) { + runAssertions(urns: $assertionUrns, saveResults: $saveResult, parameters: $parameters, async: $async) { + passingCount + failingCount + errorCount + results { + ... runAssertionResult + } + } + } + """ % ( + self._assertion_result_shared(), + self._run_assertion_result_shared(), + ) + + variables = { + "assertionUrns": urns, + "saveResult": save_result, + "parameters": params, + "async": async_flag, + } + + res = self.execute_graphql( + query=graph_query, + variables=variables, + ) + + return res["runAssertions"] + + def run_assertions_for_asset( + self, + urn: str, + tag_urns: Optional[List[str]] = [], + parameters: Optional[Dict[str, str]] = {}, + async_flag: bool = False, + ) -> Dict: + params = self._run_assertion_build_params(parameters) + graph_query: str = """ + %s + %s + mutation runAssertionsForAsset($assetUrn: String!, $tagUrns: [String!], $parameters: [StringMapEntryInput!], $async: Boolean!) { + runAssertionsForAsset(urn: $assetUrn, tagUrns: $tagUrns, parameters: $parameters, async: $async) { + passingCount + failingCount + errorCount + results { + ... 
runAssertionResult + } + } + } + """ % ( + self._assertion_result_shared(), + self._run_assertion_result_shared(), + ) + + variables = { + "assetUrn": urn, + "tagUrns": tag_urns, + "parameters": params, + "async": async_flag, + } + + res = self.execute_graphql( + query=graph_query, + variables=variables, + ) + + return res["runAssertionsForAsset"] + def close(self) -> None: self._make_schema_resolver.cache_clear() super().close() diff --git a/metadata-ingestion/src/datahub/ingestion/graph/connections.py b/metadata-ingestion/src/datahub/ingestion/graph/connections.py new file mode 100644 index 00000000000000..0934bd24698333 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/graph/connections.py @@ -0,0 +1,42 @@ +_connection_urn_prefix = "urn:li:dataHubConnection:" + + +def _is_connection_urn(urn: str) -> bool: + return urn.startswith(_connection_urn_prefix) + + +def get_id_from_connection_urn(urn: str) -> str: + assert _is_connection_urn(urn) + return urn[len(_connection_urn_prefix) :] + + +connections_gql = """\ +query GetConnection($urn: String!) { + connection(urn: $urn) { + urn + details { + type + name + json { + blob + } + } + } +} + +mutation SetConnection($id: String!, $platformUrn: String!, $blob: String!, $name: String) { + upsertConnection( + input: { + id: $id, + type: JSON, + name: $name, + platformUrn: $platformUrn, + json: { + blob: $blob + } + } + ) { + urn + } +} +""" diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index fe688468d2c4e6..75dabc4a7e02ae 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -22,6 +22,7 @@ ) from datahub.ingestion.api.committable import CommitPolicy from datahub.ingestion.api.common import EndOfStream, PipelineContext, RecordEnvelope +from datahub.ingestion.api.global_context import set_graph_context from datahub.ingestion.api.pipeline_run_listener import PipelineRunListener from datahub.ingestion.api.report import Report from datahub.ingestion.api.sink import Sink, SinkReport, WriteCallback @@ -121,6 +122,8 @@ def _add_init_error_context(step: str) -> Iterator[None]: try: yield + except PipelineInitError: + raise except Exception as e: raise PipelineInitError(f"Failed to {step}: {e}") from e @@ -259,31 +262,33 @@ def __init__( self.ctx.graph = self.graph telemetry.telemetry_instance.update_capture_exception_context(server=self.graph) - # once a sink is configured, we can configure reporting immediately to get observability - with _add_init_error_context("configure reporters"): - self._configure_reporting(report_to, no_default_report) + with set_graph_context(self.graph): + with _add_init_error_context("configure reporters"): + self._configure_reporting(report_to, no_default_report) - with _add_init_error_context( - f"find a registered source for type {self.source_type}" - ): - source_class = source_registry.get(self.source_type) + with _add_init_error_context( + f"find a registered source for type {self.source_type}" + ): + source_class = source_registry.get(self.source_type) - with _add_init_error_context(f"configure the source ({self.source_type})"): - self.source = source_class.create( - self.config.source.dict().get("config", {}), self.ctx - ) - logger.debug(f"Source type {self.source_type} ({source_class}) configured") - logger.info("Source configured successfully.") - - extractor_type = self.config.source.extractor - with _add_init_error_context(f"configure the 
extractor ({extractor_type})"): - extractor_class = extractor_registry.get(extractor_type) - self.extractor = extractor_class( - self.config.source.extractor_config, self.ctx - ) + with _add_init_error_context(f"configure the source ({self.source_type})"): + self.source = source_class.create( + self.config.source.dict().get("config", {}), self.ctx + ) + logger.debug( + f"Source type {self.source_type} ({source_class}) configured" + ) + logger.info("Source configured successfully.") + + extractor_type = self.config.source.extractor + with _add_init_error_context(f"configure the extractor ({extractor_type})"): + extractor_class = extractor_registry.get(extractor_type) + self.extractor = extractor_class( + self.config.source.extractor_config, self.ctx + ) - with _add_init_error_context("configure transformers"): - self._configure_transforms() + with _add_init_error_context("configure transformers"): + self._configure_transforms() @property def source_type(self) -> str: @@ -483,7 +488,6 @@ def run(self) -> None: # TODO: propagate EndOfStream and other control events to sinks, to allow them to flush etc. self.sink.write_record_async(record_envelope, callback) - self.sink.close() self.process_commits() self.final_status = "completed" except (SystemExit, RuntimeError, KeyboardInterrupt) as e: @@ -498,6 +502,8 @@ def run(self) -> None: self._notify_reporters_on_ingestion_completion() + self.sink.close() + def transform(self, records: Iterable[RecordEnvelope]) -> Iterable[RecordEnvelope]: """ Transforms the given sequence of records by passing the records through the transformers diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/common.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/common.py index 41472d3372ba51..45dadab7c24dff 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/common.py @@ -3,7 +3,6 @@ from pydantic.fields import Field -from datahub.configuration.source_common import PlatformInstanceConfigMixin from datahub.ingestion.source.aws.aws_common import AwsSourceConfig from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalSourceReport, @@ -14,7 +13,6 @@ class SagemakerSourceConfig( AwsSourceConfig, - PlatformInstanceConfigMixin, StatefulIngestionConfigBase, ): extract_feature_groups: Optional[bool] = Field( diff --git a/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py b/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py index 54475cb509621d..cfa199314fc077 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py +++ b/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py @@ -76,7 +76,10 @@ def create( ) -> "ConfluentSchemaRegistry": return cls(source_config, report) - def _get_subject_for_topic(self, topic: str, is_key_schema: bool) -> Optional[str]: + def _get_subject_for_topic( + self, dataset_subtype: str, is_key_schema: bool + ) -> Optional[str]: + topic: str = dataset_subtype subject_key_suffix: str = "-key" if is_key_schema else "-value" # For details on schema registry subject name strategy, # see: https://docs.confluent.io/platform/current/schema-registry/serdes-develop/index.html#how-the-naming-strategies-work @@ -231,16 +234,24 @@ def get_schemas_from_confluent_ref_json( return all_schemas def _get_schema_and_fields( - self, topic: str, 
is_key_schema: bool + self, dataset_subtype: str, is_key_schema: bool, is_subject: bool ) -> Tuple[Optional[Schema], List[SchemaField]]: schema: Optional[Schema] = None - schema_type_str: str = "key" if is_key_schema else "value" - topic_subject: Optional[str] = self._get_subject_for_topic( - topic=topic, is_key_schema=is_key_schema - ) + + # if provided schema as dataset_subtype, assuming it as value subject + schema_type_str: Optional[str] = "value" + topic_subject: Optional[str] = None + if not is_subject: + schema_type_str = "key" if is_key_schema else "value" + topic_subject = self._get_subject_for_topic( + dataset_subtype=dataset_subtype, is_key_schema=is_key_schema + ) + else: + topic_subject = dataset_subtype + if topic_subject is not None: logger.debug( - f"The {schema_type_str} schema subject:'{topic_subject}' is found for topic:'{topic}'." + f"The {schema_type_str} schema subject:'{topic_subject}' is found for dataset_subtype:'{dataset_subtype}'." ) try: registered_schema = self.schema_registry_client.get_latest_version( @@ -249,29 +260,31 @@ def _get_schema_and_fields( schema = registered_schema.schema except Exception as e: logger.warning( - f"For topic: {topic}, failed to get {schema_type_str} schema from schema registry using subject:'{topic_subject}': {e}." + f"For dataset_subtype: {dataset_subtype}, failed to get {schema_type_str} schema from schema registry using subject:'{topic_subject}': {e}." ) self.report.report_warning( - topic, + dataset_subtype, f"failed to get {schema_type_str} schema from schema registry using subject:'{topic_subject}': {e}.", ) else: logger.debug( - f"For topic: {topic}, the schema registry subject for the {schema_type_str} schema is not found." + f"For dataset_subtype: {dataset_subtype}, the schema registry subject for the {schema_type_str} schema is not found." ) if not is_key_schema: # Value schema is always expected. Report a warning. self.report.report_warning( - topic, + dataset_subtype, f"The schema registry subject for the {schema_type_str} schema is not found." - f" The topic is either schema-less, or no messages have been written to the topic yet.", + f" The dataset_subtype is either schema-less, or no messages have been written to the dataset_subtype yet.", ) - # Obtain the schema fields from schema for the topic. + # Obtain the schema fields from schema for the dataset_subtype. fields: List[SchemaField] = [] if schema is not None: fields = self._get_schema_fields( - topic=topic, schema=schema, is_key_schema=is_key_schema + dataset_subtype=dataset_subtype, + schema=schema, + is_key_schema=is_key_schema, ) return (schema, fields) @@ -295,7 +308,7 @@ def _load_json_schema_with_resolved_references( return jsonref_schema def _get_schema_fields( - self, topic: str, schema: Schema, is_key_schema: bool + self, dataset_subtype: str, schema: Schema, is_key_schema: bool ) -> List[SchemaField]: # Parse the schema and convert it to SchemaFields. 
fields: List[SchemaField] = [] @@ -318,7 +331,7 @@ def _get_schema_fields( imported_schemas: List[ ProtobufSchema ] = self.get_schemas_from_confluent_ref_protobuf(schema) - base_name: str = topic.replace(".", "_") + base_name: str = dataset_subtype.replace(".", "_") fields = protobuf_util.protobuf_schema_to_mce_fields( ProtobufSchema( f"{base_name}-key.proto" @@ -330,14 +343,16 @@ def _get_schema_fields( is_key_schema=is_key_schema, ) elif schema.schema_type == "JSON": - base_name = topic.replace(".", "_") + base_name = dataset_subtype.replace(".", "_") canonical_name = ( f"{base_name}-key" if is_key_schema else f"{base_name}-value" ) jsonref_schema = self._load_json_schema_with_resolved_references( schema=schema, name=canonical_name, - subject=f"{topic}-key" if is_key_schema else f"{topic}-value", + subject=f"{dataset_subtype}-key" + if is_key_schema + else f"{dataset_subtype}-value", ) fields = list( JsonSchemaTranslator.get_fields_from_schema( @@ -346,22 +361,27 @@ def _get_schema_fields( ) elif not self.source_config.ignore_warnings_on_schema_type: self.report.report_warning( - topic, + dataset_subtype, f"Parsing kafka schema type {schema.schema_type} is currently not implemented", ) return fields def _get_schema_metadata( - self, topic: str, platform_urn: str + self, dataset_subtype: str, platform_urn: str, is_subject: bool ) -> Optional[SchemaMetadata]: + # Process the value schema schema, fields = self._get_schema_and_fields( - topic=topic, is_key_schema=False + dataset_subtype=dataset_subtype, + is_key_schema=False, + is_subject=is_subject, ) # type: Tuple[Optional[Schema], List[SchemaField]] # Process the key schema key_schema, key_fields = self._get_schema_and_fields( - topic=topic, is_key_schema=True + dataset_subtype=dataset_subtype, + is_key_schema=True, + is_subject=is_subject, ) # type:Tuple[Optional[Schema], List[SchemaField]] # Create the schemaMetadata aspect. @@ -373,7 +393,7 @@ def _get_schema_metadata( md5_hash: str = md5(schema_as_string.encode()).hexdigest() return SchemaMetadata( - schemaName=topic, + schemaName=dataset_subtype, version=0, hash=md5_hash, platform=platform_urn, @@ -388,17 +408,22 @@ def _get_schema_metadata( return None def get_schema_metadata( - self, topic: str, platform_urn: str + self, dataset_subtype: str, platform_urn: str, is_subject: bool ) -> Optional[SchemaMetadata]: - logger.debug(f"Inside _get_schema_metadata {topic} {platform_urn}") + logger.debug(f"Inside get_schema_metadata {dataset_subtype} {platform_urn}") + # Process the value schema schema, fields = self._get_schema_and_fields( - topic=topic, is_key_schema=False + dataset_subtype=dataset_subtype, + is_key_schema=False, + is_subject=is_subject, ) # type: Tuple[Optional[Schema], List[SchemaField]] # Process the key schema key_schema, key_fields = self._get_schema_and_fields( - topic=topic, is_key_schema=True + dataset_subtype=dataset_subtype, + is_key_schema=True, + is_subject=is_subject, ) # type:Tuple[Optional[Schema], List[SchemaField]] # Create the schemaMetadata aspect. 
@@ -410,7 +435,7 @@ def get_schema_metadata( md5_hash = md5(schema_as_string.encode()).hexdigest() return SchemaMetadata( - schemaName=topic, + schemaName=dataset_subtype, version=0, hash=md5_hash, platform=platform_urn, @@ -423,3 +448,6 @@ def get_schema_metadata( fields=key_fields + fields, ) return None + + def get_subjects(self) -> List[str]: + return self.known_schema_registry_subjects diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 0996f76fc2799b..b758f218e25866 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -45,7 +45,6 @@ from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.source_helpers import auto_workunit from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.common.subtypes import DatasetSubTypes from datahub.ingestion.source.dbt.dbt_tests import ( DBTTest, DBTTestResult, @@ -1445,7 +1444,7 @@ def create_target_platform_mces( yield MetadataChangeProposalWrapper( entityUrn=node_datahub_urn, aspect=upstreams_lineage_class, - ).as_workunit() + ).as_workunit(is_primary_source=False) def extract_query_tag_aspects( self, @@ -1739,12 +1738,6 @@ def _create_subType_wu( return None subtypes: List[str] = [node.node_type.capitalize()] - if node.materialization == "table": - subtypes.append(DatasetSubTypes.TABLE) - - if node.node_type == "model" or node.node_type == "snapshot": - # We need to add the view subtype so that the view properties tab shows up in the UI. - subtypes.append(DatasetSubTypes.VIEW) return MetadataChangeProposalWrapper( entityUrn=node_datahub_urn, diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka.py index 99ef737206ab0c..ae055c51bb6bee 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka.py @@ -303,34 +303,66 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ).topics extra_topic_details = self.fetch_extra_topic_details(topics.keys()) - for t, t_detail in topics.items(): - self.report.report_topic_scanned(t) - if self.source_config.topic_patterns.allowed(t): + for topic, topic_detail in topics.items(): + self.report.report_topic_scanned(topic) + if self.source_config.topic_patterns.allowed(topic): try: yield from self._extract_record( - t, t_detail, extra_topic_details.get(t) + topic, "", topic_detail, extra_topic_details.get(topic) ) except Exception as e: - logger.warning(f"Failed to extract topic {t}", exc_info=True) + logger.warning(f"Failed to extract topic {topic}", exc_info=True) self.report.report_warning( - "topic", f"Exception while extracting topic {t}: {e}" + "topic", f"Exception while extracting topic {topic}: {e}" ) else: - self.report.report_dropped(t) + self.report.report_dropped(topic) + + # Get all subjects from schema registry and ingest them as SCHEMA DatasetSubTypes + for subject in self.schema_registry_client.get_subjects(): + try: + yield from self._extract_record( + "", subject, topic_detail=None, extra_topic_config=None + ) + except Exception as e: + logger.warning(f"Failed to extract subject {subject}", exc_info=True) + self.report.report_warning( + "subject", f"Exception while extracting topic {subject}: {e}" + ) def _extract_record( self, topic: str, + subject: str, topic_detail: 
Optional[TopicMetadata], extra_topic_config: Optional[Dict[str, ConfigEntry]], ) -> Iterable[MetadataWorkUnit]: - logger.debug(f"topic = {topic}") - AVRO = "AVRO" - # 1. Create the default dataset snapshot for the topic. - dataset_name = topic + kafka_entity = topic if len(topic) != 0 else subject + is_subject = False if len(topic) != 0 else True + + logger.debug(f"kafka entity name = {kafka_entity}") + platform_urn = make_data_platform_urn(self.platform) + + # 1. Create schemaMetadata aspect (pass control to SchemaRegistry) + schema_metadata = self.schema_registry_client.get_schema_metadata( + kafka_entity, platform_urn, is_subject + ) + + # topic can have no associated subject, but still it can be ingested without schema + if is_subject: + if schema_metadata is None: + return + dataset_name = schema_metadata.schemaName + else: + dataset_name = topic + + # dataset_name = schema_metadata.schemaName if len(topic) == 0 else topic + # 2. Create the default dataset snapshot for the topic. + # if schema_metadata is not None: + # dataset_name = schema_metadata.schemaName if len(topic) == 0 else topic dataset_urn = make_dataset_urn_with_platform_instance( platform=self.platform, name=dataset_name, @@ -342,10 +374,6 @@ def _extract_record( aspects=[Status(removed=False)], # we append to this list later on ) - # 2. Attach schemaMetadata aspect (pass control to SchemaRegistry) - schema_metadata = self.schema_registry_client.get_schema_metadata( - topic, platform_urn - ) if schema_metadata is not None: dataset_snapshot.aspects.append(schema_metadata) @@ -356,9 +384,19 @@ def _extract_record( browse_path = BrowsePathsClass([browse_path_str]) dataset_snapshot.aspects.append(browse_path) - custom_props = self.build_custom_properties( - topic, topic_detail, extra_topic_config - ) + # build custom properties for topic, schema properties may be added as needed + custom_props: Dict[str, str] = {} + if len(topic) != 0: + custom_props = self.build_custom_properties( + topic, topic_detail, extra_topic_config + ) + schemaName: Optional[ + str + ] = self.schema_registry_client._get_subject_for_topic( + dataset_subtype=topic, is_key_schema=False + ) + if schemaName is not None: + custom_props["Schema Name"] = schemaName # 4. Set dataset's description, tags, ownership, etc, if topic schema type is avro description: Optional[str] = None @@ -414,7 +452,7 @@ def _extract_record( ) dataset_properties = DatasetPropertiesClass( - name=topic, customProperties=custom_props, description=description + name=dataset_name, customProperties=custom_props, description=description ) dataset_snapshot.aspects.append(dataset_properties) @@ -431,12 +469,13 @@ def _extract_record( # 6. Emit the datasetSnapshot MCE mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot) - yield MetadataWorkUnit(id=f"kafka-{topic}", mce=mce) + yield MetadataWorkUnit(id=f"kafka-{kafka_entity}", mce=mce) - # 7. Add the subtype aspect marking this as a "topic" + # 7. 
Add the subtype aspect marking this as a "topic" or "schema" + typeName = DatasetSubTypes.TOPIC if len(topic) != 0 else DatasetSubTypes.SCHEMA yield MetadataChangeProposalWrapper( entityUrn=dataset_urn, - aspect=SubTypesClass(typeNames=[DatasetSubTypes.TOPIC]), + aspect=SubTypesClass(typeNames=[typeName]), ).as_workunit() domain_urn: Optional[str] = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_schema_registry_base.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_schema_registry_base.py index 34ff76f44d1dd3..59f174a9a50458 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_schema_registry_base.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_schema_registry_base.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Optional +from typing import List, Optional from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaMetadata @@ -7,6 +7,16 @@ class KafkaSchemaRegistryBase(ABC): @abstractmethod def get_schema_metadata( - self, topic: str, platform_urn: str + self, topic: str, platform_urn: str, is_subject: bool ) -> Optional[SchemaMetadata]: pass + + @abstractmethod + def get_subjects(self) -> List[str]: + pass + + @abstractmethod + def _get_subject_for_topic( + self, dataset_subtype: str, is_key_schema: bool + ) -> Optional[str]: + pass diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_assertion.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_assertion.py new file mode 100644 index 00000000000000..a28a81cc5b955d --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_assertion.py @@ -0,0 +1,127 @@ +import logging +from datetime import datetime +from typing import Callable, Iterable, List, Optional + +from pydantic import BaseModel + +from datahub.emitter.mce_builder import ( + make_assertion_urn, + make_data_platform_urn, + make_dataplatform_instance_urn, +) +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config +from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery +from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report +from datahub.ingestion.source.snowflake.snowflake_utils import ( + SnowflakeCommonMixin, + SnowflakeConnectionMixin, + SnowflakeQueryMixin, +) +from datahub.metadata.com.linkedin.pegasus2avro.assertion import ( + AssertionResult, + AssertionResultType, + AssertionRunEvent, + AssertionRunStatus, +) +from datahub.metadata.com.linkedin.pegasus2avro.common import DataPlatformInstance +from datahub.utilities.time import datetime_to_ts_millis + +logger: logging.Logger = logging.getLogger(__name__) + + +class DataQualityMonitoringResult(BaseModel): + MEASUREMENT_TIME: datetime + METRIC_NAME: str + TABLE_NAME: str + TABLE_SCHEMA: str + TABLE_DATABASE: str + VALUE: int + + +class SnowflakeAssertionsHandler( + SnowflakeCommonMixin, SnowflakeQueryMixin, SnowflakeConnectionMixin +): + def __init__( + self, + config: SnowflakeV2Config, + report: SnowflakeV2Report, + dataset_urn_builder: Callable[[str], str], + ) -> None: + self.config = config + self.report = report + self.logger = logger + self.dataset_urn_builder = dataset_urn_builder + self.connection = None + self._urns_processed: List[str] = [] + + def get_assertion_workunits( + self, discovered_datasets: List[str] + ) -> Iterable[MetadataWorkUnit]: + 
self.connection = self.create_connection() + if self.connection is None: + return + + cur = self.query( + SnowflakeQuery.dmf_assertion_results( + datetime_to_ts_millis(self.config.start_time), + datetime_to_ts_millis(self.config.end_time), + ) + ) + for db_row in cur: + mcp = self._process_result_row(db_row, discovered_datasets) + if mcp: + yield mcp.as_workunit(is_primary_source=False) + + if mcp.entityUrn and mcp.entityUrn not in self._urns_processed: + self._urns_processed.append(mcp.entityUrn) + yield self._gen_platform_instance_wu(mcp.entityUrn) + + def _gen_platform_instance_wu(self, urn: str) -> MetadataWorkUnit: + # Construct a MetadataChangeProposalWrapper object for assertion platform + return MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=DataPlatformInstance( + platform=make_data_platform_urn(self.platform), + instance=( + make_dataplatform_instance_urn( + self.platform, self.config.platform_instance + ) + if self.config.platform_instance + else None + ), + ), + ).as_workunit(is_primary_source=False) + + def _process_result_row( + self, result_row: dict, discovered_datasets: List[str] + ) -> Optional[MetadataChangeProposalWrapper]: + try: + result = DataQualityMonitoringResult.parse_obj(result_row) + assertion_guid = result.METRIC_NAME.split("__")[-1].lower() + status = bool(result.VALUE) # 1 if PASS, 0 if FAIL + assertee = self.get_dataset_identifier( + result.TABLE_NAME, result.TABLE_SCHEMA, result.TABLE_DATABASE + ) + if assertee in discovered_datasets: + return MetadataChangeProposalWrapper( + entityUrn=make_assertion_urn(assertion_guid), + aspect=AssertionRunEvent( + timestampMillis=datetime_to_ts_millis(result.MEASUREMENT_TIME), + runId=result.MEASUREMENT_TIME.strftime("%Y-%m-%dT%H:%M:%SZ"), + asserteeUrn=self.dataset_urn_builder(assertee), + status=AssertionRunStatus.COMPLETE, + assertionUrn=make_assertion_urn(assertion_guid), + result=AssertionResult( + type=( + AssertionResultType.SUCCESS + if status + else AssertionResultType.FAILURE + ) + ), + ), + ) + except Exception as e: + self.report.report_warning("assertion-result-parse-failure", str(e)) + return None diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index c1fbb2cdc1f3fb..4beb2684485694 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -164,6 +164,12 @@ class SnowflakeV2Config( "username.", ) + include_assertion_results: bool = Field( + default=False, + description="Whether to ingest assertion run results for assertions created using Datahub" + " assertions CLI in snowflake", + ) + @validator("convert_urns_to_lowercase") def validate_convert_urns_to_lowercase(cls, v): if not v: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index dac43499a1c715..8187fce78e5e47 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -1016,3 +1016,25 @@ def table_upstreams_only( ORDER BY h.downstream_table_name """ + + @staticmethod + def dmf_assertion_results(start_time_millis: int, end_time_millis: int) -> str: + pattern = r"datahub\\_\\_%" + escape_pattern = r"\\" + return f""" + SELECT + MEASUREMENT_TIME AS "MEASUREMENT_TIME", + 
METRIC_NAME AS "METRIC_NAME", + TABLE_NAME AS "TABLE_NAME", + TABLE_SCHEMA AS "TABLE_SCHEMA", + TABLE_DATABASE AS "TABLE_DATABASE", + VALUE::INT AS "VALUE" + FROM + SNOWFLAKE.LOCAL.DATA_QUALITY_MONITORING_RESULTS + WHERE + MEASUREMENT_TIME >= to_timestamp_ltz({start_time_millis}, 3) + AND MEASUREMENT_TIME < to_timestamp_ltz({end_time_millis}, 3) + AND METRIC_NAME ilike '{pattern}' escape '{escape_pattern}' + ORDER BY MEASUREMENT_TIME ASC; + +""" diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 25626d434f2ef4..fc2733c211580c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -50,6 +50,9 @@ SnowflakeEdition, SnowflakeObjectDomain, ) +from datahub.ingestion.source.snowflake.snowflake_assertion import ( + SnowflakeAssertionsHandler, +) from datahub.ingestion.source.snowflake.snowflake_config import ( SnowflakeV2Config, TagOption, @@ -103,6 +106,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import ( StatefulIngestionSourceBase, ) +from datahub.ingestion.source_config.sql.snowflake import BaseSnowflakeConfig from datahub.ingestion.source_report.ingestion_stage import ( LINEAGE_EXTRACTION, METADATA_EXTRACTION, @@ -328,7 +332,7 @@ def test_connection(config_dict: dict) -> TestConnectionReport: test_report = TestConnectionReport() try: - connection_conf = SnowflakeV2Config.parse_obj_allow_extras(config_dict) + connection_conf = BaseSnowflakeConfig.parse_obj_allow_extras(config_dict) connection: SnowflakeConnection = connection_conf.get_connection() assert connection @@ -354,7 +358,7 @@ def test_connection(config_dict: dict) -> TestConnectionReport: @staticmethod def check_capabilities( - conn: SnowflakeConnection, connection_conf: SnowflakeV2Config + conn: SnowflakeConnection, connection_conf: BaseSnowflakeConfig ) -> Dict[Union[SourceCapability, str], CapabilityReport]: # Currently only overall capabilities are reported. # Resource level variations in capabilities are not considered. 
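For context, a small sketch of how a row returned by the dmf_assertion_results query above is tied back to a DataHub assertion in _process_result_row; the metric name and guid below are hypothetical:

    # METRIC_NAME values are expected to look like "datahub__<assertion_guid>"
    # (hence the ilike 'datahub\_\_%' filter), matching the datahub__<id> naming
    # used by the DMF compiler later in this patch.
    metric_name = "DATAHUB__8f0c3a1e2b"  # hypothetical
    assertion_guid = metric_name.split("__")[-1].lower()  # -> "8f0c3a1e2b"
    passed = bool(1)  # VALUE column is 1 on PASS, 0 on FAIL
    # assertion_guid is then passed to make_assertion_urn() to build the assertion URN.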
@@ -603,6 +607,11 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) and self.usage_extractor: yield from self.usage_extractor.get_usage_workunits(discovered_datasets) + if self.config.include_assertion_results: + yield from SnowflakeAssertionsHandler( + self.config, self.report, self.gen_dataset_urn + ).get_assertion_workunits(discovered_datasets) + def report_cache_info(self) -> None: lru_cache_functions: List[Callable] = [ self.data_dictionary.get_tables_for_database, diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py index 0145c922696e89..9154a555f23090 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py @@ -37,7 +37,7 @@ class StatefulStaleMetadataRemovalConfig(StatefulIngestionConfig): description="Soft-deletes the entities present in the last successful run but missing in the current run with stateful_ingestion enabled.", ) fail_safe_threshold: float = pydantic.Field( - default=100.0, + default=40.0, description="Prevents large amount of soft deletes & the state from committing from accidental changes to the source configuration if the relative change percent in entities compared to the previous state is above the 'fail_safe_threshold'.", le=100.0, ge=0.0, @@ -224,6 +224,8 @@ def gen_removed_entity_workunits(self) -> Iterable[MetadataWorkUnit]: assert self.stateful_ingestion_config + copy_previous_state_and_fail = False + # Check if the entity delta is below the fail-safe threshold. entity_difference_percent = cur_checkpoint_state.get_percent_entities_changed( last_checkpoint_state @@ -242,30 +244,32 @@ def gen_removed_entity_workunits(self) -> Iterable[MetadataWorkUnit]: f"Will not soft-delete entities, since we'd be deleting {entity_difference_percent:.1f}% of the existing entities. " f"To force a deletion, increase the value of 'stateful_ingestion.fail_safe_threshold' (currently {self.stateful_ingestion_config.fail_safe_threshold})", ) - return + copy_previous_state_and_fail = True if self.source.get_report().events_produced == 0: - # SUBTLE: By reporting this as a failure here, we also ensure that the - # new (empty) state doesn't get committed. - # TODO: Move back to using fail_safe_threshold once we're confident that we've squashed all the bugs. self.source.get_report().report_failure( "stale-entity-removal", "Skipping stale entity soft-deletion because the source produced no events. " "This is a fail-safe mechanism to prevent accidental deletion of all entities.", ) - return + copy_previous_state_and_fail = True # If the source already had a failure, skip soft-deletion. - # TODO: Eventually, switch this to check if anything in the pipeline had a failure so far. + # TODO: Eventually, switch this to check if anything in the pipeline had a failure so far, not just the source. 
if self.source.get_report().failures:
-            for urn in last_checkpoint_state.get_urns_not_in(
-                type="*", other_checkpoint_state=cur_checkpoint_state
-            ):
-                self.add_entity_to_state("", urn)
             self.source.get_report().report_warning(
                 "stale-entity-removal",
-                "Skipping stale entity soft-deletion and coping urns from last state since source already had failures.",
+                "Skipping stale entity soft-deletion and copying urns from last state since source already had failures.",
             )
+            copy_previous_state_and_fail = True
+
+        if copy_previous_state_and_fail:
+            logger.info(
+                f"Copying urns from last state (size {len(last_checkpoint_state.urns)}) to current state (size {len(cur_checkpoint_state.urns)}) "
+                "to ensure stale entities from previous runs are deleted on the next successful run."
+            )
+            for urn in last_checkpoint_state.urns:
+                self.add_entity_to_state("", urn)
             return
 
         # Everything looks good, emit the soft-deletion workunits
diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py
index e9db82ce75cd99..521e755b6a00c5 100644
--- a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py
+++ b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py
@@ -13,6 +13,7 @@
 )
 
 from datahub.configuration.common import AllowDenyPattern, ConfigModel
+from datahub.configuration.connection_resolver import auto_connection_resolver
 from datahub.configuration.oauth import OAuthConfiguration, OAuthIdentityProvider
 from datahub.configuration.time_window_config import BaseTimeWindowConfig
 from datahub.configuration.validate_field_rename import pydantic_renamed_field
@@ -45,6 +46,8 @@ class BaseSnowflakeConfig(ConfigModel):
     # Note: this config model is also used by the snowflake-usage source.
+ _connection = auto_connection_resolver() + options: dict = pydantic.Field( default_factory=dict, description="Any options specified here will be passed to [SQLAlchemy.create_engine](https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine) as kwargs.", diff --git a/metadata-ingestion/src/datahub/integrations/assertion/__init__.py b/metadata-ingestion/src/datahub/integrations/assertion/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/src/datahub/integrations/assertion/common.py b/metadata-ingestion/src/datahub/integrations/assertion/common.py new file mode 100644 index 00000000000000..9ffad5cf66640a --- /dev/null +++ b/metadata-ingestion/src/datahub/integrations/assertion/common.py @@ -0,0 +1,61 @@ +from functools import lru_cache +from typing import List, Optional, Tuple, TypedDict + +from datahub.api.entities.assertion.assertion import BaseEntityAssertion +from datahub.ingestion.graph.client import get_default_graph +from datahub.metadata.com.linkedin.pegasus2avro.dataset import DatasetProperties +from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaMetadata +from datahub.utilities.urns.urn import Urn + + +class ColumnDict(TypedDict): + col: str + native_type: str + + +@lru_cache +def get_qualified_name_from_datahub(urn: str) -> Optional[str]: + with get_default_graph() as graph: + props: Optional[DatasetProperties] = graph.get_aspect(urn, DatasetProperties) + if props is not None: + return props.qualifiedName + return None + + +@lru_cache +def get_schema_from_datahub(urn: str) -> Optional[List[ColumnDict]]: + with get_default_graph() as graph: + schema: Optional[SchemaMetadata] = graph.get_aspect(urn, SchemaMetadata) + if schema is not None: + return [ + {"col": field.fieldPath, "native_type": field.nativeDataType} + for field in schema.fields + ] + return None + + +def get_entity_name(assertion: BaseEntityAssertion) -> Tuple[str, str, str]: + if assertion.meta and assertion.meta.get("entity_qualified_name"): + parts = assertion.meta["entity_qualified_name"].split(".") + else: + qualified_name = get_qualified_name_from_datahub(assertion.entity) + if qualified_name is not None: + parts = qualified_name.split(".") + else: + urn_id = Urn.create_from_string(assertion.entity).entity_ids[1] + parts = urn_id.split(".") + if len(parts) > 3: + parts = parts[-3:] + assert len(parts) == 3 + database = parts[-3] + schema = parts[-2] + table = parts[-1] + return database, schema, table + + +def get_entity_schema(assertion: BaseEntityAssertion) -> Optional[List[ColumnDict]]: + if assertion.meta and assertion.meta.get("entity_schema"): + return assertion.meta.get("entity_schema") + elif get_schema_from_datahub(assertion.entity): + return get_schema_from_datahub(assertion.entity) + return None diff --git a/metadata-ingestion/src/datahub/integrations/assertion/registry.py b/metadata-ingestion/src/datahub/integrations/assertion/registry.py new file mode 100644 index 00000000000000..26015ddbf9a315 --- /dev/null +++ b/metadata-ingestion/src/datahub/integrations/assertion/registry.py @@ -0,0 +1,8 @@ +from typing import Dict, Type + +from datahub.api.entities.assertion.compiler_interface import AssertionCompiler +from datahub.integrations.assertion.snowflake.compiler import SnowflakeAssertionCompiler + +ASSERTION_PLATFORMS: Dict[str, Type[AssertionCompiler]] = { + "snowflake": SnowflakeAssertionCompiler +} diff --git a/metadata-ingestion/src/datahub/integrations/assertion/snowflake/__init__.py 
b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/src/datahub/integrations/assertion/snowflake/compiler.py b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/compiler.py new file mode 100644 index 00000000000000..8d2ae2960ebd05 --- /dev/null +++ b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/compiler.py @@ -0,0 +1,237 @@ +import logging +import os +from pathlib import Path +from typing import Dict, Tuple + +from datahub.api.entities.assertion.assertion_config_spec import AssertionsConfigSpec +from datahub.api.entities.assertion.assertion_operator import LessThanOrEqualToOperator +from datahub.api.entities.assertion.assertion_trigger import ( + AssertionTrigger, + CronTrigger, + EntityChangeTrigger, + IntervalTrigger, +) +from datahub.api.entities.assertion.compiler_interface import ( + AssertionCompilationResult, + AssertionCompiler, + CompileResultArtifact, + CompileResultArtifactType, +) +from datahub.api.entities.assertion.datahub_assertion import DataHubAssertion +from datahub.api.entities.assertion.field_assertion import FieldValuesAssertion +from datahub.api.entities.assertion.freshness_assertion import ( + FixedIntervalFreshnessAssertion, +) +from datahub.emitter.mce_builder import make_assertion_urn +from datahub.integrations.assertion.common import get_entity_name, get_entity_schema +from datahub.integrations.assertion.snowflake.dmf_generator import SnowflakeDMFHandler +from datahub.integrations.assertion.snowflake.field_metric_sql_generator import ( + SnowflakeFieldMetricSQLGenerator, +) +from datahub.integrations.assertion.snowflake.field_values_metric_sql_generator import ( + SnowflakeFieldValuesMetricSQLGenerator, +) +from datahub.integrations.assertion.snowflake.metric_operator_sql_generator import ( + SnowflakeMetricEvalOperatorSQLGenerator, +) +from datahub.integrations.assertion.snowflake.metric_sql_generator import ( + SnowflakeMetricSQLGenerator, +) + +logger = logging.Logger(__name__) + +DMF_DEFINITIONS_FILE_NAME = "dmf_definitions.sql" +DMF_ASSOCIATIONS_FILE_NAME = "dmf_associations.sql" +DMF_SCHEMA_PROPERTY_KEY = "DMF_SCHEMA" + + +class SnowflakeAssertionCompiler(AssertionCompiler): + def __init__(self, output_dir: str, extras: Dict[str, str]) -> None: + self.output_dir = Path(output_dir) + self.extras = extras + self.metric_generator = SnowflakeMetricSQLGenerator( + SnowflakeFieldMetricSQLGenerator(), SnowflakeFieldValuesMetricSQLGenerator() + ) + self.metric_evaluator = SnowflakeMetricEvalOperatorSQLGenerator() + self.dmf_handler = SnowflakeDMFHandler() + + self._entity_schedule_history: Dict[str, AssertionTrigger] = dict() + + @classmethod + def create( + cls, output_dir: str, extras: Dict[str, str] + ) -> "SnowflakeAssertionCompiler": + assert os.path.exists( + output_dir + ), f"Specified location {output_dir} does not exist." + + assert os.path.isdir( + output_dir + ), f"Specified location {output_dir} is not a folder." 
+
+        assert any(
+            x.upper() == DMF_SCHEMA_PROPERTY_KEY for x in extras
+        ), "Must specify value for DMF schema using -x DMF_SCHEMA="
+
+        return SnowflakeAssertionCompiler(output_dir, extras)
+
+    def compile(
+        self, assertion_config_spec: AssertionsConfigSpec
+    ) -> AssertionCompilationResult:
+        result = AssertionCompilationResult("snowflake", "success")
+
+        # TODO: Create/Report permissions sql
+
+        dmf_definitions_path = self.output_dir / DMF_DEFINITIONS_FILE_NAME
+        dmf_associations_path = self.output_dir / DMF_ASSOCIATIONS_FILE_NAME
+        with (dmf_definitions_path).open("w") as definitions, (
+            dmf_associations_path
+        ).open("w") as associations:
+            for assertion_spec in assertion_config_spec.assertions:
+                result.report.num_processed += 1
+                try:
+                    start_line = f"\n-- Start of Assertion {assertion_spec.get_id()}\n"
+                    (dmf_definition, dmf_association) = self.process_assertion(
+                        assertion_spec
+                    )
+                    end_line = f"\n-- End of Assertion {assertion_spec.get_id()}\n"
+
+                    definitions.write(start_line)
+                    definitions.write(dmf_definition)
+                    definitions.write(end_line)
+
+                    associations.write(start_line)
+                    associations.write(dmf_association)
+                    associations.write(end_line)
+
+                    result.report.num_compile_succeeded += 1
+                except Exception as e:
+                    result.status = "failure"
+                    result.report.report_failure(
+                        assertion_spec.get_id(),
+                        f"Failed to compile assertion of type {assertion_spec.assertion.type} due to error: {e}",
+                    )
+                    result.report.num_compile_failed += 1
+        if result.report.num_compile_succeeded > 0:
+            result.add_artifact(
+                CompileResultArtifact(
+                    name=DMF_DEFINITIONS_FILE_NAME,
+                    path=dmf_definitions_path,
+                    type=CompileResultArtifactType.SQL_QUERIES,
+                    description="SQL file containing DMF create definitions equivalent to Datahub Assertions",
+                )
+            )
+            result.add_artifact(
+                CompileResultArtifact(
+                    name=DMF_ASSOCIATIONS_FILE_NAME,
+                    path=dmf_associations_path,
+                    type=CompileResultArtifactType.SQL_QUERIES,
+                    description="ALTER TABLE queries to associate DMFs to table to run on configured schedule.",
+                )
+            )
+
+        return result
+
+    def process_assertion(self, assertion: DataHubAssertion) -> Tuple[str, str]:
+        # TODO: support schema assertion?
+
+        # For freshness assertion, metric is difference in seconds between assertion execution time
+        # and last time table was updated.
+        # For field values assertion, metric is number or percentage of rows that do not satisfy
+        # operator condition.
+        # For remaining assertions, numeric metric is discernible in assertion definition itself.
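        # (Illustrative example, values hypothetical: a freshness assertion with a
        #  lookback_interval of 1 hour compiles to a staleness metric in seconds and a
        #  LessThanOrEqualToOperator threshold of timedelta(hours=1).total_seconds() == 3600.0,
        #  so the check passes while the table was updated within the last hour.)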
+ metric_definition = self.metric_generator.metric_sql(assertion.assertion) + + if isinstance(assertion.assertion, FixedIntervalFreshnessAssertion): + assertion_sql = self.metric_evaluator.operator_sql( + LessThanOrEqualToOperator( + type="less_than_or_equal_to", + value=assertion.assertion.lookback_interval.total_seconds(), + ), + metric_definition, + ) + elif isinstance(assertion.assertion, FieldValuesAssertion): + assertion_sql = self.metric_evaluator.operator_sql( + LessThanOrEqualToOperator( + type="less_than_or_equal_to", + value=assertion.assertion.failure_threshold.value, + ), + metric_definition, + ) + else: + assertion_sql = self.metric_evaluator.operator_sql( + assertion.assertion.operator, metric_definition + ) + + dmf_name = get_dmf_name(assertion) + dmf_schema_name = self.extras[DMF_SCHEMA_PROPERTY_KEY] + + args_create_dmf, args_add_dmf = get_dmf_args(assertion) + + entity_name = get_entity_name(assertion.assertion) + + self._entity_schedule_history.setdefault( + assertion.assertion.entity, assertion.assertion.trigger + ) + if ( + assertion.assertion.entity in self._entity_schedule_history + and self._entity_schedule_history[assertion.assertion.entity] + != assertion.assertion.trigger + ): + raise ValueError( + "Assertions on same entity must have same schedules as of now." + f" Found different schedules on entity {assertion.assertion.entity} ->" + f" ({self._entity_schedule_history[assertion.assertion.entity].trigger})," + f" ({assertion.assertion.trigger.trigger})" + ) + + dmf_schedule = get_dmf_schedule(assertion.assertion.trigger) + dmf_definition = self.dmf_handler.create_dmf( + f"{dmf_schema_name}.{dmf_name}", + args_create_dmf, + assertion.assertion.description + or f"Created via DataHub for assertion {make_assertion_urn(assertion.get_id())} of type {assertion.assertion.type}", + assertion_sql, + ) + dmf_association = self.dmf_handler.add_dmf_to_table( + f"{dmf_schema_name}.{dmf_name}", + args_add_dmf, + dmf_schedule, + ".".join(entity_name), + ) + + return dmf_definition, dmf_association + + +def get_dmf_name(assertion: DataHubAssertion) -> str: + return f"datahub__{assertion.get_id()}" + + +def get_dmf_args(assertion: DataHubAssertion) -> Tuple[str, str]: + """Returns Tuple with + - Args used to create DMF + - Args used to add DMF to table""" + # Snowflake does not allow creating custom data metric + # function without column name argument. 
+ # So we fetch any one column from table's schema + args_create_dmf = "ARGT TABLE({col_name} {col_type})" + args_add_dmf = "{col_name}" + entity_schema = get_entity_schema(assertion.assertion) + if entity_schema: + for col_dict in entity_schema: + return args_create_dmf.format( + col_name=col_dict["col"], col_type=col_dict["native_type"] + ), args_add_dmf.format(col_name=col_dict["col"]) + + raise ValueError("entity schema not available") + + +def get_dmf_schedule(trigger: AssertionTrigger) -> str: + if isinstance(trigger.trigger, EntityChangeTrigger): + return "TRIGGER_ON_CHANGES" + elif isinstance(trigger.trigger, CronTrigger): + return f"USING CRON {trigger.trigger.cron} {trigger.trigger.timezone}" + elif isinstance(trigger.trigger, IntervalTrigger): + return f"{trigger.trigger.interval.seconds/60} MIN" + else: + raise ValueError(f"Unsupported trigger type {type(trigger.trigger)}") diff --git a/metadata-ingestion/src/datahub/integrations/assertion/snowflake/dmf_generator.py b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/dmf_generator.py new file mode 100644 index 00000000000000..4f50b7c2b81a57 --- /dev/null +++ b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/dmf_generator.py @@ -0,0 +1,22 @@ +class SnowflakeDMFHandler: + def create_dmf( + self, dmf_name: str, dmf_args: str, dmf_comment: str, dmf_sql: str + ) -> str: + return f""" + CREATE or REPLACE DATA METRIC FUNCTION + {dmf_name} ({dmf_args}) + RETURNS NUMBER + COMMENT = '{dmf_comment}' + AS + $$ + {dmf_sql} + $$; + """ + + def add_dmf_to_table( + self, dmf_name: str, dmf_col_args: str, dmf_schedule: str, table_identifier: str + ) -> str: + return f""" + ALTER TABLE {table_identifier} SET DATA_METRIC_SCHEDULE = '{dmf_schedule}'; + ALTER TABLE {table_identifier} ADD DATA METRIC FUNCTION {dmf_name} ON ({dmf_col_args}); + """ diff --git a/metadata-ingestion/src/datahub/integrations/assertion/snowflake/field_metric_sql_generator.py b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/field_metric_sql_generator.py new file mode 100644 index 00000000000000..3ff218a9f280b3 --- /dev/null +++ b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/field_metric_sql_generator.py @@ -0,0 +1,154 @@ +from typing import List, Optional + +from datahub.api.entities.assertion.field_assertion import FieldMetricAssertion +from datahub.api.entities.assertion.field_metric import FieldMetric +from datahub.integrations.assertion.common import get_entity_name + + +class SnowflakeFieldMetricSQLGenerator: + def unique_count_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + return f"""select count(distinct {field_name}) + from {entity_name} {self._setup_where_clause([dataset_filter])}""" + + def unique_percentage_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + return f"""select count(distinct {field_name})/count(*) + from {entity_name} {self._setup_where_clause([dataset_filter])}""" + + def null_count_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + where_clause = self._setup_where_clause( + [dataset_filter, f"{field_name} is null"] + ) + return f"""select count(*) + from {entity_name} {where_clause}""" + + def null_percentage_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + return f"""select ({self.null_count_sql(field_name, entity_name, dataset_filter)})/count(*) + from {entity_name} {self._setup_where_clause([dataset_filter])}""" 
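    # (Illustrative example with hypothetical names:
    #  SnowflakeFieldMetricSQLGenerator().null_count_sql(
    #      field_name="email", entity_name="test_db.public.users", dataset_filter=None
    #  ) returns, modulo whitespace,
    #  "select count(*) from test_db.public.users where email is null".)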
+ + def min_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + return f"""select min({field_name}) + from {entity_name} {self._setup_where_clause([dataset_filter])}""" + + def max_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + return f"""select max({field_name}) + from {entity_name} {self._setup_where_clause([dataset_filter])}""" + + def mean_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + return f"""select avg({field_name}) + from {entity_name} {self._setup_where_clause([dataset_filter])}""" + + def median_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + return f"""select median({field_name}) + from {entity_name} {self._setup_where_clause([dataset_filter])}""" + + def stddev_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + return f"""select stddev({field_name}) + from {entity_name} {self._setup_where_clause([dataset_filter])}""" + + def negative_count_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + where_clause = self._setup_where_clause([dataset_filter, f"{field_name} < 0"]) + return f"""select count(*) + from {entity_name} {where_clause}""" + + def negative_percentage_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + return f"""select ({self.negative_count_sql(field_name, entity_name, dataset_filter)})/count(*) + from {entity_name} {self._setup_where_clause([dataset_filter])}""" + + def zero_count_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + where_clause = self._setup_where_clause([dataset_filter, f"{field_name} = 0"]) + return f"""select count(*) + from {entity_name} {where_clause}""" + + def zero_percentage_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + return f"""select ({self.zero_count_sql(field_name, entity_name, dataset_filter)})/count(*) + from {entity_name} {self._setup_where_clause([dataset_filter])}""" + + def min_length_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + return f"""select min(length({field_name})) + from {entity_name} {self._setup_where_clause([dataset_filter])}""" + + def max_length_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + return f"""select max(length({field_name})) + from {entity_name} {self._setup_where_clause([dataset_filter])}""" + + def empty_count_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + where_clause = self._setup_where_clause( + [dataset_filter, f"({field_name} is null or trim({field_name})='')"] + ) + return f"""select count(*) + from {entity_name} {where_clause}""" + + def empty_percentage_sql( + self, field_name: str, entity_name: str, dataset_filter: Optional[str] + ) -> str: + return f"""select ({self.empty_count_sql(field_name, entity_name, dataset_filter)})/count(*) + from {entity_name} {self._setup_where_clause([dataset_filter])}""" + + def _setup_where_clause(self, filters: List[Optional[str]]) -> str: + where_clause = " and ".join(f for f in filters if f) + return f"where {where_clause}" if where_clause else "" + + def metric_sql(self, assertion: FieldMetricAssertion) -> str: + metric_sql_mapping = { + FieldMetric.UNIQUE_COUNT: self.unique_count_sql, + FieldMetric.UNIQUE_PERCENTAGE: self.unique_percentage_sql, + 
FieldMetric.NULL_COUNT: self.null_count_sql, + FieldMetric.NULL_PERCENTAGE: self.null_percentage_sql, + FieldMetric.MIN: self.min_sql, + FieldMetric.MAX: self.max_sql, + FieldMetric.MEAN: self.mean_sql, + FieldMetric.MEDIAN: self.median_sql, + FieldMetric.STDDEV: self.stddev_sql, + FieldMetric.NEGATIVE_COUNT: self.negative_count_sql, + FieldMetric.NEGATIVE_PERCENTAGE: self.negative_percentage_sql, + FieldMetric.ZERO_COUNT: self.zero_count_sql, + FieldMetric.ZERO_PERCENTAGE: self.zero_percentage_sql, + FieldMetric.MIN_LENGTH: self.min_length_sql, + FieldMetric.MAX_LENGTH: self.max_length_sql, + FieldMetric.EMPTY_COUNT: self.empty_count_sql, + FieldMetric.EMPTY_PERCENTAGE: self.empty_percentage_sql, + } + + entity_name = ".".join(get_entity_name(assertion)) + + return metric_sql_mapping[assertion.metric]( + assertion.field, + entity_name, + ( + assertion.filters.sql + if assertion.filters and assertion.filters.sql + else None + ), + ) diff --git a/metadata-ingestion/src/datahub/integrations/assertion/snowflake/field_values_metric_sql_generator.py b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/field_values_metric_sql_generator.py new file mode 100644 index 00000000000000..b77cc971d3a450 --- /dev/null +++ b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/field_values_metric_sql_generator.py @@ -0,0 +1,283 @@ +from functools import singledispatchmethod +from typing import List, Optional + +from datahub.api.entities.assertion.assertion_operator import ( + BetweenOperator, + ContainsOperator, + EndsWithOperator, + EqualToOperator, + GreaterThanOperator, + GreaterThanOrEqualToOperator, + InOperator, + IsFalseOperator, + IsNullOperator, + IsTrueOperator, + LessThanOperator, + LessThanOrEqualToOperator, + MatchesRegexOperator, + NotEqualToOperator, + NotInOperator, + NotNullOperator, + Operators, + StartsWithOperator, +) +from datahub.api.entities.assertion.field_assertion import ( + FieldTransform, + FieldValuesAssertion, +) +from datahub.integrations.assertion.common import get_entity_name + + +class SnowflakeFieldValuesMetricSQLGenerator: + @singledispatchmethod + def values_metric_sql( + self, + operators: Operators, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + """ + Generates SQL that would return boolean value for each table row. + 1 if FAIL and 0 if PASS. Note the unusual reversal of 1 and 0. + This is deliberate, as metric represents number of failing rows. 
+ """ + raise ValueError(f"Unsupported values metric operator type {type(operators)} ") + + @values_metric_sql.register + def _( + self, + operators: InOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when {transformed_field} in {tuple(operators.value)} then 0 else 1 end + from {entity_name} {where_clause}""" + + @values_metric_sql.register + def _( + self, + operators: NotInOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when {transformed_field} not in {tuple(operators.value)} then 0 else 1 end + from {entity_name} {where_clause}""" + + @values_metric_sql.register + def _( + self, + operators: EqualToOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when {transformed_field} = {operators.value} then 0 else 1 end + from {entity_name} {where_clause}""" + + @values_metric_sql.register + def _( + self, + operators: NotEqualToOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when {transformed_field} != {operators.value} then 0 else 1 end + from {entity_name} {where_clause}""" + + @values_metric_sql.register + def _( + self, + operators: BetweenOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when {transformed_field} between {operators.min} and {operators.max} then 0 else 1 end + from {entity_name} {where_clause}""" + + @values_metric_sql.register + def _( + self, + operators: LessThanOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when {transformed_field} < {operators.value} then 0 else 1 end + from {entity_name} {where_clause}""" + + @values_metric_sql.register + def _( + self, + operators: LessThanOrEqualToOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when {transformed_field} <= {operators.value} then 0 else 1 end + from {entity_name} {where_clause}""" + + @values_metric_sql.register + def _( + self, + operators: GreaterThanOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when {transformed_field} > {operators.value} then 0 else 1 end + from {entity_name} {where_clause}""" + + @values_metric_sql.register + def _( + self, + operators: GreaterThanOrEqualToOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when {transformed_field} >= {operators.value} then 0 else 1 end + from {entity_name} {where_clause}""" + + @values_metric_sql.register + def _( + self, + operators: IsNullOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when {transformed_field} is null then 0 else 1 end + from {entity_name} {where_clause}""" + + @values_metric_sql.register + def _( + self, + operators: NotNullOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when {transformed_field} is not null then 0 else 1 end + from {entity_name} {where_clause}""" + + @values_metric_sql.register + def _( + self, + operators: IsTrueOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when {transformed_field} then 0 else 1 end + from {entity_name} {where_clause}""" + + 
@values_metric_sql.register + def _( + self, + operators: IsFalseOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when not {transformed_field} then 0 else 1 end + from {entity_name} {where_clause}""" + + @values_metric_sql.register + def _( + self, + operators: ContainsOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when contains({transformed_field},'{operators.value}') then 0 else 1 end + from {entity_name} {where_clause}""" + + @values_metric_sql.register + def _( + self, + operators: StartsWithOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when startswith({transformed_field},'{operators.value}') then 0 else 1 end + from {entity_name} {where_clause}""" + + @values_metric_sql.register + def _( + self, + operators: EndsWithOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when endswith({transformed_field},'{operators.value}') then 0 else 1 end + from {entity_name} {where_clause}""" + + @values_metric_sql.register + def _( + self, + operators: MatchesRegexOperator, + entity_name: str, + transformed_field: str, + where_clause: str, + ) -> str: + return f"""select case when REGEXP_LIKE({transformed_field},'{operators.value}') then 0 else 1 end + from {entity_name} {where_clause}""" + + def _setup_where_clause(self, filters: List[Optional[str]]) -> str: + where_clause = " and ".join(f for f in filters if f) + return f"where {where_clause}" if where_clause else "" + + def _setup_field_transform( + self, field: str, transform: Optional[FieldTransform] + ) -> str: + if transform is None: + return field + elif transform is FieldTransform.LENGTH: + return f"length({field})" + raise ValueError(f"Unsupported transform type {transform}") + + def metric_sql(self, assertion: FieldValuesAssertion) -> str: + """ + Note that this applies negative operator in order to check whether or not + number of invalid value rows are less than configured failThreshold. + + Args: + assertion (FieldValuesAssertion): _description_ + + Returns: + str: _description_ + """ + entity_name = ".".join(get_entity_name(assertion)) + + dataset_filter = ( + assertion.filters.sql + if assertion.filters and assertion.filters.sql + else None + ) + where_clause = self._setup_where_clause( + [ + dataset_filter, + f"{assertion.field} is not null" if assertion.exclude_nulls else None, + ] + ) + transformed_field = self._setup_field_transform( + assertion.field, assertion.field_transform + ) + # this sql would return boolean value for each table row. 1 if fail and 0 if pass. + sql = self.values_metric_sql( + assertion.operator, entity_name, transformed_field, where_clause + ) + + # metric would be number of failing rows OR percentage of failing rows. 
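        # (Illustrative: the per-row query above yields 1 for each failing row and 0
        #  otherwise, so a count threshold wraps it as
        #  "select sum($1) as metric from (<per-row query>)" while a percentage
        #  threshold divides that sum by count(*), as implemented below.)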
+ if assertion.failure_threshold.type == "count": + return f"select sum($1) as metric from ({sql})" + else: # percentage + return f"select sum($1)/count(*) as metric from ({sql})" diff --git a/metadata-ingestion/src/datahub/integrations/assertion/snowflake/metric_operator_sql_generator.py b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/metric_operator_sql_generator.py new file mode 100644 index 00000000000000..e7549d105b3f62 --- /dev/null +++ b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/metric_operator_sql_generator.py @@ -0,0 +1,68 @@ +from functools import singledispatchmethod + +from datahub.api.entities.assertion.assertion_operator import ( + BetweenOperator, + EqualToOperator, + GreaterThanOperator, + GreaterThanOrEqualToOperator, + IsFalseOperator, + IsNullOperator, + IsTrueOperator, + LessThanOperator, + LessThanOrEqualToOperator, + NotNullOperator, + Operators, +) + + +class SnowflakeMetricEvalOperatorSQLGenerator: + @singledispatchmethod + def operator_sql(self, operators: Operators, metric_sql: str) -> str: + """ + Generates Operator SQL that applies operator on `metric` + and returns a numeric boolean value 1 if PASS, 0 if FAIL + + """ + raise ValueError(f"Unsupported metric operator type {type(operators)} ") + + @operator_sql.register + def _(self, operators: EqualToOperator, metric_sql: str) -> str: + return f"select case when metric={operators.value} then 1 else 0 end from ({metric_sql})" + + @operator_sql.register + def _(self, operators: BetweenOperator, metric_sql: str) -> str: + return f"select case when metric between {operators.min} and {operators.max} then 1 else 0 end from ({metric_sql})" + + @operator_sql.register + def _(self, operators: LessThanOperator, metric_sql: str) -> str: + return f"select case when metric < {operators.value} then 1 else 0 end from ({metric_sql})" + + @operator_sql.register + def _(self, operators: LessThanOrEqualToOperator, metric_sql: str) -> str: + return f"select case when metric <= {operators.value} then 1 else 0 end from ({metric_sql})" + + @operator_sql.register + def _(self, operators: GreaterThanOperator, metric_sql: str) -> str: + return f"select case when metric > {operators.value} then 1 else 0 end from ({metric_sql})" + + @operator_sql.register + def _(self, operators: GreaterThanOrEqualToOperator, metric_sql: str) -> str: + return f"select case when metric >= {operators.value} then 1 else 0 end from ({metric_sql})" + + @operator_sql.register + def _(self, operators: NotNullOperator, metric_sql: str) -> str: + return ( + f"select case when metric is not null then 1 else 0 end from ({metric_sql})" + ) + + @operator_sql.register + def _(self, operators: IsNullOperator, metric_sql: str) -> str: + return f"select case when metric is null then 1 else 0 end from ({metric_sql})" + + @operator_sql.register + def _(self, operators: IsTrueOperator, metric_sql: str) -> str: + return f"select case when metric then 1 else 0 end from ({metric_sql})" + + @operator_sql.register + def _(self, operators: IsFalseOperator, metric_sql: str) -> str: + return f"select case when not metric then 1 else 0 end from ({metric_sql})" diff --git a/metadata-ingestion/src/datahub/integrations/assertion/snowflake/metric_sql_generator.py b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/metric_sql_generator.py new file mode 100644 index 00000000000000..5b079129e0a9c5 --- /dev/null +++ b/metadata-ingestion/src/datahub/integrations/assertion/snowflake/metric_sql_generator.py @@ -0,0 +1,97 @@ +from dataclasses 
import dataclass +from functools import singledispatchmethod + +from datahub.api.entities.assertion.assertion import BaseEntityAssertion +from datahub.api.entities.assertion.field_assertion import ( + FieldMetricAssertion, + FieldValuesAssertion, +) +from datahub.api.entities.assertion.freshness_assertion import ( + FixedIntervalFreshnessAssertion, + FreshnessSourceType, +) +from datahub.api.entities.assertion.sql_assertion import ( + SqlMetricAssertion, + SqlMetricChangeAssertion, +) +from datahub.api.entities.assertion.volume_assertion import ( + RowCountChangeVolumeAssertion, + RowCountTotalVolumeAssertion, +) +from datahub.integrations.assertion.common import get_entity_name +from datahub.integrations.assertion.snowflake.field_metric_sql_generator import ( + SnowflakeFieldMetricSQLGenerator, +) +from datahub.integrations.assertion.snowflake.field_values_metric_sql_generator import ( + SnowflakeFieldValuesMetricSQLGenerator, +) + + +@dataclass +class SnowflakeMetricSQLGenerator: + field_metric_sql_generator: SnowflakeFieldMetricSQLGenerator + field_values_metric_sql_generator: SnowflakeFieldValuesMetricSQLGenerator + + @singledispatchmethod + def metric_sql( + self, + assertion: BaseEntityAssertion, + ) -> str: + """Generates Metric SQL that typically returns a numeric metric""" + raise ValueError(f"Unsupported assertion type {type(assertion)} ") + + @metric_sql.register + def _(self, assertion: RowCountChangeVolumeAssertion) -> str: + raise ValueError(f"Unsupported assertion type {type(assertion)} ") + + @metric_sql.register + def _(self, assertion: SqlMetricChangeAssertion) -> str: + raise ValueError(f"Unsupported assertion type {type(assertion)} ") + + @metric_sql.register + def _(self, assertion: FixedIntervalFreshnessAssertion) -> str: + entity_name = ".".join(get_entity_name(assertion)) + if assertion.filters and assertion.filters.sql: + where_clause = f"where {assertion.filters.sql}" + else: + where_clause = "" + + if ( + assertion.source_type == FreshnessSourceType.LAST_MODIFIED_COLUMN + and assertion.last_modified_field + ): + return f"""select timediff( + second, + max({assertion.last_modified_field}::TIMESTAMP_LTZ), + SNOWFLAKE.CORE.DATA_METRIC_SCHEDULED_TIME() + ) as metric from {entity_name} {where_clause}""" + else: + raise ValueError( + f"Unsupported freshness source type {assertion.source_type} " + ) + + @metric_sql.register + def _(self, assertion: RowCountTotalVolumeAssertion) -> str: + + # Can not use information schema here due to error - + # Data metric function body cannot refer to the non-deterministic function 'CURRENT_DATABASE_MAIN_METASTORE_ID'. 
+ + entity_name = ".".join(get_entity_name(assertion)) + if assertion.filters and assertion.filters.sql: + where_clause = f"where {assertion.filters.sql}" + else: + where_clause = "" + return f"select count(*) as metric from {entity_name} {where_clause}" + + @metric_sql.register + def _(self, assertion: SqlMetricAssertion) -> str: + return f"select $1 as metric from ({assertion.statement})" + + @metric_sql.register + def _(self, assertion: FieldMetricAssertion) -> str: + sql = self.field_metric_sql_generator.metric_sql(assertion) + return f"select $1 as metric from ({sql})" + + @metric_sql.register + def _(self, assertion: FieldValuesAssertion) -> str: + return self.field_values_metric_sql_generator.metric_sql(assertion) diff --git a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json index ba381d688fbad3..54f4309d962aae 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json @@ -7,8 +7,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -372,9 +371,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, @@ -606,8 +603,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -943,9 +939,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json index a87b4134dc8989..04107bcde903ec 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json @@ -7,8 +7,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -337,9 +336,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, @@ -559,8 +556,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -888,9 +884,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, @@ -1107,8 +1101,7 @@ "aspect": { "json": { "typeNames": [ - "Snapshot", - "View" + "Snapshot" ] } }, diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json index 7f8b018afb586d..4e5199aeec3893 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json @@ -7,9 +7,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, @@ -233,9 +231,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, @@ -478,8 +474,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -633,8 +628,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -800,8 +794,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json index 66670e95402d91..60596547c3d508 100644 --- 
a/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json @@ -7,8 +7,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -337,9 +336,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, @@ -565,7 +562,7 @@ "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", "type": "BATCH_SCHEDULED", "created": { - "time": 1643871600000, + "time": 1663355198240, "actor": "urn:li:corpuser:datahub" } } @@ -685,8 +682,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -1020,7 +1016,7 @@ "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD)", "type": "BATCH_SCHEDULED", "created": { - "time": 1643871600000, + "time": 1663355198240, "actor": "urn:li:corpuser:datahub" } } @@ -1144,9 +1140,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, @@ -1349,7 +1343,7 @@ "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD)", "type": "BATCH_SCHEDULED", "created": { - "time": 1643871600000, + "time": 1663355198240, "actor": "urn:li:corpuser:datahub" } } @@ -1468,8 +1462,7 @@ "aspect": { "json": { "typeNames": [ - "Snapshot", - "View" + "Snapshot" ] } }, @@ -1873,7 +1866,7 @@ "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD)", "type": "BATCH_SCHEDULED", "created": { - "time": 1643871600000, + "time": 1663355198240, "actor": "urn:li:corpuser:datahub" } } diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json index fcea12ecf8d3d5..4ec48e71badcdf 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json @@ -7,8 +7,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -336,9 +335,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, @@ -552,8 +549,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -889,9 +885,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json index c9fd7f5ba0d45e..9002001fde29ee 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json @@ -7,8 +7,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -338,9 +337,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, @@ -555,8 +552,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -893,9 +889,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json 
b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json index 56afc4e074d6f8..d16542adaa0307 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json @@ -7,8 +7,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -337,9 +336,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, @@ -553,8 +550,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -890,9 +886,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, @@ -4258,54 +4252,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "dbt-test-with-non-incremental-lineage", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "tag", "entityUrn": "urn:li:tag:dbt:column_tag", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json index e0eb8a40a4d202..921af5cb3d1db3 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json @@ -7,8 +7,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -337,9 +336,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, @@ -553,8 +550,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "View" + "Model" ] } }, @@ -890,9 +886,7 @@ "aspect": { "json": { "typeNames": [ - "Model", - "Table", - "View" + "Model" ] } }, diff --git a/metadata-ingestion/tests/integration/kafka/kafka_mces_golden.json b/metadata-ingestion/tests/integration/kafka/kafka_mces_golden.json index 7dd328168e84c0..7df790b74e829c 100644 --- a/metadata-ingestion/tests/integration/kafka/kafka_mces_golden.json +++ b/metadata-ingestion/tests/integration/kafka/kafka_mces_golden.json @@ -270,7 +270,8 @@ "retention.ms": "604800000", "cleanup.policy": "delete", "max.message.bytes": "1048588", - "unclean.leader.election.enable": "false" + "unclean.leader.election.enable": "false", + "Schema Name": "key_value_topic-value" }, "name": "key_value_topic", 
"description": "Value schema for kafka topic", @@ -472,7 +473,8 @@ "retention.ms": "604800000", "cleanup.policy": "delete", "max.message.bytes": "1048588", - "unclean.leader.election.enable": "false" + "unclean.leader.election.enable": "false", + "Schema Name": "value_topic-value" }, "name": "value_topic", "description": "Value schema for kafka topic", @@ -522,6 +524,889 @@ "lastRunId": "no-run-id-provided" } }, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:kafka,key_topic-key,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "key_topic-key", + "platform": "urn:li:dataPlatform:kafka", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "5e468f7aa532c2f2ed9686ff3ec943ec", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.KafkaSchema": { + "documentSchema": "{\"type\":\"record\",\"name\":\"UserKey\",\"namespace\":\"io.codebrews.createuserrequest\",\"doc\":\"Key schema for kafka topic\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"namespace\",\"type\":\"string\"}]}", + "documentSchemaType": "AVRO", + "keySchema": "{\"type\":\"record\",\"name\":\"UserKey\",\"namespace\":\"io.codebrews.createuserrequest\",\"doc\":\"Key schema for kafka topic\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"namespace\",\"type\":\"string\"}]}", + "keySchemaType": "AVRO" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[key=True].[type=UserKey].[type=long].id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "id", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "[version=2.0].[key=True].[type=UserKey].[type=string].namespace", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "namespace", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "[version=2.0].[type=UserKey].[type=long].id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "id", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=UserKey].[type=string].namespace", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "namespace", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/kafka" + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "key_topic-key", + "description": "Key schema for kafka topic", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:kafka,key_topic-key,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:kafka,key_topic-key,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:kafka,key_value_topic-key,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "key_value_topic-key", + "platform": "urn:li:dataPlatform:kafka", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "5e468f7aa532c2f2ed9686ff3ec943ec", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.KafkaSchema": { + "documentSchema": "{\"type\":\"record\",\"name\":\"UserKey\",\"namespace\":\"io.codebrews.createuserrequest\",\"doc\":\"Key schema for kafka topic\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"namespace\",\"type\":\"string\"}]}", + "documentSchemaType": "AVRO", + "keySchema": "{\"type\":\"record\",\"name\":\"UserKey\",\"namespace\":\"io.codebrews.createuserrequest\",\"doc\":\"Key schema for kafka topic\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"namespace\",\"type\":\"string\"}]}", + "keySchemaType": "AVRO" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[key=True].[type=UserKey].[type=long].id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "id", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "[version=2.0].[key=True].[type=UserKey].[type=string].namespace", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "namespace", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "[version=2.0].[type=UserKey].[type=long].id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "id", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=UserKey].[type=string].namespace", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "namespace", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/kafka" + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "key_value_topic-key", + "description": "Key schema for kafka topic", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:kafka,key_value_topic-key,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:kafka,key_value_topic-key,PROD)", + "changeType": "UPSERT", + 
"aspectName": "domains", + "aspect": { + "json": { + "domains": [ + "urn:li:domain:sales" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:kafka,key_value_topic-key,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:kafka,key_value_topic-value,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "key_value_topic-value", + "platform": "urn:li:dataPlatform:kafka", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "c9b692583e304b9cb703ffa748a9f37d", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.KafkaSchema": { + "documentSchema": "{\"type\":\"record\",\"name\":\"CreateUserRequest\",\"namespace\":\"io.codebrews.createuserrequest\",\"doc\":\"Value schema for kafka topic\",\"fields\":[{\"name\":\"email\",\"type\":\"string\",\"tags\":[\"Email\"]},{\"name\":\"firstName\",\"type\":\"string\",\"tags\":[\"Name\"]},{\"name\":\"lastName\",\"type\":\"string\",\"tags\":[\"Name\"]}],\"tags\":[\"PII\"]}", + "documentSchemaType": "AVRO", + "keySchema": "{\"type\":\"record\",\"name\":\"CreateUserRequest\",\"namespace\":\"io.codebrews.createuserrequest\",\"doc\":\"Value schema for kafka topic\",\"fields\":[{\"name\":\"email\",\"type\":\"string\",\"tags\":[\"Email\"]},{\"name\":\"firstName\",\"type\":\"string\",\"tags\":[\"Name\"]},{\"name\":\"lastName\",\"type\":\"string\",\"tags\":[\"Name\"]}],\"tags\":[\"PII\"]}", + "keySchemaType": "AVRO" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[key=True].[type=CreateUserRequest].[type=string].email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "email", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Email" + } + ] + }, + "isPartOfKey": true, + "jsonProps": "{\"tags\": [\"Email\"]}" + }, + { + "fieldPath": "[version=2.0].[key=True].[type=CreateUserRequest].[type=string].firstName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "firstName", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Name" + } + ] + }, + "isPartOfKey": true, + "jsonProps": "{\"tags\": [\"Name\"]}" + }, + { + "fieldPath": "[version=2.0].[key=True].[type=CreateUserRequest].[type=string].lastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "lastName", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Name" + } + ] + }, + "isPartOfKey": true, + "jsonProps": "{\"tags\": [\"Name\"]}" + }, + { + "fieldPath": "[version=2.0].[type=CreateUserRequest].[type=string].email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "email", + "recursive": false, + 
"globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Email" + } + ] + }, + "isPartOfKey": false, + "jsonProps": "{\"tags\": [\"Email\"]}" + }, + { + "fieldPath": "[version=2.0].[type=CreateUserRequest].[type=string].firstName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "firstName", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Name" + } + ] + }, + "isPartOfKey": false, + "jsonProps": "{\"tags\": [\"Name\"]}" + }, + { + "fieldPath": "[version=2.0].[type=CreateUserRequest].[type=string].lastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "lastName", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Name" + } + ] + }, + "isPartOfKey": false, + "jsonProps": "{\"tags\": [\"Name\"]}" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/kafka" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [ + { + "tag": "urn:li:tag:PII" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "key_value_topic-value", + "description": "Value schema for kafka topic", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:kafka,key_value_topic-value,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:kafka,key_value_topic-value,PROD)", + "changeType": "UPSERT", + "aspectName": "domains", + "aspect": { + "json": { + "domains": [ + "urn:li:domain:sales" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:kafka,key_value_topic-value,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:kafka,value_topic-key,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "value_topic-key", + "platform": "urn:li:dataPlatform:kafka", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "c088cd2eb2de57e32c00b32d4871ec72", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.KafkaSchema": { + "documentSchema": "\"string\"", + "documentSchemaType": "AVRO", + "keySchema": "\"string\"", + "keySchemaType": "AVRO" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[key=True].[type=string]", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + 
"nativeDataType": "string", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "[version=2.0].[type=string]", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/kafka" + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "value_topic-key", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:kafka,value_topic-key,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:kafka,value_topic-key,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:kafka,value_topic-value,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "value_topic-value", + "platform": "urn:li:dataPlatform:kafka", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "c9b692583e304b9cb703ffa748a9f37d", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.KafkaSchema": { + "documentSchema": "{\"type\":\"record\",\"name\":\"CreateUserRequest\",\"namespace\":\"io.codebrews.createuserrequest\",\"doc\":\"Value schema for kafka topic\",\"fields\":[{\"name\":\"email\",\"type\":\"string\",\"tags\":[\"Email\"]},{\"name\":\"firstName\",\"type\":\"string\",\"tags\":[\"Name\"]},{\"name\":\"lastName\",\"type\":\"string\",\"tags\":[\"Name\"]}],\"tags\":[\"PII\"]}", + "documentSchemaType": "AVRO", + "keySchema": "{\"type\":\"record\",\"name\":\"CreateUserRequest\",\"namespace\":\"io.codebrews.createuserrequest\",\"doc\":\"Value schema for kafka topic\",\"fields\":[{\"name\":\"email\",\"type\":\"string\",\"tags\":[\"Email\"]},{\"name\":\"firstName\",\"type\":\"string\",\"tags\":[\"Name\"]},{\"name\":\"lastName\",\"type\":\"string\",\"tags\":[\"Name\"]}],\"tags\":[\"PII\"]}", + "keySchemaType": "AVRO" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[key=True].[type=CreateUserRequest].[type=string].email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "email", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Email" + } + ] + }, + "isPartOfKey": true, + "jsonProps": "{\"tags\": [\"Email\"]}" + }, + { + "fieldPath": "[version=2.0].[key=True].[type=CreateUserRequest].[type=string].firstName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": 
"firstName", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Name" + } + ] + }, + "isPartOfKey": true, + "jsonProps": "{\"tags\": [\"Name\"]}" + }, + { + "fieldPath": "[version=2.0].[key=True].[type=CreateUserRequest].[type=string].lastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "lastName", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Name" + } + ] + }, + "isPartOfKey": true, + "jsonProps": "{\"tags\": [\"Name\"]}" + }, + { + "fieldPath": "[version=2.0].[type=CreateUserRequest].[type=string].email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "email", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Email" + } + ] + }, + "isPartOfKey": false, + "jsonProps": "{\"tags\": [\"Email\"]}" + }, + { + "fieldPath": "[version=2.0].[type=CreateUserRequest].[type=string].firstName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "firstName", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Name" + } + ] + }, + "isPartOfKey": false, + "jsonProps": "{\"tags\": [\"Name\"]}" + }, + { + "fieldPath": "[version=2.0].[type=CreateUserRequest].[type=string].lastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "lastName", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Name" + } + ] + }, + "isPartOfKey": false, + "jsonProps": "{\"tags\": [\"Name\"]}" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/kafka" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [ + { + "tag": "urn:li:tag:PII" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "value_topic-value", + "description": "Value schema for kafka topic", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:kafka,value_topic-value,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:kafka,value_topic-value,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "kafka-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Email", diff --git a/metadata-ingestion/tests/unit/api/entities/assertion/__init__.py b/metadata-ingestion/tests/unit/api/entities/assertion/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/tests/unit/api/entities/assertion/test_assertion_config.yml b/metadata-ingestion/tests/unit/api/entities/assertion/test_assertion_config.yml new file mode 100644 index 00000000000000..a44945a30f9a37 --- /dev/null +++ 
b/metadata-ingestion/tests/unit/api/entities/assertion/test_assertion_config.yml @@ -0,0 +1,76 @@ +version: 1 +namespace: test-config-id-1 +assertions: + # Freshness Assertion + - entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.test_assertions_all_times,PROD) + type: freshness + lookback_interval: "1 hour" + last_modified_field: col_timestamp + schedule: + type: cron + cron: 0 * * * * + meta: + entity_qualified_name: TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES + entity_schema: + - col: col_date + native_type: DATE + # Volume Assertion + - type: volume + entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.test_assertions_all_times,PROD) + metric: row_count + condition: + type: less_than_or_equal_to + value: 1000 + schedule: + type: cron + cron: 0 * * * * + meta: + entity_qualified_name: TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES + entity_schema: + - col: col_date + native_type: DATE + # Field Metric Assertion + - type: field + entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.test_assertions_all_times,PROD) + field: col_date + metric: null_count + condition: + type: equal_to + value: 0 + schedule: + type: cron + cron: 0 * * * * + meta: + entity_qualified_name: TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES + entity_schema: + - col: col_date + native_type: DATE + # Field Value Assertion + - type: field + entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.purchase_event,PROD) + field: quantity + condition: + type: between + min: 0 + max: 10 + schedule: + type: on_table_change + meta: + entity_qualified_name: TEST_DB.PUBLIC.PURCHASE_EVENT + entity_schema: + - col: quantity + native_type: FLOAT + # Custom SQL Metric Assertion + - type: sql + entity: urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.public.purchase_event,PROD) + statement: select mode(quantity) from test_db.public.purchase_event + condition: + type: equal_to + value: 5 + schedule: + type: on_table_change + meta: + entity_qualified_name: TEST_DB.PUBLIC.PURCHASE_EVENT + entity_schema: + - col: quantity + native_type: FLOAT diff --git a/metadata-ingestion/tests/unit/api/entities/assertion/test_assertion_config_spec.py b/metadata-ingestion/tests/unit/api/entities/assertion/test_assertion_config_spec.py new file mode 100644 index 00000000000000..74f13ac7b2a19d --- /dev/null +++ b/metadata-ingestion/tests/unit/api/entities/assertion/test_assertion_config_spec.py @@ -0,0 +1,13 @@ +from datahub.api.entities.assertion.assertion_config_spec import AssertionsConfigSpec + + +def test_assertion_config_spec_parses_correct_type(pytestconfig): + config_file = ( + pytestconfig.rootpath + / "tests/unit/api/entities/assertion/test_assertion_config.yml" + ) + + config_spec = AssertionsConfigSpec.from_yaml(config_file) + assert config_spec.version == 1 + assert config_spec.id == "test-config-id-1" + assert len(config_spec.assertions) == 5 diff --git a/metadata-ingestion/tests/unit/cli/assertion/__init__.py b/metadata-ingestion/tests/unit/cli/assertion/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/tests/unit/cli/assertion/dmf_associations.sql b/metadata-ingestion/tests/unit/cli/assertion/dmf_associations.sql new file mode 100644 index 00000000000000..7e6b1982515e0f --- /dev/null +++ b/metadata-ingestion/tests/unit/cli/assertion/dmf_associations.sql @@ -0,0 +1,35 @@ + +-- Start of Assertion 025cce4dd4123c0f007908011a9c64d7 + + ALTER TABLE TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES SET DATA_METRIC_SCHEDULE = 'USING CRON 0 * * * * 
UTC'; + ALTER TABLE TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES ADD DATA METRIC FUNCTION test_db.datahub_dmfs.datahub__025cce4dd4123c0f007908011a9c64d7 ON (col_date); + +-- End of Assertion 025cce4dd4123c0f007908011a9c64d7 + +-- Start of Assertion 5c32eef47bd763fece7d21c7cbf6c659 + + ALTER TABLE TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES SET DATA_METRIC_SCHEDULE = 'USING CRON 0 * * * * UTC'; + ALTER TABLE TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES ADD DATA METRIC FUNCTION test_db.datahub_dmfs.datahub__5c32eef47bd763fece7d21c7cbf6c659 ON (col_date); + +-- End of Assertion 5c32eef47bd763fece7d21c7cbf6c659 + +-- Start of Assertion 04be4145bd8de10bed3dfcb0cee57842 + + ALTER TABLE TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES SET DATA_METRIC_SCHEDULE = 'USING CRON 0 * * * * UTC'; + ALTER TABLE TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES ADD DATA METRIC FUNCTION test_db.datahub_dmfs.datahub__04be4145bd8de10bed3dfcb0cee57842 ON (col_date); + +-- End of Assertion 04be4145bd8de10bed3dfcb0cee57842 + +-- Start of Assertion b065942d2bca8a4dbe90cc3ec2d9ca9f + + ALTER TABLE TEST_DB.PUBLIC.PURCHASE_EVENT SET DATA_METRIC_SCHEDULE = 'TRIGGER_ON_CHANGES'; + ALTER TABLE TEST_DB.PUBLIC.PURCHASE_EVENT ADD DATA METRIC FUNCTION test_db.datahub_dmfs.datahub__b065942d2bca8a4dbe90cc3ec2d9ca9f ON (quantity); + +-- End of Assertion b065942d2bca8a4dbe90cc3ec2d9ca9f + +-- Start of Assertion 170dbd53f28eedbbaba52ebbf189f6b1 + + ALTER TABLE TEST_DB.PUBLIC.PURCHASE_EVENT SET DATA_METRIC_SCHEDULE = 'TRIGGER_ON_CHANGES'; + ALTER TABLE TEST_DB.PUBLIC.PURCHASE_EVENT ADD DATA METRIC FUNCTION test_db.datahub_dmfs.datahub__170dbd53f28eedbbaba52ebbf189f6b1 ON (quantity); + +-- End of Assertion 170dbd53f28eedbbaba52ebbf189f6b1 diff --git a/metadata-ingestion/tests/unit/cli/assertion/dmf_definitions.sql b/metadata-ingestion/tests/unit/cli/assertion/dmf_definitions.sql new file mode 100644 index 00000000000000..85056e150b9b33 --- /dev/null +++ b/metadata-ingestion/tests/unit/cli/assertion/dmf_definitions.sql @@ -0,0 +1,71 @@ + +-- Start of Assertion 025cce4dd4123c0f007908011a9c64d7 + + CREATE or REPLACE DATA METRIC FUNCTION + test_db.datahub_dmfs.datahub__025cce4dd4123c0f007908011a9c64d7 (ARGT TABLE(col_date DATE)) + RETURNS NUMBER + COMMENT = 'Created via DataHub for assertion urn:li:assertion:025cce4dd4123c0f007908011a9c64d7 of type freshness' + AS + $$ + select case when metric <= 3600 then 1 else 0 end from (select timediff( + second, + max(col_timestamp::TIMESTAMP_LTZ), + SNOWFLAKE.CORE.DATA_METRIC_SCHEDULED_TIME() + ) as metric from TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES ) + $$; + +-- End of Assertion 025cce4dd4123c0f007908011a9c64d7 + +-- Start of Assertion 5c32eef47bd763fece7d21c7cbf6c659 + + CREATE or REPLACE DATA METRIC FUNCTION + test_db.datahub_dmfs.datahub__5c32eef47bd763fece7d21c7cbf6c659 (ARGT TABLE(col_date DATE)) + RETURNS NUMBER + COMMENT = 'Created via DataHub for assertion urn:li:assertion:5c32eef47bd763fece7d21c7cbf6c659 of type volume' + AS + $$ + select case when metric <= 1000 then 1 else 0 end from (select count(*) as metric from TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES ) + $$; + +-- End of Assertion 5c32eef47bd763fece7d21c7cbf6c659 + +-- Start of Assertion 04be4145bd8de10bed3dfcb0cee57842 + + CREATE or REPLACE DATA METRIC FUNCTION + test_db.datahub_dmfs.datahub__04be4145bd8de10bed3dfcb0cee57842 (ARGT TABLE(col_date DATE)) + RETURNS NUMBER + COMMENT = 'Created via DataHub for assertion urn:li:assertion:04be4145bd8de10bed3dfcb0cee57842 of type field' + AS + $$ + select case when metric=0 then 1 else 0 end from (select $1 
as metric from (select count(*) + from TEST_DB.PUBLIC.TEST_ASSERTIONS_ALL_TIMES where col_date is null)) + $$; + +-- End of Assertion 04be4145bd8de10bed3dfcb0cee57842 + +-- Start of Assertion b065942d2bca8a4dbe90cc3ec2d9ca9f + + CREATE or REPLACE DATA METRIC FUNCTION + test_db.datahub_dmfs.datahub__b065942d2bca8a4dbe90cc3ec2d9ca9f (ARGT TABLE(quantity FLOAT)) + RETURNS NUMBER + COMMENT = 'Created via DataHub for assertion urn:li:assertion:b065942d2bca8a4dbe90cc3ec2d9ca9f of type field' + AS + $$ + select case when metric <= 0 then 1 else 0 end from (select sum($1) as metric from (select case when quantity between 0 and 10 then 0 else 1 end + from TEST_DB.PUBLIC.PURCHASE_EVENT where quantity is not null)) + $$; + +-- End of Assertion b065942d2bca8a4dbe90cc3ec2d9ca9f + +-- Start of Assertion 170dbd53f28eedbbaba52ebbf189f6b1 + + CREATE or REPLACE DATA METRIC FUNCTION + test_db.datahub_dmfs.datahub__170dbd53f28eedbbaba52ebbf189f6b1 (ARGT TABLE(quantity FLOAT)) + RETURNS NUMBER + COMMENT = 'Created via DataHub for assertion urn:li:assertion:170dbd53f28eedbbaba52ebbf189f6b1 of type sql' + AS + $$ + select case when metric=5 then 1 else 0 end from (select $1 as metric from (select mode(quantity) from test_db.public.purchase_event)) + $$; + +-- End of Assertion 170dbd53f28eedbbaba52ebbf189f6b1 diff --git a/metadata-ingestion/tests/unit/cli/assertion/test_compile.py b/metadata-ingestion/tests/unit/cli/assertion/test_compile.py new file mode 100644 index 00000000000000..47253b5b0d71ea --- /dev/null +++ b/metadata-ingestion/tests/unit/cli/assertion/test_compile.py @@ -0,0 +1,42 @@ +import filecmp +import os + +from datahub.integrations.assertion.snowflake.compiler import ( + DMF_ASSOCIATIONS_FILE_NAME, + DMF_DEFINITIONS_FILE_NAME, +) +from tests.test_helpers.click_helpers import run_datahub_cmd + + +def test_compile_assertion_config_spec_for_snowflake(pytestconfig, tmp_path): + config_file = ( + pytestconfig.rootpath + / "tests/unit/api/entities/assertion/test_assertion_config.yml" + ).resolve() + + golden_file_path = pytestconfig.rootpath / "tests/unit/cli/assertion/" + run_datahub_cmd( + [ + "assertions", + "compile", + "-f", + f"{config_file}", + "-p", + "snowflake", + "-x", + "DMF_SCHEMA=test_db.datahub_dmfs", + "-o", + tmp_path, + ], + ) + + output_file_names = [ + DMF_DEFINITIONS_FILE_NAME, + DMF_ASSOCIATIONS_FILE_NAME, + ] + + for file_name in output_file_names: + assert os.path.exists(tmp_path / file_name) + assert filecmp.cmp( + golden_file_path / file_name, tmp_path / file_name + ), f"{file_name} is not as expected" diff --git a/metadata-ingestion/tests/unit/config/test_connection_resolver.py b/metadata-ingestion/tests/unit/config/test_connection_resolver.py new file mode 100644 index 00000000000000..592d145ac3c040 --- /dev/null +++ b/metadata-ingestion/tests/unit/config/test_connection_resolver.py @@ -0,0 +1,68 @@ +from unittest.mock import MagicMock + +import pydantic +import pytest + +from datahub.configuration.common import ConfigModel +from datahub.configuration.connection_resolver import auto_connection_resolver +from datahub.ingestion.api.global_context import set_graph_context + + +class MyConnectionType(ConfigModel): + username: str + password: str + + _connection = auto_connection_resolver() + + +def test_auto_connection_resolver(): + # Test a normal config. 
+ config = MyConnectionType.parse_obj( + {"username": "test_user", "password": "test_password"} + ) + assert config.username == "test_user" + assert config.password == "test_password" + + # No graph context -> should raise an error. + with pytest.raises(pydantic.ValidationError, match=r"requires a .*graph"): + config = MyConnectionType.parse_obj( + { + "connection": "test_connection", + } + ) + + # Missing connection -> should raise an error. + fake_graph = MagicMock() + fake_graph.get_connection_json.return_value = None + with set_graph_context(fake_graph): + with pytest.raises(pydantic.ValidationError, match=r"not found"): + config = MyConnectionType.parse_obj( + { + "connection": "urn:li:dataHubConnection:missing-connection", + } + ) + + # Bad connection config -> should raise an error. + fake_graph.get_connection_json.return_value = {"bad_key": "bad_value"} + with set_graph_context(fake_graph): + with pytest.raises(pydantic.ValidationError): + config = MyConnectionType.parse_obj( + { + "connection": "urn:li:dataHubConnection:bad-connection", + } + ) + + # Good connection config. + fake_graph.get_connection_json.return_value = { + "username": "test_user", + "password": "test_password", + } + with set_graph_context(fake_graph): + config = MyConnectionType.parse_obj( + { + "connection": "urn:li:dataHubConnection:good-connection", + "username": "override_user", + } + ) + assert config.username == "override_user" + assert config.password == "test_password" diff --git a/metadata-ingestion/tests/unit/test_kafka_source.py b/metadata-ingestion/tests/unit/test_kafka_source.py index 5ad9ac45534aa1..b4e37d288a3041 100644 --- a/metadata-ingestion/tests/unit/test_kafka_source.py +++ b/metadata-ingestion/tests/unit/test_kafka_source.py @@ -334,7 +334,9 @@ def mock_get_latest_version(subject_name: str) -> Optional[RegisteredSchema]: mock_kafka_consumer.assert_called_once() mock_kafka_instance.list_topics.assert_called_once() - assert len(workunits) == 8 + # Along with with 4 topics(3 with schema and 1 schemaless) which constitutes to 8 workunits, + # there will be 6 schemas (1 key and 1 value schema for 3 topics) which constitutes to 12 workunits + assert len(workunits) == 20 i: int = -1 for wu in workunits: assert isinstance(wu, MetadataWorkUnit) @@ -343,6 +345,8 @@ def mock_get_latest_version(subject_name: str) -> Optional[RegisteredSchema]: mce: MetadataChangeEvent = wu.metadata i += 1 + # Only topic (named schema_less_topic) does not have schema metadata but other workunits (that are created + # for schema) will have corresponding SchemaMetadata aspect if i < len(topic_subject_schema_map.keys()): # First 3 workunits (topics) must have schemaMetadata aspect assert isinstance(mce.proposedSnapshot.aspects[1], SchemaMetadataClass) @@ -380,11 +384,18 @@ def mock_get_latest_version(subject_name: str) -> Optional[RegisteredSchema]: ) # Make sure we have 2 fields, one from the key schema & one from the value schema. assert len(schemaMetadataAspect.fields) == 2 - else: + elif i == len(topic_subject_schema_map.keys()): # Last topic('schema_less_topic') has no schema defined in the registry. # The schemaMetadata aspect should not be present for this. 
for aspect in mce.proposedSnapshot.aspects: assert not isinstance(aspect, SchemaMetadataClass) + else: + # Last 2 workunits (schemas) must have schemaMetadata aspect + assert isinstance(mce.proposedSnapshot.aspects[1], SchemaMetadataClass) + schemaMetadataAspectObj: SchemaMetadataClass = mce.proposedSnapshot.aspects[ + 1 + ] + assert isinstance(schemaMetadataAspectObj.platformSchema, KafkaSchemaClass) @pytest.mark.parametrize( @@ -465,7 +476,7 @@ def mock_get_latest_version(subject_name: str) -> Optional[RegisteredSchema]: workunits = list(kafka_source.get_workunits()) - assert len(workunits) == 2 + assert len(workunits) == 6 if ignore_warnings_on_schema_type: assert not kafka_source.report.warnings else: @@ -643,8 +654,10 @@ def mock_get_latest_version(subject_name: str) -> Optional[RegisteredSchema]: }, ctx, ) + # Along with with 1 topics(and 5 meta mapping) it constitutes to 6 workunits, + # there will be 2 schemas which constitutes to 4 workunits (1 mce and 1 mcp each) workunits = [w for w in kafka_source.get_workunits()] - assert len(workunits) == 6 + assert len(workunits) == 10 mce = workunits[0].metadata assert isinstance(mce, MetadataChangeEvent) @@ -677,11 +690,49 @@ def mock_get_latest_version(subject_name: str) -> Optional[RegisteredSchema]: "urn:li:glossaryTerm:double_meta_property", ] ) - assert isinstance(workunits[2].metadata, MetadataChangeProposalWrapper) + assert isinstance(workunits[1].metadata, MetadataChangeProposalWrapper) + mce = workunits[2].metadata + assert isinstance(mce, MetadataChangeEvent) assert isinstance(workunits[3].metadata, MetadataChangeProposalWrapper) - assert isinstance(workunits[4].metadata, MetadataChangeProposalWrapper) + + mce = workunits[4].metadata + assert isinstance(mce, MetadataChangeEvent) + ownership_aspect = [ + asp for asp in mce.proposedSnapshot.aspects if isinstance(asp, OwnershipClass) + ][0] + assert ownership_aspect == make_ownership_aspect_from_urn_list( + [ + make_owner_urn("charles", OwnerType.USER), + make_owner_urn("jdoe.last@gmail.com", OwnerType.USER), + ], + "SERVICE", + ) + + tags_aspect = [ + asp for asp in mce.proposedSnapshot.aspects if isinstance(asp, GlobalTagsClass) + ][0] + assert tags_aspect == make_global_tag_aspect_with_tag_list( + ["has_pii_test", "int_meta_property"] + ) + + terms_aspect = [ + asp + for asp in mce.proposedSnapshot.aspects + if isinstance(asp, GlossaryTermsClass) + ][0] + assert terms_aspect == make_glossary_terms_aspect_from_urn_list( + [ + "urn:li:glossaryTerm:Finance_test", + "urn:li:glossaryTerm:double_meta_property", + ] + ) + assert isinstance(workunits[5].metadata, MetadataChangeProposalWrapper) - assert workunits[2].metadata.aspectName == "glossaryTermKey" - assert workunits[3].metadata.aspectName == "glossaryTermKey" - assert workunits[4].metadata.aspectName == "tagKey" - assert workunits[5].metadata.aspectName == "tagKey" + assert isinstance(workunits[6].metadata, MetadataChangeProposalWrapper) + assert isinstance(workunits[7].metadata, MetadataChangeProposalWrapper) + assert isinstance(workunits[8].metadata, MetadataChangeProposalWrapper) + assert isinstance(workunits[9].metadata, MetadataChangeProposalWrapper) + assert workunits[6].metadata.aspectName == "glossaryTermKey" + assert workunits[7].metadata.aspectName == "glossaryTermKey" + assert workunits[8].metadata.aspectName == "tagKey" + assert workunits[9].metadata.aspectName == "tagKey" diff --git a/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/config/DatahubOpenlineageConfig.java 
b/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/config/DatahubOpenlineageConfig.java index b8d4d53511cd0b..5e4b791fa8d82d 100644 --- a/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/config/DatahubOpenlineageConfig.java +++ b/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/config/DatahubOpenlineageConfig.java @@ -27,7 +27,7 @@ public class DatahubOpenlineageConfig { @Builder.Default private final boolean captureColumnLevelLineage = true; @Builder.Default private final DataJobUrn parentJobUrn = null; // This is disabled until column level patch support won't be fixed in GMS - @Builder.Default private final boolean usePatch = false; + @Builder.Default private final boolean usePatch = true; public List getPathSpecsForPlatform(String platform) { if ((pathSpecs == null) || (pathSpecs.isEmpty())) { diff --git a/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/converter/OpenLineageToDataHub.java b/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/converter/OpenLineageToDataHub.java index 59cac8719c303a..1db09306cbdc22 100644 --- a/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/converter/OpenLineageToDataHub.java +++ b/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/converter/OpenLineageToDataHub.java @@ -423,6 +423,45 @@ private static StringMap generateCustomProperties( for (Map.Entry entry : event.getRun().getFacets().getAdditionalProperties().entrySet()) { switch (entry.getKey()) { + case "spark_jobDetails": + if (entry.getValue().getAdditionalProperties().get("jobId") != null) { + customProperties.put( + "jobId", + (String) entry.getValue().getAdditionalProperties().get("jobId").toString()); + } + if (entry.getValue().getAdditionalProperties().get("jobDescription") != null) { + customProperties.put( + "jobDescription", + (String) entry.getValue().getAdditionalProperties().get("jobDescription")); + } + if (entry.getValue().getAdditionalProperties().get("jobGroup") != null) { + customProperties.put( + "jobGroup", (String) entry.getValue().getAdditionalProperties().get("jobGroup")); + } + if (entry.getValue().getAdditionalProperties().get("jobCallSite") != null) { + customProperties.put( + "jobCallSite", + (String) entry.getValue().getAdditionalProperties().get("jobCallSite")); + } + case "processing_engine": + if (entry.getValue().getAdditionalProperties().get("processing-engine") != null) { + customProperties.put( + "processing-engine", + (String) entry.getValue().getAdditionalProperties().get("name")); + } + if (entry.getValue().getAdditionalProperties().get("processing-engine-version") != null) { + customProperties.put( + "processing-engine-version", + (String) entry.getValue().getAdditionalProperties().get("version")); + } + if (entry.getValue().getAdditionalProperties().get("openlineage-adapter-version") + != null) { + customProperties.put( + "openlineage-adapter-version", + (String) + entry.getValue().getAdditionalProperties().get("openlineageAdapterVersion")); + } + case "spark_version": { if (entry.getValue().getAdditionalProperties().get("spark-version") != null) { diff --git a/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/dataset/DatahubJob.java 
b/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/dataset/DatahubJob.java index 3682a42bb3571c..5f4a9b6a596e78 100644 --- a/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/dataset/DatahubJob.java +++ b/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/dataset/DatahubJob.java @@ -174,7 +174,7 @@ private void generateDataJobInputOutputMcp( List mcps) { DataJobInputOutput dataJobInputOutput = new DataJobInputOutput(); log.info("Adding DataJob edges to {}", jobUrn); - if (config.isUsePatch()) { + if (config.isUsePatch() && (!parentJobs.isEmpty() || !inSet.isEmpty() || !outSet.isEmpty())) { DataJobInputOutputPatchBuilder dataJobInputOutputPatchBuilder = new DataJobInputOutputPatchBuilder().urn(jobUrn); for (DatahubDataset dataset : inSet) { @@ -263,39 +263,38 @@ private Pair processDownstreams( if (dataset.getLineage() != null) { if (config.isUsePatch()) { - UpstreamLineagePatchBuilder upstreamLineagePatchBuilder = - new UpstreamLineagePatchBuilder().urn(dataset.getUrn()); - for (Upstream upstream : dataset.getLineage().getUpstreams()) { - upstreamLineagePatchBuilder.addUpstream(upstream.getDataset(), upstream.getType()); - } - - log.info("Adding FineGrainedLineage to {}", dataset.getUrn()); - for (FineGrainedLineage fineGrainedLineage : - Objects.requireNonNull(dataset.getLineage().getFineGrainedLineages())) { - for (Urn upstream : Objects.requireNonNull(fineGrainedLineage.getUpstreams())) { - upstreamLineagePatchBuilder.addFineGrainedUpstreamField( - upstream, - fineGrainedLineage.getConfidenceScore(), - StringUtils.defaultIfEmpty( - fineGrainedLineage.getTransformOperation(), "TRANSFORM"), - fineGrainedLineage.getUpstreamType()); + if (!dataset.getLineage().getUpstreams().isEmpty()) { + UpstreamLineagePatchBuilder upstreamLineagePatchBuilder = + new UpstreamLineagePatchBuilder().urn(dataset.getUrn()); + for (Upstream upstream : dataset.getLineage().getUpstreams()) { + upstreamLineagePatchBuilder.addUpstream( + upstream.getDataset(), upstream.getType()); } - for (Urn downstream : Objects.requireNonNull(fineGrainedLineage.getDownstreams())) { - upstreamLineagePatchBuilder.addFineGrainedDownstreamField( - downstream, - fineGrainedLineage.getConfidenceScore(), - StringUtils.defaultIfEmpty( - fineGrainedLineage.getTransformOperation(), "TRANSFORM"), - fineGrainedLineage.getDownstreamType()); + + log.info("Adding FineGrainedLineage to {}", dataset.getUrn()); + for (FineGrainedLineage fineGrainedLineage : + Objects.requireNonNull(dataset.getLineage().getFineGrainedLineages())) { + for (Urn upstream : Objects.requireNonNull(fineGrainedLineage.getUpstreams())) { + for (Urn downstream : + Objects.requireNonNull(fineGrainedLineage.getDownstreams())) { + upstreamLineagePatchBuilder.addFineGrainedUpstreamField( + downstream, + fineGrainedLineage.getConfidenceScore(), + StringUtils.defaultIfEmpty( + fineGrainedLineage.getTransformOperation(), "TRANSFORM"), + upstream, + null); + } + } } + MetadataChangeProposal mcp = upstreamLineagePatchBuilder.build(); + log.info( + "upstreamLineagePatch: {}", + mcp.getAspect().getValue().asString(Charset.defaultCharset())); + mcps.add(mcp); + } else { + addAspectToMcps(dataset.getUrn(), DATASET_ENTITY_TYPE, dataset.getLineage(), mcps); } - MetadataChangeProposal mcp = upstreamLineagePatchBuilder.build(); - log.info( - "upstreamLineagePatch: {}", - mcp.getAspect().getValue().asString(Charset.defaultCharset())); - mcps.add(mcp); - } else { - 
addAspectToMcps(dataset.getUrn(), DATASET_ENTITY_TYPE, dataset.getLineage(), mcps); } } }); @@ -361,7 +360,7 @@ private void generateFlowGlobalTagsAspect( DatahubOpenlineageConfig config, List mcps) { if (flowGlobalTags != null) { - if (config.isUsePatch()) { + if ((config.isUsePatch() && (!flowGlobalTags.getTags().isEmpty()))) { GlobalTagsPatchBuilder globalTagsPatchBuilder = new GlobalTagsPatchBuilder().urn(flowUrn); for (TagAssociation tag : flowGlobalTags.getTags()) { globalTagsPatchBuilder.addTag(tag.getTag(), null); diff --git a/metadata-integration/java/spark-lineage-beta/README.md b/metadata-integration/java/spark-lineage-beta/README.md index e09bc3938b6868..7b3598453498f7 100644 --- a/metadata-integration/java/spark-lineage-beta/README.md +++ b/metadata-integration/java/spark-lineage-beta/README.md @@ -24,7 +24,7 @@ When running jobs using spark-submit, the agent needs to be configured in the co ```text #Configuring DataHub spark agent jar -spark.jars.packages io.acryl:acryl-spark-lineage:0.2.3 +spark.jars.packages io.acryl:acryl-spark-lineage:0.2.6 spark.extraListeners datahub.spark.DatahubSparkListener spark.datahub.rest.server http://localhost:8080 ``` @@ -32,7 +32,7 @@ spark.datahub.rest.server http://localhost:8080 ## spark-submit command line ```sh -spark-submit --packages io.acryl:acryl-spark-lineage:0.2.3 --conf "spark.extraListeners=datahub.spark.DatahubSparkListener" my_spark_job_to_run.py +spark-submit --packages io.acryl:acryl-spark-lineage:0.2.6 --conf "spark.extraListeners=datahub.spark.DatahubSparkListener" my_spark_job_to_run.py ``` ### Configuration Instructions: Amazon EMR @@ -41,7 +41,7 @@ Set the following spark-defaults configuration properties as it stated [here](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-spark-configure.html) ```text -spark.jars.packages io.acryl:acryl-spark-lineage:0.2.3 +spark.jars.packages io.acryl:acryl-spark-lineage:0.2.6 spark.extraListeners datahub.spark.DatahubSparkListener spark.datahub.rest.server https://your_datahub_host/gms #If you have authentication set up then you also need to specify the Datahub access token @@ -56,7 +56,7 @@ When running interactive jobs from a notebook, the listener can be configured wh spark = SparkSession.builder .master("spark://spark-master:7077") .appName("test-application") -.config("spark.jars.packages", "io.acryl:acryl-spark-lineage:0.2.3") +.config("spark.jars.packages", "io.acryl:acryl-spark-lineage:0.2.6") .config("spark.extraListeners", "datahub.spark.DatahubSparkListener") .config("spark.datahub.rest.server", "http://localhost:8080") .enableHiveSupport() @@ -79,7 +79,7 @@ appName("test-application") config("spark.master","spark://spark-master:7077") . -config("spark.jars.packages","io.acryl:acryl-spark-lineage:0.2.3") +config("spark.jars.packages","io.acryl:acryl-spark-lineage:0.2.6") . config("spark.extraListeners","datahub.spark.DatahubSparkListener") @@ -181,8 +181,8 @@ information like tokens. | spark.datahub.partition_regexp_pattern | | | Strip partition part from the path if path end matches with the specified regexp. Example `year=.*/month=.*/day=.*` | | spark.datahub.tags | | | Comma separated list of tags to attach to the DataFlow | | spark.datahub.domains | | | Comma separated list of domain urns to attach to the DataFlow | -| spark.datahub.stage_metadata_coalescing | | | Normally it coalesce and send metadata at the onApplicationEnd event which is never called on Databricsk. You should enable this on Databricks if you want coalesced run . 
| -| spark.datahub.patch.enabled | | false | Set this to true to send lineage as a patch, which appends rather than overwrites existing Dataset lineage edges. By default it is enabled. +| spark.datahub.stage_metadata_coalescing | | | Normally it coalesce and send metadata at the onApplicationEnd event which is never called on Databricks. You should enable this on Databricks if you want coalesced run . | +| spark.datahub.patch.enabled | | true | Set this to true to send lineage as a patch, which appends rather than overwrites existing Dataset lineage edges. By default it is enabled. | ## What to Expect: The Metadata Model diff --git a/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/DatahubSparkListener.java b/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/DatahubSparkListener.java index 38de142c4dd171..54bb3821eddedf 100644 --- a/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/DatahubSparkListener.java +++ b/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/DatahubSparkListener.java @@ -87,11 +87,10 @@ private static SparkAppContext getSparkAppContext( public void onApplicationStart(SparkListenerApplicationStart applicationStart) { long startTime = System.currentTimeMillis(); - initializeContextFactoryIfNotInitialized(); log.info("Application start called"); this.appContext = getSparkAppContext(applicationStart); - + initializeContextFactoryIfNotInitialized(); listener.onApplicationStart(applicationStart); long elapsedTime = System.currentTimeMillis() - startTime; log.info("onApplicationStart completed successfully in {} ms", elapsedTime); @@ -155,7 +154,8 @@ public Optional initializeEmitter(Config sparkConf) { return Optional.empty(); } - private synchronized void loadDatahubConfig(SparkAppContext appContext, Properties properties) { + private synchronized SparkLineageConf loadDatahubConfig( + SparkAppContext appContext, Properties properties) { long startTime = System.currentTimeMillis(); datahubConf = parseSparkConfig(); SparkEnv sparkEnv = SparkEnv$.MODULE$.get(); @@ -169,14 +169,15 @@ private synchronized void loadDatahubConfig(SparkAppContext appContext, Properti Optional> databricksTags = getDatabricksTags(datahubConf); this.appContext.setDatabricksTags(databricksTags.orElse(null)); } + log.info("Datahub configuration: {}", datahubConf.root().render()); Optional restEmitter = initializeEmitter(datahubConf); SparkLineageConf sparkLineageConf = SparkLineageConf.toSparkLineageConf(datahubConf, appContext, restEmitter.orElse(null)); - emitter.setConfig(sparkLineageConf); long elapsedTime = System.currentTimeMillis() - startTime; log.debug("loadDatahubConfig completed successfully in {} ms", elapsedTime); + return sparkLineageConf; } public void onApplicationEnd(SparkListenerApplicationEnd applicationEnd) { @@ -220,7 +221,6 @@ public void onJobStart(SparkListenerJobStart jobStart) { initializeContextFactoryIfNotInitialized(); log.debug("Job start called"); - loadDatahubConfig(this.appContext, jobStart.properties()); listener.onJobStart(jobStart); long elapsedTime = System.currentTimeMillis() - startTime; log.debug("onJobStart completed successfully in {} ms", elapsedTime); @@ -333,10 +333,12 @@ private void initializeContextFactoryIfNotInitialized(SparkConf sparkConf, Strin return; } try { + SparkLineageConf datahubConfig = loadDatahubConfig(appContext, null); SparkOpenLineageConfig config = ArgumentParser.parse(sparkConf); // Needs to be done before initializing OpenLineageClient 
initializeMetrics(config); emitter = new DatahubEventEmitter(config, appName); + emitter.setConfig(datahubConfig); contextFactory = new ContextFactory(emitter, meterRegistry, config); circuitBreaker = new CircuitBreakerFactory(config.getCircuitBreaker()).build(); OpenLineageSparkListener.init(contextFactory); diff --git a/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/conf/SparkConfigParser.java b/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/conf/SparkConfigParser.java index f1af56ff888d3c..d8da5d95935c9a 100644 --- a/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/conf/SparkConfigParser.java +++ b/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/conf/SparkConfigParser.java @@ -307,7 +307,7 @@ public static boolean isCoalesceEnabled(Config datahubConfig) { public static boolean isPatchEnabled(Config datahubConfig) { if (!datahubConfig.hasPath(PATCH_ENABLED)) { - return false; + return true; } return datahubConfig.hasPath(PATCH_ENABLED) && datahubConfig.getBoolean(PATCH_ENABLED); } diff --git a/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/util/RemovePathPatternUtils.java b/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/util/RemovePathPatternUtils.java index a606a44ddd5160..c44dacf8ff3bea 100644 --- a/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/util/RemovePathPatternUtils.java +++ b/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/util/RemovePathPatternUtils.java @@ -22,7 +22,7 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; import org.apache.spark.sql.SparkSession; diff --git a/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark3/agent/lifecycle/plan/catalog/IcebergHandler.java b/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark3/agent/lifecycle/plan/catalog/IcebergHandler.java deleted file mode 100644 index dcd1cf3fb3aff7..00000000000000 --- a/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark3/agent/lifecycle/plan/catalog/IcebergHandler.java +++ /dev/null @@ -1,192 +0,0 @@ -/* -/* Copyright 2018-2023 contributors to the OpenLineage project -/* SPDX-License-Identifier: Apache-2.0 -*/ - -package io.openlineage.spark3.agent.lifecycle.plan.catalog; - -import io.openlineage.client.OpenLineage; -import io.openlineage.client.utils.DatasetIdentifier; -import io.openlineage.spark.agent.util.PathUtils; -import io.openlineage.spark.agent.util.ScalaConversionUtils; -import io.openlineage.spark.agent.util.SparkConfUtils; -import io.openlineage.spark.api.OpenLineageContext; -import java.io.File; -import java.net.URI; -import java.util.Map; -import java.util.Optional; -import java.util.stream.Collectors; -import javax.annotation.Nullable; -import lombok.SneakyThrows; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.fs.Path; -import org.apache.iceberg.CatalogProperties; -import org.apache.iceberg.spark.SparkCatalog; -import org.apache.iceberg.spark.SparkSessionCatalog; -import org.apache.iceberg.spark.source.SparkTable; -import org.apache.spark.sql.SparkSession; -import 
org.apache.spark.sql.catalyst.analysis.NoSuchTableException; -import org.apache.spark.sql.connector.catalog.Identifier; -import org.apache.spark.sql.connector.catalog.TableCatalog; - -@Slf4j -public class IcebergHandler implements CatalogHandler { - - private final OpenLineageContext context; - - private static final String TYPE = "type"; - private static final String CATALOG_IMPL = "catalog-impl"; - private static final String IO_IMPL = "io-impl"; - - public IcebergHandler(OpenLineageContext context) { - this.context = context; - } - - @Override - public boolean hasClasses() { - try { - IcebergHandler.class.getClassLoader().loadClass("org.apache.iceberg.catalog.Catalog"); - return true; - } catch (Exception e) { - // swallow- we don't care - } - return false; - } - - @Override - public boolean isClass(TableCatalog tableCatalog) { - return (tableCatalog instanceof SparkCatalog) || (tableCatalog instanceof SparkSessionCatalog); - } - - @Override - public DatasetIdentifier getDatasetIdentifier( - SparkSession session, - TableCatalog tableCatalog, - Identifier identifier, - Map properties) { - String catalogName = tableCatalog.name(); - - String prefix = String.format("spark.sql.catalog.%s", catalogName); - Map conf = - ScalaConversionUtils.fromMap(session.conf().getAll()); - log.info(conf.toString()); - Map catalogConf = - conf.entrySet().stream() - .filter(x -> x.getKey().startsWith(prefix)) - .filter(x -> x.getKey().length() > prefix.length()) - .collect( - Collectors.toMap( - x -> x.getKey().substring(prefix.length() + 1), // handle dot after prefix - Map.Entry::getValue)); - - log.info(catalogConf.toString()); - if (catalogConf.isEmpty() - || (!catalogConf.containsKey(TYPE) - && !catalogConf.get(CATALOG_IMPL).equals("org.apache.iceberg.aws.glue.GlueCatalog"))) { - throw new UnsupportedCatalogException(catalogName); - } - log.info(catalogConf.get(TYPE)); - - String warehouse = catalogConf.get(CatalogProperties.WAREHOUSE_LOCATION); - DatasetIdentifier di; - - if (catalogConf.get(CATALOG_IMPL).equals("org.apache.iceberg.aws.glue.GlueCatalog")) { - di = new DatasetIdentifier(identifier.toString(), "glue"); - log.info("Glue catalog detected, returning glue dataset identifier {}", di); - return di; - } else { - di = PathUtils.fromPath(new Path(warehouse, identifier.toString())); - } - if (catalogConf.get(TYPE).equals("hive")) { - di.withSymlink( - getHiveIdentifier( - session, catalogConf.get(CatalogProperties.URI), identifier.toString())); - } else if (catalogConf.get(TYPE).equals("hadoop")) { - di.withSymlink( - identifier.toString(), - StringUtils.substringBeforeLast( - di.getName(), File.separator), // parent location from a name becomes a namespace - DatasetIdentifier.SymlinkType.TABLE); - } else if (catalogConf.get(TYPE).equals("rest")) { - di.withSymlink( - getRestIdentifier( - session, catalogConf.get(CatalogProperties.URI), identifier.toString())); - } else if (catalogConf.get(TYPE).equals("nessie")) { - di.withSymlink( - getNessieIdentifier( - session, catalogConf.get(CatalogProperties.URI), identifier.toString())); - } - - return di; - } - - @SneakyThrows - private DatasetIdentifier.Symlink getNessieIdentifier( - SparkSession session, @Nullable String confUri, String table) { - - String uri = new URI(confUri).toString(); - return new DatasetIdentifier.Symlink(table, uri, DatasetIdentifier.SymlinkType.TABLE); - } - - @SneakyThrows - private DatasetIdentifier.Symlink getHiveIdentifier( - SparkSession session, @Nullable String confUri, String table) { - String slashPrefixedTable = 
String.format("/%s", table); - URI uri; - if (confUri == null) { - uri = - SparkConfUtils.getMetastoreUri(session.sparkContext().conf()) - .orElseThrow(() -> new UnsupportedCatalogException("hive")); - } else { - uri = new URI(confUri); - } - DatasetIdentifier metastoreIdentifier = - PathUtils.fromPath( - new Path(PathUtils.enrichHiveMetastoreURIWithTableName(uri, slashPrefixedTable))); - - return new DatasetIdentifier.Symlink( - metastoreIdentifier.getName(), - metastoreIdentifier.getNamespace(), - DatasetIdentifier.SymlinkType.TABLE); - } - - @SneakyThrows - private DatasetIdentifier.Symlink getRestIdentifier( - SparkSession session, @Nullable String confUri, String table) { - - String uri = new URI(confUri).toString(); - return new DatasetIdentifier.Symlink(table, uri, DatasetIdentifier.SymlinkType.TABLE); - } - - @Override - public Optional getStorageDatasetFacet( - Map properties) { - String format = properties.getOrDefault("format", ""); - return Optional.of( - context.getOpenLineage().newStorageDatasetFacet("iceberg", format.replace("iceberg/", ""))); - } - - @SneakyThrows - @Override - public Optional getDatasetVersion( - TableCatalog tableCatalog, Identifier identifier, Map properties) { - SparkTable table; - try { - table = (SparkTable) tableCatalog.loadTable(identifier); - } catch (NoSuchTableException | ClassCastException e) { - log.error("Failed to load table from catalog: {}", identifier, e); - return Optional.empty(); - } - - if (table.table() != null && table.table().currentSnapshot() != null) { - return Optional.of(Long.toString(table.table().currentSnapshot().snapshotId())); - } - return Optional.empty(); - } - - @Override - public String getName() { - return "iceberg"; - } -} diff --git a/metadata-integration/java/spark-lineage/README.md b/metadata-integration/java/spark-lineage/README.md index f56cb14a1ae546..041408aac6d6d3 100644 --- a/metadata-integration/java/spark-lineage/README.md +++ b/metadata-integration/java/spark-lineage/README.md @@ -1,4 +1,10 @@ -# Spark +# Spark (Legacy) + +:::note + +This is our legacy Spark Integration which is replaced by [Acryl Spark Lineage](https://datahubproject.io/docs/metadata-integration/java/spark-lineage-beta) + +::: To integrate Spark with DataHub, we provide a lightweight Java agent that listens for Spark application and job events and pushes metadata out to DataHub in real-time. The agent listens to events such application start/end, and SQLExecution start/end to create pipelines (i.e. DataJob) and tasks (i.e. DataFlow) in Datahub along with lineage to datasets that are being read from and written to. Read on to learn how to configure this for different Spark scenarios. 
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java index a2c8070ea21a3e..70b30f27553c7a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java @@ -105,46 +105,26 @@ public void addEdge(@Nonnull final Edge edge) { // or indirect pattern match String endUrn = destinationUrn; String startUrn = sourceUrn; - String endType = destinationType; - String startType = sourceType; // Extra relationship typename start with r_ for // direct-outgoing-downstream/indirect-incoming-upstream relationships String reverseRelationshipType = "r_" + edge.getRelationshipType(); + final String createOrFindSourceNode = + String.format("MERGE (source:%s {urn: '%s'})", sourceType, sourceUrn); + final String createOrFindDestinationNode = + String.format("MERGE (destination:%s {urn: '%s'})", destinationType, destinationUrn); + final String createSourceToDestinationRelationShip = + String.format("MERGE (source)-[:%s]->(destination)", edge.getRelationshipType()); + String createReverseRelationShip = + String.format("MERGE (source)-[r:%s]->(destination)", reverseRelationshipType); + if (isSourceDestReversed(sourceType, edge.getRelationshipType())) { endUrn = sourceUrn; - endType = sourceType; startUrn = destinationUrn; - startType = destinationType; + createReverseRelationShip = + String.format("MERGE (destination)-[r:%s]->(source)", reverseRelationshipType); } - final List statements = new ArrayList<>(); - - // Add/Update source & destination node first - statements.add(getOrInsertNode(edge.getSource())); - statements.add(getOrInsertNode(edge.getDestination())); - - // Add/Update relationship - final String mergeRelationshipTemplate = - "MATCH (source:%s {urn: '%s'}),(destination:%s {urn: '%s'}) MERGE (source)-[r:%s]->(destination) "; - String statement = - String.format( - mergeRelationshipTemplate, - sourceType, - sourceUrn, - destinationType, - destinationUrn, - edge.getRelationshipType()); - - String statementR = - String.format( - mergeRelationshipTemplate, - startType, - startUrn, - endType, - endUrn, - reverseRelationshipType); - // Add/Update relationship properties String setCreatedOnTemplate; String setcreatedActorTemplate; @@ -193,12 +173,22 @@ public void addEdge(@Nonnull final Edge edge) { final String setStartEndUrnTemplate = String.format("r.startUrn = '%s', r.endUrn = '%s'", startUrn, endUrn); propertiesTemplateJoiner.add(setStartEndUrnTemplate); + + StringBuilder finalStatement = new StringBuilder(); + finalStatement + .append(createOrFindSourceNode) + .append(" ") + .append(createOrFindDestinationNode) + .append(" ") + .append(createSourceToDestinationRelationShip) + .append(" ") + .append(createReverseRelationShip) + .append(" "); if (!StringUtils.isEmpty(propertiesTemplateJoiner.toString())) { - statementR = String.format("%s SET %s", statementR, propertiesTemplateJoiner); + finalStatement.append("SET ").append(propertiesTemplateJoiner); } - - statements.add(buildStatement(statement, new HashMap<>())); - statements.add(buildStatement(statementR, new HashMap<>())); + final List statements = new ArrayList<>(); + statements.add(buildStatement(finalStatement.toString(), new HashMap<>())); executeStatements(statements); } @@ -387,20 +377,24 @@ private Pair> generateLineageStatementAndParameters( graphFilters.getAllowedEntityTypes(), direction), 
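// Illustrative aside, not part of this patch: for an edge whose source and destination are
// not reversed by isSourceDestReversed, the single statement assembled in addEdge above
// renders roughly as the following Cypher (labels, urns, relationship name, and SET
// properties are assumptions for illustration):
//   MERGE (source:dataset {urn: '<source urn>'})
//   MERGE (destination:dataset {urn: '<destination urn>'})
//   MERGE (source)-[:DownstreamOf]->(destination)
//   MERGE (source)-[r:r_DownstreamOf]->(destination)
//   SET r.startUrn = '<source urn>', r.endUrn = '<destination urn>'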
"maxHops", maxHops)); + final String entityType = entityUrn.getEntityType(); + if (lineageFlags == null || (lineageFlags.getStartTimeMillis() == null && lineageFlags.getEndTimeMillis() == null)) { // if no time filtering required, simply find all expansion paths to other nodes final var statement = - "MATCH (a {urn: $urn}) " - + "CALL apoc.path.spanningTree(a, { " - + " relationshipFilter: $relationshipFilter, " - + " labelFilter: $labelFilter, " - + " minLevel: 1, " - + " maxLevel: $maxHops " - + "}) " - + "YIELD path " - + "WITH a, path AS path " - + "RETURN a, path, last(nodes(path));"; + String.format( + "MATCH (a:%s {urn: $urn}) " + + "CALL apoc.path.spanningTree(a, { " + + " relationshipFilter: $relationshipFilter, " + + " labelFilter: $labelFilter, " + + " minLevel: 1, " + + " maxLevel: $maxHops " + + "}) " + + "YIELD path " + + "WITH a, path AS path " + + "RETURN a, path, last(nodes(path));", + entityType); return Pair.of(statement, parameterMap); } else { // when needing time filtering, possibility on multiple paths between two @@ -423,26 +417,28 @@ private Pair> generateLineageStatementAndParameters( // exploration, not // after path exploration is done) final var statement = - "MATCH (a {urn: $urn}) " - + "CALL apoc.path.subgraphNodes(a, { " - + " relationshipFilter: $relationshipFilter, " - + " labelFilter: $labelFilter, " - + " minLevel: 1, " - + " maxLevel: $maxHops " - + "}) " - + "YIELD node AS b " - + "WITH a, b " - + "MATCH path = shortestPath((a)" - + relationshipPattern - + "(b)) " - + "WHERE a <> b " - + " AND ALL(rt IN relationships(path) WHERE " - + " (rt.source IS NOT NULL AND rt.source = 'UI') OR " - + " (rt.createdOn IS NULL AND rt.updatedOn IS NULL) OR " - + " ($startTimeMillis <= rt.createdOn <= $endTimeMillis OR " - + " $startTimeMillis <= rt.updatedOn <= $endTimeMillis) " - + " ) " - + "RETURN a, path, b;"; + String.format( + "MATCH (a:%s {urn: $urn}) " + + "CALL apoc.path.subgraphNodes(a, { " + + " relationshipFilter: $relationshipFilter, " + + " labelFilter: $labelFilter, " + + " minLevel: 1, " + + " maxLevel: $maxHops " + + "}) " + + "YIELD node AS b " + + "WITH a, b " + + "MATCH path = shortestPath((a)" + + relationshipPattern + + "(b)) " + + "WHERE a <> b " + + " AND ALL(rt IN relationships(path) WHERE " + + " (rt.source IS NOT NULL AND rt.source = 'UI') OR " + + " (rt.createdOn IS NULL AND rt.updatedOn IS NULL) OR " + + " ($startTimeMillis <= rt.createdOn <= $endTimeMillis OR " + + " $startTimeMillis <= rt.updatedOn <= $endTimeMillis) " + + " ) " + + "RETURN a, path, b;", + entityType); // provide dummy start/end time when not provided, so no need to // format clause differently if either of them is missing @@ -497,10 +493,19 @@ public RelatedEntitiesResult findRelatedEntities( matchTemplate = "MATCH (src %s)-[r%s %s]->(dest %s)%s"; } - final String returnNodes = - String.format( - "RETURN dest, type(r)"); // Return both related entity and the relationship type. - final String returnCount = "RETURN count(*)"; // For getting the total results. + String srcNodeLabel = StringUtils.EMPTY; + // Create a URN from the String. 
Only proceed if srcCriteria is not null or empty + if (StringUtils.isNotEmpty(srcCriteria)) { + final String urnValue = + sourceEntityFilter.getOr().get(0).getAnd().get(0).getValue().toString(); + try { + final Urn urn = Urn.createFromString(urnValue); + srcNodeLabel = urn.getEntityType(); + matchTemplate = matchTemplate.replace("(src ", "(src:%s "); + } catch (URISyntaxException e) { + log.error("Failed to parse URN: {} ", urnValue, e); + } + } String relationshipTypeFilter = ""; if (!relationshipTypes.isEmpty()) { @@ -510,17 +515,34 @@ public RelatedEntitiesResult findRelatedEntities( String whereClause = computeEntityTypeWhereClause(sourceTypes, destinationTypes); // Build Statement strings - String baseStatementString = - String.format( - matchTemplate, - srcCriteria, - relationshipTypeFilter, - edgeCriteria, - destCriteria, - whereClause); + String baseStatementString; + if (StringUtils.isNotEmpty(srcNodeLabel)) { + baseStatementString = + String.format( + matchTemplate, + srcNodeLabel, + srcCriteria, + relationshipTypeFilter, + edgeCriteria, + destCriteria, + whereClause); + } else { + baseStatementString = + String.format( + matchTemplate, + srcCriteria, + relationshipTypeFilter, + edgeCriteria, + destCriteria, + whereClause); + } log.info(baseStatementString); + final String returnNodes = + "RETURN dest, type(r)"; // Return both related entity and the relationship type. + final String returnCount = "RETURN count(*)"; // For getting the total results. + final String resultStatementString = String.format("%s %s SKIP $offset LIMIT $count", baseStatementString, returnNodes); final String countStatementString = String.format("%s %s", baseStatementString, returnCount); @@ -581,10 +603,11 @@ private String computeEntityTypeWhereClause( public void removeNode(@Nonnull final Urn urn) { log.debug(String.format("Removing Neo4j node with urn: %s", urn)); + final String srcNodeLabel = urn.getEntityType(); // also delete any relationship going to or from it - final String matchTemplate = "MATCH (node {urn: $urn}) DETACH DELETE node"; - final String statement = String.format(matchTemplate); + final String matchTemplate = "MATCH (node:%s {urn: $urn}) DETACH DELETE node"; + final String statement = String.format(matchTemplate, srcNodeLabel); final Map params = new HashMap<>(); params.put("urn", urn.toString()); @@ -615,19 +638,20 @@ public void removeEdgesFromNode( // also delete any relationship going to or from it final RelationshipDirection relationshipDirection = relationshipFilter.getDirection(); + final String srcNodeLabel = urn.getEntityType(); - String matchTemplate = "MATCH (src {urn: $urn})-[r%s]-(dest) RETURN type(r), dest, 2"; + String matchTemplate = "MATCH (src:%s {urn: $urn})-[r%s]-(dest) RETURN type(r), dest, 2"; if (relationshipDirection == RelationshipDirection.INCOMING) { - matchTemplate = "MATCH (src {urn: $urn})<-[r%s]-(dest) RETURN type(r), dest, 0"; + matchTemplate = "MATCH (src:%s {urn: $urn})<-[r%s]-(dest) RETURN type(r), dest, 0"; } else if (relationshipDirection == RelationshipDirection.OUTGOING) { - matchTemplate = "MATCH (src {urn: $urn})-[r%s]->(dest) RETURN type(r), dest, 1"; + matchTemplate = "MATCH (src:%s {urn: $urn})-[r%s]->(dest) RETURN type(r), dest, 1"; } String relationshipTypeFilter = ""; if (!relationshipTypes.isEmpty()) { relationshipTypeFilter = ":" + StringUtils.join(relationshipTypes, "|"); } - final String statement = String.format(matchTemplate, relationshipTypeFilter); + final String statement = String.format(matchTemplate, srcNodeLabel, 
relationshipTypeFilter); final Map params = new HashMap<>(); params.put("urn", urn.toString()); @@ -636,7 +660,7 @@ public void removeEdgesFromNode( if (!neo4jResult.isEmpty()) { String removeMode = neo4jResult.get(0).values().get(2).toString(); if (removeMode.equals("2")) { - final String matchDeleteTemplate = "MATCH (src {urn: $urn})-[r%s]-(dest) DELETE r"; + final String matchDeleteTemplate = "MATCH (src:%s {urn: $urn})-[r%s]-(dest) DELETE r"; relationshipTypeFilter = ""; if (!relationshipTypes.isEmpty()) { relationshipTypeFilter = @@ -646,7 +670,7 @@ public void removeEdgesFromNode( + StringUtils.join(relationshipTypes, "|r_"); } final String statementNoDirection = - String.format(matchDeleteTemplate, relationshipTypeFilter); + String.format(matchDeleteTemplate, srcNodeLabel, relationshipTypeFilter); runQuery(buildStatement(statementNoDirection, params)).consume(); } else { for (Record typeDest : neo4jResult) { @@ -720,33 +744,32 @@ private static final class ExecutionResult { * * @param statements List of statements with parameters to be executed in order */ - private synchronized ExecutionResult executeStatements(@Nonnull List statements) { - int retry = 0; + private ExecutionResult executeStatements(@Nonnull List statements) { final StopWatch stopWatch = new StopWatch(); stopWatch.start(); - Exception lastException; + int retry = 0; try (final Session session = _driver.session(_sessionConfig)) { - do { + for (retry = 0; retry <= MAX_TRANSACTION_RETRY; retry++) { try { - session.writeTransaction( + session.executeWrite( tx -> { for (Statement statement : statements) { tx.run(statement.getCommandText(), statement.getParams()); } - return 0; + return null; }); - lastException = null; break; } catch (Neo4jException e) { - lastException = e; + log.warn("Failed to execute Neo4j write transaction. 
Retry count: {}", retry, e); + if (retry == MAX_TRANSACTION_RETRY) { + throw new RetryLimitReached( + "Failed to execute Neo4j write transaction after " + + MAX_TRANSACTION_RETRY + + " retries", + e); + } } - } while (++retry <= MAX_TRANSACTION_RETRY); - } - - if (lastException != null) { - throw new RetryLimitReached( - "Failed to execute Neo4j write transaction after " + MAX_TRANSACTION_RETRY + " retries", - lastException); + } } stopWatch.stop(); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java index 3768fbdb018846..37a7e5adde2dcb 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java @@ -37,10 +37,7 @@ import lombok.extern.slf4j.Slf4j; import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; -import org.opensearch.index.query.BoolQueryBuilder; -import org.opensearch.index.query.MultiMatchQueryBuilder; -import org.opensearch.index.query.QueryBuilder; -import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.*; import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; import org.opensearch.search.SearchHit; import org.opensearch.search.builder.SearchSourceBuilder; @@ -219,10 +216,9 @@ private static BoolQueryBuilder defaultQuery( autocompleteQueryBuilder.field(fieldName + ".ngram._3gram"); autocompleteQueryBuilder.field(fieldName + ".ngram._4gram"); } - + autocompleteQueryBuilder.field(fieldName + ".delimited"); finalQuery.should(QueryBuilders.matchPhrasePrefixQuery(fieldName + ".delimited", query)); }); - finalQuery.should(autocompleteQueryBuilder); return finalQuery; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java index 33195d4ea807d5..9262d17ccde92f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java @@ -181,6 +181,13 @@ public Set getStandardFields( return fields; } + /** + * Return query by default fields + * + * @param entityRegistry entity registry with search annotations + * @param entitySpec the entity spect + * @return set of queryByDefault field configurations + */ @VisibleForTesting public Set getFieldsFromEntitySpec( @Nonnull EntityRegistry entityRegistry, EntitySpec entitySpec) { @@ -212,14 +219,20 @@ public Set getFieldsFromEntitySpec( List searchableRefFieldSpecs = entitySpec.getSearchableRefFieldSpecs(); for (SearchableRefFieldSpec refFieldSpec : searchableRefFieldSpecs) { + if (!refFieldSpec.getSearchableRefAnnotation().isQueryByDefault()) { + continue; + } + int depth = refFieldSpec.getSearchableRefAnnotation().getDepth(); - Set searchFieldConfig = - SearchFieldConfig.detectSubFieldType(refFieldSpec, depth, entityRegistry); - fields.addAll(searchFieldConfig); + Set searchFieldConfigs = + SearchFieldConfig.detectSubFieldType(refFieldSpec, depth, entityRegistry).stream() + .filter(SearchFieldConfig::isQueryByDefault) + 
.collect(Collectors.toSet()); + fields.addAll(searchFieldConfigs); Map fieldTypeMap = getAllFieldTypeFromSearchableRef(refFieldSpec, depth, entityRegistry, ""); - for (SearchFieldConfig fieldConfig : searchFieldConfig) { + for (SearchFieldConfig fieldConfig : searchFieldConfigs) { if (fieldConfig.hasDelimitedSubfield()) { fields.add( SearchFieldConfig.detectSubFieldType( diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchQueryBuilderTest.java index dbb5bdb0b7d01d..69cecba6bff9ac 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchQueryBuilderTest.java @@ -541,4 +541,16 @@ public void testGetStandardFields() { .map(SearchFieldConfig::boost), Optional.of(2.0F)); } + + @Test + public void testStandardFieldsQueryByDefault() { + assertTrue( + TEST_BUILDER + .getStandardFields( + opContext.getEntityRegistry(), + opContext.getEntityRegistry().getEntitySpecs().values()) + .stream() + .allMatch(SearchFieldConfig::isQueryByDefault), + "Expect all search fields to be queryByDefault."); + } } diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java index e30fea976fbecc..a0e304b26ea60f 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java @@ -60,7 +60,8 @@ public UpdateIndicesHook init(@javax.annotation.Nonnull OperationContext systemO public void invoke(@Nonnull final MetadataChangeLog event) { if (event.getSystemMetadata() != null) { if (event.getSystemMetadata().getProperties() != null) { - if (UI_SOURCE.equals(event.getSystemMetadata().getProperties().get(APP_SOURCE)) + if (!Boolean.parseBoolean(event.getSystemMetadata().getProperties().get(FORCE_INDEXING_KEY)) + && UI_SOURCE.equals(event.getSystemMetadata().getProperties().get(APP_SOURCE)) && !reprocessUIEvents) { // If coming from the UI, we pre-process the Update Indices hook as a fast path to avoid // Kafka lag diff --git a/metadata-jobs/mce-consumer/build.gradle b/metadata-jobs/mce-consumer/build.gradle index b062547724138f..5ea24059a3ee33 100644 --- a/metadata-jobs/mce-consumer/build.gradle +++ b/metadata-jobs/mce-consumer/build.gradle @@ -22,6 +22,7 @@ dependencies { implementation project(':metadata-events:mxe-utils-avro') implementation project(':metadata-io') implementation project(':metadata-service:restli-client-api') + implementation project(':metadata-dao-impl:kafka-producer') implementation spec.product.pegasus.restliClient implementation spec.product.pegasus.restliCommon implementation externalDependency.elasticSearchRest diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl index 5b60aa18e87da9..65196a69ce3660 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl @@ -2,6 +2,7 @@ namespace com.linkedin.assertion import com.linkedin.common.CustomProperties import com.linkedin.common.ExternalReference +import com.linkedin.common.AuditStamp 
/** * Information about an assertion @@ -66,10 +67,15 @@ record AssertionInfo includes CustomProperties, ExternalReference { volumeAssertion: optional VolumeAssertionInfo /** - * A SQL Assertion definition. This field is populated when the type is SQL. + * A SQL Assertion definition. This field is populated when the type is SQL. */ sqlAssertion: optional SqlAssertionInfo + /** + * A Field Assertion definition. This field is populated when the type is FIELD. + */ + fieldAssertion: optional FieldAssertionInfo + /** * An schema Assertion definition. This field is populated when the type is DATA_SCHEMA */ @@ -83,6 +89,12 @@ record AssertionInfo includes CustomProperties, ExternalReference { */ source: optional AssertionSource + /** + * The time at which the assertion was last updated and the actor who updated it. + * This field is only present for Native assertions updated after this field was introduced. + */ + lastUpdated: optional AuditStamp + /** * An optional human-readable description of the assertion */ diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultError.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultError.pdl index e768fe8521942f..4bbfa20f8663ec 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultError.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultError.pdl @@ -33,6 +33,14 @@ record AssertionResultError { */ UNSUPPORTED_PLATFORM /** + * Error while executing a custom SQL assertion + */ + CUSTOM_SQL_ERROR + /** + * Error while executing a field assertion + */ + FIELD_ASSERTION_ERROR + /** * Unknown error */ UNKNOWN_ERROR @@ -42,4 +50,4 @@ record AssertionResultError { * Additional metadata depending on the type of error */ properties: optional map[string, string] -} \ No newline at end of file +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionSource.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionSource.pdl index d8892c0c71c6f6..734a48f7718863 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionSource.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionSource.pdl @@ -1,5 +1,7 @@ namespace com.linkedin.assertion +import com.linkedin.common.AuditStamp + /** * The source of an assertion */ @@ -24,4 +26,10 @@ record AssertionSource { */ INFERRED } + + /** + * The time at which the assertion was initially created and the author who created it. + * This field is only present for Native assertions created after this field was introduced. + */ + created: optional AuditStamp } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdOperator.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdOperator.pdl index 2e0dcbe24986b8..ee4f9612490258 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdOperator.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdOperator.pdl @@ -34,6 +34,16 @@ enum AssertionStdOperator { */ EQUAL_TO + /** + * Value being asserted is not equal to value. Requires 'value' parameter. + */ + NOT_EQUAL_TO + + /** + * Value being asserted is null. Requires no parameters. + */ + NULL + /** * Value being asserted is not null. Requires no parameters. */ @@ -69,6 +79,16 @@ enum AssertionStdOperator { */ NOT_IN + /** + * Value being asserted is true. Requires no parameters. 
+ */ + IS_TRUE + + /** + * Value being asserted is false. Requires no parameters. + */ + IS_FALSE + /** * Other */ diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdParameter.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdParameter.pdl index a212fe84aff13f..9c3e3ea7c1c958 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdParameter.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionStdParameter.pdl @@ -13,10 +13,29 @@ record AssertionStdParameter { * The type of the parameter */ type: enum AssertionStdParameterType { + /** + * A string value + */ STRING + + /** + * A numeric value + */ NUMBER + + /** + * A list of values. When used, value should be formatted as a serialized JSON array. + */ LIST + + /** + * A set of values. When used, value should be formatted as a serialized JSON array. + */ SET + + /** + * A value of unknown type + */ UNKNOWN } } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FieldAssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FieldAssertionInfo.pdl new file mode 100644 index 00000000000000..0b8d9ab8cceb8f --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FieldAssertionInfo.pdl @@ -0,0 +1,57 @@ +namespace com.linkedin.assertion + +import com.linkedin.common.Urn +import com.linkedin.dataset.DatasetFilter + +/** +* Attributes defining a Field Assertion. +**/ +record FieldAssertionInfo { + /** + * The type of the field assertion being monitored. + */ + @Searchable = {} + type: enum FieldAssertionType { + /** + * An assertion used to validate the values contained with a field / column given a set of rows. + */ + FIELD_VALUES + /** + * An assertion used to validate the value of a common field / column metric (e.g. aggregation) such as null count + percentage, + * min, max, median, and more. + */ + FIELD_METRIC + } + + /** + * The entity targeted by this Field check. + */ + @Searchable = { + "fieldType": "URN" + } + @Relationship = { + "name": "Asserts", + "entityTypes": [ "dataset" ] + } + entity: Urn + + /** + * The definition of an assertion that validates individual values of a field / column for a set of rows. + * This type of assertion verifies that each column value meets a particular requirement. + */ + fieldValuesAssertion: optional FieldValuesAssertion + + /** + * The definition of an assertion that validates a common metric obtained about a field / column for a set of rows. + * This type of assertion verifies that the value of a high-level metric obtained by aggregating over a column meets + * expectations + */ + fieldMetricAssertion: optional FieldMetricAssertion + + /** + * A definition of the specific filters that should be applied, when performing monitoring. + * If not provided, there is no filter, and the full table is under consideration. + * If using DataHub Dataset Profiles as the assertion source type, the value of this field will be ignored. 
+ */ + filter: optional DatasetFilter +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FieldMetricAssertion.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FieldMetricAssertion.pdl new file mode 100644 index 00000000000000..ca9ce9cbd6a8c3 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FieldMetricAssertion.pdl @@ -0,0 +1,39 @@ +namespace com.linkedin.assertion + +import com.linkedin.schema.SchemaFieldSpec + +/** +* Attributes defining a field metric assertion, which asserts an expectation against +* a common metric derived from the set of field / column values, for example: +* max, min, median, null count, null percentage, unique count, unique percentage, and more. +*/ +record FieldMetricAssertion { + /** + * The field under evaluation + */ + @Searchable = { + "/path": { + "fieldName": "fieldPath" + } + } + field: SchemaFieldSpec + + /** + * The specific metric to assert against. This is the value that + * will be obtained by applying a standard operation, such as an aggregation, + * to the selected field. + */ + metric: FieldMetricType + + /** + * The predicate to evaluate against the metric for the field / column. + * Depending on the operator, parameters may be required in order to successfully + * evaluate the assertion against the metric value. + */ + operator: AssertionStdOperator + + /** + * Standard parameters required for the assertion. e.g. min_value, max_value, value, columns + */ + parameters: optional AssertionStdParameters +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FieldMetricType.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FieldMetricType.pdl new file mode 100644 index 00000000000000..9df06e9dc1fe2c --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FieldMetricType.pdl @@ -0,0 +1,94 @@ +namespace com.linkedin.assertion + +/** + * A standard metric that can be derived from the set of values + * for a specific field / column of a dataset / table. 
+ */ +enum FieldMetricType { + /** + * The number of unique values found in the column value set + */ + UNIQUE_COUNT + + /** + * The percentage of unique values to total rows for the dataset + */ + UNIQUE_PERCENTAGE + + /** + * The number of null values found in the column value set + */ + NULL_COUNT + + /** + * The percentage of null values to total rows for the dataset + */ + NULL_PERCENTAGE + + /** + * The minimum value in the column set (applies to numeric columns) + */ + MIN + + /** + * The maximum value in the column set (applies to numeric columns) + */ + MAX + + /** + * The mean value found in the column set (applies to numeric columns) + */ + MEAN + + /** + * The median value found in the column set (applies to numeric columns) + */ + MEDIAN + + /** + * The standard deviation of the values found in the column set (applies to numeric columns) + */ + STDDEV + + /** + * The number of negative values found in the value set (applies to numeric columns) + */ + NEGATIVE_COUNT + + /** + * The percentage of negative values to total rows for the dataset (applies to numeric columns) + */ + NEGATIVE_PERCENTAGE + + /** + * The number of zero values found in the value set (applies to numeric columns) + */ + ZERO_COUNT + + /** + * The percentage of zero values to total rows for the dataset (applies to numeric columns) + */ + ZERO_PERCENTAGE + + /** + * The minimum length found in the column set (applies to string columns) + */ + MIN_LENGTH + + /** + * The maximum length found in the column set (applies to string columns) + */ + MAX_LENGTH + + /** + * The number of empty string values found in the value set (applies to string columns). + * Note: This is a completely different metric from NULL_COUNT! + */ + EMPTY_COUNT + + /** + * The percentage of empty string values to total rows for the dataset (applies to string columns) + * Note: This is a completely different metric from NULL_PERCENTAGE! + */ + EMPTY_PERCENTAGE +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FieldTransform.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FieldTransform.pdl new file mode 100644 index 00000000000000..3b3d3339a9b864 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FieldTransform.pdl @@ -0,0 +1,21 @@ +namespace com.linkedin.assertion + +/** +* Definition of a transform applied to the values of a column / field. +* Note that the applicability of a field transform ultimately depends on the native type +* of the field / column. +* +* Model has single field to permit extension. +*/ +record FieldTransform { + /** + * The type of the field transform, e.g. the transformation + * function / operator to apply. + */ + type: enum FieldTransformType { + /** + * Obtain the length of a string field / column (applicable to string types) + */ + LENGTH + } +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FieldValuesAssertion.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FieldValuesAssertion.pdl new file mode 100644 index 00000000000000..0400124234462d --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FieldValuesAssertion.pdl @@ -0,0 +1,83 @@ +namespace com.linkedin.assertion + +import com.linkedin.schema.SchemaFieldSpec + +/** +* Attributes defining a field values assertion, which asserts that the values for a field / column +* of a dataset / table match a set of expectations.
+* +* In other words, this type of assertion acts as a semantic constraint applied to fields for a specific column. +* +* TODO: We should display the "failed row count" to the user if the column fails the verification rules. +* TODO: Determine whether we need an "operator" that can be applied to the field. +*/ +record FieldValuesAssertion { + /** + * The field under evaluation + */ + @Searchable = { + "/path": { + "fieldName": "fieldPath" + } + } + field: SchemaFieldSpec + + /** + * An optional transform to apply to field values + * before evaluating the operator. + * + * If none is applied, the field value will be compared as is. + */ + transform: optional FieldTransform + + /** + * The predicate to evaluate against a single value of the field. + * Depending on the operator, parameters may be required in order to successfully + * evaluate the assertion against the field value. + */ + operator: AssertionStdOperator + + /** + * Standard parameters required for the assertion. e.g. min_value, max_value, value, columns + */ + parameters: optional AssertionStdParameters + + /** + * Additional customization about when the assertion + * should be officially considered failing. + */ + failThreshold: record FieldValuesFailThreshold { + + /** + * The type of failure threshold. Either based on the number + * of column values (rows) that fail the expectations, or the percentage + * of the total rows under consideration. + */ + type: enum FieldValuesFailThresholdType { + /* + * The maximum number of column values (i.e. rows) that are allowed + * to fail the defined expectations before the assertion officially fails. + */ + COUNT + /* + * The maximum percentage of rows that are allowed + * to fail the defined column expectations before the assertion officially fails. + */ + PERCENTAGE + } = "COUNT" + + /** + * By default this is 0, meaning that ALL column values (i.e. rows) must + * meet the defined expectations. + */ + value: long = 0 + } + + /** + * Whether to ignore or allow nulls when running the values assertion. (i.e. + * consider only non-null values) using operators OTHER than the IS_NULL operator. + * + * Defaults to true, allowing null values. + */ + excludeNulls: boolean = true +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldSpec.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldSpec.pdl index 04acd1c71352de..179d4a1b135913 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldSpec.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/FreshnessFieldSpec.pdl @@ -4,11 +4,13 @@ import com.linkedin.schema.SchemaFieldSpec /** -* Lightweight spec used for referencing a particular schema field. -**/ +* Lightweight spec used for referencing a particular schema field that is used to compute +* a freshness signal or operation. +* TODO: Since this is now leveraged across assertions & metrics / operations, we should consider moving this to a common package. +*/ record FreshnessFieldSpec includes SchemaFieldSpec { /** - * The type of the field being used to verify the Freshness Assertion. + * The type of the field being used to verify the Freshness of the asset. 
*/ kind: optional FreshnessFieldKind } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/SchemaAssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/SchemaAssertionInfo.pdl index fd246e0c7cfc46..2e691d5152ae34 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/SchemaAssertionInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/SchemaAssertionInfo.pdl @@ -25,5 +25,36 @@ record SchemaAssertionInfo { * Note that many of the fields of this model, especially those related to metadata (tags, terms) * will go unused in this context. */ - schema: SchemaMetadata +// @Relationship = { +// "/foreignKeys/*/foreignFields/*": null, +// "/foreignKeys/*/foreignDataset": null, +// "/fields/*/globalTags/tags/*/tag": null, +// "/fields/*/glossaryTerms/terms/*/urn": null +// } +// @Searchable = { +// "/fields/*/fieldPath": null, +// "/fields/*/description": null, +// "/fields/*/label": null, +// "/fields/*/globalTags/tags/*/tag": null, +// "/fields/*/glossaryTerms/terms/*/urn": null +// } + schema: SchemaMetadata + + /** + * The required compatibility level for the schema assertion to pass. + */ + compatibility: optional enum SchemaAssertionCompatibility { + /** + * The actual schema must be exactly the same as the expected schema + */ + EXACT_MATCH, + /** + * The actual schema must be a superset of the expected schema + */ + SUPERSET, + /** + * The actual schema must be a subset of the expected schema + */ + SUBSET + } = "EXACT_MATCH" } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/VolumeAssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/VolumeAssertionInfo.pdl index 327b76f95762e3..bdc78d3bd0a6f2 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/VolumeAssertionInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/VolumeAssertionInfo.pdl @@ -8,7 +8,7 @@ import com.linkedin.dataset.DatasetFilter */ record VolumeAssertionInfo { /** - * The type of the freshness assertion being monitored. + * The type of the volume assertion being monitored. */ @Searchable = {} type: enum VolumeAssertionType { diff --git a/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataQualityContract.pdl b/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataQualityContract.pdl index 273d2c2a56f95b..3ff8b58284f189 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataQualityContract.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/datacontract/DataQualityContract.pdl @@ -12,5 +12,9 @@ record DataQualityContract { * The assertion representing the Data Quality contract. * E.g. a table or column-level assertion. */ + @Relationship = { + "name": "IncludesDataQualityAssertion", + "entityTypes": [ "assertion" ] + } assertion: Urn } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/datacontract/SchemaContract.pdl b/metadata-models/src/main/pegasus/com/linkedin/datacontract/SchemaContract.pdl index 6c11e0da5b1286..af61a660cdf768 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/datacontract/SchemaContract.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/datacontract/SchemaContract.pdl @@ -9,5 +9,9 @@ record SchemaContract { /** * The assertion representing the schema contract. 
 */ + @Relationship = { + "name": "IncludesSchemaAssertion", + "entityTypes": [ "assertion" ] + } assertion: Urn } diff --git a/metadata-models/src/main/pegasus/com/linkedin/incident/IncidentSource.pdl b/metadata-models/src/main/pegasus/com/linkedin/incident/IncidentSource.pdl index 2f8912da5458c9..2e65d37dc09398 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/incident/IncidentSource.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/incident/IncidentSource.pdl @@ -22,6 +22,11 @@ record IncidentSource { * Manually created incident, via UI or API. */ MANUAL + + /** + * An assertion has failed, triggering the incident. + */ + ASSERTION_FAILURE } /** diff --git a/metadata-models/src/main/pegasus/com/linkedin/incident/IncidentType.pdl b/metadata-models/src/main/pegasus/com/linkedin/incident/IncidentType.pdl index 27c4790e3b6ef6..1c3473018d4e0a 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/incident/IncidentType.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/incident/IncidentType.pdl @@ -4,6 +4,36 @@ namespace com.linkedin.incident * A type of asset incident */ enum IncidentType { + /** + * A Freshness Assertion has failed, triggering the incident. + * Raised on entities where assertions are configured to generate incidents. + */ + FRESHNESS + + /** + * A Volume Assertion has failed, triggering the incident. + * Raised on entities where assertions are configured to generate incidents. + */ + VOLUME + + /** + * A Field Assertion has failed, triggering the incident. + * Raised on entities where assertions are configured to generate incidents. + */ + FIELD + + /** + * A raw SQL-statement based assertion has failed, triggering the incident. + * Raised on entities where assertions are configured to generate incidents. + */ + SQL + + /** + * A Data Schema assertion has failed, triggering the incident. + * Raised on entities where assertions are configured to generate incidents. + */ + DATA_SCHEMA + + /** + * A misc. operational incident, e.g. failure to materialize a dataset.
*/ diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index fe6063b3cefc63..1e8044e3b5f86e 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -614,3 +614,16 @@ plugins: supportedEntityAspectNames: - entityName: '*' aspectName: structuredProperties + - className: 'com.linkedin.metadata.aspect.hooks.FieldPathMutator' + enabled: true + supportedOperations: + - CREATE + - UPSERT + - UPDATE + - RESTATE + - PATCH + supportedEntityAspectNames: + - entityName: '*' + aspectName: 'schemaMetadata' + - entityName: '*' + aspectName: 'editableSchemaMetadata' diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/GraphQLConcurrencyConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/GraphQLConcurrencyConfiguration.java new file mode 100644 index 00000000000000..4728e6f8ac1038 --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/GraphQLConcurrencyConfiguration.java @@ -0,0 +1,12 @@ +package com.linkedin.metadata.config; + +import lombok.Data; + +@Data +public class GraphQLConcurrencyConfiguration { + boolean separateThreadPool; + long stackSize; + int corePoolSize; + int maxPoolSize; + int keepAlive; +} diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/GraphQLConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/GraphQLConfiguration.java index 93292a5edafeec..2b9905b48d6106 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/GraphQLConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/GraphQLConfiguration.java @@ -5,4 +5,5 @@ @Data public class GraphQLConfiguration { private GraphQLQueryConfiguration query; + private GraphQLConcurrencyConfiguration concurrency; } diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/MetadataChangeProposalConfig.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/MetadataChangeProposalConfig.java new file mode 100644 index 00000000000000..3d3808bc5feb45 --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/MetadataChangeProposalConfig.java @@ -0,0 +1,30 @@ +package com.linkedin.metadata.config; + +import lombok.Data; +import lombok.experimental.Accessors; + +@Data +@Accessors(chain = true) +public class MetadataChangeProposalConfig { + + ThrottlesConfig throttle; + + @Data + @Accessors(chain = true) + public static class ThrottlesConfig { + Integer updateIntervalMs; + ThrottleConfig versioned; + ThrottleConfig timeseries; + } + + @Data + @Accessors(chain = true) + public static class ThrottleConfig { + boolean enabled; + Integer threshold; + Integer maxAttempts; + Integer initialIntervalMs; + Integer multiplier; + Integer maxIntervalMs; + } +} diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 5991e413a43c8a..cae315e96e4d71 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -440,6 +440,12 @@ cache: dashboardUsageStatistics: 21600 # 6hrs graphQL: + concurrency: + separateThreadPool: ${GRAPHQL_CONCURRENCY_SEPARATE_THREAD_POOL:false} # Enable the 
separate thread pool, the following configurations only apply if enabled + stackSize: ${GRAPHQL_CONCURRENCY_STACK_SIZE:256000} # Default to JVM default of 256 KB + corePoolSize: ${GRAPHQL_CONCURRENCY_CORE_POOL_SIZE:-1} # Base thread pool size for GraphQL executor service, default 5 * # of cores + maxPoolSize: ${GRAPHQL_CONCURRENCY_MAX_POOL_SIZE:-1} # Maximum thread pool size for GraphQL executor service, default 100 * # of cores + keepAlive: ${GRAPHQL_CONCURRENCY_KEEP_ALIVE:60} # Number of seconds to keep inactive threads alive query: complexityLimit: ${GRAPHQL_QUERY_COMPLEXITY_LIMIT:2000} depthLimit: ${GRAPHQL_QUERY_DEPTH_LIMIT:50} @@ -454,3 +460,26 @@ forms: businessAttribute: fetchRelatedEntitiesCount: ${BUSINESS_ATTRIBUTE_RELATED_ENTITIES_COUNT:20000} fetchRelatedEntitiesBatchSize: ${BUSINESS_ATTRIBUTE_RELATED_ENTITIES_BATCH_SIZE:1000} + +metadataChangeProposal: + throttle: + updateIntervalMs: ${MCP_THROTTLE_UPDATE_INTERVAL_MS:60000} + + # Versioned MCL topic + versioned: + # Whether to throttle MCP processing based on MCL backlog + enabled: ${MCP_VERSIONED_THROTTLE_ENABLED:false} + threshold: ${MCP_VERSIONED_THRESHOLD:4000} # throttle threshold + maxAttempts: ${MCP_VERSIONED_MAX_ATTEMPTS:1000} + initialIntervalMs: ${MCP_VERSIONED_INITIAL_INTERVAL_MS:100} + multiplier: ${MCP_VERSIONED_MULTIPLIER:10} + maxIntervalMs: ${MCP_VERSIONED_MAX_INTERVAL_MS:30000} + # Timeseries MCL topic + timeseries: + # Whether to throttle MCP processing based on MCL backlog + enabled: ${MCP_TIMESERIES_THROTTLE_ENABLED:false} + threshold: ${MCP_TIMESERIES_THRESHOLD:4000} # throttle threshold + maxAttempts: ${MCP_TIMESERIES_MAX_ATTEMPTS:1000} + initialIntervalMs: ${MCP_TIMESERIES_INITIAL_INTERVAL_MS:100} + multiplier: ${MCP_TIMESERIES_MULTIPLIER:10} + maxIntervalMs: ${MCP_TIMESERIES_MAX_INTERVAL_MS:30000} \ No newline at end of file diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java index 9381e24fabab60..08adbd54730a74 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java @@ -7,6 +7,7 @@ import com.linkedin.metadata.config.EbeanConfiguration; import com.linkedin.metadata.config.GraphQLConfiguration; import com.linkedin.metadata.config.IngestionConfiguration; +import com.linkedin.metadata.config.MetadataChangeProposalConfig; import com.linkedin.metadata.config.SystemUpdateConfiguration; import com.linkedin.metadata.config.TestsConfiguration; import com.linkedin.metadata.config.ViewsConfiguration; @@ -80,4 +81,7 @@ public class ConfigurationProvider { /** GraphQL Configurations */ private GraphQLConfiguration graphQL; + + /** MCP throttling configuration */ + private MetadataChangeProposalConfig metadataChangeProposal; } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java index dffcb9df4d36e8..aa80fc62db09c4 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java @@ -10,6 +10,8 @@ import com.linkedin.datahub.graphql.GmsGraphQLEngineArgs; import 
com.linkedin.datahub.graphql.GraphQLEngine; import com.linkedin.datahub.graphql.analytics.service.AnalyticsService; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; +import com.linkedin.datahub.graphql.concurrency.GraphQLWorkerPoolThreadFactory; import com.linkedin.entity.client.EntityClient; import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.gms.factory.auth.DataHubTokenServiceFactory; @@ -21,6 +23,7 @@ import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.recommendation.RecommendationServiceFactory; import com.linkedin.metadata.client.UsageStatsJavaClient; +import com.linkedin.metadata.config.GraphQLConcurrencyConfiguration; import com.linkedin.metadata.connection.ConnectionService; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.GraphClient; @@ -43,11 +46,16 @@ import com.linkedin.metadata.version.GitVersion; import io.datahubproject.metadata.services.RestrictedService; import io.datahubproject.metadata.services.SecretService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.SynchronousQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; import org.opensearch.client.RestHighLevelClient; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; @@ -60,7 +68,7 @@ EntityRegistryFactory.class, DataHubTokenServiceFactory.class, GitVersionFactory.class, - SiblingGraphServiceFactory.class + SiblingGraphServiceFactory.class, }) public class GraphQLEngineFactory { @Autowired @@ -243,4 +251,34 @@ protected GraphQLEngine graphQLEngine( args.setConnectionService(_connectionService); return new GmsGraphQLEngine(args).builder().build(); } + + @Bean(name = "graphQLWorkerPool") + @ConditionalOnProperty("graphQL.concurrency.separateThreadPool") + protected ExecutorService graphQLWorkerPool() { + GraphQLConcurrencyConfiguration concurrencyConfig = + configProvider.getGraphQL().getConcurrency(); + GraphQLWorkerPoolThreadFactory threadFactory = + new GraphQLWorkerPoolThreadFactory(concurrencyConfig.getStackSize()); + int corePoolSize = + concurrencyConfig.getCorePoolSize() < 0 + ? Runtime.getRuntime().availableProcessors() * 5 + : concurrencyConfig.getCorePoolSize(); + int maxPoolSize = + concurrencyConfig.getMaxPoolSize() <= 0 + ? 
Runtime.getRuntime().availableProcessors() * 100 + : concurrencyConfig.getMaxPoolSize(); + + ThreadPoolExecutor graphQLWorkerPool = + new ThreadPoolExecutor( + corePoolSize, + maxPoolSize, + concurrencyConfig.getKeepAlive(), + TimeUnit.SECONDS, + new SynchronousQueue(), + threadFactory, + new ThreadPoolExecutor.CallerRunsPolicy()); + GraphQLConcurrencyUtils.setExecutorService(graphQLWorkerPool); + + return graphQLWorkerPool; + } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java index 5844dc4a8f72a2..6a2b9f511b79f8 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java @@ -44,7 +44,7 @@ public static Map buildProducerProperties( Arrays.asList(kafkaConfiguration.getBootstrapServers().split(","))); } // else we rely on KafkaProperties which defaults to localhost:9092 - Map props = properties.buildProducerProperties(); + Map props = properties.buildProducerProperties(null); props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, schemaRegistryConfig.getSerializer()); diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java index d5210213185bea..9501b03482d045 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java @@ -73,7 +73,7 @@ private static Map buildCustomizedProperties( Arrays.asList(kafkaConfiguration.getBootstrapServers().split(","))); } // else we rely on KafkaProperties which defaults to localhost:9092 - Map customizedProperties = baseKafkaProperties.buildConsumerProperties(); + Map customizedProperties = baseKafkaProperties.buildConsumerProperties(null); customizedProperties.put( ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ErrorHandlingDeserializer.class); customizedProperties.put( diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java index 3a6c9770fd3623..0193ded97f81b5 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java @@ -44,7 +44,7 @@ protected KafkaListenerContainerFactory createInstance( Arrays.asList(kafkaConfiguration.getBootstrapServers().split(","))); } // else we rely on KafkaProperties which defaults to localhost:9092 - Map customizedProperties = properties.buildConsumerProperties(); + Map customizedProperties = properties.buildConsumerProperties(null); customizedProperties.put( ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG, kafkaConfiguration.getConsumer().getMaxPartitionFetchBytes()); diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaProducerThrottleFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaProducerThrottleFactory.java new file mode 100644 index 
00000000000000..1eaff82fd517f0 --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaProducerThrottleFactory.java @@ -0,0 +1,93 @@ +package com.linkedin.gms.factory.kafka.throttle; + +import com.datahub.metadata.dao.producer.KafkaProducerThrottle; +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.config.MetadataChangeProposalConfig; +import com.linkedin.metadata.config.kafka.KafkaConfiguration; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.mxe.Topics; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import lombok.extern.slf4j.Slf4j; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.AdminClientConfig; +import org.apache.kafka.clients.admin.KafkaAdminClient; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.autoconfigure.kafka.KafkaProperties; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.kafka.config.KafkaListenerEndpointRegistry; +import org.springframework.kafka.listener.MessageListenerContainer; + +@Slf4j +@Configuration +public class KafkaProducerThrottleFactory { + + @Value("${METADATA_CHANGE_LOG_KAFKA_CONSUMER_GROUP_ID:generic-mae-consumer-job-client}") + private String maeConsumerGroupId; + + @Value("${METADATA_CHANGE_PROPOSAL_KAFKA_CONSUMER_GROUP_ID:generic-mce-consumer-job-client}") + private String mceConsumerGroupId; + + @Value("${METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_VERSIONED + "}") + private String versionedTopicName; + + @Value( + "${METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_TIMESERIES + "}") + private String timeseriesTopicName; + + @Bean + public KafkaProducerThrottle kafkaProducerThrottle( + @Qualifier("configurationProvider") ConfigurationProvider provider, + final KafkaProperties kafkaProperties, + final EntityRegistry entityRegistry, + final KafkaListenerEndpointRegistry registry) { + + KafkaConfiguration kafkaConfiguration = provider.getKafka(); + MetadataChangeProposalConfig mcpConfig = provider.getMetadataChangeProposal(); + + return KafkaProducerThrottle.builder() + .entityRegistry(entityRegistry) + .kafkaAdmin(kafkaAdmin(kafkaConfiguration, kafkaProperties)) + .config(mcpConfig.getThrottle()) + .mclConsumerGroupId(maeConsumerGroupId) + .timeseriesTopicName(timeseriesTopicName) + .versionedTopicName(versionedTopicName) + .pauseConsumer( + (pause) -> { + Optional container = + Optional.ofNullable(registry.getListenerContainer(mceConsumerGroupId)); + if (container.isEmpty()) { + log.warn( + "Expected container was missing: {} throttling is not possible.", + mceConsumerGroupId); + } else { + if (pause) { + container.ifPresent(MessageListenerContainer::pause); + } else { + container.ifPresent(MessageListenerContainer::resume); + } + } + }) + .build() + .start(); + } + + private static AdminClient kafkaAdmin( + KafkaConfiguration kafkaConfiguration, final KafkaProperties kafkaProperties) { + Map adminProperties = new HashMap<>(kafkaProperties.buildAdminProperties(null)); + + // KAFKA_BOOTSTRAP_SERVER has precedence over SPRING_KAFKA_BOOTSTRAP_SERVERS + if (kafkaConfiguration.getBootstrapServers() != null + && !kafkaConfiguration.getBootstrapServers().isEmpty()) { + 
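The pauseConsumer callback in the throttle bean above looks up the MCP consumer's listener container in Spring Kafka's KafkaListenerEndpointRegistry and pauses or resumes it. A small sketch of that interaction in isolation, assuming a container was registered under the given listener id (the id handling and logging here are placeholders):

import java.util.Optional;
import org.springframework.kafka.config.KafkaListenerEndpointRegistry;
import org.springframework.kafka.listener.MessageListenerContainer;

public class ConsumerPauseSketch {
  private final KafkaListenerEndpointRegistry registry;

  public ConsumerPauseSketch(KafkaListenerEndpointRegistry registry) {
    this.registry = registry;
  }

  /** Pause or resume the container registered under the listener id, if one exists. */
  public void setPaused(String listenerId, boolean pause) {
    Optional<MessageListenerContainer> container =
        Optional.ofNullable(registry.getListenerContainer(listenerId));
    if (container.isEmpty()) {
      // Without a registered container there is nothing to throttle.
      System.err.println("No listener container registered for " + listenerId);
      return;
    }
    if (pause) {
      container.get().pause();   // stops fetching new records at the next poll cycle
    } else {
      container.get().resume();  // polling picks up again on the next cycle
    }
  }
}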
adminProperties.put( + AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, + Arrays.asList(kafkaConfiguration.getBootstrapServers().split(","))); + } // else we rely on KafkaProperties which defaults to localhost:9092 or environment variables + + return KafkaAdminClient.create(adminProperties); + } +} diff --git a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java index e53e59b863c9c4..af69dce89041e6 100644 --- a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java +++ b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java @@ -13,6 +13,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.inject.name.Named; import com.linkedin.datahub.graphql.GraphQLEngine; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.DataHubGraphQLError; import com.linkedin.metadata.utils.metrics.MetricUtils; import graphql.ExecutionResult; @@ -125,7 +126,7 @@ CompletableFuture> postGraphQL(HttpEntity httpEnt log.info("Processing request, operation: {}, actor urn: {}", queryName, context.getActorUrn()); log.debug("Query: {}, variables: {}", query, variables); - return CompletableFuture.supplyAsync( + return GraphQLConcurrencyUtils.supplyAsync( () -> { log.info("Executing operation {} for {}", queryName, threadName); @@ -164,7 +165,9 @@ CompletableFuture> postGraphQL(HttpEntity httpEnt executionResult.toSpecification()); return new ResponseEntity<>(HttpStatus.SERVICE_UNAVAILABLE); } - }); + }, + this.getClass().getSimpleName(), + "postGraphQL"); } @GetMapping("/graphql") diff --git a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphiQLController.java b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphiQLController.java index 35636bf07eb10b..61f2720c6cfca4 100644 --- a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphiQLController.java +++ b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphiQLController.java @@ -2,6 +2,7 @@ import static java.nio.charset.StandardCharsets.*; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; @@ -34,6 +35,7 @@ public GraphiQLController() { @GetMapping(value = "/graphiql", produces = MediaType.TEXT_HTML_VALUE) @ResponseBody CompletableFuture graphiQL() { - return CompletableFuture.supplyAsync(() -> this.graphiqlHtml); + return GraphQLConcurrencyUtils.supplyAsync( + () -> this.graphiqlHtml, this.getClass().getSimpleName(), "graphiQL"); } } diff --git a/metadata-service/schema-registry-servlet/src/test/java/io/datahubproject/openapi/test/SchemaRegistryControllerTestConfiguration.java b/metadata-service/schema-registry-servlet/src/test/java/io/datahubproject/openapi/test/SchemaRegistryControllerTestConfiguration.java index 7ab673b0a46feb..6901cd665f1661 100644 --- a/metadata-service/schema-registry-servlet/src/test/java/io/datahubproject/openapi/test/SchemaRegistryControllerTestConfiguration.java +++ b/metadata-service/schema-registry-servlet/src/test/java/io/datahubproject/openapi/test/SchemaRegistryControllerTestConfiguration.java @@ -1,6 +1,7 @@ package io.datahubproject.openapi.test; import com.linkedin.metadata.dao.producer.KafkaHealthChecker; +import 
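Both controllers above now route their async work through GraphQLConcurrencyUtils.supplyAsync rather than CompletableFuture.supplyAsync, so GraphQL requests can run on the dedicated worker pool when one has been registered. The utility's implementation is not shown in this patch; the sketch below is only a hypothetical illustration of that dispatch pattern (the class name, field, and fallback behaviour are assumptions, not DataHub's code):

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.function.Supplier;

public final class AsyncDispatchSketch {
  // Set once at startup (e.g. from a worker-pool bean); may stay null when the
  // separate thread pool is disabled.
  private static volatile ExecutorService executorService;

  private AsyncDispatchSketch() {}

  public static void setExecutorService(ExecutorService executor) {
    executorService = executor;
  }

  public static <T> CompletableFuture<T> supplyAsync(
      Supplier<T> supplier, String caller, String operation) {
    // The caller/operation names could feed per-operation metrics; here they are only logged.
    System.out.println("dispatching " + caller + "#" + operation);
    if (executorService == null) {
      // Fall back to the common ForkJoinPool when no dedicated pool was configured.
      return CompletableFuture.supplyAsync(supplier);
    }
    return CompletableFuture.supplyAsync(supplier, executorService);
  }
}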
com.linkedin.metadata.models.registry.EntityRegistry; import org.springframework.boot.test.context.TestConfiguration; import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.context.annotation.ComponentScan; @@ -11,4 +12,6 @@ @ComponentScan(basePackages = {"com.linkedin.gms.factory.kafka", "com.linkedin.gms.factory.config"}) public class SchemaRegistryControllerTestConfiguration { @MockBean KafkaHealthChecker kafkaHealthChecker; + + @MockBean EntityRegistry entityRegistry; } From faa2280b6ffecf829227d70dbfbd557331c0a73f Mon Sep 17 00:00:00 2001 From: "Bharti, Aakash" Date: Thu, 6 Jun 2024 17:00:06 +0530 Subject: [PATCH 2/2] fix comments for code readability --- .../source/confluent_schema_registry.py | 60 +++++++++---------- .../src/datahub/ingestion/source/kafka.py | 29 ++++----- 2 files changed, 41 insertions(+), 48 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py b/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py index cfa199314fc077..fba71240282c43 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py +++ b/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py @@ -76,10 +76,7 @@ def create( ) -> "ConfluentSchemaRegistry": return cls(source_config, report) - def _get_subject_for_topic( - self, dataset_subtype: str, is_key_schema: bool - ) -> Optional[str]: - topic: str = dataset_subtype + def _get_subject_for_topic(self, topic: str, is_key_schema: bool) -> Optional[str]: subject_key_suffix: str = "-key" if is_key_schema else "-value" # For details on schema registry subject name strategy, # see: https://docs.confluent.io/platform/current/schema-registry/serdes-develop/index.html#how-the-naming-strategies-work @@ -234,24 +231,25 @@ def get_schemas_from_confluent_ref_json( return all_schemas def _get_schema_and_fields( - self, dataset_subtype: str, is_key_schema: bool, is_subject: bool + self, topic: str, is_key_schema: bool, is_subject: bool ) -> Tuple[Optional[Schema], List[SchemaField]]: schema: Optional[Schema] = None + kafka_entity = "subject" if is_subject else "topic" - # if provided schema as dataset_subtype, assuming it as value subject + # if a schema is provided as the topic, assume it is the value subject schema_type_str: Optional[str] = "value" topic_subject: Optional[str] = None if not is_subject: schema_type_str = "key" if is_key_schema else "value" topic_subject = self._get_subject_for_topic( - dataset_subtype=dataset_subtype, is_key_schema=is_key_schema + topic=topic, is_key_schema=is_key_schema ) else: - topic_subject = dataset_subtype + topic_subject = topic if topic_subject is not None: logger.debug( - f"The {schema_type_str} schema subject:'{topic_subject}' is found for dataset_subtype:'{dataset_subtype}'." + f"The {schema_type_str} schema subject:'{topic_subject}' is found for {kafka_entity}:'{topic}'." ) try: registered_schema = self.schema_registry_client.get_latest_version( @@ -260,29 +258,29 @@ def _get_schema_and_fields( schema = registered_schema.schema except Exception as e: logger.warning( - f"For dataset_subtype: {dataset_subtype}, failed to get {schema_type_str} schema from schema registry using subject:'{topic_subject}': {e}." + f"For {kafka_entity}: {topic}, failed to get {schema_type_str} schema from schema registry using subject:'{topic_subject}': {e}." 
) self.report.report_warning( - dataset_subtype, + topic, f"failed to get {schema_type_str} schema from schema registry using subject:'{topic_subject}': {e}.", ) else: logger.debug( - f"For dataset_subtype: {dataset_subtype}, the schema registry subject for the {schema_type_str} schema is not found." + f"For {kafka_entity}: {topic}, the schema registry subject for the {schema_type_str} schema is not found." ) if not is_key_schema: # Value schema is always expected. Report a warning. self.report.report_warning( - dataset_subtype, + topic, f"The schema registry subject for the {schema_type_str} schema is not found." - f" The dataset_subtype is either schema-less, or no messages have been written to the dataset_subtype yet.", + f" The {kafka_entity} is either schema-less, or no messages have been written to the {kafka_entity} yet.", ) - # Obtain the schema fields from schema for the dataset_subtype. + # Obtain the schema fields from schema for the topic. fields: List[SchemaField] = [] if schema is not None: fields = self._get_schema_fields( - dataset_subtype=dataset_subtype, + topic=topic, schema=schema, is_key_schema=is_key_schema, ) @@ -308,7 +306,7 @@ def _load_json_schema_with_resolved_references( return jsonref_schema def _get_schema_fields( - self, dataset_subtype: str, schema: Schema, is_key_schema: bool + self, topic: str, schema: Schema, is_key_schema: bool ) -> List[SchemaField]: # Parse the schema and convert it to SchemaFields. fields: List[SchemaField] = [] @@ -331,7 +329,7 @@ def _get_schema_fields( imported_schemas: List[ ProtobufSchema ] = self.get_schemas_from_confluent_ref_protobuf(schema) - base_name: str = dataset_subtype.replace(".", "_") + base_name: str = topic.replace(".", "_") fields = protobuf_util.protobuf_schema_to_mce_fields( ProtobufSchema( f"{base_name}-key.proto" @@ -343,16 +341,14 @@ def _get_schema_fields( is_key_schema=is_key_schema, ) elif schema.schema_type == "JSON": - base_name = dataset_subtype.replace(".", "_") + base_name = topic.replace(".", "_") canonical_name = ( f"{base_name}-key" if is_key_schema else f"{base_name}-value" ) jsonref_schema = self._load_json_schema_with_resolved_references( schema=schema, name=canonical_name, - subject=f"{dataset_subtype}-key" - if is_key_schema - else f"{dataset_subtype}-value", + subject=f"{topic}-key" if is_key_schema else f"{topic}-value", ) fields = list( JsonSchemaTranslator.get_fields_from_schema( @@ -361,25 +357,25 @@ def _get_schema_fields( ) elif not self.source_config.ignore_warnings_on_schema_type: self.report.report_warning( - dataset_subtype, + topic, f"Parsing kafka schema type {schema.schema_type} is currently not implemented", ) return fields def _get_schema_metadata( - self, dataset_subtype: str, platform_urn: str, is_subject: bool + self, topic: str, platform_urn: str, is_subject: bool ) -> Optional[SchemaMetadata]: # Process the value schema schema, fields = self._get_schema_and_fields( - dataset_subtype=dataset_subtype, + topic=topic, is_key_schema=False, is_subject=is_subject, ) # type: Tuple[Optional[Schema], List[SchemaField]] # Process the key schema key_schema, key_fields = self._get_schema_and_fields( - dataset_subtype=dataset_subtype, + topic=topic, is_key_schema=True, is_subject=is_subject, ) # type:Tuple[Optional[Schema], List[SchemaField]] @@ -393,7 +389,7 @@ def _get_schema_metadata( md5_hash: str = md5(schema_as_string.encode()).hexdigest() return SchemaMetadata( - schemaName=dataset_subtype, + schemaName=topic, version=0, hash=md5_hash, platform=platform_urn, @@ -408,20 +404,20 @@ 
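The renamed helpers in confluent_schema_registry.py resolve a registry subject for a topic (under the default TopicNameStrategy the value schema lives at "<topic>-value" and the key schema at "<topic>-key") and then fetch the latest registered version, degrading gracefully when no subject exists. A rough Java analogue of that lookup using Confluent's schema-registry client is sketched below; the registry URL and topic name are placeholders, and error handling is reduced to a log line:

import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient;
import io.confluent.kafka.schemaregistry.client.SchemaMetadata;
import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;

public class SubjectLookupSketch {
  public static void main(String[] args) {
    SchemaRegistryClient client =
        new CachedSchemaRegistryClient("http://localhost:8081", 100); // placeholder URL

    String topic = "my_topic"; // placeholder topic
    boolean isKeySchema = false;
    // TopicNameStrategy: value schemas under "<topic>-value", key schemas under "<topic>-key".
    String subject = topic + (isKeySchema ? "-key" : "-value");

    try {
      SchemaMetadata latest = client.getLatestSchemaMetadata(subject);
      System.out.printf(
          "subject=%s version=%d type=%s%n", subject, latest.getVersion(), latest.getSchemaType());
    } catch (Exception e) {
      // Topics without a registered subject are still ingestible, just without schema fields.
      System.err.println("No schema found for subject " + subject + ": " + e.getMessage());
    }
  }
}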
def _get_schema_metadata( return None def get_schema_metadata( - self, dataset_subtype: str, platform_urn: str, is_subject: bool + self, topic: str, platform_urn: str, is_subject: bool ) -> Optional[SchemaMetadata]: - logger.debug(f"Inside get_schema_metadata {dataset_subtype} {platform_urn}") + logger.debug(f"Inside get_schema_metadata {topic} {platform_urn}") # Process the value schema schema, fields = self._get_schema_and_fields( - dataset_subtype=dataset_subtype, + topic=topic, is_key_schema=False, is_subject=is_subject, ) # type: Tuple[Optional[Schema], List[SchemaField]] # Process the key schema key_schema, key_fields = self._get_schema_and_fields( - dataset_subtype=dataset_subtype, + topic=topic, is_key_schema=True, is_subject=is_subject, ) # type:Tuple[Optional[Schema], List[SchemaField]] @@ -435,7 +431,7 @@ def get_schema_metadata( md5_hash = md5(schema_as_string.encode()).hexdigest() return SchemaMetadata( - schemaName=dataset_subtype, + schemaName=topic, version=0, hash=md5_hash, platform=platform_urn, diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka.py index ae055c51bb6bee..0d718e509d5c58 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka.py @@ -308,7 +308,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: if self.source_config.topic_patterns.allowed(topic): try: yield from self._extract_record( - topic, "", topic_detail, extra_topic_details.get(topic) + topic, False, topic_detail, extra_topic_details.get(topic) ) except Exception as e: logger.warning(f"Failed to extract topic {topic}", exc_info=True) @@ -322,7 +322,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: for subject in self.schema_registry_client.get_subjects(): try: yield from self._extract_record( - "", subject, topic_detail=None, extra_topic_config=None + subject, True, topic_detail=None, extra_topic_config=None ) except Exception as e: logger.warning(f"Failed to extract subject {subject}", exc_info=True) @@ -333,25 +333,25 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: def _extract_record( self, topic: str, - subject: str, + is_subject: bool, topic_detail: Optional[TopicMetadata], extra_topic_config: Optional[Dict[str, ConfigEntry]], ) -> Iterable[MetadataWorkUnit]: AVRO = "AVRO" - kafka_entity = topic if len(topic) != 0 else subject - is_subject = False if len(topic) != 0 else True + kafka_entity = "subject" if is_subject else "topic" - logger.debug(f"kafka entity name = {kafka_entity}") + logger.debug(f"extracting schema metadata from kafka entity = {kafka_entity}") platform_urn = make_data_platform_urn(self.platform) # 1. Create schemaMetadata aspect (pass control to SchemaRegistry) schema_metadata = self.schema_registry_client.get_schema_metadata( - kafka_entity, platform_urn, is_subject + topic, platform_urn, is_subject ) # topic can have no associated subject, but still it can be ingested without schema + # for schema ingestion, ingest only if it has valid schema if is_subject: if schema_metadata is None: return @@ -359,10 +359,7 @@ def _extract_record( else: dataset_name = topic - # dataset_name = schema_metadata.schemaName if len(topic) == 0 else topic # 2. Create the default dataset snapshot for the topic. 
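get_schema_metadata above hashes the serialized schema text with MD5 and stores the digest as the SchemaMetadata hash, giving a cheap fingerprint for detecting schema changes. A small Java sketch of the same fingerprinting, with placeholder schema strings:

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.HexFormat;

public class SchemaFingerprintSketch {
  public static void main(String[] args) throws Exception {
    String keySchema = "{\"type\":\"string\"}";        // placeholder key schema text
    String valueSchema = "{\"type\":\"record\"}";      // placeholder value schema text

    // Combine the schema text and hash it, mirroring the md5 fingerprint used above.
    String combined = keySchema + valueSchema;
    byte[] digest =
        MessageDigest.getInstance("MD5").digest(combined.getBytes(StandardCharsets.UTF_8));
    System.out.println("schema hash: " + HexFormat.of().formatHex(digest));
  }
}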
- # if schema_metadata is not None: - # dataset_name = schema_metadata.schemaName if len(topic) == 0 else topic dataset_urn = make_dataset_urn_with_platform_instance( platform=self.platform, name=dataset_name, @@ -386,17 +383,17 @@ def _extract_record( # build custom properties for topic, schema properties may be added as needed custom_props: Dict[str, str] = {} - if len(topic) != 0: + if not is_subject: custom_props = self.build_custom_properties( topic, topic_detail, extra_topic_config ) - schemaName: Optional[ + schema_name: Optional[ str ] = self.schema_registry_client._get_subject_for_topic( - dataset_subtype=topic, is_key_schema=False + topic, is_key_schema=False ) - if schemaName is not None: - custom_props["Schema Name"] = schemaName + if schema_name is not None: + custom_props["Schema Name"] = schema_name # 4. Set dataset's description, tags, ownership, etc, if topic schema type is avro description: Optional[str] = None @@ -472,7 +469,7 @@ def _extract_record( yield MetadataWorkUnit(id=f"kafka-{kafka_entity}", mce=mce) # 7. Add the subtype aspect marking this as a "topic" or "schema" - typeName = DatasetSubTypes.TOPIC if len(topic) != 0 else DatasetSubTypes.SCHEMA + typeName = DatasetSubTypes.SCHEMA if is_subject else DatasetSubTypes.TOPIC yield MetadataChangeProposalWrapper( entityUrn=dataset_urn, aspect=SubTypesClass(typeNames=[typeName]),