From 26feea107b63b1a722bd9536f82db4e2e29ceccc Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 16 May 2024 07:05:18 -0700 Subject: [PATCH 01/12] Adding new docs for incidents assertions etc --- docs-website/sidebars.js | 5 +- docs/api/tutorials/assertions.md | 476 ++++++++++++++++++ docs/api/tutorials/incidents.md | 164 ++++++ docs/api/tutorials/operations.md | 136 +++++ .../src/datahub_airflow_plugin/__init__.py | 2 +- .../library/dataset_read_operations.py | 19 + .../library/dataset_report_operation.py | 21 + .../examples/library/delete_assertion.py | 18 + .../examples/library/run_assertion.py | 18 + .../examples/library/run_assertions.py | 26 + .../library/run_assertions_for_asset.py | 26 + 11 files changed, 909 insertions(+), 2 deletions(-) create mode 100644 docs/api/tutorials/assertions.md create mode 100644 docs/api/tutorials/incidents.md create mode 100644 docs/api/tutorials/operations.md create mode 100644 metadata-ingestion/examples/library/dataset_read_operations.py create mode 100644 metadata-ingestion/examples/library/dataset_report_operation.py create mode 100644 metadata-ingestion/examples/library/delete_assertion.py create mode 100644 metadata-ingestion/examples/library/run_assertion.py create mode 100644 metadata-ingestion/examples/library/run_assertions.py create mode 100644 metadata-ingestion/examples/library/run_assertions_for_asset.py diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 865b37c961a717..69bc31c30b8b91 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -677,7 +677,7 @@ module.exports = { }, { type: "category", - label: "Datahub Actions", + label: "DataHub Actions", link: { type: "doc", id: "docs/act-on-metadata" }, items: [ "docs/actions/README", @@ -729,6 +729,9 @@ module.exports = { "docs/api/tutorials/deprecation", "docs/api/tutorials/descriptions", "docs/api/tutorials/custom-properties", + "docs/api/tutorials/assertions", + "docs/api/tutorials/incidents", + 
"docs/api/tutorials/operations", "docs/api/tutorials/ml", ], }, diff --git a/docs/api/tutorials/assertions.md b/docs/api/tutorials/assertions.md new file mode 100644 index 00000000000000..fd270c96471028 --- /dev/null +++ b/docs/api/tutorials/assertions.md @@ -0,0 +1,476 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Assertions + +## Why Would You Use Assertions APIs? + +The Assertions APIs allow you to create, schedule, run, and delete Assertions with Acryl Cloud. + +Supported Assertion Types include: + +- [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions) +- [Volume Assertions](/docs/managed-datahub/observe/volume-assertions) +- [Column Assertions](/docs/managed-datahub/observe/column-assertions) +- [Schema Assertions](/docs/managed-datahub/observe/schema-assertions) +- [Custom SQL Assertions](/docs/managed-datahub/observe/custom-sql-assertions) + + +### Goal Of This Guide + +This guide will show you how to create, schedule, run and delete Assertions for a Table. + +## Prerequisites + +The actor making API calls must have the `Edit Assertions` and `Edit Monitors` privileges for the Tables at hand. + +## Create Assertions + +You can create new dataset Assertions to DataHub using the following APIs. + + + + +### Freshness Assertion + +To create a new freshness assertion, use the `upsertDatasetFreshnessAssertionMonitor` GraphQL Mutation. + +```graphql +mutation upsertDatasetFreshnessAssertionMonitor { + upsertDatasetFreshnessAssertionMonitor( + input: { + entityUrn: "", + schedule: { + type: FIXED_INTERVAL, + fixedInterval: { unit: HOUR, multiple: 8 } + } + evaluationSchedule: { + timezone: "America/Los_Angeles", + cron: "0 */8 * * *" + } + evaluationParameters: { + sourceType: INFORMATION_SCHEMA + } + mode: ACTIVE + } + ) { + urn + } +} +``` + +For more details, see the [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions) guide. 
+ +### Volume Assertions + +To create a new volume assertion, use the `upsertDatasetVolumeAssertionMonitor` GraphQL Mutation. + +```graphql +mutation upsertDatasetVolumeAssertionMonitor { + upsertDatasetVolumeAssertionMonitor( + input: { + entityUrn: "" + type: ROW_COUNT_TOTAL + rowCountTotal: { + operator: BETWEEN + parameters: { + minValue: { + value: "10" + type: NUMBER + } + maxValue: { + value: "20" + type: NUMBER + } + } + } + evaluationSchedule: { + timezone: "America/Los_Angeles" + cron: "0 */8 * * *" + } + evaluationParameters: { + sourceType: INFORMATION_SCHEMA + } + mode: ACTIVE + } + ) { + urn + } +} +``` + +For more details, see the [Volume Assertions](/docs/managed-datahub/observe/volume-assertions) guide. + +### Column Assertions + +To create a new column assertion, use the `upsertDatasetVolumeAssertionMonitor` GraphQL Mutation. + +```graphql +mutation upsertDatasetFieldAssertionMonitor { + upsertDatasetFieldAssertionMonitor( + input: { + entityUrn: "" + type: FIELD_VALUES, + fieldValuesAssertion: { + field: { + path: "", + type: "NUMBER", + nativeType: "NUMBER(38,0)" + }, + operator: GREATER_THAN, + parameters: { + value: { + type: NUMBER, + value: "10" + } + }, + failThreshold: { + type: COUNT, + value: 0 + }, + excludeNulls: true + } + evaluationSchedule: { + timezone: "America/Los_Angeles" + cron: "0 */8 * * *" + } + evaluationParameters: { + sourceType: ALL_ROWS_QUERY + } + mode: ACTIVE + } + ){ + urn + } +} +``` + +For more details, see the [Column Assertions](/docs/managed-datahub/observe/column-assertions) guide. + +### Custom SQL Assertions + +To create a new custom SQL assertion, use the `upsertDatasetSqlAssertionMonitor` GraphQL Mutation.
+ +```graphql +mutation upsertDatasetSqlAssertionMonitor { + upsertDatasetSqlAssertionMonitor( + assertionUrn: "" + input: { + entityUrn: "" + type: METRIC, + description: "", + statement: "", + operator: GREATER_THAN_OR_EQUAL_TO, + parameters: { + value: { + value: "100", + type: NUMBER + } + } + evaluationSchedule: { + timezone: "America/Los_Angeles" + cron: "0 */6 * * *" + } + mode: ACTIVE + } + ) { + urn + } +} +``` + +For more details, see the [Custom SQL Assertions](/docs/managed-datahub/observe/custom-sql-assertions) guide. + +### Schema Assertions + +To create a new schema assertion, use the `upsertDatasetSchemaAssertionMonitor` GraphQL Mutation. + +```graphql +mutation upsertDatasetSchemaAssertionMonitor { + upsertDatasetSchemaAssertionMonitor( + assertionUrn: "urn:li:assertion:existing-assertion-id", + input: { + entityUrn: "", + assertion: { + compatibility: EXACT_MATCH, + fields: [ + { + path: "id", + type: STRING + }, + { + path: "count", + type: NUMBER + }, + { + path: "struct", + type: STRUCT + }, + { + path: "struct.nestedBooleanField", + type: BOOLEAN + } + ] + }, + description: "", + mode: ACTIVE + } + ) +} +``` + +For more details, see the [Schema Assertions](/docs/managed-datahub/observe/schema-assertions) guide. + + + + + +## Run Assertions + +You can use the following APIs to trigger the assertions you've created to run on-demand. This is +particularly useful for running assertions on a custom schedule, for example from your production +data pipelines. + + + + +### Run an assertion + +```graphql +mutation runAssertion { + runAssertion(urn: "urn:li:assertion:your-assertion-id", saveResult: true) { + type + nativeResults { + key + value + } + } +} +``` + +Where **type** will contain the Result of the assertion run, either `SUCCESS`, `FAILURE`, or `ERROR`. + +The `saveResult` argument determines whether the result of the assertion will be saved to DataHub's backend, +and available to view through the DataHub UI. 
If this is set to false, the result will NOT be stored in DataHub's +backend. The value defaults to `true`. + +If the assertion is external (not natively executed by Acryl), this API will return an error. + +If running the assertion is successful, the result will be returned as follows: + +```json +{ + "data": { + "runAssertion": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "1382" + } + ] + } + }, + "extensions": {} +} +``` + +### Run multiple assertions + +```graphql +mutation runAssertions { + runAssertions(urns: ["urn:li:assertion:your-assertion-id-1", "urn:li:assertion:your-assertion-id-2"], saveResults: true) { + passingCount + failingCount + errorCount + results { + urn + type + nativeResults { + key + value + } + } + } +} +``` + +Where **type** will contain the Result of the assertion run, either `SUCCESS`, `FAILURE`, or `ERROR`. + +The `saveResults` argument determines whether the result of the assertion will be saved to DataHub's backend, +and available to view through the DataHub UI. If this is set to false, the result will NOT be stored in DataHub's +backend. The value defaults to `true`. + +If any of the assertions are external (not natively executed by Acryl), they will simply be omitted from the result set. + +If running the assertions is successful, the results will be returned as follows: + +```json +{ + "data": { + "runAssertions": { + "passingCount": 1, + "failingCount": 1, + "errorCount": 0, + "results": [ + { + "urn": "urn:li:assertion:your-assertion-id-1", + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "1382" + } + ] + }, + { + "urn": "urn:li:assertion:your-assertion-id-2", + "type": "FAILURE", + "nativeResults": [ + { + "key": "Value", + "value": "12323" + } + ] + } + ] + } + }, + "extensions": {} +} +``` + +Where you should see one result object for each assertion.
+ +### Run all assertions for table + +You can also run all assertions for a specific data asset using the `runAssertionsForAsset` mutation. + +```graphql +mutation runAssertionsForAsset { + runAssertionsForAsset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchase_events,PROD)", saveResults: true) { + passingCount + failingCount + errorCount + results { + urn + type + nativeResults { + key + value + } + } + } +} +``` + +Where `type` will contain the Result of the assertion run, either `SUCCESS`, `FAILURE`, or `ERROR`. + +The `saveResults` argument determines whether the result of the assertion will be saved to DataHub's backend, +and available to view through the DataHub UI. If this is set to false, the result will NOT be stored in DataHub's +backend. The value defaults to `true`. + +If any of the assertions are external (not natively executed by Acryl), they will simply be omitted from the result +set. + +If running the assertions is successful, the results will be returned as follows: + +```json +{ + "data": { + "runAssertionsForAsset": { + "passingCount": 1, + "failingCount": 1, + "errorCount": 0, + "results": [ + { + "urn": "urn:li:assertion:your-assertion-id-1", + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "1382" + } + ] + }, + { + "urn": "urn:li:assertion:your-assertion-id-2", + "type": "FAILURE", + "nativeResults": [ + { + "key": "Value", + "value": "12323" + } + ] + } + ] + } + }, + "extensions": {} +} +``` + +Where you should see one result object for each assertion.
+ + + + + + +### Run assertion + +```python +{{ inline /metadata-ingestion/examples/library/run_assertion.py show_path_as_comment }} +``` + +### Run multiple assertions + +```python +{{ inline /metadata-ingestion/examples/library/run_assertions.py show_path_as_comment }} +``` + +### Run all assertions for table + +```python +{{ inline /metadata-ingestion/examples/library/run_assertions_for_asset.py show_path_as_comment }} +``` + + + + + + +## Delete Assertions + +You can delete assertions from DataHub using the following APIs. + + + + +```graphql +mutation deleteAssertion { + deleteAssertion(urn: "urn:li:assertion:test") +} +``` + +If you see the following response, the operation was successful: + +```json +{ + "data": { + "deleteAssertion": true + }, + "extensions": {} +} +``` + + + + + +```python +{{ inline /metadata-ingestion/examples/library/delete_assertion.py show_path_as_comment }} +``` + + + diff --git a/docs/api/tutorials/incidents.md b/docs/api/tutorials/incidents.md new file mode 100644 index 00000000000000..20a24d58a1db42 --- /dev/null +++ b/docs/api/tutorials/incidents.md @@ -0,0 +1,164 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Incidents + +## Why Would You Use Incidents APIs? + +The Incidents APIs allow you to raise, retrieve, update and resolve data incidents via API. This is +useful for raising or resolving data incidents programmatically, for example from Airflow, Prefect, or Dagster DAGs. +Incidents are also useful for conditional Circuit Breaking in these pipelines. + +### Goal Of This Guide + +This guide will show you how to raise, retrieve, update and resolve data incidents via API. + +## Prerequisites + +The actor making API calls must have the `Edit Incidents` privilege for the Tables at hand. + +## Raise Incident + +You can raise a new Data Incident for an existing asset using the following APIs.
+ + + + +```graphql +mutation raiseIncident { + raiseIncident( + input: { + resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,public.prod.purchases,PROD)", + type: OPERATIONAL, + title: "Data is Delayed", + description: "Data is delayed on May 15, 2024 because of downtime in the Spark Cluster.", + } + ) +} +``` + +Where `resourceUrn` is the unique identifier for the data asset (dataset, dashboard, chart, data job, or data flow) you want to raise the incident on. + +Where supported Incident Types include + +- `OPERATIONAL` +- `FRESHNESS` +- `VOLUME` +- `COLUMN` +- `SQL` +- `DATA_SCHEMA` +- `CUSTOM` + +If the request succeeds, the response will contain a unique identifier (URN) for the new incident: + +```json +{ + "data": { + "raiseIncident": "urn:li:incident:new-incident-id" + }, + "extensions": {} +} +``` + + + + + +``` +Python SDK support coming soon! +``` + + + + + +## Get Incidents For Data Asset + +You can retrieve the incidents and their statuses for a given Data Asset using the following APIs. + + + + +```graphql +query getAssetIncidents { + dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,public.prod.purchases,PROD)") { + incidents( + state: ACTIVE, start: 0, count: 20 + ) { + start + count + total + incidents { + urn + incidentType + title + description + status { + state + lastUpdated { + time + actor + } + } + } + } + } +} +``` + +Where you can filter for active incidents by passing the `ACTIVE` state and resolved incidents by passing the `RESOLVED` state. +This will return all relevant incidents for the dataset. + + + + + +``` +Python SDK support coming soon! +``` + + + + + +## Resolve Incidents + +You can update the status of an incident using the following APIs. + + + + +```graphql +mutation updateIncidentStatus { + updateIncidentStatus( + input: { + state: RESOLVED, + message: "The delayed data issue was resolved at 4:55pm on May 15."
+ } + ) +} +``` + +You can also reopen an incident by updating the state from `RESOLVED` to `ACTIVE`. + +If you see the following response, the operation was successful: + +```json +{ + "data": { + "updateIncidentStatus": true + }, + "extensions": {} +} +``` + + + + + +``` +Python SDK support coming soon! +``` + + + \ No newline at end of file diff --git a/docs/api/tutorials/operations.md b/docs/api/tutorials/operations.md new file mode 100644 index 00000000000000..94bfd25fa2d0c1 --- /dev/null +++ b/docs/api/tutorials/operations.md @@ -0,0 +1,136 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Operations + +## Why Would You Use Operations APIs? + +The Operations APIs allow you to report operational changes that were made to a given Dataset or Table using the 'Operation' concept. +These operations may be viewed on the Dataset Profile (e.g. as last modified time), accessed via the DataHub GraphQL API, or +used to as inputs to Acryl Cloud [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions). + +### Goal Of This Guide + +This guide will show you how to report and query Operations for a Dataset. + +## Prerequisites + +For this tutorial, you need to deploy DataHub Quickstart and ingest sample data. +For detailed steps, please refer to [DataHub Quickstart Guide](/docs/quickstart.md). + +:::note +Before reporting operations for a dataset, you need to ensure the targeted dataset is already present in DataHub. +::: + +## Report Operations + +You can report dataset operations to DataHub using the following APIs.
+ + + + +```graphql +mutation reportOperation { + reportOperation( + input: { + urn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)", + operationType: INSERT, + sourceType: DATA_PROCESS + } + ) +} +``` + +Where supported operation types include + +- `INSERT` +- `UPDATE` +- `DELETE` +- `CREATE` +- `ALTER` +- `DROP` +- `CUSTOM` + +If you want to report an operation that happened at a specific time, you can also optionally provide +the `timestampMillis` field. If not provided, the current server time will be used as the operation time. + +If you see the following response, the operation was successful: + +```json +{ + "data": { + "reportOperation": true + }, + "extensions": {} +} +``` + + + + + +```python +{{ inline /metadata-ingestion/examples/library/dataset_report_operation.py show_path_as_comment }} +``` + + + + +## Read Operations + +You can read dataset operations from DataHub using the following APIs. + + + + +```graphql +query dataset { + dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)") { + operations( + limit: 10, filter: [], startTimeMillis: <start-timestamp-ms>, endTimeMillis: <end-timestamp-ms> + ) { + timestampMillis + operationType + sourceType + } + } +} +``` + +Where `startTimeMillis` and `endTimeMillis` are optional. By default, operations are sorted by time descending. + +If you see the following response, the operation was successful: + +```json +{ + "data": { + "dataset": { + "operations": [ + { + "timestampMillis": 1231232332, + "operationType": "INSERT", + "sourceType": "DATA_PROCESS" + } + ] + } + }, + "extensions": {} +} +``` + + + + + +```python +{{ inline /metadata-ingestion/examples/library/dataset_read_operations.py show_path_as_comment }} +``` + + + + +### Expected Outcomes of Reporting Operations + +Reported Operations will appear when displaying the Last Updated time for a Dataset on its DataHub Profile.
+They will also be used when selecting the `DataHub Operation` source type under the **Advanced** settings of a Freshness +Assertion. \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py index e4040e3a17dfdc..6ec30c4d9fe5bd 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py @@ -1,6 +1,6 @@ # Published at https://pypi.org/project/acryl-datahub/. __package_name__ = "acryl-datahub-airflow-plugin" -__version__ = "1!0.0.0.dev0" +__version__ = "0.0.0.dev1" def is_dev_mode() -> bool: diff --git a/metadata-ingestion/examples/library/dataset_read_operations.py b/metadata-ingestion/examples/library/dataset_read_operations.py new file mode 100644 index 00000000000000..78c9a92141cef2 --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_read_operations.py @@ -0,0 +1,19 @@ +from datahub.api.graphql import Operation + +DATAHUB_HOST = "https//:org.acryl.io/gms" +DATAHUB_TOKEN = ", + # end_time_millis= +) diff --git a/metadata-ingestion/examples/library/dataset_report_operation.py b/metadata-ingestion/examples/library/dataset_report_operation.py new file mode 100644 index 00000000000000..a27019ec47ab1f --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_report_operation.py @@ -0,0 +1,21 @@ +from datahub.api.graphql import Operation + +DATAHUB_HOST = "https//:org.acryl.io/gms" +DATAHUB_TOKEN = " Date: Thu, 16 May 2024 07:39:28 -0700 Subject: [PATCH 02/12] Adding docs for fetching assertions --- docs/api/tutorials/assertions.md | 498 ++++++++++++++++++++++++++++++- 1 file changed, 496 insertions(+), 2 deletions(-) diff --git a/docs/api/tutorials/assertions.md b/docs/api/tutorials/assertions.md index fd270c96471028..7f47689b062703 100644 --- a/docs/api/tutorials/assertions.md +++ 
b/docs/api/tutorials/assertions.md @@ -103,7 +103,7 @@ For more details, see the [Volume Assertions](/docs/managed-datahub/observe/volu ### Column Assertions -To create a new column assertion, use the `upsertDatasetVolumeAssertionMonitor` GraphQL Mutation. +To create a new column assertion, use the `upsertDatasetFieldAssertionMonitor` GraphQL Mutation. ```graphql mutation upsertDatasetFieldAssertionMonitor { @@ -225,6 +225,501 @@ For more details, see the [Schema Assertions](/docs/managed-datahub/observe/sche +## Get Assertions + +You can use the following APIs to + +1. Fetch existing assertion definitions + run history +2. Fetch the assertions associated with a given table + their run history. + + + + +### Get Assertions for a Table + +To retrieve all the assertions for a table, you can use the following (super long) GraphQL Query. + +```graphql +query dataset { + dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchases,PROD)") { + assertions(start: 0, count: 1000) { + start + count + total + assertions { + # Fetch the last run of each associated assertion. 
+ runEvents(status: COMPLETE, limit: 1) { + total + failed + succeeded + runEvents { + timestampMillis + status + result { + type + nativeResults { + key + value + } + } + } + } + info { + type + description + lastUpdated { + time + actor + } + datasetAssertion { + datasetUrn + scope + aggregation + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + fields { + urn + path + } + nativeType + nativeParameters { + key + value + } + logic + } + freshnessAssertion { + type + entityUrn + schedule { + type + cron { + cron + timezone + } + fixedInterval { + unit + multiple + } + } + filter { + type + sql + } + } + sqlAssertion { + type + entityUrn + statement + changeType + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + fieldAssertion { + type + entityUrn + filter { + type + sql + } + fieldValuesAssertion { + field { + path + type + nativeType + } + transform { + type + } + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + failThreshold { + type + value + } + excludeNulls + } + fieldMetricAssertion { + field { + path + type + nativeType + } + metric + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + } + volumeAssertion { + type + entityUrn + filter { + type + sql + } + rowCountTotal { + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + rowCountChange { + type + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + } + schemaAssertion { + entityUrn + compatibility + fields { + path + type + nativeType + } + schema { + fields { + fieldPath + type + nativeDataType + } + } + } + source { + type + created { + time + actor + } + } + } 
+ } + } + } +} +``` + +### Get a single assertion + +You can use the following GraphQL query to fetch a single assertion by its URN. + +```graphql +query getAssertion { + assertion(urn: "urn:li:assertion:assertion-id") { + # Fetch the last 10 runs for the assertion. + runEvents(status: COMPLETE, limit: 10) { + total + failed + succeeded + runEvents { + timestampMillis + status + result { + type + nativeResults { + key + value + } + } + } + } + info { + type + description + lastUpdated { + time + actor + } + datasetAssertion { + datasetUrn + scope + aggregation + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + fields { + urn + path + } + nativeType + nativeParameters { + key + value + } + logic + } + freshnessAssertion { + type + entityUrn + schedule { + type + cron { + cron + timezone + } + fixedInterval { + unit + multiple + } + } + filter { + type + sql + } + } + sqlAssertion { + type + entityUrn + statement + changeType + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + fieldAssertion { + type + entityUrn + filter { + type + sql + } + fieldValuesAssertion { + field { + path + type + nativeType + } + transform { + type + } + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + failThreshold { + type + value + } + excludeNulls + } + fieldMetricAssertion { + field { + path + type + nativeType + } + metric + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + } + volumeAssertion { + type + entityUrn + filter { + type + sql + } + rowCountTotal { + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + rowCountChange { + type + operator + parameters { + value { + value + type + } + minValue { + value + 
type + } + maxValue { + value + type + } + } + } + } + schemaAssertion { + entityUrn + compatibility + fields { + path + type + nativeType + } + schema { + fields { + fieldPath + type + nativeDataType + } + } + } + source { + type + created { + time + actor + } + } + } + } +} +``` + + + + + +```python +Python support coming soon! +``` + + + + + ## Run Assertions You can use the following APIs to trigger the assertions you've created to run on-demand. This is @@ -414,7 +909,6 @@ Where you should see one result object for each assertion. - ### Run assertion From f021a4f446278b369f1a8cfef74cc7a0f9531473 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 16 May 2024 08:19:36 -0700 Subject: [PATCH 03/12] Correct the links --- docs/api/tutorials/assertions.md | 20 ++++++++++---------- docs/api/tutorials/operations.md | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/api/tutorials/assertions.md b/docs/api/tutorials/assertions.md index 7f47689b062703..50433200b45c5d 100644 --- a/docs/api/tutorials/assertions.md +++ b/docs/api/tutorials/assertions.md @@ -9,11 +9,11 @@ The Assertions APIs allow you to create, schedule, run, and delete Assertions wi Supported Assertion Types include: -- [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions) -- [Volume Assertions](/docs/managed-datahub/observe/volume-assertions) -- [Column Assertions](/docs/managed-datahub/observe/column-assertions) -- [Schema Assertions](/docs/managed-datahub/observe/schema-assertions) -- [Custom SQL Assertions](/docs/managed-datahub/observe/custom-sql-assertions) +- [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions.md) +- [Volume Assertions](/docs/managed-datahub/observe/volume-assertions.md) +- [Column Assertions](/docs/managed-datahub/observe/column-assertions.md) +- [Schema Assertions](/docs/managed-datahub/observe/schema-assertions.md) +- [Custom SQL Assertions](/docs/managed-datahub/observe/custom-sql-assertions.md) ### Goal Of This 
Guide @@ -59,7 +59,7 @@ mutation upsertDatasetFreshnessAssertionMonitor { } ``` -For more details, see the [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions) guide. +For more details, see the [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions.md) guide. ### Volume Assertions @@ -99,7 +99,7 @@ mutation upsertDatasetVolumeAssertionMonitor { } ``` -For more details, see the [Volume Assertions](/docs/managed-datahub/observe/volume-assertions) guide. +For more details, see the [Volume Assertions](/docs/managed-datahub/observe/volume-assertions.md) guide. ### Column Assertions @@ -145,7 +145,7 @@ mutation upsertDatasetFieldAssertionMonitor { } ``` -For more details, see the [Column Assertions](/docs/managed-datahub/observe/column-assertions) guide. +For more details, see the [Column Assertions](/docs/managed-datahub/observe/column-assertions.md) guide. ### Custom SQL Assertions @@ -179,7 +179,7 @@ mutation upsertDatasetSqlAssertionMonitor { } ``` -For more details, see the [Custom SQL Assertions](/docs/managed-datahub/observe/custom-sql-assertions) guide. +For more details, see the [Custom SQL Assertions](/docs/managed-datahub/observe/custom-sql-assertions.md) guide. ### Schema Assertions @@ -219,7 +219,7 @@ mutation upsertDatasetSchemaAssertionMonitor { } ``` -For more details, see the [Schema Assertions](/docs/managed-datahub/observe/schema-assertions) guide. +For more details, see the [Schema Assertions](/docs/managed-datahub/observe/schema-assertions.md) guide. diff --git a/docs/api/tutorials/operations.md b/docs/api/tutorials/operations.md index 94bfd25fa2d0c1..70ede993ec95f6 100644 --- a/docs/api/tutorials/operations.md +++ b/docs/api/tutorials/operations.md @@ -7,7 +7,7 @@ import TabItem from '@theme/TabItem'; The Operations APIs allow you to report operational changes that were made to a given Dataset or Table using the 'Operation' concept. These operations may be viewed on the Dataset Profile (e.g. 
as last modified time), accessed via the DataHub GraphQL API, or -used to as inputs to Acryl Cloud [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions). +used as inputs to Acryl Cloud [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions.md). ### Goal Of This Guide From 1bc1806168feca07d3290b4c5cf2eb1e556620e6 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 16 May 2024 15:06:51 -0700 Subject: [PATCH 04/12] Adding contrct thing --- docs-website/sidebars.js | 1 + docs/api/tutorials/assertions.md | 539 ++++++++++++++++----------- docs/api/tutorials/data-contracts.md | 217 +++++++++++ 3 files changed, 536 insertions(+), 221 deletions(-) create mode 100644 docs/api/tutorials/data-contracts.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 69bc31c30b8b91..9af59274f3dc2c 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -732,6 +732,7 @@ module.exports = { "docs/api/tutorials/assertions", "docs/api/tutorials/incidents", "docs/api/tutorials/operations", + "docs/api/tutorials/data-contracts", "docs/api/tutorials/ml", ], }, diff --git a/docs/api/tutorials/assertions.md b/docs/api/tutorials/assertions.md index 50433200b45c5d..5fc4c31bd77db5 100644 --- a/docs/api/tutorials/assertions.md +++ b/docs/api/tutorials/assertions.md @@ -3,11 +3,9 @@ import TabItem from '@theme/TabItem'; # Assertions -## Why Would You Use Assertions APIs? - -The Assertions APIs allow you to create, schedule, run, and delete Assertions with Acryl Cloud.
+ -Supported Assertion Types include: +This guide specifically covers how to use the Assertion APIs for **Acryl Cloud** native assertions, including: - [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions.md) - [Volume Assertions](/docs/managed-datahub/observe/volume-assertions.md) @@ -15,6 +13,9 @@ Supported Assertion Types include: - [Schema Assertions](/docs/managed-datahub/observe/schema-assertions.md) - [Custom SQL Assertions](/docs/managed-datahub/observe/custom-sql-assertions.md) +## Why Would You Use Assertions APIs? + +The Assertions APIs allow you to create, schedule, run, and delete Assertions with Acryl Cloud. ### Goal Of This Guide @@ -59,6 +60,19 @@ mutation upsertDatasetFreshnessAssertionMonitor { } ``` +This API will return a unique identifier (URN) for the new assertion if you were successful: + +```json +{ + "data": { + "upsertDatasetFreshnessAssertionMonitor": { + "urn": "urn:li:assertion:your-new-assertion-id" + } + }, + "extensions": {} +} +``` + For more details, see the [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions.md) guide. ### Volume Assertions @@ -99,6 +113,19 @@ mutation upsertDatasetVolumeAssertionMonitor { } ``` +This API will return a unique identifier (URN) for the new assertion if you were successful: + +```json +{ + "data": { + "upsertDatasetVolumeAssertionMonitor": { + "urn": "urn:li:assertion:your-new-assertion-id" + } + }, + "extensions": {} +} +``` + For more details, see the [Volume Assertions](/docs/managed-datahub/observe/volume-assertions.md) guide.
### Column Assertions @@ -145,6 +172,19 @@ mutation upsertDatasetFieldAssertionMonitor { } ``` +This API will return a unique identifier (URN) for the new assertion if you were successful: + +```json +{ + "data": { + "upsertDatasetFieldAssertionMonitor": { + "urn": "urn:li:assertion:your-new-assertion-id + } + }, + "extensions": {} +} +``` + For more details, see the [Column Assertions](/docs/managed-datahub/observe/column-assertions.md) guide. ### Custom SQL Assertions @@ -179,6 +219,19 @@ mutation upsertDatasetSqlAssertionMonitor { } ``` +This API will return a unique identifier (URN) for the new assertion if you were successful: + +```json +{ + "data": { + "upsertDatasetSqlAssertionMonitor": { + "urn": "urn:li:assertion:your-new-assertion-id + } + }, + "extensions": {} +} +``` + For more details, see the [Custom SQL Assertions](/docs/managed-datahub/observe/custom-sql-assertions.md) guide. ### Schema Assertions @@ -219,13 +272,251 @@ mutation upsertDatasetSchemaAssertionMonitor { } ``` +This API will return a unique identifier (URN) for the new assertion if you were successful: + +```json +{ + "data": { + "upsertDatasetSchemaAssertionMonitor": { + "urn": "urn:li:assertion:your-new-assertion-id" + } + }, + "extensions": {} +} +``` + For more details, see the [Schema Assertions](/docs/managed-datahub/observe/schema-assertions.md) guide. -## Get Assertions +## Run Assertions + +You can use the following APIs to trigger the assertions you've created to run on-demand. This is +particularly useful for running assertions on a custom schedule, for example from your production +data pipelines. + + + + +### Run Assertion + +```graphql +mutation runAssertion { + runAssertion(urn: "urn:li:assertion:your-assertion-id", saveResult: true) { + type + nativeResults { + key + value + } + } +} +``` + +Where **type** will contain the Result of the assertion run, either `SUCCESS`, `FAILURE`, or `ERROR`. 
+ +The `saveResult` argument determines whether the result of the assertion will be saved to DataHub's backend, +and available to view through the DataHub UI. If this is set to false, the result will NOT be stored in DataHub's +backend. The value defaults to `true`. + +If the assertion is external (not natively executed by Acryl), this API will return an error. + +If running the assertion is successful, the result will be returned as follows: + +```json +{ + "data": { + "runAssertion": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "1382" + } + ] + } + }, + "extensions": {} +} +``` + +### Run Group of Assertions + +```graphql +mutation runAssertions { + runAssertions(urns: ["urn:li:assertion:your-assertion-id-1", "urn:li:assertion:your-assertion-id-2"], saveResults: true) { + passingCount + failingCount + errorCount + results { + urn + result { + type + nativeResults { + key + value + } + } + } + } +} +``` + +Where **type** will contain the Result of the assertion run, either `SUCCESS`, `FAILURE`, or `ERROR`. + +The `saveResults` argument determines whether the result of the assertion will be saved to DataHub's backend, +and available to view through the DataHub UI. If this is set to false, the result will NOT be stored in DataHub's +backend. The value defaults to `true`. + +If any of the assertion are external (not natively executed by Acryl), they will simply be omitted from the result set. 
+ +If running the assertions is successful, the results will be returned as follows: + +```json +{ + "data": { + "runAssertions": { + "passingCount": 2, + "failingCount": 0, + "errorCount": 0, + "results": [ + { + "urn": "urn:li:assertion:your-assertion-id-1", + "result": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "1382" + } + ] + } + }, + { + "urn": "urn:li:assertion:your-assertion-id-2", + "result": { + "type": "FAILURE", + "nativeResults": [ + { + "key": "Value", + "value": "12323" + } + ] + } + } + ] + } + }, + "extensions": {} +} +``` + +Where you should see one result object for each assertion. + +### Run All Assertions for Table + +You can also run all assertions for a specific data asset using the `runAssertionsForAsset` mutation. + +```graphql +mutation runAssertionsForAsset { + runAssertionsForAsset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchase_events,PROD)", saveResults: true) { + passingCount + failingCount + errorCount + results { + urn + result { + type + nativeResults { + key + value + } + } + } + } +} +``` + +Where `type` will contain the Result of the assertion run, either `SUCCESS`, `FAILURE`, or `ERROR`. + +The `saveResults` argument determines whether the result of the assertion will be saved to DataHub's backend, +and available to view through the DataHub UI. If this is set to false, the result will NOT be stored in DataHub's +backend. The value defaults to `true`. + +If any of the assertion are external (not natively executed by Acryl), they will simply be omitted from the result +set. 
+ +If running the assertions is successful, the results will be returned as follows: + +```json +{ + "data": { + "runAssertionsForAsset": { + "passingCount": 2, + "failingCount": 0, + "errorCount": 0, + "results": [ + { + "urn": "urn:li:assertion:your-assertion-id-1", + "result": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "1382" + } + ] + } + }, + { + "urn": "urn:li:assertion:your-assertion-id-2", + "result": { + "type": "FAILURE", + "nativeResults": [ + { + "key": "Value", + "value": "12323" + } + ] + } + } + ] + } + }, + "extensions": {} +} +``` + +Where you should see one result object for each assertion. + + + + + +### Run Assertion + +```python +{{ inline /metadata-ingestion/examples/library/run_assertion.py show_path_as_comment }} +``` + +### Run Group of Assertions + +```python +{{ inline /metadata-ingestion/examples/library/run_assertions.py show_path_as_comment }} +``` + +### Run All Assertions for Table + +```python +{{ inline /metadata-ingestion/examples/library/run_assertions_for_asset.py show_path_as_comment }} +``` + + + + + +## Get Assertion Details You can use the following APIs to @@ -235,9 +526,9 @@ You can use the following APIs to -### Get Assertions for a Table +### Get Assertions for Table -To retrieve all the assertions for a table, you can use the following (super long) GraphQL Query. +To retrieve all the assertions for a table, you can use the following GraphQL Query. ```graphql query dataset { @@ -475,9 +766,9 @@ query dataset { } ``` -### Get a single assertion +### Get Assertion Details -You can use the following GraphQL query to fetch a single assertion by its URN. +You can use the following GraphQL query to fetch the details for an assertion along with its evaluation history by URN. ```graphql query getAssertion { @@ -720,251 +1011,57 @@ Python support coming soon! -## Run Assertions +## Delete Assertions -You can use the following APIs to trigger the assertions you've created to run on-demand. 
This is -particularly useful for running assertions on a custom schedule, for example from your production -data pipelines. +You can delete assertions from DataHub using the following APIs. -### Run an assertion - -```graphql -mutation runAssertion { - runAssertion(urn: "urn:li:assertion:your-assertion-id", saveResult: true) { - type - nativeResults { - key - value - } - } -} -``` - -Where **type** will contain the Result of the assertion run, either `SUCCESS`, `FAILURE`, or `ERROR`. - -The `saveResult` argument determines whether the result of the assertion will be saved to DataHub's backend, -and available to view through the DataHub UI. If this is set to false, the result will NOT be stored in DataHub's -backend. The value defaults to `true`. - -If the assertion is external (not natively executed by Acryl), this API will return an error. - -If running the assertion is successful, the result will be returned as follows: - -```json -{ - "data": { - "runAssertion": { - "type": "SUCCESS", - "nativeResults": [ - { - "key": "Value", - "value": "1382" - } - ] - } - }, - "extensions": {} -} -``` - -### Run multiple assertions - -```graphql -mutation runAssertions { - runAssertions(urns: ["urn:li:assertion:your-assertion-id-1", "urn:li:assertion:your-assertion-id-2"], saveResults: true) { - passingCount - failingCount - errorCount - results { - urn - type - nativeResults { - key - value - } - } - } -} -``` - -Where **type** will contain the Result of the assertion run, either `SUCCESS`, `FAILURE`, or `ERROR`. - -The `saveResults` argument determines whether the result of the assertion will be saved to DataHub's backend, -and available to view through the DataHub UI. If this is set to false, the result will NOT be stored in DataHub's -backend. The value defaults to `true`. - -If any of the assertion are external (not natively executed by Acryl), they will simply be omitted from the result set.
- -If running the assertions is successful, the results will be returned as follows: - -```json -{ - "data": { - "runAssertions": { - "passingCount": 2, - "failingCount": 0, - "errorCount": 0, - "results": [ - { - "urn": "urn:li:assertion:your-assertion-id-1", - "type": "SUCCESS", - "nativeResults": [ - { - "key": "Value", - "value": "1382" - } - ] - }, - { - "urn": "urn:li:assertion:your-assertion-id-2", - "type": "FAILURE", - "nativeResults": [ - { - "key": "Value", - "value": "12323" - } - ] - } - ] - } - }, - "extensions": {} -} -``` - -Where you should see one result object for each assertion. - -### Run all assertions for table - -You can also run all assertions for a specific data asset using the `runAssetAssertions` mutation. - ```graphql -mutation runAssertionsForAsset { - runAssertionsForAsset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchase_events,PROD)", saveResults: true) { - passingCount - failingCount - errorCount - results { - urn - type - nativeResults { - key - value - } - } - } +mutation deleteAssertion { + deleteAssertion(urn: "urn:li:assertion:test") } ``` -Where `type` will contain the Result of the assertion run, either `SUCCESS`, `FAILURE`, or `ERROR`. - -The `saveResults` argument determines whether the result of the assertion will be saved to DataHub's backend, -and available to view through the DataHub UI. If this is set to false, the result will NOT be stored in DataHub's -backend. The value defaults to `true`. - -If any of the assertion are external (not natively executed by Acryl), they will simply be omitted from the result -set. 
- -If running the assertions is successful, the results will be returned as follows: +If you see the following response, the operation was successful: ```json { "data": { - "runAssertionsForAsset": { - "passingCount": 2, - "failingCount": 0, - "errorCount": 0, - "results": [ - { - "urn": "urn:li:assertion:your-assertion-id-1", - "type": "SUCCESS", - "nativeResults": [ - { - "key": "Value", - "value": "1382" - } - ] - }, - { - "urn": "urn:li:assertion:your-assertion-id-2", - "type": "FAILURE", - "nativeResults": [ - { - "key": "Value", - "value": "12323" - } - ] - } - ] - } + "deleteAssertion": true }, "extensions": {} } ``` -Where you should see one result object for each assertion. - -### Run assertion - -```python -{{ inline /metadata-ingestion/examples/library/run_assertion.py show_path_as_comment }} -``` - -### Run multiple assertions - ```python -{{ inline /metadata-ingestion/examples/library/run_assertions.py show_path_as_comment }} -``` - -### Run all assertions for table - -```python -{{ inline /metadata-ingestion/examples/library/run_assertions_for_asset.py show_path_as_comment }} +{{ inline /metadata-ingestion/examples/library/delete_assertion.py show_path_as_comment }} ``` - +## (Advanced) Create and Report Results for Custom Assertions -## Delete Assertions +If you'd like to create and report results for your own custom assertions, e.g. those which are run and +evaluated outside of Acryl, you need to generate 2 important Assertion Entity aspects, and give the assertion a unique +URN of the following format: -You can use delete dataset operations to DataHub using the following APIs. - - +1. Generate a unique URN for your assertion -```graphql -mutation deleteAssertion { - deleteAssertion(urn: "urn:li:assertion:test") -} +```plaintext +urn:li:assertion: ``` -If you see the following response, the operation was successful: +2. Generate the [**AssertionInfo**](/docs/generated/metamodel/entities/assertion.md#assertion-info) aspect for the assertion. 
You can do this using the Python SDK. Give your assertion a `type` and a `source` +with type `EXTERNAL` to mark it as an external assertion, not run by DataHub itself. -```json -{ - "data": { - "deleteAssertion": true - }, - "extensions": {} -} -``` - - - - +3. Generate the [**AssertionRunEvent**](/docs/generated/metamodel/entities/assertion.md#assertionrunevent-timeseries) timeseries aspect using the Python SDK. This aspect should contain the result of the assertion +run at a given timestamp and will be shown on the results graph in DataHub's UI. -```python -{{ inline /metadata-ingestion/examples/library/delete_assertion.py show_path_as_comment }} -``` - - - diff --git a/docs/api/tutorials/data-contracts.md b/docs/api/tutorials/data-contracts.md new file mode 100644 index 00000000000000..056bff666cfea8 --- /dev/null +++ b/docs/api/tutorials/data-contracts.md @@ -0,0 +1,217 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Data Contracts + + + +This guide specifically covers how to use the Data Contract APIs with **Acryl Cloud**. + +## Why Would You Use Data Contract APIs? + +The Data Contract APIs allow you to create, update, and evaluate Data Contracts programmatically. This is particularly +useful to automate the monitoring of data quality and schema compliance for your data. + +### Goal Of This Guide + +This guide will show you how to create, update, and check the status of a Data Contract. + +## Prerequisites + +### Privileges Required + +The actor making API calls must have the `Edit Data Contract` privileges for the Tables at hand. + +### Assertions + +Before creating a Data Contract, you should have already created the Assertions that you want to associate with the Data Contract. +Check out the [Assertions](/docs/api/tutorials/assertions.md) guide for details on how to create DataHub Assertions.
+ +## Create & Update Data Contract + +You can create a new Data Contract, which is simply a bundle of "important" assertions, using the following APIs. + + + + +To create or update a Data Contract, simply use the `upsertDataContract` GraphQL Mutation. + +```graphql +mutation upsertDataContract { + upsertDataContract( + input: { + entityUrn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchases,PROD)", # Table to Create Contract for + freshness: [ + { + assertionUrn: "urn:li:assertion:your-freshness-assertion-id", + } + ], + schema: [ + { + assertionUrn: "urn:li:assertion:your-schema-assertion-id", + } + ], + dataQuality: [ + { + assertionUrn: "urn:li:assertion:your-column-assertion-id-1", + }, + { + assertionUrn: "urn:li:assertion:your-column-assertion-id-2", + } + ] + } + ) { + urn + } +} +``` + +This API will return a unique identifier (URN) for the Data Contract if you were successful: + +```json +{ + "data": { + "upsertDataContract": { + "urn": "urn:li:dataContract:your-new-contract-id + } + }, + "extensions": {} +} +``` + +If you want to update an existing Data Contract, you can use the same API, but also pass the `urn` parameter in the +`upsertDataContract` mutation. + +```graphql +mutation upsertDataContract { + upsertDataContract( + urn: "urn:li:dataContract:your-existing-contract-id", + input: { + freshness: [ + { + assertionUrn: "urn:li:assertion:your-freshness-assertion-id", + } + ], + schema: [ + { + assertionUrn: "urn:li:assertion:your-schema-assertion-id", + } + ], + dataQuality: [ + { + assertionUrn: "urn:li:assertion:your-column-assertion-id-1", + }, + { + assertionUrn: "urn:li:assertion:your-column-assertion-id-2", + } + ] + } + ) { + urn + } +} +``` + + + + +## Check Contract Status + +You can use the following APIs to check whether a Data Contract is passing or failing, which is determined +by the last status of the assertions associated with the contract.
+ + + + + +### Check Contract Status for Table + +```graphql +query getTableContractStatus { + dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchases,PROD)") { + contract { + result { + type # Passing or Failing. + assertionResults { # Results of each contract assertion. + assertion { + urn + } + result { + type + nativeResults { + key + value + } + } + } + } + } + } +} +``` + +You can also _force refresh_ all of the Contract Assertions by evaluating them on-demand by providing the `refresh` argument +in your query. + +```graphql +query getTableContractStatus { + dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchases,PROD)") { + contract(refresh: true) { + ... same + } + } +} +``` + +This will run any native Acryl assertions comprising the Data Contract. Be careful! This can take a while depending on how many native assertions are part of the contract. + +If you're successful, you'll get the latest status for the Table Contract: + +```json +{ + "data": { + "dataset": { + "contract": { + "result": { + "type": "PASSING", + "assertionResults": [ + { + "assertion": { + "urn": "urn:li:assertion:your-freshness-assertion-id" + }, + "result": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "1382" + } + ] + } + }, + { + "assertion": { + "urn": "urn:li:assertion:your-schema-assertion-id" + }, + "result": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "12323" + } + ] + } + } + ] + } + } + } + }, + "extensions": {} +} +``` + + + + From d3c19eee3c47dd4ee05c0488a5d9553f277731eb Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 16 May 2024 15:09:22 -0700 Subject: [PATCH 05/12] Adding --- docs/api/tutorials/data-contracts.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api/tutorials/data-contracts.md b/docs/api/tutorials/data-contracts.md index 056bff666cfea8..ac19920a5c4b7b 100644 --- a/docs/api/tutorials/data-contracts.md +++
b/docs/api/tutorials/data-contracts.md @@ -72,7 +72,7 @@ This API will return a unique identifier (URN) for the Data Contract if you were { "data": { "upsertDataContract": { - "urn": "urn:li:dataContract:your-new-contract-id + "urn": "urn:li:dataContract:your-new-contract-id" } }, "extensions": {} @@ -191,7 +191,7 @@ If you're successful, you'll get the latest status for the Table Contract: }, { "assertion": { - "urn": "urn:li:assertion:your-schema-assertion-id" + "urn": "urn:li:assertion:your-volume-assertion-id" }, "result": { "type": "SUCCESS", From 0519d3978fd080b905bbb72b15c3af4852fe2cb1 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Fri, 24 May 2024 14:23:37 -0700 Subject: [PATCH 06/12] Adding more docs to clarify how tf to use this thing --- docs/api/tutorials/assertions.md | 106 ++++++++++++++++++ .../library/run_assertions_for_asset.py | 14 +-- 2 files changed, 113 insertions(+), 7 deletions(-) diff --git a/docs/api/tutorials/assertions.md b/docs/api/tutorials/assertions.md index 5fc4c31bd77db5..2f075db689a8b0 100644 --- a/docs/api/tutorials/assertions.md +++ b/docs/api/tutorials/assertions.md @@ -490,6 +490,52 @@ If running the assertions is successful, the results will be returned as follows Where you should see one result object for each assertion. +### Run Group of Assertions for Table + +If you don't always want to run _all_ assertions for a given table, you can also opt to run a subset of the +table's assertions using *Assertion Tags*. First, you'll add tags to your assertions to group and categorize them, +then you'll call the `runAssertionsForAsset` mutation with the `tagUrns` argument to filter for assertions having those tags. + +#### Step 1: Adding Tag to an Assertion + +Currently, you can add tags to an assertion only via the DataHub GraphQL API. 
You can do this using the following mutation: + +```graphql +mutation addTags { + addTag(input: { + resourceUrn: "urn:li:assertion:your-assertion", + tagUrn: "urn:li:tag:my-important-tag", + }) +} +``` + +#### Step 2: Run All Assertions for a Table with Tags + +Now, you can run all assertions for a table with a specific tag(s) using the `runAssertionsForAsset` mutation with the +`tagUrns` input parameter: + +```graphql +mutation runAssertionsForAsset { + runAssertionsForAsset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchase_events,PROD)", tagUrns: ["urn:li:tag:my-important-tag"]) { + passingCount + failingCount + errorCount + results { + urn + result { + type + nativeResults { + key + value + } + } + } + } +} +``` + +**Coming Soon**: Support for adding tags to assertions through the DataHub UI. + @@ -516,6 +562,37 @@ Where you should see one result object for each assertion. +### Experimental: Providing Dynamic Parameters to Assertions + +You can provide **dynamic parameters** to your assertions to customize their behavior. This is particularly useful for +assertions that require dynamic parameters, such as a threshold value that changes based on the time of day. + +Dynamic parameters can be injected into the SQL fragment portion of any Assertion. For example, it can appear +in any part of the SQL statement in a [Custom SQL](/docs/managed-datahub/observe/custom-sql-assertions.md) Assertion, +or it can appear in the **Advanced > Filter** section of a [Column](/docs/managed-datahub/observe/column-assertions.md), +[Volume](/docs/managed-datahub/observe/volume-assertions.md), or [Freshness](/docs/managed-datahub/observe/freshness-assertions.md) Assertion. + +To do so, you'll first need to edit the SQL fragment to include the dynamic parameter. Dynamic parameters appear +as `${parameterName}` in the SQL fragment. + +Next, you'll call the `runAssertion`, `runAssertions`, or `runAssertionsForAsset` mutations with the `parameters` input argument. 
+This argument is a list of key-value tuples, where the key is the parameter name and the value is the parameter value: + +```graphql +mutation runAssertion { + runAssertion(urn: "urn:li:assertion:your-assertion-id", parameters: [{key: "parameterName", value: "parameterValue"}]) { + type + nativeResults { + key + value + } + } +} +``` + +At runtime, the `${parameterName}` placeholder in the SQL fragment will be replaced with the provided `parameterValue` before the query +is sent to the database for execution. + ## Get Assertion Details You can use the following APIs to @@ -1010,6 +1087,35 @@ Python support coming soon! +## Add Tag to Assertion + +You can add a tags to individual assertions to group and categorize them, for example by its priority or severity. + + + + +```graphql +mutation addTags { + addTag(input: { + resourceUrn: "urn:li:assertion:your-assertion", + tagUrn: "urn:li:tag:my-important-tag", + }) +} +``` + +If you see the following response, the operation was successful: + +```json +{ + "data": { + "addTag": true + }, + "extensions": {} +} +``` + + + ## Delete Assertions diff --git a/metadata-ingestion/examples/library/run_assertions_for_asset.py b/metadata-ingestion/examples/library/run_assertions_for_asset.py index 209ceccee3f588..a96b647147cecc 100644 --- a/metadata-ingestion/examples/library/run_assertions_for_asset.py +++ b/metadata-ingestion/examples/library/run_assertions_for_asset.py @@ -10,17 +10,17 @@ ) ) -assertion_urns = [ - "urn:li:assertion:6e3f9e09-1483-40f9-b9cd-30e5f182694a", - "urn:li:assertion:9e3f9e09-1483-40f9-b9cd-30e5f182694g", -] +dataset_urn = "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_snowflake_table,PROD)" -# Run the assertions -assertion_results = graph.run_assertions(urns=assertion_urns, saveResults=True).get("results") +# Run all native assertions for the dataset +assertion_results = graph.run_assertions_for_asset(urn=dataset_urn, saveResults=True).get("results") assertion_result_1 = 
assertion_results.get("urn:li:assertion:6e3f9e09-1483-40f9-b9cd-30e5f182694a") assertion_result_2 = assertion_results.get("urn:li:assertion:9e3f9e09-1483-40f9-b9cd-30e5f182694g") - log.info(f"Assertion results: {assertion_results}") log.info(f"Assertion result 1 (SUCCESS / FAILURE / ERROR): {assertion_result_1.get('type')}") log.info(f"Assertion result 2 (SUCCESS / FAILURE / ERROR): {assertion_result_2.get('type')}") + +# Run a subset of native assertions having a specific tag +important_assertion_tag = "urn:li:tag:my-important-assertion-tag" +assertion_results = graph.run_assertions_for_asset(urn=dataset_urn, tagUrns=[important_assertion_tag]).get("results") \ No newline at end of file From c67bfb60fc9b0a5a317c69306c4e4baea3c0f2b9 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Fri, 24 May 2024 14:26:53 -0700 Subject: [PATCH 07/12] Add create tag clarifications: --- docs/api/tutorials/assertions.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/api/tutorials/assertions.md b/docs/api/tutorials/assertions.md index 2f075db689a8b0..28841b3d370d8b 100644 --- a/docs/api/tutorials/assertions.md +++ b/docs/api/tutorials/assertions.md @@ -1089,7 +1089,8 @@ Python support coming soon! ## Add Tag to Assertion -You can add a tags to individual assertions to group and categorize them, for example by its priority or severity. +You can add tags to individual assertions to group and categorize them, for example by its priority or severity. +Note that the tag should already exist in DataHub, or the operation will fail. @@ -1114,6 +1115,8 @@ If you see the following response, the operation was successful: } ``` +You can create new tags using the `createTag` mutation or via the UI. 
+ From 27de9b48310bb628a2115192c593015dee3778db Mon Sep 17 00:00:00 2001 From: John Joyce Date: Fri, 24 May 2024 14:28:06 -0700 Subject: [PATCH 08/12] better stuff --- docs/api/tutorials/assertions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/api/tutorials/assertions.md b/docs/api/tutorials/assertions.md index 28841b3d370d8b..b016cd1daf6054 100644 --- a/docs/api/tutorials/assertions.md +++ b/docs/api/tutorials/assertions.md @@ -66,7 +66,7 @@ This API will return a unique identifier (URN) for the new assertion if you were { "data": { "upsertDatasetFreshnessAssertionMonitor": { - "urn": "urn:li:assertion:your-new-assertion-id + "urn": "urn:li:assertion:your-new-assertion-id" } }, "extensions": {} @@ -119,7 +119,7 @@ This API will return a unique identifier (URN) for the new assertion if you were { "data": { "upsertDatasetVolumeAssertionMonitor": { - "urn": "urn:li:assertion:your-new-assertion-id + "urn": "urn:li:assertion:your-new-assertion-id" } }, "extensions": {} @@ -178,7 +178,7 @@ This API will return a unique identifier (URN) for the new assertion if you were { "data": { "upsertDatasetFieldAssertionMonitor": { - "urn": "urn:li:assertion:your-new-assertion-id + "urn": "urn:li:assertion:your-new-assertion-id" } }, "extensions": {} @@ -225,7 +225,7 @@ This API will return a unique identifier (URN) for the new assertion if you were { "data": { "upsertDatasetSqlAssertionMonitor": { - "urn": "urn:li:assertion:your-new-assertion-id + "urn": "urn:li:assertion:your-new-assertion-id" } }, "extensions": {} From d133d428b0a49edab5b75ccdd2c752ef471cd450 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Wed, 29 May 2024 16:30:01 -0700 Subject: [PATCH 09/12] Add note about async assertions --- docs/api/tutorials/assertions.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/api/tutorials/assertions.md b/docs/api/tutorials/assertions.md index b016cd1daf6054..08832ee19ff89d 100644 --- a/docs/api/tutorials/assertions.md +++ 
b/docs/api/tutorials/assertions.md @@ -297,6 +297,11 @@ You can use the following APIs to trigger the assertions you've created to run o particularly useful for running assertions on a custom schedule, for example from your production data pipelines. +> **Long-Running Assertions**: The timeout for synchronously running an assertion is currently limited to a maximum of 30 seconds. +> Each of the following APIs supports an `async` parameter, which can be set to `true` to run the assertion asynchronously. +> When set to `true`, the API will kick off the assertion run and return null immediately. To view the result of the assertion, +> simply fetch the `runEvents` field of the `assertion(urn: String!)` GraphQL query. + From 3148fb3c4479278cdc6fe26f4290ced117da8454 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Wed, 5 Jun 2024 11:46:21 -0700 Subject: [PATCH 10/12] Update sidebars.js to include data contracts --- docs-website/sidebars.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 04cac3712915bf..2eb600eff74e81 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -804,7 +804,7 @@ module.exports = { "docs/api/tutorials/assertions", "docs/api/tutorials/incidents", "docs/api/tutorials/operations", - "docs/incidents/data-contracts", + "docs/api/tutorials/data-contracts", "docs/api/tutorials/domains", "docs/api/tutorials/forms", "docs/api/tutorials/lineage", From 77b5326001cb7d13becd5d02ace681bd5b95958f Mon Sep 17 00:00:00 2001 From: John Joyce Date: Wed, 5 Jun 2024 11:47:23 -0700 Subject: [PATCH 11/12] Update __init__.py to reflect latest --- .../airflow-plugin/src/datahub_airflow_plugin/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py index 6ec30c4d9fe5bd..e4040e3a17dfdc 100644 ---
a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py @@ -1,6 +1,6 @@ # Published at https://pypi.org/project/acryl-datahub/. __package_name__ = "acryl-datahub-airflow-plugin" -__version__ = "0.0.0.dev1" +__version__ = "1!0.0.0.dev0" def is_dev_mode() -> bool: From b746cc3af2c7c62719dd1a7e12136416d686be42 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 6 Jun 2024 11:13:47 -0700 Subject: [PATCH 12/12] Fix python linting --- .../library/dataset_report_operation.py | 6 ++--- .../examples/library/run_assertion.py | 6 +++-- .../examples/library/run_assertions.py | 23 +++++++++++----- .../library/run_assertions_for_asset.py | 26 ++++++++++++++----- 4 files changed, 42 insertions(+), 19 deletions(-) diff --git a/metadata-ingestion/examples/library/dataset_report_operation.py b/metadata-ingestion/examples/library/dataset_report_operation.py index a27019ec47ab1f..15ebc43dba60a1 100644 --- a/metadata-ingestion/examples/library/dataset_report_operation.py +++ b/metadata-ingestion/examples/library/dataset_report_operation.py @@ -11,11 +11,9 @@ ) operation_type = "INSERT" -source_type = "DATA_PROCESS" # Source of the operation (data platform or DAG task) +source_type = "DATA_PROCESS" # Source of the operation (data platform or DAG task) # Report a change operation for the Dataset. 
operation_client.report_operation( - urn=dataset_urn, - operation_type=operation_type, - source_type=source_type + urn=dataset_urn, operation_type=operation_type, source_type=source_type ) diff --git a/metadata-ingestion/examples/library/run_assertion.py b/metadata-ingestion/examples/library/run_assertion.py index 4e556fe8184ce9..414e5f46cc7f91 100644 --- a/metadata-ingestion/examples/library/run_assertion.py +++ b/metadata-ingestion/examples/library/run_assertion.py @@ -13,6 +13,8 @@ assertion_urn = "urn:li:assertion:6e3f9e09-1483-40f9-b9cd-30e5f182694a" # Run the assertion -assertion_result = graph.run_assertion(urn=assertion_urn, saveResult=True) +assertion_result = graph.run_assertion(urn=assertion_urn, save_result=True) -log.info(f"Assertion result (SUCCESS / FAILURE / ERROR): {assertion_result.get("type")}") +log.info( + f'Assertion result (SUCCESS / FAILURE / ERROR): {assertion_result.get("type")}' +) diff --git a/metadata-ingestion/examples/library/run_assertions.py b/metadata-ingestion/examples/library/run_assertions.py index 209ceccee3f588..6d38d9b5edecd9 100644 --- a/metadata-ingestion/examples/library/run_assertions.py +++ b/metadata-ingestion/examples/library/run_assertions.py @@ -16,11 +16,22 @@ ] # Run the assertions -assertion_results = graph.run_assertions(urns=assertion_urns, saveResults=True).get("results") -assertion_result_1 = assertion_results.get("urn:li:assertion:6e3f9e09-1483-40f9-b9cd-30e5f182694a") -assertion_result_2 = assertion_results.get("urn:li:assertion:9e3f9e09-1483-40f9-b9cd-30e5f182694g") +assertion_results = graph.run_assertions(urns=assertion_urns, save_result=True).get( + "results" +) +if assertion_results is not None: + assertion_result_1 = assertion_results.get( + "urn:li:assertion:6e3f9e09-1483-40f9-b9cd-30e5f182694a" + ) + assertion_result_2 = assertion_results.get( + "urn:li:assertion:9e3f9e09-1483-40f9-b9cd-30e5f182694g" + ) -log.info(f"Assertion results: {assertion_results}") -log.info(f"Assertion result 1 (SUCCESS / 
FAILURE / ERROR): {assertion_result_1.get('type')}") -log.info(f"Assertion result 2 (SUCCESS / FAILURE / ERROR): {assertion_result_2.get('type')}") + log.info(f"Assertion results: {assertion_results}") + log.info( + f"Assertion result 1 (SUCCESS / FAILURE / ERROR): {assertion_result_1.get('type')}" + ) + log.info( + f"Assertion result 2 (SUCCESS / FAILURE / ERROR): {assertion_result_2.get('type')}" + ) diff --git a/metadata-ingestion/examples/library/run_assertions_for_asset.py b/metadata-ingestion/examples/library/run_assertions_for_asset.py index a96b647147cecc..ab2793c3b5b8a6 100644 --- a/metadata-ingestion/examples/library/run_assertions_for_asset.py +++ b/metadata-ingestion/examples/library/run_assertions_for_asset.py @@ -13,14 +13,26 @@ dataset_urn = "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_snowflake_table,PROD)" # Run all native assertions for the dataset -assertion_results = graph.run_assertions_for_asset(urn=dataset_urn, saveResults=True).get("results") -assertion_result_1 = assertion_results.get("urn:li:assertion:6e3f9e09-1483-40f9-b9cd-30e5f182694a") -assertion_result_2 = assertion_results.get("urn:li:assertion:9e3f9e09-1483-40f9-b9cd-30e5f182694g") +assertion_results = graph.run_assertions_for_asset(urn=dataset_urn).get("results") -log.info(f"Assertion results: {assertion_results}") -log.info(f"Assertion result 1 (SUCCESS / FAILURE / ERROR): {assertion_result_1.get('type')}") -log.info(f"Assertion result 2 (SUCCESS / FAILURE / ERROR): {assertion_result_2.get('type')}") +if assertion_results is not None: + assertion_result_1 = assertion_results.get( + "urn:li:assertion:6e3f9e09-1483-40f9-b9cd-30e5f182694a" + ) + assertion_result_2 = assertion_results.get( + "urn:li:assertion:9e3f9e09-1483-40f9-b9cd-30e5f182694g" + ) + + log.info(f"Assertion results: {assertion_results}") + log.info( + f"Assertion result 1 (SUCCESS / FAILURE / ERROR): {assertion_result_1.get('type')}" + ) + log.info( + f"Assertion result 2 (SUCCESS / FAILURE / ERROR): 
{assertion_result_2.get('type')}" + ) # Run a subset of native assertions having a specific tag important_assertion_tag = "urn:li:tag:my-important-assertion-tag" -assertion_results = graph.run_assertions_for_asset(urn=dataset_urn, tagUrns=[important_assertion_tag]).get("results") \ No newline at end of file +assertion_results = graph.run_assertions_for_asset( + urn=dataset_urn, tag_urns=[important_assertion_tag] +).get("results")