From 905f34b2b38dc1928626531479a4e8b5c50d944a Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 6 Jun 2024 14:05:44 -0700 Subject: [PATCH] docs(): Adding API docs for incidents, operations, and assertions (#10522) Co-authored-by: John Joyce Co-authored-by: John Joyce Co-authored-by: John Joyce Co-authored-by: John Joyce Co-authored-by: John Joyce --- docs-website/sidebars.js | 7 +- docs/api/tutorials/assertions.md | 1181 +++++++++++++++++ docs/api/tutorials/data-contracts.md | 217 +++ docs/api/tutorials/incidents.md | 164 +++ docs/api/tutorials/operations.md | 136 ++ .../library/dataset_read_operations.py | 19 + .../library/dataset_report_operation.py | 19 + .../examples/library/delete_assertion.py | 18 + .../examples/library/run_assertion.py | 20 + .../examples/library/run_assertions.py | 37 + .../library/run_assertions_for_asset.py | 38 + 11 files changed, 1855 insertions(+), 1 deletion(-) create mode 100644 docs/api/tutorials/assertions.md create mode 100644 docs/api/tutorials/data-contracts.md create mode 100644 docs/api/tutorials/incidents.md create mode 100644 docs/api/tutorials/operations.md create mode 100644 metadata-ingestion/examples/library/dataset_read_operations.py create mode 100644 metadata-ingestion/examples/library/dataset_report_operation.py create mode 100644 metadata-ingestion/examples/library/delete_assertion.py create mode 100644 metadata-ingestion/examples/library/run_assertion.py create mode 100644 metadata-ingestion/examples/library/run_assertions.py create mode 100644 metadata-ingestion/examples/library/run_assertions_for_asset.py diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 1f2208bc179782..2eb600eff74e81 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -754,7 +754,7 @@ module.exports = { }, { type: "category", - label: "Datahub Actions", + label: "DataHub Actions", link: { type: "doc", id: "docs/act-on-metadata" }, items: [ "docs/actions/README", @@ -800,6 +800,11 @@ module.exports = { 
"docs/api/tutorials/datasets", "docs/api/tutorials/deprecation", "docs/api/tutorials/descriptions", + "docs/api/tutorials/custom-properties", + "docs/api/tutorials/assertions", + "docs/api/tutorials/incidents", + "docs/api/tutorials/operations", + "docs/api/tutorials/data-contracts", "docs/api/tutorials/domains", "docs/api/tutorials/forms", "docs/api/tutorials/lineage", diff --git a/docs/api/tutorials/assertions.md b/docs/api/tutorials/assertions.md new file mode 100644 index 00000000000000..08832ee19ff89d --- /dev/null +++ b/docs/api/tutorials/assertions.md @@ -0,0 +1,1181 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Assertions + + + +This guide specifically covers how to use the Assertion APIs for **Acryl Cloud** native assertions, including: + +- [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions.md) +- [Volume Assertions](/docs/managed-datahub/observe/volume-assertions.md) +- [Column Assertions](/docs/managed-datahub/observe/column-assertions.md) +- [Schema Assertions](/docs/managed-datahub/observe/schema-assertions.md) +- [Custom SQL Assertions](/docs/managed-datahub/observe/custom-sql-assertions.md) + +## Why Would You Use Assertions APIs? + +The Assertions APIs allow you to create, schedule, run, and delete Assertions with Acryl Cloud. + +### Goal Of This Guide + +This guide will show you how to create, schedule, run and delete Assertions for a Table. + +## Prerequisites + +The actor making API calls must have the `Edit Assertions` and `Edit Monitors` privileges for the Tables at hand. + +## Create Assertions + +You can create new dataset Assertions to DataHub using the following APIs. + + + + +### Freshness Assertion + +To create a new freshness assertion, use the `upsertDatasetFreshnessAssertionMonitor` GraphQL Mutation. 
+ +```graphql +mutation upsertDatasetFreshnessAssertionMonitor { + upsertDatasetFreshnessAssertionMonitor( + input: { + entityUrn: "", + schedule: { + type: FIXED_INTERVAL, + fixedInterval: { unit: HOUR, multiple: 8 } + } + evaluationSchedule: { + timezone: "America/Los_Angeles", + cron: "0 */8 * * *" + } + evaluationParameters: { + sourceType: INFORMATION_SCHEMA + } + mode: ACTIVE + } + ) { + urn + } +} +``` + +This API will return a unique identifier (URN) for the new assertion if you were successful: + +```json +{ + "data": { + "upsertDatasetFreshnessAssertionMonitor": { + "urn": "urn:li:assertion:your-new-assertion-id" + } + }, + "extensions": {} +} +``` + +For more details, see the [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions.md) guide. + +### Volume Assertions + +To create a new volume assertion, use the `upsertDatasetVolumeAssertionMonitor` GraphQL Mutation. + +```graphql +mutation upsertDatasetVolumeAssertionMonitor { + upsertDatasetVolumeAssertionMonitor( + input: { + entityUrn: "" + type: ROW_COUNT_TOTAL + rowCountTotal: { + operator: BETWEEN + parameters: { + minValue: { + value: "10" + type: NUMBER + } + maxValue: { + value: "20" + type: NUMBER + } + } + } + evaluationSchedule: { + timezone: "America/Los_Angeles" + cron: "0 */8 * * *" + } + evaluationParameters: { + sourceType: INFORMATION_SCHEMA + } + mode: ACTIVE + } + ) { + urn + } +} +``` + +This API will return a unique identifier (URN) for the new assertion if you were successful: + +```json +{ + "data": { + "upsertDatasetVolumeAssertionMonitor": { + "urn": "urn:li:assertion:your-new-assertion-id" + } + }, + "extensions": {} +} +``` + +For more details, see the [Volume Assertions](/docs/managed-datahub/observe/volume-assertions.md) guide. + +### Column Assertions + +To create a new column assertion, use the `upsertDatasetFieldAssertionMonitor` GraphQL Mutation. 
+ +```graphql +mutation upsertDatasetFieldAssertionMonitor { + upsertDatasetFieldAssertionMonitor( + input: { + entityUrn: "" + type: FIELD_VALUES, + fieldValuesAssertion: { + field: { + path: "", + type: "NUMBER", + nativeType: "NUMBER(38,0)" + }, + operator: GREATER_THAN, + parameters: { + value: { + type: NUMBER, + value: "10" + } + }, + failThreshold: { + type: COUNT, + value: 0 + }, + excludeNulls: true + } + evaluationSchedule: { + timezone: "America/Los_Angeles" + cron: "0 */8 * * *" + } + evaluationParameters: { + sourceType: ALL_ROWS_QUERY + } + mode: ACTIVE + } + ){ + urn + } +} +``` + +This API will return a unique identifier (URN) for the new assertion if you were successful: + +```json +{ + "data": { + "upsertDatasetFieldAssertionMonitor": { + "urn": "urn:li:assertion:your-new-assertion-id" + } + }, + "extensions": {} +} +``` + +For more details, see the [Column Assertions](/docs/managed-datahub/observe/column-assertions.md) guide. + +### Custom SQL Assertions + +To create a new custom SQL assertion, use the `upsertDatasetSqlAssertionMonitor` GraphQL Mutation. + +```graphql +mutation upsertDatasetSqlAssertionMonitor { + upsertDatasetSqlAssertionMonitor( + assertionUrn: "" + input: { + entityUrn: "" + type: METRIC, + description: "", + statement: "", + operator: GREATER_THAN_OR_EQUAL_TO, + parameters: { + value: { + value: "100", + type: NUMBER + } + } + evaluationSchedule: { + timezone: "America/Los_Angeles" + cron: "0 */6 * * *" + } + mode: ACTIVE + } + ) { + urn + } +} +``` + +This API will return a unique identifier (URN) for the new assertion if you were successful: + +```json +{ + "data": { + "upsertDatasetSqlAssertionMonitor": { + "urn": "urn:li:assertion:your-new-assertion-id" + } + }, + "extensions": {} +} +``` + +For more details, see the [Custom SQL Assertions](/docs/managed-datahub/observe/custom-sql-assertions.md) guide. + +### Schema Assertions + +To create a new schema assertion, use the `upsertDatasetSchemaAssertionMonitor` GraphQL Mutation. 
+ +```graphql +mutation upsertDatasetSchemaAssertionMonitor { + upsertDatasetSchemaAssertionMonitor( + assertionUrn: "urn:li:assertion:existing-assertion-id", + input: { + entityUrn: "", + assertion: { + compatibility: EXACT_MATCH, + fields: [ + { + path: "id", + type: STRING + }, + { + path: "count", + type: NUMBER + }, + { + path: "struct", + type: STRUCT + }, + { + path: "struct.nestedBooleanField", + type: BOOLEAN + } + ] + }, + description: "", + mode: ACTIVE + } + ) { urn } +} +``` + +This API will return a unique identifier (URN) for the new assertion if you were successful: + +```json +{ + "data": { + "upsertDatasetSchemaAssertionMonitor": { + "urn": "urn:li:assertion:your-new-assertion-id" + } + }, + "extensions": {} +} +``` + +For more details, see the [Schema Assertions](/docs/managed-datahub/observe/schema-assertions.md) guide. + + + + + +## Run Assertions + +You can use the following APIs to trigger the assertions you've created to run on-demand. This is +particularly useful for running assertions on a custom schedule, for example from your production +data pipelines. + +> **Long-Running Assertions**: The timeout for synchronously running an assertion is currently limited to a maximum of 30 seconds. +> Each of the following APIs supports an `async` parameter, which can be set to `true` to run the assertion asynchronously. +> When set to `true`, the API will kick off the assertion run and return null immediately. To view the result of the assertion, +> simply fetch the `runEvents` field of the `assertion(urn: String!)` GraphQL query. + + + + +### Run Assertion + +```graphql +mutation runAssertion { + runAssertion(urn: "urn:li:assertion:your-assertion-id", saveResult: true) { + type + nativeResults { + key + value + } + } +} +``` + +Where **type** will contain the Result of the assertion run, either `SUCCESS`, `FAILURE`, or `ERROR`. 
+ +The `saveResult` argument determines whether the result of the assertion will be saved to DataHub's backend, +and available to view through the DataHub UI. If this is set to false, the result will NOT be stored in DataHub's +backend. The value defaults to `true`. + +If the assertion is external (not natively executed by Acryl), this API will return an error. + +If running the assertion is successful, the result will be returned as follows: + +```json +{ + "data": { + "runAssertion": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "1382" + } + ] + } + }, + "extensions": {} +} +``` + +### Run Group of Assertions + +```graphql +mutation runAssertions { + runAssertions(urns: ["urn:li:assertion:your-assertion-id-1", "urn:li:assertion:your-assertion-id-2"], saveResults: true) { + passingCount + failingCount + errorCount + results { + urn + result { + type + nativeResults { + key + value + } + } + } + } +} +``` + +Where **type** will contain the Result of the assertion run, either `SUCCESS`, `FAILURE`, or `ERROR`. + +The `saveResults` argument determines whether the result of the assertion will be saved to DataHub's backend, +and available to view through the DataHub UI. If this is set to false, the result will NOT be stored in DataHub's +backend. The value defaults to `true`. + +If any of the assertions are external (not natively executed by Acryl), they will simply be omitted from the result set. 
+ +If running the assertions is successful, the results will be returned as follows: + +```json +{ + "data": { + "runAssertions": { + "passingCount": 1, + "failingCount": 1, + "errorCount": 0, + "results": [ + { + "urn": "urn:li:assertion:your-assertion-id-1", + "result": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "1382" + } + ] + } + }, + { + "urn": "urn:li:assertion:your-assertion-id-2", + "result": { + "type": "FAILURE", + "nativeResults": [ + { + "key": "Value", + "value": "12323" + } + ] + } + } + ] + } + }, + "extensions": {} +} +``` + +Where you should see one result object for each assertion. + +### Run All Assertions for Table + +You can also run all assertions for a specific data asset using the `runAssertionsForAsset` mutation. + +```graphql +mutation runAssertionsForAsset { + runAssertionsForAsset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchase_events,PROD)", saveResults: true) { + passingCount + failingCount + errorCount + results { + urn + result { + type + nativeResults { + key + value + } + } + } + } +} +``` + +Where `type` will contain the Result of the assertion run, either `SUCCESS`, `FAILURE`, or `ERROR`. + +The `saveResults` argument determines whether the result of the assertion will be saved to DataHub's backend, +and available to view through the DataHub UI. If this is set to false, the result will NOT be stored in DataHub's +backend. The value defaults to `true`. + +If any of the assertions are external (not natively executed by Acryl), they will simply be omitted from the result +set. 
+ +If running the assertions is successful, the results will be returned as follows: + +```json +{ + "data": { + "runAssertionsForAsset": { + "passingCount": 1, + "failingCount": 1, + "errorCount": 0, + "results": [ + { + "urn": "urn:li:assertion:your-assertion-id-1", + "result": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "1382" + } + ] + } + }, + { + "urn": "urn:li:assertion:your-assertion-id-2", + "result": { + "type": "FAILURE", + "nativeResults": [ + { + "key": "Value", + "value": "12323" + } + ] + } + } + ] + } + }, + "extensions": {} +} +``` + +Where you should see one result object for each assertion. + +### Run Group of Assertions for Table + +If you don't always want to run _all_ assertions for a given table, you can also opt to run a subset of the +table's assertions using *Assertion Tags*. First, you'll add tags to your assertions to group and categorize them, +then you'll call the `runAssertionsForAsset` mutation with the `tagUrns` argument to filter for assertions having those tags. + +#### Step 1: Adding Tag to an Assertion + +Currently, you can add tags to an assertion only via the DataHub GraphQL API. You can do this using the following mutation: + +```graphql +mutation addTags { + addTag(input: { + resourceUrn: "urn:li:assertion:your-assertion", + tagUrn: "urn:li:tag:my-important-tag", + }) +} +``` + +#### Step 2: Run All Assertions for a Table with Tags + +Now, you can run all assertions for a table with a specific tag(s) using the `runAssertionsForAsset` mutation with the +`tagUrns` input parameter: + +```graphql +mutation runAssertionsForAsset { + runAssertionsForAsset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchase_events,PROD)", tagUrns: ["urn:li:tag:my-important-tag"]) { + passingCount + failingCount + errorCount + results { + urn + result { + type + nativeResults { + key + value + } + } + } + } +} +``` + +**Coming Soon**: Support for adding tags to assertions through the DataHub UI. 
+ + + + + +### Run Assertion + +```python +{{ inline /metadata-ingestion/examples/library/run_assertion.py show_path_as_comment }} +``` + +### Run Group of Assertions + +```python +{{ inline /metadata-ingestion/examples/library/run_assertions.py show_path_as_comment }} +``` + +### Run All Assertions for Table + +```python +{{ inline /metadata-ingestion/examples/library/run_assertions_for_asset.py show_path_as_comment }} +``` + + + + + +### Experimental: Providing Dynamic Parameters to Assertions + +You can provide **dynamic parameters** to your assertions to customize their behavior. This is particularly useful for +assertions that require dynamic parameters, such as a threshold value that changes based on the time of day. + +Dynamic parameters can be injected into the SQL fragment portion of any Assertion. For example, it can appear +in any part of the SQL statement in a [Custom SQL](/docs/managed-datahub/observe/custom-sql-assertions.md) Assertion, +or it can appear in the **Advanced > Filter** section of a [Column](/docs/managed-datahub/observe/column-assertions.md), +[Volume](/docs/managed-datahub/observe/volume-assertions.md), or [Freshness](/docs/managed-datahub/observe/freshness-assertions.md) Assertion. + +To do so, you'll first need to edit the SQL fragment to include the dynamic parameter. Dynamic parameters appear +as `${parameterName}` in the SQL fragment. + +Next, you'll call the `runAssertion`, `runAssertions`, or `runAssertionsForAsset` mutations with the `parameters` input argument. 
+This argument is a list of key-value tuples, where the key is the parameter name and the value is the parameter value: + +```graphql +mutation runAssertion { + runAssertion(urn: "urn:li:assertion:your-assertion-id", parameters: [{key: "parameterName", value: "parameterValue"}]) { + type + nativeResults { + key + value + } + } +} +``` + +At runtime, the `${parameterName}` placeholder in the SQL fragment will be replaced with the provided `parameterValue` before the query +is sent to the database for execution. + +## Get Assertion Details + +You can use the following APIs to + +1. Fetch existing assertion definitions + run history +2. Fetch the assertions associated with a given table + their run history. + + + + +### Get Assertions for Table + +To retrieve all the assertions for a table, you can use the following GraphQL Query. + +```graphql +query dataset { + dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchases,PROD)") { + assertions(start: 0, count: 1000) { + start + count + total + assertions { + # Fetch the last run of each associated assertion. 
+ runEvents(status: COMPLETE, limit: 1) { + total + failed + succeeded + runEvents { + timestampMillis + status + result { + type + nativeResults { + key + value + } + } + } + } + info { + type + description + lastUpdated { + time + actor + } + datasetAssertion { + datasetUrn + scope + aggregation + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + fields { + urn + path + } + nativeType + nativeParameters { + key + value + } + logic + } + freshnessAssertion { + type + entityUrn + schedule { + type + cron { + cron + timezone + } + fixedInterval { + unit + multiple + } + } + filter { + type + sql + } + } + sqlAssertion { + type + entityUrn + statement + changeType + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + fieldAssertion { + type + entityUrn + filter { + type + sql + } + fieldValuesAssertion { + field { + path + type + nativeType + } + transform { + type + } + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + failThreshold { + type + value + } + excludeNulls + } + fieldMetricAssertion { + field { + path + type + nativeType + } + metric + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + } + volumeAssertion { + type + entityUrn + filter { + type + sql + } + rowCountTotal { + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + rowCountChange { + type + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + } + schemaAssertion { + entityUrn + compatibility + fields { + path + type + nativeType + } + schema { + fields { + fieldPath + type + nativeDataType + } + } + } + source { + type + created { + time + actor + } + } + } 
+ } + } + } +} +``` + +### Get Assertion Details + +You can use the following GraphQL query to fetch the details for an assertion along with its evaluation history by URN. + +```graphql +query getAssertion { + assertion(urn: "urn:li:assertion:assertion-id") { + # Fetch the last 10 runs for the assertion. + runEvents(status: COMPLETE, limit: 10) { + total + failed + succeeded + runEvents { + timestampMillis + status + result { + type + nativeResults { + key + value + } + } + } + } + info { + type + description + lastUpdated { + time + actor + } + datasetAssertion { + datasetUrn + scope + aggregation + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + fields { + urn + path + } + nativeType + nativeParameters { + key + value + } + logic + } + freshnessAssertion { + type + entityUrn + schedule { + type + cron { + cron + timezone + } + fixedInterval { + unit + multiple + } + } + filter { + type + sql + } + } + sqlAssertion { + type + entityUrn + statement + changeType + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + fieldAssertion { + type + entityUrn + filter { + type + sql + } + fieldValuesAssertion { + field { + path + type + nativeType + } + transform { + type + } + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + failThreshold { + type + value + } + excludeNulls + } + fieldMetricAssertion { + field { + path + type + nativeType + } + metric + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + } + volumeAssertion { + type + entityUrn + filter { + type + sql + } + rowCountTotal { + operator + parameters { + value { + value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + rowCountChange { + type + operator + parameters { + value { + 
value + type + } + minValue { + value + type + } + maxValue { + value + type + } + } + } + } + schemaAssertion { + entityUrn + compatibility + fields { + path + type + nativeType + } + schema { + fields { + fieldPath + type + nativeDataType + } + } + } + source { + type + created { + time + actor + } + } + } + } +} +``` + + + + + +```python +Python support coming soon! +``` + + + + +## Add Tag to Assertion + +You can add tags to individual assertions to group and categorize them, for example by priority or severity. +Note that the tag should already exist in DataHub, or the operation will fail. + + + + +```graphql +mutation addTags { + addTag(input: { + resourceUrn: "urn:li:assertion:your-assertion", + tagUrn: "urn:li:tag:my-important-tag", + }) +} +``` + +If you see the following response, the operation was successful: + +```json +{ + "data": { + "addTag": true + }, + "extensions": {} +} +``` + +You can create new tags using the `createTag` mutation or via the UI. + + + + +## Delete Assertions + +You can delete assertions from DataHub using the following APIs. + + + + +```graphql +mutation deleteAssertion { + deleteAssertion(urn: "urn:li:assertion:test") +} +``` + +If you see the following response, the operation was successful: + +```json +{ + "data": { + "deleteAssertion": true + }, + "extensions": {} +} +``` + + + + + +```python +{{ inline /metadata-ingestion/examples/library/delete_assertion.py show_path_as_comment }} +``` + + + + +## (Advanced) Create and Report Results for Custom Assertions + +If you'd like to create and report results for your own custom assertions, e.g. those which are run and +evaluated outside of Acryl, you need to generate 2 important Assertion Entity aspects, and give the assertion a unique +URN of the following format: + + +1. Generate a unique URN for your assertion + +```plaintext +urn:li:assertion:<unique-assertion-id> +``` + +2. 
Generate the [**AssertionInfo**](/docs/generated/metamodel/entities/assertion.md#assertion-info) aspect for the assertion. You can do this using the Python SDK. Give your assertion a `type` and a `source` +with type `EXTERNAL` to mark it as an external assertion, not run by DataHub itself. + +3. Generate the [**AssertionRunEvent**](/docs/generated/metamodel/entities/assertion.md#assertionrunevent-timeseries) timeseries aspect using the Python SDK. This aspect should contain the result of the assertion +run at a given timestamp and will be shown on the results graph in DataHub's UI. + diff --git a/docs/api/tutorials/data-contracts.md b/docs/api/tutorials/data-contracts.md new file mode 100644 index 00000000000000..ac19920a5c4b7b --- /dev/null +++ b/docs/api/tutorials/data-contracts.md @@ -0,0 +1,217 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Data Contracts + + + +This guide specifically covers how to use the Data Contract APIs with **Acryl Cloud**. + +## Why Would You Use Data Contract APIs? + +The Data Contract APIs allow you to create, update, and evaluate Data Contracts programmatically. This is particularly +useful to automate the monitoring of data quality and schema compliance for your data. + +### Goal Of This Guide + +This guide will show you how to create, update, and check the status of a Data Contract. + +## Prerequisites + +### Privileges Required + +The actor making API calls must have the `Edit Data Contract` privileges for the Tables at hand. + +### Assertions + +Before creating a Data Contract, you should have already created the Assertions that you want to associate with the Data Contract. +Check out the [Assertions](/docs/api/tutorials/assertions.md) guide for details on how to create DataHub Assertions. + +## Create & Update Data Contract + +You can create a new Data Contract, which is simply a bundle of "important" assertions, using the following APIs. 
+ + + + +To create or update a Data Contract, simply use the `upsertDataContract` GraphQL Mutation. + +```graphql +mutation upsertDataContract { + upsertDataContract( + input: { + entityUrn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchases,PROD)", # Table to Create Contract for + freshness: [ + { + assertionUrn: "urn:li:assertion:your-freshness-assertion-id", + } + ], + schema: [ + { + assertionUrn: "urn:li:assertion:your-schema-assertion-id", + } + ], + dataQuality: [ + { + assertionUrn: "urn:li:assertion:your-column-assertion-id-1", + }, + { + assertionUrn: "urn:li:assertion:your-column-assertion-id-2", + } + ] + } + ) { + urn + } +} +``` + +This API will return a unique identifier (URN) for the Data Contract if you were successful: + +```json +{ + "data": { + "upsertDataContract": { + "urn": "urn:li:dataContract:your-new-contract-id" + } + }, + "extensions": {} +} +``` + +If you want to update an existing Data Contract, you can use the same API and additionally pass the `urn` parameter in the +`upsertDataContract` mutation. + +```graphql +mutation upsertDataContract { + upsertDataContract( + urn: "urn:li:dataContract:your-existing-contract-id", + input: { + freshness: [ + { + assertionUrn: "urn:li:assertion:your-freshness-assertion-id", + } + ], + schema: [ + { + assertionUrn: "urn:li:assertion:your-schema-assertion-id", + } + ], + dataQuality: [ + { + assertionUrn: "urn:li:assertion:your-column-assertion-id-1", + }, + { + assertionUrn: "urn:li:assertion:your-column-assertion-id-2", + } + ] + } + ) { + urn + } +} +``` + + + + +## Check Contract Status + +You can use the following APIs to check whether a Data Contract is passing or failing, which is determined +by the last status of the assertions associated with the contract. + + + + + +### Check Contract Status for Table + +```graphql +query getTableContractStatus { + dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchases,PROD)") { + contract { + result { + type # Passing or Failing. 
+ assertionResults { # Results of each contract assertion. + assertion { + urn + } + result { + type + nativeResults { + key + value + } + } + } + } + } + } +} +``` + +You can also _force refresh_ all of the Contract Assertions, evaluating them on-demand, by providing the `refresh` argument +in your query. + +```graphql +query getTableContractStatus { + dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,purchases,PROD)") { + contract(refresh: true) { + # ... same fields as in the previous query + } + } +} +``` + +This will run any native Acryl assertions comprising the Data Contract. Be careful! This can take a while depending on how many native assertions are part of the contract. + +If you're successful, you'll get the latest status for the Table Contract: + +```json +{ + "data": { + "dataset": { + "contract": { + "result": { + "type": "PASSING", + "assertionResults": [ + { + "assertion": { + "urn": "urn:li:assertion:your-freshness-assertion-id" + }, + "result": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "1382" + } + ] + } + }, + { + "assertion": { + "urn": "urn:li:assertion:your-volume-assertion-id" + }, + "result": { + "type": "SUCCESS", + "nativeResults": [ + { + "key": "Value", + "value": "12323" + } + ] + } + } + ] + } + } + } + }, + "extensions": {} +} +``` + + + + diff --git a/docs/api/tutorials/incidents.md b/docs/api/tutorials/incidents.md new file mode 100644 index 00000000000000..20a24d58a1db42 --- /dev/null +++ b/docs/api/tutorials/incidents.md @@ -0,0 +1,164 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Incidents + +## Why Would You Use Incidents APIs? + +The Incidents APIs allow you to raise, retrieve, update and resolve data incidents via API. This is +useful for raising or resolving data incidents programmatically, for example from Airflow, Prefect, or Dagster DAGs. +Incidents are also useful for conditional Circuit Breaking in these pipelines. 
+ +### Goal Of This Guide + +This guide will show you how to raise, retrieve, update and resolve data incidents via API. + +## Prerequisites + +The actor making API calls must have the `Edit Incidents` privileges for the Tables at hand. + +## Raise Incident + +You can raise a new Data Incident for an existing asset using the following APIs. + + + + +```graphql +mutation raiseIncident { + raiseIncident( + input: { + resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,public.prod.purchases,PROD)", + type: OPERATIONAL, + title: "Data is Delayed", + description: "Data is delayed on May 15, 2024 because of downtime in the Spark Cluster.", + } + ) +} +``` + +Where `resourceUrn` is the unique identifier for the data asset (dataset, dashboard, chart, data job, or data flow) you want to raise the incident on. + +Where supported Incident Types include: + +- `OPERATIONAL` +- `FRESHNESS` +- `VOLUME` +- `COLUMN` +- `SQL` +- `DATA_SCHEMA` +- `CUSTOM` + +If the operation was successful, the unique identifier (URN) of the new incident will be returned: + +```json +{ + "data": { + "raiseIncident": "urn:li:incident:new-incident-id" + }, + "extensions": {} +} +``` + + + + + +``` +Python SDK support coming soon! +``` + + + + + +## Get Incidents For Data Asset + +You can retrieve the incidents and their statuses for a given Data Asset using the following APIs. + + + + +```graphql +query getAssetIncidents { + dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:snowflake,public.prod.purchases,PROD)") { + incidents( + state: ACTIVE, start: 0, count: 20 + ) { + start + count + total + incidents { + urn + incidentType + title + description + status { + state + lastUpdated { + time + actor + } + } + } + } + } +} +``` + +Where you can filter for active incidents by passing the `ACTIVE` state and resolved incidents by passing the `RESOLVED` state. +This will return all relevant incidents for the dataset. + + + + + +``` +Python SDK support coming soon! 
+``` + + + + + +## Resolve Incidents + +You can update the status of an incident using the following APIs. + + + + +```graphql +mutation updateIncidentStatus { + updateIncidentStatus(urn: "urn:li:incident:your-incident-id", + input: { + state: RESOLVED, + message: "The delayed data issue was resolved at 4:55pm on May 15." + } + ) +} +``` + +You can also reopen an incident by updating the state from `RESOLVED` to `ACTIVE`. + +If you see the following response, the operation was successful: + +```json +{ + "data": { + "updateIncidentStatus": true + }, + "extensions": {} +} +``` + + + + + +``` +Python SDK support coming soon! +``` + + + \ No newline at end of file diff --git a/docs/api/tutorials/operations.md b/docs/api/tutorials/operations.md new file mode 100644 index 00000000000000..70ede993ec95f6 --- /dev/null +++ b/docs/api/tutorials/operations.md @@ -0,0 +1,136 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Operations + +## Why Would You Use Operations APIs? + +The Operations APIs allow you to report operational changes that were made to a given Dataset or Table using the 'Operation' concept. +These operations may be viewed on the Dataset Profile (e.g. as last modified time), accessed via the DataHub GraphQL API, or +used as inputs to Acryl Cloud [Freshness Assertions](/docs/managed-datahub/observe/freshness-assertions.md). + +### Goal Of This Guide + +This guide will show you how to report and query Operations for a Dataset. + +## Prerequisites + +For this tutorial, you need to deploy DataHub Quickstart and ingest sample data. +For detailed steps, please refer to [DataHub Quickstart Guide](/docs/quickstart.md). + +:::note +Before reporting operations for a dataset, you need to ensure the targeted dataset is already present in DataHub. +::: + +## Report Operations + +You can report dataset operations to DataHub using the following APIs. 
+ + + + +```graphql +mutation reportOperation { + reportOperation( + input: { + urn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)", + operationType: INSERT, + sourceType: DATA_PROCESS + } + ) +} +``` + +Where supported operation types include: + +- `INSERT` +- `UPDATE` +- `DELETE` +- `CREATE` +- `ALTER` +- `DROP` +- `CUSTOM` + +If you want to report an operation that happened at a specific time, you can also optionally provide +the `timestampMillis` field. If not provided, the current server time will be used as the operation time. + +If you see the following response, the operation was successful: + +```json +{ + "data": { + "reportOperation": true + }, + "extensions": {} +} +``` + + + + + +```python +{{ inline /metadata-ingestion/examples/library/dataset_report_operation.py show_path_as_comment }} +``` + + + + +## Read Operations + +You can read dataset operations from DataHub using the following APIs. + + + + +```graphql +query dataset { + dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)") { + operations( + limit: 10, filter: [], startTimeMillis: <start-timestamp-ms>, endTimeMillis: <end-timestamp-ms> + ) { + timestampMillis + operationType + sourceType + } + } +} +``` + +Where `startTimeMillis` and `endTimeMillis` are optional. By default, operations are sorted by time descending. + +If you see the following response, the operation was successful: + +```json +{ + "data": { + "dataset": { + "operations": [ + { + "timestampMillis": 1231232332, + "operationType": "INSERT", + "sourceType": "DATA_PROCESS" + } + ] + } + }, + "extensions": {} +} +``` + + + + + +```python +{{ inline /metadata-ingestion/examples/library/dataset_read_operations.py show_path_as_comment }} +``` + + + + +### Expected Outcomes of Reporting Operations + +Reported Operations will appear when displaying the Last Updated time for a Dataset on its DataHub Profile. 
+They will also be used when selecting the `DataHub Operation` source type under the **Advanced** settings of a Freshness +Assertion. \ No newline at end of file diff --git a/metadata-ingestion/examples/library/dataset_read_operations.py b/metadata-ingestion/examples/library/dataset_read_operations.py new file mode 100644 index 00000000000000..78c9a92141cef2 --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_read_operations.py @@ -0,0 +1,19 @@ +from datahub.api.graphql import Operation + +DATAHUB_HOST = "https//:org.acryl.io/gms" +DATAHUB_TOKEN = ", + # end_time_millis= +) diff --git a/metadata-ingestion/examples/library/dataset_report_operation.py b/metadata-ingestion/examples/library/dataset_report_operation.py new file mode 100644 index 00000000000000..15ebc43dba60a1 --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_report_operation.py @@ -0,0 +1,19 @@ +from datahub.api.graphql import Operation + +DATAHUB_HOST = "https//:org.acryl.io/gms" +DATAHUB_TOKEN = "