From 00f0ee8689c21735668851c25bd446cc553ad304 Mon Sep 17 00:00:00 2001 From: Maggie Hays Date: Tue, 10 Dec 2024 16:15:18 -0600 Subject: [PATCH] docs(structured properties) add guide (#12070) --- docs-website/sidebars.js | 16 +- docs/api/tutorials/structured-properties.md | 2 +- .../compliance-forms/create-a-form.md | 4 +- docs/features/feature-guides/properties.md | 158 ----------- .../properties/create-a-property.md | 261 ++++++++++++++++++ .../feature-guides/properties/overview.md | 54 ++++ 6 files changed, 333 insertions(+), 162 deletions(-) delete mode 100644 docs/features/feature-guides/properties.md create mode 100644 docs/features/feature-guides/properties/create-a-property.md create mode 100644 docs/features/feature-guides/properties/overview.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 6ae50215c8166..2f1ac04772097 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -239,7 +239,21 @@ module.exports = { type: "doc", id: "docs/posts", }, - "docs/features/feature-guides/properties", + { + label: "Properties", + type: "category", + collapsed: true, + items: [ + { + type: "doc", + id: "docs/features/feature-guides/properties/overview", + }, + { + type: "doc", + id: "docs/features/feature-guides/properties/create-a-property", + }, + ], + }, { label: "Schema history", type: "doc", diff --git a/docs/api/tutorials/structured-properties.md b/docs/api/tutorials/structured-properties.md index b606ce9a8e245..95c89424e9ca7 100644 --- a/docs/api/tutorials/structured-properties.md +++ b/docs/api/tutorials/structured-properties.md @@ -8,7 +8,7 @@ import TabItem from '@theme/TabItem'; Structured properties are a structured, named set of properties that can be attached to logical entities like Datasets, DataJobs, etc. Structured properties have values that are types. Conceptually, they are like “field definitions”. 
-Learn more about structured properties in the [Structured Properties Feature Guide](../../../docs/features/feature-guides/properties.md). +Learn more about structured properties in the [Structured Properties Feature Guide](../../../docs/features/feature-guides/properties/overview.md). ### Goal Of This Guide diff --git a/docs/features/feature-guides/compliance-forms/create-a-form.md b/docs/features/feature-guides/compliance-forms/create-a-form.md index e97aaaa581777..a768bb16e4f64 100644 --- a/docs/features/feature-guides/compliance-forms/create-a-form.md +++ b/docs/features/feature-guides/compliance-forms/create-a-form.md @@ -175,11 +175,11 @@ Great question. We are working on Compliance Forms Analytics that will directly ### API Tutorials -- [API Guides on Documentation Form](../../../api/tutorials/forms.md) +- [Compliance Form API Guide](../../../api/tutorials/forms.md) ### Related Features -- [DataHub Properties](../../feature-guides/properties.md) +- [DataHub Structured Properties](../../feature-guides/properties/overview.md) ## Next Steps diff --git a/docs/features/feature-guides/properties.md b/docs/features/feature-guides/properties.md deleted file mode 100644 index abdb736ad2a42..0000000000000 --- a/docs/features/feature-guides/properties.md +++ /dev/null @@ -1,158 +0,0 @@ -import FeatureAvailability from '@site/src/components/FeatureAvailability'; -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; - -# About DataHub Properties - - -DataHub Custom Properties and Structured Properties are powerful tools to collect meaningful metadata for Assets that might not perfectly fit into other Aspects within DataHub, such as Glossary Terms, Tags, etc. Both types can be found in an Asset's Properties tab: - -

- -

- -This guide will explain the differences and use cases of each property type. - -## What are Custom Properties and Structured Properties? -Here are the differences between the two property types at a glance: - -| Custom Properties | Structured Properties | -| --- | --- | -| Map of key-value pairs stored as strings | Validated namespaces and data types | -| Added to assets during ingestion and via API | Defined via YAML; created and added to assets via CLI | -| No support for UI-based Edits | Support for UI-based edits | - -**Custom Properties** are key-value pairs of strings that capture additional information about assets that is not readily available in standard metadata fields. Custom Properties can be added to assets automatically during ingestion or programmatically via API and *cannot* be edited via the UI. -

- -

-

Example of Custom Properties assigned to a Dataset

- -**Structured Properties** are an extension of Custom Properties, providing a structured and validated way to attach metadata to DataHub Assets. Available as of v0.13.1, Structured Properties have a pre-defined type (Date, Integer, URN, String, etc.). They can be configured to only accept a specific set of allowed values, making it easier to ensure high levels of data quality and consistency. Structured Properties are defined via YAML, added to assets via CLI, and can be edited via the UI. -

- -

-

Example of Structured Properties assigned to a Dataset

- -## Use Cases for Custom Properties and Structured Properties -**Custom Properties** are useful for capturing raw metadata from source systems during ingestion or programmatically via API. Some examples include: - -- GitHub file location of code which generated a dataset -- Data encoding type -- Account ID, cluster size, and region where a dataset is stored - -**Structured Properties** are useful for setting and enforcing standards of metadata collection, particularly in support of compliance and governance initiatives. Values can be added programmatically via API, then manually via the DataHub UI as necessary. Some examples include: - -- Deprecation Date - - Type: Date, Single Select - - Validation: Must be formatted as 'YYYY-MM-DD' -- Data Retention Period - - Type: String, Single Select - - Validation: Adheres to allowed values "30 Days", "90 Days", "365 Days", or "Indefinite" -- Consulted Compliance Officer, chosen from a list of DataHub users - - Type: DataHub User, Multi-Select - - Validation: Must be valid DataHub User URN - -By using Structured Properties, compliance and governance officers can ensure consistency in data collection across assets. - -## Creating, Assigning, and Editing Structured Properties - -Structured Properties are defined via YAML, then created and assigned to DataHub Assets via the DataHub CLI. 
- -Here's how we would define the above examples in YAML: - - - - -```yaml -- id: deprecation_date - qualified_name: deprecation_date - type: date # Supported types: date, string, number, urn, rich_text - cardinality: SINGLE # Supported options: SINGLE, MULTIPLE - display_name: Deprecation Date - description: "Scheduled date when resource will be deprecated in the source system" - entity_types: # Define which types of DataHub Assets the Property can be assigned to - - dataset -``` - - - - -```yaml -- id: retention_period - qualified_name: retention_period - type: string # Supported types: date, string, number, urn, rich_text - cardinality: SINGLE # Supported options: SINGLE, MULTIPLE - display_name: Data Retention Period - description: "Predetermined storage duration before being deleted or archived - based on legal, regulatory, or organizational requirements" - entity_types: # Define which types of DataHub Assets the Property can be assigned to - - dataset - allowed_values: - - value: "30 Days" - description: "Use this for datasets that are ephemeral and contain PII" - - value: "90 Days" - description: "Use this for datasets that drive monthly reporting but contain PII" - - value: "365 Days" - description: "Use this for non-sensitive data that can be retained for longer" - - value: "Indefinite" - description: "Use this for non-sensitive data that can be retained indefinitely" -``` - - - - -```yaml -- id: compliance_officer - qualified_name: compliance_officer - type: urn # Supported types: date, string, number, urn, rich_text - cardinality: MULTIPLE # Supported options: SINGLE, MULTIPLE - display_name: Consulted Compliance Officer(s) - description: "Member(s) of the Compliance Team consulted/informed during audit" - type_qualifier: # Define the type of Asset URNs to allow - - corpuser - - corpGroup - entity_types: # Define which types of DataHub Assets the Property can be assigned to - - dataset -``` - - - - -:::note -To learn more about creating and assigning 
Structured Properties via CLI, please see the [Create Structured Properties](/docs/api/tutorials/structured-properties.md) tutorial. -::: - -Once a Structured Property is assigned to an Asset, Users with the `Edit Properties` Metadata Privilege will be able to change Structured Property values via the DataHub UI. -

- -

-

Example of editing the value of a Structured Property via the UI

- -### Videos - -**Deep Dive: UI-Editable Properties** - -

- -

- - -### API - -Please see the following API guides related to Custom and Structured Properties: - -- [Custom Properties API Guide](/docs/api/tutorials/structured-properties.md) -- [Structured Properties API Guide](/docs/api/tutorials/structured-properties.md) - - -## FAQ and Troubleshooting - -**Why can't I edit the value of a Structured Property from the DataHub UI?** -1. Your version of DataHub does not support UI-based edits of Structured Properties. Confirm you are running DataHub v0.13.1 or later. -2. You are attempting to edit a Custom Property, not a Structured Property. Confirm you are trying to edit a Structured Property, which will have an "Edit" button visible. Please note that Custom Properties are not eligible for UI-based edits to minimize overwrites during recurring ingestion. -3. You do not have the necessary privileges. Confirm with your Admin that you have the `Edit Properties` Metadata Privilege. - -### Related Features - -- [Compliance Forms](compliance-forms/overview.md) \ No newline at end of file diff --git a/docs/features/feature-guides/properties/create-a-property.md b/docs/features/feature-guides/properties/create-a-property.md new file mode 100644 index 0000000000000..2428f70a10551 --- /dev/null +++ b/docs/features/feature-guides/properties/create-a-property.md @@ -0,0 +1,261 @@ +--- +title: Create and Add a Structured Property +--- + +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# Create and Add a DataHub Structured Property + + +This guide walks you through creating a Structured Property via the DataHub UI, including: + +1. Defining a new Structured Property +2. Configuring display preferences for a Structured Property +3. Adding a Structured Property to an Asset +4. Adding a Structured Property to a Column + +:::note +To learn more about creating and assigning Structured Properties via the CLI, please see the [Create Structured Properties](/docs/api/tutorials/structured-properties.md) tutorial. 
+::: + +### Prerequisites + +To create, edit, or remove Structured Properties, you must have the **View Structured Properties** and **Manage Structured Properties** platform privileges. + +To add an existing Structured Property to an Asset, change its value, or remove it from an Asset, you must have the **Edit Properties** metadata privilege. + +## Define a New Structured Property + +From the navigation bar, go to **Govern** > **Structured Properties**. + +Click **+ Create** to start defining your Property. + +

+ View all Structured Properties +

+ +First, provide the following details: + +1. **Name and Description:** Clearly describe the purpose and meaning of the Structured Property so users understand its role and context. +2. **Property Type:** Choose a type that best fits the metadata you want to collect. Available types include **Text**, **Number**, **Date**, **DataHub Entity**, or **Rich Text**. Choosing any of the "List" options allows multiple entries for the Property. +3. **Allowed Values (Optional):** For **Text**, **Number**, and **DataHub Entity** types, define a set of allowed values to ensure consistent input across assets. +4. **Applies To:** Specify which DataHub asset types (e.g., Datasets, Dashboards, Pipelines) the Structured Property can be associated with, ensuring relevance and precision. + +:::caution +Once you save a Structured Property, you **cannot** edit or remove Allowed Values. However, you can add additional Allowed Values. +::: + +For example, imagine your organization wants to standardize how data assets (e.g., Datasets, Tasks, Pipelines) are categorized during their development cycle. By creating a **Lifecycle Stage** Structured Property, you can set a pre-defined list of allowed statuses, such as **Draft**, **Review**, and **Prod**, ensuring consistency and transparency. + +

+ View all Structured Properties +

+ +## Set Display Preferences for the Structured Property + +When defining a Structured Property, you can customize how it will be visible to DataHub users. By default, Structured Properties are visible in an Asset's **Properties** tab but can be conditionally configured with the following options: + +1. **Hide Property:** + Use this option if the Structured Property contains sensitive metadata that should not be visible to DataHub users via the UI. This ensures that only users with the necessary permissions can view or interact with the property values. + +2. **Customize Visibility:** + Decide where the Structured Property appears across the DataHub UI: + - **Asset Badge:** Display the property value as a badge on Assets to highlight key metadata. + - **Asset Sidebar:** Show the property in the Asset Sidebar for quick visibility while navigating an Asset. + +3. **Show in Search Filters:** + Enable this option to allow users to filter Assets by the values of this Structured Property. This improves discoverability and facilitates searches for Assets with specific attributes or classifications. + +4. **Show in Columns Table:** + Use this option to display the Structured Property value in the Dataset Schema view’s Columns Table. This is particularly useful for capturing field-level custom metadata and making it accessible alongside schema details. + +For the **Lifecycle Stage** example, imagine you want to allow users to filter by lifecycle status and view it at a glance during data discovery. To achieve this, you would enable **Show in Search Filters**, **Asset Badge**, and **Asset Sidebar**: + +

+ Configure Structured Property Visibility +

+ +## Add a Structured Property to an Asset + +Once a Structured Property has been defined, you can add it to the designated Asset Types. + +From an Asset's **Properties** tab, click the `+` button to see a drop-down list of all available Structured Properties. For example, you can now see **Lifecycle Stage** as an option for the `pet_profiles` Dataset: + +

+ Add a Structured Property to an Asset +

+ +Continuing with the **Lifecycle Stage** example, designate the `pet_profiles` Dataset as being in **Prod**: + +

+ Select a Structured Property Value +

+ +After clicking **Save**, the **Lifecycle Stage** for `pet_profiles` will appear in the following sections of the Asset Page: + +1. Properties Tab +2. Asset Badge +3. Asset Sidebar + +

+ View a Structured Property on an Asset +

+ +:::info +[**DataHub Compliance Forms**](../compliance-forms/overview.md) make it easy to update values for multiple Assets at once! +::: + +### Edit or Remove a Structured Property from an Asset + +After a Structured Property has been added to an Asset, users can modify its value or remove the property entirely using the **More** menu: + +

+ Editing or Removing a Structured Property on an Asset +

+ +### Search for Assets by a Structured Property Value + +For Structured Properties that have **Show in Search Filters** enabled, users can filter search results based on allowed values. + +For example, with the **Lifecycle Stage** property enabled as a filter, users can find it under the **More** dropdown in the Search interface: + +

+ Structured Property as Search Filter +

+ +From here, you can quickly narrow down Search results based on the desired stage, such as **Prod**: + +

+ Filtered Results by Structured Property Value +

+ +Notice how the **Prod** value is displayed prominently on the `pet_profiles` Asset Badge: + +

+ View Structured Property in Asset Badge +

+ +## Add a Structured Property to a Column + +Structured Properties can be applied at the column level, providing deeper context for how individual dataset fields relate to business concepts or terminology. In this example, we’ll create a Structured Property called **Business Label** to help business users understand how dataset columns align with common terminology, acronyms, or key business concepts. + +### Define the Business Label Property + +Follow these steps to define and configure the **Business Label** Structured Property: + +1. **Property Details:** + - **Name:** Business Label + - **Description:** Provide a description to explain its purpose, such as: + *"A user-friendly name for a dataset column, helping business users understand its meaning."* + - **Property Type:** Select **Text**, allowing any valid string to be entered. + - **Applies To:** Set this property to apply exclusively to **Columns**. + +2. **Display Preferences:** + - By default, column-level Structured Properties will be enabled for **all columns** on **all datasets** within DataHub, accessible via the Column Sidebar. + - Optionally, enable **Show in Columns Table** to make the **Business Label** visible within the Columns Table on the dataset schema. + +

+ Configuring Column Structured Property +

+ +:::caution +While the column sidebar provides convenient access to assigned properties, adding too many Structured Properties can clutter the view. Limit the number of properties shown in the sidebar to maintain clarity and usability. +::: + +Once configured, the **Business Label** Structured Property will automatically be added to all columns on dataset assets within DataHub. + +For example, after assigning the property, it will appear in two key areas of the `pet_profiles` Asset Page: + +1. **Columns Table:** The **Business Label** property and its populated values will be displayed directly within the Columns Table on the Dataset Schema, enabling users to view field-level metadata easily. +2. **Column Sidebar:** Structured Properties configured for columns, including **Business Label**, will also appear in the column’s sidebar. + +By applying column-level Structured Properties like **Business Label**, you enhance data discoverability and provide business users with valuable insights while keeping the interface user-friendly. + +

+ Column-level Structured Property in Columns Table +

+ +### Update the Business Label from the Column Sidebar + +When selecting a specific column in the UI, the **Business Label** Structured Property will be visible in the column’s sidebar. Users with appropriate permissions can view or update the value directly from this interface. + +

+ Column-level Structured Property in Sidebar +

+ +This setup ensures that column-specific metadata, such as the **Business Label**, is accessible and actionable, helping business users better understand the dataset's structure and its alignment with key business concepts. + +## FAQ and Troubleshooting + +### Why can’t I change a Structured Property’s definition? + +Once a Structured Property has been defined, only certain aspects can be modified: + +**You can change:** +- Name and description +- Allowed values (new values can be added) +- Supported asset types (new types can be added) +- Display preferences + +**You cannot change:** +- The type of the Structured Property +- Existing allowed values and their definitions + +### Why can't I configure a Structured Property to appear as an Asset Badge? +- Only **Text** and **Number** types with allowed values can be configured as Asset Badges. +- Only one Structured Property can be displayed as a Badge for a given Asset. + +### Why can't I filter Search Results by a Structured Property? +- Verify that the Structured Property has been configured to appear in search filters. +- Ensure the filter is relevant by checking if there are assets associated with the Structured Property's value in your search results. Try different search terms or relax other applied filters. + +### Why can't I add a Structured Property to an Asset? +- Confirm you have the **Edit Properties** privilege. +- Ensure the Structured Property has already been created and supports the type of Asset you're trying to modify. 
+ +### API Tutorials + +- [Structured Properties API Guide](/docs/api/tutorials/structured-properties.md) + +### Related Features + +- [DataHub Compliance Forms](/docs/features/feature-guides/compliance-forms/overview.md) \ No newline at end of file diff --git a/docs/features/feature-guides/properties/overview.md b/docs/features/feature-guides/properties/overview.md new file mode 100644 index 0000000000000..7637a3be53e0d --- /dev/null +++ b/docs/features/feature-guides/properties/overview.md @@ -0,0 +1,54 @@ +--- +title: Overview +--- + +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# About DataHub Structured Properties + + +DataHub **Structured Properties** allow you to add custom, validated properties to any Entity type in DataHub. Using Structured Properties, you can enable data discovery and governance based on attributes unique to your organization. + +

+ +

+ +## What are Structured Properties? + +**Structured Properties** are a powerful way to customize your DataHub environment, enabling you to align metadata with your organization’s unique needs. By defining specific property types—such as Date, Number, DataHub Entity, or Text—you can apply meaningful, context-aware attributes to your Assets. Validation rules, like restricting allowed values or enforcing specific formats, ensure consistency while giving you the flexibility to reflect your business’s terminology, workflows, and priorities. + +Structured Properties can be added to the following Asset Types: + +- Data Assets, such as Datasets, Columns, Tasks, Pipelines, Charts, Dashboards, and more. +- DataHub Entities, such as Domains, Glossary Terms & Groups, and Data Products. + +### Key Features of Structured Properties: + +1. **Typed Fields:** Properties are explicitly typed, including options like Date, Number, DataHub Entity, or Text. +2. **Allowed Values:** Enforce standards by restricting values to a specific format or a pre-defined list of acceptable inputs. +3. **Targeted Application:** Structured Properties can be tailored to specific asset types—such as Datasets, Columns, or Dashboards—ensuring they align with your organization’s data management needs and usage context. + +### Display Settings + +Structured Properties offer several configuration options to enhance metadata management: + +- **Hide Property:** For use cases where property values should not be viewable by DataHub users. +- **Show in Search Filters:** Enables users to filter for Assets based on specific property values, improving discoverability. +- **Customize Visibility:** Allows you to control where the Structured Property appears, such as in the Asset Badge, Asset Sidebar, and/or a Dataset Schema view’s Columns Table. + +## Why Use Structured Properties? 
+ +Structured Properties are especially useful for organizations that require: + +- **Customization:** Customize how your end-users find assets within DataHub. +- **Governance and Compliance:** Collect metadata in a way that supports compliance with internal or external standards. + +

+ +

+ +By leveraging these configurations, teams can ensure their metadata adheres to organizational policies and improves the discoverability and usability of Data Assets. + +## Next Steps + +Now that you understand Structured Properties, you’re ready to [Create a Structured Property](create-a-property.md). \ No newline at end of file