diff --git a/metadata-ingestion/archived/source_docs/superset.md b/metadata-ingestion/archived/source_docs/superset.md index 58ea6f81ab44c2..abc76e9264c57d 100644 --- a/metadata-ingestion/archived/source_docs/superset.md +++ b/metadata-ingestion/archived/source_docs/superset.md @@ -64,12 +64,15 @@ Note that a `.` is used to denote nested fields in the YAML recipe. | Field | Required | Default | Description | | ------------- | -------- | ------------------ | ------------------------------------------------------- | | `connect_uri` | | `"localhost:8088"` | Superset host URL. | +| `display_uri` | | `(connect_uri)` | Publicly accessible Superset URL, see note below. | | `username` | | | Superset username. | | `password` | | | Superset password. | | `provider` | | `"db"` | Superset provider. | | `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. | | `database_alias` | | | Can be used to change mapping for database names in superset to what you have in datahub | +NOTE: `display_uri` can be used when you need to ingest from a private, specially configured instance, but still want dashboard, graph, etc. links to point to the publicly accessible URL. So, for example, you could set `connect_uri: localhost:xxxx, display_uri: superset.mydomain.com`. You may need to do this if `superset.mydomain.com` has complex authentication that is not easy to pass through this source config. + ## Compatibility Coming soon! diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 8e0dd6f90a086a..b8cdee3a29a384 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -4,7 +4,7 @@ import dateutil.parser as dp import requests -from pydantic.class_validators import validator +from pydantic.class_validators import root_validator, validator from pydantic.fields import Field from datahub.configuration.common import ConfigModel @@ -59,6 +59,10 @@ class SupersetConfig(ConfigModel): # See the Superset /security/login endpoint for details # https://superset.apache.org/docs/rest-api connect_uri: str = Field(default="localhost:8088", description="Superset host URL.") + display_uri: str = Field( + default=None, + description="optional URL to use in links (if `connect_uri` is only for ingestion)", + ) username: Optional[str] = Field(default=None, description="Superset username.") password: Optional[str] = Field(default=None, description="Superset password.") provider: str = Field(default="db", description="Superset provider.") @@ -72,10 +76,17 @@ class SupersetConfig(ConfigModel): description="Can be used to change mapping for database names in superset to what you have in datahub", ) - @validator("connect_uri") + @validator("connect_uri", "display_uri") def remove_trailing_slash(cls, v): return config_clean.remove_trailing_slashes(v) + @root_validator + def default_display_uri_to_connect_uri(cls, values): + base = values.get("display_uri") + if base is None: + values.set("display_uri", values.get("connect_uri")) + return values + def get_metric_name(metric): if not metric: @@ -208,7 +219,7 @@ def construct_dashboard_from_api_data(self, dashboard_data): created=AuditStamp(time=modified_ts, actor=modified_actor), lastModified=AuditStamp(time=modified_ts, actor=modified_actor), ) - dashboard_url = f"{self.config.connect_uri}{dashboard_data.get('url', '')}" + dashboard_url = f"{self.config.display_uri}{dashboard_data.get('url', '')}" chart_urns = [] raw_position_data = dashboard_data.get("position_json", "{}") @@ -280,7 +291,7 @@ def construct_chart_from_chart_data(self, chart_data): lastModified=AuditStamp(time=modified_ts, actor=modified_actor), ) chart_type = chart_type_from_viz_type.get(chart_data.get("viz_type", "")) - chart_url = f"{self.config.connect_uri}{chart_data.get('url', '')}" + chart_url = f"{self.config.display_uri}{chart_data.get('url', '')}" datasource_id = chart_data.get("datasource_id") datasource_urn = self.get_datasource_urn_from_id(datasource_id)