diff --git a/metadata-ingestion/docs/sources/looker/looker_recipe.yml b/metadata-ingestion/docs/sources/looker/looker_recipe.yml index 42209f8cc68092..0939b6546411d2 100644 --- a/metadata-ingestion/docs/sources/looker/looker_recipe.yml +++ b/metadata-ingestion/docs/sources/looker/looker_recipe.yml @@ -8,4 +8,18 @@ source: client_id: ${LOOKER_CLIENT_ID} client_secret: ${LOOKER_CLIENT_SECRET} + # Liquid variables + # liquid_variables: + # _user_attributes: + # looker_env: "dev" + # dev_database_prefix: "employee" + # dev_schema_prefix: "public" + # dw_eff_dt_date: + # _is_selected: true + # source_region: "ap-south-1" + # db: "test-db" + + # LookML Constants + # lookml_constants: + # star_award_winner_year: "public.winner_2025" # sink configs diff --git a/metadata-ingestion/docs/sources/looker/lookml_post.md b/metadata-ingestion/docs/sources/looker/lookml_post.md index 8a4bf823ffc27d..fdbe7f3e1217d4 100644 --- a/metadata-ingestion/docs/sources/looker/lookml_post.md +++ b/metadata-ingestion/docs/sources/looker/lookml_post.md @@ -1,11 +1,49 @@ -#### Configuration Notes - -1. If a view contains a liquid template (e.g. `sql_table_name: {{ user_attributes['db']}}.kafka_streaming.events }}`, with `db=ANALYTICS_PROD`), then you will need to specify the values of those variables in the `liquid_variable` config as shown below: - ```yml - liquid_variable: - user_attributes: - db: ANALYTICS_PROD - ``` +### Configuration Notes + +1. Handling Liquid Templates + + If a view contains a liquid template, for example: + + ``` + sql_table_name: {{ user_attributes['db'] }}.kafka_streaming.events + ``` + + where `db=ANALYTICS_PROD`, you need to specify the values of those variables in the liquid_variables configuration as shown below: + + ```yml + liquid_variables: + user_attributes: + db: ANALYTICS_PROD + ``` + +2. 
Resolving LookML Constants + + If a view contains a LookML constant, for example: + + ``` + sql_table_name: @{db}.kafka_streaming.events ;; + ``` + + Ingestion attempts to resolve its value by looking at the project manifest files: + + ```yml + manifest.lkml + constant: db { + value: "ANALYTICS_PROD" + } + ``` + + - If the constant's value is not resolved or is incorrectly resolved, you can specify the `lookml_constants` configuration in the ingestion recipe as shown below. The constant value in the recipe takes precedence over constant values resolved from the manifest. + + ```yml + lookml_constants: + db: ANALYTICS_PROD + ``` + + +**Additional Notes** + +Although liquid variables and LookML constants can be used anywhere in LookML code, their values are currently resolved only for LookML views by DataHub LookML ingestion. This behavior is sufficient since LookML ingestion processes only views and their upstream dependencies. ### Multi-Project LookML (Advanced) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_dataclasses.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_dataclasses.py index d771821a14d88d..e928c25e22fbd0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_dataclasses.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_dataclasses.py @@ -32,6 +32,12 @@ class LookerField: sql: Optional[str] +@dataclass +class LookerConstant: + name: str + value: str + + @dataclass class LookerModel: connection: str @@ -75,6 +81,7 @@ def from_looker_dict( try: parsed = load_and_preprocess_file( path=included_file, + reporter=reporter, source_config=source_config, ) included_explores = parsed.get("explores", []) @@ -217,6 +224,7 @@ def resolve_includes( try: parsed = load_and_preprocess_file( path=included_file, + reporter=reporter, source_config=source_config, ) seen_so_far.add(included_file) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py 
b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py index 9fac0b52fde0dd..bd6a37fe4b4e24 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py @@ -4,7 +4,10 @@ from typing import Dict, Optional from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition -from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile +from datahub.ingestion.source.looker.looker_dataclasses import ( + LookerConstant, + LookerViewFile, +) from datahub.ingestion.source.looker.looker_template_language import ( load_and_preprocess_file, ) @@ -30,12 +33,14 @@ def __init__( base_projects_folder: Dict[str, pathlib.Path], reporter: LookMLSourceReport, source_config: LookMLSourceConfig, + manifest_constants: Dict[str, LookerConstant] = {}, ) -> None: self.viewfile_cache: Dict[str, Optional[LookerViewFile]] = {} self._root_project_name = root_project_name self._base_projects_folder = base_projects_folder self.reporter = reporter self.source_config = source_config + self.manifest_constants = manifest_constants def _load_viewfile( self, project_name: str, path: str, reporter: LookMLSourceReport @@ -71,9 +76,15 @@ def _load_viewfile( try: logger.debug(f"Loading viewfile {path}") + # load_and preprocess_file is called multiple times for loading view file from multiple flows. + # Flag resolve_constants is a hack to avoid passing around manifest_constants from all of the flows. + # This is fine as rest of flows do not need resolution of constants. 
parsed = load_and_preprocess_file( path=path, + reporter=self.reporter, source_config=self.source_config, + resolve_constants=True, + manifest_constants=self.manifest_constants, ) looker_viewfile = LookerViewFile.from_looker_dict( diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py index 2bcae4d46b8d52..60983f04bafa05 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py @@ -2,7 +2,7 @@ import pathlib import re from abc import ABC, abstractmethod -from typing import Any, ClassVar, Dict, List, Optional, Set, Union +from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Set, Union from deepmerge import always_merger from liquid import Undefined @@ -27,8 +27,12 @@ from datahub.ingestion.source.looker.lookml_config import ( DERIVED_VIEW_PATTERN, LookMLSourceConfig, + LookMLSourceReport, ) +if TYPE_CHECKING: + from datahub.ingestion.source.looker.looker_dataclasses import LookerConstant + logger = logging.getLogger(__name__) @@ -82,7 +86,12 @@ def liquid_variable_with_default(self, text: str) -> dict: return self._create_new_liquid_variables_with_default(variables=variables) -def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str: +def resolve_liquid_variable( + text: str, + view_name: str, + liquid_variable: Dict[Any, Any], + report: LookMLSourceReport, +) -> str: # Set variable value to NULL if not present in liquid_variable dictionary Undefined.__str__ = lambda instance: "NULL" # type: ignore try: @@ -96,6 +105,7 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str: # Resolve liquid template return create_template(text).render(liquid_variable) except LiquidSyntaxError as e: + # TODO: Will add warning once we get rid of duplcate warning message 
for same view logger.warning(f"Unsupported liquid template encountered. error [{e.message}]") # TODO: There are some tag specific to looker and python-liquid library does not understand them. currently # we are not parsing such liquid template. @@ -103,6 +113,7 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str: # See doc: https://cloud.google.com/looker/docs/templated-filters and look for { % condition region %} # order.region { % endcondition %} except CustomTagException as e: + # TODO: Will add warning once we get rid of duplicate warning message for same view logger.warning(e) logger.debug(e, exc_info=e) @@ -192,15 +203,20 @@ class LookMLViewTransformer(ABC): source_config: LookMLSourceConfig - def __init__(self, source_config: LookMLSourceConfig): + def __init__( + self, + source_config: LookMLSourceConfig, + reporter: LookMLSourceReport, + ): self.source_config = source_config + self.reporter = reporter def transform(self, view: dict) -> dict: value_to_transform: Optional[str] = None - # is_attribute_supported check is required because not all transformer works on all attributes in current - # case mostly all transformer works on sql_table_name and derived.sql attributes, - # however IncompleteSqlTransformer only transform the derived.sql attribute + # is_attribute_supported check is required because not all transformers work on all attributes in the current + # case, mostly all transformers work on sql_table_name and derived.sql attributes; + # however, IncompleteSqlTransformer only transforms the derived.sql attribute if SQL_TABLE_NAME in view and self.is_attribute_supported(SQL_TABLE_NAME): # Give precedence to already processed transformed view.sql_table_name to apply more transformation value_to_transform = view.get( @@ -252,7 +268,9 @@ class LiquidVariableTransformer(LookMLViewTransformer): def _apply_transformation(self, value: str, view: dict) -> str: return resolve_liquid_variable( text=value, - 
liquid_variable=self.source_config.liquid_variable, + liquid_variable=self.source_config.liquid_variables, + view_name=view["name"], + report=self.reporter, ) @@ -287,7 +305,7 @@ def _apply_transformation(self, value: str, view: dict) -> str: class DropDerivedViewPatternTransformer(LookMLViewTransformer): """ - drop ${} from datahub_transformed_sql_table_name and view["derived_table"]["datahub_transformed_sql_table_name"] values. + drop ${} from datahub_transformed_sql_table_name and view["derived_table"]["datahub_transformed_sql_table_name"] values. Example: transform ${employee_income_source.SQL_TABLE_NAME} to employee_income_source.SQL_TABLE_NAME """ @@ -308,8 +326,8 @@ class LookMlIfCommentTransformer(LookMLViewTransformer): evaluate_to_true_regx: str remove_if_comment_line_regx: str - def __init__(self, source_config: LookMLSourceConfig): - super().__init__(source_config=source_config) + def __init__(self, source_config: LookMLSourceConfig, reporter: LookMLSourceReport): + super().__init__(source_config=source_config, reporter=reporter) # This regx will keep whatever after -- if looker_environment -- self.evaluate_to_true_regx = r"-- if {} --".format( @@ -335,6 +353,61 @@ def _apply_transformation(self, value: str, view: dict) -> str: return self._apply_regx(value) +class LookmlConstantTransformer(LookMLViewTransformer): + """ + Replace LookML constants @{constant} from the manifest/configuration. + """ + + CONSTANT_PATTERN = r"@{(\w+)}" # Matches @{constant} + + def __init__( + self, + source_config: LookMLSourceConfig, + reporter: LookMLSourceReport, + manifest_constants: Dict[str, "LookerConstant"], + ): + super().__init__(source_config=source_config, reporter=reporter) + self.manifest_constants = manifest_constants + + def resolve_lookml_constant(self, text: str, view_name: Optional[str]) -> str: + """ + Resolves LookML constants (@{ }) from manifest or config. + Logs warnings for misplaced or missing variables. 
+ """ + + def replace_constants(match): + key = match.group(1) + # Resolve constant from config + if key in self.source_config.lookml_constants: + return str(self.source_config.lookml_constants.get(key)) + + # Resolve constant from manifest + if key in self.manifest_constants: + return self.manifest_constants[key].value + + # Check if it's a misplaced lookml constant + if key in self.source_config.liquid_variables: + self.reporter.warning( + title="Misplaced lookml constant", + message="Use 'lookml_constants' instead of 'liquid_variables'.", + context=f"Key {key}", + ) + return f"@{{{key}}}" + + self.reporter.warning( + title="LookML constant not found", + message="The constant is missing. Either add it under 'lookml_constants' in the config or define it in `manifest.lkml`.", + context=f"view-name: {view_name}, constant: {key}", + ) + return f"@{{{key}}}" + + # Resolve @{} (constant) + return re.sub(self.CONSTANT_PATTERN, replace_constants, text) + + def _apply_transformation(self, value: str, view: dict) -> str: + return self.resolve_lookml_constant(text=value, view_name=view.get("name")) + + class TransformedLookMlView: """ TransformedLookMlView is collecting output of LookMLViewTransformer and creating a new transformed LookML view. @@ -390,24 +463,35 @@ def view(self) -> dict: def process_lookml_template_language( source_config: LookMLSourceConfig, view_lkml_file_dict: dict, + reporter: LookMLSourceReport, + manifest_constants: Dict[str, "LookerConstant"] = {}, + resolve_constants: bool = False, ) -> None: if "views" not in view_lkml_file_dict: return transformers: List[LookMLViewTransformer] = [ LookMlIfCommentTransformer( - source_config=source_config + source_config=source_config, reporter=reporter ), # First evaluate the -- if -- comments. 
Looker does the same LiquidVariableTransformer( - source_config=source_config + source_config=source_config, reporter=reporter ), # Now resolve liquid variables DropDerivedViewPatternTransformer( - source_config=source_config + source_config=source_config, reporter=reporter ), # Remove any ${} symbol IncompleteSqlTransformer( - source_config=source_config + source_config=source_config, reporter=reporter ), # complete any incomplete sql ] + if resolve_constants: + transformers.append( + LookmlConstantTransformer( + source_config=source_config, + manifest_constants=manifest_constants, + reporter=reporter, + ), # Resolve @{} constant with its corresponding value + ) transformed_views: List[dict] = [] @@ -422,12 +506,18 @@ def process_lookml_template_language( def load_and_preprocess_file( path: Union[str, pathlib.Path], source_config: LookMLSourceConfig, + reporter: LookMLSourceReport, + manifest_constants: Dict[str, "LookerConstant"] = {}, + resolve_constants: bool = False, ) -> dict: parsed = load_lkml(path) process_lookml_template_language( view_lkml_file_dict=parsed, + reporter=reporter, source_config=source_config, + manifest_constants=manifest_constants, + resolve_constants=resolve_constants, ) return parsed diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py index 4d3255c3c0715b..75de6f1fe3c6e1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py @@ -161,13 +161,27 @@ class LookMLSourceConfig( description="When enabled, looker refinement will be processed to adapt an existing view.", ) - liquid_variable: Dict[Any, Any] = Field( + liquid_variables: Dict[Any, Any] = Field( {}, - description="A dictionary containing Liquid variables and their corresponding values, utilized in SQL-defined " + description="A dictionary containing Liquid variables with 
their corresponding values, utilized in SQL-defined " "derived views. The Liquid template will be resolved in view.derived_table.sql and " "view.sql_table_name. Defaults to an empty dictionary.", ) + _liquid_variable_deprecated = pydantic_renamed_field( + old_name="liquid_variable", new_name="liquid_variables", print_warning=True + ) + + lookml_constants: Dict[str, str] = Field( + {}, + description=( + "A dictionary containing LookML constants (`@{constant_name}`) and their values. " + "If a constant is defined here, its value takes precedence over the `manifest.lkml` file. " + "If not found in this config, the value from the manifest will be used instead. " + "Defaults to an empty dictionary." + ), + ) + looker_environment: Literal["prod", "dev"] = Field( "prod", description="A looker prod or dev environment. " diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index 9a937840a5012f..5f39821ee6c2e3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -43,6 +43,7 @@ from datahub.ingestion.source.looker.looker_connection import ( get_connection_def_based_on_connection_string, ) +from datahub.ingestion.source.looker.looker_dataclasses import LookerConstant from datahub.ingestion.source.looker.looker_lib_wrapper import LookerAPI from datahub.ingestion.source.looker.looker_template_language import ( load_and_preprocess_file, @@ -254,6 +255,7 @@ class LookerManifest: # This must be set if the manifest has local_dependency entries. 
# See https://cloud.google.com/looker/docs/reference/param-manifest-project-name project_name: Optional[str] + constants: Optional[List[Dict[str, str]]] local_dependencies: List[str] remote_dependencies: List[LookerRemoteDependency] @@ -310,11 +312,14 @@ def __init__(self, config: LookMLSourceConfig, ctx: PipelineContext): "manage_models permission enabled on this API key." ) from err + self.manifest_constants: Dict[str, "LookerConstant"] = {} + def _load_model(self, path: str) -> LookerModel: logger.debug(f"Loading model from file {path}") parsed = load_and_preprocess_file( path=path, + reporter=self.reporter, source_config=self.source_config, ) @@ -500,27 +505,33 @@ def get_project_name(self, model_name: str) -> str: def get_manifest_if_present(self, folder: pathlib.Path) -> Optional[LookerManifest]: manifest_file = folder / "manifest.lkml" - if manifest_file.exists(): - manifest_dict = load_and_preprocess_file( - path=manifest_file, source_config=self.source_config - ) - manifest = LookerManifest( - project_name=manifest_dict.get("project_name"), - local_dependencies=[ - x["project"] for x in manifest_dict.get("local_dependencys", []) - ], - remote_dependencies=[ - LookerRemoteDependency( - name=x["name"], url=x["url"], ref=x.get("ref") - ) - for x in manifest_dict.get("remote_dependencys", []) - ], + if not manifest_file.exists(): + self.reporter.info( + message="manifest.lkml file missing from project", + context=str(manifest_file), ) - return manifest - else: return None + manifest_dict = load_and_preprocess_file( + path=manifest_file, + source_config=self.source_config, + reporter=self.reporter, + ) + + manifest = LookerManifest( + project_name=manifest_dict.get("project_name"), + constants=manifest_dict.get("constants", []), + local_dependencies=[ + x["project"] for x in manifest_dict.get("local_dependencys", []) + ], + remote_dependencies=[ + LookerRemoteDependency(name=x["name"], url=x["url"], ref=x.get("ref")) + for x in 
manifest_dict.get("remote_dependencys", []) + ], + ) + return manifest + def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: return [ *super().get_workunit_processors(), @@ -575,7 +586,10 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.base_projects_folder[project] = p_ref self._recursively_check_manifests( - tmp_dir, BASE_PROJECT_NAME, visited_projects + tmp_dir, + BASE_PROJECT_NAME, + visited_projects, + self.manifest_constants, ) yield from self.get_internal_workunits() @@ -588,7 +602,11 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) def _recursively_check_manifests( - self, tmp_dir: str, project_name: str, project_visited: Set[str] + self, + tmp_dir: str, + project_name: str, + project_visited: Set[str], + manifest_constants: Dict[str, "LookerConstant"], ) -> None: if project_name in project_visited: return @@ -605,6 +623,14 @@ def _recursively_check_manifests( if not manifest: return + if manifest.constants: + for constant in manifest.constants: + if constant.get("name") and constant.get("value"): + manifest_constants[constant["name"]] = LookerConstant( + name=constant["name"], + value=constant["value"], + ) + # Special case handling if the root project has a name in the manifest file. 
if project_name == BASE_PROJECT_NAME and manifest.project_name: if ( @@ -664,21 +690,27 @@ def _recursively_check_manifests( project_visited.add(project_name) else: self._recursively_check_manifests( - tmp_dir, remote_project.name, project_visited + tmp_dir, + remote_project.name, + project_visited, + manifest_constants, ) for project in manifest.local_dependencies: - self._recursively_check_manifests(tmp_dir, project, project_visited) + self._recursively_check_manifests( + tmp_dir, project, project_visited, manifest_constants + ) def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 assert self.source_config.base_folder - viewfile_loader = LookerViewFileLoader( self.source_config.project_name, self.base_projects_folder, self.reporter, self.source_config, + self.manifest_constants, ) + logger.debug(f"LookML Constants : {', '.join(self.manifest_constants.keys())}") # Some views can be mentioned by multiple 'include' statements and can be included via different connections. 
diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py index ac011324684189..7baaccbbaa664b 100644 --- a/metadata-ingestion/tests/integration/lookml/test_lookml.py +++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py @@ -14,13 +14,20 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.source.file import read_metadata_file -from datahub.ingestion.source.looker.looker_dataclasses import LookerModel +from datahub.ingestion.source.looker.looker_dataclasses import ( + LookerConstant, + LookerModel, +) from datahub.ingestion.source.looker.looker_template_language import ( + LookmlConstantTransformer, SpecialVariable, load_and_preprocess_file, resolve_liquid_variable, ) -from datahub.ingestion.source.looker.lookml_config import LookMLSourceConfig +from datahub.ingestion.source.looker.lookml_config import ( + LookMLSourceConfig, + LookMLSourceReport, +) from datahub.ingestion.source.looker.lookml_refinement import LookerRefinementResolver from datahub.ingestion.source.looker.lookml_source import LookMLSource from datahub.metadata.schema_classes import ( @@ -835,8 +842,7 @@ def test_manifest_parser(pytestconfig: pytest.Config) -> None: manifest_file = test_resources_dir / "lkml_manifest_samples/complex-manifest.lkml" manifest = load_and_preprocess_file( - path=manifest_file, - source_config=MagicMock(), + path=manifest_file, source_config=MagicMock(), reporter=LookMLSourceReport() ) assert manifest @@ -900,6 +906,31 @@ def test_view_to_view_lineage_and_liquid_template(pytestconfig, tmp_path, mock_t ) +@freeze_time(FROZEN_TIME) +def test_view_to_view_lineage_and_lookml_constant(pytestconfig, tmp_path, mock_time): + test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml" + mce_out_file = "vv_lineage_lookml_constant_golden.json" + + new_recipe = get_default_recipe( + 
f"{tmp_path}/{mce_out_file}", + f"{test_resources_dir}/vv-lineage-and-lookml-constant", + ) + + new_recipe["source"]["config"]["lookml_constants"] = {"winner_table": "dev"} + + pipeline = Pipeline.create(new_recipe) + pipeline.run() + pipeline.pretty_print_summary() + assert pipeline.source.get_report().warnings.total_elements == 1 + + golden_path = test_resources_dir / "vv_lineage_lookml_constant_golden.json" + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / mce_out_file, + golden_path=golden_path, + ) + + @freeze_time(FROZEN_TIME) def test_special_liquid_variables(): text: str = """{% assign source_table_variable = "source_table" | sql_quote | non_existing_filter_where_it_should_not_fail %} @@ -966,6 +997,8 @@ def test_special_liquid_variables(): actual_text = resolve_liquid_variable( text=text, liquid_variable=input_liquid_variable, + report=LookMLSourceReport(), + view_name="test", ) expected_text: str = ( @@ -976,6 +1009,108 @@ def test_special_liquid_variables(): assert actual_text == expected_text +@pytest.mark.parametrize( + "view, expected_result, warning_expected", + [ + # Case 1: Single constant replacement in sql_table_name + ( + {"sql_table_name": "@{constant1}.kafka_streaming.events"}, + {"datahub_transformed_sql_table_name": "value1.kafka_streaming.events"}, + False, + ), + # Case 2: Single constant replacement with config-defined constant + ( + {"sql_table_name": "SELECT * FROM @{constant2}"}, + {"datahub_transformed_sql_table_name": "SELECT * FROM value2"}, + False, + ), + # Case 3: Multiple constants in a derived_table SQL query + ( + {"derived_table": {"sql": "SELECT @{constant1}, @{constant3}"}}, + { + "derived_table": { + "datahub_transformed_sql": "SELECT value1, manifest_value3" + } + }, + False, + ), + # Case 4: Non-existent constant in sql_table_name + ( + {"sql_table_name": "SELECT * FROM @{nonexistent}"}, + {"datahub_transformed_sql_table_name": "SELECT * FROM @{nonexistent}"}, + False, + ), + # Case 5: View with 
unsupported attribute + ({"unsupported_attribute": "SELECT * FROM @{constant1}"}, {}, False), + # Case 6: View with no transformable attributes + ( + {"sql_table_name": "SELECT * FROM table_name"}, + {"datahub_transformed_sql_table_name": "SELECT * FROM table_name"}, + False, + ), + # Case 7: Constants only in manifest_constants + ( + {"sql_table_name": "SELECT @{constant3}"}, + {"datahub_transformed_sql_table_name": "SELECT manifest_value3"}, + False, + ), + # Case 8: Constants only in lookml_constants + ( + {"sql_table_name": "SELECT @{constant2}"}, + {"datahub_transformed_sql_table_name": "SELECT value2"}, + False, + ), + # Case 9: Multiple unsupported attributes + ( + { + "unsupported_attribute": "SELECT @{constant1}", + "another_unsupported_attribute": "SELECT @{constant2}", + }, + {}, + False, + ), + # Case 10: Misplaced lookml constant + ( + {"sql_table_name": "@{constant1}.@{constant2}.@{constant4}"}, + {"datahub_transformed_sql_table_name": "value1.value2.@{constant4}"}, + True, + ), + ], +) +@freeze_time(FROZEN_TIME) +def test_lookml_constant_transformer(view, expected_result, warning_expected): + """ + Test LookmlConstantTransformer with various view structures. 
+ """ + config = MagicMock() + report = MagicMock() + config.lookml_constants = { + "constant1": "value1", + "constant2": "value2", + } + config.liquid_variables = { + "constant4": "liquid_value1", + } + + transformer = LookmlConstantTransformer( + source_config=config, + reporter=report, + manifest_constants={ + "constant1": LookerConstant(name="constant1", value="manifest_value1"), + "constant3": LookerConstant(name="constant3", value="manifest_value3"), + }, + ) + + result = transformer.transform(view) + assert result == expected_result + if warning_expected: + report.warning.assert_called_once_with( + title="Misplaced lookml constant", + message="Use 'lookml_constants' instead of 'liquid_variables'.", + context="Key constant4", + ) + + @freeze_time(FROZEN_TIME) def test_field_tag_ingest(pytestconfig, tmp_path, mock_time): test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml" diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml index d570e0ecdb5b22..4de4df34e15d1e 100644 --- a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml @@ -39,4 +39,4 @@ explore: rent_as_employee_income_source { } explore: child_view { -} \ No newline at end of file +} diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/data.model.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/data.model.lkml new file mode 100644 index 00000000000000..6f425c469c9546 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/data.model.lkml @@ -0,0 +1,10 @@ +connection: "my_connection" + +include: "star_award_winner.view.lkml" +include: "star_award_winner_dev.view.lkml" + +explore: star_award_winner { +} 
+ +explore: star_award_winner_dev { +} diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/manifest.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/manifest.lkml new file mode 100644 index 00000000000000..fcdd71a6262945 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/manifest.lkml @@ -0,0 +1,15 @@ +constant: customer_support_db { + value: "star_award_winner_year" + export: none +} + +constant: customer_support_schema { + value: "public" + export: none +} + +constant: customer_support_table { + value: "winner" + export: none +} + diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/star_award_winner.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/star_award_winner.view.lkml new file mode 100644 index 00000000000000..fd0fcf33c376e7 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/star_award_winner.view.lkml @@ -0,0 +1,12 @@ +view: star_award_winner { + sql_table_name: @{customer_support_db}.@{customer_support_schema}.@{invalid_constant};; + + + dimension: id { + label: "id" + primary_key: yes + type: number + sql: ${TABLE}.id ;; + } + +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/star_award_winner_dev.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/star_award_winner_dev.view.lkml new file mode 100644 index 00000000000000..0c2417251fc15c --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/star_award_winner_dev.view.lkml @@ -0,0 +1,17 @@ +view: star_award_winner_dev { + sql_table_name: @{customer_support_db}.@{customer_support_schema}.@{winner_table};; + + + dimension: id { + label: "id" + primary_key: yes + type: number + sql: ${TABLE}.id ;; + } + + dimension: name { + type: string + 
sql: ${TABLE}.name;; + } + +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/vv_lineage_lookml_constant_golden.json b/metadata-ingestion/tests/integration/lookml/vv_lineage_lookml_constant_golden.json new file mode 100644 index 00000000000000..296f09b697ee4d --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv_lineage_lookml_constant_golden.json @@ -0,0 +1,514 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "looker", + "env": "PROD", + "project_name": "lkml_samples" + }, + "name": "lkml_samples", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "LookML Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "view: star_award_winner {\n sql_table_name: @{customer_support_db}.@{customer_support_schema}.@{invalid_constant};;\n\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n\n}", + "viewLanguage": "lookml" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner,PROD)", + "aspects": [ + { + 
"com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,star_award_winner_year.public.@{invalid_constant},PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,star_award_winner_year.public.@{invalid_constant},PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner,PROD),id)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "star_award_winner", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "nullable": false, + "description": "", + "label": "id", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": true + } + ], + "primaryKeys": [ + "id" + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "star_award_winner.view.lkml", + "looker.model": "data" + }, + "name": "star_award_winner", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 
1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner_dev,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner_dev,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "view: star_award_winner_dev {\n sql_table_name: @{customer_support_db}.@{customer_support_schema}.@{winner_table};;\n\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n\n dimension: name {\n type: string\n sql: ${TABLE}.name;;\n }\n\n}", + "viewLanguage": "lookml" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner_dev,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + 
"systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner_dev,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,star_award_winner_year.public.dev,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,star_award_winner_year.public.dev,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner_dev,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,star_award_winner_year.public.dev,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner_dev,PROD),name)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "star_award_winner_dev", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + 
"rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "nullable": false, + "description": "", + "label": "id", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": true + }, + { + "fieldPath": "name", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [ + "id" + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "star_award_winner_dev.view.lkml", + "looker.model": "data" + }, + "name": "star_award_winner_dev", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner_dev,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Dimension", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Dimension" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file