Skip to content

Commit

Permalink
feat(ingestion/lookml): resolve access notation for LookML Constant (d…
Browse files Browse the repository at this point in the history
…atahub-project#12277)

Co-authored-by: Siddique Bagwan <[email protected]>
Co-authored-by: Mayuri Nehate <[email protected]>
  • Loading branch information
3 people authored Jan 28, 2025
1 parent d8ac6cd commit 563656c
Show file tree
Hide file tree
Showing 14 changed files with 962 additions and 52 deletions.
14 changes: 14 additions & 0 deletions metadata-ingestion/docs/sources/looker/looker_recipe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,18 @@ source:
client_id: ${LOOKER_CLIENT_ID}
client_secret: ${LOOKER_CLIENT_SECRET}

# Liquid variables
# liquid_variables:
# _user_attributes:
# looker_env: "dev"
# dev_database_prefix: "employee"
# dev_schema_prefix: "public"
# dw_eff_dt_date:
# _is_selected: true
# source_region: "ap-south-1"
# db: "test-db"

# LookML Constants
# lookml_constants:
# star_award_winner_year: "public.winner_2025"
# sink configs
54 changes: 46 additions & 8 deletions metadata-ingestion/docs/sources/looker/lookml_post.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,49 @@
#### Configuration Notes

1. If a view contains a liquid template (e.g. `sql_table_name: {{ user_attributes['db']}}.kafka_streaming.events }}`, with `db=ANALYTICS_PROD`), then you will need to specify the values of those variables in the `liquid_variable` config as shown below:
```yml
liquid_variable:
user_attributes:
db: ANALYTICS_PROD
```
### Configuration Notes

1. Handling Liquid Templates

If a view contains a liquid template, for example:

```
sql_table_name: {{ user_attributes['db'] }}.kafka_streaming.events
```

where `db=ANALYTICS_PROD`, you need to specify the values of those variables in the liquid_variables configuration as shown below:

```yml
liquid_variables:
user_attributes:
db: ANALYTICS_PROD
```
2. Resolving LookML Constants
If a view contains a LookML constant, for example:
```
sql_table_name: @{db}.kafka_streaming.events;
```
Ingestion attempts to resolve its value by looking at the project manifest files:
```yml
manifest.lkml
constant: db {
value: "ANALYTICS_PROD"
}
```

- If the constant's value is not resolved or is incorrectly resolved, you can specify the `lookml_constants` configuration in the ingestion recipe as shown below. The constant value in the recipe takes precedence over constant values resolved from the manifest.

```yml
lookml_constants:
db: ANALYTICS_PROD
```


**Additional Notes**

Although liquid variables and LookML constants can be used anywhere in LookML code, their values are currently resolved only for LookML views by DataHub LookML ingestion. This behavior is sufficient since LookML ingestion processes only views and their upstream dependencies.

### Multi-Project LookML (Advanced)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ class LookerField:
sql: Optional[str]


@dataclass
class LookerConstant:
    """A LookML constant: the constant's name paired with its string value."""

    # Identifier of the constant, i.e. the `db` in an `@{db}` reference.
    name: str
    # Text substituted for the reference when the constant is resolved.
    value: str


@dataclass
class LookerModel:
connection: str
Expand Down Expand Up @@ -75,6 +81,7 @@ def from_looker_dict(
try:
parsed = load_and_preprocess_file(
path=included_file,
reporter=reporter,
source_config=source_config,
)
included_explores = parsed.get("explores", [])
Expand Down Expand Up @@ -217,6 +224,7 @@ def resolve_includes(
try:
parsed = load_and_preprocess_file(
path=included_file,
reporter=reporter,
source_config=source_config,
)
seen_so_far.add(included_file)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
from typing import Dict, Optional

from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile
from datahub.ingestion.source.looker.looker_dataclasses import (
LookerConstant,
LookerViewFile,
)
from datahub.ingestion.source.looker.looker_template_language import (
load_and_preprocess_file,
)
Expand All @@ -30,12 +33,14 @@ def __init__(
base_projects_folder: Dict[str, pathlib.Path],
reporter: LookMLSourceReport,
source_config: LookMLSourceConfig,
manifest_constants: Dict[str, LookerConstant] = {},
) -> None:
self.viewfile_cache: Dict[str, Optional[LookerViewFile]] = {}
self._root_project_name = root_project_name
self._base_projects_folder = base_projects_folder
self.reporter = reporter
self.source_config = source_config
self.manifest_constants = manifest_constants

def _load_viewfile(
self, project_name: str, path: str, reporter: LookMLSourceReport
Expand Down Expand Up @@ -71,9 +76,15 @@ def _load_viewfile(
try:
logger.debug(f"Loading viewfile {path}")

# load_and_preprocess_file is called multiple times for loading view files from multiple flows.
# Flag resolve_constants is a hack to avoid passing around manifest_constants from all of the flows.
# This is fine as rest of flows do not need resolution of constants.
parsed = load_and_preprocess_file(
path=path,
reporter=self.reporter,
source_config=self.source_config,
resolve_constants=True,
manifest_constants=self.manifest_constants,
)

looker_viewfile = LookerViewFile.from_looker_dict(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pathlib
import re
from abc import ABC, abstractmethod
from typing import Any, ClassVar, Dict, List, Optional, Set, Union
from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Set, Union

from deepmerge import always_merger
from liquid import Undefined
Expand All @@ -27,8 +27,12 @@
from datahub.ingestion.source.looker.lookml_config import (
DERIVED_VIEW_PATTERN,
LookMLSourceConfig,
LookMLSourceReport,
)

if TYPE_CHECKING:
from datahub.ingestion.source.looker.looker_dataclasses import LookerConstant

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -82,7 +86,12 @@ def liquid_variable_with_default(self, text: str) -> dict:
return self._create_new_liquid_variables_with_default(variables=variables)


def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str:
def resolve_liquid_variable(
text: str,
view_name: str,
liquid_variable: Dict[Any, Any],
report: LookMLSourceReport,
) -> str:
# Set variable value to NULL if not present in liquid_variable dictionary
Undefined.__str__ = lambda instance: "NULL" # type: ignore
try:
Expand All @@ -96,13 +105,15 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str:
# Resolve liquid template
return create_template(text).render(liquid_variable)
except LiquidSyntaxError as e:
# TODO: Will add warning once we get rid of duplicate warning message for same view
logger.warning(f"Unsupported liquid template encountered. error [{e.message}]")
# TODO: There are some tag specific to looker and python-liquid library does not understand them. currently
# we are not parsing such liquid template.
#
# See doc: https://cloud.google.com/looker/docs/templated-filters and look for { % condition region %}
# order.region { % endcondition %}
except CustomTagException as e:
# TODO: Will add warning once we get rid of duplicate warning message for same view
logger.warning(e)
logger.debug(e, exc_info=e)

Expand Down Expand Up @@ -192,15 +203,20 @@ class LookMLViewTransformer(ABC):

source_config: LookMLSourceConfig

def __init__(self, source_config: LookMLSourceConfig):
def __init__(
self,
source_config: LookMLSourceConfig,
reporter: LookMLSourceReport,
):
self.source_config = source_config
self.reporter = reporter

def transform(self, view: dict) -> dict:
value_to_transform: Optional[str] = None

# is_attribute_supported check is required because not all transformer works on all attributes in current
# case mostly all transformer works on sql_table_name and derived.sql attributes,
# however IncompleteSqlTransformer only transform the derived.sql attribute
# is_attribute_supported check is required because not all transformers work on all attributes in the current
# case, mostly all transformers work on sql_table_name and derived.sql attributes;
# however, IncompleteSqlTransformer only transforms the derived.sql attribute
if SQL_TABLE_NAME in view and self.is_attribute_supported(SQL_TABLE_NAME):
# Give precedence to already processed transformed view.sql_table_name to apply more transformation
value_to_transform = view.get(
Expand Down Expand Up @@ -252,7 +268,9 @@ class LiquidVariableTransformer(LookMLViewTransformer):
def _apply_transformation(self, value: str, view: dict) -> str:
return resolve_liquid_variable(
text=value,
liquid_variable=self.source_config.liquid_variable,
liquid_variable=self.source_config.liquid_variables,
view_name=view["name"],
report=self.reporter,
)


Expand Down Expand Up @@ -287,7 +305,7 @@ def _apply_transformation(self, value: str, view: dict) -> str:

class DropDerivedViewPatternTransformer(LookMLViewTransformer):
"""
drop ${} from datahub_transformed_sql_table_name and view["derived_table"]["datahub_transformed_sql_table_name"] values.
drop ${} from datahub_transformed_sql_table_name and view["derived_table"]["datahub_transformed_sql_table_name"] values.
Example: transform ${employee_income_source.SQL_TABLE_NAME} to employee_income_source.SQL_TABLE_NAME
"""
Expand All @@ -308,8 +326,8 @@ class LookMlIfCommentTransformer(LookMLViewTransformer):
evaluate_to_true_regx: str
remove_if_comment_line_regx: str

def __init__(self, source_config: LookMLSourceConfig):
super().__init__(source_config=source_config)
def __init__(self, source_config: LookMLSourceConfig, reporter: LookMLSourceReport):
super().__init__(source_config=source_config, reporter=reporter)

# This regx will keep whatever after -- if looker_environment --
self.evaluate_to_true_regx = r"-- if {} --".format(
Expand All @@ -335,6 +353,61 @@ def _apply_transformation(self, value: str, view: dict) -> str:
return self._apply_regx(value)


class LookmlConstantTransformer(LookMLViewTransformer):
    """
    Substitute every ``@{constant}`` reference in a view attribute with its value.

    Values supplied through the ``lookml_constants`` config win over values
    coming from ``manifest_constants``. References that cannot be resolved are
    left in the text untouched and reported as warnings.
    """

    CONSTANT_PATTERN = r"@{(\w+)}"  # Matches @{constant}

    def __init__(
        self,
        source_config: LookMLSourceConfig,
        reporter: LookMLSourceReport,
        manifest_constants: Dict[str, "LookerConstant"],
    ):
        super().__init__(source_config=source_config, reporter=reporter)
        self.manifest_constants = manifest_constants

    def _substitute(self, match: "re.Match", view_name: Optional[str]) -> str:
        """Return the replacement text for a single ``@{constant}`` match."""
        constant_name = match.group(1)

        # 1. Values from the recipe config take precedence.
        config_constants = self.source_config.lookml_constants
        if constant_name in config_constants:
            return str(config_constants[constant_name])

        # 2. Fall back to the constants passed in as manifest_constants.
        if constant_name in self.manifest_constants:
            return self.manifest_constants[constant_name].value

        # 3. Unresolved: warn, distinguishing a constant that was configured
        #    under liquid_variables from one that is missing altogether.
        if constant_name in self.source_config.liquid_variables:
            self.reporter.warning(
                title="Misplaced lookml constant",
                message="Use 'lookml_constants' instead of 'liquid_variables'.",
                context=f"Key {constant_name}",
            )
        else:
            self.reporter.warning(
                title="LookML constant not found",
                message="The constant is missing. Either add it under 'lookml_constants' in the config or define it in `manifest.lkml`.",
                context=f"view-name: {view_name}, constant: {constant_name}",
            )

        # The whole match is exactly "@{<constant_name>}", so returning it
        # leaves the original reference in place.
        return match.group(0)

    def resolve_lookml_constant(self, text: str, view_name: Optional[str]) -> str:
        """
        Resolve all ``@{ }`` constants found in ``text``.

        ``view_name`` is only used to give context in the warning raised for
        a constant that cannot be found.
        """
        return re.sub(
            self.CONSTANT_PATTERN,
            lambda found: self._substitute(found, view_name),
            text,
        )

    def _apply_transformation(self, value: str, view: dict) -> str:
        owning_view = view.get("name")
        return self.resolve_lookml_constant(text=value, view_name=owning_view)


class TransformedLookMlView:
"""
TransformedLookMlView is collecting output of LookMLViewTransformer and creating a new transformed LookML view.
Expand Down Expand Up @@ -390,24 +463,35 @@ def view(self) -> dict:
def process_lookml_template_language(
source_config: LookMLSourceConfig,
view_lkml_file_dict: dict,
reporter: LookMLSourceReport,
manifest_constants: Dict[str, "LookerConstant"] = {},
resolve_constants: bool = False,
) -> None:
if "views" not in view_lkml_file_dict:
return

transformers: List[LookMLViewTransformer] = [
LookMlIfCommentTransformer(
source_config=source_config
source_config=source_config, reporter=reporter
), # First evaluate the -- if -- comments. Looker does the same
LiquidVariableTransformer(
source_config=source_config
source_config=source_config, reporter=reporter
), # Now resolve liquid variables
DropDerivedViewPatternTransformer(
source_config=source_config
source_config=source_config, reporter=reporter
), # Remove any ${} symbol
IncompleteSqlTransformer(
source_config=source_config
source_config=source_config, reporter=reporter
), # complete any incomplete sql
]
if resolve_constants:
transformers.append(
LookmlConstantTransformer(
source_config=source_config,
manifest_constants=manifest_constants,
reporter=reporter,
), # Resolve @{} constant with its corresponding value
)

transformed_views: List[dict] = []

Expand All @@ -422,12 +506,18 @@ def process_lookml_template_language(
def load_and_preprocess_file(
    path: Union[str, pathlib.Path],
    source_config: LookMLSourceConfig,
    reporter: LookMLSourceReport,
    manifest_constants: Optional[Dict[str, "LookerConstant"]] = None,
    resolve_constants: bool = False,
) -> dict:
    """
    Load a LookML file and run the template-language processing over it.

    Args:
        path: Location of the LookML file, handed to ``load_lkml``.
        source_config: LookML source configuration forwarded to the processing step.
        reporter: Report object forwarded to the processing step.
        manifest_constants: Constants keyed by name, forwarded to the processing
            step. ``None`` (the default) is treated as an empty mapping.
        resolve_constants: Forwarded to the processing step to enable
            resolution of ``@{constant}`` references.

    Returns:
        The dict produced by ``load_lkml`` after it has been passed through
        ``process_lookml_template_language``.
    """
    # A None default replaces the former `= {}` so that no single dict object
    # is shared between calls (mutable-default-argument pitfall).
    constants: Dict[str, "LookerConstant"] = (
        {} if manifest_constants is None else manifest_constants
    )

    parsed = load_lkml(path)

    # process_lookml_template_language returns None, so the parsed dict is
    # returned as-is afterwards — presumably updated in place by that call.
    process_lookml_template_language(
        view_lkml_file_dict=parsed,
        reporter=reporter,
        source_config=source_config,
        manifest_constants=constants,
        resolve_constants=resolve_constants,
    )

    return parsed
Loading

0 comments on commit 563656c

Please sign in to comment.