Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ingest/looker): include project name in model/explore properties #1

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1207,15 +1207,19 @@ def _to_metadata_events( # noqa: C901
dataset_snapshot.aspects.append(browse_paths)
dataset_snapshot.aspects.append(StatusClass(removed=False))

custom_properties = {}
if self.label is not None:
custom_properties["looker.explore.label"] = str(self.label)
if self.source_file is not None:
custom_properties["looker.explore.file"] = str(self.source_file)
custom_properties = {
"project": self.project_name,
"model": self.model_name,
"looker.explore.label": self.label,
"looker.explore.name": self.name,
"looker.explore.file": self.source_file,
}
dataset_props = DatasetPropertiesClass(
name=str(self.label) if self.label else LookerUtil._display_name(self.name),
description=self.description,
customProperties=custom_properties,
customProperties={
k: v for k, v in custom_properties.items() if v is not None
},
)
dataset_props.externalUrl = self._get_url(base_url)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,26 +139,21 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
"""

platform = "looker"
source_config: LookerDashboardSourceConfig
reporter: LookerDashboardSourceReport
user_registry: LookerUserRegistry
reachable_look_registry: Set[
str
] # Keep track of look-id which are reachable from Dashboard

def __init__(self, config: LookerDashboardSourceConfig, ctx: PipelineContext):
super().__init__(config, ctx)
self.source_config = config
self.reporter = LookerDashboardSourceReport()
self.source_config: LookerDashboardSourceConfig = config
self.reporter: LookerDashboardSourceReport = LookerDashboardSourceReport()
self.looker_api: LookerAPI = LookerAPI(self.source_config)
self.user_registry = LookerUserRegistry(self.looker_api)
self.explore_registry = LookerExploreRegistry(
self.user_registry: LookerUserRegistry = LookerUserRegistry(self.looker_api)
self.explore_registry: LookerExploreRegistry = LookerExploreRegistry(
self.looker_api, self.reporter, self.source_config
)
self.reporter._looker_explore_registry = self.explore_registry
self.reporter._looker_api = self.looker_api

self.reachable_look_registry = set()
# Keep track of look-id which are reachable from Dashboard
self.reachable_look_registry: Set[str] = set()

# (model, explore) -> list of charts/looks/dashboards that reference this explore
# The list values are used purely for debugging purposes.
Expand Down Expand Up @@ -868,21 +863,31 @@ def _make_explore_metadata_events(
) -> Iterable[
Union[MetadataChangeEvent, MetadataChangeProposalWrapper, MetadataWorkUnit]
]:
if self.source_config.emit_used_explores_only:
explores_to_fetch = list(self.reachable_explores.keys())
else:
if not self.source_config.emit_used_explores_only:
explores_to_fetch = list(self.list_all_explores())
else:
# We don't keep track of project names for each explore right now.
# Because project names are just used for a custom property, it's
# fine to set them to None.
# TODO: Track project names for each explore.
explores_to_fetch = [
(None, model, explore)
for (model, explore) in self.reachable_explores.keys()
]
explores_to_fetch.sort()

processed_models: List[str] = []

for model, _ in explores_to_fetch:
for project_name, model, _ in explores_to_fetch:
if model not in processed_models:
model_key = gen_model_key(self.source_config, model)
yield from gen_containers(
container_key=model_key,
name=model,
sub_types=[BIContainerSubTypes.LOOKML_MODEL],
extra_properties=(
{"project": project_name} if project_name is not None else None
),
)
yield MetadataChangeProposalWrapper(
entityUrn=model_key.as_urn(),
Expand All @@ -896,7 +901,7 @@ def _make_explore_metadata_events(
self.reporter.total_explores = len(explores_to_fetch)
for future in BackpressureAwareExecutor.map(
self.fetch_one_explore,
((model, explore) for (model, explore) in explores_to_fetch),
((model, explore) for (_project, model, explore) in explores_to_fetch),
max_workers=self.source_config.max_threads,
):
events, explore_id, start_time, end_time = future.result()
Expand All @@ -907,7 +912,7 @@ def _make_explore_metadata_events(
f"Running time of fetch_one_explore for {explore_id}: {(end_time - start_time).total_seconds()}"
)

def list_all_explores(self) -> Iterable[Tuple[str, str]]:
def list_all_explores(self) -> Iterable[Tuple[Optional[str], str, str]]:
# yields (project_name, model, explore) tuples; project_name may be None

for model in self.looker_api.all_lookml_models():
Expand All @@ -916,7 +921,7 @@ def list_all_explores(self) -> Iterable[Tuple[str, str]]:
for explore in model.explores:
if explore.name is None:
continue
yield (model.name, explore.name)
yield (model.project_name, model.name, explore.name)

def fetch_one_explore(
self, model: str, explore: str
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"description": "lorem ipsum",
"charts": [],
"datasets": [],
"dashboards": [],
"lastModified": {
"created": {
"time": 1586847600000,
Expand Down Expand Up @@ -440,7 +441,10 @@
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"project": "lkml_samples",
"model": "bogus data",
"looker.explore.label": "My Explore View",
"looker.explore.name": "my_view",
"looker.explore.file": "test_source_file.lkml"
},
"externalUrl": "https://looker.company.com/explore/bogus data/my_view",
Expand Down Expand Up @@ -616,7 +620,10 @@
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"project": "lkml_samples",
"model": "data",
"looker.explore.label": "My Explore View",
"looker.explore.name": "my_view",
"looker.explore.file": "test_source_file.lkml"
},
"externalUrl": "https://looker.company.com/explore/data/my_view",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"description": "lorem ipsum",
"charts": [],
"datasets": [],
"dashboards": [],
"lastModified": {
"created": {
"time": 1586847600000,
Expand Down Expand Up @@ -282,7 +283,10 @@
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"project": "lkml_samples",
"model": "data",
"looker.explore.label": "My Explore View",
"looker.explore.name": "my_view",
"looker.explore.file": "test_source_file.lkml"
},
"externalUrl": "https://looker.company.com/explore/data/my_view",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@
"urn:li:chart:(looker,dashboard_elements.2)"
],
"datasets": [],
"dashboards": [],
"lastModified": {
"created": {
"time": 1586847600000,
Expand Down Expand Up @@ -520,7 +521,10 @@
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"project": "looker_hub",
"model": "data",
"looker.explore.label": "My Explore View",
"looker.explore.name": "my_view",
"looker.explore.file": "test_source_file.lkml"
},
"externalUrl": "https://looker.company.com/explore/data/my_view",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@
"urn:li:chart:(looker,dashboard_elements.2)"
],
"datasets": [],
"dashboards": [],
"lastModified": {
"created": {
"time": 1586847600000,
Expand Down Expand Up @@ -520,7 +521,10 @@
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"project": "looker_hub",
"model": "data",
"looker.explore.label": "My Explore View",
"looker.explore.name": "my_view",
"looker.explore.file": "test_source_file.lkml"
},
"externalUrl": "https://looker.company.com/explore/data/my_view",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@
"description": "third",
"charts": [],
"datasets": [],
"dashboards": [],
"lastModified": {
"created": {
"time": 1586847600000,
Expand Down Expand Up @@ -613,7 +614,10 @@
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"project": "lkml_samples",
"model": "data",
"looker.explore.label": "My Explore View",
"looker.explore.name": "my_view",
"looker.explore.file": "test_source_file.lkml"
},
"externalUrl": "https://looker.company.com/explore/data/my_view",
Expand Down
Loading