Skip to content

Commit

Permalink
Merge pull request #64 from dbt-labs/fix/update-grain-of-multiple_sou…
Browse files Browse the repository at this point in the history
…rces_joined

Fix/update grain of multiple sources joined
  • Loading branch information
graciegoheen authored Apr 4, 2022
2 parents 3d6205e + e115ec7 commit 1f845ef
Show file tree
Hide file tree
Showing 10 changed files with 120 additions and 58 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ predefine every query or quandary your team might have. So decide as a team wher
### Multiple Sources Joined
#### Model

`fct_multiple_sources_joined` shows each parent/child relationship where a model references more than one source.
`fct_multiple_sources_joined` shows each instance where a model references more than one source.

#### Graph Example

Expand Down Expand Up @@ -235,7 +235,7 @@ This behavior may be observed in the case of a manually defined reference table
### Source Fanout
#### Model

`fct_source_fanout` shows each parent/child relationship where a source is the direct parent of multiple resources in the DAG.
`fct_source_fanout` shows each instance where a source is the direct parent of multiple resources in the DAG.

#### Graph Example

Expand Down
25 changes: 6 additions & 19 deletions integration_tests/seeds/dag/dag_seeds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,14 @@ seeds:
- name: test_fct_multiple_sources_joined
config:
column_types:
PARENT: varchar
PARENT_RESOURCE_TYPE: varchar
CHILD: varchar
CHILD_RESOURCE_TYPE: varchar
DISTANCE: int
SOURCE_PARENTS: varchar
tests:
- dbt_utils.equality:
compare_model: ref('fct_multiple_sources_joined')
compare_columns:
- parent
- parent_resource_type
- child
- child_resource_type
- distance
- source_parents

- name: test_fct_direct_join_to_source
config:
Expand Down Expand Up @@ -51,31 +45,24 @@ seeds:
config:
column_types:
PARENT: varchar
PARENT_RESOURCE_TYPE: varchar
CHILD: varchar
CHILD_RESOURCE_TYPE: varchar
DISTANCE: int
MODEL_CHILDREN: varchar
tests:
- dbt_utils.equality:
compare_model: ref('fct_source_fanout')
compare_columns:
- parent
- parent_resource_type
- child
- child_resource_type
- distance

- model_children
- name: test_fct_model_fanout
config:
column_types:
PARENT: varchar
NUM_OF_LEAF_CHILDREN: int
LEAF_CHILDREN: varchar
tests:
- dbt_utils.equality:
compare_model: ref('fct_model_fanout')
compare_columns:
- parent
- num_of_leaf_children
- leaf_children
- name: test_fct_bending_connections
config:
column_types:
Expand Down
4 changes: 2 additions & 2 deletions integration_tests/seeds/dag/test_fct_model_fanout.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
parent,num_of_leaf_children
fct_model_6,3
parent,leaf_children
fct_model_6,"report_1, report_2, report_3"
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
parent,parent_resource_type,child,child_resource_type,distance
source_1.table_1,source,stg_model_2,model,1
source_1.table_2,source,stg_model_2,model,1
child,source_parents
stg_model_2,"source_1.table_1, source_1.table_2"
8 changes: 3 additions & 5 deletions integration_tests/seeds/dag/test_fct_source_fanout.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
parent,parent_resource_type,child,child_resource_type,distance
source_1.table_1,source,stg_model_2,model,1
source_1.table_1,source,stg_model_1,model,1
source_1.table_2,source,stg_model_2,model,1
source_1.table_2,source,int_model_4,model,1
parent,model_children
source_1.table_2,"int_model_4, stg_model_2"
source_1.table_1,"stg_model_1, stg_model_2"
97 changes: 97 additions & 0 deletions macros/listagg.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
{% macro listagg(measure, delimiter_text="','", order_by_clause=none, limit_num=none) -%}
{{ return(adapter.dispatch('listagg') (measure, delimiter_text, order_by_clause, limit_num)) }}
{%- endmacro %}

{% macro default__listagg(measure, delimiter_text, order_by_clause, limit_num) -%}

{% if limit_num -%}
array_to_string(
array_slice(
array_agg(
{{ measure }}
){% if order_by_clause -%}
within group ({{ order_by_clause }})
{%- endif %}
,0
,{{ limit_num }}
),
{{ delimiter_text }}
)
{%- else %}
listagg(
{{ measure }},
{{ delimiter_text }}
)
{% if order_by_clause -%}
within group ({{ order_by_clause }})
{%- endif %}
{%- endif %}

{%- endmacro %}

{% macro bigquery__listagg(measure, delimiter_text, order_by_clause, limit_num) -%}

string_agg(
{{ measure }},
{{ delimiter_text }}
{% if order_by_clause -%}
{{ order_by_clause }}
{%- endif %}
{% if limit_num -%}
limit {{ limit_num }}
{%- endif %}
)

{%- endmacro %}

{% macro postgres__listagg(measure, delimiter_text, order_by_clause, limit_num) -%}

{% if limit_num -%}
array_to_string(
(array_agg(
{{ measure }}
{% if order_by_clause -%}
{{ order_by_clause }}
{%- endif %}
))[1:{{ limit_num }}],
{{ delimiter_text }}
)
{%- else %}
string_agg(
{{ measure }},
{{ delimiter_text }}
{% if order_by_clause -%}
{{ order_by_clause }}
{%- endif %}
)
{%- endif %}

{%- endmacro %}

{# if there are instances of delimiter_text within your measure, you cannot include a limit_num #}
{% macro redshift__listagg(measure, delimiter_text, order_by_clause, limit_num) -%}

{% if limit_num -%}
{% set delimiter_text_strip = delimiter_text|replace("'","") %}
{% set regex %}'([^{{ delimiter_text_strip }}]+{{ delimiter_text_strip }}){1,{{ limit_num - 1}}}[^{{ delimiter_text_strip }}]+'{% endset %}
regexp_substr(
listagg(
{{ measure }},
{{ delimiter_text }}
)
{% if order_by_clause -%}
within group ({{ order_by_clause }})
{%- endif %}
,{{ regex }}
)
{%- else %}
listagg(
{{ measure }},
{{ delimiter_text }}
)
{% if order_by_clause -%}
within group ({{ order_by_clause }})
{%- endif %}
{%- endif %}

{%- endmacro %}
4 changes: 2 additions & 2 deletions models/dag/dag.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ models:
tests:
- is_empty
- name: fct_multiple_sources_joined
description: "This table shows each parent/child relationship where a model references more than one source."
description: "This table shows each instance where a model references more than one source."
tests:
- is_empty
- name: fct_rejoining_of_upstream_concepts
Expand All @@ -31,7 +31,7 @@ models:
tests:
- is_empty
- name: fct_source_fanout
description: "This table shows each parent/child relationship where a source is the direct parent of multiple resources in the DAG."
description: "This table shows each instance where a source is the direct parent of multiple resources in the DAG."
tests:
- is_empty
- name: fct_staging_dependent_on_marts_or_intermediate
Expand Down
2 changes: 1 addition & 1 deletion models/dag/fct_model_fanout.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ models_without_children as (
model_fanout as (
select
all_dag_relationships.parent,
count(*) as num_of_leaf_children
{{ listagg('all_dag_relationships.child', "', '", 'order by all_dag_relationships.child') }} as leaf_children
from all_dag_relationships
inner join models_without_children
on all_dag_relationships.child = models_without_children.parent
Expand Down
15 changes: 2 additions & 13 deletions models/dag/fct_multiple_sources_joined.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
-- TO DO: maybe switch grain to one row per "issue" similar to fct_rejoining_of_upstream_concepts

-- this model finds cases where a model references more than one source
with direct_source_relationships as (
select
Expand All @@ -12,19 +10,10 @@ with direct_source_relationships as (
multiple_sources_joined as (
select
child,
count(*)
{{ listagg('parent', "', '", 'order by parent') }} as source_parents
from direct_source_relationships
group by 1
having count(*) > 1
),

final as (
select
direct_source_relationships.*
from direct_source_relationships
inner join multiple_sources_joined
on direct_source_relationships.child = multiple_sources_joined.child
order by direct_source_relationships.child
)

select * from final
select * from multiple_sources_joined
14 changes: 3 additions & 11 deletions models/dag/fct_source_fanout.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,16 @@ with direct_source_relationships as (
from {{ ref('int_all_dag_relationships') }}
where distance = 1
and parent_resource_type = 'source'
and child_resource_type = 'model'
),

source_fanout as (
select
parent,
count(*)
{{ listagg('child', "', '", 'order by child') }} as model_children
from direct_source_relationships
group by 1
having count(*) > 1
),

final as (
select
direct_source_relationships.*
from direct_source_relationships
inner join source_fanout
on direct_source_relationships.parent = source_fanout.parent
order by direct_source_relationships.parent
)

select * from final
select * from source_fanout

0 comments on commit 1f845ef

Please sign in to comment.