Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for BigQuery adapter #172

Merged
merged 18 commits into from
Aug 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
dfcf105
feat(gitignore): ignore .python-version file generated by pyenv
charles-astrafy Aug 11, 2022
8a61d11
Merge branch 'main' of github.com:charles-astrafy/dbt_artifacts
charles-astrafy Aug 12, 2022
4a96c83
Merge branch 'main' of github.com:charles-astrafy/dbt_artifacts
charles-astrafy Aug 12, 2022
0d5e732
feat(bigquery): New adapter around the generation of DML SQL and new …
Aug 8, 2022
2ef840c
feat(bigquery): Add bytes_processed to model_executions models
charles-astrafy Aug 12, 2022
2eaa540
feat(bigquery): Add configurations for integration tests for BigQuery
charles-astrafy Aug 12, 2022
0860a43
feat(bigquery): Update README with BigQuery and explanation trick to …
charles-astrafy Aug 12, 2022
66443f5
amend gitignore to ignore python_version file from pyenv
charles-astrafy Aug 12, 2022
15ef765
feat(bigquery): only add 'bytes_processed' column in case the adapter…
charles-astrafy Aug 15, 2022
cb1718e
resolve merge conflict from main origin repo
charles-astrafy Aug 28, 2022
e906195
Merge branch 'main' into main
NiallRees Aug 29, 2022
e618976
Delete .python-version
NiallRees Aug 29, 2022
a77e76f
debug model execution upload macro. Remove column 14 that is only nee…
charles-astrafy Aug 29, 2022
bcddd48
fix sqlfluff linting errors
charles-astrafy Aug 29, 2022
bf1f58c
Update README.md
NiallRees Aug 29, 2022
fa8d211
Update README.md
NiallRees Aug 29, 2022
6482d5f
Delete profiles.yml
NiallRees Aug 29, 2022
a34e8de
Apply suggestions from code review
NiallRees Aug 30, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/ci_test_package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -162,4 +162,3 @@ jobs:
title: "SQLFluff Lint"
input: "./annotations.json"


2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ logs/
Pipfile
Pipfile.lock
env.sh

.python_version
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ models:
+schema: your_destination_schema # optional, default is your target database
staging:
+schema: your_destination_schema # optional, default is your target schema
...
```

Note that the model materializations are defined in this package's `dbt_project.yml`, so do not set them in your project.
Expand Down Expand Up @@ -145,6 +146,7 @@ pipx install tox
```
tox -e integration_snowflake # For the Snowflake tests
tox -e integration_databricks # For the Databricks tests
tox -e integration_bigquery # For the BigQuery tests
```

The Spark tests require installing the [ODBC driver](https://www.databricks.com/spark/odbc-drivers-download). On a Mac, DBT_ENV_SPARK_DRIVER_PATH should be set to `/Library/simba/spark/lib/libsparkodbc_sbu.dylib`. Spark tests have not yet been added to the integration tests.
Expand Down
5 changes: 5 additions & 0 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ config-version: 2
require-dbt-version: ">=1.0.0"
profile: "dbt_artifacts"

clean-targets: # folders to be removed by `dbt clean`
- "target"
- "dbt_packages"
- "logs"

models:
dbt_artifacts:
+materialized: view
Expand Down
17 changes: 17 additions & 0 deletions macros/create_exposures_table_if_not_exists.sql
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,23 @@
)
{%- endmacro %}

{# BigQuery implementation of get_create_exposures_table_if_not_exists_statement,
   selected via adapter.dispatch. Returns the DDL for the dbt_artifacts
   `exposures` table. Column order must match the VALUES tuples produced by
   bigquery__get_exposures_dml_sql -- do not reorder.
   NOTE(review): despite the "_if_not_exists" name this is a plain CREATE TABLE;
   presumably the caller only runs it when the table is missing -- confirm. #}
{% macro bigquery__get_create_exposures_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
run_started_at TIMESTAMP,
name STRING,
type STRING,
owner JSON, {# serialized via parse_json in the upload macro #}
maturity STRING,
path STRING,
description STRING,
url STRING,
package_name STRING,
depends_on_nodes ARRAY<STRING>
)
{%- endmacro %}

{% macro default__get_create_exposures_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
Expand Down
22 changes: 22 additions & 0 deletions macros/create_invocations_table_if_not_exists.sql
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,28 @@
)
{%- endmacro %}

{# BigQuery DDL for the dbt_artifacts `invocations` table (adapter.dispatch
   target). One row per dbt invocation; column order must match the VALUES
   tuples produced by the invocations upload macro -- do not reorder. #}
{% macro bigquery__get_create_invocations_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
dbt_version STRING,
project_name STRING,
run_started_at TIMESTAMP,
dbt_command STRING,
full_refresh_flag BOOLEAN,
target_profile_name STRING,
target_name STRING,
target_schema STRING,
target_threads INTEGER,
dbt_cloud_project_id STRING,
dbt_cloud_job_id STRING,
dbt_cloud_run_id STRING,
dbt_cloud_run_reason_category STRING,
dbt_cloud_run_reason STRING,
env_vars JSON, {# key/value map of DBT_ENV_CUSTOM_ENV_* variables #}
dbt_vars JSON
)
{%- endmacro %}

{% macro default__get_create_invocations_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
Expand Down
19 changes: 19 additions & 0 deletions macros/create_model_executions_table_if_not_exists.sql
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,25 @@
)
{%- endmacro %}

{# BigQuery DDL for the dbt_artifacts `model_executions` table
   (adapter.dispatch target). Column order must match the VALUES tuples
   produced by the model executions upload macro -- do not reorder.
   Fix: the original qualified the table as `<schema>.<table>`, silently
   ignoring the `database_name` argument, so the table was created in the
   connection's default project instead of the configured one. Now fully
   qualified, consistent with every sibling create macro in this package. #}
{% macro bigquery__get_create_model_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
run_started_at TIMESTAMP,
was_full_refresh BOOLEAN,
thread_id STRING,
status STRING,
compile_started_at TIMESTAMP,
query_completed_at TIMESTAMP,
total_node_runtime FLOAT64,
rows_affected INTEGER,
bytes_processed INTEGER, {# BigQuery-specific column, populated from the adapter response #}
materialization STRING,
schema STRING,
name STRING
)
{%- endmacro %}

{% macro default__get_create_model_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
Expand Down
16 changes: 16 additions & 0 deletions macros/create_models_table_if_not_exists.sql
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,22 @@
)
{%- endmacro %}

{# BigQuery DDL for the dbt_artifacts `models` table (adapter.dispatch target).
   One row per model node per invocation; column order must match the VALUES
   tuples produced by the models upload macro -- do not reorder. #}
{% macro bigquery__get_create_models_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
run_started_at TIMESTAMP,
database STRING,
schema STRING,
name STRING,
depends_on_nodes ARRAY<STRING>,
package_name STRING,
path STRING,
checksum STRING,
materialization STRING
)
{%- endmacro %}

{% macro default__get_create_models_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
Expand Down
18 changes: 18 additions & 0 deletions macros/create_seed_executions_table_if_not_exists.sql
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,24 @@
)
{%- endmacro %}

{# BigQuery DDL for the dbt_artifacts `seed_executions` table
   (adapter.dispatch target). Column order must match the VALUES tuples
   produced by the seed executions upload macro -- do not reorder. #}
{% macro bigquery__get_create_seed_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
run_started_at TIMESTAMP,
was_full_refresh BOOLEAN,
thread_id STRING,
status STRING,
compile_started_at TIMESTAMP,
query_completed_at TIMESTAMP,
total_node_runtime FLOAT64,
rows_affected INTEGER,
materialization STRING,
schema STRING,
name STRING
)
{%- endmacro %}

{% macro default__get_create_seed_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
Expand Down
14 changes: 14 additions & 0 deletions macros/create_seeds_table_if_not_exists.sql
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,20 @@
)
{%- endmacro %}

{# BigQuery DDL for the dbt_artifacts `seeds` table (adapter.dispatch target).
   One row per seed node per invocation; column order must match the VALUES
   tuples produced by the seeds upload macro -- do not reorder. #}
{% macro bigquery__get_create_seeds_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
run_started_at TIMESTAMP,
database STRING,
schema STRING,
name STRING,
package_name STRING,
path STRING,
checksum STRING
)
{%- endmacro %}

{% macro default__get_create_seeds_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
Expand Down
18 changes: 18 additions & 0 deletions macros/create_snapshot_executions_table_if_not_exists.sql
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,24 @@
)
{%- endmacro %}

{# BigQuery DDL for the dbt_artifacts `snapshot_executions` table
   (adapter.dispatch target). Column order must match the VALUES tuples
   produced by the snapshot executions upload macro -- do not reorder.
   Fixes:
   1. The original used `create or replace table`, which would DROP all
      previously collected snapshot execution history any time this statement
      ran -- the macro exists to create the table only when it is missing, and
      every sibling macro uses plain `create table`.
   2. The original qualified the table as `<schema>.<table>`, silently ignoring
      the `database_name` argument. Now fully qualified like the siblings. #}
{% macro bigquery__get_create_snapshot_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
run_started_at TIMESTAMP,
was_full_refresh BOOLEAN,
thread_id STRING,
status STRING,
compile_started_at TIMESTAMP,
query_completed_at TIMESTAMP,
total_node_runtime FLOAT64,
rows_affected INTEGER,
materialization STRING,
schema STRING,
name STRING
)
{%- endmacro %}

{% macro default__get_create_snapshot_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
Expand Down
16 changes: 16 additions & 0 deletions macros/create_snapshots_table_if_not_exists.sql
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,22 @@
)
{%- endmacro %}

{# BigQuery DDL for the dbt_artifacts `snapshots` table (adapter.dispatch
   target). One row per snapshot node per invocation; column order must match
   the VALUES tuples produced by the snapshots upload macro -- do not reorder. #}
{% macro bigquery__get_create_snapshots_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
run_started_at TIMESTAMP,
database STRING,
schema STRING,
name STRING,
depends_on_nodes ARRAY<STRING>,
package_name STRING,
path STRING,
checksum STRING,
strategy STRING
)
{%- endmacro %}

{% macro default__get_create_snapshots_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
Expand Down
16 changes: 16 additions & 0 deletions macros/create_sources_table_if_not_exists.sql
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,22 @@
)
{%- endmacro %}

{# BigQuery DDL for the dbt_artifacts `sources` table (adapter.dispatch
   target). One row per source node per invocation; column order must match
   the VALUES tuples produced by the sources upload macro -- do not reorder. #}
{% macro bigquery__get_create_sources_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
run_started_at TIMESTAMP,
database STRING,
schema STRING,
source_name STRING,
loader STRING,
name STRING,
identifier STRING,
loaded_at_field STRING,
freshness JSON {# source freshness config serialized via parse_json #}
)
{%- endmacro %}

{% macro default__get_create_sources_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
Expand Down
16 changes: 16 additions & 0 deletions macros/create_test_executions_table_if_not_exists.sql
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,22 @@
)
{%- endmacro %}

{# BigQuery DDL for the dbt_artifacts `test_executions` table
   (adapter.dispatch target). Column order must match the VALUES tuples
   produced by the test executions upload macro -- do not reorder. #}
{% macro bigquery__get_create_test_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
run_started_at TIMESTAMP,
was_full_refresh BOOLEAN,
thread_id STRING,
status STRING,
compile_started_at TIMESTAMP,
query_completed_at TIMESTAMP,
total_node_runtime FLOAT64,
rows_affected INTEGER,
failures INTEGER
)
{%- endmacro %}

{% macro default__get_create_test_executions_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
Expand Down
13 changes: 13 additions & 0 deletions macros/create_tests_table_if_not_exists.sql
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,19 @@
)
{%- endmacro %}

{# BigQuery DDL for the dbt_artifacts `tests` table (adapter.dispatch target).
   One row per test node per invocation; column order must match the VALUES
   tuples produced by the tests upload macro -- do not reorder. #}
{% macro bigquery__get_create_tests_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
node_id STRING,
run_started_at TIMESTAMP,
name STRING,
depends_on_nodes ARRAY<STRING>,
package_name STRING,
test_path STRING,
tags ARRAY<STRING>
)
{%- endmacro %}

{% macro default__get_create_tests_table_if_not_exists_statement(database_name, schema_name, table_name) -%}
create table {{database_name}}.{{schema_name}}.{{table_name}} (
command_invocation_id STRING,
Expand Down
17 changes: 16 additions & 1 deletion macros/insert_into_metadata_table.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
{# Entry point for writing rows into a dbt_artifacts metadata table.
   Skips the adapter dispatch entirely when `content` is an empty string, so
   upload macros can return "" to signal "nothing to insert" (e.g. no
   exposures in the project) without issuing a malformed empty INSERT. #}
{% macro insert_into_metadata_table(database_name, schema_name, table_name, content) -%}
{% if content != "" %}
{{ return(adapter.dispatch('insert_into_metadata_table', 'dbt_artifacts')(database_name, schema_name, table_name, content)) }}
{% endif %}
{%- endmacro %}

{% macro spark__insert_into_metadata_table(database_name, schema_name, table_name, content) -%}
Expand All @@ -19,3 +21,16 @@

{% do run_query(insert_into_table_query) %}
{%- endmacro %}

{# BigQuery implementation of insert_into_metadata_table (adapter.dispatch
   target). Builds `insert into <db>.<schema>.<table> VALUES <content>` and
   executes it via run_query.
   `content` must be a comma-separated list of parenthesised value tuples
   whose order matches the column order of the corresponding
   bigquery__get_create_*_statement DDL, because no column list is given on
   the INSERT.
   Fix: this span of the scraped source had a GitHub review-comment thread
   interleaved inside the macro body, splitting the Jinja block in two; the
   macro is reconstructed here from the two visible halves. #}
{% macro bigquery__insert_into_metadata_table(database_name, schema_name, table_name, content) -%}

{% set insert_into_table_query %}
insert into {{database_name}}.{{ schema_name }}.{{ table_name }}
VALUES
{{ content }}
{% endset %}

{% do run_query(insert_into_table_query) %}

{%- endmacro %}

5 changes: 5 additions & 0 deletions macros/parse_json.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,8 @@
{# Snowflake implementation of parse_json (adapter.dispatch target): wraps
   the rendered field in Snowflake's PARSE_JSON() so string literals land in
   VARIANT/JSON columns as parsed JSON. #}
{% macro snowflake__parse_json(field) -%}
parse_json({{ field }})
{%- endmacro %}

{# BigQuery implementation of parse_json (adapter.dispatch target): BigQuery's
   PARSE_JSON() happens to share Snowflake's name and call shape, converting a
   JSON-formatted string literal into a JSON-typed value. #}
{% macro bigquery__parse_json(field) -%}
parse_json({{ field }})
{%- endmacro %}

43 changes: 34 additions & 9 deletions macros/upload_exposures.sql
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
{# Collects every exposure node from the dbt graph and returns the
   adapter-specific VALUES clause (or "" when there are none) for inserting
   them into the dbt_artifacts `exposures` table.
   NOTE(review): `src_dbt_exposures` is not referenced below in the visible
   body; presumably the source() call is kept so dbt still registers the
   dependency on the dbt_artifacts.exposures source -- confirm before
   removing. #}
{% macro upload_exposures(graph) -%}
{% set src_dbt_exposures = source('dbt_artifacts', 'exposures') %}
{% set exposures = [] %}
{% for node in graph.exposures.values() %}
{% do exposures.append(node) %}
{% endfor %}
{{ return(adapter.dispatch('get_exposures_dml_sql', 'dbt_artifacts')(exposures)) }}
{%- endmacro %}

{% macro default__get_exposures_dml_sql(exposures) -%}

{% if exposures != [] %}
{% set exposure_values %}
Expand Down Expand Up @@ -39,13 +42,35 @@
{%- if not loop.last %},{%- endif %}
{%- endfor %}
{% endset %}

{{ dbt_artifacts.insert_into_metadata_table(
database_name=src_dbt_exposures.database,
schema_name=src_dbt_exposures.schema,
table_name=src_dbt_exposures.identifier,
content=exposure_values
)
}}
{{ exposure_values }}
{% else %}
{{ return("") }}
{% endif %}
{% endmacro -%}

{# BigQuery implementation of get_exposures_dml_sql: renders one
   parenthesised VALUES tuple per exposure, in the exact column order of the
   BigQuery `exposures` DDL macro. Single quotes in free-text fields are
   escaped as \' and `depends_on_nodes` is emitted unquoted -- tojson yields
   a ["..."] literal that BigQuery accepts as ARRAY<STRING>. Returns "" when
   the project has no exposures so insert_into_metadata_table skips the
   INSERT entirely. #}
{% macro bigquery__get_exposures_dml_sql(exposures) -%}
{% if exposures != [] %}
{% set exposure_values %}
{% for exposure in exposures -%}
(
'{{ invocation_id }}', {# command_invocation_id #}
'{{ exposure.unique_id | replace("'","\\'") }}', {# node_id #}
'{{ run_started_at }}', {# run_started_at #}
'{{ exposure.name | replace("'","\\'") }}', {# name #}
'{{ exposure.type }}', {# type #}
parse_json('{{ tojson(exposure.owner) | replace("'","\\'") }}'), {# owner #}
'{{ exposure.maturity }}', {# maturity #}
'{{ exposure.original_file_path | replace('\\', '\\\\') }}', {# path; double Windows backslashes #}
'{{ exposure.description | replace("'","\\'") }}', {# description #}
'{{ exposure.url }}', {# url #}
'{{ exposure.package_name }}', {# package_name #}
{{ tojson(exposure.depends_on.nodes) }} {# depends_on_nodes #}
)
{%- if not loop.last %},{%- endif %}
{%- endfor %}
{% endset %}
{{ exposure_values }}
{% else %}
{{ return("") }}
{% endif %}
{%- endmacro %}
Loading