Merge branch 'development' into dev/isaac-asimov
Jacob Beck committed Jul 24, 2018
2 parents a57c0b2 + fa7f507 commit b28b23c
Showing 34 changed files with 1,480 additions and 129 deletions.
23 changes: 20 additions & 3 deletions .bumpversion.cfg
@@ -1,7 +1,24 @@
[bumpversion]
current_version = 0.10.1
commit = True
tag = True
current_version = 0.10.2a2
parse = (?P<major>\d+)
\.(?P<minor>\d+)
\.(?P<patch>\d+)
((?P<prerelease>[a-z]+)(?P<num>\d+))?
serialize =
{major}.{minor}.{patch}{prerelease}{num}
{major}.{minor}.{patch}
commit = False
tag = False

[bumpversion:part:prerelease]
first_value = a
values =
a
b
rc

[bumpversion:part:num]
first_value = 1

[bumpversion:file:setup.py]
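The `parse`/`serialize` pair above is what lets bumpversion understand pre-release versions like `0.10.2a2`. A quick illustrative sketch of how that regex behaves (the pattern is the config's, joined into one expression with an end anchor added for the demo; `parts` is a hypothetical helper, not part of bumpversion):

```python
import re

# The pattern from .bumpversion.cfg, joined into one expression.
PARSE = re.compile(
    r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)"
    r"((?P<prerelease>[a-z]+)(?P<num>\d+))?$"
)

def parts(version):
    """Return the named groups bumpversion would extract."""
    m = PARSE.match(version)
    return {k: v for k, v in m.groupdict().items() if v is not None}

# A pre-release carries prerelease/num; a final release omits them,
# which is why the config lists two serialize formats.
print(parts("0.10.2a2"))  # {'major': '0', 'minor': '10', 'patch': '2', 'prerelease': 'a', 'num': '2'}
print(parts("0.10.2"))    # {'major': '0', 'minor': '10', 'patch': '2'}
```

When serializing, bumpversion picks the first format whose parts are all present, so `0.10.2a2` uses `{major}.{minor}.{patch}{prerelease}{num}` and `0.10.2` falls through to `{major}.{minor}.{patch}`.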

2 changes: 2 additions & 0 deletions .gitignore
@@ -68,3 +68,5 @@ target/

# Sublime Text
*.sublime-*

.python-version
63 changes: 50 additions & 13 deletions CHANGELOG.md
@@ -1,19 +1,56 @@
## Betsy Ross (Unreleased)
## dbt 0.10.2 (unreleased, codename: Betsy Ross)

### Overview

This release makes it possible to alias relation names, rounds out BigQuery support with incremental models, archival, and hooks, adds the IAM Auth method for Redshift, and lays the groundwork for autogenerated dbt project documentation, coming in the next release.

Additionally, a number of bugs have been fixed, including intermittent BigQuery 404 errors, Redshift "table dropped by concurrent query" errors, and a probable fix for Redshift connection timeout issues.

### Contributors

We want to extend a big thank you to our outside contributors for this release! You all are amazing.

- [@danielchalef](https://github.com/danielchalef) ([#818](https://github.com/fishtown-analytics/dbt/pull/818))
- [@mjumbewu](https://github.com/mjumbewu) ([#796](https://github.com/fishtown-analytics/dbt/pull/796))
- [@abelsonlive](https://github.com/abelsonlive) ([#800](https://github.com/fishtown-analytics/dbt/pull/800))
- [@jon-rtr](https://github.com/jon-rtr) ([#800](https://github.com/fishtown-analytics/dbt/pull/800))
- [@mturzanska](https://github.com/mturzanska) ([#797](https://github.com/fishtown-analytics/dbt/pull/797))
- [@cpdean](https://github.com/cpdean) ([#780](https://github.com/fishtown-analytics/dbt/pull/780))

### Features

- BigQuery
- Support incremental models ([#856](https://github.com/fishtown-analytics/dbt/pull/856))
- Support archival ([#856](https://github.com/fishtown-analytics/dbt/pull/856))
- Add pre/post hook support ([#836](https://github.com/fishtown-analytics/dbt/pull/836))
- Redshift: IAM Auth ([#818](https://github.com/fishtown-analytics/dbt/pull/818))
- Model aliases ([#800](https://github.com/fishtown-analytics/dbt/pull/800))
- Write JSON manifest file to disk during compilation ([#761](https://github.com/fishtown-analytics/dbt/pull/761))
- Add forward and backward graph edges to the JSON manifest file ([#762](https://github.com/fishtown-analytics/dbt/pull/762))
- Add a 'dbt docs generate' command to generate a JSON catalog file ([#774](https://github.com/fishtown-analytics/dbt/pull/774), [#808](https://github.com/fishtown-analytics/dbt/pull/808))

### Bugfixes

- BigQuery: fix concurrent relation loads ([#835](https://github.com/fishtown-analytics/dbt/pull/835))
- BigQuery: support external relations ([#828](https://github.com/fishtown-analytics/dbt/pull/828))
- Redshift: set TCP keepalive on connections ([#826](https://github.com/fishtown-analytics/dbt/pull/826))
- Redshift: fix "table dropped by concurrent query" ([#825](https://github.com/fishtown-analytics/dbt/pull/825))
- Fix the error handling for profiles.yml validation ([#820](https://github.com/fishtown-analytics/dbt/pull/820))
- Make the `--threads` parameter actually change the number of threads used ([#819](https://github.com/fishtown-analytics/dbt/pull/819))
- Ensure that numeric precision of a column is not `None` ([#796](https://github.com/fishtown-analytics/dbt/pull/796))
- Allow for more complex version comparison ([#797](https://github.com/fishtown-analytics/dbt/pull/797))
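The "more complex version comparison" fix above is about ordering pre-releases correctly: `0.10.2a1` must compare lower than `0.10.2`. A minimal sketch of one way to get that ordering (the helper name and the `"~"` sentinel are illustrative, not dbt's implementation):

```python
import re

def version_key(version):
    """Sort key: pre-releases of X.Y.Z order before the final X.Y.Z."""
    m = re.match(r"(\d+)\.(\d+)\.(\d+)(?:([a-z]+)(\d+))?$", version)
    major, minor, patch, pre, num = m.groups()
    # "~" sorts after every lowercase letter, so a final release
    # (pre is None) lands after its a/b/rc pre-releases.
    return (int(major), int(minor), int(patch), pre or "~", int(num or 0))

versions = ["0.10.2", "0.10.2a1", "0.10.2rc1", "0.10.1"]
print(sorted(versions, key=version_key))
# ['0.10.1', '0.10.2a1', '0.10.2rc1', '0.10.2']
```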

### Changes

- Add pre/post hook support for bigquery ([#836](https://github.com/fishtown-analytics/dbt/pull/836))
- Improve consistency of BigQuery list_relations, create shortcut for most materializations ([#835](https://github.com/fishtown-analytics/dbt/pull/835))
- Support external BigQuery relations ([#828](https://github.com/fishtown-analytics/dbt/pull/828))
- Added tox environments that let the user specify which tests should be run ([#837](https://github.com/fishtown-analytics/dbt/pull/837))
- Set the TCP keepalive on redshift ([#826](https://github.com/fishtown-analytics/dbt/pull/826))
- Fix the error handling for profiles.yml validation ([#820](https://github.com/fishtown-analytics/dbt/pull/820))
- Make the `--threads` parameter actually change the number of threads used ([#819](https://github.com/fishtown-analytics/dbt/pull/819))
- Use Mapping instead of dict as the base class for APIObject ([#756](https://github.com/fishtown-analytics/dbt/pull/756))
- Write JSON manifest file to disk during compilation ([#761](https://github.com/fishtown-analytics/dbt/pull/761))
- Add forward and backward graph edges to the JSON manifest file ([#762](https://github.com/fishtown-analytics/dbt/pull/762))
- Add a 'dbt docs generate' command to generate a JSON catalog file ([#774](https://github.com/fishtown-analytics/dbt/pull/774))
- Stop tracking `run_error` in tracking code ([#817](https://github.com/fishtown-analytics/dbt/pull/817))
- Use a subselect instead of CTE when building incremental models ([#787](https://github.com/fishtown-analytics/dbt/pull/787))
- Internals
- Improved dependency selection, rip out some unused dependencies ([#848](https://github.com/fishtown-analytics/dbt/pull/848))
- Stop tracking `run_error` in tracking code ([#817](https://github.com/fishtown-analytics/dbt/pull/817))
- Use Mapping instead of dict as the base class for APIObject ([#756](https://github.com/fishtown-analytics/dbt/pull/756))
- Split out parsers ([#809](https://github.com/fishtown-analytics/dbt/pull/809))
- Fix `__all__` parameter in submodules ([#780](https://github.com/fishtown-analytics/dbt/pull/780))
- Switch to CircleCI 2.0 ([#843](https://github.com/fishtown-analytics/dbt/pull/843), [#850](https://github.com/fishtown-analytics/dbt/pull/850))
- Added tox environments that let the user specify which tests should be run ([#837](https://github.com/fishtown-analytics/dbt/pull/837))

## dbt 0.10.1 (May 18, 2018)

26 changes: 16 additions & 10 deletions RELEASE.md
@@ -1,17 +1,23 @@
### Release Procedure :shipit:

1. Update changelog
1. Bumpversion
1. Merge to master
- (on master) git pull origin development
1. Deploy to pypi
- python setup.py sdist upload -r pypi
1. Deploy to homebrew
1. Update CHANGELOG.md with the most recent changes
2. If this is a release candidate, create it off of `development`. If it's an actual release, first merge `development` into `master`:
- `git checkout master`
- `git pull origin development`
3. Bump the version using `bumpversion`:
   - Do a dry run first: run `bumpversion --new-version <desired-version> <part>` and check the diff. If it looks correct, clean up the changes and move on:
- Alpha releases: `bumpversion --commit --tag --new-version 0.10.2a1 num`
- Patch releases: `bumpversion --commit --tag --new-version 0.10.2 patch`
- Minor releases: `bumpversion --commit --tag --new-version 0.11.0 minor`
- Major releases: `bumpversion --commit --tag --new-version 1.0.0 major`
4. Deploy to pypi
- `python setup.py sdist upload -r pypi`
5. Deploy to homebrew
- Make a pull request against homebrew-core
1. Deploy to conda-forge
6. Deploy to conda-forge
- Make a pull request against dbt-feedstock
1. Git release notes (points to changelog)
1. Post to slack (point to changelog)
7. Publish GitHub release notes (point to the changelog)
8. Post to Slack (point to the changelog)

#### Homebrew Release Process

76 changes: 70 additions & 6 deletions dbt/adapters/bigquery/impl.py
@@ -16,6 +16,7 @@
from dbt.logger import GLOBAL_LOGGER as logger

import google.auth
import google.api_core
import google.oauth2
import google.cloud.exceptions
import google.cloud.bigquery
@@ -30,13 +31,18 @@ class BigQueryAdapter(PostgresAdapter):
# deprecated -- use versions that take relations instead
"query_for_existing",
"execute_model",
"create_temporary_table",
"drop",
"execute",
"quote_schema_and_table",
"make_date_partitioned_table",
"already_exists",
"expand_target_column_types",
"load_dataframe",
"get_missing_columns",

"create_schema",
"alter_table_add_columns",

# versions of adapter functions that take / return Relations
"list_relations",
@@ -183,7 +189,6 @@ def close(cls, connection):
@classmethod
def list_relations(cls, profile, project_cfg, schema, model_name=None):
connection = cls.get_connection(profile, model_name)
credentials = connection.get('credentials', {})
client = connection.get('handle')

bigquery_dataset = cls.get_dataset(
@@ -201,7 +206,12 @@ def list_relations(cls, profile, project_cfg, schema, model_name=None):
# won't need to do this
max_results=100000)

return [cls.bq_table_to_relation(table) for table in all_tables]
# This will 404 if the dataset does not exist. This behavior mirrors
# the implementation of list_relations for other adapters
try:
return [cls.bq_table_to_relation(table) for table in all_tables]
except google.api_core.exceptions.NotFound:
return []

@classmethod
def get_relation(cls, profile, project_cfg, schema=None, identifier=None,
@@ -357,7 +367,7 @@ def execute_model(cls, profile, project_cfg, model,
return res

@classmethod
def execute(cls, profile, sql, model_name=None, fetch=False, **kwargs):
def raw_execute(cls, profile, sql, model_name=None, fetch=False, **kwargs):
conn = cls.get_connection(profile, model_name)
client = conn.get('handle')

@@ -372,6 +382,53 @@ def execute(cls, profile, sql, model_name=None, fetch=False, **kwargs):
with cls.exception_handler(profile, sql, model_name):
iterator = query_job.result()

return query_job, iterator

@classmethod
def create_temporary_table(cls, profile, project, sql, model_name=None,
**kwargs):

# BQ queries always return a temp table with their results
query_job, _ = cls.raw_execute(profile, sql, model_name)
bq_table = query_job.destination

return cls.Relation.create(
project=bq_table.project,
schema=bq_table.dataset_id,
identifier=bq_table.table_id,
quote_policy={
'schema': True,
'identifier': True
},
type=BigQueryRelation.Table)

@classmethod
def alter_table_add_columns(cls, profile, project, relation, columns,
model_name=None):

logger.debug('Adding columns ({}) to table {}.'.format(
columns, relation))

conn = cls.get_connection(profile, model_name)
client = conn.get('handle')

dataset = cls.get_dataset(profile, project, relation.schema,
model_name)

table_ref = dataset.table(relation.name)
table = client.get_table(table_ref)

new_columns = [col.to_bq_schema_object() for col in columns]
new_schema = table.schema + new_columns

new_table = google.cloud.bigquery.Table(table_ref, schema=new_schema)
client.update_table(new_table, ['schema'])

@classmethod
def execute(cls, profile, sql, model_name=None, fetch=None, **kwargs):
_, iterator = cls.raw_execute(profile, sql, model_name, fetch,
**kwargs)

if fetch:
res = cls.get_table_from_response(iterator)
else:
@@ -410,8 +467,13 @@ def create_schema(cls, profile, project_cfg, schema, model_name=None):
client = conn.get('handle')

dataset = cls.get_dataset(profile, project_cfg, schema, model_name)
with cls.exception_handler(profile, 'create dataset', model_name):
client.create_dataset(dataset)

# Emulate 'create schema if not exists ...'
try:
client.get_dataset(dataset)
except google.api_core.exceptions.NotFound:
with cls.exception_handler(profile, 'create dataset', model_name):
client.create_dataset(dataset)

@classmethod
def drop_tables_in_schema(cls, profile, project_cfg, dataset):
@@ -468,8 +530,10 @@ def get_columns_in_table(cls, profile, project_cfg,

columns = []
for col in table_schema:
# BigQuery returns type labels that are not valid type specifiers
dtype = cls.Column.translate_type(col.field_type)
column = cls.Column(
col.name, col.field_type, col.fields, col.mode)
col.name, dtype, col.fields, col.mode)
columns.append(column)

return columns
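Two of the hunks above share a pattern worth calling out: `list_relations` now swallows the dataset-missing 404, and `create_schema` only calls `create_dataset` when a lookup raises, emulating `create schema if not exists`. A runnable sketch of that look-then-create pattern (a local `NotFound` class and `FakeClient` stand in for `google.api_core.exceptions.NotFound` and the real BigQuery client so this runs without GCP credentials; `ensure_dataset` is an illustrative name, not dbt's):

```python
class NotFound(Exception):
    """Stand-in for google.api_core.exceptions.NotFound."""

def ensure_dataset(client, dataset_ref):
    """Emulate 'create schema if not exists': look first, create on miss."""
    try:
        client.get_dataset(dataset_ref)   # raises NotFound when absent
    except NotFound:
        client.create_dataset(dataset_ref)

class FakeClient:
    """Tiny in-memory client used to exercise the pattern."""
    def __init__(self):
        self.datasets = set()
    def get_dataset(self, ref):
        if ref not in self.datasets:
            raise NotFound(ref)
    def create_dataset(self, ref):
        self.datasets.add(ref)

client = FakeClient()
ensure_dataset(client, "dbt_test")  # first call creates the dataset
ensure_dataset(client, "dbt_test")  # second call is a no-op, no Conflict
print(client.datasets)  # {'dbt_test'}
```

The same idea, inverted, explains the `list_relations` change: catching `NotFound` and returning `[]` makes a missing dataset look like an empty one, mirroring the other adapters.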
3 changes: 2 additions & 1 deletion dbt/adapters/default/impl.py
@@ -33,6 +33,7 @@ class DefaultAdapter(object):
"get_columns_in_table",
"get_missing_columns",
"expand_target_column_types",
"create_schema",

# deprecated -- use versions that take relations instead
"already_exists",
@@ -722,7 +723,7 @@ def already_exists(cls, profile, project_cfg,

@classmethod
def quote(cls, identifier):
return '"{}"'.format(identifier.replace('"', '""'))
return '"{}"'.format(identifier)

@classmethod
def quote_schema_and_table(cls, profile, project_cfg,
8 changes: 8 additions & 0 deletions dbt/flags.py
@@ -1,3 +1,11 @@
STRICT_MODE = False
NON_DESTRUCTIVE = False
FULL_REFRESH = False


def reset():
global STRICT_MODE, NON_DESTRUCTIVE, FULL_REFRESH

STRICT_MODE = False
NON_DESTRUCTIVE = False
FULL_REFRESH = False
3 changes: 1 addition & 2 deletions dbt/include/global_project/macros/adapters/common.sql
@@ -29,10 +29,9 @@
{%- endmacro %}

{% macro create_schema(schema_name) %}
create schema if not exists {{ schema_name }};
{{ adapter.create_schema(schema_name) }}
{% endmacro %}


{% macro create_table_as(temporary, relation, sql) -%}
{{ adapter_macro('create_table_as', temporary, relation, sql) }}
{%- endmacro %}