From c5c09d077f97bcac1df32922e4deb65a03709110 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 19 Jul 2018 08:07:54 -0600 Subject: [PATCH 1/6] Add generated_at field to catalog and manifest (#864) --- dbt/contracts/graph/parsed.py | 14 ++++++++++--- dbt/task/generate.py | 1 + dbt/utils.py | 7 +++++++ .../test_docs_generate.py | 11 +++++++++- .../test_late_binding_view.py | 21 +++++++++++++++++++ test/unit/test_manifest.py | 8 ++++++- test/unit/test_parser.py | 2 ++ 7 files changed, 59 insertions(+), 5 deletions(-) create mode 100644 test/integration/034_redshift_test/test_late_binding_view.py diff --git a/dbt/contracts/graph/parsed.py b/dbt/contracts/graph/parsed.py index 4c9eb5457f2..61ae0b03e9d 100644 --- a/dbt/contracts/graph/parsed.py +++ b/dbt/contracts/graph/parsed.py @@ -1,5 +1,5 @@ from dbt.api import APIObject -from dbt.utils import deep_merge +from dbt.utils import deep_merge, timestring from dbt.node_types import NodeType import dbt.clients.jinja @@ -252,6 +252,9 @@ 'properties': { 'nodes': PARSED_NODES_CONTRACT, 'macros': PARSED_MACROS_CONTRACT, + 'generated_at': { + 'type': 'date-time' + } }, 'required': ['nodes', 'macros'], } @@ -333,12 +336,16 @@ def build_edges(nodes): class ParsedManifest(object): """The final result of parsing all macros and nodes in a graph.""" - def __init__(self, nodes, macros): + def __init__(self, nodes, macros, generated_at=None): """The constructor. nodes and macros are dictionaries mapping unique - IDs to ParsedNode and ParsedMacro objects, respectively. + IDs to ParsedNode and ParsedMacro objects, respectively. generated_at + is a text timestamp in RFC 3339 format. 
""" self.nodes = nodes self.macros = macros + if generated_at is None: + generated_at = timestring() + self.generated_at = generated_at def serialize(self): """Convert the parsed manifest to a nested dict structure that we can @@ -351,6 +358,7 @@ def serialize(self): 'macros': {k: v.serialize() for k, v in self.macros.items()}, 'parent_map': backward_edges, 'child_map': forward_edges, + 'generated_at': self.generated_at, } def _find_by_name(self, name, package, subgraph, nodetype): diff --git a/dbt/task/generate.py b/dbt/task/generate.py index aa1a295c4c0..e63f805eddf 100644 --- a/dbt/task/generate.py +++ b/dbt/task/generate.py @@ -115,6 +115,7 @@ def run(self): for row in results ] results = unflatten(results) + results['generated_at'] = dbt.utils.timestring() path = os.path.join(self.project['target-path'], CATALOG_FILENAME) write_file(path, json.dumps(results)) diff --git a/dbt/utils.py b/dbt/utils.py index b6d24668883..27b71d582d4 100644 --- a/dbt/utils.py +++ b/dbt/utils.py @@ -1,3 +1,4 @@ +from datetime import datetime import os import hashlib import itertools @@ -460,3 +461,9 @@ def filter_null_values(input): def add_ephemeral_model_prefix(s): return '__dbt__CTE__{}'.format(s) + + +def timestring(): + """Get the current datetime as an RFC 3339-compliant string""" + # isoformat doesn't include the mandatory trailing 'Z' for UTC. 
+ return datetime.utcnow().isoformat() + 'Z' diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 91effc026f7..8fc1a497ef2 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -3,6 +3,8 @@ from test.integration.base import DBTIntegrationTest, use_profile +from freezegun import freeze_time + class TestDocsGenerate(DBTIntegrationTest): def setUp(self): @@ -51,6 +53,7 @@ def verify_catalog(self, expected): my_schema_name = self.unique_schema() self.assertIn(my_schema_name, catalog) + self.assertEqual(catalog['generated_at'], '2017-08-16T10:11:12Z') my_schema = catalog[my_schema_name] self.assertEqual(expected, my_schema) @@ -152,6 +155,7 @@ def expected_seeded_manifest(self): 'model.test.model': [], 'seed.test.seed': ['model.test.model'], }, + 'generated_at': '2017-08-16T10:11:12Z', } def verify_manifest(self, expected_manifest): @@ -162,7 +166,7 @@ def verify_manifest(self, expected_manifest): self.assertEqual( set(manifest), - {'nodes', 'macros', 'parent_map', 'child_map'} + {'nodes', 'macros', 'parent_map', 'child_map', 'generated_at'} ) self.verify_manifest_macros(manifest) @@ -172,6 +176,7 @@ def verify_manifest(self, expected_manifest): self.assertEqual(manifest_without_macros, expected_manifest) @use_profile('postgres') + @freeze_time('2017-08-16T10:11:12Z') def test__postgres__run_and_generate(self): self.run_and_generate() my_schema_name = self.unique_schema() @@ -231,6 +236,7 @@ def test__postgres__run_and_generate(self): self.verify_manifest(self.expected_seeded_manifest()) @use_profile('snowflake') + @freeze_time('2017-08-16T10:11:12Z') def test__snowflake__run_and_generate(self): self.run_and_generate() my_schema_name = self.unique_schema() @@ -291,6 +297,7 @@ def test__snowflake__run_and_generate(self): self.verify_manifest(self.expected_seeded_manifest()) 
@use_profile('bigquery') + @freeze_time('2017-08-16T10:11:12Z') def test__bigquery__run_and_generate(self): self.run_and_generate() my_schema_name = self.unique_schema() @@ -350,6 +357,7 @@ def test__bigquery__run_and_generate(self): self.verify_manifest(self.expected_seeded_manifest()) @use_profile('bigquery') + @freeze_time('2017-08-16T10:11:12Z') def test__bigquery__nested_models(self): self.use_default_project({'source-paths': [self.dir('bq_models')]}) @@ -481,6 +489,7 @@ def test__bigquery__nested_models(self): 'model.test.model': ['model.test.seed'], 'model.test.seed': [] }, + 'generated_at': '2017-08-16T10:11:12Z', } self.verify_manifest(expected_manifest) diff --git a/test/integration/034_redshift_test/test_late_binding_view.py b/test/integration/034_redshift_test/test_late_binding_view.py new file mode 100644 index 00000000000..7af7275632d --- /dev/null +++ b/test/integration/034_redshift_test/test_late_binding_view.py @@ -0,0 +1,21 @@ +import json +import os + +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest + + +class TestLateBindingView(DBTIntegrationTest): + @property + def schema(self): + return 'late_binding_view_033' + + @staticmethod + def dir(path): + return os.path.normpath( + os.path.join('test/integration/033_redshift_test', path) + ) + + @property + def models(self): + return self.dir("models") diff --git a/test/unit/test_manifest.py b/test/unit/test_manifest.py index 60d4440a7ca..b99a910d28c 100644 --- a/test/unit/test_manifest.py +++ b/test/unit/test_manifest.py @@ -5,6 +5,8 @@ import dbt.flags from dbt.contracts.graph.parsed import ParsedNode, ParsedManifest +from dbt.utils import timestring +import freezegun class ManifestTest(unittest.TestCase): def setUp(self): @@ -151,17 +153,21 @@ def setUp(self): ), } + @freezegun.freeze_time('2018-02-14T09:15:13Z') def test__no_nodes(self): manifest = ParsedManifest(nodes={}, macros={}) self.assertEqual( manifest.serialize(), - {'nodes': {}, 'macros': {}, 
'parent_map': {}, 'child_map': {}} + {'nodes': {}, 'macros': {}, 'parent_map': {}, 'child_map': {}, + 'generated_at': '2018-02-14T09:15:13Z'} ) + @freezegun.freeze_time('2018-02-14T09:15:13Z') def test__nested_nodes(self): nodes = copy.copy(self.nested_nodes) manifest = ParsedManifest(nodes=nodes, macros={}) serialized = manifest.serialize() + self.assertEqual(serialized['generated_at'], '2018-02-14T09:15:13Z') parent_map = serialized['parent_map'] child_map = serialized['child_map'] # make sure there aren't any extra/missing keys. diff --git a/test/unit/test_parser.py b/test/unit/test_parser.py index 56b9b6407cf..0df20511fef 100644 --- a/test/unit/test_parser.py +++ b/test/unit/test_parser.py @@ -4,6 +4,7 @@ import dbt.flags from dbt.parser import ModelParser, MacroParser, DataTestParser, SchemaParser, ParserUtils +from dbt.utils import timestring from dbt.node_types import NodeType from dbt.contracts.graph.parsed import ParsedManifest, ParsedNode, ParsedMacro @@ -702,6 +703,7 @@ def test__process_refs__packages(self): manifest = ParsedManifest( nodes={k: ParsedNode(**v) for (k,v) in graph['nodes'].items()}, macros={k: ParsedMacro(**v) for (k,v) in graph['macros'].items()}, + generated_at=timestring(), ) processed_manifest = ParserUtils.process_refs(manifest, 'root') From 9ad1dd10bf761a6d02f082c4789ed8a99a4d2efe Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 24 Jul 2018 13:03:55 -0600 Subject: [PATCH 2/6] No freezing time for us --- .../test_docs_generate.py | 38 +++++++++++++------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 8fc1a497ef2..79cfdfce175 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -1,10 +1,9 @@ import json import os +from datetime import datetime, timedelta from test.integration.base import 
DBTIntegrationTest, use_profile -from freezegun import freeze_time - class TestDocsGenerate(DBTIntegrationTest): def setUp(self): @@ -45,6 +44,25 @@ def run_and_generate(self, extra=None): self.assertEqual(len(self.run_dbt()), 1) self.run_dbt(['docs', 'generate']) + def assertRecent(self, timestr): + """Given a timestring in '%Y-%m-%dT%H:%M:%S.%fZ' format (ISO8601), assert + that it represents a time before now and a time after 24h ago. + + We can't just set the time via freezegun.freeze_time because that + breaks SSL, and a lot of these tests use SSL. + """ + now = datetime.utcnow() + yesterday = now + timedelta(days=-1) + parsed = datetime.strptime(timestr, '%Y-%m-%dT%H:%M:%S.%fZ') + self.assertLess( + yesterday, parsed, + 'parsed date {} happened over 24h ago'.format(parsed) + ) + self.assertGreater( + now, parsed, + 'parsed date {} happened in the future'.format(parsed) + ) + def verify_catalog(self, expected): self.assertTrue(os.path.exists('./target/catalog.json')) @@ -53,7 +71,8 @@ def verify_catalog(self, expected): my_schema_name = self.unique_schema() self.assertIn(my_schema_name, catalog) - self.assertEqual(catalog['generated_at'], '2017-08-16T10:11:12Z') + self.assertIn('generated_at', catalog) + self.assertRecent(catalog.pop('generated_at')) my_schema = catalog[my_schema_name] self.assertEqual(expected, my_schema) @@ -155,7 +174,6 @@ def expected_seeded_manifest(self): 'model.test.model': [], 'seed.test.seed': ['model.test.model'], }, - 'generated_at': '2017-08-16T10:11:12Z', } def verify_manifest(self, expected_manifest): @@ -170,13 +188,13 @@ def verify_manifest(self, expected_manifest): ) self.verify_manifest_macros(manifest) - manifest_without_macros = { - k: v for k, v in manifest.items() if k != 'macros' + manifest_without_extras = { + k: v for k, v in manifest.items() + if k not in {'macros', 'generated_at'} } - self.assertEqual(manifest_without_macros, expected_manifest) + self.assertEqual(manifest_without_extras, expected_manifest) 
@use_profile('postgres') - @freeze_time('2017-08-16T10:11:12Z') def test__postgres__run_and_generate(self): self.run_and_generate() my_schema_name = self.unique_schema() @@ -236,7 +254,6 @@ def test__postgres__run_and_generate(self): self.verify_manifest(self.expected_seeded_manifest()) @use_profile('snowflake') - @freeze_time('2017-08-16T10:11:12Z') def test__snowflake__run_and_generate(self): self.run_and_generate() my_schema_name = self.unique_schema() @@ -297,7 +314,6 @@ def test__snowflake__run_and_generate(self): self.verify_manifest(self.expected_seeded_manifest()) @use_profile('bigquery') - @freeze_time('2017-08-16T10:11:12Z') def test__bigquery__run_and_generate(self): self.run_and_generate() my_schema_name = self.unique_schema() @@ -357,7 +373,6 @@ def test__bigquery__run_and_generate(self): self.verify_manifest(self.expected_seeded_manifest()) @use_profile('bigquery') - @freeze_time('2017-08-16T10:11:12Z') def test__bigquery__nested_models(self): self.use_default_project({'source-paths': [self.dir('bq_models')]}) @@ -489,7 +504,6 @@ def test__bigquery__nested_models(self): 'model.test.model': ['model.test.seed'], 'model.test.seed': [] }, - 'generated_at': '2017-08-16T10:11:12Z', } self.verify_manifest(expected_manifest) From 2330e67499ca8088d0a1bbed933e247e8b6402be Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 24 Jul 2018 13:12:57 -0600 Subject: [PATCH 3/6] Somehow I re-broke this test that I fixed a while ago --- .../029_docs_generate_tests/test_docs_generate.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 79cfdfce175..4d2e87a9574 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -321,7 +321,7 @@ def test__bigquery__run_and_generate(self): { 'name': 'id', 'index': 1, - 'type': 
'INTEGER', + 'type': 'INT64', 'comment': None, }, { @@ -384,31 +384,31 @@ def test__bigquery__nested_models(self): { "name": "field_1", "index": 1, - "type": "INTEGER", + "type": "INT64", "comment": None }, { "name": "field_2", "index": 2, - "type": "INTEGER", + "type": "INT64", "comment": None }, { "name": "field_3", "index": 3, - "type": "INTEGER", + "type": "INT64", "comment": None }, { "name": "nested_field.field_4", "index": 4, - "type": "INTEGER", + "type": "INT64", "comment": None }, { "name": "nested_field.field_5", "index": 5, - "type": "INTEGER", + "type": "INT64", "comment": None } ] From adca49cc9df10767ff78abc7c174bffad8e4cb2d Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 24 Jul 2018 13:24:44 -0600 Subject: [PATCH 4/6] Make the generated_at field mandatory --- dbt/contracts/graph/parsed.py | 4 +--- dbt/loader.py | 4 +++- dbt/task/generate.py | 1 - test/unit/test_manifest.py | 9 ++++++--- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/dbt/contracts/graph/parsed.py b/dbt/contracts/graph/parsed.py index 61ae0b03e9d..c6f60ff090a 100644 --- a/dbt/contracts/graph/parsed.py +++ b/dbt/contracts/graph/parsed.py @@ -336,15 +336,13 @@ def build_edges(nodes): class ParsedManifest(object): """The final result of parsing all macros and nodes in a graph.""" - def __init__(self, nodes, macros, generated_at=None): + def __init__(self, nodes, macros, generated_at): """The constructor. nodes and macros are dictionaries mapping unique IDs to ParsedNode and ParsedMacro objects, respectively. generated_at is a text timestamp in RFC 3339 format. 
""" self.nodes = nodes self.macros = macros - if generated_at is None: - generated_at = timestring() self.generated_at = generated_at def serialize(self): diff --git a/dbt/loader.py b/dbt/loader.py index c0cc27dd835..5974f12cc6c 100644 --- a/dbt/loader.py +++ b/dbt/loader.py @@ -2,6 +2,7 @@ from dbt.node_types import NodeType from dbt.contracts.graph.parsed import ParsedManifest +from dbt.utils import timestring import dbt.parser @@ -18,7 +19,8 @@ def load_all(cls, root_project, all_projects): for loader in cls._LOADERS: nodes.update(loader.load_all(root_project, all_projects, macros)) - manifest = ParsedManifest(nodes=nodes, macros=macros) + manifest = ParsedManifest(nodes=nodes, macros=macros, + generated_at=timestring()) manifest = dbt.parser.ParserUtils.process_refs(manifest, root_project) return manifest diff --git a/dbt/task/generate.py b/dbt/task/generate.py index e63f805eddf..426c86d4589 100644 --- a/dbt/task/generate.py +++ b/dbt/task/generate.py @@ -1,7 +1,6 @@ import json import os -from dbt.contracts.graph.parsed import ParsedManifest, ParsedNode, ParsedMacro from dbt.adapters.factory import get_adapter from dbt.clients.system import write_file from dbt.compat import bigint diff --git a/test/unit/test_manifest.py b/test/unit/test_manifest.py index b99a910d28c..cc9dd0f5024 100644 --- a/test/unit/test_manifest.py +++ b/test/unit/test_manifest.py @@ -155,7 +155,8 @@ def setUp(self): @freezegun.freeze_time('2018-02-14T09:15:13Z') def test__no_nodes(self): - manifest = ParsedManifest(nodes={}, macros={}) + manifest = ParsedManifest(nodes={}, macros={}, + generated_at=timestring()) self.assertEqual( manifest.serialize(), {'nodes': {}, 'macros': {}, 'parent_map': {}, 'child_map': {}, @@ -165,7 +166,8 @@ def test__no_nodes(self): @freezegun.freeze_time('2018-02-14T09:15:13Z') def test__nested_nodes(self): nodes = copy.copy(self.nested_nodes) - manifest = ParsedManifest(nodes=nodes, macros={}) + manifest = ParsedManifest(nodes=nodes, macros={}, + 
generated_at=timestring()) serialized = manifest.serialize() self.assertEqual(serialized['generated_at'], '2018-02-14T09:15:13Z') parent_map = serialized['parent_map'] @@ -226,7 +228,8 @@ def test__nested_nodes(self): def test__to_flat_graph(self): nodes = copy.copy(self.nested_nodes) - manifest = ParsedManifest(nodes=nodes, macros={}) + manifest = ParsedManifest(nodes=nodes, macros={}, + generated_at=timestring()) flat_graph = manifest.to_flat_graph() flat_nodes = flat_graph['nodes'] self.assertEqual(set(flat_graph), set(['nodes', 'macros'])) From 5d049b0edead37b1dcc304191ebbb61461e0a5df Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 24 Jul 2018 14:20:37 -0600 Subject: [PATCH 5/6] PR feedback --- dbt/contracts/graph/parsed.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/dbt/contracts/graph/parsed.py b/dbt/contracts/graph/parsed.py index c6f60ff090a..0e5d74bbc7b 100644 --- a/dbt/contracts/graph/parsed.py +++ b/dbt/contracts/graph/parsed.py @@ -242,6 +242,23 @@ }, } + +NODE_EDGE_MAP = { + 'type': 'object', + 'additionalProperties': False, + 'description': 'A map of node relationships', + 'patternProperties': { + '.*': { + 'type': 'array', + 'items': { + 'type': 'string', + 'description': 'A node name', + } + } + } +} + + PARSED_MANIFEST_CONTRACT = { 'type': 'object', 'additionalProperties': False, @@ -253,8 +270,11 @@ 'nodes': PARSED_NODES_CONTRACT, 'macros': PARSED_MACROS_CONTRACT, 'generated_at': { - 'type': 'date-time' - } + 'type': 'string', + 'format': 'date-time', + }, + 'parent_map': NODE_EDGE_MAP, + 'child_map': NODE_EDGE_MAP, }, 'required': ['nodes', 'macros'], } @@ -334,7 +354,8 @@ def build_edges(nodes): return forward_edges, backward_edges -class ParsedManifest(object): +class ParsedManifest(APIObject): + SCHEMA = PARSED_MANIFEST_CONTRACT """The final result of parsing all macros and nodes in a graph.""" def __init__(self, nodes, macros, generated_at): """The constructor. 
nodes and macros are dictionaries mapping unique @@ -344,6 +365,8 @@ def __init__(self, nodes, macros, generated_at): self.nodes = nodes self.macros = macros self.generated_at = generated_at + self._contents = {} + super(ParsedManifest, self).__init__() def serialize(self): """Convert the parsed manifest to a nested dict structure that we can From 3cb174feb2104471a49d97717f55564adbcad203 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 25 Jul 2018 06:51:23 -0600 Subject: [PATCH 6/6] Handle weird windows clocks --- test/integration/029_docs_generate_tests/test_docs_generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 4d2e87a9574..e7472f9b1ee 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -58,7 +58,7 @@ def assertRecent(self, timestr): yesterday, parsed, 'parsed date {} happened over 24h ago'.format(parsed) ) - self.assertGreater( + self.assertGreaterEqual( now, parsed, 'parsed date {} happened in the future'.format(parsed) )