Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/metadata hash check#61 jm #147

Merged
merged 8 commits into from
Aug 7, 2024
25 changes: 17 additions & 8 deletions dbt/include/fabricsparknb/notebooks/master_notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -181,15 +181,24 @@
" if(h['file'] == file):\n",
" return h['hash']\n",
" return ret\n",
" \n",
"if current_hashes != embedded_hashes:\n",
" for h in embedded_hashes:\n",
" print(\n",
" h['file'] + '\\n \\t Emb Hash: ' + get_hash(h['file'], embedded_hashes) + '\\n \\t Env Hash: ' + get_hash(h['file'], current_hashes)\n",
" )\n",
" print('Warning!: Hashes do not match. Its recommended to re-generate the dbt project using the latest extract of the target environment metadata.')\n",
"\n",
"embedded_hashcheck = {{ notebook_hashcheck }}\n",
"\n",
"##Hashcheck: BYPASS = 0, WARNING = 1, ERROR = 2\n",
"if embedded_hashcheck == 0:\n",
" print('Metadata Hash Check Bypassed')\n",
"else:\n",
" print('Metadata Hashes Match 😏')"
" if current_hashes != embedded_hashes:\n",
" for h in embedded_hashes:\n",
" print(\n",
" h['file'] + '\\n \\t Emb Hash: ' + get_hash(h['file'], embedded_hashes) + '\\n \\t Env Hash: ' + get_hash(h['file'], current_hashes)\n",
" )\n",
" if embedded_hashcheck==1:\n",
" print('Warning!: Hashes do not match. Its recommended to re-generate the dbt project using the latest extract of the target environment metadata.')\n",
" else:\n",
" raise Exception('ERROR, Hashes do not match. Its recommended to re-generate the dbt project using the latest extract of the target environment metadata.')\n",
" else:\n",
" print('Metadata Hashes Match 😏')"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions dbt_wrapper/generate_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@


@staticmethod
def GenerateMasterNotebook(project_root, workspaceid, lakehouseid, lakehouse_name, project_name, progress: ProgressConsoleWrapper, task_id, notebook_timeout, max_worker):
def GenerateMasterNotebook(project_root, workspaceid, lakehouseid, lakehouse_name, project_name, progress: ProgressConsoleWrapper, task_id, notebook_timeout, max_worker, notebook_hashcheck):
# Iterate through the notebooks directory and create a list of notebook files
notebook_dir = f'./{project_root}/target/notebooks/'
notebook_files_str = [os.path.splitext(os.path.basename(f))[0] for f in os.listdir(Path(notebook_dir)) if f.endswith('.ipynb') and 'master_notebook' not in f]
Expand Down Expand Up @@ -96,7 +96,7 @@ def GenerateMasterNotebook(project_root, workspaceid, lakehouseid, lakehouse_nam

MetaHashes = Catalog.GetMetaHashes(project_root)
# Render the template with the notebook_file variable
rendered_template = template.render(lakehouse_name=lakehouse_name, hashes=MetaHashes, project_name=project_name, notebook_timeout=notebook_timeout)
rendered_template = template.render(lakehouse_name=lakehouse_name, hashes=MetaHashes, project_name=project_name, notebook_timeout=notebook_timeout,notebook_hashcheck=notebook_hashcheck)

# Parse the rendered template as a notebook
nb = nbf.reads(rendered_template, as_version=4)
Expand Down
23 changes: 23 additions & 0 deletions dbt_wrapper/hashcheck_levels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@

class HashCheckLevel:
    """Severity levels for the metadata hash check rendered into the master notebook.

    BYPASS (0) skips the comparison, WARNING (1) prints a message on a hash
    mismatch, and ERROR (2) raises an exception on a mismatch.
    """

    BYPASS = 0
    WARNING = 1
    ERROR = 2

    # Lookup tables built once at class-creation time instead of being
    # re-created on every from_string/to_string call.
    _NAME_TO_LEVEL = {
        "BYPASS": BYPASS,
        "WARNING": WARNING,
        "ERROR": ERROR,
    }
    _LEVEL_TO_NAME = {value: name for name, value in _NAME_TO_LEVEL.items()}

    @staticmethod
    def from_string(level_str: str):
        """Return the int level for a (case-insensitive) level name.

        Returns None when *level_str* is not a recognised level name,
        mirroring the dict ``.get`` default used by callers.
        """
        return HashCheckLevel._NAME_TO_LEVEL.get(str(level_str).upper(), None)

    @staticmethod
    def to_string(level):
        """Return the canonical name for an int level, or "Unknown"."""
        return HashCheckLevel._LEVEL_TO_NAME.get(level, "Unknown")
22 changes: 20 additions & 2 deletions dbt_wrapper/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from dbt_wrapper.wrapper import Commands
from rich import print
from dbt_wrapper.log_levels import LogLevel
from dbt_wrapper.hashcheck_levels import HashCheckLevel
from dbt_wrapper.stage_executor import stage_executor

app = typer.Typer(no_args_is_help=True)
Expand All @@ -21,14 +22,20 @@
if (_log_level is None):
_log_level = LogLevel.WARNING

#JM issues61 adding _hashcheck_level
_hashcheck_level: HashCheckLevel = None
if (_hashcheck_level is None):
_hashcheck_level = HashCheckLevel.BYPASS

def docs_options():
return ["generate", "serve"]


def log_levels():
return ["DEBUG", "INFO", "WARNING", "ERROR"]

#JM issues61 adding _hashcheck_level
def hashcheck_levels():
return ["BYPASS", "WARNING", "ERROR"]

@app.command()
def docs():
Expand Down Expand Up @@ -112,6 +119,13 @@ def run_all(
help="The option to set the log level. This controls the verbosity of the output. Allowed values are `DEBUG`, `INFO`, `WARNING`, `ERROR`. Default is `WARNING`.",
),
] = "WARNING",
#JM issues61 adding _hashcheck_level
hashcheck_level: Annotated[
Optional[str],
typer.Option(
help="The option to set the hash check level. This controls the verbosity of the output. Allowed values are `BYPASS`, `WARNING`, `ERROR`. Default is `BYPASS`.",
),
] = "BYPASS",
notebook_timeout: Annotated[
int,
typer.Option(
Expand All @@ -138,6 +152,9 @@ def run_all(
"""

_log_level: LogLevel = LogLevel.from_string(log_level)
#JM issues61 adding _hashcheck_level
_hashcheck_level: HashCheckLevel = HashCheckLevel.from_string(hashcheck_level)

wrapper_commands.GetDbtConfigs(dbt_project_dir=dbt_project_dir, dbt_profiles_dir=dbt_profiles_dir)
se: stage_executor = stage_executor(log_level=_log_level, console=console)
se.perform_stage(option=clean_target_dir, action_callables=[wrapper_commands.CleanProjectTargetDirectory], stage_name="Clean Target")
Expand All @@ -155,8 +172,9 @@ def run_all(
if (build_dbt_project):
wrapper_commands.BuildDbtProject(PreInstall=pre_install, select=select, exclude=exclude)

#JM issues61 adding _hashcheck_level
action_callables = [
lambda **kwargs: wrapper_commands.GeneratePostDbtScripts(PreInstall=pre_install, notebook_timeout=notebook_timeout, **kwargs),
lambda **kwargs: wrapper_commands.GeneratePostDbtScripts(PreInstall=pre_install, notebook_timeout=notebook_timeout, notebook_hashcheck=_hashcheck_level, **kwargs),
lambda **kwargs: wrapper_commands.ConvertNotebooksToFabricFormat(**kwargs)
]
se.perform_stage(option=generate_post_dbt_scripts, action_callables=action_callables, stage_name="Generate Post-DBT Scripts")
Expand Down
4 changes: 2 additions & 2 deletions dbt_wrapper/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,9 @@ def GeneratePreDbtScripts(self, PreInstall, progress: ProgressConsoleWrapper, ta

gf.GenerateAzCopyScripts(self.dbt_project_dir, self.target_info['workspaceid'], self.target_info['lakehouseid'], progress=progress, task_id=task_id)

def GeneratePostDbtScripts(self, PreInstall=False, progress=None, task_id=None, notebook_timeout=None):
def GeneratePostDbtScripts(self, PreInstall=False, progress=None, task_id=None, notebook_timeout=None, notebook_hashcheck=None):
gf.SetSqlVariableForAllNotebooks(self.dbt_project_dir, self.lakehouse, progress=progress, task_id=task_id)
gf.GenerateMasterNotebook(self.dbt_project_dir, self.target_info['workspaceid'], self.target_info['lakehouseid'], self.lakehouse, self.config['name'], progress=progress, task_id=task_id, notebook_timeout=notebook_timeout,max_worker = self.target_info['threads'])
gf.GenerateMasterNotebook(self.dbt_project_dir, self.target_info['workspaceid'], self.target_info['lakehouseid'], self.lakehouse, self.config['name'], progress=progress, task_id=task_id, notebook_timeout=notebook_timeout,max_worker = self.target_info['threads'], notebook_hashcheck=notebook_hashcheck)

def ConvertNotebooksToFabricFormat(self, progress: ProgressConsoleWrapper, task_id=None):
curr_dir = os.getcwd()
Expand Down
Loading