Skip to content

Commit

Permalink
Merge pull request #137 from Insight-Services-APAC/Feature/issuetags
Browse files Browse the repository at this point in the history
Feature/issuetags
  • Loading branch information
grantkriegerai authored Aug 2, 2024
2 parents f3280d7 + f469698 commit 861df28
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 176 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/Test_Post_Install_TestProj.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install .
dbt_wrapper run-all-local samples/testproj samples/testproj
dbt_wrapper run-all samples/testproj samples/testproj --no-upload-notebooks-via-api --no-auto-run-master-notebook --no-download-metadata --no-auto-execute-metadata-extract
- name: Archive testproj artifacts
uses: actions/upload-artifact@v4
with:
Expand Down
6 changes: 5 additions & 1 deletion dbt_wrapper/generate_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@ def GenerateMasterNotebook(project_root, workspaceid, lakehouseid, lakehouse_nam
notebook_file['name'] = file
notebook_file['sort_order'] = matching_node.sort_order
notebook_files.append(notebook_file)


if len(notebook_files) == 0:
print("No notebooks found.Try checking your model configs and model specification args")
exit(1)

# Find the minimum and maximum sort_order
min_sort_order = min(file['sort_order'] for file in notebook_files)
max_sort_order = max(file['sort_order'] for file in notebook_files)
Expand Down
175 changes: 15 additions & 160 deletions dbt_wrapper/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,25 @@ def run_all(
help="Use this option to change the default notebook execution timeout setting.",
),
] = 1800
,
select: Annotated[
str,
typer.Option(
help="Use this option to provide a dbt resource selection syntax.Default is ``",
),
] = ""
,
exclude: Annotated[
str,
typer.Option(
help="Use this option to provide a dbt resource exclude syntax.Default is ``",
),
] = ""
):
"""
This command will run all elements of the project. For more granular control you can use the options provided to suppress certain stages or use a different command.
"""


_log_level: LogLevel = LogLevel.from_string(log_level)
wrapper_commands.GetDbtConfigs(dbt_project_dir=dbt_project_dir, dbt_profiles_dir=dbt_profiles_dir)
se: stage_executor = stage_executor(log_level=_log_level, console=console)
Expand All @@ -140,7 +153,7 @@ def run_all(
se.perform_stage(option=download_metadata, action_callables=[wrapper_commands.DownloadMetadata], stage_name="Download Metadata")

if (build_dbt_project):
wrapper_commands.BuildDbtProject(PreInstall=pre_install)
wrapper_commands.BuildDbtProject(PreInstall=pre_install, select=select, exclude=exclude)

action_callables = [
lambda **kwargs: wrapper_commands.GeneratePostDbtScripts(PreInstall=pre_install, notebook_timeout=notebook_timeout, **kwargs),
Expand All @@ -153,163 +166,5 @@ def run_all(
se.perform_stage(option=auto_run_master_notebook, action_callables=[wrapper_commands.RunMasterNotebook], stage_name="Run Master Notebook")


@app.command()
def execute_master_notebook(
    dbt_project_dir: Annotated[
        str,
        typer.Argument(
            help="The path to the dbt_project directory. If left blank it will use the current directory"
        ),
    ],
    dbt_profiles_dir: Annotated[
        str,
        typer.Argument(
            help="The path to the dbt_profile directory. If left blank it will use the users home directory followed by .dbt."
        ),
    ] = None,
    pre_install: Annotated[
        bool,
        typer.Option(
            help="The option to run the dbt adapter using source code and not the installed package."
        ),
    ] = False,
    log_level: Annotated[
        Optional[str],
        typer.Option(
            help="The option to set the log level. This controls the verbosity of the output. Allowed values are `DEBUG`, `INFO`, `WARNING`, `ERROR`. Default is `WARNING`.",
        ),
    ] = "WARNING",
    notebook_timeout: Annotated[
        int,
        typer.Option(
            help="Use this option to change the default notebook execution timeout setting.",
        ),
    ] = 1800
):
    """
    This command will just execute the final orchestrator notebook in Fabric. Assumes that the notebook has been uploaded.
    """
    # Thin wrapper over run_all: pass the user-facing options through unchanged
    # and hard-disable every stage except running the master notebook remotely.
    run_all(
        dbt_project_dir=dbt_project_dir,
        dbt_profiles_dir=dbt_profiles_dir,
        pre_install=pre_install,
        log_level=log_level,
        notebook_timeout=notebook_timeout,
        # All generation / build / upload stages are switched off here …
        clean_target_dir=False,
        generate_pre_dbt_scripts=False,
        generate_post_dbt_scripts=False,
        auto_execute_metadata_extract=False,
        download_metadata=False,
        build_dbt_project=False,
        upload_notebooks_via_api=False,
        # … leaving only the remote execution of the orchestrator notebook.
        auto_run_master_notebook=True,
    )


@app.command()
def run_all_local(
    dbt_project_dir: Annotated[
        str,
        typer.Argument(
            help="The path to the dbt_project directory. If left blank it will use the current directory"
        ),
    ],
    dbt_profiles_dir: Annotated[
        str,
        typer.Argument(
            help="The path to the dbt_profile directory. If left blank it will use the users home directory followed by .dbt."
        ),
    ] = None,
    pre_install: Annotated[
        bool,
        typer.Option(
            help="The option to run the dbt adapter using source code and not the installed package."
        ),
    ] = False,
    log_level: Annotated[
        Optional[str],
        typer.Option(
            help="The option to set the log level. This controls the verbosity of the output. Allowed values are `DEBUG`, `INFO`, `WARNING`, `ERROR`. Default is `WARNING`.",
        ),
    ] = "WARNING",
    notebook_timeout: Annotated[
        int,
        typer.Option(
            help="Use this option to change the default notebook execution timeout setting.",
        ),
    ] = 1800
):
    """
    This command will run the local-only stages of the project: clean the target directory, generate the pre and post dbt scripts and build the dbt project. It will not upload notebooks to Fabric, extract or download metadata, or execute anything remotely.
    """
    # NOTE: the docstring above is the command's --help text; the previous
    # version was copy-pasted from execute_master_notebook and described the
    # opposite behaviour (remote notebook execution).
    run_all(
        dbt_project_dir=dbt_project_dir,
        dbt_profiles_dir=dbt_profiles_dir,
        # Local stages enabled:
        clean_target_dir=True,
        generate_pre_dbt_scripts=True,
        generate_post_dbt_scripts=True,
        build_dbt_project=True,
        # Remote / Fabric-facing stages disabled:
        auto_execute_metadata_extract=False,
        download_metadata=False,
        upload_notebooks_via_api=False,
        auto_run_master_notebook=False,
        pre_install=pre_install,
        log_level=log_level,
        notebook_timeout=notebook_timeout,
    )


@app.command()
def build_dbt_project(
    dbt_project_dir: Annotated[
        str,
        typer.Argument(
            help="The path to the dbt_project directory. If left blank it will use the current directory"
        ),
    ],
    dbt_profiles_dir: Annotated[
        str,
        typer.Argument(
            help="The path to the dbt_profile directory. If left blank it will use the users home directory followed by .dbt."
        ),
    ] = None,
    pre_install: Annotated[
        bool,
        typer.Option(
            help="The option to run the dbt adapter using source code and not the installed package."
        ),
    ] = False,
    log_level: Annotated[
        Optional[str],
        typer.Option(
            help="The option to set the log level. This controls the verbosity of the output. Allowed values are `DEBUG`, `INFO`, `WARNING`, `ERROR`. Default is `WARNING`.",
        ),
    ] = "WARNING",
    notebook_timeout: Annotated[
        int,
        typer.Option(
            help="Use this option to change the default notebook execution timeout setting.",
        ),
    ] = 1800
):
    """
    This command will just build the dbt project. It assumes all other stages have been completed.
    """
    # Delegate to run_all with only the target-dir clean and the dbt build
    # enabled; every generation, upload and remote-execution stage stays off.
    run_all(
        dbt_project_dir=dbt_project_dir,
        dbt_profiles_dir=dbt_profiles_dir,
        pre_install=pre_install,
        log_level=log_level,
        notebook_timeout=notebook_timeout,
        clean_target_dir=True,
        build_dbt_project=True,
        generate_pre_dbt_scripts=False,
        generate_post_dbt_scripts=False,
        auto_execute_metadata_extract=False,
        download_metadata=False,
        upload_notebooks_via_api=False,
        auto_run_master_notebook=False,
    )

# CLI entry point: dispatch to the Typer application when run as a script.
if __name__ == "__main__":
    app()
38 changes: 24 additions & 14 deletions dbt_wrapper/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,8 @@ def GetDbtConfigs(self, dbt_project_dir, dbt_profiles_dir=None):
#self.workspaceid = self.config['workspaceid']

def PrintFirstTimeRunningMessage(self):
print('\033[1;33;48m', "It seems like this is the first time you are running this project. Please update the metadata extract json files in the metaextracts directory by performing the following steps:")
print(f"1. Run ./{os.environ['DBT_PROJECT_DIR']}/target/pwsh/upload.ps1")
print("2. Login to the Fabric Portal and navigate to the workspace and lakehouse you are using")
print(f"3. Manually upload the following notebook to your workspace: {os.environ['DBT_PROJECT_DIR']}/target/notebooks/import_{os.environ['DBT_PROJECT_DIR']}_notebook.ipynb. See https://learn.microsoft.com/en-us/fabric/data-engineering/how-to-use-notebook#import-existing-notebooks")
print("4. Open the notebook in the workspace and run all cells. This will upload the generated notebooks to your workspace.")
print(f"5. A new notebook should appear in the workspace called metadata_{os.environ['DBT_PROJECT_DIR']}_extract.ipynb. Open this notebook and run all cells. This will generate the metadata extract json files in the metaextracts directory.")
print(f"6. Run ./{os.environ['DBT_PROJECT_DIR']}/target/pwsh/download.ps1. This will download the metadata extract json files to the metaextracts directory.")
print("7. Re-run this script to generate the model and master notebooks.")
print('\033[1;33;48m', "Error!")
print(f"Directory ./{os.environ['DBT_PROJECT_DIR']}/metaextracts/ does not exist and should have been created automatically.")

def GeneratePreDbtScripts(self, PreInstall, progress: ProgressConsoleWrapper, task_id):
gf.GenerateMetadataExtract(self.dbt_project_dir, self.target_info['workspaceid'], self.target_info['lakehouseid'], self.lakehouse, self.config['name'], progress=progress, task_id=task_id)
Expand Down Expand Up @@ -96,7 +90,7 @@ def AutoUploadNotebooksViaApi(self, progress: ProgressConsoleWrapper, task_id):
dbt_project_dir = os.path.join(curr_dir, self.dbt_project_dir)
self.fa.APIUpsertNotebooks(progress=progress, task_id=task_id, dbt_project_dir=dbt_project_dir, workspace_id=self.target_info['workspaceid'])

def BuildDbtProject(self, PreInstall=False):
def BuildDbtProject(self, PreInstall=False, select="", exclude=""):
print(Panel.fit("[blue]<<<<<<<<<<<<<<<<<<<<<<< Start of dbt build[/blue]"))
# Check if PreInstall is True
if (PreInstall is True):
Expand All @@ -115,12 +109,28 @@ def BuildDbtProject(self, PreInstall=False):
spec = importlib.util.spec_from_file_location("util.name", utilpath)
foo = importlib.util.module_from_spec(spec)
sys.modules["module.name"] = foo
spec.loader.exec_module(foo)
foo.run_dbt(['build'])

spec.loader.exec_module(foo)
Buildarr = ['build']
if (len(select.strip()) > 0):
Buildarr.append('--select')
Buildarr.append(select)
if (len(exclude.strip()) > 0):
Buildarr.append('--exclude')
Buildarr.append(exclude)

foo.run_dbt(Buildarr)

else:
# Call dbt build
result = subprocess.run(["dbt", "build"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# Call dbt build
Buildarr = ['dbt', 'build']
if (len(select.strip()) > 0):
Buildarr.append('--select')
Buildarr.append(select)
if (len(exclude.strip()) > 0):
Buildarr.append('--exclude')
Buildarr.append(exclude)

result = subprocess.run(Buildarr, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# Access the output and error
output = result.stdout.decode('utf-8')
error = result.stderr.decode('utf-8')
Expand Down

0 comments on commit 861df28

Please sign in to comment.