diff --git a/dbt/adapters/fabricsparknb/fabric_spark_credentials.py b/dbt/adapters/fabricsparknb/fabric_spark_credentials.py
index 9985bb5..5189a8f 100644
--- a/dbt/adapters/fabricsparknb/fabric_spark_credentials.py
+++ b/dbt/adapters/fabricsparknb/fabric_spark_credentials.py
@@ -9,6 +9,7 @@ class SparkCredentials(Credentials):
method: str = "livy"
workspaceid: str = None
database: Optional[str] = None
+ log_lakehouse: Optional[str] = None
lakehouse: str = None
lakehouseid: str = None # type: ignore
endpoint: Optional[str] = "https://msitapi.fabric.microsoft.com/v1"
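Not part of the patch: a minimal sketch, assuming the profile target dict shape used elsewhere in this repo, of how the new optional `log_lakehouse` credential is resolved downstream — it falls back to `lakehouse` when unset, mirroring the logic added to dbt_wrapper/wrapper.py below.

```python
# Illustrative helper only; the actual fallback lives in dbt_wrapper/wrapper.py.
def resolve_log_lakehouse(target_info: dict, lakehouse: str) -> str:
    # Prefer the optional log_lakehouse from the profile target;
    # otherwise write the logging tables to the main lakehouse.
    return target_info.get("log_lakehouse") or lakehouse

# Example:
# resolve_log_lakehouse({"lakehouse": "datalake"}, "datalake")        -> "datalake"
# resolve_log_lakehouse({"log_lakehouse": "logdatalake"}, "datalake") -> "logdatalake"
```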
diff --git a/dbt/include/fabricsparknb/notebooks/master_notebook.ipynb b/dbt/include/fabricsparknb/notebooks/master_notebook.ipynb
index 4574f33..a99583c 100644
--- a/dbt/include/fabricsparknb/notebooks/master_notebook.ipynb
+++ b/dbt/include/fabricsparknb/notebooks/master_notebook.ipynb
@@ -50,7 +50,7 @@
"import pandas as pd # type: ignore\n",
"from tabulate import tabulate # type: ignore\n",
"import json\n",
- "from pyspark.sql.functions import *\n",
+ "from pyspark.sql.functions import * # type: ignore\n",
"import os\n",
"import uuid"
]
@@ -68,7 +68,8 @@
"metadata": {},
"outputs": [],
"source": [
- "gv_lakehouse = '{{lakehouse_name}}'"
+ "gv_lakehouse = '{{lakehouse_name}}'\n",
+ "gv_log_lakehouse = '{{log_lakehouse}}'"
]
},
{
@@ -144,8 +145,8 @@
"\n",
" return all_files\n",
"\n",
- "def call_child_notebook(notebook, batch_id):\n",
- " mssparkutils.notebook.run(notebook, {{ notebook_timeout }},{\"pm_batch_id\": batch_id})"
+ "def call_child_notebook(notebook, batch_id, master_notebook):\n",
+ " mssparkutils.notebook.run(notebook, {{ notebook_timeout }},{\"pm_batch_id\": batch_id, \"pm_master_notebook\": master_notebook}) # type: ignore"
]
},
{
@@ -171,7 +172,7 @@
"metadata": {},
"outputs": [],
"source": [
- "embedded_hashes = {{ hashes }}\n",
+ "embedded_hashes = {{ hashes }} # type: ignore\n",
"RelativePathForMetaData = \"Files/MetaExtracts/\"\n",
"current_hashes = json.loads(get_file_content_using_notebookutils(RelativePathForMetaData + 'MetaHashes.json'))\n",
"\n",
@@ -182,7 +183,7 @@
" return h['hash']\n",
" return ret\n",
"\n",
- "embedded_hashcheck = {{ notebook_hashcheck }}\n",
+ "embedded_hashcheck = {{ notebook_hashcheck }} # type: ignore\n",
"\n",
"##Hashcheck: BYPASS = 0, WARNING = 1, ERROR = 2\n",
"if embedded_hashcheck == 0:\n",
@@ -212,7 +213,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Create Tables"
+ "## Create or Alter Tables"
]
},
{
@@ -222,14 +223,15 @@
"outputs": [],
"source": [
"sql = f'''\n",
- "CREATE TABLE IF NOT EXISTS {gv_lakehouse}.execution_log (\n",
+ "CREATE TABLE IF NOT EXISTS {gv_log_lakehouse}.execution_log (\n",
" notebook STRING,\n",
" start_time DOUBLE,\n",
" status STRING,\n",
" error STRING,\n",
" execution_time DOUBLE,\n",
" run_order INT,\n",
- " batch_id string \n",
+ " batch_id string,\n",
+ " master_notebook STRING \n",
")\n",
"USING DELTA\n",
"'''\n",
@@ -244,10 +246,11 @@
"outputs": [],
"source": [
"sql = f'''\n",
- "CREATE TABLE IF NOT EXISTS {gv_lakehouse}.batch (\n",
+ "CREATE TABLE IF NOT EXISTS {gv_log_lakehouse}.batch (\n",
" batch_id STRING,\n",
" start_time LONG,\n",
- " status STRING\n",
+ " status STRING,\n",
+ " master_notebook STRING\n",
")\n",
"USING DELTA\n",
"'''\n",
@@ -255,6 +258,46 @@
"spark.sql(sql) # type: ignore"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Check if the master_notebook column exists in the batch table\n",
+ "schema_check_sql = f\"DESCRIBE {gv_log_lakehouse}.execution_log\"\n",
+ "schema_check_df = spark.sql(schema_check_sql) # type: ignore\n",
+ "\n",
+ "# Check if the master_notebook column exists in the schema\n",
+ "if 'master_notebook' not in [row['col_name'] for row in schema_check_df.collect()]:\n",
+ " # Add the master_notebook column to the table\n",
+ " alter_table_sql = f'''\n",
+ " ALTER TABLE {gv_log_lakehouse}.execution_log\n",
+ " ADD COLUMN master_notebook STRING\n",
+ " '''\n",
+ " spark.sql(alter_table_sql) # type: ignore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Check if the master_notebook column exists in the batch table\n",
+ "schema_check_sql = f\"DESCRIBE {gv_log_lakehouse}.batch\"\n",
+ "schema_check_df = spark.sql(schema_check_sql) # type: ignore\n",
+ "\n",
+ "# Check if the master_notebook column exists in the schema\n",
+ "if 'master_notebook' not in [row['col_name'] for row in schema_check_df.collect()]:\n",
+ " # Add the master_notebook column to the table\n",
+ " alter_table_sql = f'''\n",
+ " ALTER TABLE {gv_log_lakehouse}.batch\n",
+ " ADD COLUMN master_notebook STRING\n",
+ " '''\n",
+ " spark.sql(alter_table_sql) # type: ignore"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -270,28 +313,29 @@
"source": [
"\n",
"\n",
- "def close_batch(batch_id, status):\n",
+ "def close_batch(batch_id, master_notebook, status):\n",
" sql = f'''\n",
- " UPDATE {gv_lakehouse}.batch\n",
+ " UPDATE {gv_log_lakehouse}.batch\n",
" SET status = '{status}'\n",
- " WHERE batch_id = '{str(batch_id)}' '''\n",
+ " WHERE batch_id = '{str(batch_id)}' \n",
+ " AND master_notebook = '{str(master_notebook)}' '''\n",
"\n",
- " spark.sql(sql)\n",
+ " spark.sql(sql) # type: ignore\n",
"\n",
- "def get_open_batch():\n",
+ "def get_open_batch(master_notebook):\n",
" sql = f'''\n",
- " SELECT MAX(batch_id) AS LatestBatchID FROM {gv_lakehouse}.batch WHERE status = 'open'\n",
+ " SELECT MAX(batch_id) AS LatestBatchID FROM {gv_log_lakehouse}.batch WHERE status = 'open' AND master_notebook = '{str(master_notebook)}'\n",
" '''\n",
"\n",
- " return spark.sql(sql).collect()[0]['LatestBatchID']\n",
+ " return spark.sql(sql).collect()[0]['LatestBatchID'] # type: ignore\n",
"\n",
- "def insert_new_batch(batch_id):\n",
+ "def insert_new_batch(batch_id, master_notebook):\n",
" sql = f'''\n",
- " INSERT INTO {gv_lakehouse}.batch\n",
- " SELECT '{batch_id}' AS batch_id, UNIX_TIMESTAMP() AS start_time, 'open' AS status\n",
+ " INSERT INTO {gv_log_lakehouse}.batch\n",
+ " SELECT '{batch_id}' AS batch_id, UNIX_TIMESTAMP() AS start_time, 'open' AS status, '{str(master_notebook)}' AS master_notebook\n",
" '''\n",
"\n",
- " spark.sql(sql)"
+ " spark.sql(sql) # type: ignore"
]
},
{
@@ -308,7 +352,8 @@
"outputs": [],
"source": [
"new_batch_id = str(uuid.uuid4())\n",
- "insert_new_batch(new_batch_id)"
+ "master_notebook = mssparkutils.runtime.context.get('currentNotebookName')\n",
+ "insert_new_batch(new_batch_id, master_notebook) # type: ignore"
]
},
{
@@ -332,10 +377,10 @@
"outputs": [],
"source": [
"# Read the log for this batch execution\n",
- "df_execution_log = spark.sql(f\"SELECT * FROM {gv_lakehouse}.execution_log WHERE batch_id = '{new_batch_id}'\")\n",
+ "df_execution_log = spark.sql(f\"SELECT * FROM {gv_log_lakehouse}.execution_log WHERE batch_id = '{new_batch_id}' AND master_notebook = '{master_notebook}'\") # type: ignore\n",
"# Check if any have not succeeded\n",
- "failed_results = df_execution_log.filter(col(\"status\") != \"success\")\n",
- "succeeded_results = df_execution_log.filter(col(\"status\") == \"success\")\n",
+ "failed_results = df_execution_log.filter(col(\"status\") != \"success\") # type: ignore\n",
+ "succeeded_results = df_execution_log.filter(col(\"status\") == \"success\") # type: ignore\n",
"\n",
"if failed_results.count() == 0: \n",
" print(\"Batch Succeeded\")\n",
@@ -344,7 +389,7 @@
" print(\"Batch Failed\")\n",
" display(failed_results)\n",
"\n",
- "close_batch(new_batch_id, 'closed')\n"
+ "close_batch(new_batch_id, master_notebook, 'closed') # type: ignore\n"
]
}
],
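Not part of the patch: the two new cells above both follow the same DESCRIBE-then-ALTER pattern; a standalone sketch of that pattern, using a hypothetical `ensure_column` helper name, is shown here for reviewers — the notebook cells are the actual implementation.

```python
# Illustrative only; mirrors the schema check performed by the new notebook cells.
def ensure_column(spark, table: str, column: str, col_type: str = "STRING") -> None:
    # Add `column` to `table` if DESCRIBE does not already list it.
    existing = [row["col_name"] for row in spark.sql(f"DESCRIBE {table}").collect()]
    if column not in existing:
        spark.sql(f"ALTER TABLE {table} ADD COLUMN {column} {col_type}")

# e.g. ensure_column(spark, f"{gv_log_lakehouse}.execution_log", "master_notebook")
#      ensure_column(spark, f"{gv_log_lakehouse}.batch", "master_notebook")
```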
diff --git a/dbt/include/fabricsparknb/notebooks/master_notebook_x.ipynb b/dbt/include/fabricsparknb/notebooks/master_notebook_x.ipynb
index eca9f5b..4091a1f 100644
--- a/dbt/include/fabricsparknb/notebooks/master_notebook_x.ipynb
+++ b/dbt/include/fabricsparknb/notebooks/master_notebook_x.ipynb
@@ -17,7 +17,8 @@
},
"outputs": [],
"source": [
- "pm_batch_id = None"
+ "pm_batch_id = None\n",
+ "pm_master_notebook = None"
]
},
{
@@ -64,8 +65,8 @@
"import time\n",
"import jsonpickle # type: ignore\n",
"import json\n",
- "from pyspark.sql.types import *\n",
- "from pyspark.sql.functions import *\n",
+ "from pyspark.sql.types import * # type: ignore\n",
+ "from pyspark.sql.functions import * # type: ignore\n",
"import os"
]
},
@@ -82,17 +83,17 @@
"metadata": {},
"outputs": [],
"source": [
- "notebook_files1 = {{ notebook_files }}\n",
- "run_order1 = {{ run_order }}\n",
+ "notebook_files1 = {{ notebook_files }} # type: ignore\n",
+ "run_order1 = {{ run_order }} # type: ignore\n",
"\n",
"# Define a function to execute a notebook and return the results\n",
"@dataclass\n",
"class NotebookResult: \n",
" notebook: str\n",
- " start_time: float\n",
+ " start_time: int\n",
" status: str\n",
" error: str\n",
- " execution_time: float\n",
+ " execution_time: int\n",
" run_order: int\n",
"\n",
"def execute_notebook(notebook_file):\n",
@@ -170,31 +171,31 @@
"outputs": [],
"source": [
"# Define the schema for the DataFrame\n",
- "schema = StructType([\n",
- " StructField(\"notebook\", StringType(), True),\n",
- " StructField(\"start_time\", DoubleType(), True),\n",
- " StructField(\"status\", StringType(), True),\n",
- " StructField(\"error\", StringType(), True),\n",
- " StructField(\"execution_time\", DoubleType(), True),\n",
- " StructField(\"run_order\", IntegerType(), True),\n",
- " StructField(\"batch_id\", StringType(), True)\n",
+ "schema = StructType([ # type: ignore\n",
+ " StructField(\"notebook\", StringType(), True), # type: ignore\n",
+ " StructField(\"start_time\", DoubleType(), True), # type: ignore\n",
+ " StructField(\"status\", StringType(), True), # type: ignore\n",
+ " StructField(\"error\", StringType(), True), # type: ignore\n",
+ " StructField(\"execution_time\", DoubleType(), True), # type: ignore\n",
+ " StructField(\"run_order\", IntegerType(), True), # type: ignore\n",
+ " StructField(\"batch_id\", StringType(), True) # type: ignore\n",
"])\n",
"\n",
"# Create an empty DataFrame with the defined schema\n",
- "failed_results = spark.createDataFrame([], schema=schema)\n",
+ "failed_results = spark.createDataFrame([], schema=schema) # type: ignore\n",
"# Read the log for this batch execution\n",
- "df_execution_log = spark.sql(f\"SELECT * FROM {{lakehouse_name}}.execution_log WHERE batch_id = '{pm_batch_id}'\")\n",
+ "df_execution_log = spark.sql(f\"SELECT * FROM {{log_lakehouse}}.execution_log WHERE batch_id = '{pm_batch_id}' AND master_notebook = '{pm_master_notebook}'\") # type: ignore\n",
"if df_execution_log.count() > 0:\n",
" \n",
" # Check if any have not succeeded\n",
- " failed_results = df_execution_log.filter(col(\"status\") != \"success\")\n",
+ " failed_results = df_execution_log.filter(col(\"status\") != \"success\") # type: ignore\n",
"\n",
" # Print the failed results\n",
" for row in failed_results.collect():\n",
" print(f\"Notebook {row['notebook']} failed with error: {row['error']}\")\n",
"\n",
" # Check if have succeeded\n",
- " succeeded_results = df_execution_log.filter(col(\"status\") == \"success\")\n",
+ " succeeded_results = df_execution_log.filter(col(\"status\") == \"success\") # type: ignore\n",
"\n",
" # Print the succeeded results\n",
" for row in succeeded_results.collect():\n",
@@ -215,26 +216,27 @@
"outputs": [],
"source": [
"# Define the schema for the Log DataFrame\n",
- "schema = StructType([\n",
- " StructField(\"notebook\", StringType(), True),\n",
- " StructField(\"start_time\", DoubleType(), True),\n",
- " StructField(\"status\", StringType(), True),\n",
- " StructField(\"error\", StringType(), True),\n",
- " StructField(\"execution_time\", DoubleType(), True),\n",
- " StructField(\"run_order\", IntegerType(), True)\n",
+ "schema = StructType([ # type: ignore\n",
+ " StructField(\"notebook\", StringType(), True), # type: ignore\n",
+ " StructField(\"start_time\", DoubleType(), True), # type: ignore\n",
+ " StructField(\"status\", StringType(), True), # type: ignore\n",
+ " StructField(\"error\", StringType(), True), # type: ignore\n",
+ " StructField(\"execution_time\", DoubleType(), True), # type: ignore\n",
+ " StructField(\"run_order\", IntegerType(), True) # type: ignore\n",
"])\n",
"\n",
"if failed_results.count() == 0:\n",
" new_results = []\n",
" # Use a ThreadPoolExecutor to run the notebooks in parallel\n",
" # Execute the notebooks and collect the results\n",
- " with ThreadPoolExecutor(max_workers={{ max_worker }}) as executor:\n",
- " new_results = list(executor.map(execute_notebook, notebook_files1))\n",
+ " with ThreadPoolExecutor(max_workers={{ max_worker }}) as executor: # type: ignore\n",
+ " new_results = list(executor.map(execute_notebook, notebook_files1)) # type: ignore\n",
"\n",
" # Write the results to the log file\n",
- " df_log = spark.createDataFrame(new_results, schema=schema)\n",
- " df_log = df_log.withColumn(\"batch_id\", lit(f'{pm_batch_id}'))\n",
- " df_log.write.format(\"delta\").mode(\"append\").saveAsTable(\"{{lakehouse_name}}.execution_log\")\n",
+ " df_log = spark.createDataFrame(new_results, schema=schema) # type: ignore\n",
+ " df_log = df_log.withColumn(\"batch_id\", lit(f'{pm_batch_id}')) # type: ignore\n",
+ " df_log = df_log.withColumn(\"master_notebook\", lit(f'{pm_master_notebook}')) # type: ignore\n",
+ " df_log.write.format(\"delta\").mode(\"append\").saveAsTable(\"{{log_lakehouse}}.execution_log\")\n",
"else:\n",
" print(\"Failures in previous run_order... supressing execution\")\n",
" raise Exception(\"Failures in previous run_order... supressing execution\")"
diff --git a/dbt/include/fabricsparknb/profile_template.yml b/dbt/include/fabricsparknb/profile_template.yml
index 16fc3ac..a9e3406 100644
--- a/dbt/include/fabricsparknb/profile_template.yml
+++ b/dbt/include/fabricsparknb/profile_template.yml
@@ -10,6 +10,8 @@ prompts:
hint: Name of the Lakehouse in the workspace that you want to connect to
lakehouseid:
hint: GUID of the lakehouse, which can be extracted from url when you open lakehouse artifact from fabric.microsoft.com
+ log_lakehouse:
+    hint: Name of the Lakehouse in the workspace that you want to write execution logs to
endpoint:
default: https://api.fabric.microsoft.com/v1
auth:
diff --git a/dbt_wrapper/generate_files.py b/dbt_wrapper/generate_files.py
index 28b6a36..4e15f6d 100644
--- a/dbt_wrapper/generate_files.py
+++ b/dbt_wrapper/generate_files.py
@@ -15,7 +15,11 @@
@staticmethod
-def GenerateMasterNotebook(project_root, workspaceid, lakehouseid, lakehouse_name, project_name, progress: ProgressConsoleWrapper, task_id, notebook_timeout, max_worker, notebook_hashcheck):
+def GenerateMasterNotebook(project_root, workspaceid, lakehouseid, lakehouse_name, project_name, progress: ProgressConsoleWrapper, task_id, notebook_timeout, max_worker, log_lakehouse, notebook_hashcheck):
+    # If log_lakehouse is None, fall back to the main lakehouse name
+ if log_lakehouse is None:
+ log_lakehouse = lakehouse_name
+
# Iterate through the notebooks directory and create a list of notebook files
notebook_dir = f'./{project_root}/target/notebooks/'
notebook_files_str = [os.path.splitext(os.path.basename(f))[0] for f in os.listdir(Path(notebook_dir)) if f.endswith('.ipynb') and 'master_notebook' not in f]
@@ -77,7 +81,7 @@ def GenerateMasterNotebook(project_root, workspaceid, lakehouseid, lakehouse_nam
template = env.get_template('master_notebook_x.ipynb')
# Render the template with the notebook_file variable
- rendered_template = template.render(notebook_files=file_str_with_current_sort_order, run_order=sort_order, lakehouse_name=lakehouse_name, project_name=project_name,max_worker=max_worker)
+ rendered_template = template.render(notebook_files=file_str_with_current_sort_order, run_order=sort_order, lakehouse_name=lakehouse_name, project_name=project_name,max_worker=max_worker, log_lakehouse=log_lakehouse)
# Parse the rendered template as a notebook
nb = nbf.reads(rendered_template, as_version=4)
@@ -104,7 +108,7 @@ def GenerateMasterNotebook(project_root, workspaceid, lakehouseid, lakehouse_nam
MetaHashes = Catalog.GetMetaHashes(project_root)
# Render the template with the notebook_file variable
- rendered_template = template.render(lakehouse_name=lakehouse_name, hashes=MetaHashes, project_name=project_name, notebook_timeout=notebook_timeout,notebook_hashcheck=notebook_hashcheck)
+ rendered_template = template.render(lakehouse_name=lakehouse_name, hashes=MetaHashes, project_name=project_name, notebook_timeout=notebook_timeout, log_lakehouse=log_lakehouse,notebook_hashcheck=notebook_hashcheck)
# Parse the rendered template as a notebook
nb = nbf.reads(rendered_template, as_version=4)
@@ -121,7 +125,7 @@ def GenerateMasterNotebook(project_root, workspaceid, lakehouseid, lakehouse_nam
nb.cells.insert((insertion_point), cell)
insertion_point += 1
# Create a new code cell with the SQL
- code = f'call_child_notebook("master_{project_name}_notebook_' + str(sort_order) + '", new_batch_id)'
+ code = f'call_child_notebook("master_{project_name}_notebook_' + str(sort_order) + '", new_batch_id, master_notebook)'
cell = nbf.v4.new_code_cell(source=code)
# Add the cell to the notebook
nb.cells.insert((insertion_point), cell)
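Not part of the patch: for a hypothetical project named `my_project` and run order 1, the code cell generated by the line above now renders as shown below — the extra `master_notebook` argument is what lets the child notebook tag its log rows.

```python
call_child_notebook("master_my_project_notebook_1", new_batch_id, master_notebook)
```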
diff --git a/dbt_wrapper/wrapper.py b/dbt_wrapper/wrapper.py
index f497bea..335cce3 100644
--- a/dbt_wrapper/wrapper.py
+++ b/dbt_wrapper/wrapper.py
@@ -74,9 +74,14 @@ def GeneratePreDbtScripts(self, PreInstall, progress: ProgressConsoleWrapper, ta
gf.GenerateAzCopyScripts(self.dbt_project_dir, self.target_info['workspaceid'], self.target_info['lakehouseid'], progress=progress, task_id=task_id)
- def GeneratePostDbtScripts(self, PreInstall=False, progress=None, task_id=None, notebook_timeout=None, notebook_hashcheck=None):
+ def GeneratePostDbtScripts(self, PreInstall=False, progress=None, task_id=None, notebook_timeout=None, log_lakehouse=None, notebook_hashcheck=None):
+        # Use an explicitly passed log_lakehouse if given; otherwise fall back to
+        # the profile's log_lakehouse, and finally to the main lakehouse.
+        if log_lakehouse is None:
+            try:
+                log_lakehouse = self.target_info['log_lakehouse']
+            except KeyError:
+                log_lakehouse = self.lakehouse
+
gf.SetSqlVariableForAllNotebooks(self.dbt_project_dir, self.lakehouse, progress=progress, task_id=task_id)
- gf.GenerateMasterNotebook(self.dbt_project_dir, self.target_info['workspaceid'], self.target_info['lakehouseid'], self.lakehouse, self.config['name'], progress=progress, task_id=task_id, notebook_timeout=notebook_timeout,max_worker = self.target_info['threads'], notebook_hashcheck=notebook_hashcheck)
+ gf.GenerateMasterNotebook(self.dbt_project_dir, self.target_info['workspaceid'], self.target_info['lakehouseid'], self.lakehouse, self.config['name'], progress=progress, task_id=task_id, notebook_timeout=notebook_timeout,max_worker = self.target_info['threads'], log_lakehouse=log_lakehouse, notebook_hashcheck=notebook_hashcheck)
def ConvertNotebooksToFabricFormat(self, progress: ProgressConsoleWrapper, task_id=None):
curr_dir = os.getcwd()
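Not part of the patch: an illustrative call showing how a caller can now pass `log_lakehouse` explicitly; with the fallback above, an explicit argument takes precedence over the profile value. The `wrapper` instance name and argument values are hypothetical.

```python
wrapper.GeneratePostDbtScripts(
    PreInstall=False,
    progress=progress,
    task_id=task_id,
    notebook_timeout=1800,
    log_lakehouse="logdatalake",   # hypothetical; omit to use the profile's log_lakehouse
    notebook_hashcheck=1,
)
```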
diff --git a/docs/developer_guide/framework_setup.md b/docs/developer_guide/framework_setup.md
index a66db59..a1d3e87 100644
--- a/docs/developer_guide/framework_setup.md
+++ b/docs/developer_guide/framework_setup.md
@@ -112,6 +112,7 @@ my_project:
endpoint: dkld #remove
lakehouse: 'lakehouse' #the name of your lakehouse
lakehouseid: 'aa2e5f92-53cc-4ab3-9a54-a6e5b1aeb9a9' #the guid of your lakehouse
+    log_lakehouse: 'loglakehouse' #the name of your logging lakehouse (optional; lakehouse is used by default)
method: livy
schema: dbo #the schema you want to use
tenant_id: '72f988bf-86f1-41af-91ab-2d7cd011db47' #your power bi tenant id
diff --git a/docs/user_guide/dbt_project_setup.md b/docs/user_guide/dbt_project_setup.md
index 2d5bf0a..7fb76f9 100644
--- a/docs/user_guide/dbt_project_setup.md
+++ b/docs/user_guide/dbt_project_setup.md
@@ -39,15 +39,16 @@ dbt init my_project # Note that the name of the project is arbitrary... call it
3. `workspaceid (GUID of the workspace. Open the workspace from fabric.microsoft.com and copy the workspace url):`
**Enter the workspace id**
4. `lakehouse (Name of the Lakehouse in the workspace that you want to connect to):`
**Enter the lakehouse name**
5. `lakehouseid (GUID of the lakehouse, which can be extracted from url when you open lakehouse artifact from fabric.microsoft.com):`
**Enter the lakehouse id**
- 6. `endpoint [https://api.fabric.microsoft.com/v1]:`
**Press enter to accept the default**
- 7. `auth (Use CLI (az login) for interactive execution or SPN for automation) [CLI]:`
**select `cli`**
- 8. `client_id (Use when SPN auth is used.):`
**Enter a single space and press enter**
- 9. `client_scrent (Use when SPN auth is used.):`
**Enter a single space and press enter**
- 10. `tenant_id (Use when SPN auth is used.):`
**Enter a single space or Enter your PowerBI tenant id**
- 11. `connect_retries [0]:`
**Enter 0**
- 12. `connect_timeout [10]:`
**Enter 10**
- 13. `schema (default schema that dbt will build objects in):`
**Enter `dbo`**
- 14. threads (1 or more) [1]:
**Enter 1**
+    6. `log_lakehouse (Name of the Lakehouse in the workspace that you want to write execution logs to):`
**Enter the log_lakehouse name**
+ 7. `endpoint [https://api.fabric.microsoft.com/v1]:`
**Press enter to accept the default**
+ 8. `auth (Use CLI (az login) for interactive execution or SPN for automation) [CLI]:`
**select `cli`**
+ 9. `client_id (Use when SPN auth is used.):`
**Enter a single space and press enter**
+ 10. `client_scrent (Use when SPN auth is used.):`
**Enter a single space and press enter**
+ 11. `tenant_id (Use when SPN auth is used.):`
**Enter a single space or Enter your PowerBI tenant id**
+ 12. `connect_retries [0]:`
**Enter 0**
+ 13. `connect_timeout [10]:`
**Enter 10**
+ 14. `schema (default schema that dbt will build objects in):`
**Enter `dbo`**
+    15. `threads (1 or more) [1]:`
**Enter 1**
The command above will create a new directory called `my_project`. Within this directory you will find a `dbt_project.yml` file. Open this file in your favourite text editor and note that it should look like the example below except that in your case my_project will be replaced with the name of the project you created above.:
@@ -113,9 +114,10 @@ The dbt init command will also update your `profiles.yml` file with a profile ma
When run this will display a file similar to the one below. Check that your details are correct.
!!! note
- The `profiles.yml` file should look like the example below except that in your case the highlighted lines may contain different values.
+ * The `profiles.yml` file should look like the example below except that in your case the highlighted lines may contain different values.
+    * `log_lakehouse` is optional; if it is omitted, the `lakehouse` value is used for logging.
-```{.yaml hl_lines="1 2 4 10 11 13 14" linenums="1" title="profiles.yml"}
+```{.yaml hl_lines="1 2 4 10 11 13 14 15" linenums="1" title="profiles.yml"}
my_project:
target: my_project_target
outputs:
@@ -130,6 +132,7 @@ my_project:
lakehousedatapath: /lakehouse
lakehouseid: 031feff6-071d-42df-818a-984771c083c4
lakehouse: datalake
+ log_lakehouse: logdatalake
schema: dbo
threads: 1
type: fabricsparknb
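Not part of the patch: with the profile above, the logging tables land in `logdatalake`; a quick way to inspect a batch from a Fabric notebook, assuming the column names created by the master notebook in this patch.

```python
# Run inside a Fabric notebook attached to the workspace.
df = spark.sql(
    "SELECT notebook, status, error, run_order, master_notebook "
    "FROM logdatalake.execution_log "
    "ORDER BY run_order"
)
display(df)
```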