diff --git a/dbt/adapters/fabricsparknb/fabric_spark_credentials.py b/dbt/adapters/fabricsparknb/fabric_spark_credentials.py index 9985bb5..5189a8f 100644 --- a/dbt/adapters/fabricsparknb/fabric_spark_credentials.py +++ b/dbt/adapters/fabricsparknb/fabric_spark_credentials.py @@ -9,6 +9,7 @@ class SparkCredentials(Credentials): method: str = "livy" workspaceid: str = None database: Optional[str] = None + log_lakehouse: Optional[str] = None lakehouse: str = None lakehouseid: str = None # type: ignore endpoint: Optional[str] = "https://msitapi.fabric.microsoft.com/v1" diff --git a/dbt/include/fabricsparknb/notebooks/master_notebook.ipynb b/dbt/include/fabricsparknb/notebooks/master_notebook.ipynb index 4574f33..a99583c 100644 --- a/dbt/include/fabricsparknb/notebooks/master_notebook.ipynb +++ b/dbt/include/fabricsparknb/notebooks/master_notebook.ipynb @@ -50,7 +50,7 @@ "import pandas as pd # type: ignore\n", "from tabulate import tabulate # type: ignore\n", "import json\n", - "from pyspark.sql.functions import *\n", + "from pyspark.sql.functions import * # type: ignore\n", "import os\n", "import uuid" ] @@ -68,7 +68,8 @@ "metadata": {}, "outputs": [], "source": [ - "gv_lakehouse = '{{lakehouse_name}}'" + "gv_lakehouse = '{{lakehouse_name}}'\n", + "gv_log_lakehouse = '{{log_lakehouse}}'" ] }, { @@ -144,8 +145,8 @@ "\n", " return all_files\n", "\n", - "def call_child_notebook(notebook, batch_id):\n", - " mssparkutils.notebook.run(notebook, {{ notebook_timeout }},{\"pm_batch_id\": batch_id})" + "def call_child_notebook(notebook, batch_id, master_notebook):\n", + " mssparkutils.notebook.run(notebook, {{ notebook_timeout }},{\"pm_batch_id\": batch_id, \"pm_master_notebook\": master_notebook}) # type: ignore" ] }, { @@ -171,7 +172,7 @@ "metadata": {}, "outputs": [], "source": [ - "embedded_hashes = {{ hashes }}\n", + "embedded_hashes = {{ hashes }} # type: ignore\n", "RelativePathForMetaData = \"Files/MetaExtracts/\"\n", "current_hashes = json.loads(get_file_content_using_notebookutils(RelativePathForMetaData + 'MetaHashes.json'))\n", "\n", @@ -182,7 +183,7 @@ " return h['hash']\n", " return ret\n", "\n", - "embedded_hashcheck = {{ notebook_hashcheck }}\n", + "embedded_hashcheck = {{ notebook_hashcheck }} # type: ignore\n", "\n", "##Hashcheck: BYPASS = 0, WARNING = 1, ERROR = 2\n", "if embedded_hashcheck == 0:\n", @@ -212,7 +213,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Create Tables" + "## Create or Alter Tables" ] }, { @@ -222,14 +223,15 @@ "outputs": [], "source": [ "sql = f'''\n", - "CREATE TABLE IF NOT EXISTS {gv_lakehouse}.execution_log (\n", + "CREATE TABLE IF NOT EXISTS {gv_log_lakehouse}.execution_log (\n", " notebook STRING,\n", " start_time DOUBLE,\n", " status STRING,\n", " error STRING,\n", " execution_time DOUBLE,\n", " run_order INT,\n", - " batch_id string \n", + " batch_id string,\n", + " master_notebook STRING \n", ")\n", "USING DELTA\n", "'''\n", @@ -244,10 +246,11 @@ "outputs": [], "source": [ "sql = f'''\n", - "CREATE TABLE IF NOT EXISTS {gv_lakehouse}.batch (\n", + "CREATE TABLE IF NOT EXISTS {gv_log_lakehouse}.batch (\n", " batch_id STRING,\n", " start_time LONG,\n", - " status STRING\n", + " status STRING,\n", + " master_notebook STRING\n", ")\n", "USING DELTA\n", "'''\n", @@ -255,6 +258,46 @@ "spark.sql(sql) # type: ignore" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check if the master_notebook column exists in the batch table\n", + "schema_check_sql = f\"DESCRIBE 
{gv_log_lakehouse}.execution_log\"\n", + "schema_check_df = spark.sql(schema_check_sql) # type: ignore\n", + "\n", + "# Check if the master_notebook column exists in the schema\n", + "if 'master_notebook' not in [row['col_name'] for row in schema_check_df.collect()]:\n", + " # Add the master_notebook column to the table\n", + " alter_table_sql = f'''\n", + " ALTER TABLE {gv_log_lakehouse}.execution_log\n", + " ADD COLUMN master_notebook STRING\n", + " '''\n", + " spark.sql(alter_table_sql) # type: ignore" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check if the master_notebook column exists in the batch table\n", + "schema_check_sql = f\"DESCRIBE {gv_log_lakehouse}.batch\"\n", + "schema_check_df = spark.sql(schema_check_sql) # type: ignore\n", + "\n", + "# Check if the master_notebook column exists in the schema\n", + "if 'master_notebook' not in [row['col_name'] for row in schema_check_df.collect()]:\n", + " # Add the master_notebook column to the table\n", + " alter_table_sql = f'''\n", + " ALTER TABLE {gv_log_lakehouse}.batch\n", + " ADD COLUMN master_notebook STRING\n", + " '''\n", + " spark.sql(alter_table_sql) # type: ignore" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -270,28 +313,29 @@ "source": [ "\n", "\n", - "def close_batch(batch_id, status):\n", + "def close_batch(batch_id, master_notebook, status):\n", " sql = f'''\n", - " UPDATE {gv_lakehouse}.batch\n", + " UPDATE {gv_log_lakehouse}.batch\n", " SET status = '{status}'\n", - " WHERE batch_id = '{str(batch_id)}' '''\n", + " WHERE batch_id = '{str(batch_id)}' \n", + " AND master_notebook = '{str(master_notebook)}' '''\n", "\n", - " spark.sql(sql)\n", + " spark.sql(sql) # type: ignore\n", "\n", - "def get_open_batch():\n", + "def get_open_batch(master_notebook):\n", " sql = f'''\n", - " SELECT MAX(batch_id) AS LatestBatchID FROM {gv_lakehouse}.batch WHERE status = 'open'\n", + " SELECT MAX(batch_id) AS LatestBatchID FROM {gv_log_lakehouse}.batch WHERE status = 'open' AND master_notebook = '{str(master_notebook)}'\n", " '''\n", "\n", - " return spark.sql(sql).collect()[0]['LatestBatchID']\n", + " return spark.sql(sql).collect()[0]['LatestBatchID'] # type: ignore\n", "\n", - "def insert_new_batch(batch_id):\n", + "def insert_new_batch(batch_id, master_notebook):\n", " sql = f'''\n", - " INSERT INTO {gv_lakehouse}.batch\n", - " SELECT '{batch_id}' AS batch_id, UNIX_TIMESTAMP() AS start_time, 'open' AS status\n", + " INSERT INTO {gv_log_lakehouse}.batch\n", + " SELECT '{batch_id}' AS batch_id, UNIX_TIMESTAMP() AS start_time, 'open' AS status, '{str(master_notebook)}' AS master_notebook\n", " '''\n", "\n", - " spark.sql(sql)" + " spark.sql(sql) # type: ignore" ] }, { @@ -308,7 +352,8 @@ "outputs": [], "source": [ "new_batch_id = str(uuid.uuid4())\n", - "insert_new_batch(new_batch_id)" + "master_notebook = mssparkutils.runtime.context.get('currentNotebookName')\n", + "insert_new_batch(new_batch_id, master_notebook) # type: ignore" ] }, { @@ -332,10 +377,10 @@ "outputs": [], "source": [ "# Read the log for this batch execution\n", - "df_execution_log = spark.sql(f\"SELECT * FROM {gv_lakehouse}.execution_log WHERE batch_id = '{new_batch_id}'\")\n", + "df_execution_log = spark.sql(f\"SELECT * FROM {gv_log_lakehouse}.execution_log WHERE batch_id = '{new_batch_id}' AND master_notebook = '{master_notebook}'\") # type: ignore\n", "# Check if any have not succeeded\n", - "failed_results = df_execution_log.filter(col(\"status\") != \"success\")\n", - 
"succeeded_results = df_execution_log.filter(col(\"status\") == \"success\")\n", + "failed_results = df_execution_log.filter(col(\"status\") != \"success\") # type: ignore\n", + "succeeded_results = df_execution_log.filter(col(\"status\") == \"success\") # type: ignore\n", "\n", "if failed_results.count() == 0: \n", " print(\"Batch Succeeded\")\n", @@ -344,7 +389,7 @@ " print(\"Batch Failed\")\n", " display(failed_results)\n", "\n", - "close_batch(new_batch_id, 'closed')\n" + "close_batch(new_batch_id, master_notebook, 'closed') # type: ignore\n" ] } ], diff --git a/dbt/include/fabricsparknb/notebooks/master_notebook_x.ipynb b/dbt/include/fabricsparknb/notebooks/master_notebook_x.ipynb index eca9f5b..4091a1f 100644 --- a/dbt/include/fabricsparknb/notebooks/master_notebook_x.ipynb +++ b/dbt/include/fabricsparknb/notebooks/master_notebook_x.ipynb @@ -17,7 +17,8 @@ }, "outputs": [], "source": [ - "pm_batch_id = None" + "pm_batch_id = None\n", + "pm_master_notebook = None" ] }, { @@ -64,8 +65,8 @@ "import time\n", "import jsonpickle # type: ignore\n", "import json\n", - "from pyspark.sql.types import *\n", - "from pyspark.sql.functions import *\n", + "from pyspark.sql.types import * # type: ignore\n", + "from pyspark.sql.functions import * # type: ignore\n", "import os" ] }, @@ -82,17 +83,17 @@ "metadata": {}, "outputs": [], "source": [ - "notebook_files1 = {{ notebook_files }}\n", - "run_order1 = {{ run_order }}\n", + "notebook_files1 = {{ notebook_files }} # type: ignore\n", + "run_order1 = {{ run_order }} # type: ignore\n", "\n", "# Define a function to execute a notebook and return the results\n", "@dataclass\n", "class NotebookResult: \n", " notebook: str\n", - " start_time: float\n", + " start_time: int\n", " status: str\n", " error: str\n", - " execution_time: float\n", + " execution_time: int\n", " run_order: int\n", "\n", "def execute_notebook(notebook_file):\n", @@ -170,31 +171,31 @@ "outputs": [], "source": [ "# Define the schema for the DataFrame\n", - "schema = StructType([\n", - " StructField(\"notebook\", StringType(), True),\n", - " StructField(\"start_time\", DoubleType(), True),\n", - " StructField(\"status\", StringType(), True),\n", - " StructField(\"error\", StringType(), True),\n", - " StructField(\"execution_time\", DoubleType(), True),\n", - " StructField(\"run_order\", IntegerType(), True),\n", - " StructField(\"batch_id\", StringType(), True)\n", + "schema = StructType([ # type: ignore\n", + " StructField(\"notebook\", StringType(), True), # type: ignore\n", + " StructField(\"start_time\", DoubleType(), True), # type: ignore\n", + " StructField(\"status\", StringType(), True), # type: ignore\n", + " StructField(\"error\", StringType(), True), # type: ignore\n", + " StructField(\"execution_time\", DoubleType(), True), # type: ignore\n", + " StructField(\"run_order\", IntegerType(), True), # type: ignore\n", + " StructField(\"batch_id\", StringType(), True) # type: ignore\n", "])\n", "\n", "# Create an empty DataFrame with the defined schema\n", - "failed_results = spark.createDataFrame([], schema=schema)\n", + "failed_results = spark.createDataFrame([], schema=schema) # type: ignore\n", "# Read the log for this batch execution\n", - "df_execution_log = spark.sql(f\"SELECT * FROM {{lakehouse_name}}.execution_log WHERE batch_id = '{pm_batch_id}'\")\n", + "df_execution_log = spark.sql(f\"SELECT * FROM {{log_lakehouse}}.execution_log WHERE batch_id = '{pm_batch_id}' AND master_notebook = '{pm_master_notebook}'\") # type: ignore\n", "if df_execution_log.count() > 0:\n", " 
\n", " # Check if any have not succeeded\n", - " failed_results = df_execution_log.filter(col(\"status\") != \"success\")\n", + " failed_results = df_execution_log.filter(col(\"status\") != \"success\") # type: ignore\n", "\n", " # Print the failed results\n", " for row in failed_results.collect():\n", " print(f\"Notebook {row['notebook']} failed with error: {row['error']}\")\n", "\n", " # Check if have succeeded\n", - " succeeded_results = df_execution_log.filter(col(\"status\") == \"success\")\n", + " succeeded_results = df_execution_log.filter(col(\"status\") == \"success\") # type: ignore\n", "\n", " # Print the succeeded results\n", " for row in succeeded_results.collect():\n", @@ -215,26 +216,27 @@ "outputs": [], "source": [ "# Define the schema for the Log DataFrame\n", - "schema = StructType([\n", - " StructField(\"notebook\", StringType(), True),\n", - " StructField(\"start_time\", DoubleType(), True),\n", - " StructField(\"status\", StringType(), True),\n", - " StructField(\"error\", StringType(), True),\n", - " StructField(\"execution_time\", DoubleType(), True),\n", - " StructField(\"run_order\", IntegerType(), True)\n", + "schema = StructType([ # type: ignore\n", + " StructField(\"notebook\", StringType(), True), # type: ignore\n", + " StructField(\"start_time\", DoubleType(), True), # type: ignore\n", + " StructField(\"status\", StringType(), True), # type: ignore\n", + " StructField(\"error\", StringType(), True), # type: ignore\n", + " StructField(\"execution_time\", DoubleType(), True), # type: ignore\n", + " StructField(\"run_order\", IntegerType(), True) # type: ignore\n", "])\n", "\n", "if failed_results.count() == 0:\n", " new_results = []\n", " # Use a ThreadPoolExecutor to run the notebooks in parallel\n", " # Execute the notebooks and collect the results\n", - " with ThreadPoolExecutor(max_workers={{ max_worker }}) as executor:\n", - " new_results = list(executor.map(execute_notebook, notebook_files1))\n", + " with ThreadPoolExecutor(max_workers={{ max_worker }}) as executor: # type: ignore\n", + " new_results = list(executor.map(execute_notebook, notebook_files1)) # type: ignore\n", "\n", " # Write the results to the log file\n", - " df_log = spark.createDataFrame(new_results, schema=schema)\n", - " df_log = df_log.withColumn(\"batch_id\", lit(f'{pm_batch_id}'))\n", - " df_log.write.format(\"delta\").mode(\"append\").saveAsTable(\"{{lakehouse_name}}.execution_log\")\n", + " df_log = spark.createDataFrame(new_results, schema=schema) # type: ignore\n", + " df_log = df_log.withColumn(\"batch_id\", lit(f'{pm_batch_id}')) # type: ignore\n", + " df_log = df_log.withColumn(\"master_notebook\", lit(f'{pm_master_notebook}')) # type: ignore\n", + " df_log.write.format(\"delta\").mode(\"append\").saveAsTable(\"{{log_lakehouse}}.execution_log\")\n", "else:\n", " print(\"Failures in previous run_order... supressing execution\")\n", " raise Exception(\"Failures in previous run_order... 
supressing execution\")" diff --git a/dbt/include/fabricsparknb/profile_template.yml b/dbt/include/fabricsparknb/profile_template.yml index 16fc3ac..a9e3406 100644 --- a/dbt/include/fabricsparknb/profile_template.yml +++ b/dbt/include/fabricsparknb/profile_template.yml @@ -10,6 +10,8 @@ prompts: hint: Name of the Lakehouse in the workspace that you want to connect to lakehouseid: hint: GUID of the lakehouse, which can be extracted from url when you open lakehouse artifact from fabric.microsoft.com + log_lakehouse: + hint: Name of the Lakehouse in the workspace that you want to log to endpoint: default: https://api.fabric.microsoft.com/v1 auth: diff --git a/dbt_wrapper/generate_files.py b/dbt_wrapper/generate_files.py index 28b6a36..4e15f6d 100644 --- a/dbt_wrapper/generate_files.py +++ b/dbt_wrapper/generate_files.py @@ -15,7 +15,11 @@ @staticmethod -def GenerateMasterNotebook(project_root, workspaceid, lakehouseid, lakehouse_name, project_name, progress: ProgressConsoleWrapper, task_id, notebook_timeout, max_worker, notebook_hashcheck): +def GenerateMasterNotebook(project_root, workspaceid, lakehouseid, lakehouse_name, project_name, progress: ProgressConsoleWrapper, task_id, notebook_timeout, max_worker, log_lakehouse, notebook_hashcheck): + # If log lakehouse is None use lakehouse as default + if log_lakehouse is None: + log_lakehouse = lakehouse_name + # Iterate through the notebooks directory and create a list of notebook files notebook_dir = f'./{project_root}/target/notebooks/' notebook_files_str = [os.path.splitext(os.path.basename(f))[0] for f in os.listdir(Path(notebook_dir)) if f.endswith('.ipynb') and 'master_notebook' not in f] @@ -77,7 +81,7 @@ def GenerateMasterNotebook(project_root, workspaceid, lakehouseid, lakehouse_nam template = env.get_template('master_notebook_x.ipynb') # Render the template with the notebook_file variable - rendered_template = template.render(notebook_files=file_str_with_current_sort_order, run_order=sort_order, lakehouse_name=lakehouse_name, project_name=project_name,max_worker=max_worker) + rendered_template = template.render(notebook_files=file_str_with_current_sort_order, run_order=sort_order, lakehouse_name=lakehouse_name, project_name=project_name,max_worker=max_worker, log_lakehouse=log_lakehouse) # Parse the rendered template as a notebook nb = nbf.reads(rendered_template, as_version=4) @@ -104,7 +108,7 @@ def GenerateMasterNotebook(project_root, workspaceid, lakehouseid, lakehouse_nam MetaHashes = Catalog.GetMetaHashes(project_root) # Render the template with the notebook_file variable - rendered_template = template.render(lakehouse_name=lakehouse_name, hashes=MetaHashes, project_name=project_name, notebook_timeout=notebook_timeout,notebook_hashcheck=notebook_hashcheck) + rendered_template = template.render(lakehouse_name=lakehouse_name, hashes=MetaHashes, project_name=project_name, notebook_timeout=notebook_timeout, log_lakehouse=log_lakehouse,notebook_hashcheck=notebook_hashcheck) # Parse the rendered template as a notebook nb = nbf.reads(rendered_template, as_version=4) @@ -121,7 +125,7 @@ def GenerateMasterNotebook(project_root, workspaceid, lakehouseid, lakehouse_nam nb.cells.insert((insertion_point), cell) insertion_point += 1 # Create a new code cell with the SQL - code = f'call_child_notebook("master_{project_name}_notebook_' + str(sort_order) + '", new_batch_id)' + code = f'call_child_notebook("master_{project_name}_notebook_' + str(sort_order) + '", new_batch_id, master_notebook)' cell = nbf.v4.new_code_cell(source=code) # Add the 
cell to the notebook nb.cells.insert((insertion_point), cell) diff --git a/dbt_wrapper/wrapper.py b/dbt_wrapper/wrapper.py index f497bea..335cce3 100644 --- a/dbt_wrapper/wrapper.py +++ b/dbt_wrapper/wrapper.py @@ -74,9 +74,14 @@ def GeneratePreDbtScripts(self, PreInstall, progress: ProgressConsoleWrapper, ta gf.GenerateAzCopyScripts(self.dbt_project_dir, self.target_info['workspaceid'], self.target_info['lakehouseid'], progress=progress, task_id=task_id) - def GeneratePostDbtScripts(self, PreInstall=False, progress=None, task_id=None, notebook_timeout=None, notebook_hashcheck=None): + def GeneratePostDbtScripts(self, PreInstall=False, progress=None, task_id=None, notebook_timeout=None, log_lakehouse=None, notebook_hashcheck=None): + try: + log_lakehouse = self.target_info['log_lakehouse'] + except KeyError: + log_lakehouse = self.lakehouse + gf.SetSqlVariableForAllNotebooks(self.dbt_project_dir, self.lakehouse, progress=progress, task_id=task_id) - gf.GenerateMasterNotebook(self.dbt_project_dir, self.target_info['workspaceid'], self.target_info['lakehouseid'], self.lakehouse, self.config['name'], progress=progress, task_id=task_id, notebook_timeout=notebook_timeout,max_worker = self.target_info['threads'], notebook_hashcheck=notebook_hashcheck) + gf.GenerateMasterNotebook(self.dbt_project_dir, self.target_info['workspaceid'], self.target_info['lakehouseid'], self.lakehouse, self.config['name'], progress=progress, task_id=task_id, notebook_timeout=notebook_timeout,max_worker = self.target_info['threads'], log_lakehouse=log_lakehouse, notebook_hashcheck=notebook_hashcheck) def ConvertNotebooksToFabricFormat(self, progress: ProgressConsoleWrapper, task_id=None): curr_dir = os.getcwd() diff --git a/docs/developer_guide/framework_setup.md b/docs/developer_guide/framework_setup.md index a66db59..a1d3e87 100644 --- a/docs/developer_guide/framework_setup.md +++ b/docs/developer_guide/framework_setup.md @@ -112,6 +112,7 @@ my_project: endpoint: dkld #remove lakehouse: 'lakehouse' #the name of your lakehouse lakehouseid: 'aa2e5f92-53cc-4ab3-9a54-a6e5b1aeb9a9' #the guid of your lakehouse + log_lakehouse: 'loglakehouse' #the name of your logging lakehouse, this is not required as lakehouse will be used by default method: livy schema: dbo #the schema you want to use tenant_id: '72f988bf-86f1-41af-91ab-2d7cd011db47' #your power bi tenant id diff --git a/docs/user_guide/dbt_project_setup.md b/docs/user_guide/dbt_project_setup.md index 2d5bf0a..7fb76f9 100644 --- a/docs/user_guide/dbt_project_setup.md +++ b/docs/user_guide/dbt_project_setup.md @@ -39,15 +39,16 @@ dbt init my_project # Note that the name of the project is arbitrary... call it 3. `workspaceid (GUID of the workspace. Open the workspace from fabric.microsoft.com and copy the workspace url):`
**Enter the workspace id** 4. `lakehouse (Name of the Lakehouse in the workspace that you want to connect to):`
**Enter the lakehouse name** 5. `lakehouseid (GUID of the lakehouse, which can be extracted from url when you open lakehouse artifact from fabric.microsoft.com):`
**Enter the lakehouse id** - 6. `endpoint [https://api.fabric.microsoft.com/v1]:`
**Press enter to accept the default** - 7. `auth (Use CLI (az login) for interactive execution or SPN for automation) [CLI]:`
**select `cli`** - 8. `client_id (Use when SPN auth is used.):`
**Enter a single space and press enter** - 9. `client_scrent (Use when SPN auth is used.):`
**Enter a single space and press enter** - 10. `tenant_id (Use when SPN auth is used.):`
**Enter a single space or Enter your PowerBI tenant id** - 11. `connect_retries [0]:`
**Enter 0** - 12. `connect_timeout [10]:`
**Enter 10** - 13. `schema (default schema that dbt will build objects in):`
**Enter `dbo`** - 14. threads (1 or more) [1]:
**Enter 1** + 6. `log_lakehouse (Name of the Lakehouse in the workspace that you want to log to):`
**Enter the log lakehouse name (optional; the main lakehouse is used if it is omitted from the profile)** + 7. `endpoint [https://api.fabric.microsoft.com/v1]:`
**Press enter to accept the default** + 8. `auth (Use CLI (az login) for interactive execution or SPN for automation) [CLI]:`
**Select `cli`** + 9. `client_id (Use when SPN auth is used.):`
**Enter a single space and press enter** + 10. `client_scrent (Use when SPN auth is used.):`
**Enter a single space and press enter** + 11. `tenant_id (Use when SPN auth is used.):`
**Enter a single space or Enter your PowerBI tenant id** + 12. `connect_retries [0]:`
**Enter 0** + 13. `connect_timeout [10]:`
**Enter 10** + 14. `schema (default schema that dbt will build objects in):`
**Enter `dbo`** + 15. `threads (1 or more) [1]:`
**Enter 1** The command above will create a new directory called `my_project`. Within this directory you will find a `dbt_project.yml` file. Open this file in your favourite text editor and note that it should look like the example below except that in your case my_project will be replaced with the name of the project you created above.: @@ -113,9 +114,10 @@ The dbt init command will also update your `profiles.yml` file with a profile ma When run this will display a file similar to the one below. Check that your details are correct. !!! note - The `profiles.yml` file should look like the example below except that in your case the highlighted lines may contain different values. + * The `profiles.yml` file should look like the example below except that in your case the highlighted lines may contain different values. + * log_lakehouse is an optional value in the profile. -```{.yaml hl_lines="1 2 4 10 11 13 14" linenums="1" title="profiles.yml"} +```{.yaml hl_lines="1 2 4 10 11 13 14 15" linenums="1" title="profiles.yml"} my_project: target: my_project_target outputs: @@ -130,6 +132,7 @@ my_project: lakehousedatapath: /lakehouse lakehouseid: 031feff6-071d-42df-818a-984771c083c4 lakehouse: datalake + log_lakehouse: logdatalake schema: dbo threads: 1 type: fabricsparknb
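
A minimal sketch of the fallback behaviour that `GeneratePostDbtScripts` applies in `dbt_wrapper/wrapper.py`: when the profile target carries no `log_lakehouse` entry, the logging tables land in the main lakehouse. The standalone helper name `resolve_log_lakehouse` and the sample target dictionaries below are illustrative only and are not part of the package.

```python
from typing import Optional


def resolve_log_lakehouse(target_info: dict, lakehouse: str) -> str:
    """Pick the lakehouse that the execution_log and batch tables are written to.

    Mirrors the fallback in GeneratePostDbtScripts: if the profile target omits
    log_lakehouse (or leaves it empty), the main lakehouse is used instead.
    """
    log_lakehouse: Optional[str] = target_info.get("log_lakehouse")
    return log_lakehouse if log_lakehouse else lakehouse


# Illustrative profile targets, not values from a real project
with_log_lakehouse = {"lakehouse": "datalake", "log_lakehouse": "logdatalake"}
without_log_lakehouse = {"lakehouse": "datalake"}

print(resolve_log_lakehouse(with_log_lakehouse, "datalake"))     # logdatalake
print(resolve_log_lakehouse(without_log_lakehouse, "datalake"))  # datalake
```

Either way, the resolved value is rendered into both master notebook templates, so the `execution_log` and `batch` tables stay together, and profiles that never set `log_lakehouse` keep logging to the lakehouse they already use.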