datahub-project · hsheth2 · Jan 26, 2024 · Jan 15, 2024 · Jan 15, 2024 · Jan 15, 2024
diff --git a/metadata-ingestion/developing.md b/metadata-ingestion/developing.md
@@ -10,7 +10,7 @@ Also take a look at the guide to [adding a source](./adding-source.md).
 ### Requirements
 
 1. Python 3.7+ must be installed in your host environment.
-2. Java8 (gradle won't work with newer versions)
+2. Java 17 (gradle won't work with newer or older versions)
 4. On Debian/Ubuntu: `sudo apt install python3-dev python3-venv`
 5. On Fedora (if using LDAP source integration): `sudo yum install openldap-devel`
 

diff --git a/metadata-ingestion/docs/sources/metabase/metabase.md b/metadata-ingestion/docs/sources/metabase/metabase.md
@@ -19,4 +19,4 @@ The key in this map must be string, not integer although  Metabase API provides
 If `database_id_to_instance_map` is not specified, `platform_instance_map` is used for platform instance mapping. If none of the above are specified, platform instance is not used when constructing `urn` when searching for dataset relations.
 ## Compatibility
 
-Metabase version [v0.41.2](https://www.metabase.com/start/oss/)
+Metabase version [v0.48.3](https://www.metabase.com/start/oss/)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py
@@ -87,10 +87,17 @@ class MetabaseSource(Source):
     """
     This plugin extracts Charts, dashboards, and associated metadata. This plugin is in beta and has only been tested
     on PostgreSQL and H2 database.
-    ### Dashboard
 
-    [/api/dashboard](https://www.metabase.com/docs/latest/api-documentation.html#dashboard) endpoint is used to
-    retrieve the following dashboard information.
+    ### Collection
+
+    [/api/collection](https://www.metabase.com/docs/latest/api/collection) endpoint is used to
+    retrieve the available collections.
+
+    [/api/collection/<COLLECTION_ID>/items?models=dashboard](https://www.metabase.com/docs/latest/api/collection#get-apicollectioniditems) endpoint is used to retrieve a given collection and list their dashboards.
+
+     ### Dashboard
+
+    [/api/dashboard/<DASHBOARD_ID>](https://www.metabase.com/docs/latest/api/dashboard) endpoint is used to retrieve a given Dashboard and grab its information.
 
     - Title and description
     - Last edited by
@@ -184,19 +191,29 @@ def close(self) -> None:
 
     def emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]:
         try:
-            dashboard_response = self.session.get(
-                f"{self.config.connect_uri}/api/dashboard"
+            collections_response = self.session.get(
+                f"{self.config.connect_uri}/api/collection/"
             )
-            dashboard_response.raise_for_status()
-            dashboards = dashboard_response.json()
+            collections_response.raise_for_status()
+            collections = collections_response.json()
 
-            for dashboard_info in dashboards:
-                dashboard_snapshot = self.construct_dashboard_from_api_data(
-                    dashboard_info
+            for collection in collections:
+                collection_dashboards_response = self.session.get(
+                    f"{self.config.connect_uri}/api/collection/{collection['id']}/items?models=dashboard"
                 )
-                if dashboard_snapshot is not None:
-                    mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot)
-                    yield MetadataWorkUnit(id=dashboard_snapshot.urn, mce=mce)
+                collection_dashboards_response.raise_for_status()
+                collection_dashboards = collection_dashboards_response.json()
+
+                if not collection_dashboards.get("data"):
+                    continue
+
+                for dashboard_info in collection_dashboards.get("data"):
+                    dashboard_snapshot = self.construct_dashboard_from_api_data(
+                        dashboard_info
+                    )
+                    if dashboard_snapshot is not None:
+                        mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot)
+                        yield MetadataWorkUnit(id=dashboard_snapshot.urn, mce=mce)
 
         except HTTPError as http_error:
             self.report.report_failure(
@@ -251,10 +268,10 @@ def construct_dashboard_from_api_data(
         )
 
         chart_urns = []
-        cards_data = dashboard_details.get("ordered_cards", "{}")
+        cards_data = dashboard_details.get("dashcards", {})
         for card_info in cards_data:
             chart_urn = builder.make_chart_urn(
-                self.platform, card_info.get("card_id", "")
+                self.platform, card_info.get("card").get("id", "")
             )
             chart_urns.append(chart_urn)
 

diff --git a/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json b/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json
@@ -57,7 +57,8 @@
     },
     "systemMetadata": {
         "lastObserved": 1636614000000,
-        "runId": "metabase-test"
+        "runId": "metabase-test",
+        "lastRunId": "no-run-id-provided"
     }
 },
 {
@@ -112,7 +113,8 @@
     },
     "systemMetadata": {
         "lastObserved": 1636614000000,
-        "runId": "metabase-test"
+        "runId": "metabase-test",
+        "lastRunId": "no-run-id-provided"
     }
 },
 {
@@ -167,7 +169,8 @@
     },
     "systemMetadata": {
         "lastObserved": 1636614000000,
-        "runId": "metabase-test"
+        "runId": "metabase-test",
+        "lastRunId": "no-run-id-provided"
     }
 },
 {
@@ -180,10 +183,55 @@
                         "customProperties": {},
                         "title": "Dashboard 1",
                         "description": "",
-                        "charts": [
-                            "urn:li:chart:(metabase,1)",
-                            "urn:li:chart:(metabase,2)"
+                        "charts": [],
+                        "datasets": [],
+                        "lastModified": {
+                            "created": {
+                                "time": 1639417721742,
+                                "actor": "urn:li:corpuser:[email protected]"
+                            },
+                            "lastModified": {
+                                "time": 1639417721742,
+                                "actor": "urn:li:corpuser:[email protected]"
+                            }
+                        },
+                        "dashboardUrl": "http://localhost:3000/dashboard/10"
+                    }
+                },
+                {
+                    "com.linkedin.pegasus2avro.common.Ownership": {
+                        "owners": [
+                            {
+                                "owner": "urn:li:corpuser:[email protected]",
+                                "type": "DATAOWNER"
+                            }
                         ],
+                        "lastModified": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown"
+                        }
+                    }
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1636614000000,
+        "runId": "metabase-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": {
+            "urn": "urn:li:dashboard:(metabase,1)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.dashboard.DashboardInfo": {
+                        "customProperties": {},
+                        "title": "Dashboard 1",
+                        "description": "",
+                        "charts": [],
                         "datasets": [],
                         "lastModified": {
                             "created": {
@@ -195,7 +243,7 @@
                                 "actor": "urn:li:corpuser:[email protected]"
                             }
                         },
-                        "dashboardUrl": "http://localhost:3000/dashboard/1"
+                        "dashboardUrl": "http://localhost:3000/dashboard/10"
                     }
                 },
                 {
@@ -217,7 +265,8 @@
     },
     "systemMetadata": {
         "lastObserved": 1636614000000,
-        "runId": "metabase-test"
+        "runId": "metabase-test",
+        "lastRunId": "no-run-id-provided"
     }
 },
 {
@@ -232,7 +281,8 @@
     },
     "systemMetadata": {
         "lastObserved": 1636614000000,
-        "runId": "metabase-test"
+        "runId": "metabase-test",
+        "lastRunId": "no-run-id-provided"
     }
 },
 {
@@ -247,7 +297,8 @@
     },
     "systemMetadata": {
         "lastObserved": 1636614000000,
-        "runId": "metabase-test"
+        "runId": "metabase-test",
+        "lastRunId": "no-run-id-provided"
     }
 },
 {
@@ -262,7 +313,8 @@
     },
     "systemMetadata": {
         "lastObserved": 1636614000000,
-        "runId": "metabase-test"
+        "runId": "metabase-test",
+        "lastRunId": "no-run-id-provided"
     }
 },
 {
@@ -277,7 +329,8 @@
     },
     "systemMetadata": {
         "lastObserved": 1636614000000,
-        "runId": "metabase-test"
+        "runId": "metabase-test",
+        "lastRunId": "no-run-id-provided"
     }
 }
 ]
diff --git a/metadata-ingestion/tests/integration/metabase/setup/collection_dashboards.json b/metadata-ingestion/tests/integration/metabase/setup/collection_dashboards.json
@@ -0,0 +1 @@
+{"total": 1, "data": [{"description": null, "collection_position": null, "database_id": null, "name": "This is a test", "id": 10, "entity_id": "Q4gEaOmoBkfQX3_gXiH9g", "last-edit-info": {"id": 14, "last_name": "Doe", "first_name": "John", "email": "[email protected]", "timestamp": "2024-01-12T14:55:38.43304Z"}, "model": "dashboard"}], "models": ["dashboard"], "limit": null, "offset": null}
diff --git a/metadata-ingestion/tests/integration/metabase/setup/collections.json b/metadata-ingestion/tests/integration/metabase/setup/collections.json
@@ -0,0 +1 @@
+[{"authority_level": null, "can_write": true, "name": "Our analytics", "effective_ancestors": [], "effective_location": null, "parent_id": null, "id": "root", "is_personal": false}, {"authority_level": null, "description": null, "archived": false, "slug": "john_doe_personal_collection", "can_write": true, "name": "John Doe", "personal_owner_id": 14, "type": null, "id": 150, "entity_id": "kdLA_-CQy4F5lL15k8-TU", "location": "/", "namespace": null, "is_personal": true, "created_at": "2024-01-12T11:51:24.394309Z"}]
diff --git a/metadata-ingestion/tests/integration/metabase/setup/dashboard.json b/metadata-ingestion/tests/integration/metabase/setup/dashboard.json
diff --git a/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json b/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json
@@ -330,4 +330,4 @@
   "created_at": "2021-12-13T17:46:48.185",
   "public_uuid": null,
   "points_of_interest": null
-}
+}
diff --git a/metadata-ingestion/tests/integration/metabase/test_metabase.py b/metadata-ingestion/tests/integration/metabase/test_metabase.py
@@ -14,8 +14,10 @@
 JSON_RESPONSE_MAP = {
     "http://localhost:3000/api/session": "session.json",
     "http://localhost:3000/api/user/current": "user.json",
-    "http://localhost:3000/api/dashboard": "dashboard.json",
-    "http://localhost:3000/api/dashboard/1": "dashboard_1.json",
+    "http://localhost:3000/api/collection/": "collections.json",
+    "http://localhost:3000/api/collection/root/items?models=dashboard": "collection_dashboards.json",
+    "http://localhost:3000/api/collection/150/items?models=dashboard": "collection_dashboards.json",
+    "http://localhost:3000/api/dashboard/10": "dashboard_1.json",
     "http://localhost:3000/api/user/1": "user.json",
     "http://localhost:3000/api/card": "card.json",
     "http://localhost:3000/api/database/1": "bigquery_database.json",
@@ -26,7 +28,7 @@
     "http://localhost:3000/api/card/3": "card_3.json",
 }
 
-RESPONSE_ERROR_LIST = ["http://localhost:3000/api/dashboard"]
+RESPONSE_ERROR_LIST = ["http://localhost:3000/api/dashboard/public"]
 
 test_resources_dir = pathlib.Path(__file__).parent
 
@@ -114,9 +116,9 @@ def test_mode_ingest_success(pytestconfig, tmp_path):
                 },
             }
         )
+
         pipeline.run()
         pipeline.raise_from_status()
-
         mce_helpers.check_golden_file(
             pytestconfig,
             output_path=f"{tmp_path}/metabase_mces.json",
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"total": 1, "data": [{"description": null, "collection_position": null, "database_id": null, "name": "This is a test", "id": 10, "entity_id": "Q4gEaOmoBkfQX3_gXiH9g", "last-edit-info": {"id": 14, "last_name": "Doe", "first_name": "John", "email": "[email protected]", "timestamp": "2024-01-12T14:55:38.43304Z"}, "model": "dashboard"}], "models": ["dashboard"], "limit": null, "offset": null}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		[{"authority_level": null, "can_write": true, "name": "Our analytics", "effective_ancestors": [], "effective_location": null, "parent_id": null, "id": "root", "is_personal": false}, {"authority_level": null, "description": null, "archived": false, "slug": "john_doe_personal_collection", "can_write": true, "name": "John Doe", "personal_owner_id": 14, "type": null, "id": 150, "entity_id": "kdLA_-CQy4F5lL15k8-TU", "location": "/", "namespace": null, "is_personal": true, "created_at": "2024-01-12T11:51:24.394309Z"}]