Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ingest): add tests for colon characters in urns #11976

Merged
merged 3 commits into from
Nov 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion metadata-ingestion/src/datahub/utilities/urn_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
# NOTE: Frontend relies on encoding these three characters. Specifically, we decode and encode schema fields for column level lineage.
# If this changes, make appropriate changes to datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts
# We also rely on encoding these exact three characters when generating schemaField urns in our graphQL layer. Update SchemaFieldUtils if this changes.
RESERVED_CHARS = {",", "(", ")"}
# Also see https://datahubproject.io/docs/what/urn/#restrictions
RESERVED_CHARS = {",", "(", ")", "␟"}
RESERVED_CHARS_EXTENDED = RESERVED_CHARS.union({"%"})


Expand Down
49 changes: 43 additions & 6 deletions metadata-ingestion/tests/unit/urns/test_urn.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import pytest

from datahub.metadata.urns import DatasetUrn, Urn
from datahub.metadata.urns import (
CorpUserUrn,
DashboardUrn,
DataPlatformUrn,
DatasetUrn,
Urn,
)
from datahub.utilities.urns.error import InvalidUrnError

pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")
Expand Down Expand Up @@ -36,20 +42,51 @@ def test_url_encode_urn() -> None:

def test_invalid_urn() -> None:
with pytest.raises(InvalidUrnError):
Urn.create_from_string("urn:li:abc")
Urn.from_string("urn:li:abc")

with pytest.raises(InvalidUrnError):
Urn.create_from_string("urn:li:abc:")
Urn.from_string("urn:li:abc:")

with pytest.raises(InvalidUrnError):
Urn.create_from_string("urn:li:abc:()")
Urn.from_string("urn:li:abc:()")

with pytest.raises(InvalidUrnError):
Urn.create_from_string("urn:li:abc:(abc,)")
Urn.from_string("urn:li:abc:(abc,)")

with pytest.raises(InvalidUrnError):
Urn.from_string("urn:li:corpuser:abc)")


def test_urn_colon() -> None:
    """Check that colon characters inside an urn's id do not break parsing.

    Exercises a tuple-style id containing ``::``, a data platform id with a
    colon, a dataset urn nesting such a platform urn, a corpuser id with a
    colon, and the degenerate corpuser id that is itself a single colon.
    """
    # Colon characters are valid in urns, and should not mess up parsing.

    urn = Urn.from_string(
        "urn:li:dashboard:(looker,dashboards.thelook::customer_lookup)"
    )
    assert isinstance(urn, DashboardUrn)

    assert DataPlatformUrn.from_string("urn:li:dataPlatform:abc:def")
    assert DatasetUrn.from_string(
        "urn:li:dataset:(urn:li:dataPlatform:abc:def,table_name,PROD)"
    )
    # NOTE(review): this literal was rendered as an "[email protected]"
    # placeholder by the page's e-mail obfuscation; the address below is the
    # presumed original - confirm against the repository.
    assert Urn.from_string("urn:li:corpuser:foo:bar@example.com")

    # I'm not sure why you'd ever want this, but technically it's a valid urn.
    urn = Urn.from_string("urn:li:corpuser::")
    assert isinstance(urn, CorpUserUrn)
    assert urn.username == ":"
    assert urn == CorpUserUrn(":")


def test_urn_coercion() -> None:
    """Check that a reserved character in an id is percent-encoded and round-trips."""
    # The id contains U+241F ("␟"); the expected urn string carries it as the
    # percent-encoded UTF-8 bytes E2 90 9F.
    urn = CorpUserUrn("foo␟bar")
    assert urn.urn() == "urn:li:corpuser:foo%E2%90%9Fbar"

    # Parsing the encoded string must give back an urn equal to the original.
    assert urn == Urn.from_string(urn.urn())


def test_urn_type_dispatch() -> None:
urn = Urn.from_string("urn:li:dataset:(urn:li:dataPlatform:abc,def,prod)")
urn = Urn.from_string("urn:li:dataset:(urn:li:dataPlatform:abc,def,PROD)")
assert isinstance(urn, DatasetUrn)

with pytest.raises(InvalidUrnError, match="Passed an urn of type corpuser"):
Expand Down
Loading