-
Notifications
You must be signed in to change notification settings - Fork 3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(field-level-lineage): Add models for field level lineage (#1936)
* feat(field-level-lineage): adding models for field level lineage adding models for field level lineage. Introduce DatasetFieldUrn as a unique identifier for dataset field
- Loading branch information
1 parent
89c7855
commit 7d574d1
Showing
12 changed files
with
378 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
101 changes: 101 additions & 0 deletions
101
li-utils/src/main/javaPegasus/com/linkedin/common/urn/DatasetFieldUrn.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
package com.linkedin.common.urn; | ||
|
||
import com.linkedin.common.FabricType; | ||
import com.linkedin.data.template.Custom; | ||
import com.linkedin.data.template.DirectCoercer; | ||
import com.linkedin.data.template.TemplateOutputCastException; | ||
import java.net.URISyntaxException; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
|
||
/** | ||
* Standardized dataset field information identifier | ||
*/ | ||
public class DatasetFieldUrn extends Urn { | ||
|
||
// uniquely identifies urn's key type | ||
public static final String ENTITY_TYPE = "datasetField"; | ||
|
||
// urn pattern | ||
private static final Pattern DATASET_FIELD_URN_PATTERN = Pattern.compile( | ||
"urn:li:datasetField:\\(urn:li:dataset:\\(urn:li:dataPlatform:(?<dataPlatform>.+),(?<datasetName>.+),(?<fabric>.+)\\),(?<fieldPath>.+)\\)"); | ||
|
||
/** | ||
* Dataset urn of the datasetFieldUrn | ||
*/ | ||
private final DatasetUrn _dataset; | ||
|
||
/** | ||
* Field of datasetFieldUrn | ||
*/ | ||
private final String _fieldPath; | ||
|
||
static { | ||
Custom.initializeCustomClass(DatasetUrn.class); | ||
Custom.registerCoercer(new DirectCoercer<DatasetFieldUrn>() { | ||
|
||
@Override | ||
public String coerceInput(DatasetFieldUrn object) throws ClassCastException { | ||
return object.toString(); | ||
} | ||
|
||
@Override | ||
public DatasetFieldUrn coerceOutput(Object object) throws TemplateOutputCastException { | ||
if (object instanceof String) { | ||
try { | ||
return DatasetFieldUrn.deserialize(((String) object)); | ||
} catch (URISyntaxException e) { | ||
throw new TemplateOutputCastException((("Deserializing output '" + object) + "' failed"), e); | ||
} | ||
} | ||
throw new TemplateOutputCastException((("Output '" + object) + ("' is not a String, and cannot be coerced to " | ||
+ DatasetFieldUrn.class.getName()))); | ||
} | ||
}, DatasetFieldUrn.class); | ||
} | ||
|
||
/** | ||
* Creates a new instance of a {@link DatasetFieldUrn }. | ||
* | ||
* @param dataset Dataset that this dataset field belongs to. | ||
* @param fieldPath Dataset field path or column name | ||
*/ | ||
public DatasetFieldUrn(DatasetUrn dataset, String fieldPath) { | ||
this(dataset.getPlatformEntity().getPlatformNameEntity(), dataset.getDatasetNameEntity(), dataset.getOriginEntity(), | ||
fieldPath); | ||
} | ||
|
||
public DatasetFieldUrn(String dataPlatform, String datasetName, FabricType fabricType, String fieldPath) { | ||
super(ENTITY_TYPE, String.format("(urn:li:dataset:(urn:li:dataPlatform:%s,%s,%s),%s)", dataPlatform, datasetName, | ||
fabricType.name(), fieldPath)); | ||
this._dataset = new DatasetUrn(new DataPlatformUrn(dataPlatform), datasetName, fabricType); | ||
this._fieldPath = fieldPath; | ||
} | ||
|
||
public DatasetUrn getDatasetEntity() { | ||
return _dataset; | ||
} | ||
|
||
public String getFieldPathEntity() { | ||
return _fieldPath; | ||
} | ||
|
||
/** | ||
* Creates an instance of a DatasetFieldUrn from a raw urn string. | ||
* @param rawUrn The raw urn input to convert to a full DatasetFieldUrn instance. | ||
* @return {@link DatasetFieldUrn} dataset Field Urn | ||
*/ | ||
public static DatasetFieldUrn deserialize(String rawUrn) throws URISyntaxException { | ||
final Matcher matcher = DATASET_FIELD_URN_PATTERN.matcher(rawUrn); | ||
if (matcher.matches()) { | ||
final String dataPlatform = matcher.group("dataPlatform"); | ||
final String datasetName = matcher.group("datasetName"); | ||
final String fabric = matcher.group("fabric"); | ||
final String fieldName = matcher.group("fieldPath"); | ||
return new DatasetFieldUrn(dataPlatform, datasetName, FabricType.valueOf(fabric), fieldName); | ||
} | ||
throw new URISyntaxException(rawUrn, | ||
String.format("urn does match dataset field urn pattern %s", DATASET_FIELD_URN_PATTERN.toString())); | ||
} | ||
} |
28 changes: 28 additions & 0 deletions
28
li-utils/src/main/pegasus/com/linkedin/common/DatasetFieldUrn.pdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
namespace com.linkedin.common | ||
|
||
/** | ||
* Standardized dataset field information identifier. | ||
*/ | ||
@java.class = "com.linkedin.common.urn.DatasetFieldUrn" | ||
@validate.`com.linkedin.common.validator.TypedUrnValidator` = { | ||
"accessible" : true, | ||
"owningTeam" : "urn:li:internalTeam:datahub", | ||
"entityType" : "datasetField", | ||
"constructable" : true, | ||
"namespace" : "li", | ||
"name" : "DatasetField", | ||
"doc" : "Standardized dataset field information identifier", | ||
"owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ], | ||
"fields" : [ { | ||
"type" : "com.linkedin.common.urn.DatasetUrn", | ||
"name" : "dataset", | ||
"doc" : "Dataset that this dataset field belongs to." | ||
}, { | ||
"name" : "fieldPath", | ||
"doc" : "Dataset field path", | ||
"type" : "string", | ||
"maxLength" : 500 | ||
} ], | ||
"maxLength" : 807 | ||
} | ||
typeref DatasetFieldUrn = string |
54 changes: 54 additions & 0 deletions
54
li-utils/src/test/java/com/linkedin/common/urn/DatasetFieldUrnTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
package com.linkedin.common.urn; | ||
|
||
import com.linkedin.common.FabricType; | ||
import java.net.URISyntaxException; | ||
import org.assertj.core.api.Assertions; | ||
import org.testng.annotations.Test; | ||
|
||
|
||
public class DatasetFieldUrnTest { | ||
|
||
private static final String PLATFORM = "fooPlatform"; | ||
private static final String DATASET_NAME = "fooName"; | ||
private static final String FIELD_NAME = "fooField"; | ||
private static final FabricType FABRIC_TYPE = FabricType.PROD; | ||
|
||
@Test | ||
public void testSerialization() throws URISyntaxException { | ||
final String datasetFieldString = | ||
String.format("urn:li:datasetField:(urn:li:dataset:(urn:li:dataPlatform:%s,%s,%s),%s)", PLATFORM, DATASET_NAME, | ||
FABRIC_TYPE, FIELD_NAME); | ||
|
||
final DatasetFieldUrn datasetFieldUrn = DatasetFieldUrn.deserialize(datasetFieldString); | ||
final DatasetUrn datasetUrn = datasetFieldUrn.getDatasetEntity(); | ||
|
||
Assertions.assertThat(datasetFieldUrn.getFieldPathEntity()).isEqualTo(FIELD_NAME); | ||
Assertions.assertThat(datasetUrn.getDatasetNameEntity()).isEqualTo(DATASET_NAME); | ||
Assertions.assertThat(datasetUrn.getPlatformEntity().getPlatformNameEntity()).isEqualTo(PLATFORM); | ||
Assertions.assertThat(datasetUrn.getOriginEntity()).isEqualTo(FabricType.PROD); | ||
Assertions.assertThat(datasetFieldUrn.toString()) | ||
.isEqualTo(datasetFieldString) | ||
.describedAs("serialization followed by deserialization should produce the same urn string"); | ||
} | ||
|
||
@Test | ||
public void testCreateUrn() { | ||
final DatasetFieldUrn datasetFieldUrn = new DatasetFieldUrn(PLATFORM, DATASET_NAME, FABRIC_TYPE, FIELD_NAME); | ||
|
||
final DatasetUrn datasetUrn = datasetFieldUrn.getDatasetEntity(); | ||
|
||
Assertions.assertThat(datasetFieldUrn.getFieldPathEntity()).isEqualTo(FIELD_NAME); | ||
Assertions.assertThat(datasetUrn.getDatasetNameEntity()).isEqualTo(DATASET_NAME); | ||
Assertions.assertThat(datasetUrn.getPlatformEntity().getPlatformNameEntity()).isEqualTo(PLATFORM); | ||
Assertions.assertThat(datasetUrn.getOriginEntity()).isEqualTo(FabricType.PROD); | ||
} | ||
|
||
@Test | ||
public void testUrnConstructors() { | ||
final DatasetFieldUrn datasetFieldUrn1 = new DatasetFieldUrn(PLATFORM, DATASET_NAME, FABRIC_TYPE, FIELD_NAME); | ||
final DatasetUrn datasetUrn = datasetFieldUrn1.getDatasetEntity(); | ||
final DatasetFieldUrn datasetFieldUrn2 = new DatasetFieldUrn(datasetUrn, FIELD_NAME); | ||
|
||
Assertions.assertThat(datasetFieldUrn1).isEqualTo(datasetFieldUrn2); | ||
} | ||
} |
19 changes: 19 additions & 0 deletions
19
metadata-models/src/main/pegasus/com/linkedin/common/BaseFieldMapping.pdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
namespace com.linkedin.common | ||
|
||
import com.linkedin.common.fieldtransformer.TransformationType | ||
import com.linkedin.common.fieldtransformer.UDFTransformer | ||
|
||
/** | ||
* Base model representing field mappings | ||
*/ | ||
record BaseFieldMapping { | ||
/** | ||
* Audit stamp containing who reported the field mapping and when | ||
*/ | ||
created: AuditStamp | ||
|
||
/** | ||
* Transformation function between the fields involved | ||
*/ | ||
transformation: union [TransformationType, UDFTransformer] | ||
} |
16 changes: 16 additions & 0 deletions
16
metadata-models/src/main/pegasus/com/linkedin/common/fieldtransformer/TransformationType.pdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
namespace com.linkedin.common.fieldtransformer | ||
|
||
/** | ||
* Type of the transformation involved in generating destination fields from source fields. | ||
*/ | ||
enum TransformationType { | ||
/** | ||
* Field transformation expressed as unknown black box function. | ||
*/ | ||
BLACKBOX, | ||
|
||
/** | ||
* Field transformation expressed as Identity function. | ||
*/ | ||
IDENTITY | ||
} |
Oops, something went wrong.