Skip to content

Commit

Permalink
refactor search index builder to store urn parts efficiently (datahub-project#1937)
Browse files Browse the repository at this point in the history

Co-authored-by: Jyoti Wadhwani <[email protected]>
  • Loading branch information
jywadhwani and Jyoti Wadhwani committed Oct 28, 2020
1 parent 5fd2110 commit 2681a48
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 29 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package com.linkedin.metadata.builders.search;

import com.linkedin.common.Ownership;
import com.linkedin.common.Status;
import com.linkedin.common.urn.DataProcessUrn;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.data.template.StringArray;
Expand Down Expand Up @@ -42,15 +41,15 @@ private static DataProcessDocument setUrnDerivedFields(@Nonnull DataProcessUrn u
/**
 * Builds the partial search document that carries ownership fields for a data process.
 *
 * <p>This span is a rendered diff whose +/- markers were lost: the two consecutive
 * {@code return} lines below are the old and new versions of one statement.
 *
 * @param urn the data process URN; only the pre-change line reads it
 * @param ownership the ownership aspect handed to {@code BuilderUtils.getCorpUserOwners}
 * @return a document with {@code hasOwners} (true when the owner list is non-empty) and {@code owners} set
 */
@Nonnull
private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUrn urn, @Nonnull Ownership ownership) {
final StringArray owners = BuilderUtils.getCorpUserOwners(ownership);
// [removed by this commit] the document used to be seeded by setUrnDerivedFields(urn).
return setUrnDerivedFields(urn)
// [added by this commit] the document now starts empty.
return new DataProcessDocument()
.setHasOwners(!owners.isEmpty())
.setOwners(owners);
}

@Nonnull
private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUrn urn,
@Nonnull DataProcessInfo dataProcessInfo) {
DataProcessDocument dataProcessDocument = setUrnDerivedFields(urn);
final DataProcessDocument dataProcessDocument = new DataProcessDocument();
if (dataProcessInfo.getInputs() != null) {
dataProcessDocument.setInputs(dataProcessInfo.getInputs())
.setNumInputDatasets(dataProcessInfo.getInputs().size());
Expand All @@ -62,23 +61,19 @@ private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUr
return dataProcessDocument;
}

/**
 * Builds the partial search document that carries the {@code removed} flag from a Status aspect.
 *
 * <p>NOTE(review): this span is a rendered diff without +/- markers; the hunk line counts
 * (23 old lines, 19 new lines) indicate that this entire overload is deleted by the commit.
 *
 * @param urn the data process URN passed to {@code setUrnDerivedFields}
 * @param status the status aspect whose {@code isRemoved()} value is copied
 * @return the document returned by {@code setUrnDerivedFields(urn)} with {@code removed} set
 */
@Nonnull
private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUrn urn, @Nonnull Status status) {
return setUrnDerivedFields(urn)
.setRemoved(status.isRemoved());
}

/**
 * Maps each aspect of a data process snapshot to a partial search document.
 *
 * <p>DataProcessInfo and Ownership aspects are dispatched to the matching
 * {@code getDocumentToUpdateFromAspect} overload; any other aspect maps to {@code null}
 * and is filtered out.
 *
 * <p>This span is a rendered diff whose +/- markers were lost. In the post-change version
 * the per-aspect documents are collected into a list and one extra document built by
 * {@code setUrnDerivedFields(urn)} is appended last.
 *
 * @param dataProcessSnapshot the snapshot supplying the URN and the aspects
 * @return the per-aspect documents, followed (post-change) by the URN-derived document
 */
@Nonnull
private List<DataProcessDocument> getDocumentsToUpdateFromSnapshotType(@Nonnull DataProcessSnapshot dataProcessSnapshot) {
// [removed by this commit] the next two lines returned the stream result directly.
DataProcessUrn urn = dataProcessSnapshot.getUrn();
return dataProcessSnapshot.getAspects().stream().map(aspect -> {
// [added by this commit] the next two lines keep the result in a local list instead.
final DataProcessUrn urn = dataProcessSnapshot.getUrn();
final List<DataProcessDocument> documents = dataProcessSnapshot.getAspects().stream().map(aspect -> {
if (aspect.isDataProcessInfo()) {
return getDocumentToUpdateFromAspect(urn, aspect.getDataProcessInfo());
} else if (aspect.isOwnership()) {
return getDocumentToUpdateFromAspect(urn, aspect.getOwnership());
}
// Unhandled aspect types produce no document; the nulls are dropped by the filter below.
return null;
}).filter(Objects::nonNull).collect(Collectors.toList());
// [added by this commit] the next two lines append the URN-derived document and return the list.
// NOTE(review): Collectors.toList() gives no mutability guarantee for the returned list;
// Collectors.toCollection(ArrayList::new) would make this add() safe by contract.
documents.add(setUrnDerivedFields(urn));
return documents;
}

@Nullable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,42 +51,40 @@ private static DatasetDocument setUrnDerivedFields(@Nonnull DatasetUrn urn) {
/**
 * Builds the partial search document that carries ownership fields for a dataset.
 *
 * <p>This span is a rendered diff whose +/- markers were lost: the two consecutive
 * {@code return} lines below are the old and new versions of one statement.
 *
 * @param urn the dataset URN; only the pre-change line reads it
 * @param ownership the ownership aspect handed to {@code BuilderUtils.getCorpUserOwners}
 * @return a document with {@code hasOwners} (true when the owner list is non-empty) and {@code owners} set
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull Ownership ownership) {
final StringArray owners = BuilderUtils.getCorpUserOwners(ownership);
// [removed by this commit] the document used to be seeded by setUrnDerivedFields(urn).
return setUrnDerivedFields(urn)
// [added by this commit] the document now starts empty.
return new DatasetDocument()
.setHasOwners(!owners.isEmpty())
.setOwners(owners);
}

/**
 * Builds the partial search document that carries the {@code removed} flag for a dataset.
 *
 * <p>This span is a rendered diff whose +/- markers were lost: the two consecutive
 * {@code return} lines below are the old and new versions of one statement.
 *
 * @param urn the dataset URN; only the pre-change line reads it
 * @param status the status aspect whose {@code isRemoved()} value is copied
 * @return a document with {@code removed} set
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull Status status) {
// [removed by this commit] the document used to be seeded by setUrnDerivedFields(urn).
return setUrnDerivedFields(urn)
// [added by this commit] the document now starts empty.
return new DatasetDocument()
.setRemoved(status.isRemoved());
}

/**
 * Builds the partial search document that carries the {@code deprecated} flag for a dataset.
 *
 * <p>This span is a rendered diff whose +/- markers were lost: the two consecutive
 * {@code return} lines below are the old and new versions of one statement.
 *
 * @param urn the dataset URN; only the pre-change line reads it
 * @param deprecation the deprecation aspect whose {@code isDeprecated()} value is copied
 * @return a document with {@code deprecated} set
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull DatasetDeprecation deprecation) {
// [removed by this commit] the document used to be seeded by setUrnDerivedFields(urn).
return setUrnDerivedFields(urn).setDeprecated(deprecation.isDeprecated());
// [added by this commit] the document now starts empty.
return new DatasetDocument().setDeprecated(deprecation.isDeprecated());
}

/**
 * Builds the partial search document that carries the description of a dataset.
 *
 * <p>This span is a rendered diff whose +/- markers were lost, so old and new lines are
 * interleaved. Pre-change: the document is seeded by {@code setUrnDerivedFields(urn)} and
 * the description falls back to {@code ""} when the aspect has none. Post-change: the
 * document starts empty and the description is set only when {@code getDescription()}
 * is non-null.
 *
 * @param urn the dataset URN; only the pre-change lines read it
 * @param datasetProperties the properties aspect supplying the description
 * @return a document whose description is set as described above
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull DatasetProperties datasetProperties) {
// [removed by this commit] the next two lines.
final DatasetDocument doc = setUrnDerivedFields(urn);
if (datasetProperties.hasDescription()) {
// [added by this commit] the next two lines replace them.
final DatasetDocument doc = new DatasetDocument();
if (datasetProperties.getDescription() != null) {
doc.setDescription(datasetProperties.getDescription());
// [removed by this commit] the else branch on the next two lines, which wrote an empty description.
} else {
doc.setDescription("");
}
return doc;
}

/**
 * Builds the partial search document that marks a dataset as having a schema.
 *
 * <p>This span is a rendered diff whose +/- markers were lost: the two consecutive
 * {@code return} lines below are the old and new versions of one statement.
 *
 * @param urn the dataset URN; only the pre-change line reads it
 * @param schemaMetadata the schema aspect; its contents are not read here
 * @return a document with {@code hasSchema} set to {@code true}
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull SchemaMetadata schemaMetadata) {
// [removed by this commit] the document used to be seeded by setUrnDerivedFields(urn).
return setUrnDerivedFields(urn)
// [added by this commit] the document now starts empty.
return new DatasetDocument()
.setHasSchema(true);
}

@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull UpstreamLineage upstreamLineage) {
return setUrnDerivedFields(urn)
return new DatasetDocument()
.setUpstreams(new DatasetUrnArray(
upstreamLineage.getUpstreams().stream().map(upstream -> upstream.getDataset()).collect(Collectors.toList())
));
Expand All @@ -95,7 +93,7 @@ private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @
@Nonnull
private List<DatasetDocument> getDocumentsToUpdateFromSnapshotType(@Nonnull DatasetSnapshot datasetSnapshot) {
final DatasetUrn urn = datasetSnapshot.getUrn();
return datasetSnapshot.getAspects().stream().map(aspect -> {
final List<DatasetDocument> documents = datasetSnapshot.getAspects().stream().map(aspect -> {
if (aspect.isDatasetDeprecation()) {
return getDocumentToUpdateFromAspect(urn, aspect.getDatasetDeprecation());
} else if (aspect.isDatasetProperties()) {
Expand All @@ -111,6 +109,8 @@ private List<DatasetDocument> getDocumentsToUpdateFromSnapshotType(@Nonnull Data
}
return null;
}).filter(Objects::nonNull).collect(Collectors.toList());
documents.add(setUrnDerivedFields(urn));
return documents;
}

@Override
Expand All @@ -123,6 +123,7 @@ public final List<DatasetDocument> getDocumentsToUpdate(@Nonnull RecordTemplate
}

/**
 * Returns the document class this index builder produces.
 *
 * <p>This span is a rendered diff whose +/- markers were lost; the hunk line counts
 * (6 old lines, 7 new lines) indicate that the {@code @Nonnull} annotation is the added line.
 *
 * @return {@code DatasetDocument.class}
 */
@Override
@Nonnull
public Class<DatasetDocument> getDocumentType() {
return DatasetDocument.class;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,9 @@ public void testGetDocumentsToUpdateFromDataProcessSnapshot() {
new DataProcessSnapshot().setUrn(dataProcessUrn).setAspects(dataProcessAspectArray);

List<DataProcessDocument> actualDocs = new DataProcessIndexBuilder().getDocumentsToUpdate(dataProcessSnapshot);
assertEquals(actualDocs.size(), 1);
assertEquals(actualDocs.get(0).getUrn(), dataProcessUrn);
assertEquals(actualDocs.size(), 2);
assertEquals(actualDocs.get(0).getInputs().get(0), inputDatasetUrn);
assertEquals(actualDocs.get(0).getOutputs().get(0), outputDatasetUrn);

assertEquals(actualDocs.get(1).getUrn(), dataProcessUrn);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,16 @@ public void testDescriptionClearing() {
DatasetSnapshot datasetSnapshot = ModelUtils.newSnapshot(DatasetSnapshot.class, datasetUrn,
Collections.singletonList(ModelUtils.newAspectUnion(DatasetAspect.class, datasetProperties)));
List<DatasetDocument> actualDocs = new DatasetIndexBuilder().getDocumentsToUpdate(datasetSnapshot);
assertEquals(actualDocs.size(), 1);
assertEquals(actualDocs.get(0).getUrn(), datasetUrn);
assertEquals(actualDocs.size(), 2);
assertEquals(actualDocs.get(0).getDescription(), "baz");
assertEquals(actualDocs.get(1).getUrn(), datasetUrn);

datasetProperties = new DatasetProperties();
datasetSnapshot = ModelUtils.newSnapshot(DatasetSnapshot.class, datasetUrn,
Collections.singletonList(ModelUtils.newAspectUnion(DatasetAspect.class, datasetProperties)));
actualDocs = new DatasetIndexBuilder().getDocumentsToUpdate(datasetSnapshot);
assertEquals(actualDocs.size(), 1);
assertEquals(actualDocs.get(0).getUrn(), datasetUrn);
assertEquals(actualDocs.get(0).getDescription(), "");
assertEquals(actualDocs.size(), 2);
assertNull(actualDocs.get(0).getDescription());
assertEquals(actualDocs.get(1).getUrn(), datasetUrn);
}
}

0 comments on commit 2681a48

Please sign in to comment.