Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: store urns parts in search index builder efficiently #1937

Merged
merged 1 commit into from
Oct 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package com.linkedin.metadata.builders.search;

import com.linkedin.common.Ownership;
import com.linkedin.common.Status;
import com.linkedin.common.urn.DataProcessUrn;
import com.linkedin.data.template.RecordTemplate;
import com.linkedin.data.template.StringArray;
Expand Down Expand Up @@ -42,15 +41,15 @@ private static DataProcessDocument setUrnDerivedFields(@Nonnull DataProcessUrn u
/**
 * Builds a partial {@link DataProcessDocument} from an {@link Ownership} aspect.
 *
 * <p>The owner list comes from {@code BuilderUtils.getCorpUserOwners(ownership)}; the document
 * stores that list plus a {@code hasOwners} flag that is true when the list is non-empty.
 *
 * @param urn URN of the data process; only the pre-change {@code return} line reads it
 * @param ownership aspect handed to {@code BuilderUtils.getCorpUserOwners}
 * @return a document with {@code hasOwners} and {@code owners} set
 */
@Nonnull
private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUrn urn, @Nonnull Ownership ownership) {
final StringArray owners = BuilderUtils.getCorpUserOwners(ownership);
// NOTE(review): this listing is a flattened diff, so two `return` heads appear back to back.
// Going by the hunk header (-42,15 +41,15), the first looks like the removed line and the
// second like its replacement, which starts from a fresh document instead of one pre-filled
// by setUrnDerivedFields(urn) -- confirm against the merged file.
return setUrnDerivedFields(urn)
return new DataProcessDocument()
.setHasOwners(!owners.isEmpty())
.setOwners(owners);
}

@Nonnull
private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUrn urn,
@Nonnull DataProcessInfo dataProcessInfo) {
DataProcessDocument dataProcessDocument = setUrnDerivedFields(urn);
final DataProcessDocument dataProcessDocument = new DataProcessDocument();
if (dataProcessInfo.getInputs() != null) {
dataProcessDocument.setInputs(dataProcessInfo.getInputs())
.setNumInputDatasets(dataProcessInfo.getInputs().size());
Expand All @@ -62,23 +61,19 @@ private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUr
return dataProcessDocument;
}

/**
 * Builds a {@link DataProcessDocument} from a {@link Status} aspect: starts from
 * {@code setUrnDerivedFields(urn)} and copies {@code status.isRemoved()} into {@code removed}.
 *
 * <p>NOTE(review): the hunk header (-62,23 +61,19) together with the added/removed lines in the
 * snapshot method that follows suggests this whole overload is deleted by the change; the
 * snapshot dispatcher shown in this hunk never calls it -- confirm against the merged file.
 *
 * @param urn URN passed to {@code setUrnDerivedFields}
 * @param status aspect whose {@code isRemoved()} value is copied
 * @return the document with urn-derived fields and the {@code removed} flag
 */
@Nonnull
private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUrn urn, @Nonnull Status status) {
// NOTE(review): the next two lines are review-UI text captured by the page scrape, not Java.
keremsahin1 marked this conversation as resolved.
Show resolved Hide resolved
return setUrnDerivedFields(urn)
.setRemoved(status.isRemoved());
}

/**
 * Turns a {@link DataProcessSnapshot} into the list of partial documents to update.
 *
 * <p>Each aspect is mapped to a document: {@code DataProcessInfo} and {@code Ownership} aspects
 * go to the matching {@code getDocumentToUpdateFromAspect} overload, any other aspect maps to
 * {@code null} and is dropped by the {@code Objects::nonNull} filter. In the post-change lines
 * one extra document from {@code setUrnDerivedFields(urn)} is appended last, so the urn-derived
 * fields are emitted once per snapshot rather than once per aspect.
 *
 * @param dataProcessSnapshot snapshot supplying the URN and the aspects
 * @return per-aspect documents, followed (post-change) by the urn-derived document
 */
@Nonnull
private List<DataProcessDocument> getDocumentsToUpdateFromSnapshotType(@Nonnull DataProcessSnapshot dataProcessSnapshot) {
// NOTE(review): flattened diff -- per the hunk header (-62,23 +61,19) the next two lines look
// like the removed versions and the two `final` lines after them like their replacements.
DataProcessUrn urn = dataProcessSnapshot.getUrn();
return dataProcessSnapshot.getAspects().stream().map(aspect -> {
final DataProcessUrn urn = dataProcessSnapshot.getUrn();
final List<DataProcessDocument> documents = dataProcessSnapshot.getAspects().stream().map(aspect -> {
if (aspect.isDataProcessInfo()) {
return getDocumentToUpdateFromAspect(urn, aspect.getDataProcessInfo());
} else if (aspect.isOwnership()) {
return getDocumentToUpdateFromAspect(urn, aspect.getOwnership());
}
// Unsupported aspect: signal "no document" so the filter below removes it.
return null;
}).filter(Objects::nonNull).collect(Collectors.toList());
// NOTE(review): Collectors.toList() makes no guarantee that the returned list is mutable;
// this add() relies on the current ArrayList-backed behaviour. Consider
// Collectors.toCollection(ArrayList::new) -- TODO confirm with the maintainers.
documents.add(setUrnDerivedFields(urn));
return documents;
}

@Nullable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,42 +51,40 @@ private static DatasetDocument setUrnDerivedFields(@Nonnull DatasetUrn urn) {
/**
 * Builds a partial {@link DatasetDocument} from an {@link Ownership} aspect.
 *
 * <p>The owner list comes from {@code BuilderUtils.getCorpUserOwners(ownership)}; the document
 * stores that list plus a {@code hasOwners} flag that is true when the list is non-empty.
 *
 * @param urn URN of the dataset; only the pre-change {@code return} line reads it
 * @param ownership aspect handed to {@code BuilderUtils.getCorpUserOwners}
 * @return a document with {@code hasOwners} and {@code owners} set
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull Ownership ownership) {
final StringArray owners = BuilderUtils.getCorpUserOwners(ownership);
// NOTE(review): flattened diff -- per the hunk header (-51,42 +51,40) the first `return` head
// looks like the removed line and the second like its replacement, which starts from a fresh
// document instead of one pre-filled by setUrnDerivedFields(urn). Confirm against the merged file.
return setUrnDerivedFields(urn)
return new DatasetDocument()
.setHasOwners(!owners.isEmpty())
.setOwners(owners);
}

/**
 * Builds a partial {@link DatasetDocument} from a {@link Status} aspect by copying
 * {@code status.isRemoved()} into the document's {@code removed} field.
 *
 * @param urn URN of the dataset; only the pre-change {@code return} line reads it
 * @param status aspect whose {@code isRemoved()} value is copied
 * @return a document with {@code removed} set
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull Status status) {
// NOTE(review): flattened diff -- the first `return` head looks like the removed line, the
// second like its replacement (fresh document, no urn-derived fields). Confirm against the
// merged file.
return setUrnDerivedFields(urn)
return new DatasetDocument()
.setRemoved(status.isRemoved());
}

/**
 * Builds a partial {@link DatasetDocument} from a {@link DatasetDeprecation} aspect by copying
 * {@code deprecation.isDeprecated()} into the document's {@code deprecated} field.
 *
 * @param urn URN of the dataset; only the pre-change {@code return} line reads it
 * @param deprecation aspect whose {@code isDeprecated()} value is copied
 * @return a document with {@code deprecated} set
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull DatasetDeprecation deprecation) {
// NOTE(review): flattened diff -- the first `return` looks like the removed statement and the
// second like its replacement (fresh document, no urn-derived fields). Confirm against the
// merged file.
return setUrnDerivedFields(urn).setDeprecated(deprecation.isDeprecated());
return new DatasetDocument().setDeprecated(deprecation.isDeprecated());
}

/**
 * Builds a partial {@link DatasetDocument} from a {@link DatasetProperties} aspect, copying the
 * description into the document when one is present.
 *
 * @param urn URN of the dataset; only the pre-change declaration of {@code doc} reads it
 * @param datasetProperties aspect supplying the description
 * @return a document carrying the description
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull DatasetProperties datasetProperties) {
// NOTE(review): flattened diff -- the next two lines look like the removed versions
// (urn-derived document, hasDescription() check) and the two after them like their
// replacements (fresh document, null check on getDescription()).
final DatasetDocument doc = setUrnDerivedFields(urn);
if (datasetProperties.hasDescription()) {
final DatasetDocument doc = new DatasetDocument();
if (datasetProperties.getDescription() != null) {
doc.setDescription(datasetProperties.getDescription());
// NOTE(review): the hunk header (-51,42 +51,40) leaves two removed lines unaccounted for, and
// the accompanying test expects a null description when none is set, so the `else` branch
// below is presumably deleted by the change -- confirm against the merged file.
} else {
doc.setDescription("");
}
return doc;
}

/**
 * Builds a partial {@link DatasetDocument} for a {@link SchemaMetadata} aspect. The aspect's
 * contents are not read: the presence of the aspect alone sets {@code hasSchema} to true.
 *
 * @param urn URN of the dataset; only the pre-change {@code return} line reads it
 * @param schemaMetadata aspect that selects this overload; its fields are not inspected here
 * @return a document with {@code hasSchema} set to true
 */
@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull SchemaMetadata schemaMetadata) {
// NOTE(review): flattened diff -- the first `return` head looks like the removed line, the
// second like its replacement (fresh document, no urn-derived fields). Confirm against the
// merged file.
return setUrnDerivedFields(urn)
return new DatasetDocument()
.setHasSchema(true);
}

@Nonnull
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull UpstreamLineage upstreamLineage) {
return setUrnDerivedFields(urn)
return new DatasetDocument()
.setUpstreams(new DatasetUrnArray(
upstreamLineage.getUpstreams().stream().map(upstream -> upstream.getDataset()).collect(Collectors.toList())
));
Expand All @@ -95,7 +93,7 @@ private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @
@Nonnull
private List<DatasetDocument> getDocumentsToUpdateFromSnapshotType(@Nonnull DatasetSnapshot datasetSnapshot) {
final DatasetUrn urn = datasetSnapshot.getUrn();
return datasetSnapshot.getAspects().stream().map(aspect -> {
final List<DatasetDocument> documents = datasetSnapshot.getAspects().stream().map(aspect -> {
if (aspect.isDatasetDeprecation()) {
return getDocumentToUpdateFromAspect(urn, aspect.getDatasetDeprecation());
} else if (aspect.isDatasetProperties()) {
Expand All @@ -111,6 +109,8 @@ private List<DatasetDocument> getDocumentsToUpdateFromSnapshotType(@Nonnull Data
}
return null;
}).filter(Objects::nonNull).collect(Collectors.toList());
documents.add(setUrnDerivedFields(urn));
return documents;
}

@Override
Expand All @@ -123,6 +123,7 @@ public final List<DatasetDocument> getDocumentsToUpdate(@Nonnull RecordTemplate
}

/**
 * Reports the document class this builder produces.
 *
 * @return {@code DatasetDocument.class}, never null
 */
@Override
@Nonnull
public Class<DatasetDocument> getDocumentType() {
return DatasetDocument.class;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,9 @@ public void testGetDocumentsToUpdateFromDataProcessSnapshot() {
new DataProcessSnapshot().setUrn(dataProcessUrn).setAspects(dataProcessAspectArray);

List<DataProcessDocument> actualDocs = new DataProcessIndexBuilder().getDocumentsToUpdate(dataProcessSnapshot);
assertEquals(actualDocs.size(), 1);
assertEquals(actualDocs.get(0).getUrn(), dataProcessUrn);
assertEquals(actualDocs.size(), 2);
assertEquals(actualDocs.get(0).getInputs().get(0), inputDatasetUrn);
assertEquals(actualDocs.get(0).getOutputs().get(0), outputDatasetUrn);

assertEquals(actualDocs.get(1).getUrn(), dataProcessUrn);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,16 @@ public void testDescriptionClearing() {
DatasetSnapshot datasetSnapshot = ModelUtils.newSnapshot(DatasetSnapshot.class, datasetUrn,
Collections.singletonList(ModelUtils.newAspectUnion(DatasetAspect.class, datasetProperties)));
List<DatasetDocument> actualDocs = new DatasetIndexBuilder().getDocumentsToUpdate(datasetSnapshot);
assertEquals(actualDocs.size(), 1);
assertEquals(actualDocs.get(0).getUrn(), datasetUrn);
assertEquals(actualDocs.size(), 2);
assertEquals(actualDocs.get(0).getDescription(), "baz");
assertEquals(actualDocs.get(1).getUrn(), datasetUrn);

datasetProperties = new DatasetProperties();
datasetSnapshot = ModelUtils.newSnapshot(DatasetSnapshot.class, datasetUrn,
Collections.singletonList(ModelUtils.newAspectUnion(DatasetAspect.class, datasetProperties)));
actualDocs = new DatasetIndexBuilder().getDocumentsToUpdate(datasetSnapshot);
assertEquals(actualDocs.size(), 1);
assertEquals(actualDocs.get(0).getUrn(), datasetUrn);
assertEquals(actualDocs.get(0).getDescription(), "");
assertEquals(actualDocs.size(), 2);
assertNull(actualDocs.get(0).getDescription());
assertEquals(actualDocs.get(1).getUrn(), datasetUrn);
}
}