From 5e1efd8e9934528b7db64bcfb39bc751c6bb68b4 Mon Sep 17 00:00:00 2001 From: Jyoti Wadhwani Date: Wed, 14 Oct 2020 11:30:25 -0700 Subject: [PATCH] refactor search index builder to store urn parts efficiently --- .../search/DataProcessIndexBuilder.java | 17 ++++++--------- .../builders/search/DatasetIndexBuilder.java | 21 ++++++++++--------- .../search/DataProcessIndexBuilderTest.java | 5 ++--- .../search/DatasetIndexBuilderTest.java | 10 ++++----- 4 files changed, 24 insertions(+), 29 deletions(-) diff --git a/metadata-builders/src/main/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilder.java b/metadata-builders/src/main/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilder.java index a2c33d602a96a..b4b0f4f19016d 100644 --- a/metadata-builders/src/main/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilder.java +++ b/metadata-builders/src/main/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilder.java @@ -1,7 +1,6 @@ package com.linkedin.metadata.builders.search; import com.linkedin.common.Ownership; -import com.linkedin.common.Status; import com.linkedin.common.urn.DataProcessUrn; import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.StringArray; @@ -42,7 +41,7 @@ private static DataProcessDocument setUrnDerivedFields(@Nonnull DataProcessUrn u @Nonnull private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUrn urn, @Nonnull Ownership ownership) { final StringArray owners = BuilderUtils.getCorpUserOwners(ownership); - return setUrnDerivedFields(urn) + return new DataProcessDocument() .setHasOwners(!owners.isEmpty()) .setOwners(owners); } @@ -50,7 +49,7 @@ private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUr @Nonnull private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUrn urn, @Nonnull DataProcessInfo dataProcessInfo) { - DataProcessDocument dataProcessDocument = setUrnDerivedFields(urn); + final DataProcessDocument dataProcessDocument = new DataProcessDocument(); if (dataProcessInfo.getInputs() != null) { dataProcessDocument.setInputs(dataProcessInfo.getInputs()) .setNumInputDatasets(dataProcessInfo.getInputs().size()); @@ -62,16 +61,10 @@ private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUr return dataProcessDocument; } - @Nonnull - private DataProcessDocument getDocumentToUpdateFromAspect(@Nonnull DataProcessUrn urn, @Nonnull Status status) { - return setUrnDerivedFields(urn) - .setRemoved(status.isRemoved()); - } - @Nonnull private List getDocumentsToUpdateFromSnapshotType(@Nonnull DataProcessSnapshot dataProcessSnapshot) { - DataProcessUrn urn = dataProcessSnapshot.getUrn(); - return dataProcessSnapshot.getAspects().stream().map(aspect -> { + final DataProcessUrn urn = dataProcessSnapshot.getUrn(); + final List documents = dataProcessSnapshot.getAspects().stream().map(aspect -> { if (aspect.isDataProcessInfo()) { return getDocumentToUpdateFromAspect(urn, aspect.getDataProcessInfo()); } else if (aspect.isOwnership()) { @@ -79,6 +72,8 @@ private List getDocumentsToUpdateFromSnapshotType(@Nonnull } return null; }).filter(Objects::nonNull).collect(Collectors.toList()); + documents.add(setUrnDerivedFields(urn)); + return documents; } @Nullable diff --git a/metadata-builders/src/main/java/com/linkedin/metadata/builders/search/DatasetIndexBuilder.java b/metadata-builders/src/main/java/com/linkedin/metadata/builders/search/DatasetIndexBuilder.java index 6c90ad0d94caf..89ec81c9e1670 100644 --- a/metadata-builders/src/main/java/com/linkedin/metadata/builders/search/DatasetIndexBuilder.java +++ b/metadata-builders/src/main/java/com/linkedin/metadata/builders/search/DatasetIndexBuilder.java @@ -51,42 +51,40 @@ private static DatasetDocument setUrnDerivedFields(@Nonnull DatasetUrn urn) { @Nonnull private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull Ownership ownership) { final StringArray owners = BuilderUtils.getCorpUserOwners(ownership); - return setUrnDerivedFields(urn) + return new DatasetDocument() .setHasOwners(!owners.isEmpty()) .setOwners(owners); } @Nonnull private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull Status status) { - return setUrnDerivedFields(urn) + return new DatasetDocument() .setRemoved(status.isRemoved()); } @Nonnull private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull DatasetDeprecation deprecation) { - return setUrnDerivedFields(urn).setDeprecated(deprecation.isDeprecated()); + return new DatasetDocument().setDeprecated(deprecation.isDeprecated()); } @Nonnull private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull DatasetProperties datasetProperties) { - final DatasetDocument doc = setUrnDerivedFields(urn); - if (datasetProperties.hasDescription()) { + final DatasetDocument doc = new DatasetDocument(); + if (datasetProperties.getDescription() != null) { doc.setDescription(datasetProperties.getDescription()); - } else { - doc.setDescription(""); } return doc; } @Nonnull private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull SchemaMetadata schemaMetadata) { - return setUrnDerivedFields(urn) + return new DatasetDocument() .setHasSchema(true); } @Nonnull private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull UpstreamLineage upstreamLineage) { - return setUrnDerivedFields(urn) + return new DatasetDocument() .setUpstreams(new DatasetUrnArray( upstreamLineage.getUpstreams().stream().map(upstream -> upstream.getDataset()).collect(Collectors.toList()) )); @@ -95,7 +93,7 @@ private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @ @Nonnull private List getDocumentsToUpdateFromSnapshotType(@Nonnull DatasetSnapshot datasetSnapshot) { final DatasetUrn urn = datasetSnapshot.getUrn(); - return datasetSnapshot.getAspects().stream().map(aspect -> { + final List documents = datasetSnapshot.getAspects().stream().map(aspect -> { if (aspect.isDatasetDeprecation()) { return getDocumentToUpdateFromAspect(urn, aspect.getDatasetDeprecation()); } else if (aspect.isDatasetProperties()) { @@ -111,6 +109,8 @@ private List getDocumentsToUpdateFromSnapshotType(@Nonnull Data } return null; }).filter(Objects::nonNull).collect(Collectors.toList()); + documents.add(setUrnDerivedFields(urn)); + return documents; } @Override @@ -123,6 +123,7 @@ public final List getDocumentsToUpdate(@Nonnull RecordTemplate } @Override + @Nonnull public Class getDocumentType() { return DatasetDocument.class; } diff --git a/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilderTest.java b/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilderTest.java index e84374cf06788..8ba9597420477 100644 --- a/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilderTest.java +++ b/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DataProcessIndexBuilderTest.java @@ -39,10 +39,9 @@ public void testGetDocumentsToUpdateFromDataProcessSnapshot() { new DataProcessSnapshot().setUrn(dataProcessUrn).setAspects(dataProcessAspectArray); List actualDocs = new DataProcessIndexBuilder().getDocumentsToUpdate(dataProcessSnapshot); - assertEquals(actualDocs.size(), 1); - assertEquals(actualDocs.get(0).getUrn(), dataProcessUrn); + assertEquals(actualDocs.size(), 2); assertEquals(actualDocs.get(0).getInputs().get(0), inputDatasetUrn); assertEquals(actualDocs.get(0).getOutputs().get(0), outputDatasetUrn); - + assertEquals(actualDocs.get(1).getUrn(), dataProcessUrn); } } diff --git a/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DatasetIndexBuilderTest.java b/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DatasetIndexBuilderTest.java index 02020007457e4..8155c4f9f1a85 100644 --- a/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DatasetIndexBuilderTest.java +++ b/metadata-builders/src/test/java/com/linkedin/metadata/builders/search/DatasetIndexBuilderTest.java @@ -25,16 +25,16 @@ public void testDescriptionClearing() { DatasetSnapshot datasetSnapshot = ModelUtils.newSnapshot(DatasetSnapshot.class, datasetUrn, Collections.singletonList(ModelUtils.newAspectUnion(DatasetAspect.class, datasetProperties))); List actualDocs = new DatasetIndexBuilder().getDocumentsToUpdate(datasetSnapshot); - assertEquals(actualDocs.size(), 1); - assertEquals(actualDocs.get(0).getUrn(), datasetUrn); + assertEquals(actualDocs.size(), 2); assertEquals(actualDocs.get(0).getDescription(), "baz"); + assertEquals(actualDocs.get(1).getUrn(), datasetUrn); datasetProperties = new DatasetProperties(); datasetSnapshot = ModelUtils.newSnapshot(DatasetSnapshot.class, datasetUrn, Collections.singletonList(ModelUtils.newAspectUnion(DatasetAspect.class, datasetProperties))); actualDocs = new DatasetIndexBuilder().getDocumentsToUpdate(datasetSnapshot); - assertEquals(actualDocs.size(), 1); - assertEquals(actualDocs.get(0).getUrn(), datasetUrn); - assertEquals(actualDocs.get(0).getDescription(), ""); + assertEquals(actualDocs.size(), 2); + assertNull(actualDocs.get(0).getDescription()); + assertEquals(actualDocs.get(1).getUrn(), datasetUrn); } } \ No newline at end of file