Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump Elasticsearch codec to track Lucene101Codec #116318

Merged
merged 10 commits into from
Nov 8, 2024
3 changes: 2 additions & 1 deletion server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,8 @@
with
org.elasticsearch.index.codec.Elasticsearch814Codec,
org.elasticsearch.index.codec.Elasticsearch816Codec,
org.elasticsearch.index.codec.Elasticsearch900Codec;
org.elasticsearch.index.codec.Elasticsearch900Codec,
org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec;

provides org.apache.logging.log4j.core.util.ContextDataProvider with org.elasticsearch.common.logging.DynamicContextDataProvider;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.backward_codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
Expand All @@ -21,7 +22,7 @@
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.DirectoryReader;
Expand Down Expand Up @@ -306,6 +307,9 @@ private static void readProximity(Terms terms, PostingsEnum postings) throws IOE
private static BlockTermState getBlockTermState(TermsEnum termsEnum, BytesRef term) throws IOException {
if (term != null && termsEnum.seekExact(term)) {
final TermState termState = termsEnum.termState();
if (termState instanceof final Lucene101PostingsFormat.IntBlockTermState blockTermState) {
return new BlockTermState(blockTermState.docStartFP, blockTermState.posStartFP, blockTermState.payStartFP);
}
if (termState instanceof final Lucene912PostingsFormat.IntBlockTermState blockTermState) {
return new BlockTermState(blockTermState.docStartFP, blockTermState.posStartFP, blockTermState.payStartFP);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
import java.util.Objects;

public class Lucene {
public static final String LATEST_CODEC = "Lucene100";
public static final String LATEST_CODEC = "Lucene101";

public static final String SOFT_DELETES_FIELD = "__soft_deletes";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ private static Version parseUnchecked(String version) {
public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT_BACKPORT = def(8_519_00_0, Version.LUCENE_9_12_0);
public static final IndexVersion UPGRADE_TO_LUCENE_10_0_0 = def(9_000_00_0, Version.LUCENE_10_0_0);
public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = def(9_001_00_0, Version.LUCENE_10_0_0);
public static final IndexVersion UPGRADE_TO_LUCENE_10_0_1 = def(9_002_00_0, Version.LUCENE_10_0_1);
public static final IndexVersion UPGRADE_TO_LUCENE_10_1_0 = def(9_002_00_0, Version.LUCENE_10_1_0);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.FeatureFlag;
import org.elasticsearch.core.Nullable;
Expand Down Expand Up @@ -46,7 +46,7 @@ public class CodecService implements CodecProvider {
public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays) {
final var codecs = new HashMap<String, Codec>();

Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene100Codec.Mode.BEST_SPEED, mapperService, bigArrays);
Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene101Codec.Mode.BEST_SPEED, mapperService, bigArrays);
if (ZSTD_STORED_FIELDS_FEATURE_FLAG.isEnabled()) {
codecs.put(DEFAULT_CODEC, new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_SPEED, mapperService, bigArrays));
} else {
Expand All @@ -58,7 +58,7 @@ public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays)
BEST_COMPRESSION_CODEC,
new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_COMPRESSION, mapperService, bigArrays)
);
Codec legacyBestCompressionCodec = new LegacyPerFieldMapperCodec(Lucene100Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays);
Codec legacyBestCompressionCodec = new LegacyPerFieldMapperCodec(Lucene101Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays);
codecs.put(LEGACY_BEST_COMPRESSION_CODEC, legacyBestCompressionCodec);

codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
package org.elasticsearch.index.codec;

import org.apache.lucene.backward_codecs.lucene912.Lucene912Codec;
import org.apache.lucene.backward_codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,22 @@

package org.elasticsearch.index.codec;

import org.apache.lucene.backward_codecs.lucene100.Lucene100Codec;
import org.apache.lucene.backward_codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;

/**
* Elasticsearch codec as of 9.0. This extends the Lucene 10.0 codec to compressed stored fields with ZSTD instead of LZ4/DEFLATE. See
* {@link Zstd814StoredFieldsFormat}.
* Elasticsearch codec as of 9.0-snapshot relying on Lucene 10.0. This extends the Lucene 10.0 codec to compressed stored fields
* with ZSTD instead of LZ4/DEFLATE. See {@link Zstd814StoredFieldsFormat}.
*/
public class Elasticsearch900Codec extends CodecService.DeduplicateFieldInfosCodec {

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;

/**
* Elasticsearch codec as of 9.0 relying on Lucene 10.1. This extends the Lucene 10.1 codec to compressed
* stored fields with ZSTD instead of LZ4/DEFLATE. See {@link Zstd814StoredFieldsFormat}.
*/
public class Elasticsearch900Lucene101Codec extends CodecService.DeduplicateFieldInfosCodec {

private final StoredFieldsFormat storedFieldsFormat;

private final PostingsFormat defaultPostingsFormat;
private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return Elasticsearch900Lucene101Codec.this.getPostingsFormatForField(field);
}
};

private final DocValuesFormat defaultDVFormat;
private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
return Elasticsearch900Lucene101Codec.this.getDocValuesFormatForField(field);
}
};

private final KnnVectorsFormat defaultKnnVectorsFormat;
private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return Elasticsearch900Lucene101Codec.this.getKnnVectorsFormatForField(field);
}
};

/** Public no-arg constructor, needed for SPI loading at read-time. */
public Elasticsearch900Lucene101Codec() {
this(Zstd814StoredFieldsFormat.Mode.BEST_SPEED);
}

/**
* Constructor. Takes a {@link Zstd814StoredFieldsFormat.Mode} that describes whether to optimize for retrieval speed at the expense of
* worse space-efficiency or vice-versa.
*/
public Elasticsearch900Lucene101Codec(Zstd814StoredFieldsFormat.Mode mode) {
super("Elasticsearch900Lucene101", new Lucene101Codec());
this.storedFieldsFormat = mode.getFormat();
this.defaultPostingsFormat = new Lucene101PostingsFormat();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it would make maintetance easier if we retrieved it from the wrapper Lucene101Codec instead of instantiating it manually here. Then we'd only need to update the delegate codec when Lucene bumps the codec, and not the postings/doc-values/knn-vectors formats.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I wonder why we have not done that before. That can be applied to doc values format and knn vectors format too?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I think it's just a bit annoying because you don't want to use getPostingsFormat() but getPostingsFormatForField("some_ignored_field_name").

this.defaultDVFormat = new Lucene90DocValuesFormat();
this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
}

@Override
public StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
}

@Override
public final PostingsFormat postingsFormat() {
return postingsFormat;
}

@Override
public final DocValuesFormat docValuesFormat() {
return docValuesFormat;
}

@Override
public final KnnVectorsFormat knnVectorsFormat() {
return knnVectorsFormat;
}

/**
* Returns the postings format that should be used for writing new segments of <code>field</code>.
*
* <p>The default implementation always returns "Lucene912".
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future version of Lucene are only guaranteed to be able to read the default implementation,
*/
public PostingsFormat getPostingsFormatForField(String field) {
return defaultPostingsFormat;
}

/**
* Returns the docvalues format that should be used for writing new segments of <code>field</code>
* .
*
* <p>The default implementation always returns "Lucene912".
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future version of Lucene are only guaranteed to be able to read the default implementation.
*/
public DocValuesFormat getDocValuesFormatForField(String field) {
return defaultDVFormat;
}

/**
* Returns the vectors format that should be used for writing new segments of <code>field</code>
*
* <p>The default implementation always returns "Lucene912".
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future version of Lucene are only guaranteed to be able to read the default implementation.
*/
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return defaultKnnVectorsFormat;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.mapper.MapperService;
Expand All @@ -22,11 +22,11 @@
* Legacy version of {@link PerFieldMapperCodec}. This codec is preserved to give an escape hatch in case we encounter issues with new
* changes in {@link PerFieldMapperCodec}.
*/
public final class LegacyPerFieldMapperCodec extends Lucene100Codec {
public final class LegacyPerFieldMapperCodec extends Lucene101Codec {

private final PerFieldFormatSupplier formatSupplier;

public LegacyPerFieldMapperCodec(Lucene100Codec.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
public LegacyPerFieldMapperCodec(Lucene101Codec.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
super(compressionMode);
this.formatSupplier = new PerFieldFormatSupplier(mapperService, bigArrays);
// If the below assertion fails, it is a sign that Lucene released a new codec. You must create a copy of the current Elasticsearch
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
* per index in real time via the mapping API. If no specific postings format or vector format is
* configured for a specific field the default postings or vector format is used.
*/
public final class PerFieldMapperCodec extends Elasticsearch900Codec {
public final class PerFieldMapperCodec extends Elasticsearch900Lucene101Codec {

private final PerFieldFormatSupplier formatSupplier;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ public CompletionFieldType fieldType() {
}

static PostingsFormat postingsFormat() {
return PostingsFormat.forName("Completion912");
return PostingsFormat.forName("Completion101");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
org.elasticsearch.index.codec.Elasticsearch814Codec
org.elasticsearch.index.codec.Elasticsearch816Codec
org.elasticsearch.index.codec.Elasticsearch900Codec
org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
Expand Down Expand Up @@ -55,7 +55,7 @@
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.suggest.document.Completion912PostingsFormat;
import org.apache.lucene.search.suggest.document.Completion101PostingsFormat;
import org.apache.lucene.search.suggest.document.CompletionPostingsFormat;
import org.apache.lucene.search.suggest.document.SuggestField;
import org.apache.lucene.store.Directory;
Expand Down Expand Up @@ -327,11 +327,11 @@ public void testTriangle() throws Exception {
public void testCompletionField() throws Exception {
IndexWriterConfig config = new IndexWriterConfig().setCommitOnClose(true)
.setUseCompoundFile(false)
.setCodec(new Lucene100Codec(Lucene100Codec.Mode.BEST_SPEED) {
.setCodec(new Lucene101Codec(Lucene101Codec.Mode.BEST_SPEED) {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if (field.startsWith("suggest_")) {
return new Completion912PostingsFormat(randomFrom(CompletionPostingsFormat.FSTLoadMode.values()));
return new Completion101PostingsFormat(randomFrom(CompletionPostingsFormat.FSTLoadMode.values()));
} else {
return super.postingsFormat();
}
Expand Down Expand Up @@ -414,25 +414,25 @@ private static void addFieldsToDoc(Document doc, IndexableField[] fields) {
enum CodecMode {
BEST_SPEED {
@Override
Lucene100Codec.Mode mode() {
return Lucene100Codec.Mode.BEST_SPEED;
Lucene101Codec.Mode mode() {
return Lucene101Codec.Mode.BEST_SPEED;
}
},

BEST_COMPRESSION {
@Override
Lucene100Codec.Mode mode() {
return Lucene100Codec.Mode.BEST_COMPRESSION;
Lucene101Codec.Mode mode() {
return Lucene101Codec.Mode.BEST_COMPRESSION;
}
};

abstract Lucene100Codec.Mode mode();
abstract Lucene101Codec.Mode mode();
}

static void indexRandomly(Directory directory, CodecMode codecMode, int numDocs, Consumer<Document> addFields) throws IOException {
IndexWriterConfig config = new IndexWriterConfig().setCommitOnClose(true)
.setUseCompoundFile(randomBoolean())
.setCodec(new Lucene100Codec(codecMode.mode()));
.setCodec(new Lucene101Codec(codecMode.mode()));
try (IndexWriter writer = new IndexWriter(directory, config)) {
for (int i = 0; i < numDocs; i++) {
final Document doc = new Document();
Expand Down Expand Up @@ -640,7 +640,7 @@ static void rewriteIndexWithPerFieldCodec(Directory source, CodecMode mode, Dire
try (DirectoryReader reader = DirectoryReader.open(source)) {
IndexWriterConfig config = new IndexWriterConfig().setSoftDeletesField(Lucene.SOFT_DELETES_FIELD)
.setUseCompoundFile(randomBoolean())
.setCodec(new Lucene100Codec(mode.mode()) {
.setCodec(new Lucene101Codec(mode.mode()) {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return new ES812PostingsFormat();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public void testResolveDefaultCodecs() throws Exception {
assumeTrue("Only when zstd_stored_fields feature flag is enabled", CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG.isEnabled());
CodecService codecService = createCodecService();
assertThat(codecService.codec("default"), instanceOf(PerFieldMapperCodec.class));
assertThat(codecService.codec("default"), instanceOf(Elasticsearch900Codec.class));
assertThat(codecService.codec("default"), instanceOf(Elasticsearch900Lucene101Codec.class));
}

public void testDefault() throws Exception {
Expand Down
Loading