Skip to content

Commit

Permalink
Bump Elasticsearch codec to track Lucene101Codec
Browse files Browse the repository at this point in the history
  • Loading branch information
javanna committed Nov 6, 2024
1 parent 5c618ee commit 5ca891d
Show file tree
Hide file tree
Showing 24 changed files with 186 additions and 49 deletions.
3 changes: 2 additions & 1 deletion server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,8 @@
with
org.elasticsearch.index.codec.Elasticsearch814Codec,
org.elasticsearch.index.codec.Elasticsearch816Codec,
org.elasticsearch.index.codec.Elasticsearch900Codec;
org.elasticsearch.index.codec.Elasticsearch900Codec,
org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec;

provides org.apache.logging.log4j.core.util.ContextDataProvider with org.elasticsearch.common.logging.DynamicContextDataProvider;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.backward_codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
Expand All @@ -21,7 +22,7 @@
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.DirectoryReader;
Expand Down Expand Up @@ -306,6 +307,9 @@ private static void readProximity(Terms terms, PostingsEnum postings) throws IOE
private static BlockTermState getBlockTermState(TermsEnum termsEnum, BytesRef term) throws IOException {
if (term != null && termsEnum.seekExact(term)) {
final TermState termState = termsEnum.termState();
if (termState instanceof final Lucene101PostingsFormat.IntBlockTermState blockTermState) {
return new BlockTermState(blockTermState.docStartFP, blockTermState.posStartFP, blockTermState.payStartFP);
}
if (termState instanceof final Lucene912PostingsFormat.IntBlockTermState blockTermState) {
return new BlockTermState(blockTermState.docStartFP, blockTermState.posStartFP, blockTermState.payStartFP);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
import java.util.Objects;

public class Lucene {
public static final String LATEST_CODEC = "Lucene100";
public static final String LATEST_CODEC = "Lucene101";

public static final String SOFT_DELETES_FIELD = "__soft_deletes";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ private static Version parseUnchecked(String version) {
public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT_BACKPORT = def(8_519_00_0, Version.LUCENE_9_12_0);
public static final IndexVersion UPGRADE_TO_LUCENE_10_0_0 = def(9_000_00_0, Version.LUCENE_10_0_0);
public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = def(9_001_00_0, Version.LUCENE_10_0_0);
public static final IndexVersion UPGRADE_TO_LUCENE_10_0_1 = def(9_002_00_0, Version.LUCENE_10_0_1);
public static final IndexVersion UPGRADE_TO_LUCENE_10_1_0 = def(9_002_00_0, Version.LUCENE_10_1_0);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.FeatureFlag;
import org.elasticsearch.core.Nullable;
Expand Down Expand Up @@ -46,7 +46,7 @@ public class CodecService implements CodecProvider {
public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays) {
final var codecs = new HashMap<String, Codec>();

Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene100Codec.Mode.BEST_SPEED, mapperService, bigArrays);
Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene101Codec.Mode.BEST_SPEED, mapperService, bigArrays);
if (ZSTD_STORED_FIELDS_FEATURE_FLAG.isEnabled()) {
codecs.put(DEFAULT_CODEC, new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_SPEED, mapperService, bigArrays));
} else {
Expand All @@ -58,7 +58,7 @@ public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays)
BEST_COMPRESSION_CODEC,
new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_COMPRESSION, mapperService, bigArrays)
);
Codec legacyBestCompressionCodec = new LegacyPerFieldMapperCodec(Lucene100Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays);
Codec legacyBestCompressionCodec = new LegacyPerFieldMapperCodec(Lucene101Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays);
codecs.put(LEGACY_BEST_COMPRESSION_CODEC, legacyBestCompressionCodec);

codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
package org.elasticsearch.index.codec;

import org.apache.lucene.backward_codecs.lucene912.Lucene912Codec;
import org.apache.lucene.backward_codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,22 @@

package org.elasticsearch.index.codec;

import org.apache.lucene.backward_codecs.lucene100.Lucene100Codec;
import org.apache.lucene.backward_codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;

/**
* Elasticsearch codec as of 9.0. This extends the Lucene 10.0 codec to compressed stored fields with ZSTD instead of LZ4/DEFLATE. See
* {@link Zstd814StoredFieldsFormat}.
* Elasticsearch codec as of 9.0-snapshot. This extends the Lucene 10.0 codec to compress stored fields with ZSTD instead of LZ4/DEFLATE.
* See {@link Zstd814StoredFieldsFormat}.
*/
public class Elasticsearch900Codec extends CodecService.DeduplicateFieldInfosCodec {

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;

/**
* Elasticsearch codec as of 9.0-snapshot. This extends the Lucene 10.1 codec to compress stored fields with ZSTD instead of LZ4/DEFLATE.
* See {@link Zstd814StoredFieldsFormat}.
*
* <p>Postings, doc values and KNN vectors formats are resolved per field through {@link #getPostingsFormatForField(String)},
* {@link #getDocValuesFormatForField(String)} and {@link #getKnnVectorsFormatForField(String)}, which subclasses may override.
*/
public class Elasticsearch900Lucene101Codec extends CodecService.DeduplicateFieldInfosCodec {

// Stored fields format obtained from the Zstd814StoredFieldsFormat.Mode passed to the constructor.
private final StoredFieldsFormat storedFieldsFormat;

// Default postings format, plus a per-field wrapper that routes each lookup through getPostingsFormatForField.
private final PostingsFormat defaultPostingsFormat;
private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return Elasticsearch900Lucene101Codec.this.getPostingsFormatForField(field);
}
};

// Default doc values format, plus a per-field wrapper that routes each lookup through getDocValuesFormatForField.
private final DocValuesFormat defaultDVFormat;
private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
return Elasticsearch900Lucene101Codec.this.getDocValuesFormatForField(field);
}
};

// Default KNN vectors format, plus a per-field wrapper that routes each lookup through getKnnVectorsFormatForField.
private final KnnVectorsFormat defaultKnnVectorsFormat;
private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return Elasticsearch900Lucene101Codec.this.getKnnVectorsFormatForField(field);
}
};

/** Public no-arg constructor, needed for SPI loading at read-time. Uses {@link Zstd814StoredFieldsFormat.Mode#BEST_SPEED}. */
public Elasticsearch900Lucene101Codec() {
this(Zstd814StoredFieldsFormat.Mode.BEST_SPEED);
}

/**
* Constructor. Takes a {@link Zstd814StoredFieldsFormat.Mode} that describes whether to optimize for retrieval speed at the expense of
* worse space-efficiency or vice-versa.
*
* @param mode the stored fields mode; its format becomes this codec's {@link #storedFieldsFormat()}
*/
public Elasticsearch900Lucene101Codec(Zstd814StoredFieldsFormat.Mode mode) {
// "Elasticsearch900Lucene101" is the codec name handed to the parent class; the Lucene101Codec is presumably the delegate
// used for every format not overridden here (parent class is not visible in this file -- confirm).
super("Elasticsearch900Lucene101", new Lucene101Codec());
this.storedFieldsFormat = mode.getFormat();
this.defaultPostingsFormat = new Lucene101PostingsFormat();
this.defaultDVFormat = new Lucene90DocValuesFormat();
this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
}

/** Returns the stored fields format selected by the mode given at construction time. */
@Override
public StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
}

/** Returns the per-field postings format wrapper, which defers to {@link #getPostingsFormatForField(String)}. */
@Override
public final PostingsFormat postingsFormat() {
return postingsFormat;
}

/** Returns the per-field doc values format wrapper, which defers to {@link #getDocValuesFormatForField(String)}. */
@Override
public final DocValuesFormat docValuesFormat() {
return docValuesFormat;
}

/** Returns the per-field KNN vectors format wrapper, which defers to {@link #getKnnVectorsFormatForField(String)}. */
@Override
public final KnnVectorsFormat knnVectorsFormat() {
return knnVectorsFormat;
}

/**
* Returns the postings format that should be used for writing new segments of <code>field</code>.
*
* <p>The default implementation ignores <code>field</code> and always returns the {@link Lucene101PostingsFormat} instance created
* in the constructor.
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future versions of Lucene are only guaranteed to be able to read the default implementation.
*/
public PostingsFormat getPostingsFormatForField(String field) {
return defaultPostingsFormat;
}

/**
* Returns the docvalues format that should be used for writing new segments of <code>field</code>.
*
* <p>The default implementation ignores <code>field</code> and always returns the {@link Lucene90DocValuesFormat} instance created
* in the constructor.
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future versions of Lucene are only guaranteed to be able to read the default implementation.
*/
public DocValuesFormat getDocValuesFormatForField(String field) {
return defaultDVFormat;
}

/**
* Returns the vectors format that should be used for writing new segments of <code>field</code>.
*
* <p>The default implementation ignores <code>field</code> and always returns the {@link Lucene99HnswVectorsFormat} instance created
* in the constructor.
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future versions of Lucene are only guaranteed to be able to read the default implementation.
*/
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return defaultKnnVectorsFormat;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.mapper.MapperService;
Expand All @@ -22,11 +22,11 @@
* Legacy version of {@link PerFieldMapperCodec}. This codec is preserved to give an escape hatch in case we encounter issues with new
* changes in {@link PerFieldMapperCodec}.
*/
public final class LegacyPerFieldMapperCodec extends Lucene100Codec {
public final class LegacyPerFieldMapperCodec extends Lucene101Codec {

private final PerFieldFormatSupplier formatSupplier;

public LegacyPerFieldMapperCodec(Lucene100Codec.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
public LegacyPerFieldMapperCodec(Lucene101Codec.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
super(compressionMode);
this.formatSupplier = new PerFieldFormatSupplier(mapperService, bigArrays);
// If the below assertion fails, it is a sign that Lucene released a new codec. You must create a copy of the current Elasticsearch
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
* per index in real time via the mapping API. If no specific postings format or vector format is
* configured for a specific field the default postings or vector format is used.
*/
public final class PerFieldMapperCodec extends Elasticsearch900Codec {
public final class PerFieldMapperCodec extends Elasticsearch900Lucene101Codec {

private final PerFieldFormatSupplier formatSupplier;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
org.elasticsearch.index.codec.Elasticsearch814Codec
org.elasticsearch.index.codec.Elasticsearch816Codec
org.elasticsearch.index.codec.Elasticsearch900Codec
org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
Expand Down Expand Up @@ -327,7 +327,7 @@ public void testTriangle() throws Exception {
public void testCompletionField() throws Exception {
IndexWriterConfig config = new IndexWriterConfig().setCommitOnClose(true)
.setUseCompoundFile(false)
.setCodec(new Lucene100Codec(Lucene100Codec.Mode.BEST_SPEED) {
.setCodec(new Lucene101Codec(Lucene101Codec.Mode.BEST_SPEED) {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if (field.startsWith("suggest_")) {
Expand Down Expand Up @@ -414,25 +414,25 @@ private static void addFieldsToDoc(Document doc, IndexableField[] fields) {
enum CodecMode {
BEST_SPEED {
@Override
Lucene100Codec.Mode mode() {
return Lucene100Codec.Mode.BEST_SPEED;
Lucene101Codec.Mode mode() {
return Lucene101Codec.Mode.BEST_SPEED;
}
},

BEST_COMPRESSION {
@Override
Lucene100Codec.Mode mode() {
return Lucene100Codec.Mode.BEST_COMPRESSION;
Lucene101Codec.Mode mode() {
return Lucene101Codec.Mode.BEST_COMPRESSION;
}
};

abstract Lucene100Codec.Mode mode();
abstract Lucene101Codec.Mode mode();
}

static void indexRandomly(Directory directory, CodecMode codecMode, int numDocs, Consumer<Document> addFields) throws IOException {
IndexWriterConfig config = new IndexWriterConfig().setCommitOnClose(true)
.setUseCompoundFile(randomBoolean())
.setCodec(new Lucene100Codec(codecMode.mode()));
.setCodec(new Lucene101Codec(codecMode.mode()));
try (IndexWriter writer = new IndexWriter(directory, config)) {
for (int i = 0; i < numDocs; i++) {
final Document doc = new Document();
Expand Down Expand Up @@ -640,7 +640,7 @@ static void rewriteIndexWithPerFieldCodec(Directory source, CodecMode mode, Dire
try (DirectoryReader reader = DirectoryReader.open(source)) {
IndexWriterConfig config = new IndexWriterConfig().setSoftDeletesField(Lucene.SOFT_DELETES_FIELD)
.setUseCompoundFile(randomBoolean())
.setCodec(new Lucene100Codec(mode.mode()) {
.setCodec(new Lucene101Codec(mode.mode()) {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return new ES812PostingsFormat();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public void testResolveDefaultCodecs() throws Exception {
assumeTrue("Only when zstd_stored_fields feature flag is enabled", CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG.isEnabled());
CodecService codecService = createCodecService();
assertThat(codecService.codec("default"), instanceOf(PerFieldMapperCodec.class));
assertThat(codecService.codec("default"), instanceOf(Elasticsearch900Codec.class));
assertThat(codecService.codec("default"), instanceOf(Elasticsearch900Lucene101Codec.class));
}

public void testDefault() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
import org.elasticsearch.common.logging.LogConfigurator;

Expand All @@ -24,7 +24,7 @@ public class ES813FlatVectorFormatTests extends BaseKnnVectorsFormatTestCase {

@Override
protected Codec getCodec() {
return new Lucene100Codec() {
return new Lucene101Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new ES813FlatVectorFormat();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
import org.elasticsearch.common.logging.LogConfigurator;

Expand All @@ -24,7 +24,7 @@ public class ES813Int8FlatVectorFormatTests extends BaseKnnVectorsFormatTestCase

@Override
protected Codec getCodec() {
return new Lucene100Codec() {
return new Lucene101Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new ES813Int8FlatVectorFormat();
Expand Down
Loading

0 comments on commit 5ca891d

Please sign in to comment.