diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 3ba407b2b4de..c3bc61d12bca 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -288,6 +288,8 @@ Improvements Optimizations --------------------- +* GITHUB#13252: Replace handwritten comparison loops with Arrays.compareUnsigned in SegmentTermsEnum. (zhouhui) + * GITHUB#12996: Reduce ArrayUtil#grow in decompress. (Zhang Chao) * GITHUB#13115: Short circuit queued flush check when flush on update is disabled (Prabhat Sharma) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java index 479736099ef2..e3389931be71 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.io.PrintStream; +import java.util.Arrays; import org.apache.lucene.codecs.BlockTermState; import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.ImpactsEnum; @@ -387,31 +388,18 @@ public boolean seekExact(BytesRef target) throws IOException { } if (cmp == 0) { - final int targetUptoMid = targetUpto; - // Second compare the rest of the term, but // don't save arc/output/frame; we only do this // to find out if the target term is before, // equal or after the current term - final int targetLimit2 = Math.min(target.length, term.length()); - while (targetUpto < targetLimit2) { - cmp = - (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); - // if (DEBUG) { - // System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + - // targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + - // targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"); - // } - if (cmp != 0) { - break; - } - targetUpto++; - } - - if (cmp == 
0) { - cmp = term.length() - target.length; - } - targetUpto = targetUptoMid; + cmp = + Arrays.compareUnsigned( + term.bytes(), + targetUpto, + term.length(), + target.bytes, + target.offset + targetUpto, + target.offset + target.length); } if (cmp < 0) { @@ -666,28 +654,16 @@ public SeekStatus seekCeil(BytesRef target) throws IOException { } if (cmp == 0) { - final int targetUptoMid = targetUpto; // Second compare the rest of the term, but // don't save arc/output/frame: - final int targetLimit2 = Math.min(target.length, term.length()); - while (targetUpto < targetLimit2) { - cmp = - (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); - // if (DEBUG) { - // System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit - // + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) - // + " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")"); - // } - if (cmp != 0) { - break; - } - targetUpto++; - } - - if (cmp == 0) { - cmp = term.length() - target.length; - } - targetUpto = targetUptoMid; + cmp = + Arrays.compareUnsigned( + term.bytes(), + targetUpto, + term.length(), + target.bytes, + target.offset + targetUpto, + target.offset + target.length); } if (cmp < 0) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BasePostingsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BasePostingsFormatTestCase.java index 4d0024b93e30..8f8233ee680e 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BasePostingsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BasePostingsFormatTestCase.java @@ -369,6 +369,42 @@ public void testGhosts() throws Exception { dir.close(); } + // Test seek in disorder. 
+  public void testDisorder() throws Exception {
+    // Index numTerms documents, each with one distinct "id" term, then seek to randomly chosen
+    // terms in arbitrary order; every seekExact and seekCeil must land on the requested term.
+    final int numTerms = 10000;
+    IndexWriterConfig iwc = newIndexWriterConfig(null);
+    iwc.setCodec(getCodec());
+    iwc.setMergePolicy(newTieredMergePolicy());
+
+    // try-with-resources closes reader, writer and directory (in that order) even when an
+    // assertion fails part-way through.
+    try (Directory dir = newDirectory();
+        IndexWriter iw = new IndexWriter(dir, iwc)) {
+      for (int i = 0; i < numTerms; i++) {
+        Document document = new Document();
+        document.add(new StringField("id", Integer.toString(i), Field.Store.NO));
+        iw.addDocument(document);
+      }
+      iw.commit();
+      iw.forceMerge(1);
+
+      try (DirectoryReader reader = DirectoryReader.open(iw)) {
+        TermsEnum termsEnum = getOnlyLeafReader(reader).terms("id").iterator();
+        for (int i = 0; i < 2 * numTerms; i++) {
+          BytesRef target = new BytesRef(Integer.toString(random().nextInt(0, numTerms)));
+          // seekExact: the term was indexed, so it must be found wherever the enum was before.
+          assertTrue(termsEnum.seekExact(target));
+          assertEquals(target, termsEnum.term());
+          // seekCeil: an indexed term must be reported as FOUND, not as a later term.
+          assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(target));
+          assertEquals(target, termsEnum.term());
+        }
+      }
+    }
+  }
+
   protected void subCheckBinarySearch(TermsEnum termsEnum) throws Exception {}
 
   public void testBinarySearchTermLeaf() throws Exception {