KAFKA-4514: Add Codec for ZStandard Compression #2267

Merged: 22 commits, Oct 10, 2018

Commits (22)
d4b2bf4
KAFKA-4514: Add Codec for ZStandard Compression
dongjinleekr Dec 16, 2016
bda05f7
Minor updates
dongjinleekr Mar 10, 2018
82ba81a
Update ZStd version: 1.3.3 -> 1.3.4 (zstd-jni 1.3.4-10)
dongjinleekr Jun 13, 2018
51bdf15
Update ZStd version: 1.3.4 -> 1.3.5 (zstd-jni 1.3.5-2)
dongjinleekr Jul 14, 2018
35dcffa
Add license description of zstd and zstd-jni
dongjinleekr Aug 12, 2018
8872bdf
Disallow fetch request with API version < 10 for ZStandard compressed…
dongjinleekr Aug 31, 2018
fbdcabe
Disallow produce request with API version < 7 for ZStandard compresse…
dongjinleekr Sep 16, 2018
74d6b19
Disallow MemoryRecordsBuilder instantiation with ZSTD compression cod…
dongjinleekr Sep 21, 2018
671e12f
Disallow downconversion of ZSTD compressed records
dongjinleekr Sep 25, 2018
c6bbabd
Make the constructor of ProduceRequest.Builder public: 1. parity with…
dongjinleekr Sep 26, 2018
e5a7fd4
Add integration tests: FetchRequestTest, ProduceRequestTest
dongjinleekr Sep 26, 2018
06b2a59
Update zstd-jni version: 1.3.5-2 -> 1.3.5-4
dongjinleekr Sep 27, 2018
561d21c
Fix: remove unneeded newline addition at CompressionType.java
dongjinleekr Sep 28, 2018
3b4f6ae
Revert "Disallow downconversion of ZSTD compressed records"
dongjinleekr Oct 3, 2018
607defc
Reimplement down-conversion logic
dongjinleekr Oct 3, 2018
f34d323
Update for the previous commit (Thanks to Jason Gustafson's guide)
dongjinleekr Oct 4, 2018
1eee287
Add UNSUPPORTED_COMPRESSION_TYPE to the possible error codes list of …
dongjinleekr Oct 5, 2018
bf95957
Refactor tests: MemoryRecordsTest, MemoryRecordsBuilderTest
dongjinleekr Oct 5, 2018
36d77af
Remove ProduceRequest version check in KafkaApis#handleProduceRequest…
dongjinleekr Oct 5, 2018
54eaa61
Add validation for the case magic < 2 with Zstandard Compression
dongjinleekr Oct 5, 2018
65c57a1
Consistency: all log or exception messages are using 'ZStandard', not…
dongjinleekr Oct 5, 2018
147c473
Add validation for ProduceRequest: update ProduceRequest.validateRecords
dongjinleekr Oct 5, 2018
66 changes: 66 additions & 0 deletions LICENSE
@@ -201,6 +201,7 @@
See the License for the specific language governing permissions and
limitations under the License.

------------------------------------------------------------------------------------
This distribution has a binary dependency on jersey, which is available under the CDDL
License as described below.

@@ -328,3 +329,68 @@ As between Initial Developer and the Contributors, each party is responsible for
NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL)

The code released under the CDDL shall be governed by the laws of the State of California (excluding conflict-of-law provisions). Any litigation relating to this License shall be subject to the jurisdiction of the Federal Courts of the Northern District of California and the state courts of the State of California, with venue lying in Santa Clara County, California.

------------------------------------------------------------------------------------
This distribution has a binary dependency on zstd, which is available under the BSD 3-Clause License as described below.

BSD License

For Zstandard software

Copyright (c) 2016-present, Facebook, Inc. All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

* Neither the name Facebook nor the names of its contributors may be used to
endorse or promote products derived from this software without specific
prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

------------------------------------------------------------------------------------
This distribution has a binary dependency on zstd-jni, which is available under the BSD 2-Clause License
as described below.

Zstd-jni: JNI bindings to Zstd Library

Copyright (c) 2015-2016, Luben Karavelov/ All rights reserved.

BSD License

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Member:
Is this LICENSE change needed? Cc @ewencp

Contributor:
I'm not sure why most of the additions here were made, tbh.

All of this with the caveat IANAL and any questions re: licensing are best directed at ASF legal.

In many cases, the NOTICE file might be a more appropriate location depending on where the LICENSE actually gets pulled into the binary distribution. This gets confusing because of handling both source and binary distributions. In source distributions, including additional info in the LICENSE file would really only apply to anything copied in afaik, since the source distribution doesn't include any of the dependency binaries, i.e. what's under license in that distribution is only our code. (Probably gets messier with top-of-file copyright notices..., and even NOTICE files seem messy because, e.g., Apache explicitly calls them out while other licenses don't)

In binary distributions, adding here wouldn't change, for example, whether we reproduce the BSD license notice as requested -- either you think it needs to be directly printed (which this doesn't do) or just including the original JARs with their original licenses would be enough since any binary distribution that includes the dependencies would have that in the jar (or if they were missing, upstream should fix that). At a bare minimum, we don't currently include nearly all the dependency licenses here and I don't think most Apache projects do. I haven't looked back at the history to understand what triggered people to start including more than the Apache license in this file.

Again, IANAL, but it's actually unclear you really need anything beyond the raw license given the way source distributions and java binary distributions would work & preserve other contents (modulo uberjar conflicts, which we don't do). I think Apache is the only OSS license where there's a "readable" phrase that might complicate things.

But IANAL, so probably figuring this out w/ apache legal would give us more confidence in the answer.

hachikuji:
@ijuma @ewencp The spark folks appear to have taken a principled approach here: apache/spark#21640. Ultimately they still include the zstd and zstd-jni license files. I think we can probably commit this as is and perhaps file a JIRA to come up with a standardized approach (probably just use the same convention as spark). Does that sound reasonable?

Contributor Author:
FYI: In fact, apache/spark#21640 (what @hachikuji pointed out) is what I referred to while I was working on this commit. (It is also mentioned in the KIP document.) It was initiated from this message in spark-dev mailing list, sent by a PMC member.

Member:
@hachikuji I'm fine with merging and submitting a JIRA. It's clear that we're not consistent, so we probably should clean it up before the release.

hachikuji:
Sounds good. I will merge today.

1 change: 1 addition & 0 deletions build.gradle
@@ -820,6 +820,7 @@ project(':clients') {
conf2ScopeMappings.addMapping(1000, configurations.jacksonDatabindConfig, "provided")

dependencies {
compile libs.zstd
compile libs.lz4
compile libs.snappy
compile libs.slf4jApi
@@ -159,7 +159,7 @@ public class ProducerConfig extends AbstractConfig {
/** <code>compression.type</code> */
public static final String COMPRESSION_TYPE_CONFIG = "compression.type";
private static final String COMPRESSION_TYPE_DOC = "The compression type for all data generated by the producer. The default is none (i.e. no compression). Valid "
+ " values are <code>none</code>, <code>gzip</code>, <code>snappy</code>, or <code>lz4</code>. "
+ " values are <code>none</code>, <code>gzip</code>, <code>snappy</code>, <code>lz4</code>, or <code>zstd</code>. "
+ "Compression is of full batches of data, so the efficacy of batching will also impact the compression ratio (more batching means better compression).";

/** <code>metrics.sample.window.ms</code> */
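
For reference, a producer opts into the new codec simply by setting compression.type to the value this doc string now lists; below is a minimal sketch, where the bootstrap address and topic name are placeholder assumptions rather than anything from this PR:

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

public class ZstdProducerExample {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder address
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "zstd"); // value added by this PR
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            // Whole record batches are compressed with zstd before being sent to the broker.
            producer.send(new ProducerRecord<>("test-topic", "key", "value")); // placeholder topic
        }
    }
}

As enforced elsewhere in this PR, brokers reject zstd batches in produce requests older than v7 and will not down-convert them for fetch requests older than v10.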
@@ -140,7 +140,7 @@ public class TopicConfig {

public static final String COMPRESSION_TYPE_CONFIG = "compression.type";
public static final String COMPRESSION_TYPE_DOC = "Specify the final compression type for a given topic. " +
"This configuration accepts the standard compression codecs ('gzip', 'snappy', lz4). It additionally " +
"This configuration accepts the standard compression codecs ('gzip', 'snappy', 'lz4', 'zstd'). It additionally " +
"accepts 'uncompressed' which is equivalent to no compression; and 'producer' which means retain the " +
"original compression codec set by the producer.";

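The topic-level override can be set the same way through the Admin client; a minimal sketch using the alterConfigs API that was current at the time of this PR, with the bootstrap address and topic name again as placeholders:

import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.Config;
import org.apache.kafka.clients.admin.ConfigEntry;
import org.apache.kafka.common.config.ConfigResource;
import org.apache.kafka.common.config.TopicConfig;

public class ZstdTopicConfigExample {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder address
        try (AdminClient admin = AdminClient.create(props)) {
            ConfigResource topic = new ConfigResource(ConfigResource.Type.TOPIC, "test-topic"); // placeholder topic
            Config config = new Config(Collections.singleton(
                    new ConfigEntry(TopicConfig.COMPRESSION_TYPE_CONFIG, "zstd")));
            // With the topic-level setting, the broker stores batches for this topic in zstd
            // regardless of the codec the producer used (unless 'producer' is configured).
            admin.alterConfigs(Collections.singletonMap(topic, config)).all().get();
        }
    }
}
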
@@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.common.errors;

/**
* The requesting client does not support the compression type of given partition.
*/
public class UnsupportedCompressionTypeException extends ApiException {

private static final long serialVersionUID = 1L;

public UnsupportedCompressionTypeException(String message) {
super(message);
}

public UnsupportedCompressionTypeException(String message, Throwable cause) {
super(message, cause);
}

}
@@ -91,6 +91,7 @@
import org.apache.kafka.common.errors.UnknownServerException;
import org.apache.kafka.common.errors.UnknownTopicOrPartitionException;
import org.apache.kafka.common.errors.UnsupportedByAuthenticationException;
import org.apache.kafka.common.errors.UnsupportedCompressionTypeException;
import org.apache.kafka.common.errors.UnsupportedForMessageFormatException;
import org.apache.kafka.common.errors.UnsupportedSaslMechanismException;
import org.apache.kafka.common.errors.UnsupportedVersionException;
@@ -284,7 +285,9 @@ public enum Errors {
FENCED_LEADER_EPOCH(74, "The leader epoch in the request is older than the epoch on the broker",
FencedLeaderEpochException::new),
UNKNOWN_LEADER_EPOCH(75, "The leader epoch in the request is newer than the epoch on the broker",
UnknownLeaderEpochException::new);
UnknownLeaderEpochException::new),
UNSUPPORTED_COMPRESSION_TYPE(76, "The requesting client does not support the compression type of given partition.",
UnsupportedCompressionTypeException::new);

private static final Logger log = LoggerFactory.getLogger(Errors.class);
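
The new code 76 is resolved through the existing Errors machinery like any other broker error; a small sketch of the round trip, relying only on the enum entry added above:

import org.apache.kafka.common.errors.UnsupportedCompressionTypeException;
import org.apache.kafka.common.protocol.Errors;

public class UnsupportedCompressionTypeErrorCheck {
    public static void main(String[] args) {
        // Brokers put code 76 on the wire; clients resolve it back to the enum entry ...
        Errors error = Errors.forCode((short) 76);
        System.out.println(error); // UNSUPPORTED_COMPRESSION_TYPE
        // ... and the entry's builder produces the new exception type.
        System.out.println(error.exception() instanceof UnsupportedCompressionTypeException); // true
    }
}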

@@ -322,6 +322,8 @@ private DeepRecordsIterator(AbstractLegacyRecordBatch wrapperEntry,
throw new InvalidRecordException("Invalid wrapper magic found in legacy deep record iterator " + wrapperMagic);

CompressionType compressionType = wrapperRecord.compressionType();
if (compressionType == CompressionType.ZSTD)
throw new InvalidRecordException("Invalid wrapper compressionType found in legacy deep record iterator " + wrapperMagic);
ByteBuffer wrapperValue = wrapperRecord.value();
if (wrapperValue == null)
throw new InvalidRecordException("Found invalid compressed record set with null value (magic = " +
@@ -113,6 +113,26 @@ public InputStream wrapForInput(ByteBuffer inputBuffer, byte messageVersion, Buf
throw new KafkaException(e);
}
}
},

ZSTD(4, "zstd", 1.0f) {
@Override
public OutputStream wrapForOutput(ByteBufferOutputStream buffer, byte messageVersion) {
try {
return (OutputStream) ZstdConstructors.OUTPUT.invoke(buffer);
} catch (Throwable e) {
throw new KafkaException(e);
}
}

@Override
public InputStream wrapForInput(ByteBuffer buffer, byte messageVersion, BufferSupplier decompressionBufferSupplier) {
try {
return (InputStream) ZstdConstructors.INPUT.invoke(new ByteBufferInputStream(buffer));
} catch (Throwable e) {
throw new KafkaException(e);
}
}
};

public final int id;
@@ -156,6 +176,8 @@ public static CompressionType forId(int id) {
return SNAPPY;
case 3:
return LZ4;
case 4:
return ZSTD;
default:
throw new IllegalArgumentException("Unknown compression type id: " + id);
}
@@ -170,14 +192,16 @@ else if (SNAPPY.name.equals(name))
return SNAPPY;
else if (LZ4.name.equals(name))
return LZ4;
else if (ZSTD.name.equals(name))
return ZSTD;
else
throw new IllegalArgumentException("Unknown compression name: " + name);
}

// We should only have a runtime dependency on compression algorithms in case the native libraries don't support
// some platforms.
//
// For Snappy, we dynamically load the classes and rely on the initialization-on-demand holder idiom to ensure
// For Snappy and Zstd, we dynamically load the classes and rely on the initialization-on-demand holder idiom to ensure
// they're only loaded if used.
//
// For LZ4 we are using org.apache.kafka classes, which should always be in the classpath, and would not trigger
@@ -190,6 +214,13 @@ private static class SnappyConstructors {
MethodType.methodType(void.class, OutputStream.class));
}

private static class ZstdConstructors {
static final MethodHandle INPUT = findConstructor("com.github.luben.zstd.ZstdInputStream",
MethodType.methodType(void.class, InputStream.class));
static final MethodHandle OUTPUT = findConstructor("com.github.luben.zstd.ZstdOutputStream",
MethodType.methodType(void.class, OutputStream.class));
}

private static MethodHandle findConstructor(String className, MethodType methodType) {
try {
return MethodHandles.publicLookup().findConstructor(Class.forName(className), methodType);
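
The ZstdConstructors holder above relies on the initialization-on-demand idiom mentioned in the comment, so zstd-jni is only resolved when ZSTD is actually used; here is an isolated sketch of that same pattern, with a purely hypothetical codec class name that is not part of this PR:

import java.io.OutputStream;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;

public class LazyCodecLoading {

    // The nested holder is only initialized (and the optional codec class only looked up)
    // the first time Holder.OUTPUT is touched, never during LazyCodecLoading's own init.
    private static class Holder {
        static final MethodHandle OUTPUT = findConstructor(
                "com.example.OptionalCodecOutputStream", // hypothetical class, for illustration only
                MethodType.methodType(void.class, OutputStream.class));
    }

    private static MethodHandle findConstructor(String className, MethodType methodType) {
        try {
            return MethodHandles.publicLookup().findConstructor(Class.forName(className), methodType);
        } catch (ReflectiveOperationException e) {
            throw new RuntimeException("Optional codec not on the classpath", e);
        }
    }

    public static OutputStream wrap(OutputStream out) throws Throwable {
        // Only this call forces Holder, and therefore the codec class, to load.
        return (OutputStream) Holder.OUTPUT.invoke(out);
    }
}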
@@ -44,6 +44,9 @@ public class LazyDownConversionRecords implements BaseRecords {
* @param firstOffset The starting offset for down-converted records. This only impacts some cases. See
* {@link RecordsUtil#downConvert(Iterable, byte, long, Time)} for an explanation.
* @param time The time instance to use
*
* @throws org.apache.kafka.common.errors.UnsupportedCompressionTypeException If the first batch to down-convert
* has a compression type which we do not support down-conversion for.
*/
public LazyDownConversionRecords(TopicPartition topicPartition, Records records, byte toMagic, long firstOffset, Time time) {
this.topicPartition = Objects.requireNonNull(topicPartition);
@@ -150,7 +153,7 @@ protected ConvertedRecords makeNext() {
}

while (batchIterator.hasNext()) {
List<RecordBatch> batches = new ArrayList<>();
final List<RecordBatch> batches = new ArrayList<>();
boolean isFirstBatch = true;
long sizeSoFar = 0;

Expand All @@ -162,6 +165,7 @@ protected ConvertedRecords makeNext() {
sizeSoFar += currentBatch.sizeInBytes();
isFirstBatch = false;
}

ConvertedRecords convertedRecords = RecordsUtil.downConvert(batches, toMagic, firstOffset, time);
// During conversion, it is possible that we drop certain batches because they do not have an equivalent
// representation in the message format we want to convert to. For example, V0 and V1 message formats
@@ -17,6 +17,7 @@
package org.apache.kafka.common.record;

import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.UnsupportedCompressionTypeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -45,35 +46,50 @@ public LazyDownConversionRecordsSend(String destination, LazyDownConversionRecor
convertedRecordsIterator = records().iterator(MAX_READ_SIZE);
}

private MemoryRecords buildOverflowBatch(int remaining) {
// We do not have any records left to down-convert. Construct an overflow message for the length remaining.
// This message will be ignored by the consumer because its length will be past the length of maximum
// possible response size.
// DefaultRecordBatch =>
// BaseOffset => Int64
// Length => Int32
// ...
ByteBuffer overflowMessageBatch = ByteBuffer.allocate(
Math.max(MIN_OVERFLOW_MESSAGE_LENGTH, Math.min(remaining + 1, MAX_READ_SIZE)));
overflowMessageBatch.putLong(-1L);

// Fill in the length of the overflow batch. A valid batch must be at least as long as the minimum batch
// overhead.
overflowMessageBatch.putInt(Math.max(remaining + 1, DefaultRecordBatch.RECORD_BATCH_OVERHEAD));
log.debug("Constructed overflow message batch for partition {} with length={}", topicPartition(), remaining);
return MemoryRecords.readableRecords(overflowMessageBatch);
}

@Override
public long writeTo(GatheringByteChannel channel, long previouslyWritten, int remaining) throws IOException {
if (convertedRecordsWriter == null || convertedRecordsWriter.completed()) {
MemoryRecords convertedRecords;
// Check if we have more chunks left to down-convert
if (convertedRecordsIterator.hasNext()) {
// Get next chunk of down-converted messages
ConvertedRecords<MemoryRecords> recordsAndStats = convertedRecordsIterator.next();
convertedRecords = recordsAndStats.records();
recordConversionStats.add(recordsAndStats.recordConversionStats());
log.debug("Down-converted records for partition {} with length={}", topicPartition(), convertedRecords.sizeInBytes());
} else {
// We do not have any records left to down-convert. Construct an overflow message for the length remaining.
// This message will be ignored by the consumer because its length will be past the length of maximum
// possible response size.
// DefaultRecordBatch =>
// BaseOffset => Int64
// Length => Int32
// ...
ByteBuffer overflowMessageBatch = ByteBuffer.allocate(
Math.max(MIN_OVERFLOW_MESSAGE_LENGTH, Math.min(remaining + 1, MAX_READ_SIZE)));
overflowMessageBatch.putLong(-1L);

// Fill in the length of the overflow batch. A valid batch must be at least as long as the minimum batch
// overhead.
overflowMessageBatch.putInt(Math.max(remaining + 1, DefaultRecordBatch.RECORD_BATCH_OVERHEAD));
convertedRecords = MemoryRecords.readableRecords(overflowMessageBatch);
log.debug("Constructed overflow message batch for partition {} with length={}", topicPartition(), remaining);
try {
// Check if we have more chunks left to down-convert
if (convertedRecordsIterator.hasNext()) {
// Get next chunk of down-converted messages
ConvertedRecords<MemoryRecords> recordsAndStats = convertedRecordsIterator.next();
convertedRecords = recordsAndStats.records();
recordConversionStats.add(recordsAndStats.recordConversionStats());
log.debug("Down-converted records for partition {} with length={}", topicPartition(), convertedRecords.sizeInBytes());
} else {
convertedRecords = buildOverflowBatch(remaining);
}
} catch (UnsupportedCompressionTypeException e) {
// We have encountered a compression type which does not support down-conversion (e.g. zstd).
// Since we have already sent at least one batch and we have committed to the fetch size, we
// send an overflow batch. The consumer will read the first few records and then fetch from the
// offset of the batch which has the unsupported compression type. At that time, we will
// send back the UNSUPPORTED_COMPRESSION_TYPE error which will allow the consumer to fail gracefully.
convertedRecords = buildOverflowBatch(remaining);
}

convertedRecordsWriter = new DefaultRecordsSend(destination(), convertedRecords, Math.min(convertedRecords.sizeInBytes(), remaining));
}
return convertedRecordsWriter.writeTo(channel);
@@ -102,6 +102,8 @@ public MemoryRecordsBuilder(ByteBufferOutputStream bufferStream,
throw new IllegalArgumentException("Transactional records are not supported for magic " + magic);
if (isControlBatch)
throw new IllegalArgumentException("Control records are not supported for magic " + magic);
if (compressionType == CompressionType.ZSTD)
throw new IllegalArgumentException("ZStandard compression is not supported for magic " + magic);
}

this.magic = magic;
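
Because message formats older than v2 cannot carry the new codec, the builder now fails fast on that combination; a small sketch of the expected behaviour, assuming the existing MemoryRecords.builder(buffer, magic, compressionType, timestampType, baseOffset) overload:

import java.nio.ByteBuffer;
import org.apache.kafka.common.record.CompressionType;
import org.apache.kafka.common.record.MemoryRecords;
import org.apache.kafka.common.record.RecordBatch;
import org.apache.kafka.common.record.TimestampType;

public class ZstdMagicValidationExample {
    public static void main(String[] args) {
        // Magic v2 (message format 0.11+) accepts zstd without complaint ...
        MemoryRecords.builder(ByteBuffer.allocate(1024), RecordBatch.MAGIC_VALUE_V2,
                CompressionType.ZSTD, TimestampType.CREATE_TIME, 0L);
        // ... while magic v1 is now rejected up front by the check added in this constructor.
        try {
            MemoryRecords.builder(ByteBuffer.allocate(1024), RecordBatch.MAGIC_VALUE_V1,
                    CompressionType.ZSTD, TimestampType.CREATE_TIME, 0L);
        } catch (IllegalArgumentException e) {
            System.out.println("rejected: " + e.getMessage());
        }
    }
}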