From 8f508b0268c27960616e0e39e1ea6bb73a8e9e5d Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Mon, 6 Jan 2025 10:47:36 +1100 Subject: [PATCH] fix: support for Zarr V2 `zstd` encoded data created with `numcodecs` < 0.13 (#121) --- CHANGELOG.md | 2 + zarrs/tests/data/v3/array_zstd.zarr/zarr.json | 4 +- zarrs_metadata/src/v2/array/codec/zstd.rs | 39 +++++++++++++++++++ zarrs_metadata/src/v2_to_v3.rs | 11 ++++++ 4 files changed, 54 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ef582117..eedf33d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `ArrayShardedExt::is_exclusively_sharded` - Add `ArrayShardedReadableExtCache::array_is_exclusively_sharded` - Add `Vlen{Array,Bytes,Utf8}Codec`, replacing `VlenV2Codec` +- Add `ZstdCodecConfigurationNumCodecs` + - Adds support for Zarr V2 `zstd` encoded data created with `numcodecs` < 0.13 ### Changed - **Breaking**: Seal `Array` extension traits: `ArraySharded[Readable]Ext` and `ArrayChunkCacheExt` diff --git a/zarrs/tests/data/v3/array_zstd.zarr/zarr.json b/zarrs/tests/data/v3/array_zstd.zarr/zarr.json index a883790f..60247395 100644 --- a/zarrs/tests/data/v3/array_zstd.zarr/zarr.json +++ b/zarrs/tests/data/v3/array_zstd.zarr/zarr.json @@ -32,8 +32,8 @@ { "name": "zstd", "configuration": { - "checksum": false, - "level": 5 + "level": 5, + "checksum": false } } ], diff --git a/zarrs_metadata/src/v2/array/codec/zstd.rs b/zarrs_metadata/src/v2/array/codec/zstd.rs index f8135d2f..78678ba6 100644 --- a/zarrs_metadata/src/v2/array/codec/zstd.rs +++ b/zarrs_metadata/src/v2/array/codec/zstd.rs @@ -1 +1,40 @@ +use derive_more::derive::{Display, From}; +use serde::{Deserialize, Serialize}; + pub use crate::v3::array::codec::zstd::ZstdCodecConfigurationV1; + +use crate::v3::array::codec::zstd::{ZstdCodecConfiguration, ZstdCompressionLevel}; + +type ZstdCodecConfigurationNumCodecs0_13 = 
ZstdCodecConfigurationV1; + +/// A wrapper to handle various versions of `zstd` codec configuration parameters. +#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display, From)] +#[serde(untagged)] +pub enum ZstdCodecConfigurationNumCodecs { + /// `numcodecs` version 0.13. + V0_13(ZstdCodecConfigurationNumCodecs0_13), + /// `numcodecs` version 0.1. + V0_1(ZstdCodecConfigurationNumCodecs0_1), +} + +/// Configuration parameters for the `zstd` codec (`numcodecs` version 0.1). +#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display)] +#[serde(deny_unknown_fields)] +#[display("{}", serde_json::to_string(self).unwrap_or_default())] +pub struct ZstdCodecConfigurationNumCodecs0_1 { + /// The compression level. + pub level: ZstdCompressionLevel, +} + +/// Convert [`ZstdCodecConfigurationNumCodecs`] to [`ZstdCodecConfiguration`]. +#[must_use] +pub fn codec_zstd_v2_numcodecs_to_v3( + zstd: &ZstdCodecConfigurationNumCodecs, +) -> ZstdCodecConfiguration { + match zstd { + ZstdCodecConfigurationNumCodecs::V0_13(zstd) => ZstdCodecConfiguration::V1(zstd.clone()), + ZstdCodecConfigurationNumCodecs::V0_1(zstd) => { + ZstdCodecConfiguration::V1(ZstdCodecConfigurationV1::new(zstd.level.clone(), false)) + } + } +} diff --git a/zarrs_metadata/src/v2_to_v3.rs b/zarrs_metadata/src/v2_to_v3.rs index b81c3042..1c6429c5 100644 --- a/zarrs_metadata/src/v2_to_v3.rs +++ b/zarrs_metadata/src/v2_to_v3.rs @@ -6,6 +6,7 @@ use crate::{ codec::{ blosc::{codec_blosc_v2_numcodecs_to_v3, BloscCodecConfigurationNumcodecs}, zfpy::{codec_zfpy_v2_numcodecs_to_v3, ZfpyCodecConfigurationNumcodecs}, + zstd::{codec_zstd_v2_numcodecs_to_v3, ZstdCodecConfigurationNumCodecs}, }, data_type_metadata_v2_to_endianness, ArrayMetadataV2Order, DataTypeMetadataV2, DataTypeMetadataV2InvalidEndiannessError, FillValueMetadataV2, @@ -214,6 +215,16 @@ pub fn array_metadata_v2_to_v3( &configuration, )?); } + crate::v3::array::codec::zstd::IDENTIFIER => { + let zstd = serde_json::from_value::<ZstdCodecConfigurationNumCodecs>(
serde_json::to_value(compressor.configuration())?, + )?; + let configuration = codec_zstd_v2_numcodecs_to_v3(&zstd); + codecs.push(MetadataV3::new_with_serializable_configuration( + crate::v3::array::codec::zstd::IDENTIFIER, + &configuration, + )?); + } _ => codecs.push(MetadataV3::new_with_configuration( compressor.id(), compressor.configuration().clone(),