Skip to content

Commit

Permalink
refactor!: make VLenV2Codec private, add `VLen{Array,Bytes,Utf8}Cod…
Browse files Browse the repository at this point in the history
…ec` (#119)
  • Loading branch information
LDeakin authored Jan 1, 2025
1 parent 698fb83 commit 7851ce6
Show file tree
Hide file tree
Showing 20 changed files with 256 additions and 67 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `ArrayShardedReadableExt::inner_chunk_byte_range`
- Add `ArrayShardedExt::is_exclusively_sharded`
- Add `ArrayShardedReadableExtCache::array_is_exclusively_sharded`
- Add `Vlen{Array,Bytes,Utf8}Codec`, replacing `VlenV2Codec`

### Changed
- **Breaking**: Seal `Array` extension traits: `ArraySharded[Readable]Ext` and `ArrayChunkCacheExt`
Expand All @@ -20,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- **Breaking**: Add `ArrayError::UnsupportedMethod`
- **Breaking**: Rename `DataType::Binary` to `Bytes` for compatibility with `zarr-python`
- **Breaking**: Make `array::codec::array_to_bytes::bytes::reverse_endianness` private
- **Breaking**: Make `VlenV2Codec` private

### Fixed
- Cleanup unnecessary lifetime constraints in partial decoders
Expand Down
24 changes: 14 additions & 10 deletions zarrs/doc/status/codecs_experimental.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,24 @@ Experimental codecs are recommended for evaluation only.
By default, the `"name"` of experimental codecs in array metadata links to the codec documentation in this crate.
This is configurable with [`Config::experimental_codec_names_mut`](config::Config::experimental_codec_names_mut).

| Codec Type | Codec | ZEP or URI | V3 | V2 | Feature Flag |
| -------------- | ------------------------ | -------------------------------------------------- | ------- | ------- | ------------ |
| Array to Array | [bitround] | <https://codec.zarrs.dev/array_to_array/bitround> | &check; | &check; | bitround |
| Array to Bytes | [zfp]<br>zfpy (V2) | <https://codec.zarrs.dev/array_to_bytes/zfp> | &check; | &check; | zfp |
| | [pcodec] | <https://codec.zarrs.dev/array_to_bytes/pcodec> | &check; | &check; | pcodec |
| | [vlen] | <https://codec.zarrs.dev/array_to_bytes/vlen> | &check; | | |
| | [vlen_v2]<br>vlen-* (V2) | <https://codec.zarrs.dev/array_to_bytes/vlen_v2> | &check; | &check; | |
| Bytes to Bytes | [bz2] | <https://codec.zarrs.dev/bytes_to_bytes/bz2> | &check; | &check; | bz2 |
| | [gdeflate] | <https://codec.zarrs.dev/bytes_to_bytes/gdeflate> | &check; | | gdeflate |
| Codec Type | Codec | ZEP or URI | V3 | V2 | Feature Flag |
| -------------- | ------------------------ | --------------------------------------------------- | ------- | ------- | ------------ |
| Array to Array | [bitround] | <https://codec.zarrs.dev/array_to_array/bitround> | &check; | &check; | bitround |
| Array to Bytes | [zfp]<br>zfpy (V2) | <https://codec.zarrs.dev/array_to_bytes/zfp> | &check; | &check; | zfp |
| | [pcodec] | <https://codec.zarrs.dev/array_to_bytes/pcodec> | &check; | &check; | pcodec |
| | [vlen] | <https://codec.zarrs.dev/array_to_bytes/vlen> | &check; | | |
| | [vlen-array] | <https://codec.zarrs.dev/array_to_bytes/vlen_array> | &check; | &check; | |
| | [vlen-bytes] | <https://codec.zarrs.dev/array_to_bytes/vlen_bytes> | &check; | &check; | |
| | [vlen-utf8] | <https://codec.zarrs.dev/array_to_bytes/vlen_utf8> | &check; | &check; | |
| Bytes to Bytes | [bz2] | <https://codec.zarrs.dev/bytes_to_bytes/bz2> | &check; | &check; | bz2 |
| | [gdeflate] | <https://codec.zarrs.dev/bytes_to_bytes/gdeflate> | &check; | | gdeflate |

[bitround]: crate::array::codec::array_to_array::bitround
[zfp]: crate::array::codec::array_to_bytes::zfp
[pcodec]: crate::array::codec::array_to_bytes::pcodec
[vlen]: crate::array::codec::array_to_bytes::vlen
[vlen_v2]: crate::array::codec::array_to_bytes::vlen_v2
[vlen-array]: crate::array::codec::array_to_bytes::vlen_array
[vlen-bytes]: crate::array::codec::array_to_bytes::vlen_bytes
[vlen-utf8]: crate::array::codec::array_to_bytes::vlen_utf8
[bz2]: crate::array::codec::bytes_to_bytes::bz2
[gdeflate]: crate::array::codec::bytes_to_bytes::gdeflate
6 changes: 5 additions & 1 deletion zarrs/src/array/codec/array_to_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
pub mod bytes;
pub mod codec_chain;
pub mod vlen;
pub mod vlen_v2;
pub mod vlen_array;
pub mod vlen_bytes;
pub mod vlen_utf8;

pub(crate) mod vlen_v2;

#[cfg(feature = "pcodec")]
pub mod pcodec;
Expand Down
5 changes: 5 additions & 0 deletions zarrs/src/array/codec/array_to_bytes/vlen_array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
//! The `vlen-array` array to bytes codec.
use crate::array::codec::array_to_bytes::vlen_v2::vlen_v2_macros;

vlen_v2_macros::vlen_v2_module!(vlen_array, vlen_array_codec, VlenArrayCodec);
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
use crate::array::codec::array_to_bytes::vlen_v2::vlen_v2_macros;

vlen_v2_macros::vlen_v2_codec!(VlenArrayCodec, "vlen-array");
5 changes: 5 additions & 0 deletions zarrs/src/array/codec/array_to_bytes/vlen_bytes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
//! The `vlen-bytes` array to bytes codec.
use crate::array::codec::array_to_bytes::vlen_v2::vlen_v2_macros;

vlen_v2_macros::vlen_v2_module!(vlen_bytes, vlen_bytes_codec, VlenBytesCodec);
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
use crate::array::codec::array_to_bytes::vlen_v2::vlen_v2_macros;

vlen_v2_macros::vlen_v2_codec!(VlenBytesCodec, "vlen-bytes");
5 changes: 5 additions & 0 deletions zarrs/src/array/codec/array_to_bytes/vlen_utf8.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
//! The `vlen-utf8` array to bytes codec.
use crate::array::codec::array_to_bytes::vlen_v2::vlen_v2_macros;

vlen_v2_macros::vlen_v2_module!(vlen_utf8, vlen_utf8_codec, VlenUtf8Codec);
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
use crate::array::codec::array_to_bytes::vlen_v2::vlen_v2_macros;

vlen_v2_macros::vlen_v2_codec!(VlenUtf8Codec, "vlen-utf8");
36 changes: 13 additions & 23 deletions zarrs/src/array/codec/array_to_bytes/vlen_v2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,17 @@
mod vlen_v2_codec;
mod vlen_v2_partial_decoder;

pub(crate) mod vlen_v2_macros;

use std::{mem::size_of, sync::Arc};

pub use vlen_v2::IDENTIFIER;
/// The identifier for the `vlen_v2` codec.
pub(crate) const IDENTIFIER: &str = "vlen_v2";
// pub use vlen_v2::IDENTIFIER;

pub use crate::metadata::v3::array::codec::vlen_v2::{
VlenV2CodecConfiguration, VlenV2CodecConfigurationV1,
};
use crate::{
array::{codec::CodecError, RawBytes},
config::global_config,
metadata::v3::array::codec::vlen_v2,
};
use crate::array::{codec::CodecError, RawBytes};

pub use vlen_v2_codec::VlenV2Codec;
pub(crate) use vlen_v2_codec::VlenV2Codec;

use crate::{
array::codec::{Codec, CodecPlugin},
Expand All @@ -40,11 +37,6 @@ inventory::submit! {

fn is_name_vlen_v2(name: &str) -> bool {
name.eq(IDENTIFIER)
|| name
== global_config()
.experimental_codec_names()
.get(IDENTIFIER)
.expect("experimental codec identifier in global map")
}

fn is_name_vlen_array(name: &str) -> bool {
Expand All @@ -60,14 +52,12 @@ fn is_name_vlen_utf8(name: &str) -> bool {
}

pub(crate) fn create_codec_vlen_v2(metadata: &MetadataV3) -> Result<Codec, PluginCreateError> {
let configuration: VlenV2CodecConfiguration = metadata
.to_configuration()
.map_err(|_| PluginMetadataInvalidError::new(IDENTIFIER, "codec", metadata.clone()))?;
let codec = Arc::new(VlenV2Codec::new_with_name_configuration(
metadata.name().to_string(),
&configuration,
));
Ok(Codec::ArrayToBytes(codec))
if metadata.configuration_is_none_or_empty() {
let codec = Arc::new(VlenV2Codec::new(metadata.name().to_string()));
Ok(Codec::ArrayToBytes(codec))
} else {
Err(PluginMetadataInvalidError::new(IDENTIFIER, "codec", metadata.clone()).into())
}
}

fn get_interleaved_bytes_and_offsets(
Expand Down
22 changes: 6 additions & 16 deletions zarrs/src/array/codec/array_to_bytes/vlen_v2/vlen_v2_codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,16 @@ use crate::{
#[cfg(feature = "async")]
use crate::array::codec::{AsyncArrayPartialDecoderTraits, AsyncBytesPartialDecoderTraits};

use super::{VlenV2CodecConfiguration, VlenV2CodecConfigurationV1};

/// The `vlen_v2` codec implementation.
#[derive(Debug, Clone)]
pub struct VlenV2Codec {
pub(crate) struct VlenV2Codec {
name: String,
}

impl VlenV2Codec {
/// Create a new `vlen_v2` codec.
#[must_use]
pub fn new(name: String) -> Self {
Self { name }
}

/// Create a new `vlen_v2` codec from configuration.
#[must_use]
pub fn new_with_name_configuration(
name: String,
_configuration: &VlenV2CodecConfiguration,
) -> Self {
// let VlenV2CodecConfiguration::V1(configuration) = configuration;
pub(crate) fn new(name: String) -> Self {
Self { name }
}
}
Expand All @@ -53,8 +41,10 @@ impl CodecTraits for VlenV2Codec {
.experimental_codec_names()
.get(&self.name)
.unwrap_or(&self.name);
let configuration = VlenV2CodecConfigurationV1 {};
Some(MetadataV3::new_with_serializable_configuration(name, &configuration).unwrap())
Some(MetadataV3::new_with_configuration(
name,
serde_json::Map::default(),
))
}

fn partial_decoder_should_cache_input(&self) -> bool {
Expand Down
176 changes: 176 additions & 0 deletions zarrs/src/array/codec/array_to_bytes/vlen_v2/vlen_v2_macros.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
/// Generates the contents of a `vlen-*` codec module.
///
/// Arguments:
/// - `$module`: name of the metadata module under `crate::metadata::v2::array::codec`
///   that provides `IDENTIFIER`.
/// - `$module_codec`: name of the private submodule that defines the codec struct.
/// - `$struct`: name of the codec struct, re-exported from `$module_codec`.
///
/// The expansion declares the submodule, re-exports `IDENTIFIER` and the codec
/// struct, and submits a `CodecPlugin` to `inventory` so the codec can be created
/// from metadata by name.
macro_rules! vlen_v2_module {
    ($module:ident, $module_codec:ident, $struct:ident) => {
        mod $module_codec;

        use std::sync::Arc;

        pub use $module::IDENTIFIER;

        pub use $module_codec::$struct;

        use crate::{
            array::codec::{Codec, CodecPlugin},
            metadata::v2::array::codec::$module,
            metadata::v3::MetadataV3,
            plugin::{PluginCreateError, PluginMetadataInvalidError},
        };

        // Register the codec.
        inventory::submit! {
            CodecPlugin::new(IDENTIFIER, is_name, create_codec)
        }

        /// Returns `true` if `name` is exactly this codec's identifier.
        fn is_name(name: &str) -> bool {
            name == IDENTIFIER
        }

        /// Creates the codec from `metadata`.
        ///
        /// The codec takes no configuration, so any non-empty configuration is
        /// rejected as invalid plugin metadata.
        fn create_codec(metadata: &MetadataV3) -> Result<Codec, PluginCreateError> {
            if !metadata.configuration_is_none_or_empty() {
                return Err(
                    PluginMetadataInvalidError::new(IDENTIFIER, "codec", metadata.clone()).into(),
                );
            }
            let array_to_bytes = Arc::new($struct::new());
            Ok(Codec::ArrayToBytes(array_to_bytes))
        }
    };
}

/// Generates a public codec struct named `$struct` that wraps the crate-private
/// `VlenV2Codec` and forwards every codec trait method to it.
///
/// Arguments:
/// - `$struct`: name of the generated codec struct.
/// - `$identifier`: the codec name (a string literal such as `"vlen-utf8"`); it is
///   passed to `VlenV2Codec::new` and spliced into the generated documentation.
///
/// Note: `macro_rules!` does not substitute metavariables inside `///` comments, so
/// the generated item docs are built with `#[doc = concat!(...)]` instead.
macro_rules! vlen_v2_codec {
    ($struct:ident,$identifier:expr) => {
        use std::sync::Arc;

        use zarrs_metadata::v3::MetadataV3;

        use crate::array::{
            codec::{
                array_to_bytes::vlen_v2::VlenV2Codec, ArrayPartialDecoderTraits,
                ArrayPartialEncoderTraits, ArrayToBytesCodecTraits, BytesPartialDecoderTraits,
                BytesPartialEncoderTraits, CodecError, CodecOptions, CodecTraits,
            },
            ArrayBytes, ArrayCodecTraits, ArrayMetadataOptions, BytesRepresentation,
            ChunkRepresentation, RawBytes, RecommendedConcurrency,
        };

        #[cfg(feature = "async")]
        use crate::array::codec::{AsyncArrayPartialDecoderTraits, AsyncBytesPartialDecoderTraits};

        #[doc = concat!("The `", $identifier, "` codec implementation.")]
        #[derive(Debug, Clone)]
        pub struct $struct {
            // Shared inner codec that performs the actual encoding/decoding.
            inner: Arc<VlenV2Codec>,
        }

        impl $struct {
            #[doc = concat!("Create a new `", $identifier, "` codec.")]
            #[must_use]
            pub fn new() -> Self {
                Self {
                    inner: Arc::new(VlenV2Codec::new($identifier.to_string())),
                }
            }
        }

        impl Default for $struct {
            fn default() -> Self {
                Self::new()
            }
        }

        // Every trait method below delegates to the wrapped `VlenV2Codec`.
        impl CodecTraits for $struct {
            fn create_metadata_opt(&self, options: &ArrayMetadataOptions) -> Option<MetadataV3> {
                self.inner.create_metadata_opt(options)
            }

            fn partial_decoder_should_cache_input(&self) -> bool {
                self.inner.partial_decoder_should_cache_input()
            }

            fn partial_decoder_decodes_all(&self) -> bool {
                self.inner.partial_decoder_decodes_all()
            }
        }

        impl ArrayCodecTraits for $struct {
            fn recommended_concurrency(
                &self,
                decoded_representation: &ChunkRepresentation,
            ) -> Result<RecommendedConcurrency, CodecError> {
                self.inner.recommended_concurrency(decoded_representation)
            }
        }

        #[cfg_attr(feature = "async", async_trait::async_trait)]
        impl ArrayToBytesCodecTraits for $struct {
            fn dynamic(self: Arc<Self>) -> Arc<dyn ArrayToBytesCodecTraits> {
                self as Arc<dyn ArrayToBytesCodecTraits>
            }

            fn encode<'a>(
                &self,
                bytes: ArrayBytes<'a>,
                decoded_representation: &ChunkRepresentation,
                options: &CodecOptions,
            ) -> Result<RawBytes<'a>, CodecError> {
                self.inner.encode(bytes, decoded_representation, options)
            }

            fn decode<'a>(
                &self,
                bytes: RawBytes<'a>,
                decoded_representation: &ChunkRepresentation,
                options: &CodecOptions,
            ) -> Result<ArrayBytes<'a>, CodecError> {
                self.inner.decode(bytes, decoded_representation, options)
            }

            // The partial decoder/encoder constructors take `self: Arc<Self>`, so the
            // inner `Arc` is cloned to hand the inner codec an owned handle.
            fn partial_decoder(
                self: Arc<Self>,
                input_handle: Arc<dyn BytesPartialDecoderTraits>,
                decoded_representation: &ChunkRepresentation,
                options: &CodecOptions,
            ) -> Result<Arc<dyn ArrayPartialDecoderTraits>, CodecError> {
                self.inner
                    .clone()
                    .partial_decoder(input_handle, decoded_representation, options)
            }

            fn partial_encoder(
                self: Arc<Self>,
                input_handle: Arc<dyn BytesPartialDecoderTraits>,
                output_handle: Arc<dyn BytesPartialEncoderTraits>,
                decoded_representation: &ChunkRepresentation,
                options: &CodecOptions,
            ) -> Result<Arc<dyn ArrayPartialEncoderTraits>, CodecError> {
                self.inner.clone().partial_encoder(
                    input_handle,
                    output_handle,
                    decoded_representation,
                    options,
                )
            }

            #[cfg(feature = "async")]
            async fn async_partial_decoder(
                self: Arc<Self>,
                input_handle: Arc<dyn AsyncBytesPartialDecoderTraits>,
                decoded_representation: &ChunkRepresentation,
                options: &CodecOptions,
            ) -> Result<Arc<dyn AsyncArrayPartialDecoderTraits>, CodecError> {
                self.inner
                    .clone()
                    .async_partial_decoder(input_handle, decoded_representation, options)
                    .await
            }

            fn compute_encoded_size(
                &self,
                decoded_representation: &ChunkRepresentation,
            ) -> Result<BytesRepresentation, CodecError> {
                self.inner.compute_encoded_size(decoded_representation)
            }
        }
    };
}

pub(crate) use vlen_v2_codec;
pub(crate) use vlen_v2_module;
1 change: 0 additions & 1 deletion zarrs/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ impl Default for Config {
#[cfg(feature = "pcodec")]
(codec::pcodec::IDENTIFIER.to_string(), "https://codec.zarrs.dev/array_to_bytes/pcodec".to_string()),
(codec::vlen::IDENTIFIER.to_string(), "https://codec.zarrs.dev/array_to_bytes/vlen".to_string()),
(codec::vlen_v2::IDENTIFIER.to_string(), "https://codec.zarrs.dev/array_to_bytes/vlen_v2".to_string()),
// Bytes to bytes
#[cfg(feature = "bz2")]
(codec::bz2::IDENTIFIER.to_string(), "https://codec.zarrs.dev/bytes_to_bytes/bz2".to_string()),
Expand Down
Loading

0 comments on commit 7851ce6

Please sign in to comment.