diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4e7c8cc0..f5fd8b32 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add `ZstdCodecConfigurationNumCodecs`
   - Adds support for Zarr V2 `zstd` encoded data created with `numcodecs` < 0.13
 - Add support for pcodec `Auto`, `None`, and `TryLookback` delta specs
+- Add `Group::[set_]consolidated_metadata`
+- Add `Node::consolidate_metadata`
+  - Consolidated metadata is not currently used to optimise node hierarchy requests
 
 ### Changed
 - **Breaking**: Seal `Array` extension traits: `ArraySharded[Readable]Ext` and `ArrayChunkCacheExt`
diff --git a/zarrs/src/group.rs b/zarrs/src/group.rs
index c1bd3a09..54d04380 100644
--- a/zarrs/src/group.rs
+++ b/zarrs/src/group.rs
@@ -30,6 +30,7 @@ use std::sync::Arc;
 
 use derive_more::Display;
 use thiserror::Error;
+use zarrs_metadata::v3::group::ConsolidatedMetadata;
 use zarrs_metadata::NodeMetadata;
 use zarrs_storage::ListableStorageTraits;
 
@@ -161,6 +162,30 @@ impl Group {
         }
     }
 
+    /// Get the consolidated metadata. Returns [`None`] if `consolidated_metadata` is absent.
+    ///
+    /// Consolidated metadata is not currently supported for Zarr V2 groups.
+    #[must_use]
+    pub fn consolidated_metadata(&self) -> Option<&ConsolidatedMetadata> {
+        if let GroupMetadata::V3(group_metadata) = &self.metadata {
+            group_metadata.consolidated_metadata.as_ref()
+        } else {
+            None
+        }
+    }
+
+    /// Set the consolidated metadata.
+    ///
+    /// Consolidated metadata is not currently supported for Zarr V2 groups, and this function is a no-op.
+    pub fn set_consolidated_metadata(
+        &mut self,
+        consolidated_metadata: Option<ConsolidatedMetadata>,
+    ) {
+        if let GroupMetadata::V3(group_metadata) = &mut self.metadata {
+            group_metadata.consolidated_metadata = consolidated_metadata;
+        }
+    }
+
     /// Convert the group to Zarr V3.
     ///
     /// If the group is already Zarr V3, this is a no-op.
diff --git a/zarrs/src/node.rs b/zarrs/src/node.rs
index d4b02a6b..66ee83c6 100644
--- a/zarrs/src/node.rs
+++ b/zarrs/src/node.rs
@@ -27,8 +27,9 @@ mod node_async;
 pub(crate) use node_async::_async_get_child_nodes;
 #[cfg(feature = "async")]
 pub use node_async::{async_get_child_nodes, async_node_exists, async_node_exists_listable};
+use zarrs_metadata::v3::group::ConsolidatedMetadataMetadata;
 
-use std::sync::Arc;
+use std::{collections::HashMap, sync::Arc};
 
 pub use crate::metadata::NodeMetadata;
 use thiserror::Error;
@@ -250,6 +251,7 @@ impl Node {
         let metadata = Self::get_metadata(storage, &path, version)?;
         let children = match metadata {
             NodeMetadata::Array(_) => Vec::default(),
+            // TODO: Add consolidated metadata support
             NodeMetadata::Group(_) => get_child_nodes(storage, &path)?,
         };
         let node = Self {
@@ -290,6 +292,7 @@ impl Node {
         let metadata = Self::async_get_metadata(&storage, &path, version).await?;
         let children = match metadata {
             NodeMetadata::Array(_) => Vec::default(),
+            // TODO: Add consolidated metadata support
             NodeMetadata::Group(_) => async_get_child_nodes(&storage, &path).await?,
         };
         let node = Self {
@@ -393,6 +396,46 @@ impl Node {
         update_tree(&mut string, &self.children, 1);
         string
     }
+
+    /// Consolidate metadata. Returns [`None`] for an array.
+    ///
+    /// The returned map is keyed by the path of each descendant node relative to this node, without a leading `/`.
+    ///
+    /// [`ConsolidatedMetadataMetadata`] can be converted into [`ConsolidatedMetadata`](crate::metadata::v3::group::ConsolidatedMetadata) in [`GroupMetadataV3`](crate::metadata::v3::group::GroupMetadataV3).
+    #[must_use]
+    pub fn consolidate_metadata(&self) -> Option<ConsolidatedMetadataMetadata> {
+        /// Recursively insert the metadata of `children` and their descendants, keyed by path relative to `node_path`.
+        fn update_consolidated_metadata(
+            node_path: &str,
+            consolidated_metadata: &mut ConsolidatedMetadataMetadata,
+            children: &[Node],
+        ) {
+            for child in children {
+                let relative_path = child
+                    .path()
+                    .as_str()
+                    .strip_prefix(node_path)
+                    .expect("child path should always include the node path");
+                let relative_path = relative_path.strip_prefix('/').unwrap_or(relative_path);
+                let relative_path = relative_path.to_string();
+                consolidated_metadata.insert(relative_path, child.metadata.clone());
+                update_consolidated_metadata(node_path, consolidated_metadata, &child.children);
+            }
+        }
+
+        if let NodeMetadata::Array(_) = self.metadata {
+            // Arrays cannot have consolidated metadata
+            return None;
+        }
+
+        let mut consolidated_metadata = HashMap::default();
+        update_consolidated_metadata(
+            self.path().as_str(),
+            &mut consolidated_metadata,
+            &self.children,
+        );
+        Some(consolidated_metadata)
+    }
 }
 
 #[cfg(test)]
diff --git a/zarrs/tests/hierarchy.rs b/zarrs/tests/hierarchy.rs
index c314c87c..9d5e5046 100644
--- a/zarrs/tests/hierarchy.rs
+++ b/zarrs/tests/hierarchy.rs
@@ -1,9 +1,12 @@
+#![cfg(feature = "filesystem")]
 #![allow(missing_docs)]
 
 use std::sync::Arc;
 
-use zarrs::node::Node;
-use zarrs_filesystem::FilesystemStore;
+use zarrs::{
+    filesystem::FilesystemStore, group::Group, metadata::v3::group::ConsolidatedMetadata,
+    node::Node,
+};
 
 #[test]
 fn hierarchy_tree() {
@@ -25,3 +28,40 @@ fn hierarchy_tree() {
 "
     );
 }
+
+#[test]
+fn consolidated_metadata() {
+    let store = Arc::new(
+        FilesystemStore::new("./tests/data/hierarchy.zarr")
+            .unwrap()
+            .sorted(),
+    );
+    let node = Node::open(&store, "/").unwrap();
+    let consolidated_metadata = node.consolidate_metadata().unwrap();
+    println!("{:#?}", consolidated_metadata);
+
+    for relative_path in ["a", "a/baz", "a/foo", "b"] {
+        let consolidated = consolidated_metadata.get(relative_path).unwrap();
+        let node_path = format!("/{}", relative_path);
+        let actual = Node::open(&store, &node_path).unwrap();
+        assert_eq!(consolidated, actual.metadata());
+    }
+
+    let mut group = Group::open(store.clone(), "/").unwrap();
+    assert!(group.consolidated_metadata().is_none());
+    group.set_consolidated_metadata(Some(ConsolidatedMetadata {
+        metadata: consolidated_metadata,
+        ..Default::default()
+    }));
+    assert!(group.consolidated_metadata().is_some());
+
+    let node = Node::open(&store, "/a").unwrap();
+    let consolidated_metadata = node.consolidate_metadata().unwrap();
+    println!("{:#?}", consolidated_metadata);
+    for relative_path in ["baz", "foo"] {
+        let consolidated = consolidated_metadata.get(relative_path).unwrap();
+        let node_path = format!("/a/{}", relative_path);
+        let actual = Node::open(&store, &node_path).unwrap();
+        assert_eq!(consolidated, actual.metadata());
+    }
+}
diff --git a/zarrs_metadata/CHANGELOG.md b/zarrs_metadata/CHANGELOG.md
index 1c19bbc4..6b5d8561 100644
--- a/zarrs_metadata/CHANGELOG.md
+++ b/zarrs_metadata/CHANGELOG.md
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+- Add `v3::group::{ConsolidatedMetadata,ConsolidatedMetadataMetadata,ConsolidatedMetadataKind}`
+- Add `GroupMetadataV3::consolidated_metadata` field
+- Add `GroupMetadataV3::with_consolidated_metadata` method
+
 ### Changed
 - **Breaking**: Rename `DataTypeMetadataV3::Binary` to `Bytes` for compatibility with `zarr-python`
 - **Breaking**: Revise `PcodecCodecConfiguration` to match `numcodecs`:
diff --git a/zarrs_metadata/src/v3/group.rs b/zarrs_metadata/src/v3/group.rs
index cb5dc70f..a733f987 100644
--- a/zarrs_metadata/src/v3/group.rs
+++ b/zarrs_metadata/src/v3/group.rs
@@ -1,6 +1,10 @@
+use std::collections::HashMap;
+
 use derive_more::Display;
 use serde::{Deserialize, Serialize};
 
+use crate::NodeMetadata;
+
 use super::AdditionalFields;
 
 /// Zarr group metadata (storage specification v3).
@@ -18,7 +22,7 @@ use super::AdditionalFields;
 ///     }
 /// }
 #[non_exhaustive]
-#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display)]
+#[derive(Serialize, Deserialize, Clone, Debug, Display)]
 #[display("{}", serde_json::to_string(self).unwrap_or_default())]
 pub struct GroupMetadataV3 {
     /// An integer defining the version of the storage specification to which the group adheres. Must be `3`.
@@ -28,11 +32,24 @@ pub struct GroupMetadataV3 {
     /// Optional user metadata.
     #[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
     pub attributes: serde_json::Map<String, serde_json::Value>,
+    /// Consolidated metadata.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub consolidated_metadata: Option<ConsolidatedMetadata>,
     /// Additional fields.
     #[serde(flatten)]
     pub additional_fields: AdditionalFields,
 }
 
+impl std::cmp::PartialEq for GroupMetadataV3 {
+    fn eq(&self, other: &Self) -> bool {
+        self.attributes == other.attributes
+            // `consolidated_metadata` is deliberately not compared (its type is `PartialEq` but not `Eq`)
+            && self.additional_fields == other.additional_fields
+    }
+}
+
+impl Eq for GroupMetadataV3 {}
+
 impl Default for GroupMetadataV3 {
     fn default() -> Self {
         Self::new()
@@ -48,6 +65,7 @@ impl GroupMetadataV3 {
             node_type: monostate::MustBe!("group"),
             attributes: serde_json::Map::new(),
             additional_fields: AdditionalFields::default(),
+            consolidated_metadata: None,
         }
     }
 
@@ -67,4 +85,99 @@ impl GroupMetadataV3 {
         self.additional_fields = additional_fields;
         self
     }
+
+    /// Set the consolidated metadata.
+    #[must_use]
+    pub fn with_consolidated_metadata(
+        mut self,
+        consolidated_metadata: Option<ConsolidatedMetadata>,
+    ) -> Self {
+        self.consolidated_metadata = consolidated_metadata;
+        self
+    }
+}
+
+/// Consolidated metadata of a Zarr hierarchy.
+#[derive(Serialize, Deserialize, Clone, PartialEq, Debug, Display)]
+#[display("{}", serde_json::to_string(self).unwrap_or_default())]
+pub struct ConsolidatedMetadata {
+    /// A mapping from node path to Group or Array [`NodeMetadata`] object.
+    pub metadata: ConsolidatedMetadataMetadata,
+    /// The kind of the consolidated metadata. Must be `'inline'`. Reserved for future use.
+    pub kind: ConsolidatedMetadataKind,
+    /// The boolean literal `false`. Indicates that the field is not required to load the Zarr hierarchy.
+    pub must_understand: monostate::MustBe!(false),
+}
+
+/// The `metadata` field of `consolidated_metadata` in [`GroupMetadataV3`].
+pub type ConsolidatedMetadataMetadata = HashMap<String, NodeMetadata>;
+
+impl Default for ConsolidatedMetadata {
+    fn default() -> Self {
+        Self {
+            metadata: HashMap::default(),
+            kind: ConsolidatedMetadataKind::Inline,
+            must_understand: monostate::MustBe!(false),
+        }
+    }
+}
+
+/// The "kind" of consolidated metadata.
+#[non_exhaustive]
+#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display)]
+pub enum ConsolidatedMetadataKind {
+    /// Indicates that consolidated metadata is stored inline in the root `zarr.json` object.
+    #[serde(rename = "inline")]
+    Inline,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn group_metadata_consolidated() {
+        let group_metadata = serde_json::from_str::<GroupMetadataV3>(
+            r#"{
+            "zarr_format": 3,
+            "node_type": "group",
+            "attributes": {
+                "spam": "ham",
+                "eggs": 42
+            },
+            "consolidated_metadata": {
+                "metadata": {
+                    "/subgroup": {
+                        "zarr_format": 3,
+                        "node_type": "group",
+                        "attributes": {
+                            "consolidated": "attributes"
+                        }
+                    }
+                },
+                "kind": "inline",
+                "must_understand": false
+            }
+        }"#,
+        )
+        .unwrap();
+        assert_eq!(
+            group_metadata
+                .consolidated_metadata
+                .unwrap()
+                .metadata
+                .get("/subgroup")
+                .unwrap(),
+            &serde_json::from_str::<NodeMetadata>(
+                r#"{
+                    "zarr_format": 3,
+                    "node_type": "group",
+                    "attributes": {
+                        "consolidated": "attributes"
+                    }
+                }"#
+            )
+            .unwrap()
+        );
+    }
 }