Skip to content

Commit

Permalink
Add partial Zarr V3 consolidated metadata support (#55)
Browse files Browse the repository at this point in the history
  • Loading branch information
LDeakin authored Jan 6, 2025
1 parent cb436be commit 1962b14
Show file tree
Hide file tree
Showing 6 changed files with 230 additions and 4 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `ZstdCodecConfigurationNumCodecs`
- Adds support for Zarr V2 `zstd` encoded data created with `numcodecs` < 0.13
- Add support for pcodec `Auto`, `None`, and `TryLookback` delta specs
- Add `Group::[set_]consolidated_metadata`
- Add `Node::consolidate_metadata`
- Consolidated metadata is not currently used to optimise node hierarchy requests

### Changed
- **Breaking**: Seal `Array` extension traits: `ArraySharded[Readable]Ext` and `ArrayChunkCacheExt`
Expand Down
25 changes: 25 additions & 0 deletions zarrs/src/group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use std::sync::Arc;

use derive_more::Display;
use thiserror::Error;
use zarrs_metadata::v3::group::ConsolidatedMetadata;
use zarrs_metadata::NodeMetadata;
use zarrs_storage::ListableStorageTraits;

Expand Down Expand Up @@ -161,6 +162,30 @@ impl<TStorage: ?Sized> Group<TStorage> {
}
}

/// Borrow the consolidated metadata stored in the group metadata.
///
/// Returns [`None`] if the `consolidated_metadata` field is absent, or if the group is not
/// a Zarr V3 group (consolidated metadata is not currently supported for Zarr V2 groups).
#[must_use]
pub fn consolidated_metadata(&self) -> Option<&ConsolidatedMetadata> {
    // Only the V3 group metadata carries a `consolidated_metadata` field.
    let GroupMetadata::V3(v3_metadata) = &self.metadata else {
        return None;
    };
    v3_metadata.consolidated_metadata.as_ref()
}

/// Replace the consolidated metadata of a Zarr V3 group.
///
/// Passing [`None`] clears any consolidated metadata held by the group metadata.
///
/// Consolidated metadata is not currently supported for Zarr V2 groups: for such a group
/// this function does nothing.
pub fn set_consolidated_metadata(
    &mut self,
    consolidated_metadata: Option<ConsolidatedMetadata>,
) {
    // Anything other than V3 group metadata is left untouched.
    let GroupMetadata::V3(v3_metadata) = &mut self.metadata else {
        return;
    };
    v3_metadata.consolidated_metadata = consolidated_metadata;
}

/// Convert the group to Zarr V3.
///
/// If the group is already Zarr V3, this is a no-op.
Expand Down
42 changes: 41 additions & 1 deletion zarrs/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ mod node_async;
pub(crate) use node_async::_async_get_child_nodes;
#[cfg(feature = "async")]
pub use node_async::{async_get_child_nodes, async_node_exists, async_node_exists_listable};
use zarrs_metadata::v3::group::ConsolidatedMetadataMetadata;

use std::sync::Arc;
use std::{collections::HashMap, sync::Arc};

pub use crate::metadata::NodeMetadata;
use thiserror::Error;
Expand Down Expand Up @@ -250,6 +251,7 @@ impl Node {
let metadata = Self::get_metadata(storage, &path, version)?;
let children = match metadata {
NodeMetadata::Array(_) => Vec::default(),
// TODO: Add consolidated metadata support
NodeMetadata::Group(_) => get_child_nodes(storage, &path)?,
};
let node = Self {
Expand Down Expand Up @@ -290,6 +292,7 @@ impl Node {
let metadata = Self::async_get_metadata(&storage, &path, version).await?;
let children = match metadata {
NodeMetadata::Array(_) => Vec::default(),
// TODO: Add consolidated metadata support
NodeMetadata::Group(_) => async_get_child_nodes(&storage, &path).await?,
};
let node = Self {
Expand Down Expand Up @@ -393,6 +396,43 @@ impl Node {
update_tree(&mut string, &self.children, 1);
string
}

/// Gather the metadata of every descendant of this node into a single map.
///
/// Each descendant is keyed by its path relative to this node, without a leading `/`.
/// Returns [`None`] if this node is an array.
///
/// The resulting [`ConsolidatedMetadataMetadata`] can be converted into [`ConsolidatedMetadata`](crate::metadata::v3::group::ConsolidatedMetadata) in [`GroupMetadataV3`](crate::metadata::v3::group::GroupMetadataV3).
///
/// # Panics
/// Panics if the path of a descendant does not start with the path of this node.
#[must_use]
pub fn consolidate_metadata(&self) -> Option<ConsolidatedMetadataMetadata> {
    /// Insert the metadata of `descendants` (and, recursively, of their children) into
    /// `entries`, keyed by path relative to `root_path`.
    fn collect_descendants(
        root_path: &str,
        entries: &mut ConsolidatedMetadataMetadata,
        descendants: &[Node],
    ) {
        for descendant in descendants {
            let suffix = descendant
                .path()
                .as_str()
                .strip_prefix(root_path)
                .expect("child path should always include the node path");
            // Drop the separator left between the node path and the descendant, if there
            // is one (e.g. nothing is left to strip when `root_path` is `/`).
            let key = suffix.strip_prefix('/').unwrap_or(suffix).to_string();
            entries.insert(key, descendant.metadata.clone());
            collect_descendants(root_path, entries, &descendant.children);
        }
    }

    if matches!(self.metadata, NodeMetadata::Array(_)) {
        // Arrays cannot have consolidated metadata
        return None;
    }

    let mut entries = HashMap::default();
    collect_descendants(self.path().as_str(), &mut entries, &self.children);
    Some(entries)
}
}

#[cfg(test)]
Expand Down
44 changes: 42 additions & 2 deletions zarrs/tests/hierarchy.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
#![cfg(feature = "filesystem")]
#![allow(missing_docs)]

use std::sync::Arc;

use zarrs::node::Node;
use zarrs_filesystem::FilesystemStore;
use zarrs::{
filesystem::FilesystemStore, group::Group, metadata::v3::group::ConsolidatedMetadata,
node::Node,
};

#[test]
fn hierarchy_tree() {
Expand All @@ -25,3 +28,40 @@ fn hierarchy_tree() {
"
);
}

#[test]
fn consolidated_metadata() {
    let store = Arc::new(
        FilesystemStore::new("./tests/data/hierarchy.zarr")
            .unwrap()
            .sorted(),
    );

    // Consolidate from the root: every entry must match the metadata read directly
    // from the store at the corresponding absolute path.
    let root = Node::open(&store, "/").unwrap();
    let root_consolidated = root.consolidate_metadata().unwrap();
    println!("{:#?}", root_consolidated);
    for key in ["a", "a/baz", "a/foo", "b"] {
        let from_consolidated = root_consolidated.get(key).unwrap();
        let absolute_path = format!("/{}", key);
        let from_store = Node::open(&store, &absolute_path).unwrap();
        assert_eq!(from_consolidated, from_store.metadata());
    }

    // The consolidated metadata can be attached to a group that has none.
    let mut root_group = Group::open(store.clone(), "/").unwrap();
    assert!(root_group.consolidated_metadata().is_none());
    root_group.set_consolidated_metadata(Some(ConsolidatedMetadata {
        metadata: root_consolidated,
        ..Default::default()
    }));
    assert!(root_group.consolidated_metadata().is_some());

    // Consolidate from a subgroup: keys are relative to that subgroup.
    let subgroup = Node::open(&store, "/a").unwrap();
    let subgroup_consolidated = subgroup.consolidate_metadata().unwrap();
    println!("{:#?}", subgroup_consolidated);
    for key in ["baz", "foo"] {
        let from_consolidated = subgroup_consolidated.get(key).unwrap();
        let absolute_path = format!("/a/{}", key);
        let from_store = Node::open(&store, &absolute_path).unwrap();
        assert_eq!(from_consolidated, from_store.metadata());
    }
}
5 changes: 5 additions & 0 deletions zarrs_metadata/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
- Add `v3::group::{ConsolidatedMetadata,ConsolidatedMetadataMetadata,ConsolidatedMetadataKind}`
- Add `GroupMetadataV3::consolidated_metadata` field
- Add `GroupMetadataV3::with_consolidated_metadata` method

### Changed
- **Breaking**: Rename `DataTypeMetadataV3::Binary` to `Bytes` for compatibility with `zarr-python`
- **Breaking**: Revise `PcodecCodecConfiguration` to match `numcodecs`:
Expand Down
115 changes: 114 additions & 1 deletion zarrs_metadata/src/v3/group.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
use std::collections::HashMap;

use derive_more::Display;
use serde::{Deserialize, Serialize};

use crate::NodeMetadata;

use super::AdditionalFields;

/// Zarr group metadata (storage specification v3).
Expand All @@ -18,7 +22,7 @@ use super::AdditionalFields;
/// }
/// }
#[non_exhaustive]
#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display)]
#[derive(Serialize, Deserialize, Clone, Debug, Display)]
#[display("{}", serde_json::to_string(self).unwrap_or_default())]
pub struct GroupMetadataV3 {
/// An integer defining the version of the storage specification to which the group adheres. Must be `3`.
Expand All @@ -28,11 +32,24 @@ pub struct GroupMetadataV3 {
/// Optional user metadata.
#[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
pub attributes: serde_json::Map<String, serde_json::Value>,
/// Consolidated metadata.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub consolidated_metadata: Option<ConsolidatedMetadata>,
/// Additional fields.
#[serde(flatten)]
pub additional_fields: AdditionalFields,
}

impl PartialEq for GroupMetadataV3 {
    /// Two group metadata values are equal when their `attributes` and
    /// `additional_fields` are equal.
    ///
    /// The `consolidated_metadata` field does not take part in the comparison.
    fn eq(&self, rhs: &Self) -> bool {
        let lhs_fields = (&self.attributes, &self.additional_fields);
        let rhs_fields = (&rhs.attributes, &rhs.additional_fields);
        lhs_fields == rhs_fields
    }
}

impl Eq for GroupMetadataV3 {}

impl Default for GroupMetadataV3 {
fn default() -> Self {
Self::new()
Expand All @@ -48,6 +65,7 @@ impl GroupMetadataV3 {
node_type: monostate::MustBe!("group"),
attributes: serde_json::Map::new(),
additional_fields: AdditionalFields::default(),
consolidated_metadata: None,
}
}

Expand All @@ -67,4 +85,99 @@ impl GroupMetadataV3 {
self.additional_fields = additional_fields;
self
}

/// Set the consolidated metadata.
#[must_use]
pub fn with_consolidated_metadata(
mut self,
consolidated_metadata: Option<ConsolidatedMetadata>,
) -> Self {
self.consolidated_metadata = consolidated_metadata;
self
}
}

/// Consolidated metadata of a Zarr hierarchy.
///
/// This is the value of the optional `consolidated_metadata` field of [`GroupMetadataV3`].
///
/// It is displayed as its JSON serialisation, or as an empty string if serialisation fails.
#[derive(Serialize, Deserialize, Clone, PartialEq, Debug, Display)]
#[display("{}", serde_json::to_string(self).unwrap_or_default())]
pub struct ConsolidatedMetadata {
/// A mapping from node path to Group or Array [`NodeMetadata`] object.
pub metadata: ConsolidatedMetadataMetadata,
/// The kind of the consolidated metadata. Must be `inline`. Reserved for future use.
pub kind: ConsolidatedMetadataKind,
/// The boolean literal `false`. Indicates that the field is not required to load the Zarr hierarchy.
pub must_understand: monostate::MustBe!(false),
}

/// The `metadata` field of `consolidated_metadata` in [`GroupMetadataV3`].
///
/// Maps a node path to the [`NodeMetadata`] of the node at that path.
pub type ConsolidatedMetadataMetadata = HashMap<String, NodeMetadata>;

impl Default for ConsolidatedMetadata {
fn default() -> Self {
Self {
metadata: HashMap::default(),
kind: ConsolidatedMetadataKind::Inline,
must_understand: monostate::MustBe!(false),
}
}
}

/// The "kind" of consolidated metadata.
///
/// This is the type of the `kind` field of [`ConsolidatedMetadata`].
#[non_exhaustive]
#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display)]
pub enum ConsolidatedMetadataKind {
/// Indicates that consolidated metadata is stored inline in the root `zarr.json` object.
///
/// Serialised as the string `inline`.
#[serde(rename = "inline")]
Inline,
}

#[cfg(test)]
mod tests {
use super::*;

// Group metadata containing a `consolidated_metadata` object must deserialise, and the
// nested node entry must equal the same JSON deserialised directly as `NodeMetadata`.
#[test]
fn group_metadata_consolidated() {
// Group metadata with a single consolidated entry keyed by `/subgroup`.
let group_metadata = serde_json::from_str::<GroupMetadataV3>(
r#"{
"zarr_format": 3,
"node_type": "group",
"attributes": {
"spam": "ham",
"eggs": 42
},
"consolidated_metadata": {
"metadata": {
"/subgroup": {
"zarr_format": 3,
"node_type": "group",
"attributes": {
"consolidated": "attributes"
}
}
},
"kind": "inline",
"must_understand": false
}
}"#,
)
.unwrap();
// The consolidated entry is compared against the subgroup metadata parsed on its own.
assert_eq!(
group_metadata
.consolidated_metadata
.unwrap()
.metadata
.get("/subgroup")
.unwrap(),
&serde_json::from_str::<NodeMetadata>(
r#"{
"zarr_format": 3,
"node_type": "group",
"attributes": {
"consolidated": "attributes"
}
}"#
)
.unwrap()
);
}
}

0 comments on commit 1962b14

Please sign in to comment.