Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add partial Zarr V3 consolidated metadata support #55

Merged
merged 6 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `ZstdCodecConfigurationNumCodecs`
- Adds support for Zarr V2 `zstd` encoded data created with `numcodecs` < 0.13
- Add support for pcodec `Auto`, `None`, and `TryLookback` delta specs
- Add `Group::[set_]consolidated_metadata`
- Add `Node::consolidate_metadata`
- Consolidated metadata is not currently used to optimise node hierarchy requests

### Changed
- **Breaking**: Seal `Array` extension traits: `ArraySharded[Readable]Ext` and `ArrayChunkCacheExt`
Expand Down
25 changes: 25 additions & 0 deletions zarrs/src/group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

use derive_more::Display;
use thiserror::Error;
use zarrs_metadata::v3::group::ConsolidatedMetadata;
use zarrs_metadata::NodeMetadata;
use zarrs_storage::ListableStorageTraits;

Expand Down Expand Up @@ -161,6 +162,30 @@
}
}

/// Get the consolidated metadata.
///
/// Returns [`None`] if the group metadata is not Zarr V3, or if it is Zarr V3 but its
/// `consolidated_metadata` field is absent.
///
/// Consolidated metadata is not currently supported for Zarr V2 groups.
#[must_use]
pub fn consolidated_metadata(&self) -> Option<&ConsolidatedMetadata> {
if let GroupMetadata::V3(group_metadata) = &self.metadata {
// Borrow the optional field rather than cloning it.
group_metadata.consolidated_metadata.as_ref()
} else {
// Any non-V3 group metadata yields no consolidated metadata.
None

Check warning on line 173 in zarrs/src/group.rs

View check run for this annotation

Codecov / codecov/patch

zarrs/src/group.rs#L173

Added line #L173 was not covered by tests
}
}

/// Replace the consolidated metadata held in this group's metadata.
///
/// Passing [`None`] clears any consolidated metadata that was previously set.
///
/// Consolidated metadata is not currently supported for Zarr V2 groups: if the group metadata
/// is not Zarr V3, this function is a no-op.
pub fn set_consolidated_metadata(
    &mut self,
    consolidated_metadata: Option<ConsolidatedMetadata>,
) {
    let GroupMetadata::V3(v3_metadata) = &mut self.metadata else {
        // Nothing to update for non-V3 group metadata.
        return;
    };
    v3_metadata.consolidated_metadata = consolidated_metadata;
}

/// Convert the group to Zarr V3.
///
/// If the group is already Zarr V3, this is a no-op.
Expand Down
42 changes: 41 additions & 1 deletion zarrs/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@
pub(crate) use node_async::_async_get_child_nodes;
#[cfg(feature = "async")]
pub use node_async::{async_get_child_nodes, async_node_exists, async_node_exists_listable};
use zarrs_metadata::v3::group::ConsolidatedMetadataMetadata;

use std::sync::Arc;
use std::{collections::HashMap, sync::Arc};

pub use crate::metadata::NodeMetadata;
use thiserror::Error;
Expand Down Expand Up @@ -250,6 +251,7 @@
let metadata = Self::get_metadata(storage, &path, version)?;
let children = match metadata {
NodeMetadata::Array(_) => Vec::default(),
// TODO: Add consolidated metadata support
NodeMetadata::Group(_) => get_child_nodes(storage, &path)?,
};
let node = Self {
Expand Down Expand Up @@ -290,6 +292,7 @@
let metadata = Self::async_get_metadata(&storage, &path, version).await?;
let children = match metadata {
NodeMetadata::Array(_) => Vec::default(),
// TODO: Add consolidated metadata support
NodeMetadata::Group(_) => async_get_child_nodes(&storage, &path).await?,
};
let node = Self {
Expand Down Expand Up @@ -393,6 +396,43 @@
update_tree(&mut string, &self.children, 1);
string
}

/// Consolidate the metadata of all descendants of this node. Returns [`None`] for an array.
///
/// The returned map holds one entry per descendant node (children, grandchildren, and so on).
/// Each key is the descendant's path relative to this node's path with no leading `/`,
/// and each value is a clone of that descendant's metadata.
/// The metadata of this node itself is not included.
///
/// The returned [`ConsolidatedMetadataMetadata`] can be used as the `metadata` field of a [`ConsolidatedMetadata`](crate::metadata::v3::group::ConsolidatedMetadata), which is stored in [`GroupMetadataV3`](crate::metadata::v3::group::GroupMetadataV3).
#[must_use]
// The recursive helper below is declared after the early return for arrays.
#[allow(clippy::items_after_statements)]
pub fn consolidate_metadata(&self) -> Option<ConsolidatedMetadataMetadata> {
if let NodeMetadata::Array(_) = self.metadata {
// Arrays cannot have consolidated metadata
return None;

Check warning on line 408 in zarrs/src/node.rs

View check run for this annotation

Codecov / codecov/patch

zarrs/src/node.rs#L408

Added line #L408 was not covered by tests
}

// Recursively insert the metadata of `children` and all of their descendants into
// `consolidated_metadata`, keyed by path relative to `node_path`.
//
// `node_path` is passed unchanged through the recursion, so keys are always relative to
// the node that `consolidate_metadata` was called on rather than to the immediate parent.
fn update_consolidated_metadata(
node_path: &str,
consolidated_metadata: &mut ConsolidatedMetadataMetadata,
children: &[Node],
) {
for child in children {
// Panics if a child path does not start with `node_path`.
let relative_path = child
.path()
.as_str()
.strip_prefix(node_path)
.expect("child path should always include the node path");
// Drop the separator left over after removing the prefix, if there is one
// (e.g. `/a/baz` relative to `/a` leaves `/baz`; `/a` relative to `/` leaves `a`).
let relative_path = relative_path.strip_prefix('/').unwrap_or(relative_path);
let relative_path = relative_path.to_string();
consolidated_metadata.insert(relative_path, child.metadata.clone());
// Descend into the child's own children.
update_consolidated_metadata(node_path, consolidated_metadata, &child.children);
}
}
let mut consolidated_metadata = HashMap::default();
update_consolidated_metadata(
self.path().as_str(),
&mut consolidated_metadata,
&self.children,
);
Some(consolidated_metadata)
}
}

#[cfg(test)]
Expand Down
44 changes: 42 additions & 2 deletions zarrs/tests/hierarchy.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
#![cfg(feature = "filesystem")]
#![allow(missing_docs)]

use std::sync::Arc;

use zarrs::node::Node;
use zarrs_filesystem::FilesystemStore;
use zarrs::{
filesystem::FilesystemStore, group::Group, metadata::v3::group::ConsolidatedMetadata,
node::Node,
};

#[test]
fn hierarchy_tree() {
Expand All @@ -25,3 +28,40 @@ fn hierarchy_tree() {
"
);
}

// Consolidated metadata built from a node must agree with the metadata obtained by opening
// each descendant directly, and can be attached to a `Group`.
#[test]
fn consolidated_metadata() {
    let store = FilesystemStore::new("./tests/data/hierarchy.zarr")
        .unwrap()
        .sorted();
    let store = Arc::new(store);

    // Consolidate from the root: keys are descendant paths relative to `/`.
    let root = Node::open(&store, "/").unwrap();
    let root_consolidated = root.consolidate_metadata().unwrap();
    println!("{:#?}", root_consolidated);
    for relative_path in ["a", "a/baz", "a/foo", "b"] {
        let consolidated = root_consolidated.get(relative_path).unwrap();
        let node_path = format!("/{}", relative_path);
        let actual = Node::open(&store, &node_path).unwrap();
        assert_eq!(consolidated, actual.metadata());
    }

    // The opened root group reports no consolidated metadata until it is set explicitly.
    let mut root_group = Group::open(store.clone(), "/").unwrap();
    assert!(root_group.consolidated_metadata().is_none());
    let inline = ConsolidatedMetadata {
        metadata: root_consolidated,
        ..Default::default()
    };
    root_group.set_consolidated_metadata(Some(inline));
    assert!(root_group.consolidated_metadata().is_some());

    // Consolidate from a subgroup: keys are now relative to `/a`.
    let subgroup = Node::open(&store, "/a").unwrap();
    let subgroup_consolidated = subgroup.consolidate_metadata().unwrap();
    println!("{:#?}", subgroup_consolidated);
    for relative_path in ["baz", "foo"] {
        let consolidated = subgroup_consolidated.get(relative_path).unwrap();
        let node_path = format!("/a/{}", relative_path);
        let actual = Node::open(&store, &node_path).unwrap();
        assert_eq!(consolidated, actual.metadata());
    }
}
5 changes: 5 additions & 0 deletions zarrs_metadata/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
- Add `v3::group::{ConsolidatedMetadata,ConsolidatedMetadataMetadata,ConsolidatedMetadataKind}`
- Add `GroupMetadataV3::consolidated_metadata` field
- Add `GroupMetadataV3::with_consolidated_metadata` method

### Changed
- **Breaking**: Rename `DataTypeMetadataV3::Binary` to `Bytes` for compatibility with `zarr-python`
- **Breaking**: Revise `PcodecCodecConfiguration` to match `numcodecs`:
Expand Down
115 changes: 114 additions & 1 deletion zarrs_metadata/src/v3/group.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
use std::collections::HashMap;

use derive_more::Display;
use serde::{Deserialize, Serialize};

use crate::NodeMetadata;

use super::AdditionalFields;

/// Zarr group metadata (storage specification v3).
Expand All @@ -18,7 +22,7 @@
/// }
/// }
#[non_exhaustive]
#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display)]
#[derive(Serialize, Deserialize, Clone, Debug, Display)]
#[display("{}", serde_json::to_string(self).unwrap_or_default())]
pub struct GroupMetadataV3 {
/// An integer defining the version of the storage specification to which the group adheres. Must be `3`.
Expand All @@ -28,11 +32,24 @@
/// Optional user metadata.
#[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
pub attributes: serde_json::Map<String, serde_json::Value>,
/// Consolidated metadata.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub consolidated_metadata: Option<ConsolidatedMetadata>,
/// Additional fields.
#[serde(flatten)]
pub additional_fields: AdditionalFields,
}

// Equality for `GroupMetadataV3` is decided by `attributes` and `additional_fields` only.
//
// NOTE(review): `consolidated_metadata` does not take part in the comparison, so two values
// that differ only in their consolidated metadata compare equal. Presumably this is
// intentional (it is why the derive was replaced by a manual impl) — confirm.
// NOTE(review): `zarr_format` and `node_type` are not compared either; presumably they are
// fixed-value marker fields that cannot differ — confirm.
impl std::cmp::PartialEq for GroupMetadataV3 {
    fn eq(&self, other: &Self) -> bool {
        let lhs = (&self.attributes, &self.additional_fields);
        let rhs = (&other.attributes, &other.additional_fields);
        lhs == rhs
    }
}

impl Eq for GroupMetadataV3 {}

impl Default for GroupMetadataV3 {
fn default() -> Self {
Self::new()
Expand All @@ -48,6 +65,7 @@
node_type: monostate::MustBe!("group"),
attributes: serde_json::Map::new(),
additional_fields: AdditionalFields::default(),
consolidated_metadata: None,
}
}

Expand All @@ -67,4 +85,99 @@
self.additional_fields = additional_fields;
self
}

/// Set the consolidated metadata.
#[must_use]
pub fn with_consolidated_metadata(
mut self,
consolidated_metadata: Option<ConsolidatedMetadata>,
) -> Self {
self.consolidated_metadata = consolidated_metadata;
self
}

Check warning on line 97 in zarrs_metadata/src/v3/group.rs

View check run for this annotation

Codecov / codecov/patch

zarrs_metadata/src/v3/group.rs#L91-L97

Added lines #L91 - L97 were not covered by tests
}

/// Consolidated metadata of a Zarr hierarchy.
///
/// This is the type of the optional `consolidated_metadata` field of [`GroupMetadataV3`].
///
/// The `Display` output is the JSON serialisation of the value, or an empty string if
/// serialisation fails.
#[derive(Serialize, Deserialize, Clone, PartialEq, Debug, Display)]
#[display("{}", serde_json::to_string(self).unwrap_or_default())]
pub struct ConsolidatedMetadata {
/// A mapping from node path to Group or Array [`NodeMetadata`] object.
pub metadata: ConsolidatedMetadataMetadata,
/// The kind of the consolidated metadata. Must be `'inline'`. Reserved for future use.
pub kind: ConsolidatedMetadataKind,
/// The boolean literal `false`. Indicates that the field is not required to load the Zarr hierarchy.
// NOTE(review): presumably deserialisation rejects any value other than `false` — confirm against `monostate`.
pub must_understand: monostate::MustBe!(false),
}

/// The `metadata` field of `consolidated_metadata` in [`GroupMetadataV3`].
///
/// Maps a node path (as a string key) to the [`NodeMetadata`] of that node.
pub type ConsolidatedMetadataMetadata = HashMap<String, NodeMetadata>;

impl Default for ConsolidatedMetadata {
    /// Create consolidated metadata with an empty node map, the `Inline` kind, and
    /// `must_understand` set to `false`.
    fn default() -> Self {
        let metadata = HashMap::new();
        let kind = ConsolidatedMetadataKind::Inline;
        let must_understand = monostate::MustBe!(false);
        Self {
            metadata,
            kind,
            must_understand,
        }
    }
}

/// The "kind" of consolidated metadata.
///
/// `Inline` is the only kind defined here; it is serialised as the string `"inline"`.
/// The enum is marked `#[non_exhaustive]`, which allows further kinds to be added later
/// without breaking downstream `match` expressions.
#[non_exhaustive]
#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug, Display)]
pub enum ConsolidatedMetadataKind {
/// Indicates that consolidated metadata is stored inline in the root `zarr.json` object.
#[serde(rename = "inline")]
Inline,
}

#[cfg(test)]
mod tests {
use super::*;

// Group metadata carrying an inline `consolidated_metadata` object must deserialise, and the
// consolidated entry for a node must equal the same JSON parsed directly as `NodeMetadata`.
#[test]
fn group_metadata_consolidated() {
// Parse a V3 group document that embeds the metadata of one subgroup.
let group_metadata = serde_json::from_str::<GroupMetadataV3>(
r#"{
"zarr_format": 3,
"node_type": "group",
"attributes": {
"spam": "ham",
"eggs": 42
},
"consolidated_metadata": {
"metadata": {
"/subgroup": {
"zarr_format": 3,
"node_type": "group",
"attributes": {
"consolidated": "attributes"
}
}
},
"kind": "inline",
"must_understand": false
}
}"#,
)
.unwrap();
// Look the subgroup up by the exact key used in the document and compare it with the
// standalone parse of the same object.
assert_eq!(
group_metadata
.consolidated_metadata
.unwrap()
.metadata
.get("/subgroup")
.unwrap(),
&serde_json::from_str::<NodeMetadata>(
r#"{
"zarr_format": 3,
"node_type": "group",
"attributes": {
"consolidated": "attributes"
}
}"#
)
.unwrap()
);
}
}
Loading