Skip to content

Commit

Permalink
Sharding add compute_encoded_shard_bounded_size
Browse files Browse the repository at this point in the history
  • Loading branch information
LDeakin committed Nov 9, 2023
1 parent 60a5a1c commit c067b20
Showing 1 changed file with 32 additions and 22 deletions.
54 changes: 32 additions & 22 deletions src/array/codec/array_to_bytes/sharding/sharding_codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -382,29 +382,11 @@ impl ArrayToBytesCodecTraits for ShardingCodec {
.inner_codecs
.compute_encoded_size(&chunk_representation)?;

match chunk_bytes_representation.size() {
Some(chunk_encoded_size) => {
// Get the chunks per shard
let chunks_per_shard =
calculate_chunks_per_shard(decoded_representation.shape(), &self.chunk_shape)
.map_err(|e| CodecError::Other(e.to_string()))?;
let num_chunks = chunks_per_shard.iter().product::<u64>();

// Get the index size
let index_decoded_representation =
sharding_index_decoded_representation(&chunks_per_shard);
let index_encoded_size = usize::try_from(compute_index_encoded_size(
&self.index_codecs,
&index_decoded_representation,
)?)
.unwrap();
let chunks_per_shard =
calculate_chunks_per_shard(decoded_representation.shape(), &self.chunk_shape)
.map_err(|e| CodecError::Other(e.to_string()))?;

Ok(BytesRepresentation::BoundedSize(
num_chunks * chunk_encoded_size + index_encoded_size as u64,
))
}
None => Ok(BytesRepresentation::UnboundedSize),
}
self.encoded_shard_bounded_size(&chunk_bytes_representation, &chunks_per_shard)
}
}

Expand All @@ -424,6 +406,34 @@ impl ShardingCodec {
(chunk_index, chunk_subset)
}

/// Computed the bounded size of an encoded shard from
/// - the chunk bytes representation, and
/// - the number of chunks per shard.
/// Equal to the num chunks * max chunk size + index size
fn encoded_shard_bounded_size(
&self,
chunk_bytes_representation: &BytesRepresentation,
chunks_per_shard: &[u64],
) -> Result<BytesRepresentation, CodecError> {
match chunk_bytes_representation.size() {
Some(chunk_encoded_size) => {
let num_chunks = chunks_per_shard.iter().product::<u64>();
let index_decoded_representation =
sharding_index_decoded_representation(chunks_per_shard);
let index_encoded_size = usize::try_from(compute_index_encoded_size(
&self.index_codecs,
&index_decoded_representation,
)?)
.unwrap();

Ok(BytesRepresentation::BoundedSize(
num_chunks * chunk_encoded_size + index_encoded_size as u64,
))
}
None => Ok(BytesRepresentation::UnboundedSize),
}
}

fn decode_index(
&self,
encoded_shard: &[u8],
Expand Down

0 comments on commit c067b20

Please sign in to comment.