Skip to content

Commit

Permalink
Speed up dataset tests, compressed writes to zarr3 arrays (#963)
Browse files Browse the repository at this point in the history
* Add pytest-timestamper to debug slow tests

* add shortcut for reading from zarrita array

* smaller shard shape

* do full test again

* update test durations

* changelog

* fix after merge
  • Loading branch information
fm3 authored Nov 8, 2023
1 parent 3a4771e commit c09101f
Show file tree
Hide file tree
Showing 6 changed files with 542 additions and 458 deletions.
930 changes: 486 additions & 444 deletions webknossos/.test_durations

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions webknossos/Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ For upgrade instructions, please check the respective _Breaking Changes_ section
### Added

### Changed
- Performance improvements for reading from and writing to sharded zarr3 datasets, also speeding up the automated tests [#963](https://github.com/scalableminds/webknossos-libs/pull/963)

### Fixed

Expand Down
32 changes: 29 additions & 3 deletions webknossos/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions webknossos/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ JPype1 = { version = "^1.3.0", optional = true }
pims = { version = "^0.6.0", optional = true }
tifffile = { version = ">=2021.11.2", optional = true }
pylibCZIrw = { version = "3.5.1", source = "scm", optional = true }
pytest-timestamper = "^0.0.9"

[tool.poetry.extras]
pims = ["pims"]
Expand Down
22 changes: 19 additions & 3 deletions webknossos/tests/dataset/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2571,7 +2571,14 @@ def test_aligned_downsampling(data_format: DataFormat, output_path: Path) -> Non
num_channels=3,
data_format=input_layer.data_format,
)
test_mag = test_layer.add_mag("1")

chunks_per_shard = None
if data_format == DataFormat.Zarr3:
# Writing compressed zarr with large shard shape is slow
# compare https://github.com/scalableminds/webknossos-libs/issues/964
chunks_per_shard = (4, 4, 4)

test_mag = test_layer.add_mag("1", chunks_per_shard=chunks_per_shard)
test_mag.write(
absolute_offset=(0, 0, 0),
# assuming the layer has 3 channels:
Expand Down Expand Up @@ -2601,8 +2608,15 @@ def test_guided_downsampling(data_format: DataFormat, output_path: Path) -> None

input_dataset = Dataset.open(ds_path)
input_layer = input_dataset.get_layer("color")

chunks_per_shard = None
if data_format == DataFormat.Zarr3:
# Writing compressed zarr with large shard shape is slow
# compare https://github.com/scalableminds/webknossos-libs/issues/964
chunks_per_shard = (4, 4, 4)

# Adding additional mags to the input dataset for testing
input_layer.get_or_add_mag("2-2-1")
input_layer.add_mag("2-2-1", chunks_per_shard=chunks_per_shard)
input_layer.redownsample()
assert len(input_layer.mags) == 2
# Use the mag with the best resolution
Expand All @@ -2619,7 +2633,9 @@ def test_guided_downsampling(data_format: DataFormat, output_path: Path) -> None
data_format=input_layer.data_format,
)
# Create the same mag in the new output dataset
output_mag = output_layer.add_mag(finest_input_mag.mag)
output_mag = output_layer.add_mag(
finest_input_mag.mag, chunks_per_shard=chunks_per_shard
)
# Copying some data into the output dataset
input_data = finest_input_mag.read(absolute_offset=(0, 0, 0), size=(24, 24, 24))
output_mag.write(absolute_offset=(0, 0, 0), data=input_data)
Expand Down
14 changes: 6 additions & 8 deletions webknossos/webknossos/dataset/_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,10 +346,9 @@ def read(self, offset: Vec3IntLike, shape: Vec3IntLike) -> np.ndarray:
offset.y : (offset.y + shape.y),
offset.z : (offset.z + shape.z),
]
if data.shape != shape:
padded_data = np.zeros(
(self.info.num_channels,) + shape.to_tuple(), dtype=data.dtype
)
shape_with_channels = (self.info.num_channels,) + shape.to_tuple()
if data.shape != shape and data.shape != shape_with_channels:
padded_data = np.zeros(shape_with_channels, dtype=data.dtype)
padded_data[
:,
0 : data.shape[1],
Expand Down Expand Up @@ -591,10 +590,9 @@ def read(self, offset: Vec3IntLike, shape: Vec3IntLike) -> np.ndarray:
offset.y : (offset.y + shape.y),
offset.z : (offset.z + shape.z),
]
if data.shape != shape:
padded_data = np.zeros(
(self.info.num_channels,) + shape.to_tuple(), dtype=data.dtype
)
shape_with_channels = (self.info.num_channels,) + shape.to_tuple()
if data.shape != shape and data.shape != shape_with_channels:
padded_data = np.zeros(shape_with_channels, dtype=data.dtype)
padded_data[
:,
0 : data.shape[1],
Expand Down

0 comments on commit c09101f

Please sign in to comment.