Skip to content

Commit

Permalink
add downgrade_onda_dataset_to_v0_4 (#78)
Browse files Browse the repository at this point in the history
  • Loading branch information
jrevels authored Apr 13, 2021
1 parent 6a6f7dc commit 017a525
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 3 deletions.
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
name = "Onda"
uuid = "e853f5be-6863-11e9-128d-476edb89bfb5"
authors = ["Beacon Biosignals, Inc."]
version = "0.13.3"
version = "0.13.4"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
MsgPack = "99f44e22-a591-53d1-9472-aa23ef4bd671"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Expand All @@ -21,6 +22,7 @@ Arrow = "1.3"
CodecZstd = "0.6, 0.7"
ConstructionBase = "1.0"
DataFrames = "0.22.7"
JSON3 = "v1.8"
MsgPack = "1.1"
Tables = "1.2"
TimeSpans = "0.2.2"
Expand Down
1 change: 1 addition & 0 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,5 @@ Onda.materialize
Onda.gather
Onda.validate_on_construction
Onda.upgrade_onda_dataset_to_v0_5!
Onda.downgrade_onda_dataset_to_v0_4!
```
74 changes: 73 additions & 1 deletion src/Onda.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ module Onda
using UUIDs, Dates, Random, Mmap
using TimeSpans, ConstructionBase
using Arrow, Tables
using MsgPack, TranscodingStreams, CodecZstd
using TranscodingStreams, CodecZstd
using MsgPack, JSON3 # only used to facilitate conversion to/from Onda v0.4 datasets

#####
##### includes/exports
Expand Down Expand Up @@ -111,4 +112,75 @@ function upgrade_onda_dataset_to_v0_5!(dataset_path;
return signals, annotations
end

# Default `value_from_annotation` for `downgrade_onda_dataset_to_v0_4!`: serialize every
# property of `ann` except `recording` and `span` into a single JSON object string.
function _v0_4_json_value_from_annotation(ann)
    excluded = (:recording, :span)
    fields = Dict(name => getproperty(ann, name)
                  for name in propertynames(ann) if !(name in excluded))
    return JSON3.write(fields)
end

"""
    downgrade_onda_dataset_to_v0_4!(dataset_path, signals, annotations;
                                    verbose=true,
                                    value_from_annotation=Onda._v0_4_json_value_from_annotation,
                                    signal_file_extension_and_options_from_format=(fmt -> (fmt, nothing)))

Write an Onda-Format-v0.4-compliant `recordings.msgpack.zst` file to `dataset_path` given
Onda-Format-v0.5-compliant `signals` and `annotations` tables.

Return the dictionary that was serialized: a `Dict` mapping each recording's UUID string to
a `Dict` with `"signals"` and `"annotations"` entries.

- This function internally uses `Onda.gather`, and thus expects `signals`/`annotations` to
  support `view` for row extraction. One way to ensure this is the case is to convert
  `signals`/`annotations` to `DataFrame`s before passing them to this function.

- If `verbose` is `true`, this function will print out timestamped progress logs, and will
  emit a warning for each signal whose `file_path` does not end with
  `samples/<recording UUID>/<kind>.<extension>`.

- `value_from_annotation` is a function that takes in an `Onda.Annotation` and returns the
  string that should be written out as that annotation's value. By default, this value will
  be a JSON object string whose fields are all fields in the given annotation except for
  `recording` and `span`.

- `signal_file_extension_and_options_from_format` is a function that takes in
  `signal.file_format` and returns the `file_extension` and `file_options` fields that
  should be written out for the signal.

Note that this function does not thoroughly validate that sample data files referenced by
`signals` are in an appropriate Onda-Format-v0.4-compliant location (i.e. in
`<dataset_path>/samples/<recording UUID>/<kind>.<extension>`).
"""
function downgrade_onda_dataset_to_v0_4!(dataset_path, signals, annotations;
                                         verbose=true,
                                         value_from_annotation=_v0_4_json_value_from_annotation,
                                         signal_file_extension_and_options_from_format=(fmt -> (fmt, nothing)))
    raw_recordings = Dict{String,Dict}()
    recordings = Onda.gather(:recording, signals, annotations)
    recording_count = length(recordings)  # loop-invariant; only used for progress logs
    for (i, (uuid, (sigs, anns))) in enumerate(recordings)
        verbose && log("($i / $recording_count) converting recording $uuid...")
        # v0.4 stores signals as a map keyed by signal kind.
        raw_sigs = Dict()
        for sig in Tables.rows(sigs)
            sig = Signal(sig)
            ext, opt = signal_file_extension_and_options_from_format(sig.file_format)
            # Best-effort sanity check only; the file's actual location is never verified.
            if verbose && !endswith(sig.file_path, joinpath("samples", string(uuid), sig.kind * "." * ext))
                @warn "potentially invalid Onda Format v0.4 sample data file path: $(sig.file_path)"
            end
            raw_sigs[sig.kind] = Dict("start_nanosecond" => TimeSpans.start(sig.span).value,
                                      "stop_nanosecond" => TimeSpans.stop(sig.span).value,
                                      "channel_names" => sig.channels,
                                      "sample_unit" => sig.sample_unit,
                                      "sample_resolution_in_unit" => sig.sample_resolution_in_unit,
                                      "sample_offset_in_unit" => sig.sample_offset_in_unit,
                                      "sample_type" => sig.sample_type,
                                      "sample_rate" => sig.sample_rate,
                                      "file_extension" => ext,
                                      "file_options" => opt)
        end
        # v0.4 stores annotations as a list of (span, string value) entries.
        raw_anns = Dict[]
        for ann in Tables.rows(anns)
            ann = Annotation(ann)
            push!(raw_anns, Dict("start_nanosecond" => TimeSpans.start(ann.span).value,
                                 "stop_nanosecond" => TimeSpans.stop(ann.span).value,
                                 "value" => value_from_annotation(ann)))
        end
        raw_recordings[string(uuid)] = Dict("signals" => raw_sigs, "annotations" => raw_anns)
    end
    recordings_file_path = joinpath(dataset_path, "recordings.msgpack.zst")
    verbose && log("writing out $recordings_file_path...")
    io = IOBuffer()
    MsgPack.pack(io, [Dict("onda_format_version" => "v0.4.0", "ordered_keys" => false), raw_recordings])
    # `take!` is the public API for extracting the written bytes. Avoid reaching into
    # `IOBuffer`'s `data`/`size` fields: they are internal and their layout may change
    # between Julia versions.
    write(recordings_file_path, zstd_compress(take!(io)))
    return raw_recordings
end

end # module
18 changes: 17 additions & 1 deletion test/deprecations.jl
Original file line number Diff line number Diff line change
@@ -1,18 +1,26 @@
@testset "upgrade_onda_dataset_to_v0_5!" begin
@testset "upgrade_onda_dataset_to_v0_5!/downgrade_onda_dataset_to_v0_4!" begin
new_path = mktempdir()
old_path = joinpath(@__DIR__, "old_test_v0_3.onda")
cp(old_path, new_path; force=true)
Onda.upgrade_onda_dataset_to_v0_5!(new_path)
signals = DataFrame(read_signals(joinpath(new_path, "upgraded.onda.signals.arrow")))
annotations = DataFrame(read_annotations(joinpath(new_path, "upgraded.onda.annotations.arrow")))

downgraded_path = mktempdir()
Onda.downgrade_onda_dataset_to_v0_4!(downgraded_path, signals, annotations)
downgraded_header, downgraded_recordings = MsgPack.unpack(Onda.zstd_decompress(read(joinpath(downgraded_path, "recordings.msgpack.zst"))))
@test downgraded_header == Dict("onda_format_version" => "v0.4.0", "ordered_keys" => false)

_, old_recordings = MsgPack.unpack(Onda.zstd_decompress(read(joinpath(new_path, "recordings.msgpack.zst"))))
new_recordings = Onda.gather(:recording, signals, annotations)
for (uuid, old_recording) in old_recordings
new_signals, new_annotations = new_recordings[UUID(uuid)]
downgraded_recording = downgraded_recordings[uuid]
@test length(old_recording["signals"]) == nrow(new_signals)
@test length(old_recording["annotations"]) == nrow(new_annotations)
for (old_kind, old_signal) in old_recording["signals"]
new_signal = view(new_signals, findall(==(old_kind), new_signals.kind), :)
@test old_signal == downgraded_recording["signals"][old_kind]
@test nrow(new_signal) == 1
@test new_signal.file_path[] == joinpath("samples", uuid, old_kind * "." * old_signal["file_extension"])
@test new_signal.file_format[] == old_signal["file_extension"]
Expand All @@ -30,5 +38,13 @@
@test nrow(new_annotation) == 1
@test new_annotation.recording[] == UUID(uuid)
end
for downgraded_annotation in downgraded_recording["annotations"]
downgraded_span = TimeSpan(downgraded_annotation["start_nanosecond"], downgraded_annotation["stop_nanosecond"])
downgraded_value = Onda.JSON3.read(downgraded_annotation["value"])
new_annotation = filter(a -> a.id == UUID(downgraded_value.id) && a.span == downgraded_span && a.value == downgraded_value.value,
new_annotations)
@test nrow(new_annotation) == 1
@test new_annotation.recording[] == UUID(uuid)
end
end
end

2 comments on commit 017a525

@jrevels
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/34217

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.13.4 -m "<description of version>" 017a5254ac8466bc185ad83406eedaa22e601ac1
git push origin v0.13.4

Please sign in to comment.