-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* adding Sentek test data * Add sentek data format * new contributor added * contributor added * restore deleted doc file and correct docstring * improve auto-format detection --------- Co-authored-by: Derrick Chambers <[email protected]>
- Loading branch information
1 parent
910f0eb
commit 6d38342
Showing
8 changed files
with
184 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
""" | ||
Module for reading DAS data recorded by a Sentek interrogator. | ||
Examples | ||
-------- | ||
import dascore as dc | ||
data_sentek = dc.spool('path_to_file.das') | ||
""" | ||
from .core import SentekV5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
"""IO module for reading Sentek's DAS data format.""" | ||
from __future__ import annotations | ||
|
||
import numpy as np | ||
|
||
import dascore as dc | ||
from dascore.io import BinaryReader | ||
from dascore.io.core import FiberIO | ||
|
||
from .utils import _get_patch_attrs, _get_version | ||
|
||
|
||
class SentekV5(FiberIO):
    """FiberIO implementation for version 5 of the Sentek DAS file format."""

    # Format name and version reported for files handled by this class.
    name = "sentek"
    version = "5"
    # File extensions this reader expects.
    preferred_extensions = ("das",)

    def read(
        self,
        resource: BinaryReader,
        time=None,
        distance=None,
        **kwargs,
    ) -> dc.BaseSpool:
        """
        Load a Sentek das file and return it as a spool.

        Parameters
        ----------
        resource
            Open binary handle to the Sentek file.
        time
            Optional time selection, forwarded to ``select``.
        distance
            Optional distance selection, forwarded to ``select``.
        **kwargs
            Accepted for interface compatibility; not used by this reader.
        """
        attrs, coords, offsets = _get_patch_attrs(resource)
        data_start, first_count, second_count = offsets
        resource.seek(data_start)
        flat = np.fromfile(
            resource, dtype=np.float32, count=first_count * second_count
        )
        # The flat buffer is shaped (first_count, second_count) and then
        # transposed before being handed to the patch.
        data = np.reshape(flat, (first_count, second_count)).T
        patch = dc.Patch(data=data, attrs=attrs, coords=coords, dims=coords.dims)
        # The whole array is always read; time/distance only trim the result
        # afterwards. That is a little wasteful, but Sentek files tend to be
        # quite small so it is probably acceptable.
        spool = dc.spool(patch)
        return spool.select(time=time, distance=distance)

    def get_format(self, resource: BinaryReader) -> tuple[str, str] | bool:
        """Return the (name, version) pair for a Sentek file, else False."""
        return _get_version(resource)

    def scan(self, resource: BinaryReader):
        """
        Extract metadata from a Sentek file without loading its data array.

        Returns a one-element list holding the patch attributes, which
        include the file format, file version and path of the resource.
        """
        extras = dict(
            file_format=self.name,
            file_version=self.version,
            path=resource.name,
        )
        attrs, _, _ = _get_patch_attrs(resource, extras=extras)
        return [attrs]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
"""Utilities for Sentek data format.""" | ||
|
||
from __future__ import annotations | ||
|
||
import numpy as np | ||
|
||
import dascore as dc | ||
from dascore.core import get_coord, get_coord_manager | ||
|
||
|
||
def _get_version(fid):
    """
    Return the Sentek format identifier for a file, or False if not Sentek.

    The header holds nothing that uniquely identifies a Sentek file, so
    detection relies on a set of plausibility checks:

    - the file name ends with ``.das`` (Sentek files keep their extension),
    - the sensor and measurement counts are greater than one and nearly
      integer valued,
    - the strain-rate flag truncates to 0 or 1.

    Parameters
    ----------
    fid
        Open binary file handle with a ``name`` attribute.

    Returns
    -------
    ``("sentek", "5")`` when every check passes, otherwise ``False``. Files
    too short to hold the four leading float32 header values, or whose
    inspected header values are not finite, are reported as not Sentek
    rather than raising.
    """
    has_das_extension = str(fid.name).endswith(".das")
    fid.seek(0)
    # Read the four leading header values in one call; a truncated file gives
    # a shorter array instead of an IndexError further down.
    header = np.fromfile(fid, dtype=np.float32, count=4)
    if header.size < 4:
        return False
    # The third value (sampling_interval) is not used for detection.
    sensor_num, measurement_count, _, strain_rate = header
    # NaN/inf cannot describe counts or a flag, and int() would raise on them.
    inspected = (sensor_num, measurement_count, strain_rate)
    if not all(np.isfinite(value) for value in inspected):
        return False
    is_positive = (sensor_num > 1) and (measurement_count > 1)
    sens_nearly_int = np.round(sensor_num, 5) == np.round(sensor_num)
    meas_nearly_int = np.round(measurement_count, 5) == np.round(measurement_count)
    nearly_ints = sens_nearly_int and meas_nearly_int
    proper_strain_rate = int(strain_rate) in {0, 1}
    # Note: this will need extending for other versions of the sentek data,
    # but for now only version 5 is supported.
    if has_das_extension and is_positive and proper_strain_rate and nearly_ints:
        return ("sentek", "5")
    return False
|
||
|
||
def _get_time_from_file_name(name) -> np.datetime64:
    """
    Extract the time contained in a Sentek file name.

    The timestamp is the last underscore-separated token of the file name
    (directories and extension removed), laid out as ``YYYYMMDDhhmmss``
    followed by the fractional-second digits.

    Example file name: DASDMSShot00_20230328155652124.das

    Parameters
    ----------
    name
        File name or full path of the Sentek file.

    Raises
    ------
    ValueError
        If no all-digit timestamp of at least 14 characters is found.
    """
    from pathlib import Path

    # Use only the final path component so underscores in directory names
    # cannot shift which token is treated as the timestamp.
    file_name = Path(name).name
    time_str = file_name.rsplit("_", 1)[-1].split(".")[0]
    if len(time_str) < 14 or not time_str.isdigit():
        msg = f"Cannot extract a timestamp from Sentek file name: {file_name}"
        raise ValueError(msg)
    year = time_str[:4]
    month = time_str[4:6]
    day = time_str[6:8]
    hour = time_str[8:10]
    minute = time_str[10:12]
    second = time_str[12:14]
    fraction = time_str[14:]
    # Assemble the ISO string from the digits directly; this keeps the
    # two-digit seconds field and avoids a round trip through float.
    iso = f"{year}-{month}-{day}T{hour}:{minute}:{second}"
    if fraction:
        iso = f"{iso}.{fraction}"
    return np.datetime64(iso)
|
||
|
||
def _get_patch_attrs(fid, extras=None):
    """
    Build patch attributes and coordinates from a Sentek file.

    A few important fields in the header and their meaning:
        sensor_num: number of channels in the sensing fiber
        measurement_count: number of measurements in ONE single file
        sampling_interval: sampling interval in nanosecond (delta t)
        strain_rate: flag that is set when the loaded data represents strain rate
        trigger_position: index position where the trigger occurs
        decimation_factor: decimation factor (integer)

    Parameters
    ----------
    fid
        Open binary file handle; its ``name`` is parsed for the file time.
    extras
        Optional mapping of extra keyword arguments for ``dc.PatchAttrs``.

    Returns
    -------
    A tuple ``(attrs, coord_manager, offsets)`` where ``offsets`` is
    ``(fid.tell(), int(measurement_count), int(sensor_num))`` taken after
    the final read below.
    """

    def _next_float():
        """Read the next float32 value at the current file position."""
        return np.fromfile(fid, dtype=np.float32, count=1)[0]

    fid.seek(0)
    sensor_num = _next_float()
    measurement_count = _next_float()
    _next_float()  # sampling_interval (unused)
    strain_rate = _next_float()
    _next_float()  # trigger_position (unused)
    _next_float()  # decimation_factor (unused)

    # Distance coordinate.
    distance_start = _next_float()
    # NOTE(review): seek is called with a single argument here and below; for
    # standard file objects that is an absolute position, although the reads
    # above have already advanced past the header - confirm against the
    # Sentek format specification.
    fid.seek(int(sensor_num - 1) * 4)
    distance_stop = _next_float()
    distance_step = (distance_stop - distance_start) / sensor_num
    distance_coord = get_coord(
        start=distance_start, stop=distance_stop, step=distance_step
    )

    # Time coordinate: offsets read from the file are added to the time
    # parsed from the file name.
    file_time = _get_time_from_file_name(fid.name)
    offset_start = _next_float()
    fid.seek(int(measurement_count - 1) * 4)
    offset_stop = _next_float()
    time_start = file_time + dc.to_timedelta64(offset_start)
    time_stop = file_time + dc.to_timedelta64(offset_stop)
    time_step = (time_stop - time_start) / measurement_count
    time_coord = get_coord(start=time_start, stop=time_stop, step=time_step)

    coord_manager = get_coord_manager(
        {"time": time_coord, "distance": distance_coord},
        dims=("distance", "time"),
    )
    if strain_rate:
        data_type = "strain_rate"
    else:
        data_type = "strain"
    extra_kwargs = {} if extras is None else extras
    attrs = dc.PatchAttrs(coords=coord_manager, data_type=data_type, **extra_kwargs)
    offsets = (fid.tell(), int(measurement_count), int(sensor_num))
    return attrs, coord_manager, offsets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
""" | ||
Tests specific to the Sentek format. | ||
""" | ||
import numpy as np | ||
|
||
from dascore.io.sentek import SentekV5 | ||
|
||
|
||
class TestSentekV5:
    """Tests for the Sentek format that aren't covered by the common tests."""

    def test_das_extension_not_sentek(self, tmp_path_factory):
        """Ensure a non-sentek file with a das extension isn't id'd as sentek."""
        bogus_path = tmp_path_factory.mktemp("sentek_test") / "not_sentek.das"
        # Write through an open handle so the file keeps its .das name.
        with bogus_path.open("wb") as handle:
            np.save(handle, np.random.random(10))
        detected = SentekV5().get_format(bogus_path)
        assert not detected