Skip to content

Commit

Permalink
Sentek format (#320)
Browse files Browse the repository at this point in the history
* adding Sentek test data

* Add sentek data format

* new contributor added

* contributor added

* restore deleted doc file and correct docstring

* improve auto-format detection

---------

Co-authored-by: Derrick Chambers <[email protected]>
  • Loading branch information
Shihao-Yuan and d-chambers authored Dec 21, 2023
1 parent 910f0eb commit 6d38342
Show file tree
Hide file tree
Showing 8 changed files with 184 additions and 1 deletion.
1 change: 1 addition & 0 deletions dascore/data_registry.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ conoco_segy_1.sgy 3944297d7c27dd265b40d56d4797f1a14caa5c2bed9f0af020b0f6ea193d4d
brady_hs_DAS_DTS_coords.csv b2e766136aac6516ddbb757d7dc26a8df0d5de48af03c8be769cc50b6816648f https://github.com/dasdae/test_data/raw/master/meta/brady_hs_DAS_DTS_coords.csv
dispersion_event.h5 598c8baa2a5610c930e1c003f2ba02da13f8d8686e3ccf2a034e94bfc5e1990c https://github.com/dasdae/test_data/raw/master/das/dispersion_event.h5
PoroTomo_iDAS_1.h5 967a2885e79937ac0426b2022a9c03d5f24790ecf3abbaa9a16eb28055566fc6 https://github.com/dasdae/test_data/raw/master/das/PoroTomo_iDAS_1.h5
DASDMSShot00_20230328155653619.das 12ac53f78b32d8b0e32cc674c43ff5b4c79a6c8b19de2ad577fd481679b2b7b3 https://github.com/dasdae/test_data/raw/master/das/DASDMSShot00_20230328155653619.das
11 changes: 11 additions & 0 deletions dascore/io/sentek/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""
Module for reading DAS data recorded by Sentek interrogator
Examples
--------
import dascore as dc
data_sentek = dc.spool('path_to_file.das')
"""
from .core import SentekV5
50 changes: 50 additions & 0 deletions dascore/io/sentek/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""IO module for reading Sentek's DAS data format."""
from __future__ import annotations

import numpy as np

import dascore as dc
from dascore.io import BinaryReader
from dascore.io.core import FiberIO

from .utils import _get_patch_attrs, _get_version


class SentekV5(FiberIO):
"""Support for Sentek Instrument data format."""

name = "sentek"
version = "5"
preferred_extensions = ("das",)

def read(
self,
resource: BinaryReader,
time=None,
distance=None,
**kwargs,
) -> dc.BaseSpool:
"""Read a Sentek das file, return a DataArray."""
attrs, coords, offsets = _get_patch_attrs(resource)
resource.seek(offsets[0])
array = np.fromfile(resource, dtype=np.float32, count=offsets[1] * offsets[2])
array = np.reshape(array, (offsets[1], offsets[2])).T
patch = dc.Patch(data=array, attrs=attrs, coords=coords, dims=coords.dims)
# Note: we are being a bit sloppy here in that selecting on time/distance
# doesn't actually affect how much data is read from the binary file. This
# is probably ok though since Sentek files tend to be quite small.
return dc.spool(patch).select(time=time, distance=distance)

def get_format(self, resource: BinaryReader) -> tuple[str, str] | bool:
"""Auto detect sentek format."""
return _get_version(resource)

def scan(self, resource: BinaryReader):
"""Extract metadata from sentek file."""
extras = {
"file_format": self.name,
"file_version": self.version,
"path": resource.name,
}

return [_get_patch_attrs(resource, extras=extras)[0]]
97 changes: 97 additions & 0 deletions dascore/io/sentek/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""Utilities for Sentek data format."""

from __future__ import annotations

import numpy as np

import dascore as dc
from dascore.core import get_coord, get_coord_manager


def _get_version(fid):
"""Determine if Sentek file."""
name = fid.name
# Sentek files cannot change the extension, or file name.
sw_data = name.endswith(".das")
fid.seek(0)
# There isn't anything in the header particularly useful for determining
# if it is a Sentek file, so we do what we can here.
# First check if sensor_num and measurement_count are positive and nearly
# ints.
sensor_num = np.fromfile(fid, dtype=np.float32, count=1)[0]
measurement_count = np.fromfile(fid, dtype=np.float32, count=1)[0]
_ = np.fromfile(fid, dtype=np.float32, count=1)[0] # sampling_interval
is_positive = (sensor_num > 1) and (measurement_count > 1)
sens_nearly_int = np.round(sensor_num, 5) == np.round(sensor_num)
meas_nearly_int = np.round(measurement_count, 5) == np.round(measurement_count)
nearly_ints = sens_nearly_int and meas_nearly_int
# Then check if strain_rate value is valid.
strain_rate = int(np.fromfile(fid, dtype=np.float32, count=1)[0])
proper_strain_rate = strain_rate in {0, 1}
# Note: We will need to modify this later for different versions of the
# sentek data, but for now we only support 5.
if sw_data and is_positive and proper_strain_rate and nearly_ints:
return ("sentek", "5")
return False


def _get_time_from_file_name(name) -> np.datetime64:
"""Extract time contained in the file name.
example file name: DASDMSShot00_20230328155652124.das
"""
time_str = name.split("_")[1].split(".")[0]
year = time_str[:4]
month = time_str[4:6]
day = time_str[6:8]
hour = time_str[8:10]
minute = time_str[10:12]
second = float(time_str[12:]) / 1_000
iso = f"{year}-{month}-{day}T{hour}:{minute}:{second:02f}"
return np.datetime64(iso)


def _get_patch_attrs(fid, extras=None):
"""Extracts patch metadata.
A few important fields in the header and their meaning:
sensor_num: number of channels in the sensing fiber
measurement_count: number of measurements in ONE single file
sampling_interval: sampling interval in nanosecond (delta t)
strain_rate: flag that is set when the loaded data represents strain rate
trigger_position: index position where the trigger occurs
decimation_factor: decimation factor (integer)
"""
fid.seek(0)
sensor_num = np.fromfile(fid, dtype=np.float32, count=1)[0]
measurement_count = np.fromfile(fid, dtype=np.float32, count=1)[0]
_ = np.fromfile(fid, dtype=np.float32, count=1)[0] # sampling_interval
strain_rate = np.fromfile(fid, dtype=np.float32, count=1)[0]
_ = np.fromfile(fid, dtype=np.float32, count=1)[0] # trigger_position
_ = np.fromfile(fid, dtype=np.float32, count=1)[0] # decimation_factor
# create distance coordinate
distance_start = np.fromfile(fid, dtype=np.float32, count=1)[0]
fid.seek(int(sensor_num - 1) * 4)
distance_stop = np.fromfile(fid, dtype=np.float32, count=1)[0]
distance_step = (distance_stop - distance_start) / sensor_num
dist = get_coord(start=distance_start, stop=distance_stop, step=distance_step)
# create time coord
file_time = _get_time_from_file_name(fid.name)
offset_start = np.fromfile(fid, dtype=np.float32, count=1)[0]
fid.seek(int(measurement_count - 1) * 4)
offset_stop = np.fromfile(fid, dtype=np.float32, count=1)[0]
time_start = file_time + dc.to_timedelta64(offset_start)
time_stop = file_time + dc.to_timedelta64(offset_stop)
time_step = (time_stop - time_start) / measurement_count
time = get_coord(start=time_start, stop=time_stop, step=time_step)

data_type = "strain_rate" if strain_rate else "strain"
coord_manager = get_coord_manager(
{"time": time, "distance": dist}, dims=("distance", "time")
)
attrs = dc.PatchAttrs(
coords=coord_manager, data_type=data_type, **({} if extras is None else extras)
)
offsets = fid.tell(), int(measurement_count), int(sensor_num)
return attrs, coord_manager, offsets
2 changes: 2 additions & 0 deletions docs/contributors.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ A Huge thanks to all of our contributors:

[Thomas Cullison](https:/github.com/code-cullison)

[Shihao Yuan](https://github.com/Shihao-Yuan)

[Tomas Snyder](https:/github.com/quasiStellar45)

You can find more contributor information
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ H5SIMPLE__V1_0 = "dascore.io.h5simple.core:H5Simple"
PICKLE = "dascore.io.pickle.core:PickleIO"
PRODML__V2_0 = "dascore.io.prodml.core:ProdMLV2_0"
PRODML__V2_1 = "dascore.io.prodml.core:ProdMLV2_1"
SENTEK__V5 = "dascore.io.sentek.core:SentekV5"
TDMS__V4713 = "dascore.io.tdms.core:TDMSFormatterV4713"
TERRA15__V4 = "dascore.io.terra15.core:Terra15FormatterV4"
TERRA15__V5 = "dascore.io.terra15.core:Terra15FormatterV5"
Expand Down Expand Up @@ -152,7 +153,7 @@ target-version = "py310"
fixable = ["ALL"]

# List of codes to ignore
ignore = ["D105", "D107", "D401", "D205"]
ignore = ["D105", "D107", "D401", "D205", "D200"]

[tool.ruff.mccabe]
# Unlike Flake8, default to a complexity level of 10.
Expand Down
2 changes: 2 additions & 0 deletions tests/test_io/test_common_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from dascore.io.pickle import PickleIO
from dascore.io.prodml import ProdMLV2_0, ProdMLV2_1
from dascore.io.segy import SegyV2
from dascore.io.sentek import SentekV5
from dascore.io.tdms import TDMSFormatterV4713
from dascore.io.terra15 import (
Terra15FormatterV4,
Expand Down Expand Up @@ -62,6 +63,7 @@
Terra15FormatterV6(): ("terra15_v6_test_file.hdf5",),
SegyV2(): ("conoco_segy_1.sgy",),
DASHDF5(): ("PoroTomo_iDAS_1.h5",),
SentekV5(): ("DASDMSShot00_20230328155653619.das",),
}

# This tuple is for fiber io which support a write method and can write
Expand Down
19 changes: 19 additions & 0 deletions tests/test_io/test_sentek/test_sentek.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
Tests specific to the Sentek format.
"""
import numpy as np

from dascore.io.sentek import SentekV5


class TestSentekV5:
"""Tests for Sentek format that aren;t covered by common tests."""

def test_das_extension_not_sentek(self, tmp_path_factory):
"""Ensure a non-sentek file with a das extension isn't id as sentek."""
path = tmp_path_factory.mktemp("sentek_test") / "not_sentek.das"
ar = np.random.random(10)
with path.open("wb") as fi:
np.save(fi, ar)
sentek = SentekV5()
assert not sentek.get_format(path)

0 comments on commit 6d38342

Please sign in to comment.