Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sentek format #320

Merged
merged 8 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dascore/data_registry.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ conoco_segy_1.sgy 3944297d7c27dd265b40d56d4797f1a14caa5c2bed9f0af020b0f6ea193d4d
brady_hs_DAS_DTS_coords.csv b2e766136aac6516ddbb757d7dc26a8df0d5de48af03c8be769cc50b6816648f https://github.com/dasdae/test_data/raw/master/meta/brady_hs_DAS_DTS_coords.csv
dispersion_event.h5 598c8baa2a5610c930e1c003f2ba02da13f8d8686e3ccf2a034e94bfc5e1990c https://github.com/dasdae/test_data/raw/master/das/dispersion_event.h5
PoroTomo_iDAS_1.h5 967a2885e79937ac0426b2022a9c03d5f24790ecf3abbaa9a16eb28055566fc6 https://github.com/dasdae/test_data/raw/master/das/PoroTomo_iDAS_1.h5
DASDMSShot00_20230328155653619.das 12ac53f78b32d8b0e32cc674c43ff5b4c79a6c8b19de2ad577fd481679b2b7b3 https://github.com/dasdae/test_data/raw/master/das/DASDMSShot00_20230328155653619.das
11 changes: 11 additions & 0 deletions dascore/io/sentek/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""
Module for reading DAS data recorded by Sentek interrogator

Examples
--------

import dascore as dc

data_sentek = dc.spool('path_to_file.das')
"""
from .core import SentekV5
50 changes: 50 additions & 0 deletions dascore/io/sentek/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""IO module for reading Sentek's DAS data format."""
from __future__ import annotations

import numpy as np

import dascore as dc
from dascore.io import BinaryReader
from dascore.io.core import FiberIO

from .utils import _get_patch_attrs, _get_version


class SentekV5(FiberIO):
"""Support for Sentek Instrument data format."""

name = "sentek"
version = "5"
preferred_extensions = ("das",)

def read(
self,
resource: BinaryReader,
time=None,
distance=None,
**kwargs,
) -> dc.BaseSpool:
"""Read a Sentek das file, return a DataArray."""
attrs, coords, offsets = _get_patch_attrs(resource)
resource.seek(offsets[0])
array = np.fromfile(resource, dtype=np.float32, count=offsets[1] * offsets[2])
array = np.reshape(array, (offsets[1], offsets[2])).T
patch = dc.Patch(data=array, attrs=attrs, coords=coords, dims=coords.dims)
# Note: we are being a bit sloppy here in that selecting on time/distance
# doesn't actually affect how much data is read from the binary file. This
# is probably ok though since Sentek files tend to be quite small.
return dc.spool(patch).select(time=time, distance=distance)

def get_format(self, resource: BinaryReader) -> tuple[str, str] | bool:
"""Auto detect sentek format."""
return _get_version(resource)

def scan(self, resource: BinaryReader):
"""Extract metadata from sentek file."""
extras = {
"file_format": self.name,
"file_version": self.version,
"path": resource.name,
}

return [_get_patch_attrs(resource, extras=extras)[0]]
97 changes: 97 additions & 0 deletions dascore/io/sentek/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""Utilities for Sentek data format."""

from __future__ import annotations

import numpy as np

import dascore as dc
from dascore.core import get_coord, get_coord_manager


def _get_version(fid):
"""Determine if Sentek file."""
name = fid.name
# Sentek files cannot change the extension, or file name.
sw_data = name.endswith(".das")
fid.seek(0)
# There isn't anything in the header particularly useful for determining
# if it is a Sentek file, so we do what we can here.
# First check if sensor_num and measurement_count are positive and nearly
# ints.
sensor_num = np.fromfile(fid, dtype=np.float32, count=1)[0]
measurement_count = np.fromfile(fid, dtype=np.float32, count=1)[0]
_ = np.fromfile(fid, dtype=np.float32, count=1)[0] # sampling_interval
is_positive = (sensor_num > 1) and (measurement_count > 1)
sens_nearly_int = np.round(sensor_num, 5) == np.round(sensor_num)
meas_nearly_int = np.round(measurement_count, 5) == np.round(measurement_count)
nearly_ints = sens_nearly_int and meas_nearly_int
# Then check if strain_rate value is valid.
strain_rate = int(np.fromfile(fid, dtype=np.float32, count=1)[0])
proper_strain_rate = strain_rate in {0, 1}
# Note: We will need to modify this later for different versions of the
# sentek data, but for now we only support 5.
if sw_data and is_positive and proper_strain_rate and nearly_ints:
return ("sentek", "5")
return False


def _get_time_from_file_name(name) -> np.datetime64:
"""Extract time contained in the file name.

example file name: DASDMSShot00_20230328155652124.das
"""
time_str = name.split("_")[1].split(".")[0]
year = time_str[:4]
month = time_str[4:6]
day = time_str[6:8]
hour = time_str[8:10]
minute = time_str[10:12]
second = float(time_str[12:]) / 1_000
iso = f"{year}-{month}-{day}T{hour}:{minute}:{second:02f}"
return np.datetime64(iso)


def _get_patch_attrs(fid, extras=None):
"""Extracts patch metadata.

A few important fields in the header and their meaning:

sensor_num: number of channels in the sensing fiber
measurement_count: number of measurements in ONE single file
sampling_interval: sampling interval in nanosecond (delta t)
strain_rate: flag that is set when the loaded data represents strain rate
trigger_position: index position where the trigger occurs
decimation_factor: decimation factor (integer)
"""
fid.seek(0)
sensor_num = np.fromfile(fid, dtype=np.float32, count=1)[0]
measurement_count = np.fromfile(fid, dtype=np.float32, count=1)[0]
_ = np.fromfile(fid, dtype=np.float32, count=1)[0] # sampling_interval
strain_rate = np.fromfile(fid, dtype=np.float32, count=1)[0]
_ = np.fromfile(fid, dtype=np.float32, count=1)[0] # trigger_position
_ = np.fromfile(fid, dtype=np.float32, count=1)[0] # decimation_factor
# create distance coordinate
distance_start = np.fromfile(fid, dtype=np.float32, count=1)[0]
fid.seek(int(sensor_num - 1) * 4)
distance_stop = np.fromfile(fid, dtype=np.float32, count=1)[0]
distance_step = (distance_stop - distance_start) / sensor_num
dist = get_coord(start=distance_start, stop=distance_stop, step=distance_step)
# create time coord
file_time = _get_time_from_file_name(fid.name)
offset_start = np.fromfile(fid, dtype=np.float32, count=1)[0]
fid.seek(int(measurement_count - 1) * 4)
offset_stop = np.fromfile(fid, dtype=np.float32, count=1)[0]
time_start = file_time + dc.to_timedelta64(offset_start)
time_stop = file_time + dc.to_timedelta64(offset_stop)
time_step = (time_stop - time_start) / measurement_count
time = get_coord(start=time_start, stop=time_stop, step=time_step)

data_type = "strain_rate" if strain_rate else "strain"
coord_manager = get_coord_manager(
{"time": time, "distance": dist}, dims=("distance", "time")
)
attrs = dc.PatchAttrs(
coords=coord_manager, data_type=data_type, **({} if extras is None else extras)
)
offsets = fid.tell(), int(measurement_count), int(sensor_num)
return attrs, coord_manager, offsets
2 changes: 2 additions & 0 deletions docs/contributors.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ A Huge thanks to all of our contributors:

[Thomas Cullison](https:/github.com/code-cullison)

[Shihao Yuan](https://github.com/Shihao-Yuan)

[Tomas Snyder](https:/github.com/quasiStellar45)

You can find more contributor information
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ H5SIMPLE__V1_0 = "dascore.io.h5simple.core:H5Simple"
PICKLE = "dascore.io.pickle.core:PickleIO"
PRODML__V2_0 = "dascore.io.prodml.core:ProdMLV2_0"
PRODML__V2_1 = "dascore.io.prodml.core:ProdMLV2_1"
SENTEK__V5 = "dascore.io.sentek.core:SentekV5"
TDMS__V4713 = "dascore.io.tdms.core:TDMSFormatterV4713"
TERRA15__V4 = "dascore.io.terra15.core:Terra15FormatterV4"
TERRA15__V5 = "dascore.io.terra15.core:Terra15FormatterV5"
Expand Down Expand Up @@ -152,7 +153,7 @@ target-version = "py310"
fixable = ["ALL"]

# List of codes to ignore
ignore = ["D105", "D107", "D401", "D205"]
ignore = ["D105", "D107", "D401", "D205", "D200"]

[tool.ruff.mccabe]
# Unlike Flake8, default to a complexity level of 10.
Expand Down
2 changes: 2 additions & 0 deletions tests/test_io/test_common_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from dascore.io.pickle import PickleIO
from dascore.io.prodml import ProdMLV2_0, ProdMLV2_1
from dascore.io.segy import SegyV2
from dascore.io.sentek import SentekV5
from dascore.io.tdms import TDMSFormatterV4713
from dascore.io.terra15 import (
Terra15FormatterV4,
Expand Down Expand Up @@ -62,6 +63,7 @@
Terra15FormatterV6(): ("terra15_v6_test_file.hdf5",),
SegyV2(): ("conoco_segy_1.sgy",),
DASHDF5(): ("PoroTomo_iDAS_1.h5",),
SentekV5(): ("DASDMSShot00_20230328155653619.das",),
}

# This tuple is for fiber io which support a write method and can write
Expand Down
19 changes: 19 additions & 0 deletions tests/test_io/test_sentek/test_sentek.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
Tests specific to the Sentek format.
"""
import numpy as np

from dascore.io.sentek import SentekV5


class TestSentekV5:
"""Tests for Sentek format that aren;t covered by common tests."""

def test_das_extension_not_sentek(self, tmp_path_factory):
"""Ensure a non-sentek file with a das extension isn't id as sentek."""
path = tmp_path_factory.mktemp("sentek_test") / "not_sentek.das"
ar = np.random.random(10)
with path.open("wb") as fi:
np.save(fi, ar)
sentek = SentekV5()
assert not sentek.get_format(path)
Loading