Parser for raw acoustic data

6c2446fb · Massimiliano Lincetto · Tamas Gal · 54ef99bd · 6c2446fb · 6c2446fb
Commit 6c2446fb authored 3 years ago by Massimiliano Lincetto Committed by Tamas Gal 3 years ago
--- a/km3io/__init__.py
+++ b/km3io/__init__.py
@@ -11,4 +11,5 @@ with warnings.catch_warnings():

 from .offline import OfflineReader
 from .online import OnlineReader
+from .acoustics import RawAcousticReader
 from .gseagen import GSGReader
--- a/km3io/acoustics.py
+++ b/km3io/acoustics.py
+"""
+This toolkit provides an interface to read the raw acoustic binary data type as
+produced by the Acoustic Data Filter (ADF).
+
+The acoustic signals are acquired by the CLB with a 195312.5 Hz sampling
+frequency (F_S) corresponding to a period of 5.12 microseconds (16 ns * 320).
+
+The acoustic data filter processes the stream in segments (windows) of given
+size. Two consecutive segments overlap by a given number of samples. These two
+parameters are specified in the ADF configuration as:
+
+  * DAQ_ADF_ANALYSIS_WINDOW_SIZE (typ.: 131072)
+  * DAQ_ADF_ANALYSIS_WINDOW_OVERLAP (typ.: 7812)
+
+Typical values are indicated but may change in the future.
+
+When the ADF is set to dump the raw data, the overlapping segment is omitted
+so the length of the data chunk is
+
+  FRAME_LENGTH = DAQ_ADF_ANALYSIS_WINDOW_SIZE - DAQ_ADF_ANALYSIS_WINDOW_OVERLAP
+
+Summary description of the raw acoustic data format.
+
+- 4B: UTC seconds (UNIX timestamp);
+- 4B: number of 16ns cycles;
+- 4B: referred to as 'samples' corresponds to DAQ_ADF_ANALYSIS_WINDOW_SIZE
+      (typ.: 131072).
+
+The header is followed by a sequence of FRAME_LENGTH audio samples of 4B each
+(4B * FRAME_LENGTH bytes in total). Each sample is a 32 bit float PCM value.
+FRAME_LENGTH cannot be reconstructed from DAQ_ADF_ANALYSIS_WINDOW_SIZE without
+knowing DAQ_ADF_ANALYSIS_WINDOW_OVERLAP so it has to be set by the user.
+
+FRAME_LENGTH should be constant within the same file, so this approach will hold
+(FRAME_LENGTH is configurable in the constructor).
+
+Note: each file contains data from a single transducer and the DOM or base ID
+is stored in the filename only. This is not a very good design but here it is
+tentatively dealt with.
+
+WARNING: data should in general be expected to be ordered, but may not be
+contiguous in time.
+"""
+import numpy as np
+
+
+F_S = 195312.5  # sampling frequency of the acoustic stream in the CLB
+
+
+def get_dtype(FRAME_LENGTH):
+    """Build the structured dtype of one raw acoustic record.
+
+    A record is a header of three unsigned 32-bit fields (``utc_seconds``,
+    ``16ns_cycles``, ``samples``) followed by a ``frame`` field holding
+    ``FRAME_LENGTH`` 32-bit float values.
+
+    Parameters
+    ----------
+    FRAME_LENGTH : int
+        Number of float32 samples stored in the ``frame`` field.
+
+    Returns
+    -------
+    numpy.dtype
+        Structured dtype with the four fields in the order listed above.
+    """
+    header_fields = [
+        (name, np.uint32) for name in ("utc_seconds", "16ns_cycles", "samples")
+    ]
+    payload_field = ("frame", np.float32, FRAME_LENGTH)
+    return np.dtype(header_fields + [payload_field])
+
+
+class RawAcousticReader:
+    """Reader for a raw acoustic binary file.
+
+    The whole file is loaded at construction time as an array of fixed-size
+    records laid out according to ``get_dtype(FRAME_LENGTH)``.
+
+    Parameters
+    ----------
+    filepath : str or path-like
+        Location of the raw acoustic file. The ID exposed by ``id`` is cut
+        out of this path at fixed character positions counted from the end
+        of the part preceding the extension, so the filename has to follow
+        the layout of e.g. ``DOM_808956920_CH1_1608751683.bin``.
+    FRAME_LENGTH : int, optional
+        Number of samples per frame (default: 123260).
+    """
+
+    def __init__(self, filepath, FRAME_LENGTH=123260):
+        self.FRAME_LENGTH = FRAME_LENGTH
+        DATA_TYPE = get_dtype(FRAME_LENGTH)
+
+        # The data is binary, so open the file in binary mode rather than the
+        # default text mode.
+        with open(filepath, "rb") as acoufile:
+            self._data = np.fromfile(acoufile, dtype=DATA_TYPE)
+
+        # Extract the CLB ID from the filename: drop the extension, then take
+        # the 9 characters starting 24 characters before the end of the stem.
+        # str() lets path-like objects through as well as plain strings.
+        self._id = str(filepath).split(".")[-2][-24:-15]
+
+    @property
+    def id(self):
+        """ID string extracted from the file path at construction time."""
+        return self._id
+
+    @property
+    def pcm(self):
+        """Get PCM data concatenating all frames. Data may not be contiguous.
+
+        Returns a new one-dimensional float32 array on every access.
+        """
+        return self._data["frame"].flatten()
+
+    @property
+    def timestamps(self):
+        """Per-frame start times as a tuple ``(utc_seconds, 16ns_cycles)``.
+
+        Both entries are arrays with one element per frame.
+        """
+        return self._data["utc_seconds"], self._data["16ns_cycles"]
+
+    @property
+    def timebase(self):
+        """
+        Constructs sequence of times corresponding to each sample.
+
+        Each frame starts at ``utc_seconds + 16e-9 * 16ns_cycles`` and its
+        samples follow at intervals of ``1 / F_S``. Frames are laid out one
+        after the other, so the result lines up with ``pcm``.
+
+        For convenience, the time is stored as a double precision float.
+        The resolution is therefore not fixed: for UNIX times around 1.6e9 s
+        adjacent float64 values are about 0.24 microseconds apart, so sample
+        times are rounded to that granularity.
+
+        Returns
+        -------
+        numpy.ndarray
+            One-dimensional float64 array of length
+            ``FRAME_LENGTH * number_of_frames`` (empty if there are no
+            frames).
+        """
+        sample_interval = 1 / F_S
+        # Built from an integer range so that the axis always has exactly
+        # FRAME_LENGTH entries; a float-stepped arange can produce one extra
+        # element because of rounding.
+        time_axis = np.arange(self.FRAME_LENGTH) * sample_interval
+
+        frame_starts = (
+            self._data["utc_seconds"] + 16e-9 * self._data["16ns_cycles"]
+        )
+        # Broadcast (n_frames, 1) + (FRAME_LENGTH,) and flatten frame by frame.
+        return (frame_starts[:, np.newaxis] + time_axis).ravel()
+
+    def to_wav(self, filepath, gain_dB=0.0):
+        """
+        Write as wave, with optional gain.
+
+        Parameters
+        ----------
+        filepath : str or file-like
+            Destination passed on to ``scipy.io.wavfile.write``.
+        gain_dB : float, optional
+            Gain in dB; samples are multiplied by ``10 ** (0.1 * gain_dB)``.
+            A value of 0.0 (default) leaves the samples untouched.
+        """
+        from scipy.io import wavfile
+
+        # self.pcm returns a fresh flattened copy, so scaling it in place does
+        # not modify the data held by the reader.
+        pcm = self.pcm
+        if gain_dB != 0.0:
+            # NOTE(review): 10 ** (gain_dB / 10) is the power-ratio form; for
+            # an amplitude gain one would expect 10 ** (gain_dB / 20). Kept
+            # as is to preserve existing output -- confirm the intent.
+            pcm *= 10 ** (0.1 * gain_dB)
+        # The sampling rate is written as an integer, so F_S (195312.5 Hz) is
+        # truncated.
+        wavfile.write(filepath, int(F_S), pcm)
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
 black==21.6b0
-km3net-testdata>=0.2.19
+km3net-testdata>=0.2.26
 matplotlib
 memory_profiler
 numpydoc==0.9.2
@@ -9,6 +9,7 @@ pytest-cov
 pytest-flake8
 pytest-pylint
 pytest-watch
+scipy
 sphinx
 sphinx-autoapi
 sphinx-gallery>=0.1.12

--- a/tests/test_acoustics.py
+++ b/tests/test_acoustics.py
+#!/usr/bin/env python3
+import unittest
+import tempfile
+
+import numpy as np
+from km3net_testdata import data_path
+
+from km3io.acoustics import RawAcousticReader
+
+
+class TestRawAcousticReader(unittest.TestCase):
+    """Checks RawAcousticReader against a sample file from km3net-testdata."""
+
+    def setUp(self):
+        self.r = RawAcousticReader(
+            data_path("acoustics/DOM_808956920_CH1_1608751683.bin")
+        )
+
+    def test_id(self):
+        assert "808956920" == self.r.id
+
+    def test_timestamps(self):
+        assert np.allclose(
+            [[1608751679, 1608751680], [47216000, 24159200]], self.r.timestamps
+        )
+
+    def test_pcm(self):
+        assert np.allclose([0.00315881, 0.00326228, 0.00348866], self.r.pcm[:3])
+
+    def test_timebase(self):
+        timebase = self.r.timebase
+        assert 246520 == len(timebase)
+        assert np.allclose(
+            [1.60875168e09, 1.60875168e09, 1.60875168e09], list(timebase[:3])
+        )
+        # The default relative tolerance is very loose at this magnitude, so
+        # also compare against the first frame's start time plus the sample
+        # period with a tight absolute tolerance.
+        expected = 1608751679 + 16e-9 * 47216000 + np.arange(3) / 195312.5
+        assert np.allclose(expected, timebase[:3], rtol=0, atol=1e-6)
+
+    def test_to_wav(self):
+        # The context manager closes (and thereby removes) the temporary file
+        # even if the assertion fails.
+        with tempfile.NamedTemporaryFile(delete=True) as outfile:
+            self.r.to_wav(outfile)
+            outfile.seek(0)
+            assert outfile.read(4) in (b"RIFF", b"RIFX")