From 1508e51f87db6140bc1183f42565a3dcd56747e9 Mon Sep 17 00:00:00 2001
From: rodri <rgracia@km3net.de>
Date: Sun, 13 Feb 2022 03:37:41 +0100
Subject: [PATCH] redo the reading with the chunks to memory approach.

---
 km3io/utils/extract_dom_rates.py | 83 ++++++++++++++++++++------------
 1 file changed, 51 insertions(+), 32 deletions(-)

diff --git a/km3io/utils/extract_dom_rates.py b/km3io/utils/extract_dom_rates.py
index 550fe8e..2253435 100644
--- a/km3io/utils/extract_dom_rates.py
+++ b/km3io/utils/extract_dom_rates.py
@@ -6,7 +6,9 @@ Options:
   -f --input_file INPUT_FILE             Input file.
   -o --output_file OUTPUT_FILE           Output file.
   -a --detector_file DETECTOR_FILE       Detector file.
+  -s --chunk_size CHUNK_SIZE             Number of slices dumped to memory at once.[default: 10000]
 """
+
 from docopt import docopt
 import km3io
 import km3pipe as kp
@@ -15,13 +17,31 @@ import os
 import numpy as np
 import h5py
 
+def create_matrix(dom_ids, dom_rates, dom_map):
+    """Return a (len(dom_map), len(dom_rates)) array, zero where no rate is given.
+
+    Cell [dom_map[d], i] is the rate paired with DOM ``d`` in the i-th entry."""
+    matrix = np.zeros((len(dom_map), len(dom_rates)))
+    for column, (entry_rates, entry_doms) in enumerate(zip(dom_rates, dom_ids)):
+        for rate, dom in zip(entry_rates, entry_doms):
+            matrix[dom_map[dom], column] = rate
+    return matrix
 
-def get_doms_rates(slices, frame):
-    raw_rates = [
-        km3io.online.get_rate(getattr(slices.rates[frame], f"ch{i}")) for i in range(31)
-    ]
-    return np.sum(raw_rates, axis=0)
-
+def append_to_hdf5(file, doms_rates_matrix, frame_indices, frame_times, index2dom):
+    """Append one chunk of rates and frame headers to the datasets of ``file``.
+
+    Row ``i`` of ``doms_rates_matrix`` is appended to the dataset named
+    ``str(index2dom[i])``; the headers go to "frame_indices" and "frame_times".
+    Every target dataset must already exist in ``file`` and be resizable.
+    """
+    named_chunks = [(str(index2dom[i]), row) for i, row in enumerate(doms_rates_matrix)]
+    named_chunks += [("frame_indices", frame_indices), ("frame_times", frame_times)]
+    for name, values in named_chunks:
+        chunk_size = len(values)
+        if chunk_size:  # an empty chunk would turn [-0:] into the whole dataset
+            dataset = file[name]
+            dataset.resize(dataset.shape[0] + chunk_size, axis=0)
+            dataset[-chunk_size:] = values
 
 def main():
     arguments = docopt(__doc__)
@@ -30,36 +50,35 @@ def main():
     for key in arguments:
         data[key.replace("-", "")] = arguments[key]
 
-    reader = km3io.OnlineReader(data["input_file"])
-    summary_slices = reader.summaryslices
-
+    # Read list of modules from detector file, and map to indices.
     detector = km3pipe.hardware.Detector(data["detector_file"])
 
-    n_slices = len(summary_slices.slices)
-
-    doms_rates = {}
-    for dom in detector.dom_ids:
-        doms_rates[dom] = np.zeros(n_slices)
-
-    for i in range(n_slices):
-        rates = get_doms_rates(summary_slices, i)
-        dom_ids = summary_slices.slices[i].dom_id
-        for j in range(len(dom_ids) - 1):
-            doms_rates[dom_ids[j]][i] = rates[j]
-
-    f = h5py.File(data["output_file"], "a")
-    f.create_dataset("frame_indices", data=np.array(summary_slices.headers.frame_index))
-    f.create_dataset(
-        "frame_times",
-        data=np.array(
-            summary_slices.headers.UTC_seconds * 1e9
-            + summary_slices.headers.UTC_16nanosecondcycles * 16
-        ),
-    )
+    dom2index = {}
+    index2dom = {}
+    for idx, dom in enumerate(detector.dom_ids):
+        dom2index[dom]=idx
+        index2dom[idx]=dom
 
-    for key, value in doms_rates.items():
-        f.create_dataset(str(key), data=value)
+    # Create the output file with one extendable dataset per module plus the
+    # two header datasets. h5py falls back to float32 when no dtype is given,
+    # which cannot hold nanosecond timestamps or large frame indices exactly.
+    with h5py.File(data["output_file"], "a") as h5:
+        for dom in dom2index:
+            h5.create_dataset(str(dom), (0,), maxshape=(None,))
+        h5.create_dataset("frame_times", (0,), maxshape=(None,), dtype="f8")
+        h5.create_dataset("frame_indices", (0,), maxshape=(None,), dtype="i8")
+        # Read the summary slices --chunk_size at a time (docopt delivers the
+        # option as a string), sum the channel rates per module and append.
+        reader = km3io.online.SummarysliceReader(data["input_file"], int(data["chunk_size"]))
+        for ss_chunk in reader:
+            frame_indices = ss_chunk.headers.frame_index
+            frame_times = ss_chunk.headers.UTC_seconds * 1e9 + ss_chunk.headers.UTC_16nanosecondcycles * 16
+            raw_rates = [km3io.online.get_rate(getattr(ss_chunk.slices, f"ch{ch}")) for ch in range(31)]
+            dom_rates = np.zeros_like(raw_rates[0])
+            for channel_rates in raw_rates:
+                dom_rates = np.add(dom_rates, channel_rates)
+            m = create_matrix(ss_chunk.slices.dom_id, dom_rates, dom2index)
+            append_to_hdf5(h5, m, frame_indices, frame_times, index2dom)
 
 if __name__ == "__main__":
     main()
-- 
GitLab