From 28e8302a0021314e26f1c12147acc52fa73338bc Mon Sep 17 00:00:00 2001
From: Stefan Reck <stefan.reck@fau.de>
Date: Wed, 7 Jul 2021 16:35:22 +0200
Subject: [PATCH] add test for indexed conc

---
 orcasong/tools/concatenate.py |  7 ++++-
 tests/test_concatenate.py     | 48 +++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/orcasong/tools/concatenate.py b/orcasong/tools/concatenate.py
index 2a29e26..73602e2 100644
--- a/orcasong/tools/concatenate.py
+++ b/orcasong/tools/concatenate.py
@@ -272,7 +272,12 @@ def get_compopts(file):
 
     """
     with h5py.File(file, 'r') as f:
-        dset = f[strip_keys(list(f.keys()))[0]]
+        # read the compopts from the first dataset that is not indexed
+        dset_names = strip_keys(list(f.keys()))
+        for dset_name in dset_names:
+            if f"{dset_name}_indices" not in dset_names:
+                break
+        dset = f[dset_name]
         comptopts = {}
         comptopts["complib"] = dset.compression
         if comptopts["complib"] == 'lzf':
diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py
index b22fcc8..302878f 100644
--- a/tests/test_concatenate.py
+++ b/tests/test_concatenate.py
@@ -3,6 +3,7 @@ from unittest import TestCase
 import numpy as np
 import h5py
 import orcasong.tools.concatenate as conc
+import os
 
 __author__ = 'Stefan Reck'
 
@@ -116,6 +117,53 @@ class TestFileConcatenator(TestCase):
                 )
 
 
+class TestConcatenateIndexed(TestCase):  # datasets "x" and "x_indices"
+    @classmethod
+    def setUpClass(cls) -> None:
+        cls.infile = tempfile.NamedTemporaryFile()  # closed in tearDownClass
+        with h5py.File(cls.infile, "w") as f:
+            cls.x = np.arange(20)
+            dset_x = f.create_dataset("x", data=cls.x, chunks=True)
+            dset_x.attrs.create("indexed", True)
+            cls.indices = np.array(
+                [(0, 5), (5, 12), (17, 3)],  # rows of (index, n_items)
+                dtype=[('index', '<i8'), ('n_items', '<i8')]
+            )
+            f.create_dataset("x_indices", data=cls.indices, chunks=True)
+
+    def setUp(self) -> None:
+        self.outfile = "temp_out.h5"  # removed again in tearDown
+        conc.concatenate([self.infile.name] * 2, outfile=self.outfile)  # same input twice
+
+    def tearDown(self) -> None:
+        if os.path.exists(self.outfile):
+            os.remove(self.outfile)
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        cls.infile.close()
+
+    def test_check_x(self):  # output "x" must equal the input x repeated twice
+        with h5py.File(self.outfile) as f_out:
+            np.testing.assert_array_equal(
+                f_out["x"],
+                np.concatenate([self.x]*2)
+            )
+
+    def test_check_x_indices_n_items(self):  # n_items column is repeated unchanged
+        with h5py.File(self.outfile) as f_out:
+            target_n_items = np.concatenate([self.indices] * 2)["n_items"]
+            np.testing.assert_array_equal(
+                f_out["x_indices"]["n_items"], target_n_items)
+
+    def test_check_x_indices_index(self):  # index column: 0, then running sum of n_items
+        with h5py.File(self.outfile) as f_out:
+            target_n_items = np.concatenate([self.indices] * 2)["n_items"]
+            target_index = np.concatenate([[0], target_n_items.cumsum()[:-1]])
+            np.testing.assert_array_equal(
+                f_out["x_indices"]["index"], target_index)
+
+
 def _create_dummy_file(filepath, columns=10, val_array=1, val_recarray=(1, 3)):
     """ Create a dummy h5 file with an array and a recarray in it. """
     with h5py.File(filepath, "w") as f:
-- 
GitLab