diff --git a/orcasong/tools/concatenate.py b/orcasong/tools/concatenate.py index 2a29e26703080018cc99446295c27361774949a8..73602e2643c917a6dc57ed481d69a359117bce74 100644 --- a/orcasong/tools/concatenate.py +++ b/orcasong/tools/concatenate.py @@ -272,7 +272,12 @@ def get_compopts(file): """ with h5py.File(file, 'r') as f: - dset = f[strip_keys(list(f.keys()))[0]] + # for reading the comptopts, take the first dataset that's not indexed + dset_names = strip_keys(list(f.keys())) + for dset_name in dset_names: + if f"{dset_name}_indices" not in dset_names: + break + dset = f[dset_name] comptopts = {} comptopts["complib"] = dset.compression if comptopts["complib"] == 'lzf': diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py index b22fcc845ad90f614a89b013137f098e5e70bed4..302878f8bdb6bc83b35fb9aae4c65f38276f70e3 100644 --- a/tests/test_concatenate.py +++ b/tests/test_concatenate.py @@ -3,6 +3,7 @@ from unittest import TestCase import numpy as np import h5py import orcasong.tools.concatenate as conc +import os __author__ = 'Stefan Reck' @@ -116,6 +117,53 @@ class TestFileConcatenator(TestCase): ) +class TestConcatenateIndexed(TestCase): + @classmethod + def setUpClass(cls) -> None: + cls.infile = tempfile.NamedTemporaryFile() + with h5py.File(cls.infile, "w") as f: + cls.x = np.arange(20) + dset_x = f.create_dataset("x", data=cls.x, chunks=True) + dset_x.attrs.create("indexed", True) + cls.indices = np.array( + [(0, 5), (5, 12), (17, 3)], + dtype=[('index', '<i8'), ('n_items', '<i8')] + ) + f.create_dataset("x_indices", data=cls.indices, chunks=True) + + def setUp(self) -> None: + self.outfile = "temp_out.h5" + conc.concatenate([self.infile.name] * 2, outfile=self.outfile) + + def tearDown(self) -> None: + if os.path.exists(self.outfile): + os.remove(self.outfile) + + @classmethod + def tearDownClass(cls) -> None: + cls.infile.close() + + def test_check_x(self): + with h5py.File(self.outfile) as f_out: + np.testing.assert_array_equal( + f_out["x"], 
np.concatenate([self.x]*2) + ) + + def test_check_x_indices_n_items(self): + with h5py.File(self.outfile) as f_out: + target_n_items = np.concatenate([self.indices] * 2)["n_items"] + np.testing.assert_array_equal( + f_out["x_indices"]["n_items"], target_n_items) + + def test_check_x_indices_index(self): + with h5py.File(self.outfile) as f_out: + target_n_items = np.concatenate([self.indices] * 2)["n_items"] + target_index = np.concatenate([[0], target_n_items.cumsum()[:-1]]) + np.testing.assert_array_equal( + f_out["x_indices"]["index"], target_index) + + def _create_dummy_file(filepath, columns=10, val_array=1, val_recarray=(1, 3)): """ Create a dummy h5 file with an array and a recarray in it. """ with h5py.File(filepath, "w") as f: