From 2762369cc4f2f5664a18218c284947640c80a456 Mon Sep 17 00:00:00 2001
From: Tamas Gal <tgal@km3net.de>
Date: Mon, 9 Mar 2020 11:38:10 +0100
Subject: [PATCH] Further cleanup: drop legacy reco helpers and their tests

---
 km3io/offline.py      | 344 +-----------------------------------------
 tests/test_offline.py | 106 -------------
 2 files changed, 4 insertions(+), 446 deletions(-)

diff --git a/km3io/offline.py b/km3io/offline.py
index 6d242cb..4ef4470 100644
--- a/km3io/offline.py
+++ b/km3io/offline.py
@@ -103,12 +103,14 @@ class OfflineReader:
 
     @cached_property
     def events(self):
+        """The `E` branch, containing all offline events."""
         return Branch(self._tree,
                       mapper=EVENTS_MAP,
                       subbranchmaps=SUBBRANCH_MAPS)
 
     @cached_property
     def header(self):
+        """The file header."""
         if 'Head' in self._fobj:
             header = {}
             for n, x in self._fobj['Head']._map_3c_string_2c_string_3e_.items(
@@ -118,351 +120,13 @@ class OfflineReader:
         else:
             warnings.warn("Your file header has an unsupported format")
 
-    def get_best_reco(self):
-        """returns the best reconstructed track fit data. The best fit is defined
-        as the track fit with the maximum reconstruction stages. When "nan" is
-        returned, it means that the reconstruction parameter of interest is not
-        found. for example, in the case of muon simulations: if [1, 2] are the
-        reconstruction stages, then only the fit parameters corresponding to the
-        stages [1, 2] are found in the Offline files, the remaining fit parameters
-        corresponding to the stages 3, 4, 5 are all filled with nan. 
-
-        Returns
-        -------
-        numpy recarray
-            a recarray of the best track fit data (reconstruction data).
-        """
-        keys = ", ".join(self.keys.fit_keys[:-1])
-        empty_fit_info = np.array(
-            [match for match in self._find_empty(self.tracks.fitinf)])
-        fit_info = [
-            i for i, j in zip(self.tracks.fitinf, empty_fit_info[:, 1])
-            if j is not None
-        ]
-        stages = self._get_max_reco_stages(self.tracks.rec_stages)
-        fit_data = np.array([i[j] for i, j in zip(fit_info, stages[:, 2])])
-        rows_size = len(max(fit_data, key=len))
-        equal_size_data = np.vstack([
-            np.hstack([i, np.zeros(rows_size - len(i)) + np.nan])
-            for i in fit_data
-        ])
-        return np.core.records.fromarrays(equal_size_data.transpose(),
-                                          names=keys)
-
-    def _get_max_reco_stages(self, reco_stages):
-        """find the longest reconstructed track based on the maximum size of 
-        reconstructed stages. 
-
-        Parameters
-        ----------
-        reco_stages : chunked array 
-            chunked array of all the reconstruction stages of all tracks.
-            In km3io, it is accessed with
-            km3io.OfflineReader(my_file).tracks.rec_stages .
-
-        Returns
-        -------
-        numpy array
-            array with 3 columns: *list of the maximum reco_stages
-                                  *lentgh of the maximum reco_stages
-                                  *position of the maximum reco_stages
-        """
-        empty_reco_stages = np.array(
-            [match for match in self._find_empty(reco_stages)])
-        max_reco_stages = np.array(
-            [[max(i, key=len),
-              len(max(i, key=len)),
-              i.index(max(i, key=len))]
-             for i, j in zip(reco_stages, empty_reco_stages[:, 1])
-             if j is not None])
-        return max_reco_stages
-
-    def get_reco_fit(self, stages, mc=False):
-        """construct a numpy recarray of the fit information (reconstruction
-        data) of the tracks reconstructed following the reconstruction stages
-        of interest.
-
-        Parameters
-        ----------
-        stages : list
-            list of reconstruction stages of interest. for example
-            [1, 2, 3, 4, 5].
-        mc : bool, optional
-            default is False to look for fit data in the tracks tree in offline files
-            (not the mc tracks tree). mc=True to look for fit data from the mc tracks
-            tree in offline files.
-
-        Returns
-        -------
-        numpy recarray
-            a recarray of the fit information (reconstruction data) of
-            the tracks of interest.
-
-        Raises
-        ------
-        ValueError
-            ValueError raised when the reconstruction stages of interest
-            are not found in the file.
-        """
-        keys = ", ".join(self.keys.fit_keys[:-1])
-
-        if mc is False:
-            rec_stages = np.array(
-                [match for match in self._find_rec_stages(stages, mc=False)])
-            fitinf = self.tracks.fitinf
-
-        if mc is True:
-            rec_stages = np.array(
-                [match for match in self._find_rec_stages(stages, mc=True)])
-            fitinf = self.mc_tracks.fitinf
-
-        mask = rec_stages[:, 1] != None
-
-        if np.all(rec_stages[:, 1] == None):
-            raise ValueError(
-                "The stages {} are not found in your file.".format(
-                    str(stages)))
-        else:
-            fit_data = np.array(
-                [i[k] for i, k in zip(fitinf[mask], rec_stages[:, 1][mask])])
-            rec_array = np.core.records.fromarrays(fit_data.transpose(),
-                                                   names=keys)
-            return rec_array
-
-    def get_reco_hits(self, stages, keys, mc=False):
-        """construct a dictionary of hits class data based on the reconstruction
-        stages of interest. For example, if the reconstruction stages of interest
-        are [1, 2, 3, 4, 5], then get_reco_hits method will select the hits data 
-        from the events that were reconstructed following these stages (i.e 
-        [1, 2, 3, 4, 5]).
-
-        Parameters
-        ----------
-        stages : list
-            list of reconstruction stages of interest. for example
-            [1, 2, 3, 4, 5].
-        keys : list of str
-            list of the hits class attributes.
-        mc : bool, optional
-            default is False to look for hits data in the hits tree in offline files
-            (not the mc_hits tree). mc=True to look for mc hits data in the mc hits
-            tree in offline files.
-
-        Returns
-        -------
-        dict
-            dictionary of lazyarrays containing data for each hits attribute requested.
-
-        Raises
-        ------
-        ValueError
-            ValueError raised when the reconstruction stages of interest
-            are not found in the file.
-        """
-        lazy_d = {}
-
-        if mc is False:
-            rec_stages = np.array(
-                [match for match in self._find_rec_stages(stages, mc=False)])
-            hits_data = self.hits
-
-        if mc is True:
-            rec_stages = np.array(
-                [match for match in self._find_rec_stages(stages, mc=True)])
-            hits_data = self.mc_hits
-
-        mask = rec_stages[:, 1] != None
-
-        if np.all(rec_stages[:, 1] == None):
-            raise ValueError(
-                "The stages {} are not found in your file.".format(
-                    str(stages)))
-        else:
-            for key in keys:
-                lazy_d[key] = getattr(hits_data, key)[mask]
-        return lazy_d
-
-    def get_reco_events(self, stages, keys, mc=False):
-        """construct a dictionary of events class data based on the reconstruction
-        stages of interest. For example, if the reconstruction stages of interest
-        are [1, 2, 3, 4, 5], then get_reco_events method will select the events data 
-        that were reconstructed following these stages (i.e [1, 2, 3, 4, 5]).
-
-        Parameters
-        ----------
-        stages : list
-            list of reconstruction stages of interest. for example
-            [1, 2, 3, 4, 5].
-        keys : list of str
-            list of the events class attributes.
-        mc : bool, optional
-            default is False to look for the reconstruction stages in the tracks tree
-            in offline files (not the mc tracks tree). mc=True to look for the reconstruction
-            data in the mc tracks tree in offline files.
-
-        Returns
-        -------
-        dict
-            dictionary of lazyarrays containing data for each events attribute requested.
-
-        Raises
-        ------
-        ValueError
-            ValueError raised when the reconstruction stages of interest
-            are not found in the file.
-        """
-        lazy_d = {}
-
-        if mc is False:
-            rec_stages = np.array(
-                [match for match in self._find_rec_stages(stages, mc=False)])
-
-        if mc is True:
-            rec_stages = np.array(
-                [match for match in self._find_rec_stages(stages, mc=True)])
-
-        mask = rec_stages[:, 1] != None
-
-        if np.all(rec_stages[:, 1] == None):
-            raise ValueError(
-                "The stages {} are not found in your file.".format(
-                    str(stages)))
-        else:
-            for key in keys:
-                lazy_d[key] = getattr(self.events, key)[mask]
-        return lazy_d
-
-    def get_reco_tracks(self, stages, keys, mc=False):
-        """construct a dictionary of tracks class data based on the reconstruction
-        stages of interest. For example, if the reconstruction stages of interest
-        are [1, 2, 3, 4, 5], then get_reco_tracks method will select tracks data 
-        from the events that were reconstructed following these stages (i.e 
-        [1, 2, 3, 4, 5]).
-
-        Parameters
-        ----------
-        stages : list
-            list of reconstruction stages of interest. for example
-            [1, 2, 3, 4, 5].
-        keys : list of str
-            list of the tracks class attributes.
-        mc : bool, optional
-            default is False to look for tracks data in the tracks tree in offline files
-            (not the mc tracks tree). mc=True to look for tracks data in the mc tracks
-            tree in offline files.
-
-        Returns
-        -------
-        dict
-            dictionary of lazyarrays containing data for each tracks attribute requested.
-
-        Raises
-        ------
-        ValueError
-            ValueError raised when the reconstruction stages of interest
-            are not found in the file.
-        """
-        lazy_d = {}
-
-        if mc is False:
-            rec_stages = np.array(
-                [match for match in self._find_rec_stages(stages, mc=False)])
-            tracks_data = self.tracks
-
-        if mc is True:
-            rec_stages = np.array(
-                [match for match in self._find_rec_stages(stages, mc=True)])
-            tracks_data = self.mc_tracks
-
-        mask = rec_stages[:, 1] != None
-
-        if np.all(rec_stages[:, 1] == None):
-            raise ValueError(
-                "The stages {} are not found in your file.".format(
-                    str(stages)))
-        else:
-            for key in keys:
-                lazy_d[key] = np.array([
-                    i[k] for i, k in zip(
-                        getattr(tracks_data, key)[mask], rec_stages[:,
-                                                                    1][mask])
-                ])
-
-        return lazy_d
-
-    def _find_rec_stages(self, stages, mc=False):
-        """find the index of reconstruction stages of interest in a
-        list of multiple reconstruction stages.
-
-        Parameters
-        ----------
-        stages : list
-            list of reconstruction stages of interest. for example
-            [1, 2, 3, 4, 5].
-        mc : bool, optional
-            default is False to look for reconstruction stages in the tracks tree in
-            offline files (not the mc tracks tree). mc=True to look for reconstruction
-            stages in the mc tracks tree in offline files.
-        Yields
-        ------
-        generator
-            the track id and the index of the reconstruction stages of
-            interest if found. If the reconstruction stages of interest
-            are not found, None is returned as the stages index.
-        """
-        if mc is False:
-            stages_data = self.events.tracks.rec_stages
-
-        if mc is True:
-            stages_data = self.events.mc_tracks.rec_stages
-
-        for trk_index, rec_stages in enumerate(stages_data):
-            try:
-                stages_index = rec_stages.index(stages)
-            except ValueError:
-                stages_index = None
-                yield trk_index, stages_index
-                continue
-
-            yield trk_index, stages_index
-
-    def _find_empty(self, array):
-        """finds empty lists/arrays in an awkward array
-
-        Parameters
-        ----------
-        array : awkward array
-            Awkward array of data of interest. For example:
-            km3io.OfflineReader(my_file).tracks.fitinf .
-
-        Yields
-        ------
-        generator
-            the empty list id and the index of the empty list. When
-            data structure (list) is simply empty, None is written in the
-            corresponding index. However, when data structure (list) is not
-            empty and does not contain an empty list, then False is written in the
-            corresponding index.
-        """
-        for i, rs in enumerate(array):
-            try:
-                if len(rs) == 0:
-                    j = None
-                if len(rs) != 0:
-                    j = rs.index([])
-            except ValueError:
-                j = False  # rs not empty but [] not found
-                yield i, j
-                continue
-            yield i, j
-
 
 class Usr:
     """Helper class to access AAObject `usr`` stuff"""
     def __init__(self, name, tree, index=None):
         # Here, we assume that every event has the same names in the same order
-        # to massively increase the performance. This needs triple check if it's
-        # always the case; the usr-format is simply a very bad design.
+        # to massively increase the performance. This needs a triple check if
+        # it's always the case; the usr-format is simply a very bad design.
         self._name = name
         try:
             tree['usr']  # This will raise a KeyError in old aanet files
diff --git a/tests/test_offline.py b/tests/test_offline.py
index 3d31800..1d5229a 100644
--- a/tests/test_offline.py
+++ b/tests/test_offline.py
@@ -19,108 +19,6 @@ class TestOfflineReader(unittest.TestCase):
     def test_number_events(self):
         assert self.n_events == len(self.r.events)
 
-    def test_find_empty(self):
-        fitinf = self.nu.events.tracks.fitinf
-        rec_stages = self.nu.events.tracks.rec_stages
-
-        empty_fitinf = np.array(
-            [match for match in self.nu._find_empty(fitinf)])
-        empty_stages = np.array(
-            [match for match in self.nu._find_empty(rec_stages)])
-
-        self.assertListEqual(empty_fitinf[:5, 1].tolist(),
-                             [23, 14, 14, 4, None])
-        self.assertListEqual(empty_stages[:5, 1].tolist(),
-                             [False, False, False, False, None])
-
-    def test_find_rec_stages(self):
-        stages = np.array(
-            [match for match in self.nu._find_rec_stages([1, 2, 3, 4, 5])])
-
-        self.assertListEqual(stages[:5, 1].tolist(), [0, 0, 0, 0, None])
-
-    @unittest.skip
-    def test_get_reco_fit(self):
-        JGANDALF_BETA0_RAD = [
-            0.0020367251782607574, 0.003306725805622178, 0.0057877124222254885,
-            0.015581698352185896
-        ]
-        reco_fit = self.nu.get_reco_fit([1, 2, 3, 4, 5])['JGANDALF_BETA0_RAD']
-
-        self.assertListEqual(JGANDALF_BETA0_RAD, reco_fit[:4].tolist())
-        with self.assertRaises(ValueError):
-            self.nu.get_reco_fit([1000, 4512, 5625], mc=True)
-
-    @unittest.skip
-    def test_get_reco_hits(self):
-
-        doms = self.nu.get_reco_hits([1, 2, 3, 4, 5], ["dom_id"])["dom_id"]
-
-        mc_doms = self.nu.get_reco_hits([], ["dom_id"], mc=True)["dom_id"]
-
-        self.assertEqual(doms.size, 9)
-        self.assertEqual(mc_doms.size, 10)
-
-        self.assertListEqual(doms[0][0:4].tolist(),
-                             self.nu.hits[0].dom_id[0:4].tolist())
-        self.assertListEqual(mc_doms[0][0:4].tolist(),
-                             self.nu.mc_hits[0].dom_id[0:4].tolist())
-
-        with self.assertRaises(ValueError):
-            self.nu.get_reco_hits([1000, 4512, 5625], ["dom_id"])
-
-    @unittest.skip
-    def test_get_reco_tracks(self):
-
-        pos = self.nu.get_reco_tracks([1, 2, 3, 4, 5], ["pos_x"])["pos_x"]
-        mc_pos = self.nu.get_reco_tracks([], ["pos_x"], mc=True)["pos_x"]
-
-        self.assertEqual(pos.size, 9)
-        self.assertEqual(mc_pos.size, 10)
-
-        self.assertEqual(pos[0], self.nu.tracks[0].pos_x[0])
-        self.assertEqual(mc_pos[0], self.nu.mc_tracks[0].pos_x[0])
-
-        with self.assertRaises(ValueError):
-            self.nu.get_reco_tracks([1000, 4512, 5625], ["pos_x"])
-
-    @unittest.skip
-    def test_get_reco_events(self):
-
-        hits = self.nu.get_reco_events([1, 2, 3, 4, 5], ["hits"])["hits"]
-        mc_hits = self.nu.get_reco_events([], ["mc_hits"], mc=True)["mc_hits"]
-
-        self.assertEqual(hits.size, 9)
-        self.assertEqual(mc_hits.size, 10)
-
-        self.assertListEqual(hits[0:4].tolist(),
-                             self.nu.events.hits[0:4].tolist())
-        self.assertListEqual(mc_hits[0:4].tolist(),
-                             self.nu.events.mc_hits[0:4].tolist())
-
-        with self.assertRaises(ValueError):
-            self.nu.get_reco_events([1000, 4512, 5625], ["hits"])
-
-    @unittest.skip
-    def test_get_max_reco_stages(self):
-        rec_stages = self.nu.tracks.rec_stages
-        max_reco = self.nu._get_max_reco_stages(rec_stages)
-
-        self.assertEqual(len(max_reco.tolist()), 9)
-        self.assertListEqual(max_reco[0].tolist(), [[1, 2, 3, 4, 5], 5, 0])
-
-    @unittest.skip
-    def test_best_reco(self):
-        JGANDALF_BETA1_RAD = [
-            0.0014177681261476852, 0.002094094517471032, 0.003923368624980349,
-            0.009491461076780453
-        ]
-        best = self.nu.get_best_reco()
-
-        self.assertEqual(best.size, 9)
-        self.assertEqual(best['JGANDALF_BETA1_RAD'][:4].tolist(),
-                         JGANDALF_BETA1_RAD)
-
     def test_reading_header(self):
         # head is the supported format
         head = OFFLINE_NUMUCC.header
@@ -347,10 +245,6 @@ class TestUsr(unittest.TestCase):
     def test_str(self):
         print(self.f.events.usr)
 
-    def test_nonexistent_usr(self):
-        f = OfflineReader(SAMPLES_DIR / "daq_v1.0.0.root")
-        assert not hasattr(self.f, "usr")
-
     def test_keys(self):
         self.assertListEqual([
             'RecoQuality', 'RecoNDF', 'CoC', 'ToT', 'ChargeAbove',
-- 
GitLab