From 2762369cc4f2f5664a18218c284947640c80a456 Mon Sep 17 00:00:00 2001 From: Tamas Gal <tgal@km3net.de> Date: Mon, 9 Mar 2020 11:38:10 +0100 Subject: [PATCH] Further cleanup --- km3io/offline.py | 344 +----------------------------------------- tests/test_offline.py | 106 ------------- 2 files changed, 4 insertions(+), 446 deletions(-) diff --git a/km3io/offline.py b/km3io/offline.py index 6d242cb..4ef4470 100644 --- a/km3io/offline.py +++ b/km3io/offline.py @@ -103,12 +103,14 @@ class OfflineReader: @cached_property def events(self): + """The `E` branch, containing all offline events.""" return Branch(self._tree, mapper=EVENTS_MAP, subbranchmaps=SUBBRANCH_MAPS) @cached_property def header(self): + """The file header""" if 'Head' in self._fobj: header = {} for n, x in self._fobj['Head']._map_3c_string_2c_string_3e_.items( @@ -118,351 +120,13 @@ class OfflineReader: else: warnings.warn("Your file header has an unsupported format") - def get_best_reco(self): - """returns the best reconstructed track fit data. The best fit is defined - as the track fit with the maximum reconstruction stages. When "nan" is - returned, it means that the reconstruction parameter of interest is not - found. for example, in the case of muon simulations: if [1, 2] are the - reconstruction stages, then only the fit parameters corresponding to the - stages [1, 2] are found in the Offline files, the remaining fit parameters - corresponding to the stages 3, 4, 5 are all filled with nan. - - Returns - ------- - numpy recarray - a recarray of the best track fit data (reconstruction data). - """ - keys = ", ".join(self.keys.fit_keys[:-1]) - empty_fit_info = np.array( - [match for match in self._find_empty(self.tracks.fitinf)]) - fit_info = [ - i for i, j in zip(self.tracks.fitinf, empty_fit_info[:, 1]) - if j is not None - ] - stages = self._get_max_reco_stages(self.tracks.rec_stages) - fit_data = np.array([i[j] for i, j in zip(fit_info, stages[:, 2])]) - rows_size = len(max(fit_data, key=len)) - equal_size_data = np.vstack([ - np.hstack([i, np.zeros(rows_size - len(i)) + np.nan]) - for i in fit_data - ]) - return np.core.records.fromarrays(equal_size_data.transpose(), - names=keys) - - def _get_max_reco_stages(self, reco_stages): - """find the longest reconstructed track based on the maximum size of - reconstructed stages. - - Parameters - ---------- - reco_stages : chunked array - chunked array of all the reconstruction stages of all tracks. - In km3io, it is accessed with - km3io.OfflineReader(my_file).tracks.rec_stages . - - Returns - ------- - numpy array - array with 3 columns: *list of the maximum reco_stages - *lentgh of the maximum reco_stages - *position of the maximum reco_stages - """ - empty_reco_stages = np.array( - [match for match in self._find_empty(reco_stages)]) - max_reco_stages = np.array( - [[max(i, key=len), - len(max(i, key=len)), - i.index(max(i, key=len))] - for i, j in zip(reco_stages, empty_reco_stages[:, 1]) - if j is not None]) - return max_reco_stages - - def get_reco_fit(self, stages, mc=False): - """construct a numpy recarray of the fit information (reconstruction - data) of the tracks reconstructed following the reconstruction stages - of interest. - - Parameters - ---------- - stages : list - list of reconstruction stages of interest. for example - [1, 2, 3, 4, 5]. - mc : bool, optional - default is False to look for fit data in the tracks tree in offline files - (not the mc tracks tree). mc=True to look for fit data from the mc tracks - tree in offline files. - - Returns - ------- - numpy recarray - a recarray of the fit information (reconstruction data) of - the tracks of interest. - - Raises - ------ - ValueError - ValueError raised when the reconstruction stages of interest - are not found in the file. - """ - keys = ", ".join(self.keys.fit_keys[:-1]) - - if mc is False: - rec_stages = np.array( - [match for match in self._find_rec_stages(stages, mc=False)]) - fitinf = self.tracks.fitinf - - if mc is True: - rec_stages = np.array( - [match for match in self._find_rec_stages(stages, mc=True)]) - fitinf = self.mc_tracks.fitinf - - mask = rec_stages[:, 1] != None - - if np.all(rec_stages[:, 1] == None): - raise ValueError( - "The stages {} are not found in your file.".format( - str(stages))) - else: - fit_data = np.array( - [i[k] for i, k in zip(fitinf[mask], rec_stages[:, 1][mask])]) - rec_array = np.core.records.fromarrays(fit_data.transpose(), - names=keys) - return rec_array - - def get_reco_hits(self, stages, keys, mc=False): - """construct a dictionary of hits class data based on the reconstruction - stages of interest. For example, if the reconstruction stages of interest - are [1, 2, 3, 4, 5], then get_reco_hits method will select the hits data - from the events that were reconstructed following these stages (i.e - [1, 2, 3, 4, 5]). - - Parameters - ---------- - stages : list - list of reconstruction stages of interest. for example - [1, 2, 3, 4, 5]. - keys : list of str - list of the hits class attributes. - mc : bool, optional - default is False to look for hits data in the hits tree in offline files - (not the mc_hits tree). mc=True to look for mc hits data in the mc hits - tree in offline files. - - Returns - ------- - dict - dictionary of lazyarrays containing data for each hits attribute requested. - - Raises - ------ - ValueError - ValueError raised when the reconstruction stages of interest - are not found in the file. - """ - lazy_d = {} - - if mc is False: - rec_stages = np.array( - [match for match in self._find_rec_stages(stages, mc=False)]) - hits_data = self.hits - - if mc is True: - rec_stages = np.array( - [match for match in self._find_rec_stages(stages, mc=True)]) - hits_data = self.mc_hits - - mask = rec_stages[:, 1] != None - - if np.all(rec_stages[:, 1] == None): - raise ValueError( - "The stages {} are not found in your file.".format( - str(stages))) - else: - for key in keys: - lazy_d[key] = getattr(hits_data, key)[mask] - return lazy_d - - def get_reco_events(self, stages, keys, mc=False): - """construct a dictionary of events class data based on the reconstruction - stages of interest. For example, if the reconstruction stages of interest - are [1, 2, 3, 4, 5], then get_reco_events method will select the events data - that were reconstructed following these stages (i.e [1, 2, 3, 4, 5]). - - Parameters - ---------- - stages : list - list of reconstruction stages of interest. for example - [1, 2, 3, 4, 5]. - keys : list of str - list of the events class attributes. - mc : bool, optional - default is False to look for the reconstruction stages in the tracks tree - in offline files (not the mc tracks tree). mc=True to look for the reconstruction - data in the mc tracks tree in offline files. - - Returns - ------- - dict - dictionary of lazyarrays containing data for each events attribute requested. - - Raises - ------ - ValueError - ValueError raised when the reconstruction stages of interest - are not found in the file. - """ - lazy_d = {} - - if mc is False: - rec_stages = np.array( - [match for match in self._find_rec_stages(stages, mc=False)]) - - if mc is True: - rec_stages = np.array( - [match for match in self._find_rec_stages(stages, mc=True)]) - - mask = rec_stages[:, 1] != None - - if np.all(rec_stages[:, 1] == None): - raise ValueError( - "The stages {} are not found in your file.".format( - str(stages))) - else: - for key in keys: - lazy_d[key] = getattr(self.events, key)[mask] - return lazy_d - - def get_reco_tracks(self, stages, keys, mc=False): - """construct a dictionary of tracks class data based on the reconstruction - stages of interest. For example, if the reconstruction stages of interest - are [1, 2, 3, 4, 5], then get_reco_tracks method will select tracks data - from the events that were reconstructed following these stages (i.e - [1, 2, 3, 4, 5]). - - Parameters - ---------- - stages : list - list of reconstruction stages of interest. for example - [1, 2, 3, 4, 5]. - keys : list of str - list of the tracks class attributes. - mc : bool, optional - default is False to look for tracks data in the tracks tree in offline files - (not the mc tracks tree). mc=True to look for tracks data in the mc tracks - tree in offline files. - - Returns - ------- - dict - dictionary of lazyarrays containing data for each tracks attribute requested. - - Raises - ------ - ValueError - ValueError raised when the reconstruction stages of interest - are not found in the file. - """ - lazy_d = {} - - if mc is False: - rec_stages = np.array( - [match for match in self._find_rec_stages(stages, mc=False)]) - tracks_data = self.tracks - - if mc is True: - rec_stages = np.array( - [match for match in self._find_rec_stages(stages, mc=True)]) - tracks_data = self.mc_tracks - - mask = rec_stages[:, 1] != None - - if np.all(rec_stages[:, 1] == None): - raise ValueError( - "The stages {} are not found in your file.".format( - str(stages))) - else: - for key in keys: - lazy_d[key] = np.array([ - i[k] for i, k in zip( - getattr(tracks_data, key)[mask], rec_stages[:, - 1][mask]) - ]) - - return lazy_d - - def _find_rec_stages(self, stages, mc=False): - """find the index of reconstruction stages of interest in a - list of multiple reconstruction stages. - - Parameters - ---------- - stages : list - list of reconstruction stages of interest. for example - [1, 2, 3, 4, 5]. - mc : bool, optional - default is False to look for reconstruction stages in the tracks tree in - offline files (not the mc tracks tree). mc=True to look for reconstruction - stages in the mc tracks tree in offline files. - Yields - ------ - generator - the track id and the index of the reconstruction stages of - interest if found. If the reconstruction stages of interest - are not found, None is returned as the stages index. - """ - if mc is False: - stages_data = self.events.tracks.rec_stages - - if mc is True: - stages_data = self.events.mc_tracks.rec_stages - - for trk_index, rec_stages in enumerate(stages_data): - try: - stages_index = rec_stages.index(stages) - except ValueError: - stages_index = None - yield trk_index, stages_index - continue - - yield trk_index, stages_index - - def _find_empty(self, array): - """finds empty lists/arrays in an awkward array - - Parameters - ---------- - array : awkward array - Awkward array of data of interest. For example: - km3io.OfflineReader(my_file).tracks.fitinf . - - Yields - ------ - generator - the empty list id and the index of the empty list. When - data structure (list) is simply empty, None is written in the - corresponding index. However, when data structure (list) is not - empty and does not contain an empty list, then False is written in the - corresponding index. - """ - for i, rs in enumerate(array): - try: - if len(rs) == 0: - j = None - if len(rs) != 0: - j = rs.index([]) - except ValueError: - j = False # rs not empty but [] not found - yield i, j - continue - yield i, j - class Usr: """Helper class to access AAObject `usr`` stuff""" def __init__(self, name, tree, index=None): # Here, we assume that every event has the same names in the same order - # to massively increase the performance. This needs triple check if it's - # always the case; the usr-format is simply a very bad design. + # to massively increase the performance. This needs triple check if + # it's always the case; the usr-format is simply a very bad design. self._name = name try: tree['usr'] # This will raise a KeyError in old aanet files diff --git a/tests/test_offline.py b/tests/test_offline.py index 3d31800..1d5229a 100644 --- a/tests/test_offline.py +++ b/tests/test_offline.py @@ -19,108 +19,6 @@ class TestOfflineReader(unittest.TestCase): def test_number_events(self): assert self.n_events == len(self.r.events) - def test_find_empty(self): - fitinf = self.nu.events.tracks.fitinf - rec_stages = self.nu.events.tracks.rec_stages - - empty_fitinf = np.array( - [match for match in self.nu._find_empty(fitinf)]) - empty_stages = np.array( - [match for match in self.nu._find_empty(rec_stages)]) - - self.assertListEqual(empty_fitinf[:5, 1].tolist(), - [23, 14, 14, 4, None]) - self.assertListEqual(empty_stages[:5, 1].tolist(), - [False, False, False, False, None]) - - def test_find_rec_stages(self): - stages = np.array( - [match for match in self.nu._find_rec_stages([1, 2, 3, 4, 5])]) - - self.assertListEqual(stages[:5, 1].tolist(), [0, 0, 0, 0, None]) - - @unittest.skip - def test_get_reco_fit(self): - JGANDALF_BETA0_RAD = [ - 0.0020367251782607574, 0.003306725805622178, 0.0057877124222254885, - 0.015581698352185896 - ] - reco_fit = self.nu.get_reco_fit([1, 2, 3, 4, 5])['JGANDALF_BETA0_RAD'] - - self.assertListEqual(JGANDALF_BETA0_RAD, reco_fit[:4].tolist()) - with self.assertRaises(ValueError): - self.nu.get_reco_fit([1000, 4512, 5625], mc=True) - - @unittest.skip - def test_get_reco_hits(self): - - doms = self.nu.get_reco_hits([1, 2, 3, 4, 5], ["dom_id"])["dom_id"] - - mc_doms = self.nu.get_reco_hits([], ["dom_id"], mc=True)["dom_id"] - - self.assertEqual(doms.size, 9) - self.assertEqual(mc_doms.size, 10) - - self.assertListEqual(doms[0][0:4].tolist(), - self.nu.hits[0].dom_id[0:4].tolist()) - self.assertListEqual(mc_doms[0][0:4].tolist(), - self.nu.mc_hits[0].dom_id[0:4].tolist()) - - with self.assertRaises(ValueError): - self.nu.get_reco_hits([1000, 4512, 5625], ["dom_id"]) - - @unittest.skip - def test_get_reco_tracks(self): - - pos = self.nu.get_reco_tracks([1, 2, 3, 4, 5], ["pos_x"])["pos_x"] - mc_pos = self.nu.get_reco_tracks([], ["pos_x"], mc=True)["pos_x"] - - self.assertEqual(pos.size, 9) - self.assertEqual(mc_pos.size, 10) - - self.assertEqual(pos[0], self.nu.tracks[0].pos_x[0]) - self.assertEqual(mc_pos[0], self.nu.mc_tracks[0].pos_x[0]) - - with self.assertRaises(ValueError): - self.nu.get_reco_tracks([1000, 4512, 5625], ["pos_x"]) - - @unittest.skip - def test_get_reco_events(self): - - hits = self.nu.get_reco_events([1, 2, 3, 4, 5], ["hits"])["hits"] - mc_hits = self.nu.get_reco_events([], ["mc_hits"], mc=True)["mc_hits"] - - self.assertEqual(hits.size, 9) - self.assertEqual(mc_hits.size, 10) - - self.assertListEqual(hits[0:4].tolist(), - self.nu.events.hits[0:4].tolist()) - self.assertListEqual(mc_hits[0:4].tolist(), - self.nu.events.mc_hits[0:4].tolist()) - - with self.assertRaises(ValueError): - self.nu.get_reco_events([1000, 4512, 5625], ["hits"]) - - @unittest.skip - def test_get_max_reco_stages(self): - rec_stages = self.nu.tracks.rec_stages - max_reco = self.nu._get_max_reco_stages(rec_stages) - - self.assertEqual(len(max_reco.tolist()), 9) - self.assertListEqual(max_reco[0].tolist(), [[1, 2, 3, 4, 5], 5, 0]) - - @unittest.skip - def test_best_reco(self): - JGANDALF_BETA1_RAD = [ - 0.0014177681261476852, 0.002094094517471032, 0.003923368624980349, - 0.009491461076780453 - ] - best = self.nu.get_best_reco() - - self.assertEqual(best.size, 9) - self.assertEqual(best['JGANDALF_BETA1_RAD'][:4].tolist(), - JGANDALF_BETA1_RAD) - def test_reading_header(self): # head is the supported format head = OFFLINE_NUMUCC.header @@ -347,10 +245,6 @@ class TestUsr(unittest.TestCase): def test_str(self): print(self.f.events.usr) - def test_nonexistent_usr(self): - f = OfflineReader(SAMPLES_DIR / "daq_v1.0.0.root") - assert not hasattr(self.f, "usr") - def test_keys(self): self.assertListEqual([ 'RecoQuality', 'RecoNDF', 'CoC', 'ToT', 'ChargeAbove', -- GitLab