Skip to content
Snippets Groups Projects

outsource reconstruction data in separate arrays

Merged Zineb Aly requested to merge add-fitinf into master
1 file
+ 151
14
Compare changes
  • Side-by-side
  • Inline
+ 151
14
@@ -179,8 +179,7 @@ class OfflineKeys:
'JSTART_NPE_MIP', 'JSTART_NPE_MIP_TOTAL',
'JSTART_LENGTH_METRES', 'JVETO_NPE', 'JVETO_NUMBER_OF_HITS',
'JENERGY_MUON_RANGE_METRES', 'JENERGY_NOISE_LIKELIHOOD',
'JENERGY_NDF', 'JENERGY_NUMBER_OF_HITS', 'JCOPY_Z_M'
]
'JENERGY_NDF', 'JENERGY_NUMBER_OF_HITS'] # 'JCOPY_Z_M' not anymore in Jpp
return self._fit_keys
@property
@@ -320,6 +319,7 @@ class OfflineReader:
self._mc_hits = None
self._mc_tracks = None
self._keys = None
self._best_reco = None
def __getitem__(self, item):
    """Return a new reader built from the ``item`` selection of the data.

    The new ``OfflineReader`` is given the same file path as this one
    and ``self._data[item]`` as its data.
    """
    path = self._file_path
    selection = self._data[item]
    return OfflineReader(file_path=path, data=selection)
@@ -416,7 +416,156 @@ class OfflineReader:
[self._data[key] for key in self.keys.mc_tracks_keys])
return self._mc_tracks
@property
def best_reco(self):
    """Return the fit data of the best reconstructed track of each entry.

    The best fit is defined as the track fit with the maximum number of
    reconstruction stages. Rows shorter than the longest selected row are
    padded with "nan": a "nan" means that the reconstruction parameter of
    interest is not found. For example, in the case of muon simulations:
    if [1, 2] are the reconstruction stages, then only the fit parameters
    corresponding to the stages [1, 2] are found in the Offline files, the
    remaining fit parameters are filled with nan.

    The result is computed once and cached in ``self._best_reco``.

    Returns
    -------
    numpy recarray
        a recarray of the best track fit data (reconstruction data),
        with field names taken from ``self.keys.fit_keys``.
    """
    if self._best_reco is None:
        keys = ", ".join(self.keys.fit_keys)
        # Skip entries without any fit information.
        fit_info = [fits for fits in self.tracks.fitinf if len(fits) != 0]
        stages = self._get_max_reco_stages(self.tracks.rec_stages)
        # Column 2 holds the position of the longest rec_stages list.
        # NOTE(review): assumes fitinf and rec_stages are empty for the
        # same entries, so both filtered sequences stay aligned.
        fit_data = [fits[pos] for fits, pos in zip(fit_info, stages[:, 2])]
        # Keep the ragged rows in a plain list: recent NumPy versions
        # refuse to build an array from sequences of unequal length.
        rows_size = max(len(row) for row in fit_data)
        equal_size_data = np.vstack([
            np.hstack([row, np.full(rows_size - len(row), np.nan)])
            for row in fit_data
        ])
        # np.rec is the public name of the record-array helpers.
        self._best_reco = np.rec.fromarrays(equal_size_data.transpose(),
                                            names=keys)
    return self._best_reco
def _get_max_reco_stages(self, reco_stages):
"""find the longest reconstructed track based on the maximum size of
reconstructed stages.
Parameters
----------
reco_stages : chunked array
chunked array of all the reconstruction stages of all tracks.
In km3io, it is accessed with
km3io.OfflineReader(my_file).tracks.rec_stages .
Returns
-------
numpy array
array with 3 columns: *list of the maximum reco_stages
*lentgh of the maximum reco_stages
*position of the maximum reco_stages
"""
empty_reco_stages = np.array([match for match in
self._find_empty(reco_stages)])
max_reco_stages = np.array([[max(i, key=len), len(max(i, key=len)),
i.index(max(i, key=len))] for i,j in
zip(reco_stages, empty_reco_stages[:,1])
if j is not None])
return max_reco_stages
def get_reco_fit(self, stages):
    """Construct a numpy recarray of the fit information (reconstruction
    data) of the tracks reconstructed following the reconstruction stages
    of interest.

    Entries in which the stages of interest do not occur are left out of
    the result.

    Parameters
    ----------
    stages : list
        list of reconstruction stages of interest. for example
        [1, 2, 3, 4, 5].

    Returns
    -------
    numpy recarray
        a recarray of the fit information (reconstruction data) of
        the tracks of interest, with field names taken from
        ``self.keys.fit_keys``.

    Raises
    ------
    ValueError
        ValueError raised when the reconstruction stages of interest
        are not found in the file.
    """
    keys = ", ".join(self.keys.fit_keys)
    # Position of ``stages`` inside each entry, or None when absent.
    positions = [pos for _, pos in self._find_rec_stages(stages)]
    if all(pos is None for pos in positions):
        raise ValueError("The stages {} are not found in your file."
                         .format(str(stages)))

    fit_data = np.array([fits[pos]
                         for fits, pos in zip(self.tracks.fitinf, positions)
                         if pos is not None])
    # np.rec is the public name of the record-array helpers.
    return np.rec.fromarrays(fit_data.transpose(), names=keys)
def _find_rec_stages(self, stages):
"""find the index of reconstruction stages of interest in a
list of multiple reconstruction stages.
Parameters
----------
stages : list
list of reconstruction stages of interest. for example
[1, 2, 3, 4, 5].
Yields
------
generator
the track id and the index of the reconstruction stages of
interest if found. If the reconstruction stages of interest
are not found, None is returned as the stages index.
"""
for trk_index, rec_stages in enumerate(self.tracks.rec_stages):
try:
stages_index = rec_stages.index(stages)
except ValueError:
stages_index = None
yield trk_index, stages_index
continue
yield trk_index, stages_index
def _find_empty(self, chunk_arr):
"""finds empty lists/arrays in a chunked-jagged array of data
of interest.
Parameters
----------
chunk_arr : Chunked array
Chunked array or jagged array of data of interest.
Yields
------
generator
the empty list id and the index of the empty list. When
data structure (list) is simply empty, None is written in the
corresponding index. However, when data structure (list) is not
empty and does not contain an empty list, then False is written in the
corresponding index.
"""
for i, rs in enumerate(chunk_arr):
try:
if len(rs)==0:
j = None
if len(rs)!=0:
j = rs.index([])
except ValueError:
j = False # rs not empty but [] not found
yield i, j
continue
yield i, j
class OfflineEvents:
@@ -567,7 +716,6 @@ class OfflineTracks:
"""
self._keys = keys
self._values = values
self._reco = None
if fit_keys is not None:
self._fit_keys = fit_keys
for k, v in zip(self._keys, self._values):
@@ -597,17 +745,6 @@ class OfflineTracks:
return "<{}: {} parsed elements>".format(self.__class__.__name__,
len(self))
@property
def reco(self):
    """Return the fit data of the first track of each non-empty entry.

    The result is computed once and cached in ``self._reco``.

    Returns
    -------
    numpy recarray
        a recarray of the fit data, with field names taken from
        ``self._fit_keys`` without its last key.
    """
    if self._reco is None:
        # the last key is not found in files
        keys = ", ".join(self._fit_keys[:-1])
        # fits[0] is the reco data of the track with the highest likelihood
        # 18 is always the position of fit info
        fit_data = np.array([fits[0] for fits in self._values[18]
                             if len(fits) != 0])
        # np.rec is the public name of the record-array helpers.
        self._reco = np.rec.fromarrays(fit_data.transpose(), names=keys)
    return self._reco
class OfflineTrack:
"""wrapper for an offline track"""
Loading