Skip to content
Snippets Groups Projects
Commit 976019b8 authored by Tamas Gal's avatar Tamas Gal :speech_balloon:
Browse files

Final implementation of the best track masking

parent 0b6b8d86
No related branches found
No related tags found
1 merge request!47Resolve "uproot4 integration"
Pipeline #16267 failed
...@@ -224,37 +224,38 @@ def best_track(tracks, startend=None, minmax=None, stages=None): ...@@ -224,37 +224,38 @@ def best_track(tracks, startend=None, minmax=None, stages=None):
Parameters Parameters
---------- ----------
tracks : km3io.offline.OfflineBranch tracks : awkward.Array
Array of tracks or jagged array of tracks (multiple events). A list of tracks or doubly nested tracks, usually from
OfflineReader.events.tracks or subarrays of that, containing recunstructed
tracks.
startend: tuple(int, int), optional startend: tuple(int, int), optional
The required first and last stage in tracks.rec_stages. The required first and last stage in tracks.rec_stages.
minmax: tuple(int, int), optional minmax: tuple(int, int), optional
The range (minimum and maximum) value of rec_stages to take into account. The range (minimum and maximum) value of rec_stages to take into account.
stages : list or set, optional stages : list or set, optional
- list: the order of the rec_stages is respected. - list: the order of the rec_stages is respected.
- set: a subset of required stages; the order is irrelevant. - set: a subset of required stages; the order is irrelevant.
Returns Returns
------- -------
km3io.offline.OfflineBranch awkward.Array or namedtuple
The best tracks based on the selection. Be aware that the dimensions are kept, which means that the final
track attributes are nested when multiple events are passed in.
If a single event (just a list of tracks) is provided, a named tuple
with a single track and flat attributes is created.
Raises Raises
------ ------
ValueError ValueError
- too many inputs specified. When invalid inputs are specified.
- no inputs are specified.
""" """
inputs = (stages, startend, minmax) inputs = (stages, startend, minmax)
if all(v is None for v in inputs): if sum(v is not None for v in inputs) != 1:
raise ValueError("either stages, startend or minmax must be specified.") raise ValueError("either stages, startend or minmax must be specified.")
if stages is not None and (startend is not None or minmax is not None): if stages is not None:
raise ValueError("Please specify either a range or a set of rec stages.")
if stages is not None and startend is None and minmax is None:
if isinstance(stages, list): if isinstance(stages, list):
m1 = mask(tracks.rec_stages, sequence=stages) m1 = mask(tracks.rec_stages, sequence=stages)
elif isinstance(stages, set): elif isinstance(stages, set):
...@@ -262,10 +263,10 @@ def best_track(tracks, startend=None, minmax=None, stages=None): ...@@ -262,10 +263,10 @@ def best_track(tracks, startend=None, minmax=None, stages=None):
else: else:
raise ValueError("stages must be a list or a set of integers") raise ValueError("stages must be a list or a set of integers")
if startend is not None and minmax is None and stages is None: if startend is not None:
m1 = mask(tracks.rec_stages, startend=startend) m1 = mask(tracks.rec_stages, startend=startend)
if minmax is not None and startend is None and stages is None: if minmax is not None:
m1 = mask(tracks.rec_stages, minmax=minmax) m1 = mask(tracks.rec_stages, minmax=minmax)
try: try:
...@@ -291,7 +292,7 @@ def best_track(tracks, startend=None, minmax=None, stages=None): ...@@ -291,7 +292,7 @@ def best_track(tracks, startend=None, minmax=None, stages=None):
def mask(arr, sequence=None, startend=None, minmax=None, atleast=None): def mask(arr, sequence=None, startend=None, minmax=None, atleast=None):
"""Return a boolean mask which check each nested sub-array for a condition. """Return a boolean mask which mask each nested sub-array for a condition.
Parameters Parameters
---------- ----------
...@@ -306,93 +307,119 @@ def mask(arr, sequence=None, startend=None, minmax=None, atleast=None): ...@@ -306,93 +307,119 @@ def mask(arr, sequence=None, startend=None, minmax=None, atleast=None):
order) order)
atleast : list(int), optional atleast : list(int), optional
True for entries where at least the provided elements are present. True for entries where at least the provided elements are present.
An extensive discussion about this implementation can be found at:
https://github.com/scikit-hep/awkward-1.0/issues/580
Many thanks for Jim for the fruitful discussion and the final implementation.
""" """
inputs = (sequence, startend, minmax, atleast) inputs = (sequence, startend, minmax, atleast)
if all(v is None for v in inputs): if sum(v is not None for v in inputs) != 1:
raise ValueError( raise ValueError(
"either sequence, startend, minmax or atleast must be specified." "either sequence, startend, minmax or atleast must be specified."
) )
builder = ak.ArrayBuilder() def recurse(layout):
# Numba has very limited recursion support, so this is hardcoded if layout.purelist_depth == 2:
if arr.ndim == 2: if startend is not None:
_mask2d(arr, builder, sequence, startend, minmax, atleast) np_array = _mask_startend(ak.Array(layout), *startend)
else: elif minmax is not None:
_mask3d(arr, builder, sequence, startend, minmax, atleast) np_array = _mask_minmax(ak.Array(layout), *minmax)
return builder.snapshot() elif sequence is not None:
np_array = _mask_sequence(ak.Array(layout), np.array(sequence))
elif atleast is not None:
np_array = _mask_atleast(ak.Array(layout), np.array(atleast))
return ak.layout.NumpyArray(np_array)
elif isinstance(
layout,
(
ak.layout.ListArray32,
ak.layout.ListArrayU32,
ak.layout.ListArray64,
),
):
if len(layout.stops) == 0:
content = recurse(layout.content)
else:
content = recurse(layout.content[: np.max(layout.stops)])
return type(layout)(layout.starts, layout.stops, content)
elif isinstance(
layout,
(
ak.layout.ListOffsetArray32,
ak.layout.ListOffsetArrayU32,
ak.layout.ListOffsetArray64,
),
):
content = recurse(layout.content[: layout.offsets[-1]])
return type(layout)(layout.offsets, content)
elif isinstance(layout, ak.layout.RegularArray):
content = recurse(layout.content)
return ak.layout.RegularArray(content, layout.size)
def mask_alt(arr, start, end): else:
nonempty = ak.num(arr, axis=-1) > 0 raise NotImplementedError(repr(arr))
mask = ((arr.mask[nonempty][..., 0] == start) & (arr.mask[nonempty][..., -1] == end))
return ak.fill_none(mask, False)
layout = ak.to_layout(arr, allow_record=True, allow_other=False)
@nb.njit return ak.Array(recurse(layout))
def _mask3d(arr, builder, sequence=None, startend=None, minmax=None, atleast=None):
for subarray in arr:
builder.begin_list()
_mask2d(subarray, builder, sequence, startend, minmax, atleast)
builder.end_list()
@nb.njit @nb.njit
def _mask_startend(arr, builder, start, end): def _mask_startend(arr, start, end):
for els in arr: out = np.empty(len(arr), np.bool_)
if len(els) > 0 and els[0] == start and els[-1] == end: for i, subarr in enumerate(arr):
builder.boolean(True) out[i] = len(subarr) > 0 and subarr[0] == start and subarr[-1] == end
else: return out
builder.boolean(False)
@nb.njit @nb.njit
def _mask_minmax(arr, builder, min, max): def _mask_minmax(arr, min, max):
for els in arr: out = np.empty(len(arr), np.bool_)
for el in els: for i, subarr in enumerate(arr):
if el < min or el > max: if len(subarr) == 0:
builder.boolean(False) out[i] = False
break
else: else:
builder.boolean(True) for el in subarr:
if el < min or el > max:
out[i] = False
break
else:
out[i] = True
return out
@nb.njit @nb.njit
def _mask_sequence(arr, builder, sequence): def _mask_sequence(arr, sequence):
out = np.empty(len(arr), np.bool_)
n = len(sequence) n = len(sequence)
for els in arr: for i, subarr in enumerate(arr):
if len(els) != n: if len(subarr) != n:
builder.boolean(False) out[i] = False
else: else:
for i in range(n): for j in range(n):
if els[i] != sequence[i]: if subarr[j] != sequence[j]:
builder.boolean(False) out[i] = False
break break
else: else:
builder.boolean(True) out[i] = True
return out
@nb.njit @nb.njit
def _mask_atleast(arr, builder, atleast): def _mask_atleast(arr, atleast):
for els in arr: out = np.empty(len(arr), np.bool_)
for e in atleast: for i, subarr in enumerate(arr):
if e not in els: for req_el in atleast:
builder.boolean(False) if req_el not in subarr:
out[i] = False
break break
else: else:
builder.boolean(True) out[i] = True
return out
@nb.njit
def _mask2d(arr, builder, sequence=None, startend=None, minmax=None, atleast=None):
if startend is not None:
_mask_startend(arr, builder, *startend)
elif minmax is not None:
_mask_minmax(arr, builder, *minmax)
elif sequence is not None:
_mask_sequence(arr, builder, sequence)
elif atleast is not None:
_mask_atleast(arr, builder, atleast)
def best_jmuon(tracks): def best_jmuon(tracks):
......
...@@ -429,7 +429,6 @@ class TestMask(unittest.TestCase): ...@@ -429,7 +429,6 @@ class TestMask(unittest.TestCase):
m = mask(arr, minmax=(1, 4)) m = mask(arr, minmax=(1, 4))
self.assertListEqual(m.tolist(), [[True, False, False], [True]]) self.assertListEqual(m.tolist(), [[True, False, False], [True]])
@unittest.skip
def test_minmax_4dim_mask(self): def test_minmax_4dim_mask(self):
arr = ak.Array( arr = ak.Array(
[[[[1, 2, 3, 4], [3, 4, 5], [1, 2, 5]], [[1, 2, 3]]], [[[1, 9], [3, 3]]]] [[[[1, 2, 3, 4], [3, 4, 5], [1, 2, 5]], [[1, 2, 3]]], [[[1, 9], [3, 3]]]]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment