Skip to content
Snippets Groups Projects

WIP: Slicing and refactoring offline

Closed Tamas Gal requested to merge 37-user-parameters-seem-to-be-transposed into master
Compare and Show latest version
3 files
+ 201
93
Compare changes
  • Side-by-side
  • Inline
Files
3
+ 138
56
@@ -8,6 +8,7 @@ from .definitions import mc_header
MAIN_TREE_NAME = "E"
# 110 MB based on the size of the largest basket found so far in km3net
BASKET_CACHE_SIZE = 110 * 1024**2
BASKET_CACHE = uproot.cache.ThreadSafeArrayCache(BASKET_CACHE_SIZE)
BranchMapper = namedtuple(
"BranchMapper",
@@ -20,7 +21,18 @@ def _nested_mapper(key):
EXCLUDE_KEYS = set(["AAObject", "t", "fBits", "fUniqueID"])
BRANCH_MAPS = [
EVENTS_MAP = BranchMapper("events", "Evt", {
't_sec': 't.fSec',
't_ns': 't.fNanoSec'
}, [], {
'n_hits': 'hits',
'n_mc_hits': 'mc_hits',
'n_tracks': 'trks',
'n_mc_tracks': 'mc_trks'
}, lambda a: a, True)
SUBBRANCH_MAPS = [
BranchMapper("tracks", "trks", {}, ['trks.usr_data', 'trks.usr'], {},
_nested_mapper, False),
BranchMapper("mc_tracks", "mc_trks", {},
@@ -56,11 +68,7 @@ class cached_property:
class OfflineReader:
"""reader for offline ROOT files"""
def __init__(self,
file_path=None,
fobj=None,
data=None,
index=slice(None)):
def __init__(self, file_path=None, fobj=None, data=None, index=None):
""" OfflineReader class is an offline ROOT file wrapper
Parameters
@@ -74,17 +82,18 @@ class OfflineReader:
if file_path is not None:
self._fobj = uproot.open(file_path)
self._tree = self._fobj[MAIN_TREE_NAME]
self._data = self._tree.lazyarrays(
basketcache=uproot.cache.ThreadSafeArrayCache(
BASKET_CACHE_SIZE))
self._data = self._tree.lazyarrays(basketcache=BASKET_CACHE)
else:
self._fobj = fobj
self._tree = self._fobj[MAIN_TREE_NAME]
self._data = data
for mapper in BRANCH_MAPS:
setattr(self, mapper.name,
Branch(self._tree, mapper=mapper, index=self._index))
@cached_property
def events(self):
return Branch(self._tree,
mapper=EVENTS_MAP,
index=self._index,
subbranchmaps=SUBBRANCH_MAPS)
@classmethod
def from_index(cls, source, index):
@@ -107,12 +116,11 @@ class OfflineReader:
def __len__(self):
tree = self._fobj[MAIN_TREE_NAME]
if self._index == slice(None):
if self._index is None:
return len(tree)
else:
return len(
tree.lazyarrays(basketcache=uproot.cache.ThreadSafeArrayCache(
BASKET_CACHE_SIZE))[self.index])
tree.lazyarrays(basketcache=BASKET_CACHE)[self.index])
@cached_property
def header(self):
@@ -418,10 +426,10 @@ class OfflineReader:
are not found, None is returned as the stages index.
"""
if mc is False:
stages_data = self.tracks.rec_stages
stages_data = self.events.tracks.rec_stages
if mc is True:
stages_data = self.mc_tracks.rec_stages
stages_data = self.events.mc_tracks.rec_stages
for trk_index, rec_stages in enumerate(stages_data):
try:
@@ -466,17 +474,17 @@ class OfflineReader:
class Usr:
"""Helper class to access AAObject usr stuff"""
def __init__(self, name, tree, index=slice(None)):
def __init__(self, name, tree, index=None):
# Here, we assume that every event has the same names in the same order
# to massively increase the performance. This needs triple check if it's
# always the case; the usr-format is simply a very bad design.
self._name = name
try:
tree['usr'] # This will raise a KeyError in old aanet files
# which have a different structure and key (usr_data)
# We do not support those...
# which have a different structure and key (usr_data)
# We do not support those...
self._usr_names = [
n.decode("utf-8") for n in tree['usr_names'].array()[0]
n.decode("utf-8") for n in tree['usr_names'].lazyarray()[0]
]
except (KeyError, IndexError): # e.g. old aanet files
self._usr_names = []
@@ -485,9 +493,10 @@ class Usr:
name: index
for index, name in enumerate(self._usr_names)
}
self._usr_data = tree['usr'].lazyarray(
basketcache=uproot.cache.ThreadSafeArrayCache(
BASKET_CACHE_SIZE))[index]
data = tree['usr'].lazyarray(basketcache=BASKET_CACHE)
if index is not None:
data = data[index]
self._usr_data = data
for name in self._usr_names:
setattr(self, name, self[name])
@@ -549,17 +558,40 @@ class Header:
class Branch:
"""Branch accessor class"""
def __init__(self, tree, mapper, index=slice(None)):
# @profile
def __init__(self,
tree,
mapper,
index=None,
subbranches=None,
subbranchmaps=None,
keymap=None):
self._tree = tree
self._mapper = mapper
self._index = index
self._keymap = None
self._branch = tree[mapper.key]
self._subbranches = []
self._initialise_keys()
if keymap is None:
self._initialise_keys() #
else:
self._keymap = keymap
if subbranches is not None:
self._subbranches = subbranches
if subbranchmaps is not None:
for mapper in subbranchmaps:
subbranch = Branch(self._tree,
mapper=mapper,
index=self._index)
self._subbranches.append(subbranch)
for subbranch in self._subbranches:
setattr(self, subbranch._mapper.name, subbranch)
# @profile
def _initialise_keys(self):
"""Create the keymap and instance attributes"""
"""Create the keymap and instance attributes for branch keys"""
keys = set(k.decode('utf-8') for k in self._branch.keys()) - set(
self._mapper.exclude) - EXCLUDE_KEYS
self._keymap = {
@@ -571,11 +603,8 @@ class Branch:
for k in self._mapper.update.values():
del self._keymap[k]
# self._EntryType = namedtuple(mapper.name[:-1], self.keys())
for key in self.keys():
# print("setting", self._mapper.name, key)
setattr(self, key, self[key])
for key in self._keymap.keys():
setattr(self, key, None)
def keys(self):
return self._keymap.keys()
@@ -584,35 +613,79 @@ class Branch:
def usr(self):
return Usr(self._mapper.name, self._branch, index=self._index)
def __getattribute__(self, attr):
if attr.startswith("_"): # let all private and magic methods pass
return object.__getattribute__(self, attr)
if attr in self._keymap.keys(): # intercept branch key lookups
item = self._keymap[attr]
out = self._branch[item].lazyarray(
basketcache=BASKET_CACHE)
if self._index is not None:
out = out[self._index]
return out
return object.__getattribute__(self, attr)
# @profile
def __getitem__(self, item):
"""Slicing magic a la numpy"""
if isinstance(item, slice):
return self.__class__(self._tree, self._mapper, index=item)
return self.__class__(self._tree,
self._mapper,
index=item,
subbranches=self._subbranches)
if isinstance(item, int):
# TODO refactor this
if self._mapper.flat:
return BranchElement(
self._mapper.name, {
key:
self._branch[self._keymap[key]].array()[self._index]
if self._index is None:
dct = {
key: self._branch[self._keymap[key]].lazyarray()
for key in self.keys()
})[item]
}
else:
dct = {
key: self._branch[self._keymap[key]].lazyarray()[
self._index]
for key in self.keys()
}
for subbranch in self._subbranches:
dct[subbranch._mapper.name] = subbranch
return BranchElement(self._mapper.name, dct)[item]
else:
return BranchElement(
self._mapper.name, {
key:
self._branch[self._keymap[key]].array()[self._index,
item]
if self._index is None:
dct = {
key: self._branch[self._keymap[key]].lazyarray()[item]
for key in self.keys()
}
else:
dct = {
key: self._branch[self._keymap[key]].lazyarray()[
self._index, item]
for key in self.keys()
})
}
for subbranch in self._subbranches:
dct[subbranch._mapper.name] = subbranch
return BranchElement(self._mapper.name, dct)
if isinstance(item, tuple):
return self[item[0]][item[1]]
out = self._branch[self._keymap[item]].lazyarray(
basketcache=uproot.cache.ThreadSafeArrayCache(BASKET_CACHE_SIZE))
if self._index != slice(None):
out[self._index]
if isinstance(item, str):
item = self._keymap[item]
out = self._branch[item].lazyarray(
basketcache=BASKET_CACHE)
if self._index is not None:
out = out[self._index]
return out
return self.__class__(self._tree,
self._mapper,
index=np.array(item),
subbranches=self._subbranches)
def __len__(self):
if self._index == slice(None):
if self._index is None:
return len(self._branch)
else:
return len(
@@ -639,21 +712,30 @@ class BranchElement:
index: slice
The slice mask to be applied to the sub-arrays
"""
def __init__(self, name, dct, index=slice(None)):
def __init__(self, name, dct, index=None, subbranches=[]):
self._dct = dct
self._name = name
self._index = index
self.ItemConstructor = namedtuple(self._name[:-1], dct.keys())
for key, values in dct.items():
setattr(self, key, values[index])
if index is None:
for key, values in dct.items():
setattr(self, key, values)
else:
for key, values in dct.items():
setattr(self, key, values[index])
def __getitem__(self, item):
if isinstance(item, slice):
return self.__class__(self._name, self._dct, index=item)
if isinstance(item, int):
return self.ItemConstructor(
**{k: v[self._index][item]
for k, v in self._dct.items()})
if self._index is None:
return self.ItemConstructor(
**{k: v[item]
for k, v in self._dct.items()})
else:
return self.ItemConstructor(
**{k: v[self._index][item]
for k, v in self._dct.items()})
def __repr__(self):
return "<{}[{}]>".format(self.__class__.__name__, self._name)
Loading