Skip to content
Snippets Groups Projects

Resolve "uproot4 integration"

Merged Tamas Gal requested to merge 58-uproot4-integration-2 into master
Compare and Show latest version
3 files
+ 66
106
Compare changes
  • Side-by-side
  • Inline
Files
3
+ 42
24
@@ -28,25 +28,26 @@ class EventReader:
step_size=2000,
keys=None,
aliases=None,
nested_branches=None,
event_ctor=None,
):
"""EventReader base class
Parameters
----------
f: str or uproot4.reading.ReadOnlyDirectory (from uproot4.open)
f : str or uproot4.reading.ReadOnlyDirectory (from uproot4.open)
Path to the file of interest or uproot4 filedescriptor.
step_size: int, optional
step_size : int, optional
Number of events to read into the cache when iterating.
Choosing higher numbers may improve the speed but also increases
the memory overhead.
index_chain: list, optional
index_chain : list, optional
Keeps track of index chaining.
keys: list or set, optional
keys : list or set, optional
Branch keys.
aliases: dict, optional
aliases : dict, optional
Branch key aliases.
event_ctor: class or namedtuple, optional
event_ctor : class or namedtuple, optional
Event constructor.
"""
@@ -65,19 +66,10 @@ class EventReader:
self._event_ctor = event_ctor
self._index_chain = [] if index_chain is None else index_chain
# if aliases is not None:
# self.aliases = aliases
# else:
# # Check for usr-awesomeness backward compatibility crap
# if "E/Evt/AAObject/usr" in self._fobj:
# print("Found usr data")
# if ak.count(f["E/Evt/AAObject/usr"].array()) > 0:
# self.aliases.update(
# {
# "usr": "AAObject/usr",
# "usr_names": "AAObject/usr_names",
# }
# )
if aliases is not None:
self.aliases = aliases
if nested_branches is not None:
self.nested_branches = nested_branches
if self._keys is None:
self._initialise_keys()
@@ -155,6 +147,7 @@ class EventReader:
index_chain=self._index_chain + [key],
step_size=self._step_size,
aliases=self.aliases,
nested_branches=self.nested_branches,
keys=self.keys(),
event_ctor=self._event_ctor,
)
@@ -176,16 +169,38 @@ class EventReader:
if from_field in branch[key].keys():
fields.append(to_field)
log.debug(fields)
# out = branch[key].arrays(fields, aliases=self.nested_branches[key])
return Branch(branch[key], fields, self.nested_branches[key], self._index_chain)
else:
return unfold_indices(branch[self.aliases.get(key, key)].array(), self._index_chain)
def __iter__(self):
self._events = self._event_generator()
def __iter__(self, chunkwise=False):
    """Prepare event iteration and return the reader itself.

    A fresh generator is created via ``self._event_generator`` and stored
    in ``self._events`` each time this method is called.

    Parameters
    ----------
    chunkwise : bool, optional
        Forwarded unchanged to ``self._event_generator``.

    Returns
    -------
    self
        The reader instance, with ``self._events`` replaced.
    """
    self._events = self._event_generator(chunkwise=chunkwise)
    return self
def _event_generator(self):
def _get_iterator_limits(self):
    """Determine the start and stop entries used for event iteration.

    The limits are read from ``self._index_chain``. An empty chain means
    no restriction; a chain holding exactly one unit-step slice yields
    that slice's bounds.

    Returns
    -------
    tuple
        ``(start, stop)`` taken verbatim from the slice (either may be
        ``None``), or ``(None, None)`` when the index chain is empty.

    Raises
    ------
    NotImplementedError
        If the index chain has more than one entry, if its single entry
        is not a ``slice``, or if the slice step is neither ``None``
        nor ``1``.
    """
    if len(self._index_chain) > 1:
        raise NotImplementedError(
            "iteration is currently not supported with nested slices"
        )

    # No indexing applied: iterate without explicit limits.
    if not self._index_chain:
        return None, None

    s = self._index_chain[0]
    if not isinstance(s, slice):
        raise NotImplementedError("iteration is only supported with slices")
    if s.step is not None and s.step != 1:
        raise NotImplementedError(
            "iteration is only supported with single steps"
        )
    return s.start, s.stop
def _event_generator(self, chunkwise=False):
start, stop = self._get_iterator_limits()
if chunkwise:
raise NotImplementedError("iterating over chunks is not implemented yet")
events = self._fobj[self.event_path]
group_count_keys = set(
k for k in self.keys() if k.startswith("n_")
@@ -203,7 +218,7 @@ class EventReader:
log.debug("keys: %s", keys)
log.debug("aliases: %s", self.aliases)
events_it = events.iterate(
keys, aliases=self.aliases, step_size=self._step_size
keys, aliases=self.aliases, step_size=self._step_size, entry_start=start, entry_stop=stop
)
nested = []
nested_keys = (
@@ -216,6 +231,8 @@ class EventReader:
self.nested_branches[key].keys(),
aliases=self.nested_branches[key],
step_size=self._step_size,
entry_start=start,
entry_stop=stop
)
)
group_counts = {}
@@ -244,6 +261,7 @@ class EventReader:
return self._fobj[self.event_path].num_entries
elif isinstance(self._index_chain[-1], (int, np.int32, np.int64)):
if len(self._index_chain) == 1:
# TODO: not sure why this is needed at all, it's too late...
return 1
# try:
# return len(self[:])
Loading