Skip to content
Snippets Groups Projects
Commit 27ca8221 authored by Stefan Reck's avatar Stefan Reck
Browse files

Minor changes.

parent e761c769
No related branches found
No related tags found
No related merge requests found
......@@ -126,7 +126,7 @@ def parse_input():
def shuffle_h5(filepath_input, tool=False, seed=42, delete=False, chunksize=None,
complib=None, complevel=None, legacy_mode=False, shuffle=True,
event_skipper=None):
event_skipper=None, filepath_output=None):
"""
Shuffles a .h5 file where each dataset needs to have the same number of rows (axis_0).
The shuffled data is saved to a new .h5 file with the suffix < _shuffled.h5 >.
......@@ -166,6 +166,9 @@ def shuffle_h5(filepath_input, tool=False, seed=42, delete=False, chunksize=None
event_skipper : func, optional
Function that takes the blob as an input, and returns a bool.
If the bool is true, the blob will be skipped.
filepath_output : str, optional
If given, this will be the name of the output file. Otherwise, a name
is auto generated.
Returns
-------
......@@ -185,13 +188,14 @@ def shuffle_h5(filepath_input, tool=False, seed=42, delete=False, chunksize=None
if complib == 'lzf':
complevel = None
filepath_input_without_ext = os.path.splitext(filepath_input)[0]
fname_adtn = ''
if shuffle:
fname_adtn += '_shuffled'
if event_skipper is not None:
fname_adtn += '_reb'
filepath_output = filepath_input_without_ext + fname_adtn + ".h5"
if filepath_output is None:
filepath_input_without_ext = os.path.splitext(filepath_input)[0]
fname_adtn = ''
if shuffle:
fname_adtn += '_shuffled'
if event_skipper is not None:
fname_adtn += '_reb'
filepath_output = filepath_input_without_ext + fname_adtn + ".h5"
if not legacy_mode:
# set random km3pipe (=numpy) seed
......
......@@ -26,7 +26,7 @@ class FileBinner:
bin_edges_list : List
List with the names of the fields to bin, and the respective bin edges,
including the left- and right-most bin edge.
Example:
Example: For 10 bins in the z direction, and 100 bins in time:
bin_edges_list = [
["pos_z", np.linspace(0, 10, 11)],
["time", np.linspace(-50, 550, 101)],
......@@ -44,6 +44,10 @@ class FileBinner:
It shows the distribution of hits, the bin edges, and how many hits
were cut off for each field name in bin_edges_list.
It will be saved to the same path as the outfile in run.
keep_event_info : bool
If True, will keep the "event_info" table.
keep_mc_tracks : bool
If True, will keep the "McTracks" table.
n_statusbar : int, optional
Print a statusbar every n blobs.
n_memory_observer : int, optional
......@@ -70,6 +74,7 @@ class FileBinner:
"""
def __init__(self, bin_edges_list, mc_info_extr=None,
event_skipper=None, add_bin_stats=True):
self.bin_edges_list = bin_edges_list
self.mc_info_extr = mc_info_extr
self.event_skipper = event_skipper
......@@ -79,6 +84,9 @@ class FileBinner:
else:
self.bin_plot_freq = None
self.keep_event_info = True
self.keep_mc_tracks = False
self.n_statusbar = 1000
self.n_memory_observer = 1000
self.do_time_preproc = True
......@@ -192,7 +200,12 @@ class FileBinner:
mc_info_extr=mc_info_extr,
store_as="mc_info")
pipe.attach(km.common.Keep, keys=['histogram', 'mc_info'])
keys_keep = ['histogram', 'mc_info']
if self.keep_event_info:
keys_keep.append('EventInfo')
if self.keep_mc_tracks:
keys_keep.append('McTracks')
pipe.attach(km.common.Keep, keys=keys_keep)
pipe.attach(kp.io.HDF5Sink,
filename=outfile,
......
......@@ -59,50 +59,76 @@ def get_mupage_mc(blob):
The info for mc_info.
"""
event_id = blob['EventInfo'].event_id[0]
run_id = blob["EventInfo"].run_id
# only one line has hits, but there are two for the mc. This one is active:
active_du = 2
track = dict()
track["event_id"] = blob['EventInfo'].event_id[0]
track["run_id"] = blob["EventInfo"].run_id
# run_id = blob['Header'].start_run.run_id.astype('float32')
# take 0: assumed that this is the same for all muons in a bundle
particle_type = blob['McTracks'][0].type
track["particle_type"] = blob['McTracks'][0].type
# always 1 actually
is_cc = blob['McTracks'][0].is_cc
track["is_cc"] = blob['McTracks'][0].is_cc
# always 0 actually
bjorkeny = blob['McTracks'][0].bjorkeny
track["bjorkeny"] = blob['McTracks'][0].bjorkeny
# same for all muons in a bundle #TODO not?
time_interaction = blob['McTracks'][0].time
track["time_interaction"] = blob['McTracks'][0].time
# takes position of time_residual_vertex in 'neutrino' case
n_muons = blob['McTracks'].shape[0]
track["n_muons"] = blob['McTracks'].shape[0]
# sum up the energy of all muons
energy = np.sum(blob['McTracks'].energy)
energy = blob['McTracks'].energy
track["energy"] = np.sum(energy)
# energy in the highest energy muons of the bundle
sorted_energy = np.sort(energy)
track["energy_highest_frac"] = sorted_energy[-1]/np.sum(energy)
if len(energy) > 1:
sec_highest_energy = sorted_energy[-2]/np.sum(energy)
else:
sec_highest_energy = 0.
track["energy_sec_highest_frac"] = sec_highest_energy
# coefficient of variation of energy
track["energy_cvar"] = np.std(energy)/np.mean(energy)
# get how many mchits were produced per muon in the bundle
origin = blob["McHits"]["origin"][blob["McHits"]["du"] == active_du]
origin_dict = dict(zip(*np.unique(origin, return_counts=True)))
origin_list = []
for i in range(1, track["n_muons"]+1):
origin_list.append(origin_dict.get(i, 0))
origin_list = np.array(origin_list)
# fraction of mchits in the highest mchit muons of the bundle
sorted_origin = np.sort(origin_list)
track["mchit_highest_frac"] = sorted_origin[-1] / np.sum(origin_list)
if len(sorted_origin) > 1:
sec_highest_mchit_frac = sorted_origin[-2] / np.sum(origin_list)
else:
sec_highest_mchit_frac = 0.
track["mchit_sec_highest_frac"] = sec_highest_mchit_frac
# only muons with at least one mchit in active line
track["n_muons_visible"] = len(origin_list[origin_list > 0])
# only muons with at least 5 mchits in active line
track["n_muons_thresh"] = len(origin_list[origin_list > 4])
# coefficient of variation of the origin of mc hits in the bundle
track["mchit_cvar"] = np.std(origin_list)/np.mean(origin_list)
# all muons in a bundle are parallel, so just take dir of first muon
dir_x = blob['McTracks'][0].dir_x
dir_y = blob['McTracks'][0].dir_y
dir_z = blob['McTracks'][0].dir_z
track["dir_x"] = blob['McTracks'][0].dir_x
track["dir_y"] = blob['McTracks'][0].dir_y
track["dir_z"] = blob['McTracks'][0].dir_z
# vertex is the weighted (energy) mean of the individual vertices
vertex_pos_x = np.average(blob['McTracks'][:].pos_x,
weights=blob['McTracks'][:].energy)
vertex_pos_y = np.average(blob['McTracks'][:].pos_y,
weights=blob['McTracks'][:].energy)
vertex_pos_z = np.average(blob['McTracks'][:].pos_z,
weights=blob['McTracks'][:].energy)
track = {'event_id': event_id,
'particle_type': particle_type,
'energy': energy,
'is_cc': is_cc,
'bjorkeny': bjorkeny,
'dir_x': dir_x,
'dir_y': dir_y,
'dir_z': dir_z,
'time_interaction': time_interaction,
'run_id': run_id,
'vertex_pos_x': vertex_pos_x,
'vertex_pos_y': vertex_pos_y,
'vertex_pos_z': vertex_pos_z,
'n_muons': n_muons}
track["vertex_pos_x"] = np.average(blob['McTracks'][:].pos_x,
weights=blob['McTracks'][:].energy)
track["vertex_pos_y"] = np.average(blob['McTracks'][:].pos_y,
weights=blob['McTracks'][:].energy)
track["vertex_pos_z"] = np.average(blob['McTracks'][:].pos_z,
weights=blob['McTracks'][:].energy)
return track
......@@ -186,7 +186,7 @@ def plot_hist_of_files(files, save_as):
file.close()
if __name__ == "__main__":
def main():
parser = argparse.ArgumentParser(description='Plot the bin stats in h5 files')
parser.add_argument('save_as', metavar='S', type=str,
help='Where to save the plot to.')
......@@ -195,3 +195,7 @@ if __name__ == "__main__":
args = parser.parse_args()
plot_hist_of_files(args.files, args.save_as)
if __name__ == "__main__":
main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment