From 1ea16654731c56ad1c8d02641094914f35e3681a Mon Sep 17 00:00:00 2001 From: ViaFerrata <michimoser@onlinehome.de> Date: Tue, 11 Dec 2018 13:33:03 +0100 Subject: [PATCH] Work in progress, not running yet. --- orcasong/data_to_images.py | 23 +++++ orcasong/file_to_hits.py | 169 ++++++++++++++++++++++++++++++++++++- 2 files changed, 190 insertions(+), 2 deletions(-) diff --git a/orcasong/data_to_images.py b/orcasong/data_to_images.py index ed2202d..41e82c0 100644 --- a/orcasong/data_to_images.py +++ b/orcasong/data_to_images.py @@ -422,6 +422,29 @@ def data_to_images(fname, detx_filepath, n_bins, det_geo, do2d, do2d_plots, do3d # filter out all hit and track information belonging that to this event event_hits, event_track = get_event_data(event_blob, file_particle_type, geo, do_mc_hits, data_cuts, do4d, prod_ident) + class EventSkipper(kp.Module): + def configure(self): + self.data_cuts = self.require('data_cuts') + + def process(self, blob): + if self._skip_event(blob['EventData']): + return + else: + return blob + + def _skip_event(self, blob): + ... should i skip? + return True/False + + import km3modules as km + pipe = kp.Pipeline() + pipe.attach(km.common.StatusBar, every=10) + pipe.attach(EventDataExtractor, file_particle_type='...') + pipe.attach(EventSkipper, data_cuts=...) + pipe.attach(km.common.Keep, keys=['FinalTrack', 'Foo']) + pipe.attach(kp.io.HDF5Sink, filename='dump.h5') + + continue_bool = skip_event(event_track, data_cuts) if continue_bool: continue diff --git a/orcasong/file_to_hits.py b/orcasong/file_to_hits.py index 8c61d88..686d4fe 100644 --- a/orcasong/file_to_hits.py +++ b/orcasong/file_to_hits.py @@ -3,6 +3,7 @@ """Code that reads the h5 simulation files and extracts the necessary information for making event images.""" import numpy as np +import km3pipe as kp #from memory_profiler import profile #import line_profiler # call with kernprof -l -v file.py args @@ -130,7 +131,7 @@ def get_tracks(event_blob, file_particle_type, event_hits, prod_ident): vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex/n_muons, (prod_ident)]. """ - # parse EventInfo and Header information + # parse EventInfo and Header information # TODO always read from header event_id = event_blob['EventInfo'].event_id[0] # the run_id info in the EventInfo group is broken for ORCA neutrino and mupage files @@ -244,4 +245,168 @@ def get_event_data(event_blob, file_particle_type, geo, do_mc_hits, data_cuts, d event_hits = get_hits(event_blob, geo, do_mc_hits, data_cuts, do4d) event_track = get_tracks(event_blob, file_particle_type, event_hits, prod_ident) - return event_hits, event_track \ No newline at end of file + return event_hits, event_track + + +class EventDataExtractor(kp.Module): + def configure(self): + self.file_particle_type = self.require('file_particle_type') + self.geo = self.require('geo') + self.do_mc_hits = self.require('do_mc_hits') + self.data_cuts = self.require('data_cuts') + self.do4d = self.require('do4d') + self.prod_ident = self.require('prod_ident') + self.event_hits = self.get('event_hits', default='event_hits') + self.event_track = self.get('event_track', default='event_track') + + def process(self, blob): + blob[self.event_hits] = self.get_hits(blob, self.geo, self.do_mc_hits, self.data_cuts, self.do4d) + blob[self.event_track] = self.get_tracks(blob, self.file_particle_type, self.event_hits, self.prod_ident) + return blob + + def finish(self): + return + + def get_hits(self, blob, geo, do_mc_hits, data_cuts, do4d): + """ + Returns a hits array that contains [pos_x, pos_y, pos_z, time, triggered, channel_id (optional)]. + + Parameters + ---------- + blob : kp.io.HDF5Pump.blob + Event blob of the HDF5Pump which contains all information for one event. + geo : kp.Geometry + km3pipe Geometry instance that contains the geometry information of the detector. + Only used if the event_blob is from a non-calibrated file! + do_mc_hits : bool + Tells the function of the hits (mc_hits + BG) or the mc_hits only should be parsed. + In the case of mc_hits, the dom_id needs to be calculated thanks to the jpp output. + data_cuts : dict + Specifies if cuts should be applied. + Contains the keys 'triggered' and 'energy_lower/upper_limit' and 'throw_away_prob'. + do4d : tuple(bool, str) + Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz. + In the case of 'channel_id', this information needs to be included in the event_hits as well. + + Returns + ------- + event_hits : ndarray(ndim=2) + 2D array that contains the hits data for the input event [pos_x, pos_y, pos_z, time, triggered, (channel_id)]. + + """ + # parse hits [x,y,z,time] + hits = blob['Hits'] if do_mc_hits is False else blob['McHits'] + + if 'pos_x' not in blob['Hits'].dtype.names: # check if blob already calibrated + hits = geo.apply(hits) + + if data_cuts['triggered'] is True: + hits = hits.__array__[hits.triggered.astype(bool)] + # hits = hits.triggered_hits # alternative, though it only works for the triggered condition! + + pos_x, pos_y, pos_z = hits.pos_x, hits.pos_y, hits.pos_z + hits_time = hits.time + triggered = hits.triggered + + ax = np.newaxis + event_hits = np.concatenate([pos_x[:, ax], pos_y[:, ax], pos_z[:, ax], hits_time[:, ax], triggered[:, ax]], + axis=1) # dtype: np.float64 + + if do4d[0] is True and do4d[1] == 'channel_id' or do4d[1] == 'xzt-c': + event_hits = np.concatenate([event_hits, hits.channel_id[:, ax]], axis=1) + + return event_hits + + + def get_tracks(self, blob, file_particle_type, event_hits, prod_ident): + """ + Returns the event_track, which contains important event_info and mc_tracks data for the input event. + + Parameters + ---------- + blob : kp.io.HDF5Pump.blob + Event blob of the HDF5Pump which contains all information for one event. + file_particle_type : str + String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino']. + event_hits : ndarray(ndim=2) + 2D array that contains the hits data for the input event. + prod_ident : int + Optional int that identifies the used production, more documentation in the docs of the main function. + + Returns + ------- + event_track : ndarray(ndim=1) + 1D array that contains important event_info and mc_tracks data for the input event. + + If file_particle_type = 'undefined': + [event_id, run_id, (prod_ident)]. + + If file_particle_type = 'neutrino'/'muon': + [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_track, run_id, + vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex/n_muons, (prod_ident)]. + + """ + # parse EventInfo and Header information # TODO always read from header + event_id = blob['EventInfo'].event_id[0] + + # the run_id info in the EventInfo group is broken for ORCA neutrino and mupage files + # The Header run_id is the most reliable one. + + if 'Header' in blob: # if Header exists in file, take run_id from it. + run_id = blob['Header'].start_run.run_id.astype('float32') + else: + raise ValueError('There is no "Header" folder in your h5 file, something must be wrong!') + + # collect all event_track information, dependent on file_particle_type + + if file_particle_type == 'undefined': + particle_type = 0 + track = [event_id, run_id, particle_type] + + elif file_particle_type == 'muon': + # take index 1, index 0 is the empty neutrino mc_track + particle_type = blob['McTracks'][1].type # assumed that this is the same for all muons in a bundle + is_cc = blob['McTracks'][1].is_cc # always 1 actually + bjorkeny = blob['McTracks'][1].bjorkeny # always 0 actually + time_interaction = blob['McTracks'][1].time # same for all muons in a bundle + n_muons = blob['McTracks'].shape[0] - 1 # takes position of time_residual_vertex in 'neutrino' case + + # sum up the energy of all muons + energy = np.sum(blob['McTracks'].energy) + + # all muons in a bundle are parallel, so just take dir of first muon + dir_x, dir_y, dir_z = blob['McTracks'][1].dir_x, blob['McTracks'][1].dir_y, blob['McTracks'][1].dir_z + + # vertex is the weighted (energy) mean of the individual vertices + vertex_pos_x = np.average(blob['McTracks'][1:].pos_x, weights=blob['McTracks'][1:].energy) + vertex_pos_y = np.average(blob['McTracks'][1:].pos_y, weights=blob['McTracks'][1:].energy) + vertex_pos_z = np.average(blob['McTracks'][1:].pos_z, weights=blob['McTracks'][1:].energy) + + track = [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_interaction, run_id, + vertex_pos_x, vertex_pos_y, vertex_pos_z, n_muons] + + elif file_particle_type == 'neutrino': + p = get_primary_track_index(blob) + particle_type = blob['McTracks'][p].type + energy = blob['McTracks'][p].energy + is_cc = blob['McTracks'][p].is_cc + bjorkeny = blob['McTracks'][p].bjorkeny + dir_x, dir_y, dir_z = blob['McTracks'][p].dir_x, blob['McTracks'][p].dir_y, blob['McTracks'][p].dir_z + time_interaction = blob['McTracks'][p].time # actually always 0 for primary neutrino, measured in MC time + vertex_pos_x, vertex_pos_y, vertex_pos_z = blob['McTracks'][p].pos_x, blob['McTracks'][p].pos_y, \ + blob['McTracks'][p].pos_z + + hits_time, triggered = event_hits[:, 3], event_hits[:, 4] + time_residual_vertex = get_time_residual_nu_interaction_mean_triggered_hits(time_interaction, hits_time, triggered) + + track = [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_interaction, run_id, + vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex] + + else: + raise ValueError('The file_particle_type "', str(file_particle_type), '" is not known.') + + if prod_ident is not None: track.append(prod_ident) + + event_track = np.array(track, dtype=np.float64) + + return event_track -- GitLab