diff --git a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-c.toml b/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-c.toml new file mode 100644 index 0000000000000000000000000000000000000000..2f9aa05028ef950493a143f01543de07707bc86f --- /dev/null +++ b/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-c.toml @@ -0,0 +1,35 @@ +# A config file for OrcaSong with multiple configurations. +# Outcomment the config that you want to use! +# More info about the .toml format at https://github.com/toml-lang/toml + +### All available options with some dummy values +# --n_bins = '11,13,18,60' +# --det_geo = 'Orca_115l_23m_h_9m_v' +# --do2d = false +# --do2d_plots = false +# --do2d_plots_n = 10 +# --do3d = false +# --do4d = true +# --do4d_mode = 'time' +# --timecut_mode = 'trigger_cluster' +# --timecut_timespan = 'tight_1' +# --do_mc_hits = false +# --data_cut_triggered = false +# --data_cut_e_low = 3 +# --data_cut_e_high = 100 +# --data_cut_throw_away = 0.00 +# --prod_ident = 1 + + +####----- Configuration for ORCA 115l -----#### + +###--- 3-100GeV ---### + +## XYZ-C + +--n_bins = '11,13,18,31' +--det_geo = 'Orca_115l_23m_h_9m_v' +--do4d_mode = 'channel_id' +--timecut_mode = 'trigger_cluster' +--timecut_timespan = 'tight_0' +--prod_ident = 3 # for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod ; mupage: 3 ; random_noise: 4 \ No newline at end of file diff --git a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-t.toml b/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-t.toml new file mode 100644 index 0000000000000000000000000000000000000000..1e705c16acfa974a9151be355c2bed5b712b4e07 --- /dev/null +++ b/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-t.toml @@ -0,0 +1,35 @@ +# A config file for OrcaSong with multiple configurations. +# Outcomment the config that you want to use! +# More info about the .toml format at https://github.com/toml-lang/toml + +### All available options with some dummy values +# --n_bins = '11,13,18,60' +# --det_geo = 'Orca_115l_23m_h_9m_v' +# --do2d = false +# --do2d_plots = false +# --do2d_plots_n = 10 +# --do3d = false +# --do4d = true +# --do4d_mode = 'time' +# --timecut_mode = 'trigger_cluster' +# --timecut_timespan = 'tight_1' +# --do_mc_hits = false +# --data_cut_triggered = false +# --data_cut_e_low = 3 +# --data_cut_e_high = 100 +# --data_cut_throw_away = 0.00 +# --prod_ident = 1 + + +####----- Configuration for ORCA 115l -----#### + +###--- 3-100GeV ---### + +## XYZ-T + +--n_bins = '11,13,18,100' +--det_geo = 'Orca_115l_23m_h_9m_v' +--do4d_mode = 'time' +--timecut_mode = 'trigger_cluster' +--timecut_timespan = 'tight_0' +--prod_ident = 3 # for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod ; mupage: 3 ; random_noise: 4 \ No newline at end of file diff --git a/orcasong/data_to_images.py b/orcasong/data_to_images.py index ae56ed7cad0bf5fe7bde9d866653c0994d0ba13b..87b12e81e5080168daaf69a32828d6b30fb66ac8 100644 --- a/orcasong/data_to_images.py +++ b/orcasong/data_to_images.py @@ -275,6 +275,35 @@ def calculate_bin_edges_test(geo, y_bin_edges, z_bin_edges): print('----------------------------------------------------------------------------------------------') +def get_file_particle_type(event_pump): + """ + Returns a string that specifies the type of the particles that are contained in the input file. + + Parameters + ---------- + event_pump : kp.io.hdf5.HDF5Pump + Km3pipe HDF5Pump instance which is linked to the input file. + + Returns + ------- + file_particle_type : str + String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino']. + + """ + if 'McTracks' not in event_pump[0]: + file_particle_type = 'undefined' + else: + particle_type = event_pump[0]['McTracks'].type + if np.abs(particle_type) == 13: + file_particle_type = 'muon' + elif np.abs(particle_type) in [12, 14, 16]: + file_particle_type = 'neutrino' + else: + raise ValueError('The type of the particles in the "McTracks" folder, <', str(particle_type), '> is not known.') + + return file_particle_type + + def skip_event(event_track, data_cuts): """ Function that checks if an event should be skipped, depending on the data_cuts input. @@ -379,13 +408,15 @@ def data_to_images(fname, detx_filepath, n_bins, det_geo, do2d, do2d_plots, do3d # Initialize HDF5Pump of the input file event_pump = kp.io.hdf5.HDF5Pump(filename=fname) + file_particle_type = get_file_particle_type(event_pump) + print('Generating histograms from the hits in XYZT format for files based on ' + fname) for i, event_blob in enumerate(event_pump): if i % 10 == 0: print('Event No. ' + str(i)) # filter out all hit and track information belonging that to this event - event_hits, event_track = get_event_data(event_blob, geo, do_mc_hits, data_cuts, do4d, prod_ident) + event_hits, event_track = get_event_data(event_blob, file_particle_type, geo, do_mc_hits, data_cuts, do4d, prod_ident) continue_bool = skip_event(event_track, data_cuts) if continue_bool: continue diff --git a/orcasong/file_to_hits.py b/orcasong/file_to_hits.py index fe311ff5584a53d424bc6769eb876ac8f482d77c..785da075b9b4a2670e07157c63c96bd7e9516758 100644 --- a/orcasong/file_to_hits.py +++ b/orcasong/file_to_hits.py @@ -52,10 +52,9 @@ def get_time_residual_nu_interaction_mean_triggered_hits(time_interaction, hits_ return time_residual_vertex -def get_event_data(event_blob, geo, do_mc_hits, data_cuts, do4d, prod_ident): +def get_hits(event_blob, geo, do_mc_hits, data_cuts, do4d): """ - Reads a km3pipe blob which contains the information for one event and returns a hit array and a track array - that contains all relevant information of the event. + Returns a hits array that contains [pos_x, pos_y, pos_z, time, triggered, channel_id (optional)]. Parameters ---------- @@ -73,42 +72,13 @@ def get_event_data(event_blob, geo, do_mc_hits, data_cuts, do4d, prod_ident): do4d : tuple(bool, str) Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz. In the case of 'channel_id', this information needs to be included in the event_hits as well. - prod_ident : int - Optional int that identifies the used production, more documentation in the docs of the main function. - geo : None/kp.Geometry - If none, the event_blob should already contain the calibrated hit information (e.g. pos_xyz). - Else, a km3pipe Geometry instance that contains the geometry information of the detector. Returns ------- event_hits : ndarray(ndim=2) - 2D array containing the hit information of the event [pos_xyz, time, triggered, (channel_id)]. - - event_track : ndarray(ndim=1) - 1D array containing important MC information of the event, - [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_track, run_id, - vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex, (prod_ident)]. + 2D array that contains the hits data for the input event [pos_x, pos_y, pos_z, time, triggered, (channel_id)]. """ - p = get_primary_track_index(event_blob) # TODO fix for mupage/random_noise - - # parse track, contains event information like mc_energy - - if 'Header' in event_blob: # if Header exists in file, take run_id from it. Else take it from RawHeader. - run_id = event_blob['Header'].start_run.run_id.astype('float32') - else: - run_id = event_blob['RawHeader'][0][0].astype('float32') - - event_id = event_blob['EventInfo'].event_id[0] - particle_type = event_blob['McTracks'][p].type - energy = event_blob['McTracks'][p].energy - is_cc = event_blob['McTracks'][p].is_cc - bjorkeny = event_blob['McTracks'][p].bjorkeny - dir_x, dir_y, dir_z = event_blob['McTracks'][p].dir_x, event_blob['McTracks'][p].dir_y, event_blob['McTracks'][p].dir_z - time_track = event_blob['McTracks'][p].time # actually always 0 for primary neutrino, measured in MC time - vertex_pos_x, vertex_pos_y, vertex_pos_z = event_blob['McTracks'][p].pos_x, event_blob['McTracks'][p].pos_y, event_blob['McTracks'][p].pos_z - time_interaction = event_blob['McTracks'][p].time - # parse hits [x,y,z,time] hits = event_blob['Hits'] if do_mc_hits is False else event_blob['McHits'] @@ -123,20 +93,146 @@ def get_event_data(event_blob, geo, do_mc_hits, data_cuts, do4d, prod_ident): hits_time = hits.time triggered = hits.triggered - time_residual_vertex = get_time_residual_nu_interaction_mean_triggered_hits(time_interaction, hits_time, triggered) + ax = np.newaxis + event_hits = np.concatenate([pos_x[:, ax], pos_y[:, ax], pos_z[:, ax], hits_time[:, ax], triggered[:, ax]], axis=1) # dtype: np.float64 + + if do4d[0] is True and do4d[1] == 'channel_id' or do4d[1] == 'xzt-c': + event_hits = np.concatenate([event_hits, hits.channel_id[:, ax]], axis=1) + + return event_hits + + +def get_tracks(event_blob, file_particle_type, event_hits, prod_ident): + """ + Returns the event_track, which contains important event_info and mc_tracks data for the input event. + + Parameters + ---------- + event_blob : kp.io.HDF5Pump.blob + Event blob of the HDF5Pump which contains all information for one event. + file_particle_type : str + String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino']. + event_hits : ndarray(ndim=2) + 2D array that contains the hits data for the input event. + prod_ident : int + Optional int that identifies the used production, more documentation in the docs of the main function. + + Returns + ------- + event_track : ndarray(ndim=1) + 1D array that contains important event_info and mc_tracks data for the input event. + + If file_particle_type = 'undefined': + [event_id, run_id, (prod_ident)]. + + If file_particle_type = 'neutrino'/'muon': + [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_track, run_id, + vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex/n_muons, (prod_ident)]. + + """ + # parse EventInfo and Header information + event_id = event_blob['EventInfo'].event_id[0] + if 'Header' in event_blob: # if Header exists in file, take run_id from it. Else take it from RawHeader. + run_id = event_blob['Header'].start_run.run_id.astype('float32') + else: + run_id = event_blob['RawHeader'][0][0].astype('float32') + + # collect all event_track information, dependent on file_particle_type + + if file_particle_type == 'undefined': + particle_type = 0 + track = [event_id, run_id, particle_type] + + elif file_particle_type == 'muon': + # take index 1, index 0 is the empty neutrino mc_track + particle_type = event_blob['McTracks'][1].type # assumed that this is the same for all muons in a bundle + is_cc = event_blob['McTracks'][1].is_cc # always 1 actually + bjorkeny = event_blob['McTracks'][1].bjorkeny # always 0 actually + time_interaction = event_blob['McTracks'][1].time # same for all muons in a bundle + n_muons = event_blob['McTracks'].shape[0] - 1 # takes position of time_residual_vertex in 'neutrino' case + + # sum up the energy of all muons + energy = np.sum(event_blob['McTracks'].energy) + + # all muons in a bundle are parallel, so just take dir of first muon + dir_x, dir_y, dir_z = event_blob['McTracks'][1].dir_x, event_blob['McTracks'][1].dir_y, event_blob['McTracks'][1].dir_z + + # vertex is the weighted (energy) mean of the individual vertices + vertex_pos_x = np.average(event_blob['McTracks'][1:].pos_x, weights=event_blob[4]['McTracks'][1:].energy) + vertex_pos_y = np.average(event_blob['McTracks'][1:].pos_y, weights=event_blob[4]['McTracks'][1:].energy) + vertex_pos_z = np.average(event_blob['McTracks'][1:].pos_z, weights=event_blob[4]['McTracks'][1:].energy) + + track = [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_interaction, run_id, + vertex_pos_x, vertex_pos_y, vertex_pos_z, n_muons] + + elif file_particle_type == 'neutrino': + p = get_primary_track_index(event_blob) + particle_type = event_blob['McTracks'][p].type + energy = event_blob['McTracks'][p].energy + is_cc = event_blob['McTracks'][p].is_cc + bjorkeny = event_blob['McTracks'][p].bjorkeny + dir_x, dir_y, dir_z = event_blob['McTracks'][p].dir_x, event_blob['McTracks'][p].dir_y, event_blob['McTracks'][p].dir_z + time_interaction = event_blob['McTracks'][p].time # actually always 0 for primary neutrino, measured in MC time + vertex_pos_x, vertex_pos_y, vertex_pos_z = event_blob['McTracks'][p].pos_x, event_blob['McTracks'][p].pos_y, \ + event_blob['McTracks'][p].pos_z + + hits_time, triggered = event_hits[:, 3], event_hits[:, 4] + time_residual_vertex = get_time_residual_nu_interaction_mean_triggered_hits(time_interaction, hits_time, triggered) + + track = [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_interaction, run_id, + vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex] + + else: + raise ValueError('The file_particle_type "', str(file_particle_type), '" is not known.') - # save collected information to arrays event_track and event_hits - track = [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_track, run_id, - vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex] if prod_ident is not None: track.append(prod_ident) event_track = np.array(track, dtype=np.float64) - ax = np.newaxis - event_hits = np.concatenate([pos_x[:, ax], pos_y[:, ax], pos_z[:, ax], hits_time[:, ax], triggered[:, ax]], axis=1) # dtype: np.float64 + return event_track - if do4d[0] is True and do4d[1] == 'channel_id' or do4d[1] == 'xzt-c': - channel_id = hits.channel_id - event_hits = np.concatenate([event_hits, channel_id[:, ax]], axis=1) + +def get_event_data(event_blob, file_particle_type, geo, do_mc_hits, data_cuts, do4d, prod_ident): + """ + Reads a km3pipe blob which contains the information for one event and returns a hit array and a track array + that contains all relevant information of the event. + + Parameters + ---------- + event_blob : kp.io.HDF5Pump.blob + Event blob of the HDF5Pump which contains all information for one event. + file_particle_type : str + String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino']. + geo : kp.Geometry + km3pipe Geometry instance that contains the geometry information of the detector. + Only used if the event_blob is from a non-calibrated file! + do_mc_hits : bool + Tells the function of the hits (mc_hits + BG) or the mc_hits only should be parsed. + In the case of mc_hits, the dom_id needs to be calculated thanks to the jpp output. + data_cuts : dict + Specifies if cuts should be applied. + Contains the keys 'triggered' and 'energy_lower/upper_limit' and 'throw_away_prob'. + do4d : tuple(bool, str) + Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz. + In the case of 'channel_id', this information needs to be included in the event_hits as well. + prod_ident : int + Optional int that identifies the used production, more documentation in the docs of the main function. + geo : None/kp.Geometry + If none, the event_blob should already contain the calibrated hit information (e.g. pos_xyz). + Else, a km3pipe Geometry instance that contains the geometry information of the detector. + + Returns + ------- + event_hits : ndarray(ndim=2) + 2D array containing the hit information of the event [pos_xyz, time, triggered, (channel_id)]. + + event_track : ndarray(ndim=1) + 1D array containing important MC information of the event, + [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_track, run_id, + vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex, (prod_ident)]. + + """ + event_hits = get_hits(event_blob, geo, do_mc_hits, data_cuts, do4d) + event_track = get_tracks(event_blob, file_particle_type, event_hits, prod_ident) return event_hits, event_track \ No newline at end of file diff --git a/orcasong/job_submission_scripts/submit_data_to_images.sh b/orcasong/job_submission_scripts/submit_data_to_images.sh index 8e8504df640389a8a3d244c990a068bf8e88e08a..6f42f3c7da8c5c9e9c345a857cf0e1a3a5b2a537 100644 --- a/orcasong/job_submission_scripts/submit_data_to_images.sh +++ b/orcasong/job_submission_scripts/submit_data_to_images.sh @@ -31,6 +31,8 @@ files_per_job=60 # total number of files per job, e.g. 10 jobs for 600: 600/10 = #--- USER INPUT ---## +# setup + n=${PBS_ARRAYID} source activate ${python_env_folder} cd ${code_folder} @@ -62,25 +64,8 @@ folder_ip_files_arr=( ["neutr_3-100GeV"]="/home/saturn/capn/mppi033h/Data/raw_da filename="${filename_arr[${particle_type}]}" folder="${folder_ip_files_arr[${mc_prod}]}" -#ParticleType=muon-CC -#ParticleType=elec-CC -#ParticleType=elec-NC -#ParticleType=tau-CC -# ----- 3-100GeV------ -#FileName=JTE.KM3Sim.gseagen.muon-CC.3-100GeV-9.1E7-1bin-3.0gspec.ORCA115_9m_2016 #muon-CC -#FileName=JTE.KM3Sim.gseagen.elec-CC.3-100GeV-1.1E6-1bin-3.0gspec.ORCA115_9m_2016 #elec-CC -#FileName=JTE.KM3Sim.gseagen.elec-NC.3-100GeV-3.4E6-1bin-3.0gspec.ORCA115_9m_2016 #elec-NC -#FileName=JTE.KM3Sim.gseagen.tau-CC.3.4-100GeV-2.0E8-1bin-3.0gspec.ORCA115_9m_2016 #tau-CC -#HDFFOLDER=/home/woody/capn/mppi033h/Data/ORCA_JTE_NEMOWATER/raw_data/calibrated/with_jte_times/3-100GeV/${ParticleType} -# ----- 3-100GeV------ -# ----- 1-5GeV------ -#FileName=JTE.KM3Sim.gseagen.muon-CC.1-5GeV-9.2E5-1bin-1.0gspec.ORCA115_9m_2016 #muon-CC -#FileName=JTE.KM3Sim.gseagen.elec-CC.1-5GeV-2.7E5-1bin-1.0gspec.ORCA115_9m_2016 #elec-CC -#FileName=JTE.KM3Sim.gseagen.elec-NC.1-5GeV-2.2E6-1bin-1.0gspec.ORCA115_9m_2016 #elec-NC -#HDFFOLDER=/home/woody/capn/mppi033h/Data/ORCA_JTE_NEMOWATER/raw_data/calibrated/with_jte_times/1-5GeV/${ParticleType} -# ----- 1-5GeV------ - # run + no_of_loops=$((${files_per_job}/4)) # divide by 4 cores -> e.g, 15 4-core loops needed for files_per_job=60 file_no_start=$((1+((${n}-1) * ${files_per_job}))) # filenumber of the first file that is being processed by this script (depends on JobArray variable 'n')