Commit 5430773f authored by ViaFerrata

Add the possibility to also use mupage or random_noise files as input to OrcaSong.

parent c38bd6fe
# A config file for OrcaSong with multiple configurations.
# Uncomment the config that you want to use!
# More info about the .toml format at https://github.com/toml-lang/toml
### All available options with some dummy values
# --n_bins = '11,13,18,60'
# --det_geo = 'Orca_115l_23m_h_9m_v'
# --do2d = false
# --do2d_plots = false
# --do2d_plots_n = 10
# --do3d = false
# --do4d = true
# --do4d_mode = 'time'
# --timecut_mode = 'trigger_cluster'
# --timecut_timespan = 'tight_1'
# --do_mc_hits = false
# --data_cut_triggered = false
# --data_cut_e_low = 3
# --data_cut_e_high = 100
# --data_cut_throw_away = 0.00
# --prod_ident = 1
####----- Configuration for ORCA 115l -----####
###--- 3-100GeV ---###
## XYZ-C
--n_bins = '11,13,18,31'
--det_geo = 'Orca_115l_23m_h_9m_v'
--do4d_mode = 'channel_id'
--timecut_mode = 'trigger_cluster'
--timecut_timespan = 'tight_0'
--prod_ident = 3 # for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod ; mupage: 3 ; random_noise: 4
\ No newline at end of file
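The block below is an illustrative sketch only, not part of the commit: one way a "--option = value" style .toml config like the one above could be read in Python. The helper name load_orcasong_config and the config file name are assumptions for illustration, not OrcaSong API.

import toml

def load_orcasong_config(path):
    """Load a .toml config file and strip the leading '--' from each key."""
    raw = toml.load(path)  # e.g. {'--n_bins': '11,13,18,31', '--prod_ident': 3, ...}
    return {key.lstrip('-'): value for key, value in raw.items()}

config = load_orcasong_config('orca_115l_xyz_c.toml')        # hypothetical file name
n_bins = tuple(int(n) for n in config['n_bins'].split(','))  # -> (11, 13, 18, 31)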
# A config file for OrcaSong with multiple configurations.
# Uncomment the config that you want to use!
# More info about the .toml format at https://github.com/toml-lang/toml
### All available options with some dummy values
# --n_bins = '11,13,18,60'
# --det_geo = 'Orca_115l_23m_h_9m_v'
# --do2d = false
# --do2d_plots = false
# --do2d_plots_n = 10
# --do3d = false
# --do4d = true
# --do4d_mode = 'time'
# --timecut_mode = 'trigger_cluster'
# --timecut_timespan = 'tight_1'
# --do_mc_hits = false
# --data_cut_triggered = false
# --data_cut_e_low = 3
# --data_cut_e_high = 100
# --data_cut_throw_away = 0.00
# --prod_ident = 1
####----- Configuration for ORCA 115l -----####
###--- 3-100GeV ---###
## XYZ-T
--n_bins = '11,13,18,100'
--det_geo = 'Orca_115l_23m_h_9m_v'
--do4d_mode = 'time'
--timecut_mode = 'trigger_cluster'
--timecut_timespan = 'tight_0'
--prod_ident = 3 # for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod ; mupage: 3 ; random_noise: 4
\ No newline at end of file
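Illustrative sketch only (not part of the commit): the two configs above differ only in the 4th histogram dimension, channel_id with 31 bins (one per PMT in a KM3NeT DOM) versus time with 100 bins. A toy np.histogramdd call shows how '--n_bins' maps to the shape of the resulting 4D histogram; this is not the OrcaSong binning code itself.

import numpy as np

n_bins_xyz_c = (11, 13, 18, 31)    # XYZ-C: 4th dim = channel_id
n_bins_xyz_t = (11, 13, 18, 100)   # XYZ-T: 4th dim = time

hits = np.random.rand(1000, 4)     # toy stand-in for [pos_x, pos_y, pos_z, time]
hist_4d, _edges = np.histogramdd(hits, bins=n_bins_xyz_t)
print(hist_4d.shape)               # -> (11, 13, 18, 100)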
@@ -275,6 +275,35 @@ def calculate_bin_edges_test(geo, y_bin_edges, z_bin_edges):
print('----------------------------------------------------------------------------------------------')
def get_file_particle_type(event_pump):
"""
Returns a string that specifies the type of the particles that are contained in the input file.
Parameters
----------
event_pump : kp.io.hdf5.HDF5Pump
Km3pipe HDF5Pump instance which is linked to the input file.
Returns
-------
file_particle_type : str
String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino'].
"""
if 'McTracks' not in event_pump[0]:
file_particle_type = 'undefined'
else:
particle_type = event_pump[0]['McTracks'].type
if np.abs(particle_type) == 13:
file_particle_type = 'muon'
elif np.abs(particle_type) in [12, 14, 16]:
file_particle_type = 'neutrino'
else:
raise ValueError('The type of the particles in the "McTracks" folder, <' + str(particle_type) + '>, is not known.')
return file_particle_type
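Illustrative sketch only (not part of the commit): a simplified, standalone version of the PDG-code mapping used above (|type| 13 -> muon, |type| 12/14/16 -> nu_e/nu_mu/nu_tau), written for a plain array of MC track types and without km3pipe. The function name and the np.all/np.isin handling are assumptions for clarity; the committed function instead inspects event_pump[0]['McTracks'] and raises a ValueError for unknown types.

import numpy as np

def classify_mc_types(mc_types):
    abs_types = np.abs(np.asarray(mc_types))
    if np.all(abs_types == 13):                   # PDG 13: muons (mupage files)
        return 'muon'
    if np.all(np.isin(abs_types, (12, 14, 16))):  # PDG 12/14/16: neutrinos
        return 'neutrino'
    return 'undefined'                            # e.g. random_noise files without McTracks

print(classify_mc_types([13, -13, 13]))   # -> 'muon'
print(classify_mc_types([-14]))           # -> 'neutrino'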
def skip_event(event_track, data_cuts):
"""
Function that checks if an event should be skipped, depending on the data_cuts input.
@@ -379,13 +408,15 @@ def data_to_images(fname, detx_filepath, n_bins, det_geo, do2d, do2d_plots, do3d
# Initialize HDF5Pump of the input file
event_pump = kp.io.hdf5.HDF5Pump(filename=fname)
file_particle_type = get_file_particle_type(event_pump)
print('Generating histograms from the hits in XYZT format for files based on ' + fname)
for i, event_blob in enumerate(event_pump):
if i % 10 == 0:
print('Event No. ' + str(i))
# filter out all hit and track information belonging to this event
event_hits, event_track = get_event_data(event_blob, file_particle_type, geo, do_mc_hits, data_cuts, do4d, prod_ident)
continue_bool = skip_event(event_track, data_cuts)
if continue_bool: continue
@@ -52,10 +52,9 @@ def get_time_residual_nu_interaction_mean_triggered_hits(time_interaction, hits_
return time_residual_vertex
def get_hits(event_blob, geo, do_mc_hits, data_cuts, do4d):
"""
Returns a hits array that contains [pos_x, pos_y, pos_z, time, triggered, channel_id (optional)].
Parameters
----------
@@ -73,42 +72,13 @@ def get_event_data(event_blob, geo, do_mc_hits, data_cuts, do4d, prod_ident):
do4d : tuple(bool, str)
Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz.
In the case of 'channel_id', this information needs to be included in the event_hits as well.
geo : None/kp.Geometry
If none, the event_blob should already contain the calibrated hit information (e.g. pos_xyz).
Else, a km3pipe Geometry instance that contains the geometry information of the detector.
Returns
-------
event_hits : ndarray(ndim=2)
2D array that contains the hits data for the input event [pos_x, pos_y, pos_z, time, triggered, (channel_id)].
"""
# parse hits [x,y,z,time]
hits = event_blob['Hits'] if do_mc_hits is False else event_blob['McHits']
@@ -123,20 +93,146 @@ def get_event_data(event_blob, geo, do_mc_hits, data_cuts, do4d, prod_ident):
hits_time = hits.time
triggered = hits.triggered
ax = np.newaxis
event_hits = np.concatenate([pos_x[:, ax], pos_y[:, ax], pos_z[:, ax], hits_time[:, ax], triggered[:, ax]], axis=1) # dtype: np.float64
if do4d[0] is True and (do4d[1] == 'channel_id' or do4d[1] == 'xzt-c'):
event_hits = np.concatenate([event_hits, hits.channel_id[:, ax]], axis=1)
return event_hits
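Illustrative toy example only (not part of the commit): the np.newaxis trick in get_hits turns the 1D per-hit arrays into column vectors so that np.concatenate can stack them into one 2D array with one row per hit; get_tracks later reads single columns back out (event_hits[:, 3] for the hit times, event_hits[:, 4] for the trigger flags).

import numpy as np

pos_x     = np.array([1.0, 2.0, 3.0])
hits_time = np.array([10.0, 20.0, 30.0])
triggered = np.array([1.0, 0.0, 1.0])

ax = np.newaxis
event_hits = np.concatenate([pos_x[:, ax], hits_time[:, ax], triggered[:, ax]], axis=1)
print(event_hits.shape)   # -> (3, 3), one row per hit
print(event_hits[:, 1])   # -> [10. 20. 30.], a single column read back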
def get_tracks(event_blob, file_particle_type, event_hits, prod_ident):
"""
Returns the event_track, which contains important event_info and mc_tracks data for the input event.
Parameters
----------
event_blob : kp.io.HDF5Pump.blob
Event blob of the HDF5Pump which contains all information for one event.
file_particle_type : str
String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino'].
event_hits : ndarray(ndim=2)
2D array that contains the hits data for the input event.
prod_ident : int
Optional int that identifies the used production, more documentation in the docs of the main function.
Returns
-------
event_track : ndarray(ndim=1)
1D array that contains important event_info and mc_tracks data for the input event.
If file_particle_type = 'undefined':
[event_id, run_id, particle_type, (prod_ident)].
If file_particle_type = 'neutrino'/'muon':
[event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_track, run_id,
vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex/n_muons, (prod_ident)].
"""
# parse EventInfo and Header information
event_id = event_blob['EventInfo'].event_id[0]
if 'Header' in event_blob: # if Header exists in file, take run_id from it. Else take it from RawHeader.
run_id = event_blob['Header'].start_run.run_id.astype('float32')
else:
run_id = event_blob['RawHeader'][0][0].astype('float32')
# collect all event_track information, dependent on file_particle_type
if file_particle_type == 'undefined':
particle_type = 0
track = [event_id, run_id, particle_type]
elif file_particle_type == 'muon':
# take index 1, index 0 is the empty neutrino mc_track
particle_type = event_blob['McTracks'][1].type # assumed that this is the same for all muons in a bundle
is_cc = event_blob['McTracks'][1].is_cc # always 1 actually
bjorkeny = event_blob['McTracks'][1].bjorkeny # always 0 actually
time_interaction = event_blob['McTracks'][1].time # same for all muons in a bundle
n_muons = event_blob['McTracks'].shape[0] - 1 # takes position of time_residual_vertex in 'neutrino' case
# sum up the energy of all muons
energy = np.sum(event_blob['McTracks'].energy)
# all muons in a bundle are parallel, so just take dir of first muon
dir_x, dir_y, dir_z = event_blob['McTracks'][1].dir_x, event_blob['McTracks'][1].dir_y, event_blob['McTracks'][1].dir_z
# vertex is the weighted (energy) mean of the individual vertices
vertex_pos_x = np.average(event_blob['McTracks'][1:].pos_x, weights=event_blob['McTracks'][1:].energy)
vertex_pos_y = np.average(event_blob['McTracks'][1:].pos_y, weights=event_blob['McTracks'][1:].energy)
vertex_pos_z = np.average(event_blob['McTracks'][1:].pos_z, weights=event_blob['McTracks'][1:].energy)
track = [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_interaction, run_id,
vertex_pos_x, vertex_pos_y, vertex_pos_z, n_muons]
elif file_particle_type == 'neutrino':
p = get_primary_track_index(event_blob)
particle_type = event_blob['McTracks'][p].type
energy = event_blob['McTracks'][p].energy
is_cc = event_blob['McTracks'][p].is_cc
bjorkeny = event_blob['McTracks'][p].bjorkeny
dir_x, dir_y, dir_z = event_blob['McTracks'][p].dir_x, event_blob['McTracks'][p].dir_y, event_blob['McTracks'][p].dir_z
time_interaction = event_blob['McTracks'][p].time # actually always 0 for primary neutrino, measured in MC time
vertex_pos_x, vertex_pos_y, vertex_pos_z = event_blob['McTracks'][p].pos_x, event_blob['McTracks'][p].pos_y, \
event_blob['McTracks'][p].pos_z
hits_time, triggered = event_hits[:, 3], event_hits[:, 4]
time_residual_vertex = get_time_residual_nu_interaction_mean_triggered_hits(time_interaction, hits_time, triggered)
track = [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_interaction, run_id,
vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex]
else:
raise ValueError('The file_particle_type "' + str(file_particle_type) + '" is not known.')
# save the collected information to the event_track array
if prod_ident is not None: track.append(prod_ident)
event_track = np.array(track, dtype=np.float64)
return event_track
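Illustrative worked example only (not part of the commit): the muon-bundle aggregation in the 'muon' branch above, with toy numbers in place of McTracks entries. The bundle energy is the sum of the individual muon energies, and the bundle vertex is their energy-weighted mean position.

import numpy as np

energies = np.array([10.0, 30.0])    # two muons of a toy bundle (the empty neutrino
pos_x    = np.array([100.0, 140.0])  # track at index 0 is assumed already stripped off)

n_muons      = len(energies)                        # -> 2
energy       = np.sum(energies)                     # -> 40.0
vertex_pos_x = np.average(pos_x, weights=energies)  # -> (10*100 + 30*140) / 40 = 130.0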
def get_event_data(event_blob, file_particle_type, geo, do_mc_hits, data_cuts, do4d, prod_ident):
"""
Reads a km3pipe blob which contains the information for one event and returns a hit array and a track array
that contain all relevant information about the event.
Parameters
----------
event_blob : kp.io.HDF5Pump.blob
Event blob of the HDF5Pump which contains all information for one event.
file_particle_type : str
String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino'].
geo : kp.Geometry
km3pipe Geometry instance that contains the geometry information of the detector.
Only used if the event_blob is from a non-calibrated file!
do_mc_hits : bool
Tells the function whether the hits (mc_hits + BG) or only the mc_hits should be parsed.
In the case of mc_hits, the dom_id needs to be calculated based on the jpp output.
data_cuts : dict
Specifies if cuts should be applied.
Contains the keys 'triggered' and 'energy_lower/upper_limit' and 'throw_away_prob'.
do4d : tuple(bool, str)
Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz.
In the case of 'channel_id', this information needs to be included in the event_hits as well.
prod_ident : int
Optional int that identifies the used production, more documentation in the docs of the main function.
Returns
-------
event_hits : ndarray(ndim=2)
2D array containing the hit information of the event [pos_xyz, time, triggered, (channel_id)].
event_track : ndarray(ndim=1)
1D array containing important MC information of the event,
[event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_track, run_id,
vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex, (prod_ident)].
"""
event_hits = get_hits(event_blob, geo, do_mc_hits, data_cuts, do4d)
event_track = get_tracks(event_blob, file_particle_type, event_hits, prod_ident)
return event_hits, event_track
\ No newline at end of file
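Illustrative sketch only (not part of the commit): how downstream code could label the columns of the flat event_track array, following the 'neutrino'/'muon' layout documented in the docstrings above (without the optional prod_ident column). The field list is copied from the docstring; the helper itself is an assumption, not OrcaSong API.

import numpy as np

TRACK_FIELDS = ['event_id', 'particle_type', 'energy', 'is_cc', 'bjorkeny',
                'dir_x', 'dir_y', 'dir_z', 'time_track', 'run_id',
                'vertex_pos_x', 'vertex_pos_y', 'vertex_pos_z',
                'time_residual_vertex_or_n_muons']

def track_as_dict(event_track):
    return dict(zip(TRACK_FIELDS, event_track))

example = track_as_dict(np.arange(14, dtype=np.float64))
print(example['energy'])   # -> 2.0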
@@ -31,6 +31,8 @@ files_per_job=60 # total number of files per job, e.g. 10 jobs for 600: 600/10 =
#--- USER INPUT ---##
# setup
n=${PBS_ARRAYID}
source activate ${python_env_folder}
cd ${code_folder}
@@ -62,25 +64,8 @@ folder_ip_files_arr=( ["neutr_3-100GeV"]="/home/saturn/capn/mppi033h/Data/raw_da
filename="${filename_arr[${particle_type}]}"
folder="${folder_ip_files_arr[${mc_prod}]}"
#ParticleType=muon-CC
#ParticleType=elec-CC
#ParticleType=elec-NC
#ParticleType=tau-CC
# ----- 3-100GeV------
#FileName=JTE.KM3Sim.gseagen.muon-CC.3-100GeV-9.1E7-1bin-3.0gspec.ORCA115_9m_2016 #muon-CC
#FileName=JTE.KM3Sim.gseagen.elec-CC.3-100GeV-1.1E6-1bin-3.0gspec.ORCA115_9m_2016 #elec-CC
#FileName=JTE.KM3Sim.gseagen.elec-NC.3-100GeV-3.4E6-1bin-3.0gspec.ORCA115_9m_2016 #elec-NC
#FileName=JTE.KM3Sim.gseagen.tau-CC.3.4-100GeV-2.0E8-1bin-3.0gspec.ORCA115_9m_2016 #tau-CC
#HDFFOLDER=/home/woody/capn/mppi033h/Data/ORCA_JTE_NEMOWATER/raw_data/calibrated/with_jte_times/3-100GeV/${ParticleType}
# ----- 3-100GeV------
# ----- 1-5GeV------
#FileName=JTE.KM3Sim.gseagen.muon-CC.1-5GeV-9.2E5-1bin-1.0gspec.ORCA115_9m_2016 #muon-CC
#FileName=JTE.KM3Sim.gseagen.elec-CC.1-5GeV-2.7E5-1bin-1.0gspec.ORCA115_9m_2016 #elec-CC
#FileName=JTE.KM3Sim.gseagen.elec-NC.1-5GeV-2.2E6-1bin-1.0gspec.ORCA115_9m_2016 #elec-NC
#HDFFOLDER=/home/woody/capn/mppi033h/Data/ORCA_JTE_NEMOWATER/raw_data/calibrated/with_jte_times/1-5GeV/${ParticleType}
# ----- 1-5GeV------
# run
no_of_loops=$((${files_per_job}/4)) # divide by 4 cores -> e.g., 15 4-core loops needed for files_per_job=60
file_no_start=$((1+((${n}-1) * ${files_per_job}))) # file number of the first file that is processed by this script (depends on JobArray variable 'n')
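Illustrative sketch only (not part of the commit): the job-splitting arithmetic of the submit script, mirrored in Python for clarity. With files_per_job=60 and 4 cores, each job runs 15 loops of 4 files, and job number n starts at file number 1 + (n-1)*files_per_job.

files_per_job = 60
n = 2                                        # stand-in for ${PBS_ARRAYID}

no_of_loops   = files_per_job // 4           # -> 15 four-file loops per job
file_no_start = 1 + (n - 1) * files_per_job  # -> 61, first file of job no. 2
print(no_of_loops, file_no_start)            # -> 15 61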