From fc328b769ecd2926bade6f00a707eaa25c333799 Mon Sep 17 00:00:00 2001
From: ViaFerrata <>
Date: Sun, 6 Jan 2019 12:27:14 +0100
Subject: [PATCH] Fix for parsing of the event_id of random_noise files.

 orcasong/                      | 16 ++++++++++++----
 .../conf_ORCA_115l_1-5GeV_xyz-c.toml          | 13 ++++++++++---
 .../conf_ORCA_115l_1-5GeV_xyz-t.toml          | 13 ++++++++++---
 .../conf_ORCA_115l_3-100GeV_xyz-c.toml        | 13 ++++++++++---
 .../conf_ORCA_115l_3-100GeV_xyz-t.toml        | 13 ++++++++++---
 .../                  | 19 ++++++++-----------
 6 files changed, 60 insertions(+), 27 deletions(-)

diff --git a/orcasong/ b/orcasong/
index 47a6f5c..2044b54 100644
--- a/orcasong/
+++ b/orcasong/
@@ -133,8 +133,14 @@ def get_tracks(event_blob, file_particle_type, event_hits, prod_ident):
         vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex/n_muons, (prod_ident)].
-    # parse EventInfo and Header information
-    event_id = event_blob['EventInfo'].event_id[0]
+    ## parse EventInfo and Header information
+    # The km3pipe event_id is the aanet frame_index.
+    # In random_noise files, multiple events share the same frame_index, so the group_id is used as event_id instead.
+    if file_particle_type == 'undefined':
+        event_id = event_blob['EventInfo'].group_id[0]
+    else:
+        event_id = event_blob['EventInfo'].event_id[0]
     if 'Header' in event_blob: # if Header exists in file, take run_id from it.
         run_id = event_blob['Header'].start_run.run_id.astype('float32')
@@ -145,11 +151,13 @@ def get_tracks(event_blob, file_particle_type, event_hits, prod_ident):
             raise InputError('The run_id could not be read from the EventInfo or the Header, '
                              'please check the source code in get_tracks().')
-    # collect all event_track information, dependent on file_particle_type
+    ## collect all event_track information, dependent on file_particle_type
     if file_particle_type == 'undefined':
         particle_type = 0
-        track = {'event_id': event_id, 'run_id': run_id, 'particle_type': particle_type}
+        frame_index = event_blob['EventInfo'].event_id[0]
+        track = {'event_id': event_id, 'run_id': run_id, 'particle_type': particle_type, 'frame_index': frame_index}
     elif file_particle_type == 'muon':
         # take index 1, index 0 is the empty neutrino mc_track
diff --git a/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-c.toml b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-c.toml
index 6cb1553..0476b79 100644
--- a/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-c.toml
+++ b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-c.toml
@@ -1,8 +1,11 @@
-# A config file for OrcaSong with multiple configurations.
-# Outcomment the config that you want to use!
+# A config file for OrcaSong with a list of all configuration options.
 # More info about the .toml format at
 ### All available options with some dummy values
+# --o = '/home/woody/capn/mppi033h/orcasong_output'
+# --chunksize = 32
+# --complib = 'zlib'
+# --complevel = 1
 # --n_bins = '11,13,18,60'
 # --det_geo = 'Orca_115l_23m_h_9m_v'
 # --do2d = false
@@ -32,4 +35,8 @@
 --do4d_mode = 'channel_id'
 --timecut_mode = 'trigger_cluster'
 --timecut_timespan = 'tight-0'
---prod_ident = 2 # only for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod.
\ No newline at end of file
+--prod_ident = 2 # only for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod.
+--o = '/home/woody/capn/mppi033h'
+--chunksize = 32
+--complib = 'zlib'
+--complevel = 1
\ No newline at end of file
diff --git a/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-t.toml b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-t.toml
index 11ca230..de45470 100644
--- a/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-t.toml
+++ b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-t.toml
@@ -1,8 +1,11 @@
-# A config file for OrcaSong with multiple configurations.
-# Outcomment the config that you want to use!
+# A config file for OrcaSong with a list of all configuration options.
 # More info about the .toml format at
 ### All available options with some dummy values
+# --o = '/home/woody/capn/mppi033h/orcasong_output'
+# --chunksize = 32
+# --complib = 'zlib'
+# --complevel = 1
 # --n_bins = '11,13,18,60'
 # --det_geo = 'Orca_115l_23m_h_9m_v'
 # --do2d = false
@@ -32,4 +35,8 @@
 --do4d_mode = 'time'
 --timecut_mode = 'trigger_cluster'
 --timecut_timespan = 'tight-0'
---prod_ident = 2 # only for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod.
\ No newline at end of file
+--prod_ident = 2 # only for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod.
+--o = '/home/woody/capn/mppi033h'
+--chunksize = 32
+--complib = 'zlib'
+--complevel = 1
\ No newline at end of file
diff --git a/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-c.toml b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-c.toml
index 13f90d0..cbdba56 100644
--- a/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-c.toml
+++ b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-c.toml
@@ -1,8 +1,11 @@
-# A config file for OrcaSong with multiple configurations.
-# Outcomment the config that you want to use!
+# A config file for OrcaSong with a list of all configuration options.
 # More info about the .toml format at
 ### All available options with some dummy values
+# --o = '/home/woody/capn/mppi033h/orcasong_output'
+# --chunksize = 32
+# --complib = 'zlib'
+# --complevel = 1
 # --n_bins = '11,13,18,60'
 # --det_geo = 'Orca_115l_23m_h_9m_v'
 # --do2d = false
@@ -32,4 +35,8 @@
 --do4d_mode = 'channel_id'
 --timecut_mode = 'trigger_cluster'
 --timecut_timespan = 'tight-0'
---prod_ident = 1 # only for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod.
\ No newline at end of file
+--prod_ident = 1 # only for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod.
+--o = '/home/woody/capn/mppi033h'
+--chunksize = 32
+--complib = 'zlib'
+--complevel = 1
\ No newline at end of file
diff --git a/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-t.toml b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-t.toml
index 129186a..1af7c4d 100644
--- a/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-t.toml
+++ b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-t.toml
@@ -1,8 +1,11 @@
-# A config file for OrcaSong with multiple configurations.
-# Outcomment the config that you want to use!
+# A config file for OrcaSong with a list of all configuration options.
 # More info about the .toml format at
 ### All available options with some dummy values
+# --o = '/home/woody/capn/mppi033h/orcasong_output'
+# --chunksize = 32
+# --complib = 'zlib'
+# --complevel = 1
 # --n_bins = '11,13,18,60'
 # --det_geo = 'Orca_115l_23m_h_9m_v'
 # --do2d = false
@@ -32,4 +35,8 @@
 --do4d_mode = 'time'
 --timecut_mode = 'trigger_cluster'
 --timecut_timespan = 'tight-0'
---prod_ident = 1 # only for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod.
\ No newline at end of file
+--prod_ident = 1 # only for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod.
+--o = '/home/woody/capn/mppi033h'
+--chunksize = 32
+--complib = 'zlib'
+--complevel = 1
\ No newline at end of file
diff --git a/user/job_submission_scripts/ b/user/job_submission_scripts/
index a4ea863..ce1f1dc 100644
--- a/user/job_submission_scripts/
+++ b/user/job_submission_scripts/
@@ -1,7 +1,7 @@
-#PBS -l nodes=1:ppn=4:sl32g,walltime=15:00:00
-#PBS -o /home/woody/capn/mppi033h/logs/orcasong_submit_data_to_images_${PBS_JOBID}_${PBS_ARRAYID}.out -e /home/woody/capn/mppi033h/logs/orcasong/submit_data_to_images_${PBS_JOBID}_${PBS_ARRAYID}.err
+#PBS -l nodes=1:ppn=4:sl,walltime=15:00:00
+#PBS -o /home/woody/capn/mppi033h/logs/orcasong/submit_data_to_images_${PBS_JOBID}_${PBS_ARRAYID}.out -e /home/woody/capn/mppi033h/logs/orcasong/submit_data_to_images_${PBS_JOBID}_${PBS_ARRAYID}.err
 # first non-empty non-comment line ends PBS options
 # Submit with 'qsub -t 1-10'
@@ -23,14 +23,14 @@ python_env_folder=/home/hpc/capn/mppi033h/.virtualenv/python_3_env/
 # total number of files per job, e.g. 10 jobs for 600: 600/10 = 60. For neutrin
-# For neutrinos and random_noise n=60 with PBS -l nodes=1:ppn=4:sl32g,walltime=3:00:00
-# For mupage: n=1000 with PBS -l nodes=1:ppn=4:sl32g,walltime=15:00:00
+# For neutrinos and random_noise n=60 with PBS -l nodes=1:ppn=4:sl,walltime=3:00:00
+# For mupage: n=1000 with PBS -l nodes=1:ppn=4:sl,walltime=15:00:00
 #--- USER INPUT ---#
@@ -69,14 +69,14 @@ folder="${folder_ip_files_arr[${mc_prod}]}"
 # run
-no_of_loops=$((${files_per_job}/${4})) # divide by 4 cores -> e.g, 15 4-core loops needed for files_per_job=60
+no_of_loops=$((${files_per_job}/4)) # divide by 4 cores -> e.g, 15 4-core loops needed for files_per_job=60
 file_no_start=$((1+((${n}-1) * ${files_per_job}))) # filenumber of the first file that is being processed by this script (depends on JobArray variable 'n')
 # currently only working for 4 cores
 for (( k=1; k<=${no_of_loops}; k++ ))
-    file_no_loop_start=$((${file_no_start}+(k-1)*${4}))
+    file_no_loop_start=$((${file_no_start}+(k-1)*4))
     thread2=$((${file_no_loop_start} + 1))
     thread3=$((${file_no_loop_start} + 2))
@@ -87,7 +87,4 @@ do
     (time taskset -c 2  make_nn_images -c ${config_file} ${folder}/${filename}.${thread3}.h5 ${detx_filepath} > ${job_logs_folder}/${filename}.${thread3}.txt) &
     (time taskset -c 3  make_nn_images -c ${config_file} ${folder}/${filename}.${thread4}.h5 ${detx_filepath} > ${job_logs_folder}/${filename}.${thread4}.txt) &
\ No newline at end of file