diff --git a/orcasong/make_nn_images.py b/orcasong/make_nn_images.py
index 5d7112d6f1d236135263a6ed047431c5ace8d5de..531564a61a0fdf11c3571a8194f5d0a869c39cda 100644
--- a/orcasong/make_nn_images.py
+++ b/orcasong/make_nn_images.py
@@ -477,6 +477,9 @@ def make_nn_images(fname, detx_filepath, config):
     # Execute Pipeline
     pipe.drain()
 
+    if do2d_plots[0] is True:
+        pdf_2d_plots.close()
+
 
 def main():
     """
diff --git a/utilities/__init__.py b/orcasong_contrib/__init__.py
similarity index 100%
rename from utilities/__init__.py
rename to orcasong_contrib/__init__.py
diff --git a/utilities/timecut_test/__init__.py b/orcasong_contrib/data_tools/__init__.py
similarity index 100%
rename from utilities/timecut_test/__init__.py
rename to orcasong_contrib/data_tools/__init__.py
diff --git a/orcasong_contrib/data_tools/concatenate/__init__.py b/orcasong_contrib/data_tools/concatenate/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/orcasong_contrib/data_tools/concatenate/concatenate_h5.py b/orcasong_contrib/data_tools/concatenate/concatenate_h5.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c8f0a7e04010537f116c9ad4e2dcc6c8c27c6e1
--- /dev/null
+++ b/orcasong_contrib/data_tools/concatenate/concatenate_h5.py
@@ -0,0 +1,287 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Concatenates .h5 files. Works only for files where each dataset has the same number of rows."""
+
+import h5py
+import numpy as np
+import math
+from argparse import ArgumentParser, RawTextHelpFormatter
+import sys
+# from memory_profiler import profile # for memory profiling, call with @profile; myfunc()
+
+__author__ = 'Michael Moser'
+__license__ = 'AGPL'
+__version__ = '1.0'
+__email__ = 'michael.m.moser@fau.de'
+__status__ = 'Production'
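+
+# Example usage (illustrative only, mirroring the argparse help below; the paths are placeholders):
+#   python concatenate_h5.py file_1.h5 file_2.h5 /path/to/output.h5
+#   python concatenate_h5.py --list filepaths.txt /path/to/output.h5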
+
+
+def parse_input():
+    """
+    Parses the user input in order to return the most important information:
+
+    1) list of files that should be concatenated
+    2) the filepath of the output .h5 file
+    3) if the user wants to use a custom chunksize, or if the chunksize should be read from the first input file.
+    4) if the user wants to use a custom complib, or if the complib should be read from the first input file.
+    5) if the user wants to use a custom complevel, or if the complevel should be read from the first input file.
+
+    Returns
+    -------
+    file_list : list
+        List that contains all filepaths of the input files.
+    output_filepath : str
+        String that specifies the filepath (path+name) of the output .h5 file.
+    chunksize : None/int
+        Specifies the chunksize for axis_0 in the concatenated output files.
+        If None, the chunksize is read from the first input file.
+        Else, a custom chunksize will be used.
+    complib : None/str
+        Specifies the compression library that should be used for saving the concatenated output files.
+        If None, the compression library is read from the first input file.
+        Else, a custom compression library will be used.
+        Currently available: 'gzip', or 'lzf'.
+    complevel : None/int
+        Specifies the compression level that should be used for saving the concatenated output files.
+        A compression level is only available for gzip compression, not lzf!
+        If None, the compression level is read from the first input file.
+        Else, a custom compression level will be used.
+
+    """
+    parser = ArgumentParser(description='E.g. < python concatenate_h5.py file_1 file_2 /path/to/output.h5 > or '
+                                        '< python concatenate_h5.py --list filepaths.txt /path/to/output.h5 >.\n'
+                                        'Concatenates arrays stored in .h5 files for either multiple direct .h5 inputs or a .txt file of .h5 files (--list option).\n'
+                                        'Outputs a new .h5 file with the concatenated arrays. This output is chunked!\n'
+                                        'Careful: The folders of one file need to have the same number of rows (axis_0)!\n'
+                                        'Make a .txt file with < find /path/to/files -name "file_x-*.h5" | sort --version-sort > listname.list >\n'
+                                        'Chunksize: By default, the chunksize is set to the chunksize of the first input file!',
+                            formatter_class=RawTextHelpFormatter)
+
+    parser.add_argument('files', metavar='file', type=str, nargs='*', help='a file that should be concatenated, minimum of two.')
+    parser.add_argument('output_filepath', metavar='output_filepath', type=str, nargs=1, help='filepath and name of the output .h5 file')
+    parser.add_argument('-l', '--list', dest='list_file', type=str,
+                        help='filepath of a .list file that contains all .h5 files that should be concatenated')
+    parser.add_argument('--chunksize', dest='chunksize', type=int,
+                        help='Specify a chunksize value in order to use chunked storage for the concatenated .h5 file.'
+                             ' Otherwise, it will be read from the first input file.')
+    parser.add_argument('--complib', dest='complib', type=str,
+                        help='Specify a filter that should be used for compression. Either "gzip" or "lzf". '
+                             'Otherwise, the filter will be read from the first input file.')
+    parser.add_argument('--complevel', dest='complevel', type=int,
+                        help='Specify a compression filter strength that should be used for the compression. '
+                             'Otherwise, the filter will be read from the first input file. '
+                             'Can range from 0 to 9. Has no effect on "lzf" compression.')
+
+    if len(sys.argv) == 1:
+        parser.print_help()
+        sys.exit(1)
+
+    args = parser.parse_args()
+
+    if args.list_file:
+        file_list = [line.rstrip('\n') for line in open(args.list_file)]
+    else:
+        file_list = []
+        for filepath in args.files:
+            file_list.append(filepath)
+
+    output_filepath = args.output_filepath[0]
+
+    chunksize = None
+    if args.chunksize:
+        chunksize = args.chunksize
+        print('You chose chunksize = ' + str(chunksize))
+
+    complib = None
+    if args.complib:
+        complib = args.complib
+        print('You chose complib = ' + complib)
+
+    complevel = None
+    if args.complevel:
+        complevel = args.complevel
+        print('You chose complevel = ' + str(complevel))
+
+    return file_list, output_filepath, chunksize, complib, complevel
+
+
+def get_cum_number_of_rows(file_list):
+    """
+    Returns the cumulative number of rows (axis_0) in a list based on the specified input .h5 files.
+
+    Parameters
+    ----------
+    file_list : list
+        List that contains all filepaths of the input files.
+
+    Returns
+    -------
+    cum_number_of_rows_list : list
+        List that contains the cumulative number of rows (e.g. [0,100,200,300,...] if each file has 100 rows).
+
+    """
+    total_number_of_rows = 0
+    cum_number_of_rows_list = [0]
+    number_of_rows_list = []  # used for approximating the chunksize
+
+    # Get total number of rows for the files in the list, faster than resizing the dataset in each iteration of the file loop in concatenate_h5_files()
+
+    for file_name in file_list:
+        f = h5py.File(file_name, 'r')
+
+        # get number of rows from the first folder of the file -> each folder needs to have the same number of rows
+        f_keys = list(f.keys())
+        # remove pytables folders starting with '_i_', because the shape of its first axis does not correspond to the number of events in the file.
+        # all other folders normally have an axis_0 shape that is equal to the number of events in the file.
+        f_keys_stripped = [x for x in f_keys if '_i_' not in x]
+
+        total_number_of_rows += f[f_keys_stripped[0]].shape[0]
+        cum_number_of_rows_list.append(total_number_of_rows)
+        number_of_rows_list.append(f[f_keys_stripped[0]].shape[0])
+
+        f.close()
+
+    return cum_number_of_rows_list
+
+
+def get_f_compression_and_chunking(filepath):
+    """
+    Function that gets the used compression library, the compression level (if applicable)
+    and the chunksize of axis_0 of the first dataset of the file.
+
+    Parameters
+    ----------
+    filepath : str
+        Filepath of a .hdf5 file.
+
+    Returns
+    -------
+    compression : str
+        The compression library that has been identified in the input file. E.g. 'gzip', or 'lzf'.
+    complevel : int
+        The compression level that has been identified in the input file.
+    chunksize : None/int
+        The chunksize of axis_0 that has been identified in the input file.
+
+    """
+    f = h5py.File(filepath, 'r')
+
+    # remove any keys to pytables folders that may be in the file
+    f_keys_stripped = [x for x in list(f.keys()) if '_i_' not in x]
+
+    compression = f[f_keys_stripped[0]].compression  # compression filter
+    compression_opts = f[f_keys_stripped[0]].compression_opts  # filter strength
+    chunksize = f[f_keys_stripped[0]].chunks[0]  # chunksize along axis_0 of the dataset
+
+    return compression, compression_opts, chunksize
+
+
+def concatenate_h5_files(output_filepath, file_list, cum_rows_list, chunksize, complib, complevel):
+    """
+    Function that concatenates hdf5 files based on an output_filepath and a file_list of input files.
+
+    If the files contain group_info and x_indices folders (if the input files are coming from km3pipe output),
+    the group-id / the index of the x_indices is fixed in order to not get duplicates of group-ids / x-indices.
+
+    Parameters
+    ----------
+    output_filepath : str
+        String that specifies the filepath (path+name) of the output .h5 file.
+    file_list : list
+        List that contains all filepaths of the input files.
+    cum_rows_list : list
+        List that contains the cumulative number of rows (e.g. [0,100,200,300,...] if each file has 100 rows).
+    chunksize : None/int
+        Specifies the chunksize for axis_0 in the concatenated output files.
+        If None, the chunksize is read from the first input file.
+        Else, a custom chunksize will be used.
+    complib : None/str
+        Specifies the compression library that should be used for saving the concatenated output files.
+        If None, the compression library is read from the first input file.
+        Else, a custom compression library will be used.
+        Currently available: 'gzip', or 'lzf'.
+    complevel : None/int
+        Specifies the compression level that should be used for saving the concatenated output files.
+        A compression level is only available for gzip compression, not lzf!
+        If None, the compression level is read from the first input file.
+        Else, a custom compression level will be used.
+
+    """
+    complib_f, complevel_f, chunksize_f = get_f_compression_and_chunking(file_list[0])
+
+    chunksize = chunksize_f if chunksize is None else chunksize
+    complib = complib_f if complib is None else complib
+    complevel = complevel_f if complevel is None else complevel
+
+    if complib == 'lzf':
+        complevel = None
+
+    file_output = h5py.File(output_filepath, 'w')
+
+    for n, input_file_name in enumerate(file_list):
+        print('Processing file ' + file_list[n])
+        input_file = h5py.File(input_file_name, 'r')
+
+        # create metadata
+        if 'format_version' in list(input_file.attrs.keys()) and n == 0:
+            file_output.attrs['format_version'] = input_file.attrs['format_version']
+
+        for folder_name in input_file:
+
+            if folder_name.startswith('_i_'):
+                # we ignore datasets that have been created by pytables, don't need them anymore
+                continue
+
+            if n > 0 and folder_name in ['group_info', 'x_indices', 'y']:
+                folder_data = input_file[folder_name][()]
+                # we need to add the current number of the group_id / index in the file_output
+                # to the group_ids / indices of the file that is to be appended
+                column_name = 'group_id' if folder_name in ['group_info', 'y'] else 'index'
+                # add 1 because the group_ids / indices start with 0
+                folder_data[column_name] += np.amax(file_output[folder_name][column_name]) + 1
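+                # illustrative example: if the output file so far holds group_ids 0..99, the group_ids 0..49
+                # of the appended file are shifted to 100..149, so no duplicate ids are produced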
+
+            else:
+                folder_data = input_file[folder_name]
+
+            print('Shape and dtype of dataset ' + folder_name + ': ' + str(folder_data.shape) + ' ; ' + str(folder_data.dtype))
+
+            if n == 0:
+                # first file: create the output dataset with an unlimited max shape along axis_0
+                maxshape = (None,) + folder_data.shape[1:]  # change shape of axis zero to None
+                chunks = (chunksize,) + folder_data.shape[1:]
+
+                output_dataset = file_output.create_dataset(folder_name, data=folder_data, maxshape=maxshape, chunks=chunks,
+                                                            compression=complib, compression_opts=complevel)
+
+                output_dataset.resize(cum_rows_list[-1], axis=0)
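+                # the dataset is resized once to the total number of rows of all input files, so that the
+                # data of the remaining files can simply be written into the pre-allocated space below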
+
+            else:
+                file_output[folder_name][cum_rows_list[n]:cum_rows_list[n + 1]] = folder_data
+
+        file_output.flush()
+
+    print('Output information:')
+    print('-------------------')
+    print('The output file contains the following datasets:')
+    for folder_name in file_output:
+        print('Dataset ' + folder_name + ' with the following shape, dtype and chunks (first argument'
+              ' is the chunksize in axis_0): \n' + str(file_output[folder_name].shape) + ' ; ' +
+              str(file_output[folder_name].dtype) + ' ; ' + str(file_output[folder_name].chunks))
+
+    file_output.close()
+
+
+def main():
+    """
+    Main code. Concatenates .h5 files with multiple datasets, where each dataset in one file needs to have the same number of rows (axis_0).
+
+    Gets user input with aid of the parse_input() function. By default, the chunksize, complib and complevel
+    of the output .h5 file are read from the first input file.
+    For faster I/O, the chunksize should be set by the user depending on the use case.
+    In deep learning applications for example, the chunksize should be equal to the batch size that is used later on for reading the data.
+    """
+    file_list, output_filepath, chunksize, complib, complevel = parse_input()
+    cum_rows_list = get_cum_number_of_rows(file_list)
+    concatenate_h5_files(output_filepath, file_list, cum_rows_list, chunksize, complib, complevel)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/orcasong_contrib/data_tools/make_data_split/__init__.py b/orcasong_contrib/data_tools/make_data_split/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/orcasong_contrib/data_tools/make_data_split/configs/config_bg_classifier_xyz-c_tight-0_100b.toml b/orcasong_contrib/data_tools/make_data_split/configs/config_bg_classifier_xyz-c_tight-0_100b.toml
new file mode 100644
index 0000000000000000000000000000000000000000..89e5c9e563e975eae5b2bc8fdc4cb27c97eaeb13
--- /dev/null
+++ b/orcasong_contrib/data_tools/make_data_split/configs/config_bg_classifier_xyz-c_tight-0_100b.toml
@@ -0,0 +1,141 @@
+# Example configuration file for make_data_split.py
+
+# --- Documentation for every config parameter that is available --- #
+#
+#    Main Parameters
+#    ----------
+#    n_files_train : int
+#       Into how many files the training dataset should be split.
+#       If you don't want to have this dataset, comment out the line or delete it!
+#    n_files_validate : int
+#       Into how many files the validation dataset should be split.
+#       If you don't want to have this dataset, comment out the line or delete it!
+#    n_files_rest : int
+#       Into how many files the "rest" dataset should be split.
+#       If you don't want to have this dataset, comment out the line or delete it!
+#    output_file_folder : str
+#       Path to the folder, where all the output .list files (and the bash job scripts) should be stored.
+#    output_file_name : str
+#       String, that specifies the prefix of the filename of the output .list files.
+#       E.g. if = "xyzc_tight_0":
+#       xyzc_tight_0_train_0.list, xyzc_tight_0_validate_0.list, ...
+#    print_only : bool
+#       If only information about the input_groups should be printed, and no .list files should be made.
+#
+#    Job submission Parameters
+#    -------------------------
+#    make_qsub_bash_files : bool
+#       Whether bash job scripts should be made that can be submitted to a cluster in order to actually
+#       concatenate the files in the .list files.
+#    submit_jobs : bool
+#       In addition to make_qsub_bash_files, submit the bash job scripts to the cluster after they have been made.
+#       CAREFUL: May only work for Erlangen-CC.
+#    venv_path : str
+#       Path to a virtualenv, e.g. "/home/hpc/capn/mppi033h/.virtualenv/python_3_env/"
+#    data_tools_folder : str
+#       Dirpath, where the concatenate_h5.py tool is located.
+#       E.g. "/home/woody/capn/mppi033h/Code/OrcaNet/orcanet_contrib/data_tools"
+#    chunksize : int
+#       Chunksize parameter, that is used when calling concatenate.py
+#    complib : str
+#       Complib parameter, that is used when calling concatenate.py
+#    complevel : int
+#       Complevel parameter, that is used when calling concatenate.py
+#    shuffle_delete : bool
+#       Option for the shuffle_h5 tool, specifies if the input file that will be shuffled should be
+#       deleted after the shuffling is finished.
+#
+#    Input Group Parameters
+#    ----------------------
+#    dir : str
+#       Path of the directory, where the files for this input group are located.
+#    run_ids_train/run_ids_validate/run_ids_rest : array
+#       Array that specifies the range of run_ids that should be used for the training/validation/rest
+#       dataset of this input group.
+#       E.g. if [1,5], the script will put files from this input group with run_ids from 1 to 5 (including 1 and 5)
+#       to the training/validation/rest dataset.
+#       If you don't want to use a specific dataset for this input group, comment out the line or delete it!
+#
+# --- Documentation for every config parameter that is available --- #
+
+# --- Main options ---#
+
+n_files_train = 29
+n_files_validate = 13
+n_files_rest = 1
+output_file_folder = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/data_splits/xyzc"
+output_file_name = "xyzc_tight_0_100b_bg_classifier_dataset"
+print_only = false # only print information of your input_groups, don't make any .list files
+
+# --- Main options ---#
+
+
+# --- Options, for submitting jobs to concatenate the .list files. --- #
+
+make_qsub_bash_files = true
+submit_jobs = false
+venv_path = "/home/hpc/capn/mppi033h/.virtualenv/python_3_env"
+data_tools_folder = "/home/woody/capn/mppi033h/Code/OrcaNet/orcanet_contrib/data_tools"
+chunksize = 32
+complib = "gzip"
+complevel = 1
+shuffle_delete = false
+
+# --- Options, for submitting jobs to concatenate the .list files. --- #
+
+# --- Input groups : these are the datafiles, that should be concatenated somehow --- #
+[mupage] # 1 to 20000
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/mupage/xyzc"
+run_ids_train = [15618, 20000]
+run_ids_validate = [13741, 15617]
+run_ids_rest = [1, 13740]
+
+
+[random_noise] # 1 to 1500
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/random_noise/xyzc"
+run_ids_train = [576, 1500]
+run_ids_validate = [186, 575]
+run_ids_rest = [1, 185]
+
+
+[muon_cc_3_100] # 1 to 2400
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/muon-CC/3-100GeV/xyzc"
+run_ids_train = [721, 2400]
+run_ids_validate = [1, 720]
+
+
+[muon_cc_1_5] # 1 to 600
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/muon-CC/1-5GeV/xyzc"
+run_ids_train = [181, 600]
+run_ids_validate = [1, 180]
+
+
+[elec_cc_3_100] # 1 to 2400
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/elec-CC/3-100GeV/xyzc"
+run_ids_train = [361, 1200]
+run_ids_validate = [1, 360]
+
+
+[elec_cc_1_5] # 1 to 600
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/elec-CC/1-5GeV/xyzc"
+run_ids_train = [181, 600]
+run_ids_validate = [1, 180]
+
+
+[elec_nc_3_100] # 1 to 2400
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/elec-NC/3-100GeV/xyzc"
+run_ids_train = [361, 1200]
+run_ids_validate = [1, 360]
+
+
+[elec_nc_1_5] # 1 to 600
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/elec-NC/1-5GeV/xyzc"
+run_ids_train = [181, 600]
+run_ids_validate = [1, 180]
+
+
+[tau_cc_3_100] # 1 to 1800
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/tau-CC/3-100GeV/xyzc"
+run_ids_rest = [1, 1800]
+
+# --- Input groups : these are the datafiles, that should be concatenated somehow --- #
\ No newline at end of file
diff --git a/orcasong_contrib/data_tools/make_data_split/configs/config_bg_classifier_xyz-t_tight-0_100b.toml b/orcasong_contrib/data_tools/make_data_split/configs/config_bg_classifier_xyz-t_tight-0_100b.toml
new file mode 100644
index 0000000000000000000000000000000000000000..072093e7323195ffcf5e1e859f136fce8d95f2d5
--- /dev/null
+++ b/orcasong_contrib/data_tools/make_data_split/configs/config_bg_classifier_xyz-t_tight-0_100b.toml
@@ -0,0 +1,141 @@
+# Example configuration file for make_data_split.py
+
+# --- Documentation for every config parameter that is available --- #
+#
+#    Main Parameters
+#    ----------
+#    n_files_train : int
+#       Into how many files the training dataset should be split.
+#       If you don't want to have this dataset, comment out the line or delete it!
+#    n_files_validate : int
+#       Into how many files the validation dataset should be split.
+#       If you don't want to have this dataset, comment out the line or delete it!
+#    n_files_rest : int
+#       Into how many files the "rest" dataset should be split.
+#       If you don't want to have this dataset, comment out the line or delete it!
+#    output_file_folder : str
+#       Path to the folder, where all the output .list files (and the bash job scripts) should be stored.
+#    output_file_name : str
+#       String, that specifies the prefix of the filename of the output .list files.
+#       E.g. if = "xyzc_tight_0":
+#       xyzc_tight_0_train_0.list, xyzc_tight_0_validate_0.list, ...
+#    print_only : bool
+#       If only information about the input_groups should be printed, and no .list files should be made.
+#
+#    Job submission Parameters
+#    -------------------------
+#    make_qsub_bash_files : bool
+#       Whether bash job scripts should be made that can be submitted to a cluster in order to actually
+#       concatenate the files in the .list files.
+#    submit_jobs : bool
+#       In addition to make_qsub_bash_files, submit the bash job scripts to the cluster after they have been made.
+#       CAREFUL: May only work for Erlangen-CC.
+#    venv_path : str
+#       Path to a virtualenv, e.g. "/home/hpc/capn/mppi033h/.virtualenv/python_3_env/"
+#    data_tools_folder : str
+#       Dirpath, where the concatenate_h5.py tool is located.
+#       E.g. "/home/woody/capn/mppi033h/Code/OrcaNet/orcanet_contrib/data_tools"
+#    chunksize : int
+#       Chunksize parameter, that is used when calling concatenate.py
+#    complib : str
+#       Complib parameter, that is used when calling concatenate.py
+#    complevel : int
+#       Complevel parameter, that is used when calling concatenate.py
+#    shuffle_delete : bool
+#       Option for the shuffle_h5 tool, specifies if the input file that will be shuffled should be
+#       deleted after the shuffling is finished.
+#
+#    Input Group Parameters
+#    ----------------------
+#    dir : str
+#       Path of the directory, where the files for this input group are located.
+#    run_ids_train/run_ids_validate/run_ids_rest : array
+#       Array that specifies the range of run_ids that should be used for the training/validation/rest
+#       dataset of this input group.
+#       E.g. if [1,5], the script will put files from this input group with run_ids from 1 to 5 (including 1 and 5)
+#       to the training/validation/rest dataset.
+#       If you don't want to use a specific dataset for this input group, comment out the line or delete it!
+#
+# --- Documentation for every config parameter that is available --- #
+
+# --- Main options ---#
+
+n_files_train = 29
+n_files_validate = 13
+n_files_rest = 1
+output_file_folder = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/data_splits/xyzt"
+output_file_name = "xyzt_tight_0_100b_bg_classifier_dataset"
+print_only = false # only print information of your input_groups, don't make any .list files
+
+# --- Main options ---#
+
+
+# --- Options, for submitting jobs to concatenate the .list files. --- #
+
+make_qsub_bash_files = true
+submit_jobs = false
+venv_path = "/home/hpc/capn/mppi033h/.virtualenv/python_3_env"
+data_tools_folder = "/home/woody/capn/mppi033h/Code/OrcaNet/orcanet_contrib/data_tools"
+chunksize = 32
+complib = "gzip"
+complevel = 1
+shuffle_delete = false
+
+# --- Options, for submitting jobs to concatenate the .list files. --- #
+
+# --- Input groups : these are the datafiles, that should be concatenated somehow --- #
+[mupage] # 1 to 20000
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/mupage/xyzt"
+run_ids_train = [15618, 20000]
+run_ids_validate = [13741, 15617]
+run_ids_rest = [1, 13740]
+
+
+[random_noise] # 1 to 1500
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/random_noise/xyzt"
+run_ids_train = [576, 1500]
+run_ids_validate = [186, 575]
+run_ids_rest = [1, 185]
+
+
+[muon_cc_3_100] # 1 to 2400
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/muon-CC/3-100GeV/xyzt"
+run_ids_train = [721, 2400]
+run_ids_validate = [1, 720]
+
+
+[muon_cc_1_5] # 1 to 600
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/muon-CC/1-5GeV/xyzt"
+run_ids_train = [181, 600]
+run_ids_validate = [1, 180]
+
+
+[elec_cc_3_100] # 1 to 2400
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/elec-CC/3-100GeV/xyzt"
+run_ids_train = [361, 1200]
+run_ids_validate = [1, 360]
+
+
+[elec_cc_1_5] # 1 to 600
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/elec-CC/1-5GeV/xyzt"
+run_ids_train = [181, 600]
+run_ids_validate = [1, 180]
+
+
+[elec_nc_3_100] # 1 to 2400
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/elec-NC/3-100GeV/xyzt"
+run_ids_train = [361, 1200]
+run_ids_validate = [1, 360]
+
+
+[elec_nc_1_5] # 1 to 600
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/elec-NC/1-5GeV/xyzt"
+run_ids_train = [181, 600]
+run_ids_validate = [1, 180]
+
+
+[tau_cc_3_100] # 1 to 1800
+dir = "/home/saturn/capn/mppi033h/Data/input_images/ORCA_2016_115l/tight_0_100b_t_bg_classifier/tau-CC/3-100GeV/xyzt"
+run_ids_rest = [1, 1800]
+
+# --- Input groups : these are the datafiles, that should be concatenated somehow --- #
\ No newline at end of file
diff --git a/orcasong_contrib/data_tools/make_data_split/example_config.toml b/orcasong_contrib/data_tools/make_data_split/example_config.toml
new file mode 100644
index 0000000000000000000000000000000000000000..cded7422b15b6354878da9defe688fa6935c3184
--- /dev/null
+++ b/orcasong_contrib/data_tools/make_data_split/example_config.toml
@@ -0,0 +1,108 @@
+# Example configuration file for make_data_split.py
+
+# --- Documentation for every config parameter that is available --- #
+#
+#    Main Parameters
+#    ----------
+#    n_files_train : int
+#       Into how many files the training dataset should be split.
+#       If you don't want to have this dataset, comment out the line or delete it!
+#    n_files_validate : int
+#       Into how many files the validation dataset should be split.
+#       If you don't want to have this dataset, comment out the line or delete it!
+#    n_files_rest : int
+#       Into how many files the "rest" dataset should be split.
+#       If you don't want to have this dataset, comment out the line or delete it!
+#    output_file_folder : str
+#       Path to the folder, where all the output .list files (and the bash job scripts) should be stored.
+#    output_file_name : str
+#       String, that specifies the prefix of the filename of the output .list files.
+#       E.g. if = "xyzc_tight_0":
+#       xyzc_tight_0_train_0.list, xyzc_tight_0_validate_0.list, ...
+#    print_only : bool
+#       If only information about the input_groups should be printed, and no .list files should be made.
+#
+#    Job submission Parameters
+#    -------------------------
+#    make_qsub_bash_files : bool
+#       Whether bash job scripts should be made that can be submitted to a cluster in order to actually
+#       concatenate the files in the .list files.
+#    submit_jobs : bool
+#       In addition to make_qsub_bash_files, submit the bash job scripts to the cluster after they have been made.
+#       CAREFUL: May only work for Erlangen-CC.
+#    venv_path : str
+#       Path to a virtualenv, e.g. "/home/hpc/capn/mppi033h/.virtualenv/python_3_env/"
+#    data_tools_folder : str
+#       Dirpath, where the concatenate_h5.py tool is located.
+#       E.g. "/home/woody/capn/mppi033h/Code/OrcaNet/orcanet_contrib/data_tools"
+#    chunksize : int
+#       Chunksize parameter, that is used when calling concatenate.py
+#    complib : str
+#       Complib parameter, that is used when calling concatenate.py
+#    complevel : int
+#       Complevel parameter, that is used when calling concatenate.py
+#    shuffle_delete : bool
+#       Option for the shuffle_h5 tool, specifies if the input file that will be shuffled should be
+#       deleted after the shuffling is finished.
+#
+#    Input Group Parameters
+#    ----------------------
+#    dir : str
+#       Path of the directory, where the files for this input group are located.
+#    run_ids_train/run_ids_validate/run_ids_rest : array
+#       Array that specifies the range of run_ids that should be used for the training/validation/rest
+#       dataset of this input group.
+#       E.g. if [1,5], the script will put files from this input group with run_ids from 1 to 5 (including 1 and 5)
+#       to the training/validation/rest dataset.
+#       If you don't want to use a specific dataset for this input group, comment out the line or delete it!
+#
+# --- Documentation for every config parameter that is available --- #
+
+# --- Main options ---#
+
+n_files_train = 5
+n_files_validate = 3
+n_files_rest = 1
+output_file_folder = "/home/woody/capn/mppi033h/make_dsplit_test"
+output_file_name = "xyzc_tight_0"
+print_only = false # only print information of your input_groups, don't make any .list files
+
+# --- Main options ---#
+
+
+# --- Options, for submitting jobs to concatenate the .list files. --- #
+
+make_qsub_bash_files = true
+submit_jobs = false
+venv_path = "/home/hpc/capn/mppi033h/.virtualenv/python_3_env"
+data_tools_folder = "/home/woody/capn/mppi033h/Code/OrcaNet/orcanet_contrib/data_tools"
+chunksize = 32
+complib = "gzip"
+complevel = 1
+shuffle_delete = false
+
+# --- Options, for submitting jobs to concatenate the .list files. --- #
+
+# --- Input groups : these are the datafiles, that should be concatenated somehow --- #
+
+[input_group_1] # You can assign any name to this group, it doesn't matter which one. However, don't use whitespace!
+dir = "/path/to/the/folder/of/the/data/for/this/input_1/group"
+run_ids_train = [1001, 5000]
+run_ids_validate = [1, 1000]
+run_ids_rest = [5001, 20000]
+
+
+[input_group_2] # 1 to 1500
+dir = "/path/to/the/folder/of/the/data/for/this/input_2/group"
+run_ids_train = [101, 500]
+run_ids_validate = [1, 100]
+#run_ids_rest = [501, 600]
+
+
+[input_group_3] # 1 to 2400
+dir = "/path/to/the/folder/of/the/data/for/this/input_3/group"
+run_ids_train = [601, 2400]
+#run_ids_validate = [1, 500] # comment out or delete it, if you don't want it
+run_ids_rest = [501, 600]
+
+# --- Input groups : these are the datafiles, that should be concatenated somehow --- #
\ No newline at end of file
diff --git a/orcasong_contrib/data_tools/make_data_split/make_data_split.py b/orcasong_contrib/data_tools/make_data_split/make_data_split.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ec7b4ba92f1dcb0c22adf71862bd7fc72b11b6c
--- /dev/null
+++ b/orcasong_contrib/data_tools/make_data_split/make_data_split.py
@@ -0,0 +1,382 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Utility script that makes .list files for the concatenate_h5.py tool.
+
+Usage:
+    make_data_split.py CONFIG
+    make_data_split.py (-h | --help)
+
+Arguments:
+    CONFIG  A .toml file which contains the configuration options.
+
+Options:
+    -h --help  Show this screen.
+
+"""
+
+import os
+import toml
+import docopt
+import natsort as ns
+import h5py
+
+
+def parse_input():
+    """
+    Parses the config of the .toml file, specified by the user.
+
+    Returns
+    -------
+    cfg : dict
+        Dict that contains all configuration options from the input .toml file.
+
+    """
+
+    args = docopt.docopt(__doc__)
+    config_file = args['CONFIG']
+
+    cfg = toml.load(config_file)
+    cfg['toml_filename'] = config_file
+
+    return cfg
+
+
+def get_all_ip_group_keys(cfg):
+    """
+    Gets the keys of all input groups in the config dict.
+
+    The input groups are defined as the dict elements whose values are themselves dicts.
+
+    Parameters
+    ----------
+    cfg : dict
+        Dict that contains all configuration options and additional information.
+
+    Returns
+    -------
+    ip_group_keys : list
+        List of the input_group keys.
+
+    """
+    ip_group_keys = []
+    for key in cfg:
+        if isinstance(cfg[key], dict):
+            ip_group_keys.append(key)
+
+    return ip_group_keys
+
+
+def get_h5_filepaths(dirpath):
+    """
+    Returns the filepaths of all .h5 files that are located in a specific directory.
+
+    Parameters
+    ----------
+    dirpath: str
+        Path of the directory where the .h5 files are located.
+
+    Returns
+    -------
+    filepaths : list
+        List with the full filepaths of all .h5 files in the dirpath folder.
+
+    """
+    filepaths = []
+    for f in os.listdir(dirpath):
+        if f.endswith('.h5'):
+            filepaths.append(dirpath + '/' + f)
+
+    filepaths = ns.natsorted(filepaths)  # TODO should not be necessary actually!
+    return filepaths
+
+
+def get_number_of_evts_and_run_ids(list_of_files, dataset_key='y', run_id_col_name='run_id'):
+    """
+    Gets the number of events and the run_ids for all hdf5 files in the list_of_files.
+
+    The number of events is calculated based on the dataset, which is specified with the dataset_key parameter.
+
+    Parameters
+    ----------
+    list_of_files : list
+        List which contains filepaths to h5 files.
+    dataset_key : str
+        String which specifies which dataset in a .h5 file should be used for calculating the number of events.
+    run_id_col_name : str
+        String which specifies the column name of the 'run_id' column.
+
+    Returns
+    -------
+    total_number_of_evts : int
+        The cumulative (total) number of events.
+    mean_number_of_evts_per_file : float
+        The mean number of evts per file.
+    run_ids : list
+        List containing the run_ids of the files in the list_of_files.
+
+    """
+
+    total_number_of_evts = 0
+    run_ids = []
+
+    for i, fpath in enumerate(list_of_files):
+        f = h5py.File(fpath, 'r')
+
+        dset = f[dataset_key]
+        n_evts = dset.shape[0]
+        total_number_of_evts += n_evts
+
+        run_id = f[dataset_key][0][run_id_col_name]
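+        # note: the run_id is read from the first event only, which assumes that all events
+        # in one file belong to the same run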
+        run_ids.append(run_id)
+
+        f.close()
+
+    mean_number_of_evts_per_file = total_number_of_evts / len(list_of_files)
+
+    return total_number_of_evts, mean_number_of_evts_per_file, run_ids
+
+
+def split(a, n):
+    """
+    Splits a list into n chunks of equal (or, if not possible, approximately equal) size.
+
+    Parameters
+    ----------
+    a : list
+        A list that should be split.
+    n : int
+        Number of chunks into which the input list should be split.
+
+    Returns
+    -------
+    a_split : list
+        The input list a, which has been split into n chunks.
+
+    """
+    # from https://stackoverflow.com/questions/2130016/splitting-a-list-into-n-parts-of-approximately-equal-length
+    k, m = divmod(len(a), n)
+    a_split = list((a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n)))
+    return a_split
+
+
+def print_input_statistics(cfg, ip_group_keys):
+    """
+    Prints some useful information for each input_group.
+
+    Parameters
+    ----------
+    cfg : dict
+        Dict that contains all configuration options and additional information.
+    ip_group_keys : list
+        List of the input_group keys.
+
+    """
+
+    print('----------------------------------------------------------------------')
+    print('Printing input statistics for your ' + cfg['toml_filename'] + ' input:')
+    print('----------------------------------------------------------------------')
+
+    print('Your input .toml file has the following data input groups: ' + str(ip_group_keys))
+    print('Total number of events: ' + str(cfg['n_evts_total']))
+
+    for key in ip_group_keys:
+        print('--------------------------------------------------------------------')
+        print('Info for group ' + key + ':')
+        print('Directory: ' + cfg[key]['dir'])
+        print('Total number of files: ' + str(cfg[key]['n_files']))
+        print('Total number of events: ' + str(cfg[key]['n_evts']))
+        print('Mean number of events per file: ' + str(round(cfg[key]['n_evts_per_file_mean'], 3)))
+        print('--------------------------------------------------------------------')
+
+
+def add_fpaths_for_data_split_to_cfg(cfg, key):
+    """
+    Adds all the filepaths for the output files into a list, and puts them into the cfg['output_dsplit'][key] location
+    for all dsplits (train, validate, rest).
+
+    Parameters
+    ----------
+    cfg : dict
+        Dict that contains all configuration options and additional information.
+    key : str
+        The key of an input_group.
+
+    """
+
+    fpath_lists = {'train': [], 'validate': [], 'rest': []}
+    for i, fpath in enumerate(cfg[key]['fpaths']):
+
+        run_id = cfg[key]['run_ids'][i]
+
+        for dsplit in ['train', 'validate', 'rest']:
+            if 'run_ids_' + dsplit in cfg[key]:
+                if cfg[key]['run_ids_' + dsplit][0] <= run_id <= cfg[key]['run_ids_' + dsplit][1]:
+                    fpath_lists[dsplit].append(fpath)
+
+    for dsplit in ['train', 'validate', 'rest']:
+        if len(fpath_lists[dsplit]) == 0:
+            continue
+
+        n_files_dsplit = cfg['n_files_' + dsplit]
+        fpath_lists[dsplit] = split(fpath_lists[dsplit], n_files_dsplit)
+        if 'output_' + dsplit not in cfg:
+            cfg['output_' + dsplit] = dict()
+        cfg['output_' + dsplit][key] = fpath_lists[dsplit]
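+        # cfg['output_' + dsplit][key] now holds n_files_dsplit sub-lists of filepaths,
+        # one sub-list per output file of this datasplit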
+
+
+def make_dsplit_list_files(cfg):
+    """
+    Writes .list files of the datasplits to the disk, with the information in the cfg['output_dsplit'] dict.
+
+    Parameters
+    ----------
+    cfg : dict
+        Dict that contains all configuration options and additional information.
+
+    """
+    # check if the /conc_list_files folder exists, if not create it.
+    if not os.path.exists(cfg['output_file_folder'] + '/conc_list_files'):
+        os.makedirs(cfg['output_file_folder'] + '/conc_list_files')
+
+    for dsplit in ['train', 'validate', 'rest']:
+
+        if 'output_' + dsplit not in cfg:
+            continue
+
+        first_key = list(cfg['output_' + dsplit].keys())[0]
+        n_output_files = len(cfg['output_' + dsplit][first_key])
+
+        for i in range(n_output_files):
+            fpath_output = cfg['output_file_folder'] + '/conc_list_files/' + cfg['output_file_name'] + '_' + dsplit + '_' + str(i) + '.list'
+
+            # for later usage
+            if 'output_lists' not in cfg:
+                cfg['output_lists'] = list()
+            cfg['output_lists'].append(fpath_output)
+
+            with open(fpath_output, 'w') as f_out:
+                for group_key in cfg['output_' + dsplit]:
+                    for fpath in cfg['output_' + dsplit][group_key][i]:
+                        f_out.write(fpath + '\n')
+
+
+def make_concatenate_and_shuffle_list_files(cfg):
+    """
+    Function that writes qsub .sh job scripts which concatenate all files inside the .list files and subsequently shuffle the concatenated .h5 files.
+
+    Parameters
+    ----------
+    cfg : dict
+        Dict that contains all configuration options and additional information.
+
+    """
+    # TODO include options for multicore
+
+    dirpath = cfg['output_file_folder']
+
+    if not os.path.exists(dirpath + '/logs'):  # check if /logs folder exists, if not create it.
+        os.makedirs(dirpath + '/logs')
+    if not os.path.exists(dirpath + '/job_scripts'):  # check if /job_scripts folder exists, if not create it.
+        os.makedirs(dirpath + '/job_scripts')
+    if not os.path.exists(dirpath + '/data_split'):  # check if /data_split folder exists, if not create it.
+        os.makedirs(dirpath + '/data_split')
+
+    # make qsub .sh file for concatenating
+    for listfile_fpath in cfg['output_lists']:
+        listfile_fname = os.path.basename(listfile_fpath)
+        listfile_fname_wout_ext = os.path.splitext(listfile_fname)[0]
+        conc_outputfile_fpath = cfg['output_file_folder'] + '/data_split/' + listfile_fname_wout_ext + '.h5'
+
+        fpath_bash_script = dirpath + '/job_scripts/submit_concatenate_h5_' + listfile_fname_wout_ext + '.sh'
+
+        with open(fpath_bash_script, 'w') as f:
+            f.write('#!/usr/bin/env bash\n')
+            f.write('#\n')
+            f.write('#PBS -o ' + cfg['output_file_folder'] + '/logs/submit_concatenate_h5_' + listfile_fname_wout_ext + '.out'
+                    ' -e ' + cfg['output_file_folder'] + '/logs/submit_concatenate_h5_' + listfile_fname_wout_ext + '.err\n')
+            f.write('\n')
+            f.write('CodeFolder="' + cfg['data_tools_folder'] + '"\n')
+            f.write('cd ${CodeFolder}\n')
+            f.write('source activate ' + cfg['venv_path'] + '\n')
+            f.write('\n')
+            f.write('# Concatenate the files in the list\n')
+
+            f.write(
+                    'time python concatenate_h5.py'
+                    + ' --chunksize ' + str(cfg['chunksize'])
+                    + ' --complib ' + str(cfg['complib'])
+                    + ' --complevel ' + str(cfg['complevel'])
+                    + ' -l ' + listfile_fpath + ' ' + conc_outputfile_fpath)
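+            # with the values from example_config.toml, the last line of the script looks roughly like:
+            # time python concatenate_h5.py --chunksize 32 --complib gzip --complevel 1 -l <listfile>.list <output>.h5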
+
+        if cfg['submit_jobs'] is True:
+            os.system('qsub -l nodes=1:ppn=4,walltime=23:59:00 ' + fpath_bash_script)
+
+    # make qsub .sh file for shuffling
+    delete_flag_shuffle_tool = ' --delete' if cfg['shuffle_delete'] is True else ''  # leading space, since the flag is appended directly to the command string
+    for listfile_fpath in cfg['output_lists']:
+        listfile_fname = os.path.basename(listfile_fpath)
+        listfile_fname_wout_ext = os.path.splitext(listfile_fname)[0]
+
+        # This is the input for the shuffle tool!
+        conc_outputfile_fpath = cfg['output_file_folder'] + '/data_split/' + listfile_fname_wout_ext + '.h5'
+
+        fpath_bash_script = dirpath + '/job_scripts/submit_shuffle_h5_' + listfile_fname_wout_ext + '.sh'
+
+        with open(fpath_bash_script, 'w') as f:
+            f.write('#!/usr/bin/env bash\n')
+            f.write('#\n')
+            f.write('#PBS -o ' + cfg['output_file_folder'] + '/logs/submit_shuffle_h5_' + listfile_fname_wout_ext + '.out'
+                    ' -e ' + cfg['output_file_folder'] + '/logs/submit_shuffle_h5_' + listfile_fname_wout_ext + '.err\n')
+            f.write('\n')
+            f.write('CodeFolder="' + cfg['data_tools_folder'] + '"\n')
+            f.write('cd ${CodeFolder}\n')
+            f.write('source activate ' + cfg['venv_path'] + '\n')
+            f.write('\n')
+            f.write('# Shuffle the h5 file \n')
+
+            f.write(
+                    'time python shuffle_h5.py'
+                    + delete_flag_shuffle_tool
+                    + ' --chunksize ' + str(cfg['chunksize'])
+                    + ' --complib ' + str(cfg['complib'])
+                    + ' --complevel ' + str(cfg['complevel'])
+                    + ' ' + conc_outputfile_fpath)
+
+
+def make_data_split():
+    """
+    Main function.
+    """
+
+    cfg = parse_input()
+
+    ip_group_keys = get_all_ip_group_keys(cfg)
+
+    n_evts_total = 0
+    for key in ip_group_keys:
+        print('Collecting information from input group ' + key)
+        cfg[key]['fpaths'] = get_h5_filepaths(cfg[key]['dir'])
+        cfg[key]['n_files'] = len(cfg[key]['fpaths'])
+        cfg[key]['n_evts'], cfg[key]['n_evts_per_file_mean'], cfg[key]['run_ids'] = get_number_of_evts_and_run_ids(cfg[key]['fpaths'], dataset_key='y')
+
+        n_evts_total += cfg[key]['n_evts']
+
+    cfg['n_evts_total'] = n_evts_total
+    print_input_statistics(cfg, ip_group_keys)
+
+    if cfg['print_only'] is True:
+        from sys import exit
+        exit()
+
+    for key in ip_group_keys:
+        add_fpaths_for_data_split_to_cfg(cfg, key)
+
+    make_dsplit_list_files(cfg)
+
+    if cfg['make_qsub_bash_files'] is True:
+        make_concatenate_and_shuffle_list_files(cfg)
+
+
+if __name__ == '__main__':
+    make_data_split()
diff --git a/orcasong_contrib/data_tools/shuffle/__init__.py b/orcasong_contrib/data_tools/shuffle/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/orcasong_contrib/data_tools/shuffle/shuffle_h5.py b/orcasong_contrib/data_tools/shuffle/shuffle_h5.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3c1c0f7d4f7cf0716feb52632030d585a844671
--- /dev/null
+++ b/orcasong_contrib/data_tools/shuffle/shuffle_h5.py
@@ -0,0 +1,300 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Contains functions to shuffle .h5 files.
+
+Can only be used for files where each dataset has the same number of rows (axis_0).
+A fixed random seed (42) is used for the shuffling!
+
+Currently, two types of .h5 files are supported:
+
+1) Files which can be read by km3pipe (e.g. files produced with OrcaSong).
+2) Plain hdf5 files with an hdf5 folder depth of 1. This method is based on some legacy code.
+   Be careful not to run out of memory! It needs about as much memory as the unshuffled .h5 file takes on disk, plus the python overhead.
+   If you want to use it, please use the --legacy_mode option.
+"""
+
+import sys
+import os
+from argparse import ArgumentParser, RawTextHelpFormatter
+import numpy as np
+import h5py
+import km3pipe as kp
+import km3modules as km
+
+# from memory_profiler import profile # for memory profiling, call with @profile; myfunc()
+
+__author__ = 'Michael Moser'
+__license__ = 'AGPL'
+__email__ = 'michael.m.moser@fau.de'
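+
+# Example usage (illustrative only, mirroring the argparse help below; the path is a placeholder):
+#   python shuffle_h5.py /path/to/file.h5
+#   python shuffle_h5.py --chunksize 32 --complib gzip --complevel 1 /path/to/file.h5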
+
+
+def parse_input():
+    """
+    Parses the user input in order to return the most important information:
+
+    1) list of files that should be shuffled
+    2) if the unshuffled file should be deleted
+    3) if the user wants to use a custom chunksize, or if the chunksize should be read from the input file.
+    4) if the user wants to use a custom complib, or if the complib should be read from the input file.
+    5) if the user wants to use a custom complevel, or if the complevel should be read from the input file.
+
+    Returns
+    -------
+    input_files_list : list
+        List that contains all filepaths of the input files that should be shuffled.
+    delete : bool
+        Boolean flag that specifies, if the unshuffled input files should be deleted after the shuffling.
+    chunksize : None/int
+        Specifies the chunksize for axis_0 in the shuffled output files.
+        If None, the chunksize is read from the input files.
+        Else, a custom chunksize will be used.
+    complib : None/str
+        Specifies the compression library that should be used for saving the shuffled output files.
+        If None, the compression library is read from the input files.
+        Else, a custom compression library will be used.
+        Currently available: 'gzip', or 'lzf'.
+    complevel : None/int
+        Specifies the compression level that should be used for saving the shuffled output files.
+        A compression level is only available for gzip compression, not lzf!
+        If None, the compression level is read from the input files.
+        Else, a custom compression level will be used.
+    legacy_mode : bool
+        Boolean flag that specifies, if the legacy shuffle mode should be used instead of the standard one.
+        A more detailed description of this mode can be found in the summary at the top of this python file.
+
+    """
+    parser = ArgumentParser(description='E.g. < python shuffle_h5.py filepath_1 [filepath_2] [...] > \n'
+                                        'Shuffles .h5 files. Requires that each dataset of the files has the same number of rows (axis_0). \n'
+                                        'Outputs a new, shuffled .h5 file with the suffix < _shuffled >.',
+                            formatter_class=RawTextHelpFormatter)
+
+    parser.add_argument('files', metavar='file', type=str, nargs='+', help='a .h5 file that should be shuffled, can be more than one argument.')
+    parser.add_argument('-d', '--delete', action='store_true',
+                        help='deletes the original input file after the shuffled .h5 is created.')
+    parser.add_argument('--chunksize', dest='chunksize', type=int,
+                        help='Specify a chunksize value in order to use chunked storage for the shuffled .h5 file. \n'
+                             ' Otherwise, it will be read from the input file.')
+    parser.add_argument('--complib', dest='complib', type=str,
+                        help='Specify a filter that should be used for compression. Either "gzip" or "lzf". \n'
+                             'Otherwise, the filter will be read from the input file.')
+    parser.add_argument('--complevel', dest='complevel', type=int,
+                        help='Specify a compression filter strength that should be used for the compression. \n'
+                             'Otherwise, the filter will be read from the input file. \n'
+                             'Can range from 0 to 9. Has no effect on "lzf" compression.')
+    parser.add_argument('--legacy_mode', dest='legacy_mode', action='store_true',
+                        help='If you want to use the legacy mode, as described in the summary at the top of this python file.')
+
+    parser.set_defaults(legacy_mode=False)
+
+    if len(sys.argv) == 1:
+        parser.print_help()
+        sys.exit(1)
+
+    args = parser.parse_args()
+
+    input_files_list = []
+    for filepath in args.files:
+        input_files_list.append(filepath)
+
+    delete = False
+    if args.delete:
+        delete = True
+        print('You chose delete = True')
+
+    chunksize = None
+    if args.chunksize:
+        chunksize = args.chunksize
+        print('You chose chunksize = ' + str(chunksize))
+
+    complib = None
+    if args.complib:
+        complib = args.complib
+        print('You chose complib = ' + complib)
+
+    complevel = None
+    if args.complevel:
+        complevel = args.complevel
+        print('You chose complevel = ' + str(complevel))
+
+    legacy_mode = args.legacy_mode
+
+    return input_files_list, delete, chunksize, complib, complevel, legacy_mode
+
+
+def get_f_compression_and_chunking(filepath):
+    """
+    Function that gets the used compression library, the compression level (if applicable)
+    and the chunksize of axis_0 of the first dataset of the file.
+
+    Parameters
+    ----------
+    filepath : str
+        Filepath of a .hdf5 file.
+
+    Returns
+    -------
+    compression : str
+        The compression library that has been identified in the input file. E.g. 'gzip', or 'lzf'.
+    complevel : int
+        The compression level that has been identified in the input file.
+    chunksize : None/int
+        The chunksize of axis_0 that has been identified in the input file.
+
+    """
+    f = h5py.File(filepath, 'r')
+
+    # remove any keys to pytables folders that may be in the file
+    f_keys_stripped = [x for x in list(f.keys()) if '_i_' not in x]
+
+    compression = f[f_keys_stripped[0]].compression  # compression filter
+    compression_opts = f[f_keys_stripped[0]].compression_opts  # filter strength
+    chunksize = f[f_keys_stripped[0]].chunks[0]  # chunksize along axis_0 of the dataset
+
+    return compression, compression_opts, chunksize
+
+
+def shuffle_h5(filepath_input, tool=False, seed=42, delete=True, chunksize=None, complib=None, complevel=None, legacy_mode=False):
+    """
+    Shuffles a .h5 file where each dataset needs to have the same number of rows (axis_0).
+    The shuffled data is saved to a new .h5 file with the suffix < _shuffled.h5 >.
+
+    Parameters
+    ----------
+    filepath_input : str
+        Filepath of the unshuffled input file.
+    tool : bool
+        Specifies if the function is accessed from the shuffle_h5_tool.
+        In this case, the shuffled .h5 file is returned.
+    seed : int
+        Sets a fixed random seed for the shuffling.
+    delete : bool
+        Specifies if the old, unshuffled file should be deleted after extracting the data.
+    chunksize : None/int
+        Specifies the chunksize for axis_0 in the shuffled output files.
+        If None, the chunksize is read from the input files.
+        Else, a custom chunksize will be used.
+    complib : None/str
+        Specifies the compression library that should be used for saving the shuffled output files.
+        If None, the compression library is read from the input files.
+        Else, a custom compression library will be used.
+        Currently available: 'gzip', or 'lzf'.
+    complevel : None/int
+        Specifies the compression level that should be used for saving the shuffled output files.
+        A compression level is only available for gzip compression, not lzf!
+        If None, the compression level is read from the input files.
+        Else, a custom compression level will be used.
+    legacy_mode : bool
+        Boolean flag that specifies, if the legacy shuffle mode should be used instead of the standard one.
+        A more detailed description of this mode can be found in the summary at the top of this python file.
+
+    Returns
+    -------
+    output_file_shuffled : h5py.File
+        H5py file instance of the shuffled output file.
+
+    """
+    complib_f, complevel_f, chunksize_f = get_f_compression_and_chunking(filepath_input)
+
+    chunksize = chunksize_f if chunksize is None else chunksize
+    complib = complib_f if complib is None else complib
+    complevel = complevel_f if complevel is None else complevel
+
+    if complib == 'lzf':
+        complevel = None
+
+    filepath_input_without_ext = os.path.splitext(filepath_input)[0]
+    filepath_output = filepath_input_without_ext + '_shuffled.h5'
+
+    if not legacy_mode:
+        # set random km3pipe (=numpy) seed
+        print('Setting a Global Random State with the seed < ' + str(seed) + ' >.')
+        km.GlobalRandomState(seed=seed)
+
+        # km3pipe uses pytables for saving the shuffled output file, which has the name 'zlib' for the 'gzip' filter
+        if complib == 'gzip':
+            complib = 'zlib'
+
+        pipe = kp.Pipeline(timeit=True)  # add timeit=True argument for profiling
+        pipe.attach(km.common.StatusBar, every=200)
+        pipe.attach(km.common.MemoryObserver, every=200)
+        pipe.attach(kp.io.hdf5.HDF5Pump, filename=filepath_input, shuffle=True, reset_index=True)
+        pipe.attach(kp.io.hdf5.HDF5Sink, filename=filepath_output, complib=complib, complevel=complevel, chunksize=chunksize, flush_frequency=1000)
+        pipe.drain()
+        if delete:
+            os.remove(filepath_input)
+
+        # the shuffled data has been written to filepath_output by the HDF5Sink, so reopen that file
+        output_file_shuffled = h5py.File(filepath_output, 'r+')
+
+        # delete the '_i_' index groups that are created by pytables in the HDF5Sink, we don't need them
+        for folder_name in list(output_file_shuffled.keys()):
+            if folder_name.startswith('_i_'):
+                del output_file_shuffled[folder_name]
+
+    else:
+        input_file = h5py.File(filepath_input, 'r')
+        folder_data_array_dict = {}
+
+        for folder_name in input_file:
+            folder_data_array = input_file[folder_name][()]  # get whole numpy array into memory
+            folder_data_array_dict[folder_name] = folder_data_array  # workaround in order to be able to close the input file at the next step
+
+        input_file.close()
+
+        if delete:
+            os.remove(filepath_input)
+
+        output_file_shuffled = h5py.File(filepath_output, 'w')
+        for n, dataset_key in enumerate(folder_data_array_dict):
+
+            dataset = folder_data_array_dict[dataset_key]
+
+            if n == 0:
+                # seed a RandomState for the first dataset and remember its state, so that all datasets are shuffled with the identical permutation
+                r = np.random.RandomState(seed)
+                state = r.get_state()
+                r.shuffle(dataset)
+
+            else:
+                r.set_state(state)  # restore the RandomState of the first dataset to reproduce the same permutation
+                r.shuffle(dataset)
+
+            chunks = (chunksize,) + dataset.shape[1:]
+            output_file_shuffled.create_dataset(dataset_key, data=dataset, dtype=dataset.dtype, chunks=chunks,
+                                                compression=complib, compression_opts=complevel)
+
+    # return the still-open file if accessed via the tool, otherwise close it
+    if tool is False:
+        output_file_shuffled.close()
+    else:
+        return output_file_shuffled
+
+
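+# Illustrative usage sketch (the filepath below is hypothetical, the keyword values
+# follow the signature documented above). With tool=True the still-open
+# h5py.File of < file_shuffled.h5 > is returned and has to be closed by the caller:
+#
+#     f_shuffled = shuffle_h5('/path/to/file.h5', tool=True, seed=42,
+#                             delete=False, chunksize=None, complib=None,
+#                             complevel=None, legacy_mode=False)
+#     print(list(f_shuffled.keys()))
+#     f_shuffled.close()
+
+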
+def shuffle_h5_tool():
+    """
+    Frontend for the shuffle_h5 function that can be used in a bash environment.
+
+    Shuffles .h5 files where each dataset needs to have the same number of rows (axis_0) for a single file.
+    Saves the shuffled data to a new .h5 file.
+    """
+    input_files_list, delete, chunksize, complib, complevel, legacy_mode = parse_input()
+
+    for filepath_input in input_files_list:
+        print('Shuffling file ' + filepath_input)
+        output_file_shuffled = shuffle_h5(filepath_input, tool=True, seed=42, delete=delete, chunksize=chunksize,
+                                          complib=complib, complevel=complevel, legacy_mode=legacy_mode)
+        print('Finished shuffling. Output information:')
+        print('---------------------------------------')
+        print('The output file contains the following datasets:')
+        for dataset_name in output_file_shuffled:
+            print('Dataset ' + dataset_name + ' with the following shape, dtype and chunks '
+                  '(the first entry of chunks is the chunksize along axis_0): \n' + str(output_file_shuffled[dataset_name].shape)
+                  + ' ; ' + str(output_file_shuffled[dataset_name].dtype) + ' ; '
+                  + str(output_file_shuffled[dataset_name].chunks))
+
+        output_file_shuffled.close()
+
+
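+# Command-line sketch (the script name below is only an assumed example; the
+# available options are defined by the parse_input function used above):
+#
+#     python shuffle_h5.py /path/to/input_file.h5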
+if __name__ == '__main__':
+    shuffle_h5_tool()
diff --git a/user/detx_files/orca_115strings_av23min20mhorizontal_18OMs_alt9mvertical_v1.detx b/orcasong_contrib/detx_files/orca_115strings_av23min20mhorizontal_18OMs_alt9mvertical_v1.detx
similarity index 100%
rename from user/detx_files/orca_115strings_av23min20mhorizontal_18OMs_alt9mvertical_v1.detx
rename to orcasong_contrib/detx_files/orca_115strings_av23min20mhorizontal_18OMs_alt9mvertical_v1.detx
diff --git a/orcasong_contrib/utilities/__init__.py b/orcasong_contrib/utilities/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/utilities/count_number_of_events_in_folder.py b/orcasong_contrib/utilities/count_number_of_events_in_folder.py
similarity index 100%
rename from utilities/count_number_of_events_in_folder.py
rename to orcasong_contrib/utilities/count_number_of_events_in_folder.py
diff --git a/utilities/dom_binning.py b/orcasong_contrib/utilities/dom_binning.py
similarity index 100%
rename from utilities/dom_binning.py
rename to orcasong_contrib/utilities/dom_binning.py
diff --git a/orcasong_contrib/utilities/get_func_for_flat_track_shower.py b/orcasong_contrib/utilities/get_func_for_flat_track_shower.py
new file mode 100644
index 0000000000000000000000000000000000000000..772e193b0c968878848ecf10f3b98863363eded9
--- /dev/null
+++ b/orcasong_contrib/utilities/get_func_for_flat_track_shower.py
@@ -0,0 +1,219 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+TODO
+"""
+
+import os
+import numpy as np
+import matplotlib as mpl
+mpl.use('Agg')
+from matplotlib.backends.backend_pdf import PdfPages
+from matplotlib import pyplot as plt
+import h5py
+import natsort as ns
+
+
+def get_h5_filepaths(dirpath):
+    """
+    Returns the filepaths of all .h5 files that are located in a specific directory.
+
+    Parameters
+    ----------
+    dirpath : str
+        Path of the directory where the .h5 files are located.
+
+    Returns
+    -------
+    filepaths : list
+        List with the full filepaths of all .h5 files in the dirpath folder.
+
+    """
+    filepaths = []
+    for f in os.listdir(dirpath):
+        if f.endswith('.h5'):
+            filepaths.append(dirpath + '/' + f)
+
+    filepaths = ns.natsorted(filepaths)  # TODO should not be necessary actually!
+    return filepaths
+
+
+def get_energies_for_fpaths(fpath_list, fpath_list_key_ic, cut_e_higher_than_3=False):
+    """
+
+    Parameters
+    ----------
+    fpath_list
+    fpath_list_key_ic
+    cut_e_higher_than_3
+
+    Returns
+    -------
+
+    """
+
+    energy_conc_arr = None
+    for i, fpath in enumerate(fpath_list):
+        if i % 100 == 0: print('Step ' + str(i))
+
+        f = h5py.File(fpath, 'r')
+
+        tracks = f['mc_tracks']
+        tracks_neutr = tracks[tracks['bjorkeny'] != 0]
+
+        assert f['event_info'].shape == tracks_neutr.shape
+        energies = tracks_neutr['energy']
+
+        if cut_e_higher_than_3 is True:
+            energies = energies[energies <= 3]
+
+        if energy_conc_arr is None:
+            energy_conc_arr = energies
+        else:
+            energy_conc_arr = np.concatenate([energy_conc_arr, energies], axis=0)
+
+        f.close()
+
+    print('Total number of events for ' + fpath_list_key_ic + ' (without 3-5GeV from low_e prod): '
+          + str(energy_conc_arr.shape[0]))
+    print('Total number of files: ' + str(len(fpath_list)))
+
+    return energy_conc_arr
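+
+# Minimal sketch of the neutrino selection used above (field names as in this
+# file, the two-field dtype is only a stand-in for the real mc_tracks table):
+#
+#     tracks = np.array([(0.0, 10.0), (0.3, 25.0)],
+#                       dtype=[('bjorkeny', 'f8'), ('energy', 'f8')])
+#     tracks_neutr = tracks[tracks['bjorkeny'] != 0]  # drops the bjorkeny == 0 row
+#     energies = tracks_neutr['energy']               # -> array([25.])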
+
+
+def save_energies_for_ic(energies_for_ic):
+    """Saves the energy arrays of all interaction channels to ./energies_for_ic.npz."""
+
+    np.savez('./energies_for_ic.npz',
+             muon_cc_3_100=energies_for_ic['muon_cc_3_100'], muon_cc_1_5=energies_for_ic['muon_cc_1_5'],
+             elec_cc_3_100=energies_for_ic['elec_cc_3_100'], elec_cc_1_5=energies_for_ic['elec_cc_1_5'],
+             elec_nc_3_100=energies_for_ic['elec_nc_3_100'], elec_nc_1_5=energies_for_ic['elec_nc_1_5'])
+
+
+def load_energies_for_ic():
+    """Loads the energy arrays of all interaction channels from ./energies_for_ic.npz."""
+
+    data = np.load('./energies_for_ic.npz')
+
+    energies_for_ic = dict()
+    energies_for_ic['muon_cc_3_100'] = data['muon_cc_3_100']
+    energies_for_ic['muon_cc_1_5'] = data['muon_cc_1_5']
+    energies_for_ic['elec_cc_3_100'] = data['elec_cc_3_100']
+    energies_for_ic['elec_cc_1_5'] = data['elec_cc_1_5']
+    energies_for_ic['elec_nc_3_100'] = data['elec_nc_3_100']
+    energies_for_ic['elec_nc_1_5'] = data['elec_nc_1_5']
+
+    return energies_for_ic
+
+
+def add_low_and_high_e_prods(energies_for_ic):
+    """
+
+    Parameters
+    ----------
+    energies_for_ic
+
+    Returns
+    -------
+
+    """
+
+    energies_for_ic['muon_cc'] = np.concatenate([energies_for_ic['muon_cc_3_100'], energies_for_ic['muon_cc_1_5']])
+    energies_for_ic['elec_cc'] = np.concatenate([energies_for_ic['elec_cc_3_100'], energies_for_ic['elec_cc_1_5']])
+    energies_for_ic['elec_nc'] = np.concatenate([energies_for_ic['elec_nc_3_100'], energies_for_ic['elec_nc_1_5']])
+    energies_for_ic['elec_cc_and_nc'] = np.concatenate([energies_for_ic['elec_cc'], energies_for_ic['elec_nc']])
+
+
+def plot_e_and_make_flat_func(energies_for_ic):
+    """
+
+    Parameters
+    ----------
+    energies_for_ic
+
+    Returns
+    -------
+
+    """
+    def make_plot_options_and_save(ax, pdfpages, ylabel):
+        plt.xlabel('Energy [GeV]')
+        plt.ylabel(ylabel)
+        x_ticks_major = np.arange(0, 101, 10)
+        ax.set_xticks(x_ticks_major)
+        ax.grid(True)
+        plt.tight_layout()
+        pdfpages.savefig(fig)
+        plt.cla()
+
+
+    pdfpages = PdfPages('./e_hist_plots.pdf')
+    fig, ax = plt.subplots()
+
+    # plot
+    hist_muon_cc = plt.hist(energies_for_ic['muon_cc'], bins=99)
+    plt.title('Muon-CC 1-3 + 3-100 GeV for Run 1-2400')
+    make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]')
+
+    hist_shower = plt.hist(energies_for_ic['elec_cc_and_nc'], bins=99)
+    plt.title('Shower (elec-CC + elec-NC) 1-3 + 3-100 GeV for 2x Run 1-1200')
+    make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]')
+
+    hist_elec_cc = plt.hist(energies_for_ic['elec_cc'], bins=99)
+    plt.title('Elec-CC 1-3 + 3-100 GeV for Run 1-1200')
+    make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]')
+
+    hist_elec_nc = plt.hist(energies_for_ic['elec_nc'], bins=99)
+    plt.title('Elec-NC 1-3 + 3-100 GeV for Run 1-1200')
+    make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]')
+
+    # We take 600 muon-CC files and 300 elec-CC and 300 elec-NC files for the split -> reduce the 1-3 GeV shower bins by 1/2
+    hist_shower[0][0] = hist_shower[0][0] / 2  # 1-2 GeV
+    hist_shower[0][1] = hist_shower[0][1] / 2  # 2-3 GeV
+
+    track_div_shower = np.divide(hist_muon_cc[0], hist_shower[0])
+    print(hist_muon_cc[0])
+    print(hist_shower[0])
+
+    bins = hist_muon_cc[1]  # doesn't matter which histogram's bin edges are used
+    # repeat the last ratio value so that len(track_div_shower) == len(bins), which is needed for the 'post' step plot
+    track_div_shower = np.append(track_div_shower, track_div_shower[-1])
+    print(bins)
+    print(track_div_shower)
+    ax.step(bins, track_div_shower, linestyle='-', where='post')
+    plt.title('Ratio tracks divided by showers')
+    make_plot_options_and_save(ax, pdfpages, ylabel='Fraction')
+
+    pdfpages.close()
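+
+# Why the last ratio value is repeated above: ax.step(..., where='post') draws
+# each y-value from its bin edge to the next one and needs len(x) == len(y),
+# so with len(bin_edges) == n_bins + 1 the final ratio value has to be duplicated.
+# Minimal sketch with hypothetical numbers:
+#
+#     edges = np.array([1., 2., 3.])       # 2 bins -> 3 edges
+#     ratio = np.array([0.5, 0.8])         # one ratio value per bin
+#     ratio = np.append(ratio, ratio[-1])  # -> array([0.5, 0.8, 0.8])
+#     ax.step(edges, ratio, where='post')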
+
+
+def main():
+    dirs = {
+            'muon_cc_3_100': '/home/saturn/capn/mppi033h/Data/raw_data/ORCA_JTE_NEMOWATER/calibrated/with_jte_times/3-100GeV/muon-CC',
+            'muon_cc_1_5': '/home/saturn/capn/mppi033h/Data/raw_data/ORCA_JTE_NEMOWATER/calibrated/with_jte_times/1-5GeV/muon-CC',
+            'elec_cc_3_100': '/home/saturn/capn/mppi033h/Data/raw_data/ORCA_JTE_NEMOWATER/calibrated/with_jte_times/3-100GeV/elec-CC',
+            'elec_cc_1_5': '/home/saturn/capn/mppi033h/Data/raw_data/ORCA_JTE_NEMOWATER/calibrated/with_jte_times/1-5GeV/elec-CC',
+            'elec_nc_3_100': '/home/saturn/capn/mppi033h/Data/raw_data/ORCA_JTE_NEMOWATER/calibrated/with_jte_times/3-100GeV/elec-NC',
+            'elec_nc_1_5': '/home/saturn/capn/mppi033h/Data/raw_data/ORCA_JTE_NEMOWATER/calibrated/with_jte_times/1-5GeV/elec-NC'
+            }
+
+    if os.path.isfile('./energies_for_ic.npz') is True:
+        energies_for_ic = load_energies_for_ic()
+
+    else:
+        fpaths = dict()
+        for dir_ic_key in dirs:
+            fpaths[dir_ic_key] = get_h5_filepaths(dirs[dir_ic_key])
+
+        energies_for_ic = dict()
+        for fpath_list_key_ic in fpaths:
+            print('Getting energies for ' + fpath_list_key_ic)
+            cut_flag = fpath_list_key_ic in ['muon_cc_1_5', 'elec_cc_1_5', 'elec_nc_1_5']
+            fpath_list = fpaths[fpath_list_key_ic]
+            energies_for_ic[fpath_list_key_ic] = get_energies_for_fpaths(fpath_list, fpath_list_key_ic, cut_e_higher_than_3=cut_flag)
+
+        save_energies_for_ic(energies_for_ic)
+
+    add_low_and_high_e_prods(energies_for_ic)
+    plot_e_and_make_flat_func(energies_for_ic)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/orcasong_contrib/utilities/timecut_test/__init__.py b/orcasong_contrib/utilities/timecut_test/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/utilities/timecut_test/plots/elec-CC/.gitkeep b/orcasong_contrib/utilities/timecut_test/plots/elec-CC/.gitkeep
similarity index 100%
rename from utilities/timecut_test/plots/elec-CC/.gitkeep
rename to orcasong_contrib/utilities/timecut_test/plots/elec-CC/.gitkeep
diff --git a/utilities/timecut_test/plots/muon-CC/.gitkeep b/orcasong_contrib/utilities/timecut_test/plots/muon-CC/.gitkeep
similarity index 100%
rename from utilities/timecut_test/plots/muon-CC/.gitkeep
rename to orcasong_contrib/utilities/timecut_test/plots/muon-CC/.gitkeep
diff --git a/utilities/timecut_test/plots/mupage/.gitkeep b/orcasong_contrib/utilities/timecut_test/plots/mupage/.gitkeep
similarity index 100%
rename from utilities/timecut_test/plots/mupage/.gitkeep
rename to orcasong_contrib/utilities/timecut_test/plots/mupage/.gitkeep
diff --git a/utilities/timecut_test/plots/random_noise/.gitkeep b/orcasong_contrib/utilities/timecut_test/plots/random_noise/.gitkeep
similarity index 100%
rename from utilities/timecut_test/plots/random_noise/.gitkeep
rename to orcasong_contrib/utilities/timecut_test/plots/random_noise/.gitkeep
diff --git a/utilities/timecut_test/plots/tau-CC/.gitkeep b/orcasong_contrib/utilities/timecut_test/plots/tau-CC/.gitkeep
similarity index 100%
rename from utilities/timecut_test/plots/tau-CC/.gitkeep
rename to orcasong_contrib/utilities/timecut_test/plots/tau-CC/.gitkeep
diff --git a/utilities/timecut_test/timecut_test.py b/orcasong_contrib/utilities/timecut_test/timecut_test.py
similarity index 100%
rename from utilities/timecut_test/timecut_test.py
rename to orcasong_contrib/utilities/timecut_test/timecut_test.py
diff --git a/user/job_submission_scripts/submit_data_to_images.sh b/user/job_submission_scripts/submit_data_to_images.sh
index 5cc9fe075e7d2f54bce6e49e256f36a7070027a0..9b3754224637747734ccc86f4bf341bed6ee7930 100644
--- a/user/job_submission_scripts/submit_data_to_images.sh
+++ b/user/job_submission_scripts/submit_data_to_images.sh
@@ -24,10 +24,10 @@ python_env_folder=/home/hpc/capn/mppi033h/.virtualenv/python_3_env/
 job_logs_folder=/home/woody/capn/mppi033h/logs/orcasong/cout
 
 detx_filepath=/home/woody/capn/mppi033h/Code/OrcaSong/user/detx_files/orca_115strings_av23min20mhorizontal_18OMs_alt9mvertical_v1.detx
-config_file=/home/woody/capn/mppi033h/Code/OrcaSong/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-c.toml
+config_file=/home/woody/capn/mppi033h/Code/OrcaSong/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-t.toml
 
-particle_type=mupage
-mc_prod=mupage
+particle_type=random_noise
+mc_prod=random_noise
 
 # total number of files per job
 # For neutrinos 3-100GeV:
@@ -36,7 +36,7 @@ mc_prod=mupage
 # muon-CC/elec-CC/elec-NC n=120 with PBS -l nodes=1:ppn=4:sl,walltime=5:00:00
 # For mupage: n=250 with PBS -l nodes=1:ppn=4:sl,walltime=5:00:00
 # For random_noise: n=100 with PBS -l nodes=1:ppn=4:sl,walltime=5:00:00
-files_per_job=200 # must be dividible by 4!
+files_per_job=100 # must be divisible by 4!
 
 #--- USER INPUT ---#
 
@@ -75,7 +75,7 @@ folder="${folder_ip_files_arr[${mc_prod}]}"
 # run
 
 no_of_loops=$((${files_per_job}/4)) # divide by 4 cores -> e.g, 15 4-core loops needed for files_per_job=60
-file_no_start=$((1+((${n}-1) * ${files_per_job}))) # filenumber of the first file that is being processed by this script (depends on JobArray variable 'n')
+file_no_start=$((500+1+((${n}-1) * ${files_per_job}))) # filenumber of the first file that is being processed by this script (depends on JobArray variable 'n')
 
 # currently only working for 4 cores
 
diff --git a/utilities/evaluate_generator_IO_speed.py b/utilities/evaluate_generator_IO_speed.py
deleted file mode 100644
index 5c9859bec981598e41e484264237ed7d5d54e5d3..0000000000000000000000000000000000000000
--- a/utilities/evaluate_generator_IO_speed.py
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""Code for testing the readout speed of orcasong .hdf5 files."""
-
-import numpy as np
-import h5py
-import timeit
-import cProfile
-
-def generate_batches_from_hdf5_file():
-    # 4d
-    #filepath = 'JTE_KM3Sim_gseagen_muon-CC_3-100GeV-9_1E7-1bin-3_0gspec_ORCA115_9m_2016_9_xyzt_no_compression_chunked.h5' # 4D, (11x13x18x50)), no compression. chunksize=32 --> 1011 ms
-    #filepath = 'JTE_KM3Sim_gseagen_muon-CC_3-100GeV-9_1E7-1bin-3_0gspec_ORCA115_9m_2016_9_xyzt_lzf.h5' # 4D, (11x13x18x50), lzf --> 2194 ms
-    #filepath = 'JTE_KM3Sim_gseagen_muon-CC_3-100GeV-9_1E7-1bin-3_0gspec_ORCA115_9m_2016_9_xyzt_gzip_1.h5' # 4D, (11x13x18x50), gzip, compression_opts=1 --> 1655 ms
-
-    # With new km3pipe structure
-    filepath = '/home/woody/capn/mppi033h/orcasong_output/4dTo4d/xyzc/JTE_ph_ph_mupage_ph_ph_ph_ORCA115_9m_2016_9_xyzc.h5'
-
-    print('Testing generator on file ' + filepath)
-    batchsize = 32
-    dimensions = (batchsize, 11, 13, 18, 31)  # 4D
-
-    f = h5py.File(filepath, "r")
-    filesize = len(f['y'])
-    print(filesize)
-
-    n_entries = 0
-    while n_entries < (filesize - batchsize):
-        xs = f['x'][n_entries : n_entries + batchsize]
-        xs = np.reshape(xs, dimensions).astype(np.float32)
-
-        y_values = f['y'][n_entries:n_entries+batchsize]
-        ys = y_values[['run_id', 'event_id']]
-
-        n_entries += batchsize
-        yield (xs, ys)
-    f.close()
-
-
-number = 20
-#t = timeit.timeit(generate_batches_from_hdf5_file, number = number)
-#t = timeit.Timer(stmt="list(generate_batches_from_hdf5_file())", setup="from __main__ import generate_batches_from_hdf5_file")
-#print t.timeit(number) / number
-#print str(number) + 'loops, on average ' + str(t.timeit(number) / number *1000) + 'ms'
-
-pr = cProfile.Profile()
-pr.enable()
-
-t = timeit.Timer(stmt="list(generate_batches_from_hdf5_file())", setup="from __main__ import generate_batches_from_hdf5_file")
-print(str(number) + 'loops, on average ' + str(t.timeit(number) / number *1000) + 'ms')
-
-pr.disable()
-
-pr.print_stats(sort='time')
\ No newline at end of file
diff --git a/utilities/low_e_prod_get_surviving_events.py b/utilities/low_e_prod_get_surviving_events.py
deleted file mode 100644
index e31d08fc0bfb0416090c4fe21c17ff3c7d69116a..0000000000000000000000000000000000000000
--- a/utilities/low_e_prod_get_surviving_events.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import h5py
-import numpy as np
-
-path = '/home/woody/capn/mppi033h/Data/ORCA_JTE_NEMOWATER/ip_images_1-100GeV/4dTo4d/time_-250+500_w_gf_60b'
-# JTE_KM3Sim_gseagen_muon-CC_1-5GeV-9_2E5-1bin-1_0gspec_ORCA115_9m_2016_98_xyzt.h5
-ptypes = {'muon-CC': 'JTE_KM3Sim_gseagen_muon-CC_1-5GeV-9_2E5-1bin-1_0gspec_ORCA115_9m_2016_',
-          'elec-CC': 'JTE_KM3Sim_gseagen_elec-CC_1-5GeV-2_7E5-1bin-1_0gspec_ORCA115_9m_2016_'}
-
-event_id, run_id = None, None
-for ptype in ptypes.keys():
-    for i in range(601):
-        if i % 100 == 0:
-            print(i)
-        if i == 0: continue
-
-        f = h5py.File(path + '/' + ptypes[ptype] + str(i) + '_xyzt.h5', 'r')
-        event_id_f = f['y'][:, 0]
-        run_id_f = f['y'][:, 9]
-
-        if event_id is None:
-            event_id = event_id_f
-            run_id = run_id_f
-        else:
-            event_id = np.concatenate([event_id, event_id_f], axis=0)
-            run_id = np.concatenate([run_id, run_id_f], axis=0)
-
-        f.close()
-
-    ax = np.newaxis
-    arr = np.concatenate([run_id[:, ax], event_id[:, ax]], axis=1)
-    np.save('/home/woody/capn/mppi033h/Code/OrcaSong/utilities/low_e_prod_surviving_evts_' + ptype + '.npy', arr)
-    event_id, run_id = None, None
-
-
-
-