Commit 315719a3 authored by ViaFerrata

- Split the make_nn_images code into separate files

- Added new data cut with custom user function support (not ideal atm)
parent 36dc6ad3
......@@ -93,6 +93,7 @@ data_cut_triggered = false
data_cut_e_low = 'None'
data_cut_e_high = 'None'
data_cut_throw_away = 0.00
data_cut_custom_func = 'None'
prod_ident = 1
flush_freq = 1000
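For context, a minimal sketch of how such an option is read back on the Python side (the path is hypothetical; OrcaSong uses the string 'None' as a sentinel for unset options, as seen in make_nn_images.py further below):
import toml

config = toml.load('orcasong/default_config.toml')  # hypothetical path
custom_func = config['data_cut_custom_func']
custom_func = None if custom_func == 'None' else custom_func  # the 'None' string means "no custom cut"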
......
#!/usr/bin/env python
# coding=utf-8
# Filename: geo_binning.py
"""
OrcaSong code that handles the spatial binning of the output images.
"""
import numpy as np
import km3pipe as kp
def calculate_bin_edges(n_bins, det_geo, detx_filepath, do4d):
"""
Calculates the bin edges for the corresponding detector geometry (1 DOM/bin) based on the number of specified bins.
Used later on for making the event "images" with the np.histogramdd functions in hits_to_histograms.py.
The bin edges are necessary in order to get the same bin size for each event, regardless of whether all bins contain a hit or not.
Parameters
----------
n_bins : tuple
Contains the desired number of bins for each dimension, [n_bins_x, n_bins_y, n_bins_z].
det_geo : str
Declares what detector geometry should be used for the binning.
detx_filepath : str
Filepath of a .detx detector file which contains the geometry of the detector.
do4d : tuple(bool, str)
Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz.
Returns
-------
geo : kp.calib.Calibration
Calibration object that contains the detector geometry loaded from the .detx file.
x_bin_edges, y_bin_edges, z_bin_edges : ndarray(ndim=1)
The bin edges for the x, y and z direction.
"""
# Loads a kp.Geometry object based on the filepath of a .detx file
print("Reading detector geometry in order to calculate the detector dimensions from file " + detx_filepath)
geo = kp.calib.Calibration(filename=detx_filepath)
# derive maximum and minimum x,y,z coordinates of the geometry input [[xmin, ymin, zmin], [xmax, ymax, zmax]]
dom_position_values = geo.get_detector().dom_positions.values()
dom_pos_max = np.amax([pos for pos in dom_position_values], axis=0)
dom_pos_min = np.amin([pos for pos in dom_position_values], axis=0)
geo_limits = dom_pos_min, dom_pos_max
print('Detector dimensions [[xmin, ymin, zmin], [xmax, ymax, zmax]]: ' + str(geo_limits))
if det_geo == 'Orca_115l_23m_h_9m_v' or det_geo == 'Orca_115l_23m_h_?m_v':
x_bin_edges = np.linspace(geo_limits[0][0] - 9.95, geo_limits[1][0] + 9.95, num=n_bins[0] + 1) #try to get the lines in the bin center 9.95*2 = average x-separation of two lines
y_bin_edges = np.linspace(geo_limits[0][1] - 9.75, geo_limits[1][1] + 9.75, num=n_bins[1] + 1) # Delta y = 19.483
# Fitted offsets: x,y,factor: factor*(x+x_off), # Stefan's modifications:
offset_x, offset_y, scale = [6.19, 0.064, 1.0128]
x_bin_edges = (x_bin_edges + offset_x) * scale
y_bin_edges = (y_bin_edges + offset_y) * scale
if det_geo == 'Orca_115l_23m_h_?m_v':
# ORCA denser detector study
z_bin_edges = np.linspace(37.84 - 7.5, 292.84 + 7.5, num=n_bins[2] + 1) # 15m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 6, 241.84 + 6, num=n_bins[2] + 1) # 12m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 4.5, 190.84 + 4.5, num=n_bins[2] + 1) # 9m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 3, 139.84 + 3, num=n_bins[2] + 1) # 6m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 2.25, 114.34 + 2.25, num=n_bins[2] + 1) # 4.5m vertical, 18 DOMs
else:
n_bins_z = n_bins[2] if do4d[1] != 'xzt-c' else n_bins[1] # n_bins = [xyz,t/c] or n_bins = [xzt,c]
z_bin_edges = np.linspace(geo_limits[0][2] - 4.665, geo_limits[1][2] + 4.665, num=n_bins_z + 1) # Delta z = 9.329
# calculate_bin_edges_test(geo, y_bin_edges, z_bin_edges) # test disabled by default. Activate it, if you change the offsets of the x/y/z bin edges
else:
raise ValueError('The specified detector geometry "' + str(det_geo) + '" is not available.')
return geo, x_bin_edges, y_bin_edges, z_bin_edges
def calculate_bin_edges_test(geo, y_bin_edges, z_bin_edges):
"""
Tests that the bins in one direction do not accidentally contain more than 'one' OM.
For the x-direction, overlap cannot be avoided in an orthogonal space.
For y and z though, it can!
For y, every bin should contain the number of lines at that y-position times 18, since there are 18 DOMs per line.
For z, every bin should contain 115 entries, since every z bin contains one storey of the 115 ORCA lines.
Not 100% accurate, since only the DOM positions are used and not the individual PMT positions of a DOM.
"""
dom_positions = np.stack(list(geo.get_detector().dom_positions.values()))
dom_y = dom_positions[:, 1]
dom_z = dom_positions[:, 2]
hist_y = np.histogram(dom_y, bins=y_bin_edges)
hist_z = np.histogram(dom_z, bins=z_bin_edges)
print('----------------------------------------------------------------------------------------------')
print('Y-axis: Bin content: ' + str(hist_y[0]))
print('It should be: ' + str(np.array(
[4 * 18, 7 * 18, 9 * 18, 10 * 18, 9 * 18, 10 * 18, 10 * 18, 10 * 18, 11 * 18, 10 * 18, 9 * 18, 8 * 18, 8 * 18])))
print('Y-axis: Bin edges: ' + str(hist_y[1]))
print('..............................................................................................')
print('Z-axis: Bin content: ' + str(hist_z[0]))
print('It should have 115 entries everywhere')
print('Z-axis: Bin edges: ' + str(hist_z[1]))
print('----------------------------------------------------------------------------------------------')
\ No newline at end of file
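A minimal usage sketch of calculate_bin_edges (detector file path and bin numbers are illustrative, not taken from this commit):
from orcasong.geo_binning import calculate_bin_edges

geo, x_bin_edges, y_bin_edges, z_bin_edges = calculate_bin_edges(
    n_bins=(11, 13, 18, 60),                  # x, y, z, t; only the first three are used here
    det_geo='Orca_115l_23m_h_9m_v',
    detx_filepath='/path/to/orca_115l.detx',  # hypothetical .detx file
    do4d=(True, 'time'))
print(len(x_bin_edges), len(y_bin_edges), len(z_bin_edges))  # each axis has n_bins + 1 edges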
#!/usr/bin/env python
# coding=utf-8
# Filename: io.py
"""
IO Code for OrcaSong.
"""
import os
import errno
import toml
from docopt import docopt
import warnings
def parse_input():
"""
Parses and returns all necessary input options for the make_nn_images function.
Returns
-------
fname : str
Full filepath to the input .h5 file.
detx_filepath : str
Full filepath to the .detx geometry file that belongs to the fname.
config_filepath : str
Full filepath to a config file. An example can be found in orcasong/default_config.toml
"""
args = docopt(__doc__)
fname = args['SIMFILE']
detx_filepath = args['DETXFILE']
config_filepath = args['CONFIGFILE']
return fname, detx_filepath, config_filepath
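parse_input relies on docopt and a module docstring containing a usage pattern (not shown in this diff); a hedged sketch of a usage string that would yield the three keys accessed above (the script name is assumed):
"""
Usage:
    make_nn_images.py SIMFILE DETXFILE CONFIGFILE
    make_nn_images.py (-h | --help)

Arguments:
    SIMFILE     Path to the input .h5 simulation file.
    DETXFILE    Path to the corresponding .detx geometry file.
    CONFIGFILE  Path to a .toml configuration file.
"""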
def load_config(config_filepath):
"""
Loads the config from a .toml file.
Parameters
----------
config_filepath : str
Full filepath to a config file. An example can be found in orcasong/default_config.toml
Returns
-------
config : dict
Dictionary that contains all configuration options of the make_nn_images function.
An explanation of the config parameters can be found in orcasong/default_config.toml.
"""
config = toml.load(config_filepath)
print('Loaded the config file from ' + os.path.abspath(config_filepath))
return config
def check_user_input(fname, detx_filepath, config):
"""
Sanity check of the user input.
Parameters
----------
fname : str
Full filepath to the input .h5 file.
detx_filepath : str
Full filepath to the .detx geometry file that belongs to the fname.
config : dict
Dictionary that contains all configuration options of the make_nn_images function.
An explanation of the config parameters can be found in orcasong/default_config.toml.
"""
#---- Checks input types ----#
# Check for options with a single, non-boolean element
number_args = {'do2d_plots_n': int, 'data_cut_e_low': float, 'data_cut_e_high': float,
'data_cut_throw_away': float, 'prod_ident': int}
for key in number_args:
expected_arg_type = number_args[key]
parsed_arg = config[key]
if parsed_arg in [None, 'None']: # we don't want to check args when there has been no user input
continue
if not isinstance(parsed_arg, expected_arg_type):
try:
expected_arg_type(parsed_arg) # check if the value can at least be cast to the expected type
except (ValueError, TypeError):
raise TypeError('The argument option "' + key + '" only accepts ' + str(expected_arg_type) +
' values as an input.')
# Checks the n_bins tuple input
for dim in config['n_bins']:
if type(dim) != int:
raise TypeError('The argument option n_bins only accepts integer values as an input!'
' Your values have the type ' + str(type(dim)))
# ---- Checks input types ----#
# ---- Check other things ----#
if not os.path.isfile(fname):
raise IOError('The file "' + fname + '" does not exist.')
if not os.path.isfile(detx_filepath):
raise IOError('The file "' + detx_filepath + '" does not exist.')
if not (config['do2d'] or config['do3d'] or config['do4d'][0]):
raise ValueError('At least one of the do2d, do3d or do4d options must be set to True.')
if not config['do2d'] and config['do2d_plots']:
raise ValueError('The 2D pdf images cannot be created if do2d=False!')
if config['do2d_plots'] and config['do2d_plots_n'] > 100:
warnings.warn('You declared do2d_plots=True with do2d_plots_n > 100. This will take more than two minutes. '
'Do you really want to create pdf images for so many events?')
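A short sketch of the behaviour of these checks (file names and the broken value are made up):
from orcasong.io import load_config, check_user_input

config = load_config('orcasong/default_config.toml')  # hypothetical path
config['n_bins'] = [11, 13, '18', 60]                 # wrong type on purpose
check_user_input('simfile.h5', 'orca.detx', config)   # raises TypeError: n_bins only accepts integers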
def make_output_dirs(output_dirpath, do2d, do3d, do4d):
"""
Function that creates all output directories if they don't exist already.
Parameters
----------
output_dirpath : str
Full path to the directory, where the orcasong output should be stored.
do2d : bool
Declares if 2D histograms are to be created.
do3d : bool
Declares if 3D histograms are to be created.
do4d : tuple(bool, str)
Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz.
Currently, only 'time' and 'channel_id' are available.
"""
if do2d:
projections = ['xy', 'xz', 'yz', 'xt', 'yt', 'zt']
for proj in projections:
if not os.path.exists(output_dirpath + '/orcasong_output/4dTo2d/' + proj):
try:
os.makedirs(output_dirpath + '/orcasong_output/4dTo2d/' + proj)
except OSError as e:
if e.errno != errno.EEXIST:
raise
if do3d:
projections = ['xyz', 'xyt', 'xzt', 'yzt', 'rzt']
for proj in projections:
if not os.path.exists(output_dirpath + '/orcasong_output/4dTo3d/' + proj):
try:
os.makedirs(output_dirpath + '/orcasong_output/4dTo3d/' + proj)
except OSError as e:
if e.errno != errno.EEXIST:
raise
if do4d[0]:
proj = 'xyzt' if not do4d[1] == 'channel_id' else 'xyzc'
if not os.path.exists(output_dirpath + '/orcasong_output/4dTo4d/' + proj):
try:
os.makedirs(output_dirpath + '/orcasong_output/4dTo4d/' + proj)
except OSError as e:
if e.errno != errno.EEXIST:
raise
\ No newline at end of file
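On Python 3.2+ the same create-if-missing logic can be written more compactly with exist_ok=True; a sketch of the equivalent pattern (not a drop-in change to the module, which keeps the errno check):
import os

output_dirpath = '/path/to/output'  # hypothetical
for proj in ('xy', 'xz', 'yz', 'xt', 'yt', 'zt'):
    os.makedirs(output_dirpath + '/orcasong_output/4dTo2d/' + proj, exist_ok=True)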
......@@ -35,14 +35,9 @@ __email__ = 'michael.m.moser@fau.de'
__status__ = 'Prototype'
import os
import errno
import sys
import warnings
import toml
from docopt import docopt
#from memory_profiler import profile # for memory profiling, call with @profile; myfunc()
#import line_profiler # call with kernprof -l -v file.py args
import numpy as np
import km3pipe as kp
import km3modules as km
import matplotlib as mpl
......@@ -51,337 +46,9 @@ from matplotlib.backends.backend_pdf import PdfPages
from orcasong.file_to_hits import EventDataExtractor
from orcasong.hits_to_histograms import HistogramMaker
def parse_input():
"""
Parses and returns all necessary input options for the make_nn_images function.
Returns
-------
fname : str
Full filepath to the input .h5 file.
detx_filepath : str
Full filepath to the .detx geometry file that belongs to the fname.
config_filepath : str
Full filepath to a config file. An example can be found in orcasong/default_config.toml
"""
args = docopt(__doc__)
fname = args['SIMFILE']
detx_filepath = args['DETXFILE']
config_filepath = args['CONFIGFILE']
return fname, detx_filepath, config_filepath
def load_config(config_filepath):
"""
Loads the config from a .toml file.
Parameters
----------
config_filepath : str
Full filepath to a config file. An example can be found in orcasong/default_config.toml
Returns
-------
config : dict
Dictionary that contains all configuration options of the make_nn_images function.
An explanation of the config parameters can be found in orcasong/default_config.toml.
"""
config = toml.load(config_filepath)
print('Loaded the config file from ' + os.path.abspath(config_filepath))
return config
def check_user_input(fname, detx_filepath, config):
"""
Sanity check of the user input.
Parameters
----------
fname : str
Full filepath to the input .h5 file.
detx_filepath : str
Full filepath to the .detx geometry file that belongs to the fname.
config : dict
Dictionary that contains all configuration options of the make_nn_images function.
An explanation of the config parameters can be found in orcasong/default_config.toml.
"""
#---- Checks input types ----#
# Check for options with a single, non-boolean element
number_args = {'do2d_plots_n': int, 'data_cut_e_low': float, 'data_cut_e_high': float,
'data_cut_throw_away': float, 'prod_ident': int}
for key in number_args:
expected_arg_type = number_args[key]
parsed_arg = config[key]
if parsed_arg in [None, 'None']: # we don't want to check args when there has been no user input
continue
if type(parsed_arg) != expected_arg_type:
try:
map(expected_arg_type, parsed_arg)
except ValueError:
raise TypeError('The argument option ', key, ' only accepts ', str(expected_arg_type),
' values as an input.')
# Checks the n_bins tuple input
for dim in config['n_bins']:
if type(dim) != int:
raise TypeError('The argument option n_bins only accepts integer values as an input!'
' Your values have the type ' + str(type(dim)))
# ---- Checks input types ----#
# ---- Check other things ----#
if not os.path.isfile(fname):
raise IOError('The file -' + fname+ '- does not exist.')
if not os.path.isfile(detx_filepath):
raise IOError('The file -' + detx_filepath + '- does not exist.')
if all(do_nd == False for do_nd in [config['do2d'], config['do3d'],config['do4d']]):
raise ValueError('At least one of do2d, do3d or do4d options must be set to True.')
if config['do2d'] == False and config['do2d_plots'] == True:
raise ValueError('The 2D pdf images cannot be created if do2d=False!')
if config['do2d_plots'] == True and config['do2d_plots_n'] > 100:
warnings.warn('You declared do2d_pdf=(True, int) with int > 100. This will take more than two minutes. '
'Do you really want to create pdf images for so many events?')
def make_output_dirs(output_dirpath, do2d, do3d, do4d):
"""
Function that creates all output directories if they don't exist already.
Parameters
----------
output_dirpath : str
Full path to the directory, where the orcasong output should be stored.
do2d : bool
Declares if 2D histograms are to be created.
do3d : bool
Declares if 3D histograms are to be created.
do4d : tuple(bool, str)
Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz.
Currently, only 'time' and 'channel_id' are available.
"""
if do2d:
projections = ['xy', 'xz', 'yz', 'xt', 'yt', 'zt']
for proj in projections:
if not os.path.exists(output_dirpath + '/orcasong_output/4dTo2d/' + proj):
try:
os.makedirs(output_dirpath + '/orcasong_output/4dTo2d/' + proj)
except OSError as e:
if e.errno != errno.EEXIST:
raise
if do3d:
projections = ['xyz', 'xyt', 'xzt', 'yzt', 'rzt']
for proj in projections:
if not os.path.exists(output_dirpath + '/orcasong_output/4dTo3d/' + proj):
try:
os.makedirs(output_dirpath + '/orcasong_output/4dTo3d/' + proj)
except OSError as e:
if e.errno != errno.EEXIST:
raise
if do4d[0]:
proj = 'xyzt' if not do4d[1] == 'channel_id' else 'xyzc'
if not os.path.exists(output_dirpath + '/orcasong_output/4dTo4d/' + proj):
try:
os.makedirs(output_dirpath + '/orcasong_output/4dTo4d/' + proj)
except OSError as e:
if e.errno != errno.EEXIST:
raise
def calculate_bin_edges(n_bins, det_geo, detx_filepath, do4d):
"""
Calculates the bin edges for the corresponding detector geometry (1 DOM/bin) based on the number of specified bins.
Used later on for making the event "images" with the np.histogramdd functions in hits_to_histograms.py.
The bin edges are necessary in order to get the same bin size for each event, regardless of whether all bins contain a hit or not.
Parameters
----------
n_bins : tuple
Contains the desired number of bins for each dimension, [n_bins_x, n_bins_y, n_bins_z].
det_geo : str
Declares what detector geometry should be used for the binning.
detx_filepath : str
Filepath of a .detx detector file which contains the geometry of the detector.
do4d : tuple(bool, str)
Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz.
Returns
-------
x_bin_edges, y_bin_edges, z_bin_edges : ndarray(ndim=1)
The bin edges for the x,y,z direction.
"""
# Loads a kp.Geometry object based on the filepath of a .detx file
print("Reading detector geometry in order to calculate the detector dimensions from file " + detx_filepath)
geo = kp.calib.Calibration(filename=detx_filepath)
# derive maximum and minimum x,y,z coordinates of the geometry input [[xmin, ymin, zmin], [xmax, ymax, zmax]]
dom_position_values = geo.get_detector().dom_positions.values()
dom_pos_max = np.amax([pos for pos in dom_position_values], axis=0)
dom_pos_min = np.amin([pos for pos in dom_position_values], axis=0)
geo_limits = dom_pos_min, dom_pos_max
print('Detector dimensions [[xmin, ymin, zmin], [xmax, ymax, zmax]]: ' + str(geo_limits))
if det_geo == 'Orca_115l_23m_h_9m_v' or det_geo == 'Orca_115l_23m_h_?m_v':
x_bin_edges = np.linspace(geo_limits[0][0] - 9.95, geo_limits[1][0] + 9.95, num=n_bins[0] + 1) #try to get the lines in the bin center 9.95*2 = average x-separation of two lines
y_bin_edges = np.linspace(geo_limits[0][1] - 9.75, geo_limits[1][1] + 9.75, num=n_bins[1] + 1) # Delta y = 19.483
# Fitted offsets: x,y,factor: factor*(x+x_off), # Stefan's modifications:
offset_x, offset_y, scale = [6.19, 0.064, 1.0128]
x_bin_edges = (x_bin_edges + offset_x) * scale
y_bin_edges = (y_bin_edges + offset_y) * scale
if det_geo == 'Orca_115l_23m_h_?m_v':
# ORCA denser detector study
z_bin_edges = np.linspace(37.84 - 7.5, 292.84 + 7.5, num=n_bins[2] + 1) # 15m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 6, 241.84 + 6, num=n_bins[2] + 1) # 12m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 4.5, 190.84 + 4.5, num=n_bins[2] + 1) # 9m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 3, 139.84 + 3, num=n_bins[2] + 1) # 6m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 2.25, 114.34 + 2.25, num=n_bins[2] + 1) # 4.5m vertical, 18 DOMs
else:
n_bins_z = n_bins[2] if do4d[1] != 'xzt-c' else n_bins[1] # n_bins = [xyz,t/c] or n_bins = [xzt,c]
z_bin_edges = np.linspace(geo_limits[0][2] - 4.665, geo_limits[1][2] + 4.665, num=n_bins_z + 1) # Delta z = 9.329
# calculate_bin_edges_test(geo, y_bin_edges, z_bin_edges) # test disabled by default. Activate it, if you change the offsets of the x/y/z bin edges
else:
raise ValueError('The specified detector geometry "' + str(det_geo) + '" is not available.')
return geo, x_bin_edges, y_bin_edges, z_bin_edges
def calculate_bin_edges_test(geo, y_bin_edges, z_bin_edges):
"""
Tests that the bins in one direction do not accidentally contain more than 'one' OM.
For the x-direction, overlap cannot be avoided in an orthogonal space.
For y and z though, it can!
For y, every bin should contain the number of lines at that y-position times 18, since there are 18 DOMs per line.
For z, every bin should contain 115 entries, since every z bin contains one storey of the 115 ORCA lines.
Not 100% accurate, since only the DOM positions are used and not the individual PMT positions of a DOM.
"""
dom_positions = np.stack(list(geo.get_detector().dom_positions.values()))
dom_y = dom_positions[:, 1]
dom_z = dom_positions[:, 2]
hist_y = np.histogram(dom_y, bins=y_bin_edges)
hist_z = np.histogram(dom_z, bins=z_bin_edges)
print('----------------------------------------------------------------------------------------------')
print('Y-axis: Bin content: ' + str(hist_y[0]))
print('It should be: ' + str(np.array(
[4 * 18, 7 * 18, 9 * 18, 10 * 18, 9 * 18, 10 * 18, 10 * 18, 10 * 18, 11 * 18, 10 * 18, 9 * 18, 8 * 18, 8 * 18])))
print('Y-axis: Bin edges: ' + str(hist_y[1]))
print('..............................................................................................')
print('Z-axis: Bin content: ' + str(hist_z[0]))
print('It should have 115 entries everywhere')
print('Z-axis: Bin edges: ' + str(hist_z[1]))
print('----------------------------------------------------------------------------------------------')
def get_file_particle_type(fname):
"""
Returns a string that specifies the type of the particles that are contained in the input file.
Parameters
----------
fname : str
Filename (full path!) of the input file.
Returns
-------
file_particle_type : str
String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino'].
"""
event_pump = kp.io.hdf5.HDF5Pump(filename=fname, verbose=False) # TODO suppress print of hdf5pump
if 'McTracks' not in event_pump[0]:
file_particle_type = 'undefined'
else:
particle_type = event_pump[0]['McTracks'].type
# if mupage file: first mc_track is an empty neutrino track, second track is the first muon
if particle_type[0] == 0 and np.abs(particle_type[1]) == 13:
file_particle_type = 'muon'
# the first mc_track is the primary neutrino if the input file is containing neutrino events
elif np.abs(particle_type[0]) in [12, 14, 16]:
file_particle_type = 'neutrino'
else:
raise ValueError('The type of the particles in the "McTracks" folder, <', str(particle_type), '> is not known.')
event_pump.close_file()
return file_particle_type
class EventSkipper(kp.Module):
"""
KM3Pipe module that skips events based on some data_cuts.
"""
def configure(self):
self.data_cuts = self.require('data_cuts')
def process(self, blob):
continue_bool = skip_event(blob['event_track'], self.data_cuts)
if continue_bool:
return
else:
return blob
def skip_event(event_track, data_cuts):
"""
Function that checks if an event should be skipped, depending on the data_cuts input.
Parameters
----------
event_track : ndarray(ndim=1)
1D array containing important MC information of the event, only the energy of the event (pos_2) is used here.
data_cuts : dict
Dictionary that contains information about any possible cuts that should be applied.
Supports the following cuts: 'triggered', 'energy_lower_limit', 'energy_upper_limit', 'throw_away_prob'.
Returns
-------
continue_bool : bool
boolean flag to specify, if this event should be skipped or not.
"""
continue_bool = False
if data_cuts['energy_lower_limit'] is not None:
continue_bool = event_track.energy[0] < data_cuts['energy_lower_limit'] # True if E < lower limit
if data_cuts['energy_upper_limit'] is not None and continue_bool == False:
continue_bool = event_track.energy[0] > data_cuts['energy_upper_limit'] # True if E > upper limit
if data_cuts['throw_away_prob'] is not None and continue_bool == False:
throw_away_prob = data_cuts['throw_away_prob']
throw_away = np.random.choice([False, True], p=[1 - throw_away_prob, throw_away_prob])
if throw_away: continue_bool = True
return continue_bool
from orcasong.io import parse_input, load_config, check_user_input, make_output_dirs
from orcasong.geo_binning import calculate_bin_edges
from orcasong.utils import get_file_particle_type, EventSkipper
def make_nn_images(fname, detx_filepath, config):
......@@ -402,7 +69,7 @@ def make_nn_images(fname, detx_filepath, config):
An explanation of the config parameters can be found in orcasong/default_config.toml.
"""
# Load all parameters from the config
# Load all parameters from the config # TODO put everything in a config class, this is horrible
output_dirpath = config['output_dirpath']
chunksize, complib, complevel = config['chunksize'], config['complib'], config['complevel']
flush_freq = config['flush_freq']
......@@ -420,6 +87,7 @@ def make_nn_images(fname, detx_filepath, config):
data_cuts['energy_lower_limit'] = config['data_cut_e_low'] if config['data_cut_e_low'] != 'None' else None
data_cuts['energy_upper_limit'] = config['data_cut_e_high'] if config['data_cut_e_high'] != 'None' else None
data_cuts['throw_away_prob'] = config['data_cut_throw_away'] if config['data_cut_throw_away'] != 'None' else None
data_cuts['custom_skip_function'] = config['data_cut_custom_func'] if config['data_cut_custom_func'] != 'None' else None
make_output_dirs(output_dirpath, do2d, do3d, do4d)
......
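For reference, with the default values shown at the top of this commit plus data_cut_custom_func = 'ts_e_flat', the block above produces a dictionary along these lines:
data_cuts = {'triggered': False,                   # data_cut_triggered = false
             'energy_lower_limit': None,           # data_cut_e_low = 'None'
             'energy_upper_limit': None,           # data_cut_e_high = 'None'
             'throw_away_prob': 0.0,               # data_cut_throw_away = 0.00
             'custom_skip_function': 'ts_e_flat'}  # data_cut_custom_func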
#!/usr/bin/env python
# coding=utf-8
# Filename: utils.py
"""
Utility code for OrcaSong.
"""
import numpy as np
import km3pipe as kp
class EventSkipper(kp.Module):
"""
KM3Pipe module that skips events based on some data_cuts.
"""
def configure(self):
self.data_cuts = self.require('data_cuts')
def process(self, blob):
continue_bool = skip_event(blob['event_track'], self.data_cuts)
if continue_bool:
return
else:
return blob
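A minimal sketch of how this module would be attached to a km3pipe pipeline (file name and cut values are illustrative; the surrounding modules are only indicated):
import km3pipe as kp
from orcasong.utils import EventSkipper

data_cuts = {'triggered': False, 'energy_lower_limit': 3.0, 'energy_upper_limit': None,
             'throw_away_prob': None, 'custom_skip_function': None}  # illustrative values

pipe = kp.Pipeline()
pipe.attach(kp.io.hdf5.HDF5Pump, filename='simfile.h5')  # hypothetical input file
# ... attach the EventDataExtractor here so that blob['event_track'] exists ...
pipe.attach(EventSkipper, data_cuts=data_cuts)           # drops events for which skip_event() returns True
# ... attach the HistogramMaker and an output sink here ...
pipe.drain()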
def skip_event(event_track, data_cuts):
"""
Function that checks if an event should be skipped, depending on the data_cuts input.
Parameters
----------
event_track : ndarray(ndim=1)
1D array containing important MC information of the event, only the energy of the event (pos_2) is used here.
data_cuts : dict
Dictionary that contains information about any possible cuts that should be applied.
Supports the following cuts: 'triggered', 'energy_lower_limit', 'energy_upper_limit', 'throw_away_prob', 'custom_skip_function'.
Returns
-------
continue_bool : bool
Boolean flag that specifies if this event should be skipped or not.
"""
continue_bool = False
if data_cuts['energy_lower_limit'] is not None:
continue_bool = event_track.energy[0] < data_cuts['energy_lower_limit'] # True if E < lower limit
if data_cuts['energy_upper_limit'] is not None and continue_bool == False:
continue_bool = event_track.energy[0] > data_cuts['energy_upper_limit'] # True if E > upper limit
if data_cuts['throw_away_prob'] is not None and continue_bool == False:
throw_away_prob = data_cuts['throw_away_prob']
throw_away = np.random.choice([False, True], p=[1 - throw_away_prob, throw_away_prob])
if throw_away: continue_bool = True
if data_cuts['custom_skip_function'] is not None and continue_bool == False:
continue_bool = use_custom_skip_function(data_cuts['custom_skip_function'], event_track)
return continue_bool
def use_custom_skip_function(skip_function, event_track):
"""
User defined, custom skip functions.
Parameters
----------
skip_function : str
String that specifies which custom skip function should be used.
event_track : ndarray(ndim=1)
Structured numpy array containing the McTracks info of this event.
Returns
-------
continue_bool : bool
Bool that specifies if this event should be skipped or not.
"""
if skip_function == 'ts_e_flat':
# cf. orcanet_contrib/utilities/get_func_for_flat_track_shower.py
particle_type = event_track.particle_type[0]
if np.abs(particle_type) not in [12, 14]:
continue_bool = False
else:
prob_for_e_bin_arr_path = 'placeholder.npy'
# Fraction of tracks compared to showers (n_muon-cc / (n_e-cc + n_e-nc))
arr_fract_for_e_bins = np.load(prob_for_e_bin_arr_path) # 2d arr, one row e_bins, second row prob
e_bins = arr_fract_for_e_bins[0, :]
fract_e = arr_fract_for_e_bins[1, :]
e_evt = event_track.energy[0]
idx_e = (np.abs(e_bins - e_evt)).argmin()
fract = fract_e[idx_e]
if np.abs(particle_type) == 14: # for muon neutrinos
if fract <= 1:
continue_bool = False
else:
evt_survive_prob = 1/float(fract)
continue_bool = np.random.choice([False, True], p=[evt_survive_prob, 1 - evt_survive_prob])
else: # for elec neutrinos
assert np.abs(particle_type) == 12
if fract >= 1:
continue_bool = False
else:
evt_survive_prob = fract
continue_bool = np.random.choice([False, True], p=[evt_survive_prob, 1 - evt_survive_prob])
return continue_bool
else:
raise ValueError('The custom skip function "' + str(skip_function) + '" is not known.')
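To make the rejection sampling in the 'ts_e_flat' branch concrete, a small worked sketch with a made-up fraction value:
import numpy as np

fract = 4.0                          # illustrative: 4x more muon-CC than shower events in this energy bin
evt_survive_prob = 1 / float(fract)  # 0.25
continue_bool = np.random.choice([False, True], p=[evt_survive_prob, 1 - evt_survive_prob])
# continue_bool is True (event gets skipped) with probability 0.75, so on average
# only every 4th muon-CC event in this energy bin survives the cut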
def get_file_particle_type(fname):
"""
Returns a string that specifies the type of the particles that are contained in the input file.
Parameters
----------
fname : str
Filename (full path!) of the input file.
Returns
-------
file_particle_type : str
String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino'].
"""
event_pump = kp.io.hdf5.HDF5Pump(filename=fname, verbose=False) # TODO suppress print of hdf5pump
if 'McTracks' not in event_pump[0]:
file_particle_type = 'undefined'
else:
particle_type = event_pump[0]['McTracks'].type
# if mupage file: first mc_track is an empty neutrino track, second track is the first muon
if particle_type[0] == 0 and np.abs(particle_type[1]) == 13:
file_particle_type = 'muon'
# the first mc_track is the primary neutrino if the input file is containing neutrino events
elif np.abs(particle_type[0]) in [12, 14, 16]:
file_particle_type = 'neutrino'
else:
raise ValueError('The type of the particles in the "McTracks" folder, <' + str(particle_type) + '> is not known.')
event_pump.close_file()
return file_particle_type
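Usage sketch (the file path is hypothetical):
from orcasong.utils import get_file_particle_type

file_particle_type = get_file_particle_type('/path/to/simfile.h5')
print(file_particle_type)  # one of 'undefined', 'muon', 'neutrino'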
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
TODO
Code that calculates the fraction of track to shower events for given files.
"""
import os
......@@ -74,7 +74,7 @@ def get_energies_for_fpaths(fpath_list, fpath_list_key_ic, cut_e_higher_than_3=F
f.close()
print('Total number of events for ' + fpath_list_key_ic + ' (without 3-5GeV from low_e prod): '
print('Total number of events for ' + fpath_list_key_ic + ' : '
+ str(energy_conc_arr.shape[0]))
print('Total number of files: ' + str(len(fpath_list)))
......@@ -82,6 +82,16 @@ def get_energies_for_fpaths(fpath_list, fpath_list_key_ic, cut_e_higher_than_3=F
def save_energies_for_ic(energies_for_ic):
"""
Parameters
----------
energies_for_ic
Returns
-------
"""
np.savez('./energies_for_ic.npz',
muon_cc_3_100=energies_for_ic['muon_cc_3_100'], muon_cc_1_5=energies_for_ic['muon_cc_1_5'],
......@@ -90,6 +100,12 @@ def save_energies_for_ic(energies_for_ic):
def load_energies_for_ic():
"""
Returns
-------
"""
data = np.load('./energies_for_ic.npz')
......@@ -147,26 +163,28 @@ def plot_e_and_make_flat_func(energies_for_ic):
pdfpages = PdfPages('./e_hist_plots.pdf')
fig, ax = plt.subplots()
e_bins = 199
# plot
hist_muon_cc = plt.hist(energies_for_ic['muon_cc'], bins=99)
plt.title('Muon-CC 1-3 + 3-100 GeV for Run 1-2400')
hist_muon_cc = plt.hist(energies_for_ic['muon_cc'], bins=e_bins)
plt.title('Muon-CC 1-5 + 3-100 GeV for Run 1-2400')
make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]')
hist_shower = plt.hist(energies_for_ic['elec_cc_and_nc'], bins=99)
plt.title('Shower (elec-CC + elec-NC) 1-3 + 3-100 GeV for 2x Run 1-1200')
hist_shower = plt.hist(energies_for_ic['elec_cc_and_nc'], bins=e_bins)
plt.title('Shower (elec-CC + elec-NC) 1-5 + 3-100 GeV for 2x Run 1-1200')
make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]')
hist_elec_cc = plt.hist(energies_for_ic['elec_cc'], bins=99)
plt.title('Elec-CC 1-3 + 3-100 GeV for Run 1-1200')
hist_elec_cc = plt.hist(energies_for_ic['elec_cc'], bins=e_bins)
plt.title('Elec-CC 1-5 + 3-100 GeV for Run 1-1200')
make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]')
hist_elec_nc = plt.hist(energies_for_ic['elec_nc'], bins=99)
plt.title('Elec-NC 1-3 + 3-100 GeV for Run 1-1200')
hist_elec_nc = plt.hist(energies_for_ic['elec_nc'], bins=e_bins)
plt.title('Elec-NC 1-5 + 3-100 GeV for Run 1-1200')
make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]')
# We take 600 muon-CC files and 300 elec-cc and 300 elec_nc files for the split, reduce 1-3GeV bins by 1/2
hist_shower[0][0] = hist_shower[0][0] / 2 # 1-2GeV
hist_shower[0][1] = hist_shower[0][1] / 2 # 2-3GeV
# # We take 600 muon-CC files and 300 elec-cc and 300 elec_nc files for the split, reduce 1-3GeV bins by 1/2
# hist_shower[0][0] = hist_shower[0][0] / 2 # 1-2GeV
# hist_shower[0][1] = hist_shower[0][1] / 2 # 2-3GeV
track_div_shower = np.divide(hist_muon_cc[0], hist_shower[0])
print(hist_muon_cc[0])
......@@ -174,7 +192,6 @@ def plot_e_and_make_flat_func(energies_for_ic):
bins = hist_muon_cc[1]  # doesn't matter which bins to use
track_div_shower = np.append(track_div_shower, track_div_shower[-1])
#track_div_shower = np.concatenate([track_div_shower, np.array(track_div_shower[-1])[:, np.newaxis]], axis=0) # fix for mpl
print(bins)
print(track_div_shower)
ax.step(bins, track_div_shower, linestyle='-', where='post')
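The 2D array consumed by the 'ts_e_flat' skip function in orcasong/utils.py (one row of energy bins, one row of fractions) is presumably built from the track_div_shower values above; continuing from those variables, a hedged sketch of how it could be written out (the file name is an assumption, cf. the 'placeholder.npy' path in utils.py):
import numpy as np

# drop the duplicated last step value again and pair each bin (lower edge) with its fraction
arr_fract_for_e_bins = np.array([bins[:-1], track_div_shower[:-1]])
np.save('./arr_fract_for_e_bins.npy', arr_fract_for_e_bins)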
......@@ -205,7 +222,8 @@ def main():
energies_for_ic = dict()
for fpath_list_key_ic in fpaths:
print('Getting energies for ' + fpath_list_key_ic)
cut_flag = True if fpath_list_key_ic in ['muon_cc_1_5', 'elec_cc_1_5', 'elec_nc_1_5'] else False
cut_flag = False
# cut_flag = True if fpath_list_key_ic in ['muon_cc_1_5', 'elec_cc_1_5', 'elec_nc_1_5'] else False
fpath_list = fpaths[fpath_list_key_ic]
energies_for_ic[fpath_list_key_ic] = get_energies_for_fpaths(fpath_list, fpath_list_key_ic, cut_e_higher_than_3=cut_flag)
......