diff --git a/orcasong/default_config.toml b/orcasong/default_config.toml index e2ae9961b5b8cbcfd44214cf9341663be685a731..ef4c332a91378c290ae72ef7e6a30b6c39ca3bdd 100644 --- a/orcasong/default_config.toml +++ b/orcasong/default_config.toml @@ -93,6 +93,7 @@ data_cut_triggered = false data_cut_e_low = 'None' data_cut_e_high = 'None' data_cut_throw_away = 0.00 +data_cut_custom_func = 'None' prod_ident = 1 flush_freq = 1000 diff --git a/orcasong/geo_binning.py b/orcasong/geo_binning.py new file mode 100644 index 0000000000000000000000000000000000000000..ecc59b49eab39691c4b6fbea28d9c2bff8f03251 --- /dev/null +++ b/orcasong/geo_binning.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python +# coding=utf-8 +# Filename: geo_binning.py + +""" +OrcaSong code that handles the spatial binning of the output images. +""" + +import numpy as np +import km3pipe as kp + + +def calculate_bin_edges(n_bins, det_geo, detx_filepath, do4d): + """ + Calculates the bin edges for the corresponding detector geometry (1 DOM/bin) based on the number of specified bins. + + Used later on for making the event "images" with the in the np.histogramdd funcs in hits_to_histograms.py. + The bin edges are necessary in order to get the same bin size for each event regardless of the fact if all bins have a hit or not. + + Parameters + ---------- + n_bins : tuple + Contains the desired number of bins for each dimension, [n_bins_x, n_bins_y, n_bins_z]. + det_geo : str + Declares what detector geometry should be used for the binning. + detx_filepath : str + Filepath of a .detx detector file which contains the geometry of the detector. + do4d : tuple(boo, str) + Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz. + + Returns + ------- + x_bin_edges, y_bin_edges, z_bin_edges : ndarray(ndim=1) + The bin edges for the x,y,z direction. + + """ + # Loads a kp.Geometry object based on the filepath of a .detx file + print("Reading detector geometry in order to calculate the detector dimensions from file " + detx_filepath) + geo = kp.calib.Calibration(filename=detx_filepath) + + # derive maximum and minimum x,y,z coordinates of the geometry input [[xmin, ymin, zmin], [xmax, ymax, zmax]] + dom_position_values = geo.get_detector().dom_positions.values() + dom_pos_max = np.amax([pos for pos in dom_position_values], axis=0) + dom_pos_min = np.amin([pos for pos in dom_position_values], axis=0) + geo_limits = dom_pos_min, dom_pos_max + print('Detector dimensions [[xmin, ymin, zmin], [xmax, ymax, zmax]]: ' + str(geo_limits)) + + if det_geo == 'Orca_115l_23m_h_9m_v' or det_geo == 'Orca_115l_23m_h_?m_v': + x_bin_edges = np.linspace(geo_limits[0][0] - 9.95, geo_limits[1][0] + 9.95, num=n_bins[0] + 1) #try to get the lines in the bin center 9.95*2 = average x-separation of two lines + y_bin_edges = np.linspace(geo_limits[0][1] - 9.75, geo_limits[1][1] + 9.75, num=n_bins[1] + 1) # Delta y = 19.483 + + # Fitted offsets: x,y,factor: factor*(x+x_off), # Stefan's modifications: + offset_x, offset_y, scale = [6.19, 0.064, 1.0128] + x_bin_edges = (x_bin_edges + offset_x) * scale + y_bin_edges = (y_bin_edges + offset_y) * scale + + if det_geo == 'Orca_115l_23m_h_?m_v': + # ORCA denser detector study + z_bin_edges = np.linspace(37.84 - 7.5, 292.84 + 7.5, num=n_bins[2] + 1) # 15m vertical, 18 DOMs + # z_bin_edges = np.linspace(37.84 - 6, 241.84 + 6, num=n_bins[2] + 1) # 12m vertical, 18 DOMs + # z_bin_edges = np.linspace(37.84 - 4.5, 190.84 + 4.5, num=n_bins[2] + 1) # 9m vertical, 18 DOMs + # z_bin_edges = np.linspace(37.84 - 3, 139.84 + 3, num=n_bins[2] + 1) # 6m vertical, 18 DOMs + # z_bin_edges = np.linspace(37.84 - 2.25, 114.34 + 2.25, num=n_bins[2] + 1) # 4.5m vertical, 18 DOMs + + else: + n_bins_z = n_bins[2] if do4d[1] != 'xzt-c' else n_bins[1] # n_bins = [xyz,t/c] or n_bins = [xzt,c] + z_bin_edges = np.linspace(geo_limits[0][2] - 4.665, geo_limits[1][2] + 4.665, num=n_bins_z + 1) # Delta z = 9.329 + + # calculate_bin_edges_test(dom_positions, y_bin_edges, z_bin_edges) # test disabled by default. Activate it, if you change the offsets in x/y/z-bin-edges + + else: + raise ValueError('The specified detector geometry "' + str(det_geo) + '" is not available.') + + return geo, x_bin_edges, y_bin_edges, z_bin_edges + + +def calculate_bin_edges_test(geo, y_bin_edges, z_bin_edges): + """ + Tests, if the bins in one direction don't accidentally have more than 'one' OM. + + For the x-direction, an overlapping can not be avoided in an orthogonal space. + For y and z though, it can! + For y, every bin should contain the number of lines per y-direction * 18 for 18 doms per line. + For z, every bin should contain 115 entries, since every z bin contains one storey of the 115 ORCA lines. + Not 100% accurate, since only the dom positions are used and not the individual pmt positions for a dom. + """ + dom_positions = np.stack(list(geo.get_detector().dom_positions.values())) + dom_y = dom_positions[:, 1] + dom_z = dom_positions[:, 2] + hist_y = np.histogram(dom_y, bins=y_bin_edges) + hist_z = np.histogram(dom_z, bins=z_bin_edges) + + print('----------------------------------------------------------------------------------------------') + print('Y-axis: Bin content: ' + str(hist_y[0])) + print('It should be: ' + str(np.array( + [4 * 18, 7 * 18, 9 * 18, 10 * 18, 9 * 18, 10 * 18, 10 * 18, 10 * 18, 11 * 18, 10 * 18, 9 * 18, 8 * 18, 8 * 18]))) + print('Y-axis: Bin edges: ' + str(hist_y[1])) + print('..............................................................................................') + print('Z-axis: Bin content: ' + str(hist_z[0])) + print('It should have 115 entries everywhere') + print('Z-axis: Bin edges: ' + str(hist_z[1])) + print('----------------------------------------------------------------------------------------------') \ No newline at end of file diff --git a/orcasong/io.py b/orcasong/io.py new file mode 100644 index 0000000000000000000000000000000000000000..938139daa2070f3ef45a157cd7c215c6e100ee54 --- /dev/null +++ b/orcasong/io.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python +# coding=utf-8 +# Filename: io.py + +""" +IO Code for OrcaSong. +""" + +import os +import errno +import toml +from docopt import docopt +import warnings + + +def parse_input(): + """ + Parses and returns all necessary input options for the make_nn_images function. + + Returns + ------- + fname : str + Full filepath to the input .h5 file. + detx_filepath : str + Full filepath to the .detx geometry file that belongs to the fname. + config_filepath : str + Full filepath to a config file. An example can be found in orcasong/default_config.toml + + """ + args = docopt(__doc__) + + fname = args['SIMFILE'] + detx_filepath = args['DETXFILE'] + config_filepath = args['CONFIGFILE'] + + return fname, detx_filepath, config_filepath + + +def load_config(config_filepath): + """ + Loads the config from a .toml file. + + Parameters + ---------- + config_filepath : str + Full filepath to a config file. An example can be found in orcasong/default_config.toml + + Returns + ------- + config : dict + Dictionary that contains all configuration options of the make_nn_images function. + An explanation of the config parameters can be found in orcasong/default_config.toml. + + """ + config = toml.load(config_filepath) + print('Loaded the config file from ' + os.path.abspath(config_filepath)) + + return config + + +def check_user_input(fname, detx_filepath, config): + """ + Sanity check of the user input. + + Parameters + ---------- + fname : str + Full filepath to the input .h5 file. + detx_filepath : str + Full filepath to the .detx geometry file that belongs to the fname. + config : dict + Dictionary that contains all configuration options of the make_nn_images function. + An explanation of the config parameters can be found in orcasong/default_config.toml. + + """ + #---- Checks input types ----# + + # Check for options with a single, non-boolean element + number_args = {'do2d_plots_n': int, 'data_cut_e_low': float, 'data_cut_e_high': float, + 'data_cut_throw_away': float, 'prod_ident': int} + + for key in number_args: + expected_arg_type = number_args[key] + parsed_arg = config[key] + + if parsed_arg in [None, 'None']: # we don't want to check args when there has been no user input + continue + + if type(parsed_arg) != expected_arg_type: + try: + map(expected_arg_type, parsed_arg) + except ValueError: + raise TypeError('The argument option ', key, ' only accepts ', str(expected_arg_type), + ' values as an input.') + + # Checks the n_bins tuple input + for dim in config['n_bins']: + if type(dim) != int: + raise TypeError('The argument option n_bins only accepts integer values as an input!' + ' Your values have the type ' + str(type(dim))) + + # ---- Checks input types ----# + + # ---- Check other things ----# + + if not os.path.isfile(fname): + raise IOError('The file -' + fname+ '- does not exist.') + + if not os.path.isfile(detx_filepath): + raise IOError('The file -' + detx_filepath + '- does not exist.') + + if all(do_nd == False for do_nd in [config['do2d'], config['do3d'],config['do4d']]): + raise ValueError('At least one of do2d, do3d or do4d options must be set to True.') + + if config['do2d'] == False and config['do2d_plots'] == True: + raise ValueError('The 2D pdf images cannot be created if do2d=False!') + + if config['do2d_plots'] == True and config['do2d_plots_n'] > 100: + warnings.warn('You declared do2d_pdf=(True, int) with int > 100. This will take more than two minutes.' + 'Do you really want to create pdfs images for so many events?') + + +def make_output_dirs(output_dirpath, do2d, do3d, do4d): + """ + Function that creates all output directories if they don't exist already. + + Parameters + ---------- + output_dirpath : str + Full path to the directory, where the orcasong output should be stored. + do2d : bool + Declares if 2D histograms, are to be created. + do3d : bool + Declares if 3D histograms are to be created. + do4d : tuple(bool, str) + Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz. + Currently, only 'time' and 'channel_id' are available. + + """ + if do2d: + projections = ['xy', 'xz', 'yz', 'xt', 'yt', 'zt'] + for proj in projections: + if not os.path.exists(output_dirpath + '/orcasong_output/4dTo2d/' + proj): + try: + os.makedirs(output_dirpath + '/orcasong_output/4dTo2d/' + proj) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + if do3d: + projections = ['xyz', 'xyt', 'xzt', 'yzt', 'rzt'] + for proj in projections: + if not os.path.exists(output_dirpath + '/orcasong_output/4dTo3d/' + proj): + try: + os.makedirs(output_dirpath + '/orcasong_output/4dTo3d/' + proj) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + if do4d[0]: + proj = 'xyzt' if not do4d[1] == 'channel_id' else 'xyzc' + if not os.path.exists(output_dirpath + '/orcasong_output/4dTo4d/' + proj): + try: + os.makedirs(output_dirpath + '/orcasong_output/4dTo4d/' + proj) + except OSError as e: + if e.errno != errno.EEXIST: + raise \ No newline at end of file diff --git a/orcasong/make_nn_images.py b/orcasong/make_nn_images.py index 531564a61a0fdf11c3571a8194f5d0a869c39cda..2c8cfeebf2ff18a968fc84de2f8d847a7bcf2453 100644 --- a/orcasong/make_nn_images.py +++ b/orcasong/make_nn_images.py @@ -35,14 +35,9 @@ __email__ = 'michael.m.moser@fau.de' __status__ = 'Prototype' import os -import errno import sys -import warnings -import toml -from docopt import docopt #from memory_profiler import profile # for memory profiling, call with @profile; myfunc() #import line_profiler # call with kernprof -l -v file.py args -import numpy as np import km3pipe as kp import km3modules as km import matplotlib as mpl @@ -51,337 +46,9 @@ from matplotlib.backends.backend_pdf import PdfPages from orcasong.file_to_hits import EventDataExtractor from orcasong.hits_to_histograms import HistogramMaker - - -def parse_input(): - """ - Parses and returns all necessary input options for the make_nn_images function. - - Returns - ------- - fname : str - Full filepath to the input .h5 file. - detx_filepath : str - Full filepath to the .detx geometry file that belongs to the fname. - config_filepath : str - Full filepath to a config file. An example can be found in orcasong/default_config.toml - - """ - args = docopt(__doc__) - - fname = args['SIMFILE'] - detx_filepath = args['DETXFILE'] - config_filepath = args['CONFIGFILE'] - - return fname, detx_filepath, config_filepath - - -def load_config(config_filepath): - """ - Loads the config from a .toml file. - - Parameters - ---------- - config_filepath : str - Full filepath to a config file. An example can be found in orcasong/default_config.toml - - Returns - ------- - config : dict - Dictionary that contains all configuration options of the make_nn_images function. - An explanation of the config parameters can be found in orcasong/default_config.toml. - - """ - config = toml.load(config_filepath) - print('Loaded the config file from ' + os.path.abspath(config_filepath)) - - return config - - -def check_user_input(fname, detx_filepath, config): - """ - Sanity check of the user input. - - Parameters - ---------- - fname : str - Full filepath to the input .h5 file. - detx_filepath : str - Full filepath to the .detx geometry file that belongs to the fname. - config : dict - Dictionary that contains all configuration options of the make_nn_images function. - An explanation of the config parameters can be found in orcasong/default_config.toml. - - """ - #---- Checks input types ----# - - # Check for options with a single, non-boolean element - number_args = {'do2d_plots_n': int, 'data_cut_e_low': float, 'data_cut_e_high': float, - 'data_cut_throw_away': float, 'prod_ident': int} - - for key in number_args: - expected_arg_type = number_args[key] - parsed_arg = config[key] - - if parsed_arg in [None, 'None']: # we don't want to check args when there has been no user input - continue - - if type(parsed_arg) != expected_arg_type: - try: - map(expected_arg_type, parsed_arg) - except ValueError: - raise TypeError('The argument option ', key, ' only accepts ', str(expected_arg_type), - ' values as an input.') - - # Checks the n_bins tuple input - for dim in config['n_bins']: - if type(dim) != int: - raise TypeError('The argument option n_bins only accepts integer values as an input!' - ' Your values have the type ' + str(type(dim))) - - # ---- Checks input types ----# - - # ---- Check other things ----# - - if not os.path.isfile(fname): - raise IOError('The file -' + fname+ '- does not exist.') - - if not os.path.isfile(detx_filepath): - raise IOError('The file -' + detx_filepath + '- does not exist.') - - if all(do_nd == False for do_nd in [config['do2d'], config['do3d'],config['do4d']]): - raise ValueError('At least one of do2d, do3d or do4d options must be set to True.') - - if config['do2d'] == False and config['do2d_plots'] == True: - raise ValueError('The 2D pdf images cannot be created if do2d=False!') - - if config['do2d_plots'] == True and config['do2d_plots_n'] > 100: - warnings.warn('You declared do2d_pdf=(True, int) with int > 100. This will take more than two minutes.' - 'Do you really want to create pdfs images for so many events?') - - -def make_output_dirs(output_dirpath, do2d, do3d, do4d): - """ - Function that creates all output directories if they don't exist already. - - Parameters - ---------- - output_dirpath : str - Full path to the directory, where the orcasong output should be stored. - do2d : bool - Declares if 2D histograms, are to be created. - do3d : bool - Declares if 3D histograms are to be created. - do4d : tuple(bool, str) - Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz. - Currently, only 'time' and 'channel_id' are available. - - """ - if do2d: - projections = ['xy', 'xz', 'yz', 'xt', 'yt', 'zt'] - for proj in projections: - if not os.path.exists(output_dirpath + '/orcasong_output/4dTo2d/' + proj): - try: - os.makedirs(output_dirpath + '/orcasong_output/4dTo2d/' + proj) - except OSError as e: - if e.errno != errno.EEXIST: - raise - - if do3d: - projections = ['xyz', 'xyt', 'xzt', 'yzt', 'rzt'] - for proj in projections: - if not os.path.exists(output_dirpath + '/orcasong_output/4dTo3d/' + proj): - try: - os.makedirs(output_dirpath + '/orcasong_output/4dTo3d/' + proj) - except OSError as e: - if e.errno != errno.EEXIST: - raise - - if do4d[0]: - proj = 'xyzt' if not do4d[1] == 'channel_id' else 'xyzc' - if not os.path.exists(output_dirpath + '/orcasong_output/4dTo4d/' + proj): - try: - os.makedirs(output_dirpath + '/orcasong_output/4dTo4d/' + proj) - except OSError as e: - if e.errno != errno.EEXIST: - raise - - -def calculate_bin_edges(n_bins, det_geo, detx_filepath, do4d): - """ - Calculates the bin edges for the corresponding detector geometry (1 DOM/bin) based on the number of specified bins. - - Used later on for making the event "images" with the in the np.histogramdd funcs in hits_to_histograms.py. - The bin edges are necessary in order to get the same bin size for each event regardless of the fact if all bins have a hit or not. - - Parameters - ---------- - n_bins : tuple - Contains the desired number of bins for each dimension, [n_bins_x, n_bins_y, n_bins_z]. - det_geo : str - Declares what detector geometry should be used for the binning. - detx_filepath : str - Filepath of a .detx detector file which contains the geometry of the detector. - do4d : tuple(boo, str) - Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz. - - Returns - ------- - x_bin_edges, y_bin_edges, z_bin_edges : ndarray(ndim=1) - The bin edges for the x,y,z direction. - - """ - # Loads a kp.Geometry object based on the filepath of a .detx file - print("Reading detector geometry in order to calculate the detector dimensions from file " + detx_filepath) - geo = kp.calib.Calibration(filename=detx_filepath) - - # derive maximum and minimum x,y,z coordinates of the geometry input [[xmin, ymin, zmin], [xmax, ymax, zmax]] - dom_position_values = geo.get_detector().dom_positions.values() - dom_pos_max = np.amax([pos for pos in dom_position_values], axis=0) - dom_pos_min = np.amin([pos for pos in dom_position_values], axis=0) - geo_limits = dom_pos_min, dom_pos_max - print('Detector dimensions [[xmin, ymin, zmin], [xmax, ymax, zmax]]: ' + str(geo_limits)) - - if det_geo == 'Orca_115l_23m_h_9m_v' or det_geo == 'Orca_115l_23m_h_?m_v': - x_bin_edges = np.linspace(geo_limits[0][0] - 9.95, geo_limits[1][0] + 9.95, num=n_bins[0] + 1) #try to get the lines in the bin center 9.95*2 = average x-separation of two lines - y_bin_edges = np.linspace(geo_limits[0][1] - 9.75, geo_limits[1][1] + 9.75, num=n_bins[1] + 1) # Delta y = 19.483 - - # Fitted offsets: x,y,factor: factor*(x+x_off), # Stefan's modifications: - offset_x, offset_y, scale = [6.19, 0.064, 1.0128] - x_bin_edges = (x_bin_edges + offset_x) * scale - y_bin_edges = (y_bin_edges + offset_y) * scale - - if det_geo == 'Orca_115l_23m_h_?m_v': - # ORCA denser detector study - z_bin_edges = np.linspace(37.84 - 7.5, 292.84 + 7.5, num=n_bins[2] + 1) # 15m vertical, 18 DOMs - # z_bin_edges = np.linspace(37.84 - 6, 241.84 + 6, num=n_bins[2] + 1) # 12m vertical, 18 DOMs - # z_bin_edges = np.linspace(37.84 - 4.5, 190.84 + 4.5, num=n_bins[2] + 1) # 9m vertical, 18 DOMs - # z_bin_edges = np.linspace(37.84 - 3, 139.84 + 3, num=n_bins[2] + 1) # 6m vertical, 18 DOMs - # z_bin_edges = np.linspace(37.84 - 2.25, 114.34 + 2.25, num=n_bins[2] + 1) # 4.5m vertical, 18 DOMs - - else: - n_bins_z = n_bins[2] if do4d[1] != 'xzt-c' else n_bins[1] # n_bins = [xyz,t/c] or n_bins = [xzt,c] - z_bin_edges = np.linspace(geo_limits[0][2] - 4.665, geo_limits[1][2] + 4.665, num=n_bins_z + 1) # Delta z = 9.329 - - # calculate_bin_edges_test(dom_positions, y_bin_edges, z_bin_edges) # test disabled by default. Activate it, if you change the offsets in x/y/z-bin-edges - - else: - raise ValueError('The specified detector geometry "' + str(det_geo) + '" is not available.') - - return geo, x_bin_edges, y_bin_edges, z_bin_edges - - -def calculate_bin_edges_test(geo, y_bin_edges, z_bin_edges): - """ - Tests, if the bins in one direction don't accidentally have more than 'one' OM. - - For the x-direction, an overlapping can not be avoided in an orthogonal space. - For y and z though, it can! - For y, every bin should contain the number of lines per y-direction * 18 for 18 doms per line. - For z, every bin should contain 115 entries, since every z bin contains one storey of the 115 ORCA lines. - Not 100% accurate, since only the dom positions are used and not the individual pmt positions for a dom. - """ - dom_positions = np.stack(list(geo.get_detector().dom_positions.values())) - dom_y = dom_positions[:, 1] - dom_z = dom_positions[:, 2] - hist_y = np.histogram(dom_y, bins=y_bin_edges) - hist_z = np.histogram(dom_z, bins=z_bin_edges) - - print('----------------------------------------------------------------------------------------------') - print('Y-axis: Bin content: ' + str(hist_y[0])) - print('It should be: ' + str(np.array( - [4 * 18, 7 * 18, 9 * 18, 10 * 18, 9 * 18, 10 * 18, 10 * 18, 10 * 18, 11 * 18, 10 * 18, 9 * 18, 8 * 18, 8 * 18]))) - print('Y-axis: Bin edges: ' + str(hist_y[1])) - print('..............................................................................................') - print('Z-axis: Bin content: ' + str(hist_z[0])) - print('It should have 115 entries everywhere') - print('Z-axis: Bin edges: ' + str(hist_z[1])) - print('----------------------------------------------------------------------------------------------') - - -def get_file_particle_type(fname): - """ - Returns a string that specifies the type of the particles that are contained in the input file. - - Parameters - ---------- - fname : str - Filename (full path!) of the input file. - - Returns - ------- - file_particle_type : str - String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino']. - - """ - event_pump = kp.io.hdf5.HDF5Pump(filename=fname, verbose=False) # TODO suppress print of hdf5pump - - if 'McTracks' not in event_pump[0]: - file_particle_type = 'undefined' - else: - particle_type = event_pump[0]['McTracks'].type - - # if mupage file: first mc_track is an empty neutrino track, second track is the first muon - if particle_type[0] == 0 and np.abs(particle_type[1]) == 13: - file_particle_type = 'muon' - - # the first mc_track is the primary neutrino if the input file is containing neutrino events - elif np.abs(particle_type[0]) in [12, 14, 16]: - file_particle_type = 'neutrino' - else: - raise ValueError('The type of the particles in the "McTracks" folder, <', str(particle_type), '> is not known.') - - event_pump.close_file() - - return file_particle_type - - -class EventSkipper(kp.Module): - """ - KM3Pipe module that skips events based on some data_cuts. - """ - def configure(self): - self.data_cuts = self.require('data_cuts') - - def process(self, blob): - continue_bool = skip_event(blob['event_track'], self.data_cuts) - if continue_bool: - return - else: - return blob - - -def skip_event(event_track, data_cuts): - """ - Function that checks if an event should be skipped, depending on the data_cuts input. - - Parameters - ---------- - event_track : ndarray(ndim=1) - 1D array containing important MC information of the event, only the energy of the event (pos_2) is used here. - data_cuts : dict - Dictionary that contains information about any possible cuts that should be applied. - Supports the following cuts: 'triggered', 'energy_lower_limit', 'energy_upper_limit', 'throw_away_prob'. - - Returns - ------- - continue_bool : bool - boolean flag to specify, if this event should be skipped or not. - - """ - continue_bool = False - if data_cuts['energy_lower_limit'] is not None: - continue_bool = event_track.energy[0] < data_cuts['energy_lower_limit'] # True if E < lower limit - - if data_cuts['energy_upper_limit'] is not None and continue_bool == False: - continue_bool = event_track.energy[0] > data_cuts['energy_upper_limit'] # True if E > upper limit - - if data_cuts['throw_away_prob'] is not None and continue_bool == False: - throw_away_prob = data_cuts['throw_away_prob'] - throw_away = np.random.choice([False, True], p=[1 - throw_away_prob, throw_away_prob]) - if throw_away: continue_bool = True - - return continue_bool +from orcasong.io import parse_input, load_config, check_user_input, make_output_dirs +from orcasong.geo_binning import calculate_bin_edges +from orcasong.utils import get_file_particle_type, EventSkipper def make_nn_images(fname, detx_filepath, config): @@ -402,7 +69,7 @@ def make_nn_images(fname, detx_filepath, config): An explanation of the config parameters can be found in orcasong/default_config.toml. """ - # Load all parameters from the config + # Load all parameters from the config # TODO put everything in a config class, this is horrible output_dirpath = config['output_dirpath'] chunksize, complib, complevel = config['chunksize'], config['complib'], config['complevel'] flush_freq = config['flush_freq'] @@ -420,6 +87,7 @@ def make_nn_images(fname, detx_filepath, config): data_cuts['energy_lower_limit'] = config['data_cut_e_low'] if config['data_cut_e_low'] != 'None' else None data_cuts['energy_upper_limit'] = config['data_cut_e_high'] if config['data_cut_e_high'] != 'None' else None data_cuts['throw_away_prob'] = config['data_cut_throw_away'] if config['data_cut_throw_away'] != 'None' else None + data_cuts['custom_skip_function'] = config['data_cut_custom_func'] if config['data_cut_custom_func'] != 'None' else None make_output_dirs(output_dirpath, do2d, do3d, do4d) diff --git a/orcasong/utils.py b/orcasong/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..affba0ce499e9ced3c07a735988f8ab1caad4e09 --- /dev/null +++ b/orcasong/utils.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python +# coding=utf-8 +# Filename: utils.py + +""" +Utility code for OrcaSong. +""" + +import numpy as np +import km3pipe as kp + + +class EventSkipper(kp.Module): + """ + KM3Pipe module that skips events based on some data_cuts. + """ + def configure(self): + self.data_cuts = self.require('data_cuts') + + def process(self, blob): + continue_bool = skip_event(blob['event_track'], self.data_cuts) + if continue_bool: + return + else: + return blob + + +def skip_event(event_track, data_cuts): + """ + Function that checks if an event should be skipped, depending on the data_cuts input. + + Parameters + ---------- + event_track : ndarray(ndim=1) + 1D array containing important MC information of the event, only the energy of the event (pos_2) is used here. + data_cuts : dict + Dictionary that contains information about any possible cuts that should be applied. + Supports the following cuts: 'triggered', 'energy_lower_limit', 'energy_upper_limit', 'throw_away_prob'. + + Returns + ------- + continue_bool : bool + boolean flag to specify, if this event should be skipped or not. + + """ + continue_bool = False + + if data_cuts['energy_lower_limit'] is not None: + continue_bool = event_track.energy[0] < data_cuts['energy_lower_limit'] # True if E < lower limit + + if data_cuts['energy_upper_limit'] is not None and continue_bool == False: + continue_bool = event_track.energy[0] > data_cuts['energy_upper_limit'] # True if E > upper limit + + if data_cuts['throw_away_prob'] is not None and continue_bool == False: + throw_away_prob = data_cuts['throw_away_prob'] + throw_away = np.random.choice([False, True], p=[1 - throw_away_prob, throw_away_prob]) + if throw_away: continue_bool = True + + if data_cuts['custom_skip_function'] is not None: + continue_bool = use_custom_skip_function(data_cuts['custom_skip_function'], event_track) + + return continue_bool + + +def use_custom_skip_function(skip_function, event_track): + """ + User defined, custom skip functions. + + Parameters + ---------- + skip_function : str + String that specifies, which custom skip function should be used. + event_track : ndarray(ndim=1) + Structured numpy array containing the McTracks info of this event. + + Returns + ------- + continue_bool : bool + Bool which specifies, if this event should be skipped or not. + + """ + if skip_function == 'ts_e_flat': + # cf. orcanet_contrib/utilities/get_func_for_flat_track_shower.py + + particle_type = event_track.particle_type[0] + if np.abs(particle_type) not in [12, 14]: + continue_bool = False + + else: + prob_for_e_bin_arr_path = 'placeholder.npy' + # Fraction of tracks compared to showers (n_muon-cc / (n_e-cc + n_e-nc)) + arr_fract_for_e_bins = np.load(prob_for_e_bin_arr_path) # 2d arr, one row e_bins, second row prob + e_bins = arr_fract_for_e_bins[0, :] + fract_e = arr_fract_for_e_bins[1, :] + + e_evt = event_track.energy[0] + idx_e = (np.abs(e_bins - e_evt)).argmin() + + fract = fract_e[idx_e] + + if np.abs(particle_type) == 14: # for muon neutrinos + if fract <= 1: + continue_bool = False + else: + evt_survive_prob = 1/float(fract) + continue_bool = np.random.choice([False, True], p=[evt_survive_prob, 1 - evt_survive_prob]) + + else: # for elec neutrinos + assert np.abs(particle_type) == 12 + + if fract >= 1: + continue_bool = False + else: + evt_survive_prob = fract + continue_bool = np.random.choice([False, True], p=[evt_survive_prob, 1 - evt_survive_prob]) + + return continue_bool + + else: + raise ValueError('The custom skip function "' + str(skip_function) + '" is not known.') + + +def get_file_particle_type(fname): + """ + Returns a string that specifies the type of the particles that are contained in the input file. + + Parameters + ---------- + fname : str + Filename (full path!) of the input file. + + Returns + ------- + file_particle_type : str + String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino']. + + """ + event_pump = kp.io.hdf5.HDF5Pump(filename=fname, verbose=False) # TODO suppress print of hdf5pump + + if 'McTracks' not in event_pump[0]: + file_particle_type = 'undefined' + else: + particle_type = event_pump[0]['McTracks'].type + + # if mupage file: first mc_track is an empty neutrino track, second track is the first muon + if particle_type[0] == 0 and np.abs(particle_type[1]) == 13: + file_particle_type = 'muon' + + # the first mc_track is the primary neutrino if the input file is containing neutrino events + elif np.abs(particle_type[0]) in [12, 14, 16]: + file_particle_type = 'neutrino' + else: + raise ValueError('The type of the particles in the "McTracks" folder, <', str(particle_type), '> is not known.') + + event_pump.close_file() + + return file_particle_type + + diff --git a/orcasong_contrib/utilities/get_func_for_flat_track_shower.py b/orcasong_contrib/utilities/get_func_for_flat_track_shower.py index 772e193b0c968878848ecf10f3b98863363eded9..a01f3cb86af4bc2093af737d4f147b4d194133ed 100644 --- a/orcasong_contrib/utilities/get_func_for_flat_track_shower.py +++ b/orcasong_contrib/utilities/get_func_for_flat_track_shower.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ -TODO +Code that calculates the fraction of track to shower events for given files. """ import os @@ -74,7 +74,7 @@ def get_energies_for_fpaths(fpath_list, fpath_list_key_ic, cut_e_higher_than_3=F f.close() - print('Total number of events for ' + fpath_list_key_ic + ' (without 3-5GeV from low_e prod): ' + print('Total number of events for ' + fpath_list_key_ic + ' : ' + str(energy_conc_arr.shape[0])) print('Total number of files: ' + str(len(fpath_list))) @@ -82,6 +82,16 @@ def get_energies_for_fpaths(fpath_list, fpath_list_key_ic, cut_e_higher_than_3=F def save_energies_for_ic(energies_for_ic): + """ + + Parameters + ---------- + energies_for_ic + + Returns + ------- + + """ np.savez('./energies_for_ic.npz', muon_cc_3_100=energies_for_ic['muon_cc_3_100'], muon_cc_1_5=energies_for_ic['muon_cc_1_5'], @@ -90,6 +100,12 @@ def save_energies_for_ic(energies_for_ic): def load_energies_for_ic(): + """ + + Returns + ------- + + """ data = np.load('./energies_for_ic.npz') @@ -147,26 +163,28 @@ def plot_e_and_make_flat_func(energies_for_ic): pdfpages = PdfPages('./e_hist_plots.pdf') fig, ax = plt.subplots() + e_bins = 199 + # plot - hist_muon_cc = plt.hist(energies_for_ic['muon_cc'], bins=99) - plt.title('Muon-CC 1-3 + 3-100 GeV for Run 1-2400') + hist_muon_cc = plt.hist(energies_for_ic['muon_cc'], bins=e_bins) + plt.title('Muon-CC 1-5 + 3-100 GeV for Run 1-2400') make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]') - hist_shower = plt.hist(energies_for_ic['elec_cc_and_nc'], bins=99) - plt.title('Shower (elec-CC + elec-NC) 1-3 + 3-100 GeV for 2x Run 1-1200') + hist_shower = plt.hist(energies_for_ic['elec_cc_and_nc'], bins=e_bins) + plt.title('Shower (elec-CC + elec-NC) 1-5 + 3-100 GeV for 2x Run 1-1200') make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]') - hist_elec_cc = plt.hist(energies_for_ic['elec_cc'], bins=99) - plt.title('Elec-CC 1-3 + 3-100 GeV for Run 1-1200') + hist_elec_cc = plt.hist(energies_for_ic['elec_cc'], bins=e_bins) + plt.title('Elec-CC 1-5 + 3-100 GeV for Run 1-1200') make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]') - hist_elec_nc = plt.hist(energies_for_ic['elec_nc'], bins=99) - plt.title('Elec-NC 1-3 + 3-100 GeV for Run 1-1200') + hist_elec_nc = plt.hist(energies_for_ic['elec_nc'], bins=e_bins) + plt.title('Elec-NC 1-5 + 3-100 GeV for Run 1-1200') make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]') - # We take 600 muon-CC files and 300 elec-cc and 300 elec_nc files for the split, reduce 1-3GeV bins by 1/2 - hist_shower[0][0] = hist_shower[0][0] / 2 # 1-2GeV - hist_shower[0][1] = hist_shower[0][1] / 2 # 2-3GeV + # # We take 600 muon-CC files and 300 elec-cc and 300 elec_nc files for the split, reduce 1-3GeV bins by 1/2 + # hist_shower[0][0] = hist_shower[0][0] / 2 # 1-2GeV + # hist_shower[0][1] = hist_shower[0][1] / 2 # 2-3GeV track_div_shower = np.divide(hist_muon_cc[0], hist_shower[0]) print(hist_muon_cc[0]) @@ -174,7 +192,6 @@ def plot_e_and_make_flat_func(energies_for_ic): bins=hist_muon_cc[1] # doesnt matter which bins to use track_div_shower = np.append(track_div_shower, track_div_shower[-1]) - #track_div_shower = np.concatenate([track_div_shower, np.array(track_div_shower[-1])[:, np.newaxis]], axis=0) # fix for mpl print(bins) print(track_div_shower) ax.step(bins, track_div_shower, linestyle='-', where='post') @@ -205,7 +222,8 @@ def main(): energies_for_ic = dict() for fpath_list_key_ic in fpaths: print('Getting energies for ' + fpath_list_key_ic) - cut_flag = True if fpath_list_key_ic in ['muon_cc_1_5', 'elec_cc_1_5', 'elec_nc_1_5'] else False + cut_flag = False + # cut_flag = True if fpath_list_key_ic in ['muon_cc_1_5', 'elec_cc_1_5', 'elec_nc_1_5'] else False fpath_list = fpaths[fpath_list_key_ic] energies_for_ic[fpath_list_key_ic] = get_energies_for_fpaths(fpath_list, fpath_list_key_ic, cut_e_higher_than_3=cut_flag)