Commit 315719a3 authored by ViaFerrata

- Split the make_nn_images code into separate files

- Added new data cut with custom user function support (not ideal atm)
parent 36dc6ad3
......@@ -93,6 +93,7 @@ data_cut_triggered = false
data_cut_e_low = 'None'
data_cut_e_high = 'None'
data_cut_throw_away = 0.00
data_cut_custom_func = 'None'
prod_ident = 1
flush_freq = 1000
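For context, a minimal sketch of how such an option is read back on the Python side (the path is hypothetical; OrcaSong uses the string 'None' as a sentinel for unset options, as seen in make_nn_images.py further below):
import toml

config = toml.load('orcasong/default_config.toml')  # hypothetical path
custom_func = config['data_cut_custom_func']
custom_func = None if custom_func == 'None' else custom_func  # the 'None' string means "no custom cut"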
......
#!/usr/bin/env python
# coding=utf-8
# Filename: geo_binning.py
"""
OrcaSong code that handles the spatial binning of the output images.
"""
import numpy as np
import km3pipe as kp
def calculate_bin_edges(n_bins, det_geo, detx_filepath, do4d):
"""
Calculates the bin edges for the corresponding detector geometry (1 DOM/bin) based on the number of specified bins.
Used later on for making the event "images" with the np.histogramdd functions in hits_to_histograms.py.
The bin edges are necessary in order to get the same bin size for each event, regardless of whether all bins contain a hit or not.
Parameters
----------
n_bins : tuple
Contains the desired number of bins for each dimension, [n_bins_x, n_bins_y, n_bins_z].
det_geo : str
Declares what detector geometry should be used for the binning.
detx_filepath : str
Filepath of a .detx detector file which contains the geometry of the detector.
do4d : tuple(bool, str)
Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz.
Returns
-------
geo : kp.calib.Calibration
Calibration object that contains the detector geometry loaded from the .detx file.
x_bin_edges, y_bin_edges, z_bin_edges : ndarray(ndim=1)
The bin edges for the x, y and z direction.
"""
# Loads a kp.Geometry object based on the filepath of a .detx file
print("Reading detector geometry in order to calculate the detector dimensions from file " + detx_filepath)
geo = kp.calib.Calibration(filename=detx_filepath)
# derive maximum and minimum x,y,z coordinates of the geometry input [[xmin, ymin, zmin], [xmax, ymax, zmax]]
dom_position_values = geo.get_detector().dom_positions.values()
dom_pos_max = np.amax([pos for pos in dom_position_values], axis=0)
dom_pos_min = np.amin([pos for pos in dom_position_values], axis=0)
geo_limits = dom_pos_min, dom_pos_max
print('Detector dimensions [[xmin, ymin, zmin], [xmax, ymax, zmax]]: ' + str(geo_limits))
if det_geo == 'Orca_115l_23m_h_9m_v' or det_geo == 'Orca_115l_23m_h_?m_v':
x_bin_edges = np.linspace(geo_limits[0][0] - 9.95, geo_limits[1][0] + 9.95, num=n_bins[0] + 1) #try to get the lines in the bin center 9.95*2 = average x-separation of two lines
y_bin_edges = np.linspace(geo_limits[0][1] - 9.75, geo_limits[1][1] + 9.75, num=n_bins[1] + 1) # Delta y = 19.483
# Fitted offsets: x,y,factor: factor*(x+x_off), # Stefan's modifications:
offset_x, offset_y, scale = [6.19, 0.064, 1.0128]
x_bin_edges = (x_bin_edges + offset_x) * scale
y_bin_edges = (y_bin_edges + offset_y) * scale
if det_geo == 'Orca_115l_23m_h_?m_v':
# ORCA denser detector study
z_bin_edges = np.linspace(37.84 - 7.5, 292.84 + 7.5, num=n_bins[2] + 1) # 15m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 6, 241.84 + 6, num=n_bins[2] + 1) # 12m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 4.5, 190.84 + 4.5, num=n_bins[2] + 1) # 9m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 3, 139.84 + 3, num=n_bins[2] + 1) # 6m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 2.25, 114.34 + 2.25, num=n_bins[2] + 1) # 4.5m vertical, 18 DOMs
else:
n_bins_z = n_bins[2] if do4d[1] != 'xzt-c' else n_bins[1] # n_bins = [xyz,t/c] or n_bins = [xzt,c]
z_bin_edges = np.linspace(geo_limits[0][2] - 4.665, geo_limits[1][2] + 4.665, num=n_bins_z + 1) # Delta z = 9.329
# calculate_bin_edges_test(geo, y_bin_edges, z_bin_edges) # test disabled by default. Activate it, if you change the offsets of the x/y/z bin edges
else:
raise ValueError('The specified detector geometry "' + str(det_geo) + '" is not available.')
return geo, x_bin_edges, y_bin_edges, z_bin_edges
def calculate_bin_edges_test(geo, y_bin_edges, z_bin_edges):
"""
Tests that the bins in one direction do not accidentally contain more than 'one' OM.
For the x-direction, overlap cannot be avoided in an orthogonal space.
For y and z though, it can!
For y, every bin should contain the number of lines at that y-position times 18, since there are 18 DOMs per line.
For z, every bin should contain 115 entries, since every z bin contains one storey of the 115 ORCA lines.
Not 100% accurate, since only the DOM positions are used and not the individual PMT positions of a DOM.
"""
dom_positions = np.stack(list(geo.get_detector().dom_positions.values()))
dom_y = dom_positions[:, 1]
dom_z = dom_positions[:, 2]
hist_y = np.histogram(dom_y, bins=y_bin_edges)
hist_z = np.histogram(dom_z, bins=z_bin_edges)
print('----------------------------------------------------------------------------------------------')
print('Y-axis: Bin content: ' + str(hist_y[0]))
print('It should be: ' + str(np.array(
[4 * 18, 7 * 18, 9 * 18, 10 * 18, 9 * 18, 10 * 18, 10 * 18, 10 * 18, 11 * 18, 10 * 18, 9 * 18, 8 * 18, 8 * 18])))
print('Y-axis: Bin edges: ' + str(hist_y[1]))
print('..............................................................................................')
print('Z-axis: Bin content: ' + str(hist_z[0]))
print('It should have 115 entries everywhere')
print('Z-axis: Bin edges: ' + str(hist_z[1]))
print('----------------------------------------------------------------------------------------------')
\ No newline at end of file
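A minimal usage sketch of calculate_bin_edges (detector file path and bin numbers are illustrative, not taken from this commit):
from orcasong.geo_binning import calculate_bin_edges

geo, x_bin_edges, y_bin_edges, z_bin_edges = calculate_bin_edges(
    n_bins=(11, 13, 18, 60),                  # x, y, z, t; only the first three are used here
    det_geo='Orca_115l_23m_h_9m_v',
    detx_filepath='/path/to/orca_115l.detx',  # hypothetical .detx file
    do4d=(True, 'time'))
print(len(x_bin_edges), len(y_bin_edges), len(z_bin_edges))  # each axis has n_bins + 1 edges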
#!/usr/bin/env python
# coding=utf-8
# Filename: io.py
"""
IO Code for OrcaSong.
"""
import os
import errno
import toml
from docopt import docopt
import warnings
def parse_input():
"""
Parses and returns all necessary input options for the make_nn_images function.
Returns
-------
fname : str
Full filepath to the input .h5 file.
detx_filepath : str
Full filepath to the .detx geometry file that belongs to the fname.
config_filepath : str
Full filepath to a config file. An example can be found in orcasong/default_config.toml
"""
args = docopt(__doc__)
fname = args['SIMFILE']
detx_filepath = args['DETXFILE']
config_filepath = args['CONFIGFILE']
return fname, detx_filepath, config_filepath
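parse_input relies on docopt and a module docstring containing a usage pattern (not shown in this diff); a hedged sketch of a usage string that would yield the three keys accessed above (the script name is assumed):
"""
Usage:
    make_nn_images.py SIMFILE DETXFILE CONFIGFILE
    make_nn_images.py (-h | --help)

Arguments:
    SIMFILE     Path to the input .h5 simulation file.
    DETXFILE    Path to the corresponding .detx geometry file.
    CONFIGFILE  Path to a .toml configuration file.
"""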
def load_config(config_filepath):
"""
Loads the config from a .toml file.
Parameters
----------
config_filepath : str
Full filepath to a config file. An example can be found in orcasong/default_config.toml
Returns
-------
config : dict
Dictionary that contains all configuration options of the make_nn_images function.
An explanation of the config parameters can be found in orcasong/default_config.toml.
"""
config = toml.load(config_filepath)
print('Loaded the config file from ' + os.path.abspath(config_filepath))
return config
def check_user_input(fname, detx_filepath, config):
"""
Sanity check of the user input.
Parameters
----------
fname : str
Full filepath to the input .h5 file.
detx_filepath : str
Full filepath to the .detx geometry file that belongs to the fname.
config : dict
Dictionary that contains all configuration options of the make_nn_images function.
An explanation of the config parameters can be found in orcasong/default_config.toml.
"""
#---- Checks input types ----#
# Check for options with a single, non-boolean element
number_args = {'do2d_plots_n': int, 'data_cut_e_low': float, 'data_cut_e_high': float,
'data_cut_throw_away': float, 'prod_ident': int}
for key in number_args:
expected_arg_type = number_args[key]
parsed_arg = config[key]
if parsed_arg in [None, 'None']: # we don't want to check args when there has been no user input
continue
if not isinstance(parsed_arg, expected_arg_type):
try:
expected_arg_type(parsed_arg) # check if the value can at least be cast to the expected type
except (ValueError, TypeError):
raise TypeError('The argument option "' + key + '" only accepts ' + str(expected_arg_type) +
' values as an input.')
# Checks the n_bins tuple input
for dim in config['n_bins']:
if type(dim) != int:
raise TypeError('The argument option n_bins only accepts integer values as an input!'
' Your values have the type ' + str(type(dim)))
# ---- Checks input types ----#
# ---- Check other things ----#
if not os.path.isfile(fname):
raise IOError('The file "' + fname + '" does not exist.')
if not os.path.isfile(detx_filepath):
raise IOError('The file "' + detx_filepath + '" does not exist.')
if not (config['do2d'] or config['do3d'] or config['do4d'][0]):
raise ValueError('At least one of the do2d, do3d or do4d options must be set to True.')
if not config['do2d'] and config['do2d_plots']:
raise ValueError('The 2D pdf images cannot be created if do2d=False!')
if config['do2d_plots'] and config['do2d_plots_n'] > 100:
warnings.warn('You declared do2d_plots=True with do2d_plots_n > 100. This will take more than two minutes. '
'Do you really want to create pdf images for so many events?')
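A short sketch of the behaviour of these checks (file names and the broken value are made up):
from orcasong.io import load_config, check_user_input

config = load_config('orcasong/default_config.toml')  # hypothetical path
config['n_bins'] = [11, 13, '18', 60]                 # wrong type on purpose
check_user_input('simfile.h5', 'orca.detx', config)   # raises TypeError: n_bins only accepts integers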
def make_output_dirs(output_dirpath, do2d, do3d, do4d):
"""
Function that creates all output directories if they don't exist already.
Parameters
----------
output_dirpath : str
Full path to the directory, where the orcasong output should be stored.
do2d : bool
Declares if 2D histograms are to be created.
do3d : bool
Declares if 3D histograms are to be created.
do4d : tuple(bool, str)
Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz.
Currently, only 'time' and 'channel_id' are available.
"""
if do2d:
projections = ['xy', 'xz', 'yz', 'xt', 'yt', 'zt']
for proj in projections:
if not os.path.exists(output_dirpath + '/orcasong_output/4dTo2d/' + proj):
try:
os.makedirs(output_dirpath + '/orcasong_output/4dTo2d/' + proj)
except OSError as e:
if e.errno != errno.EEXIST:
raise
if do3d:
projections = ['xyz', 'xyt', 'xzt', 'yzt', 'rzt']
for proj in projections:
if not os.path.exists(output_dirpath + '/orcasong_output/4dTo3d/' + proj):
try:
os.makedirs(output_dirpath + '/orcasong_output/4dTo3d/' + proj)
except OSError as e:
if e.errno != errno.EEXIST:
raise
if do4d[0]:
proj = 'xyzt' if not do4d[1] == 'channel_id' else 'xyzc'
if not os.path.exists(output_dirpath + '/orcasong_output/4dTo4d/' + proj):
try:
os.makedirs(output_dirpath + '/orcasong_output/4dTo4d/' + proj)
except OSError as e:
if e.errno != errno.EEXIST:
raise
\ No newline at end of file
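On Python 3.2+ the same create-if-missing logic can be written more compactly with exist_ok=True; a sketch of the equivalent pattern (not a drop-in change to the module, which keeps the errno check):
import os

output_dirpath = '/path/to/output'  # hypothetical
for proj in ('xy', 'xz', 'yz', 'xt', 'yt', 'zt'):
    os.makedirs(output_dirpath + '/orcasong_output/4dTo2d/' + proj, exist_ok=True)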
......@@ -35,14 +35,9 @@ __email__ = 'michael.m.moser@fau.de'
__status__ = 'Prototype'
import os
import errno
import sys
import warnings
import toml
from docopt import docopt
#from memory_profiler import profile # for memory profiling, call with @profile; myfunc()
#import line_profiler # call with kernprof -l -v file.py args
import numpy as np
import km3pipe as kp
import km3modules as km
import matplotlib as mpl
......@@ -51,337 +46,9 @@ from matplotlib.backends.backend_pdf import PdfPages
from orcasong.file_to_hits import EventDataExtractor
from orcasong.hits_to_histograms import HistogramMaker
def parse_input():
"""
Parses and returns all necessary input options for the make_nn_images function.
Returns
-------
fname : str
Full filepath to the input .h5 file.
detx_filepath : str
Full filepath to the .detx geometry file that belongs to the fname.
config_filepath : str
Full filepath to a config file. An example can be found in orcasong/default_config.toml
"""
args = docopt(__doc__)
fname = args['SIMFILE']
detx_filepath = args['DETXFILE']
config_filepath = args['CONFIGFILE']
return fname, detx_filepath, config_filepath
def load_config(config_filepath):
"""
Loads the config from a .toml file.
Parameters
----------
config_filepath : str
Full filepath to a config file. An example can be found in orcasong/default_config.toml
Returns
-------
config : dict
Dictionary that contains all configuration options of the make_nn_images function.
An explanation of the config parameters can be found in orcasong/default_config.toml.
"""
config = toml.load(config_filepath)
print('Loaded the config file from ' + os.path.abspath(config_filepath))
return config
def check_user_input(fname, detx_filepath, config):
"""
Sanity check of the user input.
Parameters
----------
fname : str
Full filepath to the input .h5 file.
detx_filepath : str
Full filepath to the .detx geometry file that belongs to the fname.
config : dict
Dictionary that contains all configuration options of the make_nn_images function.
An explanation of the config parameters can be found in orcasong/default_config.toml.
"""
#---- Checks input types ----#
# Check for options with a single, non-boolean element
number_args = {'do2d_plots_n': int, 'data_cut_e_low': float, 'data_cut_e_high': float,
'data_cut_throw_away': float, 'prod_ident': int}
for key in number_args:
expected_arg_type = number_args[key]
parsed_arg = config[key]
if parsed_arg in [None, 'None']: # we don't want to check args when there has been no user input
continue
if type(parsed_arg) != expected_arg_type:
try:
map(expected_arg_type, parsed_arg)
except ValueError:
raise TypeError('The argument option ', key, ' only accepts ', str(expected_arg_type),
' values as an input.')
# Checks the n_bins tuple input
for dim in config['n_bins']:
if type(dim) != int:
raise TypeError('The argument option n_bins only accepts integer values as an input!'
' Your values have the type ' + str(type(dim)))
# ---- Checks input types ----#
# ---- Check other things ----#
if not os.path.isfile(fname):
raise IOError('The file -' + fname+ '- does not exist.')
if not os.path.isfile(detx_filepath):
raise IOError('The file -' + detx_filepath + '- does not exist.')
if all(do_nd == False for do_nd in [config['do2d'], config['do3d'],config['do4d']]):
raise ValueError('At least one of do2d, do3d or do4d options must be set to True.')
if config['do2d'] == False and config['do2d_plots'] == True:
raise ValueError('The 2D pdf images cannot be created if do2d=False!')
if config['do2d_plots'] == True and config['do2d_plots_n'] > 100:
warnings.warn('You declared do2d_pdf=(True, int) with int > 100. This will take more than two minutes. '
'Do you really want to create pdf images for so many events?')
def make_output_dirs(output_dirpath, do2d, do3d, do4d):
"""
Function that creates all output directories if they don't exist already.
Parameters
----------
output_dirpath : str
Full path to the directory, where the orcasong output should be stored.
do2d : bool
Declares if 2D histograms are to be created.
do3d : bool
Declares if 3D histograms are to be created.
do4d : tuple(bool, str)
Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz.
Currently, only 'time' and 'channel_id' are available.
"""
if do2d:
projections = ['xy', 'xz', 'yz', 'xt', 'yt', 'zt']
for proj in projections:
if not os.path.exists(output_dirpath + '/orcasong_output/4dTo2d/' + proj):
try:
os.makedirs(output_dirpath + '/orcasong_output/4dTo2d/' + proj)
except OSError as e:
if e.errno != errno.EEXIST:
raise
if do3d:
projections = ['xyz', 'xyt', 'xzt', 'yzt', 'rzt']
for proj in projections:
if not os.path.exists(output_dirpath + '/orcasong_output/4dTo3d/' + proj):
try:
os.makedirs(output_dirpath + '/orcasong_output/4dTo3d/' + proj)
except OSError as e:
if e.errno != errno.EEXIST:
raise
if do4d[0]:
proj = 'xyzt' if not do4d[1] == 'channel_id' else 'xyzc'
if not os.path.exists(output_dirpath + '/orcasong_output/4dTo4d/' + proj):
try:
os.makedirs(output_dirpath + '/orcasong_output/4dTo4d/' + proj)
except OSError as e:
if e.errno != errno.EEXIST:
raise
def calculate_bin_edges(n_bins, det_geo, detx_filepath, do4d):
"""
Calculates the bin edges for the corresponding detector geometry (1 DOM/bin) based on the number of specified bins.
Used later on for making the event "images" with the np.histogramdd functions in hits_to_histograms.py.
The bin edges are necessary in order to get the same bin size for each event, regardless of whether all bins contain a hit or not.
Parameters
----------
n_bins : tuple
Contains the desired number of bins for each dimension, [n_bins_x, n_bins_y, n_bins_z].
det_geo : str
Declares what detector geometry should be used for the binning.
detx_filepath : str
Filepath of a .detx detector file which contains the geometry of the detector.
do4d : tuple(bool, str)
Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz.
Returns
-------
x_bin_edges, y_bin_edges, z_bin_edges : ndarray(ndim=1)
The bin edges for the x,y,z direction.
"""
# Loads a kp.Geometry object based on the filepath of a .detx file
print("Reading detector geometry in order to calculate the detector dimensions from file " + detx_filepath)
geo = kp.calib.Calibration(filename=detx_filepath)
# derive maximum and minimum x,y,z coordinates of the geometry input [[xmin, ymin, zmin], [xmax, ymax, zmax]]
dom_position_values = geo.get_detector().dom_positions.values()
dom_pos_max = np.amax([pos for pos in dom_position_values], axis=0)
dom_pos_min = np.amin([pos for pos in dom_position_values], axis=0)
geo_limits = dom_pos_min, dom_pos_max
print('Detector dimensions [[xmin, ymin, zmin], [xmax, ymax, zmax]]: ' + str(geo_limits))
if det_geo == 'Orca_115l_23m_h_9m_v' or det_geo == 'Orca_115l_23m_h_?m_v':
x_bin_edges = np.linspace(geo_limits[0][0] - 9.95, geo_limits[1][0] + 9.95, num=n_bins[0] + 1) #try to get the lines in the bin center 9.95*2 = average x-separation of two lines
y_bin_edges = np.linspace(geo_limits[0][1] - 9.75, geo_limits[1][1] + 9.75, num=n_bins[1] + 1) # Delta y = 19.483
# Fitted offsets: x,y,factor: factor*(x+x_off), # Stefan's modifications:
offset_x, offset_y, scale = [6.19, 0.064, 1.0128]
x_bin_edges = (x_bin_edges + offset_x) * scale
y_bin_edges = (y_bin_edges + offset_y) * scale
if det_geo == 'Orca_115l_23m_h_?m_v':
# ORCA denser detector study
z_bin_edges = np.linspace(37.84 - 7.5, 292.84 + 7.5, num=n_bins[2] + 1) # 15m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 6, 241.84 + 6, num=n_bins[2] + 1) # 12m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 4.5, 190.84 + 4.5, num=n_bins[2] + 1) # 9m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 3, 139.84 + 3, num=n_bins[2] + 1) # 6m vertical, 18 DOMs
# z_bin_edges = np.linspace(37.84 - 2.25, 114.34 + 2.25, num=n_bins[2] + 1) # 4.5m vertical, 18 DOMs
else:
n_bins_z = n_bins[2] if do4d[1] != 'xzt-c' else n_bins[1] # n_bins = [xyz,t/c] or n_bins = [xzt,c]
z_bin_edges = np.linspace(geo_limits[0][2] - 4.665, geo_limits[1][2] + 4.665, num=n_bins_z + 1) # Delta z = 9.329
# calculate_bin_edges_test(geo, y_bin_edges, z_bin_edges) # test disabled by default. Activate it, if you change the offsets of the x/y/z bin edges
else:
raise ValueError('The specified detector geometry "' + str(det_geo) + '" is not available.')
return geo, x_bin_edges, y_bin_edges, z_bin_edges
def calculate_bin_edges_test(geo, y_bin_edges, z_bin_edges):
"""
Tests that the bins in one direction do not accidentally contain more than 'one' OM.
For the x-direction, overlap cannot be avoided in an orthogonal space.
For y and z though, it can!
For y, every bin should contain the number of lines at that y-position times 18, since there are 18 DOMs per line.
For z, every bin should contain 115 entries, since every z bin contains one storey of the 115 ORCA lines.
Not 100% accurate, since only the DOM positions are used and not the individual PMT positions of a DOM.
"""
dom_positions = np.stack(list(geo.get_detector().dom_positions.values()))
dom_y = dom_positions[:, 1]
dom_z = dom_positions[:, 2]
hist_y = np.histogram(dom_y, bins=y_bin_edges)
hist_z = np.histogram(dom_z, bins=z_bin_edges)
print('----------------------------------------------------------------------------------------------')
print('Y-axis: Bin content: ' + str(hist_y[0]))
print('It should be: ' + str(np.array(
[4 * 18, 7 * 18, 9 * 18, 10 * 18, 9 * 18, 10 * 18, 10 * 18, 10 * 18, 11 * 18, 10 * 18, 9 * 18, 8 * 18, 8 * 18])))
print('Y-axis: Bin edges: ' + str(hist_y[1]))
print('..............................................................................................')
print('Z-axis: Bin content: ' + str(hist_z[0]))
print('It should have 115 entries everywhere')
print('Z-axis: Bin edges: ' + str(hist_z[1]))
print('----------------------------------------------------------------------------------------------')
def get_file_particle_type(fname):
"""
Returns a string that specifies the type of the particles that are contained in the input file.
Parameters
----------
fname : str
Filename (full path!) of the input file.
Returns
-------
file_particle_type : str
String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino'].
"""
event_pump = kp.io.hdf5.HDF5Pump(filename=fname, verbose=False) # TODO suppress print of hdf5pump
if 'McTracks' not in event_pump[0]:
file_particle_type = 'undefined'
else:
particle_type = event_pump[0]['McTracks'].type
# if mupage file: first mc_track is an empty neutrino track, second track is the first muon
if particle_type[0] == 0 and np.abs(particle_type[1]) == 13:
file_particle_type = 'muon'
# the first mc_track is the primary neutrino if the input file is containing neutrino events
elif np.abs(particle_type[0]) in [12, 14, 16]:
file_particle_type = 'neutrino'
else:
raise ValueError('The type of the particles in the "McTracks" folder, <', str(particle_type), '> is not known.')
event_pump.close_file()
return file_particle_type
class EventSkipper(kp.Module):
"""
KM3Pipe module that skips events based on some data_cuts.
"""
def configure(self):
self.data_cuts = self.require('data_cuts')
def process(self, blob):
continue_bool = skip_event(blob['event_track'], self.data_cuts)
if continue_bool:
return
else:
return blob
def skip_event(event_track, data_cuts):
"""
Function that checks if an event should be skipped, depending on the data_cuts input.
Parameters
----------
event_track : ndarray(ndim=1)
1D array containing important MC information of the event, only the energy of the event (pos_2) is used here.
data_cuts : dict
Dictionary that contains information about any possible cuts that should be applied.
Supports the following cuts: 'triggered', 'energy_lower_limit', 'energy_upper_limit', 'throw_away_prob'.
Returns
-------
continue_bool : bool
boolean flag to specify, if this event should be skipped or not.
"""
continue_bool = False
if data_cuts['energy_lower_limit'] is not None:
continue_bool = event_track.energy[0] < data_cuts['energy_lower_limit'] # True if E < lower limit
if data_cuts['energy_upper_limit'] is not None and continue_bool == False:
continue_bool = event_track.energy[0] > data_cuts['energy_upper_limit'] # True if E > upper limit
if data_cuts['throw_away_prob'] is not None and continue_bool == False:
throw_away_prob = data_cuts['throw_away_prob']
throw_away = np.random.choice([False, True], p=[1 - throw_away_prob, throw_away_prob])
if throw_away: continue_bool = True
return continue_bool
from orcasong.io import parse_input, load_config, check_user_input, make_output_dirs
from orcasong.geo_binning import calculate_bin_edges
from orcasong.utils import get_file_particle_type, EventSkipper
def make_nn_images(fname, detx_filepath, config):
......@@ -402,7 +69,7 @@ def make_nn_images(fname, detx_filepath, config):
An explanation of the config parameters can be found in orcasong/default_config.toml.
"""
# Load all parameters from the config
# Load all parameters from the config # TODO put everything in a config class, this is horrible
output_dirpath = config['output_dirpath']
chunksize, complib, complevel = config['chunksize'], config['complib'], config['complevel']
flush_freq = config['flush_freq']
......@@ -420,6 +87,7 @@ def make_nn_images(fname, detx_filepath, config):
data_cuts['energy_lower_limit'] = config['data_cut_e_low'] if config['data_cut_e_low'] != 'None' else None
data_cuts['energy_upper_limit'] = config['data_cut_e_high'] if config['data_cut_e_high'] != 'None' else None
data_cuts['throw_away_prob'] = config['data_cut_throw_away'] if config['data_cut_throw_away'] != 'None' else None
data_cuts['custom_skip_function'] = config['data_cut_custom_func'] if config['data_cut_custom_func'] != 'None' else None
make_output_dirs(output_dirpath, do2d, do3d, do4d)
......
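For reference, with the default values shown at the top of this commit plus data_cut_custom_func = 'ts_e_flat', the block above produces a dictionary along these lines:
data_cuts = {'triggered': False,                   # data_cut_triggered = false
             'energy_lower_limit': None,           # data_cut_e_low = 'None'
             'energy_upper_limit': None,           # data_cut_e_high = 'None'
             'throw_away_prob': 0.0,               # data_cut_throw_away = 0.00
             'custom_skip_function': 'ts_e_flat'}  # data_cut_custom_func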
#!/usr/bin/env python
# coding=utf-8
# Filename: utils.py
"""
Utility code for OrcaSong.
"""
import numpy as np
import km3pipe as kp
class EventSkipper(kp.Module):
"""
KM3Pipe module that skips events based on some data_cuts.
"""
def configure(self):
self.data_cuts = self.require('data_cuts')
def process(self, blob):
continue_bool = skip_event(blob['event_track'], self.data_cuts)
if continue_bool:
return
else:
return blob
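A minimal sketch of how this module would be attached to a km3pipe pipeline (file name and cut values are illustrative; the surrounding modules are only indicated):
import km3pipe as kp
from orcasong.utils import EventSkipper

data_cuts = {'triggered': False, 'energy_lower_limit': 3.0, 'energy_upper_limit': None,
             'throw_away_prob': None, 'custom_skip_function': None}  # illustrative values

pipe = kp.Pipeline()
pipe.attach(kp.io.hdf5.HDF5Pump, filename='simfile.h5')  # hypothetical input file
# ... attach the EventDataExtractor here so that blob['event_track'] exists ...
pipe.attach(EventSkipper, data_cuts=data_cuts)           # drops events for which skip_event() returns True
# ... attach the HistogramMaker and an output sink here ...
pipe.drain()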
def skip_event(event_track, data_cuts):
"""
Function that checks if an event should be skipped, depending on the data_cuts input.
Parameters
----------
event_track : ndarray(ndim=1)
1D array containing important MC information of the event, only the energy of the event (pos_2) is used here.
data_cuts : dict
Dictionary that contains information about any possible cuts that should be applied.
Supports the following cuts: 'triggered', 'energy_lower_limit', 'energy_upper_limit', 'throw_away_prob', 'custom_skip_function'.
Returns
-------
continue_bool : bool
Boolean flag that specifies if this event should be skipped or not.
"""
continue_bool = False
if data_cuts['energy_lower_limit'] is not None:
continue_bool = event_track.energy[0] < data_cuts['energy_lower_limit'] # True if E < lower limit
if data_cuts['energy_upper_limit'] is not None and continue_bool == False:
continue_bool = event_track.energy[0] > data_cuts['energy_upper_limit'] # True if E > upper limit
if data_cuts['throw_away_prob'] is not None and continue_bool == False:
throw_away_prob = data_cuts['throw_away_prob']
throw_away = np.random.choice([False, True], p=[1 - throw_away_prob, throw_away_prob])
if throw_away: continue_bool = True
if data_cuts['custom_skip_function'] is not None and continue_bool == False:
continue_bool = use_custom_skip_function(data_cuts['custom_skip_function'], event_track)
return continue_bool
def use_custom_skip_function(skip_function, event_track):
"""
User defined, custom skip functions.
Parameters
----------
skip_function : str
String that specifies which custom skip function should be used.
event_track : ndarray(ndim=1)
Structured numpy array containing the McTracks info of this event.
Returns
-------
continue_bool : bool
Bool that specifies if this event should be skipped or not.
"""
if skip_function == 'ts_e_flat':
# cf. orcanet_contrib/utilities/get_func_for_flat_track_shower.py
particle_type = event_track.particle_type[0]
if np.abs(particle_type) not in [12, 14]:
continue_bool = False
else:
prob_for_e_bin_arr_path = 'placeholder.npy'
# Fraction of tracks compared to showers (n_muon-cc / (n_e-cc + n_e-nc))
arr_fract_for_e_bins = np.load(prob_for_e_bin_arr_path) # 2d arr, one row e_bins, second row prob
e_bins = arr_fract_for_e_bins[0, :]
fract_e = arr_fract_for_e_bins[1, :]
e_evt = event_track.energy[0]
idx_e = (np.abs(e_bins - e_evt)).argmin()
fract = fract_e[idx_e]
if np.abs(particle_type) == 14: # for muon neutrinos
if fract <= 1:
continue_bool = False
else:
evt_survive_prob = 1/float(fract)
continue_bool = np.random.choice([False, True], p=[evt_survive_prob, 1 - evt_survive_prob])
else: # for elec neutrinos
assert np.abs(particle_type) == 12
if fract >= 1:
continue_bool = False
else:
evt_survive_prob = fract
continue_bool = np.random.choice([False, True], p=[evt_survive_prob, 1 - evt_survive_prob])
return continue_bool
else:
raise ValueError('The custom skip function "' + str(skip_function) + '" is not known.')
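To make the rejection sampling in the 'ts_e_flat' branch concrete, a small worked sketch with a made-up fraction value:
import numpy as np

fract = 4.0                          # illustrative: 4x more muon-CC than shower events in this energy bin
evt_survive_prob = 1 / float(fract)  # 0.25
continue_bool = np.random.choice([False, True], p=[evt_survive_prob, 1 - evt_survive_prob])
# continue_bool is True (event gets skipped) with probability 0.75, so on average
# only every 4th muon-CC event in this energy bin survives the cut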
def get_file_particle_type(fname):
"""
Returns a string that specifies the type of the particles that are contained in the input file.
Parameters
----------
fname : str
Filename (full path!) of the input file.
Returns
-------
file_particle_type : str
String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino'].
"""
event_pump = kp.io.hdf5.HDF5Pump(filename=fname, verbose=False) # TODO suppress print of hdf5pump
if 'McTracks' not in event_pump[0]:
file_particle_type = 'undefined'
else:
particle_type = event_pump[0]['McTracks'].type
# if mupage file: first mc_track is an empty neutrino track, second track is the first muon
if particle_type[0] == 0 and np.abs(particle_type[1]) == 13:
file_particle_type = 'muon'
# the first mc_track is the primary neutrino if the input file is containing neutrino events
elif np.abs(particle_type[0]) in [12, 14, 16]:
file_particle_type = 'neutrino'
else:
raise ValueError('The type of the particles in the "McTracks" folder, <' + str(particle_type) + '> is not known.')
event_pump.close_file()
return file_particle_type
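Usage sketch (the file path is hypothetical):
from orcasong.utils import get_file_particle_type

file_particle_type = get_file_particle_type('/path/to/simfile.h5')
print(file_particle_type)  # one of 'undefined', 'muon', 'neutrino'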
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
TODO
Code that calculates the fraction of track to shower events for given files.
"""
import os
......@@ -74,7 +74,7 @@ def get_energies_for_fpaths(fpath_list, fpath_list_key_ic, cut_e_higher_than_3=F
f.close()
print('Total number of events for ' + fpath_list_key_ic + ' (without 3-5GeV from low_e prod): '
print('Total number of events for ' + fpath_list_key_ic + ' : '
+ str(energy_conc_arr.shape[0]))
print('Total number of files: ' + str(len(fpath_list)))
......@@ -82,6 +82,16 @@ def get_energies_for_fpaths(fpath_list, fpath_list_key_ic, cut_e_higher_than_3=F
def save_energies_for_ic(energies_for_ic):
"""
Parameters
----------
energies_for_ic
Returns
-------
"""
np.savez('./energies_for_ic.npz',
muon_cc_3_100=energies_for_ic['muon_cc_3_100'], muon_cc_1_5=energies_for_ic['muon_cc_1_5'],
......@@ -90,6 +100,12 @@ def save_energies_for_ic(energies_for_ic):
def load_energies_for_ic():
"""
Returns
-------
"""
data = np.load('./energies_for_ic.npz')
......@@ -147,26 +163,28 @@ def plot_e_and_make_flat_func(energies_for_ic):
pdfpages = PdfPages('./e_hist_plots.pdf')
fig, ax = plt.subplots()
e_bins = 199
# plot
hist_muon_cc = plt.hist(energies_for_ic['muon_cc'], bins=99)
plt.title('Muon-CC 1-3 + 3-100 GeV for Run 1-2400')
hist_muon_cc = plt.hist(energies_for_ic['muon_cc'], bins=e_bins)
plt.title('Muon-CC 1-5 + 3-100 GeV for Run 1-2400')
make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]')
hist_shower = plt.hist(energies_for_ic['elec_cc_and_nc'], bins=99)
plt.title('Shower (elec-CC + elec-NC) 1-3 + 3-100 GeV for 2x Run 1-1200')
hist_shower = plt.hist(energies_for_ic['elec_cc_and_nc'], bins=e_bins)
plt.title('Shower (elec-CC + elec-NC) 1-5 + 3-100 GeV for 2x Run 1-1200')
make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]')
hist_elec_cc = plt.hist(energies_for_ic['elec_cc'], bins=99)
plt.title('Elec-CC 1-3 + 3-100 GeV for Run 1-1200')
hist_elec_cc = plt.hist(energies_for_ic['elec_cc'], bins=e_bins)
plt.title('Elec-CC 1-5 + 3-100 GeV for Run 1-1200')
make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]')
hist_elec_nc = plt.hist(energies_for_ic['elec_nc'], bins=99)
plt.title('Elec-NC 1-3 + 3-100 GeV for Run 1-1200')
hist_elec_nc = plt.hist(energies_for_ic['elec_nc'], bins=e_bins)
plt.title('Elec-NC 1-5 + 3-100 GeV for Run 1-1200')
make_plot_options_and_save(ax, pdfpages, ylabel='Counts [#]')
# We take 600 muon-CC files and 300 elec-cc and 300 elec_nc files for the split, reduce 1-3GeV bins by 1/2
hist_shower[0][0] = hist_shower[0][0] / 2 # 1-2GeV
hist_shower[0][1] = hist_shower[0][1] / 2 # 2-3GeV
# # We take 600 muon-CC files and 300 elec-cc and 300 elec_nc files for the split, reduce 1-3GeV bins by 1/2
# hist_shower[0][0] = hist_shower[0][0] / 2 # 1-2GeV
# hist_shower[0][1] = hist_shower[0][1] / 2 # 2-3GeV
track_div_shower = np.divide(hist_muon_cc[0], hist_shower[0])
print(hist_muon_cc[0])
......@@ -174,7 +192,6 @@ def plot_e_and_make_flat_func(energies_for_ic):
bins = hist_muon_cc[1]  # doesn't matter which bins to use
track_div_shower = np.append(track_div_shower, track_div_shower[-1])
#track_div_shower = np.concatenate([track_div_shower, np.array(track_div_shower[-1])[:, np.newaxis]], axis=0) # fix for mpl
print(bins)
print(track_div_shower)
ax.step(bins, track_div_shower, linestyle='-', where='post')
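The 2D array consumed by the 'ts_e_flat' skip function in orcasong/utils.py (one row of energy bins, one row of fractions) is presumably built from the track_div_shower values above; continuing from those variables, a hedged sketch of how it could be written out (the file name is an assumption, cf. the 'placeholder.npy' path in utils.py):
import numpy as np

# drop the duplicated last step value again and pair each bin (lower edge) with its fraction
arr_fract_for_e_bins = np.array([bins[:-1], track_div_shower[:-1]])
np.save('./arr_fract_for_e_bins.npy', arr_fract_for_e_bins)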
......@@ -205,7 +222,8 @@ def main():
energies_for_ic = dict()
for fpath_list_key_ic in fpaths:
print('Getting energies for ' + fpath_list_key_ic)
cut_flag = True if fpath_list_key_ic in ['muon_cc_1_5', 'elec_cc_1_5', 'elec_nc_1_5'] else False
cut_flag = False
# cut_flag = True if fpath_list_key_ic in ['muon_cc_1_5', 'elec_cc_1_5', 'elec_nc_1_5'] else False
fpath_list = fpaths[fpath_list_key_ic]
energies_for_ic[fpath_list_key_ic] = get_energies_for_fpaths(fpath_list, fpath_list_key_ic, cut_e_higher_than_3=cut_flag)
......