From e120064023d6980f825aacd1118c5e7c39141230 Mon Sep 17 00:00:00 2001 From: ViaFerrata <michimoser@onlinehome.de> Date: Fri, 21 Dec 2018 12:03:33 +0100 Subject: [PATCH] Major changes: - Changed data_to_images to use the KM3Pipe Pipeline concept instead of iterating over the HDF5Pump. - Added output dirpath, chunksize, compression filter and compression level arguments. - Added CI versioning and publishing of OrcaSong via PyPI. --- .gitignore | 2 + .gitlab-ci.yml | 12 + orcasong/__init__.py | 1 + orcasong/__version__.py | 22 ++ orcasong/data_to_images.py | 222 ++++++++++-------- orcasong/file_to_hits.py | 219 +++++------------ orcasong/histograms_to_files.py | 34 --- orcasong/hits_to_histograms.py | 149 ++++++++---- setup.py | 17 +- .../conf_ORCA_115l_1-5GeV_xyz-c.toml | 2 +- .../conf_ORCA_115l_1-5GeV_xyz-t.toml | 2 +- .../conf_ORCA_115l_3-100GeV_xyz-c.toml | 2 +- .../conf_ORCA_115l_3-100GeV_xyz-t.toml | 2 +- .../conf_ORCA_115l_mupage_xyz-c.toml | 2 +- .../conf_ORCA_115l_mupage_xyz-t.toml | 2 +- .../conf_ORCA_115l_random_noise_xyz-c.toml | 2 +- .../conf_ORCA_115l_random_noise_xyz-t.toml | 2 +- .../conf_ORCA_115l_1-5GeV_xyz-c.toml | 0 .../conf_ORCA_115l_1-5GeV_xyz-t.toml | 0 .../conf_ORCA_115l_3-100GeV_xyz-c.toml | 2 +- .../conf_ORCA_115l_3-100GeV_xyz-t.toml | 2 +- {orcasong => user}/job_logs/cout/.gitkeep | 0 .../submit_data_to_images.sh | 41 ++-- 23 files changed, 365 insertions(+), 374 deletions(-) create mode 100644 .gitignore create mode 100644 orcasong/__version__.py delete mode 100644 orcasong/histograms_to_files.py rename {orcasong => user}/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-c.toml (96%) rename {orcasong => user}/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-t.toml (96%) rename {orcasong => user}/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-c.toml (96%) rename {orcasong => user}/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-t.toml (96%) rename {orcasong => user}/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-c.toml (96%) rename {orcasong => user}/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-t.toml (96%) rename {orcasong => user}/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-c.toml (96%) rename {orcasong => user}/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-t.toml (96%) rename {orcasong => user}/config/orca_115l_regression/conf_ORCA_115l_1-5GeV_xyz-c.toml (100%) rename {orcasong => user}/config/orca_115l_regression/conf_ORCA_115l_1-5GeV_xyz-t.toml (100%) rename {orcasong => user}/config/orca_115l_regression/conf_ORCA_115l_3-100GeV_xyz-c.toml (96%) rename {orcasong => user}/config/orca_115l_regression/conf_ORCA_115l_3-100GeV_xyz-t.toml (96%) rename {orcasong => user}/job_logs/cout/.gitkeep (100%) rename {orcasong => user}/job_submission_scripts/submit_data_to_images.sh (63%) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..06ef6e3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +# Version info for PyPI +orcasong/version.txt diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0658a95..523afb5 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -16,3 +16,15 @@ pages: artifacts: paths: - public + + pypi: + image: docker.km3net.de/base/python:3 + stage: release + cache: {} + script: + - pip install -U twine + - python setup.py sdist + - twine upload dist/* + only: + - tags + diff --git a/orcasong/__init__.py b/orcasong/__init__.py index
e69de29..c1bb8a7 100644 --- a/orcasong/__init__.py +++ b/orcasong/__init__.py @@ -0,0 +1 @@ +from .__version__ import version \ No newline at end of file diff --git a/orcasong/__version__.py b/orcasong/__version__.py new file mode 100644 index 0000000..45827bf --- /dev/null +++ b/orcasong/__version__.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# Filename: __version__.py +# pylint: disable=C0103 +""" +PEP 386 compliant version info. + + (major, minor, micro, alpha/beta/rc/final, #) + (1, 1, 2, 'alpha', 0) => "1.1.2.dev" + (1, 2, 0, 'beta', 2) => "1.2b2" + +""" +from __future__ import absolute_import, print_function, division +from os.path import dirname, realpath, join +from setuptools_scm import get_version + +version = 'unknown version' + +try: + version = get_version(root='..', relative_to=__file__) +except LookupError: + with open(join(realpath(dirname(__file__)), "version.txt"), 'r') as fobj: + version = fobj.read() diff --git a/orcasong/data_to_images.py b/orcasong/data_to_images.py index 41e82c0..0c97f7f 100644 --- a/orcasong/data_to_images.py +++ b/orcasong/data_to_images.py @@ -4,7 +4,12 @@ """ Main OrcaSong code which takes raw simulated .h5 files and the corresponding .detx detector file as input in order to generate 2D/3D/4D histograms ('images') that can be used for CNNs. -The input file can be calibrated or not (e.g. contains pos_xyz of the hits). + +First argument: KM3NeT hdf5 simfile at JTE level. +Second argument: a .detx file that is associated with the hdf5 file. + +The input file can be calibrated or not (e.g. contains pos_xyz of the hits) and the OrcaSong output is written +to the current folder by default (otherwise use the --o option). Makes only 4D histograms ('images') by default. Usage: @@ -16,6 +21,15 @@ Options: -c CONFIGFILE Load all options from a config file (.toml format). + --o OUTPUTPATH Path to the directory where the OrcaSong output should be stored. [default: ./] + + --chunksize CHUNKSIZE Chunksize (axis_0) that should be used for the hdf5 output of OrcaSong. [default: 32] + + --complib COMPLIB Compression library that should be used for the OrcaSong output. + All PyTables compression filters are available. [default: zlib] + + --complevel COMPLEVEL Compression level that should be used for the OrcaSong output. [default: 1] + --n_bins N_BINS Number of bins that are used in the image making for each dimension, e.g. (x,y,z,t).
[default: 11,13,18,60] @@ -84,13 +98,13 @@ from docopt import docopt #from memory_profiler import profile # for memory profiling, call with @profile; myfunc() #import line_profiler # call with kernprof -l -v file.py args import km3pipe as kp +import km3modules as km import matplotlib as mpl mpl.use('Agg') from matplotlib.backends.backend_pdf import PdfPages -from orcasong.file_to_hits import * -from orcasong.histograms_to_files import * -from orcasong.hits_to_histograms import * +from orcasong.file_to_hits import EventDataExtractor +from orcasong.hits_to_histograms import HistogramMaker def parse_input(): @@ -108,6 +122,10 @@ fname = args['FILENAME'] detx_filepath = args['DETXFILE'] + output_dirpath = args['--o'] + chunksize = int(args['--chunksize']) + complib = args['--complib'] + complevel = int(args['--complevel']) n_bins = tuple(map(int, args['--n_bins'].split(','))) det_geo = args['--det_geo'] do2d = args['--do2d'] @@ -123,8 +141,8 @@ data_cuts['throw_away_prob'] = float(args['--data_cut_throw_away']) prod_ident = int(args['--prod_ident']) - return fname, detx_filepath, n_bins, det_geo, do2d, do2d_plots, do3d, do4d, prod_ident, timecut,\ - do_mc_hits, data_cuts + return fname, detx_filepath, output_dirpath, chunksize, complib, complevel, n_bins, det_geo, do2d,\ + do2d_plots, do3d, do4d, prod_ident, timecut, do_mc_hits, data_cuts def parser_check_input(args): @@ -183,6 +201,39 @@ 'Do you really want to create pdf images for so many events?') +def make_output_dirs(output_dirpath, do2d, do3d, do4d): + """ + Function that creates all output directories if they don't exist already. + + Parameters + ---------- + output_dirpath : str + Full path to the directory where the OrcaSong output should be stored. + do2d : bool + Declares if 2D histograms are to be created. + do3d : bool + Declares if 3D histograms are to be created. + do4d : tuple(bool, str) + Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz. + Currently, only 'time' and 'channel_id' are available. + + """ + if do2d: + projections = ['xy', 'xz', 'yz', 'xt', 'yt', 'zt'] + for proj in projections: + if not os.path.exists(output_dirpath + '/orcasong_output/4dTo2d/' + proj): + os.makedirs(output_dirpath + '/orcasong_output/4dTo2d/' + proj) + if do3d: + projections = ['xyz', 'xyt', 'xzt', 'yzt', 'rzt'] + for proj in projections: + if not os.path.exists(output_dirpath + '/orcasong_output/4dTo3d/' + proj): + os.makedirs(output_dirpath + '/orcasong_output/4dTo3d/' + proj) + if do4d[0]: + proj = 'xyzt' if not do4d[1] == 'channel_id' else 'xyzc' + if not os.path.exists(output_dirpath + '/orcasong_output/4dTo4d/' + proj): + os.makedirs(output_dirpath + '/orcasong_output/4dTo4d/' + proj) + + def calculate_bin_edges(n_bins, det_geo, detx_filepath, do4d): """ Calculates the bin edges for the corresponding detector geometry (1 DOM/bin) based on the number of specified bins. @@ -275,14 +326,14 @@ print('----------------------------------------------------------------------------------------------') -def get_file_particle_type(event_pump): +def get_file_particle_type(fname): """ Returns a string that specifies the type of the particles that are contained in the input file. Parameters ---------- - event_pump : kp.io.hdf5.HDF5Pump - Km3pipe HDF5Pump instance which is linked to the input file. + fname : str + Filename (full path!) of the input file.
Returns ------- @@ -290,6 +341,8 @@ def get_file_particle_type(event_pump): String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino']. """ + event_pump = kp.io.hdf5.HDF5Pump(filename=fname) # TODO suppress print of hdf5pump and close pump afterwards + if 'McTracks' not in event_pump[0]: file_particle_type = 'undefined' else: @@ -305,9 +358,26 @@ def get_file_particle_type(event_pump): else: raise ValueError('The type of the particles in the "McTracks" folder, <', str(particle_type), '> is not known.') + event_pump.close_file() + return file_particle_type +class EventSkipper(kp.Module): + """ + KM3Pipe module that skips events based on some data_cuts. + """ + def configure(self): + self.data_cuts = self.require('data_cuts') + + def process(self, blob): + continue_bool = skip_event(blob['event_track'], self.data_cuts) + if continue_bool: + return + else: + return blob + + def skip_event(event_track, data_cuts): """ Function that checks if an event should be skipped, depending on the data_cuts input. @@ -329,15 +399,15 @@ def skip_event(event_track, data_cuts): continue_bool = False if data_cuts['energy_lower_limit'] is not None: - continue_bool = event_track[2] < data_cuts['energy_lower_limit'] # True if E < lower limit + continue_bool = event_track.energy[0] < data_cuts['energy_lower_limit'] # True if E < lower limit - if data_cuts['energy_upper_limit'] is not None: - continue_bool = event_track[2] > data_cuts['energy_upper_limit'] # True if E > upper limit + if data_cuts['energy_upper_limit'] is not None and continue_bool == False: + continue_bool = event_track.energy[0] > data_cuts['energy_upper_limit'] # True if E > upper limit - if data_cuts['throw_away_prob'] > 0: + if data_cuts['throw_away_prob'] > 0 and continue_bool == False: throw_away_prob = data_cuts['throw_away_prob'] throw_away = np.random.choice([False, True], p=[1 - throw_away_prob, throw_away_prob]) - if throw_away == True: continue_bool = True + if throw_away: continue_bool = True # # TODO temporary, deprecated solution, we always need to throw away the same events if we have multiple inputs -> use fixed seed # arr = np.load('/home/woody/capn/mppi033h/Code/OrcaSong/utilities/low_e_prod_surviving_evts_elec-CC.npy') @@ -351,7 +421,7 @@ def skip_event(event_track, data_cuts): return continue_bool -def data_to_images(fname, detx_filepath, n_bins, det_geo, do2d, do2d_plots, do3d, do4d, prod_ident, timecut, +def data_to_images(fname, detx_filepath, output_dirpath, chunksize, complib, complevel, n_bins, det_geo, do2d, do2d_plots, do3d, do4d, prod_ident, timecut, do_mc_hits, data_cuts): """ Main code with config parameters. Reads raw .hdf5 files and creates 2D/3D histogram projections that can be used @@ -365,6 +435,15 @@ def data_to_images(fname, detx_filepath, n_bins, det_geo, do2d, do2d_plots, do3d String with the full filepath to the corresponding .detx file of the input file. Used for the binning and for the hits calibration if the input file is not calibrated yet (e.g. hits do not contain pos_x/y/z, time, ...). + output_dirpath : str + Full path to the directory, where the orcasong output should be stored. + chunksize : int + Chunksize (along axis_0) that is used for saving the OrcaSong output to a .h5 file. + complib : str + Compression library that is used for saving the OrcaSong output to a .h5 file. + All PyTables compression filters are available, e.g. 'zlib', 'lzf', 'blosc', ... . 
+ complevel : int + Compression level for the compression filter that is used for saving the OrcaSong output to a .h5 file. n_bins : tuple of int Declares the number of bins that should be used for each dimension, e.g. (x,y,z,t). det_geo : str @@ -399,105 +478,64 @@ def data_to_images(fname, detx_filepath, n_bins, det_geo, do2d, do2d_plots, do3d Supports the following cuts: 'triggered', 'energy_lower_limit', 'energy_upper_limit', 'throw_away_prob'. """ - np.random.seed(42) # set random seed + make_output_dirs(output_dirpath, do2d, do3d, do4d) filename = os.path.basename(os.path.splitext(fname)[0]) filename_output = filename.replace('.','_') + km.GlobalRandomState(seed=42) # set random km3pipe (=numpy) seed geo, x_bin_edges, y_bin_edges, z_bin_edges = calculate_bin_edges(n_bins, det_geo, detx_filepath, do4d) + pdf_2d_plots = PdfPages(output_dirpath + '/orcasong_output/4dTo2d/' + filename_output + '_plots.pdf') if do2d_plots[0] is True else None - all_4d_to_2d_hists, all_4d_to_3d_hists, all_4d_to_4d_hists, mc_infos = [], [], [], [] - - pdf_2d_plots = PdfPages('Results/4dTo2d/' + filename_output + '_plots.pdf') if do2d_plots[0] is True else None - - # Initialize HDF5Pump of the input file - event_pump = kp.io.hdf5.HDF5Pump(filename=fname) - file_particle_type = get_file_particle_type(event_pump) - - print('Generating histograms from the hits in XYZT format for files based on ' + fname) - for i, event_blob in enumerate(event_pump): - if i % 10 == 0: - print('Event No. ' + str(i)) + file_particle_type = get_file_particle_type(fname) - # filter out all hit and track information belonging that to this event - event_hits, event_track = get_event_data(event_blob, file_particle_type, geo, do_mc_hits, data_cuts, do4d, prod_ident) + print('Generating histograms from the hits for files based on ' + fname) - class EventSkipper(kp.Module): - def configure(self): - self.data_cuts = self.require('data_cuts') + # Initialize OrcaSong Event Pipeline - def process(self, blob): - if self._skip_event(blob['EventData']): - return - else: - return blob - - def _skip_event(self, blob): - ... should i skip? - return True/False - - import km3modules as km - pipe = kp.Pipeline() - pipe.attach(km.common.StatusBar, every=10) - pipe.attach(EventDataExtractor, file_particle_type='...') - pipe.attach(EventSkipper, data_cuts=...) 
- pipe.attach(km.common.Keep, keys=['FinalTrack', 'Foo']) - pipe.attach(kp.io.HDF5Sink, filename='dump.h5') - - - continue_bool = skip_event(event_track, data_cuts) - if continue_bool: continue - - # event_track: [event_id, particle_type, energy, isCC, bjorkeny, dir_x/y/z, time] - mc_infos.append(event_track) - - if do2d: - compute_4d_to_2d_histograms(event_hits, x_bin_edges, y_bin_edges, z_bin_edges, n_bins, all_4d_to_2d_hists, timecut, event_track, do2d_plots[0], pdf_2d_plots) - - if do3d: - compute_4d_to_3d_histograms(event_hits, x_bin_edges, y_bin_edges, z_bin_edges, n_bins, all_4d_to_3d_hists, timecut) - - if do4d[0]: - compute_4d_to_4d_histograms(event_hits, x_bin_edges, y_bin_edges, z_bin_edges, n_bins, all_4d_to_4d_hists, timecut, do4d) - - if do2d_plots[0] is True and i >= do2d_plots[1]: - pdf_2d_plots.close() - break + pipe = kp.Pipeline() + pipe.attach(km.common.StatusBar, every=50) + pipe.attach(kp.io.hdf5.HDF5Pump, filename=fname) + pipe.attach(km.common.Keep, keys=['EventInfo', 'Header', 'RawHeader', 'McTracks', 'Hits', 'McHits']) + pipe.attach(EventDataExtractor, + file_particle_type=file_particle_type, geo=geo, do_mc_hits=do_mc_hits, + data_cuts=data_cuts, do4d=do4d, prod_ident=prod_ident) + pipe.attach(km.common.Keep, keys=['event_hits', 'event_track']) + pipe.attach(EventSkipper, data_cuts=data_cuts) + pipe.attach(HistogramMaker, + x_bin_edges=x_bin_edges, y_bin_edges=y_bin_edges, z_bin_edges=z_bin_edges, + n_bins=n_bins, timecut=timecut, do2d=do2d, do2d_plots=do2d_plots, pdf_2d_plots=pdf_2d_plots, + do3d=do3d, do4d=do4d) + pipe.attach(km.common.Delete, keys=['event_hits']) if do2d: - store_histograms_as_hdf5(np.stack([hist_tuple[0] for hist_tuple in all_4d_to_2d_hists]), np.array(mc_infos), 'Results/4dTo2d/h5/xy/' + filename_output + '_xy.h5') - store_histograms_as_hdf5(np.stack([hist_tuple[1] for hist_tuple in all_4d_to_2d_hists]), np.array(mc_infos), 'Results/4dTo2d/h5/xz/' + filename_output + '_xz.h5') - store_histograms_as_hdf5(np.stack([hist_tuple[2] for hist_tuple in all_4d_to_2d_hists]), np.array(mc_infos), 'Results/4dTo2d/h5/yz/' + filename_output + '_yz.h5') - store_histograms_as_hdf5(np.stack([hist_tuple[3] for hist_tuple in all_4d_to_2d_hists]), np.array(mc_infos), 'Results/4dTo2d/h5/xt/' + filename_output + '_xt.h5') - store_histograms_as_hdf5(np.stack([hist_tuple[4] for hist_tuple in all_4d_to_2d_hists]), np.array(mc_infos), 'Results/4dTo2d/h5/yt/' + filename_output + '_yt.h5') - store_histograms_as_hdf5(np.stack([hist_tuple[5] for hist_tuple in all_4d_to_2d_hists]), np.array(mc_infos), 'Results/4dTo2d/h5/zt/' + filename_output + '_zt.h5') + for proj in ['xy', 'xz', 'yz', 'xt', 'yt', 'zt']: + savestr = output_dirpath + '/orcasong_output/4dTo2d/' + proj + '/' + filename_output + '_' + proj + '.h5' + pipe.attach(kp.io.HDF5Sink, filename=savestr, blob_keys=[proj, 'event_track'], complib=complib, complevel=complevel, chunksize=chunksize) + if do3d: - store_histograms_as_hdf5(np.stack([hist_tuple[0] for hist_tuple in all_4d_to_3d_hists]), np.array(mc_infos), 'Results/4dTo3d/h5/xyz/' + filename_output + '_xyz.h5', compression=('gzip', 1)) - store_histograms_as_hdf5(np.stack([hist_tuple[1] for hist_tuple in all_4d_to_3d_hists]), np.array(mc_infos), 'Results/4dTo3d/h5/xyt/' + filename_output + '_xyt.h5', compression=('gzip', 1)) - store_histograms_as_hdf5(np.stack([hist_tuple[2] for hist_tuple in all_4d_to_3d_hists]), np.array(mc_infos), 'Results/4dTo3d/h5/xzt/' + filename_output + '_xzt.h5', compression=('gzip', 1)) - 
store_histograms_as_hdf5(np.stack([hist_tuple[3] for hist_tuple in all_4d_to_3d_hists]), np.array(mc_infos), 'Results/4dTo3d/h5/yzt/' + filename_output + '_yzt.h5', compression=('gzip', 1)) - store_histograms_as_hdf5(np.stack([hist_tuple[4] for hist_tuple in all_4d_to_3d_hists]), np.array(mc_infos), 'Results/4dTo3d/h5/rzt/' + filename_output + '_rzt.h5', compression=('gzip', 1)) + for proj in ['xyz', 'xyt', 'xzt', 'yzt', 'rzt']: + savestr = output_dirpath + '/orcasong_output/4dTo3d/' + proj + '/' + filename_output + '_' + proj + '.h5' + pipe.attach(kp.io.HDF5Sink, filename=savestr, blob_keys=[proj, 'event_track'], complib=complib, complevel=complevel, chunksize=chunksize) if do4d[0]: - folder = '' - if not os.path.exists('Results/4dTo4d/h5/xyzt/' + folder): - os.makedirs('Results/4dTo4d/h5/xyzt/' + folder) - if folder != '': folder += '/' + proj = 'xyzt' if not do4d[1] == 'channel_id' else 'xyzc' + savestr = output_dirpath + '/orcasong_output/4dTo4d/' + proj + '/' + filename_output + '_' + proj + '.h5' + pipe.attach(kp.io.HDF5Sink, filename=savestr, blob_keys=[proj, 'event_track'], complib=complib, complevel=complevel, chunksize=chunksize) - if do4d[1] == 'channel_id': - store_histograms_as_hdf5(np.array(all_4d_to_4d_hists), np.array(mc_infos), 'Results/4dTo4d/h5/xyzc/' + folder + filename_output + '_xyzc.h5', compression=('gzip', 1)) - else: - store_histograms_as_hdf5(np.array(all_4d_to_4d_hists), np.array(mc_infos), 'Results/4dTo4d/h5/xyzt/' + folder + filename_output + '_xyzt.h5', compression=('gzip', 1)) + # Execute Pipeline + pipe.drain() def main(): """ Parses the input to the main data_to_images function. """ - fname, detx_filepath, n_bins, det_geo, do2d, do2d_plots, do3d, do4d, prod_ident, timecut, do_mc_hits, data_cuts = parse_input() - data_to_images(fname, detx_filepath, n_bins, det_geo, do2d, do2d_plots, do3d, do4d, prod_ident, timecut, - do_mc_hits, data_cuts) + fname, detx_filepath, output_dirpath, chunksize, complib, complevel, n_bins, det_geo, do2d, do2d_plots, do3d,\ do4d, prod_ident, timecut, do_mc_hits, data_cuts = parse_input() + data_to_images(fname, detx_filepath, output_dirpath, chunksize, complib, complevel, n_bins, det_geo, do2d, + do2d_plots, do3d, do4d, prod_ident, timecut, do_mc_hits, data_cuts) if __name__ == '__main__': diff --git a/orcasong/file_to_hits.py b/orcasong/file_to_hits.py index 686d4fe..71067b7 100644 --- a/orcasong/file_to_hits.py +++ b/orcasong/file_to_hits.py @@ -4,8 +4,6 @@ import numpy as np import km3pipe as kp -#from memory_profiler import profile -#import line_profiler # call with kernprof -l -v file.py args def get_primary_track_index(event_blob): @@ -36,6 +34,10 @@ This is required for vertex_time reconstruction, as the absolute time scale needs to be relative to the triggered hits. + Careful: sometimes it is not the neutrino event that is triggered, but just some random noise! + This means that in very rare cases, the time_residual_vertex can be very large (millions of ns), which might throw off + a NN with vertex_time reconstruction. + Parameters ---------- time_interaction : float @@ -131,27 +133,25 @@ def get_tracks(event_blob, file_particle_type, event_hits, prod_ident): vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex/n_muons, (prod_ident)].
""" - # parse EventInfo and Header information # TODO always read from header + # parse EventInfo and Header information event_id = event_blob['EventInfo'].event_id[0] - - # the run_id info in the EventInfo group is broken for ORCA neutrino and mupage files - # The Header run_id is the most reliable one. - - if 'Header' in event_blob: # if Header exists in file, take run_id from it. - run_id = event_blob['Header'].start_run.run_id.astype('float32') - else: - if file_particle_type == 'muon': - run_id = event_blob['RawHeader'][1][0].astype('float32') - elif file_particle_type == 'undefined': # currently used with random_noise files - run_id = event_blob['EventInfo'].run_id - else: - run_id = event_blob['RawHeader'][0][0].astype('float32') + run_id = event_blob['Header'].start_run.run_id.astype('float32') # collect all event_track information, dependent on file_particle_type if file_particle_type == 'undefined': particle_type = 0 - track = [event_id, run_id, particle_type] + track = {'event_id': event_id, 'run_id': run_id, 'particle_type': particle_type} elif file_particle_type == 'muon': # take index 1, index 0 is the empty neutrino mc_track @@ -172,8 +172,10 @@ vertex_pos_y = np.average(event_blob['McTracks'][1:].pos_y, weights=event_blob['McTracks'][1:].energy) vertex_pos_z = np.average(event_blob['McTracks'][1:].pos_z, weights=event_blob['McTracks'][1:].energy) - track = [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_interaction, run_id, - vertex_pos_x, vertex_pos_y, vertex_pos_z, n_muons] + track = {'event_id': event_id, 'particle_type': particle_type, 'energy': energy, 'is_cc': is_cc, + 'bjorkeny': bjorkeny, 'dir_x': dir_x, 'dir_y': dir_y, 'dir_z': dir_z, + 'time_interaction': time_interaction, 'run_id': run_id, 'vertex_pos_x': vertex_pos_x, + 'vertex_pos_y': vertex_pos_y, 'vertex_pos_z': vertex_pos_z, 'n_muons': n_muons} elif file_particle_type == 'neutrino': p = get_primary_track_index(event_blob) @@ -189,15 +191,28 @@ hits_time, triggered = event_hits[:, 3], event_hits[:, 4] time_residual_vertex = get_time_residual_nu_interaction_mean_triggered_hits(time_interaction, hits_time, triggered) - track = [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_interaction, run_id, - vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex] + track = {'event_id': event_id, 'particle_type': particle_type, 'energy': energy, 'is_cc': is_cc, + 'bjorkeny': bjorkeny, 'dir_x': dir_x, 'dir_y': dir_y, 'dir_z': dir_z, + 'time_interaction': time_interaction, 'run_id': run_id, 'vertex_pos_x': vertex_pos_x, + 'vertex_pos_y': vertex_pos_y,
'vertex_pos_z': vertex_pos_z, + 'time_residual_vertex': time_residual_vertex} else: raise ValueError('The file_particle_type "', str(file_particle_type), '" is not known.') - if prod_ident is not None: track.append(prod_ident) + if prod_ident is not None: track['prod_ident'] = prod_ident - event_track = np.array(track, dtype=np.float64) + dtypes = [(key, np.float64) for key in track.keys()] + event_track = kp.dataclasses.Table(track, dtype=dtypes, h5loc='y', name='Event_Information') return event_track @@ -249,164 +264,40 @@ def get_event_data(event_blob, file_particle_type, geo, do_mc_hits, data_cuts, d class EventDataExtractor(kp.Module): + """ + Class that takes a km3pipe blob which contains the information for one event and returns + a blob with a hit array and a track array that contains all relevant information of the event. + """ def configure(self): + """ + Sets up the input arguments of the EventDataExtractor class. + """ self.file_particle_type = self.require('file_particle_type') self.geo = self.require('geo') self.do_mc_hits = self.require('do_mc_hits') self.data_cuts = self.require('data_cuts') self.do4d = self.require('do4d') self.prod_ident = self.require('prod_ident') - self.event_hits = self.get('event_hits', default='event_hits') - self.event_track = self.get('event_track', default='event_track') + self.event_hits_key = self.get('event_hits', default='event_hits') + self.event_track_key = self.get('event_track', default='event_track') def process(self, blob): - blob[self.event_hits] = self.get_hits(blob, self.geo, self.do_mc_hits, self.data_cuts, self.do4d) - blob[self.event_track] = self.get_tracks(blob, self.file_particle_type, self.event_hits, self.prod_ident) - return blob - - def finish(self): - return - - def get_hits(self, blob, geo, do_mc_hits, data_cuts, do4d): """ - Returns a hits array that contains [pos_x, pos_y, pos_z, time, triggered, channel_id (optional)]. + Returns a blob (dict), which contains the event_hits array and the event_track array. Parameters ---------- - blob : kp.io.HDF5Pump.blob - Event blob of the HDF5Pump which contains all information for one event. - geo : kp.Geometry - km3pipe Geometry instance that contains the geometry information of the detector. - Only used if the event_blob is from a non-calibrated file! - do_mc_hits : bool - Tells the function of the hits (mc_hits + BG) or the mc_hits only should be parsed. - In the case of mc_hits, the dom_id needs to be calculated thanks to the jpp output. - data_cuts : dict - Specifies if cuts should be applied. - Contains the keys 'triggered' and 'energy_lower/upper_limit' and 'throw_away_prob'. - do4d : tuple(bool, str) - Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz. - In the case of 'channel_id', this information needs to be included in the event_hits as well. + blob : dict + Km3pipe blob which contains all the data from the input file. Returns ------- - event_hits : ndarray(ndim=2) - 2D array that contains the hits data for the input event [pos_x, pos_y, pos_z, time, triggered, (channel_id)]. + blob : dict + Dictionary that contains the event_hits array and the event_track array. 
""" - # parse hits [x,y,z,time] - hits = blob['Hits'] if do_mc_hits is False else blob['McHits'] - - if 'pos_x' not in blob['Hits'].dtype.names: # check if blob already calibrated - hits = geo.apply(hits) - - if data_cuts['triggered'] is True: - hits = hits.__array__[hits.triggered.astype(bool)] - # hits = hits.triggered_hits # alternative, though it only works for the triggered condition! - - pos_x, pos_y, pos_z = hits.pos_x, hits.pos_y, hits.pos_z - hits_time = hits.time - triggered = hits.triggered - - ax = np.newaxis - event_hits = np.concatenate([pos_x[:, ax], pos_y[:, ax], pos_z[:, ax], hits_time[:, ax], triggered[:, ax]], - axis=1) # dtype: np.float64 - - if do4d[0] is True and do4d[1] == 'channel_id' or do4d[1] == 'xzt-c': - event_hits = np.concatenate([event_hits, hits.channel_id[:, ax]], axis=1) - - return event_hits - - - def get_tracks(self, blob, file_particle_type, event_hits, prod_ident): - """ - Returns the event_track, which contains important event_info and mc_tracks data for the input event. - - Parameters - ---------- - blob : kp.io.HDF5Pump.blob - Event blob of the HDF5Pump which contains all information for one event. - file_particle_type : str - String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino']. - event_hits : ndarray(ndim=2) - 2D array that contains the hits data for the input event. - prod_ident : int - Optional int that identifies the used production, more documentation in the docs of the main function. - - Returns - ------- - event_track : ndarray(ndim=1) - 1D array that contains important event_info and mc_tracks data for the input event. - - If file_particle_type = 'undefined': - [event_id, run_id, (prod_ident)]. - - If file_particle_type = 'neutrino'/'muon': - [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_track, run_id, - vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex/n_muons, (prod_ident)]. - - """ - # parse EventInfo and Header information # TODO always read from header - event_id = blob['EventInfo'].event_id[0] - - # the run_id info in the EventInfo group is broken for ORCA neutrino and mupage files - # The Header run_id is the most reliable one. - - if 'Header' in blob: # if Header exists in file, take run_id from it. 
- run_id = blob['Header'].start_run.run_id.astype('float32') - else: - raise ValueError('There is no "Header" folder in your h5 file, something must be wrong!') - - # collect all event_track information, dependent on file_particle_type - - if file_particle_type == 'undefined': - particle_type = 0 - track = [event_id, run_id, particle_type] - - elif file_particle_type == 'muon': - # take index 1, index 0 is the empty neutrino mc_track - particle_type = blob['McTracks'][1].type # assumed that this is the same for all muons in a bundle - is_cc = blob['McTracks'][1].is_cc # always 1 actually - bjorkeny = blob['McTracks'][1].bjorkeny # always 0 actually - time_interaction = blob['McTracks'][1].time # same for all muons in a bundle - n_muons = blob['McTracks'].shape[0] - 1 # takes position of time_residual_vertex in 'neutrino' case - - # sum up the energy of all muons - energy = np.sum(blob['McTracks'].energy) - - # all muons in a bundle are parallel, so just take dir of first muon - dir_x, dir_y, dir_z = blob['McTracks'][1].dir_x, blob['McTracks'][1].dir_y, blob['McTracks'][1].dir_z - - # vertex is the weighted (energy) mean of the individual vertices - vertex_pos_x = np.average(blob['McTracks'][1:].pos_x, weights=blob['McTracks'][1:].energy) - vertex_pos_y = np.average(blob['McTracks'][1:].pos_y, weights=blob['McTracks'][1:].energy) - vertex_pos_z = np.average(blob['McTracks'][1:].pos_z, weights=blob['McTracks'][1:].energy) - - track = [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_interaction, run_id, - vertex_pos_x, vertex_pos_y, vertex_pos_z, n_muons] - - elif file_particle_type == 'neutrino': - p = get_primary_track_index(blob) - particle_type = blob['McTracks'][p].type - energy = blob['McTracks'][p].energy - is_cc = blob['McTracks'][p].is_cc - bjorkeny = blob['McTracks'][p].bjorkeny - dir_x, dir_y, dir_z = blob['McTracks'][p].dir_x, blob['McTracks'][p].dir_y, blob['McTracks'][p].dir_z - time_interaction = blob['McTracks'][p].time # actually always 0 for primary neutrino, measured in MC time - vertex_pos_x, vertex_pos_y, vertex_pos_z = blob['McTracks'][p].pos_x, blob['McTracks'][p].pos_y, \ - blob['McTracks'][p].pos_z - - hits_time, triggered = event_hits[:, 3], event_hits[:, 4] - time_residual_vertex = get_time_residual_nu_interaction_mean_triggered_hits(time_interaction, hits_time, triggered) - - track = [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_interaction, run_id, - vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex] - - else: - raise ValueError('The file_particle_type "', str(file_particle_type), '" is not known.') - - if prod_ident is not None: track.append(prod_ident) + blob[self.event_hits_key] = get_hits(blob, self.geo, self.do_mc_hits, self.data_cuts, self.do4d) + blob[self.event_track_key] = get_tracks(blob, self.file_particle_type, blob[self.event_hits_key], self.prod_ident) + return blob - event_track = np.array(track, dtype=np.float64) - return event_track diff --git a/orcasong/histograms_to_files.py b/orcasong/histograms_to_files.py deleted file mode 100644 index 12cd802..0000000 --- a/orcasong/histograms_to_files.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -"""Code that saves the histograms (generated by hits_to_histograms) and the mc event_info to a h5 file""" - -import h5py - -def store_histograms_as_hdf5(hists, mc_infos, filepath_output, compression=(None, None)): - """ - Takes numpy histograms ('images') for a certain projection as well as the mc_info 
('tracks') and saves them to a h5 file. - Parameters - ---------- - hists : ndarray(ndim=2) - Array that contains all histograms for a certain projection. - mc_infos : ndarray(ndim=2) - 2D array containing important MC information for each event_id, [event_id, particle_type, energy, isCC, categorical event types]. - filepath_output : str - Complete filepath of the to be created h5 file. - compression : tuple(None/str, None/int) - Tuple that specifies if a compression filter should be used. Either ('gzip', 1-9) or ('lzf', None). - """ - f = h5py.File(filepath_output, 'w') - - chunks_hists = (32,) + hists.shape[1:] if compression[0] is not None else None - chunks_mc_infos = (32,) + mc_infos.shape[1:] if compression[0] is not None else None - fletcher32 = True if compression[0] is not None else False - - f.create_dataset('x', data=hists, dtype='uint8', fletcher32=fletcher32, chunks=chunks_hists, - compression=compression[0], compression_opts=compression[1], shuffle=False) - f.create_dataset('y', data=mc_infos, dtype='float32', fletcher32=fletcher32, chunks=chunks_mc_infos, - compression=compression[0], compression_opts=compression[1], shuffle=False) - - f.close() \ No newline at end of file diff --git a/orcasong/hits_to_histograms.py b/orcasong/hits_to_histograms.py index e7d5191..3befa02 100644 --- a/orcasong/hits_to_histograms.py +++ b/orcasong/hits_to_histograms.py @@ -2,12 +2,11 @@ # -*- coding: utf-8 -*- """Code for computing 2D/3D/4D histograms ("images") based on the event_hits hit pattern of the file_to_hits.py output""" +import km3pipe as kp import matplotlib as mpl mpl.use('Agg') import matplotlib.pyplot as plt import numpy as np -#import line_profiler # call with kernprof -l -v file.py args -#from memory_profiler import profile from mpl_toolkits.axes_grid1 import make_axes_locatable @@ -40,17 +39,17 @@ def get_time_parameters(event_hits, mode=('trigger_cluster', 'all'), t_start_mar t = t[triggered == 1] t_mean = np.mean(t, dtype=np.float64) - if mode[1] == 'tight_0': + if mode[1] == 'tight-0': # make a tighter cut, 9.5ns / bin with 100 bins, useful for ORCA 115l mupage events t_start = t_mean - 450 # trigger-cluster - 350ns t_end = t_mean + 500 # trigger-cluster + 850ns - elif mode[1] == 'tight_1': + elif mode[1] == 'tight-1': # make a tighter cut, 12.5ns / bin with 60 bins t_start = t_mean - 250 # trigger-cluster - 350ns t_end = t_mean + 500 # trigger-cluster + 850ns - elif mode[1] == 'tight_2': + elif mode[1] == 'tight-2': # make an even tighter cut, 5.8ns / bin with 60 bins t_start = t_mean - 150 # trigger-cluster - 150ns t_end = t_mean + 200 # trigger-cluster + 200ns @@ -79,7 +78,7 @@ return t_start, t_end -def compute_4d_to_2d_histograms(event_hits, x_bin_edges, y_bin_edges, z_bin_edges, n_bins, all_4d_to_2d_hists, timecut, event_track, do2d_plots, pdf_2d_plots): +def compute_4d_to_2d_histograms(event_hits, event_track, x_bin_edges, y_bin_edges, z_bin_edges, n_bins, timecut, do2d_plots, pdf_2d_plots): """ Computes 2D numpy histogram 'images' from the 4D data and returns the 2D histograms [xy, xz, yz, xt, yt, zt]. @@ -88,16 +87,14 @@ ---------- event_hits : ndarray(ndim=2) 2D array that contains the hits data for a certain event_id. + event_track : ndarray(ndim=2) + Contains the relevant mc_track info for the event in order to get a nice title for the pdf histos.
x_bin_edges, y_bin_edges, z_bin_edges: ndarray(ndim=1) Bin edges for the X/Y/Z-direction. n_bins : tuple of int Contains the number of bins that should be used for each dimension. - all_4d_to_2d_hists : list - List that contains all 2D histogram projections. timecut : tuple(str, str/None) Tuple that defines what timecut should be used in hits_to_histograms. - event_track : ndarray(ndim=2) - Contains the relevant mc_track info for the event in order to get a nice title for the pdf histos. do2d_plots : bool If True, generate 2D matplotlib pdf histograms. pdf_2d_plots : mpl.backends.backend_pdf.PdfPages/None @@ -118,21 +115,19 @@ def compute_4d_to_2d_histograms(event_hits, x_bin_edges, y_bin_edges, z_bin_edge hist_yt = np.histogram2d(y, t, bins=(y_bin_edges, n_bins[3]), range=((min(y_bin_edges), max(y_bin_edges)), (t_start, t_end))) hist_zt = np.histogram2d(z, t, bins=(z_bin_edges, n_bins[3]), range=((min(z_bin_edges), max(z_bin_edges)), (t_start, t_end))) - # Format in classical numpy convention: x along first dim (vertical), y along second dim (horizontal) - all_4d_to_2d_hists.append((np.array(hist_xy[0], dtype=np.uint8), - np.array(hist_xz[0], dtype=np.uint8), - np.array(hist_yz[0], dtype=np.uint8), - np.array(hist_xt[0], dtype=np.uint8), - np.array(hist_yt[0], dtype=np.uint8), - np.array(hist_zt[0], dtype=np.uint8))) - if do2d_plots: - # Format in classical numpy convention: x along first dim (vertical), y along second dim (horizontal) - # Need to take that into account in convert_2d_numpy_hists_to_pdf_image() - # transpose to get typical cartesian convention: y along first dim (vertical), x along second dim (horizontal) hists = [hist_xy, hist_xz, hist_yz, hist_xt, hist_yt, hist_zt] convert_2d_numpy_hists_to_pdf_image(hists, t_start, t_end, pdf_2d_plots, event_track=event_track) # slow! 
takes about 1s per event + hist_xy = kp.dataclasses.NDArray(hist_xy[0][np.newaxis, ...].astype(np.uint8), h5loc='x', title='XY_Event_Images') + hist_xz = kp.dataclasses.NDArray(hist_xz[0][np.newaxis, ...].astype(np.uint8), h5loc='x', title='XZ_Event_Images') + hist_yz = kp.dataclasses.NDArray(hist_yz[0][np.newaxis, ...].astype(np.uint8), h5loc='x', title='YZ_Event_Images') + hist_xt = kp.dataclasses.NDArray(hist_xt[0][np.newaxis, ...].astype(np.uint8), h5loc='x', title='XT_Event_Images') + hist_yt = kp.dataclasses.NDArray(hist_yt[0][np.newaxis, ...].astype(np.uint8), h5loc='x', title='YT_Event_Images') + hist_zt = kp.dataclasses.NDArray(hist_zt[0][np.newaxis, ...].astype(np.uint8), h5loc='x', title='ZT_Event_Images') + + return hist_xy, hist_xz, hist_yz, hist_xt, hist_yt, hist_zt + def convert_2d_numpy_hists_to_pdf_image(hists, t_start, t_end, pdf_2d_plots, event_track=None): """ @@ -154,8 +149,9 @@ def convert_2d_numpy_hists_to_pdf_image(hists, t_start, t_end, pdf_2d_plots, eve fig = plt.figure(figsize=(10, 13)) if event_track is not None: particle_type = {16: 'Tau', -16: 'Anti-Tau', 14: 'Muon', -14: 'Anti-Muon', 12: 'Electron', -12: 'Anti-Electron', 'isCC': ['NC', 'CC']} - event_info = {'event_id': str(int(event_track[0])), 'energy': str(event_track[2]), - 'particle_type': particle_type[int(event_track[1])], 'interaction_type': particle_type['isCC'][int(event_track[3])]} + event_info = {'event_id': str(int(event_track.event_id[0])), 'energy': str(event_track.energy[0]), + 'particle_type': particle_type[int(event_track.particle_type[0])], + 'interaction_type': particle_type['isCC'][int(event_track.is_cc[0])]} title = event_info['particle_type'] + '-' + event_info['interaction_type'] + ', Event ID: ' + event_info['event_id'] + ', Energy: ' + event_info['energy'] + ' GeV' props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) fig.suptitle(title, usetex=False, horizontalalignment='center', size='xx-large', bbox=props) @@ -163,7 +159,7 @@ def convert_2d_numpy_hists_to_pdf_image(hists, t_start, t_end, pdf_2d_plots, eve t_diff = t_end - t_start axes_xy = plt.subplot2grid((3, 2), (0, 0), title='XY - projection', xlabel='X Position [m]', ylabel='Y Position [m]', aspect='equal', xlim=(-175, 175), ylim=(-175, 175)) - axes_xz = plt.subplot2grid((3, 2), (0, 1), title='XZ - projection', xlabel='X Position [m]', ylabel='Z Position [m]', aspect='equal', xlim=(-175, 175), ylim=(-57.8, 400)) + axes_xz = plt.subplot2grid((3, 2), (0, 1), title='XZ - projection', xlabel='X Position [m]', ylabel='Z Position [m]', aspect='equal', xlim=(-175, 175), ylim=(-57.8, 292.2)) axes_yz = plt.subplot2grid((3, 2), (1, 0), title='YZ - projection', xlabel='Y Position [m]', ylabel='Z Position [m]', aspect='equal', xlim=(-175, 175), ylim=(-57.8, 292.2)) axes_xt = plt.subplot2grid((3, 2), (1, 1), title='XT - projection', xlabel='X Position [m]', ylabel='Time [ns]', aspect='auto', @@ -178,6 +174,10 @@ def convert_2d_numpy_hists_to_pdf_image(hists, t_start, t_end, pdf_2d_plots, eve h_ab_masked = np.ma.masked_where(hist_ab[0] == 0, hist_ab[0]) a, b = np.meshgrid(hist_ab[1], hist_ab[2]) #2,1 + + # Format in classical numpy convention: x along first dim (vertical), y along second dim (horizontal) + # Need to take that into account in convert_2d_numpy_hists_to_pdf_image() + # transpose to get typical cartesian convention: y along first dim (vertical), x along second dim (horizontal) plot_ab = axes_ab.pcolormesh(a, b, h_ab_masked.T) the_divider = make_axes_locatable(axes_ab) @@ -187,21 +187,19 @@ def 
convert_2d_numpy_hists_to_pdf_image(hists, t_start, t_end, pdf_2d_plots, eve cbar_ab = plt.colorbar(plot_ab, cax=color_axis, ax=axes_ab) cbar_ab.ax.set_ylabel('Hits [#]') - return plot_ab - - plot_xy = fill_subplot(hists[0], axes_xy) - plot_xz = fill_subplot(hists[1], axes_xz) - plot_yz = fill_subplot(hists[2], axes_yz) - plot_xt = fill_subplot(hists[3], axes_xt) - plot_yt = fill_subplot(hists[4], axes_yt) - plot_zt = fill_subplot(hists[5], axes_zt) + fill_subplot(hists[0], axes_xy) + fill_subplot(hists[1], axes_xz) + fill_subplot(hists[2], axes_yz) + fill_subplot(hists[3], axes_xt) + fill_subplot(hists[4], axes_yt) + fill_subplot(hists[5], axes_zt) fig.tight_layout(rect=[0, 0.02, 1, 0.95]) pdf_2d_plots.savefig(fig) plt.close() -def compute_4d_to_3d_histograms(event_hits, x_bin_edges, y_bin_edges, z_bin_edges, n_bins, all_4d_to_3d_hists, timecut): +def compute_4d_to_3d_histograms(event_hits, x_bin_edges, y_bin_edges, z_bin_edges, n_bins, timecut): """ Computes 3D numpy histogram 'images' from the 4D data and returns the 3D histograms [xyz, xyt, xzt, yzt, rzt]. @@ -218,8 +216,6 @@ Bin edges for the X/Y/Z-direction. n_bins : tuple of int Contains the number of bins that should be used for each dimension. - all_4d_to_3d_hists : list - List that contains all 3D histogram projections. timecut : tuple(str, str/None) Tuple that defines what timecut should be used in hits_to_histograms. @@ -243,14 +239,16 @@ rzt = np.concatenate([r, z, t], axis=1) hist_rzt = np.histogramdd(rzt, bins=(n_bins[0], n_bins[2], n_bins[3]), range=((np.amin(r), np.amax(r)), (np.amin(z), np.amax(z)), (t_start, t_end))) - all_4d_to_3d_hists.append((np.array(hist_xyz[0], dtype=np.uint8), - np.array(hist_xyt[0], dtype=np.uint8), - np.array(hist_xzt[0], dtype=np.uint8), - np.array(hist_yzt[0], dtype=np.uint8), - np.array(hist_rzt[0], dtype=np.uint8))) + hist_xyz = kp.dataclasses.NDArray(hist_xyz[0][np.newaxis, ...].astype(np.uint8), h5loc='x', title='XYZ_Event_Images') + hist_xyt = kp.dataclasses.NDArray(hist_xyt[0][np.newaxis, ...].astype(np.uint8), h5loc='x', title='XYT_Event_Images') + hist_xzt = kp.dataclasses.NDArray(hist_xzt[0][np.newaxis, ...].astype(np.uint8), h5loc='x', title='XZT_Event_Images') + hist_yzt = kp.dataclasses.NDArray(hist_yzt[0][np.newaxis, ...].astype(np.uint8), h5loc='x', title='YZT_Event_Images') + hist_rzt = kp.dataclasses.NDArray(hist_rzt[0][np.newaxis, ...].astype(np.uint8), h5loc='x', title='RZT_Event_Images') + + return hist_xyz, hist_xyt, hist_xzt, hist_yzt, hist_rzt -def compute_4d_to_4d_histograms(event_hits, x_bin_edges, y_bin_edges, z_bin_edges, n_bins, all_4d_to_4d_hists, timecut, do4d): +def compute_4d_to_4d_histograms(event_hits, x_bin_edges, y_bin_edges, z_bin_edges, n_bins, timecut, do4d): """ Computes 4D numpy histogram 'images' from the 4D data and returns the 4D histogram, [xyzt / xyzc] @@ -263,8 +261,6 @@ Bin edges for the X/Y/Z-direction. n_bins : tuple of int Contains the number of bins that should be used for each dimension. - all_4d_to_4d_hists : list - List that contains all 4D histogram projections. timecut : tuple(str, str/None) Tuple that defines what timecut should be used in hits_to_histograms.
do4d : tuple(bool, str) @@ -290,4 +286,69 @@ else: raise ValueError('The parameter in do4d[1] ' + str(do4d[1]) + ' is not available. Currently, only time and channel_id are supported.') - all_4d_to_4d_hists.append(np.array(hist_4d[0], dtype=np.uint8)) \ No newline at end of file + proj_name = 'XYZT' if not do4d[1] == 'channel_id' else 'XYZC' + title = proj_name + '_Event_Images' + hist_4d = kp.dataclasses.NDArray(hist_4d[0][np.newaxis, ...].astype(np.uint8), h5loc='x', title=title) + + return hist_4d + + +class HistogramMaker(kp.Module): + """ + Class that takes a km3pipe blob which contains the event_hits and event_track data for one event and returns + a blob with the 2D/3D/4D histograms ('images') of the event. + """ + def configure(self): + """ + Sets up the input arguments of the HistogramMaker class. + """ + self.x_bin_edges = self.require('x_bin_edges') + self.y_bin_edges = self.require('y_bin_edges') + self.z_bin_edges = self.require('z_bin_edges') + self.n_bins = self.require('n_bins') + self.timecut = self.require('timecut') + self.do2d = self.require('do2d') + self.do2d_plots = self.require('do2d_plots') + self.pdf_2d_plots = self.get('pdf_2d_plots') + self.do3d = self.require('do3d') + self.do4d = self.require('do4d') + + self.i = 0 + + def process(self, blob): + """ + Returns a blob (dict), which contains the histogram 'images' for one event. + + Parameters + ---------- + blob : dict + Km3pipe blob which contains all the data from the input file. + + Returns + ------- + blob : dict + Dictionary that contains the histogram 'images' of the event. + + """ + if self.do2d: + blob['xy'], blob['xz'], blob['yz'], blob['xt'], blob['yt'], blob['zt'] = compute_4d_to_2d_histograms( + blob['event_hits'], blob['event_track'], self.x_bin_edges, self.y_bin_edges, self.z_bin_edges, + self.n_bins, self.timecut, self.do2d_plots[0], self.pdf_2d_plots) + + self.i += 1 + if self.pdf_2d_plots is not None and self.i >= self.do2d_plots[1]: + self.pdf_2d_plots.close() + raise StopIteration + + if self.do3d: + blob['xyz'], blob['xyt'], blob['xzt'], blob['yzt'], blob['rzt'] = compute_4d_to_3d_histograms( + blob['event_hits'], self.x_bin_edges, self.y_bin_edges, self.z_bin_edges, + self.n_bins, self.timecut) + + if self.do4d[0]: + proj_key = 'xyzt' if not self.do4d[1] == 'channel_id' else 'xyzc' + + blob[proj_key] = compute_4d_to_4d_histograms(blob['event_hits'], self.x_bin_edges, self.y_bin_edges, self.z_bin_edges, + self.n_bins, self.timecut, self.do4d) + + return blob \ No newline at end of file diff --git a/setup.py b/setup.py index d66b46c..245c645 100644 --- a/setup.py +++ b/setup.py @@ -1,18 +1,27 @@ #!/usr/bin/env python from setuptools import setup, find_packages +from pkg_resources import get_distribution, DistributionNotFound with open('requirements.txt') as fobj: requirements = [l.strip() for l in fobj.readlines()] setup( name='orcasong', - version='1.0', - description='Makes images for a NN based on the hit information of neutrino events in the neutrino telescope KM3NeT-ORCA', - url='https://github.com/ViaFerrata/OrcaSong', + description='Makes images for a NN based on the hit information of neutrino events in the neutrino telescope KM3NeT', + url='https://git.km3net.de/ml/OrcaSong', author='Michael Moser', author_email='mmoser@km3net.de, michael.m.moser@fau.de', license='AGPL', install_requires=requirements, packages=find_packages(), include_package_data=True, -) \ No
newline at end of file + entry_points={'console_scripts': ['make_nn_images=orcasong.data_to_images:main']}, + setup_requires=['setuptools_scm'], + use_scm_version={ + 'write_to': 'orcasong/version.txt', + 'tag_regex': r'^(?P<prefix>v)?(?P<version>[^\+]+)(?P<suffix>.*)?$', + }, + +) + +__author__ = 'Michael Moser' \ No newline at end of file diff --git a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-c.toml b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-c.toml similarity index 96% rename from orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-c.toml rename to user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-c.toml index 07b4ad8..6cb1553 100644 --- a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-c.toml +++ b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-c.toml @@ -31,5 +31,5 @@ --det_geo = 'Orca_115l_23m_h_9m_v' --do4d_mode = 'channel_id' --timecut_mode = 'trigger_cluster' ---timecut_timespan = 'tight_0' +--timecut_timespan = 'tight-0' --prod_ident = 2 # only for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod. \ No newline at end of file diff --git a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-t.toml b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-t.toml similarity index 96% rename from orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-t.toml rename to user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-t.toml index 26a49c2..11ca230 100644 --- a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-t.toml +++ b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_1-5GeV_xyz-t.toml @@ -31,5 +31,5 @@ --det_geo = 'Orca_115l_23m_h_9m_v' --do4d_mode = 'time' --timecut_mode = 'trigger_cluster' ---timecut_timespan = 'tight_0' +--timecut_timespan = 'tight-0' --prod_ident = 2 # only for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod. \ No newline at end of file diff --git a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-c.toml b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-c.toml similarity index 96% rename from orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-c.toml rename to user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-c.toml index ea314ee..13f90d0 100644 --- a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-c.toml +++ b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-c.toml @@ -31,5 +31,5 @@ --det_geo = 'Orca_115l_23m_h_9m_v' --do4d_mode = 'channel_id' --timecut_mode = 'trigger_cluster' ---timecut_timespan = 'tight_0' +--timecut_timespan = 'tight-0' --prod_ident = 1 # only for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod.
\ No newline at end of file diff --git a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-t.toml b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-t.toml similarity index 96% rename from orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-t.toml rename to user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-t.toml index ea41e02..129186a 100644 --- a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-t.toml +++ b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_3-100GeV_xyz-t.toml @@ -31,5 +31,5 @@ --det_geo = 'Orca_115l_23m_h_9m_v' --do4d_mode = 'time' --timecut_mode = 'trigger_cluster' ---timecut_timespan = 'tight_0' +--timecut_timespan = 'tight-0' --prod_ident = 1 # only for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod. \ No newline at end of file diff --git a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-c.toml b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-c.toml similarity index 96% rename from orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-c.toml rename to user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-c.toml index 2f9aa05..bcaa0b3 100644 --- a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-c.toml +++ b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-c.toml @@ -31,5 +31,5 @@ --det_geo = 'Orca_115l_23m_h_9m_v' --do4d_mode = 'channel_id' --timecut_mode = 'trigger_cluster' ---timecut_timespan = 'tight_0' +--timecut_timespan = 'tight-0' --prod_ident = 3 # for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod ; mupage: 3 ; random_noise: 4 \ No newline at end of file diff --git a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-t.toml b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-t.toml similarity index 96% rename from orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-t.toml rename to user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-t.toml index 1e705c1..e594752 100644 --- a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-t.toml +++ b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_mupage_xyz-t.toml @@ -31,5 +31,5 @@ --det_geo = 'Orca_115l_23m_h_9m_v' --do4d_mode = 'time' --timecut_mode = 'trigger_cluster' ---timecut_timespan = 'tight_0' +--timecut_timespan = 'tight-0' --prod_ident = 3 # for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod ; mupage: 3 ; random_noise: 4 \ No newline at end of file diff --git a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-c.toml b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-c.toml similarity index 96% rename from orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-c.toml rename to user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-c.toml index ab87ea9..2f070c2 100644 --- a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-c.toml +++ b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-c.toml @@ -31,5 +31,5 @@ --det_geo = 'Orca_115l_23m_h_9m_v' --do4d_mode = 'channel_id' --timecut_mode = 'trigger_cluster' ---timecut_timespan = 'tight_0' +--timecut_timespan = 'tight-0' 
--prod_ident = 4 # for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod ; mupage: 3 ; random_noise: 4 \ No newline at end of file diff --git a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-t.toml b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-t.toml similarity index 96% rename from orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-t.toml rename to user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-t.toml index 1db8329..9a0940d 100644 --- a/orcasong/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-t.toml +++ b/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-t.toml @@ -31,5 +31,5 @@ --det_geo = 'Orca_115l_23m_h_9m_v' --do4d_mode = 'time' --timecut_mode = 'trigger_cluster' ---timecut_timespan = 'tight_0' +--timecut_timespan = 'tight-0' --prod_ident = 4 # for neutrinos: 1: 3-100 GeV prod, 2: 1-5 GeV prod ; mupage: 3 ; random_noise: 4 \ No newline at end of file diff --git a/orcasong/config/orca_115l_regression/conf_ORCA_115l_1-5GeV_xyz-c.toml b/user/config/orca_115l_regression/conf_ORCA_115l_1-5GeV_xyz-c.toml similarity index 100% rename from orcasong/config/orca_115l_regression/conf_ORCA_115l_1-5GeV_xyz-c.toml rename to user/config/orca_115l_regression/conf_ORCA_115l_1-5GeV_xyz-c.toml diff --git a/orcasong/config/orca_115l_regression/conf_ORCA_115l_1-5GeV_xyz-t.toml b/user/config/orca_115l_regression/conf_ORCA_115l_1-5GeV_xyz-t.toml similarity index 100% rename from orcasong/config/orca_115l_regression/conf_ORCA_115l_1-5GeV_xyz-t.toml rename to user/config/orca_115l_regression/conf_ORCA_115l_1-5GeV_xyz-t.toml diff --git a/orcasong/config/orca_115l_regression/conf_ORCA_115l_3-100GeV_xyz-c.toml b/user/config/orca_115l_regression/conf_ORCA_115l_3-100GeV_xyz-c.toml similarity index 96% rename from orcasong/config/orca_115l_regression/conf_ORCA_115l_3-100GeV_xyz-c.toml rename to user/config/orca_115l_regression/conf_ORCA_115l_3-100GeV_xyz-c.toml index 6874394..652fc8b 100644 --- a/orcasong/config/orca_115l_regression/conf_ORCA_115l_3-100GeV_xyz-c.toml +++ b/user/config/orca_115l_regression/conf_ORCA_115l_3-100GeV_xyz-c.toml @@ -31,5 +31,5 @@ --det_geo = 'Orca_115l_23m_h_9m_v' --do4d_mode = 'channel_id' --timecut_mode = 'trigger_cluster' ---timecut_timespan = 'tight_1' +--timecut_timespan = 'tight-1' --prod_ident = 1 \ No newline at end of file diff --git a/orcasong/config/orca_115l_regression/conf_ORCA_115l_3-100GeV_xyz-t.toml b/user/config/orca_115l_regression/conf_ORCA_115l_3-100GeV_xyz-t.toml similarity index 96% rename from orcasong/config/orca_115l_regression/conf_ORCA_115l_3-100GeV_xyz-t.toml rename to user/config/orca_115l_regression/conf_ORCA_115l_3-100GeV_xyz-t.toml index 6534de0..8e7b8dd 100644 --- a/orcasong/config/orca_115l_regression/conf_ORCA_115l_3-100GeV_xyz-t.toml +++ b/user/config/orca_115l_regression/conf_ORCA_115l_3-100GeV_xyz-t.toml @@ -31,5 +31,5 @@ --det_geo = 'Orca_115l_23m_h_9m_v' --do4d_mode = 'time' --timecut_mode = 'trigger_cluster' ---timecut_timespan = 'tight_1' +--timecut_timespan = 'tight-1' --prod_ident = 1 \ No newline at end of file diff --git a/orcasong/job_logs/cout/.gitkeep b/user/job_logs/cout/.gitkeep similarity index 100% rename from orcasong/job_logs/cout/.gitkeep rename to user/job_logs/cout/.gitkeep diff --git a/orcasong/job_submission_scripts/submit_data_to_images.sh b/user/job_submission_scripts/submit_data_to_images.sh similarity index 63% rename from 
orcasong/job_submission_scripts/submit_data_to_images.sh rename to user/job_submission_scripts/submit_data_to_images.sh index a25de70..b246b7d 100644 --- a/orcasong/job_submission_scripts/submit_data_to_images.sh +++ b/user/job_submission_scripts/submit_data_to_images.sh @@ -16,14 +16,14 @@ # random_noise: 500 files, with files_per_job= -#--- USER INPUT ---## +#--- USER INPUT ---# # load env, only working for conda env as of now python_env_folder=/home/hpc/capn/mppi033h/.virtualenv/python_3_env/ -code_folder=/home/woody/capn/mppi033h/Code/OrcaSong/orcasong +code_folder=/home/woody/capn/mppi033h/Code/OrcaSong -detx_filepath=${code_folder}/detx_files/orca_115strings_av23min20mhorizontal_18OMs_alt9mvertical_v1.detx -config_file=${code_folder}/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-c.toml +detx_filepath=${code_folder}/orcasong/detx_files/orca_115strings_av23min20mhorizontal_18OMs_alt9mvertical_v1.detx +config_file=${code_folder}/user/config/orca_115l_mupage_rn_neutr_classifier/conf_ORCA_115l_random_noise_xyz-c.toml particle_type=random_noise mc_prod=random_noise @@ -33,9 +33,7 @@ mc_prod=random_noise # For mupage: n=1000 with PBS -l nodes=1:ppn=4:sl32g,walltime=15:00:00 files_per_job=50 -n_cores=2 # number of available CPU cores, be careful about memory as well! Currently only working for 4 cores! - -#--- USER INPUT ---## +#--- USER INPUT ---# # setup @@ -72,34 +70,25 @@ folder="${folder_ip_files_arr[${mc_prod}]}" # run -no_of_loops=$((${files_per_job}/${n_cores})) # divide by 4 cores -> e.g, 15 4-core loops needed for files_per_job=60 +no_of_loops=$((${files_per_job}/4)) # divide by 4 cores -> e.g., 15 4-core loops needed for files_per_job=60 file_no_start=$((1+((${n}-1) * ${files_per_job}))) # filenumber of the first file that is being processed by this script (depends on JobArray variable 'n') # currently only working for 4 cores -#for (( k=1; k<=${no_of_loops}; k++ )) -#do -# file_no_loop_start=$((${file_no_start}+(k-1)*${n_cores})) -# thread1=${file_no_loop_start} -# thread2=$((${file_no_loop_start} + 1)) -# thread3=$((${file_no_loop_start} + 2)) -# thread4=$((${file_no_loop_start} + 3)) -# -# (time taskset -c 0 python ${code_folder}/data_to_images.py -c ${config_file} ${folder}/${filename}.${thread1}.h5 ${detx_filepath} > ./job_logs/cout/${filename}.${thread1}.txt) & -# (time taskset -c 1 python ${code_folder}/data_to_images.py -c ${config_file} ${folder}/${filename}.${thread2}.h5 ${detx_filepath} > ./job_logs/cout/${filename}.${thread2}.txt) & -# (time taskset -c 2 python ${code_folder}/data_to_images.py -c ${config_file} ${folder}/${filename}.${thread3}.h5 ${detx_filepath} > ./job_logs/cout/${filename}.${thread3}.txt) & -# (time taskset -c 3 python ${code_folder}/data_to_images.py -c ${config_file} ${folder}/${filename}.${thread4}.h5 ${detx_filepath} > ./job_logs/cout/${filename}.${thread4}.txt) & -# wait -#done - for (( k=1; k<=${no_of_loops}; k++ )) do - file_no_loop_start=$((${file_no_start}+(k-1)*${n_cores})) + file_no_loop_start=$((${file_no_start}+(k-1)*4)) thread1=${file_no_loop_start} thread2=$((${file_no_loop_start} + 1)) + thread3=$((${file_no_loop_start} + 2)) + thread4=$((${file_no_loop_start} + 3)) - (time taskset -c 0 python ${code_folder}/data_to_images.py -c ${config_file} ${folder}/${filename}.${thread1}.h5 ${detx_filepath} > ./job_logs/cout/${filename}.${thread1}.txt) & - (time taskset -c 1 python ${code_folder}/data_to_images.py -c ${config_file} ${folder}/${filename}.${thread2}.h5 ${detx_filepath} >
./job_logs/cout/${filename}.${thread2}.txt) & + (time taskset -c 0 python ${code_folder}/orcasong/data_to_images.py -c ${config_file} ${folder}/${filename}.${thread1}.h5 ${detx_filepath} > ${code_folder}/user/job_logs/cout/${filename}.${thread1}.txt) & + (time taskset -c 1 python ${code_folder}/orcasong/data_to_images.py -c ${config_file} ${folder}/${filename}.${thread2}.h5 ${detx_filepath} > ${code_folder}/user/job_logs/cout/${filename}.${thread2}.txt) & + (time taskset -c 2 python ${code_folder}/orcasong/data_to_images.py -c ${config_file} ${folder}/${filename}.${thread3}.h5 ${detx_filepath} > ${code_folder}/user/job_logs/cout/${filename}.${thread3}.txt) & + (time taskset -c 3 python ${code_folder}/orcasong/data_to_images.py -c ${config_file} ${folder}/${filename}.${thread4}.h5 ${detx_filepath} > ${code_folder}/user/job_logs/cout/${filename}.${thread4}.txt) & wait done + + -- GitLab
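Note on the setuptools_scm block in setup.py: 'use_scm_version' derives the package version from the latest git tag at build time, and 'write_to' dumps that version into a plain-text file inside the package (orcasong/version.txt) so it can still be read where no git metadata is available. The 'tag_regex' decides which part of a tag name becomes the version string. A minimal sketch of what this regex extracts, in plain Python with hypothetical tag names chosen for illustration:

    import re

    # Same pattern as in setup.py: an optional 'v' prefix, then the version
    # (everything up to the first '+'), then an arbitrary suffix.
    TAG_REGEX = r'^(?P<prefix>v)?(?P<version>[^\+]+)(?P<suffix>.*)?$'

    for tag in ('v1.0.2', '1.0.2', 'v1.2b2+local'):
        m = re.match(TAG_REGEX, tag)
        print(tag, '->', m.group('version'))
    # prints:
    #   v1.0.2 -> 1.0.2
    #   1.0.2 -> 1.0.2
    #   v1.2b2+local -> 1.2b2

Tagging a release as either 'v1.0.2' or '1.0.2' therefore yields the same version string.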
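Note on the loop arithmetic in submit_data_to_images.sh: each job-array task n claims a contiguous block of files_per_job input files and works through it four files at a time, one file per taskset-pinned core. A small sketch of the resulting file-number bookkeeping (Python for illustration only; files_for_job is a hypothetical helper mirroring the bash arithmetic):

    def files_for_job(n, files_per_job=50, cores=4):
        """Return the input-file numbers handled by job-array task n."""
        file_no_start = 1 + (n - 1) * files_per_job  # first file of this task
        no_of_loops = files_per_job // cores         # bash $(( )) also truncates
        files = []
        for k in range(1, no_of_loops + 1):
            loop_start = file_no_start + (k - 1) * cores
            files.extend(range(loop_start, loop_start + cores))
        return files

    # Task n=2 with the script's files_per_job=50 covers files 51..98:
    print(files_for_job(2)[0], files_for_job(2)[-1])  # 51 98

Because the integer division truncates, a files_per_job that is not a multiple of 4 leaves a remainder of files unprocessed (with files_per_job=50, the last two files of each task's block are skipped), so files_per_job should be chosen divisible by 4.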