diff --git a/orcasong/core.py b/orcasong/core.py index a91c4d9d219f8dcfb3eeacf359b1ed99382f7929..5840cc7799b433ba58ca95b9e4b334acb9a282b2 100644 --- a/orcasong/core.py +++ b/orcasong/core.py @@ -9,7 +9,7 @@ import orcasong.modules as modules import orcasong.plotting.plot_binstats as plot_binstats -__author__ = 'Stefan Reck' +__author__ = "Stefan Reck" class BaseProcessor: @@ -86,18 +86,22 @@ class BaseProcessor: each pipeline. """ - def __init__(self, extractor=None, - det_file=None, - correct_mc_time=True, - center_time=True, - add_t0=False, - correct_timeslew=True, - center_hits_to=None, - event_skipper=None, - chunksize=32, - keep_event_info=False, - overwrite=True, - mc_info_to_float64=True): + + def __init__( + self, + extractor=None, + det_file=None, + correct_mc_time=True, + center_time=True, + add_t0=False, + correct_timeslew=True, + center_hits_to=None, + event_skipper=None, + chunksize=32, + keep_event_info=False, + overwrite=True, + mc_info_to_float64=True, + ): self.extractor = extractor self.det_file = det_file self.correct_mc_time = correct_mc_time @@ -113,7 +117,7 @@ class BaseProcessor: self.n_statusbar = 1000 self.n_memory_observer = 1000 - self.complib = 'zlib' + self.complib = "zlib" self.complevel = 1 self.flush_frequency = 1000 self.seed = 42 @@ -132,8 +136,10 @@ class BaseProcessor: """ if outfile is None: - outfile = os.path.join(os.getcwd(), "{}_dl.h5".format( - os.path.splitext(os.path.basename(infile))[0])) + outfile = os.path.join( + os.getcwd(), + "{}_dl.h5".format(os.path.splitext(os.path.basename(infile))[0]), + ) if not self.overwrite: if os.path.isfile(outfile): raise FileExistsError(f"File exists: {outfile}") @@ -160,8 +166,8 @@ class BaseProcessor: outfiles = [] for infile in infiles: outfile = os.path.join( - outfolder, - f"{os.path.splitext(os.path.basename(infile))[0]}_dl.h5") + outfolder, f"{os.path.splitext(os.path.basename(infile))[0]}_dl.h5" + ) outfiles.append(outfile) self.run(infile, outfile) return outfiles @@ -189,16 
+195,24 @@ class BaseProcessor: if self.correct_mc_time: cmpts.append((km.mc.MCTimeCorrector, {})) if self.det_file: - cmpts.append((modules.DetApplier, { - "det_file": self.det_file, - "correct_timeslew": self.correct_timeslew, - "center_hits_to": self.center_hits_to, - })) + cmpts.append( + ( + modules.DetApplier, + { + "det_file": self.det_file, + "correct_timeslew": self.correct_timeslew, + "center_hits_to": self.center_hits_to, + }, + ) + ) if any((self.center_time, self.add_t0)): - cmpts.append((modules.TimePreproc, { - "add_t0": self.add_t0, - "center_time": self.center_time})) + cmpts.append( + ( + modules.TimePreproc, + {"add_t0": self.add_t0, "center_time": self.center_time}, + ) + ) return cmpts @abstractmethod @@ -210,26 +224,37 @@ class BaseProcessor: """ Modules that postproc and save the events. """ cmpts = [] if self.extractor is not None: - cmpts.append((modules.McInfoMaker, { - "extractor": self.extractor, - "to_float64": self.mc_info_to_float64, - "store_as": "mc_info"})) + cmpts.append( + ( + modules.McInfoMaker, + { + "extractor": self.extractor, + "to_float64": self.mc_info_to_float64, + "store_as": "mc_info", + }, + ) + ) if self.event_skipper is not None: - cmpts.append((modules.EventSkipper, { - "event_skipper": self.event_skipper})) + cmpts.append((modules.EventSkipper, {"event_skipper": self.event_skipper})) - keys_keep = ['samples', 'mc_info', "header", "raw_header"] + keys_keep = ["samples", "mc_info", "header", "raw_header"] if self.keep_event_info: - keys_keep.append('EventInfo') + keys_keep.append("EventInfo") cmpts.append((km.common.Keep, {"keys": keys_keep})) - cmpts.append((kp.io.HDF5Sink, { - "filename": outfile, - "complib": self.complib, - "complevel": self.complevel, - "chunksize": self.chunksize, - "flush_frequency": self.flush_frequency})) + cmpts.append( + ( + kp.io.HDF5Sink, + { + "filename": outfile, + "complib": self.complib, + "complevel": self.complevel, + "chunksize": self.chunksize, + "flush_frequency": 
self.flush_frequency, + }, + ) + ) return cmpts def finish_file(self, f, summary): @@ -276,10 +301,8 @@ class FileBinner(BaseProcessor): Options of the BaseProcessor. """ - def __init__(self, bin_edges_list, - add_bin_stats=True, - hit_weights=None, - **kwargs): + + def __init__(self, bin_edges_list, add_bin_stats=True, hit_weights=None, **kwargs): self.bin_edges_list = bin_edges_list self.add_bin_stats = add_bin_stats self.hit_weights = hit_weights @@ -289,11 +312,18 @@ class FileBinner(BaseProcessor): """ Generate nD images. """ cmpts = [] if self.add_bin_stats: - cmpts.append((modules.BinningStatsMaker, { - "bin_edges_list": self.bin_edges_list})) - cmpts.append((modules.ImageMaker, { - "bin_edges_list": self.bin_edges_list, - "hit_weights": self.hit_weights})) + cmpts.append( + (modules.BinningStatsMaker, {"bin_edges_list": self.bin_edges_list}) + ) + cmpts.append( + ( + modules.ImageMaker, + { + "bin_edges_list": self.bin_edges_list, + "hit_weights": self.hit_weights, + }, + ) + ) return cmpts def finish_file(self, f, summary): @@ -327,7 +357,8 @@ class FileBinner(BaseProcessor): if save_plot: plot_binstats.plot_hist_of_files( - files=outfiles, save_as=outfolder+"binning_hist.pdf") + files=outfiles, save_as=outfolder + "binning_hist.pdf" + ) return outfiles def get_names_and_shape(self): @@ -371,23 +402,83 @@ class FileGraph(BaseProcessor): Options of the BaseProcessor. 
""" - def __init__(self, max_n_hits, - time_window=None, - hit_infos=None, - **kwargs): + + def __init__(self, max_n_hits, time_window=None, hit_infos=None, **kwargs): self.max_n_hits = max_n_hits self.time_window = time_window self.hit_infos = hit_infos super().__init__(**kwargs) def get_cmpts_main(self): - return [((modules.PointMaker, { - "max_n_hits": self.max_n_hits, - "time_window": self.time_window, - "hit_infos": self.hit_infos, - "dset_n_hits": "EventInfo"}))] + return [ + ( + ( + modules.PointMaker, + { + "max_n_hits": self.max_n_hits, + "time_window": self.time_window, + "hit_infos": self.hit_infos, + "dset_n_hits": "EventInfo", + }, + ) + ) + ] def finish_file(self, f, summary): super().finish_file(f, summary) for i, hit_info in enumerate(summary["PointMaker"]["hit_infos"]): f["x"].attrs.create(f"hit_info_{i}", hit_info) + + +class TriggeredFileGraph(BaseProcessor): + """ + Turn km3 events to graph data. + + The resulting file will have a dataset "x" of shape + (?, max_n_hits, len(hit_infos) + 1). + The column names of the last axis (i.e. hit_infos) are saved + as attributes of the dataset (f["x"].attrs). + The last column will always be called 'is_valid', and its 0 if + the entry is padded, and 1 otherwise. + + Parameters + ---------- + max_n_hits : int + Maximum number of hits that gets saved per event. If an event has + more, some will get cut randomly! + time_window : tuple, optional + Two ints (start, end). Hits outside of this time window will be cut + away (based on 'Hits/time'). Default: Keep all hits. + hit_infos : tuple, optional + Which entries in the '/Hits' Table will be kept. E.g. pos_x, time, ... + Default: Keep all entries. + kwargs + Options of the BaseProcessor. 
+ + """ + + def __init__(self, max_n_hits, time_window=None, hit_infos=None, **kwargs): + self.max_n_hits = max_n_hits + self.time_window = time_window + self.hit_infos = hit_infos + super().__init__(**kwargs) + + def get_cmpts_main(self): + return [ + ( + ( + modules.TriggeredPointMaker, + { + "max_n_hits": self.max_n_hits, + "time_window": self.time_window, + "hit_infos": self.hit_infos, + "dset_n_hits": "EventInfo", + }, + ) + ) + ] + + def finish_file(self, f, summary): + super().finish_file(f, summary) + for i, hit_info in enumerate(summary["TriggeredPointMaker"]["hit_infos"]): + f["x"].attrs.create(f"hit_info_{i}", hit_info) \ No newline at end of file diff --git a/orcasong/modules.py b/orcasong/modules.py index 7645fb776073121d63c4e076ea9cd41342bdc423..0ecb8cca2b9fec94dc26720993b9ff699f549c5e 100644 --- a/orcasong/modules.py +++ b/orcasong/modules.py @@ -7,7 +7,7 @@ import km3pipe as kp import km3modules as km import orcasong.plotting.plot_binstats as plot_binstats -__author__ = 'Stefan Reck' +__author__ = "Stefan Reck" class McInfoMaker(kp.Module): @@ -25,8 +25,8 @@ class McInfoMaker(kp.Module): """ def configure(self): - self.extractor = self.require('extractor') - self.store_as = self.require('store_as') + self.extractor = self.require("extractor") + self.store_as = self.require("store_as") self.to_float64 = self.get("to_float64", default=True) def process(self, blob): @@ -41,7 +41,8 @@ class McInfoMaker(kp.Module): else: dtypes = None kp_hist = kp.dataclasses.Table( - track, dtype=dtypes, h5loc='y', name='event_info') + track, dtype=dtypes, h5loc="y", name="event_info" + ) if len(kp_hist) != 1: self.log.warning( "Warning: Extracted mc_info should have len 1, " @@ -66,8 +67,8 @@ class TimePreproc(kp.Module): """ def configure(self): - self.add_t0 = self.get('add_t0', default=False) - self.center_time = self.get('center_time', default=True) + self.add_t0 = self.get("add_t0", default=False) + self.center_time = self.get("center_time", default=True) 
self._print_flags = set() @@ -119,8 +120,8 @@ class ImageMaker(kp.Module): """ def configure(self): - self.bin_edges_list = self.require('bin_edges_list') - self.hit_weights = self.get('hit_weights') + self.bin_edges_list = self.require("bin_edges_list") + self.hit_weights = self.get("hit_weights") self.store_as = "samples" def process(self, blob): @@ -140,7 +141,8 @@ class ImageMaker(kp.Module): hist_one_event = histogram[np.newaxis, ...].astype(np.uint8) kp_hist = kp.dataclasses.NDArray( - hist_one_event, h5loc='x', title=name + "event_images") + hist_one_event, h5loc="x", title=name + "event_images" + ) blob[self.store_as] = kp_hist return blob @@ -175,7 +177,7 @@ class BinningStatsMaker(kp.Module): """ def configure(self): - self.bin_edges_list = self.require('bin_edges_list') + self.bin_edges_list = self.require("bin_edges_list") self.res_increase = self.get("res_increase", default=5) self.bin_plot_freq = 1 @@ -202,8 +204,7 @@ class BinningStatsMaker(kp.Module): Increase resolution of given binning. """ increased_n_bins = (len(bin_edges) - 1) * self.res_increase + 1 - bin_edges = np.linspace( - bin_edges[0], bin_edges[-1], increased_n_bins) + bin_edges = np.linspace(bin_edges[0], bin_edges[-1], increased_n_bins) return bin_edges @@ -222,8 +223,7 @@ class BinningStatsMaker(kp.Module): out_neg = data[data < np.min(hist_bin_edges)].size # get all hits which are not cut off by other bin edges - data = hits[bin_name][self._is_in_limits( - hits, excluded=bin_name)] + data = hits[bin_name][self._is_in_limits(hits, excluded=bin_name)] hist = np.histogram(data, bins=hist_bin_edges)[0] self.hists[bin_name]["hist"] += hist @@ -250,14 +250,15 @@ class BinningStatsMaker(kp.Module): return self.hists def _is_in_limits(self, hits, excluded=None): - """ Get which hits are in the limits defined by ALL bin edges - (except for given one).
""" + """Get which hits are in the limits defined by ALL bin edges + (except for given one).""" inside = None for dfield, edges in self.bin_edges_list: if dfield == excluded: continue - is_in = np.logical_and(hits[dfield] >= min(edges), - hits[dfield] <= max(edges)) + is_in = np.logical_and( + hits[dfield] >= min(edges), hits[dfield] <= max(edges) + ) if inside is None: inside = is_in else: @@ -289,6 +290,7 @@ class PointMaker(kp.Module): this name (usually this is EventInfo). """ + def configure(self): self.max_n_hits = self.require("max_n_hits") self.hit_infos = self.get("hit_infos", default=None) @@ -301,10 +303,12 @@ class PointMaker(kp.Module): self.hit_infos = blob["Hits"].dtype.names points, n_hits = self.get_points(blob) blob[self.store_as] = kp.NDArray( - np.expand_dims(points, 0), h5loc="x", title="nodes") + np.expand_dims(points, 0), h5loc="x", title="nodes" + ) if self.dset_n_hits: blob[self.dset_n_hits] = blob[self.dset_n_hits].append_columns( - "n_hits_intime", n_hits) + "n_hits_intime", n_hits + ) return blob def get_points(self, blob): @@ -322,23 +326,24 @@ class PointMaker(kp.Module): Number of hits in the given time window.
""" - points = np.zeros( - (self.max_n_hits, len(self.hit_infos) + 1), dtype="float32") + points = np.zeros((self.max_n_hits, len(self.hit_infos) + 1), dtype="float32") hits = blob["Hits"] if self.time_window is not None: # remove hits outside of time window - hits = hits[np.logical_and( - hits["time"] >= self.time_window[0], - hits["time"] <= self.time_window[1], - )] + hits = hits[ + np.logical_and( + hits["time"] >= self.time_window[0], + hits["time"] <= self.time_window[1], + ) + ] n_hits = len(hits) if n_hits > self.max_n_hits: # if there are too many hits, take random ones, but keep order indices = np.arange(n_hits) np.random.shuffle(indices) - which = indices[:self.max_n_hits] + which = indices[: self.max_n_hits] which.sort() hits = hits[which] @@ -346,11 +351,108 @@ class PointMaker(kp.Module): data = hits[which] points[:n_hits, i] = data # last column is whether there was a hit or no - points[:n_hits, -1] = 1. + points[:n_hits, -1] = 1.0 return points, n_hits def finish(self): - return {"hit_infos": tuple(self.hit_infos) + ("is_valid", )} + return {"hit_infos": tuple(self.hit_infos) + ("is_valid",)} + + +class TriggeredPointMaker(kp.Module): + """ + Quick and dirty adaptation of PointMaker to only use triggered hits. + + Store individual hit info from "Hits" in the blob as 'samples'. + + Used for graph networks. + + Attributes + ---------- + max_n_hits : int + Maximum number of hits that gets saved per event. If an event has + more, some will get cut randomly! + time_window : tuple, optional + Two ints (start, end). Hits outside of this time window will be cut + away (based on 'Hits/time'). + Default: Keep all hits. + hit_infos : tuple, optional + Which entries in the '/Hits' Table will be kept. E.g. pos_x, time, ... + Default: Keep all entries. + dset_n_hits : str, optional + If given, store the number of hits that are in the time window + as a new column called 'n_hits_intime' in the dataset with + this name (usually this is EventInfo).
+ + """ + + def configure(self): + self.max_n_hits = self.require("max_n_hits") + self.hit_infos = self.get("hit_infos", default=None) + self.time_window = self.get("time_window", default=None) + self.dset_n_hits = self.get("dset_n_hits", default=None) + self.store_as = "samples" + + def process(self, blob): + if self.hit_infos is None: + self.hit_infos = blob["Hits"].dtype.names + points, n_hits = self.get_points(blob) + blob[self.store_as] = kp.NDArray( + np.expand_dims(points, 0), h5loc="x", title="nodes" + ) + if self.dset_n_hits: + blob[self.dset_n_hits] = blob[self.dset_n_hits].append_columns( + "n_hits_intime", n_hits + ) + return blob + + def get_points(self, blob): + """ + Get the desired hit infos from the blob. + + Returns + ------- + points : np.array + The hit infos of this event as a 2d matrix. The number of rows is + fixed to the given max_n_hits. Each of the self.hit_infos + is in one column, plus an additional column which is 1 for + actual hits, and 0 if it's a padded row. + n_hits : int + Number of triggered hits in the given time window.
+ + """ + points = np.zeros((self.max_n_hits, len(self.hit_infos) + 1), dtype="float32") + + hits = blob["Hits"] + triggered = hits.triggered + hits = hits[triggered != 0] + if self.time_window is not None: + # remove hits outside of time window + hits = hits[ + np.logical_and( + hits["time"] >= self.time_window[0], + hits["time"] <= self.time_window[1], + ) + ] + + n_hits = len(hits) + + if n_hits > self.max_n_hits: + # if there are too many hits, take random ones, but keep order + indices = np.arange(n_hits) + np.random.shuffle(indices) + which = indices[: self.max_n_hits] + which.sort() + hits = hits[which] + + for i, which in enumerate(self.hit_infos): + data = hits[which] + points[:n_hits, i] = data + # last column is whether there was a hit or no + points[:n_hits, -1] = 1.0 + return points, n_hits + + def finish(self): + return {"hit_infos": tuple(self.hit_infos) + ("is_valid",)} class EventSkipper(kp.Module): @@ -366,7 +468,7 @@ class EventSkipper(kp.Module): """ def configure(self): - self.event_skipper = self.require('event_skipper') + self.event_skipper = self.require("event_skipper") self._not_skipped = 0 self._skipped = 0 @@ -430,7 +532,8 @@ class DetApplier(kp.Module): ) self._calib_checked = True blob["Hits"] = self.calib.apply( - blob["Hits"], correct_slewing=self.correct_timeslew) + blob["Hits"], correct_slewing=self.correct_timeslew + ) if "McHits" in blob: blob["McHits"] = self.calib.apply(blob["McHits"]) if self.center_hits_to: @@ -472,14 +575,18 @@ class HitRotator(kp.Module): """ def configure(self): - self.theta = self.require('theta') + self.theta = self.require("theta") def process(self, blob): - x = blob['Hits']['x'] - y = blob['Hits']['y'] - - rot_matrix = np.array([[np.cos(self.theta), - np.sin(self.theta)], - [np.sin(self.theta), np.cos(self.theta)]]) + x = blob["Hits"]["x"] + y = blob["Hits"]["y"] + + rot_matrix = np.array( + [ + [np.cos(self.theta), -np.sin(self.theta)], + [np.sin(self.theta), np.cos(self.theta)], + ] + ) x_rot = [] 
y_rot = [] @@ -490,8 +597,7 @@ class HitRotator(kp.Module): x_rot.append(rot[0][0]) y_rot.append(rot[1][0]) - blob['Hits']['x'] = x_rot - blob['Hits']['y'] = y_rot + blob["Hits"]["x"] = x_rot + blob["Hits"]["y"] = y_rot return blob - diff --git a/scripts/concatenate.sh b/scripts/concatenate.sh new file mode 100755 index 0000000000000000000000000000000000000000..1870ae4a056f3537dae3fd622d9c5f2d825f90fe --- /dev/null +++ b/scripts/concatenate.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +source activate orcasong + +DIR=/data/arca/v6 + +concatenate --outfile $DIR/ready_for_training/train_nuecc_vs_nutaushower_triggered_small_v6.h5 $DIR/ml_ready_triggered/train/* +concatenate --outfile $DIR/ready_for_training/test_nuecc_vs_nutaushower_triggered_small_v6.h5 $DIR/ml_ready_triggered/test/* diff --git a/scripts/extract.py b/scripts/extract.py new file mode 100644 index 0000000000000000000000000000000000000000..3eac53ed8fdb25c3f566d9442b9c2490b62a5faa --- /dev/null +++ b/scripts/extract.py @@ -0,0 +1,35 @@ +#!/usr/bin/python -i +from orcasong.core import TriggeredFileGraph +from orcasong.extractors import ( + get_muon_mc_info_extr, + get_neutrino_mc_info_extr, + get_real_data_info_extr, + get_random_noise_mc_info_extr, +) +import numpy as np +import sys +import os + + +inputfile = str(sys.argv[1]) +detectorfile = str(sys.argv[2]) +outputfile = str(sys.argv[3]) + + +# fg = FileGraph(max_n_hits=5000,extractor=get_muon_mc_info_extr(inputfile),det_file=detectorfile,keep_event_info = True) +# fg = FileGraph(max_n_hits=5000,extractor=get_muon_mc_info_extr(inputfile),det_file=detectorfile, +# keep_event_info = True, time_window = [-1000, +7500]) + + +def skip_low_energy(blob): + return blob["mc_info"]["energy"] < 1e5 + + +fg = TriggeredFileGraph( + max_n_hits=5000, + extractor=get_neutrino_mc_info_extr(inputfile), + det_file=detectorfile, + keep_event_info=True, +) + +fg.run(inputfile, outputfile) diff --git a/scripts/extract.sh b/scripts/extract.sh new file mode 100755 index 
0000000000000000000000000000000000000000..e816daa538724b97556565ffa02da422f2b663f9 --- /dev/null +++ b/scripts/extract.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +source /sps/km3net/users/gvermar/OrcaSong/mypython/bin/activate + +detector=/pbs/throng/km3net/detectors/KM3NeT_-00000001_20171212.detx + +# inputfile=/sps/km3net/users/gvermar/arca/v6/raw_h5/mcv6.gsg_nutau-CCHEDIS-shower_1e2-1e8GeV.sirene.jte.jchain.aashower.${i}.h5 +# outputfile=/sps/km3net/users/gvermar/arca/v6/ml_ready_le1e5cut/ML_mcv6.gsg_nutau-CCHEDIS-shower_1e2-1e8GeV.sirene.jte.jchain.aashower.${i}.h5 + +inputfile=/sps/km3net/users/gvermar/arca/v6/raw_h5/mcv6.gsg_nue-CCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.${i}.h5 +outputfile=/sps/km3net/users/gvermar/arca/v6/ml_ready_le1e5cut/ML_mcv6.gsg_nue-CCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.${i}.h5 + +python ${DIR}/OrcaSong/extract.py $inputfile $detector $outputfile + diff --git a/scripts/extract_local.sh b/scripts/extract_local.sh new file mode 100755 index 0000000000000000000000000000000000000000..9a982a53fc22178e8f2e30d2cd6ffb14a5e14c17 --- /dev/null +++ b/scripts/extract_local.sh @@ -0,0 +1,14 @@ +#!/bin/bash +echo "before" $(which python) +source activate orcasong +echo "after" $(which python) +detector=/data/arca/KM3NeT_-00000001_20171212.detx + +# inputfile=/data/arca/v6/raw_h5/mcv6.gsg_nutau-CCHEDIS-shower_1e2-1e8GeV.sirene.jte.jchain.aashower.${i}.h5 +# outputfile=/data/arca/v6/ml_ready_triggered/ml_ready_triggered/ML_mcv6.gsg_nutau-CCHEDIS-shower_1e2-1e8GeV.sirene.jte.jchain.aashower.${i}.h5 + +inputfile=/data/arca/v6/raw_h5/mcv6.gsg_nue-CCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.${i}.h5 +outputfile=/data/arca/v6/ml_ready_triggered/ML_mcv6.gsg_nue-CCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.${i}.h5 + +python /data/OrcaSong/scripts/extract.py $inputfile $detector $outputfile + diff --git a/scripts/extract_localtau.sh b/scripts/extract_localtau.sh new file mode 100755 index 
0000000000000000000000000000000000000000..a379c13eb6fec505c7ee106e923f72b43d15aef3 --- /dev/null +++ b/scripts/extract_localtau.sh @@ -0,0 +1,14 @@ +#!/bin/bash +echo "before" $(which python) +source activate orcasong +echo "after" $(which python) +detector=/data/arca/KM3NeT_-00000001_20171212.detx + +inputfile=/data/arca/v6/raw_h5/mcv6.gsg_nutau-CCHEDIS-shower_1e2-1e8GeV.sirene.jte.jchain.aashower.${i}.h5 +outputfile=/data/arca/v6/ml_ready_triggered/ML_mcv6.gsg_nutau-CCHEDIS-shower_1e2-1e8GeV.sirene.jte.jchain.aashower.${i}.h5 + +# inputfile=/data/arca/v6/raw_h5/mcv6.gsg_nue-CCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.${i}.h5 +# outputfile=/data/arca/v6/ml_ready_triggered/ML_mcv6.gsg_nue-CCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.${i}.h5 + +python /data/OrcaSong/scripts/extract.py $inputfile $detector $outputfile + diff --git a/scripts/run_extract_local.sh b/scripts/run_extract_local.sh new file mode 100644 index 0000000000000000000000000000000000000000..883fa50f098e5d44c72cff407f45fbf66ccad121 --- /dev/null +++ b/scripts/run_extract_local.sh @@ -0,0 +1,6 @@ +export i= + +for i in {1..10} +do + bash extract_local.sh +done \ No newline at end of file diff --git a/scripts/run_extract_localtau.sh b/scripts/run_extract_localtau.sh new file mode 100644 index 0000000000000000000000000000000000000000..4dd8b1eac79497d34f097a054f574140035e4aff --- /dev/null +++ b/scripts/run_extract_localtau.sh @@ -0,0 +1,6 @@ +export i= + +for i in {1..10} +do + bash extract_localtau.sh +done \ No newline at end of file diff --git a/scripts/shuffle.sh b/scripts/shuffle.sh new file mode 100755 index 0000000000000000000000000000000000000000..f143c55dfb8e21465226ac00b6f6fc6e6e1b9287 --- /dev/null +++ b/scripts/shuffle.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +source /pbs/home/a/adomi/mypython/bin/activate + +DIR=/sps/km3net/users/adomi/GNNs + +echo "START" + +h5shuffle2 --output_file $DIR/training/Muons_vs_Neutrinos_shuffled2.h5 $DIR/training/Muons_vs_Neutrinos_shuffled2.h5 +#h5shuffle 
--output_file $DIR/training/Muons_vs_Neutrinos_shuffled.h5 $DIR/training/Muons_vs_Neutrinos.h5 + +echo "END" diff --git a/scripts/submit.sh b/scripts/submit.sh new file mode 100755 index 0000000000000000000000000000000000000000..a6b5669c40d29181e60a9b610dc177db2bf86d45 --- /dev/null +++ b/scripts/submit.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +export DIR=/sps/km3net/users/gvermar/ +export i= + +#set environment + +ccenv python 3.8.6 + + + +for i in {1..10} +do + qsub -V \ + -P P_km3net \ + -l ct=1:00:00 \ + -l vmem=40G \ + -l sps=1 \ + -l fsize=2G \ + -e ${DIR}/logs/ \ + -o ${DIR}/logs/ \ + $DIR/OrcaSong/extract.sh +done diff --git a/scripts/submit_conc.sh b/scripts/submit_conc.sh new file mode 100755 index 0000000000000000000000000000000000000000..a4c674f27d81865ead4e0c1e341227a35a9c1b6a --- /dev/null +++ b/scripts/submit_conc.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +DIR=/sps/km3net/users/gvermar/ + +qsub -V \ + -P P_km3net \ + -l ct=3:00:00 \ + -l vmem=30G \ + -l sps=1 \ + -l fsize=100G \ + -e ${DIR}/logs/ \ + -o ${DIR}/logs/ \ + $DIR/OrcaSong/concatenatetriggered.sh diff --git a/scripts/submit_shuf.sh b/scripts/submit_shuf.sh new file mode 100755 index 0000000000000000000000000000000000000000..e7eeec3d610a555f2a62b4e2ccdea77eaf8f04cc --- /dev/null +++ b/scripts/submit_shuf.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +cd + +. mypython/bin/activate + +DIR=/sps/km3net/users/adomi/GNNs/ + +qsub -V \ + -P P_km3net \ + -l ct=72:00:00 \ + -l vmem=30G \ + -l sps=1 \ + -l fsize=100G \ + -e ${DIR}/logs/ \ + -o ${DIR}/logs/ \ + $DIR/OrcaSong/shuffle.sh diff --git a/scripts/test_extract.sh b/scripts/test_extract.sh new file mode 100644 index 0000000000000000000000000000000000000000..ec958ec8306c25fbd2f309462c5afa06dd35d5b1 --- /dev/null +++ b/scripts/test_extract.sh @@ -0,0 +1,5 @@ +export i=1 +export DIR=/sps/km3net/users/gvermar/ +ccenv python 3.8.6 +. mypython/bin/activate +bash ./extract.sh \ No newline at end of file