Skip to content
Snippets Groups Projects
Commit 3b8ce43e authored by Stefan Reck
Browse files

Merge branch 'ifx_amke_data_split_parser' into 'master'

fix it and remove old stuff

See merge request !33
parents 0c84e925 7dcc8814
Branches 1-addition-to-readme
Tags v3.2.3
1 merge request: !33 fix it and remove old stuff
......@@ -262,7 +262,7 @@ def get_random_noise_mc_info_extr(input_file):
return mc_info_extr
def get_neutrino_mc_info_extr(input_file):
def get_neutrino_mc_info_extr(input_file,prod_identifier=999):
"""
Wrapper function that includes the actual mc_info_extr
......@@ -273,6 +273,9 @@ def get_neutrino_mc_info_extr(input_file):
----------
input_file : km3net data file
Can be online or offline format.
prod_identifier : int
An internal, unofficial identifier to mark the neutrino production. This has to be
defined in a dict before.
Returns
-------
......@@ -399,6 +402,7 @@ def get_neutrino_mc_info_extr(input_file):
"n_gen": n_gen,
"part_number": part_number,
"tau_topology": tau_topology,
"prod_identifier": prod_identifier,
}
# get all the std reco info
......
......@@ -13,13 +13,6 @@ import random
import numpy as np
def get_parser():
# TODO deprecated
raise NotImplementedError(
"make_data_split has been renamed to orcasong make_data_split"
)
def add_parser(subparsers):
parser = subparsers.add_parser(
"make_data_split",
......@@ -29,7 +22,7 @@ def add_parser(subparsers):
"concatenate the files specfied",
)
parser.add_argument(
"config", type=str, help="See example config for detailed information"
"config_file", type=str, help="See example config for detailed information"
)
parser.set_defaults(func=make_split)
......@@ -354,11 +347,6 @@ def make_concatenate_and_shuffle_scripts(cfg):
): # check if /data_split folder exists, if not create it.
os.makedirs(dirpath + "/data_split")
# not available atm...
# chunksize = '' if cfg['chunksize'] is None else ' --chunksize ' + str(cfg['chunksize'])
# complib = '' if cfg['complib'] is None else ' --complib ' + str(cfg['complib'])
# complevel = '' if cfg['complevel'] is None else ' --complevel ' + str(cfg['complevel'])
# make qsub .sh file for concatenating
for listfile_fpath in cfg["output_lists"]:
listfile_fname = os.path.basename(listfile_fpath)
......@@ -379,10 +367,9 @@ def make_concatenate_and_shuffle_scripts(cfg):
f.write("# Concatenate the files in the list\n")
f.write(
"concatenate " + listfile_fpath + " --outfile " + conc_outputfile_fpath
"orcasong concatenate " + listfile_fpath + " --outfile " + conc_outputfile_fpath
)
# at the moment it is not possible to set the comp opts like this+ chunksize + complib + complevel
# make qsub .sh file for shuffling
for listfile_fpath in cfg["output_lists"]:
......@@ -406,39 +393,18 @@ def make_concatenate_and_shuffle_scripts(cfg):
f.write("# Shuffle the h5 file \n")
f.write(
"h5shuffle2 " + conc_outputfile_fpath + " --max_ram 1000000000 \n"
) # fix to 1GB ram; in lyon using a fraction
# is difficult...
# time python shuffle/shuffle_h5.py'
# + delete_flag_shuffle_tool
# + chunksize + complib + complevel
"orcasong h5shuffle2 " + conc_outputfile_fpath)
if cfg["shuffle_delete"]:
f.write("\n")
f.write("rm " + conc_outputfile_fpath + "\n")
def main():
"""
Main function to make the data split.
"""
# load the config
parser = get_parser()
parsed_args = parser.parse_args()
make_split(parsed_args.config)
def make_split(config_file):
# decode config
cfg = toml.load(config_file)
cfg["toml_filename"] = config_file
# set some defaults/Nones - at the moment setting of the com opts is not available!
# if 'chunksize' not in cfg: cfg['chunksize'] = None
# if 'complib' not in cfg: cfg['complib'] = None
# if 'complevel' not in cfg: cfg['complevel'] = None
# read out all the input groups
ip_group_keys = get_all_ip_group_keys(cfg)
......@@ -474,7 +440,3 @@ def make_split(config_file):
# create bash scripts that can be submitted to do the concatenation and shuffle
if cfg["make_qsub_bash_files"] is True:
make_concatenate_and_shuffle_scripts(cfg)
if __name__ == "__main__":
main()
No preview for this file type
No preview for this file type
......@@ -62,14 +62,14 @@ shuffle_delete = false
[neutrino]
dir = "processed_data_neutrino"
run_ids_train = [1, 6767]
run_ids_validate = [1, 6769]
run_ids_train = [1, 12000]
run_ids_validate = [1, 12000]
[muon]
dir = "processed_data_muon"
run_ids_train = [1, 6767]
run_ids_validate = [9999, 6769]
run_ids_train = [1, 12000]
run_ids_validate = [1, 12000]
# --- Input groups : these are the datafiles, that should be concatenated somehow --- #
\ No newline at end of file
# --- Input groups : these are the datafiles, that should be concatenated somehow --- #
......@@ -35,7 +35,7 @@ class TestStdRecoExtractor(TestCase):
max_n_hits=3,
time_window=[0, 50],
hit_infos=["pos_z", "time", "channel_id"],
extractor=extractors.get_neutrino_mc_info_extr(NEUTRINO_FILE),
extractor=extractors.get_neutrino_mc_info_extr(NEUTRINO_FILE,1),
det_file=DET_FILE_NEUTRINO,
add_t0=True,
keep_event_info=True,
......@@ -56,7 +56,7 @@ class TestStdRecoExtractor(TestCase):
time_window=[0, 50],
hit_infos=["pos_z", "time", "channel_id"],
extractor=extractors.get_neutrino_mc_info_extr(
NOT_FULLY_RECONSTRUCTED_FILE
NOT_FULLY_RECONSTRUCTED_FILE,1
),
det_file=DET_FILE_NEUTRINO,
add_t0=True,
......
......@@ -65,12 +65,12 @@ class TestMakeDataSplit(TestCase):
"processed_data_neutrino/processed_graph_neutrino.h5",
"processed_data_neutrino/processed_graph_neutrino.h5\n",
]
cls.n_events_list = [18, 3]
cls.n_events_list = [50, 33]
cls.contents_concatenate_script = [
"concatenate " + list_output_train + " --outfile " + concatenate_file
"orcasong concatenate " + list_output_train + " --outfile " + concatenate_file
]
cls.contents_shuffle_script = [
"h5shuffle2 " + concatenate_file + " --max_ram 1000000000 \n"
"orcasong h5shuffle2 " + concatenate_file
]
# create list_file_dir
......@@ -127,7 +127,7 @@ class TestMakeDataSplit(TestCase):
assert os.path.exists(list_output_val) == 1
with open(list_output_val) as f:
for line in f:
self.assertIn(line, self.file_path_list_val)
self.assertIn(line, self.file_path_list)
f.close
assert os.path.exists(list_output_train) == 1
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment