Skip to content
Snippets Groups Projects
Commit 5f3b11e0 authored by Daniel Guderian's avatar Daniel Guderian
Browse files

united layout with other tools from docopt to argparse, no more natsort,...

united layout with other tools from docopt to argparse, no more natsort, separate folder for config
parent 13cc9d1a
No related branches found
No related tags found
1 merge request!14revive make_data_split
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Utility script that makes .list files for the concatenate_h5.py tool.
Usage:
make_data_split.py CONFIG
make_data_split.py (-h | --help)
Arguments:
CONFIG A .toml file which contains the configuration options.
Options:
-h --help Show this screen.
"""
__author__ = 'Michael Moser, Daniel Guderian'
import os
import toml
import docopt
import natsort as ns
import argparse
import h5py
import random
import numpy as np
def parse_input():
    """
    Read the command line and load the .toml config file named there.

    Returns
    -------
    cfg : dict
        All configuration options from the input .toml file, plus the
        key 'toml_filename' holding the path given as CONFIG.

    """
    # The module docstring doubles as the docopt usage specification.
    toml_path = docopt.docopt(__doc__)['CONFIG']

    cfg = toml.load(toml_path)
    cfg['toml_filename'] = toml_path
    return cfg
def get_all_ip_group_keys(cfg):
......@@ -315,14 +282,34 @@ def make_dsplit_list_files(cfg):
print("----------------------------------------------")
def get_parser():
    """
    Build the command line parser of this tool.

    Returns
    -------
    parser : argparse.ArgumentParser
        Parser with a single positional string argument ``config``.

    """
    parser = argparse.ArgumentParser(
        # Adjacent string literals are joined without any separator, so
        # every piece has to carry its own trailing space.
        description="Create datasets based on the run_id's. "
                    "Use the config to add input folder and set the ranges. "
                    "Outputs a list in a txt file that can be used to "
                    "concatenate the files specified.")
    parser.add_argument(
        'config', type=str,
        help="See example config for detailed information")
    return parser
def main():
"""
Main function to make the data split.
"""
cfg = parse_input()
#load the config
parser = get_parser()
parsed_args = parser.parse_args()
config_file = parsed_args.config
#decode config
cfg = toml.load(config_file)
cfg['toml_filename'] = config_file
ip_group_keys = get_all_ip_group_keys(cfg)
n_evts_total = 0
......@@ -349,5 +336,9 @@ def main():
# Run the data split only when this file is executed as a script.
if __name__ == '__main__':
    main()
# Example configuration file for make_data_split.py
# --- Documentation for every config parameter that is available --- #
#
# Main Parameters
# ----------
# n_files_train : int
# Into how many files the training dataset should be split.
# If you don't want to have this dataset, comment out the line or delete it!
# n_files_validate : int
# Into how many files the validation dataset should be split.
# If you don't want to have this dataset, comment out the line or delete it!
# n_files_rest : int
# Into how many files the "rest" dataset should be split.
# If you don't want to have this dataset, comment out the line or delete it!
# output_file_folder : str
# Path to the folder, where all the output .list files (and the bash job scripts) should be stored.
# output_file_name : str
# String, that specifies the prefix of the filename of the output .list files.
# E.g. if output_file_name = "xyzc_tight_0":
# xyzc_tight_0_train_0.list, xyzc_tight_0_validate_0.list, ...
# print_only : bool
# If true, only information about the input_groups is printed, and no .list files are made.
#
#
# Input Group Parameters
# ----------------------
# dir : str
# Path of the directory, where the files for this input group are located.
# run_ids_train/run_ids_validate/run_ids_rest : array
# Array, which specifies the range of the run_ids, that should be used for the training/validation/rest
# dataset of this input group.
# E.g. if [1,5], the script will put files from this input group with run_ids from 1 to 5 (including 1 and 5)
# to the training/validation/rest dataset.
# If you don't want to use a specific dataset for this input group, comment out the line or delete it!
#
# --- Documentation for every config parameter that is available --- #
# --- Main options ---#
n_files_train = 3
n_files_validate = 1
n_files_rest = 0
output_file_folder = "/sps/km3net/users/guderian/NN_stuff/split_data_output/ORCA4/graph/ts/"
output_file_name = "test_list"
print_only = false # only print information of your input_groups, don't make any .list files
# --- Main options ---#
# --- Input groups : these are the datafiles, that should be concatenated somehow --- #
[elec_cc]
dir = "/sps/km3net/users/guderian/NN_stuff/graphs/ORCA4/base/gsg_elecCC-CC_1-500GeV.km3sim/test/"
run_ids_train = [6763, 6767]
run_ids_validate = [6768, 6769]
[muon_nc]
dir = "/sps/km3net/users/guderian/NN_stuff/graphs/ORCA4/base/gsg_muonNC-NC_1-500GeV.km3sim/test/"
run_ids_train = [6763, 6767]
run_ids_validate = [6768, 6769]
# --- Input groups : these are the datafiles, that should be concatenated somehow --- #
\ No newline at end of file
......@@ -4,4 +4,3 @@ matplotlib
km3pipe>=9
psutil
setuptools_scm
natsort
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment