united layout with other tools from docopt to argparse, no more natsort,...

united layout with other tools from docopt to argparse, no more natsort, separate folder for config

united layout with other tools from docopt to argparse, no more natsort,...
5f3b11e0 · Daniel Guderian · 13cc9d1a · 5f3b11e0 · 5f3b11e0 · 5f3b11e0
Commit 5f3b11e0 authored 4 years ago by Daniel Guderian
--- a/orcasong/tools/make_data_split.py
+++ b/orcasong/tools/make_data_split.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-"""
-Utility script that makes .list files for the concatenate_h5.py tool.

-Usage:
-    make_data_split.py CONFIG
-    make_data_split.py (-h | --help)
-
-Arguments:
-    CONFIG  A .toml file which contains the configuration options.
-
-Options:
-    -h --help  Show this screen.
-
-"""

 __author__ = 'Michael Moser, Daniel Guderian'

 import os
 import toml
-import docopt
-import natsort as ns
+import argparse
 import h5py
 import random
 import numpy as np

-   
-def parse_input():
-    """
-    Parses the config of the .toml file, specified by the user.
-
-    Returns
-    -------
-    cfg : dict
-        Dict that contains all configuration options from the input .toml file.
-
-    """
-
-    args = docopt.docopt(__doc__)
-    config_file = args['CONFIG']
-
-    cfg = toml.load(config_file)
-    cfg['toml_filename'] = config_file
-
-    return cfg


 def get_all_ip_group_keys(cfg):
@@ -315,14 +282,34 @@ def make_dsplit_list_files(cfg):
       
        print("----------------------------------------------")
        
-        
+   
+def get_parser():
+    """Return the argparse parser of this tool (one positional: config)."""
+    parser = argparse.ArgumentParser(
+        description="Create datasets based on the run_id's. "
+                    "Use the config to add input folder and set the ranges. "
+                    "Outputs a list in a txt file that can be used to "
+                    "concatenate the files specified.")
+    parser.add_argument(
+        'config', type=str,
+        help="See example config for detailed information")
+    return parser
+         
 def main():
    """
    Main function to make the data split.
    """

-    cfg = parse_input()
+    #load the config
+    parser = get_parser()
+    parsed_args = parser.parse_args()

+    config_file = parsed_args.config
+    
+    #decode config
+    cfg = toml.load(config_file)
+    cfg['toml_filename'] = config_file
+    
    ip_group_keys = get_all_ip_group_keys(cfg)

    n_evts_total = 0
@@ -349,5 +336,9 @@ def main():


 if __name__ == '__main__':
-    print("well, i am oin here")
    main()
+
+
+
+
+
--- a/orcasong/tools/make_data_split_configs/example_make_data_split_config.toml
+++ b/orcasong/tools/make_data_split_configs/example_make_data_split_config.toml
+# Example configuration file for make_data_split.py
+
+# --- Documentation for every config parameter that is available --- #
+#
+#    Main Parameters
+#    ----------
+#    n_files_train : int
+#       Into how many files the training dataset should be split.
+#       If you don't want to have this dataset, comment out the line or delete it!
+#    n_files_validate : int
+#       Into how many files the validation dataset should be split.
+#       If you don't want to have this dataset, comment out the line or delete it!
+#    n_files_rest : int
+#       Into how many files the "rest" dataset should be split.
+#       If you don't want to have this dataset, comment out the line or delete it!
+#    output_file_folder : str
+#       Path to the folder, where all the output .list files (and the bash job scripts) should be stored.
+#    output_file_name : str
+#       String, that specifies the prefix of the filename of the output .list files.
+#       E.g. if = "xyzc_tight_0":
+#       xyzc_tight_0_train_0.list, xyzc_tight_0_validate_0.list, ...
+#    print_only : bool
+#       If only information about the input_groups should be printed, and no .list files should be made.
+#
+#
+#    Input Group Parameters
+#    ----------------------
+#    dir : str
+#       Path of the directory, where the files for this input group are located.
+#    run_ids_train/run_ids_validate/run_ids_rest : array
+#       Array that specifies the range of the run_ids that should be used for the training/validation/rest
+#       dataset of this input group.
+#       E.g. if [1,5], the script will put files from this input group with run_ids from 1 to 5 (including 1 and 5)
+#       to the training/validation/rest dataset.
+#       If you don't want to use a specific dataset for this input group, comment out the line or delete it!
+#
+# --- Documentation for every config parameter that is available --- #
+
+# --- Main options ---#
+
+n_files_train = 3
+n_files_validate = 1
+n_files_rest = 0
+
+output_file_folder = "/sps/km3net/users/guderian/NN_stuff/split_data_output/ORCA4/graph/ts/"
+
+output_file_name = "test_list"
+
+print_only = false # only print information of your input_groups, don't make any .list files
+
+# --- Main options ---#
+
+
+# --- Input groups : these are the datafiles, that should be concatenated somehow --- #
+
+
+[elec_cc] 
+dir = "/sps/km3net/users/guderian/NN_stuff/graphs/ORCA4/base/gsg_elecCC-CC_1-500GeV.km3sim/test/"
+run_ids_train = [6763, 6767]
+run_ids_validate = [6768, 6769]
+
+
+[muon_nc] 
+dir = "/sps/km3net/users/guderian/NN_stuff/graphs/ORCA4/base/gsg_muonNC-NC_1-500GeV.km3sim/test/"
+run_ids_train = [6763, 6767]
+run_ids_validate = [6768, 6769]
+
+
+# --- Input groups : these are the datafiles, that should be concatenated somehow --- #
\ No newline at end of file
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,3 @@ matplotlib
 km3pipe>=9
 psutil
 setuptools_scm
-natsort