Machine Learning / OrcaSong / Commits / 06d458cf

Commit 06d458cf, authored 6 years ago by ViaFerrata.
Commit message: Minor.
Parent: 3521d407
Changes: 1 file, 1 pipeline.

orcasong_contrib/data_tools/make_data_split/example_config.toml: +114 additions, -107 deletions
@@ -3,125 +3,132 @@

This hunk replaces the site-specific values (paths under "/project/antares/enriqueh/" and "/dcache/antares/enriqueh/", the output name "full_test", and the group names [elec_cc_3_100] and [elec_nc_3_100]) with generic placeholders, and folds the trailing #int/#str/#bool markers into the description comments as "Type:" hints. The file after the commit:

# Feel free to make a copy and keep only the lines you need!
# If you don't want to have a parameter, comment out the line or delete it!

# --- Main Parameters --- #

# ----------------------------------------------------------------------------- #
# Into how many files the training dataset should be split. Type: int
# This option is needed if one of your input_groups has a run_id_train range.
n_files_train = 5
# ----------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------- #
# Into how many files the validation dataset should be split. Type: int
# This option is needed if one of your input_groups has a run_id_validate range.
n_files_validate = 1
# ----------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------- #
# Into how many files the "rest" dataset should be split. Type: int
# This option is needed if one of your input_groups has a run_id_rest range.
n_files_rest = 1
# ----------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------- #
# Path to the folder, where all the output .list files
# (and the bash job scripts) should be stored. Type: str
output_file_folder = "/path/to/output/folder"
# ----------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------- #
# Prefix of the filename of the output .list files. Type: str
# E.g. if = "xyzc_tight_0":
# xyzc_tight_0_train_0.list, xyzc_tight_0_validate_0.list, ...
output_file_name = "output_file_name"
# ----------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------- #
# true = No .list files are made, only prints information about the input_groups.
# Type: bool
print_only = false
# ----------------------------------------------------------------------------- #
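The comments above spell out the naming scheme for the output .list files: the prefix, the dataset type, and a running index. A minimal sketch of that scheme (the helper name is mine, not from the make_data_split tool):

```python
def list_file_names(prefix, n_train, n_validate, n_rest):
    """Build the output .list file names the config comments describe,
    e.g. prefix 'xyzc_tight_0' yields 'xyzc_tight_0_train_0.list', ..."""
    names = []
    for dataset, n_files in (("train", n_train),
                             ("validate", n_validate),
                             ("rest", n_rest)):
        for i in range(n_files):
            names.append(f"{prefix}_{dataset}_{i}.list")
    return names

# With n_files_train = 2, n_files_validate = 1, n_files_rest = 1:
print(list_file_names("xyzc_tight_0", 2, 1, 1))
```

The index runs from 0 per dataset, matching the `xyzc_tight_0_train_0.list` example in the comments.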
# --- Job Submission Parameters --- #

# ----------------------------------------------------------------------------- #
# true = Makes the cluster submission bash files needed to actually
# concatenate the files in the .list files. Type: bool
make_qsub_bash_files = true
# ----------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------- #
# true = Submit the bash job scripts to the cluster after they have been made.
# CAREFUL: May only work for Erlangen-CC. Type: bool
submit_jobs = false
# ----------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------- #
# Path to a virtualenv, e.g. "/home/hpc/capn/mppi033h/.virtualenv/python_3_env/"
# Type: str
venv_path = "/path/to/your/venv"
# ----------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------- #
# Dirpath, where the concatenate.py tool is located. Type: str
# E.g. "/home/woody/capn/mppi033h/Code/OrcaNet/orcanet_contrib/data_tools"
data_tools_folder = "path/to/OrcaNet/orcanet_contrib/data_tools"
# ----------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------- #
# True = the input file will be deleted after the shuffling is finished.
# Option for the shuffle_h5 tool. Type: bool
shuffle_delete = false
# ----------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------- #
# Concatenate.py & shuffle.py Parameters
# If they are commented out, they will be set to None in concatenate.py,
# and the script will use the chunksize/complib/complevel that are already
# used in the input files for these scripts.
#chunksize = 32 # Type: int
#complib = "gzip" # Type: str
#complevel = 1 # Type: int
# ----------------------------------------------------------------------------- #

# ----------------------------------------------------------------------------- #
# --- Input Group Parameters: Datafiles to be concatenated --- #
# ----------------------------------------------------------------------------- #
[input_group_1] # Name of the group, don't make whitespaces!
# Path of the directory where the files for this input group are located.
# Type: str
dir = "/path/to/the/folder/of/the/data/for/this/input_1/group"
run_ids_train = [1, 200]        # Arrays with the range of the run_ids that should be used for the
run_ids_validate = [201, 1200]  # training, validation and rest datasets of this input group.
#run_ids_rest = [1201, 1300]    # E.g. if [1,5] = Files from this input group with run_ids from 1 to 5
                                # (including 1 and 5!!) will go to the training/validation/rest dataset.
                                # If you don't want to use a specific dataset for this input group,
                                # you can comment out the corresponding line or delete it!
# ----------------------------------------------------------------------------- #
# You can have more than 1 input group!
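The inclusive run_id ranges above decide which dataset each file of an input group lands in. A minimal sketch of that assignment logic (the function and dict are my own illustration, not code from make_data_split.py):

```python
def dataset_for_run_id(run_id, ranges):
    """Return the dataset name whose inclusive [lo, hi] range contains
    run_id, or None if the run_id falls outside every configured range."""
    for dataset, (lo, hi) in ranges.items():
        if lo <= run_id <= hi:
            return dataset
    return None

# Ranges as configured for input_group_1 above (run_ids_rest is commented out).
ranges = {"train": (1, 200), "validate": (201, 1200)}

print(dataset_for_run_id(1, ranges))     # train (both range limits are inclusive)
print(dataset_for_run_id(200, ranges))   # train
print(dataset_for_run_id(201, ranges))   # validate
print(dataset_for_run_id(1500, ranges))  # None (outside every range)
```

Files whose run_id matches no range, like the rest range here, are simply skipped for that dataset.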
[input_group_2]
# Name of the group, don't make whitespaces!
# Path of the directory where the files for this input group are located.
# Type: str
dir = "/path/to/the/folder/of/the/data/for/this/input_2/group"
run_ids_train = [1, 1000]
run_ids_validate = [1001, 1188]

# [input_group_2] # 1 to 1500
# dir = "/path/to/the/folder/of/the/data/for/this/input_2/group"
# run_ids_train = [101, 500]
# run_ids_validate = [1, 100]
# #run_ids_rest = [501, 600]
# ----------------------------------------------------------------------------- #
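The n_files_train / n_files_validate / n_files_rest parameters control how many .list files each dataset is split into. One plausible way to distribute a dataset's files evenly over that many lists, assuming a simple round-robin (an illustration only, not the actual make_data_split implementation):

```python
def split_into_lists(files, n_files):
    """Distribute file names round-robin over n_files output .list chunks,
    so the chunk sizes differ by at most one file."""
    chunks = [[] for _ in range(n_files)]
    for i, fname in enumerate(files):
        chunks[i % n_files].append(fname)
    return chunks

# E.g. splitting five training files over n_files_train = 2 output lists:
files = [f"run_{i}.h5" for i in range(5)]
for i, chunk in enumerate(split_into_lists(files, 2)):
    print(f"train_{i}.list:", chunk)
```

Each chunk would then be written out as one .list file for the concatenation jobs to consume.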