Skip to content
Snippets Groups Projects
Commit 06d458cf authored by ViaFerrata's avatar ViaFerrata
Browse files

Minor.

parent 3521d407
No related branches found
No related tags found
No related merge requests found
......@@ -3,125 +3,132 @@
# Feel free to make a copy and keep only the lines you need!
# If you don't want to have a parameter, comment out the line or delete it!
# --- Main Parameters ---#
# -----------------------------------------------------------------------------#
n_files_train = 5 #int
# Into how many files the training dataset should be split.
# This option is needed if one of your input_groups has a run_id_train range.
# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#
n_files_validate = 1 #int
# Into how many files the validation dataset should be split.
# This option is needed if one of your input_groups has a run_id_validate range.
# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#
n_files_rest = 1 #int
# Into how many files the "rest" dataset should be split.
# This option is needed if one of your input_groups has a run_id_rest range.
# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#
output_file_folder = "/project/antares/enriqueh/data/working" #str
# Path to the folder, where all the output .list files (and the bash job scripts) should be stored.
# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#
output_file_name = "full_test" #str
# Prefix of the filename of the output .list files.
# E.g. if = "xyzc_tight_0":
# xyzc_tight_0_train_0.list, xyzc_tight_0_validate_0.list, ...
# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#
print_only = false #bool
# true = No .list files are made, only prints information about the input_groups.
# -----------------------------------------------------------------------------#
# --- Job Submission Parameters ---#
# -----------------------------------------------------------------------------#
make_qsub_bash_files = true #bool
# true = Makes the cluster submission bash files needed to actually
# concatenate the files in the .list files.
# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#
submit_jobs = false #bool
# true = Submit the bash job scripts to the cluster after they have been made.
# CAREFUL: May only work for Erlangen-CC.
# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#
venv_path = "/project/antares/enriqueh/gpu_venv3/" #str
# Path to a virtualenv, e.g. "/home/hpc/capn/mppi033h/.virtualenv/python_3_env/"
# /project/antares/enriqueh/gpu_venv3/bin/activate
# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#
data_tools_folder = "/project/antares/enriqueh/OrcaNet/orcanet_contrib/data_tools" #str
# Dirpath, where the concatenate.py tool is located.
# E.g. "/home/woody/capn/mppi033h/Code/OrcaNet/orcanet_contrib/data_tools"
# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#
shuffle_delete = false #bool
# true = the input file will be deleted after the shuffling is finished.
# Option for the shuffle_h5 tool.
# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#
# Concatenate.py Parameters
# --- Main Parameters --- #
# ----------------------------------------------------------------------------- #
# Into how many files the training dataset should be split. Type: int
# This option is needed if one of your input_groups has a run_id_train range.
n_files_train = 5
# ----------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------- #
# Into how many files the validation dataset should be split. Type: int
# This option is needed if one of your input_groups has a run_id_validate range.
n_files_validate = 1
# ----------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------- #
# Into how many files the "rest" dataset should be split. Type: int
# This option is needed if one of your input_groups has a run_id_rest range.
n_files_rest = 1
# ----------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------- #
# Path to the folder, where all the output .list files
# (and the bash job scripts) should be stored. Type: str
output_file_folder = "/path/to/output/folder"
# ----------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------- #
# Prefix of the filename of the output .list files. Type: str
# E.g. if = "xyzc_tight_0":
# xyzc_tight_0_train_0.list, xyzc_tight_0_validate_0.list, ...
output_file_name = "output_file_name"
# ----------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------- #
# true = No .list files are made, only prints information about the input_groups.
# Type: bool
print_only = false
# ----------------------------------------------------------------------------- #
# --- Job Submission Parameters --- #
# ----------------------------------------------------------------------------- #
# true = Makes the cluster submission bash files needed to actually
# concatenate the files in the .list files. Type: bool
make_qsub_bash_files = true
# ----------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------- #
# true = Submit the bash job scripts to the cluster after they have been made.
# CAREFUL: May only work for Erlangen-CC. Type: bool
submit_jobs = false
# ----------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------- #
# Path to a virtualenv, e.g. "/home/hpc/capn/mppi033h/.virtualenv/python_3_env/"
# Type: str
venv_path = "/path/to/your/venv"
# ----------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------- #
# Dirpath, where the concatenate.py tool is located. Type: str
# E.g. "/home/woody/capn/mppi033h/Code/OrcaNet/orcanet_contrib/data_tools"
data_tools_folder = "path/to/OrcaNet/orcanet_contrib/data_tools"
# ----------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------- #
# true = the input file will be deleted after the shuffling is finished.
# Option for the shuffle_h5 tool. Type: bool
shuffle_delete = false
# ----------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------- #
# Concatenate.py & shuffle.py Parameters
# If they are commented out, they will be set to None in concatenate.py,
# and the script will use the configurations that are already in the file.
# and the script will use the chunksize/complib/complevel that are already
# used in the input files for these scripts.
#chunksize = 32 #int
#complib = "gzip" #str
#complevel = 1 #int
#
#chunksize = 32 # Type: int
#complib = "gzip" # Type: str
#complevel = 1 # Type: int
# -----------------------------------------------------------------------------#
# ----------------------------------------------------------------------------- #
# --- Input Group Parameters: Datafiles to be concatenated --- #
# -----------------------------------------------------------------------------#
[elec_cc_3_100]
# Name of the group; don't use whitespace in it!
# ----------------------------------------------------------------------------- #
[input_group_1] # Name of the group; don't use whitespace in it!
dir = "/dcache/antares/enriqueh/orcasong_output/xyzt/3-100GeV/elec-CC"
# "/path/to/the/folder/of/the/data/for/this/input_1/group" #str
# Path of the directory where the files for this input group are located.
# Type: str
# Path of the directory where the files for this input group are located.
dir = "/path/to/the/folder/of/the/data/for/this/input_1/group"
run_ids_train = [1, 1000] #array
run_ids_validate = [1001, 1200]
#run_ids_rest = [1001, 1300]
# Arrays with the range of the run_ids that should be used for the
# training, validation and rest datasets of this input group.
# E.g. if [1,5] = Files from this input group with run_ids from 1 to 5
# (including 1 and 5!) will go to the training/validation/rest dataset.
# If you don't want to use a specific dataset for this input group,
# you can comment out the corresponding line or delete it!
# Arrays with the range of the run_ids that should be used for the
# training, validation and rest datasets of this input group.
# E.g. if [1,5] = Files from this input group with run_ids from 1 to 5
# (including 1 and 5!) will go to the training/validation/rest dataset.
# If you don't want to use a specific dataset for this input group,
# you can comment out the corresponding line or delete it!
run_ids_train = [1, 200]
run_ids_validate = [201, 1200]
#run_ids_rest = [1201, 1300]
# -----------------------------------------------------------------------------#
# You can have more than 1 input group!
# ----------------------------------------------------------------------------- #
# You can have more than 1 input group!
[elec_nc_3_100]
# Name of the group; don't use whitespace in it!
[input_group_2] # Name of the group; don't use whitespace in it!
dir = "/dcache/antares/enriqueh/orcasong_output/xyzt/3-100GeV/elec-NC"
# "/path/to/the/folder/of/the/data/for/this/input_1/group" #str
# Path of the directory where the files for this input group are located.
# Type: str
# Path of the directory where the files for this input group are located.
dir = "/path/to/the/folder/of/the/data/for/this/input_2/group"
run_ids_train = [1, 1000] #array
run_ids_train = [1, 1000]
run_ids_validate = [1001, 1188]
# [input_group_2] # 1 to 1500
# dir = "/path/to/the/folder/of/the/data/for/this/input_2/group"
# run_ids_train = [101, 500]
# run_ids_validate = [1, 100]
# #run_ids_rest = [501, 600]
# -----------------------------------------------------------------------------#
# ----------------------------------------------------------------------------- #
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment