Skip to content
Snippets Groups Projects

Resolve "Accept None parameter for concatenate"

Merged Michael Moser requested to merge 7-accept-none-parameter-for-concatenate into master
2 files
+ 117
93
Compare changes
  • Side-by-side
  • Inline
Files
2
# --- Example configuration file for make_data_split.py --- #
# Documentation for every config parameter that is available.
# Feel free to make a copy and keep only the lines you need!
# If you don't want to have a parameter, comment out the line or delete it!
#
# NOTE(review): this file previously defined most keys twice — an example
# template and a concrete configuration were merged together. Duplicate keys
# are an error in strict TOML parsers and ambiguous in INI dialects, so each
# key is now defined exactly once. The concrete (/project/antares/enriqueh)
# values were kept; confirm these are the intended ones.

# -----------------------------------------------------------------------------#
# --- Main Parameters --- #
# -----------------------------------------------------------------------------#

# n_files_train : int
# Into how many files the training dataset should be split.
# This option is needed if one of your input_groups has a run_ids_train range.
# If you don't want to have this dataset, comment out the line or delete it!
n_files_train = 5

# n_files_validate : int
# Into how many files the validation dataset should be split.
# This option is needed if one of your input_groups has a run_ids_validate range.
# If you don't want to have this dataset, comment out the line or delete it!
n_files_validate = 1

# n_files_rest : int
# Into how many files the "rest" dataset should be split.
# This option is needed if one of your input_groups has a run_ids_rest range.
# If you don't want to have this dataset, comment out the line or delete it!
n_files_rest = 1

# output_file_folder : str
# Path to the folder where all the output .list files (and the bash job
# scripts) should be stored.
output_file_folder = "/project/antares/enriqueh/data/working"

# output_file_name : str
# Prefix of the filename of the output .list files.
# E.g. if = "xyzc_tight_0":
# xyzc_tight_0_train_0.list, xyzc_tight_0_validate_0.list, ...
output_file_name = "full_test"

# print_only : bool
# true = No .list files are made, only prints information about the input_groups.
print_only = false

# -----------------------------------------------------------------------------#
# --- Job Submission Parameters --- #
# -----------------------------------------------------------------------------#

# make_qsub_bash_files : bool
# true = Makes the cluster submission bash files needed to actually
# concatenate the files in the .list files.
make_qsub_bash_files = true

# submit_jobs : bool
# true = Submit the bash job scripts to the cluster after they have been made.
# CAREFUL: May only work for Erlangen-CC.
submit_jobs = false

# venv_path : str
# Path to a virtualenv, e.g. "/home/hpc/capn/mppi033h/.virtualenv/python_3_env/"
# (activate script: <venv_path>/bin/activate)
venv_path = "/project/antares/enriqueh/gpu_venv3/"

# data_tools_folder : str
# Dirpath where the concatenate.py tool is located.
# E.g. "/home/woody/capn/mppi033h/Code/OrcaNet/orcanet_contrib/data_tools"
data_tools_folder = "/project/antares/enriqueh/OrcaNet/orcanet_contrib/data_tools"

# -----------------------------------------------------------------------------#
# --- Concatenate.py Parameters --- #
# If these are commented out, they will be set to None in concatenate.py,
# and the script will use the configurations that are already in the file.
# -----------------------------------------------------------------------------#

# chunksize : int
# Chunksize parameter that is used when calling concatenate.py.
chunksize = 32

# complib : str
# Complib parameter that is used when calling concatenate.py.
complib = "gzip"

# complevel : int
# Complevel parameter that is used when calling concatenate.py.
complevel = 1

# shuffle_delete : bool
# Option for the shuffle_h5 tool: true = the input file that will be shuffled
# is deleted after the shuffling is finished.
shuffle_delete = false
# -----------------------------------------------------------------------------#
# --- Input groups: the datafiles that should be concatenated --- #
# -----------------------------------------------------------------------------#
# Each [section] below is one input group. You can assign any name to a group
# (it doesn't matter which one), but don't put whitespace in the name!
# You can have more than one input group.
#
# dir : str
# Path of the directory where the files for this input group are located.
#
# run_ids_train / run_ids_validate / run_ids_rest : array
# Arrays with the range of the run_ids that should be used for the
# training, validation and rest datasets of this input group.
# E.g. if [1, 5] = Files from this input group with run_ids from 1 to 5
# (including 1 and 5!!) will go to the training/validation/rest dataset.
# If you don't want to use a specific dataset for this input group,
# comment out the corresponding line or delete it!
#
# NOTE(review): duplicated run_ids_* keys inside single sections (left over
# from a merged example template) have been untangled below — confirm that
# each range ended up in the intended group.

[elec_cc_3_100]
dir = "/dcache/antares/enriqueh/orcasong_output/xyzt/3-100GeV/elec-CC"
run_ids_train = [1, 1000]
run_ids_validate = [1001, 1200]
#run_ids_rest = [1001, 1300]

[elec_nc_3_100]
dir = "/dcache/antares/enriqueh/orcasong_output/xyzt/3-100GeV/elec-NC"
run_ids_train = [1, 1000]
run_ids_validate = [1001, 1188]

# --- Example template groups (kept commented out for reference) --- #
# [input_group_1]
# dir = "/path/to/the/folder/of/the/data/for/this/input_1/group"
# run_ids_train = [1001, 5000]
# run_ids_validate = [1, 1000]
# run_ids_rest = [5001, 20000]
#
# [input_group_2] # 1 to 1500
# dir = "/path/to/the/folder/of/the/data/for/this/input_2/group"
# run_ids_train = [101, 500]
# run_ids_validate = [1, 100]
# #run_ids_rest = [501, 600]
#
# [input_group_3] # 1 to 2400
# dir = "/path/to/the/folder/of/the/data/for/this/input_3/group"
# run_ids_train = [601, 2400]
# #run_ids_validate = [1, 500]
# run_ids_rest = [501, 600]