-
Stefan Reck authored
parser.py 4.38 KiB
"""
Run OrcaSong functionalities from command line.
"""
import argparse
from orcasong.tools.concatenate import concatenate
from orcasong.tools.postproc import postproc_file
from orcasong.tools.shuffle2 import h5shuffle2
def _add_parser_concatenate(subparsers):
    """Register the ``concatenate`` sub-command on ``subparsers``.

    The sub-parser stores ``concatenate`` as its ``func`` default and
    defines the arguments ``file``, ``outfile``, ``no_used_files`` and
    ``skip_errors``.

    Parameters
    ----------
    subparsers
        The object returned by ``ArgumentParser.add_subparsers()``.
    """
    parser = subparsers.add_parser(
        "concatenate",
        description='Concatenate many small h5 files to a single large one '
                    'in a km3pipe compatible format. This is intended for '
                    'files that get generated by orcasong, i.e. all datasets '
                    'should have the same length, with one row per '
                    'blob. '
                    'Compression options and the datasets to be created in '
                    'the new file will be read from the first input file.')
    # nargs="+" instead of "*": at least one file (or one txt list) has to be
    # given, so an empty call is rejected by argparse with a usage message
    # instead of handing an empty list to ``concatenate``.
    parser.add_argument(
        'file', type=str, nargs="+",
        help="Define the files to concatenate. If it's one argument: A txt list "
             "with paths of h5 files to concatenate (one path per line). "
             "If it's multiple arguments: "
             "The paths of h5 files to concatenate.")
    parser.add_argument(
        '--outfile', type=str, default="concatenated.h5",
        help='The absolute filepath of the output .h5 file that will be created. ')
    parser.add_argument(
        '--no_used_files', action='store_true',
        help="Per default, the paths of the input files are added "
             "as their own datagroup in the output file. Use this flag to "
             "disable. ")
    parser.add_argument(
        '--skip_errors', action='store_true',
        help="If true, ignore files that can't be concatenated. ")
    parser.set_defaults(func=concatenate)
def _add_parser_h5shuffle(subparsers):
    """Register the ``h5shuffle`` sub-command on ``subparsers``.

    Defines the positional ``input_file`` plus the options
    ``--output_file`` and ``--delete``, and stores ``postproc_file`` as
    the ``func`` default of the sub-parser.
    """
    shuffle_cmd = subparsers.add_parser(
        "h5shuffle", description='Shuffle an h5 file using km3pipe.')
    add_arg = shuffle_cmd.add_argument

    add_arg('input_file', type=str, help='File to shuffle.')
    add_arg(
        '--output_file',
        type=str,
        help='Name of output file. Default: Auto generate name.',
    )
    add_arg(
        '--delete',
        action="store_true",
        help='Delete original file afterwards.',
    )

    shuffle_cmd.set_defaults(func=postproc_file)
def _add_parser_h5shuffle2(subparsers):
    """Register the ``h5shuffle2`` sub-command on ``subparsers``.

    The arguments are declared in a table and added in the listed order;
    ``h5shuffle2`` is stored as the ``func`` default of the sub-parser.
    """
    # (name, keyword arguments for add_argument), in registration order.
    argument_table = (
        ("input_file", dict(
            type=str,
            help="Path of the file that will be shuffled.",
        )),
        ("--output_file", dict(
            type=str,
            default=None,
            help="If given, this will be the name of the output file. "
                 "Default: input_file + suffix.",
        )),
        ("--datasets", dict(
            type=str,
            nargs="*",
            default=("x", "y"),
            help="Which datasets to include in output. Default: x, y",
        )),
        ("--max_ram_fraction", dict(
            type=float,
            default=0.25,
            help="in [0, 1]. Fraction of all available ram to use for reading one batch of data "
                 "Note: this should "
                 "be <=~0.25 or so, since lots of ram is needed for in-memory shuffling. "
                 "Default: 0.25",
        )),
        ("--iterations", dict(
            type=int,
            default=None,
            help="Shuffle the file this many times. Default: Auto choose best number.",
        )),
        ("--max_ram", dict(
            type=int,
            default=None,
            help="Available ram in bytes. Default: Use fraction of maximum "
                 "available instead (see max_ram_fraction).",
        )),
    )

    shuffle2_cmd = subparsers.add_parser(
        "h5shuffle2",
        description="Shuffle datasets in a h5file that have the same length. "
                    "Uses chunkwise readout for speed-up."
    )
    for arg_name, arg_options in argument_table:
        shuffle2_cmd.add_argument(arg_name, **arg_options)
    shuffle2_cmd.set_defaults(func=h5shuffle2)
def _add_parser_version(subparsers):
    """Register the ``version`` sub-command on ``subparsers``.

    The sub-command takes no arguments; its ``func`` default prints the
    ``version`` attribute of the ``orcasong`` package.

    Parameters
    ----------
    subparsers
        The object returned by ``ArgumentParser.add_subparsers()``.
    """
    def show_version():
        # Imported lazily, so the package is only touched when the
        # version is actually requested.
        from orcasong import version
        print(version)

    parser = subparsers.add_parser(
        "version",
        # The command prints orcasong's version, so the help has to name
        # orcasong (it previously said "orcanet").
        description="Show installed orcasong version.",
    )
    parser.set_defaults(func=show_version)
def main():
    """Parse the command line and run the selected OrcaSong sub-command.

    Builds the ``orcasong`` parser with the sub-commands ``concatenate``,
    ``h5shuffle``, ``h5shuffle2`` and ``version``, then calls the function
    stored under ``func`` by the chosen sub-parser, passing all remaining
    parsed arguments as keyword arguments.

    If no sub-command is given, a usage error is reported through
    ``parser.error`` (which exits the program) instead of failing with a
    ``KeyError``.
    """
    parser = argparse.ArgumentParser(
        prog="orcasong",
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    subparsers = parser.add_subparsers()
    _add_parser_concatenate(subparsers)
    _add_parser_h5shuffle(subparsers)
    _add_parser_h5shuffle2(subparsers)
    _add_parser_version(subparsers)

    kwargs = vars(parser.parse_args())
    # Sub-commands are optional for argparse in Python 3, so "func" is not
    # set when the program is started without one.
    func = kwargs.pop("func", None)
    if func is None:
        parser.error("no command given (use -h to list the available commands)")
    func(**kwargs)