Skip to content
Snippets Groups Projects
Commit 7f6a045e authored by Stefan Reck
Browse files

move some commands to central parser

parent 06dac652
No related branches found
No related tags found
1 merge request: !20 "Parser"
"""
Run OrcaSong functionalities from command line.
"""
import argparse
from orcasong.tools.concatenate import concatenate
from orcasong.tools.postproc import postproc_file
from orcasong.tools.shuffle2 import h5shuffle2
def _add_parser_concatenate(subparsers):
    """
    Register the ``concatenate`` subcommand.

    The subcommand defines the arguments ``file``, ``outfile``,
    ``no_used_files`` and ``skip_errors`` and stores ``concatenate`` as
    its ``func`` default.

    Parameters
    ----------
    subparsers : argparse._SubParsersAction
        The object returned by ``ArgumentParser.add_subparsers()``.

    """
    parser = subparsers.add_parser(
        "concatenate",
        description='Concatenate many small h5 files to a single large one '
                    'in a km3pipe compatible format. This is intended for '
                    'files that get generated by orcasong, i.e. all datasets '
                    'should have the same length, with one row per '
                    'blob. '
                    'Compression options and the datasets to be created in '
                    'the new file will be read from the first input file.')
    # nargs="+" instead of "*": at least one input is needed (the output
    # layout is read from the first input file), so an empty file list is
    # rejected by argparse with a usage message.
    parser.add_argument(
        'file', type=str, nargs="+",
        help="Define the files to concatenate. If it's one argument: A txt list "
             "with paths of h5 files to concatenate (one path per line). "
             "If it's multiple arguments: "
             "The paths of h5 files to concatenate.")
    parser.add_argument(
        '--outfile', type=str, default="concatenated.h5",
        help='The absolute filepath of the output .h5 file that will be created. ')
    parser.add_argument(
        '--no_used_files', action='store_true',
        help="Per default, the paths of the input files are added "
             "as their own datagroup in the output file. Use this flag to "
             "disable. ")
    parser.add_argument(
        '--skip_errors', action='store_true',
        help="If true, ignore files that can't be concatenated. ")
    parser.set_defaults(func=concatenate)
def _add_parser_h5shuffle(subparsers):
    """
    Register the ``h5shuffle`` subcommand.

    The subcommand takes one positional ``input_file`` plus the optional
    ``--output_file`` and ``--delete`` arguments, and stores
    ``postproc_file`` as its ``func`` default.

    Parameters
    ----------
    subparsers : argparse._SubParsersAction
        The object returned by ``ArgumentParser.add_subparsers()``.

    """
    shuffle_parser = subparsers.add_parser(
        "h5shuffle", description='Shuffle an h5 file using km3pipe.',
    )
    shuffle_parser.add_argument(
        'input_file',
        type=str,
        help='File to shuffle.',
    )
    shuffle_parser.add_argument(
        '--output_file',
        type=str,
        help='Name of output file. Default: Auto generate name.',
    )
    shuffle_parser.add_argument(
        '--delete',
        action="store_true",
        help='Delete original file afterwards.',
    )
    shuffle_parser.set_defaults(func=postproc_file)
def _add_parser_h5shuffle2(subparsers):
    """
    Register the ``h5shuffle2`` subcommand.

    The arguments are declared in a table and added in order; afterwards
    ``h5shuffle2`` is stored as the ``func`` default of the subcommand.

    Parameters
    ----------
    subparsers : argparse._SubParsersAction
        The object returned by ``ArgumentParser.add_subparsers()``.

    """
    shuffle2_parser = subparsers.add_parser(
        "h5shuffle2",
        description="Shuffle datasets in a h5file that have the same length. "
                    "Uses chunkwise readout for speed-up."
    )

    # (name, add_argument keyword options), in the order they are registered.
    argument_table = (
        ("input_file", {
            "type": str,
            "help": "Path of the file that will be shuffled.",
        }),
        ("--output_file", {
            "type": str,
            "default": None,
            "help": "If given, this will be the name of the output file. "
                    "Default: input_file + suffix.",
        }),
        ("--datasets", {
            "type": str,
            "nargs": "*",
            "default": ("x", "y"),
            "help": "Which datasets to include in output. Default: x, y",
        }),
        ("--max_ram_fraction", {
            "type": float,
            "default": 0.25,
            "help": "in [0, 1]. Fraction of all available ram to use for reading one batch of data "
                    "Note: this should "
                    "be <=~0.25 or so, since lots of ram is needed for in-memory shuffling. "
                    "Default: 0.25",
        }),
        ("--iterations", {
            "type": int,
            "default": None,
            "help": "Shuffle the file this many times. Default: Auto choose best number.",
        }),
        ("--max_ram", {
            "type": int,
            "default": None,
            "help": "Available ram in bytes. Default: Use fraction of maximum "
                    "available instead (see max_ram_fraction).",
        }),
    )
    for arg_name, arg_options in argument_table:
        shuffle2_parser.add_argument(arg_name, **arg_options)

    shuffle2_parser.set_defaults(func=h5shuffle2)
def _add_parser_version(subparsers):
    """
    Register the ``version`` subcommand.

    The subcommand takes no arguments. Its ``func`` default is a local
    function that prints ``orcasong.version``.

    Parameters
    ----------
    subparsers : argparse._SubParsersAction
        The object returned by ``ArgumentParser.add_subparsers()``.

    """
    def show_version():
        # Import inside the function, so it is only resolved when the
        # command is actually run.
        from orcasong import version
        print(version)

    parser = subparsers.add_parser(
        "version",
        # The printed version is the one of orcasong (not orcanet).
        description="Show installed orcasong version.",
    )
    parser.set_defaults(func=show_version)
def main():
    """
    Entry point of the ``orcasong`` command line tool.

    Builds the top-level parser, registers the subcommands
    (concatenate, h5shuffle, h5shuffle2, version), parses the command line
    and calls the function stored as ``func`` by the chosen subcommand,
    passing all remaining parsed arguments as keyword arguments.

    If no subcommand is given, the parser exits with a usage error instead
    of failing with a KeyError.

    """
    parser = argparse.ArgumentParser(
        prog="orcasong",
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    subparsers = parser.add_subparsers()
    _add_parser_concatenate(subparsers)
    _add_parser_h5shuffle(subparsers)
    _add_parser_h5shuffle2(subparsers)
    _add_parser_version(subparsers)

    kwargs = vars(parser.parse_args())
    # 'func' is only set by the subparsers; it is missing when the tool is
    # called without a subcommand.
    func = kwargs.pop("func", None)
    if func is None:
        parser.error(
            "a command is required (choose from: {})".format(
                ", ".join(subparsers.choices)
            )
        )
    func(**kwargs)
......@@ -305,7 +305,26 @@ def _copy_attrs(src_datset, target_dataset):
warnings.warn(f"Error: Can not copy attribute {k}: {e}")
def get_parser():
def concatenate(file, outfile="concatenated.h5", no_used_files=False, skip_errors=False):
    """
    Concatenate h5 files into one output file.

    Parameters
    ----------
    file : list
        If it has exactly one element, that element is handed to
        ``FileConcatenator.from_list``. Otherwise, the whole list is
        used as the ``input_files`` of a ``FileConcatenator``.
    outfile : str
        Passed to ``FileConcatenator.concatenate`` as the output file.
    no_used_files : bool
        If True, ``append_used_files`` is set to False.
    skip_errors : bool
        Passed on to the ``FileConcatenator``.

    """
    given_as_list_file = len(file) == 1
    if not given_as_list_file:
        concatenator = FileConcatenator(input_files=file, skip_errors=skip_errors)
    else:
        concatenator = FileConcatenator.from_list(file[0], skip_errors=skip_errors)

    append_used_files = not no_used_files
    concatenator.concatenate(outfile, append_used_files=append_used_files)
def main():
warnings.warn("concatenate is deprecated and has been renamed to orcasong concatenate")
parser = argparse.ArgumentParser(
description='Concatenate many small h5 files to a single large one '
'in a km3pipe compatible format. This is intended for '
......@@ -331,27 +350,7 @@ def get_parser():
parser.add_argument(
'--skip_errors', action='store_true',
help="If true, ignore files that can't be concatenated. ")
return parser
def main():
parser = get_parser()
parsed_args = parser.parse_args()
if len(parsed_args.file) == 1:
fc = FileConcatenator.from_list(
parsed_args.file[0],
skip_errors=parsed_args.skip_errors
)
else:
fc = FileConcatenator(
input_files=parsed_args.file,
skip_errors=parsed_args.skip_errors
)
fc.concatenate(
parsed_args.outfile,
append_used_files=not parsed_args.no_used_files,
)
concatenate(**vars(parser.parse_args()))
if __name__ == '__main__':
......
......@@ -3,6 +3,8 @@ Scripts for postprocessing h5 files, e.g. shuffling.
"""
import os
import argparse
import warnings
import h5py
import km3pipe as kp
import km3modules as km
......@@ -115,6 +117,7 @@ def get_filepath_output(input_file, shuffle=True, event_skipper=None):
def h5shuffle():
warnings.warn("h5shuffle is deprecated and has been renamed to orcasong h5shuffle")
parser = argparse.ArgumentParser(description='Shuffle an h5 file using km3pipe.')
parser.add_argument('input_file', type=str, help='File to shuffle.')
parser.add_argument('--output_file', type=str,
......
......@@ -2,6 +2,8 @@ import os
import time
import datetime
import argparse
import warnings
import numpy as np
import psutil
import h5py
......@@ -335,6 +337,7 @@ def slicify(fancy_indices):
def run_parser():
warnings.warn("h5shuffle2 is deprecated and has been renamed to orcasong h5shuffle2")
parser = argparse.ArgumentParser(
description="Shuffle datasets in a h5file that have the same length. "
"Uses chunkwise readout for speed-up."
......
......@@ -27,11 +27,13 @@ setup(
'tag_regex': r'^(?P<prefix>v)?(?P<version>[^\+]+)(?P<suffix>.*)?$', },
entry_points={'console_scripts': [
'orcasong=orcasong.parser:main',
'make_dsplit=orcasong.tools.make_data_split:main',
'plot_binstats=orcasong.plotting.plot_binstats:main',
# deprecated:
'concatenate=orcasong.tools.concatenate:main',
'h5shuffle=orcasong.tools.postproc:h5shuffle',
'h5shuffle2=orcasong.tools.shuffle2:run_parser',
'make_dsplit=orcasong.tools.make_data_split:main',
'plot_binstats=orcasong.plotting.plot_binstats:main',
]}
)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment