From 7f6a045e61cc6913a9d9ff2dc7d97d44e9f7c87f Mon Sep 17 00:00:00 2001
From: Stefan Reck <stefan.reck@fau.de>
Date: Tue, 13 Apr 2021 19:08:27 +0200
Subject: [PATCH] move some commands to central parser

---
 orcasong/parser.py            | 129 ++++++++++++++++++++++++++++++++++
 orcasong/tools/concatenate.py |  43 ++++++------
 orcasong/tools/postproc.py    |   3 +
 orcasong/tools/shuffle2.py    |   3 +
 setup.py                      |   6 +-
 5 files changed, 160 insertions(+), 24 deletions(-)
 create mode 100644 orcasong/parser.py

diff --git a/orcasong/parser.py b/orcasong/parser.py
new file mode 100644
index 0000000..a95bc7b
--- /dev/null
+++ b/orcasong/parser.py
@@ -0,0 +1,129 @@
+"""
+Run OrcaSong functionalities from command line.
+
+"""
+import argparse
+from orcasong.tools.concatenate import concatenate
+from orcasong.tools.postproc import postproc_file
+from orcasong.tools.shuffle2 import h5shuffle2
+
+
+def _add_parser_concatenate(subparsers):
+    """Register the ``concatenate`` subcommand, dispatching to concatenate()."""
+    parser = subparsers.add_parser(
+        "concatenate",
+        description='Concatenate many small h5 files to a single large one '
+                    'in a km3pipe compatible format. This is intended for '
+                    'files that get generated by orcasong, i.e. all datasets '
+                    'should have the same length, with one row per blob. '
+                    'Compression options and the datasets to be created in '
+                    'the new file will be read from the first input file.')
+    # nargs="+": the settings come from the first input file, so at least one
+    # file is required; argparse then rejects an empty list with a usage error.
+    parser.add_argument(
+        'file', type=str, nargs="+",
+        help="Define the files to concatenate. If it's one argument: A txt list "
+             "with paths of h5 files to concatenate (one path per line). "
+             "If it's multiple arguments: The paths of h5 files to concatenate.")
+    parser.add_argument(
+        '--outfile', type=str, default="concatenated.h5",
+        help='The absolute filepath of the output .h5 file that will be created. ')
+    parser.add_argument(
+        '--no_used_files', action='store_true',
+        help="Per default, the paths of the input files are added as their own "
+             "datagroup in the output file. Use this flag to disable. ")
+    parser.add_argument(
+        '--skip_errors', action='store_true',
+        help="If true, ignore files that can't be concatenated. ")
+    parser.set_defaults(func=concatenate)
+
+
+def _add_parser_h5shuffle(subparsers):
+    """Attach the ``h5shuffle`` subcommand, which dispatches to postproc_file."""
+    sub = subparsers.add_parser(
+        "h5shuffle", description="Shuffle an h5 file using km3pipe.")
+    sub.set_defaults(func=postproc_file)
+    sub.add_argument("input_file", type=str, help="File to shuffle.")
+    sub.add_argument(
+        "--output_file", type=str,
+        help="Name of output file. Default: Auto generate name.")
+    sub.add_argument(
+        "--delete", action="store_true", help="Delete original file afterwards.")
+
+
+def _add_parser_h5shuffle2(subparsers):
+    """Attach the ``h5shuffle2`` subcommand to the given subparsers action.
+
+    The parsed arguments are meant to be passed to h5shuffle2 as keywords.
+    """
+    sub = subparsers.add_parser(
+        "h5shuffle2",
+        description=(
+            "Shuffle datasets in a h5file that have the same length. "
+            "Uses chunkwise readout for speed-up."
+        ),
+    )
+    sub.set_defaults(func=h5shuffle2)
+
+    sub.add_argument(
+        "input_file", type=str, help="Path of the file that will be shuffled.")
+    sub.add_argument(
+        "--output_file", type=str, default=None,
+        help=(
+            "If given, this will be the name of the output file. "
+            "Default: input_file + suffix."
+        ),
+    )
+    sub.add_argument(
+        "--datasets", type=str, nargs="*", default=("x", "y"),
+        help="Which datasets to include in output. Default: x, y",
+    )
+    sub.add_argument(
+        "--max_ram_fraction", type=float, default=0.25,
+        help=(
+            "in [0, 1]. Fraction of all available ram to use for reading one "
+            "batch of data Note: this should be <=~0.25 or so, since lots of "
+            "ram is needed for in-memory shuffling. Default: 0.25"
+        ),
+    )
+    sub.add_argument(
+        "--iterations", type=int, default=None,
+        help="Shuffle the file this many times. Default: Auto choose best number.",
+    )
+    sub.add_argument(
+        "--max_ram", type=int, default=None,
+        help=(
+            "Available ram in bytes. Default: Use fraction of maximum "
+            "available instead (see max_ram_fraction)."
+        ),
+    )
+
+
+def _add_parser_version(subparsers):
+    """Register the ``version`` subcommand, which prints orcasong's version."""
+    def show_version():
+        # Imported lazily, so only this subcommand looks up the version.
+        from orcasong import version
+        print(version)
+
+    parser = subparsers.add_parser(
+        "version", description="Show installed orcasong version.")
+    parser.set_defaults(func=show_version)
+
+
+def main():
+    """Parse the command line and run the chosen orcasong subcommand."""
+    parser = argparse.ArgumentParser(
+        prog="orcasong", description=__doc__,
+        formatter_class=argparse.RawTextHelpFormatter)
+    subparsers = parser.add_subparsers()
+    _add_parser_concatenate(subparsers)
+    _add_parser_h5shuffle(subparsers)
+    _add_parser_h5shuffle2(subparsers)
+    _add_parser_version(subparsers)
+
+    kwargs = vars(parser.parse_args())
+    # Subcommands are optional for argparse: without one, no "func" default is
+    # set, so fall back to printing the help instead of raising a KeyError.
+    func = kwargs.pop("func", parser.print_help)
+    func(**kwargs)
diff --git a/orcasong/tools/concatenate.py b/orcasong/tools/concatenate.py
index 4f3ed15..0a86218 100644
--- a/orcasong/tools/concatenate.py
+++ b/orcasong/tools/concatenate.py
@@ -305,7 +305,26 @@ def _copy_attrs(src_datset, target_dataset):
             warnings.warn(f"Error: Can not copy attribute {k}: {e}")
 
 
-def get_parser():
+def concatenate(file, outfile="concatenated.h5", no_used_files=False, skip_errors=False):
+    """
+    Concatenate h5 files into outfile; function form of the command line tool.
+
+    A single entry in ``file`` is passed to FileConcatenator.from_list, several
+    entries are used directly as the input files. Unless ``no_used_files`` is
+    set, append_used_files=True is passed on to FileConcatenator.concatenate.
+    """
+    if len(file) != 1:
+        fc = FileConcatenator(input_files=file, skip_errors=skip_errors)
+    else:
+        # a single argument is handled via from_list (a list of file paths)
+        fc = FileConcatenator.from_list(file[0], skip_errors=skip_errors)
+
+    fc.concatenate(
+        outfile, append_used_files=not no_used_files)
+
+
+def main():
+    warnings.warn("concatenate is deprecated and has been renamed to orcasong concatenate")
     parser = argparse.ArgumentParser(
         description='Concatenate many small h5 files to a single large one '
                     'in a km3pipe compatible format. This is intended for '
@@ -331,27 +350,7 @@ def get_parser():
     parser.add_argument(
         '--skip_errors', action='store_true',
         help="If true, ignore files that can't be concatenated. ")
-    return parser
-
-
-def main():
-    parser = get_parser()
-    parsed_args = parser.parse_args()
-
-    if len(parsed_args.file) == 1:
-        fc = FileConcatenator.from_list(
-            parsed_args.file[0],
-            skip_errors=parsed_args.skip_errors
-        )
-    else:
-        fc = FileConcatenator(
-            input_files=parsed_args.file,
-            skip_errors=parsed_args.skip_errors
-        )
-    fc.concatenate(
-        parsed_args.outfile,
-        append_used_files=not parsed_args.no_used_files,
-    )
+    concatenate(**vars(parser.parse_args()))
 
 
 if __name__ == '__main__':
diff --git a/orcasong/tools/postproc.py b/orcasong/tools/postproc.py
index b2191c8..acd3c38 100644
--- a/orcasong/tools/postproc.py
+++ b/orcasong/tools/postproc.py
@@ -3,6 +3,8 @@ Scripts for postprocessing h5 files, e.g. shuffling.
 """
 import os
 import argparse
+import warnings
+
 import h5py
 import km3pipe as kp
 import km3modules as km
@@ -115,6 +117,7 @@ def get_filepath_output(input_file, shuffle=True, event_skipper=None):
 
 
 def h5shuffle():
+    warnings.warn("h5shuffle is deprecated and has been renamed to orcasong h5shuffle")
     parser = argparse.ArgumentParser(description='Shuffle an h5 file using km3pipe.')
     parser.add_argument('input_file', type=str, help='File to shuffle.')
     parser.add_argument('--output_file', type=str,
diff --git a/orcasong/tools/shuffle2.py b/orcasong/tools/shuffle2.py
index 4185985..11083e1 100644
--- a/orcasong/tools/shuffle2.py
+++ b/orcasong/tools/shuffle2.py
@@ -2,6 +2,8 @@ import os
 import time
 import datetime
 import argparse
+import warnings
+
 import numpy as np
 import psutil
 import h5py
@@ -335,6 +337,7 @@ def slicify(fancy_indices):
 
 
 def run_parser():
+    warnings.warn("h5shuffle2 is deprecated and has been renamed to orcasong h5shuffle2")
     parser = argparse.ArgumentParser(
         description="Shuffle datasets in a h5file that have the same length. "
         "Uses chunkwise readout for speed-up."
diff --git a/setup.py b/setup.py
index e7c3320..281186f 100644
--- a/setup.py
+++ b/setup.py
@@ -27,11 +27,13 @@ setup(
                      'tag_regex': r'^(?P<prefix>v)?(?P<version>[^\+]+)(?P<suffix>.*)?$', },
 
     entry_points={'console_scripts': [
+        'orcasong=orcasong.parser:main',
+        'make_dsplit=orcasong.tools.make_data_split:main',
+        'plot_binstats=orcasong.plotting.plot_binstats:main',
+        # deprecated:
         'concatenate=orcasong.tools.concatenate:main',
         'h5shuffle=orcasong.tools.postproc:h5shuffle',
         'h5shuffle2=orcasong.tools.shuffle2:run_parser',
-        'make_dsplit=orcasong.tools.make_data_split:main',
-        'plot_binstats=orcasong.plotting.plot_binstats:main',
     ]}
 )
 
-- 
GitLab