Skip to content
Snippets Groups Projects
Commit 6dea7902 authored by Stefan Reck's avatar Stefan Reck
Browse files

Breaking changes:

- orcasong is now called legacy
- orcasong_2 is now called orcasong

Other changes:
- Added one line 2017 bin edges to repo
- added unittests
- added versioning for files created with orcasong
- Expanded doc
- Added check to calib if calib has been done to file already
parent d0a4087e
No related branches found
Tags v2.0
No related merge requests found
Showing with 260 additions and 166 deletions
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
stages:
- install
- test
- coverage
- doc
- release
before_script:
- apt-get update -qq && apt-get install -y -qq libhdf5-dev
- pip install -U pip setuptools wheel numpy
- pip install .
install-os:
image: docker.km3net.de/base/python:3
stage: install
cache:
paths:
- .cache/pip
- venv/
key: "$CI_COMMIT_REF_SLUG"
.virtualenv_template: &virtualenv_definition |
python -V
pip install virtualenv
virtualenv venv
source venv/bin/activate
make install
test:
image: docker.km3net.de/base/python:3.6
stage: test
script:
- pip install .
- *virtualenv_definition
- make test
coverage:
image: docker.km3net.de/base/python:3.6
stage: coverage
script:
- *virtualenv_definition
- "make test-cov|grep TOTAL| awk '{printf \"COVERAGE: %.2f%%\", (1-$3/$2)*100 }'"
coverage: '/COVERAGE:\s*([0-9]*\.[0-9]*%)/'
artifacts:
paths:
- reports/coverage
code-style:
image: docker.km3net.de/base/python:3.7
stage: test
script:
- *virtualenv_definition
- yapf -r -d -e "venv" .
allow_failure: true
pages:
image: docker.km3net.de/base/python:3
image: docker.km3net.de/base/python:3.6
stage: doc
script:
- *virtualenv_definition
- cd docs && make html
- mv _build/html/ ../public/
- cd .. && mv reports/coverage public/coverage
artifacts:
paths:
- public
cache: {}
only:
- tags
- master
pypi:
image: docker.km3net.de/base/python:3
image: docker.km3net.de/base/python:3.6
stage: release
cache: {}
script:
......@@ -37,4 +79,3 @@ pypi:
- twine upload dist/*
only:
- tags
Makefile 0 → 100644
# Makefile for the orcasong package: install, test, lint and format helpers.

PKGNAME=orcasong
# All packages measured for coverage / formatted by yapf.
ALLNAMES = $(PKGNAME)
ALLNAMES += orcasong_contrib

default: build

all: install

build:
	@echo "No need to build anymore :)"

install:
	pip install .

install-dev:
	pip install -e .

clean:
	python setup.py clean --all
	rm -f -r build/

test:
	py.test --junitxml=./reports/junit.xml -o junit_suite_name=$(PKGNAME) tests

# Run the test suite with coverage reports (terminal, XML and HTML).
# Note: the `tests` directory is passed exactly once as the test path.
test-cov:
	py.test tests --cov $(ALLNAMES) --cov-report term-missing --cov-report xml:reports/coverage.xml --cov-report html:reports/coverage

flake8:
	py.test --flake8

pep8: flake8

docstyle:
	py.test --docstyle

lint:
	py.test --pylint

dependencies:
	pip install -Ur requirements.txt

.PHONY: yapf
yapf:
	yapf -i -r $(PKGNAME)
	yapf -i setup.py

# Declare every non-file target as phony so make never skips them because a
# file of the same name exists. (`test-cov` is the real target name; the
# previously listed `test-nocov` does not exist in this Makefile.)
.PHONY: default all clean build install install-dev test test-cov flake8 pep8 lint dependencies docstyle
## OrcaSong: Generating DL images based on KM3NeT data
OrcaSong: Generating DL images from KM3NeT data
===============================================
[![alt text][image_1]][hyperlink_1] [![alt text][image_2]][hyperlink_2]
.. image:: https://git.km3net.de/ml/OrcaSong/badges/master/build.svg
:target: https://git.km3net.de/ml/OrcaSong/pipelines
[hyperlink_1]: https://git.km3net.de/ml/OrcaSong/pipelines
[image_1]: https://git.km3net.de/ml/OrcaSong/badges/master/build.svg
.. image:: https://examples.pages.km3net.de/km3badges/docs-latest-brightgreen.svg
:target: https://ml.pages.km3net.de/OrcaSong
[hyperlink_2]: https://ml.pages.km3net.de/OrcaSong
[image_2]: https://examples.pages.km3net.de/km3badges/docs-latest-brightgreen.svg
The documentation for OrcaSong can be found at https://ml.pages.km3net.de/OrcaSong!
......@@ -19,3 +18,8 @@ This means that OrcaSong takes a datafile with (neutrino-) events and based on t
Currently, only simulations with a hdf5 data format are supported as an input.
These event 'images' are required for some Deep Learning machine learning algorithms, e.g. Convolutional Neural Networks.
OrcaSong can be installed via pip by running::
pip install orcasong
......@@ -24,7 +24,7 @@ necessary information which will help other people to understand the
situation.
Make a Fork of OrcaSong
----------------------
-----------------------
You create a fork (your full own copy of the
repository), change the code and when you are happy with the changes, you create
......
......@@ -12,20 +12,18 @@
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
from datetime import date
from pkg_resources import get_distribution
import orcasong
#sys.path.insert(0, os.path.abspath('.'))
# sys.path.insert(0, os.path.abspath('.'))
# -- Project information -----------------------------------------------------
project = "OrcaSong {}".format(orcasong.__version__)
copyright = u'{0}, Michael Moser'.format(date.today().year)
author = 'Michael Moser'
copyright = u'{0}, Stefan Reck, Michael Moser'.format(date.today().year)
author = 'Stefan Reck, Michael Moser'
# The full version, including alpha/beta/rc tags
release = get_distribution('orcasong').version
......@@ -33,7 +31,6 @@ release = get_distribution('orcasong').version
version = '.'.join(release.split('.')[:2])
# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
......@@ -57,7 +54,7 @@ autosummary_generate = True
# Document Python Code
autoapi_type = 'python'
autoapi_dirs = ['../orcasong', '../orcasong_contrib', '../orcasong_2']
autoapi_dirs = ['../orcasong', '../orcasong_contrib']
autoapi_options = [
'members', 'undoc-members'
# , 'private-members', 'special-members'
......@@ -73,9 +70,9 @@ templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
#source_parsers = {
# source_parsers = {
# '.md': 'recommonmark.parser.CommonMarkParser',}
#source_suffix = ['.rst', '.md']
# source_suffix = ['.rst', '.md']
source_suffix = ['.rst']
# The master toctree document.
......@@ -169,7 +166,7 @@ latex_elements = {
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'OrcaSong.tex', 'OrcaSong Documentation',
'Michael Moser', 'manual'),
'Stefan Reck, Michael Moser', 'manual'),
]
......@@ -219,5 +216,7 @@ epub_exclude_files = ['search.html']
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True
def setup(app):
app.add_stylesheet('_static/style.css')
\ No newline at end of file
app.add_stylesheet('_static/style.css')
Getting started with OrcaSong
=============================
Getting started
===============
.. contents:: :local:
Introduction
------------
On this page, you can find a step by step introduction into the usage of OrcaSong.
The guide starts with some exemplary root simulation files made with jpp and ends with hdf5 event 'images' that can be used for deep neural networks.
On this page, you can find a step by step introduction of how to prepare
root files for OrcaSong.
The guide starts with some exemplary root simulation files made with jpp and
ends with hdf5 files ready for the use with OrcaSong.
Preprocessing
-------------
......@@ -120,117 +122,22 @@ channel_id of a hit.
Calibrating the .h5 file
~~~~~~~~~~~~~~~~~~~~~~~~
In order to fix this, we can run another tool, :code:`calibrate`, that will add the pos_xyz information to the hdf5 datafile::
In order to fix this, the data needs to be calibrated.
This can be done in two ways: You can either:
- calibrate the files on the fly by providing the detx file to orcasong (recommended),
- or use a separate tool from km3pipe called :code:`calibrate`, that will add the pos_xyz information to the hdf5 datafile.
While the first method is the recommended one in principle, the second one can be useful for determining the proper bin edges by looking
at single files. It can be used like this::
calibrate /sps/km3net/users/mmoser/det_files/orca_115strings_av23min20mhorizontal_18OMs_alt9mvertical_v1.detx testfile.h5
As you can see, you need a .detx geometry file for this "calibration". Typically, you can find the path of this detx
file on the wiki page of the simulation production that you are using. This calibration step is optional, since OrcaSong
can also do it on the fly, using a .detx file.
file on the wiki page of the simulation production that you are using.
At this point, we are now ready to start using OrcaSong for the generation of event images.
Usage of OrcaSong
-----------------
In order to use OrcaSong, you can just install it with :code:`pip`::
~/$: pip install orcasong
Before you can start to use OrcaSong, you need a .detx detector geometry file that corresponds to your input files.
OrcaSong is currently producing event "images" based on a 1 DOM / XYZ-bin assumption. This image generation is done
automatically, based on the number of bins (n_bins) for each dimension XYZ that you supply as an input and based on the
.detx file which contains the DOM positions.
If your .detx file is not contained in the OrcaSong/detx_files folder, please add it to the repository!
Currently, only the 115l ORCA 2016 detx file is available.
At this point, you're finally ready to use OrcaSong.
OrcaSong can be called from every directory by using the :code:`make_nn_images` command::
~/$: make_nn_images testfile.h5 geofile.detx configfile.toml
OrcaSong will then generate a hdf5 file with images that will be put in a "Results" folder at the path that
you've specified in the configfile current path.
Please checkout the default_config.toml file in the orcasong folder of the OrcaSong repo in order to get an idea about
the structure of the config files.
All available configuration options of OrcaSong can be found in /orcasong/default_config::
--- Documentation for every config parameter that is available ---
None arguments should be written as string: 'None'
Parameters
----------
output_dirpath : str
Full path to the directory, where the orcasong output should be stored.
chunksize : int
Chunksize (along axis_0) that is used for saving the OrcaSong output to a .h5 file.
complib : str
Compression library that is used for saving the OrcaSong output to a .h5 file.
All PyTables compression filters are available, e.g. 'zlib', 'lzf', 'blosc', ... .
complevel : int
Compression level for the compression filter that is used for saving the OrcaSong output to a .h5 file.
n_bins : tuple of int
Declares the number of bins that should be used for each dimension, e.g. (x,y,z,t).
The option should be written as string, e.g. '11,13,18,60'.
det_geo : str
Declares what detector geometry should be used for the binning. E.g. 'Orca_115l_23m_h_9m_v'.
do2d : bool
Declares if 2D histograms, 'images', should be created.
do2d_plots : bool
Declares if pdf visualizations of the 2D histograms should be created, cannot be called if do2d=False.
do2d_plots_n: int
After how many events the event loop will be stopped (making the 2d plots in do2d_plots takes long time).
do3d : bool
Declares if 3D histograms should be created.
do4d : bool
Declares if 4D histograms should be created.
do4d_mode : str
If do4d is True, what should be used as the 4th dim after xyz.
Currently, only 'time' and 'channel_id' are available.
prod_ident : int
Optional int identifier for the used mc production.
This is e.g. useful, if you use events from two different mc productions, e.g. the 1-5GeV & 3-100GeV Orca 2016 MC.
In this case, the events are not fully distinguishable with only the run_id and the event_id!
In order to keep a separation, an integer can be set in the event_track for all events, such that they stay distinguishable.
timecut_mode : str
Defines what timecut should be used in hits_to_histograms.py.
Currently available:
'timeslice_relative': Cuts out the central 30% of the snapshot. The value of timecut_timespan doesn't matter in this case.
'trigger_cluster': Cuts based on the mean of the triggered hits.
'None': No timecut. The value of timecut_timespan doesn't matter in this case.
timecut_timespan : str/None
Defines what timespan should be used if a timecut is applied. Only relevant for timecut_mode = 'trigger_cluster'.
Currently available:
'all': [-350ns, 850ns] -> 20ns / bin (if e.g. 60 timebins)
'tight-0': [-450ns, 500ns] -> 15.8ns / bin (if e.g. 60 timebins)
'tight-1': [-250ns, 500ns] -> 12.5ns / bin (if e.g. 60 timebins)
'tight-2': [-150ns, 200ns] -> 5.8ns / bin (if e.g. 60 timebins)
do_mc_hits : bool
Declares if hits (False, mc_hits + BG) or mc_hits (True) should be processed.
data_cut_triggered : bool
Cuts away hits that haven't been triggered.
data_cut_e_low : float
Cuts away events that have an energy lower than data_cut_e_low.
data_cut_e_high : float
Cuts away events that have an energy higher than data_cut_e_high.
data_cut_throw_away : float
Cuts away random events with a certain probability (1: 100%, 0: 0%).
flush_freq : int
After how many events the accumulated output should be flushed to the harddisk.
A larger value leads to a faster orcasong execution, but it increases the RAM usage as well.
--- Documentation for every config parameter that is available ---
If anything is still unclear after this introduction just tell me in the deep_learning channel on chat.km3net.de or
write me an email at michael.m.moser@fau.de, such that I can improve this guide!
See the page :ref:`orcasong_page` for instructions on how to use it.
......
docs/imgs/orcasong_function.PNG

58 KiB

......@@ -11,33 +11,20 @@
|vspace|
Welcome to OrcaSong's documentation!
====================================
.. include:: ../Readme.rst
.. image:: https://git.km3net.de/ml/OrcaSong/badges/master/build.svg
:target: https://git.km3net.de/ml/OrcaSong/pipelines
| OrcaSong is a part of the Deep Learning efforts for the neutrino telescope KM3NeT.
| Find more information about KM3NeT on http://www.km3net.org.
In this regard, OrcaSong is a project that produces KM3NeT event images based on the raw detector data.
This means that OrcaSong takes a datafile with (neutrino-) events and based on this data, it produces 2D/3D/4D 'images' (histograms).
Currently, only simulations with a hdf5 data format are supported as an input.
These event 'images' are required for some Deep Learning machine learning algorithms, e.g. Convolutional Neural Networks.
As of now, only ORCA detector simulations are supported, but ARCA geometries can be easily implemented as well.
The main code for generating the images is located in orcanet/make_nn_images.py.
.. toctree::
:hidden:
:titlesonly:
As of now, the documentation contains a small introduction to get started and a complete API documentation.
Please feel free to contact me or just open an issue on Gitlab / Github if you have any suggestions.
self
.. toctree::
:maxdepth: 2
:caption: Contents:
getting_started
orcasong_2
orcasong
CONTRIBUTING
Source (Git) <https://git.km3net.de/ml/OrcaSong.git>
......
OrcaSong 2
==========
.. _orcasong_page:
OrcaSong 2 is an alternative to orcasong, with (hopefully) more
accessible features.
It has a slightly reduced functionality (no plots), but apart from that
does the same job as orcasong.
Producing images
================
The main functionality of OrcaSong is to generate multidimensional images
out of ORCA data.
.. image:: imgs/orcasong_function.PNG
:height: 400px
Basic Use
---------
Import the main class, the FileBinner (see
:py:class:`orcasong_2.core.FileBinner`),
:py:class:`orcasong.core.FileBinner`),
like this:
.. code-block:: python
from orcasong_2.core import FileBinner
from orcasong.core import FileBinner
The FileBinner allows to make nd histograms ("images") from calibrated and
h5-converted root files.
The FileBinner allows to make nd histograms ("images") from h5-converted root files.
To do this, you can pass a list defining the binning. E.g., the following would
set up the file binner to generate zt data:
.. code-block:: python
bin_edges_list = [
["pos_z", np.linspace(0, 10, 11)],
["pos_z", np.linspace(0, 200, 11)],
["time", np.linspace(-50, 550, 101)],
]
......@@ -46,7 +48,7 @@ Convert a file like this:
fb.run(infile, outfile)
Or event this for multiple files, which will all be saved in the given folder:
Or convert multiple files, which will all be saved in the given folder:
.. code-block:: python
......@@ -54,6 +56,7 @@ Or event this for multiple files, which will all be saved in the given folder:
Calibration
-----------
You can supply a detx file to the file binner, in order to
calibrate the data on the fly:
......@@ -65,8 +68,8 @@ calibrate the data on the fly:
Adding mc_info
--------------
To add info from the mc_tracks (or from wherever), you can define some
function `my_mcinfo_extractor` which takes as an input a km3pipe blob,
To add info from the mc_tracks (or from anywhere in the blob), you can define some
function ``my_mcinfo_extractor`` which takes as an input a km3pipe blob,
and outputs a dict mapping str to float.
This will be saved as a numpy structured array "y" in the output file, with
......@@ -76,3 +79,22 @@ the str being the dtype names. Set up like follows:
fb = FileBinner(bin_edges_list, mc_info_extr=my_mcinfo_extractor)
Plotting binning statistics
---------------------------
After the binning has succeeded, you can generate a plot which shows the
distribution of hits among the bins you defined. For this, call the following
console command::
plot_binstats my_plotname.pdf file_1_binned.h5 file_2_binned.h5 ...
This will plot the statistics for the files file_1_binned.h5, file_2_binned.h5, ...
into the file my_plotname.pdf.
Using existing binnings
-----------------------
You can use existing bin edges and mc info extractors from ``orcasong.bin_edges``
and ``orcasong.mc_info_extr``. These were designed for specific detector layouts
and productions, though, and might not work properly when used on other data.
File deleted
File moved
File moved
File moved
File moved
File moved
File moved
......@@ -34,6 +34,7 @@ __version__ = '1.0'
__email__ = 'michael.m.moser@fau.de'
__status__ = 'Prototype'
import warnings
import os
import sys
#from memory_profiler import profile # for memory profiling, call with @profile; myfunc()
......@@ -45,11 +46,16 @@ from docopt import docopt
mpl.use('Agg')
from matplotlib.backends.backend_pdf import PdfPages
from orcasong.file_to_hits import EventDataExtractor
from orcasong.hits_to_histograms import HistogramMaker
from orcasong.io import load_config, check_user_input, make_output_dirs
from orcasong.geo_binning import calculate_bin_edges
from orcasong.utils import get_file_particle_type, EventSkipper
from legacy.file_to_hits import EventDataExtractor
from legacy.hits_to_histograms import HistogramMaker
from legacy.io import load_config, check_user_input, make_output_dirs
from legacy.geo_binning import calculate_bin_edges
from legacy.utils import get_file_particle_type, EventSkipper
# TODO deprecated
warnings.warn("The original Orcasong is deprecated, and is no longer supported. "
"Consider switching to the new orcasong.")
def parse_input():
......
File moved
from .__version__ import version
\ No newline at end of file
from .__version__ import version
__version__ = version
......@@ -19,7 +19,8 @@ try:
version = get_version(root='..', relative_to=__file__)
except LookupError:
try:
with open(join(realpath(dirname(__file__)), "version.txt"), 'r') as fobj:
with open(join(realpath(dirname(__file__)), "version.txt"),
'r') as fobj:
version = fobj.read()
except IOError:
pass
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment