diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 02a4137f70f518fb3dbe870c0096d3c58a61158b..bd50318ea4557faa33a524b093bdab14c7eea2a9 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,34 +1,76 @@
+variables:
+  PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
+
+
 stages:
-  - install
+  - test
+  - coverage
   - doc
   - release
 
-before_script:
-  - apt-get update -qq && apt-get install -y -qq libhdf5-dev
-  - pip install -U pip setuptools wheel numpy
-  - pip install .
 
-install-os:
-    image: docker.km3net.de/base/python:3
-    stage: install
+cache:
+  paths:
+    - .cache/pip
+    - venv/
+  key: "$CI_COMMIT_REF_SLUG"
+
+
+.virtualenv_template: &virtualenv_definition |
+  python -V
+  pip install virtualenv
+  virtualenv venv
+  source venv/bin/activate
+  make install
+
+
+test:
+    image: docker.km3net.de/base/python:3.6
+    stage: test
     script:
-        - pip install .
+        - *virtualenv_definition
+        - make test
+
+
+coverage:
+    image: docker.km3net.de/base/python:3.6
+    stage: coverage
+    script:
+        - *virtualenv_definition
+        - "make test-cov|grep TOTAL| awk '{printf \"COVERAGE: %.2f%%\", (1-$3/$2)*100 }'"
+    coverage: '/COVERAGE:\s*([0-9]*\.[0-9]*%)/'
+    artifacts:
+        paths:
+            - reports/coverage
+
+
+code-style:
+    image: docker.km3net.de/base/python:3.7
+    stage: test
+    script:
+        - *virtualenv_definition
+        - yapf -r -d -e "venv" .
+    allow_failure: true
+
 
 pages:
-    image: docker.km3net.de/base/python:3
+    image: docker.km3net.de/base/python:3.6
     stage: doc
     script:
+        - *virtualenv_definition
         - cd docs && make html
         - mv _build/html/ ../public/
+        - cd .. && mv reports/coverage public/coverage
     artifacts:
         paths:
             - public
+    cache: {}
     only:
         - tags
         - master
 
 pypi:
-    image: docker.km3net.de/base/python:3
+    image: docker.km3net.de/base/python:3.6
     stage: release
     cache: {}
     script:
@@ -37,4 +79,3 @@ pypi:
         - twine upload dist/*
     only:
         - tags
-
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..7fa617367dc4c586634e165f55fdc8404c19e3f6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,47 @@
+PKGNAME=orcasong
+ALLNAMES = $(PKGNAME)
+ALLNAMES += orcasong_contrib
+
+default: build
+
+all: install
+
+build:
+	@echo "No need to build anymore :)"
+
+install:
+	pip install .
+
+install-dev:
+	pip install -e .
+
+clean:
+	python setup.py clean --all
+	rm -f -r build/
+
+test:
+	py.test --junitxml=./reports/junit.xml -o junit_suite_name=$(PKGNAME) tests
+
+test-cov:
+	py.test tests --cov $(ALLNAMES) --cov-report term-missing --cov-report xml:reports/coverage.xml --cov-report html:reports/coverage tests
+
+flake8:
+	py.test --flake8
+
+pep8: flake8
+
+docstyle:
+	py.test --docstyle
+
+lint:
+	py.test --pylint
+
+dependencies:
+	pip install -Ur requirements.txt
+
+.PHONY: yapf
+yapf:
+	yapf -i -r $(PKGNAME)
+	yapf -i setup.py
+
+.PHONY: all clean build install install-dev test test-cov flake8 pep8 lint dependencies docstyle
diff --git a/Readme.md b/Readme.rst
similarity index 61%
rename from Readme.md
rename to Readme.rst
index 8ca83fb7d627ded1cfd180be0ed5eb1732f3a711..c3651fadc5595e96a79d67155552339d7ad3b40b 100644
--- a/Readme.md
+++ b/Readme.rst
@@ -1,13 +1,12 @@
-## OrcaSong: Generating DL images based on KM3NeT data
+OrcaSong: Generating DL images from KM3NeT data
+===============================================
 
-[![alt text][image_1]][hyperlink_1] [![alt text][image_2]][hyperlink_2]
+.. image:: https://git.km3net.de/ml/OrcaSong/badges/master/build.svg
+    :target: https://git.km3net.de/ml/OrcaSong/pipelines
 
-  [hyperlink_1]: https://git.km3net.de/ml/OrcaSong/pipelines
-  [image_1]: https://git.km3net.de/ml/OrcaSong/badges/master/build.svg
+.. image:: https://examples.pages.km3net.de/km3badges/docs-latest-brightgreen.svg
+    :target: https://ml.pages.km3net.de/OrcaSong
 
-  [hyperlink_2]: https://ml.pages.km3net.de/OrcaSong
-  [image_2]: https://examples.pages.km3net.de/km3badges/docs-latest-brightgreen.svg
-  
 
 The documentation for OrcaSong can be found at https://ml.pages.km3net.de/OrcaSong!
 
@@ -19,3 +18,8 @@ This means that OrcaSong takes a datafile with (neutrino-) events and based on t
 Currently, only simulations with a hdf5 data format are supported as an input.
 
 These event 'images' are required for some Deep Learning machine learning algorithms, e.g. Convolutional Neural Networks.
+
+OrcaSong can be installed via pip by running::
+
+    pip install orcasong
+
diff --git a/docs/CONTRIBUTING.rst b/docs/CONTRIBUTING.rst
index 229cbca232c76a659c5a116a0acb12f18954ada4..e11fd4b43062cdb600aa097e7511564ec7d6ca2a 100644
--- a/docs/CONTRIBUTING.rst
+++ b/docs/CONTRIBUTING.rst
@@ -24,7 +24,7 @@ necessary information which will help other people to understand the
 situation.
 
 Make a Fork of OrcaSong
-----------------------
+-----------------------
 
 You create a fork (your full own copy of the
 repository), change the code and when you are happy with the changes, you create
diff --git a/docs/conf.py b/docs/conf.py
index 6ce6f491a0d3f50f2c5911d910028222e7e90907..45be6ae2e83a274a439f533ea8c48f4ca7ce5104 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -12,20 +12,18 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-import os
-import sys
 from datetime import date
 from pkg_resources import get_distribution
 
 import orcasong
-#sys.path.insert(0, os.path.abspath('.'))
+# sys.path.insert(0, os.path.abspath('.'))
 
 
 # -- Project information -----------------------------------------------------
 
 project = "OrcaSong {}".format(orcasong.__version__)
-copyright = u'{0}, Michael Moser'.format(date.today().year)
-author = 'Michael Moser'
+copyright = u'{0}, Stefan Reck, Michael Moser'.format(date.today().year)
+author = 'Stefan Reck, Michael Moser'
 
 # The full version, including alpha/beta/rc tags
 release = get_distribution('orcasong').version
@@ -33,7 +31,6 @@ release = get_distribution('orcasong').version
 version = '.'.join(release.split('.')[:2])
 
 
-
 # -- General configuration ---------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
@@ -57,7 +54,7 @@ autosummary_generate = True
 
 # Document Python Code
 autoapi_type = 'python'
-autoapi_dirs = ['../orcasong', '../orcasong_contrib', '../orcasong_2']
+autoapi_dirs = ['../orcasong', '../orcasong_contrib']
 autoapi_options = [
     'members', 'undoc-members'
     # , 'private-members', 'special-members'
@@ -73,9 +70,9 @@ templates_path = ['_templates']
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 #
-#source_parsers = {
+# source_parsers = {
 #   '.md': 'recommonmark.parser.CommonMarkParser',}
-#source_suffix = ['.rst', '.md']
+# source_suffix = ['.rst', '.md']
 source_suffix = ['.rst']
 
 # The master toctree document.
@@ -169,7 +166,7 @@ latex_elements = {
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
     (master_doc, 'OrcaSong.tex', 'OrcaSong Documentation',
-     'Michael Moser', 'manual'),
+     'Stefan Reck, Michael Moser', 'manual'),
 ]
 
 
@@ -219,5 +216,7 @@ epub_exclude_files = ['search.html']
 
 # If true, `todo` and `todoList` produce output, else they produce nothing.
 todo_include_todos = True
+
+
 def setup(app):
-    app.add_stylesheet('_static/style.css')
\ No newline at end of file
+    app.add_stylesheet('_static/style.css')
diff --git a/docs/getting_started.rst b/docs/getting_started.rst
index 5b3cb1fdaae9b2b6350187ecb1e7fea524b883f4..0aaea684d421df804734dded041dec1f6192ef80 100644
--- a/docs/getting_started.rst
+++ b/docs/getting_started.rst
@@ -1,13 +1,15 @@
-Getting started with OrcaSong
-=============================
+Getting started
+===============
 
 .. contents:: :local:
 
 Introduction
 ------------
 
-On this page, you can find a step by step introduction into the usage of OrcaSong.
-The guide starts with some exemplary root simulation files made with jpp and ends with hdf5 event 'images' that can be used for deep neural networks.
+On this page, you can find a step-by-step introduction on how to prepare
+root files for OrcaSong.
+The guide starts with some exemplary root simulation files made with jpp and
+ends with hdf5 files ready for the use with OrcaSong.
 
 Preprocessing
 -------------
@@ -120,117 +122,22 @@ channel_id of a hit.
 Calibrating the .h5 file
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
-In order to fix this, we can run another tool, :code:`calibrate`, that will add the pos_xyz information to the hdf5 datafile::
+In order to fix this, the data needs to be calibrated.
+This can be done in two ways: You can either:
+
+- calibrate the files on the fly by providing the detx file to orcasong (recommended),
+- or use a separate tool from km3pipe called :code:`calibrate`, that will add the pos_xyz information to the hdf5 datafile.
+
+While the first method is the recommended one in principle, the second one can be useful for determining the proper bin edges by looking
+at single files. It can be used like this::
 
     calibrate /sps/km3net/users/mmoser/det_files/orca_115strings_av23min20mhorizontal_18OMs_alt9mvertical_v1.detx testfile.h5
 
 As you can see, you need a .detx geometry file for this "calibration". Typically, you can find the path of this detx
-file on the wiki page of the simulation production that you are using. This calibration step is optional, since OrcaSong
-can also do it on the fly, using a .detx file.
+file on the wiki page of the simulation production that you are using.
 
 At this point, we are now ready to start using OrcaSong for the generation of event images.
-
-
-Usage of OrcaSong
------------------
-
-In order to use OrcaSong, you can just install it with :code:`pip`::
-
-    ~/$: pip install orcasong
-
-Before you can start to use OrcaSong, you need a .detx detector geometry file that corresponds to your input files.
-OrcaSong is currently producing event "images" based on a 1 DOM / XYZ-bin assumption. This image generation is done
-automatically, based on the number of bins (n_bins) for each dimension XYZ that you supply as an input and based on the
-.detx file which contains the DOM positions.
-
-If your .detx file is not contained in the OrcaSong/detx_files folder, please add it to the repository!
-Currently, only the 115l ORCA 2016 detx file is available.
-
-At this point, you're finally ready to use OrcaSong.
-OrcaSong can be called from every directory by using the :code:`make_nn_images` command::
-
-    ~/$: make_nn_images testfile.h5 geofile.detx configfile.toml
-
-OrcaSong will then generate a hdf5 file with images that will be put in a "Results" folder at the path that
-you've specified in the configfile current path.
-Please checkout the default_config.toml file in the orcasong folder of the OrcaSong repo in order to get an idea about
-the structure of the config files.
-
-All available configuration options of OrcaSong can be found in /orcasong/default_config::
-
-    --- Documentation for every config parameter that is available ---
-
-    None arguments should be written as string: 'None'
-
-    Parameters
-    ----------
-    output_dirpath : str
-        Full path to the directory, where the orcasong output should be stored.
-    chunksize : int
-        Chunksize (along axis_0) that is used for saving the OrcaSong output to a .h5 file.
-    complib : str
-        Compression library that is used for saving the OrcaSong output to a .h5 file.
-        All PyTables compression filters are available, e.g. 'zlib', 'lzf', 'blosc', ... .
-    complevel : int
-        Compression level for the compression filter that is used for saving the OrcaSong output to a .h5 file.
-    n_bins : tuple of int
-        Declares the number of bins that should be used for each dimension, e.g. (x,y,z,t).
-        The option should be written as string, e.g. '11,13,18,60'.
-    det_geo : str
-        Declares what detector geometry should be used for the binning. E.g. 'Orca_115l_23m_h_9m_v'.
-    do2d : bool
-        Declares if 2D histograms, 'images', should be created.
-    do2d_plots : bool
-        Declares if pdf visualizations of the 2D histograms should be created, cannot be called if do2d=False.
-    do2d_plots_n: int
-        After how many events the event loop will be stopped (making the 2d plots in do2d_plots takes long time).
-    do3d : bool
-        Declares if 3D histograms should be created.
-    do4d : bool
-        Declares if 4D histograms should be created.
-    do4d_mode : str
-        If do4d is True, what should be used as the 4th dim after xyz.
-        Currently, only 'time' and 'channel_id' are available.
-    prod_ident : int
-        Optional int identifier for the used mc production.
-        This is e.g. useful, if you use events from two different mc productions, e.g. the 1-5GeV & 3-100GeV Orca 2016 MC.
-        In this case, the events are not fully distinguishable with only the run_id and the event_id!
-        In order to keep a separation, an integer can be set in the event_track for all events, such that they stay distinguishable.
-    timecut_mode : str
-        Defines what timecut should be used in hits_to_histograms.py.
-        Currently available:
-        'timeslice_relative': Cuts out the central 30% of the snapshot. The value of timecut_timespan doesn't matter in this case.
-        'trigger_cluster': Cuts based on the mean of the triggered hits.
-        'None': No timecut. The value of timecut_timespan doesn't matter in this case.
-    timecut_timespan : str/None
-        Defines what timespan should be used if a timecut is applied. Only relevant for timecut_mode = 'trigger_cluster'.
-        Currently available:
-        'all': [-350ns, 850ns] -> 20ns / bin (if e.g. 60 timebins)
-        'tight-0': [-450ns, 500ns] -> 15.8ns / bin (if e.g. 60 timebins)
-        'tight-1': [-250ns, 500ns] -> 12.5ns / bin (if e.g. 60 timebins)
-        'tight-2': [-150ns, 200ns] -> 5.8ns / bin (if e.g. 60 timebins)
-    do_mc_hits : bool
-        Declares if hits (False, mc_hits + BG) or mc_hits (True) should be processed.
-    data_cut_triggered : bool
-        Cuts away hits that haven't been triggered.
-    data_cut_e_low : float
-        Cuts away events that have an energy lower than data_cut_e_low.
-    data_cut_e_high : float
-        Cuts away events that have an energy higher than data_cut_e_high.
-    data_cut_throw_away : float
-        Cuts away random events with a certain probability (1: 100%, 0: 0%).
-    flush_freq : int
-        After how many events the accumulated output should be flushed to the harddisk.
-        A larger value leads to a faster orcasong execution, but it increases the RAM usage as well.
-
-    --- Documentation for every config parameter that is available ---
-
-
-
-
-
-If anything is still unclear after this introduction just tell me in the deep_learning channel on chat.km3net.de or
-write me an email at michael.m.moser@fau.de, such that I can improve this guide!
+See the page :ref:`orcasong_page` for instructions on how to use it.
 
 
 
diff --git a/docs/imgs/orcasong_function.PNG b/docs/imgs/orcasong_function.PNG
new file mode 100644
index 0000000000000000000000000000000000000000..a4ff67ee3277a80bfe4080bb8c1c28e713804460
Binary files /dev/null and b/docs/imgs/orcasong_function.PNG differ
diff --git a/docs/index.rst b/docs/index.rst
index d184789c31ea444155a642c2ba35547bd6afb966..3483d0d32294aa579c11316e44ff8a73a8bb1b8b 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -11,33 +11,20 @@
 
 |vspace|
 
-Welcome to OrcaSong's documentation!
-====================================
+.. include:: ../Readme.rst
 
-.. image:: https://git.km3net.de/ml/OrcaSong/badges/master/build.svg
-    :target: https://git.km3net.de/ml/OrcaSong/pipelines
-
-| OrcaSong is a part of the Deep Learning efforts for the neutrino telescope KM3NeT.
-| Find more information about KM3NeT on http://www.km3net.org.
-
-In this regard, OrcaSong is a project that produces KM3NeT event images based on the raw detector data.
-This means that OrcaSong takes a datafile with (neutrino-) events and based on this data, it produces 2D/3D/4D 'images' (histograms).
-Currently, only simulations with a hdf5 data format are supported as an input.
-These event 'images' are required for some Deep Learning machine learning algorithms, e.g. Convolutional Neural Networks.
-
-As of now, only ORCA detector simulations are supported, but ARCA geometries can be easily implemented as well.
-
-The main code for generating the images is located in orcanet/make_nn_images.py.
+.. toctree::
+    :hidden:
+    :titlesonly:
 
-As of now, the documentation contains a small introduction to get started and and a complete API documentation.
-Please feel free to contact me or just open an issue on Gitlab / Github if you have any suggestions.
+    self
 
 .. toctree::
    :maxdepth: 2
    :caption: Contents:
 
    getting_started
-   orcasong_2
+   orcasong
    CONTRIBUTING
    Source (Git) <https://git.km3net.de/ml/OrcaSong.git>
 
diff --git a/docs/orcasong.rst b/docs/orcasong.rst
new file mode 100644
index 0000000000000000000000000000000000000000..c2dca73f7476a369b03dc6b67137adc81593b231
--- /dev/null
+++ b/docs/orcasong.rst
@@ -0,0 +1,100 @@
+.. _orcasong_page:
+
+Producing images
+================
+
+The main functionality of OrcaSong is to generate multidimensional images
+out of ORCA data.
+
+.. image:: imgs/orcasong_function.PNG
+   :height: 400px
+
+Basic Use
+---------
+
+Import the main class, the FileBinner (see
+:py:class:`orcasong.core.FileBinner`),
+like this:
+
+.. code-block:: python
+
+    from orcasong.core import FileBinner
+
+The FileBinner allows you to make nd histograms ("images") from h5-converted root files.
+To do this, you can pass a list defining the binning. E.g., the following would
+set up the file binner to generate zt data:
+
+.. code-block:: python
+
+    bin_edges_list = [
+        ["pos_z", np.linspace(0, 200, 11)],
+        ["time", np.linspace(-50, 550, 101)],
+    ]
+
+    fb = FileBinner(bin_edges_list)
+
+Calling the object like this will show you the binning:
+
+.. code-block:: python
+
+    >>> fb
+    <FileBinner: ('pos_z', 'time') (10, 100)>
+
+As you can see, the FileBinner will produce zt data, with 10 and 100 bins,
+respectively.
+Convert a file like this:
+
+.. code-block:: python
+
+    fb.run(infile, outfile)
+
+Or convert multiple files, which will all be saved in the given folder:
+
+.. code-block:: python
+
+    fb.run_multi(infiles, outfolder)
+
+Calibration
+-----------
+
+You can supply a detx file to the file binner, in order to
+calibrate the data on the fly:
+
+.. code-block:: python
+
+    fb = FileBinner(bin_edges_list, det_file="path/to/det_file.detx")
+
+
+Adding mc_info
+--------------
+
+To add info from the mc_tracks (or from anywhere in the blob), you can define some
+function ``my_mcinfo_extractor`` which takes as an input a km3pipe blob,
+and outputs a dict mapping str to float.
+
+This will be saved as a numpy structured array "y" in the output file, with
+the str being the dtype names. Set up like follows:
+
+.. code-block:: python
+
+    fb = FileBinner(bin_edges_list, mc_info_extr=my_mcinfo_extractor)
+
+
+Plotting binning statistics
+---------------------------
+
+After the binning has succeeded, you can generate a plot which shows the
+distribution of hits among the bins you defined. For this, call the following
+console command::
+
+    plot_binstats my_plotname.pdf file_1_binned.h5 file_2_binned.h5 ...
+
+This will plot the statistics for the files file_1_binned.h5, file_2_binned.h5, ...
+into the file my_plotname.pdf.
+
+Using existing binnings
+-----------------------
+
+You can use existing bin edges and mc info extractors from ``orcasong.bin_edges``
+and ``orcasong.mc_info_extr``. These were designed for specific detector layouts
+and productions, though, and might not work properly when used on other data.
diff --git a/docs/orcasong_2.rst b/docs/orcasong_2.rst
deleted file mode 100644
index 09a9d5226b4b48a856d82a4a9c22d36a32f5eefc..0000000000000000000000000000000000000000
--- a/docs/orcasong_2.rst
+++ /dev/null
@@ -1,78 +0,0 @@
-OrcaSong 2
-==========
-
-OrcaSong 2 is an alternative to orcasong, with (hopefully) more
-accessible features.
-It has a slightly reduced functionality (no plots), but apart from that
-does the same job as orcasong.
-
-Basic Use
----------
-
-Import the main class, the FileBinner (see
-:py:class:`orcasong_2.core.FileBinner`),
-like this:
-
-.. code-block:: python
-
-    from orcasong_2.core import FileBinner
-
-The FileBinner allows to make nd histograms ("images") from calibrated and
-h5-converted root files.
-To do this, you can pass a list defining the binning. E.g., the following would
-set up the file binner to generate zt data:
-
-.. code-block:: python
-
-    bin_edges_list = [
-        ["pos_z", np.linspace(0, 10, 11)],
-        ["time", np.linspace(-50, 550, 101)],
-    ]
-
-    fb = FileBinner(bin_edges_list)
-
-Calling the object like this will show you the binning:
-
-.. code-block:: python
-
-    >>> fb
-    <FileBinner: ('pos_z', 'time') (10, 100)>
-
-As you can see, the FileBinner will produce zt data, with 10 and 100 bins,
-respectively.
-Convert a file like this:
-
-.. code-block:: python
-
-    fb.run(infile, outfile)
-
-Or event this for multiple files, which will all be saved in the given folder:
-
-.. code-block:: python
-
-    fb.run_multi(infiles, outfolder)
-
-Calibration
------------
-You can supply a detx file to the file binner, in order to
-calibrate the data on the fly:
-
-.. code-block:: python
-
-    fb = FileBinner(bin_edges_list, det_file="path/to/det_file.detx")
-
-
-Adding mc_info
---------------
-
-To add info from the mc_tracks (or from wherever), you can define some
-function `my_mcinfo_extractor` which takes as an input a km3pipe blob,
-and outputs a dict mapping str to float.
-
-This will be saved as a numpy structured array "y" in the output file, with
-the str being the dtype names. Set up like follows:
-
-.. code-block:: python
-
-    fb = FileBinner(bin_edges_list, mc_info_extr=my_mcinfo_extractor)
-
diff --git a/examples/example_do2d_plots_output.pdf b/examples/example_do2d_plots_output.pdf
deleted file mode 100644
index 255c56e27176b2bc00e1502c67b44d0d204fab0b..0000000000000000000000000000000000000000
Binary files a/examples/example_do2d_plots_output.pdf and /dev/null differ
diff --git a/orcasong/tests/__init__.py b/legacy/__init__.py
similarity index 100%
rename from orcasong/tests/__init__.py
rename to legacy/__init__.py
diff --git a/orcasong/default_config.toml b/legacy/default_config.toml
similarity index 100%
rename from orcasong/default_config.toml
rename to legacy/default_config.toml
diff --git a/orcasong/file_to_hits.py b/legacy/file_to_hits.py
similarity index 100%
rename from orcasong/file_to_hits.py
rename to legacy/file_to_hits.py
diff --git a/orcasong/geo_binning.py b/legacy/geo_binning.py
similarity index 100%
rename from orcasong/geo_binning.py
rename to legacy/geo_binning.py
diff --git a/orcasong/hits_to_histograms.py b/legacy/hits_to_histograms.py
similarity index 100%
rename from orcasong/hits_to_histograms.py
rename to legacy/hits_to_histograms.py
diff --git a/orcasong/io.py b/legacy/io.py
similarity index 100%
rename from orcasong/io.py
rename to legacy/io.py
diff --git a/orcasong/make_nn_images.py b/legacy/make_nn_images.py
similarity index 94%
rename from orcasong/make_nn_images.py
rename to legacy/make_nn_images.py
index cca183b6a4cafb0e3f889756013d5f8ef3cfeab4..39cab955ebf70ca918ae930351f38b56df2a85e0 100644
--- a/orcasong/make_nn_images.py
+++ b/legacy/make_nn_images.py
@@ -34,6 +34,7 @@ __version__ = '1.0'
 __email__ = 'michael.m.moser@fau.de'
 __status__ = 'Prototype'
 
+import warnings
 import os
 import sys
 #from memory_profiler import profile # for memory profiling, call with @profile; myfunc()
@@ -45,11 +46,16 @@ from docopt import docopt
 mpl.use('Agg')
 from matplotlib.backends.backend_pdf import PdfPages
 
-from orcasong.file_to_hits import EventDataExtractor
-from orcasong.hits_to_histograms import HistogramMaker
-from orcasong.io import load_config, check_user_input, make_output_dirs
-from orcasong.geo_binning import calculate_bin_edges
-from orcasong.utils import get_file_particle_type, EventSkipper
+from legacy.file_to_hits import EventDataExtractor
+from legacy.hits_to_histograms import HistogramMaker
+from legacy.io import load_config, check_user_input, make_output_dirs
+from legacy.geo_binning import calculate_bin_edges
+from legacy.utils import get_file_particle_type, EventSkipper
+
+
+# TODO deprecated
+warnings.warn("The original Orcasong is deprecated, and is no longer supported. "
+              "Consider switching to the new orcasong.")
 
 
 def parse_input():
diff --git a/orcasong/utils.py b/legacy/utils.py
similarity index 100%
rename from orcasong/utils.py
rename to legacy/utils.py
diff --git a/orcasong/__init__.py b/orcasong/__init__.py
index c1bb8a7dde64793732725c05f5c7fbea903e21ab..58814dc0620379f8c6e7cf309ec3ec560e9c2c43 100644
--- a/orcasong/__init__.py
+++ b/orcasong/__init__.py
@@ -1 +1,3 @@
-from .__version__ import version
\ No newline at end of file
+from .__version__ import version
+
+__version__ = version
diff --git a/orcasong/__version__.py b/orcasong/__version__.py
index 6879d8da2ed22c3bb5bdaa208c53f071e660bf1d..ceb23da3a155cc2929fb00e82f7f3b00a2df33eb 100644
--- a/orcasong/__version__.py
+++ b/orcasong/__version__.py
@@ -19,7 +19,8 @@ try:
     version = get_version(root='..', relative_to=__file__)
 except LookupError:
     try:
-        with open(join(realpath(dirname(__file__)), "version.txt"), 'r') as fobj:
+        with open(join(realpath(dirname(__file__)), "version.txt"),
+                  'r') as fobj:
             version = fobj.read()
     except IOError:
         pass
diff --git a/orcasong/bin_edges.py b/orcasong/bin_edges.py
new file mode 100644
index 0000000000000000000000000000000000000000..41b828a65e5317c0af42d6529085c3944a871bdc
--- /dev/null
+++ b/orcasong/bin_edges.py
@@ -0,0 +1,29 @@
+"""
+Binnings used for some existing detector configurations.
+
+These are made for the specific given runs. They might not be
+applicable to other data, and could cause errors or produce unexpected
+results when used on data other than the specified.
+"""
+
+import numpy as np
+
+
+def get_edges_2017_ztc():
+    """
+    Designed for the 2017 runs with the one line detector.
+
+    Will produce (18, 100, 31) 3d data, with dimensions ztc.
+
+    Z binning: 9.45 meters each
+    Time binning: 6 ns each
+    Channel id binning: 1 DOM per bin
+
+    """
+    bin_edges_list = [
+        ["pos_z", np.linspace(26, 198, 18 + 1)],
+        ["time", np.linspace(-50, 550, 100 + 1)],
+        ["channel_id", np.linspace(-0.5, 30.5, 31 + 1)],
+    ]
+    return bin_edges_list
+
diff --git a/orcasong/core.py b/orcasong/core.py
new file mode 100644
index 0000000000000000000000000000000000000000..b60beb688117bc5b795439e976dff229b59dc496
--- /dev/null
+++ b/orcasong/core.py
@@ -0,0 +1,276 @@
+import os
+import h5py
+import km3pipe as kp
+import km3modules as km
+
+import orcasong
+import orcasong.modules as modules
+import orcasong.plotting.plot_binstats as plot_binstats
+from orcasong.mc_info_extr import get_mc_info_extr
+
+
+__author__ = 'Stefan Reck'
+
+
+class FileBinner:
+    """
+    For making binned images and mc_infos, which can be used for conv. nets.
+
+    Can also add statistics of the binning to the h5 files, which can
+    be plotted to show the distribution of hits among the bins and how
+    many hits were cut off.
+
+    Attributes
+    ----------
+    n_statusbar : int, optional
+        Print a statusbar every n blobs.
+    n_memory_observer : int, optional
+        Print memory usage every n blobs.
+    complib : str
+        Compression library used for saving the output to a .h5 file.
+        All PyTables compression filters are available, e.g. 'zlib',
+        'lzf', 'blosc', ... .
+    complevel : int
+        Compression level for the compression filter that is used for
+        saving the output to a .h5 file.
+    flush_frequency : int
+        After how many events the accumulated output should be flushed to
+        the harddisk.
+        A larger value leads to a faster orcasong execution,
+        but it increases the RAM usage as well.
+    bin_plot_freq : int or None
+        If int is given, defines after how many blobs data for an overview
+        histogram is extracted.
+        It shows the distribution of hits, the bin edges, and how many hits
+        were cut off for each field name in bin_edges_list.
+        It will be saved to the same path as the outfile in run.
+
+    """
+    def __init__(self,
+                 bin_edges_list,
+                 mc_info_extr=None,
+                 det_file=None,
+                 center_time=True,
+                 event_skipper=None,
+                 add_bin_stats=True,
+                 chunksize=32,
+                 keep_event_info=True,
+                 keep_mc_tracks=False,
+                 add_t0=False,):
+        """
+        Parameters
+        ----------
+        bin_edges_list : List
+            List with the names of the fields to bin, and the respective bin
+            edges, including the left- and right-most bin edge.
+            Example: For 10 bins in the z direction, and 100 bins in time:
+                bin_edges_list = [
+                    ["pos_z", np.linspace(0, 10, 11)],
+                    ["time", np.linspace(-50, 550, 101)],
+                ]
+            Some examples can be found in orcasong.bin_edges.
+        mc_info_extr : function, optional
+            Function that extracts desired mc_info from a blob, which is then
+            stored as the "y" datafield in the .h5 file.
+            The function takes the km3pipe blob as an input, and returns
+            a dict mapping str to floats.
+            Some examples can be found in orcasong.mc_info_extr.
+        det_file : str, optional
+            Path to a .detx detector geometry file, which can be used to
+            calibrate the hits.
+        center_time : bool
+            Subtract time of first triggered hit from all hit times. Will
+            also be done for McHits if they are in the blob [default: True].
+        event_skipper : func, optional
+            Function that takes the blob as an input, and returns a bool.
+            If the bool is true, the blob will be skipped.
+        add_bin_stats : bool
+            Add statistics of the binning to the output file. They can be
+            plotted with util/bin_stats_plot.py [default: True].
+        chunksize : int
+            Chunksize (along axis_0) used for saving the output
+            to a .h5 file [default: 32].
+        keep_event_info : bool
+            If True, will keep the "event_info" table [default: True].
+        keep_mc_tracks : bool
+            If True, will keep the "McTracks" table [default: False].
+        add_t0 : bool
+            If true, add t0 to the time of hits. If using a det_file,
+            this will already have been done automatically [default: False].
+
+        """
+        self.bin_edges_list = bin_edges_list
+        self.mc_info_extr = mc_info_extr
+        self.det_file = det_file
+        self.add_t0 = add_t0
+        self.center_time = center_time
+        self.event_skipper = event_skipper
+
+        self.keep_event_info = keep_event_info
+        self.keep_mc_tracks = keep_mc_tracks
+        self.chunksize = chunksize
+
+        if add_bin_stats:
+            self.bin_plot_freq = 1
+        else:
+            self.bin_plot_freq = None
+
+        self.n_statusbar = 1000
+        self.n_memory_observer = 1000
+        self.complib = 'zlib'
+        self.complevel = 1
+        self.flush_frequency = 1000
+
+    def run(self, infile, outfile=None, save_plot=False):
+        """
+        Generate images from the infile, and save them as the outfile.
+
+        Parameters
+        ----------
+        infile : str
+            Path to the input file.
+        outfile : str, optional
+            Path to the output file (will be created). If none is given,
+            will auto generate the name and save it in the cwd.
+        save_plot : bool
+            Save the binning hists as a pdf. Only possible if add_bin_stats
+            is True.
+
+        """
+        if save_plot and self.bin_plot_freq is None:
+            raise ValueError("Can not make plot when add_bin_stats is False")
+
+        name, shape = self.get_names_and_shape()
+        print("Generating {} images with shape {}".format(name, shape))
+
+        if outfile is None:
+            infile_basename = os.path.basename(infile)
+            outfile_name = os.path.splitext(infile_basename)[0] + "_binned.h5"
+            outfile = os.path.join(os.getcwd(), outfile_name)
+
+        pipe = self.build_pipe(infile, outfile)
+        smry = pipe.drain()
+
+        if self.bin_plot_freq is not None:
+            hists = smry["BinningStatsMaker"]
+            plot_binstats.add_hists_to_h5file(hists, outfile)
+
+            if save_plot:
+                save_as = os.path.splitext(outfile)[0] + "_hists.pdf"
+                plot_binstats.plot_hists(hists, save_as)
+
+        add_version_info(outfile)
+
+    def run_multi(self, infiles, outfolder, save_plot=False):
+        """
+        Bin multiple files into their own output files each.
+        The output file names will be generated automatically.
+
+        Parameters
+        ----------
+        infiles : List
+            The path to infiles as str.
+        outfolder : str
+            The output folder to place them in.
+        save_plot : bool
+            Save the binning hists as a pdf. Only possible if add_bin_stats
+            is True.
+
+        """
+        if save_plot and self.bin_plot_freq is None:
+            raise ValueError("Can not make plot when add_bin_stats is False")
+
+        outfiles = []
+        for infile in infiles:
+            outfile_name = os.path.splitext(os.path.basename(infile))[0] \
+                           + "_hist.h5"
+            outfile = os.path.join(outfolder, outfile_name)
+            outfiles.append(outfile)
+
+            self.run(infile, outfile, save_plot=False)
+
+        if save_plot:
+            plot_binstats.plot_hist_of_files(
+                files=outfiles, save_as=outfolder+"binning_hist.pdf")
+
+    def build_pipe(self, infile, outfile):
+        """
+        Build the pipeline to generate images and mc_info for a file.
+        """
+        pipe = kp.Pipeline()
+
+        if self.n_statusbar is not None:
+            pipe.attach(km.common.StatusBar, every=self.n_statusbar)
+        if self.n_memory_observer is not None:
+            pipe.attach(km.common.MemoryObserver, every=self.n_memory_observer)
+
+        pipe.attach(kp.io.hdf5.HDF5Pump, filename=infile)
+        pipe.attach(km.common.Keep, keys=['EventInfo', 'Header', 'RawHeader',
+                                          'McTracks', 'Hits', 'McHits'])
+
+        if self.det_file:
+            pipe.attach(modules.DetApplier, det_file=self.det_file)
+
+        if self.center_time or self.add_t0:
+            pipe.attach(modules.TimePreproc,
+                        add_t0=self.add_t0,
+                        center_time=self.center_time)
+
+        if self.event_skipper is not None:
+            pipe.attach(modules.EventSkipper, event_skipper=self.event_skipper)
+
+        if self.bin_plot_freq is not None:
+            pipe.attach(modules.BinningStatsMaker,
+                        bin_plot_freq=self.bin_plot_freq,
+                        bin_edges_list=self.bin_edges_list)
+
+        pipe.attach(modules.ImageMaker,
+                    bin_edges_list=self.bin_edges_list,
+                    store_as="histogram")
+
+        if self.mc_info_extr is not None:
+            if isinstance(self.mc_info_extr, str):
+                mc_info_extr = get_mc_info_extr(self.mc_info_extr)
+            else:
+                mc_info_extr = self.mc_info_extr
+
+            pipe.attach(modules.McInfoMaker,
+                        mc_info_extr=mc_info_extr,
+                        store_as="mc_info")
+
+        keys_keep = ['histogram', 'mc_info']
+        if self.keep_event_info:
+            keys_keep.append('EventInfo')
+        if self.keep_mc_tracks:
+            keys_keep.append('McTracks')
+        pipe.attach(km.common.Keep, keys=keys_keep)
+
+        pipe.attach(kp.io.HDF5Sink,
+                    filename=outfile,
+                    complib=self.complib,
+                    complevel=self.complevel,
+                    chunksize=self.chunksize,
+                    flush_frequency=self.flush_frequency)
+        return pipe
+
+    def get_names_and_shape(self):
+        """
+        Get names and shape of the resulting x data,
+        e.g. (pos_z, time), (18, 50).
+        """
+        names, shape = [], []
+        for bin_name, bin_edges in self.bin_edges_list:
+            names.append(bin_name)
+            shape.append(len(bin_edges) - 1)
+
+        return tuple(names), tuple(shape)
+
+    def __repr__(self):
+        name, shape = self.get_names_and_shape()
+        return "<FileBinner: {} {}>".format(name, shape)
+
+
+def add_version_info(file):
+    """ Add current orcasong version to h5 file. """
+    with h5py.File(file, "a") as f:
+        f.attrs.create("orcasong", orcasong.__version__, dtype="S6")
diff --git a/orcasong_2/mc_info_types.py b/orcasong/mc_info_extr.py
similarity index 82%
rename from orcasong_2/mc_info_types.py
rename to orcasong/mc_info_extr.py
index 5132f94a76fee188441987bb552cf8050f0fce77..b9737864faade8c899e9500276e8fc2680990438 100644
--- a/orcasong_2/mc_info_types.py
+++ b/orcasong/mc_info_extr.py
@@ -2,8 +2,9 @@
 Functions that extract info from a blob for the mc_info / y datafield
 in the h5 files.
 
-These are examples made for the specific given runs. They might not be
-applicable to other data.
+These are made for the specific given runs. They might not be
+applicable to other data, and could cause errors or produce unexpected
+results when used on data other than the specified.
 
 """
 
@@ -25,47 +26,58 @@ def get_mc_info_extr(mc_info_extr):
 
     """
     if mc_info_extr == "mupage":
+        funct = "get_mupage_mc"
         mc_info_extr = get_mupage_mc
 
     elif mc_info_extr == "real_data":
-        mc_info_extr = get_data_info
+        funct = "get_real_data"
+        mc_info_extr = get_real_data
 
     elif mc_info_extr == "random_noise":
-        mc_info_extr = get_rn_mc
+        funct = "get_pure_noise"
+        mc_info_extr = get_pure_noise
 
     else:
         raise NameError("Unknown mc_info_type " + mc_info_extr)
 
+    # TODO deprecated
+    wrng = "The use of a str for mc_info_extr is deprecated. Import the " \
+           "function {} from orcasong.mc_info_extr instead, and use this " \
+           "as mc_info_extr".format(funct)
+    warnings.warn(wrng)
+
     return mc_info_extr
 
 
-def get_data_info(blob):
+def get_real_data(blob):
     """
-    Get info present for real data, e.g.
-    for the 2017 one line real data.
+    Get info present in real data.
+    Designed for the 2017 one line runs.
 
     """
     event_info = blob['EventInfo']
 
     track = {
-        'event_id': event_info.event_id,  # was .event_id[0] up to km3pipe 8.16.0
+        'event_id': event_info.event_id,
+        # was .event_id[0] up to km3pipe 8.16.0
         'run_id': event_info.run_id,
         'trigger_mask': event_info.trigger_mask,
     }
     return track
 
 
-def get_rn_mc(blob):
+def get_pure_noise(blob):
     """
-    For random noise, which has particle_type 0.
+    For simulated pure noise events, which have particle_type 0.
+
     """
-    event_id = blob['EventInfo'].event_id[0]
-    run_id = blob["EventInfo"].run_id
-    particle_type = 0
+    event_info = blob['EventInfo']
 
-    track = {'event_id': event_id,
-             'run_id': run_id,
-             'particle_type': particle_type}
+    track = {
+        'event_id': event_info.event_id[0],
+        'run_id': event_info.run_id,
+        'particle_type': 0
+    }
     return track
 
 
@@ -76,6 +88,7 @@ def get_mupage_mc(blob):
     Will only take into account muons with at least 1 McHit in the active
     line of the detector.
 
+    Designed for the 2017 run by run mupage simulations.
     e.g. mcv5.1_r3.mupage_10G.km3_AAv1.jterbr00002800.5103.root.h5
 
     Parameters
diff --git a/orcasong_2/modules.py b/orcasong/modules.py
similarity index 77%
rename from orcasong_2/modules.py
rename to orcasong/modules.py
index 9a6b64feba3b549760c2b9daccee1b233ef47842..2829f44fb51b4fa2f75cc5740077d41472c414e3 100644
--- a/orcasong_2/modules.py
+++ b/orcasong/modules.py
@@ -22,6 +22,7 @@ class McInfoMaker(kp.Module):
         Store the mcinfo with this name in the blob.
 
     """
+
     def configure(self):
         self.mc_info_extr = self.require('mc_info_extr')
         self.store_as = self.require('store_as')
@@ -29,10 +30,8 @@ class McInfoMaker(kp.Module):
     def process(self, blob):
         track = self.mc_info_extr(blob)
         dtypes = [(key, np.float64) for key in track.keys()]
-        kp_hist = kp.dataclasses.Table(track,
-                                       dtype=dtypes,
-                                       h5loc='y',
-                                       name='event_info')
+        kp_hist = kp.dataclasses.Table(
+            track, dtype=dtypes,  h5loc='y', name='event_info')
 
         blob[self.store_as] = kp_hist
         return blob
@@ -54,6 +53,7 @@ class TimePreproc(kp.Module):
         If true, center hit and mchit times.
 
     """
+
     def configure(self):
         self.add_t0 = self.require('add_t0')
         self.center_time = self.get('center_time', default=True)
@@ -77,9 +77,11 @@ class TimePreproc(kp.Module):
         if not self._t0_flag:
             self._t0_flag = True
             print("Adding t0 to hit times")
-        hits_time = blob["Hits"].time
-        hits_t0 = blob["Hits"].t0
-        blob["Hits"].time = np.add(hits_time, hits_t0)
+        blob["Hits"].time = np.add(blob["Hits"].time, blob["Hits"].t0)
+
+        if self.has_mchits:
+            blob["McHits"].time = np.add(blob["McHits"].time,
+                                         blob["McHits"].t0)
 
         return blob
 
@@ -106,7 +108,7 @@ class TimePreproc(kp.Module):
 
 class ImageMaker(kp.Module):
     """
-    Make a n-d histogram from the blob.
+    Make a n-d histogram from "Hits" in blob, and store it.
 
     Attributes
     ----------
@@ -117,6 +119,7 @@ class ImageMaker(kp.Module):
         Store the images with this name in the blob.
 
     """
+
     def configure(self):
         self.bin_edges_list = self.require('bin_edges_list')
         self.store_as = self.require('store_as')
@@ -133,7 +136,8 @@ class ImageMaker(kp.Module):
         title = name + "event_images"
 
         hist_one_event = histogram[np.newaxis, ...].astype(np.uint8)
-        kp_hist = kp.dataclasses.NDArray(hist_one_event, h5loc='x', title=title)
+        kp_hist = kp.dataclasses.NDArray(
+            hist_one_event, h5loc='x', title=title)
 
         blob[self.store_as] = kp_hist
         return blob
@@ -166,16 +170,17 @@ class BinningStatsMaker(kp.Module):
         for the time binning (field name "time").
 
     """
+
     def configure(self):
         self.bin_edges_list = self.require('bin_edges_list')
 
         self.pdf_path = self.get('pdf_path', default=None)
         self.bin_plot_freq = self.get("bin_plot_freq", default=1)
         self.res_increase = self.get('res_increase', default=5)
-        self.plot_bin_edges = self.get('plot_bin_edges', default=True)
 
         self.hists = {}
         for bin_name, org_bin_edges in self.bin_edges_list:
+            # dont space bin edges for time
             if bin_name == "time":
                 bin_edges = org_bin_edges
             else:
@@ -196,8 +201,8 @@ class BinningStatsMaker(kp.Module):
         Increase resolution of given binning.
         """
         increased_n_bins = (len(bin_edges) - 1) * self.res_increase + 1
-        bin_edges = np.linspace(bin_edges[0], bin_edges[-1],
-                                increased_n_bins)
+        bin_edges = np.linspace(
+            bin_edges[0], bin_edges[-1], increased_n_bins)
 
         return bin_edges
 
@@ -209,12 +214,17 @@ class BinningStatsMaker(kp.Module):
             for bin_name, hists_data in self.hists.items():
                 hist_bin_edges = hists_data["hist_bin_edges"]
 
-                data = blob["Hits"][bin_name]
-                hist = np.histogram(data, bins=hist_bin_edges)[0]
-
+                hits = blob["Hits"]
+                data = hits[bin_name]
+                # get how much is cut off due to these limits
                 out_pos = data[data > np.max(hist_bin_edges)].size
                 out_neg = data[data < np.min(hist_bin_edges)].size
 
+                # get all hits which are not cut off by other bin edges
+                data = hits[bin_name][self._is_in_limits(
+                    hits, excluded=bin_name)]
+                hist = np.histogram(data, bins=hist_bin_edges)[0]
+
                 self.hists[bin_name]["hist"] += hist
                 self.hists[bin_name]["cut_off"] += np.array([out_neg, out_pos])
 
@@ -238,6 +248,21 @@ class BinningStatsMaker(kp.Module):
         """
         return self.hists
 
+    def _is_in_limits(self, hits, excluded=None):
+        """ Get which hits are in the limits defined by ALL bin edges
+        (except for given one). """
+        inside = None
+        for dfield, edges in self.bin_edges_list:
+            if dfield == excluded:
+                continue
+            is_in = np.logical_and(hits[dfield] >= min(edges),
+                                   hits[dfield] <= max(edges))
+            if inside is None:
+                inside = is_in
+            else:
+                inside = np.logical_and(inside, is_in)
+        return inside
+
 
 class EventSkipper(kp.Module):
     """
@@ -250,6 +275,7 @@ class EventSkipper(kp.Module):
         If the bool is true, the blob will be skipped.
 
     """
+
     def configure(self):
         self.event_skipper = self.require('event_skipper')
 
@@ -271,29 +297,39 @@ class DetApplier(kp.Module):
         Path to a .detx detector geometry file.
 
     """
+
     def configure(self):
         self.det_file = self.require("det_file")
-        self.assert_t0_is_added = self.get("check_t0", default=False)
 
         self.calib = kp.calib.Calibration(filename=self.det_file)
+        self._calib_checked = False
+
+        # for debugging
+        self._assert_t0_is_added = False
 
     def process(self, blob):
-        if self.assert_t0_is_added:
-            original_time = blob["Hits"].time
+        if self._calib_checked is False:
+            if "pos_x" in blob["Hits"]:
+                warnings.warn("Warning: Using a det file, but pos_x in Hits "
+                              "detected. Is the file already "
+                              "calibrated? This might lead to errors with t0.")
+            self._calib_checked = True
+
+        # original_time = blob["Hits"].time
 
         blob = self.calib.process(blob, key="Hits", outkey="Hits")
         if "McHits" in blob:
             blob = self.calib.process(blob, key="McHits", outkey="McHits")
 
-        if self.assert_t0_is_added:
-            actual_time = blob["Hits"].time
-            t0 = blob["Hits"].t0
-            target_time = np.add(original_time, t0)
-            if not np.array_equal(actual_time, target_time):
-                print(actual_time)
-                print(target_time)
-                raise AssertionError("t0 not added!")
-            else:
-                print("t0 was added ok")
-
+        """
+        actual_time = blob["Hits"].time
+        t0 = blob["Hits"].t0
+        target_time = np.add(original_time, t0)
+        if not np.array_equal(actual_time, target_time):
+            print(actual_time)
+            print(target_time)
+            raise AssertionError("t0 not added!")
+        else:
+            print("t0 was added ok")
+        """
         return blob
diff --git a/orcasong_2/util/__init__.py b/orcasong/plotting/__init__.py
similarity index 100%
rename from orcasong_2/util/__init__.py
rename to orcasong/plotting/__init__.py
diff --git a/orcasong_2/util/binning_1d_visualizer.py b/orcasong/plotting/binning_1d_visualizer.py
similarity index 99%
rename from orcasong_2/util/binning_1d_visualizer.py
rename to orcasong/plotting/binning_1d_visualizer.py
index 1cc697d4764803ff9f3ceb607834686e10b174f1..07ece8c030458e0d9bfa37c3347dbe781fbc3d90 100644
--- a/orcasong_2/util/binning_1d_visualizer.py
+++ b/orcasong/plotting/binning_1d_visualizer.py
@@ -14,7 +14,7 @@ import numpy as np
 import km3pipe as kp
 import matplotlib.pyplot as plt
 
-from orcasong_2.modules import TimePreproc, DetApplier
+from orcasong.modules import TimePreproc, DetApplier
 
 __author__ = 'Stefan Reck'
 
diff --git a/orcasong_2/util/bin_stats_plot.py b/orcasong/plotting/plot_binstats.py
similarity index 86%
rename from orcasong_2/util/bin_stats_plot.py
rename to orcasong/plotting/plot_binstats.py
index 4c4edbcb9c934fd34db4d54faca8dd8bf825ffd8..425fa7840d99180c3f9a3a6a8b85b6478078dff4 100644
--- a/orcasong_2/util/bin_stats_plot.py
+++ b/orcasong/plotting/plot_binstats.py
@@ -3,13 +3,14 @@ Run with a parser to plot the binning statistics.
 Functions for plotting the bin stats made by the BinningStatsMaker module.
 """
 
+import os
+import warnings
+import argparse
 import matplotlib.pyplot as plt
 from matplotlib.backends.backend_pdf import PdfPages
 import h5py
 import numpy as np
-import argparse
-import os
-import warnings
+
 
 __author__ = 'Stefan Reck'
 
@@ -168,29 +169,22 @@ def plot_hist_of_files(save_as, files=None):
         Path of files to use instead.
 
     """
+    if files is None:
+        files = get_all_h5_files()
+
     hists_list = []
     opened_files = []
-
-    if not files:
-        all_files = os.listdir(os.getcwd())
-        files = []
-        for file in all_files:
-            if file.endswith(".h5"):
-                files.append(file)
+    print("Plotting stats of {} file(s)".format(len(files)))
 
     try:
-        print("Plotting stats of {} files".format(len(files)))
-        for i, file in enumerate(files):
-            if i % 100 == 0:
-                print("File {}..." .format(i))
-
+        print("Opening files...")
+        for file in files:
             f = h5py.File(file, "r")
             if "bin_stats/" not in f:
                 warnings.warn("ERROR: File {} does not have bin_stats dataset. "
                               "Skipping ...".format(file))
                 f.close()
                 continue
-
             hists_list.append(f["bin_stats/"])
             opened_files.append(f)
 
@@ -203,21 +197,32 @@ def plot_hist_of_files(save_as, files=None):
             file.close()
 
 
+def get_all_h5_files():
+    """ Get a list of all h5 files in the cwd. """
+    files = []
+    for file in os.listdir(os.getcwd()):
+        if file.endswith(".h5"):
+            files.append(file)
+    return files
+
+
 def main():
     parser = argparse.ArgumentParser(
-        description='Plot the bin stats in h5 files. Navigate to the folder '
-                    'where the h5 files are, and then run this script.')
+        description='Generate a plot with statistics of the binning. '
+                    'Can only be used on files generated with the FileBinner when '
+                    'add_bin_stats was set to true (default). ')
 
     parser.add_argument('save_as', type=str, nargs="?",
                         default="bin_stats_plot.pdf",
-                        help='Overwrite the default path or filename where'
-                             'this gets saved to.')
+                        help='Filename of the plot. Default: '
+                             'bin_stats_plot.pdf.')
 
-    parser.add_argument('files', type=str, nargs='*', default=None,
-                        help='List of files to plot. Default: ls.')
+    parser.add_argument('file', type=str, nargs='*', default=None,
+                        help='File(s) to plot. Default: Plot for all h5 '
+                             'files in current dir.')
 
     args = parser.parse_args()
-    plot_hist_of_files(args.save_as, args.files)
+    plot_hist_of_files(args.save_as, args.file)
 
 
 if __name__ == "__main__":
diff --git a/orcasong_2/Readme.rst b/orcasong_2/Readme.rst
deleted file mode 100644
index b824164f3b1bcddddb810408a0aa982a94caa47c..0000000000000000000000000000000000000000
--- a/orcasong_2/Readme.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-OrcaSong 2
-==========
-
-Several changes to the original OrcaSong. Allows to set desired binning via
-a list.
-Does not contain all features of OrcaSong, like getting mchits, plotting, etc.
\ No newline at end of file
diff --git a/orcasong_2/core.py b/orcasong_2/core.py
index 82154833f99d316bf4a915156dde8723f89ca999..7a1ed8a7959a9a4cfcb950c8863123a8b920fe05 100644
--- a/orcasong_2/core.py
+++ b/orcasong_2/core.py
@@ -1,261 +1,8 @@
-import os
-import km3pipe as kp
-import km3modules as km
-
-import orcasong_2.modules as modules
-import orcasong_2.util.bin_stats_plot as bs_plot
-from orcasong_2.mc_info_types import get_mc_info_extr
-
-
-__author__ = 'Stefan Reck'
-
-
-class FileBinner:
-    """
-    For making binned images and mc_infos, which can be used for conv. nets.
-
-    Can also add statistics of the binning to the h5 files, which can
-    be plotted to show the distribution of hits among the bins and how
-    many hits were cut off.
-
-    Attributes
-    ----------
-    bin_plot_freq : int or None
-        If int is given, defines after how many blobs data for an overview
-        histogram is extracted.
-        It shows the distribution of hits, the bin edges, and how many hits
-        were cut off for each field name in bin_edges_list.
-        It will be saved to the same path as the outfile in run.
-    keep_event_info : bool
-        If True, will keep the "event_info" table.
-    keep_mc_tracks : bool
-        If True, will keep the "McTracks" table.
-    n_statusbar : int, optional
-        Print a statusbar every n blobs.
-    n_memory_observer : int, optional
-        Print memory usage every n blobs.
-    chunksize : int
-        Chunksize (along axis_0) used for saving the output to a .h5 file.
-    complib : str
-        Compression library used for saving the output to a .h5 file.
-        All PyTables compression filters are available, e.g. 'zlib',
-        'lzf', 'blosc', ... .
-    complevel : int
-        Compression level for the compression filter that is used for
-        saving the output to a .h5 file.
-    flush_frequency : int
-        After how many events the accumulated output should be flushed to
-        the harddisk.
-        A larger value leads to a faster orcasong execution,
-        but it increases the RAM usage as well.
-
-    """
-    def __init__(self,
-                 bin_edges_list,
-                 mc_info_extr=None,
-                 det_file=None,
-                 add_t0=False,
-                 center_time=True,
-                 event_skipper=None,
-                 add_bin_stats=True):
-        """
-        Parameters
-        ----------
-        bin_edges_list : List
-            List with the names of the fields to bin, and the respective bin
-            edges, including the left- and right-most bin edge.
-            Example: For 10 bins in the z direction, and 100 bins in time:
-                bin_edges_list = [
-                    ["pos_z", np.linspace(0, 10, 11)],
-                    ["time", np.linspace(-50, 550, 101)],
-                ]
-        mc_info_extr : function or string, optional
-            Function that extracts desired mc_info from a blob, which is then
-            stored as the "y" datafield in the .h5 file.
-            Can also give a str identifier for an existing extractor.
-        det_file : str, optional
-            Path to a .detx detector geometry file, which can be used to
-            calibrate the hits.
-        add_t0 : bool
-            If true, add t0 to the time of hits. If using a det_file,
-            this will already have been done automatically.
-        center_time : bool
-            Subtract time of first triggered hit from all hit times.
-            Will also be done for McHits if they are in the blob.
-        event_skipper : func, optional
-            Function that takes the blob as an input, and returns a bool.
-            If the bool is true, the blob will be skipped.
-        add_bin_stats : bool
-            Add statistics of the binning to the output file. They can be
-            plotted with util/bin_stats_plot.py.
-
-        """
-        self.bin_edges_list = bin_edges_list
-        self.mc_info_extr = mc_info_extr
-        self.det_file = det_file
-        self.add_t0 = add_t0
-        self.center_time = center_time
-        self.event_skipper = event_skipper
-
-        if add_bin_stats:
-            self.bin_plot_freq = 1
-        else:
-            self.bin_plot_freq = None
-
-        self.keep_event_info = True
-        self.keep_mc_tracks = False
-
-        self.n_statusbar = 1000
-        self.n_memory_observer = 1000
-        self.chunksize = 32
-        self.complib = 'zlib'
-        self.complevel = 1
-        self.flush_frequency = 1000
-
-    def run(self, infile, outfile=None, save_plot=False):
-        """
-        Make images for a file.
-
-        Parameters
-        ----------
-        infile : str
-            Path to the input file.
-        outfile : str, optional
-            Path to the output file (will be created). If none is given,
-            will auto generate the name and save it in the cwd.
-        save_plot : bool
-            Save the binning hists as a pdf. Only possible if add_bin_stats
-            is True.
-
-        """
-        if save_plot and self.bin_plot_freq is None:
-            raise ValueError("Can not make plot when add_bin_stats is False")
-
-        name, shape = self.get_names_and_shape()
-        print("Generating {} images with shape {}".format(name, shape))
-
-        if outfile is None:
-            infile_basename = os.path.basename(infile)
-            outfile_name = os.path.splitext(infile_basename)[0] + "_binned.h5"
-            outfile = os.path.join(os.getcwd(), outfile_name)
-
-        pipe = self.build_pipe(infile, outfile)
-        smry = pipe.drain()
-
-        if self.bin_plot_freq is not None:
-            hists = smry["BinningStatsMaker"]
-            bs_plot.add_hists_to_h5file(hists, outfile)
-
-            if save_plot:
-                save_as = os.path.splitext(outfile)[0] + "_hists.pdf"
-                bs_plot.plot_hists(hists, save_as)
-
-    def run_multi(self, infiles, outfolder, save_plot=False):
-        """
-        Bin multiple files into their own output files each.
-
-        Parameters
-        ----------
-        infiles : List
-            The path to infiles as str.
-        outfolder : str
-            The output folder to place them in. The output file name will
-            be generated automatically.
-        save_plot : bool
-            Save the binning hists as a pdf. Only possible if add_bin_stats
-            is True.
-
-        """
-        if save_plot and self.bin_plot_freq is None:
-            raise ValueError("Can not make plot when add_bin_stats is False")
-
-        outfiles = []
-        for infile in infiles:
-            outfile_name = os.path.splitext(os.path.basename(infile))[0] \
-                           + "_hist.h5"
-            outfile = os.path.join(outfolder, outfile_name)
-            outfiles.append(outfile)
-
-            self.run(infile, outfile, save_plot=False)
-
-        if save_plot:
-            bs_plot.plot_hist_of_files(files=outfiles,
-                                       save_as=outfolder+"binning_hist.pdf")
-
-    def build_pipe(self, infile, outfile):
-        """
-        Build the pipeline to generate images and mc_info for a file.
-        """
-
-        pipe = kp.Pipeline()
-
-        if self.n_statusbar is not None:
-            pipe.attach(km.common.StatusBar, every=self.n_statusbar)
-        if self.n_memory_observer is not None:
-            pipe.attach(km.common.MemoryObserver, every=self.n_memory_observer)
-
-        pipe.attach(kp.io.hdf5.HDF5Pump, filename=infile)
-
-        pipe.attach(km.common.Keep, keys=['EventInfo', 'Header', 'RawHeader',
-                                          'McTracks', 'Hits', 'McHits'])
-
-        if self.det_file:
-            pipe.attach(modules.DetApplier, det_file=self.det_file)
-
-        if self.center_time or self.add_t0:
-            pipe.attach(modules.TimePreproc,
-                        add_t0=self.add_t0,
-                        center_time=self.center_time)
-
-        if self.event_skipper is not None:
-            pipe.attach(modules.EventSkipper, event_skipper=self.event_skipper)
-
-        if self.bin_plot_freq is not None:
-            pipe.attach(modules.BinningStatsMaker,
-                        bin_plot_freq=self.bin_plot_freq,
-                        bin_edges_list=self.bin_edges_list)
-
-        pipe.attach(modules.ImageMaker,
-                    bin_edges_list=self.bin_edges_list,
-                    store_as="histogram")
-
-        if self.mc_info_extr is not None:
-            if isinstance(self.mc_info_extr, str):
-                mc_info_extr = get_mc_info_extr(self.mc_info_extr)
-            else:
-                mc_info_extr = self.mc_info_extr
-
-            pipe.attach(modules.McInfoMaker,
-                        mc_info_extr=mc_info_extr,
-                        store_as="mc_info")
-
-        keys_keep = ['histogram', 'mc_info']
-        if self.keep_event_info:
-            keys_keep.append('EventInfo')
-        if self.keep_mc_tracks:
-            keys_keep.append('McTracks')
-        pipe.attach(km.common.Keep, keys=keys_keep)
-
-        pipe.attach(kp.io.HDF5Sink,
-                    filename=outfile,
-                    complib=self.complib,
-                    complevel=self.complevel,
-                    chunksize=self.chunksize,
-                    flush_frequency=self.flush_frequency)
-        return pipe
-
-    def get_names_and_shape(self):
-        """
-        Get names and shape of the resulting x data,
-        e.g. (pos_z, time), (18, 50).
-        """
-        names, shape = [], []
-        for bin_name, bin_edges in self.bin_edges_list:
-            names.append(bin_name)
-            shape.append(len(bin_edges) - 1)
-
-        return tuple(names), tuple(shape)
-
-    def __repr__(self):
-        name, shape = self.get_names_and_shape()
-        return "<FileBinner: {} {}>".format(name, shape)
+"""
+For backwards compatibility.
+"""
+import warnings
+from orcasong.core import FileBinner
+
+# TODO deprecated
+warnings.warn("orcasong_2 has been renamed to orcasong, please update your code.")
diff --git a/orcasong_contrib/data_tools/make_data_split/make_data_split.py b/orcasong_contrib/data_tools/make_data_split/make_data_split.py
index cce5f2fef4a523dfbf9e09e8503bb803d8e9808a..f278fa3b125e282c36158e43e3bac7544c323092 100644
--- a/orcasong_contrib/data_tools/make_data_split/make_data_split.py
+++ b/orcasong_contrib/data_tools/make_data_split/make_data_split.py
@@ -15,6 +15,8 @@ Options:
 
 """
 
+__author__ = 'Michael Moser'
+
 import os
 import toml
 import docopt
diff --git a/orcasong_contrib/data_tools/shuffle/shuffle_h5.py b/orcasong_contrib/data_tools/shuffle/shuffle_h5.py
index 6488408712e3443a60611ff92ef60eb41c42d9cc..9103dd8e5ff8b450cd43b3bdce685021c4785f26 100644
--- a/orcasong_contrib/data_tools/shuffle/shuffle_h5.py
+++ b/orcasong_contrib/data_tools/shuffle/shuffle_h5.py
@@ -22,7 +22,7 @@ import h5py
 import km3pipe as kp
 import km3modules as km
 from orcasong_contrib.data_tools.concatenate.concatenate_h5 import get_f_compression_and_chunking
-from orcasong_2.modules import EventSkipper
+from orcasong.modules import EventSkipper
 
 # from memory_profiler import profile # for memory profiling, call with @profile; myfunc()
 
diff --git a/orcasong_2/util/split_conc.py b/orcasong_contrib/data_tools/split_conc.py
similarity index 100%
rename from orcasong_2/util/split_conc.py
rename to orcasong_contrib/data_tools/split_conc.py
diff --git a/setup.py b/setup.py
index 1bc018980037c0ce14af5a0b1fa05fe06100ef95..15239ff826b799b7e35fcf65587a37d7daa3a536 100644
--- a/setup.py
+++ b/setup.py
@@ -1,16 +1,17 @@
 #!/usr/bin/env python
 from setuptools import setup, find_packages
-from pkg_resources import get_distribution, DistributionNotFound
+# from pkg_resources import get_distribution, DistributionNotFound
 
 with open('requirements.txt') as fobj:
     requirements = [l.strip() for l in fobj.readlines()]
 
 setup(
     name='orcasong',
-    description='Makes images for a NN based on the hit information of neutrino events in the neutrino telescope KM3NeT',
+    description='Makes images for a NN based on the hit information of neutrino '
+                'events in the neutrino telescope KM3NeT',
     url='https://git.km3net.de/ml/OrcaSong',
-    author='Michael Moser, Stefan Reck',
-    author_email='mmoser@km3net.de, michael.m.moser@fau.de, stefan.reck@fau.de',
+    author='Stefan Reck, Michael Moser',
+    author_email='stefan.reck@fau.de, mmoser@km3net.de, michael.m.moser@fau.de',
     license='AGPL',
     install_requires=requirements,
     packages=find_packages(),
@@ -25,12 +26,13 @@ setup(
     use_scm_version={'write_to': 'orcasong/version.txt',
                      'tag_regex': r'^(?P<prefix>v)?(?P<version>[^\+]+)(?P<suffix>.*)?$', },
 
-    entry_points={'console_scripts': ['make_nn_images=orcasong.make_nn_images:main',
-                                      'shuffle=orcasong_contrib.data_tools.shuffle.shuffle_h5:main',
-                                      'concatenate=orcasong_contrib.data_tools.concatenate.concatenate_h5:main',
-                                      'make_dsplit=orcasong_contrib.data_tools.make_data_split.make_data_split:main',
-                                      'plot_binstats=orcasong_2.util.bin_stats_plot:main']}
+    entry_points={'console_scripts': [
+        'make_nn_images=legacy.make_nn_images:main',
+        'shuffle=orcasong_contrib.data_tools.shuffle.shuffle_h5:main',
+        'concatenate=orcasong_contrib.data_tools.concatenate.concatenate_h5:main',
+        'make_dsplit=orcasong_contrib.data_tools.make_data_split.make_data_split:main',
+        'plot_binstats=orcasong.plotting.plot_binstats:main']}
 
 )
 
-__author__ = 'Michael Moser'
\ No newline at end of file
+__author__ = 'Stefan Reck, Michael Moser'
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/tests/test_bin_edges.py b/tests/test_bin_edges.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d3cd2853d6033648eba7367668613ce981c00e5
--- /dev/null
+++ b/tests/test_bin_edges.py
@@ -0,0 +1,21 @@
+import inspect
+from unittest import TestCase
+import orcasong.bin_edges
+from orcasong.core import FileBinner
+
+
+__author__ = 'Stefan Reck'
+
+
+class TestEdges(TestCase):
+    """
+    Just call all functions in the bin_edges module and see if they work
+    with the filebinner.
+    """
+    def test_them(self):
+        funcs = [memb[1] for memb in inspect.getmembers(orcasong.bin_edges)
+                 if inspect.isfunction(memb[1])]
+
+        for func in funcs:
+            fb = FileBinner(func())
+            fb.get_names_and_shape()
diff --git a/tests/test_modules.py b/tests/test_modules.py
new file mode 100644
index 0000000000000000000000000000000000000000..49605cb06e8cdbf209d25c74d71f7d61a53033ad
--- /dev/null
+++ b/tests/test_modules.py
@@ -0,0 +1,341 @@
+from unittest import TestCase
+import numpy as np
+import orcasong.modules as modules
+from km3pipe.dataclasses import Table
+
+
+__author__ = 'Stefan Reck'
+
+
+class TestModules(TestCase):
+    def test_mc_info_maker(self):
+        """ Test the mcinfo maker on some dummy data. """
+        def mc_info_extr(blob):
+            hits = blob["Hits"]
+            return {"dom_id_0": hits.dom_id[0],
+                    "time_2": hits.time[2]}
+
+        in_blob = {
+            "Hits": Table({
+                'dom_id': [2, 3, 3],
+                'channel_id': [0, 1, 2],
+                'time': [10.1, 11.2, 12.3]
+            })
+        }
+        module = modules.McInfoMaker(
+            mc_info_extr=mc_info_extr, store_as="test")
+        out_blob = module.process(in_blob)
+
+        self.assertSequenceEqual(list(out_blob.keys()), ["Hits", "test"])
+        self.assertSequenceEqual(list(out_blob["test"].dtype.names),
+                                 ('dom_id_0', 'time_2'))
+        np.testing.assert_array_equal(out_blob["test"]["dom_id_0"],
+                                      np.array([2, ]))
+        np.testing.assert_array_equal(out_blob["test"]["time_2"],
+                                      np.array([12.3, ]))
+
+    def test_event_skipper(self):
+        def event_skipper(blob):
+            return blob == 42
+
+        module = modules.EventSkipper(event_skipper=event_skipper)
+
+        self.assertEqual(module.process(42), None)
+        self.assertEqual(module.process(25), 25)
+
+
+class TestTimePreproc(TestCase):
+    def setUp(self):
+        self.in_blob = {
+            "Hits": Table({
+                'time': [1., 2., 3.],
+                "t0": [0.1, 0.2, 0.3],
+                "triggered": [0, 1, 1],
+            })
+        }
+
+        self.in_blob_mc = {
+            "Hits": Table({
+                'time': [1., 2., 3.],
+                "t0": [0.1, 0.2, 0.3],
+                "triggered": [0, 1, 1],
+            }),
+            "McHits": Table({
+                'time': [1., 2., 3.],
+                "t0": [0.1, 0.2, 0.3],
+                "triggered": [0, 1, 1],
+            })
+        }
+
+    def test_time_preproc_t0(self):
+        module = modules.TimePreproc(
+            add_t0=True, center_time=False)
+
+        target = {
+            "Hits": Table({
+                'time': [1.1, 2.2, 3.3],
+                "t0": [0.1, 0.2, 0.3],
+                "triggered": [0, 1, 1],
+            })
+        }
+
+        out_blob = module.process(self.in_blob)
+
+        self.assertSetEqual(set(out_blob.keys()), set(target.keys()))
+        np.testing.assert_array_equal(np.array(out_blob["Hits"]),
+                                      np.array(target["Hits"]))
+
+    def test_time_preproc_center(self):
+        module = modules.TimePreproc(
+            add_t0=False, center_time=True)
+
+        target = {
+            "Hits": Table({
+                'time': [-1., 0., 1.],
+                "t0": [0.1, 0.2, 0.3],
+                "triggered": [0, 1, 1],
+            })
+        }
+
+        out_blob = module.process(self.in_blob)
+
+        self.assertSetEqual(set(out_blob.keys()), set(target.keys()))
+        np.testing.assert_array_equal(np.array(out_blob["Hits"]),
+                                      np.array(target["Hits"]))
+
+    def test_time_preproc_t0_and_center(self):
+        module = modules.TimePreproc(
+            add_t0=True, center_time=True)
+
+        target = {
+            "Hits": Table({
+                'time': [-1.1, 0., 1.1],
+                "t0": [0.1, 0.2, 0.3],
+                "triggered": [0, 1, 1],
+            })
+        }
+
+        out_blob = module.process(self.in_blob)
+
+        self.assertSetEqual(set(out_blob.keys()), set(target.keys()))
+        np.testing.assert_array_almost_equal(
+            np.array(out_blob["Hits"].view("<f8")),
+            np.array(target["Hits"].view("<f8")))
+
+    def test_time_preproc_mchits_t0_and_center(self):
+        module = modules.TimePreproc(
+            add_t0=True, center_time=True)
+
+        target = {
+            "Hits": Table({
+                'time': [-1.1, 0., 1.1],
+                "t0": [0.1, 0.2, 0.3],
+                "triggered": [0, 1, 1],
+            }),
+            "McHits": Table({
+                'time': [-1.1, 0., 1.1],
+                "t0": [0.1, 0.2, 0.3],
+                "triggered": [0, 1, 1],
+            }),
+        }
+        out_blob = module.process(self.in_blob_mc)
+
+        self.assertSetEqual(set(out_blob.keys()), set(target.keys()))
+        np.testing.assert_array_almost_equal(
+            np.array(out_blob["McHits"].view("<f8")),
+            np.array(target["McHits"].view("<f8")))
+
+
+class TestImageMaker(TestCase):
+    def test_2d_xt_binning(self):
+        # (3 x 2) x-t binning
+        bin_edges_list = [
+            ["x", [3.5, 4.5, 5.5, 6.5]],
+            ["time", [0.5, 2, 3.5]]
+        ]
+
+        module = modules.ImageMaker(
+            bin_edges_list=bin_edges_list, store_as="histogram")
+        in_blob = {
+            "Hits": Table({
+                "x": [4, 5, 6],
+                'time': [1., 2., 3.],
+                "t0": [0.1, 0.2, 0.3],
+                "triggered": [0, 1, 1],
+            })
+        }
+
+        target = {
+            "Hits": Table({
+                "x": [4, 5, 6],
+                'time': [1., 2., 3.],
+                "t0": [0.1, 0.2, 0.3],
+                "triggered": [0, 1, 1],
+            }),
+            "histogram": np.array([[
+                [1, 0],
+                [0, 1],
+                [0, 1],
+            ]])
+        }
+
+        out_blob = module.process(in_blob)
+        self.assertSetEqual(set(out_blob.keys()), set(target.keys()))
+        np.testing.assert_array_almost_equal(
+            np.array(out_blob["Hits"].view("<f8")),
+            np.array(target["Hits"].view("<f8")))
+        np.testing.assert_array_almost_equal(
+            np.array(out_blob["histogram"]),
+            np.array(target["histogram"]))
+
+    def test_unknown_field(self):
+        # (3 x 2) x-t binning
+        bin_edges_list = [
+            ["aggg", [3.5, 4.5, 5.5, 6.5]],
+            ["time", [0.5, 2, 3.5]]
+        ]
+
+        module = modules.ImageMaker(
+            bin_edges_list=bin_edges_list, store_as="histogram")
+        in_blob = {
+            "Hits": Table({
+                "x": [4, 5, 6],
+                'time': [1., 2., 3.],
+                "t0": [0.1, 0.2, 0.3],
+                "triggered": [0, 1, 1],
+            })
+        }
+
+        with self.assertRaises(ValueError):
+            module.process(in_blob)
+
+    def test_1d_binning(self):
+        # (1, ) t binning
+        bin_edges_list = [
+            ["time", [2.5, 3.5]]
+        ]
+
+        module = modules.ImageMaker(
+            bin_edges_list=bin_edges_list, store_as="histogram")
+        in_blob = {
+            "Hits": Table({
+                'time': [1., 2., 3.],
+                "t0": [0.1, 0.2, 0.3],
+                "triggered": [0, 1, 1],
+            })
+        }
+
+        target = {
+            "Hits": Table({
+                'time': [1., 2., 3.],
+                "t0": [0.1, 0.2, 0.3],
+                "triggered": [0, 1, 1],
+            }),
+            "histogram": np.array([
+                [1, ],
+            ])
+        }
+
+        out_blob = module.process(in_blob)
+        self.assertSetEqual(set(out_blob.keys()), set(target.keys()))
+        np.testing.assert_array_almost_equal(
+            np.array(out_blob["Hits"].view("<f8")),
+            np.array(target["Hits"].view("<f8")))
+        np.testing.assert_array_almost_equal(
+            np.array(out_blob["histogram"]),
+            np.array(target["histogram"]))
+
+    def test_1d_binning_no_hits(self):
+        # (1, ) t binning
+        bin_edges_list = [
+            ["time", [3.5, 4.5]]
+        ]
+
+        module = modules.ImageMaker(
+            bin_edges_list=bin_edges_list, store_as="histogram")
+        in_blob = {
+            "Hits": Table({
+                'time': [1., 2., 3.],
+                "t0": [0.1, 0.2, 0.3],
+                "triggered": [0, 1, 1],
+            })
+        }
+
+        target = {
+            "Hits": Table({
+                'time': [1., 2., 3.],
+                "t0": [0.1, 0.2, 0.3],
+                "triggered": [0, 1, 1],
+            }),
+            "histogram": np.array([
+                [0, ],
+            ])
+        }
+
+        out_blob = module.process(in_blob)
+        self.assertSetEqual(set(out_blob.keys()), set(target.keys()))
+        np.testing.assert_array_almost_equal(
+            np.array(out_blob["Hits"].view("<f8")),
+            np.array(target["Hits"].view("<f8")))
+        np.testing.assert_array_almost_equal(
+            np.array(out_blob["histogram"]),
+            np.array(target["histogram"]))
+
+
+class TestBinningStatsMaker(TestCase):
+    def test_it(self):
+        # (3 x 2) x-t binning
+        bin_edges_list = [
+            ["x", [3.5, 4.5, 5.5, 6.5]],
+            ["time", [0.5, 2, 3.5]],
+            ["z", [1, 4]]
+        ]
+
+        in_blob = {
+            "Hits": Table({
+                "x": [4, 5, 6, 6],
+                'time': [1., 2., 3., 50],
+                "z": [0, 3, 4, 5],
+
+                "t0": [0.1, 0.2, 0.3, 0.4],
+                "triggered": [0, 1, 1, 1],
+            })
+        }
+
+        target = {
+            'x': {
+                'hist': np.array([0., 0., 0., 1., 0., 1.]),
+                'hist_bin_edges': np.array([3.5, 4., 4.5, 5., 5.5, 6., 6.5]),
+                'bin_edges': [3.5, 4.5, 5.5, 6.5],
+                'cut_off': np.array([0., 0.])
+            },
+            'time': {
+                'hist': np.array([0., 2.]),
+                'hist_bin_edges': [0.5, 2, 3.5],
+                'bin_edges': [0.5, 2, 3.5],
+                'cut_off': np.array([0., 1.])
+            },
+            'z': {
+                'hist': np.array([0., 2.]),
+                'hist_bin_edges': np.array([1., 2.5, 4.]),
+                'bin_edges': [1, 4],
+                'cut_off': np.array([1., 1.])
+            }
+        }
+
+        module = modules.BinningStatsMaker(
+            bin_edges_list=bin_edges_list, res_increase=2)
+        module.process(in_blob)
+        output = module.finish()
+        check_dicts_n_ray(output, target)
+
+
+def check_dicts_n_ray(a, b):
+    """ Check if dicts with dicts with ndarrays are equal. """
+    if set(a.keys()) != set(b.keys()):
+        raise KeyError("{} != {}".format(a.keys(), b.keys()))
+    for key in a.keys():
+        if set(a[key].keys()) != set(b[key].keys()):
+            raise KeyError("{} != {}".format(a[key].keys(), b[key].keys()))
+        for skey in a[key].keys():
+            np.testing.assert_array_almost_equal(a[key][skey], b[key][skey])