Skip to content
Snippets Groups Projects
Commit 6dea7902 authored by Stefan Reck's avatar Stefan Reck
Browse files

Breaking changes:

- orcasong is now called legacy
- orcasong_2 is now called orcasong

Other changes:
- Added one line 2017 bin edges to repo
- added unittests
- added versioning for files created with orcasong
- Expanded doc
- Added check to calib if calib has been done to file already
parent d0a4087e
No related branches found
Tags v2.0
No related merge requests found
Showing with 260 additions and 166 deletions
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
stages:
- install
- test
- coverage
- doc
- release
before_script:
- apt-get update -qq && apt-get install -y -qq libhdf5-dev
- pip install -U pip setuptools wheel numpy
- pip install .
install-os:
image: docker.km3net.de/base/python:3
stage: install
cache:
paths:
- .cache/pip
- venv/
key: "$CI_COMMIT_REF_SLUG"
.virtualenv_template: &virtualenv_definition |
python -V
pip install virtualenv
virtualenv venv
source venv/bin/activate
make install
test:
image: docker.km3net.de/base/python:3.6
stage: test
script:
- pip install .
- *virtualenv_definition
- make test
coverage:
image: docker.km3net.de/base/python:3.6
stage: coverage
script:
- *virtualenv_definition
- "make test-cov|grep TOTAL| awk '{printf \"COVERAGE: %.2f%%\", (1-$3/$2)*100 }'"
coverage: '/COVERAGE:\s*([0-9]*\.[0-9]*%)/'
artifacts:
paths:
- reports/coverage
code-style:
image: docker.km3net.de/base/python:3.7
stage: test
script:
- *virtualenv_definition
- yapf -r -d -e "venv" .
allow_failure: true
pages:
image: docker.km3net.de/base/python:3
image: docker.km3net.de/base/python:3.6
stage: doc
script:
- *virtualenv_definition
- cd docs && make html
- mv _build/html/ ../public/
- cd .. && mv reports/coverage public/coverage
artifacts:
paths:
- public
cache: {}
only:
- tags
- master
pypi:
image: docker.km3net.de/base/python:3
image: docker.km3net.de/base/python:3.6
stage: release
cache: {}
script:
......@@ -37,4 +79,3 @@ pypi:
- twine upload dist/*
only:
- tags
Makefile 0 → 100644
# Makefile for the orcasong package: install, test, lint and format helpers.

PKGNAME=orcasong
# All packages measured for coverage / formatted by yapf.
ALLNAMES = $(PKGNAME)
ALLNAMES += orcasong_contrib

default: build

all: install

build:
	@echo "No need to build anymore :)"

install:
	pip install .

install-dev:
	pip install -e .

clean:
	python setup.py clean --all
	rm -f -r build/

test:
	py.test --junitxml=./reports/junit.xml -o junit_suite_name=$(PKGNAME) tests

# Run the test suite with coverage reports (terminal, XML and HTML).
# Note: the `tests` directory is passed exactly once as the test path.
test-cov:
	py.test tests --cov $(ALLNAMES) --cov-report term-missing --cov-report xml:reports/coverage.xml --cov-report html:reports/coverage

flake8:
	py.test --flake8

pep8: flake8

docstyle:
	py.test --docstyle

lint:
	py.test --pylint

dependencies:
	pip install -Ur requirements.txt

.PHONY: yapf
yapf:
	yapf -i -r $(PKGNAME)
	yapf -i setup.py

# Declare every non-file target as phony so make never skips them because a
# file of the same name exists. (`test-cov` is the real target name; the
# previously listed `test-nocov` does not exist in this Makefile.)
.PHONY: default all clean build install install-dev test test-cov flake8 pep8 lint dependencies docstyle
## OrcaSong: Generating DL images based on KM3NeT data
OrcaSong: Generating DL images from KM3NeT data
===============================================
[![alt text][image_1]][hyperlink_1] [![alt text][image_2]][hyperlink_2]
.. image:: https://git.km3net.de/ml/OrcaSong/badges/master/build.svg
:target: https://git.km3net.de/ml/OrcaSong/pipelines
[hyperlink_1]: https://git.km3net.de/ml/OrcaSong/pipelines
[image_1]: https://git.km3net.de/ml/OrcaSong/badges/master/build.svg
.. image:: https://examples.pages.km3net.de/km3badges/docs-latest-brightgreen.svg
:target: https://ml.pages.km3net.de/OrcaSong
[hyperlink_2]: https://ml.pages.km3net.de/OrcaSong
[image_2]: https://examples.pages.km3net.de/km3badges/docs-latest-brightgreen.svg
The documentation for OrcaSong can be found at https://ml.pages.km3net.de/OrcaSong!
......@@ -19,3 +18,8 @@ This means that OrcaSong takes a datafile with (neutrino-) events and based on t
Currently, only simulations with a hdf5 data format are supported as an input.
These event 'images' are required for some Deep Learning machine learning algorithms, e.g. Convolutional Neural Networks.
OrcaSong can be installed via pip by running::
pip install orcasong
......@@ -24,7 +24,7 @@ necessary information which will help other people to understand the
situation.
Make a Fork of OrcaSong
----------------------
-----------------------
You create a fork (your full own copy of the
repository), change the code and when you are happy with the changes, you create
......
......@@ -12,20 +12,18 @@
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
from datetime import date
from pkg_resources import get_distribution
import orcasong
#sys.path.insert(0, os.path.abspath('.'))
# sys.path.insert(0, os.path.abspath('.'))
# -- Project information -----------------------------------------------------
project = "OrcaSong {}".format(orcasong.__version__)
copyright = u'{0}, Michael Moser'.format(date.today().year)
author = 'Michael Moser'
copyright = u'{0}, Stefan Reck, Michael Moser'.format(date.today().year)
author = 'Stefan Reck, Michael Moser'
# The full version, including alpha/beta/rc tags
release = get_distribution('orcasong').version
......@@ -33,7 +31,6 @@ release = get_distribution('orcasong').version
version = '.'.join(release.split('.')[:2])
# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
......@@ -57,7 +54,7 @@ autosummary_generate = True
# Document Python Code
autoapi_type = 'python'
autoapi_dirs = ['../orcasong', '../orcasong_contrib', '../orcasong_2']
autoapi_dirs = ['../orcasong', '../orcasong_contrib']
autoapi_options = [
'members', 'undoc-members'
# , 'private-members', 'special-members'
......@@ -73,9 +70,9 @@ templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
#source_parsers = {
# source_parsers = {
# '.md': 'recommonmark.parser.CommonMarkParser',}
#source_suffix = ['.rst', '.md']
# source_suffix = ['.rst', '.md']
source_suffix = ['.rst']
# The master toctree document.
......@@ -169,7 +166,7 @@ latex_elements = {
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'OrcaSong.tex', 'OrcaSong Documentation',
'Michael Moser', 'manual'),
'Stefan Reck, Michael Moser', 'manual'),
]
......@@ -219,5 +216,7 @@ epub_exclude_files = ['search.html']
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True
def setup(app):
app.add_stylesheet('_static/style.css')
\ No newline at end of file
app.add_stylesheet('_static/style.css')
Getting started with OrcaSong
=============================
Getting started
===============
.. contents:: :local:
Introduction
------------
On this page, you can find a step by step introduction into the usage of OrcaSong.
The guide starts with some exemplary root simulation files made with jpp and ends with hdf5 event 'images' that can be used for deep neural networks.
On this page, you can find a step by step introduction of how to prepare
root files for OrcaSong.
The guide starts with some exemplary root simulation files made with jpp and
ends with hdf5 files ready for the use with OrcaSong.
Preprocessing
-------------
......@@ -120,117 +122,22 @@ channel_id of a hit.
Calibrating the .h5 file
~~~~~~~~~~~~~~~~~~~~~~~~
In order to fix this, we can run another tool, :code:`calibrate`, that will add the pos_xyz information to the hdf5 datafile::
In order to fix this, the data needs to be calibrated.
This can be done in two ways: You can either:
- calibrate the files on the fly by providing the detx file to orcasong (recommended),
- or use a separate tool from km3pipe called :code:`calibrate`, that will add the pos_xyz information to the hdf5 datafile.
While the first method is the recommended one in principle, the second one can be useful for determining the proper bin edges by looking
at single files. It can be used like this::
calibrate /sps/km3net/users/mmoser/det_files/orca_115strings_av23min20mhorizontal_18OMs_alt9mvertical_v1.detx testfile.h5
As you can see, you need a .detx geometry file for this "calibration". Typically, you can find the path of this detx
file on the wiki page of the simulation production that you are using. This calibration step is optional, since OrcaSong
can also do it on the fly, using a .detx file.
file on the wiki page of the simulation production that you are using.
At this point, we are now ready to start using OrcaSong for the generation of event images.
Usage of OrcaSong
-----------------
In order to use OrcaSong, you can just install it with :code:`pip`::
~/$: pip install orcasong
Before you can start to use OrcaSong, you need a .detx detector geometry file that corresponds to your input files.
OrcaSong is currently producing event "images" based on a 1 DOM / XYZ-bin assumption. This image generation is done
automatically, based on the number of bins (n_bins) for each dimension XYZ that you supply as an input and based on the
.detx file which contains the DOM positions.
If your .detx file is not contained in the OrcaSong/detx_files folder, please add it to the repository!
Currently, only the 115l ORCA 2016 detx file is available.
At this point, you're finally ready to use OrcaSong.
OrcaSong can be called from every directory by using the :code:`make_nn_images` command::
~/$: make_nn_images testfile.h5 geofile.detx configfile.toml
OrcaSong will then generate a hdf5 file with images that will be put in a "Results" folder at the path that
you've specified in the configfile current path.
Please checkout the default_config.toml file in the orcasong folder of the OrcaSong repo in order to get an idea about
the structure of the config files.
All available configuration options of OrcaSong can be found in /orcasong/default_config::
--- Documentation for every config parameter that is available ---
None arguments should be written as string: 'None'
Parameters
----------
output_dirpath : str
Full path to the directory, where the orcasong output should be stored.
chunksize : int
Chunksize (along axis_0) that is used for saving the OrcaSong output to a .h5 file.
complib : str
Compression library that is used for saving the OrcaSong output to a .h5 file.
All PyTables compression filters are available, e.g. 'zlib', 'lzf', 'blosc', ... .
complevel : int
Compression level for the compression filter that is used for saving the OrcaSong output to a .h5 file.
n_bins : tuple of int
Declares the number of bins that should be used for each dimension, e.g. (x,y,z,t).
The option should be written as string, e.g. '11,13,18,60'.
det_geo : str
Declares what detector geometry should be used for the binning. E.g. 'Orca_115l_23m_h_9m_v'.
do2d : bool
Declares if 2D histograms, 'images', should be created.
do2d_plots : bool
Declares if pdf visualizations of the 2D histograms should be created, cannot be called if do2d=False.
do2d_plots_n: int
After how many events the event loop will be stopped (making the 2d plots in do2d_plots takes long time).
do3d : bool
Declares if 3D histograms should be created.
do4d : bool
Declares if 4D histograms should be created.
do4d_mode : str
If do4d is True, what should be used as the 4th dim after xyz.
Currently, only 'time' and 'channel_id' are available.
prod_ident : int
Optional int identifier for the used mc production.
This is e.g. useful, if you use events from two different mc productions, e.g. the 1-5GeV & 3-100GeV Orca 2016 MC.
In this case, the events are not fully distinguishable with only the run_id and the event_id!
In order to keep a separation, an integer can be set in the event_track for all events, such that they stay distinguishable.
timecut_mode : str
Defines what timecut should be used in hits_to_histograms.py.
Currently available:
'timeslice_relative': Cuts out the central 30% of the snapshot. The value of timecut_timespan doesn't matter in this case.
'trigger_cluster': Cuts based on the mean of the triggered hits.
'None': No timecut. The value of timecut_timespan doesn't matter in this case.
timecut_timespan : str/None
Defines what timespan should be used if a timecut is applied. Only relevant for timecut_mode = 'trigger_cluster'.
Currently available:
'all': [-350ns, 850ns] -> 20ns / bin (if e.g. 60 timebins)
'tight-0': [-450ns, 500ns] -> 15.8ns / bin (if e.g. 60 timebins)
'tight-1': [-250ns, 500ns] -> 12.5ns / bin (if e.g. 60 timebins)
'tight-2': [-150ns, 200ns] -> 5.8ns / bin (if e.g. 60 timebins)
do_mc_hits : bool
Declares if hits (False, mc_hits + BG) or mc_hits (True) should be processed.
data_cut_triggered : bool
Cuts away hits that haven't been triggered.
data_cut_e_low : float
Cuts away events that have an energy lower than data_cut_e_low.
data_cut_e_high : float
Cuts away events that have an energy higher than data_cut_e_high.
data_cut_throw_away : float
Cuts away random events with a certain probability (1: 100%, 0: 0%).
flush_freq : int
After how many events the accumulated output should be flushed to the harddisk.
A larger value leads to a faster orcasong execution, but it increases the RAM usage as well.
--- Documentation for every config parameter that is available ---
If anything is still unclear after this introduction just tell me in the deep_learning channel on chat.km3net.de or
write me an email at michael.m.moser@fau.de, such that I can improve this guide!
See the page :ref:`orcasong_page` for instructions on how to use it.
......
docs/imgs/orcasong_function.PNG

58 KiB

......@@ -11,33 +11,20 @@
|vspace|
Welcome to OrcaSong's documentation!
====================================
.. include:: ../Readme.rst
.. image:: https://git.km3net.de/ml/OrcaSong/badges/master/build.svg
:target: https://git.km3net.de/ml/OrcaSong/pipelines
| OrcaSong is a part of the Deep Learning efforts for the neutrino telescope KM3NeT.
| Find more information about KM3NeT on http://www.km3net.org.
In this regard, OrcaSong is a project that produces KM3NeT event images based on the raw detector data.
This means that OrcaSong takes a datafile with (neutrino-) events and based on this data, it produces 2D/3D/4D 'images' (histograms).
Currently, only simulations with a hdf5 data format are supported as an input.
These event 'images' are required for some Deep Learning machine learning algorithms, e.g. Convolutional Neural Networks.
As of now, only ORCA detector simulations are supported, but ARCA geometries can be easily implemented as well.
The main code for generating the images is located in orcanet/make_nn_images.py.
.. toctree::
:hidden:
:titlesonly:
As of now, the documentation contains a small introduction to get started and a complete API documentation.
Please feel free to contact me or just open an issue on Gitlab / Github if you have any suggestions.
self
.. toctree::
:maxdepth: 2
:caption: Contents:
getting_started
orcasong_2
orcasong
CONTRIBUTING
Source (Git) <https://git.km3net.de/ml/OrcaSong.git>
......
OrcaSong 2
==========
.. _orcasong_page:
OrcaSong 2 is an alternative to orcasong, with (hopefully) more
accessible features.
It has a slightly reduced functionality (no plots), but apart from that
does the same job as orcasong.
Producing images
================
The main functionality of OrcaSong is to generate multidimensional images
out of ORCA data.
.. image:: imgs/orcasong_function.PNG
:height: 400px
Basic Use
---------
Import the main class, the FileBinner (see
:py:class:`orcasong_2.core.FileBinner`),
:py:class:`orcasong.core.FileBinner`),
like this:
.. code-block:: python
from orcasong_2.core import FileBinner
from orcasong.core import FileBinner
The FileBinner allows to make nd histograms ("images") from calibrated and
h5-converted root files.
The FileBinner allows to make nd histograms ("images") from h5-converted root files.
To do this, you can pass a list defining the binning. E.g., the following would
set up the file binner to generate zt data:
.. code-block:: python
bin_edges_list = [
["pos_z", np.linspace(0, 10, 11)],
["pos_z", np.linspace(0, 200, 11)],
["time", np.linspace(-50, 550, 101)],
]
......@@ -46,7 +48,7 @@ Convert a file like this:
fb.run(infile, outfile)
Or event this for multiple files, which will all be saved in the given folder:
Or convert multiple files, which will all be saved in the given folder:
.. code-block:: python
......@@ -54,6 +56,7 @@ Or event this for multiple files, which will all be saved in the given folder:
Calibration
-----------
You can supply a detx file to the file binner, in order to
calibrate the data on the fly:
......@@ -65,8 +68,8 @@ calibrate the data on the fly:
Adding mc_info
--------------
To add info from the mc_tracks (or from wherever), you can define some
function `my_mcinfo_extractor` which takes as an input a km3pipe blob,
To add info from the mc_tracks (or from anywhere in the blob), you can define some
function ``my_mcinfo_extractor`` which takes as an input a km3pipe blob,
and outputs a dict mapping str to float.
This will be saved as a numpy structured array "y" in the output file, with
......@@ -76,3 +79,22 @@ the str being the dtype names. Set up like follows:
fb = FileBinner(bin_edges_list, mc_info_extr=my_mcinfo_extractor)
Plotting binning statistics
---------------------------
After the binning has succeeded, you can generate a plot which shows the
distribution of hits among the bins you defined. For this, call the following
console command::
plot_binstats my_plotname.pdf file_1_binned.h5 file_2_binned.h5 ...
This will plot the statistics for the files file_1_binned.h5, file_2_binned.h5, ...
into the file my_plotname.pdf.
Using existing binnings
-----------------------
You can use existing bin edges and mc info extractors from ``orcasong.bin_edges``
and ``orcasong.mc_info_extr``. These were designed for specific detector layouts
and productions, though, and might not work properly when used on other data.
File deleted
File moved
File moved
File moved
File moved
File moved
File moved
......@@ -34,6 +34,7 @@ __version__ = '1.0'
__email__ = 'michael.m.moser@fau.de'
__status__ = 'Prototype'
import warnings
import os
import sys
#from memory_profiler import profile # for memory profiling, call with @profile; myfunc()
......@@ -45,11 +46,16 @@ from docopt import docopt
mpl.use('Agg')
from matplotlib.backends.backend_pdf import PdfPages
from orcasong.file_to_hits import EventDataExtractor
from orcasong.hits_to_histograms import HistogramMaker
from orcasong.io import load_config, check_user_input, make_output_dirs
from orcasong.geo_binning import calculate_bin_edges
from orcasong.utils import get_file_particle_type, EventSkipper
from legacy.file_to_hits import EventDataExtractor
from legacy.hits_to_histograms import HistogramMaker
from legacy.io import load_config, check_user_input, make_output_dirs
from legacy.geo_binning import calculate_bin_edges
from legacy.utils import get_file_particle_type, EventSkipper
# TODO deprecated
warnings.warn("The original Orcasong is deprecated, and is no longer supported. "
"Consider switching to the new orcasong.")
def parse_input():
......
File moved
from .__version__ import version
\ No newline at end of file
from .__version__ import version
__version__ = version
......@@ -19,7 +19,8 @@ try:
version = get_version(root='..', relative_to=__file__)
except LookupError:
try:
with open(join(realpath(dirname(__file__)), "version.txt"), 'r') as fobj:
with open(join(realpath(dirname(__file__)), "version.txt"),
'r') as fobj:
version = fobj.read()
except IOError:
pass
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment