# file_to_hits.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Code that reads the h5 simulation files and extracts the necessary information for making event images."""

import numpy as np
import km3pipe as kp


def get_primary_track_index(event_blob):
    """
    Locates the primary (neutrino) track inside the 'McTracks' branch.

    The lookup relies on bjorkeny: the first track with bjorkeny != 0 is selected, since bjorkeny!=0 for the
    initial interacting neutrino.

    Parameters
    ----------
    event_blob : kp.io.HDF5Pump.blob
        HDF5Pump event blob.

    Returns
    -------
    primary_index : int
        Index of the first track in the 'McTracks' branch whose bjorkeny is non-zero.

    Raises
    ------
    IndexError
        If none of the tracks has a non-zero bjorkeny.

    """
    has_nonzero_bjorkeny = event_blob['McTracks'].bjorkeny != 0.0
    candidate_indices = np.nonzero(has_nonzero_bjorkeny)[0]
    # first match wins; an empty candidate list raises IndexError here
    return candidate_indices[0]


def get_time_residual_nu_interaction_mean_triggered_hits(time_interaction, hits_time, triggered):
    """
    Computes the time residual between the mean time of the triggered hits and the interaction time.

    Needed for the vertex_time reconstruction, where the absolute time scale has to be expressed relative to
    the triggered hits.

    Careful: sometimes it is not the neutrino event that is triggered, but just some random noise!
    In these very rare cases the returned residual can be very large (millions of ns), which might throw off
    a NN with vertex_time reconstruction.

    Parameters
    ----------
    time_interaction : float
        Time of the neutrino interaction measured in JTE time.
    hits_time : ndarray(ndim=1)
        Time of the event_hits measured in JTE time.
    triggered : ndarray(ndim=1)
        Array with trigger flags that specifies if the hit is triggered or not (a hit counts as triggered if
        its flag equals 1).

    Returns
    -------
    time_residual_vertex : float
        Mean time of the triggered hits (accumulated as float64) minus time_interaction.

    """
    is_triggered = triggered == 1
    mean_time_of_triggered = np.mean(hits_time[is_triggered], dtype=np.float64)
    return mean_time_of_triggered - time_interaction


def get_hits(event_blob, geo, do_mc_hits, data_cuts, do4d):
    """
    Returns a hits array that contains [pos_x, pos_y, pos_z, time, triggered, channel_id (optional)].

    Parameters
    ----------
    event_blob : kp.io.HDF5Pump.blob
        Event blob of the HDF5Pump which contains all information for one event.
    geo : kp.Geometry
        km3pipe Geometry instance that contains the geometry information of the detector.
        Only used if the event_blob is from a non-calibrated file, i.e. if the 'Hits' branch has no 'pos_x' field.
    do_mc_hits : bool
        Tells the function if the hits (mc_hits + BG) or the mc_hits only should be parsed.
        If False, the 'Hits' branch is read, otherwise the 'McHits' branch.
    data_cuts : dict
        Specifies if cuts should be applied.
        Contains the keys 'triggered' and 'energy_lower/upper_limit' and 'throw_away_prob'.
        Only the 'triggered' key is read here: if it is True, only triggered hits are kept.
    do4d : tuple(bool, str)
        Tuple that declares if 4D histograms should be created [0] and if yes, what should be used as the 4th dim after xyz.
        In the case of 'channel_id', this information needs to be included in the event_hits as well.

    Returns
    -------
    event_hits : ndarray(ndim=2)
        2D array that contains the hits data for the input event [pos_x, pos_y, pos_z, time, triggered, (channel_id)].
        One row per hit, one column per quantity.

    """
    # parse hits [x,y,z,time]
    hits = event_blob['Hits'] if do_mc_hits is False else event_blob['McHits']

    # check if blob already calibrated; the check is always made on the 'Hits' branch
    if 'pos_x' not in event_blob['Hits'].dtype.names:
        hits = geo.apply(hits)

    if data_cuts['triggered'] is True:
        # Mask the hits container itself. `hits.__array__` is a bound method and cannot be subscripted,
        # so indexing it raised a TypeError whenever the triggered cut was requested.
        hits = hits[hits.triggered.astype(bool)]

    pos_x, pos_y, pos_z = hits.pos_x, hits.pos_y, hits.pos_z
    hits_time = hits.time
    triggered = hits.triggered

    # turn each 1D column into shape (n_hits, 1) and join them side by side
    ax = np.newaxis
    event_hits = np.concatenate([pos_x[:, ax], pos_y[:, ax], pos_z[:, ax], hits_time[:, ax], triggered[:, ax]], axis=1)

    # The parentheses only spell out how Python already evaluates this condition ('and' binds tighter
    # than 'or'): 'xzt-c' appends the channel_id regardless of do4d[0].
    # NOTE(review): confirm that 'xzt-c' is really meant to bypass the do4d[0] flag.
    needs_channel_id = (do4d[0] is True and do4d[1] == 'channel_id') or do4d[1] == 'xzt-c'
    if needs_channel_id:
        event_hits = np.concatenate([event_hits, hits.channel_id[:, ax]], axis=1)

    return event_hits


def get_tracks(event_blob, file_particle_type, event_hits, prod_ident):
    """
    Returns the event_track, which contains important event_info and mc_tracks data for the input event.

    Parameters
    ----------
    event_blob : kp.io.HDF5Pump.blob
        Event blob of the HDF5Pump which contains all information for one event.
    file_particle_type : str
        String that specifies the type of particles that are contained in the file: ['undefined', 'muon', 'neutrino'].
    event_hits : ndarray(ndim=2)
        2D array that contains the hits data for the input event.
        Only read in the 'neutrino' case, where column 3 is taken as the hit time and column 4 as the trigger flag.
    prod_ident : int
        Optional int that identifies the used production, more documentation in the docs of the main function.
        If it is None, no 'prod_ident' column is added.

    Returns
    -------
    event_track : kp.dataclasses.Table
        Table (h5loc 'y', name 'Event_Information') with one float64 column per quantity, in this order:

        If file_particle_type = 'undefined':
        [event_id, run_id, particle_type (always 0), (prod_ident)].

        If file_particle_type = 'neutrino'/'muon':
        [event_id, particle_type, energy, is_cc, bjorkeny, dir_x, dir_y, dir_z, time_interaction, run_id,
        vertex_pos_x, vertex_pos_y, vertex_pos_z, time_residual_vertex/n_muons, (prod_ident)].

    Raises
    ------
    ValueError
        If file_particle_type is not one of 'undefined', 'muon', 'neutrino'.

    """
    # parse EventInfo and Header information
    event_id = event_blob['EventInfo'].event_id[0]
    # NOTE: the run_id is always taken from the 'Header' branch; blobs without a 'Header' are not handled here.
    run_id = event_blob['Header'].start_run.run_id.astype('float32')

    # collect all event_track information, dependent on file_particle_type.
    # The insertion order of the dict keys defines the column order of the returned table.
    if file_particle_type == 'undefined':
        track = {'event_id': event_id, 'run_id': run_id, 'particle_type': 0}

    elif file_particle_type == 'muon':
        mc_tracks = event_blob['McTracks']
        # take index 1, index 0 is the empty neutrino mc_track
        first_muon = mc_tracks[1]
        muons = mc_tracks[1:]

        # sum up the energy over all McTracks rows (index 0 is included in the sum)
        energy = np.sum(mc_tracks.energy)

        track = {'event_id': event_id,
                 'particle_type': first_muon.type,  # assumed that this is the same for all muons in a bundle
                 'energy': energy,
                 'is_cc': first_muon.is_cc,  # always 1 actually
                 'bjorkeny': first_muon.bjorkeny,  # always 0 actually
                 # all muons in a bundle are parallel, so just take dir of first muon
                 'dir_x': first_muon.dir_x, 'dir_y': first_muon.dir_y, 'dir_z': first_muon.dir_z,
                 'time_interaction': first_muon.time,  # same for all muons in a bundle
                 'run_id': run_id,
                 # vertex is the weighted (energy) mean of the individual muon vertices
                 'vertex_pos_x': np.average(muons.pos_x, weights=muons.energy),
                 'vertex_pos_y': np.average(muons.pos_y, weights=muons.energy),
                 'vertex_pos_z': np.average(muons.pos_z, weights=muons.energy),
                 # takes position of time_residual_vertex in 'neutrino' case
                 'n_muons': mc_tracks.shape[0] - 1}

    elif file_particle_type == 'neutrino':
        primary = event_blob['McTracks'][get_primary_track_index(event_blob)]
        time_interaction = primary.time  # actually always 0 for primary neutrino, measured in MC time

        hits_time, triggered = event_hits[:, 3], event_hits[:, 4]
        time_residual_vertex = get_time_residual_nu_interaction_mean_triggered_hits(time_interaction, hits_time, triggered)

        track = {'event_id': event_id, 'particle_type': primary.type, 'energy': primary.energy,
                 'is_cc': primary.is_cc, 'bjorkeny': primary.bjorkeny,
                 'dir_x': primary.dir_x, 'dir_y': primary.dir_y, 'dir_z': primary.dir_z,
                 'time_interaction': time_interaction, 'run_id': run_id,
                 'vertex_pos_x': primary.pos_x, 'vertex_pos_y': primary.pos_y, 'vertex_pos_z': primary.pos_z,
                 'time_residual_vertex': time_residual_vertex}

    else:
        # a single formatted message; separate arguments would make the exception text render as a tuple
        raise ValueError('The file_particle_type "{}" is not known.'.format(file_particle_type))

    if prod_ident is not None:
        track['prod_ident'] = prod_ident

    # every column is stored as float64
    dtypes = [(key, np.float64) for key in track]
    event_track = kp.dataclasses.Table(track, dtype=dtypes, h5loc='y', name='Event_Information')

    return event_track


class EventDataExtractor(kp.Module):
    """
    km3pipe module that turns the blob of a single event into a hits array and a track array.

    Both arrays are written back into the blob, under configurable keys, and contain all relevant
    information of the event.
    """
    def configure(self):
        """
        Reads the input arguments of the EventDataExtractor class.

        'file_particle_type', 'geo', 'do_mc_hits', 'data_cuts', 'do4d' and 'prod_ident' are required.
        'event_hits' and 'event_track' are optional and set the blob keys under which the results are stored
        (defaults: 'event_hits' and 'event_track').
        """
        # required parameters, bound to attributes of the same name (in this order)
        required_parameters = ('file_particle_type', 'geo', 'do_mc_hits', 'data_cuts', 'do4d', 'prod_ident')
        for parameter in required_parameters:
            setattr(self, parameter, self.require(parameter))

        # optional parameters: names of the output keys in the blob
        self.event_hits_key = self.get('event_hits', default='event_hits')
        self.event_track_key = self.get('event_track', default='event_track')

    def process(self, blob):
        """
        Adds the event_hits array and the event_track array to the blob and returns it.

        Parameters
        ----------
        blob : dict
            Km3pipe blob which contains all the data from the input file.

        Returns
        -------
        blob : dict
            The same blob, extended by the event_hits array and the event_track array.

        """
        # the hits are stored first, since the track extraction gets the hits array as an input
        event_hits = get_hits(blob, self.geo, self.do_mc_hits, self.data_cuts, self.do4d)
        blob[self.event_hits_key] = event_hits

        event_track = get_tracks(blob, self.file_particle_type, event_hits, self.prod_ident)
        blob[self.event_track_key] = event_track
        return blob