Commit c33bfa56 authored by Stefan Reck

Merge branch 'bundle_extractor' into 'master'

add muon bundle extractor

See merge request !21
parents 7efe0a81 4cef6b26
......@@ -29,6 +29,7 @@ If you have an orcasong config file, you can use it via the command line like th
orcasong run aanet_file.h5 orcasong_config.toml --detx_file detector.detx
Check out the git repo here https://git.km3net.de/ml/OrcaSong/-/tree/master/examples for some examples of config files.
Alternatively, you can use the python frontend of orcasong.
See :ref:`orcasong_page` for instructions on how to do this.
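For orientation, a minimal sketch of what a Python frontend call could look like for the extractor added in this merge request (the FileGraph class, its arguments and the run method are assumptions inferred from the graph-mode config keys below; see :ref:`orcasong_page` for the actual API):

from orcasong.core import FileGraph
from orcasong.extractors import BundleDataExtractor

fg = FileGraph(
    max_n_hits=2000,
    time_window=[-250, 1000],
    extractor=BundleDataExtractor("aanet_file.h5", only_downgoing_tracks=True),
    det_file="detector.detx",
)
fg.run(infile="aanet_file.h5", outfile="processed_graph.h5")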
......
# for atmospheric muon bundles, ORCA4, corsika sibyll 2.3c, graph network
mode = "graph"
max_n_hits = 2000
time_window = [-250, 1000]
extractor = "bundle_mc"
# center of mupage detector:
center_hits_to = [0.020, -0.040, 111.186]
[extractor_config]
inactive_du = 1
plane_point = [17, 17, 111]
only_downgoing_tracks = true
is_corsika = true
# for atmospheric muon bundles, data ORCA4, graph network
mode = "graph"
max_n_hits = 2000
time_window = [-250, 1000]
extractor = "bundle_data"
[extractor_config]
only_downgoing_tracks = true
# for atmospheric muon bundles, mupage ORCA4, graph network
mode = "graph"
max_n_hits = 2000
time_window = [-250, 1000]
extractor = "bundle_mc"
[extractor_config]
inactive_du = 1
plane_point = [17, 17, 111]
only_downgoing_tracks = true
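Each of the config snippets above can be saved to its own toml file and passed to the command line frontend exactly as shown at the top (the file names here are placeholders):

orcasong run mupage_file.aanet.h5 bundle_mc_config.toml --detx_file detector.detx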
......@@ -59,9 +59,11 @@ class BaseProcessor:
overwrite : bool
If True, overwrite the output file if it exists already.
If False, throw an error instead.
mc_info_to_float64 : bool
Convert everything in the mcinfo array to float 64 (Default: True).
Hint: int dtypes can not store nan!
sort_y : bool
Sort the columns in the y dataset alphabetically.
y_to_float64 : bool
Convert everything in the y dataset to float64 (Default: True).
Hint: Not all dtypes can store NaN (e.g. integer dtypes cannot)!
Attributes
----------
......@@ -97,7 +99,8 @@ class BaseProcessor:
chunksize=32,
keep_event_info=False,
overwrite=True,
mc_info_to_float64=True):
sort_y=True,
y_to_float64=True):
self.extractor = extractor
self.det_file = det_file
self.correct_mc_time = correct_mc_time
......@@ -109,7 +112,8 @@ class BaseProcessor:
self.chunksize = chunksize
self.keep_event_info = keep_event_info
self.overwrite = overwrite
self.mc_info_to_float64 = mc_info_to_float64
self.sort_y = sort_y
self.y_to_float64 = y_to_float64
self.n_statusbar = 1000
self.n_memory_observer = 1000
......@@ -212,7 +216,8 @@ class BaseProcessor:
if self.extractor is not None:
cmpts.append((modules.McInfoMaker, {
"extractor": self.extractor,
"to_float64": self.mc_info_to_float64,
"to_float64": self.y_to_float64,
"sort_y": self.sort_y,
"store_as": "mc_info"}))
if self.event_skipper is not None:
......
from .neutrino_chain import get_neutrino_mc_info_extr, get_muon_mc_info_extr, get_real_data_info_extr, get_random_noise_mc_info_extr
from .bundles import BundleMCExtractor, BundleDataExtractor
import warnings
import numpy as np
class BundleDataExtractor:
""" Get info present in real data. """
def __init__(self, infile, only_downgoing_tracks=False):
self.only_downgoing_tracks = only_downgoing_tracks
def __call__(self, blob):
# just take everything from event info
if not len(blob['EventInfo']) == 1:
warnings.warn(f"Event info has length {len(blob['EventInfo'])}, not 1")
track = dict(zip(blob['EventInfo'].dtype.names, blob['EventInfo'][0]))
track.update(**get_best_track(
blob, only_downgoing_tracks=self.only_downgoing_tracks))
track["n_hits"] = len(blob["Hits"])
track["n_triggered_hits"] = blob["Hits"]["triggered"].sum()
is_triggered = blob["Hits"]["triggered"].astype(bool)
track["n_triggered_doms"] = len(np.unique(blob["Hits"]["dom_id"][is_triggered]))
track["t_last_triggered"] = blob["Hits"]["time"][is_triggered].max()
unique_hits = get_only_first_hit_per_pmt(blob["Hits"])
track["n_pmts"] = len(unique_hits)
track["n_triggered_pmts"] = unique_hits["triggered"].sum()
if "n_hits_intime" in blob["EventInfo"]:
n_hits_intime = blob["EventInfo"]["n_hits_intime"]
else:
n_hits_intime = np.nan
track["n_hits_intime"] = n_hits_intime
return track
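# Taken together, the dict returned above contains every EventInfo field, the
# jg_*_reco values of the best track, and the hit-based summary columns
# n_hits, n_triggered_hits, n_triggered_doms, t_last_triggered, n_pmts,
# n_triggered_pmts and n_hits_intime.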
def get_only_first_hit_per_pmt(hits):
""" Keep only the first hit of each pmt. """
idents = np.stack((hits["dom_id"], hits["channel_id"]), axis=-1)
sorted_time_indices = np.argsort(hits["time"])
# indices of first hit per pmt in time sorted array:
indices = np.unique(idents[sorted_time_indices], axis=0, return_index=True)[1]
# indices of first hit per pmt in original array:
first_hit_indices = np.sort(sorted_time_indices[indices])
return hits[first_hit_indices]
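# Quick sketch of the selection above, with a plain structured array standing
# in for the Hits table:
#   hits = np.array(
#       [(1, 0, 12.0), (1, 0, 5.0), (1, 1, 7.0), (2, 0, 3.0)],
#       dtype=[("dom_id", int), ("channel_id", int), ("time", float)],
#   )
#   get_only_first_hit_per_pmt(hits)
#   # -> keeps the hits at t=5.0, 7.0 and 3.0; the later t=12.0 hit on the
#   #    same pmt (dom 1, channel 0) is dropped.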
def get_best_track(blob, missing_value=np.nan, only_downgoing_tracks=False):
"""
Get the best track, i.e. the first track: the one with the longest
reco chain and the highest likelihood / number of hits.
Optionally, consider only downgoing tracks when choosing.
"""
# hardcode names here since the first blob might not have Tracks
names = ('E',
'JCOPY_Z_M',
'JENERGY_CHI2',
'JENERGY_ENERGY',
'JENERGY_MUON_RANGE_METRES',
'JENERGY_NDF',
'JENERGY_NOISE_LIKELIHOOD',
'JENERGY_NUMBER_OF_HITS',
'JGANDALF_BETA0_RAD',
'JGANDALF_BETA1_RAD',
'JGANDALF_CHI2',
'JGANDALF_LAMBDA',
'JGANDALF_NUMBER_OF_HITS',
'JGANDALF_NUMBER_OF_ITERATIONS',
'JSHOWERFIT_ENERGY',
'JSTART_LENGTH_METRES',
'JSTART_NPE_MIP',
'JSTART_NPE_MIP_TOTAL',
'JVETO_NPE',
'JVETO_NUMBER_OF_HITS',
'dir_x',
'dir_y',
'dir_z',
'id',
'idx',
'length',
'likelihood',
'pos_x',
'pos_y',
'pos_z',
'rec_type',
't',
'group_id')
index = None
if "Tracks" in blob:
if only_downgoing_tracks:
downs = np.where(blob["Tracks"].dir_z < 0)[0]
if len(downs) != 0:
index = downs[0]
else:
index = 0
if index is not None:
track = blob["Tracks"][index]
return {f"jg_{name}_reco": track[name] for name in names}
else:
return {f"jg_{name}_reco": missing_value for name in names}
class BundleMCExtractor:
"""
For atmospheric muon studies on mupage or corsika simulations.
Parameters
----------
inactive_du : int or None
Don't count mchits in this du.
min_n_mchits_list : tuple
How many mchits a muon has to produce to be counted.
A separate set of output columns is created for each number in the tuple.
plane_point : tuple
For bundle diameter: XYZ coordinates of where the center of the
plane is in which the muon positions get calculated. Should be set
to the center of the detector!
with_mc_index : bool
Add a column called mc_index containing the mc run number,
which is parsed from the filename. This is for the case when
the same run id/event id combination appears in several mc files,
which can happen e.g. in run-by-run simulations when there are
multiple mc runs per data run.
is_corsika : bool
Set to True when processing Corsika simulations.
only_downgoing_tracks : bool
For the best track (JG reco), consider only the ones that are downgoing.
missing_value : float
If a value is missing, use this value instead.
"""
def __init__(self,
infile,
inactive_du=1,
min_n_mchits_list=(0, 1, 10),
plane_point=(17, 17, 111),
with_mc_index=True,
is_corsika=False,
only_downgoing_tracks=False,
missing_value=np.nan,
):
self.inactive_du = inactive_du
self.min_n_mchits_list = min_n_mchits_list
self.plane_point = plane_point
self.with_mc_index = with_mc_index
self.missing_value = missing_value
self.is_corsika = is_corsika
self.only_downgoing_tracks = only_downgoing_tracks
self.data_extractor = BundleDataExtractor(
infile, only_downgoing_tracks=only_downgoing_tracks)
if self.with_mc_index:
self.mc_index = get_mc_index(infile)
print(f"Using mc_index {self.mc_index}")
else:
self.mc_index = None
def __call__(self, blob):
mc_info = self.data_extractor(blob)
if self.is_corsika:
# Corsika has a primary particle; store info about it
prim_track = blob["McTracks"][0]
# primary should be track 0 with id 0
if prim_track["id"] != 0:
raise ValueError("Error finding primary: mc_tracks[0]['id'] != 0")
# direction of the primary
mc_info["dir_x"] = prim_track.dir_x
mc_info["dir_y"] = prim_track.dir_y
mc_info["dir_z"] = prim_track.dir_z
# use primary direction as plane normal
plane_normal = np.array(prim_track[["dir_x", "dir_y", "dir_z"]])
for fld in ("pos_x", "pos_y", "pos_z", "pdgid", "energy", "time"):
mc_info[f"primary_{fld}"] = prim_track[fld]
# remove primary for the following, since it's not a muon
blob["McTracks"] = blob["McTracks"][1:]
else:
# In mupage, all muons in a bundle are parallel. So just take dir of first muon
mc_info["dir_x"] = blob["McTracks"].dir_x[0]
mc_info["dir_y"] = blob["McTracks"].dir_y[0]
mc_info["dir_z"] = blob["McTracks"].dir_z[0]
plane_normal = None
# n_mc_hits of each muon in active dus
mchits_per_muon = get_mchits_per_muon(blob, inactive_du=self.inactive_du)
for min_n_mchits in self.min_n_mchits_list:
if min_n_mchits == 0:
mc_tracks_sel = blob["McTracks"]
suffix = "sim"
else:
mc_tracks_sel = blob["McTracks"][mchits_per_muon >= min_n_mchits]
suffix = f"{min_n_mchits}_mchits"
# total number of mchits of all muons
mc_info[f"n_mc_hits_{suffix}"] = np.sum(
mchits_per_muon[mchits_per_muon >= min_n_mchits])
# number of muons with at least the given number of mchits
mc_info[f"n_muons_{suffix}"] = len(mc_tracks_sel)
# summed up energy of all muons
mc_info[f"energy_{suffix}"] = np.sum(mc_tracks_sel.energy)
mc_info[f"energy_lost_in_can_{suffix}"] = np.sum(
mc_tracks_sel.energy_lost_in_can)
# bundle diameter; only makes sense for 2+ muons
if len(mc_tracks_sel) >= 2:
positions_plane = get_plane_positions(
positions=mc_tracks_sel[["pos_x", "pos_y", "pos_z"]].to_dataframe().to_numpy(),
directions=mc_tracks_sel[["dir_x", "dir_y", "dir_z"]].to_dataframe().to_numpy(),
plane_point=self.plane_point,
plane_normal=plane_normal,
)
pairwise_distances = get_pairwise_distances(positions_plane)
mc_info[f"max_pair_dist_{suffix}"] = pairwise_distances.max()
mc_info[f"mean_pair_dist_{suffix}"] = pairwise_distances.mean()
else:
mc_info[f"max_pair_dist_{suffix}"] = self.missing_value
mc_info[f"mean_pair_dist_{suffix}"] = self.missing_value
if self.with_mc_index:
mc_info["mc_index"] = self.mc_index
return mc_info
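# With the default min_n_mchits_list=(0, 1, 10), the loop above yields three
# column sets per quantity, e.g. n_muons_sim, n_muons_1_mchits and
# n_muons_10_mchits (and likewise for n_mc_hits_*, energy_*,
# energy_lost_in_can_*, max_pair_dist_* and mean_pair_dist_*).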
def get_plane_positions(positions, directions, plane_point, plane_normal=None):
"""
Get the position of each muon in a 2d plane.
Length will be preserved, i.e. 1m in 3d space is also 1m in plane space.
Parameters
----------
positions : np.array
The position of each muon in 3d cartesian space, shape (n_muons, 3).
directions : np.array
The direction of each muon as a cartesian unit vector, shape (n_muons, 3).
plane_point : np.array
A 3d cartesian point on the plane. This will be (0, 0) in the plane
coordinate system. Shape (3, ).
plane_normal : np.array, optional
A 3d cartesian vector perpendicular to the plane, shape (3, ).
Default: Use directions if all muons are parallel, otherwise raise.
Returns
-------
positions_plane : np.array
The 2d position of each muon in the plane, shape (n_muons, 2).
"""
if plane_normal is None:
if not np.all(directions == directions[0]):
raise ValueError(
"Muon tracks are not all parallel: plane_normal has to be specified!")
plane_normal = directions[0]
# get the 3d points where each muon collides with the plane
points = []
for i in range(len(directions)):
ndotu = np.dot(plane_normal, directions[i])
if abs(ndotu) < 1e-6:
raise ValueError("no intersection or line is within plane")
w = positions[i] - plane_point
si = -np.dot(plane_normal, w) / ndotu
psi = w + si * directions[i] + plane_point
points.append(psi)
points = np.array(points)
# Get the unit vectors of the plane: u has no y component, v has no x component.
u = np.array([1, 0, -plane_normal[0] / plane_normal[2]])
v = np.array([0, 1, -plane_normal[1] / plane_normal[2]])
# norm:
u = u / np.linalg.norm(u)
v = v / np.linalg.norm(v)
# xy coordinates in plane
x_dash = (points[:, 0] - plane_point[0]) / u[0]
y_dash = (points[:, 1] - plane_point[1]) / v[1]
position_plane = np.array([x_dash, y_dash]).T
return position_plane
def get_pairwise_distances(positions_plane, as_matrix=False):
"""
Get the perpendicular distance between each muon pair.
Parameters
----------
positions_plane : np.array
The 2d position of each muon in a plane, shape (n_muons, 2).
as_matrix : bool
Return the whole 2D distance matrix.
Returns
-------
np.array
The distances between each pair of muons.
1D if as_matrix is False (default), else 2D.
"""
pos_x, pos_y = positions_plane[:, 0], positions_plane[:, 1]
dists_x = np.expand_dims(pos_x, -2) - np.expand_dims(pos_x, -1)
dists_y = np.expand_dims(pos_y, -2) - np.expand_dims(pos_y, -1)
l2_dists = np.sqrt(dists_x**2 + dists_y**2)
if as_matrix:
return l2_dists
else:
return l2_dists[np.triu_indices_from(l2_dists, k=1)]
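# Usage sketch for the two helpers above, with three parallel vertical muons
# (so the shared direction serves as the plane normal):
#   pos = np.array([[0., 0., 0.], [3., 4., 0.], [0., 8., 0.]])
#   dirs = np.array([[0., 0., 1.]] * 3)
#   plane_pos = get_plane_positions(pos, dirs, plane_point=(0., 0., 0.))
#   dists = get_pairwise_distances(plane_pos)  # -> array([5., 8., 5.])
#   dists.max(), dists.mean()                  # -> 8.0 and 6.0, as stored in mc_info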
def get_mchits_per_muon(blob, inactive_du=None):
"""
For each muon in McTracks, get the number of McHits.
Parameters
----------
blob
The blob.
inactive_du : int, optional
McHits in this DU will not be counted.
Returns
-------
np.array
Number of mchits per muon; one entry per muon, in the same order
as blob["McTracks"]["id"].
"""
ids = blob["McTracks"]["id"]
# Origin (muon track id, as int) of each mchit
origin = blob["McHits"]["origin"]
if inactive_du:
# only hits in active line
origin = origin[blob["McHits"]["du"] != inactive_du]
# get how many mchits were produced per muon in the bundle
origin_dict = dict(zip(*np.unique(origin, return_counts=True)))
return np.array([origin_dict.get(i, 0) for i in ids])
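# Sketch with plain arrays standing in for the km3pipe tables:
#   blob = {"McTracks": {"id": np.array([1, 2])},
#           "McHits": {"origin": np.array([1, 1, 1, 2, 2]),
#                      "du":     np.array([1, 2, 2, 2, 3])}}
#   get_mchits_per_muon(blob, inactive_du=1)  # -> array([2, 2]); the mchit on DU 1 is not counted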
def get_mc_index(aanet_filename):
# e.g. mcv5.40.mupage_10G.sirene.jterbr00005782.jorcarec.aanet.365.h5
return int(aanet_filename.split(".")[-2])
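# For the example filename above this returns the trailing mc run index, 365.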
File moved
......@@ -9,8 +9,8 @@ EXTRACTORS = {
"nu_chain_muon": extractors.get_muon_mc_info_extr,
"nu_chain_noise": extractors.get_random_noise_mc_info_extr,
"nu_chain_data": extractors.get_real_data_info_extr,
# TODO "bundle_mc": ???,
# TODO "bundle_data": ???,
"bundle_mc": extractors.bundles.BundleMCExtractor,
"bundle_data": extractors.bundles.BundleDataExtractor,
}
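# The "extractor" key of a toml config selects an entry from this dict; the
# [extractor_config] table is presumably forwarded as keyword arguments, e.g.
#   EXTRACTORS["bundle_mc"](infile, inactive_du=1, plane_point=(17, 17, 111),
#                           only_downgoing_tracks=True, is_corsika=True)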
MODES = {
......
......@@ -28,9 +28,12 @@ class McInfoMaker(kp.Module):
self.extractor = self.require('extractor')
self.store_as = self.require('store_as')
self.to_float64 = self.get("to_float64", default=True)
self.sort_y = self.get("sort_y", default=True)
def process(self, blob):
track = self.extractor(blob)
if self.sort_y:
track = {k: track[k] for k in sorted(track)}
if self.to_float64:
dtypes = []
for key, v in track.items():
......
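In effect, sort_y orders the y columns alphabetically and to_float64 casts them before the table is stored. A rough numpy-only sketch of the equivalent transformation (the real module builds a km3pipe table, which is omitted here):

import numpy as np

track = {"n_muons_sim": 3, "dir_z": -0.9, "energy_sim": 1250.0}
track = {k: track[k] for k in sorted(track)}             # sort_y
dtypes = [(key, np.float64) for key in track]            # to_float64
y_row = np.array([tuple(track.values())], dtype=dtypes)
# y_row.dtype.names -> ('dir_z', 'energy_sim', 'n_muons_sim'), all float64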
from unittest import TestCase
import numpy as np
import orcasong.extractors.bundles as bundles
class TestGetPlanePositions(TestCase):
def setUp(self) -> None:
self.positions = np.array([
[0, 0, 0],
[0, 0, 5],
[5, 3, 2],
[0, 0, 2],
])
self.directions = np.array([
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 1, -1],
])
def test_positions_flat_plane(self):
plane_point = np.array([0, 0, 0])
plane_normal = np.array([0, 0, 1])
result = bundles.get_plane_positions(
self.positions, self.directions, plane_point, plane_normal,
)
target = np.array([
[0, 0],
[0, 0],
[5, 3],
[0, 2],
])
np.testing.assert_array_equal(result, target)
def test_positions_flat_plane_no_plane_normal(self):
plane_point = np.array([0, 0, 0])
with self.assertRaises(ValueError):
bundles.get_plane_positions(
self.positions, self.directions, plane_point,
)
def test_positions_shifted_plane(self):
plane_point = np.array([1, 0, 0])
plane_normal = np.array([0, 0, 1])
result = bundles.get_plane_positions(
self.positions, self.directions, plane_point, plane_normal,
)
target = np.array([
[-1, 0],
[-1, 0],
[4, 3],
[-1, 2],
])
np.testing.assert_array_equal(result, target)
def test_positions_angled_plane(self):
plane_point = np.array([0, 0, 0])
plane_normal = np.array([1, 0, 1])
result = bundles.get_plane_positions(
self.positions, self.directions, plane_point, plane_normal,
)
target = np.array([
[0, 0],
[0, 0],
[np.sqrt(50), 3],
[0, 2],
])
np.testing.assert_array_equal(result, target)
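# Why sqrt(50): the muon at x=5 travels along z onto the plane with normal
# (1, 0, 1); its intersection point lies a distance of 5 * sqrt(2) = sqrt(50)
# from the plane_point along the in-plane unit vector u = (1, 0, -1) / sqrt(2).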
def test_positions_angled_plane_2(self):
result = bundles.get_plane_positions(
positions=np.array([[-1, 0, 1]]),
directions=np.array([[1, 0, -1]]),
plane_point=np.array([0, 0, 0]),
plane_normal=np.array([-1, 0, 1]),
)
np.testing.assert_array_equal(result, np.array([[0, 0]]))
class TestPairwiseDistances(TestCase):
def setUp(self) -> None:
self.positions_plane = np.array([
[0, -3],
[4, 0],
[0, 3],
])
def test_matrix(self):
result = bundles.get_pairwise_distances(
positions_plane=self.positions_plane,
as_matrix=True,
)
target = np.array([
[0, 5, 6],
[5, 0, 5],
[6, 5, 0],
])
np.testing.assert_array_equal(result, target)
def test_flat(self):
result = bundles.get_pairwise_distances(
positions_plane=self.positions_plane,
)
target = np.array([5, 6, 5])
np.testing.assert_array_equal(result, target)
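Assuming the test file sits in the repository's tests directory (the exact path is a guess), it can be run with pytest:

python -m pytest tests/test_bundles.py -v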