Skip to content
Snippets Groups Projects
Commit 5220b471 authored by Daniel Guderian's avatar Daniel Guderian
Browse files

adjusted get_std_reco of the extractor for only best tracks

parent 58ecd462
No related branches found
No related tags found
1 merge request!18Resolve "Update get standard reco function"
......@@ -4,7 +4,8 @@ in the h5 files.
These are made for the specific given runs. They might not be
applicable to other data, and could cause errors or produce unexpected
results when used on data other then the specified.
results when used on data other then the specified. Check for example the
primary position in the mc_tracks.
"""
......@@ -19,132 +20,53 @@ __author__ = "Daniel Guderian"
def get_std_reco(blob):
"""
Function to extract std reco info. The implemented strategy is the following:
First, look for whether a rec stag has been reached and only then extract the reconstructed
paramater from it. If not, set it to a dummy value (for now 0). This means that for an analysis the events with
exactly zero have to be filtered out!
The 'best track' is the first (highest lik) while a certain rec stage has to be reached. This might
have to be adjusted for other recos than JMuonGandalf chain.
Members of the Tracks:
dtype([('E', '<f8'), ('JCOPY_Z_M', '<f4'), ('JENERGY_CHI2', '<f4'), ('JENERGY_ENERGY', '<f4'),
('JENERGY_MUON_RANGE_METRES', '<f4'), ('JENERGY_NDF', '<f4'), ('JENERGY_NOISE_LIKELIHOOD', '<f4'),
('JENERGY_NUMBER_OF_HITS', '<f4'), ('JGANDALF_BETA0_RAD', '<f4'), ('JGANDALF_BETA1_RAD', '<f4'),
('JGANDALF_CHI2', '<f4'), ('JGANDALF_LAMBDA', '<f4'), ('JGANDALF_NUMBER_OF_HITS', '<f4'),
('JGANDALF_NUMBER_OF_ITERATIONS', '<f4'), ('JSHOWERFIT_ENERGY', '<f4'), ('JSTART_LENGTH_METRES', '<f4'),
('JSTART_NPE_MIP', '<f4'), ('JSTART_NPE_MIP_TOTAL', '<f4'), ('JVETO_NPE', '<f4'), ('JVETO_NUMBER_OF_HITS', '<f4'),
('dir_x', '<f8'), ('dir_y', '<f8'), ('dir_z', '<f8'), ('id', '<i4'), ('idx', '<i8'), ('length', '<f8'),
('likelihood', '<f8'), ('pos_x', '<f8'), ('pos_y', '<f8'), ('pos_z', '<f8'), ('rec_type', '<i4'),
('t', '<f8'), ('group_id', '<i8')])
members of rec stages:
.idx (corresponding to the track id),
.rec_stage (rec stage identifier, for JMuonGandalf for example: 1=prefit, 2=simplex, 3=gandalf,
4=engery, 5=start),
.group_id (event id in file)
Parameters
----------
blob : blob containing the reco info
Function to extract std reco info. This implementation requires h5 files
to be processed with the option "--best_tracks" which adds the selection
of best tracks for each reco type to the output using the km3io tools.
Returns
-------
std_reco_info : dict
Dict with the most common std reco params. Can be expanded.
Dict with the std reco info of the best tracks.
"""
# use this later to identify not reconstructed events
dummy_value = 0
# if there was no std reco at all, this will not exist
# these are events that stopped at/before prefit
try:
rec_stages = blob["RecStages"]
# get first track only
rec_stages_best_track = rec_stages.rec_stage[rec_stages.idx == 0]
# often enough: best track is the first
best_track = blob["Tracks"][0]
except KeyError:
rec_stages_best_track = []
print(
"An event didnt have any reco. Setting everything to"
+ str(dummy_value)
+ "."
)
# take the direction only if JGanalf was executed
if 3 in rec_stages_best_track:
std_dir_x = best_track["dir_x"]
std_dir_y = best_track["dir_y"]
std_dir_z = best_track["dir_z"]
std_beta0 = best_track["JGANDALF_BETA0_RAD"]
std_lik = best_track["likelihood"]
std_n_hits_gandalf = best_track["JGANDALF_NUMBER_OF_HITS"]
else:
std_dir_x = dummy_value
std_dir_y = dummy_value
std_dir_z = dummy_value
std_beta0 = dummy_value
std_lik = dummy_value
std_n_hits_gandalf = dummy_value
# energy fit from JEnergy
if 4 in rec_stages_best_track:
std_energy = best_track["E"]
lik_energy = best_track["JENERGY_CHI2"]
else:
std_energy = dummy_value
lik_energy = dummy_value
# vertex and length from JStart
if 5 in rec_stages_best_track:
std_pos_x = best_track["pos_x"]
std_pos_y = best_track["pos_y"]
std_pos_z = best_track["pos_z"]
std_length = best_track["JSTART_LENGTH_METRES"]
else:
std_pos_x = dummy_value
std_pos_y = dummy_value
std_pos_z = dummy_value
std_length = dummy_value
std_reco_info = {
"std_dir_x": std_dir_x,
"std_dir_y": std_dir_y,
"std_dir_z": std_dir_z,
"std_beta0": std_beta0,
"std_lik": std_lik,
"std_n_hits_gandalf": std_n_hits_gandalf,
"std_pos_x": std_pos_x,
"std_pos_y": std_pos_y,
"std_pos_z": std_pos_z,
"std_energy": std_energy,
"std_lik_energy": lik_energy,
"std_length": std_length,
}
#this dict will be filled up
std_reco_info = {}
#all known reco types to iterate over
reco_type_dict = {
"BestJmuon" : "jmuon_",
"BestJshower" : "jshower_",
"BestDusjshower" : "dusjshower_",
"BestAashower" : "aashower_",
}
for name_in_blob,identifier in reco_type_dict.items():
if name_in_blob in blob:
#get the previously identified best track
bt = blob[name_in_blob]
#get all its values
values = bt.item()
#get the names of the values and add specific tag
reco_names = bt.dtype.names
specific_reco_names = np.core.defchararray.add(identifier,reco_names)
#create a dict out of them
keys_list = list(specific_reco_names)
values_list = list(values)
zip_iterator = zip(keys_list, values_list)
reco_dict = dict(zip_iterator)
#add this dict to the complete std reco collection
std_reco_info.update(reco_dict)
return std_reco_info
def get_real_data_info_extr(input_file):
"""
......@@ -200,7 +122,7 @@ def get_real_data_info_extr(input_file):
# get all the std reco info
if has_std_reco:
std_reco_info = get_std_reco(blob)
track.update(std_reco_info)
......@@ -232,7 +154,7 @@ def get_random_noise_mc_info_extr(input_file):
f = File(input_file, "r")
has_std_reco = "reco" in f.keys()
def mc_info_extr(blob):
"""
......@@ -297,8 +219,7 @@ def get_neutrino_mc_info_extr(input_file):
# get the n_gen
header = HDF5Header.from_hdf5(input_file)
n_gen = header.genvol.numberOfEvents
def mc_info_extr(blob):
"""
......@@ -333,7 +254,7 @@ def get_neutrino_mc_info_extr(input_file):
mc_track = blob["McTracks"][p]
# some track mc truth info
particle_type = mc_track.type
particle_type = mc_track.pdgid #sometimes type, sometimes pdgid
energy = mc_track.energy
is_cc = mc_track.cc
bjorkeny = mc_track.by
......@@ -373,7 +294,7 @@ def get_neutrino_mc_info_extr(input_file):
# get all the std reco info
if has_std_reco:
std_reco_info = get_std_reco(blob)
track.update(std_reco_info)
......@@ -382,8 +303,40 @@ def get_neutrino_mc_info_extr(input_file):
return mc_info_extr
def get_muon_mc_info_extr(input_file,prod_identifier=2):
#function used by Stefan to identify which muons leave how many mc hits in the (active) detector.
def get_mchits_per_muon(blob, inactive_du=None):
"""
For each muon in McTracks, get the number of McHits.
Parameters
----------
blob
The blob.
inactive_du : int, optional
McHits in this DU will not be counted.
Returns
-------
np.array
n_mchits, len = number of muons
"""
ids = blob["McTracks"]["id"]
# Origin of each mchit (as int) in the active line
origin = blob["McHits"]["origin"]
if inactive_du:
# only hits in active line
origin = origin[blob["McHits"]["du"] != inactive_du]
# get how many mchits were produced per muon in the bundle
origin_dict = dict(zip(*np.unique(origin, return_counts=True)))
return np.array([origin_dict.get(i, 0) for i in ids])
def get_muon_mc_info_extr(input_file,prod_identifier=2,inactive_du=None):
"""
Wrapper function that includes the actual mc_info_extr
......@@ -446,9 +399,13 @@ def get_muon_mc_info_extr(input_file,prod_identifier=2):
time_interaction = mc_track.time # same for all muons in a bundle
# sum up the energy of all muons
energy = np.sum(blob["McTracks"].energy)
# sum up the energy from all muons that have at least x mc hits
n_hits_per_muon = get_mchits_per_muon(blob, inactive_du=inactive_du) #DU1 in ORCA4 is in the detx but not powered
#dont consider muons with less than 10 mc hits
suficient_hits_mask = n_hits_per_muon >= 15
energy = np.sum(blob["McTracks"][suficient_hits_mask].energy)
# all muons in a bundle are parallel, so just take dir of first muon
dir_x, dir_y, dir_z = mc_track.dir_x, mc_track.dir_y, mc_track.dir_z
......@@ -495,7 +452,7 @@ def get_muon_mc_info_extr(input_file,prod_identifier=2):
# get all the std reco info
if has_std_reco:
std_reco_info = get_std_reco(blob)
track.update(std_reco_info)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment