Skip to content
Snippets Groups Projects
Commit 2c06afb3 authored by Daniel Guderian's avatar Daniel Guderian
Browse files

padding implemented for when a reco of an event is not present

parent 392a0f50
No related branches found
No related tags found
1 merge request: !19 Resolve "error when using new extractor"
......@@ -15,58 +15,96 @@ from km3pipe.io.hdf5 import HDF5Header
from h5py import File
__author__ = "Daniel Guderian"
def get_std_reco(blob, rec_types, rec_parameters_names):
    """
    Extract the std reco info of the best tracks from a blob.

    This implementation requires h5 files to be processed with the option
    "--best_tracks", which adds the selection of best tracks for each reco
    type to the output using the km3io tools.

    For every known reco type that is listed in ``rec_types``, one entry per
    name in ``rec_parameters_names`` is written out, prefixed with the
    identifier of the reco type (e.g. ``jmuon_``). If the blob holds no best
    track of such a reco type, its entries are padded with nan, so that every
    event yields the same set of keys.

    Parameters
    ----------
    blob : dict-like
        The event blob; may contain the keys "BestJmuon", "BestJshower",
        "BestDusjshower" and "BestAashower".
    rec_types : container of str
        The rec types generally present in the file ("best_jmuon", ...),
        as returned by get_rec_types_in_file.
    rec_parameters_names : sequence of str
        Names of the reco parameters, shared by all reco types. Assumed to
        be in the same order as the values of a best track.

    Returns
    -------
    std_reco_info : dict
        Dict with the std reco info of the best tracks.

    """
    # this dict will be filled up
    std_reco_info = {}

    # all known reco types to iterate over:
    # name in blob -> (prefix of the output names, name of the rec type in the file)
    reco_type_dict = {
        "BestJmuon": ("jmuon_", "best_jmuon"),
        "BestJshower": ("jshower_", "best_jshower"),
        "BestDusjshower": ("dusjshower_", "best_dusjshower"),
        "BestAashower": ("aashower_", "best_aashower"),
    }

    for name_in_blob, (identifier, best_track_name) in reco_type_dict.items():
        # only write out something for the generally present rec types
        if best_track_name not in rec_types:
            continue

        # specific names are with the prefix from the rec type; plain string
        # concatenation avoids the private (and deprecated) np.core namespace
        keys_list = [identifier + name for name in rec_parameters_names]

        if name_in_blob in blob:
            # all values of the previously identified best track
            values_list = list(blob[name_in_blob].item())
        else:
            # no reco for this event, but the reco type was done in general:
            # pad with nan's to keep the set of keys identical for all events
            values_list = [np.nan] * len(keys_list)

        # add the info of this reco type to the complete std reco collection
        std_reco_info.update(zip(keys_list, values_list))

    return std_reco_info
def get_rec_types_in_file(file):
    """
    Check which rec types are in the file and thus need to be present
    in all best track and their fitinf information later.

    Parameters
    ----------
    file : mapping
        The opened input file (e.g. an h5py File). Reco related objects are
        looked up in its "reco" group.

    Returns
    -------
    rec_types_in_file : list of str
        The known rec types that are present in the "reco" group.
    rec_parameters_names : tuple of str
        The dtype names that are shared for all recos; empty if the file has
        no "reco" group or none of the known rec types.

    """
    # the known rec types
    rec_type_names = ["best_jmuon", "best_jshower", "best_dusjshower", "best_aashower"]

    # the extractors call this for every file, also for ones without std reco
    if "reco" not in file:
        return [], ()

    # all reco related objects in the file
    reco_objects_in_file = file["reco"]

    # check which ones are in there
    rec_types_in_file = [
        rec_type for rec_type in rec_type_names if rec_type in reco_objects_in_file
    ]
    if not rec_types_in_file:
        return rec_types_in_file, ()

    # the dtype names are shared for all recos, so read them from a rec type
    # that is actually present instead of relying on the loop variable
    rec_parameters_names = reco_objects_in_file[rec_types_in_file[-1]].dtype.names

    return rec_types_in_file, rec_parameters_names
def get_real_data_info_extr(input_file):
"""
......@@ -89,6 +127,9 @@ def get_real_data_info_extr(input_file):
f = File(input_file, "r")
has_std_reco = "reco" in f.keys()
#also check, which rec types are present
rec_types,rec_parameters_names = get_rec_types_in_file(f)
def mc_info_extr(blob):
"""
......@@ -123,7 +164,7 @@ def get_real_data_info_extr(input_file):
# get all the std reco info
if has_std_reco:
std_reco_info = get_std_reco(blob)
std_reco_info = get_std_reco(blob,rec_types,rec_parameters_names)
track.update(std_reco_info)
......@@ -154,6 +195,8 @@ def get_random_noise_mc_info_extr(input_file):
f = File(input_file, "r")
has_std_reco = "reco" in f.keys()
#also check, which rec types are present
rec_types,rec_parameters_names = get_rec_types_in_file(f)
def mc_info_extr(blob):
......@@ -184,7 +227,7 @@ def get_random_noise_mc_info_extr(input_file):
# get all the std reco info
if has_std_reco:
std_reco_info = get_std_reco(blob)
std_reco_info = get_std_reco(blob,rec_types,rec_parameters_names)
track.update(std_reco_info)
......@@ -192,23 +235,6 @@ def get_random_noise_mc_info_extr(input_file):
return mc_info_extr
def get_rec_types_in_file(file):
    """
    Check which rec types are in the file and thus need to be present
    in all best track and their fitinf information.

    Parameters
    ----------
    file : mapping
        The opened input file (e.g. an h5py File). Reco related objects are
        looked up in its "reco" group.

    Returns
    -------
    rec_types_in_file : list of str
        The known rec types that are present in the "reco" group.
    rec_parameters_names : tuple of str
        The dtype names of a present rec type (assumed to be shared for all
        recos); empty if the file has no "reco" group or none of the known
        rec types.

    """
    # the known rec types
    rec_type_names = ["best_jmuon", "best_jshower", "best_dusjshower", "best_aashower"]

    # files without std reco have no "reco" group at all
    if "reco" not in file:
        return [], ()

    # all reco related objects in the file
    reco_objects_in_file = file["reco"]

    # keep the known rec types that are really there (instead of a placeholder)
    rec_types_in_file = [
        rec_type for rec_type in rec_type_names if rec_type in reco_objects_in_file
    ]
    if not rec_types_in_file:
        return rec_types_in_file, ()

    # callers unpack two values: the rec types and their parameter names
    rec_parameters_names = reco_objects_in_file[rec_types_in_file[-1]].dtype.names

    return rec_types_in_file, rec_parameters_names
def get_neutrino_mc_info_extr(input_file):
......@@ -234,8 +260,8 @@ def get_neutrino_mc_info_extr(input_file):
has_std_reco = "reco" in f.keys()
#also check, which rec types are present
#rec_types = get_rec_types_in_file(f)
rec_types,rec_parameters_names = get_rec_types_in_file(f)
# get the n_gen
header = HDF5Header.from_hdf5(input_file)
n_gen = header.genvol.numberOfEvents
......@@ -315,7 +341,7 @@ def get_neutrino_mc_info_extr(input_file):
# get all the std reco info
if has_std_reco:
std_reco_info = get_std_reco(blob)
std_reco_info = get_std_reco(blob,rec_types,rec_parameters_names)
track.update(std_reco_info)
......@@ -381,6 +407,9 @@ def get_muon_mc_info_extr(input_file,prod_identifier=2,inactive_du=None):
f = File(input_file, "r")
has_std_reco = "reco" in f.keys()
#also check, which rec types are present
rec_types,rec_parameters_names = get_rec_types_in_file(f)
# no n_gen here, but needed for concatenation
n_gen = 1
......@@ -473,7 +502,7 @@ def get_muon_mc_info_extr(input_file,prod_identifier=2,inactive_du=None):
# get all the std reco info
if has_std_reco:
std_reco_info = get_std_reco(blob)
std_reco_info = get_std_reco(blob,rec_types,rec_parameters_names)
track.update(std_reco_info)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment