Skip to content
Snippets Groups Projects
Commit 2c06afb3 authored by Daniel Guderian
Browse files

padding implemented for when a reco of an event is not present

parent 392a0f50
No related branches found
No related tags found
1 merge request: !19 Resolve "error when using new extractor"
...@@ -15,58 +15,96 @@ from km3pipe.io.hdf5 import HDF5Header ...@@ -15,58 +15,96 @@ from km3pipe.io.hdf5 import HDF5Header
from h5py import File from h5py import File
__author__ = "Daniel Guderian" __author__ = "Daniel Guderian"
def get_std_reco(blob,rec_types,rec_parameters_names):
def get_std_reco(blob): """
Function to extract std reco info. This implementation requires h5 files
to be processed with the option "--best_tracks" which adds the selection
of best tracks for each reco type to the output using the km3io tools.
""" Returns
Function to extract std reco info. This implementation requires h5 files -------
to be processed with the option "--best_tracks" which adds the selection std_reco_info : dict
of best tracks for each reco type to the output using the km3io tools. Dict with the std reco info of the best tracks.
Returns
-------
std_reco_info : dict
Dict with the std reco info of the best tracks.
""" """
#this dict will be filled up #this dict will be filled up
std_reco_info = {} std_reco_info = {}
#all known reco types to iterate over #all known reco types to iterate over
reco_type_dict = { reco_type_dict = {
"BestJmuon" : "jmuon_", "BestJmuon" : ("jmuon_","best_jmuon"),
"BestJshower" : "jshower_", "BestJshower" : ("jshower_","best_jshower"),
"BestDusjshower" : "dusjshower_", "BestDusjshower" : ("dusjshower_","best_dusjshower"),
"BestAashower" : "aashower_", "BestAashower" : ("aashower_","best_aashower"),
} }
for name_in_blob,identifier in reco_type_dict.items(): for name_in_blob,(identifier,best_track_name) in reco_type_dict.items():
if name_in_blob in blob: #always write out something for the generally present rec types
if best_track_name in rec_types:
#get the previously identified best track
bt = blob[name_in_blob] #specific names are with the prefix from the rec type
specific_reco_names = np.core.defchararray.add(identifier,rec_parameters_names)
#get all its values
values = bt.item() #extract actually present info
if name_in_blob in blob:
#get the names of the values and add specific tag
reco_names = bt.dtype.names #get the previously identified best track
specific_reco_names = np.core.defchararray.add(identifier,reco_names) bt = blob[name_in_blob]
#create a dict out of them #get all its values
keys_list = list(specific_reco_names) values = bt.item()
values_list = list(values) values_list = list(values)
zip_iterator = zip(keys_list, values_list) #reco_names = bt.dtype.names #in case the fitinf and stuff will be tailored to the reco types
reco_dict = dict(zip_iterator) #at some point, get the names directly like this
#add this dict to the complete std reco collection #in case there is no reco for this event but the reco type was done in general
std_reco_info.update(reco_dict) else:
return std_reco_info #fill all values with nan's
values_array = np.empty(len(specific_reco_names))
values_array[:] = np.nan
values_list = values_array.tolist()
#create a dict out of them
keys_list = list(specific_reco_names)
zip_iterator = zip(keys_list, values_list)
reco_dict = dict(zip_iterator)
#add this dict to the complete std reco collection
std_reco_info.update(reco_dict)
return std_reco_info
def get_rec_types_in_file(file):
    """
    Check which of the known rec types are stored in the file.

    The result is used to decide which best-track entries have to be
    written out for every event later on, even if a single event lacks
    that reconstruction.

    Parameters
    ----------
    file : mapping-like (e.g. an opened h5py.File)
        Input file. Its "reco" entry, if present, is looked up by the
        names of the known rec types.

    Returns
    -------
    rec_types_in_file : list
        Names of the known rec types found under "reco", in the order
        of the known rec types. Empty if there is no "reco" entry or
        none of the known types is in it.
    rec_parameters_names : tuple
        The ``dtype.names`` of the last rec type found; these names are
        taken to be shared by all recos. An empty tuple if no rec type
        was found.
    """
    # the known rec types
    rec_type_names = [
        "best_jmuon",
        "best_jshower",
        "best_dusjshower",
        "best_aashower",
    ]

    # files without any std reco have no "reco" entry at all; the callers
    # invoke this function regardless, so do not fail on the lookup
    if "reco" not in file:
        return [], ()

    # all reco related objects in the file
    reco_objects_in_file = file["reco"]

    # check which ones are in there
    rec_types_in_file = [
        rec_type for rec_type in rec_type_names
        if rec_type in reco_objects_in_file
    ]

    # default for the case that none of the known rec types is present,
    # so the return below never hits an unbound name
    rec_parameters_names = ()
    if rec_types_in_file:
        # also get from here the list of dtype names that is shared for
        # all recos (taken from the last rec type found)
        last_rec_type = rec_types_in_file[-1]
        rec_parameters_names = reco_objects_in_file[last_rec_type].dtype.names

    return rec_types_in_file, rec_parameters_names
def get_real_data_info_extr(input_file): def get_real_data_info_extr(input_file):
""" """
...@@ -89,6 +127,9 @@ def get_real_data_info_extr(input_file): ...@@ -89,6 +127,9 @@ def get_real_data_info_extr(input_file):
f = File(input_file, "r") f = File(input_file, "r")
has_std_reco = "reco" in f.keys() has_std_reco = "reco" in f.keys()
#also check, which rec types are present
rec_types,rec_parameters_names = get_rec_types_in_file(f)
def mc_info_extr(blob): def mc_info_extr(blob):
""" """
...@@ -123,7 +164,7 @@ def get_real_data_info_extr(input_file): ...@@ -123,7 +164,7 @@ def get_real_data_info_extr(input_file):
# get all the std reco info # get all the std reco info
if has_std_reco: if has_std_reco:
std_reco_info = get_std_reco(blob) std_reco_info = get_std_reco(blob,rec_types,rec_parameters_names)
track.update(std_reco_info) track.update(std_reco_info)
...@@ -154,6 +195,8 @@ def get_random_noise_mc_info_extr(input_file): ...@@ -154,6 +195,8 @@ def get_random_noise_mc_info_extr(input_file):
f = File(input_file, "r") f = File(input_file, "r")
has_std_reco = "reco" in f.keys() has_std_reco = "reco" in f.keys()
#also check, which rec types are present
rec_types,rec_parameters_names = get_rec_types_in_file(f)
def mc_info_extr(blob): def mc_info_extr(blob):
...@@ -184,7 +227,7 @@ def get_random_noise_mc_info_extr(input_file): ...@@ -184,7 +227,7 @@ def get_random_noise_mc_info_extr(input_file):
# get all the std reco info # get all the std reco info
if has_std_reco: if has_std_reco:
std_reco_info = get_std_reco(blob) std_reco_info = get_std_reco(blob,rec_types,rec_parameters_names)
track.update(std_reco_info) track.update(std_reco_info)
...@@ -192,23 +235,6 @@ def get_random_noise_mc_info_extr(input_file): ...@@ -192,23 +235,6 @@ def get_random_noise_mc_info_extr(input_file):
return mc_info_extr return mc_info_extr
def get_rec_types_in_file(file):
"""
Checks rand returns which rec types are in the file and thus need to be present
in all best track and their fitinf information.
"""
#the known rec types
rec_type_names = ["best_jmuon","best_jshower","best_dusjshower","best_aashower"]
#all reco related in the file
reco_objects_in_file = file["reco"].keys()
#
rec_types_in_file = 2
return rec_types_in_file
def get_neutrino_mc_info_extr(input_file): def get_neutrino_mc_info_extr(input_file):
...@@ -234,8 +260,8 @@ def get_neutrino_mc_info_extr(input_file): ...@@ -234,8 +260,8 @@ def get_neutrino_mc_info_extr(input_file):
has_std_reco = "reco" in f.keys() has_std_reco = "reco" in f.keys()
#also check, which rec types are present #also check, which rec types are present
#rec_types = get_rec_types_in_file(f) rec_types,rec_parameters_names = get_rec_types_in_file(f)
# get the n_gen # get the n_gen
header = HDF5Header.from_hdf5(input_file) header = HDF5Header.from_hdf5(input_file)
n_gen = header.genvol.numberOfEvents n_gen = header.genvol.numberOfEvents
...@@ -315,7 +341,7 @@ def get_neutrino_mc_info_extr(input_file): ...@@ -315,7 +341,7 @@ def get_neutrino_mc_info_extr(input_file):
# get all the std reco info # get all the std reco info
if has_std_reco: if has_std_reco:
std_reco_info = get_std_reco(blob) std_reco_info = get_std_reco(blob,rec_types,rec_parameters_names)
track.update(std_reco_info) track.update(std_reco_info)
...@@ -381,6 +407,9 @@ def get_muon_mc_info_extr(input_file,prod_identifier=2,inactive_du=None): ...@@ -381,6 +407,9 @@ def get_muon_mc_info_extr(input_file,prod_identifier=2,inactive_du=None):
f = File(input_file, "r") f = File(input_file, "r")
has_std_reco = "reco" in f.keys() has_std_reco = "reco" in f.keys()
#also check, which rec types are present
rec_types,rec_parameters_names = get_rec_types_in_file(f)
# no n_gen here, but needed for concatenation # no n_gen here, but needed for concatenation
n_gen = 1 n_gen = 1
...@@ -473,7 +502,7 @@ def get_muon_mc_info_extr(input_file,prod_identifier=2,inactive_du=None): ...@@ -473,7 +502,7 @@ def get_muon_mc_info_extr(input_file,prod_identifier=2,inactive_du=None):
# get all the std reco info # get all the std reco info
if has_std_reco: if has_std_reco:
std_reco_info = get_std_reco(blob) std_reco_info = get_std_reco(blob,rec_types,rec_parameters_names)
track.update(std_reco_info) track.update(std_reco_info)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment