# All known reco types, one entry per type:
# (key of the best track in the blob,
#  prefix put in front of each parameter name in the output,
#  name of the matching object inside the "reco" group of the file)
_RECO_TYPES = (
    ("BestJmuon", "jmuon_", "best_jmuon"),
    ("BestJshower", "jshower_", "best_jshower"),
    ("BestDusjshower", "dusjshower_", "best_dusjshower"),
    ("BestAashower", "aashower_", "best_aashower"),
)


def get_std_reco(blob, rec_types, rec_parameters_names):
    """
    Function to extract std reco info. This implementation requires h5 files
    to be processed with the option "--best_tracks" which adds the selection
    of best tracks for each reco type to the output using the km3io tools.

    Parameters
    ----------
    blob : dict-like
        The event blob. A best track is looked up under the keys
        "BestJmuon", "BestJshower", "BestDusjshower" and "BestAashower".
    rec_types : container of str
        The rec types present in the file in general (e.g. "best_jmuon"),
        as returned by get_rec_types_in_file. Only these are written out.
    rec_parameters_names : sequence of str
        Names of the reco parameters, shared by all rec types.

    Returns
    -------
    std_reco_info : dict
        Dict with the std reco info of the best tracks. Keys are the
        parameter names prefixed with the rec type ("jmuon_", ...). If a
        rec type is present in the file but has no best track in this
        event, all of its values are nan.

    """
    # this dict will be filled up
    std_reco_info = {}

    for name_in_blob, identifier, best_track_name in _RECO_TYPES:

        # always write out something for the generally present rec types,
        # and nothing at all for the others
        if best_track_name not in rec_types:
            continue

        # specific names are with the prefix from the rec type; plain string
        # concatenation avoids the deprecated np.core namespace and also
        # works for an empty list of names
        specific_reco_names = [identifier + name for name in rec_parameters_names]

        if name_in_blob in blob:
            # extract actually present info from the previously identified
            # best track
            # NOTE(review): the values are paired with the shared parameter
            # names by position - confirm that every rec type really has the
            # same fields in the same order
            values_list = list(blob[name_in_blob].item())
        else:
            # there is no reco for this event but the reco type was done in
            # general: fill all values with nan's
            values_list = [np.nan] * len(specific_reco_names)

        # add the name/value pairs to the complete std reco collection
        std_reco_info.update(zip(specific_reco_names, values_list))

    return std_reco_info


def get_rec_types_in_file(file):
    """
    Checks and returns which rec types are in the file and thus need to be
    present in all best track and their fitinf information later.

    Parameters
    ----------
    file : h5py.File or dict-like
        The opened input file. The rec types are looked up in its "reco"
        group.

    Returns
    -------
    rec_types_in_file : list of str
        The known rec types ("best_jmuon", "best_jshower",
        "best_dusjshower", "best_aashower") that are in the "reco" group.
        Empty if there is no "reco" group or none of them is in it.
    rec_parameters_names : tuple of str
        The dtype names of the reco parameters, which are shared by all
        recos. Empty if no known rec type is in the file.

    """
    # files without any std reco have nothing to report; the callers run
    # this check regardless of whether the file has a "reco" group
    if "reco" not in file:
        return [], ()

    # all reco related objects in the file
    reco_objects_in_file = file["reco"]

    # check which of the known rec types are in there
    rec_types_in_file = [
        rec_type
        for _, _, rec_type in _RECO_TYPES
        if rec_type in reco_objects_in_file
    ]

    if not rec_types_in_file:
        return rec_types_in_file, ()

    # also get from here the list of dtype names that is shared for all
    # recos; only a rec type that is really in the file can be asked for it
    # (dtype.names is None for a dtype without fields, hence the fallback)
    rec_parameters_names = reco_objects_in_file[rec_types_in_file[-1]].dtype.names or ()

    return rec_types_in_file, rec_parameters_names