Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
OrcaSong
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Machine Learning
OrcaSong
Commits
2c06afb3
Commit
2c06afb3
authored
3 years ago
by
Daniel Guderian
Browse files
Options
Downloads
Patches
Plain Diff
padding implemented for when a reco of an event is not present
parent
392a0f50
No related branches found
Branches containing commit
No related tags found
Tags containing commit
1 merge request
!19
Resolve "error when using new extractor"
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
orcasong/extractors.py
+100
-71
100 additions, 71 deletions
orcasong/extractors.py
with
100 additions
and
71 deletions
orcasong/extractors.py
+
100
−
71
View file @
2c06afb3
...
...
@@ -15,58 +15,96 @@ from km3pipe.io.hdf5 import HDF5Header
from
h5py
import
File
__author__
=
"
Daniel Guderian
"
def
get_std_reco
(
blob
,
rec_types
,
rec_parameters_names
):
def
get_std_reco
(
blob
):
"""
Function to extract std reco info. This implementation requires h5 files
to be processed with the option
"
--best_tracks
"
which adds the selection
of best tracks for each reco type to the output using the km3io tools.
"""
Function to extract std reco info. This implementation requires h5 files
to be processed with the option
"
--best_tracks
"
which adds the selection
of best tracks for each reco type to the output using the km3io tools.
Returns
-------
std_reco_info : dict
Dict with the std reco info of the best tracks.
Returns
-------
std_reco_info : dict
Dict with the std reco info of the best tracks.
"""
#this dict will be filled up
std_reco_info
=
{}
#all known reco types to iterate over
reco_type_dict
=
{
"
BestJmuon
"
:
"
jmuon_
"
,
"
BestJshower
"
:
"
jshower_
"
,
"
BestDusjshower
"
:
"
dusjshower_
"
,
"
BestAashower
"
:
"
aashower_
"
,
}
for
name_in_blob
,
identifier
in
reco_type_dict
.
items
():
if
name_in_blob
in
blob
:
#get the previously identified best track
bt
=
blob
[
name_in_blob
]
#get all its values
values
=
bt
.
item
()
#get the names of the values and add specific tag
reco_names
=
bt
.
dtype
.
names
specific_reco_names
=
np
.
core
.
defchararray
.
add
(
identifier
,
reco_names
)
#create a dict out of them
keys_list
=
list
(
specific_reco_names
)
values_list
=
list
(
values
)
zip_iterator
=
zip
(
keys_list
,
values_list
)
reco_dict
=
dict
(
zip_iterator
)
#add this dict to the complete std reco collection
std_reco_info
.
update
(
reco_dict
)
return
std_reco_info
"""
#this dict will be filled up
std_reco_info
=
{}
#all known reco types to iterate over
reco_type_dict
=
{
"
BestJmuon
"
:
(
"
jmuon_
"
,
"
best_jmuon
"
),
"
BestJshower
"
:
(
"
jshower_
"
,
"
best_jshower
"
),
"
BestDusjshower
"
:
(
"
dusjshower_
"
,
"
best_dusjshower
"
),
"
BestAashower
"
:
(
"
aashower_
"
,
"
best_aashower
"
),
}
for
name_in_blob
,(
identifier
,
best_track_name
)
in
reco_type_dict
.
items
():
#always write out something for the generally present rec types
if
best_track_name
in
rec_types
:
#specific names are with the prefix from the rec type
specific_reco_names
=
np
.
core
.
defchararray
.
add
(
identifier
,
rec_parameters_names
)
#extract actually present info
if
name_in_blob
in
blob
:
#get the previously identified best track
bt
=
blob
[
name_in_blob
]
#get all its values
values
=
bt
.
item
()
values_list
=
list
(
values
)
#reco_names = bt.dtype.names #in case the fitinf and stuff will be tailored to the reco types
#at some point, get the names directly like this
#in case there is no reco for this event but the reco type was done in general
else
:
#fill all values with nan's
values_array
=
np
.
empty
(
len
(
specific_reco_names
))
values_array
[:]
=
np
.
nan
values_list
=
values_array
.
tolist
()
#create a dict out of them
keys_list
=
list
(
specific_reco_names
)
zip_iterator
=
zip
(
keys_list
,
values_list
)
reco_dict
=
dict
(
zip_iterator
)
#add this dict to the complete std reco collection
std_reco_info
.
update
(
reco_dict
)
return
std_reco_info
def
get_rec_types_in_file
(
file
):
"""
Checks and returns which rec types are in the file and thus need to be present
in all best track and their fitinf information later.
"""
#the known rec types
rec_type_names
=
[
"
best_jmuon
"
,
"
best_jshower
"
,
"
best_dusjshower
"
,
"
best_aashower
"
]
#all reco related objects in the file
reco_objects_in_file
=
file
[
"
reco
"
].
keys
()
#check which ones are in there
rec_types_in_file
=
[]
for
rec_type
in
rec_type_names
:
if
rec_type
in
reco_objects_in_file
:
rec_types_in_file
.
append
(
rec_type
)
#also get from here the list of dtype names that is share for all recos
rec_parameters_names
=
file
[
"
reco
"
][
rec_type
].
dtype
.
names
return
rec_types_in_file
,
rec_parameters_names
def
get_real_data_info_extr
(
input_file
):
"""
...
...
@@ -89,6 +127,9 @@ def get_real_data_info_extr(input_file):
f
=
File
(
input_file
,
"
r
"
)
has_std_reco
=
"
reco
"
in
f
.
keys
()
#also check, which rec types are present
rec_types
,
rec_parameters_names
=
get_rec_types_in_file
(
f
)
def
mc_info_extr
(
blob
):
"""
...
...
@@ -123,7 +164,7 @@ def get_real_data_info_extr(input_file):
# get all the std reco info
if
has_std_reco
:
std_reco_info
=
get_std_reco
(
blob
)
std_reco_info
=
get_std_reco
(
blob
,
rec_types
,
rec_parameters_names
)
track
.
update
(
std_reco_info
)
...
...
@@ -154,6 +195,8 @@ def get_random_noise_mc_info_extr(input_file):
f
=
File
(
input_file
,
"
r
"
)
has_std_reco
=
"
reco
"
in
f
.
keys
()
#also check, which rec types are present
rec_types
,
rec_parameters_names
=
get_rec_types_in_file
(
f
)
def
mc_info_extr
(
blob
):
...
...
@@ -184,7 +227,7 @@ def get_random_noise_mc_info_extr(input_file):
# get all the std reco info
if
has_std_reco
:
std_reco_info
=
get_std_reco
(
blob
)
std_reco_info
=
get_std_reco
(
blob
,
rec_types
,
rec_parameters_names
)
track
.
update
(
std_reco_info
)
...
...
@@ -192,23 +235,6 @@ def get_random_noise_mc_info_extr(input_file):
return
mc_info_extr
def
get_rec_types_in_file
(
file
):
"""
Checks rand returns which rec types are in the file and thus need to be present
in all best track and their fitinf information.
"""
#the known rec types
rec_type_names
=
[
"
best_jmuon
"
,
"
best_jshower
"
,
"
best_dusjshower
"
,
"
best_aashower
"
]
#all reco related in the file
reco_objects_in_file
=
file
[
"
reco
"
].
keys
()
#
rec_types_in_file
=
2
return
rec_types_in_file
def
get_neutrino_mc_info_extr
(
input_file
):
...
...
@@ -234,8 +260,8 @@ def get_neutrino_mc_info_extr(input_file):
has_std_reco
=
"
reco
"
in
f
.
keys
()
#also check, which rec types are present
#
rec_types = get_rec_types_in_file(f)
rec_types
,
rec_parameters_names
=
get_rec_types_in_file
(
f
)
# get the n_gen
header
=
HDF5Header
.
from_hdf5
(
input_file
)
n_gen
=
header
.
genvol
.
numberOfEvents
...
...
@@ -315,7 +341,7 @@ def get_neutrino_mc_info_extr(input_file):
# get all the std reco info
if
has_std_reco
:
std_reco_info
=
get_std_reco
(
blob
)
std_reco_info
=
get_std_reco
(
blob
,
rec_types
,
rec_parameters_names
)
track
.
update
(
std_reco_info
)
...
...
@@ -381,6 +407,9 @@ def get_muon_mc_info_extr(input_file,prod_identifier=2,inactive_du=None):
f
=
File
(
input_file
,
"
r
"
)
has_std_reco
=
"
reco
"
in
f
.
keys
()
#also check, which rec types are present
rec_types
,
rec_parameters_names
=
get_rec_types_in_file
(
f
)
# no n_gen here, but needed for concatenation
n_gen
=
1
...
...
@@ -473,7 +502,7 @@ def get_muon_mc_info_extr(input_file,prod_identifier=2,inactive_du=None):
# get all the std reco info
if
has_std_reco
:
std_reco_info
=
get_std_reco
(
blob
)
std_reco_info
=
get_std_reco
(
blob
,
rec_types
,
rec_parameters_names
)
track
.
update
(
std_reco_info
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment