Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
OrcaSong
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Machine Learning
OrcaSong
Commits
5220b471
Commit
5220b471
authored
4 years ago
by
Daniel Guderian
Browse files
Options
Downloads
Patches
Plain Diff
adjusted get_std_reco of the extractor for only best tracks
parent
58ecd462
No related branches found
Branches containing commit
No related tags found
Tags containing commit
1 merge request
!18
Resolve "Update get standard reco function"
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
orcasong/extractors.py
+90
-133
90 additions, 133 deletions
orcasong/extractors.py
with
90 additions
and
133 deletions
orcasong/extractors.py
+
90
−
133
View file @
5220b471
...
...
@@ -4,7 +4,8 @@ in the h5 files.
These are made for the specific given runs. They might not be
applicable to other data, and could cause errors or produce unexpected
results when used on data other than the specified.
results when used on data other than the specified. Check for example the
primary position in the mc_tracks.
"""
...
...
@@ -19,132 +20,53 @@ __author__ = "Daniel Guderian"
def get_std_reco(blob):
    """
    Function to extract std reco info of the best tracks.

    This implementation requires h5 files to be processed with the option
    "--best_tracks" which adds the selection of best tracks for each reco
    type to the output using the km3io tools.

    For every known reco type that is present in the blob, each field of
    its best track is copied into the result under the field name prefixed
    with a reco specific tag ("jmuon_", "jshower_", "dusjshower_" or
    "aashower_"). Reco types that are missing from the blob are skipped, so
    the returned dict can be empty.

    Parameters
    ----------
    blob : dict-like
        The blob containing the reco info. The entries "BestJmuon",
        "BestJshower", "BestDusjshower" and "BestAashower" are read when
        present; each one has to provide ``.dtype.names`` and ``.item()``
        (i.e. a single structured record).

    Returns
    -------
    std_reco_info : dict
        Dict with the std reco info of the best tracks. Keys are plain
        ``str`` objects.

    """
    # all known reco types to iterate over: name in the blob -> key prefix
    reco_type_dict = {
        "BestJmuon": "jmuon_",
        "BestJshower": "jshower_",
        "BestDusjshower": "dusjshower_",
        "BestAashower": "aashower_",
    }

    # this dict will be filled up
    std_reco_info = {}

    for name_in_blob, identifier in reco_type_dict.items():
        if name_in_blob not in blob:
            continue

        # get the previously identified best track
        bt = blob[name_in_blob]

        # pair every field name (with the reco specific tag prepended) with
        # its value; plain string concatenation avoids the private
        # np.core.defchararray namespace and keeps the keys builtin str
        std_reco_info.update(
            (identifier + name, value)
            for name, value in zip(bt.dtype.names, bt.item())
        )

    return std_reco_info
def
get_real_data_info_extr
(
input_file
):
"""
...
...
@@ -200,7 +122,7 @@ def get_real_data_info_extr(input_file):
# get all the std reco info
if
has_std_reco
:
std_reco_info
=
get_std_reco
(
blob
)
track
.
update
(
std_reco_info
)
...
...
@@ -232,7 +154,7 @@ def get_random_noise_mc_info_extr(input_file):
f
=
File
(
input_file
,
"
r
"
)
has_std_reco
=
"
reco
"
in
f
.
keys
()
def
mc_info_extr
(
blob
):
"""
...
...
@@ -297,8 +219,7 @@ def get_neutrino_mc_info_extr(input_file):
# get the n_gen
header
=
HDF5Header
.
from_hdf5
(
input_file
)
n_gen
=
header
.
genvol
.
numberOfEvents
def
mc_info_extr
(
blob
):
"""
...
...
@@ -333,7 +254,7 @@ def get_neutrino_mc_info_extr(input_file):
mc_track
=
blob
[
"
McTracks
"
][
p
]
# some track mc truth info
particle_type
=
mc_track
.
type
particle_type
=
mc_track
.
pdgid
#sometimes type, sometimes pdgid
energy
=
mc_track
.
energy
is_cc
=
mc_track
.
cc
bjorkeny
=
mc_track
.
by
...
...
@@ -373,7 +294,7 @@ def get_neutrino_mc_info_extr(input_file):
# get all the std reco info
if
has_std_reco
:
std_reco_info
=
get_std_reco
(
blob
)
track
.
update
(
std_reco_info
)
...
...
@@ -382,8 +303,40 @@ def get_neutrino_mc_info_extr(input_file):
return
mc_info_extr
def
get_muon_mc_info_extr
(
input_file
,
prod_identifier
=
2
):
#function used by Stefan to identify which muons leave how many mc hits in the (active) detector.
def get_mchits_per_muon(blob, inactive_du=None):
    """
    For each muon in McTracks, get the number of McHits.

    Parameters
    ----------
    blob
        The blob. ``blob["McTracks"]["id"]`` and
        ``blob["McHits"]["origin"]`` are read; ``blob["McHits"]["du"]`` is
        read as well when a DU is excluded.
    inactive_du : int, optional
        McHits in this DU will not be counted. With None (the default)
        the hits of all DUs are counted.

    Returns
    -------
    np.array
        n_mchits, len = number of muons

    """
    ids = blob["McTracks"]["id"]
    mc_hits = blob["McHits"]

    # Origin of each mchit (as int)
    origin = mc_hits["origin"]
    # compare against None instead of relying on truthiness, so that a DU
    # numbered 0 can be excluded as well
    if inactive_du is not None:
        # only hits in active line
        origin = origin[mc_hits["du"] != inactive_du]

    # get how many mchits were produced per muon in the bundle
    origin_dict = dict(zip(*np.unique(origin, return_counts=True)))

    # muons without any (counted) hit get 0; the explicit dtype keeps the
    # result integer typed even if there are no muons at all
    return np.array([origin_dict.get(i, 0) for i in ids], dtype=int)
def
get_muon_mc_info_extr
(
input_file
,
prod_identifier
=
2
,
inactive_du
=
None
):
"""
Wrapper function that includes the actual mc_info_extr
...
...
@@ -446,9 +399,13 @@ def get_muon_mc_info_extr(input_file,prod_identifier=2):
time_interaction
=
mc_track
.
time
# same for all muons in a bundle
# sum up the energy of all muons
energy
=
np
.
sum
(
blob
[
"
McTracks
"
].
energy
)
# sum up the energy from all muons that have at least x mc hits
n_hits_per_muon
=
get_mchits_per_muon
(
blob
,
inactive_du
=
inactive_du
)
#DU1 in ORCA4 is in the detx but not powered
#don't consider muons with fewer than 15 mc hits
suficient_hits_mask
=
n_hits_per_muon
>=
15
energy
=
np
.
sum
(
blob
[
"
McTracks
"
][
suficient_hits_mask
].
energy
)
# all muons in a bundle are parallel, so just take dir of first muon
dir_x
,
dir_y
,
dir_z
=
mc_track
.
dir_x
,
mc_track
.
dir_y
,
mc_track
.
dir_z
...
...
@@ -495,7 +452,7 @@ def get_muon_mc_info_extr(input_file,prod_identifier=2):
# get all the std reco info
if
has_std_reco
:
std_reco_info
=
get_std_reco
(
blob
)
track
.
update
(
std_reco_info
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment