Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
OrcaSong
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Machine Learning
OrcaSong
Commits
2b130099
Commit
2b130099
authored
5 years ago
by
Stefan Reck
Browse files
Options
Downloads
Patches
Plain Diff
One binning plot for multiple files.
parent
c0257d23
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
orcasong_plag/core.py
+54
-26
54 additions, 26 deletions
orcasong_plag/core.py
orcasong_plag/modules.py
+77
-58
77 additions, 58 deletions
orcasong_plag/modules.py
with
131 additions
and
84 deletions
orcasong_plag/core.py
+
54
−
26
View file @
2b130099
...
...
@@ -2,7 +2,7 @@ import km3pipe as kp
import
km3modules
as
km
import
os
from
orcasong_plag.modules
import
TimePreproc
,
ImageMaker
,
McInfoMaker
,
BinningPlotter
from
orcasong_plag.modules
import
TimePreproc
,
ImageMaker
,
McInfoMaker
,
BinningPlotter
,
plot_hists
from
orcasong_plag.mc_info_types
import
get_mc_info_extr
...
...
@@ -59,8 +59,8 @@ class FileBinner:
self
.
mc_info_extr
=
mc_info_extr
self
.
bin_plot_freq
=
20
self
.
n_statusbar
=
2
00
self
.
n_memory_observer
=
4
00
self
.
n_statusbar
=
10
00
self
.
n_memory_observer
=
10
00
self
.
do_time_preproc
=
True
# self.data_cuts = None
...
...
@@ -75,8 +75,8 @@ class FileBinner:
Parameters
----------
infile : str
or List
Path to the input file
(s)
.
infile : str
Path to the input file.
outfile : str
Path to the output file.
...
...
@@ -84,34 +84,50 @@ class FileBinner:
name
,
shape
=
self
.
get_names_and_shape
()
print
(
"
Generating {} images with shape {}
"
.
format
(
name
,
shape
))
p
ipe
=
kp
.
Pipeline
()
p
lot_hists
=
self
.
bin_plot_freq
is
not
None
if
self
.
n_statusbar
is
not
None
:
pipe
.
attach
(
km
.
common
.
StatusBar
,
every
=
self
.
n_statusbar
)
if
self
.
n_memory_observer
is
not
None
:
pipe
.
attach
(
km
.
common
.
MemoryObserver
,
every
=
400
)
pipe
=
self
.
build_pipe
(
infile
,
outfile
,
plot_hists
=
plot_hists
)
pipe
.
drain
()
if
not
isinstance
(
infile
,
list
):
infile
=
[
infile
]
def
run_multi
(
self
,
infiles
,
outfolder
):
"""
Bin multiple files into their own output files each.
pipe
.
attach
(
kp
.
io
.
hdf5
.
HDF5Pump
,
filenames
=
in
file
)
Will also generate a summary binning plot for all of the
file
s.
self
.
attach_binning_modules
(
pipe
,
outfile
=
outfile
)
Parameters
----------
infiles : List
The path to infiles as str.
outfolder : str
The output folder to place them in.
pipe
.
attach
(
kp
.
io
.
HDF5Sink
,
filename
=
outfile
,
complib
=
self
.
complib
,
complevel
=
self
.
complevel
,
chunksize
=
self
.
chunksize
,
flush_frequency
=
self
.
flush_frequency
)
"""
hists
=
None
for
infile
in
infiles
:
outfile_name
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
infile
))[
0
]
+
"
_hist.h5
"
outfile
=
outfolder
+
outfile_name
pipe
=
self
.
build_pipe
(
infile
,
outfile
,
plot_hists
=
False
,
hists_start
=
hists
)
smry
=
pipe
.
drain
()
hists
=
smry
[
"
BinningPlotter
"
]
plot_hists
(
hists
,
pdf_path
=
outfolder
+
"
binning_summary.pdf
"
)
def
build_pipe
(
self
,
infile
,
outfile
,
plot_hists
=
True
,
hists_start
=
None
):
"""
Build the pipe to generate images and mc_info for a file.
"""
pipe
.
dra
in
()
pipe
=
kp
.
Pipel
in
e
()
def
attach_binning_modules
(
self
,
pipe
,
outfile
):
"""
Attach modules to a km3pipe which transform a blob to images and mc_info.
if
self
.
n_statusbar
is
not
None
:
pipe
.
attach
(
km
.
common
.
StatusBar
,
every
=
self
.
n_statusbar
)
if
self
.
n_memory_observer
is
not
None
:
pipe
.
attach
(
km
.
common
.
MemoryObserver
,
every
=
self
.
n_memory_observer
)
pipe
.
attach
(
kp
.
io
.
hdf5
.
HDF5Pump
,
filename
=
infile
)
"""
pipe
.
attach
(
km
.
common
.
Keep
,
keys
=
[
'
EventInfo
'
,
'
Header
'
,
'
RawHeader
'
,
'
McTracks
'
,
'
Hits
'
,
'
McHits
'
])
if
self
.
do_time_preproc
:
...
...
@@ -122,10 +138,14 @@ class FileBinner:
# pipe.attach(EventSkipper, data_cuts=self.data_cuts)
if
self
.
bin_plot_freq
is
not
None
:
pdf_name
=
os
.
path
.
splitext
(
outfile
)[
0
]
+
"
_hists.pdf
"
if
plot_hists
:
pdf_name
=
os
.
path
.
splitext
(
outfile
)[
0
]
+
"
_hists.pdf
"
else
:
pdf_name
=
None
pipe
.
attach
(
BinningPlotter
,
bin_plot_freq
=
self
.
bin_plot_freq
,
bin_edges_list
=
self
.
bin_edges_list
,
hists_start
=
hists_start
,
pdf_path
=
pdf_name
)
pipe
.
attach
(
ImageMaker
,
...
...
@@ -144,6 +164,14 @@ class FileBinner:
pipe
.
attach
(
km
.
common
.
Keep
,
keys
=
[
'
histogram
'
,
'
mc_info
'
])
pipe
.
attach
(
kp
.
io
.
HDF5Sink
,
filename
=
outfile
,
complib
=
self
.
complib
,
complevel
=
self
.
complevel
,
chunksize
=
self
.
chunksize
,
flush_frequency
=
self
.
flush_frequency
)
return
pipe
def
get_names_and_shape
(
self
):
"""
Get names and shape of the resulting x data, e.g. (pos_z, time), (18, 50).
...
...
This diff is collapsed.
Click to expand it.
orcasong_plag/modules.py
+
77
−
58
View file @
2b130099
...
...
@@ -137,7 +137,7 @@ class BinningPlotter(kp.Module):
bin_edges_list : List
List with the names of the fields to bin, and the respective bin edges,
including the left- and right-most bin edge.
pdf_path : str
pdf_path : str
, optional
Where to save the hists to. This pdf will contain all the field names
on their own page each.
bin_plot_freq : int
...
...
@@ -150,22 +150,35 @@ class BinningPlotter(kp.Module):
plot_bin_edges : bool
If true, will plot the bin edges as horizontal lines. Is never used
for the time binning (field name
"
time
"
).
hists_start : dict, optional
Starting values for the statistics.
"""
def
configure
(
self
):
self
.
bin_edges_list
=
self
.
require
(
'
bin_edges_list
'
)
self
.
pdf_path
=
self
.
require
(
'
pdf_path
'
)
self
.
pdf_path
=
self
.
get
(
'
pdf_path
'
,
default
=
None
)
self
.
bin_plot_freq
=
self
.
get
(
"
bin_plot_freq
"
,
default
=
20
)
self
.
res_increase
=
self
.
get
(
'
res_increase
'
,
default
=
5
)
self
.
plot_bin_edges
=
self
.
get
(
'
plot_bin_edges
'
,
default
=
True
)
self
.
hists_start
=
self
.
get
(
'
hists_start
'
,
default
=
None
)
self
.
hists
=
{}
for
bin_name
,
bin_edges
in
self
.
_yield_spaced_bin_edges
():
self
.
hists
[
bin_name
]
=
{
"
hist
"
:
np
.
zeros
(
len
(
bin_edges
)
-
1
),
"
out_pos
"
:
0
,
"
out_neg
"
:
0
,
}
if
self
.
hists_start
is
None
:
self
.
hists
=
{}
for
bin_name
,
org_bin_edges
in
self
.
bin_edges_list
:
if
bin_name
==
"
time
"
:
bin_edges
=
org_bin_edges
else
:
bin_edges
=
self
.
_space_bin_edges
(
org_bin_edges
)
self
.
hists
[
bin_name
]
=
{
"
hist
"
:
np
.
zeros
(
len
(
bin_edges
)
-
1
),
"
hist_bin_edges
"
:
bin_edges
,
"
bin_edges
"
:
org_bin_edges
,
"
out_pos
"
:
0
,
"
out_neg
"
:
0
,
}
else
:
self
.
hists
=
self
.
hists_start
self
.
i
=
0
...
...
@@ -179,23 +192,19 @@ class BinningPlotter(kp.Module):
return
bin_edges
def
_yield_spaced_bin_edges
(
self
):
for
bin_name
,
bin_edges
in
self
.
bin_edges_list
:
if
bin_name
!=
"
time
"
:
bin_edges
=
self
.
_space_bin_edges
(
bin_edges
)
yield
bin_name
,
bin_edges
def
process
(
self
,
blob
):
"""
Extract data from blob for the hist plots.
"""
if
self
.
i
%
self
.
bin_plot_freq
==
0
:
for
bin_name
,
bin_edges
in
self
.
_yield_spaced_bin_edges
():
for
bin_name
,
hists_data
in
self
.
hists
.
items
():
hist_bin_edges
=
hists_data
[
"
hist_bin_edges
"
]
data
=
blob
[
"
Hits
"
][
bin_name
]
hist
=
np
.
histogram
(
data
,
bins
=
bin_edges
)[
0
]
hist
=
np
.
histogram
(
data
,
bins
=
hist_
bin_edges
)[
0
]
out_pos
=
data
[
data
>
np
.
max
(
bin_edges
)].
size
out_neg
=
data
[
data
<
np
.
min
(
bin_edges
)].
size
out_pos
=
data
[
data
>
np
.
max
(
hist_
bin_edges
)].
size
out_neg
=
data
[
data
<
np
.
min
(
hist_
bin_edges
)].
size
self
.
hists
[
bin_name
][
"
hist
"
]
+=
hist
self
.
hists
[
bin_name
][
"
out_pos
"
]
+=
out_pos
...
...
@@ -208,44 +217,54 @@ class BinningPlotter(kp.Module):
"""
Make and save the histograms to pdf.
"""
with
PdfPages
(
self
.
pdf_path
)
as
pdf_file
:
for
bin_name
,
org_bin_edges
in
self
.
bin_edges_list
:
hist
=
self
.
hists
[
bin_name
][
"
hist
"
]
out_pos
=
self
.
hists
[
bin_name
][
"
out_pos
"
]
out_neg
=
self
.
hists
[
bin_name
][
"
out_neg
"
]
hist_frac
=
hist
/
(
np
.
sum
(
hist
)
+
out_pos
+
out_neg
)
if
self
.
pdf_path
is
not
None
:
plot_hists
(
self
.
hists
,
self
.
pdf_path
,
plot_bin_edges
=
self
.
plot_bin_edges
)
if
bin_name
!=
"
time
"
:
bin_edges
=
self
.
_space_bin_edges
(
org_bin_edges
)
else
:
bin_edges
=
org_bin_edges
return
self
.
hists
bin_spacing
=
bin_edges
[
1
]
-
bin_edges
[
0
]
fig
,
ax
=
plt
.
subplots
()
plt
.
bar
(
bin_edges
[:
-
1
],
hist_frac
,
align
=
"
edge
"
,
width
=
0.9
*
bin_spacing
,
)
ax
.
yaxis
.
set_major_formatter
(
ticker
.
PercentFormatter
(
xmax
=
1
))
if
self
.
plot_bin_edges
and
bin_name
!=
"
time
"
:
for
bin_edge
in
org_bin_edges
:
plt
.
axvline
(
x
=
bin_edge
,
color
=
'
grey
'
,
linestyle
=
'
-
'
,
linewidth
=
1
,
alpha
=
0.9
)
# place a text box in upper left in axes coords
out_pos_rel
=
out_pos
/
np
.
sum
(
hist
)
out_neg_rel
=
out_neg
/
np
.
sum
(
hist
)
textstr
=
"
Hits cut off:
\n
Left: {:.1%}
\n
"
\
"
Right: {:.1%}
"
.
format
(
out_neg_rel
,
out_pos_rel
)
props
=
dict
(
boxstyle
=
'
round
'
,
facecolor
=
'
white
'
,
alpha
=
0.9
)
ax
.
text
(
0.05
,
0.95
,
textstr
,
transform
=
ax
.
transAxes
,
verticalalignment
=
'
top
'
,
bbox
=
props
)
plt
.
xlabel
(
bin_name
)
plt
.
ylabel
(
"
Fraction of hits
"
)
pdf_file
.
savefig
(
fig
)
print
(
bin_name
,
out_neg
,
out_pos
)
print
(
"
Saved binning plot to
"
+
self
.
pdf_path
)
def
plot_hists
(
hists
,
pdf_path
,
plot_bin_edges
=
True
):
"""
Plot histograms made by the BinningPlotter to the given pdf path.
"""
with
PdfPages
(
pdf_path
)
as
pdf_file
:
for
bin_name
,
hists_data
in
hists
.
items
():
hist_bin_edges
=
hists_data
[
"
hist_bin_edges
"
]
bin_edges
=
hists_data
[
"
bin_edges
"
]
hist
=
hists_data
[
"
hist
"
]
out_pos
=
hists_data
[
"
out_pos
"
]
out_neg
=
hists_data
[
"
out_neg
"
]
hist_frac
=
hist
/
(
np
.
sum
(
hist
)
+
out_pos
+
out_neg
)
bin_spacing
=
hist_bin_edges
[
1
]
-
hist_bin_edges
[
0
]
fig
,
ax
=
plt
.
subplots
()
plt
.
bar
(
hist_bin_edges
[:
-
1
],
hist_frac
,
align
=
"
edge
"
,
width
=
0.9
*
bin_spacing
,
)
ax
.
yaxis
.
set_major_formatter
(
ticker
.
PercentFormatter
(
xmax
=
1
))
if
plot_bin_edges
and
bin_name
!=
"
time
"
:
for
bin_edge
in
bin_edges
:
plt
.
axvline
(
x
=
bin_edge
,
color
=
'
grey
'
,
linestyle
=
'
-
'
,
linewidth
=
1
,
alpha
=
0.9
)
# place a text box in upper left in axes coords
out_pos_rel
=
out_pos
/
np
.
sum
(
hist
)
out_neg_rel
=
out_neg
/
np
.
sum
(
hist
)
textstr
=
"
Hits cut off:
\n
Left: {:.1%}
\n
"
\
"
Right: {:.1%}
"
.
format
(
out_neg_rel
,
out_pos_rel
)
props
=
dict
(
boxstyle
=
'
round
'
,
facecolor
=
'
white
'
,
alpha
=
0.9
)
ax
.
text
(
0.05
,
0.95
,
textstr
,
transform
=
ax
.
transAxes
,
verticalalignment
=
'
top
'
,
bbox
=
props
)
plt
.
xlabel
(
bin_name
)
plt
.
ylabel
(
"
Fraction of hits
"
)
pdf_file
.
savefig
(
fig
)
print
(
"
Saved binning plot to
"
+
pdf_path
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment