Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
OrcaSong
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Machine Learning
OrcaSong
Commits
2b130099
Commit
2b130099
authored
5 years ago
by
Stefan Reck
Browse files
Options
Downloads
Patches
Plain Diff
One binning plot for multiple files.
parent
c0257d23
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
orcasong_plag/core.py
+54
-26
54 additions, 26 deletions
orcasong_plag/core.py
orcasong_plag/modules.py
+77
-58
77 additions, 58 deletions
orcasong_plag/modules.py
with
131 additions
and
84 deletions
orcasong_plag/core.py
+
54
−
26
View file @
2b130099
...
@@ -2,7 +2,7 @@ import km3pipe as kp
...
@@ -2,7 +2,7 @@ import km3pipe as kp
import
km3modules
as
km
import
km3modules
as
km
import
os
import
os
from
orcasong_plag.modules
import
TimePreproc
,
ImageMaker
,
McInfoMaker
,
BinningPlotter
from
orcasong_plag.modules
import
TimePreproc
,
ImageMaker
,
McInfoMaker
,
BinningPlotter
,
plot_hists
from
orcasong_plag.mc_info_types
import
get_mc_info_extr
from
orcasong_plag.mc_info_types
import
get_mc_info_extr
...
@@ -59,8 +59,8 @@ class FileBinner:
...
@@ -59,8 +59,8 @@ class FileBinner:
self
.
mc_info_extr
=
mc_info_extr
self
.
mc_info_extr
=
mc_info_extr
self
.
bin_plot_freq
=
20
self
.
bin_plot_freq
=
20
self
.
n_statusbar
=
2
00
self
.
n_statusbar
=
10
00
self
.
n_memory_observer
=
4
00
self
.
n_memory_observer
=
10
00
self
.
do_time_preproc
=
True
self
.
do_time_preproc
=
True
# self.data_cuts = None
# self.data_cuts = None
...
@@ -75,8 +75,8 @@ class FileBinner:
...
@@ -75,8 +75,8 @@ class FileBinner:
Parameters
Parameters
----------
----------
infile : str
or List
infile : str
Path to the input file
(s)
.
Path to the input file.
outfile : str
outfile : str
Path to the output file.
Path to the output file.
...
@@ -84,34 +84,50 @@ class FileBinner:
...
@@ -84,34 +84,50 @@ class FileBinner:
name
,
shape
=
self
.
get_names_and_shape
()
name
,
shape
=
self
.
get_names_and_shape
()
print
(
"
Generating {} images with shape {}
"
.
format
(
name
,
shape
))
print
(
"
Generating {} images with shape {}
"
.
format
(
name
,
shape
))
p
ipe
=
kp
.
Pipeline
()
p
lot_hists
=
self
.
bin_plot_freq
is
not
None
if
self
.
n_statusbar
is
not
None
:
pipe
=
self
.
build_pipe
(
infile
,
outfile
,
plot_hists
=
plot_hists
)
pipe
.
attach
(
km
.
common
.
StatusBar
,
every
=
self
.
n_statusbar
)
pipe
.
drain
()
if
self
.
n_memory_observer
is
not
None
:
pipe
.
attach
(
km
.
common
.
MemoryObserver
,
every
=
400
)
if
not
isinstance
(
infile
,
list
):
def
run_multi
(
self
,
infiles
,
outfolder
):
infile
=
[
infile
]
"""
Bin multiple files into their own output files each.
pipe
.
attach
(
kp
.
io
.
hdf5
.
HDF5Pump
,
filenames
=
in
file
)
Will also generate a summary binning plot for all of the
file
s.
self
.
attach_binning_modules
(
pipe
,
outfile
=
outfile
)
Parameters
----------
infiles : List
The path to infiles as str.
outfolder : str
The output folder to place them in.
pipe
.
attach
(
kp
.
io
.
HDF5Sink
,
"""
filename
=
outfile
,
hists
=
None
complib
=
self
.
complib
,
for
infile
in
infiles
:
complevel
=
self
.
complevel
,
outfile_name
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
infile
))[
0
]
+
"
_hist.h5
"
chunksize
=
self
.
chunksize
,
outfile
=
outfolder
+
outfile_name
flush_frequency
=
self
.
flush_frequency
)
pipe
=
self
.
build_pipe
(
infile
,
outfile
,
plot_hists
=
False
,
hists_start
=
hists
)
smry
=
pipe
.
drain
()
hists
=
smry
[
"
BinningPlotter
"
]
plot_hists
(
hists
,
pdf_path
=
outfolder
+
"
binning_summary.pdf
"
)
def
build_pipe
(
self
,
infile
,
outfile
,
plot_hists
=
True
,
hists_start
=
None
):
"""
Build the pipe to generate images and mc_info for a file.
"""
pipe
.
dra
in
()
pipe
=
kp
.
Pipel
in
e
()
def
attach_binning_modules
(
self
,
pipe
,
outfile
):
if
self
.
n_statusbar
is
not
None
:
"""
pipe
.
attach
(
km
.
common
.
StatusBar
,
every
=
self
.
n_statusbar
)
Attach modules to a km3pipe which transform a blob to images and mc_info.
if
self
.
n_memory_observer
is
not
None
:
pipe
.
attach
(
km
.
common
.
MemoryObserver
,
every
=
self
.
n_memory_observer
)
pipe
.
attach
(
kp
.
io
.
hdf5
.
HDF5Pump
,
filename
=
infile
)
"""
pipe
.
attach
(
km
.
common
.
Keep
,
keys
=
[
'
EventInfo
'
,
'
Header
'
,
'
RawHeader
'
,
pipe
.
attach
(
km
.
common
.
Keep
,
keys
=
[
'
EventInfo
'
,
'
Header
'
,
'
RawHeader
'
,
'
McTracks
'
,
'
Hits
'
,
'
McHits
'
])
'
McTracks
'
,
'
Hits
'
,
'
McHits
'
])
if
self
.
do_time_preproc
:
if
self
.
do_time_preproc
:
...
@@ -122,10 +138,14 @@ class FileBinner:
...
@@ -122,10 +138,14 @@ class FileBinner:
# pipe.attach(EventSkipper, data_cuts=self.data_cuts)
# pipe.attach(EventSkipper, data_cuts=self.data_cuts)
if
self
.
bin_plot_freq
is
not
None
:
if
self
.
bin_plot_freq
is
not
None
:
pdf_name
=
os
.
path
.
splitext
(
outfile
)[
0
]
+
"
_hists.pdf
"
if
plot_hists
:
pdf_name
=
os
.
path
.
splitext
(
outfile
)[
0
]
+
"
_hists.pdf
"
else
:
pdf_name
=
None
pipe
.
attach
(
BinningPlotter
,
pipe
.
attach
(
BinningPlotter
,
bin_plot_freq
=
self
.
bin_plot_freq
,
bin_plot_freq
=
self
.
bin_plot_freq
,
bin_edges_list
=
self
.
bin_edges_list
,
bin_edges_list
=
self
.
bin_edges_list
,
hists_start
=
hists_start
,
pdf_path
=
pdf_name
)
pdf_path
=
pdf_name
)
pipe
.
attach
(
ImageMaker
,
pipe
.
attach
(
ImageMaker
,
...
@@ -144,6 +164,14 @@ class FileBinner:
...
@@ -144,6 +164,14 @@ class FileBinner:
pipe
.
attach
(
km
.
common
.
Keep
,
keys
=
[
'
histogram
'
,
'
mc_info
'
])
pipe
.
attach
(
km
.
common
.
Keep
,
keys
=
[
'
histogram
'
,
'
mc_info
'
])
pipe
.
attach
(
kp
.
io
.
HDF5Sink
,
filename
=
outfile
,
complib
=
self
.
complib
,
complevel
=
self
.
complevel
,
chunksize
=
self
.
chunksize
,
flush_frequency
=
self
.
flush_frequency
)
return
pipe
def
get_names_and_shape
(
self
):
def
get_names_and_shape
(
self
):
"""
"""
Get names and shape of the resulting x data, e.g. (pos_z, time), (18, 50).
Get names and shape of the resulting x data, e.g. (pos_z, time), (18, 50).
...
...
This diff is collapsed.
Click to expand it.
orcasong_plag/modules.py
+
77
−
58
View file @
2b130099
...
@@ -137,7 +137,7 @@ class BinningPlotter(kp.Module):
...
@@ -137,7 +137,7 @@ class BinningPlotter(kp.Module):
bin_edges_list : List
bin_edges_list : List
List with the names of the fields to bin, and the respective bin edges,
List with the names of the fields to bin, and the respective bin edges,
including the left- and right-most bin edge.
including the left- and right-most bin edge.
pdf_path : str
pdf_path : str
, optional
Where to save the hists to. This pdf will contain all the field names
Where to save the hists to. This pdf will contain all the field names
on their own page each.
on their own page each.
bin_plot_freq : int
bin_plot_freq : int
...
@@ -150,22 +150,35 @@ class BinningPlotter(kp.Module):
...
@@ -150,22 +150,35 @@ class BinningPlotter(kp.Module):
plot_bin_edges : bool
plot_bin_edges : bool
If true, will plot the bin edges as horizontal lines. Is never used
If true, will plot the bin edges as horizontal lines. Is never used
for the time binning (field name
"
time
"
).
for the time binning (field name
"
time
"
).
hists_start : dict, optional
Starting values for the statistics.
"""
"""
def
configure
(
self
):
def
configure
(
self
):
self
.
bin_edges_list
=
self
.
require
(
'
bin_edges_list
'
)
self
.
bin_edges_list
=
self
.
require
(
'
bin_edges_list
'
)
self
.
pdf_path
=
self
.
require
(
'
pdf_path
'
)
self
.
pdf_path
=
self
.
get
(
'
pdf_path
'
,
default
=
None
)
self
.
bin_plot_freq
=
self
.
get
(
"
bin_plot_freq
"
,
default
=
20
)
self
.
bin_plot_freq
=
self
.
get
(
"
bin_plot_freq
"
,
default
=
20
)
self
.
res_increase
=
self
.
get
(
'
res_increase
'
,
default
=
5
)
self
.
res_increase
=
self
.
get
(
'
res_increase
'
,
default
=
5
)
self
.
plot_bin_edges
=
self
.
get
(
'
plot_bin_edges
'
,
default
=
True
)
self
.
plot_bin_edges
=
self
.
get
(
'
plot_bin_edges
'
,
default
=
True
)
self
.
hists_start
=
self
.
get
(
'
hists_start
'
,
default
=
None
)
self
.
hists
=
{}
if
self
.
hists_start
is
None
:
for
bin_name
,
bin_edges
in
self
.
_yield_spaced_bin_edges
():
self
.
hists
=
{}
self
.
hists
[
bin_name
]
=
{
for
bin_name
,
org_bin_edges
in
self
.
bin_edges_list
:
"
hist
"
:
np
.
zeros
(
len
(
bin_edges
)
-
1
),
if
bin_name
==
"
time
"
:
"
out_pos
"
:
0
,
bin_edges
=
org_bin_edges
"
out_neg
"
:
0
,
else
:
}
bin_edges
=
self
.
_space_bin_edges
(
org_bin_edges
)
self
.
hists
[
bin_name
]
=
{
"
hist
"
:
np
.
zeros
(
len
(
bin_edges
)
-
1
),
"
hist_bin_edges
"
:
bin_edges
,
"
bin_edges
"
:
org_bin_edges
,
"
out_pos
"
:
0
,
"
out_neg
"
:
0
,
}
else
:
self
.
hists
=
self
.
hists_start
self
.
i
=
0
self
.
i
=
0
...
@@ -179,23 +192,19 @@ class BinningPlotter(kp.Module):
...
@@ -179,23 +192,19 @@ class BinningPlotter(kp.Module):
return
bin_edges
return
bin_edges
def
_yield_spaced_bin_edges
(
self
):
for
bin_name
,
bin_edges
in
self
.
bin_edges_list
:
if
bin_name
!=
"
time
"
:
bin_edges
=
self
.
_space_bin_edges
(
bin_edges
)
yield
bin_name
,
bin_edges
def
process
(
self
,
blob
):
def
process
(
self
,
blob
):
"""
"""
Extract data from blob for the hist plots.
Extract data from blob for the hist plots.
"""
"""
if
self
.
i
%
self
.
bin_plot_freq
==
0
:
if
self
.
i
%
self
.
bin_plot_freq
==
0
:
for
bin_name
,
bin_edges
in
self
.
_yield_spaced_bin_edges
():
for
bin_name
,
hists_data
in
self
.
hists
.
items
():
hist_bin_edges
=
hists_data
[
"
hist_bin_edges
"
]
data
=
blob
[
"
Hits
"
][
bin_name
]
data
=
blob
[
"
Hits
"
][
bin_name
]
hist
=
np
.
histogram
(
data
,
bins
=
bin_edges
)[
0
]
hist
=
np
.
histogram
(
data
,
bins
=
hist_
bin_edges
)[
0
]
out_pos
=
data
[
data
>
np
.
max
(
bin_edges
)].
size
out_pos
=
data
[
data
>
np
.
max
(
hist_
bin_edges
)].
size
out_neg
=
data
[
data
<
np
.
min
(
bin_edges
)].
size
out_neg
=
data
[
data
<
np
.
min
(
hist_
bin_edges
)].
size
self
.
hists
[
bin_name
][
"
hist
"
]
+=
hist
self
.
hists
[
bin_name
][
"
hist
"
]
+=
hist
self
.
hists
[
bin_name
][
"
out_pos
"
]
+=
out_pos
self
.
hists
[
bin_name
][
"
out_pos
"
]
+=
out_pos
...
@@ -208,44 +217,54 @@ class BinningPlotter(kp.Module):
...
@@ -208,44 +217,54 @@ class BinningPlotter(kp.Module):
"""
"""
Make and save the histograms to pdf.
Make and save the histograms to pdf.
"""
"""
with
PdfPages
(
self
.
pdf_path
)
as
pdf_file
:
if
self
.
pdf_path
is
not
None
:
for
bin_name
,
org_bin_edges
in
self
.
bin_edges_list
:
plot_hists
(
self
.
hists
,
self
.
pdf_path
,
hist
=
self
.
hists
[
bin_name
][
"
hist
"
]
plot_bin_edges
=
self
.
plot_bin_edges
)
out_pos
=
self
.
hists
[
bin_name
][
"
out_pos
"
]
out_neg
=
self
.
hists
[
bin_name
][
"
out_neg
"
]
hist_frac
=
hist
/
(
np
.
sum
(
hist
)
+
out_pos
+
out_neg
)
if
bin_name
!=
"
time
"
:
return
self
.
hists
bin_edges
=
self
.
_space_bin_edges
(
org_bin_edges
)
else
:
bin_edges
=
org_bin_edges
bin_spacing
=
bin_edges
[
1
]
-
bin_edges
[
0
]
fig
,
ax
=
plt
.
subplots
()
def
plot_hists
(
hists
,
pdf_path
,
plot_bin_edges
=
True
):
plt
.
bar
(
bin_edges
[:
-
1
],
"""
hist_frac
,
Plot histograms made by the BinningPlotter to the given pdf path.
align
=
"
edge
"
,
width
=
0.9
*
bin_spacing
,
"""
)
with
PdfPages
(
pdf_path
)
as
pdf_file
:
ax
.
yaxis
.
set_major_formatter
(
ticker
.
PercentFormatter
(
xmax
=
1
))
for
bin_name
,
hists_data
in
hists
.
items
():
hist_bin_edges
=
hists_data
[
"
hist_bin_edges
"
]
if
self
.
plot_bin_edges
and
bin_name
!=
"
time
"
:
bin_edges
=
hists_data
[
"
bin_edges
"
]
for
bin_edge
in
org_bin_edges
:
hist
=
hists_data
[
"
hist
"
]
plt
.
axvline
(
x
=
bin_edge
,
color
=
'
grey
'
,
linestyle
=
'
-
'
,
out_pos
=
hists_data
[
"
out_pos
"
]
linewidth
=
1
,
alpha
=
0.9
)
out_neg
=
hists_data
[
"
out_neg
"
]
# place a text box in upper left in axes coords
hist_frac
=
hist
/
(
np
.
sum
(
hist
)
+
out_pos
+
out_neg
)
out_pos_rel
=
out_pos
/
np
.
sum
(
hist
)
out_neg_rel
=
out_neg
/
np
.
sum
(
hist
)
bin_spacing
=
hist_bin_edges
[
1
]
-
hist_bin_edges
[
0
]
textstr
=
"
Hits cut off:
\n
Left: {:.1%}
\n
"
\
fig
,
ax
=
plt
.
subplots
()
"
Right: {:.1%}
"
.
format
(
out_neg_rel
,
out_pos_rel
)
plt
.
bar
(
hist_bin_edges
[:
-
1
],
props
=
dict
(
boxstyle
=
'
round
'
,
facecolor
=
'
white
'
,
alpha
=
0.9
)
hist_frac
,
ax
.
text
(
0.05
,
0.95
,
textstr
,
transform
=
ax
.
transAxes
,
align
=
"
edge
"
,
verticalalignment
=
'
top
'
,
bbox
=
props
)
width
=
0.9
*
bin_spacing
,
)
plt
.
xlabel
(
bin_name
)
ax
.
yaxis
.
set_major_formatter
(
ticker
.
PercentFormatter
(
xmax
=
1
))
plt
.
ylabel
(
"
Fraction of hits
"
)
if
plot_bin_edges
and
bin_name
!=
"
time
"
:
pdf_file
.
savefig
(
fig
)
for
bin_edge
in
bin_edges
:
print
(
bin_name
,
out_neg
,
out_pos
)
plt
.
axvline
(
x
=
bin_edge
,
color
=
'
grey
'
,
linestyle
=
'
-
'
,
print
(
"
Saved binning plot to
"
+
self
.
pdf_path
)
linewidth
=
1
,
alpha
=
0.9
)
# place a text box in upper left in axes coords
out_pos_rel
=
out_pos
/
np
.
sum
(
hist
)
out_neg_rel
=
out_neg
/
np
.
sum
(
hist
)
textstr
=
"
Hits cut off:
\n
Left: {:.1%}
\n
"
\
"
Right: {:.1%}
"
.
format
(
out_neg_rel
,
out_pos_rel
)
props
=
dict
(
boxstyle
=
'
round
'
,
facecolor
=
'
white
'
,
alpha
=
0.9
)
ax
.
text
(
0.05
,
0.95
,
textstr
,
transform
=
ax
.
transAxes
,
verticalalignment
=
'
top
'
,
bbox
=
props
)
plt
.
xlabel
(
bin_name
)
plt
.
ylabel
(
"
Fraction of hits
"
)
pdf_file
.
savefig
(
fig
)
print
(
"
Saved binning plot to
"
+
pdf_path
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment