Skip to content
Snippets Groups Projects
Commit 2b130099 authored by Stefan Reck's avatar Stefan Reck
Browse files

One binning plot for multiple files.

parent c0257d23
No related branches found
No related tags found
No related merge requests found
......@@ -2,7 +2,7 @@ import km3pipe as kp
import km3modules as km
import os
from orcasong_plag.modules import TimePreproc, ImageMaker, McInfoMaker, BinningPlotter
from orcasong_plag.modules import TimePreproc, ImageMaker, McInfoMaker, BinningPlotter, plot_hists
from orcasong_plag.mc_info_types import get_mc_info_extr
......@@ -59,8 +59,8 @@ class FileBinner:
self.mc_info_extr = mc_info_extr
self.bin_plot_freq = 20
self.n_statusbar = 200
self.n_memory_observer = 400
self.n_statusbar = 1000
self.n_memory_observer = 1000
self.do_time_preproc = True
# self.data_cuts = None
......@@ -75,8 +75,8 @@ class FileBinner:
Parameters
----------
infile : str or List
Path to the input file(s).
infile : str
Path to the input file.
outfile : str
Path to the output file.
......@@ -84,34 +84,50 @@ class FileBinner:
name, shape = self.get_names_and_shape()
print("Generating {} images with shape {}".format(name, shape))
pipe = kp.Pipeline()
plot_hists = self.bin_plot_freq is not None
if self.n_statusbar is not None:
pipe.attach(km.common.StatusBar, every=self.n_statusbar)
if self.n_memory_observer is not None:
pipe.attach(km.common.MemoryObserver, every=400)
pipe = self.build_pipe(infile, outfile, plot_hists=plot_hists)
pipe.drain()
if not isinstance(infile, list):
infile = [infile]
def run_multi(self, infiles, outfolder):
"""
Bin multiple files into their own output files each.
pipe.attach(kp.io.hdf5.HDF5Pump, filenames=infile)
Will also generate a summary binning plot for all of the files.
self.attach_binning_modules(pipe, outfile=outfile)
Parameters
----------
infiles : List
The path to infiles as str.
outfolder : str
The output folder to place them in.
pipe.attach(kp.io.HDF5Sink,
filename=outfile,
complib=self.complib,
complevel=self.complevel,
chunksize=self.chunksize,
flush_frequency=self.flush_frequency)
"""
hists = None
for infile in infiles:
outfile_name = os.path.splitext(os.path.basename(infile))[0] + "_hist.h5"
outfile = outfolder + outfile_name
pipe = self.build_pipe(infile, outfile,
plot_hists=False, hists_start=hists)
smry = pipe.drain()
hists = smry["BinningPlotter"]
plot_hists(hists, pdf_path=outfolder + "binning_summary.pdf")
def build_pipe(self, infile, outfile, plot_hists=True, hists_start=None):
"""
Build the pipe to generate images and mc_info for a file.
"""
pipe.drain()
pipe = kp.Pipeline()
def attach_binning_modules(self, pipe, outfile):
"""
Attach modules to a km3pipe which transform a blob to images and mc_info.
if self.n_statusbar is not None:
pipe.attach(km.common.StatusBar, every=self.n_statusbar)
if self.n_memory_observer is not None:
pipe.attach(km.common.MemoryObserver, every=self.n_memory_observer)
pipe.attach(kp.io.hdf5.HDF5Pump, filename=infile)
"""
pipe.attach(km.common.Keep, keys=['EventInfo', 'Header', 'RawHeader',
'McTracks', 'Hits', 'McHits'])
if self.do_time_preproc:
......@@ -122,10 +138,14 @@ class FileBinner:
# pipe.attach(EventSkipper, data_cuts=self.data_cuts)
if self.bin_plot_freq is not None:
pdf_name = os.path.splitext(outfile)[0] + "_hists.pdf"
if plot_hists:
pdf_name = os.path.splitext(outfile)[0] + "_hists.pdf"
else:
pdf_name = None
pipe.attach(BinningPlotter,
bin_plot_freq=self.bin_plot_freq,
bin_edges_list=self.bin_edges_list,
hists_start=hists_start,
pdf_path=pdf_name)
pipe.attach(ImageMaker,
......@@ -144,6 +164,14 @@ class FileBinner:
pipe.attach(km.common.Keep, keys=['histogram', 'mc_info'])
pipe.attach(kp.io.HDF5Sink,
filename=outfile,
complib=self.complib,
complevel=self.complevel,
chunksize=self.chunksize,
flush_frequency=self.flush_frequency)
return pipe
def get_names_and_shape(self):
"""
Get names and shape of the resulting x data, e.g. (pos_z, time), (18, 50).
......
......@@ -137,7 +137,7 @@ class BinningPlotter(kp.Module):
bin_edges_list : List
List with the names of the fields to bin, and the respective bin edges,
including the left- and right-most bin edge.
pdf_path : str
pdf_path : str, optional
Where to save the hists to. This pdf will contain all the field names
on their own page each.
bin_plot_freq : int
......@@ -150,22 +150,35 @@ class BinningPlotter(kp.Module):
plot_bin_edges : bool
If true, will plot the bin edges as horizontal lines. Is never used
for the time binning (field name "time").
hists_start : dict, optional
Starting values for the statistics.
"""
def configure(self):
self.bin_edges_list = self.require('bin_edges_list')
self.pdf_path = self.require('pdf_path')
self.pdf_path = self.get('pdf_path', default=None)
self.bin_plot_freq = self.get("bin_plot_freq", default=20)
self.res_increase = self.get('res_increase', default=5)
self.plot_bin_edges = self.get('plot_bin_edges', default=True)
self.hists_start = self.get('hists_start', default=None)
self.hists = {}
for bin_name, bin_edges in self._yield_spaced_bin_edges():
self.hists[bin_name] = {
"hist": np.zeros(len(bin_edges) - 1),
"out_pos": 0,
"out_neg": 0,
}
if self.hists_start is None:
self.hists = {}
for bin_name, org_bin_edges in self.bin_edges_list:
if bin_name == "time":
bin_edges = org_bin_edges
else:
bin_edges = self._space_bin_edges(org_bin_edges)
self.hists[bin_name] = {
"hist": np.zeros(len(bin_edges) - 1),
"hist_bin_edges": bin_edges,
"bin_edges": org_bin_edges,
"out_pos": 0,
"out_neg": 0,
}
else:
self.hists = self.hists_start
self.i = 0
......@@ -179,23 +192,19 @@ class BinningPlotter(kp.Module):
return bin_edges
def _yield_spaced_bin_edges(self):
for bin_name, bin_edges in self.bin_edges_list:
if bin_name != "time":
bin_edges = self._space_bin_edges(bin_edges)
yield bin_name, bin_edges
def process(self, blob):
"""
Extract data from blob for the hist plots.
"""
if self.i % self.bin_plot_freq == 0:
for bin_name, bin_edges in self._yield_spaced_bin_edges():
for bin_name, hists_data in self.hists.items():
hist_bin_edges = hists_data["hist_bin_edges"]
data = blob["Hits"][bin_name]
hist = np.histogram(data, bins=bin_edges)[0]
hist = np.histogram(data, bins=hist_bin_edges)[0]
out_pos = data[data > np.max(bin_edges)].size
out_neg = data[data < np.min(bin_edges)].size
out_pos = data[data > np.max(hist_bin_edges)].size
out_neg = data[data < np.min(hist_bin_edges)].size
self.hists[bin_name]["hist"] += hist
self.hists[bin_name]["out_pos"] += out_pos
......@@ -208,44 +217,54 @@ class BinningPlotter(kp.Module):
"""
Make and save the histograms to pdf.
"""
with PdfPages(self.pdf_path) as pdf_file:
for bin_name, org_bin_edges in self.bin_edges_list:
hist = self.hists[bin_name]["hist"]
out_pos = self.hists[bin_name]["out_pos"]
out_neg = self.hists[bin_name]["out_neg"]
hist_frac = hist / (np.sum(hist) + out_pos + out_neg)
if self.pdf_path is not None:
plot_hists(self.hists, self.pdf_path,
plot_bin_edges=self.plot_bin_edges)
if bin_name != "time":
bin_edges = self._space_bin_edges(org_bin_edges)
else:
bin_edges = org_bin_edges
return self.hists
bin_spacing = bin_edges[1] - bin_edges[0]
fig, ax = plt.subplots()
plt.bar(bin_edges[:-1],
hist_frac,
align="edge",
width=0.9*bin_spacing,
)
ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1))
if self.plot_bin_edges and bin_name != "time":
for bin_edge in org_bin_edges:
plt.axvline(x=bin_edge, color='grey', linestyle='-',
linewidth=1, alpha=0.9)
# place a text box in upper left in axes coords
out_pos_rel = out_pos / np.sum(hist)
out_neg_rel = out_neg / np.sum(hist)
textstr = "Hits cut off:\n Left: {:.1%}\n" \
" Right: {:.1%}".format(out_neg_rel, out_pos_rel)
props = dict(boxstyle='round', facecolor='white', alpha=0.9)
ax.text(0.05, 0.95, textstr, transform=ax.transAxes,
verticalalignment='top', bbox=props)
plt.xlabel(bin_name)
plt.ylabel("Fraction of hits")
pdf_file.savefig(fig)
print(bin_name, out_neg, out_pos)
print("Saved binning plot to " + self.pdf_path)
def plot_hists(hists, pdf_path, plot_bin_edges=True):
"""
Plot histograms made by the BinningPlotter to the given pdf path.
"""
with PdfPages(pdf_path) as pdf_file:
for bin_name, hists_data in hists.items():
hist_bin_edges = hists_data["hist_bin_edges"]
bin_edges = hists_data["bin_edges"]
hist = hists_data["hist"]
out_pos = hists_data["out_pos"]
out_neg = hists_data["out_neg"]
hist_frac = hist / (np.sum(hist) + out_pos + out_neg)
bin_spacing = hist_bin_edges[1] - hist_bin_edges[0]
fig, ax = plt.subplots()
plt.bar(hist_bin_edges[:-1],
hist_frac,
align="edge",
width=0.9 * bin_spacing,
)
ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1))
if plot_bin_edges and bin_name != "time":
for bin_edge in bin_edges:
plt.axvline(x=bin_edge, color='grey', linestyle='-',
linewidth=1, alpha=0.9)
# place a text box in upper left in axes coords
out_pos_rel = out_pos / np.sum(hist)
out_neg_rel = out_neg / np.sum(hist)
textstr = "Hits cut off:\n Left: {:.1%}\n" \
" Right: {:.1%}".format(out_neg_rel, out_pos_rel)
props = dict(boxstyle='round', facecolor='white', alpha=0.9)
ax.text(0.05, 0.95, textstr, transform=ax.transAxes,
verticalalignment='top', bbox=props)
plt.xlabel(bin_name)
plt.ylabel("Fraction of hits")
pdf_file.savefig(fig)
print("Saved binning plot to " + pdf_path)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment