Skip to content
Snippets Groups Projects
Commit 2b130099 authored by Stefan Reck's avatar Stefan Reck
Browse files

One binning plot for multiple files.

parent c0257d23
No related branches found
No related tags found
No related merge requests found
...@@ -2,7 +2,7 @@ import km3pipe as kp ...@@ -2,7 +2,7 @@ import km3pipe as kp
import km3modules as km import km3modules as km
import os import os
from orcasong_plag.modules import TimePreproc, ImageMaker, McInfoMaker, BinningPlotter from orcasong_plag.modules import TimePreproc, ImageMaker, McInfoMaker, BinningPlotter, plot_hists
from orcasong_plag.mc_info_types import get_mc_info_extr from orcasong_plag.mc_info_types import get_mc_info_extr
...@@ -59,8 +59,8 @@ class FileBinner: ...@@ -59,8 +59,8 @@ class FileBinner:
self.mc_info_extr = mc_info_extr self.mc_info_extr = mc_info_extr
self.bin_plot_freq = 20 self.bin_plot_freq = 20
self.n_statusbar = 200 self.n_statusbar = 1000
self.n_memory_observer = 400 self.n_memory_observer = 1000
self.do_time_preproc = True self.do_time_preproc = True
# self.data_cuts = None # self.data_cuts = None
...@@ -75,8 +75,8 @@ class FileBinner: ...@@ -75,8 +75,8 @@ class FileBinner:
Parameters Parameters
---------- ----------
infile : str or List infile : str
Path to the input file(s). Path to the input file.
outfile : str outfile : str
Path to the output file. Path to the output file.
...@@ -84,34 +84,50 @@ class FileBinner: ...@@ -84,34 +84,50 @@ class FileBinner:
name, shape = self.get_names_and_shape() name, shape = self.get_names_and_shape()
print("Generating {} images with shape {}".format(name, shape)) print("Generating {} images with shape {}".format(name, shape))
pipe = kp.Pipeline() plot_hists = self.bin_plot_freq is not None
if self.n_statusbar is not None: pipe = self.build_pipe(infile, outfile, plot_hists=plot_hists)
pipe.attach(km.common.StatusBar, every=self.n_statusbar) pipe.drain()
if self.n_memory_observer is not None:
pipe.attach(km.common.MemoryObserver, every=400)
if not isinstance(infile, list): def run_multi(self, infiles, outfolder):
infile = [infile] """
Bin multiple files into their own output files each.
pipe.attach(kp.io.hdf5.HDF5Pump, filenames=infile) Will also generate a summary binning plot for all of the files.
self.attach_binning_modules(pipe, outfile=outfile) Parameters
----------
infiles : List
The path to infiles as str.
outfolder : str
The output folder to place them in.
pipe.attach(kp.io.HDF5Sink, """
filename=outfile, hists = None
complib=self.complib, for infile in infiles:
complevel=self.complevel, outfile_name = os.path.splitext(os.path.basename(infile))[0] + "_hist.h5"
chunksize=self.chunksize, outfile = outfolder + outfile_name
flush_frequency=self.flush_frequency)
pipe = self.build_pipe(infile, outfile,
plot_hists=False, hists_start=hists)
smry = pipe.drain()
hists = smry["BinningPlotter"]
plot_hists(hists, pdf_path=outfolder + "binning_summary.pdf")
def build_pipe(self, infile, outfile, plot_hists=True, hists_start=None):
"""
Build the pipe to generate images and mc_info for a file.
"""
pipe.drain() pipe = kp.Pipeline()
def attach_binning_modules(self, pipe, outfile): if self.n_statusbar is not None:
""" pipe.attach(km.common.StatusBar, every=self.n_statusbar)
Attach modules to a km3pipe which transform a blob to images and mc_info. if self.n_memory_observer is not None:
pipe.attach(km.common.MemoryObserver, every=self.n_memory_observer)
pipe.attach(kp.io.hdf5.HDF5Pump, filename=infile)
"""
pipe.attach(km.common.Keep, keys=['EventInfo', 'Header', 'RawHeader', pipe.attach(km.common.Keep, keys=['EventInfo', 'Header', 'RawHeader',
'McTracks', 'Hits', 'McHits']) 'McTracks', 'Hits', 'McHits'])
if self.do_time_preproc: if self.do_time_preproc:
...@@ -122,10 +138,14 @@ class FileBinner: ...@@ -122,10 +138,14 @@ class FileBinner:
# pipe.attach(EventSkipper, data_cuts=self.data_cuts) # pipe.attach(EventSkipper, data_cuts=self.data_cuts)
if self.bin_plot_freq is not None: if self.bin_plot_freq is not None:
pdf_name = os.path.splitext(outfile)[0] + "_hists.pdf" if plot_hists:
pdf_name = os.path.splitext(outfile)[0] + "_hists.pdf"
else:
pdf_name = None
pipe.attach(BinningPlotter, pipe.attach(BinningPlotter,
bin_plot_freq=self.bin_plot_freq, bin_plot_freq=self.bin_plot_freq,
bin_edges_list=self.bin_edges_list, bin_edges_list=self.bin_edges_list,
hists_start=hists_start,
pdf_path=pdf_name) pdf_path=pdf_name)
pipe.attach(ImageMaker, pipe.attach(ImageMaker,
...@@ -144,6 +164,14 @@ class FileBinner: ...@@ -144,6 +164,14 @@ class FileBinner:
pipe.attach(km.common.Keep, keys=['histogram', 'mc_info']) pipe.attach(km.common.Keep, keys=['histogram', 'mc_info'])
pipe.attach(kp.io.HDF5Sink,
filename=outfile,
complib=self.complib,
complevel=self.complevel,
chunksize=self.chunksize,
flush_frequency=self.flush_frequency)
return pipe
def get_names_and_shape(self): def get_names_and_shape(self):
""" """
Get names and shape of the resulting x data, e.g. (pos_z, time), (18, 50). Get names and shape of the resulting x data, e.g. (pos_z, time), (18, 50).
......
...@@ -137,7 +137,7 @@ class BinningPlotter(kp.Module): ...@@ -137,7 +137,7 @@ class BinningPlotter(kp.Module):
bin_edges_list : List bin_edges_list : List
List with the names of the fields to bin, and the respective bin edges, List with the names of the fields to bin, and the respective bin edges,
including the left- and right-most bin edge. including the left- and right-most bin edge.
pdf_path : str pdf_path : str, optional
Where to save the hists to. This pdf will contain all the field names Where to save the hists to. This pdf will contain all the field names
on their own page each. on their own page each.
bin_plot_freq : int bin_plot_freq : int
...@@ -150,22 +150,35 @@ class BinningPlotter(kp.Module): ...@@ -150,22 +150,35 @@ class BinningPlotter(kp.Module):
plot_bin_edges : bool plot_bin_edges : bool
If true, will plot the bin edges as horizontal lines. Is never used If true, will plot the bin edges as horizontal lines. Is never used
for the time binning (field name "time"). for the time binning (field name "time").
hists_start : dict, optional
Starting values for the statistics.
""" """
def configure(self): def configure(self):
self.bin_edges_list = self.require('bin_edges_list') self.bin_edges_list = self.require('bin_edges_list')
self.pdf_path = self.require('pdf_path') self.pdf_path = self.get('pdf_path', default=None)
self.bin_plot_freq = self.get("bin_plot_freq", default=20) self.bin_plot_freq = self.get("bin_plot_freq", default=20)
self.res_increase = self.get('res_increase', default=5) self.res_increase = self.get('res_increase', default=5)
self.plot_bin_edges = self.get('plot_bin_edges', default=True) self.plot_bin_edges = self.get('plot_bin_edges', default=True)
self.hists_start = self.get('hists_start', default=None)
self.hists = {} if self.hists_start is None:
for bin_name, bin_edges in self._yield_spaced_bin_edges(): self.hists = {}
self.hists[bin_name] = { for bin_name, org_bin_edges in self.bin_edges_list:
"hist": np.zeros(len(bin_edges) - 1), if bin_name == "time":
"out_pos": 0, bin_edges = org_bin_edges
"out_neg": 0, else:
} bin_edges = self._space_bin_edges(org_bin_edges)
self.hists[bin_name] = {
"hist": np.zeros(len(bin_edges) - 1),
"hist_bin_edges": bin_edges,
"bin_edges": org_bin_edges,
"out_pos": 0,
"out_neg": 0,
}
else:
self.hists = self.hists_start
self.i = 0 self.i = 0
...@@ -179,23 +192,19 @@ class BinningPlotter(kp.Module): ...@@ -179,23 +192,19 @@ class BinningPlotter(kp.Module):
return bin_edges return bin_edges
def _yield_spaced_bin_edges(self):
for bin_name, bin_edges in self.bin_edges_list:
if bin_name != "time":
bin_edges = self._space_bin_edges(bin_edges)
yield bin_name, bin_edges
def process(self, blob): def process(self, blob):
""" """
Extract data from blob for the hist plots. Extract data from blob for the hist plots.
""" """
if self.i % self.bin_plot_freq == 0: if self.i % self.bin_plot_freq == 0:
for bin_name, bin_edges in self._yield_spaced_bin_edges(): for bin_name, hists_data in self.hists.items():
hist_bin_edges = hists_data["hist_bin_edges"]
data = blob["Hits"][bin_name] data = blob["Hits"][bin_name]
hist = np.histogram(data, bins=bin_edges)[0] hist = np.histogram(data, bins=hist_bin_edges)[0]
out_pos = data[data > np.max(bin_edges)].size out_pos = data[data > np.max(hist_bin_edges)].size
out_neg = data[data < np.min(bin_edges)].size out_neg = data[data < np.min(hist_bin_edges)].size
self.hists[bin_name]["hist"] += hist self.hists[bin_name]["hist"] += hist
self.hists[bin_name]["out_pos"] += out_pos self.hists[bin_name]["out_pos"] += out_pos
...@@ -208,44 +217,54 @@ class BinningPlotter(kp.Module): ...@@ -208,44 +217,54 @@ class BinningPlotter(kp.Module):
""" """
Make and save the histograms to pdf. Make and save the histograms to pdf.
""" """
with PdfPages(self.pdf_path) as pdf_file: if self.pdf_path is not None:
for bin_name, org_bin_edges in self.bin_edges_list: plot_hists(self.hists, self.pdf_path,
hist = self.hists[bin_name]["hist"] plot_bin_edges=self.plot_bin_edges)
out_pos = self.hists[bin_name]["out_pos"]
out_neg = self.hists[bin_name]["out_neg"]
hist_frac = hist / (np.sum(hist) + out_pos + out_neg)
if bin_name != "time": return self.hists
bin_edges = self._space_bin_edges(org_bin_edges)
else:
bin_edges = org_bin_edges
bin_spacing = bin_edges[1] - bin_edges[0]
fig, ax = plt.subplots() def plot_hists(hists, pdf_path, plot_bin_edges=True):
plt.bar(bin_edges[:-1], """
hist_frac, Plot histograms made by the BinningPlotter to the given pdf path.
align="edge",
width=0.9*bin_spacing, """
) with PdfPages(pdf_path) as pdf_file:
ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1)) for bin_name, hists_data in hists.items():
hist_bin_edges = hists_data["hist_bin_edges"]
if self.plot_bin_edges and bin_name != "time": bin_edges = hists_data["bin_edges"]
for bin_edge in org_bin_edges: hist = hists_data["hist"]
plt.axvline(x=bin_edge, color='grey', linestyle='-', out_pos = hists_data["out_pos"]
linewidth=1, alpha=0.9) out_neg = hists_data["out_neg"]
# place a text box in upper left in axes coords hist_frac = hist / (np.sum(hist) + out_pos + out_neg)
out_pos_rel = out_pos / np.sum(hist)
out_neg_rel = out_neg / np.sum(hist) bin_spacing = hist_bin_edges[1] - hist_bin_edges[0]
textstr = "Hits cut off:\n Left: {:.1%}\n" \ fig, ax = plt.subplots()
" Right: {:.1%}".format(out_neg_rel, out_pos_rel) plt.bar(hist_bin_edges[:-1],
props = dict(boxstyle='round', facecolor='white', alpha=0.9) hist_frac,
ax.text(0.05, 0.95, textstr, transform=ax.transAxes, align="edge",
verticalalignment='top', bbox=props) width=0.9 * bin_spacing,
)
plt.xlabel(bin_name) ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1))
plt.ylabel("Fraction of hits")
if plot_bin_edges and bin_name != "time":
pdf_file.savefig(fig) for bin_edge in bin_edges:
print(bin_name, out_neg, out_pos) plt.axvline(x=bin_edge, color='grey', linestyle='-',
print("Saved binning plot to " + self.pdf_path) linewidth=1, alpha=0.9)
# place a text box in upper left in axes coords
out_pos_rel = out_pos / np.sum(hist)
out_neg_rel = out_neg / np.sum(hist)
textstr = "Hits cut off:\n Left: {:.1%}\n" \
" Right: {:.1%}".format(out_neg_rel, out_pos_rel)
props = dict(boxstyle='round', facecolor='white', alpha=0.9)
ax.text(0.05, 0.95, textstr, transform=ax.transAxes,
verticalalignment='top', bbox=props)
plt.xlabel(bin_name)
plt.ylabel("Fraction of hits")
pdf_file.savefig(fig)
print("Saved binning plot to " + pdf_path)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment