diff --git a/talks/images/orca-du4.png b/talks/images/orca-du4.png new file mode 100644 index 0000000000000000000000000000000000000000..67808c77343b17cc71252c28ca392da2512b1cd0 Binary files /dev/null and b/talks/images/orca-du4.png differ diff --git a/talks/premiere.org b/talks/premiere.org new file mode 100644 index 0000000000000000000000000000000000000000..bc2801f458d7229fd4463895cdda1425be78e847 --- /dev/null +++ b/talks/premiere.org @@ -0,0 +1,320 @@ +#+OPTIONS: num:nil toc:nil reveal_single_file:t +#+REVEAL_ROOT: ~/opt/reveal.js-3.9.2 +#+REVEAL_TRANS: none +#+REVEAL_THEME: white +#+REVEAL_MIN_SCALE: 1.0 +#+REVEAL_MAX_SCALE: 1.0 +#+REVEAL_TITLE_SLIDE: <h1>%t</h1><h3>%s</h3><p>%A %a</p><p><a href="%u">%u</a></p> + +#+Title: km3io +#+Subtitle: Reading KM3NeT ROOT files without ROOT +#+Author: Zineb Aly (CPPM) and Tamas Gal (ECAP) +#+Email: zaly@km3et.de tgal@km3net.de +#+REVEAL_TALK_URL: https://indico.cern.ch/event/878692/ + + +* Prerequisites +#+BEGIN_SRC bash :results silent :async t +python3 -m venv ~/tmp/km3io-premiere-venv +. ~/tmp/km3io-permiere-venv/bin/activate +pip install km3io==0.8.1 +#+END_SRC + +#+BEGIN_SRC elisp +(setq org-babel-python-command "~/tmp/km3io-permiere-venv/bin/python") +#+END_SRC + +#+RESULTS: +: ~/tmp/km3io-permiere-venv/bin/python + +* km3io +#+ATTR_REVEAL: :frag (appear) +- [[https://git.km3net.de/km3py/km3io][km3io]] tiny Python package with minimal dependencies to read KM3NeT ROOT files +- Uses the [[https://github.com/scikit-hep/uproot][uproot]] library to access ROOT data +- Provides convenient wrapper classes +- Maximum performance due to numpy and numba +- Data are read lazily + - only loaded into memory when directly accessed + - apply several cut masks on huge datasets without reading them into the memory + +** Installation +- Dependencies: + - Python 3.5+ + - uproot (installed automatically) +- Releases are published on the official Python package repository: + - ~pip install km3io~ + +* Online (DAQ) Data +** km3io supports the following DAQ datatypes +#+ATTR_REVEAL: :frag (appear) +- ~JDAQEvent~ (the event dataformat) + - header information + - snapshot hits + - triggered hits +- ~JDAQSummaryslices~ + - header information + - accessor methods for bit flags (HRV, FIFO, UDP status) +- ~JDAQTimeslices~ + - header information + - frame hits + - currently only L1, L2 and SN streams, the L0 stream is work in progress + +** Examples +*** Opening a run file +#+BEGIN_SRC python :results output replace :session km3io :exports both +import km3io +f = km3io.DAQReader("KM3NeT_00000044_00006880.root") +print(f.events) +print(f.timeslices) +print(f.summaryslices) +#+END_SRC + +#+RESULTS: +: Number of events: 115038 +: Available timeslice streams: SN, L1 +: <km3io.daq.SummmarySlices object at 0x7f8ee90cc970> + +*** Investigating timeslice frames + +#+BEGIN_SRC python :results output replace :session km3io :exports both +a_timeslice = f.timeslices.stream("L1", 23) +print(a_timeslice.frames.keys()) +#+END_SRC + +#+RESULTS: +: dict_keys([806451572, 806455814, 806465101, 806483369, 806487219, 806487226, 806487231, 808432835, 808435278, 808447180, 808447186, 808451904, 808451907, 808469129, 808472260, 808472265, 808488895, 808488990, 808489014, 808489117, 808493910, 808946818, 808949744, 808951460, 808956908, 808959411, 808961448, 808961480, 808961504, 808961655, 808964815, 808964852, 808964883, 808964908, 808969848, 808969857, 808972593, 808972598, 808972698, 808974758, 808974773, 808974811, 808974972, 808976377, 808979567, 808979721, 808979729, 808981510, 808981523, 808981672, 808981812, 808981864, 808982005, 808982018, 808982041, 808982066, 808982077, 808982547, 808984711, 808996773, 808997793, 809006037, 809007627, 809503416, 809521500, 809524432, 809526097, 809544058, 809544061]) + +*** Reading the first 100 ToTs and channel IDs of a frame sent by the DOM 806451572 + +#+BEGIN_SRC python :results output replace :session km3io :exports both +print(a_timeslice.frames[806451572].tot[:100]) +print(a_timeslice.frames[806451572].pmt[:100]) +#+END_SRC + +#+RESULTS: +#+begin_example +[33 16 22 24 7 27 4 31 31 15 5 26 30 24 7 26 26 26 27 15 7 3 63 28 + 26 30 25 24 20 7 23 6 22 22 26 15 29 25 24 22 23 21 26 20 27 27 25 12 + 27 15 21 9 17 27 21 30 29 5 25 24 22 27 34 10 13 17 25 10 15 28 15 20 + 29 24 24 5 16 26 25 33 26 26 26 14 30 6 27 10 18 24 26 49 59 25 31 30 + 23 25 28 11] +[ 0 9 4 12 13 2 10 10 8 9 27 27 28 18 27 2 6 15 4 2 2 12 27 3 + 15 10 23 14 19 9 9 24 24 6 7 20 7 20 27 22 24 25 22 14 18 27 30 29 + 29 11 28 1 1 11 12 18 29 20 20 19 10 23 21 2 2 16 1 3 3 24 26 9 + 13 10 29 30 30 10 22 0 22 30 6 19 21 25 25 27 27 26 27 25 29 9 10 5 + 13 6 13 11] +#+end_example + +*** Checking the number of UDP packets in summary slices + +#+BEGIN_SRC python :results output replace :session km3io :exports both +f = km3io.DAQReader("KM3NeT_00000044_00006880.root") +sumslice = f.summaryslices.slices[23] +print(sumslice.dom_id) +print(km3io.daq.get_number_udp_packets(sumslice.dq_status)) +#+END_SRC + +#+RESULTS: +#+begin_example +[806451572 806483369 806487231 808435278 808447180 808451907 808472265 + 808488895 808489014 808489117 808493910 808946818 808949744 808951460 + 808956908 808959411 808961448 808961504 808961655 808964815 808964883 + 808964908 808969848 808969857 808972593 808972598 808972698 808974972 + 808976377 808979721 808979729 808981510 808981523 808981672 808981812 + 808981864 808982005 808982018 808982041 808982066 808982547 808984711 + 808996773 808997793 809006037 809007627 809521500 809524432 809544058] +[17 17 16 16 25 16 27 17 18 17 21 16 16 16 34 18 18 18 17 18 16 18 15 17 + 20 18 15 17 17 19 16 18 16 17 17 16 18 18 17 27 18 20 16 17 15 18 17 17 + 17] +#+end_example + +* Offline (MC/reco) Data +** Reading offline files (aka aanet-ROOT files) +#+ATTR_REVEAL: :frag (appear) +- Events + - header information + - hits + - tracks (from reconstruction) + - MC tracks +- MC information +- Reco information + +** Opening a reconstructed MUPAGE file +#+BEGIN_SRC python :results output replace :session km3io :exports both +f = km3io.OfflineReader("mc.root") +print(f) +#+END_SRC + +#+RESULTS: +: <km3io.offline.OfflineReader object at 0x7f8eeb436e20> + +** Investigating events and tracks +#+BEGIN_SRC python :results output replace :session km3io :exports both +print(f.events) +#+END_SRC + +#+RESULTS: +: Number of events: 10 + +#+BEGIN_SRC python :results output replace :session km3io :exports both +print(f.tracks.lik) +print(f.tracks.dir_z) +#+END_SRC + +#+RESULTS: +: [[294.6407542676734 294.6407542676734 294.6407542676734 ... 67.81221253265059 67.7756405143316 67.77250505700384] [96.75133289411137 96.75133289411137 96.75133289411137 ... 39.21916536442286 39.184645826013806 38.870325146341884] [560.2775306614813 560.2775306614813 560.2775306614813 ... 118.88577278801066 118.72271313687405 117.80785995187605] ... [71.03251451148226 71.03251451148226 71.03251451148226 ... 16.714140573909347 16.444395245214945 16.34639241716669] [326.440133294878 326.440133294878 326.440133294878 ... 87.79818671079849 87.75488082571873 87.74839444768625] [159.77779654216795 159.77779654216795 159.77779654216795 ... 33.8669134999348 33.821631538334984 33.77240735670646]] +: [[-0.872885221293917 -0.872885221293917 -0.872885221293917 ... -0.6631226836266504 -0.5680647731737454 -0.5680647731737454] [-0.8351996698137462 -0.8351996698137462 -0.8351996698137462 ... -0.7485107718446855 -0.8229838871876581 -0.239315690284641] [-0.989148723802379 -0.989148723802379 -0.989148723802379 ... -0.9350162572437829 -0.88545604390297 -0.88545604390297] ... [-0.5704611045902105 -0.5704611045902105 -0.5704611045902105 ... -0.9350162572437829 -0.4647231989130516 -0.4647231989130516] [-0.9779941383490359 -0.9779941383490359 -0.9779941383490359 ... -0.88545604390297 -0.88545604390297 -0.8229838871876581] [-0.7396916780974963 -0.7396916780974963 -0.7396916780974963 ... -0.6631226836266504 -0.7485107718446855 -0.7485107718446855]] + +** Some pretty print features for single objects +*** Hits +#+BEGIN_SRC python :results output replace :session km3io :exports both +print(f[0].hits[1]) +#+END_SRC + +#+RESULTS: +#+begin_example +offline hit: + id : 0 + dom_id : 806451572 + channel_id : 9 + tdc : 0 + tot : 30 + trig : 1 + pmt_id : 0 + t : 70104016.0 + a : 0.0 + pos_x : 0.0 + pos_y : 0.0 + pos_z : 0.0 + dir_x : 0.0 + dir_y : 0.0 + dir_z : 0.0 + pure_t : 0.0 + pure_a : 0.0 + type : 0 + origin : 0 + pattern_flags : 0 +#+end_example + +*** Tracks + +#+BEGIN_SRC python :results output replace :session km3io :exports both +print(f[0].tracks[0]) +#+END_SRC + +#+RESULTS: +#+begin_example +offline track: + fUniqueID : 0 + fBits : 33554432 + id : 1 + pos_x : 445.835395997812 + pos_y : 615.1089636184813 + pos_z : 125.1448339836911 + dir_x : 0.0368711082700674 + dir_y : -0.48653048395923415 + dir_z : -0.872885221293917 + t : 70311446.46401498 + E : 99.10458562488608 + len : 0.0 + lik : 294.6407542676734 + type : 0 + rec_type : 4000 + rec_stages : [1, 3, 5, 4] + status : 0 + mother_id : -1 + hit_ids : [] + error_matrix : [] + comment : 0 + JGANDALF_BETA0_RAD : 0.004957442219414389 + JGANDALF_BETA1_RAD : 0.003417848024252858 + JGANDALF_CHI2 : -294.6407542676734 + JGANDALF_NUMBER_OF_HITS : 142.0 + JENERGY_ENERGY : 99.10458562488608 + JENERGY_CHI2 : 1.7976931348623157e+308 + JGANDALF_LAMBDA : 4.2409761837248484e-12 + JGANDALF_NUMBER_OF_ITERATIONS : 10.0 + JSTART_NPE_MIP : 24.88469697331908 + JSTART_NPE_MIP_TOTAL : 55.88169412579765 + JSTART_LENGTH_METRES : 98.89582506402911 + JVETO_NPE : 0.0 + JVETO_NUMBER_OF_HITS : 0.0 + JENERGY_MUON_RANGE_METRES : 344.9767431592819 + JENERGY_NOISE_LIKELIHOOD : -333.87773581129136 + JENERGY_NDF : 1471.0 + JENERGY_NUMBER_OF_HITS : 101.0 +#+end_example + +** Extracting the energy of every first reco track in each event + +#+BEGIN_SRC python :results output replace :session km3io :exports both +f = km3io.OfflineReader("mupage.root") +print(f.mc_tracks) +print(f.mc_tracks.id.counts) +mask = f.mc_tracks.id.counts > 0 +print(f.mc_tracks.E[mask, 0]) +#+END_SRC + +#+RESULTS: +: Number of tracks: 12236 +: [11 2 3 ... 10 1 4] +: [17.72 73.213 10884.78 1694.332 1221.061 22945.123 11019.418 ...] + +* ORCA DU4 RBR Analysis Example +** A tiny function to extract track attributes from a list of files + +#+BEGIN_SRC python +def extract_features(files, features): + """Return a dict with the features from every best reco track""" + data = defaultdict(list) + for f in tqdm(files): + tracks = km3io.OfflineReader(f).tracks + mask = tracks.len.counts > 0 + for feature in features: + data[feature].append(getattr(tracks, feature)[mask, 0]) + return {k: np.hstack(v) for k, v in data.items()} +#+END_SRC + +** Extracting ~E~, ~lik~, ~pos[xyz]~ and ~dir[xyz]~ +- Only takes a few seconds per file +- Results are numpy arrays + +#+BEGIN_SRC python +sea_files = glob("data/reco-sea/*aanet*.root") +features = ['E', 'lik', *[e + '_' + q for q in 'xyz' for e in ['pos', 'dir']]] +sea_data = extract_features(sea_files, features) +#+END_SRC + +** Plotting some data with ~matplotlib~ +#+BEGIN_SRC python +fig, ax = plt.subplots() +plot_options = { + 'histtype': 'step', + 'bins': 100, + 'log': True, + 'linewidth': 2 +} +ax.hist(sea_data['E'], label="sea data", **plot_options) +ax.hist(mc_data['E'], label="atm. muons MC (JSirene)", **plot_options) +ax.set_xlabel("energy / GeV") +ax.legend(); ax.grid(); +#+END_SRC + +#+REVEAL: split + +[[file:./images/orca-du4.png]] + +* Command line tool(s) +- We are working on some counter parts to the Jpp tools + - ~KPrintTree -f FILENAME~: similar to ~JPrintTree~ + - more to come (feel free to request or contribute) + +* Thanks +- Zineb Aly (CPPM) +- Tamas Gal (ECAP) +- Johannes Schumann (ECAP) + +** Important links +- Docs: [[https://km3py.pages.km3net.de/km3io]] +- Source: [[https://git.km3net.de/km3py/km3io]] +- uproot: [[https://github.com/scikit-hep/uproot]]