Verified commit 058cd92d authored by Tamas Gal
Update log analyser to support the new log format

parent ada2053e
Pipeline #32454 passed
#!/usr/bin/env python
# coding=utf-8
# Filename: log_analyser.py
# Author: Rodrigo Gracia Ruiz <rgracia@km3net.de>
# Author: Tamas Gal <tgal@km3net.de>
# vim: ts=4 sw=4 et
from collections import defaultdict
import sys
import re
import numpy as np
import matplotlib
# Force matplotlib to not use any Xwindows backend.
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import os
import datetime
from datetime import datetime as dt
from datetime import timezone as tz
import time

# Event names and colours
EVENTS = dict(ERROR="red", WARNING="orange", Died="deeppink", Born="steelblue")
BUFFER_SIZE = 16 * 1024**2  # buffer size for the lines when parsing the log
REGEX_LOG_LINE = re.compile(r".+ ([A-Za-z]+) \[([A-Za-z]+)\]: .+")
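# Illustrative example of a line this pattern is assumed to match (the exact
# new log format is not shown in this commit, so timestamp and process name
# below are made up):
#   "2024-01-01 00:00:00 UTC WARNING [DataFilter]: queue is almost full"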

def plot_log_statistics(out_file, summary, title):
    """Creates a categorical bar plot for each event and process"""
    processes = sorted(summary.keys())
    xs = np.arange(len(processes))
    w = 0.8 / len(EVENTS)
    fig, ax = plt.subplots()
    for idx, (event, color) in enumerate(EVENTS.items()):
        # Centre the group of event bars around each process tick
        x_offset = idx * w + w / 2 - w * len(EVENTS) / 2
        ax.bar(
            xs + x_offset,
            [summary[process][event] for process in processes],
            width=w,
            color=color,
            label=event,
        )
    ax.set_xticks(xs, processes)
    ax.set_ylabel("count")
    ax.legend()
    ax.set_ylim(1e-1, 1e6)
    ax.set_yscale("log")
    ax.grid(True)
    ax.set_title(title)
    plt.savefig(out_file)
    plt.close("all")

def seconds_to_UTC_midnight():
    """Returns the seconds until next midnight"""
    tomorrow = dt.now(tz.utc) + datetime.timedelta(days=1)
    midnight = dt(
        year=tomorrow.year,
        month=tomorrow.month,
        day=tomorrow.day,
        hour=0,
        minute=0,
        second=0,
        tzinfo=tz.utc,
    )
    return (midnight - dt.now(tz.utc)).seconds
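
# Example: main() below uses this to sleep until shortly after the next UTC
# midnight before looking for a new daily log file:
#   time.sleep(seconds_to_UTC_midnight() + 5 * 60)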

def process_log_file(log_file):
    """Generates a dictionary of event counts in a log file

    The returned dictionary has the structure dict[PROCESS][EVENT] => count.
    """
    summary = defaultdict(lambda: defaultdict(int))
    n_lines_parsed = 0
    n_lines_unparsed = 0
    with open(log_file, "r") as fobj:
        lines_chunk = fobj.readlines(BUFFER_SIZE)
        while lines_chunk:
            for line in lines_chunk:
                m = REGEX_LOG_LINE.match(line)
                if m is not None:
                    tag = m[1]
                    process = m[2]
                    if tag in ("Born", "Died"):
                        summary[process][tag] += 1
                    for severity in ("WARNING", "ERROR"):
                        if severity in line:
                            summary[process][severity] += 1
                    n_lines_parsed += 1
                else:
                    n_lines_unparsed += 1
            lines_chunk = fobj.readlines(BUFFER_SIZE)
    print(f"Successfully parsed {n_lines_parsed} lines")
    print(f"A total of {n_lines_unparsed} lines could not be parsed.")
    for process, stats in summary.items():
        print(f"{process}:")
        for event, n_lines in stats.items():
            print(f"  {event}: {n_lines}")
    return summary

print(f"Warnings: {warnings}")
print(f"Errors: {errors}")
title = os.path.basename(f.name)
plot_log_statistics(errors,warnings,title,out_file)
def main():
log_dir = "logs/"
regexp = "^MSG_(.+)\.log$"
while True:
for fname in os.listdir(log_dir):
plot_fpath = os.path.join(log_dir, os.path.splitext(fname)[0] + ".png")
log_fpath = os.path.join(log_dir, fname)
if re.match(regexp, fname) and not os.path.exists(plot_fpath):
print("-> Processing ", fname)
summary = process_log_file(log_fpath)
title = os.path.basename(fname)
plot_log_statistics(plot_fpath, summary, title)
time.sleep(seconds_to_UTC_midnight() + 5*60)
if __name__ == "__main__":
main()