From 058cd92d75a18ffe49084a452d529e548cfdb5a3 Mon Sep 17 00:00:00 2001 From: Tamas Gal <himself@tamasgal.com> Date: Mon, 14 Nov 2022 20:56:00 +0100 Subject: [PATCH] Update log analyser to support the new log format --- backend/scripts/log_analyser.py | 180 +++++++++++++++++--------------- 1 file changed, 93 insertions(+), 87 deletions(-) diff --git a/backend/scripts/log_analyser.py b/backend/scripts/log_analyser.py index 525d911..4184c3e 100755 --- a/backend/scripts/log_analyser.py +++ b/backend/scripts/log_analyser.py @@ -1,14 +1,16 @@ #!/usr/bin/env python # coding=utf-8 # Filename: log_analyser.py -# Author: Rodrigo Gracia Ruiz <rgracia@km3net.de> +# Author: Tamas Gal <tgal@km3net.de> # vim: ts=4 sw=4 et +from collections import defaultdict import sys import re import numpy as np import matplotlib + # Force matplotlib to not use any Xwindows backend. -matplotlib.use('Agg') +matplotlib.use("Agg") import matplotlib.pyplot as plt import os import datetime @@ -16,100 +18,104 @@ from datetime import datetime as dt from datetime import timezone as tz import time -class Message: - regexp = re.compile('(\w+.\w+)\s+\[(\w+)\]:\s+(.*)\s+(\d+\.\d+\.\d+\.\d+)\s+(\w+\/*\w+)\s+(\w+)\s+(.*)') - - def __init__(self, msg): - self.matches = self.regexp.match(msg) - self.fields = self.regexp.split(msg) - - def is_error(self): - return self.matches!=None and self.fields[6]=='ERROR' - def is_warning(self): - return self.matches!=None and self.fields[6]=='WARNING' - def get_process(self): - if (self.matches!=None): - return self.fields[2] - -def plot_log_statistics(errors,warnings,title,output): - err_keys = [k for k in sorted(errors .keys() , key=str.casefold)] - war_keys = [k for k in sorted(warnings.keys() , key=str.casefold)] - - if (err_keys != war_keys): - sys.exit("plot_log_statistics ERROR: Dictionaries with different keys") - - x_labels = [str(k) for k in err_keys] - x = np.arange(len(x_labels)) - y_e = [errors [k] for k in err_keys ] - y_w = [warnings[k] for k in war_keys ] 
# Event names and the bar colour used for each in the summary plot.
EVENTS = dict(ERROR="red", WARNING="orange", Died="deeppink", Born="steelblue")
BUFFER_SIZE = 16 * 1024**2  # max bytes per readlines() chunk when parsing the log
# Matches e.g. "... ERROR [DataQueue]: ..." -> group 1 = tag, group 2 = process.
# Raw string required: "\[" in a non-raw literal is an invalid escape sequence
# (DeprecationWarning, and a SyntaxWarning on newer Python versions).
REGEX_LOG_LINE = re.compile(r".+ ([A-Za-z]+) \[([A-Za-z]+)\]: .+")


def plot_log_statistics(out_file, summary, title):
    """Create a categorical bar plot of event counts per process.

    Parameters
    ----------
    out_file: str
        Path the plot image is written to.
    summary: dict
        Nested mapping ``summary[process][event] => count``, as produced
        by ``process_log_file``.
    title: str
        Title of the plot.
    """
    processes = sorted(summary.keys())
    xs = np.arange(len(processes))
    w = 0.8 / len(EVENTS)  # the bars of one group share a total width of 0.8

    fig, ax = plt.subplots()
    for idx, (event, color) in enumerate(EVENTS.items()):
        # Centre the group of event bars around each tick position.
        x_offset = idx * w + w / 2 - w * len(EVENTS) / 2
        ax.bar(
            xs + x_offset,
            [summary[process][event] for process in processes],
            width=w,
            color=color,
            label=event,
        )
    ax.set_xticks(xs, processes)
    ax.set_ylabel("count")
    ax.legend()
    ax.set_ylim(1e-1, 1e6)  # fixed range so plots of different days are comparable
    ax.set_yscale("log")
    ax.grid(True)
    ax.set_title(title)

    plt.savefig(out_file)
    plt.close("all")


def seconds_to_UTC_midnight():
    """Return the number of seconds until the next UTC midnight."""
    tomorrow = dt.now(tz.utc) + datetime.timedelta(days=1)
    midnight = dt(
        year=tomorrow.year,
        month=tomorrow.month,
        day=tomorrow.day,
        hour=0,
        minute=0,
        second=0,
        tzinfo=tz.utc,
    )
    return (midnight - dt.now(tz.utc)).seconds
def process_log_file(log_file):
    """Generate a dictionary of event counts in a log file.

    Parameters
    ----------
    log_file: str
        Path of the log file to parse.

    Returns
    -------
    dict
        Nested mapping with the structure ``dict[PROCESS][EVENT] => count``.
    """
    summary = defaultdict(lambda: defaultdict(int))

    n_lines_parsed = 0
    n_lines_unparsed = 0
    with open(log_file, "r") as fobj:
        # Read in bounded chunks so huge log files don't exhaust memory;
        # iter(callable, sentinel) stops when readlines() returns [] at EOF.
        for lines_chunk in iter(lambda: fobj.readlines(BUFFER_SIZE), []):
            for line in lines_chunk:
                m = REGEX_LOG_LINE.match(line)
                if m is None:
                    n_lines_unparsed += 1
                    continue
                tag = m[1]
                process = m[2]
                if tag in ("Born", "Died"):
                    summary[process][tag] += 1
                # A line can carry a severity in addition to the matched tag.
                for severity in ("WARNING", "ERROR"):
                    if severity in line:
                        summary[process][severity] += 1
                n_lines_parsed += 1

    print(f"Successfully parsed {n_lines_parsed} lines")
    print(f"A total of {n_lines_unparsed} lines could not be parsed.")
    for process, stats in summary.items():
        print(f"{process}:")
        for event, n_lines in stats.items():
            print(f"  {event}: {n_lines}")

    return summary
def main():
    """Periodically scan the log directory and plot statistics for new logs.

    Every log file matching ``MSG_*.log`` that has no corresponding ``.png``
    yet is parsed and plotted; the loop then sleeps until shortly after the
    next UTC midnight, when a new daily log file is expected.
    """
    # NOTE(review): this was '/logs/' (absolute) before this patch — confirm
    # the relative path is intentional and matches the deployment layout.
    log_dir = "logs/"
    # Raw string: "\." in a non-raw literal is a deprecated invalid escape.
    # Compiled once instead of re.match()-ing the pattern in the endless loop.
    regexp = re.compile(r"^MSG_(.+)\.log$")

    while True:
        for fname in os.listdir(log_dir):
            plot_fpath = os.path.join(log_dir, os.path.splitext(fname)[0] + ".png")
            log_fpath = os.path.join(log_dir, fname)
            if regexp.match(fname) and not os.path.exists(plot_fpath):
                print("-> Processing ", fname)
                summary = process_log_file(log_fpath)
                title = os.path.basename(fname)
                plot_log_statistics(plot_fpath, summary, title)
        # Wake a few minutes after UTC midnight so the new daily log exists.
        time.sleep(seconds_to_UTC_midnight() + 5 * 60)


if __name__ == "__main__":
    main()