Skip to content
Snippets Groups Projects
Verified Commit 058cd92d authored by Tamas Gal's avatar Tamas Gal :speech_balloon:
Browse files

Update log analyser to support the new log format

parent ada2053e
No related branches found
No related tags found
No related merge requests found
Pipeline #32454 passed
#!/usr/bin/env python #!/usr/bin/env python
# coding=utf-8 # coding=utf-8
# Filename: log_analyser.py # Filename: log_analyser.py
# Author: Rodrigo Gracia Ruiz <rgracia@km3net.de> # Author: Tamas Gal <tgal@km3net.de>
# vim: ts=4 sw=4 et # vim: ts=4 sw=4 et
from collections import defaultdict
import sys import sys
import re import re
import numpy as np import numpy as np
import matplotlib import matplotlib
# Force matplotlib to not use any Xwindows backend. # Force matplotlib to not use any Xwindows backend.
matplotlib.use('Agg') matplotlib.use("Agg")
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import os import os
import datetime import datetime
...@@ -16,100 +18,104 @@ from datetime import datetime as dt ...@@ -16,100 +18,104 @@ from datetime import datetime as dt
from datetime import timezone as tz from datetime import timezone as tz
import time import time
# Event names (log line tags) mapped to the bar colour used in the plots
EVENTS = dict(ERROR="red", WARNING="orange", Died="deeppink", Born="steelblue")
# Buffer size in bytes for chunked reading of (potentially huge) log files
BUFFER_SIZE = 16 * 1024**2
# Matches lines like "... Born [ProcName]: ..." -> group 1 = tag/severity,
# group 2 = process name. Raw string: "\[" is an invalid escape sequence in a
# regular string literal and raises a SyntaxWarning on modern Python.
REGEX_LOG_LINE = re.compile(r".+ ([A-Za-z]+) \[([A-Za-z]+)\]: .+")
def plot_log_statistics(out_file, summary, title):
    """Create a categorical bar plot of event counts per process.

    Parameters
    ----------
    out_file: str
        Path of the image file to write (handed to ``plt.savefig``).
    summary: dict
        Nested mapping ``summary[process][event] -> count`` as produced
        by ``process_log_file``.
    title: str
        Title drawn above the plot.
    """
    processes = sorted(summary.keys())
    xs = np.arange(len(processes))
    # the bars of all events share a total group width of 0.8 per process
    w = 0.8 / len(EVENTS)

    fig, ax = plt.subplots()
    for idx, (event, color) in enumerate(EVENTS.items()):
        # centre the group of event bars around each categorical x position
        x_offset = idx * w + w / 2 - w * len(EVENTS) / 2
        # .get(event, 0): a process may have no entry for a given event;
        # direct indexing would raise KeyError on a plain dict (or silently
        # grow a defaultdict input while plotting)
        counts = [summary[process].get(event, 0) for process in processes]
        ax.bar(xs + x_offset, counts, width=w, color=color, label=event)
    ax.set_xticks(xs, processes)
    ax.set_ylabel("count")
    ax.legend()
    ax.set_ylim(1e-1, 1e6)
    ax.set_yscale("log")
    ax.grid(True)
    ax.set_title(title)
    plt.savefig(out_file)
    plt.close("all")
def seconds_to_UTC_midnight():
    """Return the number of seconds remaining until the next UTC midnight."""
    one_day = datetime.timedelta(days=1)
    # take "now + 1 day" and truncate the time-of-day part to get the
    # upcoming midnight in UTC
    midnight = (dt.now(tz.utc) + one_day).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    remaining = midnight - dt.now(tz.utc)
    return remaining.seconds
def process_log_file(log_file):
    """Count log events per process in a log file.

    Each line is matched against ``REGEX_LOG_LINE``; "Born"/"Died" tags
    and occurrences of "WARNING"/"ERROR" are tallied per process.

    Parameters
    ----------
    log_file: str
        Path to the log file to analyse.

    Returns
    -------
    dict
        Nested mapping ``summary[process][event] -> count``.
    """
    summary = defaultdict(lambda: defaultdict(int))

    n_lines_parsed = 0
    n_lines_unparsed = 0
    with open(log_file, "r") as fobj:
        # read in bounded chunks so huge log files do not exhaust memory
        lines_chunk = fobj.readlines(BUFFER_SIZE)
        while lines_chunk:
            for line in lines_chunk:
                m = REGEX_LOG_LINE.match(line)
                if m is not None:
                    tag = m[1]      # e.g. "Born"/"Died" or a log level
                    process = m[2]  # process name inside the brackets
                    if tag in ("Born", "Died"):
                        summary[process][tag] += 1
                    # severities are searched in the whole line, not just
                    # in the tag group
                    for severity in ("WARNING", "ERROR"):
                        if severity in line:
                            summary[process][severity] += 1
                    n_lines_parsed += 1
                else:
                    n_lines_unparsed += 1
            lines_chunk = fobj.readlines(BUFFER_SIZE)

    print(f"Successfully parsed {n_lines_parsed} lines")
    print(f"A total of {n_lines_unparsed} could not be parsed.")
    for process, stats in summary.items():
        print(f"{process}:")
        for event, n_lines in stats.items():
            print(f"  {event}: {n_lines}")
    return summary
def main():
    """Periodically scan the log directory and plot statistics for new logs.

    Each iteration looks for ``MSG_*.log`` files which do not yet have a
    corresponding ``.png`` plot, analyses and plots them, and then sleeps
    until shortly after the next UTC midnight.
    """
    log_dir = "logs/"
    # raw string: "\." is an invalid escape sequence in a regular string
    # literal and raises a SyntaxWarning on modern Python
    regexp = r"^MSG_(.+)\.log$"
    while True:
        for fname in os.listdir(log_dir):
            plot_fpath = os.path.join(log_dir, os.path.splitext(fname)[0] + ".png")
            log_fpath = os.path.join(log_dir, fname)
            # only process log files which have not been plotted yet
            if re.match(regexp, fname) and not os.path.exists(plot_fpath):
                print("-> Processing ", fname)
                summary = process_log_file(log_fpath)
                title = os.path.basename(fname)
                plot_log_statistics(plot_fpath, summary, title)
        # resume scanning 5 minutes past the next UTC midnight, when a
        # fresh daily log file is expected
        time.sleep(seconds_to_UTC_midnight() + 5 * 60)


if __name__ == "__main__":
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment