# project/log_analyzer.py
#
# 238 lines
# 7.0 KiB
# Python

import json
import logging
from typing import List
import numpy as np
import analyzers
from analyzers import get_renderer, Analyzer, render, Store
from analyzers.analyzer import ResultStore
from analyzers.render.default import LogEntryCountCSV
from analyzers.settings import LogSettings, load_settings
from loaders import LOADERS
# Configure the root logger: DEBUG threshold, "LEVEL logger-name:message" layout.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(levelname)s %(name)s:%(message)s',
)

# Logger used by this module.
log: logging.Logger = logging.getLogger(__name__)

# Give the "requests" logger its own WARN threshold so it does not inherit
# the DEBUG level configured above.
requests_log = logging.getLogger('requests')
requests_log.setLevel(logging.WARN)
def process_log(log_id: str, settings: LogSettings) -> List[Analyzer]:
    """Load one log and run every configured analyzer over its entries.

    The loader class is looked up in ``LOADERS`` by ``settings.log_format``.
    One analyzer is instantiated (with ``settings``) per item in
    ``settings.analyzers``. Every entry yielded by the loader is offered to the
    analyzers in order; as soon as one analyzer's ``process`` returns a truthy
    value, the remaining analyzers are skipped for that entry.

    :param log_id: handed unchanged to the loader's ``load`` method.
    :param settings: supplies ``log_format`` and the ``analyzers`` factories.
    :return: the analyzer instances after all entries have been processed.
    :raises RuntimeError: if the loader fails to load the log.
    """
    loader = LOADERS[settings.log_format]()
    try:
        loader.load(log_id)
    except Exception as e:
        # Only ordinary errors are wrapped; KeyboardInterrupt / SystemExit
        # must keep propagating untouched.
        raise RuntimeError(e) from e

    log.debug("build analyzers")
    active: List[Analyzer] = [factory(settings) for factory in settings.analyzers]

    log.debug("process entries")
    for entry in loader.get_entry():
        for analyzer in active:
            try:
                handled = analyzer.process(entry)
            except KeyError as e:
                # Best effort: log the failure and let the next analyzer
                # look at the same entry.
                log.exception(e)
                continue
            if handled:
                break
    return active
if __name__ == '__main__':
    # Script entry point: run the configured analyzers over every log listed
    # in the reference file, collect the results in one ResultStore and export
    # them. The optional outputs further down are switched on or off by
    # editing the literal ``if False:`` / ``if True:`` conditions.
    settings: LogSettings = load_settings("biogames2.json")

    # One log identifier per line. Surrounding whitespace is stripped and
    # blank lines are skipped so they do not become empty identifiers.
    log_ids: List[str] = []
    with open("/home/agp8x/git/uni/ma/project/data/0000_ref") as src:
        for line in src:
            line = line.strip()
            if line:
                log_ids.append(line)

    # Every analyzer of every log is handed the same store.
    store: ResultStore = ResultStore()
    for log_id in log_ids:
        for analysis in process_log(log_id, settings):
            log.info("* Result for %s", analysis.name())
            analysis.result(store, name=log_id)

    if False:
        for r in get_renderer(analyzers.LocomotionActionAnalyzer):
            r().render(store.get_all())

    if False:
        render(analyzers.LocationAnalyzer, store.get_all())

    if False:
        render(analyzers.ActivityMapper, store.get_all())
        render(analyzers.ProgressAnalyzer, store.get_all())

    if False:
        from analyzers.postprocessing import graph

        g = graph.Cache(settings)
        g.run(store)

    if False:
        for cat in store.get_categories():
            render(analyzers.SimulationOrderAnalyzer, store.get_category(cat), name=cat)

    if False:
        import csv

        # Rendering fills LogEntryCountCSV.summary as a side channel; reset it
        # first so only this run's categories end up in the CSV.
        LogEntryCountCSV.summary = None
        for cat in store.get_categories():
            render(analyzers.LogEntryCountAnalyzer, store.get_category(cat), name=cat)
        if LogEntryCountCSV.summary:
            summary = LogEntryCountCSV.summary
            # Collect every column (in first-seen order) before building any
            # row, so that all rows have the same width as the header row.
            headers = []
            for name in summary:
                for head in summary[name]:
                    if head not in headers:
                        headers.append(head)
            rows = []
            for name in summary:
                counts = summary[name]
                # Columns missing for this name are reported as 0.
                rows.append([name] + [counts[head] if head in counts else 0 for head in headers])
            with open('logentrycount.csv', 'w', newline='') as csvfile:
                writer = csv.writer(csvfile, quoting=csv.QUOTE_NONE)
                # Only the last dotted component of each header is written.
                writer.writerow(["name"] + [h.split(".")[-1] for h in headers])
                writer.writerows(rows)

    if True:
        from datetime import datetime

        # Serialize before opening so a failure does not truncate simus.json,
        # and use a context manager so the file handle is closed.
        serialized = store.serializable()
        with open("simus.json", "w") as jsonfile:
            json.dump(serialized, jsonfile, indent=2)

        # Hierarchical CSV: one line per store key, one indented line per
        # result name, then one doubly indented line per result item.
        with open("simus.csv", "w") as csvfile:
            csvfile.write("instanceconfig,log,simu,answered,universe_state,selected_actions,timestamp,time\n")
            for key in store.get_store():
                csvfile.write("{}\n".format(key))
                for result in store.store[key]:
                    csvfile.write(",{}\n".format(result.name))
                    for i in result.get():
                        answers = i['answers']
                        csvfile.write(",,{},{},{},{},{},{}\n".format(
                            answers['@id'],
                            answers['answered'],
                            len(answers['universe_state']) if answers['universe_state'] else 0,
                            len(i['selected_actions']) if i['selected_actions'] else 0,
                            i['timestamp'],
                            # The timestamp is divided by 1000 before conversion
                            # (presumably milliseconds since the epoch - confirm);
                            # fromtimestamp() without tz yields local time.
                            str(datetime.fromtimestamp(i['timestamp'] / 1000))
                        ))

    if False:
        from collections import defaultdict

        import matplotlib.pyplot as plt
        from util.meta_temp import CONFIG_NAMES

        # Activity types, in the order in which they are stacked and listed in
        # the legend.
        keys = [
            "simu",
            "question",
            "image",
            "audio",
            "video",
            "other",
            "map"
        ]
        places = defaultdict(list)
        # The loop variable must not be called ``log``: this code runs at
        # module scope and would rebind the module-level logger.
        for log_result in store.get_all():
            result = defaultdict(int)
            for i in log_result.get()['track']:
                props = i['properties']
                duration = props['end_timestamp'] - props['start_timestamp']
                result[props['activity_type']] += duration
            print(json.dumps(result, indent=4))
            total = sum(result.values())
            print(total)
            percentage = defaultdict(int)
            minutes = defaultdict(int)
            for i in result:
                # Guard against a track whose durations sum to zero.
                percentage[i] = result[i] / total if total else 0
                minutes[i] = result[i] / 60_000
            print(json.dumps(percentage, indent=4))
            if 'error' not in result:
                places[log_result.get()['instance']].append(minutes)

        for place in places:
            places[place] = sorted(places[place], key=lambda item: item['map'])

        # One bar per log, followed by an all-zero spacer bar after each place.
        dummy = [0] * len(keys)
        results = []
        sites = []
        for i in places:
            for j in places[i]:
                results.append([j[k] for k in keys])
            results.append(dummy)
            sites.append(CONFIG_NAMES[i] if i in CONFIG_NAMES else "---")

        size = len(results)
        ind = np.arange(size)
        width = 0.9
        print(results)
        # Transpose: one series per activity type across all bars.
        data = list(zip(*results))
        print(data)
        lines = []
        bottom = [0] * len(results)
        for series in data:
            lines.append(plt.bar(ind, series, bottom=bottom, width=width)[0])
            # Stack the next series on top of everything drawn so far.
            for k, x in enumerate(series):
                bottom[k] += x
        plt.legend(lines, keys)
        plt.title(", ".join(sites))
        plt.show()