project/log_analyzer.py

268 lines
7.5 KiB
Python

import json
import logging
from typing import List
import numpy as np
import analyzers
from analyzers import get_renderer, Analyzer, render, Store
from analyzers.analyzer import ResultStore
from analyzers.render.default import LogEntryCountCSV
from analyzers.settings import LogSettings, load_settings
from loaders import LOADERS
logging.basicConfig(format='%(levelname)s %(name)s:%(message)s', level=logging.DEBUG)
log: logging.Logger = logging.getLogger(__name__)
logging.getLogger('requests').setLevel(logging.WARN)
logging.getLogger("urllib3").setLevel(logging.WARNING)
def process_log(logfile: str, settings: LogSettings) -> List[Analyzer]:
loader = LOADERS[settings.log_format]()
try:
loader.load(logfile)
except BaseException as e:
raise RuntimeError(e)
analyzers: List[Analyzer] = []
log.debug("build analyzers")
for analyzer in settings.analyzers:
analyzers.append(analyzer(settings))
log.debug("process entries")
for entry in loader.get_entry():
for analyzer in analyzers:
try:
if analyzer.process(entry):
break
except KeyError as e:
log.exception(e)
return analyzers
def run_analysis(log_ids: list, settings):
store: ResultStore = ResultStore()
for log_id in log_ids:
for analysis in process_log(log_id, settings):
log.info("* Result for " + analysis.name())
analysis.result(store, name=log_id)
return store
def load_ids(name: str):
log_ids = []
with open(name) as src:
for line in src:
line = line.strip()
log_ids.append(line)
return log_ids
def urach_logs(log_ids, settings):
return ["data/inst_{id}.{format}".format(id=log_id, format=settings.log_format) for log_id in log_ids]
def write_logentry_count_csv():
global cat, data, lines, csvfile
LogEntryCountCSV.summary = None
for cat in store.get_categories():
data = store.get_category(cat)
render(analyzers.LogEntryCountAnalyzer, data, name=cat)
if LogEntryCountCSV.summary:
headers = []
lines = []
for name in LogEntryCountCSV.summary:
data = LogEntryCountCSV.summary[name]
for head in data:
if not head in headers:
headers.append(head)
line = [name]
for head in headers:
line.append(data[head]) if head in data else line.append(0)
lines.append(line)
import csv
with open('logentrycount.csv', 'w', newline='') as csvfile:
writer = csv.writer(csvfile, quoting=csv.QUOTE_NONE)
writer.writerow(["name"] + [h.split(".")[-1] for h in headers])
for line in lines:
writer.writerow(line)
def write_simulation_flag_csv():
global csvfile, result, i
from datetime import datetime
json.dump(store.serializable(), open("simus.json", "w"), indent=2)
with open("simus.csv", "w") as csvfile:
csvfile.write("instanceconfig,log,simu,answered,universe_state,selected_actions,timestamp,time\n")
for key in store.get_store():
csvfile.write("{}\n".format(key))
for result in store.store[key]:
csvfile.write(",{}\n".format(result.name))
for i in result.get():
csvfile.write(",,{},{},{},{},{},{}\n".format(
i['answers']['@id'],
i['answers']['answered'],
len(i['answers']['universe_state']) if i['answers']['universe_state'] else 0,
len(i['selected_actions']) if i['selected_actions'] else 0,
i['timestamp'],
str(datetime.fromtimestamp(i['timestamp'] / 1000))
))
if __name__ == '__main__':
settings: LogSettings = load_settings("biogames2.json")
log_ids_urach: List[str] = urach_logs([
# "34fecf49dbaca3401d745fb467",
# "44ea194de594cd8d63ac0314be",
# "57c444470dbf88605433ca935c",
# "78e0c545b594e82edfad55bd7f",
# "91abfd4b31a5562b1c66be37d9",
# "597b704fe9ace475316c345903",
# "e01a684aa29dff9ddd9705edf8",
# "fbf9d64ae0bdad0de7efa3eec6",
"fe1331481f85560681f86827ec",
# "fe1331481f85560681f86827ec"]
"fec57041458e6cef98652df625", ]
,settings)
store: ResultStore = run_analysis(log_ids_urach, settings)
if False:
for r in get_renderer(analyzers.LocomotionActionAnalyzer):
r().render(store.get_all())
if False:
render(analyzers.LocationAnalyzer, store.get_all())
# print(json.dumps(store.serializable(), indent=1))
if True:
for cat in store.get_categories():
render(analyzers.ActivityMapper, store.get_category(cat), name=cat)
# render(analyzers.ProgressAnalyzer, store.get_all())
if False:
from analyzers.postprocessing import graph
g = graph.Cache(settings)
g.run(store)
if False:
# render(analyzers.SimulationOrderAnalyzer, store.get_all())
for cat in store.get_categories():
data = store.get_category(cat)
render(analyzers.SimulationOrderAnalyzer, data, name=cat)
if False:
write_logentry_count_csv()
if False:
write_simulation_flag_csv()
def calc_distance(geojson: str):
from shapely.geometry import LineString
from shapely.ops import transform
from functools import partial
import pyproj
track = LineString(json.loads(geojson)['coordinates'])
project = partial(
pyproj.transform,
pyproj.Proj(init='EPSG:4326'),
pyproj.Proj(init='EPSG:32633'))
return transform(project, track).length
if False:
# json.dump(store.serializable(), open("new.json", "w"), indent=1)
from collections import defaultdict
keys = [
"simu",
"question",
"image",
"audio",
"video",
"other",
"map"
]
import matplotlib.pyplot as plt
# results = []
places = defaultdict(list)
for log in store.get_all():
result = defaultdict(lambda: 0)
for i in log.get()['track']:
duration = i['properties']['end_timestamp'] - i['properties']['start_timestamp']
result[i['properties']['activity_type']] += duration
print(json.dumps(result, indent=4))
total = sum(result.values())
print(total)
percentage = defaultdict(lambda: 0)
minutes = defaultdict(lambda: 0)
for i in result:
percentage[i] = result[i] / total
minutes[i] = result[i] / 60_000
print(json.dumps(percentage, indent=4))
if not 'error' in result:
# places[log.get()['instance']].append(percentage)
places[log.get()['instance']].append(minutes)
for place in places:
places[place] = sorted(places[place], key=lambda item: item['map'])
dummy = [0] * len(keys)
results = []
sites = []
from util.meta_temp import CONFIG_NAMES
for i in places:
for j in places[i]:
ordered = []
for k in keys:
ordered.append(j[k])
results.append(ordered)
results.append(dummy)
sites.append(CONFIG_NAMES[i] if i in CONFIG_NAMES else "---")
size = len(results)
ind = np.arange(size)
width = 0.9
print(results)
data = list(zip(*results))
print(data)
lines = []
bottom = [0] * len(results)
for i in range(0, len(data)):
lines.append(plt.bar(ind, data[i], bottom=bottom, width=width)[0])
for k, x in enumerate(data[i]):
bottom[k] += x
plt.legend(lines, keys)
plt.title(", ".join(sites))
plt.show()
# size = len(results)
# ind = np.arange(size)
# width = 0.9
# print(results)
# data = list(zip(*results))
# print(data)
# lines = []
# bottom = [0] * len(results)
# for i in range(0, len(data)):
# lines.append(plt.bar(ind, data[i], bottom=bottom, width=width)[0])
# for k, x in enumerate(data[i]):
# bottom[k] += x
# plt.legend(lines, keys)
# plt.title("Zwei Spiele in Filderstadt (t1=237min; t2=67min)")
# plt.show()
# for analyzers in analyzers:
# if analyzers.name() in ["LogEntryCount", "ActionSequenceAnalyzer"]:
# print(json.dumps(analyzers.result(), indent=2))
# for analyzers in analyzers:
# if analyzers.name() in ["BoardDuration"]:
# print(json.dumps(analyzers.result(), indent=2))
# print(analyzers.render())
# coords = analyzers[1].render()
# with open("test.js", "w") as out:
# out.write("coords = "+coords)