"""Batch-process game-instance log files and plot activity time/space statistics.

Research script: loads instance logs, feeds every entry through the configured
analyzers, collects results in a ``ResultStore`` and (in the ``if True:``
section at the bottom) renders stacked-bar and speed plots from the aggregated
data.  Alternative processing paths are kept behind ``if False:`` switches.
"""
import json
import logging
from typing import List

import numpy as np

import analyzers
from analyzers import get_renderer, Analyzer, render, Store
from analyzers.analyzer import ResultStore
from analyzers.render.default import LogEntryCountCSV
from analyzers.settings import LogSettings, load_settings
from loaders import LOADERS

logging.basicConfig(format='%(levelname)s %(name)s:%(message)s', level=logging.DEBUG)
log: logging.Logger = logging.getLogger(__name__)
# Silence the chatty 'requests' logger; only warnings and above get through.
requests_log = logging.getLogger('requests')
requests_log.setLevel(logging.WARN)


def process_log(log_id: str, settings: LogSettings) -> List[Analyzer]:
    """Load one log file and run every configured analyzer over its entries.

    :param log_id: log identifier; currently used directly as the file path
        (the ``data/inst_...`` pattern below is immediately overridden).
    :param settings: provides ``log_format`` (loader selection) and
        ``analyzers`` (analyzer classes to instantiate).
    :return: the analyzer instances, each holding its accumulated results.
    :raises RuntimeError: wrapping any error raised while loading the file.
    """
    logfile: str = "data/inst_{id}.{format}".format(id=log_id, format=settings.log_format)
    # NOTE(review): the computed path above is dead -- the id is expected to
    # already be a usable path.  TODO confirm and remove the format() call.
    logfile = log_id
    loader = LOADERS[settings.log_format]()
    try:
        loader.load(logfile)
    except Exception as e:  # was BaseException; don't convert KeyboardInterrupt/SystemExit
        raise RuntimeError(e) from e
    # Renamed from `analyzers` so the imported module is not shadowed.
    analyzer_instances: List[Analyzer] = []
    log.debug("build analyzers")
    for analyzer in settings.analyzers:
        analyzer_instances.append(analyzer(settings))
    log.debug("process entries")
    for entry in loader.get_entry():
        for analyzer in analyzer_instances:
            try:
                # An analyzer returning a truthy value claims the entry;
                # later analyzers do not see it.
                if analyzer.process(entry):
                    break
            except KeyError as e:
                # Malformed entries are logged and skipped, not fatal.
                log.exception(e)
    return analyzer_instances


if __name__ == '__main__':
    settings: LogSettings = load_settings("biogames2.json")
    log_ids: List[str] = [
        "20d4244719404ffab0ca386c76e4b112",
        "56d9b64144ab44e7b90bf766f3be32e3",
        "dc2cdc28ca074715b905e4aa5badff10",
        "e32b16998440475b994ab46d481d3e0c",
    ]
    # Second assignment deliberately replaces the list above (scratch script).
    log_ids: List[str] = [
        # "34fecf49dbaca3401d745fb467",
        # "44ea194de594cd8d63ac0314be",
        # "57c444470dbf88605433ca935c",
        # "78e0c545b594e82edfad55bd7f",
        # "91abfd4b31a5562b1c66be37d9",
        "597b704fe9ace475316c345903",
        "e01a684aa29dff9ddd9705edf8",
        "fbf9d64ae0bdad0de7efa3eec6",
        # "fe1331481f85560681f86827ec",
        "fe1331481f85560681f86827ec"]
    # "fec57041458e6cef98652df625",
    # ]  NOTE(review): stray bracket from an earlier list revision, kept commented.

    # Emptied so no logs are re-processed; results are read from JSON below.
    log_ids = []
    # with open("/home/clemens/git/ma/test/filtered") as src:
    if False:
        with open("/home/clemens/git/ma/test/filtered_5_actions") as src:
            for line in src:
                line = line.strip()
                log_ids.append(line)

    store: ResultStore = ResultStore()
    for log_id in log_ids:
        for analysis in process_log(log_id, settings):
            log.info("* Result for " + analysis.name())
            # print(analysis.result())
            # print(analysis.render())
            analysis.result(store)

    if False:
        for r in get_renderer(analyzers.LocomotionActionAnalyzer):
            r().render(store.get_all())
    if False:
        render(analyzers.LocationAnalyzer, store.get_all())
    # print(json.dumps(store.serializable(), indent=1))
    if False:
        render(analyzers.ActivityMapper, store.get_all())
        render(analyzers.ProgressAnalyzer, store.get_all())
    if False:
        from analyzers.postprocessing import graph
        g = graph.Cache(settings)
        g.run(store)
    if False:
        # render(analyzers.SimulationOrderAnalyzer, store.get_all())
        for cat in store.get_categories():
            data = store.get_category(cat)
            render(analyzers.SimulationOrderAnalyzer, data, name=cat)
    if False:
        # Render per-category entry counts, then dump the collected summary
        # (filled in by the renderer as a class attribute) to CSV.
        LogEntryCountCSV.summary = None
        for cat in store.get_categories():
            data = store.get_category(cat)
            render(analyzers.LogEntryCountAnalyzer, data, name=cat)
        if LogEntryCountCSV.summary:
            headers = []
            lines = []
            for name in LogEntryCountCSV.summary:
                data = LogEntryCountCSV.summary[name]
                for head in data:
                    if head not in headers:
                        headers.append(head)
                line = [name]
                for head in headers:
                    # 0 for categories this row has no count for.
                    line.append(data.get(head, 0))
                lines.append(line)
            import csv
            with open('logentrycount.csv', 'w', newline='') as csvfile:
                writer = csv.writer(csvfile, quoting=csv.QUOTE_NONE)
                writer.writerow(["name"] + [h.split(".")[-1] for h in headers])
                for line in lines:
                    writer.writerow(line)

    if True:
        # json.dump(store.serializable(), open("new.json", "w"), indent=1)
        from collections import defaultdict
        import matplotlib.pyplot as plt
        from util.meta_temp import CONFIG_NAMES

        # Activity categories in stacking/plot order.
        keys = [
            "simu",
            "question",
            "image",
            "audio",
            "video",
            "other",
            "map",
            # "error"
        ]
        # Location-based task types, merged into "tasks" for the speed plot.
        loc_keys = [
            "question",
            "image",
            "audio",
            "video"
        ]

        def get_data(store, relative_values=True, sort=True, show_errors=False):
            """Per-instance time spent in each activity type.

            Returns {instance: [per-log dict of activity -> share-of-total
            (relative_values=True) or minutes (False)]}.
            """
            places = defaultdict(list)
            for log in store.get_all():
                if not log.analysis() == analyzers.ActivityMapper:
                    continue
                result = defaultdict(lambda: 0)
                for i in log.get()['track']:
                    duration = i['properties']['end_timestamp'] - i['properties']['start_timestamp']
                    result[i['properties']['activity_type']] += duration
                print(json.dumps(result, indent=4))
                total = sum(result.values())
                print(total)
                percentage = defaultdict(lambda: 0)
                minutes = defaultdict(lambda: 0)
                for i in result:
                    if not total == 0:  # guard added, consistent with get_data_distance()
                        percentage[i] = result[i] / total
                    minutes[i] = result[i] / 60_000  # timestamps are in ms
                print(json.dumps(percentage, indent=4))
                if 'error' not in result or show_errors:
                    if relative_values:
                        places[log.get()['instance']].append(percentage)
                    else:
                        places[log.get()['instance']].append(minutes)
            if sort:
                for place in places:
                    places[place] = sorted(places[place], key=lambda item: item['map'])
            return places

        from shapely.geometry import LineString
        from shapely.ops import transform
        from functools import partial
        import pyproj

        def calc_distance(coordinates):
            """Length in metres of a WGS84 track, projected to UTM 33N.

            NOTE(review): pyproj's ``init='EPSG:...'`` syntax is deprecated in
            pyproj >= 2; kept as-is for the pinned environment.
            """
            track = LineString(coordinates)
            project = partial(
                pyproj.transform,
                pyproj.Proj(init='EPSG:4326'),
                pyproj.Proj(init='EPSG:32633'))
            return transform(project, track).length

        # Instances actually included in the plots.
        whitelist = ['16fc3117-61db-4f50-b84f-81de6310206f',
                     '5e64ce07-1c16-4d50-ac4e-b3117847ea43',
                     '90278021-4c57-464e-90b1-d603799d07eb',
                     'ff8f1e8f-6cf5-4a7b-835b-5e2226c1e771']

        def get_data_distance(store, relative_values=True, sort=True, show_errors=False):
            """Per-instance distance travelled per activity type (metres or share)."""
            places = defaultdict(list)
            for log in store.get_all():
                if not log.analysis() == analyzers.ActivityMapper:
                    continue
                result = defaultdict(lambda: 0)
                for i in log.get()['track']:
                    coords = i['coordinates']
                    if len(coords) > 1:  # a single point has no length
                        distance = calc_distance(coords)
                        result[i['properties']['activity_type']] += distance
                total = sum(result.values())
                percentage = defaultdict(lambda: 0)
                for i in result:
                    if not total == 0:
                        percentage[i] = result[i] / total
                if 'error' not in result or show_errors:
                    if relative_values:
                        places[log.get()['instance']].append(percentage)
                    else:
                        places[log.get()['instance']].append(result)
            if sort:
                for place in places:
                    places[place] = sorted(places[place], key=lambda item: item['map'])
            return places

        def get_all_data(store, sort=False, relative=True):
            """Combined time+space stats: {instance: [{activity: {"space","time"}}]}.

            Also prints the distribution of distinct simulation boards per log.
            """
            places = defaultdict(list)
            simu_distribution = defaultdict(lambda: 0)
            # divisiors = {"time":60_000, "space":1000000}
            for log in store.get_all():
                if not log.analysis() == analyzers.ActivityMapper:
                    continue
                result = defaultdict(lambda: defaultdict(lambda: 0))
                for i in log.get()['track']:
                    coords = i['coordinates']
                    if len(coords) > 1:
                        distance = calc_distance(coords)
                    else:
                        distance = 0.0
                    result["space"][i['properties']['activity_type']] += distance
                    duration = i['properties']['end_timestamp'] - i['properties']['start_timestamp']
                    result["time"][i['properties']['activity_type']] += duration
                total_space = sum(result["space"].values())
                total_time = sum(result["time"].values())
                percentage = defaultdict(lambda: defaultdict(lambda: 0))
                total = defaultdict(lambda: defaultdict(lambda: 0))
                for i in result["space"]:
                    if not total_space == 0:
                        percentage[i]["space"] = result["space"][i] / total_space
                    else:
                        percentage[i]["space"] = 0
                    if not total_time == 0:
                        percentage[i]["time"] = result["time"][i] / total_time
                    else:
                        percentage[i]["time"] = 0
                    for t in ("space", "time"):
                        # total[i][t] += (result[t][i] / divisiors[t])
                        total[i][t] += result[t][i]
                print(percentage)
                if 'error' not in result:
                    if relative:
                        value = percentage
                    else:
                        value = total
                    places[log.get()['instance']].append(value)
                # Count distinct simulation boards seen in this log.
                simus = defaultdict(lambda: 0)
                for item in log.get()['boards']:
                    if item["extra_data"]["activity_type"] == "simu":
                        simus[item["board_id"]] += 1
                simu_distribution[len(simus)] += 1
            if sort:
                for place in places:
                    places[place] = sorted(places[place], key=lambda item: item['map']['time'])
            print(simu_distribution)
            return places

        def stack_data(keys, places, type="space"):
            """Flatten `places` into rows for a stacked bar chart.

            `type` (kept despite shadowing the builtin -- callers pass it by
            keyword) selects the "space" or "time" component; values are scaled
            to km / minutes and outliers filtered.  Returns (rows, site names);
            an all-zero dummy row separates sites.
            """
            divisiors = {"time": 60_000, "space": 1000}
            # divisiors = {"time": 1, "space": 1}
            dummy = [0] * len(keys)
            results = []
            sites = []
            for i in sorted(places):
                if i not in whitelist:
                    continue
                place = sorted(places[i], key=lambda item: item['map'][type])
                for j in place:
                    ordered = []
                    for k in keys:
                        if k in j:
                            ordered.append(j[k][type] / divisiors[type])
                        else:
                            ordered.append(0)
                    print(sum(ordered))
                    # if sum(ordered) > 0.9 and sum(ordered) < 4000 and sum(ordered)>10:
                    if sum(ordered) > 0.9 and sum(ordered) < 100:
                        # print(sum(ordered), 1-sum(ordered))
                        # if sum(ordered)<1:
                        #     ordered[-2] = 1-sum(ordered[:-2], ordered[-1])
                        results.append(ordered)
                results.append(dummy)
                sites.append(CONFIG_NAMES[i] if i in CONFIG_NAMES else "---")
            return results, sites

        def plot_data(places, keys):
            """Render the stacked bar chart and save it as space_abs_<n>.png."""
            results, sites = stack_data(keys, places)
            dpi = 86.1
            plt.figure(figsize=(1280 / dpi, 720 / dpi))
            size = len(results)
            print("{} elements total".format(size))
            ind = np.arange(size)
            width = 1
            # print(results)
            data = list(zip(*results))  # transpose rows -> one series per key
            # print(data)
            lines = []
            bottom = [0] * size
            plt.ticklabel_format(useMathText=False)
            for i in range(0, len(data)):
                lines.append(plt.bar(ind, data[i], bottom=bottom, width=width)[0])
                for k, x in enumerate(data[i]):
                    bottom[k] += x
            plt.legend(lines, keys)
            plt.title(", ".join(sites))
            # plt.show()
            dpi = 86
            plt.savefig("space_abs_{}.png".format(size), dpi=dpi, bbox_inches="tight")

        colors = {
            "simu": "blue",
            "question": "orange",
            "image": "green",
            "audio": "red",
            "video": "purple",
            "other": "brown",
            "map": "violet",
            # "error":"grey",
            "tasks": "olive",
        }
        markers = [".", "o", "x", "s", "*", "D", "p", ",", "<", ">", "^", "v", "1", "2", "3", "4"]

        def plot_time_space(time_data, space_data, keys):
            """Scatter time vs. distance per activity; one marker per instance."""
            # assuming time_data and space_data are in same order!
            marker = 0
            for id in time_data:
                for k in keys:
                    for i in range(len(time_data[id])):
                        print(time_data[id][i][k], space_data[id][i][k])
                        plt.plot(time_data[id][i][k], space_data[id][i][k],
                                 color=colors[k], marker=markers[marker])
                # NOTE(review): marker advanced once per instance id (original
                # indentation ambiguous) -- verify against intended plot.
                marker += 1
            plt.show()
            # plt.cla()
            # plt.clf()
            # plt.close()

        def group_locationbased_tasks(data):
            """Add a merged "tasks" entry (sum of loc_keys) to every log dict, in place."""
            for id in data:
                for log in data[id]:
                    loc = {"space": 0, "time": 0}
                    for k in log:
                        if k in loc_keys:
                            for i in ["space", "time"]:
                                loc[i] += log[k][i]
                    log["tasks"] = loc

        def plot_time_space_rel(combined, keys):
            """Grouped bar chart of mean speed (m/s) per activity and instance.

            Location-based task types are merged into "tasks"; a dashed line
            marks 1 km/h.  Saves speed2.png.
            """
            groups = defaultdict(list)
            keys = list(keys)
            keys.remove("other")
            for i in loc_keys:
                keys.remove(i)
            keys.append("tasks")
            ids = []
            group_locationbased_tasks(combined)
            for k in keys:
                for id in sorted(combined):
                    if id not in whitelist:
                        continue
                    if id not in ids:
                        ids.append(id)
                    group = 0.0
                    count = 0
                    for item in combined[id]:
                        if k in item:
                            time = item[k]["time"] / 1000  # ms -> s
                            distance = item[k]["space"]
                            if time > 0:
                                group += (distance / time)
                                count += 1
                            else:
                                print("div by zero", distance, time)
                    if count > 0:
                        groups[k].append(group / count)
                    else:
                        groups[k].append(0.0)
            print(ids)
            ind = np.arange(len(ids))
            width = .7 / len(groups)
            print(ind)
            print(json.dumps(groups, indent=1))
            bars = []
            dpi = 200
            plt.figure(figsize=(1280 / dpi, 720 / dpi))
            fig, ax = plt.subplots()
            for k in groups:
                print(groups[k])
                if not len(groups[k]):
                    groups[k].append(0)
                ind = ind + (width)  # shift each key's bars right
                bars.append(ax.bar((ind + width * len(groups) / 2), groups[k], width,
                                   color=colors[k]))
            ax.set_xticks(ind + width / 2)
            ax.set_xticklabels(list([CONFIG_NAMES[i] if i in CONFIG_NAMES else "---" for i in ids]))
            kmh = plt.hlines((1 / 3.6), 0.3, 4.2, linestyles="dashed", label="1 km/h", linewidths=1)
            plt.legend(bars + [kmh], keys + [kmh.get_label()])
            print(combined.keys(), ids)
            print([CONFIG_NAMES[i] if i in CONFIG_NAMES else "---" for i in ids])
            # plt.show()
            dpi = 200
            plt.savefig("speed2.png", dpi=dpi)

        # spatial_data = get_data_distance(store,relative_values=False)
        # temporal_data = get_data(store,relative_values=False)
        # spatial_data_rel = get_data_distance(store,relative_values=True)
        # temporal_data_rel = get_data(store,relative_values=True)
        # temporal_data_rel = json.load(open("temporal_rel.json"))
        # spatial_data_rel = json.load(open("spatial_rel.json"))
        # import IPython
        # IPython.embed()
        # print(json.dumps(get_all_data(store)))
        # json.dump(get_all_data(store), open("combined.json", "w"))
        # combined = get_all_data(store, sort=True, relative=True)
        # json.dump(combined, open("combined_rel.json", "w"))
        # combined = json.load(open("combined_rel.json"))
        with open("combined_total.json") as src:  # was json.load(open(...)): leaked the handle
            combined = json.load(src)
        # plot_time_space_rel(combined, keys)
        plot_data(combined, keys)
        # plot_time_space_rel(temporal_data_rel, spatial_data_rel, keys)
        # plot_data(combined, keys)
        # plot_data(get_data_distance(store,relative_values=False), keys)
    # for analyzers in analyzers:
    #     if analyzers.name() in ["LogEntryCount", "ActionSequenceAnalyzer"]:
    #         print(json.dumps(analyzers.result(), indent=2))
    # for analyzers in analyzers:
    #     if analyzers.name() in ["BoardDuration"]:
    #         print(json.dumps(analyzers.result(), indent=2))
    #         print(analyzers.render())
    # coords = analyzers[1].render()
    # with open("test.js", "w") as out:
    #     out.write("coords = "+coords)