From cbe414232cc2168f10f84ce3d2bcd9b752605360 Mon Sep 17 00:00:00 2001 From: Clemens Klug Date: Tue, 15 Oct 2019 14:42:51 +0200 Subject: [PATCH] improve time tracking of tasks and CSV formatting --- analysis/analyzers/analyzer/biogames.py | 18 ++++++++++++------ analysis/util/output.py | 19 ++++++++++++++++--- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/analysis/analyzers/analyzer/biogames.py b/analysis/analyzers/analyzer/biogames.py index 970cb62..3dc0d9e 100644 --- a/analysis/analyzers/analyzer/biogames.py +++ b/analysis/analyzers/analyzer/biogames.py @@ -365,6 +365,7 @@ class BiogamesTasks(Analyzer): super().__init__(settings) self.settings: LogSettings = settings self.tasks = {} + self.first_board_view = {} self.last_board = None self.instance_config_id: str = None @@ -372,11 +373,15 @@ class BiogamesTasks(Analyzer): if self.instance_config_id is None: if entry[self.settings.type_field] in self.settings.custom['instance_start']: self.instance_config_id = json_path(entry, self.settings.custom['instance_config_id']) - if self.is_task(entry) and self.last_board: - entry['__duration'] = entry['timestamp'] - self.last_board['timestamp'] - self.tasks[self.ids()] = entry + if self.is_task(entry): + task_id = entry['answers']['@id'] + if task_id not in self.first_board_view: + logger.error("task_id is not in self.first_board_view!", task_id, entry) + else: + entry['__duration'] = entry['timestamp'] - self.first_board_view[task_id]['timestamp'] + self.tasks[self.ids(task_id)] = entry if self.is_board(entry): - self.last_board = entry + self.first_board_view[entry['board_id']] = entry return False def result(self, store: ResultStore, name=None) -> None: @@ -388,8 +393,9 @@ class BiogamesTasks(Analyzer): results[ids] = {"duration": task['__duration'], "result": action['increment']} store.add(Result(type(self), results)) - def ids(self): - return f"{self.instance_config_id}_{self.last_board['sequence_id']}_{self.last_board['board_id']}" + def ids(self, task_id): + task = self.first_board_view[task_id] + return f"{self.instance_config_id}_{task['sequence_id']}_{task['board_id']}" def is_task(self, entry) -> bool: return entry['@class'] in self.DATA_CLASSES diff --git a/analysis/util/output.py b/analysis/util/output.py index 1ad705f..1908773 100644 --- a/analysis/util/output.py +++ b/analysis/util/output.py @@ -1,14 +1,27 @@ from datetime import datetime as dt +SEP = "\",\"" +LS = "\"" +LE = "\"" +NL = LS + "\n" + LE + + + def flat_dict_to_csv(data): keys = set() for i in data: keys = keys.union(set(i.keys())) keys = sorted(keys) - out = ",".join(keys) + out = SEP.join(keys) for i in data: - out += "\n" + ",".join([str(i.get(j, "")) for j in keys]) - return out + out += NL + SEP.join([escape(i.get(j, "")) for j in keys]) + return LS + out + LE + + +def escape(value): + val = str(value) + val = val.replace(".", ",") + return val def pretty_ts(timestamp, fmt="%Y-%m-%d %H:%M:%S"): d = dt.fromtimestamp(int(timestamp)/1000.0)