diff --git a/.gitignore b/.gitignore
index 428a58b..8c01398 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,5 +3,5 @@ _*
 !__init__.py
 *.pyc
 logs/
-data/
+*data/
 plots/*
diff --git a/Dockerfile b/Dockerfile
index 94f8894..c197353 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,9 @@ FROM alpine:edge
 ADD ["requirements.txt", "/"]
 RUN echo "http://dl-cdn.alpinelinux.org/alpine/edge/testing/" >> /etc/apk/repositories && \
-    apk add --update --no-cache libpng libpng-dev freetype freetype-dev g++ python3 python3-dev libstdc++ openblas-dev && \
-    pip3 --no-cache-dir install -r requirements.txt && \
-    apk del libpng-dev freetype-dev g++ python3-dev openblas-dev && \
-    rm requirements.txt
\ No newline at end of file
+    apk add --update --no-cache libpng freetype python3 libstdc++ libxml2 libxslt openblas && \
+    apk add --update --no-cache --virtual .build-deps libpng-dev freetype-dev g++ python3-dev openblas-dev libxml2-dev libxslt-dev && \
+    pip3 --no-cache-dir install -r requirements.txt && \
+    apk del .build-deps && \
+    rm requirements.txt
+USER guest
\ No newline at end of file
diff --git a/analysis/analyzers/__init__.py b/analysis/analyzers/__init__.py
index 07a8186..97d33b6 100644
--- a/analysis/analyzers/__init__.py
+++ b/analysis/analyzers/__init__.py
@@ -5,7 +5,7 @@ from .analyzer.biogames import BoardDurationAnalyzer, SimulationRoundsAnalyzer,
     BiogamesCategorizer, ActivityMapper, BiogamesStore, InstanceConfig, SimulationOrderAnalyzer, SimulationCategorizer, \
     SimulationFlagsAnalyzer
 from .analyzer.default import LogEntryCountAnalyzer, LocationAnalyzer, LogEntrySequenceAnalyzer, ActionSequenceAnalyzer, \
-    CategorizerStub, Store, ProgressAnalyzer
+    CategorizerStub, Store, ProgressAnalyzer, SimpleCategorizer
 from .analyzer.locomotion import LocomotionActionAnalyzer, CacheSequenceAnalyzer
 from .analyzer.mask import MaskSpatials
 from .render import Render
diff --git a/analysis/analyzers/analyzer/default.py b/analysis/analyzers/analyzer/default.py
index fce0e35..f17ae2f 100644
--- a/analysis/analyzers/analyzer/default.py
+++ b/analysis/analyzers/analyzer/default.py
@@ -90,12 +90,16 @@ class CategorizerStub(Analyzer):
     __name__ = "Categorizer"
 
     def result(self, store: ResultStore, name=None) -> None:
-        store.new_category(self.key)
+        store.new_category(name if name else self.key)
 
     def __init__(self, settings: LogSettings):
         super().__init__(settings)
         self.key = "default"
 
+class SimpleCategorizer(CategorizerStub):
+    def process(self, entry):
+        return False
+
 
 class Store(Analyzer):
     """
diff --git a/analysis/analyzers/settings.py b/analysis/analyzers/settings.py
index 269eb00..61a4494 100644
--- a/analysis/analyzers/settings.py
+++ b/analysis/analyzers/settings.py
@@ -1,13 +1,17 @@
 import json
+import logging
 import sys
 
 from clients.webclients import CLIENTS
 
+log: logging.Logger = logging.getLogger(__name__)
+
 
 def load_source(config):
     if config["type"] in CLIENTS:
         source = CLIENTS[config["type"]](**config)
         source.login()
         return source
+    else:
+        log.warning(f"client {config['type']} not found!")
 
 
 class LogSettings:
diff --git a/analysis/loaders/__init__.py b/analysis/loaders/__init__.py
index 4829227..f889e09 100644
--- a/analysis/loaders/__init__.py
+++ b/analysis/loaders/__init__.py
@@ -1,8 +1,10 @@
 from .biogames import SQLiteLoader, ZipSQLiteLoader
 from .loader import JSONLoader
+from .neocart import NeoCartLoader
 
 LOADERS = {
     "json": JSONLoader,
     "sqlite": SQLiteLoader,
-    "zip": ZipSQLiteLoader
+    "zip": ZipSQLiteLoader,
+    "neocartographer": NeoCartLoader,
 }
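For orientation, outside the patch itself: the new "neocartographer" key makes the loader selectable via the settings' logFormat field (see neocart.json below). A minimal usage sketch, assuming the Loader base class needs no constructor arguments and the GPX filename shown here is hypothetical:

    # Illustration only, not part of the patch.
    from analysis.loaders import LOADERS

    loader_cls = LOADERS["neocartographer"]     # resolves to NeoCartLoader
    loader = loader_cls()                       # assumption: no constructor args required
    loader.load("playerid1430317168972.gpx")    # hypothetical local GPX file
    for entry in loader.get_entry():            # generator over parsed entries
        print(entry["type"], entry["timestamp"], entry["location"]["coordinates"])
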
diff --git a/analysis/loaders/neocart.py b/analysis/loaders/neocart.py
new file mode 100644
index 0000000..b6b0975
--- /dev/null
+++ b/analysis/loaders/neocart.py
@@ -0,0 +1,70 @@
+import logging
+from datetime import datetime
+
+from lxml import etree
+
+from .loader import Loader
+
+log = logging.getLogger(__name__)
+
+NS = {'gpx': "http://www.topografix.com/GPX/1/1"}
+
+class NeoCartLoader(Loader):
+    def load(self, file: str):
+        src = open(file, "r")
+        parser = etree.XMLParser(recover=True)
+        tree = etree.parse(src, parser=parser)
+        self.entries = []
+        for point in tree.xpath("//gpx:trkpt", namespaces=NS):
+            try:
+                self.entries.append(self.parse_point(point))
+            except ValueError as e:
+                print(e, etree.tostring(point, pretty_print=True).decode())
+                log.exception(e)
+
+    def parse_point(self, point):
+        raw_lat = point.xpath("@lat")[0]
+        if raw_lat.count(".") > 1:
+            log.warning(f"recreate lat/lon from: {raw_lat}")
+            log.warning(etree.tostring(point, pretty_print=True).decode())
+            start_offset = 4
+            x = raw_lat[start_offset:].index(".")
+            offset = start_offset + x
+            raw_lon = raw_lat[offset:]
+            raw_lat = raw_lat[:offset]
+        else:
+            raw_lon = point.xpath("@lon")[0]
+        lat = float(raw_lat)
+        lon = float(raw_lon)
+        times = point.xpath("gpx:time", namespaces=NS)
+        assert len(times) == 1
+        time = times[0].text
+        dt = datetime.strptime(time, "%Y-%m-%dT%H:%M:%SZ")
+        timestamp = int(dt.timestamp() * 1000)  # python3.6 has no timestamp_ns (yet)
+        events = point.xpath(".//gpx:event", namespaces=NS)
+        assert 0 <= len(events) <= 1
+        event = {}
+        if events:
+            event = dict(events[0].attrib)
+            if events[0].tail and events[0].tail.strip():
+                try:
+                    # base case: trailing 'geoid="0"/>'
+                    key, v = events[0].tail.strip().split("=")
+                    value = v.split('"')[1]
+                    event[key] = value
+                except (ValueError, IndexError):
+                    event['__tail__'] = events[0].tail.strip()
+
+        return {
+            "location": {
+                "type": "Point",
+                "coordinates": [lon, lat]
+            },
+            "timestamp": timestamp,
+            "event": event,
+            "type": "event" if event else "location"
+        }
+
+    def get_entry(self) -> object:
+        for i in self.entries:
+            yield i
\ No newline at end of file
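For reference, a guess at the GPX shape those XPath expressions expect; the nesting of the event element (here under extensions) and all values are assumptions, the loader's ".//gpx:event" matches it at any depth below trkpt:

    # Illustration only, not part of the patch; values are invented.
    from lxml import etree

    SAMPLE = b"""<?xml version="1.0" encoding="UTF-8"?>
    <gpx xmlns="http://www.topografix.com/GPX/1/1">
      <trk><trkseg>
        <trkpt lat="49.9030" lon="10.8700">
          <time>2015-04-29T14:19:28Z</time>
          <extensions><event action="start" geoid="0"/></extensions>
        </trkpt>
      </trkseg></trk>
    </gpx>"""

    NS = {"gpx": "http://www.topografix.com/GPX/1/1"}
    root = etree.fromstring(SAMPLE)
    for point in root.xpath("//gpx:trkpt", namespaces=NS):
        time = point.xpath("gpx:time", namespaces=NS)[0].text
        events = point.xpath(".//gpx:event", namespaces=NS)
        print(point.get("lat"), point.get("lon"), time,
              [dict(e.attrib) for e in events])

Note that parse_point feeds a naive datetime into .timestamp(), so the millisecond value depends on the local timezone of the host running the analysis (typically UTC inside the Alpine container).
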
"Biogames": BiogamesClient, + "Geogames": GeogamesClient, } if __name__ == '__main__': diff --git a/docker-compose.yml b/docker-compose.yml index fe8efe9..9a91058 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,9 +2,8 @@ version: "2.2" services: app: - image: docker.clkl.de/ma/celery:0.3.3 - build: ./selector - cpu_count: 4 + image: docker.clkl.de/ma/celery:0.4.1 + build: . volumes: - ./:/app working_dir: /app/selector @@ -21,7 +20,7 @@ services: - "traefik.url.frontend.rule=Host:select.ma.potato.kinf.wiai.uni-bamberg.de" celery: - image: docker.clkl.de/ma/celery:0.3.3 + image: docker.clkl.de/ma/celery:0.4.1 environment: - PYTHONPATH=/app volumes: @@ -49,6 +48,12 @@ services: - "traefik.docker.network=traefik_net" - "traefik.url.frontend.rule=Host:results.ma.potato.kinf.wiai.uni-bamberg.de" + log_data: + image: nginx:1.13-alpine + volumes: + - ./log_data/:/srv/:ro + - ./log_data.conf:/etc/nginx/conf.d/log_data.conf + networks: traefik_net: diff --git a/log_data.conf b/log_data.conf new file mode 100644 index 0000000..0c92fcb --- /dev/null +++ b/log_data.conf @@ -0,0 +1,9 @@ +server { + listen 80; + server_name log_data; + location / { + root /srv/; + autoindex on; + autoindex_format json; + } +} \ No newline at end of file diff --git a/log_data/.gitkeep b/log_data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/neocart.json b/neocart.json new file mode 100644 index 0000000..392d8ad --- /dev/null +++ b/neocart.json @@ -0,0 +1,34 @@ +{ + "logFormat": "neocartographer", + "entryType": "type", + "spatials": [ + "location" + ], + "actions": [], + "boards": [], + "analyzers": { + "analysis.analyzers": [ + "SimpleCategorizer", + "LocationAnalyzer" + ] + }, + "sequences": {}, + "custom": { + "coordinates": "location.coordinates", + "metadata": { + "timestamp": "timestamp", + "gamefield": "instance_id", + "user": "player_group_name" + } + }, + "source": { + "type": "Geogames", + "host": "http://log_data/", + "path": "neocartographer" + }, + "render": [ + "KMLRender" + ] +} + + diff --git a/requirements.txt b/requirements.txt index fc7f208..eae5dfa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,5 +9,7 @@ scipy==1.0.1 flask==0.12.2 -celery==4.1.0 -redis==2.10.6 \ No newline at end of file +celery==4.1.1 +redis==2.10.6 + +lxml==4.2.1 \ No newline at end of file diff --git a/selector/templates/games.html b/selector/templates/games.html index 1dc2418..b8acfaf 100644 --- a/selector/templates/games.html +++ b/selector/templates/games.html @@ -6,7 +6,7 @@ {% for log in logs %}
diff --git a/selector/templates/games.html b/selector/templates/games.html
index 1dc2418..b8acfaf 100644
--- a/selector/templates/games.html
+++ b/selector/templates/games.html
@@ -6,7 +6,7 @@
 {% for log in logs %}
-    {{log.start_date}}: {{log.player_group_name}} (→{{log.end_date}})
+    {{log.start_date}}: {{log.player_group_name}}
 {% endfor %}
diff --git a/test_neo.py b/test_neo.py
new file mode 100644
index 0000000..3d52d34
--- /dev/null
+++ b/test_neo.py
@@ -0,0 +1,43 @@
+
+from analysis import log_analyzer as la
+settings = la.load_settings("neocart.json")
+client = settings.source
+logs = client.list()
+id_urls = {str(x['@id']): x['file_url'] for x in logs}
+
+log_ids = ['20351/playerid1430317168972.gpx', '20351/playerid1430317188358.gpx']
+
+urls = [id_urls[i] for i in log_ids]
+tmpdir = client.download_files(urls)
+import os
+store = la.run_analysis([p.path for p in os.scandir(tmpdir.name)], settings, la.LOADERS)
+
+import json
+print(json.dumps(store.serializable(), indent=1))
+
+
+from analysis.analyzers import KMLRender, ActivityMapperRender
+RENDERERS = {  # TODO
+    "KMLRender": KMLRender,
+    "ActivityMapper": ActivityMapperRender,
+}
+render = RENDERERS[settings.render[0]]()
+files = render.render(store.get_all())
+DATA_PATH = "/app/data/results/"
+import uuid
+uid = str(uuid.uuid4())
+results = []
+os.mkdir(os.path.join(DATA_PATH, uid))
+import shutil
+import logging
+log = logging.getLogger(__name__)  # used by the FileNotFoundError handler below
+
+for file in files:
+    try:
+        head, tail = os.path.split(file)
+        target = os.path.join(DATA_PATH, uid, tail)
+        shutil.move(file, target)
+        results.append(target)
+    except FileNotFoundError as e:
+        log.exception(e)
+tmpdir.cleanup()
\ No newline at end of file