evaluation working

branch: clients
Clemens Klug 2018-05-29 17:21:36 +02:00
parent bba8c0719c
commit 4d0e5e7ac1
15 changed files with 223 additions and 17 deletions

.gitignore

@@ -3,5 +3,5 @@ _*
 !__init__.py
 *.pyc
 logs/
-data/
+*data/
 plots/*


@@ -2,7 +2,9 @@ FROM alpine:edge
 ADD ["requirements.txt", "/"]
 RUN echo "http://dl-cdn.alpinelinux.org/alpine/edge/testing/" >> /etc/apk/repositories && \
-    apk add --update --no-cache libpng libpng-dev freetype freetype-dev g++ python3 python3-dev libstdc++ openblas-dev && \
-    pip3 --no-cache-dir install -r requirements.txt && \
-    apk del libpng-dev freetype-dev g++ python3-dev openblas-dev && \
-    rm requirements.txt
+    apk add --update --no-cache libpng freetype python3 libstdc++ libxml2 libxslt openblas && \
+    apk add --update --no-cache --virtual .build-deps libpng-dev freetype-dev g++ python3-dev openblas-dev libxml2-dev libxslt-dev && \
+    pip3 --no-cache-dir install -r requirements.txt && \
+    apk del .build-deps && \
+    rm requirements.txt
+USER guest
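The split into a runtime apk layer and a .build-deps virtual package keeps libxml2, libxslt, openblas, libpng and freetype in the final image while the compilers and -dev headers are removed in the same RUN step once pip has built the wheels. A quick sanity check one might run inside the built container (a hedged sketch, not part of the commit; the expected scipy pin comes from requirements.txt):

# Sketch: confirm the compiled extensions still find their shared libraries
# after the -dev packages were stripped with `apk del .build-deps`.
import lxml.etree
import scipy

print("libxml2 seen by lxml:", lxml.etree.LIBXML_VERSION)
print("scipy:", scipy.__version__)  # expected 1.0.1 per requirements.txt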


@@ -5,7 +5,7 @@ from .analyzer.biogames import BoardDurationAnalyzer, SimulationRoundsAnalyzer,
     BiogamesCategorizer, ActivityMapper, BiogamesStore, InstanceConfig, SimulationOrderAnalyzer, SimulationCategorizer, \
     SimulationFlagsAnalyzer
 from .analyzer.default import LogEntryCountAnalyzer, LocationAnalyzer, LogEntrySequenceAnalyzer, ActionSequenceAnalyzer, \
-    CategorizerStub, Store, ProgressAnalyzer
+    CategorizerStub, Store, ProgressAnalyzer, SimpleCategorizer
 from .analyzer.locomotion import LocomotionActionAnalyzer, CacheSequenceAnalyzer
 from .analyzer.mask import MaskSpatials
 from .render import Render


@@ -90,12 +90,16 @@ class CategorizerStub(Analyzer):
     __name__ = "Categorizer"
 
     def result(self, store: ResultStore, name=None) -> None:
-        store.new_category(self.key)
+        store.new_category(name if name else self.key)
 
     def __init__(self, settings: LogSettings):
        super().__init__(settings)
        self.key = "default"
 
 
+class SimpleCategorizer(CategorizerStub):
+    def process(self, entry):
+        return False
+
+
 class Store(Analyzer):
     """


@@ -1,13 +1,17 @@
 import json
+import logging
 import sys
 
 from clients.webclients import CLIENTS
 
+log: logging.Logger = logging.getLogger(__name__)
 
 
 def load_source(config):
     if config["type"] in CLIENTS:
         source = CLIENTS[config["type"]](**config)
         source.login()
         return source
+    else:
+        log.warn(f"client {config['type']} not found!")
 
 
 class LogSettings:
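The new else branch logs unknown source types instead of failing silently, but load_source() still returns None in that case, so callers have to check the result; note that Logger.warn is a deprecated alias for Logger.warning. A hedged usage sketch, borrowing the "source" block from neocart.json further down:

# Sketch only; the config dicts mirror the "source" section of neocart.json.
source = load_source({"type": "Geogames", "host": "http://log_data/", "path": "neocartographer"})
# -> CLIENTS["Geogames"] is instantiated with the config and login() is called

missing = load_source({"type": "Typo"})
# -> logs "client Typo not found!" and returns None, so guard before use:
if missing is None:
    raise ValueError("no client available for this source type")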


@@ -1,8 +1,10 @@
 from .biogames import SQLiteLoader, ZipSQLiteLoader
 from .loader import JSONLoader
+from .neocart import NeoCartLoader
 
 LOADERS = {
     "json": JSONLoader,
     "sqlite": SQLiteLoader,
-    "zip": ZipSQLiteLoader
+    "zip": ZipSQLiteLoader,
+    "neocartographer": NeoCartLoader,
 }


@@ -0,0 +1,70 @@
import logging
from datetime import datetime

from lxml import etree

from .loader import Loader

log = logging.getLogger(__name__)

NS = {'gpx':"http://www.topografix.com/GPX/1/1"}


class NeoCartLoader(Loader):
    def load(self, file: str):
        src = open(file, "r")
        parser = etree.XMLParser(recover=True)
        tree = etree.parse(src, parser=parser)
        self.entries = []
        for point in tree.xpath("//gpx:trkpt", namespaces=NS):
            try:
                self.entries.append(self.parse_point(point))
            except ValueError as e:
                print(e, etree.tostring(point, pretty_print=True).decode())
                log.exception(e)

    def parse_point(self, point):
        raw_lat = point.xpath("@lat")[0]
        if raw_lat.count(".") > 1:
            log.warning(f"recreate lat/lon from: {raw_lat}")
            log.warn(etree.tostring(point, pretty_print=True).decode())
            start_offset = 4
            x = raw_lat[start_offset:].index(".")
            offset = start_offset + x
            raw_lon = raw_lat[offset:]
            raw_lat = raw_lat[:offset]
        else:
            raw_lon = point.xpath("@lon")[0]
        lat = float(raw_lat)
        lon = float(raw_lon)
        times = point.xpath("gpx:time",namespaces=NS)
        assert len(times) == 1
        time = times[0].text
        dt = datetime.strptime(time, "%Y-%m-%dT%H:%M:%SZ")
        timestamp = int(dt.timestamp() * 1000)  # python3.6 has no timestamp_ns (yet)
        events = point.xpath(".//gpx:event",namespaces=NS)
        assert 0 <= len(events) <= 1
        event = {}
        if events:
            event = dict(events[0].attrib)
            if events[0].tail and events[0].tail.strip():
                try:
                    # base case: trailing 'geoid="0"/>'
                    key, v = events[0].tail.strip().split("=")
                    value = v.split('"')[1]
                    event[key] = value
                except:
                    event['__tail__'] = events[0].tail.strip()
        return {
            "location": {
                "type": "Point",
                "coordinates": [lon, lat]
            },
            "timestamp": timestamp,
            "event": event,
            "type": "event" if event else "location"
        }

    def get_entry(self) -> object:
        for i in self.entries:
            yield i
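This is the loader registered as .neocart in the LOADERS dict above: it walks the GPX trkpt elements (with a recovery path for malformed lat attributes that contain a concatenated lat/lon pair), normalises each point into the location/event dict shape the analyzers consume, and yields the results through get_entry(). A minimal stand-alone usage sketch, assuming the Loader base class needs no constructor arguments (not shown in this diff); the file path is purely illustrative:

# Hedged sketch: run one downloaded GPX export through the loader directly.
loader = NeoCartLoader()  # assumes a no-argument constructor
loader.load("log_data/neocartographer/20351/playerid1430317168972.gpx")

for entry in loader.get_entry():
    lon, lat = entry["location"]["coordinates"]  # GeoJSON order: [lon, lat]
    print(entry["timestamp"], lat, lon, entry["type"], entry["event"])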


@@ -48,10 +48,10 @@ class Client:
         return target
 
     def login(self):
-        pass
+        pass #TODO
 
     def list(self):
-        pass
+        pass #TODO
 
 
 class BiogamesClient(Client):
@@ -105,9 +105,42 @@ class BiogamesClient(Client):
     def load_all_logs(self) -> tempfile.TemporaryDirectory:
         return self.download_files([i["file_url"] for i in self.list()])
 
 
+class GeogamesClient(Client):
+    config_fields = ("host", "path")
+
+    def __init__(self, **kwargs):
+        for field in self.config_fields:
+            if not field in kwargs:
+                raise ValueError(f"missing parameter: {field}")
+        self.host = kwargs['host']
+        self.path = kwargs['path']
+
+    def list(self):
+        logs = self.get(self.path)
+        data = logs.json()
+        prepared_logs = []
+        for log in data:
+            players = self.get(f"{self.path}/{log['name']}/").json()
+            for player in players:
+                prepared_logs.append({
+                    '@id': f"{log['name']}/{player['name']}",
+                    'start_date': player['mtime'],
+                    'player_group_name': player['name'],
+                    'file_url': f"{self.path}/{log['name']}/{player['name']}",
+                })
+        return prepared_logs
+
+    def download_files(self, urls, **kwargs) -> tempfile.TemporaryDirectory:
+        target = tempfile.TemporaryDirectory()
+        for path in urls:
+            filename = os.path.join(target.name, "-".join(path.split("/")[-2:]))
+            self.download_file(path, filename, **kwargs)
+        return target
+
+
 CLIENTS: typing.Dict[str, typing.Type[Client]] = {
     "Biogames": BiogamesClient,
+    "Geogames": GeogamesClient,
 }
 
 if __name__ == '__main__':
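GeogamesClient builds its listing from two levels of nginx autoindex JSON (see the log_data service and log_data.conf below): the first GET on path returns the game directories, the second the player files, whose name and mtime become player_group_name and start_date. A hedged sketch of what list() and download_files() yield, with the log_data stack running and values modelled on the ids used in test_neo.py:

# Sketch only; the concrete values are illustrative, the keys match list().
client = GeogamesClient(host="http://log_data/", path="neocartographer")
for entry in client.list():
    print(entry)
# e.g. {'@id': '20351/playerid1430317168972.gpx',
#       'start_date': 'Tue, 29 May 2018 15:21:36 GMT',  # nginx mtime string
#       'player_group_name': 'playerid1430317168972.gpx',
#       'file_url': 'neocartographer/20351/playerid1430317168972.gpx'}

tmpdir = client.download_files([e['file_url'] for e in client.list()])
# files land in tmpdir.name as "20351-playerid1430317168972.gpx"-style names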


@@ -2,9 +2,8 @@ version: "2.2"
 services:
   app:
-    image: docker.clkl.de/ma/celery:0.3.3
-    build: ./selector
-    cpu_count: 4
+    image: docker.clkl.de/ma/celery:0.4.1
+    build: .
     volumes:
       - ./:/app
     working_dir: /app/selector
@@ -21,7 +20,7 @@ services:
       - "traefik.url.frontend.rule=Host:select.ma.potato.kinf.wiai.uni-bamberg.de"
   celery:
-    image: docker.clkl.de/ma/celery:0.3.3
+    image: docker.clkl.de/ma/celery:0.4.1
     environment:
       - PYTHONPATH=/app
     volumes:
@@ -49,6 +48,12 @@ services:
       - "traefik.docker.network=traefik_net"
       - "traefik.url.frontend.rule=Host:results.ma.potato.kinf.wiai.uni-bamberg.de"
 
+  log_data:
+    image: nginx:1.13-alpine
+    volumes:
+      - ./log_data/:/srv/:ro
+      - ./log_data.conf:/etc/nginx/conf.d/log_data.conf
+
 networks:
   traefik_net:

log_data.conf

@@ -0,0 +1,9 @@
server {
    listen 80;
    server_name log_data;
    location / {
        root /srv/;
        autoindex on;
        autoindex_format json;
    }
}
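With autoindex on and autoindex_format json, nginx answers directory requests with a JSON array of {"name", "type", "mtime", ...} objects, which is exactly what GeogamesClient.list() walks above. A quick hedged check against the running log_data container (requests is only a stand-in HTTP client here, not necessarily in requirements.txt):

# Sketch: inspect the autoindex JSON the log_data service serves.
import requests

listing = requests.get("http://log_data/neocartographer/").json()
for item in listing:
    # e.g. {"name": "20351", "type": "directory", "mtime": "Tue, 29 May 2018 ..."}
    print(item["name"], item["type"], item["mtime"])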

log_data/.gitkeep (new, empty file)

neocart.json

@@ -0,0 +1,34 @@
{
  "logFormat": "neocartographer",
  "entryType": "type",
  "spatials": [
    "location"
  ],
  "actions": [],
  "boards": [],
  "analyzers": {
    "analysis.analyzers": [
      "SimpleCategorizer",
      "LocationAnalyzer"
    ]
  },
  "sequences": {},
  "custom": {
    "coordinates": "location.coordinates",
    "metadata": {
      "timestamp": "timestamp",
      "gamefield": "instance_id",
      "user": "player_group_name"
    }
  },
  "source": {
    "type": "Geogames",
    "host": "http://log_data/",
    "path": "neocartographer"
  },
  "render": [
    "KMLRender"
  ]
}


@@ -9,5 +9,7 @@ scipy==1.0.1
 flask==0.12.2
-celery==4.1.0
+celery==4.1.1
 redis==2.10.6
+lxml==4.2.1


@@ -6,7 +6,7 @@
 {% for log in logs %}
   <li>
     <input type="checkbox" name="logs" value="{{log['@id']}}">
-    {{log.start_date}}: {{log.player_group_name}} (→{{log.end_date}})
+    {{log.start_date}}: {{log.player_group_name}}
   </li>
   <!--{{log}}-->
 {% endfor %}

test_neo.py

@@ -0,0 +1,41 @@
from analysis import log_analyzer as la

settings = la.load_settings("neocart.json")
client = settings.source
logs = client.list()
id_urls = {str(x['@id']): x['file_url'] for x in logs}
log_ids=['20351/playerid1430317168972.gpx','20351/playerid1430317188358.gpx']
urls = [id_urls[i] for i in log_ids]
tmpdir = client.download_files(urls)

import os
store = la.run_analysis([p.path for p in os.scandir(tmpdir.name)], settings, la.LOADERS)

import json
print(json.dumps(store.serializable(), indent=1))

from analysis.analyzers import KMLRender, ActivityMapperRender

RENDERERS = { # TODO
    "KMLRender": KMLRender,
    "ActivityMapper": ActivityMapperRender,
}

render = RENDERERS[settings.render[0]]()
files = render.render(store.get_all())

DATA_PATH = "/app/data/results/"

import uuid
uid = str(uuid.uuid4())
results = []
os.mkdir(os.path.join(DATA_PATH, uid))

import shutil
for file in files:
    try:
        head, tail = os.path.split(file)
        target = os.path.join(DATA_PATH, uid, tail)
        shutil.move(file, target)
        results.append(target)
    except FileNotFoundError as e:
        log.exception(e)

tmpdir.cleanup()
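One gap in this script as shown: the final except block calls log.exception(e), but no logger is set up anywhere in the lines above, so a FileNotFoundError during the move would itself raise a NameError. A minimal sketch of the missing setup (the logger name is arbitrary):

# Assumed missing near the top of test_neo.py so the error path actually logs:
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("test_neo")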