master
Clemens Klug 2018-06-08 21:24:58 +02:00
parent 81caf933b3
commit c4e2072795
5 changed files with 142 additions and 5 deletions

30
ThesTeX/code/biogames.py Normal file
View File

@ -0,0 +1,30 @@
import os
import sqlite3
import tempfile
import zipfile
from json import loads as json_loads
from .loader import Loader
# Name of the SQLite database member that ZipSQLiteLoader extracts from a
# zipped log archive.
DB_FILE = "instance_log.sqlite"
class SQLiteLoader(Loader):
    """Loader that reads log entries from the ``log_entry`` table of a SQLite file."""

    # Open database connection; stays None until load() has been called.
    conn = None

    def load(self, file: str):
        """Open a SQLite connection to *file* and keep it on the instance."""
        self.conn = sqlite3.connect(file)

    def get_entry(self) -> dict:
        """Yield the JSON-decoded third column of every ``log_entry`` row.

        All rows are fetched up front; each row must have exactly three
        columns, of which only the last (the JSON document) is used.
        """
        rows = self.conn.execute("SELECT * FROM log_entry").fetchall()
        for _seq, _timestamp, payload in rows:
            yield json_loads(payload)
class ZipSQLiteLoader(SQLiteLoader):
    """SQLiteLoader variant that reads the database out of a zip archive.

    The archive member named by ``DB_FILE`` is extracted into a temporary
    directory and then opened like a plain SQLite file.
    """

    # Temporary directory holding the extracted database. It is kept on the
    # instance so the file still exists while ``self.conn`` is in use; the
    # directory is removed on the next load() or when the loader is
    # garbage-collected (TemporaryDirectory cleans up on finalization).
    _tmp_dir = None

    def load(self, file: str):
        """Extract ``DB_FILE`` from the zip archive *file* and connect to it.

        Raises:
            zipfile.BadZipFile: if *file* is not a valid zip archive.
            KeyError: if the archive has no ``DB_FILE`` member.
        """
        self._release()
        with zipfile.ZipFile(file, "r") as zipped_log:
            tmp_dir = tempfile.TemporaryDirectory()
            try:
                zipped_log.extract(DB_FILE, path=tmp_dir.name)
                super(ZipSQLiteLoader, self).load(os.path.join(tmp_dir.name, DB_FILE))
            except Exception:
                # Do not leave a stray directory behind when loading fails.
                tmp_dir.cleanup()
                raise
        self._tmp_dir = tmp_dir

    def _release(self):
        """Close the current connection and delete its extracted database."""
        if self.conn is not None:
            # Close first: an open handle blocks directory removal on Windows.
            self.conn.close()
            self.conn = None
        if self._tmp_dir is not None:
            self._tmp_dir.cleanup()
            self._tmp_dir = None

70
ThesTeX/code/neocart.py Normal file
View File

@ -0,0 +1,70 @@
import logging
from datetime import datetime
from lxml import etree
from .loader import Loader
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Prefix -> namespace URI mapping passed to every XPath query below, so GPX
# elements can be addressed as "gpx:<tag>".
NS = {'gpx':"http://www.topografix.com/GPX/1/1"}
class NeoCartLoader(Loader):
    """Loader for Neocartographer GPX track logs that tolerates broken XML.

    The file is parsed with lxml's recovering parser; every ``gpx:trkpt``
    element becomes one entry dict (see ``parse_point``). Track points that
    cannot be converted are reported and skipped.
    """

    # Number of integer digits assumed for the longitude when latitude and
    # longitude were fused into a single ``lat`` attribute
    # (e.g. lat="48.3689110.897709" -> lat 48.36891, lon 10.897709).
    LON_INTEGER_DIGITS = 2

    def load(self, file: str):
        """Parse the GPX file *file* and collect its track points in ``self.entries``.

        A track point for which ``parse_point`` raises ``ValueError`` is
        printed, logged with traceback, and left out of the result.
        """
        parser = etree.XMLParser(recover=True)
        # lxml reads the whole document inside etree.parse(), so the handle
        # can be closed as soon as the tree has been built.
        with open(file, "r") as src:
            tree = etree.parse(src, parser=parser)
        self.entries = []
        for point in tree.xpath("//gpx:trkpt", namespaces=NS):
            try:
                self.entries.append(self.parse_point(point))
            except ValueError as e:
                print(e, etree.tostring(point, pretty_print=True).decode())
                log.exception(e)

    def parse_point(self, point):
        """Convert one ``gpx:trkpt`` element into a log entry dict.

        Returns:
            A dict with the keys ``location`` (GeoJSON-style point with
            ``[lon, lat]`` coordinates), ``timestamp`` (milliseconds since the
            epoch), ``event`` (attributes of the optional ``gpx:event``
            element) and ``type`` (the event's ``message``, or ``"location"``
            when there is no event data).

        Raises:
            ValueError: if the coordinates or the time cannot be parsed, if
                the point does not have exactly one ``gpx:time`` child, or if
                it has more than one ``gpx:event`` descendant.
        """
        # Local import: the module itself only imports ``datetime``.
        from datetime import timezone

        raw_lat = point.xpath("@lat")[0]
        if raw_lat.count(".") > 1:
            # The lon attribute name is missing, so both numbers ended up in
            # ``lat``. Split them again, assuming the longitude has
            # LON_INTEGER_DIGITS digits in front of its decimal point.
            log.warning(f"recreate lat/lon from: {raw_lat}")
            log.warning(etree.tostring(point, pretty_print=True).decode())
            first_dot = raw_lat.index(".")
            second_dot = raw_lat.index(".", first_dot + 1)
            split_at = second_dot - self.LON_INTEGER_DIGITS
            raw_lon = raw_lat[split_at:]
            raw_lat = raw_lat[:split_at]
        else:
            raw_lon = point.xpath("@lon")[0]
        # float() raises ValueError for values such as "UNKNOWN".
        lat = float(raw_lat)
        lon = float(raw_lon)

        times = point.xpath("gpx:time", namespaces=NS)
        if len(times) != 1:
            raise ValueError(f"expected exactly one time element, found {len(times)}")
        # The trailing "Z" marks UTC; strptime() returns a naive datetime, so
        # attach UTC explicitly. Otherwise timestamp() would interpret the
        # value in the machine's local time zone.
        dt = datetime.strptime(times[0].text, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
        timestamp = int(dt.timestamp() * 1000)  # python3.6 has no timestamp_ns (yet)

        events = point.xpath(".//gpx:event", namespaces=NS)
        if len(events) > 1:
            raise ValueError(f"expected at most one event element, found {len(events)}")
        event = {}
        if events:
            event = dict(events[0].attrib)
            # Attributes the recovering parser could not attach to the element
            # are left as raw text in the element's tail.
            tail = (events[0].tail or "").strip()
            if tail:
                try:
                    # base case: trailing 'geoid="0"/>'
                    key, v = tail.split("=")
                    event[key] = v.split('"')[1]
                except (ValueError, IndexError):
                    # Unknown shape: keep the raw text instead of guessing.
                    event['__tail__'] = tail
        return {
            "location": {
                "type": "Point",
                "coordinates": [lon, lat]
            },
            "timestamp": timestamp,
            "event": event,
            "type": event['message'] if event else "location"
        }

    def get_entry(self) -> object:
        """Yield the entries collected by the last ``load`` call, in document order."""
        yield from self.entries

View File

@ -34,7 +34,7 @@ By extending this class, \texttt{ZipSQLiteLoader} focuses on unzipping the archi
This avoids code duplication and, with a little tweaking, would present a generic way to handle SQLite database files.
\paragraph{Neocart(ographer)}
was the evaluation step described in \autoref{sec:eval}.
is the evaluation step described in \autoref{sec:eval}.
This \texttt{Loader} deals with some seriously broken XML files.
\paragraph{Module settings} are stored in the \texttt{\_\_init\_\_} module.

View File

@ -33,12 +33,15 @@ Equilibrium\furl{http://www.geogames-team.org/?p=148} & $\approx40$ & GPX with m
\label{tab:logs3}
\end{longtable}
The following section \autoref{sec:neocart} describes the integration efforts for Neocartographer.
\section{Integration of Neocartographer}\label{sec:neocart}
\subsection{Data basis}
\subsection{Neocartographer Game Log Files}
The log files are grouped by folders and contain the GPX tracks and media, mainly photos.%TODO
Many Neocartographer GPX files have invalid XML markup, as \autoref{tab:xml} shows.
\begin{longtable}[H]{rl}
@ -48,9 +51,39 @@ missing attribute space & <desc><event message="leaveObject"geoid="9"/></desc>\\
unclosed tag & <desc><event </desc>\\
missing attribute name & <trkpt lat="48.3689110.897709">\\
invalid attribute values & <trkpt lat="UNKNOWN" lon="UNKNOWN">\\
\caption{Neocartographer GPX log errors}
\caption{Neocartographer GPX log error types}
\label{tab:xml}
\end{longtable}
The first two error types (missing separation between two attributes and unclosed tags) are syntactic XML errors.
With the lxml\furl{http://lxml.de/} recovery parser\footnote{\texttt{lxml.etree.XMLParser(recover=True)}}, the unclosed tag error is suppressed without further data loss\footnote{With an empty event tag, the data is obviously still missing}.
\section{conclusion}
In the missing attribute separation case, the recovery parser parses only the first attribute properly.
Any additional attributes are stored in the \texttt{tail} field of the XML element's object as raw string.
With string manipulation, the \texttt{geoid} attribute can be restored\footnote{In the data probe, this error occurred only with the \texttt{geoid} attribute}.
The other two errors lead to data corruption, as both cases fail to qualify as valid latitude/longitude pairs.
With the assumption of a two-digit longitude\footnote{The names and other valid longitudes suggest the location of the game field in the eastern part of Bavaria}, the correct value can be restored through string parsing from the offset of the second decimal separator.%TODO
Good practice requires the parser to issue a loud warning to indicate possible errors here.
The last error type occurs with nearly all first and second entries.
They contain the players' \emph{join} and \emph{start} events, when there is no position fix available, yet.
Currently, these log entries are discarded with an accompanying log message.
A possible improvement would be to keep a reference to these entries and add the first valid location entry that appears.
\subsection{Log Retrieval}
As there is only a playtime server, the files are stored on the filesystem of the server.
Therefore, an Nginx HTTP server was configured to serve folder indices formatted as JSON (see \autoref{sec:ggt-server}).
This allows the retrieval of the log files in a clean manner by the framework's loaders.
An additional client implementation in the framework (see \autoref{sec:source}) converts the JSON index to the structure used internally and uses the given functionality to handle file downloads.
\subsection{Analysis Functionality}
Using the \texttt{LocationAnalyzer} in combination with a \texttt{KMLRender} renderer, the analysis of log files was successful on the first run.
\section{Conclusion}
While the implementation of a new client to download log files was straightforward, the parsing of these files proved quite difficult.
However, it was not the integration into the framework but the syntactical errors in the log files that were hard to handle.
While the BioDiv2Go parser requires less than 20 lines of code, the newly written parser scratches the 60 line mark with all the error handling code (see \autoref{code:bd2l} and \ref{code:ncl}).
Once this obstacle is passed, the integration is nearly seamless.
%TODO: webclient

View File

@ -21,6 +21,10 @@
\subsection{Geogame Log Analysis project setup}\label{app:dcs}
\lstinputlisting[language=yaml,caption={Docker-compose file for Geogame Log Analysis project},label=code:gglap,numbers=left]{code/project.yml}
\section{Loader implementations}
\lstinputlisting[language=python,caption={Log loader for BioDiv2Go},label=code:bd2l,numbers=left]{code/biogames.py}
\lstinputlisting[language=python,caption={Log loader for Neocartographer},label=code:ncl,numbers=left]{code/neocart.py}
\section{TODO}
\subsection{Examples} %TODO ?!?!
Configuration \& results