Compare commits

...

13 Commits

Author SHA1 Message Date
Clemens Klug 68af833961 final rc 2018-08-06 10:20:27 +02:00
Clemens Klug d150f74d79 p3: contents ready 2018-07-26 16:34:43 +02:00
Clemens Klug 7995172970 merge both previous presentations 2018-07-24 21:25:45 +02:00
Clemens Klug f674b9549e final tuning 2018-06-13 20:24:19 +02:00
clemens 23c9ddfa3e wörk(review) 2018-06-13 15:30:26 +02:00
clemens 1ba55ed7e8 wörk (review) 2018-06-13 14:13:35 +02:00
Clemens Klug 923d692ee8 wörk (review issues) 2018-06-13 12:06:43 +02:00
Clemens Klug edc7cba50c add remark of favourite future analysis 2018-06-13 10:00:33 +02:00
Clemens Klug 846f467cd1 add note about external data 2018-06-12 19:54:46 +02:00
Clemens Klug f48b0bf426 add missing image 2018-06-12 19:51:38 +02:00
Clemens Klug 5d4ed02c20 add eval lxml size increase 2018-06-12 18:14:48 +02:00
Clemens Klug fdbb751c74 wörk 2018-06-11 15:52:07 +02:00
Clemens Klug 7596c93658 wörk
* speelcheck
* add webgui
2018-06-11 15:17:33 +02:00
31 changed files with 354 additions and 254 deletions

@ -2,10 +2,10 @@
%Necessary Information
\author{Clemens Klug}
\title{A Framework for the Analysis of Spatial Game Data}
\subtitle{Ein Analyseframework f\"ur raumbezogene Spieldaten\\2. Vortrag}
\subtitle{Ein Analyseframework f\"ur raumbezogene Spieldaten\\3. Vortrag}
%The day of the presentation
%\date{\today}
\date{May 24, 2018}
\date{7. August 2018}
%Optional Information
\subject{A Framework for the Analysis of Spatial Game Data}

BIN: PresTeX/images/demo.png, new file, 44 KiB (binary, not shown)

BIN: new image file, 36 KiB (binary, not shown)

BIN: PresTeX/images/eval.jpg, new file, 133 KiB (binary, not shown)

BIN: PresTeX/images/oeb-raw.png, new file, 18 KiB (binary, not shown)

BIN: new image file, 15 KiB (binary, not shown)

@ -100,78 +100,98 @@ compress
%%%%%%%%%% Content starts here %%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Solution approach}
\frame
{
\tableofcontents[sectionstyle=show/hide,hideothersubsections]
}
\subsection{Requirements}
\begin{frame}{Requirements}
\framesubtitle{Wait, what did I want to do again?}
\section{A Framework for the Analysis of Spatial Game Data}
\begin{frame}{A Framework for the Analysis of Spatial Game Data}
\framesubtitle{Goal definition}
\begin{columns}
\column{0.49\linewidth}
Geogames: Intersection of GIS and gaming technology\footnotemark
\begin{itemize}
\item Game actions tied to real-world spatial places ('Break the magic circle')
\item Locomotion as essential game part
\item Trade-offs between board and race style games\footnotemark
\item Foster recognition of environment
\item Focus through game related tasks
\end{itemize}
\column{.49\linewidth}
Scope of the analysis framework
\begin{itemize}
\item Framework for analysis
\item Two target groups:
\begin{itemize}
\item Expert users/researchers
\item Staging/designing staff
\end{itemize}
\item Integration of external data (questionnaire results)
\item Adaptable for multiple games
\end{itemize}
\end{columns}
\addtocounter{footnote}{-1}
\footcitetext{Ahlqvist2018}\stepcounter{footnote}
\footcitetext{1705427}
\end{frame}
\begin{frame}{Framework components}
Prerequisites: Game log
\begin{itemize}
\item Creation
\item Storage
\end{itemize}
New components
\begin{itemize}
\item Per-game importer (Web client, File loader, …)
\item Analyzer modules (number crunching)
\item Output \& Visualization (CSV, [Geo]JSON, KML, Graphs, …)
\item Interface (Configuration)
\begin{itemize}
\item Expert users/researchers
\item Staging/designing staff
\end{itemize}
\item Cross-game comparisons
\item Integration of external data (questionnaire results)
\end{itemize}
\end{frame}
\subsection{Architecture}
%\begin{frame}{Experiment: Kibana}
%\image{.85\textwidth}{kibana}{Game trace in Kibana}{img:kibana}
%\end{frame}
\begin{frame}{Experiment: Kibana}
\image{.85\textwidth}{kibana2}{Game trace in Kibana}{img:kibana2}
\end{frame}
\begin{frame}{Experiment: Grafana}\hspace{.3\textwidth}a\\[-24pt]
\image{.85\textwidth}{grafana}{Side project: Weather station with Grafana}{img:grafana}
\end{frame}
\section{Modular map-reduce architecture}
\begin{frame}{Architecture scheme}
\begin{frame}{Architecture}
Log processing turn-key solutions
\begin{itemize}
\item Fast update cycles
\item Low spatial resolution
\item Query languages as entry barrier
\end{itemize}
Custom stack solution
\begin{itemize}
\item Based on map-reduce
\item Map: Analysis
\begin{itemize}
\item Iterate Log entries
\item Feed log entry through analyzer queue
\begin{itemize}
\item Augment entries
\item Filter entries
\item Sequential order
\end{itemize}
\end{itemize}
\item Reduce: Collect summaries from analyzers
\begin{itemize}
\item Rendering
\item Post-processing, Comparison, …
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{Log processing scheme}
\image{\textwidth}{../../ThesTeX/images/map-reduce.pdf}{Data flows}{img:flow}
\end{frame}
\section{Implementation}
%%%%%%%%%%%%%%%%%55
% PRESENTATION 2
%%%%%%%%%%%%%%%%%%%%
\subsection{Technologies}
\begin{frame}{Implementation}
\begin{columns}
\column{0.55\linewidth}
Analysis
\begin{itemize}
\item Python (3.6)
\item Standalone library/CLI tool
\item Web based configuration/Runner/API (Flask, Celery)
\end{itemize}
Rendering
\begin{itemize}
\item Matplotlib, Numpy
\begin{itemize}
\item Graphs
\end{itemize}
\item JavaScript
\begin{itemize}
\item Leaflet
\item Web visualization: Maps, Tracks, …
\end{itemize}
\end{itemize}
\pic{.5\textwidth}{matplotlib}
\column{0.4\linewidth}
\pic{.5\textwidth}{python}
\pic{.4\textwidth}{flask}
\pic{.4\textwidth}{leaflet}
\end{columns}
\end{frame}
\subsection{Microservice Composition}
\section{Microservice oriented implementation}
\subsection{Service Composition}
\begin{frame}{Microservice Composition}
\framesubtitle{Docker everything!}
\framesubtitle{Dockerize everything!}
\begin{columns}
\column{0.45\linewidth}
\begin{itemize}
@ -187,36 +207,31 @@ compress
\end{columns}
\end{frame}
\subsection{Classes}
\subsection{Results}
\begin{frame}{Analysis}
\twofigures{0.5}{code/analyzer}{Analyzer interface}{code:anif}{code/mask-spatials}{Sample analyzer}{code:mask}{Analyzer stub and implementation}{fig:pyan}
\begin{frame}{ActivityMapper}
\image{.7\textwidth}{track-fi}{Combined screen activity and spatial progress}{img:trackfi}
\end{frame}
\begin{frame}{Track length evaluation}
\begin{columns}
\column{0.49\linewidth}
\image{\textwidth}{oeb-raw}{Raw track lengths}{img:oeb-raw}
\column{0.49\linewidth}
\image{\textwidth}{oeb-simplified}{Simplified track lengths}{img:oeb-simpe}
\end{columns}
\end{frame}
\begin{frame}{Result}
\image{\textwidth}{code/result}{Result class}{code:result}
\end{frame}
\begin{frame}{Render}
\twofigures{0.5}{code/render}{Render interface}{code:reif}{code/render-board}{Sample render}{code:rebo}{Render stub and implementation}{fig:pyre}
\end{frame}
\subsection{Configuration}
\begin{frame}{Configuration \& result}
\twofigures{0.5}{oeb-kml}{Analyzer configuration}{img:oebkml}{oeb-ge}{Result visualized}{img:oebge}{Example: Generate KML tracks (BioDiv2Go; Oberelsbach2016)}{fig:oeb2016}
\end{frame}
\section{Outlook: Evaluation}
%TODO
\section{Evaluation}
\subsection{Setup}
\begin{frame}{Evaluation}
\begin{itemize}
\item Analyse other geogames
\item Describe effort
\item ?
\item Profit
\end{itemize}
%\begin{itemize}
% \item Analyse other geogames
% \item Describe effort
% \item ?
% \item Profit
%\end{itemize}
\image{\textwidth}{eval}{Evaluation setup}{img:evalplan}
\end{frame}
\begin{frame}{Evaluation}
@ -227,7 +242,7 @@ Geogame & Log files & Notes \\
BioDiv2Go & $\approx430$ & SQLite database with JSON log entries, references to game config; import base case\\
GeoTicTacToe & $\approx13$ & CSV with pipes; no temporal data; events + tracks\\
\caption{Geogame client log data}
\label{tab:logs}
\label{tab:logscli}
\end{longtable}
Servers
@ -240,10 +255,47 @@ Neocartographer & $\approx400$ & Partly broken GPX: missing description informat
MissingLink & $\approx6$ & Partly broken GPX: missing spatial information; one GPX file per player\\
Equilibrium & $\approx40$ & GPX with missing end tag\\
\caption{Geogame servers log data}
\label{tab:logs}
\label{tab:logssrv}
\end{longtable}
\end{frame}
\subsection{Results}
\begin{frame}{Integration of Neocartographer}
Challenges
\begin{itemize}
\item Corrupted XML files
\item No game server with API for log retrieval
\end{itemize}
\begin{longtable}[H]{rl}
Error type & Example \\
\hline
missing attribute space & <desc><event message="leaveObject"geoid="9"/></desc>\\
unclosed tag & <desc><event </desc>\\
missing attribute name & <trkpt lat="48.3689110.897709">\\
invalid attribute values & <trkpt lat="UNKNOWN" lon="UNKNOWN">\\
\caption{Neocartographer GPX log error types}
\label{tab:xml}
\end{longtable}
Solutions
\begin{itemize}
\item Recovery parser \& custom cleanup (new dependency: lxml)
\item Additional log server (Nginx with JSON autoindex)
\end{itemize}
\end{frame}
\begin{frame}{Evaluation results}
\image{\textwidth}{eval-changes}{Code changes necessary for the integration of another game}{img:eval}
\end{frame}
\section{Demotime}
\begin{frame}{It's time for...}
\huge{...a demo!}
\vspace{2cm}
\pic{.9\textwidth}{demo}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%% References %%%%%%%%%%
@ -260,38 +312,6 @@ Equilibrium & $\approx40$ & GPX with missing end tag\\
\appendix
\backupbegin
\begin{frame}{Architecture}
\begin{itemize}
\item Based on map-reduce
\item Map: Analysis
\begin{itemize}
\item Iterate Log entries
\item Feed log entry through analyzer queue
\begin{itemize}
\item Augment entries
\item Filter entries
\item Sequential order
\end{itemize}
\end{itemize}
\item Reduce: Collect summaries from analyzers
\begin{itemize}
\item Post-processing, Comparison, …
\item Rendering
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{Graphs}
\begin{columns}
\column{0.45\linewidth}
\image{\textwidth}{simu-retries}{Experimentational rounds}{img:retries}
\column{0.45\linewidth}
\image{\textwidth}{speed}{Speed distribution}{img:speed}
\end{columns}
\end{frame}
\begin{frame}{Graphs}
\image{.9\textwidth}{time-rel}{Time distribution}{img:time}
\end{frame}
\backupend
\end{document}

@ -30,7 +30,7 @@ initialize:
DOTTYPE := pdf
LANG := de
LANG := en
F :=
DOT_WILDCARD := images/*.dot
.PHONY: fast latex bibtex dot spell spell1 todo

@ -4,10 +4,10 @@
\subtitle{A Framework for the Analysis of Spatial Game Data}
\newcommand\degree{Master}
\newcommand\studycourse{Angewandte Informatik}
\newcommand\advisor{Christoph Schlieder}
\newcommand\advisor{Prof. Dr. Christoph Schlieder}
\newcommand\location{Bamberg}
\subject{\degree arbeit im Studiengang \studycourse\ der Fakultät Wirtschaftsinformatik und Angewandte Informatik der Otto-Friedrich-Universität Bamberg}
\date{19.06.2018?} %TODO
\date{14.06.2018}
\gittrue
\gitfalse
\thesistrue

@ -1,35 +1,22 @@
In this thesis, a framework for the analysis of spatial game data is developed.
This game data is collected during the game sessions and stored in log files.
The following chapters describe the basics of the development process.
\section{Location based Games: Put the 'fun' in education}
Location based Games are at the intersection of GIS and gaming technology \cite{Ahlqvist2018}.
\section{Location based Games: Put the `fun' in education}
Spatial games, also known as location based games, are at the intersection of GIS and gaming technology \cite{Ahlqvist2018}.
With game actions tied to real-world spatial places, this genre breaks the magic circle of games: they are embedded into the environment and the boundary between game and non-game is vanishing \cite{montola2009games}.
As they feature locomotion as an essential game part, a focus on certain aspects of the environment can be achieved by game related tasks.
These tasks can include educational aspects or reward special behaviour through in-game benefits as a means of gamification.
A playable game with good balance and a lasting impact needs to consider the trade-off between board and race style games \cite{1705427,kremer2013spatial}.
Board style games are dominated by strategic planning with less temporal restrictions, while race styled games favour the physical capabilities of the fastest players.
Popular examples of mobile geogames are Ingress\furl{https://www.ingress.com/} and the more recent Pokemon Go\furl{https://www.pokemongo.com/}.
These worldwide playable games barely embed the surroundings into the game except for the base map and some landmark-derived attributes \footnote{Pokemon Go aligns the land types with the possible types of Pokemons available}.
Popular examples of mobile geogames are Ingress\furl{https://www.ingress.com/} and the more recent Pokemon Go\furl{https://www.pokemongo.com/}\!.
These worldwide playable games barely embed the surroundings into the game except for the base map and some landmark-derived attributes\footnote{Pokemon Go aligns the land types with the possible types of Pokemon available}\!.
With a fine-tuned setup of educational content, game elements and integration of locomotion on the other hand, location based games (also known as geogames) foster recognition of the environment.
\autoref{img:gg2} shows the map overview of such a game: FindeVielfalt Simulation\furl{https://biodivlb.jimdo.com/english-1/project-finde-vielfalt/finde-vielfalt-simulation/}.
\autoref{img:gg2} shows the map overview of such a game: FindeVielfalt Simulation\furl{https://biodivlb.jimdo.com/english-1/project-finde-vielfalt/finde-vielfalt-simulation/}\!.
The game is located in an orchard; the blue dots are caches tied to game actions.
To proceed in the game's narrative, the caches have to be completed.
The players have to complete a task with context of the caches' location.
The players have to complete a task within the context of the caches' location.
\image{.5\textwidth}{../../PresTeX/images/gg2}{Geogame map view}{img:gg2}
\section{Research with location based games}\label{sec:gg-res}
Usually, when the effectiveness of location based educational games is to be measured, the following pattern is applied:
After a mission statement has been defined and approved, a fitting statistical framework has to be developed.
Based on such a framework, questionnaires have to be derived.
As some metrics cannot be retrieved directly from the questionnaires answers, the statistical framework needs to considers these and consider measureable informations to derive the original metric from.
The finished and for alignment with the mission statement approved questionnaires are then applied at field test with users from the target groups.
Each field test consists of an upstream questionnaire, a pass of the location based game and a final round of questionnaires.
After an data entry step for paper-based questionnaires, the raw results are fed into the statistical framework implemented in a statistical processing software to retrieve the final results.
\cite{Schaal2017} describes this development in the context of the BioDiv2Go project.
\autoref{img:biodiv-schaal} shows the resulting statistical framework for the valuing of biodiversity as target variable of the location based geogame developed in the BioDiv2Go project.
\image{\textwidth}{../../PresTeX/images/biodiv-schaal}{Statistical framework for BioDiv2Go\cite{Schaal2017}}{img:biodiv-schaal}

@ -7,10 +7,13 @@ To be a true framework, a tight coupling to any specific game is out of bounds.
For a holistic solution, the integration of external data (e.g. questionnaire results) allows the framework to compute an overall result.
\subsection{Target groups}
With researches extracting influences of games into the players' behaviour, the first target group of the analysis framework is clear.
There are two target groups for this project: Researchers and staging staff.
With researchers extracting the influence of games on the players' behaviour, the first target group of the analysis framework is clear.
They need a sufficient data basis for evaluations of broader scope.
A second user group are the game staging and game designing staff.
They require feedback about difficult sections in the game, hard to reach locations, or overcrowded situations to improve the game emjoyment of the players.
They require feedback about difficult sections in the game, hard to reach locations, or overcrowded situations to improve the game enjoyment of the players.
They also need statistics to conclude the game session with a review of the game round.
With a result for each player, these statistics allow individual feedback.
With spatial trajectories that can possibly be attributed to individual players, some basic privacy protection needs to restrict the access to the log files to a minimal number of people.
Additionally, a user interface for the creation of predefined analysis settings with custom logs provides the stagers and designers with a reasonable information basis.
@ -23,7 +26,7 @@ Not only the creation but also the storage of the game logs is not to be conside
As long as an interface to access the logs is provided, an access client belongs to the scope of the framework.
\subsection{New components}
The framework can be defined by the follwing components:
The framework can be defined by the following components:
\begin{itemize}
\item Per-game importer (Web client, File loader, …) to load log files for analysis
\item Analyzer modules to perform the number crunching and extraction of information

@ -1,7 +1,24 @@
With an administrative background, the first approach to log processing which comes to mind are the various log processing frameworks.
\autoref{sec:logproctheo} shows the current state of tools and processes for managing large volumes of log and time series data.
In \autoref{sec:gg-res}, the involvement of location based games in the research field is reviewed.
Covering the basic data aggregation, \autoref{sec:logproctheo} shows the current state of tools and processes for managing large volumes of log and time series data.
An overview of the field of pedestrian track analysis is located in \autoref{sec:pedest}.
Finally, in \autoref{sec:gametheo} the connection of spatial anaylses and digital game optimizations is showcased.
Finally, in \autoref{sec:gametheo} the connection of spatial analysis and digital game optimizations is showcased.
\section{Research with location based games}\label{sec:gg-res}
\cite{Schaal2017} describes the evaluation of a location based game.
To measure the effectiveness of the game, the following pattern is applied:
After a mission statement has been defined and approved, a fitting statistical framework has to be developed.
Based on such a framework, questionnaires have to be derived.
As some metrics cannot be retrieved directly from the questionnaire answers, the statistical framework needs to consider these cases and identify measurable information from which the original metric can be derived.
The finished questionnaires, approved for alignment with the mission statement, are then applied in field tests with users from the target groups.
Each field test consists of an upstream questionnaire, a pass of the location based game and a final round of questionnaires.
After a data entry step for paper-based questionnaires, the raw results are fed into the statistical framework, implemented in statistical processing software, to retrieve the final results.
\autoref{img:biodiv-schaal} shows the resulting statistical framework for the valuing of biodiversity as target variable of the location based geogame developed in the BioDiv2Go project.
\image{\textwidth}{../../PresTeX/images/biodiv-schaal}{Statistical framework for BioDiv2Go\cite{Schaal2017}}{img:biodiv-schaal}
\section{Log processing}\label{sec:logproctheo}
System administrators and developers face a daily surge of log files from applications, systems, and servers.
@ -16,7 +33,7 @@ A clear classification is not always possible, as some modules integrate virtual
\begin{longtable}[H]{cp{0.2\textwidth}p{0.2\textwidth}}
Collection & Database & Frontend\\
\hline
Logstash\furl{https://www.elastic.co/de/products/logstash} & Elatisc Search\furl{https://www.elastic.co/de/products/elasticsearch} & Kibana\furl{https://www.elastic.co/de/products/kibana}\\
Logstash\furl{https://www.elastic.co/de/products/logstash} & Elastic Search\furl{https://www.elastic.co/de/products/elasticsearch} & Kibana\furl{https://www.elastic.co/de/products/kibana}\\
Collectd\furl{https://collectd.org/} & Influx DB\furl{https://www.influxdata.com/} & Grafana\furl{https://grafana.com}\\
Icinga\furl{https://www.icinga.com/products/icinga-2/} & Whisper\furl{https://github.com/graphite-project/whisper} & Graphite\furl{https://graphiteapp.org/}\\
StatsD\furl{https://github.com/etsy/statsd} & Prometheus\furl{https://prometheus.io/} & \\
@ -30,7 +47,7 @@ StatsD\furl{https://github.com/etsy/statsd} & Prometheus\furl{https://prometheus
Nearly all services designed for log collection offer multiple interfaces for submitting log data.
By way of illustration, Logstash features a long list of input plugins, ranging from streaming files and an HTTP API to proprietary vendor sources like Amazon Web Services (AWS)\furl{https://www.elastic.co/guide/en/logstash/current/input-plugins.html}. \nomenclature{\m{A}mazon \m{W}eb \m{S}ervices}{AWS} \nomenclature{\m{A}pplication \m{P}rogramming \m{I}nterface}{API}\nomenclature{\m{H}yper\m{t}ext \m{T}ransport \m{P}rotocol}{HTTP}
Aside from aggreation, the topic of log creation is covered from host-based monitoring solutions like Icinga to application centric approaches with e.g. StatsD embedded in the application source code\furl{https://thenewstack.io/collecting-metrics-using-statsd-a-standard-for-real-time-monitoring/}.
Aside from aggregation, the topic of log creation is covered from host-based monitoring solutions like Icinga to application-centric approaches with e.g. StatsD embedded in the application source code\furl{https://thenewstack.io/collecting-metrics-using-statsd-a-standard-for-real-time-monitoring/}.
\subsection{Databases}
The key component for a log processing system is the storage.
@ -71,10 +88,10 @@ To eliminate such biases of one system, \cite{Li2015} describes the combination
\subsection{Activity Mining}\label{sssec:act}
GPS (or GNSS) tracks generally only contain the raw spatio-temporal data (possibly accompanied by metadata like accuracy, visible satellites, etc.).
Any additional information needs either be logged seperately or needs to be derived from the track data itself.
Any additional information needs either to be logged separately or to be derived from the track data itself.
This activity mining allows e.g. the determination of the modes of transport used while creating the track \cite{Gong_2014}.
\cite{Gong_2015} shows the extraction of activity stop locations to identify locations where locomotion suspends for an activity in contrast to stops without activities.
Informations of this kind are relevant e.g. for improvements for tourist management in popular destinations \cite{tourist_analysis2012,koshak2008analyzing,Modsching:2008:1098-3058:31}.
Information of this kind is relevant e.g. for improving tourist management in popular destinations \cite{tourist_analysis2012,koshak2008analyzing,Modsching:2008:1098-3058:31}.
Beside points of interest (POIs), individual behaviour patterns can be mined from tracks, as described in \cite{ren2015mining}.
Post-processing of these patterns with machine learning enables predictions of future trajectories \cite{10.1007/978-3-642-23199-5_37}.
@ -88,7 +105,7 @@ One of the most basic visualization of large amounts of spatial data is the heat
As the example in \autoref{img:strava} shows, it allows areas with high densities of data points to be identified very quickly.
This comes, however, with the loss of nearly all context information.
For example, the temporal information - both the time slice and the relative order of the data points - is completely absent.
A workaround is an external control element for such information to control the unerlying dataset.
A workaround is an external control element for such information to control the underlying data set.
\image{\textwidth}{../../PresTeX/images/strava}{Heatmap: Fitnesstracker \cite{strava}}{img:strava}
@ -106,8 +123,8 @@ However, such views are limited in the amount of tracks and attributes to displa
One way to address the lack of temporal context is the space-time cube concept reviewed in \cite{kraak2003space}.
By mapping an additional temporal axis as third dimension on a two-dimensional map, tracks can be rendered in a three-dimensional context.
The example in \autoref{img:spacetime} shows how such a rendering allows individual movement patterns and locations of activity in between to be identified.
However, it also demonstrates the problems of the difficult interpretation of the 3D map, especially with overlappig tracks.
Beside from overcrouded ares, many people have difficulties of miss-interpreting the 3D movements.
However, it also demonstrates the problems of the difficult interpretation of the 3D map, especially with overlapping tracks.
Apart from overcrowded areas, many people tend to misinterpret the 3D movements.
The space flattened alternative on the right tries to reduce this problem with a spatial abstraction.
\image{\textwidth}{../../PresTeX/images/space-time}{Space-time cube examples \cite{bach2014review}}{img:spacetime}
@ -117,7 +134,7 @@ This highlights hotspots of activity over an temporal axis.
\image{\textwidth}{space-time-density}{Space-time cube density examples \cite{demvsar2015analysis}}{img:spacetime2}
\subsubsection{Trajectory patterns and generalizations}
To simplify the visualization of large amounts of indiviual tracks, the derivation of patterns applying to the tracks allows to highlight key areas.
To simplify the visualization of large amounts of individual tracks, the derivation of patterns applying to the tracks allows key areas to be highlighted.
\autoref{img:traj-pattern} shows two examples of such patterns: Flock, where a group of tracks are aligned for some time, and meet, which defines an area of shared presence.
Such patterns can be applied time-aware or time-agnostic, i.e. with or without taking the simultaneous appearance into account. \cite{jeung2011trajectory}
@ -131,14 +148,14 @@ With an increasing generalization parameter, the flows refine to more abstract r
\section{Analyzing games}\label{sec:gametheo}
Modern video games with always-on copyprotection or online masterservers allow game studios to collect metrics about players' performances.
Modern video games with always-on copy-protection or online master servers allow game studios to collect metrics about players' performances.
In \cite{Drachen2013}, the authors describe the use of GIS technologies for such environments.
For example, \autoref{img:chatlogs} shows a correlation between the frequency of certain keywords in the chat messages and the players' current location.
This indicates a possible bug in the game to look out for.
Not only technical problems but also design errors or bad balancing can be visualized.
\autoref{img:ac3death} uses a heatmap to highlight areas with high failure rates during playtesting.
These failure hotspots points can then be addressed for a convienient game flow.
\autoref{img:ac3death} uses a heatmap to highlight areas with high failure rates during play-testing.
These failure hotspots can then be addressed for a convenient game flow.
\image{\textwidth}{../../PresTeX/images/game-an}{Chat logs with players location \cite{Drachen2013}}{img:chatlogs}
\image{\textwidth}{../../PresTeX/images/ac3-death}{Identify critical sections \cite{Drachen2013}}{img:ac3death}

@ -1,5 +1,4 @@
With an administrative background, the first approach to log processing which comes to mind is the various log processing frameworks.
The following chapter \autoref{sec:logproc} takes a dive into this world and evaluates the feasability of two such system for the scope of this thesis.
The following chapter \autoref{sec:logproc} takes a dive into the world of log processing frameworks and evaluates the feasibility of two such systems for the scope of this thesis.
Based on the findings, an alternative approach is then outlined in \autoref{sec:alternative-design}.
@ -8,16 +7,16 @@ This chapter looks into the possibilities of existing log processing solutions.
By example, Kibana with an Elastic Search backend and Grafana with an InfluxDB will be evaluated.
\subsection{Evaluating Kibana}
To evaluate whether Kibana is a viable approach for the given requirements, a test environmentwas built.
This setup with Docker, definied with Docker-compose, is documented in \autoref{app:kibana}.
Two sample datasets were loaded into the Elasticsearch container through HTTP POST requests. %: \texttt{curl -H 'Content-Type: application/x-ndjson' -XPOST 'elastic:9200/\_bulk?pretty' --data-binary @gamelog.json}.
To evaluate whether Kibana is a viable approach for the given requirements, a test environment was built.
This setup with Docker, defined with Docker-compose, is documented in \autoref{app:kibana}.
Two sample data sets were loaded into the Elasticsearch container through HTTP POST requests. %: \texttt{curl -H 'Content-Type: application/x-ndjson' -XPOST 'elastic:9200/\_bulk?pretty' --data-binary @gamelog.json}.
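The same bulk load can be sketched in Python; the host name \texttt{elastic} and the file name mirror the commented curl command from the test setup, everything else is an assumption:
\begin{lstlisting}[language=python,caption={Sketch: bulk-loading a sample data set into Elasticsearch}]
import requests

# Bulk-load newline-delimited JSON into the Elasticsearch container.
# Host and file name follow the docker-compose test setup (assumed).
with open("gamelog.json", "rb") as fh:
    response = requests.post(
        "http://elastic:9200/_bulk?pretty",
        headers={"Content-Type": "application/x-ndjson"},
        data=fh)
    response.raise_for_status()
\end{lstlisting}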
Once Kibana was told which fields hold the spatial information, it was possible to have a first visualization on the workbench.
However, this view is optimized for the context of web log processing, so it has a rather low spatial resolution as shown in \autoref{img:kibana} and \autoref{img:kibana2}.
Dealing mostly with unprecise locations from GeoIP lookups and in respect of the web users` privacy this choice avoids false conclusions\footnote{GeoIP database providers can not always return qualified resolutions, instead rely on default locations, leadling to bizare events like \url{https://splinternews.com/how-an-internet-mapping-glitch-turned-a-random-kansas-f-1793856052}} and enforces privacy-by-default.
Dealing mostly with imprecise locations from GeoIP lookups, and in respect of the web users' privacy, this choice avoids false conclusions\footnote{GeoIP database providers can not always return qualified resolutions, instead relying on default locations, leading to bizarre events like \url{https://splinternews.com/how-an-internet-mapping-glitch-turned-a-random-kansas-f-1793856052}} and enforces privacy-by-default.
As an additional restraint to application in the geogame context, the query language restricts the possible research questions the solution can resolve.
This means only the questions expressable in the query language can be answered.
Additionally, this requires the users to master the query language before any resonable conclusions can be extracted.
This means only the questions expressible in the query language can be answered.
Additionally, this requires the users to master the query language before any reasonable conclusions can be extracted.
By building a custom plugin, extension, or modified version, it is possible to circumvent this obstacle.
However, the fast-paced environment of the industry either requires a constant effort of keeping pace, or results in an outdated system rather quickly. (E.g. the next major release Kibana v6.0.0\footnote{\url{https://github.com/elastic/kibana/releases/tag/v6.0.0}} was released about a year after Kibana v5.0.0\footnote{\url{https://github.com/elastic/kibana/releases/tag/v5.0.0}}. However, the previous major version seems to receive updates for about a year, too.)
@ -40,34 +39,38 @@ This chapter once again instantiates the phrase "spatial is special" \cite{spati
After all, the monitoring solutions are no perfect match for this special - spatial - use case.
The privacy concerns vital in web monitoring prohibit detailed spatial analyses, the query languages can restrict some questions, and custom extensions require constant integration effort.
Regarding the specified use cases, expecially the non-expert users benefit from a simple to use interface.
The default Kibana worchbench does not qualify for this, a custom interface could improve the situation.
Regarding the specified use cases, especially the non-expert users benefit from a simple to use interface.
The default Kibana workbench does not qualify for this; a custom interface could improve the situation.
Grafana does have support for shared dashboards with a fixed set of data, however precise spatial support is still lacking.
A third party plugin does provide such support by now\furl{https://github.com/CitiLogics/citilogics-geoloop-panel}; unfortunately it missed the time frame of the Grafana evaluation for this thesis.
Such a plugin would still be a possibly fragile component, given the fast pace of web development shown by these kinds of projects.
\section{Architectural Design}\label{sec:alternative-design}
\subsection{Overview}
\section{Developing a modular architectural design}\label{sec:alternative-design}
While the development of a custom stack requires a lot of infrastructural work to get the project running, the findings above support a custom solution as a feasible alternative:
\begin{itemize}
\item Developing from buttom-up takes less time than diving into complex turn-key monitoring solutions.
\item Developing from bottom-up takes less time than diving into complex turn-key monitoring solutions.
\item With rather limited amounts of data\footnote{From a sample of 436 game logs from BioDiv2go, an average log file is 800 kB in size, with a median of 702 kB}, scalable solutions are no hard requirement
\item No core dependecies on fast-paced projects
\item No core dependencies on fast-paced projects
\item Interfaces tailored to requirements: Simple web interface for non-expert users, CLI and API for researchers with unrestricted possibilities.
\item A focus on key points allows simple, easily extendable interfaces and implementations.
\item By reducing the complexity to a manageable level, the processes and results can be verified for accuracy and reliability.
\end{itemize}
With the requirements from \autoref{sec:require} and the learnings from log processing evaluations in mind, a modular processing pipeline depicted in \autoref{img:flowchart} allows for a configurable solution.
It comprises the stages of input, analysis and rendering.
With interfaces defined between the stages, this approach allows the exchange of single modules without affecting the remaining pipeline.
\image{.75\textwidth}{flowchart.pdf}{Modular processing pipeline}{img:flowchart}
With the requirements from \autoref{sec:require} and the learnings from log processing evaluations in mind, a first architectural approach is visualized in \autoref{img:solution}.
\subsection{Overview}
An architectural approach surrounding the processing pipeline is visualized in \autoref{img:solution}.
It outlines three main components of the project: Two user facing services (Web \& CLI / API), and an analysis framework.
The interfaces (Web and CLI/API) for both target groups (see \autoref{sec:require}) are completely dependent on the analysis framework at the core.
\image{\textwidth}{solution.pdf}{Architecture approach}{img:solution}
\image{.75\textwidth}{solution.pdf}{Architecture approach}{img:solution}
The following sections describe each of those components.
\subsection{Analysis Framework}
The analysis framework takes game logs, processes their entries, collects results, and renders them to an output.
With a Map-Reduce pattern as basic structure for the data flow, an ordered collection of analyzing, matching prostprocessing and render operations defines an analysis run.
With a Map-Reduce pattern as basic structure for the data flow, an ordered collection of analyzing, matching postprocessing and render operations defines an analysis run.
\autoref{img:flow} shows the data flows through the framework.
Every processed log file has its own chain of analyzer instances.
The log entries are fed sequentially into the analysis chain.
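This flow can be sketched in a few lines; only \texttt{process} and \texttt{result} belong to the interfaces described in the implementation, the surrounding names and the filtering convention are hypothetical:
\begin{lstlisting}[language=python,caption={Sketch: map-reduce data flow (hypothetical names)}]
def analyze_log(entries, analyzers):
    # Map step: feed every log entry through the analyzer chain in order.
    for entry in entries:
        for analyzer in analyzers:
            entry = analyzer.process(entry)
            if entry is None:  # assumed convention: the entry was filtered out
                break

def collect_results(analyzers, store):
    # Reduce step: gather each analyzer's summary for
    # post-processing, comparison and rendering.
    for analyzer in analyzers:
        store.add(analyzer.result())
\end{lstlisting}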
@ -138,5 +141,5 @@ Games without central server can provide a mocked server to supply logged data,
By acting like any normal client, the framework can avoid obstacles like CORS/XSS prevention.
The independence to user interfaces, mainly the web interface, allows scalability through load-balancing with mulitple API workers.
The independence to user interfaces, mainly the web interface, allows scalability through load-balancing with multiple API workers.
Expert users with special requirements can embed the framework in projects without pulling in large amounts of dependencies for user interfaces or games/game servers.

@ -1,5 +1,5 @@
Based on the findings in \autoref{sec:solution}, an implementation with Python was realized.
The following sections describe the structure and service composition utilized to fullfill the requirements.
The following sections describe the structure and service composition utilized to fulfill the requirements.
\section{Code structure}
There are four packages forming the Analysis Framework project:
@ -14,8 +14,8 @@ The analysis and clients packages are described in \autoref{sec:analysisframewor
\subsection{Analysis Framework}\label{sec:analysisframework}
The internal structure of the analysis package is shown in \autoref{img:pack-analysis}.
Besides the subpackages for analysing work (analyzers: \autoref{sec:analysiswork}) and log parsing (loaders: \autoref{sec:loaders}), it contains helper functionalities and finally the Python module \texttt{log\_analyzer} as entrypoint for researches experimenting and outline of the intended workflow.
\image{.7\textwidth}{packages-analysis}{anaylsis package overview}{img:pack-analysis}
Besides the sub-packages for analysing work (analyzers: \autoref{sec:analysiswork}) and log parsing (loaders: \autoref{sec:loaders}), it contains helper functionalities and finally the Python module \texttt{log\_analyzer} as entry point for researchers experimenting and as an outline of the intended workflow.
\image{.7\textwidth}{packages-analysis}{analysis package overview}{img:pack-analysis}
\subsubsection{Log parsing}\label{sec:loaders}
Outlined in \autoref{img:pack-loader}, the parsing of log files into an internal structure happens here.
@ -41,10 +41,10 @@ This \texttt{Loader} deals with some seriously broken XML files.
This is mainly a mapping to allow references to \texttt{Loader}s in the JSON files for configuration (see \autoref{sec:settings}).
\subsubsection{Analysis Work package}\label{sec:analysiswork}
\autoref{img:pack-analyzers} shows the subpackages of \texttt{anaylsis.analyzers}.
There are subpackages for doing the actual analysis work, as well as for the postprocess and rendering step.
\autoref{img:pack-analyzers} shows the sub-packages of \texttt{analysis.analyzers}.
There are sub-packages for doing the actual analysis work, as well as for the postprocessing and rendering steps.
Additionally, the \texttt{settings} module defines the LogSettings class.
\image{.7\textwidth}{packages-analysis-analyzers}{anaylsis.analyzers package overview}{img:pack-analyzers}
\image{.7\textwidth}{packages-analysis-analyzers}{analysis.analyzers package overview}{img:pack-analyzers}
\paragraph{LogSettings}\label{sec:settings}
This class holds the configuration for an analysis run:
@ -69,25 +69,30 @@ This class holds the configuration for an analysis run:
The settings are stored as JSON files, and parsed at runtime into a \texttt{LogSetting} object (see \autoref{img:oebkml} for a sample JSON settings file).
The helper functions in \texttt{analysis.util} provide a very basic implementation of a query language for Python dictionaries:
A dot-separated string defines the path to take through the dictionary, providing basically syntactic sugar to avoid lines like \texttt{entry["instance"]["config"]["@id"]}.
As this prooves quite difficult to configure using JSON, the path-string \texttt{"instance.config.@id"} is much more deserialization friendly.
As this proves quite difficult to configure using JSON, the path-string \texttt{"instance.config.@id"} is much more deserialization friendly.
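A minimal sketch of such a lookup helper (the function name is hypothetical; the actual helpers live in \texttt{analysis.util}):
\begin{lstlisting}[language=python,caption={Sketch: dot-separated dictionary lookup}]
def query(entry, path, default=None):
    # Walk a nested dictionary along a dot-separated path string.
    for key in path.split("."):
        if not isinstance(entry, dict) or key not in entry:
            return default
        entry = entry[key]
    return entry

# query(entry, "instance.config.@id") == entry["instance"]["config"]["@id"]
\end{lstlisting}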
\paragraph{The Analyzer package} defines the work classes to extract information from log entries.
The package's init-module defines the Result and ResultStore classes, as well as the abstract base class for the Analyzers.
As shown in \autoref{code:anaylzer}, this base class provides the basic mechanics to access the settings.
As shown in \autoref{code:analyzer}, this base class provides the basic mechanics to access the settings.
The core feature of this project is condensed in the method stub \texttt{process}.
It is fed with a parsed entry from \autoref{sec:loaders}, processes it, possibly updates the internal state of the class, and can then decide to end the processing of the particular log entry or feed it down into the remainder of the analysis chain.
When all log entries of a log file are processed, the \texttt{result} method returns the findings of this analysis instance (see \autoref{par:result}).
\lstinputlisting[language=python,caption={Analyzer base class},label=code:anaylzer]{code/analyzer.py}
\lstinputlisting[language=python,caption={Analyzer base class},label=code:analyzer]{code/analyzer.py}
There are 23 classes implementing analysis functionality, splitted into modules for generic use, Biodiv2go analysis, and filtering purposes.
There are 23 classes implementing analysis functionality, partitioned into modules for generic use, Biodiv2go analysis, and filtering purposes.
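As an illustration, a toy analyzer might look like the following sketch; only \texttt{process} and \texttt{result} are given by the base class, the constructor signature and the pass-through convention are assumptions:
\begin{lstlisting}[language=python,caption={Sketch: a hypothetical minimal analyzer}]
class EventCounter(Analyzer):
    # Toy example: count log entries per event type.
    def __init__(self, settings):
        super().__init__(settings)  # assumed signature
        self.counts = {}

    def process(self, entry):
        event = entry.get("event", "unknown")
        self.counts[event] = self.counts.get(event, 0) + 1
        return entry  # pass the entry down the analysis chain

    def result(self):
        return self.counts
\end{lstlisting}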
The settings provided by the base class include access to the client connecting to the game's server.
This allows the Analyzers to fetch additional data like game configurations or media files.
The ActivityMapper analyzer already makes use of that.
In a similar fashion it is possible to load other external data like questionnaire spreadsheets.
\paragraph{Results}\label{par:result} are stored in a \texttt{Result} object (\texttt{analysis.analyzers.analyzer.\_\_init\_\_}).
This class keeps track of the origin of the resulting data to allow filtering for results by arbitrary analzing classes.
This class keeps track of the origin of the resulting data to allow filtering for results by arbitrary analyzing classes.
As \autoref{code:anaylzer} shows, the \texttt{Result}s are stored in a \texttt{ResultStore}.
As \autoref{code:analyzer} shows, the \texttt{Result}s are stored in a \texttt{ResultStore}.
This store - defined next to the \texttt{Result} class - provides means to structure the results by arbitrary measures.
By passing the store's reference into the analyzers, any analyzer can introduce categorization measures.
This allows for example to distinguish several log files by name, or to combine log files and merge the results by events happening during the games' progress.
@ -95,12 +100,12 @@ With an default of an dictionary of lists, the API supports a callable factory f
\paragraph{Rendering of the Results} is done in the \texttt{render} package.
Similar to the Analyzers' package, the render package defines its common base class in the initialization module, as shown in \autoref{code:render}.
It provides implementors means to filter the result set to relevant analysis types through the \texttt{filter} methods.
It provides implementers with means to filter the result set to relevant analysis types through the \texttt{filter} methods.
Of course, the implementation of the rendering method is left open.
\lstinputlisting[language=python,caption={Render base class},label=code:render]{code/render.py}
There are 18 implementations, again splitted into generic and game-specific ones.
There are 18 implementations, again split into generic and game-specific ones.
The most generic renderers just dump the results into JSON files or echo them to the console.
A more advanced implementation relies on the \texttt{LocationAnalyzer} and creates a KML file with a track animation (example: \autoref{img:oebge}).
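Such a generic JSON dump could be implemented roughly as follows; this is a sketch, and both the \texttt{filter} call and the result attribute are assumptions:
\begin{lstlisting}[language=python,caption={Sketch: a hypothetical minimal renderer}]
import json

class JSONDump(Render):
    # Toy example: write the relevant results to a JSON file.
    def render(self, results, path):
        relevant = self.filter(results)  # assumed base-class helper
        with open(path, "w") as out:
            json.dump([r.data for r in relevant], out, indent=2)
\end{lstlisting}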
@ -115,12 +120,12 @@ The two implementing classes are designed for Biodiv2go and a Geogames-Team log
Using a REST API, the \texttt{Biogames} client integrates seamlessly into the authentication and authorization of the game server.
The client acts as proxy for users to avoid issues with cross-site scripting (XSS) or cross-origin resource sharing (CORS).
The Geogames-Team's geogames like Neocartographer wirte game logs to files and only have a server running during the active game.
The Geogames-Team's geogames like Neocartographer write game logs to files and only have a server running during the active game.
Therefore, an additional log providing server was created to allow access to the log files (see also: \autoref{sec:ggt-server}).
Clients can have arbitrary amounts of options, as all fields in the JSON settings file are passed through (see \autoref{img:oebkml}, section "source").
\subsection{Web Interface}\label{sec:web}
\subsection{Web Interface for prepared results}\label{sec:web}
The selector package holds a Flask\furl{http://flask.pocoo.org/} app for a web interface for non-expert users.
It utilizes the provided clients (see \autoref{sec:source}) for authentication, and gives users the following options:
\begin{itemize}
@ -138,6 +143,25 @@ When problems occur, the status page informs the user, too.
As Flask does not recommend serving static files through itself, an Nginx HTTP server\furl{https://www.nginx.com/} is configured to serve the result files.
\subsubsection{User workflow}
The index page of the web UI features a login form.
It offers a selection for the different configured game backends (see \autoref{img:webindex}).
While a failed login stays at the index, a successful attempt redirects the user to the result overview (see \autoref{img:webresults}).
Here, both the results of completed analysis runs and the status of scheduled and running jobs are visible.
For finished runs, there are links to the result artifacts.
The link \emph{create new analysis} leads to the configuration menu for new analysis runs (see \autoref{img:webcreate}).
It lists the game logs available for the logged in user, and offers a selection of the predefined analysis configurations.
With a given name, it is easy to identify the results for each analysis run in the result overview page.
\subsection{Result interface}
Accompanying the Web interface above is the result interface.
Here, results of the analysis runs issued in the Web interface are displayed to the users.
\autoref{img:trackfi} shows a result by example: The combination of spatial positions of players and the screen activity.
\image{\textwidth}{../../PresTeX/images/track-fi}{ActivityMapper: Combined screen activity and spatial progress}{img:trackfi}
\subsection{Task definition}\label{sec:tasks}
This package provides the tasks available for execution; it is the interface for Celery\furl{http://www.celeryproject.org/} workers and issuers.
The key point is the task \texttt{analyze} to start new analysis runs.
@ -146,7 +170,7 @@ A free worker node claims the task and executes it.
During the runtime, status updates are stored in the Redis DB to inform the issuer about progress, failures and result artifacts.
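A sketch of such a task; the broker URL and the helper steps are assumptions, only the task name \texttt{analyze} and the Redis-backed status updates follow the text:
\begin{lstlisting}[language=python,caption={Sketch: the analyze task (hypothetical details)}]
import json
from celery import Celery

app = Celery("tasks",
             broker="redis://redis:6379/0",
             backend="redis://redis:6379/0")

@app.task(bind=True)
def analyze(self, settings_json):
    # Progress updates land in the Redis backend, where the web
    # interface polls them for the status page.
    self.update_state(state="PROGRESS", meta={"step": "parsing settings"})
    settings = json.loads(settings_json)
    self.update_state(state="PROGRESS", meta={"step": "running analysis"})
    # ... run the analyzer chain and collect result artifact paths
    return {"artifacts": [], "name": settings.get("name")}
\end{lstlisting}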
\section{Service \& Service composition}
\section{Services \& Service composition}
Following the implementation above, the following services are necessary:
\begin{itemize}
@ -159,28 +183,28 @@ Following the implementation above, the following services are necessary:
Two additional services were used, one for a local BioDiv2Go server, one as log provider for the Neocartographer logs.
The services are managed using Docker\furl{https://www.docker.com/}.
This provides a clear ground for development as well as a easily integratable solution.
This provides a clear ground for development as well as an easily integrable solution.
Although Docker as a technology may be a current hype, the build scripts in human-readable format provide documentation about dependencies and installation steps if necessary.
\subsection{Background worker: Celery}\label{sec:srv-celery}
The Celery worker process provides the tasks defined in \autoref{sec:tasks}.
Therefore, it requires all the analysis tools, access to the game log data, and access to a storage location to store results.
Additionally, a connection to the Redis DB for the job queue is required.
Access to redis and to game log providers is granted via a docker network, a storage is mounted with a writable docker volume.
Access to Redis and to game log providers is granted via a docker network; storage is mounted as a writable docker volume.
\subsection{User interface: Flask}
The user interface needs to be available to the public, and needs to be attached to the Redis DB to append analysis jobs to the job queue.
In order to use the Celery API, it too has to include the whole analysis project.
Therefore it is appropriate to use a single docker image for both the Celery and the Flask container.
Although it would be possible to use seperate images without much overhead in disk space\footnote{
Although it would be possible to use separate images without much overhead in disk space\footnote{
Docker saves each step defined in the Dockerfile as layer.
Using such a layer as basis for another image allows to ship additions with only the difference layer.
Unfortunately, each additional layer consumes more space, and optimizations like removal of build-time requirements may lead to increased runtime overhead when building the images.
},
this reuse with less dependecies helps to keep development on track.
this reuse with less dependencies helps to keep development on track.
The image itself is rather straightforward.
With an Alpine Linux\furl{https://alpinelinux.org/} image as basis, build-time and runtime dependecies are installed with alpine's packet management system.
With an Alpine Linux\furl{https://alpinelinux.org/} image as basis, build-time and runtime dependencies are installed with Alpine's packet management system.
Then the Python libraries are installed using pip, and the build-time requirements are cleared.
To reduce the size of the image, once these steps are working they are combined into a single layer.
@ -200,22 +224,28 @@ Running in the docker network, the only configuration is the volume for persisti
\subsection{Geogame Log file provider}\label{sec:ggt-server}
To provide an HTTP interface for geogames without a permanent game server, this service does not need to be public.
With an already integrated HTTP server running nginx, it is obvious to reuse this image, too.
With an already integrated HTTP server running Nginx, it is obvious to reuse this image, too.
This service, however, does need a little configuration:
To avoid parsing HTML index sites or generating metadata indices, the autoindex feature of nginx is used.
With the format option\furl{http://nginx.org/en/docs/http/ngx_http_autoindex_module.html\#autoindex_format}, this delievers JSON data instead of HTML, leading to a much more pleasant client.
To avoid parsing HTML index sites or generating metadata indices, the autoindex feature of Nginx is used.
With the format option\furl{http://nginx.org/en/docs/http/ngx_http_autoindex_module.html\#autoindex_format}, this delivers JSON data instead of HTML, leading to a much more pleasant client.
\subsection{BioDiv2Go Server}
To integrate nicely into the project and the development machines used during this thesis, the BioDiv2Go server was packaged into docker containers, too (see \autoref{app:biogames}).
\subsection{Frontend \& Reverse Proxy: Traefik}\label{sec:srv-traefik}
Traefik\furl{https://traefik.io/} is a reverse proxy.
It offers intergration in service orchestration systems like Docker, Swarm, Kubernetes.
It offers integration in service orchestration systems like Docker, Swarm, Kubernetes.
With few lines of configuration, it detects new services automatically, and can create appropriate SSL/TLS certificates on the fly via Let's Encrypt.
Here, it is configured to watch docker containers, and create forwarding rules for those marked with docker labels.
For fine-grained control, the creation of default forwards is disabled, so only explicitly marked containers are subject to this automatic proxy.
The label \texttt{traefik.enable=true} enables Traefik's reverse proxy pipeline for this container, while \texttt{traefik.port=8080} documents the port where the container exposes its service.
The proxy rule to forward traffic to this container is configured with \texttt{traefik.frontend.rule= Host:select.ma.potato.kinf.wiai.uni-bamberg.de}.
Here Traefik supports a wide range of options\furl{https://docs.traefik.io/basics/\#frontends}, including grouping by any or all semantics with multiple rules.
For the purposes of this project, a wildcard domain record was used for the development machine, so each service can be accessible with its own subdomain.
See \autoref{app:traefik} for an example configuration.
@ -227,4 +257,4 @@ The advantage of docker-compose is the definition of all images, volumes and net
When a scenario with high load occurs, this definition allows for simple scaling.
To create more celery worker nodes, issuing the command \textit{docker-compose scale worker=8} suffices to create 8 worker containers running in parallel.
\image{\textwidth}{architecture.pdf}{Service composition overview}{img:arch}
\image{.75\textwidth}{architecture.pdf}{Service composition overview}{img:arch}

@ -1,16 +1,16 @@
\section{Methodology}
BioDiv2Go's Geogame2 (FindeVielfalt Simulation) was the base case during the development of the analysis stack.
It was chosen due to its well defined REST API, including log retrieval and user authentication.
This section shows how the framework copes with the integration of another game with completly different architecture and log style.
This section shows how the framework copes with the integration of another game with completely different architecture and log style.
\subsection{Choosing an additional game}
\autoref{tab:logs2} and \ref{tab:logs3} show an overview of the log files of the different games available.
The game with the highest amount of available log files is Neocartographer.
Neocartographer saves its log files as GPX track.
Additional game states are embedded into the event tag of some of the GPX trackpoints.
Additional game states are embedded into the event tag of some of the GPX track-points.
A first look yields some GPX files of only a few bytes, just a GPX header with a few track-points and no game actions at all.
However, compared to the other games it has a comprehensible log structure and even with some empty logs there should be a reasonable number of useable game logs.
However, compared to the other games it has a comprehensible log structure and even with some empty logs there should be a reasonable number of usable game logs.
\begin{longtable}[H]{ccp{0.6\textwidth}}
Geogame & Log files & Notes \\
@ -25,7 +25,7 @@ GeoTicTacToe & $\approx13$ & CSV with pipes; no temporal data; events + tracks\\
Geogame & Log files & Notes \\
\hline
GeoTicTacToe & $\approx2$ & intermediate log format\\
GeoTTT & $\approx130$ & fragmented structure: incomplete or splitted?\\
GeoTTT & $\approx130$ & fragmented structure: incomplete or split\\
Neocartographer\furl{http://www.geogames-team.org/?p=23} & $\approx400$ & Partly broken GPX: missing description information; one GPX file per player\\
MissingLink & $\approx6$ & Partly broken GPX: missing spatial information; one GPX file per player\\
Equilibrium\furl{http://www.geogames-team.org/?p=148} & $\approx40$ & GPX with missing end tag\\
@ -33,14 +33,14 @@ Equilibrium\furl{http://www.geogames-team.org/?p=148} & $\approx40$ & GPX with m
\label{tab:logs3}
\end{longtable}
The following section \autoref{sec:neocart} describes the intergration efforts for Neocartographer.
The following section \autoref{sec:neocart} describes the integration efforts for Neocartographer.
\section{Integration of Neocartographer}\label{sec:neocart}
\subsection{Neocartographer Game Log Files}
The log files are grouped by folders and contain the GPX tracks and media, mainly photos.%TODO
The log files are grouped by folders and contain the GPX tracks and media, mainly photos (see \autoref{img:nclog}).
Many Neocartographer GPX files have invalid XML markup, as \autoref{tab:xml} shows.
@ -56,36 +56,45 @@ invalid attribute values & <trkpt lat="UNKNOWN" lon="UNKNOWN">\\
\end{longtable}
The first two error types (missing separation between two attributes and unclosed tags) are syntactic XML errors.
With the lxml\furl{http://lxml.de/} revocery parser\footnote{\texttt{lxml.etree.XMLParser(recover=True)}} the unclosed tag error is suppressed without further data loss\footnote{With an empty event tag, the data is obviously still missing}.
With the lxml\furl{http://lxml.de/} recovery parser\footnote{\texttt{lxml.etree.XMLParser(recover=True)}} the unclosed tag error is suppressed without further data loss\footnote{With an empty event tag, the data is obviously still missing}.
In the case of the missing attribute separation, the recovery parser parses only the first attribute properly.
Any additional attributes are stored as a raw string in the \texttt{tail} field of the XML element's object.
With string manipulation, the \texttt{geoid} attribute can be restored\footnote{In the data probe, this error occured only with the \texttt{geoid} attribute}.
With string manipulation, the \texttt{geoid} attribute can be restored\footnote{In the data probe, this error occurred only with the \texttt{geoid} attribute}.
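A minimal sketch of this repair, assuming the raw attribute text indeed ends up in the \texttt{tail} field as described (file name and element layout are illustrative):
\begin{lstlisting}[language=python]
from lxml import etree

gpx_bytes = open('session.gpx', 'rb').read()  # hypothetical input file

# recover=True suppresses syntactic errors such as unclosed tags
parser = etree.XMLParser(recover=True)
root = etree.fromstring(gpx_bytes, parser=parser)

for trkpt in root.iter('{*}trkpt'):
    # Hypothetical repair: pull the geoid attribute back out of the
    # raw string the recovery parser left in the tail field
    raw = trkpt.tail or ''
    if 'geoid="' in raw and 'geoid' not in trkpt.attrib:
        trkpt.set('geoid', raw.split('geoid="', 1)[1].split('"', 1)[0])
\end{lstlisting}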
The other two errors lead to data corruption, as both cases fail to qualify as valid latitude/longitude pairs.
With the assumption of a two-digit longitude\footnote{The names and other valid longitudes suggest the location of the gamefield in the eastern part of bavaria}, the correct value can be restored through string parsing from the offset of the second decimal separator.%TODO
With the assumption of a two-digit longitude\footnote{The names and other valid longitudes suggest the location of the game field in the eastern part of Bavaria (Augsburg, Bamberg)}, the correct value can be restored through string parsing from the offset of the second decimal separator.
Good practice requires the parser to issue a loud warning to indicate possible errors here.
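The following hypothetical helper illustrates the idea; the exact format of the fused value is an assumption inferred from the error description:
\begin{lstlisting}[language=python]
import logging

def split_fused_coordinate(value):
    # Recover lat/lon from a fused string such as '49.893511.1235',
    # assuming a two-digit longitude (hypothetical helper)
    second_sep = value.index('.', value.index('.') + 1)
    cut = second_sep - 2  # two integer digits precede the second separator
    lat, lon = float(value[:cut]), float(value[cut:])
    logging.warning('Restored fused coordinate %r to %s/%s', value, lat, lon)
    return lat, lon
\end{lstlisting}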
The last error type occurs with nearly all first and second entries.
They contain the players' \emph{join} and \emph{start} events, which occur before a position fix is available.
Currently these log entries are discared with an accompanying log message.
Currently these log entries are discarded with an accompanying log message.
A possible improvement would be to keep a reference to these entries and attach the first valid location entry that appears.
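A sketch of this improvement could buffer such entries until the first fix arrives (the dictionary-based entry layout is an assumption):
\begin{lstlisting}[language=python]
def backfill_positions(parsed_entries):
    # Sketch: attach the first valid fix to earlier join/start events
    pending = []
    for entry in parsed_entries:
        if entry.get('lat') is None:  # no position fix available yet
            pending.append(entry)     # keep a reference instead of discarding
            continue
        for early in pending:         # backfill with the first valid location
            early['lat'], early['lon'] = entry['lat'], entry['lon']
        pending.clear()
    return parsed_entries
\end{lstlisting}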
\subsection{Log Retrieval}
As there is only a playtime server, the files are stored on the filesystem of the server.
As there is only a playtime server, the files are stored on the file system of the server.
Therefore, an Nginx HTTP server was configured to serve folder indices formatted as JSON (see \autoref{sec:ggt-server}).
This allows the framework's loaders to retrieve the log files in a clean manner.
An additional client implenetation in the framework (see \autoref{sec:source}) converts the JSON index to the structure used internally and uses the given functionality to handle file downloads.
An additional client implementation in the framework (see \autoref{sec:source}) converts the JSON index to the structure used internally and uses the given functionality to handle file downloads.
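A sketch of such a loader, assuming Nginx's \texttt{autoindex\_format json} output and the \texttt{requests} library:
\begin{lstlisting}[language=python]
import requests

def list_gpx_urls(base_url):
    # The JSON index yields entries such as
    # {"name": "track.gpx", "type": "file", "size": 1234}
    for entry in requests.get(base_url, timeout=10).json():
        if entry.get('type') == 'file' and entry['name'].endswith('.gpx'):
            yield base_url.rstrip('/') + '/' + entry['name']
\end{lstlisting}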
\subsection{Analysis Functionality}
Using the \texttt{LocationAnalyzer} in combination with a \texttt{KMLRender} renderer, the analysis of log files was successfull on the first run.
Using the \texttt{LocationAnalyzer} in combination with a \texttt{KMLRender} renderer, the analysis of log files was successful on the first run.
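A hypothetical invocation of this combination (only the class names \texttt{LocationAnalyzer} and \texttt{KMLRender} stem from the framework; the loader call and method names are assumptions):
\begin{lstlisting}[language=python]
logs = load_neocartographer_logs(index_url)  # hypothetical loader call
result = LocationAnalyzer().analyze(logs)    # extract spatial tracks
KMLRender().render(result, 'tracks.kml')     # write KML for map viewers
\end{lstlisting}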
\subsection{UI Integration}
The game selection on the login page (see \autoref{img:webindex}) uses a dictionary in the \texttt{clients} package.
Once a client implementation is registered under a name, it is automatically added to the selection box.
The URL of the log-providing service has to be configured in the \texttt{selector} Flask application, too.
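Conceptually, the registration could look as follows (module and variable names are illustrative, not the actual contents of the \texttt{clients} package):
\begin{lstlisting}[language=python]
# clients/__init__.py -- sketch of the registration dictionary
from . import biogames, neocart

CLIENTS = {
    'BioDiv2Go': biogames.Client,
    'Neocartographer': neocart.Client,
}

# selector Flask app -- sketch of the per-game log service URL
LOG_SERVICES = {'Neocartographer': 'https://logs.example.org/neocart/'}
\end{lstlisting}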
\section{Conclusion}
While the implementation of a new client to download log files was straightforward, the parsing of these files prooved quite difficult.
While the implementation of a new client to download log files was straightforward, the parsing of these files proved quite difficult.
However, it was not the integration into the framework but the syntactic errors in the log files that proved hard.
While the BioDiv2Go parser requires less than 20 lines of code, the newly written parser approaches the 60-line mark with all the error handling code (see \autoref{code:bd2l} and \ref{code:ncl}).
Once this obstacle is passed, the integration is nearly seamless.
%TODO: webclient
As further challenge prooved - like with BioDiv2Go - the understanding of the structure of the log, i.e. deriving the games' internal state machine.
A further challenge - as with BioDiv2Go - proved to be understanding the structure of the log, i.e. deriving the game's internal state machine.
On the downside, the addition of lxml has increased the size of the complete image by nearly 40 MB, breaking the 300 MB mark (see \autoref{img:image-size}).
Naturally, this is a rather small price to pay for the improved functionality.
\image{\textwidth}{image-size}{Increased image size with lxml}{img:image-size}

View File

@ -1,9 +1,18 @@
\section{Review}
As shown in \autoref{sec:eval}, the proposed framework (see \autoref{sec:solution}) and its implementation (see \autoref{sec:implementation}) deliver what \autoref{sec:scope} asked for regarding the portability aspect.
\subsection{Modular framework}
Given the lean framework core, the development of new analyzers and rendering targets is encouraged.
This is backed by the focus on a standalone application instead of extending log-processing systems that struggle with spatial data at the required resolution.
As experienced in \autoref{sec:eval}, a change in the import stage of the processing pipeline goes completely unnoticed by the other parts.
The same is true for the addition or modification of analysis or rendering functionality.
\subsection{Web UI}
With the web interface depicted in \autoref{app:webif}, it is possible for non-expert users to generate pre-defined reports, while researchers can dive into the API of the framework either as preprocessing step or integrated into a larger project.
The web UI also gives non-expert users direct access to the results.
\subsection{Results}
The selection of rendered results in \autoref{img:oebkml}, \ref{img:oebge}, \ref{img:retries}, \ref{img:trackfi}, and \ref{img:time} showcases the descriptive analysis capabilities already available.
\autoref{img:trackfi} features a map view accessible through a browser, which aligns the active screen content of the mobile device with the spatial track.
@ -11,10 +20,12 @@ Selecting the preview image in the timeline below the map, a marker shows the po
\autoref{img:speed} is based on the same analysis result used for \autoref{img:trackfi} with additional postprocessing in between.
This aggregates the speeds of all selected game sessions and calculates the average.
With a baseline reference of 1 kph speed, this plot can hint at gamefield design issues, e.g. overly long walking passages at high speed.
With a baseline reference of 1 kph speed, this plot can hint at game field design issues, e.g. overly long walking passages at high speed.
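A sketch of this aggregation step (the per-session frame layout and column names are assumptions):
\begin{lstlisting}[language=python]
import pandas as pd

# one DataFrame per selected game session, each with a speed column
speeds = pd.concat(session_frames, ignore_index=True)
profile = speeds.groupby('relative_time')['speed_kph'].mean()
baseline = 1.0  # kph reference to spot overly long fast passages
\end{lstlisting}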
\section{Outlook}
Considering the future, there are many analysis and rendering targets to come.
As with any kind of software, sooner or later the point may be reached where the proposed architecture fails.
Given the positive result of the integration evaluation, this seems rather pessimistic.
The framework even shows a self-debugging quality: once a log parser is working, analyzers can help to find patterns in the log messages that outline the log's structure.
Especially the spatial generalizations presented in \cite{adrienko2011spatial} or the overestimating error accumulation effect of GPS described in \cite{Ranacher_2015} compared to simplified tracks (e.g. following the methodology of \cite{Chen2009TrajectorySM}) are analyses the author looks forward to trying.

View File

@ -5,16 +5,15 @@
\image{\textwidth}{../../PresTeX/images/oeb-ge}{Result visualized}{img:oebge}
\image{\textwidth}{../../PresTeX/images/simu-retries}{Experimental rounds}{img:retries}
\image{.7\textwidth}{../../PresTeX/images/track-fi}{ActivityMapper: Combined screen activity and spatial progress}{img:trackfi}
\image{\textwidth}{../../PresTeX/images/speed}{Aggregated speed distribution of four gamefields}{img:speed}
\image{.9\textwidth}{../../PresTeX/images/time-rel}{Time distribution of game sessions overview of four gamefields}{img:time}
\image{\textwidth}{../../PresTeX/images/speed}{Aggregated speed distribution of four game fields}{img:speed}
\image{\textwidth}{../../PresTeX/images/time-rel}{Time distribution of game sessions overview of four game fields}{img:time}
\section{Containers}
\subsection{Kibana test setup} \label{app:kibana}
\lstinputlisting[language=yaml,caption={Docker-compose file for Kibana test setup},label=code:kibana,numbers=left]{code/kibana-docker-compose.yml}
\subsection{Biogames server dockerized} \label{app:biogames}
\image{\textwidth}{biogames.pdf}{Dockerized setup for biogames}{img:bd2gdocker}
\image{.75\textwidth}{biogames.pdf}{Dockerized setup for biogames}{img:bd2gdocker}
\lstinputlisting[language=yaml,caption={Docker-compose file for Biogames server},label=code:bd2s,numbers=left]{code/biogames/docker-compose.yml}
\lstinputlisting[language=yaml,caption={Dockerfile for Biogames server},label=code:bd2d,numbers=left]{code/biogames/Dockerfile}
\lstinputlisting[language=bash,caption={Entrypoint for Biogames docker container},label=code:bd2e,numbers=left]{code/biogames/start.sh}
@ -29,7 +28,15 @@
\section{Loader implementations}
\lstinputlisting[language=python,caption={Log loader for BioDiv2Go},label=code:bd2l,numbers=left]{code/biogames.py}
\lstinputlisting[language=python,caption={Log loader for Neocartographer},label=code:ncl,numbers=left]{code/neocart.py}
\image{\textwidth}{nclog}{Neocartographer game log}{img:nclog}
\section{Web interface}\label{app:webif} %TODO
\section{Web interface}\label{app:webif}
\subsection{Workflow}
\image{\textwidth}{webgui}{Web workflow}{img:webflow}
\subsection{Samples}
\image{\textwidth}{webgui/index}{Web login}{img:webindex}
\image{\textwidth}{webgui/results}{Analysis result overview}{img:webresults}
\image{\textwidth}{webgui/create}{Configuration for a new analysis run}{img:webcreate}
\image{\textwidth}{webgui/status}{Raw JSON status data}{img:webstatus}
The status page (\autoref{img:webstatus}) is not directly linked anywhere in the UI.

View File

@ -573,3 +573,9 @@ isbn="978-3-642-23199-5"
year={2013},
organization={IEEE}
}
@inproceedings{Chen2009TrajectorySM,
title={Trajectory simplification method for location-based social networking services},
author={Yukun Chen and Kai Jiang and Yu Zheng and Chunping Li and Nenghai Yu},
booktitle={GIS-LBSN},
year={2009}
}

View File

@ -7,7 +7,7 @@
\chapter{Log processing, trajectories \& game analysis}
\input{content/2-state-of-the-art}
\chapter{Design of the analysis framework}\label{sec:solution}
\chapter{Design of the modular analysis framework}\label{sec:solution}
\input{content/3-solution}
\chapter{Implementation of the analysis framework}\label{sec:implementation}
@ -16,5 +16,5 @@
\chapter{Portability evaluation of the analysis framework}\label{sec:eval}
\input{content/5-evaluation}
\chapter{Discussion and outlook}
\chapter{A modular framework: Discussion and outlook}
\input{content/6-discussion}

View File

@ -12,25 +12,20 @@ subgraph{rank = max;
}
subgraph{
rank=same;
in [label="Internal network",shape="note"];
in [label="Internal network",shape="diamond"];
n [label="Static file server\nNginx"];
}
{rank=source;
t [label="HTTP frontend\nTraefik"];
subgraph{
rank=source;
t [label="HTTP frontend\nTraefik",style=dotted];
en [label="External network",shape="diamond"];
}
/*{
rank=same;
b [label="Biogames"];
g [label="Geogames server"];
};*/
/*s -- db [label="Internal network "];
c -- db [];*/
t -- s [label=" External network,\nlabel"];
t -- n [];
c -- n [label="Shared volume "];
/*s -- gg
c -- gg [];*/
t -- en [style=dotted];
s -- en [label="labels"];
n --en [label="labels"];
c -- n [label="Shared volume ",style=dashed];
s -- in;
c -- in;
db --in;

[Binary image added, 15 KiB]

[Binary image added: ThesTeX/images/nclog.png, 86 KiB]

ThesTeX/images/webgui.dot Normal file (14 lines)
View File

@ -0,0 +1,14 @@
digraph{
margin=0;
//rankdir="LR";
index;
results;
create;
status;
index -> results [label=" login"];
index -> index [label=" failed login"];
create -> results [label=" redirect"];
results -> create [label="click"];
results -> results [label=" refresh"];
}

[Four binary images added: 93 KiB, 30 KiB, 121 KiB, 95 KiB]

View File

@ -104,9 +104,9 @@
% Stichwortverzeichnis soll im Inhaltsverzeichnis auftauchen
% Sprungmarke mit Phantomsection korrigiert
\phantomsection%
\addcontentsline{toc}{chapter}{Index}%
%\addcontentsline{toc}{chapter}{Index}%
% Stichwortverzeichnis endgueltig anzeigen
\printindex%
%\printindex%
\appendix

View File

@ -44,7 +44,7 @@
\makeatletter
\ifposter
\else
\hypersetup{pdftitle={\@title}, pdfauthor={\@author}, linktoc=page, pdfborder={0 0 0 [3 3]}, breaklinks=true, linkbordercolor=unibablueI, menubordercolor=unibablueI, urlbordercolor=unibablueI, citebordercolor=unibablueI, filebordercolor=unibablueI}
\hypersetup{pdftitle={\@title}, pdfauthor={\@author}, pdfsubject={\@subtitle}, pdfkeywords={\gitAbbrevHash}, linktoc=page, pdfborder={0 0 0 [3 3]}, breaklinks=true, linkbordercolor=unibablueI, menubordercolor=unibablueI, urlbordercolor=unibablueI, citebordercolor=unibablueI, filebordercolor=unibablueI}
\fi
%% Define a new 'leo' style for the package that will use a smaller font.
\def\url@leostyle{%
@ -451,6 +451,4 @@ major line width/.initial=1pt,
\changemenucolor{gray}{txt}{named}{unibablueI}
\fi
\newcommand{\furl}[1]{
\footnote{\url{#1}}
}
\newcommand{\furl}[1]{\footnote{\url{#1}}}

0
meta/config/hyphenation.tex Executable file → Normal file
View File

View File

@ -1,9 +1,9 @@
\usepackage[utf8]{inputenc}
\usepackage{lmodern}
\usepackage[T1]{fontenc}
\usepackage{gitinfo2}
\ifgit
\ifpresentation
\usepackage{gitinfo2}
\else
\ifthesis
\usepackage{gitinfo2}