From d9fa60dfe57c5b83484077079f0637495494d8fa Mon Sep 17 00:00:00 2001 From: Clemens Klug Date: Wed, 14 Mar 2018 18:03:13 +0100 Subject: [PATCH] replace source with clients --- analysis/analyzers/settings.py | 8 ++-- analysis/sources/__init__.py | 5 -- analysis/sources/biogames.py | 85 ---------------------------------- analysis/sources/source.py | 18 ------- analysis/util/download.py | 4 +- clients/webclients.py | 8 +++- 6 files changed, 13 insertions(+), 115 deletions(-) delete mode 100644 analysis/sources/__init__.py delete mode 100644 analysis/sources/biogames.py delete mode 100644 analysis/sources/source.py diff --git a/analysis/analyzers/settings.py b/analysis/analyzers/settings.py index 36f7682..17631a5 100644 --- a/analysis/analyzers/settings.py +++ b/analysis/analyzers/settings.py @@ -1,12 +1,12 @@ import json import sys -from analysis.sources import SOURCES +from clients.webclients import CLIENTS def load_source(config): - if config["type"] in SOURCES: - source = SOURCES[config["type"]]() - source.connect(**config) + if config["type"] in CLIENTS: + source = CLIENTS[config["type"]](**config) + source.login() return source diff --git a/analysis/sources/__init__.py b/analysis/sources/__init__.py deleted file mode 100644 index a1db87b..0000000 --- a/analysis/sources/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .biogames import Biogames - -SOURCES = { - "Biogames": Biogames, -} \ No newline at end of file diff --git a/analysis/sources/biogames.py b/analysis/sources/biogames.py deleted file mode 100644 index ac25e10..0000000 --- a/analysis/sources/biogames.py +++ /dev/null @@ -1,85 +0,0 @@ -import json -import logging -import typing -from tempfile import TemporaryDirectory - -import os - -from sources.source import Source - -import shutil -import requests - -log: logging.Logger = logging.getLogger(__name__) - - -class Biogames(Source): - def __init__(self): - self.headers: typing.Dict[str, str] = {'Accept': 'application/json'} - self.cookies: typing.Dict[str, str] = {} - self.id2link: typing.Dict[str, str] = {} - self.host: str = None - - def connect(self, **kwargs): - for i in ['username', 'password', 'url', 'login_url', 'host']: - if not i in kwargs: - raise ValueError("missing value " + i) - csrf_request = requests.get(kwargs['url']) - if csrf_request.status_code != 200: - raise ConnectionError("unable to obtain CSRF token (" + str(csrf_request) + ")", csrf_request.url) - self.cookies['csrftoken'] = csrf_request.cookies['csrftoken'] - log.info("obtained CSRF token (" + self.cookies['csrftoken'] + ")") - login_payload = { - 'username': kwargs['username'], - 'password': kwargs['password'], - 'next': '', - 'csrfmiddlewaretoken': 'csrftoken' - } - login = requests.post(kwargs['login_url'], data=json.dumps(login_payload), cookies=self.cookies) - if login.status_code != 200: - raise ConnectionError("Unable to authenticate!", login, login.text) - self.cookies['sessionid'] = login.cookies['sessionid'] - log.info("obtained sessionid (" + self.cookies['sessionid'] + ")") - self.url = kwargs['url'] - self.host = kwargs['host'] - log.info("stored url (" + self.url + ")") - - def list(self): - logs = self.get_json(self.url) - log.info(len(logs)) - for i in logs: - self.id2link[i["id"]] = i["link"] # TODO - return logs - - def get(self, ids: typing.Collection): - dir = TemporaryDirectory() - files = [] - for i in ids: - url = self.id2link[i] - filename = os.path.join(dir.name, url.split("/")[-1]) - file = self.download_file(url, filename) - if file: - files.append(file) - return dir - - def download_file(self, url, filename): - with open(filename, "wb") as out: - try: - download = self._get(url) - shutil.copyfileobj(download.raw, out) - return filename - except Exception as e: - log.exception(e) - os.remove(filename) - - def get_json(self, url): - http = self._get(url, stream=False) - if not http.ok: - raise ConnectionError("HTTP status is not OK", http.url) - return http.json() - - def close(self): - pass - - def _get(self, url, stream=True): - return requests.get(self.host + url, cookies=self.cookies, headers=self.headers, stream=stream) diff --git a/analysis/sources/source.py b/analysis/sources/source.py deleted file mode 100644 index 905b897..0000000 --- a/analysis/sources/source.py +++ /dev/null @@ -1,18 +0,0 @@ -import typing - - -class Source: - def connect(self, **kwargs): - raise NotImplementedError - - def list(self): - raise NotImplementedError - - def get(self, ids: typing.Collection): - raise NotImplementedError - - def get_json(self, url:str) -> dict: - raise NotImplementedError - - def close(self): - raise NotImplementedError diff --git a/analysis/util/download.py b/analysis/util/download.py index e5dfb78..0f3a0d9 100644 --- a/analysis/util/download.py +++ b/analysis/util/download.py @@ -18,7 +18,7 @@ def download_board(board_id, instance_config_id, sequence_id, source): sequence_id=sequence_id, board_id=board_id ) - board = source._get(url) + board = source.get(url) if not board.ok: raise ConnectionError() data = board.json() @@ -68,7 +68,7 @@ def get_json(source, url): if url in cache: return cache[url] try: - data = source.get_json(url) + data = source.get(url).json() except Exception as e: print("exception", e, e.args) # TODO: logging diff --git a/clients/webclients.py b/clients/webclients.py index 95f5c6c..025dbf4 100644 --- a/clients/webclients.py +++ b/clients/webclients.py @@ -45,6 +45,12 @@ class Client: self.download_file(path, filename, **kwargs) return target + def login(self): + pass + + def list(self): + pass + class BiogamesClient(Client): config_fields: typing.Dict[str, typing.List[str]] = { @@ -95,7 +101,7 @@ class BiogamesClient(Client): return self.download_files([i["file_url"] for i in self.list()]) -CLIENTS: typing.Dict[str, Client] = { +CLIENTS: typing.Dict[str, typing.Type[Client]] = { "Biogames": BiogamesClient, }