From 2ea11ef038f0f5babf2e22bcbefae36819546111 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Thu, 4 Apr 2024 15:20:12 +0200 Subject: [PATCH 001/129] added suds and pandas conversion libraries --- bfabric/src/pandas_helper.py | 44 ++++++++++++++++++++++++++++++++++ bfabric/src/suds_format.py | 46 ++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 bfabric/src/pandas_helper.py create mode 100644 bfabric/src/suds_format.py diff --git a/bfabric/src/pandas_helper.py b/bfabric/src/pandas_helper.py new file mode 100644 index 00000000..86fbf45a --- /dev/null +++ b/bfabric/src/pandas_helper.py @@ -0,0 +1,44 @@ +import pandas as pd +from typing import Any, List, Dict + + +def _stringify(a: Any) -> Any: + """ + :param a: Any variable + :return: Stringified variable + + Convert variable to a string if it is of non-basic data type, otherwise keep it as it is + TODO: Make a better separation between what is and what is not a basic data type + """ + if isinstance(a, list) or isinstance(a, dict) or isinstance(a, tuple): + return str(a) + else: + return a + +def _stringify_dict(d: dict) -> dict: + """ + :param d: A dictionary + :return: Same dictionary, with all values stringified if necessary + """ + return {k: _stringify(v) for k, v in d.items()} + +def list_dict_to_df(l: List[Dict]) -> pd.DataFrame: + """ + :param l: A list of dictionaries + :return: Pandas dataframe, where every list element is a new row + + * Columns are a union of all keys that appear in the dictionaries. Any missing key is treated as a NAN + * All non-basic data types are converted to strings + """ + return pd.concat([pd.DataFrame(_stringify_dict(r), index=[0]) for r in l]) + + +if __name__ == "__main__": + exampleLstDict = [ + {'cat': 1, 'dog': 2}, + {'cat': 3, 'mouse': ["a", "b"]}, + {'mouse': 5}, + {'cat': 1, 'dog': 2, 'mouse': 7}, + ] + + print(list_dict_to_df(exampleLstDict)) diff --git a/bfabric/src/suds_format.py b/bfabric/src/suds_format.py new file mode 100644 index 00000000..bef16671 --- /dev/null +++ b/bfabric/src/suds_format.py @@ -0,0 +1,46 @@ +from collections import OrderedDict +from typing import Any, Union +from suds.sax.text import Text +from suds.sudsobject import asdict + + +def _recursive_asdict(d, convert_types: bool) -> OrderedDict: + """Convert Suds object into serializable format. + https://stackoverflow.com/a/15678861 + :param d: The input suds object + :param convert_types: A boolean to determine if the simple types return should be cast to python types + :return: The suds object converted to an OrderedDict + """ + out = {} + for k, v in asdict(d).items(): + if hasattr(v, '__keylist__'): + out[k] = _recursive_asdict(v, convert_types) + elif isinstance(v, list): + out[k] = [] + for item in v: + if hasattr(item, '__keylist__'): + out[k].append(_recursive_asdict(item, convert_types)) + else: + out[k].append(convert_suds_type(item) if convert_types else item) + else: + out[k] = convert_suds_type(v) if convert_types else v + return OrderedDict(out) + + +def convert_suds_type(item: Any) -> Union[int, str]: + """ + Converts the suds type to an equivalent python type. There is, to my knowledge, only a single suds type which + is currently ever return, namely 'Text'. Integers and doubles are already cast to their python equivalents and + thus do not need to be explicitly handled. 
This may be subject to change in future versions + :param item: The suds item + :return: The item as a built-in python type + """ + if type(item) == Text: + return str(item) + return item + + +def suds_to_json(data, convert_types: bool = False): + if type(data) == list: + return [_recursive_asdict(d, convert_types) for d in data] + return _recursive_asdict(data, convert_types) From 48d47fb02464ba66f3fe8682662f8501cc446a2f Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Thu, 4 Apr 2024 16:43:49 +0200 Subject: [PATCH 002/129] minor --- bfabric/src/suds_format.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bfabric/src/suds_format.py b/bfabric/src/suds_format.py index bef16671..60c53d5b 100644 --- a/bfabric/src/suds_format.py +++ b/bfabric/src/suds_format.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from typing import Any, Union +from typing import Any, Union, List from suds.sax.text import Text from suds.sudsobject import asdict @@ -40,7 +40,7 @@ def convert_suds_type(item: Any) -> Union[int, str]: return item -def suds_to_json(data, convert_types: bool = False): +def suds_to_json(data, convert_types: bool = False) -> Union[OrderedDict, List[OrderedDict]]: if type(data) == list: return [_recursive_asdict(d, convert_types) for d in data] return _recursive_asdict(data, convert_types) From e5ee4203eb73f79003f27bbaedfd201e33fff2ec Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Mon, 8 Apr 2024 10:38:18 +0200 Subject: [PATCH 003/129] made example usage for read --- bfabric/examples/suds_to_listdict.py | 14 ++++++++++++++ bfabric/src/suds_format.py | 7 ++++--- 2 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 bfabric/examples/suds_to_listdict.py diff --git a/bfabric/examples/suds_to_listdict.py b/bfabric/examples/suds_to_listdict.py new file mode 100644 index 00000000..44ee3e62 --- /dev/null +++ b/bfabric/examples/suds_to_listdict.py @@ -0,0 +1,14 @@ +from bfabric import bfabric +from bfabric.src.suds_format import suds_to_json +from typing import List + + +def read_shallow(b: bfabric.Bfabric, endpoint: str, obj: dict) -> List[dict]: + response = b.read_object(endpoint=endpoint, obj=obj, plain=True) + responseDict = suds_to_json(response) + return responseDict[endpoint] + +b = bfabric.Bfabric() +resLstDict = read_shallow(b, 'user', {'login': 'fomins'}) + +print(resLstDict) \ No newline at end of file diff --git a/bfabric/src/suds_format.py b/bfabric/src/suds_format.py index 60c53d5b..03ab0ac4 100644 --- a/bfabric/src/suds_format.py +++ b/bfabric/src/suds_format.py @@ -4,7 +4,7 @@ from suds.sudsobject import asdict -def _recursive_asdict(d, convert_types: bool) -> OrderedDict: +def _recursive_asdict(d, convert_types: bool) -> dict: """Convert Suds object into serializable format. 
https://stackoverflow.com/a/15678861 :param d: The input suds object @@ -24,7 +24,8 @@ def _recursive_asdict(d, convert_types: bool) -> OrderedDict: out[k].append(convert_suds_type(item) if convert_types else item) else: out[k] = convert_suds_type(v) if convert_types else v - return OrderedDict(out) + # return OrderedDict(out) + return out def convert_suds_type(item: Any) -> Union[int, str]: @@ -40,7 +41,7 @@ def convert_suds_type(item: Any) -> Union[int, str]: return item -def suds_to_json(data, convert_types: bool = False) -> Union[OrderedDict, List[OrderedDict]]: +def suds_to_json(data, convert_types: bool = False) -> Union[dict, List[dict]]: if type(data) == list: return [_recursive_asdict(d, convert_types) for d in data] return _recursive_asdict(data, convert_types) From a5b22b0428c386f542e28533751c932d8ecffae2 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Mon, 8 Apr 2024 13:43:50 +0200 Subject: [PATCH 004/129] paginator impl as lib+example --- bfabric/examples/read_paginated_single.py | 8 +++ bfabric/src/paginator.py | 85 +++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 bfabric/examples/read_paginated_single.py create mode 100644 bfabric/src/paginator.py diff --git a/bfabric/examples/read_paginated_single.py b/bfabric/examples/read_paginated_single.py new file mode 100644 index 00000000..e53025ee --- /dev/null +++ b/bfabric/examples/read_paginated_single.py @@ -0,0 +1,8 @@ +from bfabric import bfabric +from bfabric.src.paginator import read + +b = bfabric.Bfabric() + +responseLst = read(b, 'run', query={}) + +print(len(responseLst)) \ No newline at end of file diff --git a/bfabric/src/paginator.py b/bfabric/src/paginator.py new file mode 100644 index 00000000..9df418d2 --- /dev/null +++ b/bfabric/src/paginator.py @@ -0,0 +1,85 @@ +from typing import Dict, Union, List, Any +from copy import deepcopy + +BFABRIC_QUERY_LIMIT = 100 + + +def read(engine, endpoint: str, query: dict = None) -> List[Dict]: + """ + Make a query to the engine. Determine the number of pages. Make calls for every page, concatenate results + :param engine: A BFabric API engine + :param endpoint: endpoint + :param query: query dictionary + :return: List of responses + """ + + + # Get the first page + response = engine.read_object(endpoint, query, plain=True) + nPages = response["numberofpages"] + + # Return empty list if nothing found + if not nPages: + return [] + + # Get results from other pages as well, if need be + # NOTE: Page numbering starts at 1 + responseLst = response[endpoint] + for iPage in range(2, nPages+1): + print('-- reading page', iPage, 'of', nPages) + + responseLst += engine.read_object(endpoint, query, page=iPage) + + return responseLst + + +# TODO: Is this scope sufficient? Is there ever more than one mutiquery parameter, and/or not at the root of dict? +def read_multi(engine, endpoint: str, query: dict, multiQueryKey: str, multiQueryVals: list) -> List[Dict]: + """ + Makes a 1-parameter multi-query (there is 1 parameter that takes a list of values) + Since the API only allows 100 queries per page, split the list into chunks before querying + :param engine: A BFabric API engine + :param endpoint: endpoint + :param multiQueryKey: key for which the multi-query is performed + :param multiQueryVals: list of values for which the multi-query is performed + :return: List of responses, concatenated over all multiquery values and all pages + + NOTE: It is assumed that there is only 1 response for each value. + TODO: Test what happens if there are multiple responses. 
Is read_multi even necessary? Maybe the API would + paginate by itself? + """ + + responseLst = [] + queryThis = deepcopy(query) # Make a copy of the query, not to make edits to the argument + + for i in range(0, len(multiQueryVals), BFABRIC_QUERY_LIMIT): + # Limit the multi-query parameter to an acceptable chunk size + queryThis[multiQueryKey] = multiQueryVals[i:i + BFABRIC_QUERY_LIMIT] + responseLst += read(engine, endpoint, queryThis) + + return responseLst + + +def save_multi(engine, endpoint: str, objLst: list) -> List[Dict]: + # We must account for the possibility that the number of query values exceeds the BFabric maximum, + # so we must split it into smaller chunks + + responseLst = [] + for i in range(0, len(objLst), BFABRIC_QUERY_LIMIT): + objLstThis = objLst[i:i + BFABRIC_QUERY_LIMIT] + responseLst += engine.save_object(endpoint, objLstThis) + + return responseLst + + +def delete_multi(engine, endpoint: str, idLst: list) -> List[Dict]: + if len(idLst) == 0: + print('Warning, empty list provided for deletion, ignoring') + return [] + + responseLst = [] + for i in range(0, len(idLst), BFABRIC_QUERY_LIMIT): + idLstThis = idLst[i:i + BFABRIC_QUERY_LIMIT] + responseLst += engine.delete_object(endpoint, idLstThis) + + return responseLst From bee0b536948c3bd0fbeae6607d4bb8d1c5cb8a72 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Mon, 8 Apr 2024 17:50:09 +0200 Subject: [PATCH 005/129] skeleton BFabricPy_V2.0000001, engines for suds and zeep, resultsContainer --- bfabric/bfabric2.py | 93 +++++++++++++++++++++++++++++++++ bfabric/src/engine_suds.py | 59 +++++++++++++++++++++ bfabric/src/engine_zeep.py | 75 ++++++++++++++++++++++++++ bfabric/src/result_container.py | 24 +++++++++ 4 files changed, 251 insertions(+) create mode 100755 bfabric/bfabric2.py create mode 100644 bfabric/src/engine_suds.py create mode 100644 bfabric/src/engine_zeep.py create mode 100644 bfabric/src/result_container.py diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py new file mode 100755 index 00000000..f5971f8f --- /dev/null +++ b/bfabric/bfabric2.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +# -*- coding: latin1 -*- + +"""B-Fabric Application Interface using WSDL + +The code contains classes for wrapper_creator and submitter. + +Ensure that this file is available on the bfabric exec host. + +Copyright (C) 2014 - 2024 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. + +Licensed under GPL version 3 + +Original Authors: + Marco Schmidt + Christian Panse + +BFabric V2 Authors: + Leonardo Schwarz + Aleksejs Fomins + +History + The python3 library first appeared in 2014. +""" + + + +# TODO: Move login checks to Auth +# if login is None: +# login = self.config.login +# +# if password is None: +# password = self.config.password +# +# if len(login) >= 32: +# raise ValueError("Sorry, login >= 32 characters.") +# +# if len(password) != 32: +# raise ValueError("Sorry, password != 32 characters.") + + + +from enum import Enum +from typing import Union, List + +from bfabric.src.engine_suds import EngineSUDS +from bfabric.src.engine_zeep import EngineZeep +from bfabric.src.result_container import ResultContainer, BFABRIC_RESULT_TYPE + +class BFABRIC_API_ENGINE(Enum): + SUDS = 1 + ZEEP = 2 + + +# TODO: What does idonly do for SUDS? Does it make sense for Zeep? +# TODO: What does includedeletableupdateable do for Zeep? Does it make sense for Suds? +# TODO: How to deal with save-skip fields in Zeep? Does it happen in SUDS? 
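+# Example usage (a sketch, not a contract; authClass/configClass stand for any
+# objects exposing the login()/password()/webbase() accessors used below):
+#
+#   b = Bfabric(authClass, configClass, engine=BFABRIC_API_ENGINE.SUDS)
+#   result = b.read('user', {'login': 'fomins'})
+#   print(result.to_dict())
+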
+class Bfabric(object): + """B-Fabric python3 module + Implements read and save object methods for B-Fabric wsdl interface + """ + + def __init__(self, authClass, configClass, engine: BFABRIC_API_ENGINE = BFABRIC_API_ENGINE.SUDS, + verbose: bool = False): + + self.verbose = verbose + self.query_counter = 0 + + if engine == BFABRIC_API_ENGINE.SUDS: + self.engine = EngineSUDS(authClass.login(), authClass.password(), configClass.webbase()) + self.resultType = BFABRIC_RESULT_TYPE.LISTSUDS + elif engine == BFABRIC_API_ENGINE.ZEEP: + self.engine = EngineZeep(authClass.login(), authClass.password(), configClass.webbase()) + self.resultType = BFABRIC_RESULT_TYPE.LISTZEEP + else: + raise ValueError("Unexpected engine", BFABRIC_API_ENGINE) + + def read(self, endpoint: str, obj: dict, page: int = 1, plain: bool = False, **kwargs) -> ResultContainer: + results = self.engine.read(endpoint, obj, page = page, **kwargs) + return ResultContainer(results, self.resultType) + + + def readid(self, endpoint: str, obj: dict, page: int = 1, plain: bool = False, **kwargs) -> ResultContainer: + results = self.engine.readid(endpoint, obj, page=page, **kwargs) + return ResultContainer(results, self.resultType) + + def save(self, endpoint: str, obj: dict, **kwargs) -> ResultContainer: + results = self.engine.save(endpoint, obj, **kwargs) + return ResultContainer(results, self.resultType) + + def delete(self, endpoint: str, id: Union[List, int]) -> ResultContainer: + results = self.engine.delete(endpoint, id) + return ResultContainer(results, self.resultType) diff --git a/bfabric/src/engine_suds.py b/bfabric/src/engine_suds.py new file mode 100644 index 00000000..12ff4b8f --- /dev/null +++ b/bfabric/src/engine_suds.py @@ -0,0 +1,59 @@ +from typing import Union, List + +from suds.client import Client + + +class EngineSUDS(object): + """B-Fabric API SUDS Engine""" + + def __init__(self, login: str, password: str, webbase: str): + self.cl = {} + self.login = login + self.password = password + self.webbase = webbase + + def _get_client(self, endpoint: str): + try: + if not endpoint in self.cl: + wsdl = "".join((self.webbase, '/', endpoint, "?wsdl")) + self.cl[endpoint] = Client(wsdl, cache=None) + return self.cl[endpoint] + except Exception as e: + print(e) + raise + + def read(self, endpoint: str, obj: dict, page: int = 1, idonly: bool = False): + """ + A generic method which can connect to any endpoint, e.g., workunit, project, order, + externaljob, etc, and returns the object with the requested id. + obj is a python dictionary which contains all the attributes of the endpoint + for the "query". + """ + + query = dict(login=self.login, page=page, password=self.password, query=obj, idonly=idonly) + + client = self._get_client(endpoint) + return client.service.read(query) + + # TODO: How is client.service.readid different from client.service.read. Do we need this method? 
+ def readid(self, endpoint: str, obj: dict, page: int = 1): + query = dict(login=self.login, page=page, password=self.password, query=obj) + + client = self._get_client(endpoint) + return client.service.readid(query) + + def save(self, endpoint: str, obj: dict): + query = {'login': self.login, 'password': self.password, endpoint: obj} + + client = self._get_client(endpoint) + return client.service.save(query) + + def delete(self, endpoint: str, id: Union[int, List]): + if isinstance(id, list) and len(id) == 0: + print("Warning, attempted to delete an empty list, ignoring") + return [] + + query = {'login': self.login, 'password': self.password, 'id': id} + + client = self._get_client(endpoint) + return client.service.delete(query) diff --git a/bfabric/src/engine_zeep.py b/bfabric/src/engine_zeep.py new file mode 100644 index 00000000..10ba7a2b --- /dev/null +++ b/bfabric/src/engine_zeep.py @@ -0,0 +1,75 @@ +from typing import Union, List + +import zeep +import copy + + +# TODO: Check if this is a bug of BFabric or Zeep. Specifically, see if the same call to bFabricPy has the same bug +def _zeep_query_append_skipped(query: dict, skippedKeys: list) -> dict: + """ + This function is used to fix a buggy behaviour of Zeep/BFabric. Specifically, Zeep does not return correct + query results if some of the optional parameters are not mentioned in the query. + + :param query: Original query + :param skippedKeys: Optional keys to skip + :return: Adds optional keys to query as skipped values. + """ + queryThis = query.copy() + for key in skippedKeys: + queryThis[key] = zeep.xsd.SkipValue + return queryThis + + +class EngineZeep(object): + """B-Fabric API SUDS Engine""" + + def __init__(self, login: str, password: str, webbase: str): + self.cl = {} + self.login = login + self.password = password + self.webbase = webbase + + def _get_client(self, endpoint: str): + try: + if not endpoint in self.cl: + wsdl = "".join((self.webbase, '/', endpoint, "?wsdl")) + self.cl[endpoint] = zeep.Client(wsdl) + return self.cl[endpoint] + except Exception as e: + print(e) + raise + + def read(self, endpoint: str, obj: dict, page: int = 1, includedeletableupdateable: bool = True): + query = copy.deepcopy(obj) + if includedeletableupdateable: + query['includedeletableupdateable'] = True + + query = dict(login=self.login, page=page, password=self.password, query=query) + + client = self._get_client(endpoint) + with client.settings(strict=False): + return client.service.read(query) + + def readid(self, endpoint: str, obj: dict, page: int = 1, includedeletableupdateable: bool = True): + raise NotImplementedError("Attempted to use a method `readid` of Zeep, which does not exist") + + def save(self, endpoint: str, obj: dict, skippedKeys: list = None): + query = {'login': self.login, 'password': self.password, endpoint: obj} + + # If necessary, add skipped keys to the query + if skippedKeys is not None: + query = _zeep_query_append_skipped(query, skippedKeys) + + client = self._get_client(endpoint) + with client.settings(strict=False): + return client.service.save(query) + + def delete(self, endpoint: str, id: Union[int, List]): + if isinstance(id, list) and len(id) == 0: + print("Warning, attempted to delete an empty list, ignoring") + return [] + + query = {'login': self.login, 'password': self.password, 'id': id} + + client = self._get_client(endpoint) + return client.service.delete(query) diff --git a/bfabric/src/result_container.py b/bfabric/src/result_container.py new file mode 100644 index 00000000..350e3341 --- 
/dev/null +++ b/bfabric/src/result_container.py @@ -0,0 +1,24 @@ + +from enum import Enum + +class BFABRIC_RESULT_TYPE(Enum): + LISTDICT = 1 + LISTSUDS = 2 + LISTZEEP = 3 + + +class ResultContainer: + def __init__(self, results: list, resultType: BFABRIC_RESULT_TYPE): + self.results = results + self.resultType = resultType + + def to_dict(self): + match self.resultType: + case BFABRIC_RESULT_TYPE.LISTDICT: + return self.results + case BFABRIC_RESULT_TYPE.LISTSUDS: + return self.results # TODO: Implement me + case BFABRIC_RESULT_TYPE.LISTZEEP: + return self.results # TODO: Implement me + case _: + raise ValueError("Unexpected results type", self.resultType) \ No newline at end of file From 778e37b138623e7d7dd12aabf6ff8269c82a3980 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Mon, 8 Apr 2024 18:00:20 +0200 Subject: [PATCH 006/129] applied SUDS->Dict to ResultContainer --- bfabric/bfabric2.py | 7 ++++-- bfabric/src/result_container.py | 6 ++++-- bfabric/src/suds_format.py | 38 ++++++++++++++------------------- 3 files changed, 25 insertions(+), 26 deletions(-) diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index f5971f8f..f8903989 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -75,19 +75,22 @@ def __init__(self, authClass, configClass, engine: BFABRIC_API_ENGINE = BFABRIC_ else: raise ValueError("Unexpected engine", BFABRIC_API_ENGINE) - def read(self, endpoint: str, obj: dict, page: int = 1, plain: bool = False, **kwargs) -> ResultContainer: + # TODO: Perform pagination. Return inner values, i.e. val[endpoint]. + def read(self, endpoint: str, obj: dict, page: int = 1, **kwargs) -> ResultContainer: results = self.engine.read(endpoint, obj, page = page, **kwargs) return ResultContainer(results, self.resultType) - + # TODO: Perform pagination. Return inner values, i.e. val[endpoint]. def readid(self, endpoint: str, obj: dict, page: int = 1, plain: bool = False, **kwargs) -> ResultContainer: results = self.engine.readid(endpoint, obj, page=page, **kwargs) return ResultContainer(results, self.resultType) + # TODO: Perform pagination. Return inner values, i.e. val[endpoint]. def save(self, endpoint: str, obj: dict, **kwargs) -> ResultContainer: results = self.engine.save(endpoint, obj, **kwargs) return ResultContainer(results, self.resultType) + # TODO: Perform pagination. Return inner values, i.e. val[endpoint]. 
def delete(self, endpoint: str, id: Union[List, int]) -> ResultContainer: results = self.engine.delete(endpoint, id) return ResultContainer(results, self.resultType) diff --git a/bfabric/src/result_container.py b/bfabric/src/result_container.py index 350e3341..6b38365c 100644 --- a/bfabric/src/result_container.py +++ b/bfabric/src/result_container.py @@ -1,6 +1,8 @@ - from enum import Enum +from bfabric.src.suds_format import suds_asdict_recursive + + class BFABRIC_RESULT_TYPE(Enum): LISTDICT = 1 LISTSUDS = 2 @@ -17,7 +19,7 @@ def to_dict(self): case BFABRIC_RESULT_TYPE.LISTDICT: return self.results case BFABRIC_RESULT_TYPE.LISTSUDS: - return self.results # TODO: Implement me + return [suds_asdict_recursive(v) for v in self.results] case BFABRIC_RESULT_TYPE.LISTZEEP: return self.results # TODO: Implement me case _: diff --git a/bfabric/src/suds_format.py b/bfabric/src/suds_format.py index 03ab0ac4..d3a7feed 100644 --- a/bfabric/src/suds_format.py +++ b/bfabric/src/suds_format.py @@ -4,7 +4,20 @@ from suds.sudsobject import asdict -def _recursive_asdict(d, convert_types: bool) -> dict: +def convert_suds_type(item: Any) -> Union[int, str]: + """ + Converts the suds type to an equivalent python type. There is, to my knowledge, only a single suds type which + is currently ever return, namely 'Text'. Integers and doubles are already cast to their python equivalents and + thus do not need to be explicitly handled. This may be subject to change in future versions + :param item: The suds item + :return: The item as a built-in python type + """ + if type(item) == Text: + return str(item) + return item + + +def suds_asdict_recursive(d, convert_types: bool) -> dict: """Convert Suds object into serializable format. https://stackoverflow.com/a/15678861 :param d: The input suds object @@ -14,34 +27,15 @@ def _recursive_asdict(d, convert_types: bool) -> dict: out = {} for k, v in asdict(d).items(): if hasattr(v, '__keylist__'): - out[k] = _recursive_asdict(v, convert_types) + out[k] = suds_asdict_recursive(v, convert_types) elif isinstance(v, list): out[k] = [] for item in v: if hasattr(item, '__keylist__'): - out[k].append(_recursive_asdict(item, convert_types)) + out[k].append(suds_asdict_recursive(item, convert_types)) else: out[k].append(convert_suds_type(item) if convert_types else item) else: out[k] = convert_suds_type(v) if convert_types else v # return OrderedDict(out) return out - - -def convert_suds_type(item: Any) -> Union[int, str]: - """ - Converts the suds type to an equivalent python type. There is, to my knowledge, only a single suds type which - is currently ever return, namely 'Text'. Integers and doubles are already cast to their python equivalents and - thus do not need to be explicitly handled. 
This may be subject to change in future versions - :param item: The suds item - :return: The item as a built-in python type - """ - if type(item) == Text: - return str(item) - return item - - -def suds_to_json(data, convert_types: bool = False) -> Union[dict, List[dict]]: - if type(data) == list: - return [_recursive_asdict(d, convert_types) for d in data] - return _recursive_asdict(data, convert_types) From b3968c8644639547ba6e588dc3b107934a4da3fd Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 9 Apr 2024 15:22:55 +0200 Subject: [PATCH 007/129] Update bfabric/src/pandas_helper.py Co-authored-by: Leonardo Schwarz --- bfabric/src/pandas_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfabric/src/pandas_helper.py b/bfabric/src/pandas_helper.py index 86fbf45a..ca84ff06 100644 --- a/bfabric/src/pandas_helper.py +++ b/bfabric/src/pandas_helper.py @@ -30,7 +30,7 @@ def list_dict_to_df(l: List[Dict]) -> pd.DataFrame: * Columns are a union of all keys that appear in the dictionaries. Any missing key is treated as a NAN * All non-basic data types are converted to strings """ - return pd.concat([pd.DataFrame(_stringify_dict(r), index=[0]) for r in l]) + return pd.DataFrame([_stringify_dict(r) for r in l]) if __name__ == "__main__": From f5b05de7d0e4297dc25e3d727205591fe9bad828 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 9 Apr 2024 15:23:19 +0200 Subject: [PATCH 008/129] Update bfabric/src/pandas_helper.py Co-authored-by: Leonardo Schwarz --- bfabric/src/pandas_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfabric/src/pandas_helper.py b/bfabric/src/pandas_helper.py index ca84ff06..5328fd92 100644 --- a/bfabric/src/pandas_helper.py +++ b/bfabric/src/pandas_helper.py @@ -34,7 +34,7 @@ def list_dict_to_df(l: List[Dict]) -> pd.DataFrame: if __name__ == "__main__": - exampleLstDict = [ + example_list_dict = [ {'cat': 1, 'dog': 2}, {'cat': 3, 'mouse': ["a", "b"]}, {'mouse': 5}, From 7fd17f30c890afbc04405e496bbf8b46fa54748e Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 9 Apr 2024 15:23:29 +0200 Subject: [PATCH 009/129] Update bfabric/src/pandas_helper.py Co-authored-by: Leonardo Schwarz --- bfabric/src/pandas_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfabric/src/pandas_helper.py b/bfabric/src/pandas_helper.py index 5328fd92..05711dd5 100644 --- a/bfabric/src/pandas_helper.py +++ b/bfabric/src/pandas_helper.py @@ -41,4 +41,4 @@ def list_dict_to_df(l: List[Dict]) -> pd.DataFrame: {'cat': 1, 'dog': 2, 'mouse': 7}, ] - print(list_dict_to_df(exampleLstDict)) + print(list_dict_to_df(example_list_dict)) From 0af5a9526e2913f8306fd31b531340f252c4509a Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 9 Apr 2024 15:23:38 +0200 Subject: [PATCH 010/129] Update bfabric/examples/suds_to_listdict.py Co-authored-by: Leonardo Schwarz --- bfabric/examples/suds_to_listdict.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bfabric/examples/suds_to_listdict.py b/bfabric/examples/suds_to_listdict.py index 44ee3e62..52aef982 100644 --- a/bfabric/examples/suds_to_listdict.py +++ b/bfabric/examples/suds_to_listdict.py @@ -5,8 +5,8 @@ def read_shallow(b: bfabric.Bfabric, endpoint: str, obj: dict) -> List[dict]: response = b.read_object(endpoint=endpoint, obj=obj, 
plain=True) - responseDict = suds_to_json(response) - return responseDict[endpoint] + response_dict = suds_to_json(response) + return response_dict[endpoint] b = bfabric.Bfabric() resLstDict = read_shallow(b, 'user', {'login': 'fomins'}) From 967b44e060a1736e977c66a09c399d6c0d898869 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 9 Apr 2024 15:23:53 +0200 Subject: [PATCH 011/129] Update bfabric/examples/suds_to_listdict.py Co-authored-by: Leonardo Schwarz --- bfabric/examples/suds_to_listdict.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bfabric/examples/suds_to_listdict.py b/bfabric/examples/suds_to_listdict.py index 52aef982..da903ca4 100644 --- a/bfabric/examples/suds_to_listdict.py +++ b/bfabric/examples/suds_to_listdict.py @@ -9,6 +9,6 @@ def read_shallow(b: bfabric.Bfabric, endpoint: str, obj: dict) -> List[dict]: return response_dict[endpoint] b = bfabric.Bfabric() -resLstDict = read_shallow(b, 'user', {'login': 'fomins'}) +res_list_dict = read_shallow(b, 'user', {'login': 'fomins'}) -print(resLstDict) \ No newline at end of file +print(res_list_dict) \ No newline at end of file From 823b1c2ff1261dfb78bac196503a73f487dd3e94 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 9 Apr 2024 15:24:38 +0200 Subject: [PATCH 012/129] Update bfabric/src/suds_format.py Co-authored-by: Leonardo Schwarz --- bfabric/src/suds_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfabric/src/suds_format.py b/bfabric/src/suds_format.py index d3a7feed..92c03fab 100644 --- a/bfabric/src/suds_format.py +++ b/bfabric/src/suds_format.py @@ -12,7 +12,7 @@ def convert_suds_type(item: Any) -> Union[int, str]: :param item: The suds item :return: The item as a built-in python type """ - if type(item) == Text: + if isinstance(item, Text): return str(item) return item From 26774025275936b1270db2d441cc8e00c8050fb9 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 9 Apr 2024 15:28:30 +0200 Subject: [PATCH 013/129] Update bfabric/src/result_container.py Co-authored-by: Leonardo Schwarz --- bfabric/src/result_container.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfabric/src/result_container.py b/bfabric/src/result_container.py index 6b38365c..e84aa1c4 100644 --- a/bfabric/src/result_container.py +++ b/bfabric/src/result_container.py @@ -3,7 +3,7 @@ from bfabric.src.suds_format import suds_asdict_recursive -class BFABRIC_RESULT_TYPE(Enum): +class BfabricResultType(Enum): LISTDICT = 1 LISTSUDS = 2 LISTZEEP = 3 From 5f52f4b838114e0088ef35d59ac96c5aa21d5462 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Tue, 9 Apr 2024 15:35:24 +0200 Subject: [PATCH 014/129] converted some names to to PEP8 --- bfabric/bfabric2.py | 20 ++++++++++---------- bfabric/src/engine_zeep.py | 6 +++--- bfabric/src/result_container.py | 10 +++++----- bfabric/src/suds_format.py | 2 +- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index f8903989..99fcbdc3 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -45,9 +45,9 @@ from bfabric.src.engine_suds import EngineSUDS from bfabric.src.engine_zeep import EngineZeep -from bfabric.src.result_container import ResultContainer, BFABRIC_RESULT_TYPE +from bfabric.src.result_container import ResultContainer, BfabricResultType -class BFABRIC_API_ENGINE(Enum): +class 
BfabricAPIEngine(Enum): SUDS = 1 ZEEP = 2 @@ -60,20 +60,20 @@ class Bfabric(object): Implements read and save object methods for B-Fabric wsdl interface """ - def __init__(self, authClass, configClass, engine: BFABRIC_API_ENGINE = BFABRIC_API_ENGINE.SUDS, + def __init__(self, auth_class, config_class, engine: BfabricAPIEngine = BfabricAPIEngine.SUDS, verbose: bool = False): self.verbose = verbose self.query_counter = 0 - if engine == BFABRIC_API_ENGINE.SUDS: - self.engine = EngineSUDS(authClass.login(), authClass.password(), configClass.webbase()) - self.resultType = BFABRIC_RESULT_TYPE.LISTSUDS - elif engine == BFABRIC_API_ENGINE.ZEEP: - self.engine = EngineZeep(authClass.login(), authClass.password(), configClass.webbase()) - self.resultType = BFABRIC_RESULT_TYPE.LISTZEEP + if engine == BfabricAPIEngine.SUDS: + self.engine = EngineSUDS(auth_class.login(), auth_class.password(), config_class.webbase()) + self.resultType = BfabricResultType.LISTSUDS + elif engine == BfabricAPIEngine.ZEEP: + self.engine = EngineZeep(auth_class.login(), auth_class.password(), config_class.webbase()) + self.resultType = BfabricResultType.LISTZEEP else: - raise ValueError("Unexpected engine", BFABRIC_API_ENGINE) + raise ValueError("Unexpected engine", BfabricAPIEngine) # TODO: Perform pagination. Return inner values, i.e. val[endpoint]. def read(self, endpoint: str, obj: dict, page: int = 1, **kwargs) -> ResultContainer: diff --git a/bfabric/src/engine_zeep.py b/bfabric/src/engine_zeep.py index 10ba7a2b..d63be9f3 100644 --- a/bfabric/src/engine_zeep.py +++ b/bfabric/src/engine_zeep.py @@ -53,12 +53,12 @@ def read(self, endpoint: str, obj: dict, page: int = 1, includedeletableupdateab def readid(self, endpoint: str, obj: dict, page: int = 1, includedeletableupdateable: bool = True): raise NotImplementedError("Attempted to use a method `readid` of Zeep, which does not exist") - def save(self, endpoint: str, obj: dict, skippedKeys: list = None): + def save(self, endpoint: str, obj: dict, skipped_keys: list = None): query = {'login': self.login, 'password': self.password, endpoint: obj} # If necessary, add skipped keys to the query - if skippedKeys is not None: - query = _zeep_query_append_skipped(query, skippedKeys) + if skipped_keys is not None: + query = _zeep_query_append_skipped(query, skipped_keys) client = self._get_client(endpoint) with client.settings(strict=False): diff --git a/bfabric/src/result_container.py b/bfabric/src/result_container.py index e84aa1c4..4dc49fe1 100644 --- a/bfabric/src/result_container.py +++ b/bfabric/src/result_container.py @@ -10,17 +10,17 @@ class BfabricResultType(Enum): class ResultContainer: - def __init__(self, results: list, resultType: BFABRIC_RESULT_TYPE): + def __init__(self, results: list, resultType: BfabricResultType): self.results = results self.resultType = resultType def to_dict(self): match self.resultType: - case BFABRIC_RESULT_TYPE.LISTDICT: + case BfabricResultType.LISTDICT: return self.results - case BFABRIC_RESULT_TYPE.LISTSUDS: + case BfabricResultType.LISTSUDS: return [suds_asdict_recursive(v) for v in self.results] - case BFABRIC_RESULT_TYPE.LISTZEEP: + case BfabricResultType.LISTZEEP: return self.results # TODO: Implement me case _: - raise ValueError("Unexpected results type", self.resultType) \ No newline at end of file + raise ValueError("Unexpected results type", self.resultType) diff --git a/bfabric/src/suds_format.py b/bfabric/src/suds_format.py index 92c03fab..290b5c6d 100644 --- a/bfabric/src/suds_format.py +++ b/bfabric/src/suds_format.py @@ 
-1,4 +1,4 @@ -from collections import OrderedDict +# from collections import OrderedDict from typing import Any, Union, List from suds.sax.text import Text from suds.sudsobject import asdict From 09335421b1ef37da11e2a803a1c4311ae88c37c9 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Tue, 9 Apr 2024 15:39:54 +0200 Subject: [PATCH 015/129] minor --- bfabric/bfabric2.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 99fcbdc3..8b21b7cf 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -47,7 +47,7 @@ from bfabric.src.engine_zeep import EngineZeep from bfabric.src.result_container import ResultContainer, BfabricResultType -class BfabricAPIEngine(Enum): +class BfabricAPIEngineType(Enum): SUDS = 1 ZEEP = 2 @@ -60,20 +60,20 @@ class Bfabric(object): Implements read and save object methods for B-Fabric wsdl interface """ - def __init__(self, auth_class, config_class, engine: BfabricAPIEngine = BfabricAPIEngine.SUDS, + def __init__(self, auth_class, config_class, engine: BfabricAPIEngineType = BfabricAPIEngineType.SUDS, verbose: bool = False): self.verbose = verbose self.query_counter = 0 - if engine == BfabricAPIEngine.SUDS: + if engine == BfabricAPIEngineType.SUDS: self.engine = EngineSUDS(auth_class.login(), auth_class.password(), config_class.webbase()) self.resultType = BfabricResultType.LISTSUDS - elif engine == BfabricAPIEngine.ZEEP: + elif engine == BfabricAPIEngineType.ZEEP: self.engine = EngineZeep(auth_class.login(), auth_class.password(), config_class.webbase()) self.resultType = BfabricResultType.LISTZEEP else: - raise ValueError("Unexpected engine", BfabricAPIEngine) + raise ValueError("Unexpected engine", BfabricAPIEngineType) # TODO: Perform pagination. Return inner values, i.e. val[endpoint]. def read(self, endpoint: str, obj: dict, page: int = 1, **kwargs) -> ResultContainer: From 7b4656c5133f006e5c172f963f375205dd07fe09 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Wed, 10 Apr 2024 14:21:21 +0200 Subject: [PATCH 016/129] implementation of pagination and multi-query functionality for BFabric2 --- bfabric/bfabric2.py | 136 +++++++++++++++++++--- bfabric/examples/read_paginated_single.py | 10 +- bfabric/examples/suds_to_listdict.py | 12 +- bfabric/src/math_helper.py | 20 ++++ bfabric/src/paginator.py | 85 -------------- bfabric/src/result_container.py | 43 ++++++- bfabric/src/suds_format.py | 6 +- 7 files changed, 193 insertions(+), 119 deletions(-) create mode 100644 bfabric/src/math_helper.py delete mode 100644 bfabric/src/paginator.py diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 8b21b7cf..b15708ae 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -41,8 +41,10 @@ from enum import Enum -from typing import Union, List +from copy import deepcopy +from typing import Union, List, Optional +from bfabric.src.math_helper import div_int_ceil from bfabric.src.engine_suds import EngineSUDS from bfabric.src.engine_zeep import EngineZeep from bfabric.src.result_container import ResultContainer, BfabricResultType @@ -51,6 +53,9 @@ class BfabricAPIEngineType(Enum): SUDS = 1 ZEEP = 2 +# Single page query limit for BFabric API (as of time of writing, adapt if it changes) +BFABRIC_QUERY_LIMIT = 100 + # TODO: What does idonly do for SUDS? Does it make sense for Zeep? # TODO: What does includedeletableupdateable do for Zeep? Does it make sense for Suds? 
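+# Illustration (a sketch; `values` is a placeholder name): pagination and the
+# *_multi helpers below all cut work into BFABRIC_QUERY_LIMIT-sized blocks:
+#
+#   for i in range(0, len(values), BFABRIC_QUERY_LIMIT):
+#       chunk = values[i:i + BFABRIC_QUERY_LIMIT]  # at most 100 items per call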
@@ -68,29 +73,130 @@ def __init__(self, auth_class, config_class, engine: BfabricAPIEngineType = Bfab if engine == BfabricAPIEngineType.SUDS: self.engine = EngineSUDS(auth_class.login(), auth_class.password(), config_class.webbase()) - self.resultType = BfabricResultType.LISTSUDS + self.result_type = BfabricResultType.LISTSUDS elif engine == BfabricAPIEngineType.ZEEP: self.engine = EngineZeep(auth_class.login(), auth_class.password(), config_class.webbase()) - self.resultType = BfabricResultType.LISTZEEP + self.result_type = BfabricResultType.LISTZEEP else: raise ValueError("Unexpected engine", BfabricAPIEngineType) - # TODO: Perform pagination. Return inner values, i.e. val[endpoint]. - def read(self, endpoint: str, obj: dict, page: int = 1, **kwargs) -> ResultContainer: - results = self.engine.read(endpoint, obj, page = page, **kwargs) - return ResultContainer(results, self.resultType) + def _read_method(self, readid: bool, endpoint: str, obj: dict, page: int = 1, **kwargs): + if readid: + return self.engine.readid(endpoint, obj, page=page, **kwargs) + else: + return self.engine.read(endpoint, obj, page=page, **kwargs) + + def read(self, endpoint: str, obj: dict, max_results: Optional[int] = 100, readid: bool = False, + **kwargs) -> ResultContainer: + """ + Make a read query to the engine. Determine the number of pages. Make calls for every page, concatenate + results. + :param endpoint: endpoint + :param obj: query dictionary + :param max_results: cap on the number of results to query. The code will keep reading pages until all pages + are read or expected number of results has been reached. If None, load all available pages. + NOTE: max_results will be rounded upwards to the nearest multiple of BFABRIC_QUERY_LIMIT, because results + come in blocks, and there is little overhead to providing results over integer number of pages. + :param readid: whether to use reading by ID. Currently only available for engine=SUDS + TODO: Test the extent to which this method works. Add safeguards + :return: List of responses, packaged in the results container + """ + + # Get the first page. + # NOTE: According to old interface, this is equivalent to plain=True + response = self._read_method(readid, endpoint, obj, page=1, **kwargs) + n_pages = response["numberofpages"] + + # Return empty list if nothing found + if not n_pages: + return ResultContainer([], self.result_type, total_pages_api=0) + + # Get results from other pages as well, if need be + # Only load as many pages as user has interest in + if max_results is None: + n_pages_trg = n_pages + else: + n_pages_trg = min(n_pages, div_int_ceil(max_results, BFABRIC_QUERY_LIMIT)) + + # NOTE: Page numbering starts at 1 + response_list = response[endpoint] + for i_page in range(2, n_pages_trg + 1): + print('-- reading page', i_page, 'of', n_pages) + response_list += self._read_method(readid, endpoint, obj, page=i_page, **kwargs)[endpoint] - # TODO: Perform pagination. Return inner values, i.e. val[endpoint]. - def readid(self, endpoint: str, obj: dict, page: int = 1, plain: bool = False, **kwargs) -> ResultContainer: - results = self.engine.readid(endpoint, obj, page=page, **kwargs) - return ResultContainer(results, self.resultType) + return ResultContainer(response_list, self.result_type, total_pages_api=n_pages) - # TODO: Perform pagination. Return inner values, i.e. val[endpoint]. 
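+    # NOTE: the raw engine response is now unwrapped at the endpoint key
+    # (results[endpoint]), so the container holds the object list directly.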
def save(self, endpoint: str, obj: dict, **kwargs) -> ResultContainer: results = self.engine.save(endpoint, obj, **kwargs) - return ResultContainer(results, self.resultType) + return ResultContainer(results[endpoint], self.result_type) - # TODO: Perform pagination. Return inner values, i.e. val[endpoint]. def delete(self, endpoint: str, id: Union[List, int]) -> ResultContainer: results = self.engine.delete(endpoint, id) - return ResultContainer(results, self.resultType) + return ResultContainer(results[endpoint], self.result_type) + + + ############################ + # Multi-query functionality + ############################ + + # TODO: Is this scope sufficient? Is there ever more than one multi-query parameter, and/or not at the root of dict? + def read_multi(self, endpoint: str, obj: dict, multi_query_key: str, multi_query_vals: list, + readid: bool = False, **kwargs) -> ResultContainer: + """ + Makes a 1-parameter multi-query (there is 1 parameter that takes a list of values) + Since the API only allows BFABRIC_QUERY_LIMIT queries per page, split the list into chunks before querying + :param endpoint: endpoint + :param obj: query dictionary + :param multi_query_key: key for which the multi-query is performed + :param multi_query_vals: list of values for which the multi-query is performed + :param readid: whether to use reading by ID. Currently only available for engine=SUDS + TODO: Test the extent to which this method works. Add safeguards + :return: List of responses, packaged in the results container + + NOTE: It is assumed that there is only 1 response for each value. + """ + + response_tot = ResultContainer([], self.result_type, total_pages_api = 0) + obj_exteded = deepcopy(obj) # Make a copy of the query, not to make edits to the argument + + for i in range(0, len(multi_query_vals), BFABRIC_QUERY_LIMIT): + # Limit the multi-query parameter to an acceptable chunk size + obj_exteded[multi_query_key] = multi_query_vals[i:i + BFABRIC_QUERY_LIMIT] + + # TODO: Test what happens if there are multiple responses to each of the individual queries. + # * What would happen? + # * What would happen if total number of responses would exceed 100 now? + # * What would happen if we naively made a multi-query with more than 100 values? Would API paginate + # automatically? If yes, perhaps we don't need this method at all? + # TODO: It is assumed that a user requesting multi_query always wants all of the pages. Can anybody think of + # exceptions to this? 
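+            # max_results=None requests every available page for this chunk of
+            # values; pagination itself is handled inside read().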
+ response_this = self.read(endpoint, obj_exteded, max_results=None, readid=readid, **kwargs) + response_tot.append(response_this) + + return response_tot + + def save_multi(self, endpoint: str, obj_lst: list, **kwargs) -> ResultContainer: + # We must account for the possibility that the number of query values exceeds the BFabric maximum, + # so we must split it into smaller chunks + + response_tot = ResultContainer([], self.result_type, total_pages_api = 0) + for i in range(0, len(obj_lst), BFABRIC_QUERY_LIMIT): + obj_list_this = obj_lst[i:i + BFABRIC_QUERY_LIMIT] + response_this = self.save(endpoint, obj_list_this, **kwargs) + response_tot.append(response_this) + + return response_tot + + def delete_multi(self, endpoint: str, id_list: list) -> ResultContainer: + response_tot = ResultContainer([], self.result_type, total_pages_api=0) + + if len(id_list) == 0: + print('Warning, empty list provided for deletion, ignoring') + return response_tot + + for i in range(0, len(id_list), BFABRIC_QUERY_LIMIT): + id_list_this = id_list[i:i + BFABRIC_QUERY_LIMIT] + response_this = self.delete(endpoint, id_list_this) + response_tot.append(response_this) + + return response_tot diff --git a/bfabric/examples/read_paginated_single.py b/bfabric/examples/read_paginated_single.py index e53025ee..78a08128 100644 --- a/bfabric/examples/read_paginated_single.py +++ b/bfabric/examples/read_paginated_single.py @@ -1,8 +1,8 @@ -from bfabric import bfabric -from bfabric.src.paginator import read +from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType -b = bfabric.Bfabric() +b = Bfabric(engine=BfabricAPIEngineType.SUDS) -responseLst = read(b, 'run', query={}) +responseClass = b.read('run', {}, max_results=None) +responseDict = responseClass.to_dict() -print(len(responseLst)) \ No newline at end of file +print(len(responseDict)) \ No newline at end of file diff --git a/bfabric/examples/suds_to_listdict.py b/bfabric/examples/suds_to_listdict.py index da903ca4..179b6277 100644 --- a/bfabric/examples/suds_to_listdict.py +++ b/bfabric/examples/suds_to_listdict.py @@ -1,14 +1,14 @@ -from bfabric import bfabric -from bfabric.src.suds_format import suds_to_json +from bfabric.bfabric2 import Bfabric +from bfabric.src.suds_format import suds_asdict_recursive from typing import List -def read_shallow(b: bfabric.Bfabric, endpoint: str, obj: dict) -> List[dict]: - response = b.read_object(endpoint=endpoint, obj=obj, plain=True) - response_dict = suds_to_json(response) +def read_shallow(b: Bfabric, endpoint: str, obj: dict) -> List[dict]: + response = b.read(endpoint, obj) + response_dict = suds_asdict_recursive(response) return response_dict[endpoint] -b = bfabric.Bfabric() +b = Bfabric() res_list_dict = read_shallow(b, 'user', {'login': 'fomins'}) print(res_list_dict) \ No newline at end of file diff --git a/bfabric/src/math_helper.py b/bfabric/src/math_helper.py new file mode 100644 index 00000000..edc9400b --- /dev/null +++ b/bfabric/src/math_helper.py @@ -0,0 +1,20 @@ + + +def div_int_ceil(n: int, d: int) -> int: + """ + :param n: Numerator + :param d: Denominator + :return: Performs integer ceiling division + Theoretically equivalent to math.ceil(n/d), but not subject to floating-point errors. 
+ """ + q, r = divmod(n, d) + return q + bool(r) + + + +if __name__ == "__main__": + print( + div_int_ceil(120, 100), + div_int_ceil(200, 100), + div_int_ceil(245, 100) + ) \ No newline at end of file diff --git a/bfabric/src/paginator.py b/bfabric/src/paginator.py deleted file mode 100644 index 9df418d2..00000000 --- a/bfabric/src/paginator.py +++ /dev/null @@ -1,85 +0,0 @@ -from typing import Dict, Union, List, Any -from copy import deepcopy - -BFABRIC_QUERY_LIMIT = 100 - - -def read(engine, endpoint: str, query: dict = None) -> List[Dict]: - """ - Make a query to the engine. Determine the number of pages. Make calls for every page, concatenate results - :param engine: A BFabric API engine - :param endpoint: endpoint - :param query: query dictionary - :return: List of responses - """ - - - # Get the first page - response = engine.read_object(endpoint, query, plain=True) - nPages = response["numberofpages"] - - # Return empty list if nothing found - if not nPages: - return [] - - # Get results from other pages as well, if need be - # NOTE: Page numbering starts at 1 - responseLst = response[endpoint] - for iPage in range(2, nPages+1): - print('-- reading page', iPage, 'of', nPages) - - responseLst += engine.read_object(endpoint, query, page=iPage) - - return responseLst - - -# TODO: Is this scope sufficient? Is there ever more than one mutiquery parameter, and/or not at the root of dict? -def read_multi(engine, endpoint: str, query: dict, multiQueryKey: str, multiQueryVals: list) -> List[Dict]: - """ - Makes a 1-parameter multi-query (there is 1 parameter that takes a list of values) - Since the API only allows 100 queries per page, split the list into chunks before querying - :param engine: A BFabric API engine - :param endpoint: endpoint - :param multiQueryKey: key for which the multi-query is performed - :param multiQueryVals: list of values for which the multi-query is performed - :return: List of responses, concatenated over all multiquery values and all pages - - NOTE: It is assumed that there is only 1 response for each value. - TODO: Test what happens if there are multiple responses. Is read_multi even necessary? Maybe the API would - paginate by itself? 
- """ - - responseLst = [] - queryThis = deepcopy(query) # Make a copy of the query, not to make edits to the argument - - for i in range(0, len(multiQueryVals), BFABRIC_QUERY_LIMIT): - # Limit the multi-query parameter to an acceptable chunk size - queryThis[multiQueryKey] = multiQueryVals[i:i + BFABRIC_QUERY_LIMIT] - responseLst += read(engine, endpoint, queryThis) - - return responseLst - - -def save_multi(engine, endpoint: str, objLst: list) -> List[Dict]: - # We must account for the possibility that the number of query values exceeds the BFabric maximum, - # so we must split it into smaller chunks - - responseLst = [] - for i in range(0, len(objLst), BFABRIC_QUERY_LIMIT): - objLstThis = objLst[i:i + BFABRIC_QUERY_LIMIT] - responseLst += engine.save_object(endpoint, objLstThis) - - return responseLst - - -def delete_multi(engine, endpoint: str, idLst: list) -> List[Dict]: - if len(idLst) == 0: - print('Warning, empty list provided for deletion, ignoring') - return [] - - responseLst = [] - for i in range(0, len(idLst), BFABRIC_QUERY_LIMIT): - idLstThis = idLst[i:i + BFABRIC_QUERY_LIMIT] - responseLst += engine.delete_object(endpoint, idLstThis) - - return responseLst diff --git a/bfabric/src/result_container.py b/bfabric/src/result_container.py index 4dc49fe1..7dbe41d5 100644 --- a/bfabric/src/result_container.py +++ b/bfabric/src/result_container.py @@ -1,5 +1,10 @@ -from enum import Enum +# NOTE: This allows class type annotations inside the same class. According to +# https://stackoverflow.com/questions/44798635/how-can-i-set-the-same-type-as-class-in-methods-parameter-following-pep484 +# this should become default behaviour in one of the future versions of python. Remove this import +# once it is no longer necessary +from __future__ import annotations +from enum import Enum from bfabric.src.suds_format import suds_asdict_recursive @@ -10,12 +15,40 @@ class BfabricResultType(Enum): class ResultContainer: - def __init__(self, results: list, resultType: BfabricResultType): + def __init__(self, results: list, result_type: BfabricResultType, total_pages_api: int = None): + """ + :param results: List of BFabric query results + :param result_type: Format of each result (All must be of the same format) + :param total_pages_api: Maximal number of pages that were available for reading. + NOTE: User may have requested to cap the total number of results. Thus, it may be of interest to know + the (approximate) total number of results the API had for the query. The total number of results is + somewhere between max_pages * (BFABRIC_QUERY_LIMIT - 1) and max_pages * BFABRIC_QUERY_LIMIT + """ self.results = results - self.resultType = resultType + self.result_type = result_type + self.total_pages_api = total_pages_api + + def append(self, other: ResultContainer) -> None: + """ + Can merge results of two queries. 
This can happen if the engine splits a complicated query in two + :param other: The other query results that should be appended to this + :return: + """ + + if self.result_type != other.result_type: + raise ValueError("Attempting to merge results of two different types", self.result_type, other.result_type) + + self.results += other.results + if (self.total_pages_api is not None) and (other.total_pages_api is not None): + self.total_pages_api += other.total_pages_api + else: + self.total_pages_api = None + + def total_pages_api(self): + return self.total_pages_api def to_dict(self): - match self.resultType: + match self.result_type: case BfabricResultType.LISTDICT: return self.results case BfabricResultType.LISTSUDS: @@ -23,4 +56,4 @@ def to_dict(self): case BfabricResultType.LISTZEEP: return self.results # TODO: Implement me case _: - raise ValueError("Unexpected results type", self.resultType) + raise ValueError("Unexpected results type", self.result_type) diff --git a/bfabric/src/suds_format.py b/bfabric/src/suds_format.py index 290b5c6d..672d36fe 100644 --- a/bfabric/src/suds_format.py +++ b/bfabric/src/suds_format.py @@ -17,7 +17,7 @@ def convert_suds_type(item: Any) -> Union[int, str]: return item -def suds_asdict_recursive(d, convert_types: bool) -> dict: +def suds_asdict_recursive(d, convert_types: bool = False) -> dict: """Convert Suds object into serializable format. https://stackoverflow.com/a/15678861 :param d: The input suds object @@ -27,12 +27,12 @@ def suds_asdict_recursive(d, convert_types: bool) -> dict: out = {} for k, v in asdict(d).items(): if hasattr(v, '__keylist__'): - out[k] = suds_asdict_recursive(v, convert_types) + out[k] = suds_asdict_recursive(v, convert_types=convert_types) elif isinstance(v, list): out[k] = [] for item in v: if hasattr(item, '__keylist__'): - out[k].append(suds_asdict_recursive(item, convert_types)) + out[k].append(suds_asdict_recursive(item, convert_types=convert_types)) else: out[k].append(convert_suds_type(item) if convert_types else item) else: From 8b7d61ef52b142ca4607cc5cee55cfb248a8a187 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Thu, 11 Apr 2024 16:22:48 +0200 Subject: [PATCH 017/129] using auth class, minor bug in engines, added __repr__ etc to ResourceContainer --- bfabric/bfabric2.py | 39 +++++++++++------------ bfabric/examples/read_paginated_single.py | 16 +++++++--- bfabric/src/engine_suds.py | 2 +- bfabric/src/engine_zeep.py | 6 ++-- bfabric/src/result_container.py | 17 ++++++++-- 5 files changed, 48 insertions(+), 32 deletions(-) diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index b15708ae..cc3f7927 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -23,23 +23,7 @@ The python3 library first appeared in 2014. 
""" - - -# TODO: Move login checks to Auth -# if login is None: -# login = self.config.login -# -# if password is None: -# password = self.config.password -# -# if len(login) >= 32: -# raise ValueError("Sorry, login >= 32 characters.") -# -# if len(password) != 32: -# raise ValueError("Sorry, password != 32 characters.") - - - +import os from enum import Enum from copy import deepcopy from typing import Union, List, Optional @@ -48,6 +32,7 @@ from bfabric.src.engine_suds import EngineSUDS from bfabric.src.engine_zeep import EngineZeep from bfabric.src.result_container import ResultContainer, BfabricResultType +from bfabric.bfabric_config import BfabricAuth, BfabricConfig, parse_bfabricrc_py class BfabricAPIEngineType(Enum): SUDS = 1 @@ -57,6 +42,17 @@ class BfabricAPIEngineType(Enum): BFABRIC_QUERY_LIMIT = 100 +def get_system_auth(): + path_bfabricrc = os.path.normpath(os.path.expanduser("~/.bfabricrc.py")) + if not os.path.isfile(path_bfabricrc): + raise IOError("Config file not found:", path_bfabricrc) + + with open(path_bfabricrc, "r", encoding="utf-8") as file: + config, auth = parse_bfabricrc_py(file) + + return config, auth + + # TODO: What does idonly do for SUDS? Does it make sense for Zeep? # TODO: What does includedeletableupdateable do for Zeep? Does it make sense for Suds? # TODO: How to deal with save-skip fields in Zeep? Does it happen in SUDS? @@ -65,23 +61,24 @@ class Bfabric(object): Implements read and save object methods for B-Fabric wsdl interface """ - def __init__(self, auth_class, config_class, engine: BfabricAPIEngineType = BfabricAPIEngineType.SUDS, - verbose: bool = False): + def __init__(self, config: BfabricConfig, auth: BfabricAuth, + engine: BfabricAPIEngineType = BfabricAPIEngineType.SUDS, verbose: bool = False): self.verbose = verbose self.query_counter = 0 if engine == BfabricAPIEngineType.SUDS: - self.engine = EngineSUDS(auth_class.login(), auth_class.password(), config_class.webbase()) + self.engine = EngineSUDS(auth.login, auth.password, config.base_url) self.result_type = BfabricResultType.LISTSUDS elif engine == BfabricAPIEngineType.ZEEP: - self.engine = EngineZeep(auth_class.login(), auth_class.password(), config_class.webbase()) + self.engine = EngineZeep(auth.login, auth.password, config.base_url) self.result_type = BfabricResultType.LISTZEEP else: raise ValueError("Unexpected engine", BfabricAPIEngineType) def _read_method(self, readid: bool, endpoint: str, obj: dict, page: int = 1, **kwargs): if readid: + # https://fgcz-bfabric.uzh.ch/wiki/tiki-index.php?page=endpoint.workunit#Web_Method_readid_ return self.engine.readid(endpoint, obj, page=page, **kwargs) else: return self.engine.read(endpoint, obj, page=page, **kwargs) diff --git a/bfabric/examples/read_paginated_single.py b/bfabric/examples/read_paginated_single.py index 78a08128..6f328297 100644 --- a/bfabric/examples/read_paginated_single.py +++ b/bfabric/examples/read_paginated_single.py @@ -1,8 +1,14 @@ -from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType +from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth +from bfabric.src.pandas_helper import list_dict_to_df -b = Bfabric(engine=BfabricAPIEngineType.SUDS) -responseClass = b.read('run', {}, max_results=None) -responseDict = responseClass.to_dict() +config, auth = get_system_auth() -print(len(responseDict)) \ No newline at end of file +# b = Bfabric(config, auth, engine=BfabricAPIEngineType.SUDS) +b = Bfabric(config, auth, engine=BfabricAPIEngineType.ZEEP) + +responseClass = b.read('user', {}, max_results=300) 
+responseDict = responseClass.to_list_dict() +responseDF = list_dict_to_df(responseDict) + +print(responseDF) \ No newline at end of file diff --git a/bfabric/src/engine_suds.py b/bfabric/src/engine_suds.py index 12ff4b8f..74d36e95 100644 --- a/bfabric/src/engine_suds.py +++ b/bfabric/src/engine_suds.py @@ -17,7 +17,7 @@ def _get_client(self, endpoint: str): if not endpoint in self.cl: wsdl = "".join((self.webbase, '/', endpoint, "?wsdl")) self.cl[endpoint] = Client(wsdl, cache=None) - return self.cl[endpoint] + return self.cl[endpoint] except Exception as e: print(e) raise diff --git a/bfabric/src/engine_zeep.py b/bfabric/src/engine_zeep.py index d63be9f3..6376df83 100644 --- a/bfabric/src/engine_zeep.py +++ b/bfabric/src/engine_zeep.py @@ -34,7 +34,7 @@ def _get_client(self, endpoint: str): if not endpoint in self.cl: wsdl = "".join((self.webbase, '/', endpoint, "?wsdl")) self.cl[endpoint] = zeep.Client(wsdl) - return self.cl[endpoint] + return self.cl[endpoint] except Exception as e: print(e) raise @@ -44,11 +44,11 @@ def read(self, endpoint: str, obj: dict, page: int = 1, includedeletableupdateab if includedeletableupdateable: query['includedeletableupdateable'] = True - query = dict(login=self.login, page=page, password=self.password, query=query) + fullQuery = dict(login=self.login, page=page, password=self.password, query=query) client = self._get_client(endpoint) with client.settings(strict=False): - return client.service.read(query) + return client.service.read(fullQuery) def readid(self, endpoint: str, obj: dict, page: int = 1, includedeletableupdateable: bool = True): raise NotImplementedError("Attempted to use a method `readid` of Zeep, which does not exist") diff --git a/bfabric/src/result_container.py b/bfabric/src/result_container.py index 7dbe41d5..de017c75 100644 --- a/bfabric/src/result_container.py +++ b/bfabric/src/result_container.py @@ -6,6 +6,7 @@ from enum import Enum from bfabric.src.suds_format import suds_asdict_recursive +from zeep.helpers import serialize_object class BfabricResultType(Enum): @@ -28,6 +29,18 @@ def __init__(self, results: list, result_type: BfabricResultType, total_pages_ap self.result_type = result_type self.total_pages_api = total_pages_api + def __getitem__(self, idx: int): + return self.results[idx] + + def __repr__(self): + return self.__str__() + + def __str__(self): + return str(self.to_list_dict()) + + def __len__(self): + return len(self.results) + def append(self, other: ResultContainer) -> None: """ Can merge results of two queries. 
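
# With the container protocol methods added above, a ResultContainer behaves
# like a small list of results; a sketch with toy LISTDICT data:

from bfabric.src.result_container import ResultContainer, BfabricResultType

res = ResultContainer([{'id': 1}, {'id': 2}], BfabricResultType.LISTDICT)
print(len(res))   # 2
print(res[0])     # {'id': 1}
print(res)        # __str__ delegates to to_list_dict()
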
This can happen if the engine splits a complicated query in two @@ -47,13 +60,13 @@ def append(self, other: ResultContainer) -> None: def total_pages_api(self): return self.total_pages_api - def to_dict(self): + def to_list_dict(self): match self.result_type: case BfabricResultType.LISTDICT: return self.results case BfabricResultType.LISTSUDS: return [suds_asdict_recursive(v) for v in self.results] case BfabricResultType.LISTZEEP: - return self.results # TODO: Implement me + return [dict(serialize_object(v)) for v in self.results] case _: raise ValueError("Unexpected results type", self.result_type) From f3fec555479a07df5a0fbb0d09293516eccd5019 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Fri, 12 Apr 2024 17:07:11 +0200 Subject: [PATCH 018/129] minor naming, progress on testing of consistency between Zeep and SUDS --- bfabric/bfabric.py | 14 +- bfabric/examples/compare_raw_xml_zeep_suds.py | 137 ++++++++++++++++++ bfabric/examples/read_paginated_single.py | 51 ++++++- bfabric/src/engine_suds.py | 11 +- bfabric/src/engine_zeep.py | 16 +- bfabric/src/iter_helper.py | 73 ++++++++++ bfabric/src/result_container.py | 2 +- 7 files changed, 279 insertions(+), 25 deletions(-) create mode 100644 bfabric/examples/compare_raw_xml_zeep_suds.py create mode 100644 bfabric/src/iter_helper.py diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index ad03121d..1f2ef850 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -50,13 +50,13 @@ 'formatter': 'verbose', }, }, - 'loggers': { - 'zeep.transports': { - 'level': 'DEBUG', - 'propagate': True, - 'handlers': ['console'], - }, - } + # 'loggers': { + # 'zeep.transports': { + # 'level': 'DEBUG', + # 'propagate': True, + # 'handlers': ['console'], + # }, + # } }) import bfabric.gridengine as gridengine diff --git a/bfabric/examples/compare_raw_xml_zeep_suds.py b/bfabric/examples/compare_raw_xml_zeep_suds.py new file mode 100644 index 00000000..f4ceef6d --- /dev/null +++ b/bfabric/examples/compare_raw_xml_zeep_suds.py @@ -0,0 +1,137 @@ +import sys +from collections import OrderedDict +from lxml import etree +from pprint import pprint + +import zeep +import suds + +from bfabric.bfabric2 import get_system_auth +from bfabric.src.suds_format import suds_asdict_recursive +from bfabric.src.iter_helper import drop_empty_response_elements, map_response_element_keys + + +def read_zeep(wsdl, fullQuery, raw=True): + client = zeep.Client(wsdl) + with client.settings(strict=False, raw_response=raw): + ret = client.service.read(fullQuery) + if raw: + return ret.content + else: + return dict(zeep.helpers.serialize_object(ret, target_cls=dict)) + +def read_suds(wsdl, fullQuery, raw=True): + client = suds.client.Client(wsdl, cache=None, retxml=raw) + ret = client.service.read(fullQuery) + if raw: + return ret + else: + return suds_asdict_recursive(ret, convert_types=True) + + +config, auth = get_system_auth() + +endpoint = 'user' +wsdl = "".join((config.base_url, '/', endpoint, "?wsdl")) + +fullQuery = { + 'login': auth.login, + 'password': auth.password, + 'query': {'includedeletableupdateable': False} + # 'query': {'id': 9026, 'includedeletableupdateable': False} +} + +###################### +# Raw XML comparison +###################### + +# retZeepDict = read_zeep(wsdl, fullQuery, raw=False) +# retSudsDict = read_suds(wsdl, fullQuery, raw=False) +# retZeep = read_zeep(wsdl, fullQuery, raw=True) +# retSuds = read_suds(wsdl, fullQuery, raw=True) +# print(retZeep) +# print(retSuds) + +# print(len(retZeep)) +# print(len(retSuds)) +# print(retZeep == retSuds) + +# 
print(retZeep) +# print('coachedorder' in str(retZeep)) + +# root = etree.fromstring(retZeep) +# print(etree.tostring(root, pretty_print=True).decode()) +# pprint(retZeepDict['user'][0]['order']) + + +###################### +# Parsed dict comparison +###################### + +# Find the set of all basic types used in the nested container (made of dicts, ordered dicts and lists) +def recursive_get_types(generic_container) -> set: + if isinstance(generic_container, (dict, OrderedDict)): + type_sets_lst = [recursive_get_types(v) for k, v in generic_container.items()] + return set().union(*type_sets_lst) + elif isinstance(generic_container, list): + type_sets_lst = [recursive_get_types(el) for el in generic_container] + return set().union(*type_sets_lst) + else: + return {type(generic_container)} + + +# Compare two dictionaries/lists recursively. Print every time there is a discrepancy +def recursive_comparison(generic_container1, generic_container2, prefix: list): + if type(generic_container1) != type(generic_container2): + print(prefix, "type", type(generic_container1), "!=", type(generic_container2)) + return + if isinstance(generic_container1, dict): + allKeys = set(list(generic_container1.keys()) + list(generic_container2.keys())) + for k in allKeys: + if k not in generic_container1: + print(prefix, "Not in 1: ", k, '=', generic_container2[k]) + print("- 1:", generic_container1) + print("- 2:", generic_container2) + elif k not in generic_container2: + print(prefix, "Not in 2: ", k, '=', generic_container1[k]) + else: + recursive_comparison(generic_container1[k], generic_container2[k], prefix + [k]) + elif isinstance(generic_container1, list): + if len(generic_container1) != len(generic_container2): + print(prefix, "length", len(generic_container1), '!=', len(generic_container2)) + else: + for i, (el1, el2) in enumerate(zip(generic_container1, generic_container2)): + recursive_comparison(el1, el2, prefix + [i]) + else: + if generic_container1 != generic_container2: + print(prefix, "value", generic_container1, "!=", generic_container2) + +retZeep = read_zeep(wsdl, fullQuery, raw=False) +retSuds = read_suds(wsdl, fullQuery, raw=False) + +# print("Zeep", retZeep['user'][0]['project'][0]) +# print("Suds", retSuds['user'][0]['project'][0]) + +# print("Zeep", retZeep['user'][0]) +# print("Suds", retSuds['user'][0]) + + +drop_empty_response_elements(retZeep, inplace=True) +drop_empty_response_elements(retSuds, inplace=True) +map_response_element_keys(retSuds, {'_id': 'id', '_classname': 'classname', '_projectid': 'projectid'}, inplace=True) + + +# print(len(retZeep)) +# print(retZeep) +# print(retSuds) + +# print(recursive_get_types(retZeep)) + +from contextlib import redirect_stdout + +# with open('compare_result.txt', 'w') as f: +# with redirect_stdout(f): +# recursive_comparison(retZeep, retSuds, prefix = []) +recursive_comparison(retZeep, retSuds, prefix = []) + +# print(retSuds) \ No newline at end of file diff --git a/bfabric/examples/read_paginated_single.py b/bfabric/examples/read_paginated_single.py index 6f328297..683388d6 100644 --- a/bfabric/examples/read_paginated_single.py +++ b/bfabric/examples/read_paginated_single.py @@ -1,14 +1,53 @@ +import pandas as pd + from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth from bfabric.src.pandas_helper import list_dict_to_df +''' +Alpha-version of a system test, which would eventually make +basic queries to both Zeep and SUDS and test that the parsed responses match +''' + config, auth = get_system_auth() -# b = 
Bfabric(config, auth, engine=BfabricAPIEngineType.SUDS) -b = Bfabric(config, auth, engine=BfabricAPIEngineType.ZEEP) +def calc_query(engine): + print("Sending query via", engine) + b = Bfabric(config, auth, engine=engine) + + responseClass = b.read('user', {}, max_results=300, idonly=False, includedeletableupdateable=True) + responseDict = responseClass.to_list_dict() + return list_dict_to_df(responseDict) + +def set_partition(a, b) -> None: + aSet = set(a) + bSet = set(b) + print("Shared:", aSet.intersection(bSet)) + print("Unique(1):", aSet - bSet) + print("Unique(2):", bSet - aSet) + +# respDFSuds = calc_query(BfabricAPIEngineType.SUDS) +# respDFZeep = calc_query(BfabricAPIEngineType.ZEEP) +# +# respDFSuds.to_csv("tmp_suds.csv") +# respDFZeep.to_csv("tmp_zeep.csv") +respDFSuds = pd.read_csv("tmp_suds.csv") +respDFZeep = pd.read_csv("tmp_zeep.csv") + +# Rename suds to remove underscores +respDFSuds.rename(columns={"_id": "id", "_classname": "classname"}, inplace=True) + +sudsCols = list(sorted(respDFSuds.columns)) +zeepCols = list(sorted(respDFZeep.columns)) + +set_partition(sudsCols, zeepCols) + +for colName in sudsCols: + if not respDFSuds[colName].equals(respDFZeep[colName]): + print('-------', colName, '-------') + # print('Suds', list(respDFSuds[colName])) + # print('Zeep', list(respDFZeep[colName])) -responseClass = b.read('user', {}, max_results=300) -responseDict = responseClass.to_list_dict() -responseDF = list_dict_to_df(responseDict) -print(responseDF) \ No newline at end of file +print(respDFSuds['order']) +print(respDFZeep['order']) \ No newline at end of file diff --git a/bfabric/src/engine_suds.py b/bfabric/src/engine_suds.py index 74d36e95..7e3e6c2c 100644 --- a/bfabric/src/engine_suds.py +++ b/bfabric/src/engine_suds.py @@ -1,4 +1,5 @@ from typing import Union, List +import copy from suds.client import Client @@ -22,18 +23,22 @@ def _get_client(self, endpoint: str): print(e) raise - def read(self, endpoint: str, obj: dict, page: int = 1, idonly: bool = False): + def read(self, endpoint: str, obj: dict, page: int = 1, idonly: bool = False, + includedeletableupdateable: bool = False): """ A generic method which can connect to any endpoint, e.g., workunit, project, order, externaljob, etc, and returns the object with the requested id. obj is a python dictionary which contains all the attributes of the endpoint for the "query". """ + query = copy.deepcopy(obj) + query['includedeletableupdateable'] = includedeletableupdateable - query = dict(login=self.login, page=page, password=self.password, query=obj, idonly=idonly) + full_query = dict(login=self.login, page=page, password=self.password, query=query, + idonly=idonly) client = self._get_client(endpoint) - return client.service.read(query) + return client.service.read(full_query) # TODO: How is client.service.readid different from client.service.read. Do we need this method? def readid(self, endpoint: str, obj: dict, page: int = 1): diff --git a/bfabric/src/engine_zeep.py b/bfabric/src/engine_zeep.py index 6376df83..279a3eb0 100644 --- a/bfabric/src/engine_zeep.py +++ b/bfabric/src/engine_zeep.py @@ -5,17 +5,17 @@ # TODO: Check if this is a bug of BFabric or Zeep. Specifically, see if the same call to bFabricPy has the same bug -def _zeep_query_append_skipped(query: dict, skippedKeys: list) -> dict: +def _zeep_query_append_skipped(query: dict, skipped_keys: list) -> dict: """ This function is used to fix a buggy behaviour of Zeep/BFabric. 
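
# A sketch of what this workaround produces; the optional key name here is
# purely illustrative:

import zeep

query = _zeep_query_append_skipped({'id': 5}, ['includefamily'])
assert query['includefamily'] is zeep.xsd.SkipValue  # the input dict is not modified
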
Specifically, Zeep does not return correct query results if some of the optional parameters are not mentioned in the query. :param query: Original query - :param skippedKeys: Optional keys to skip + :param skipped_keys: Optional keys to skip :return: Adds optional keys to query as skipped values. """ queryThis = query.copy() - for key in skippedKeys: + for key in skipped_keys: queryThis[key] = zeep.xsd.SkipValue return queryThis @@ -39,16 +39,16 @@ def _get_client(self, endpoint: str): print(e) raise - def read(self, endpoint: str, obj: dict, page: int = 1, includedeletableupdateable: bool = True): + def read(self, endpoint: str, obj: dict, page: int = 1, idonly: bool = False, + includedeletableupdateable: bool = False): query = copy.deepcopy(obj) - if includedeletableupdateable: - query['includedeletableupdateable'] = True + query['includedeletableupdateable'] = includedeletableupdateable - fullQuery = dict(login=self.login, page=page, password=self.password, query=query) + full_query = dict(login=self.login, page=page, password=self.password, query=query, idonly=idonly) client = self._get_client(endpoint) with client.settings(strict=False): - return client.service.read(fullQuery) + return client.service.read(full_query) def readid(self, endpoint: str, obj: dict, page: int = 1, includedeletableupdateable: bool = True): raise NotImplementedError("Attempted to use a method `readid` of Zeep, which does not exist") diff --git a/bfabric/src/iter_helper.py b/bfabric/src/iter_helper.py new file mode 100644 index 00000000..ff6b7297 --- /dev/null +++ b/bfabric/src/iter_helper.py @@ -0,0 +1,73 @@ +from typing import Optional, Union +from copy import deepcopy + + +def _recursive_drop_empty(response_elem: Union[list, dict]) -> None: + """ + Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary value is encountered, that is + either an empty list or None, the key-value pair gets deleted from the dictionary + :param response_elem: One of the sub-objects in the hierarchical storage of the response. Initially the root + :return: Nothing + """ + if isinstance(response_elem, list): + for el in response_elem: + _recursive_drop_empty(el) + elif isinstance(response_elem, dict): + keys_to_delete = [] # NOTE: Avoid deleting keys inside iterator, may break iterator + for k, v in response_elem.items(): + if (v is None) or (isinstance(v, list) and len(v) == 0): + keys_to_delete += [k] + else: + _recursive_drop_empty(v) + for k in keys_to_delete: + del response_elem[k] + +def drop_empty_response_elements(response: Union[list, dict], inplace: bool = True) -> Optional[Union[list, dict]]: + """ + Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary value is encountered, that is + either an empty list or None, the key-value pair gets deleted from the dictionary + :param response: A parsed query response, consisting of nested lists, dicts and basic types (int, str) + :param inplace: If true, will return nothing and edit the argument. Otherwise, will preserve the argument + and return an edited copy + :return: Nothing, or an edited response, depending on `inplace` + """ + response_filtered = deepcopy(response) if not inplace else response + _recursive_drop_empty(response_filtered) + return response_filtered + +def _recursive_map_keys(response_elem, keymap: dict) -> None: + """ + Iterates over all nested lists, dictionaries and basic values. 
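
# The public helpers in this module (drop_empty_response_elements above and
# map_response_element_keys below) are meant to be combined like this; toy data:

from bfabric.src.iter_helper import drop_empty_response_elements, map_response_element_keys

resp = {'_id': 5, 'status': None, 'tags': [], 'nested': [{'_id': 7}]}
drop_empty_response_elements(resp, inplace=True)       # removes 'status' and 'tags'
map_response_element_keys(resp, {'_id': 'id'}, inplace=True)
print(resp)   # {'nested': [{'id': 7}], 'id': 5}
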
Whenever a dictionary key is found for which + the mapping is requested, that the key is renamed to the corresponding mapped one + :param response_elem: One of the sub-objects in the hierarchical storage of the response. Initially the root + :param keymap: A map containing key names that should be renamed, and values - the new names. + :return: Nothing + """ + if isinstance(response_elem, list): + for el in response_elem: + _recursive_map_keys(el, keymap) + elif isinstance(response_elem, dict): + keys_to_delete = [] # NOTE: Avoid deleting keys inside iterator, may break iterator + for k, v in response_elem.items(): + _recursive_map_keys(v, keymap) + if k in keymap.keys(): + keys_to_delete += [k] + + for k in keys_to_delete: + response_elem[keymap[k]] = response_elem[k] # Copy old value to the new key + del response_elem[k] # Delete old key + +def map_response_element_keys(response: Union[list, dict], keymap: dict, + inplace: bool = True) -> Union[list, dict]: + """ + Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary key is found for which + the mapping is requested, that the key is renamed to the corresponding mapped one + :param response: A parsed query response, consisting of nested lists, dicts and basic types (int, str) + :param keymap: A map containing key names that should be renamed, and values - the new names. + :param inplace: If true, will return nothing and edit the argument. Otherwise, will preserve the argument + and return an edited copy + :return: The edited response (original or copy, depending on inplace) + """ + response_filtered = deepcopy(response) if not inplace else response + _recursive_map_keys(response_filtered, keymap) + return response_filtered diff --git a/bfabric/src/result_container.py b/bfabric/src/result_container.py index de017c75..b480a121 100644 --- a/bfabric/src/result_container.py +++ b/bfabric/src/result_container.py @@ -65,7 +65,7 @@ def to_list_dict(self): case BfabricResultType.LISTDICT: return self.results case BfabricResultType.LISTSUDS: - return [suds_asdict_recursive(v) for v in self.results] + return [suds_asdict_recursive(v, convert_types=True) for v in self.results] case BfabricResultType.LISTZEEP: return [dict(serialize_object(v)) for v in self.results] case _: From d6c8cccafc6dbbca1a323f9a75f6341ca8c853c4 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Tue, 16 Apr 2024 16:01:39 +0200 Subject: [PATCH 019/129] rewrote test script in a more readable way --- bfabric/examples/compare_raw_xml_zeep_suds.py | 144 ++++++++++++------ 1 file changed, 100 insertions(+), 44 deletions(-) diff --git a/bfabric/examples/compare_raw_xml_zeep_suds.py b/bfabric/examples/compare_raw_xml_zeep_suds.py index f4ceef6d..9f498642 100644 --- a/bfabric/examples/compare_raw_xml_zeep_suds.py +++ b/bfabric/examples/compare_raw_xml_zeep_suds.py @@ -1,7 +1,9 @@ import sys from collections import OrderedDict +from copy import deepcopy from lxml import etree from pprint import pprint +from contextlib import redirect_stdout import zeep import suds @@ -10,6 +12,23 @@ from bfabric.src.suds_format import suds_asdict_recursive from bfabric.src.iter_helper import drop_empty_response_elements, map_response_element_keys +''' +This file is intended to eventually become a test to compare that Zeep and SUDS produce +the same or at least comparable output for the same requests. Important features +* Test if raw XML matches +* Test if parsed response (asdict) matches +For both, it is important to test +* different endpoints (user, run, ...) 
+* single match queries (e.g. {id=5}) vs multi match queries (e.g. {}) + +Observations: +* SUDS produces underscores in front of 'id', 'projectid' and 'classname'. Reasons currently unknown, may also affect + other keywords. Currently, we remove underscores by explicitly providing keywords which to purge +* ZEEP does not match XML + - Zeep generates additional keywords not present in XML, all of them have values None or empty list + - Zeep misses some important keywords like 'id' and 'projectid' inside of nested XML, such as user->project. This + behaviour is inconsistent, and only affects a fraction of users. +''' def read_zeep(wsdl, fullQuery, raw=True): client = zeep.Client(wsdl) @@ -28,36 +47,39 @@ def read_suds(wsdl, fullQuery, raw=True): else: return suds_asdict_recursive(ret, convert_types=True) +def full_query(auth, query: dict, includedeletableupdateable: bool = False) -> dict: + thisQuery = deepcopy(query) + thisQuery['includedeletableupdateable'] = includedeletableupdateable -config, auth = get_system_auth() + return { + 'login': auth.login, + 'password': auth.password, + 'query': thisQuery + } -endpoint = 'user' -wsdl = "".join((config.base_url, '/', endpoint, "?wsdl")) +def calc_both(auth, config, endpoint: str, query: dict, raw: bool = True): + wsdl = "".join((config.base_url, '/', endpoint, "?wsdl")) + fullQuery = full_query(auth, query) + retZeep = read_zeep(wsdl, fullQuery, raw=raw) + retSuds = read_suds(wsdl, fullQuery, raw=raw) + return retZeep, retSuds -fullQuery = { - 'login': auth.login, - 'password': auth.password, - 'query': {'includedeletableupdateable': False} - # 'query': {'id': 9026, 'includedeletableupdateable': False} -} ###################### -# Raw XML comparison +# Raw XML tests ###################### -# retZeepDict = read_zeep(wsdl, fullQuery, raw=False) -# retSudsDict = read_suds(wsdl, fullQuery, raw=False) -# retZeep = read_zeep(wsdl, fullQuery, raw=True) -# retSuds = read_suds(wsdl, fullQuery, raw=True) -# print(retZeep) -# print(retSuds) +def raw_test(auth, config, endpoint, query): + print("Testing raw XML match for", endpoint, query) + retZeep, retSuds = calc_both(auth, config, endpoint, query, raw=True) + assert len(retZeep) == len(retSuds) + assert retZeep == retSuds + print('-- passed --') -# print(len(retZeep)) -# print(len(retSuds)) -# print(retZeep == retSuds) -# print(retZeep) -# print('coachedorder' in str(retZeep)) +config, auth = get_system_auth() +# raw_test(auth, config, 'user', {'id': 9026}) +# raw_test(auth, config, 'user', {}) # root = etree.fromstring(retZeep) # print(etree.tostring(root, pretty_print=True).decode()) @@ -80,11 +102,26 @@ def recursive_get_types(generic_container) -> set: return {type(generic_container)} +def basic_data_type_match_test(auth, config, endpoint, query): + print("Testing data types for", endpoint, query) + retZeepDict, retSudsDict = calc_both(auth, config, endpoint, query, raw=False) + typesZeep = recursive_get_types(retZeepDict) + typesSuds = recursive_get_types(retZeepDict) + print('Zeep', typesZeep) + print('Suds', typesSuds) + + +# basic_data_type_match_test(auth, config, 'user', {'id': 9026}) +# basic_data_type_match_test(auth, config, 'user', {}) + + # Compare two dictionaries/lists recursively. 
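
# For instance, the recursive_get_types helper above reduces a nested parsed
# response to the set of leaf types it contains (toy container):

print(recursive_get_types({'user': [{'id': 1, 'login': 'x', 'order': None}]}))
# -> {<class 'int'>, <class 'str'>, <class 'NoneType'>}
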
Print every time there is a discrepancy -def recursive_comparison(generic_container1, generic_container2, prefix: list): +def recursive_comparison(generic_container1, generic_container2, prefix: list) -> bool: + matched = True + if type(generic_container1) != type(generic_container2): print(prefix, "type", type(generic_container1), "!=", type(generic_container2)) - return + return False if isinstance(generic_container1, dict): allKeys = set(list(generic_container1.keys()) + list(generic_container2.keys())) for k in allKeys: @@ -92,46 +129,65 @@ def recursive_comparison(generic_container1, generic_container2, prefix: list): print(prefix, "Not in 1: ", k, '=', generic_container2[k]) print("- 1:", generic_container1) print("- 2:", generic_container2) + matched = False elif k not in generic_container2: print(prefix, "Not in 2: ", k, '=', generic_container1[k]) + matched = False else: - recursive_comparison(generic_container1[k], generic_container2[k], prefix + [k]) + matched_recursive = recursive_comparison(generic_container1[k], generic_container2[k], prefix + [k]) + matched = matched and matched_recursive elif isinstance(generic_container1, list): if len(generic_container1) != len(generic_container2): print(prefix, "length", len(generic_container1), '!=', len(generic_container2)) + matched = False else: for i, (el1, el2) in enumerate(zip(generic_container1, generic_container2)): - recursive_comparison(el1, el2, prefix + [i]) + matched_recursive = recursive_comparison(el1, el2, prefix + [i]) + matched = matched and matched_recursive else: if generic_container1 != generic_container2: print(prefix, "value", generic_container1, "!=", generic_container2) + matched = False -retZeep = read_zeep(wsdl, fullQuery, raw=False) -retSuds = read_suds(wsdl, fullQuery, raw=False) + return matched -# print("Zeep", retZeep['user'][0]['project'][0]) -# print("Suds", retSuds['user'][0]['project'][0]) +def parsed_data_match_test(auth, config, endpoint, query, drop_empty: bool = True, + drop_underscores_suds: bool = True, log_file_path: str = None): + print("Testing parsed data match for", endpoint, query) + retZeepDict, retSudsDict = calc_both(auth, config, endpoint, query, raw=False) -# print("Zeep", retZeep['user'][0]) -# print("Suds", retSuds['user'][0]) + if drop_empty: + drop_empty_response_elements(retZeepDict, inplace=True) + drop_empty_response_elements(retSudsDict, inplace=True) + if drop_underscores_suds: + map_response_element_keys(retSudsDict, {'_id': 'id', '_classname': 'classname', '_projectid': 'projectid'}, + inplace=True) -drop_empty_response_elements(retZeep, inplace=True) -drop_empty_response_elements(retSuds, inplace=True) -map_response_element_keys(retSuds, {'_id': 'id', '_classname': 'classname', '_projectid': 'projectid'}, inplace=True) + if log_file_path is not None: + with open(log_file_path, 'w') as f: + with redirect_stdout(f): + recursive_comparison(retZeepDict, retSudsDict, prefix = []) + else: + matched = recursive_comparison(retZeepDict, retSudsDict, prefix=[]) + if matched: + print("-- passed --") + else: + print("-- failed --") -# print(len(retZeep)) -# print(retZeep) -# print(retSuds) -# print(recursive_get_types(retZeep)) +# parsed_data_match_test(auth, config, 'user', {'id': 9026}, drop_empty=True, drop_underscores_suds=True, +# log_file_path=None) +# +# parsed_data_match_test(auth, config, 'user', {}, drop_empty=True, drop_underscores_suds=True, +# log_file_path=None) -from contextlib import redirect_stdout +parsed_data_match_test(auth, config, 'run', {}, drop_empty=True, 
drop_underscores_suds=True, + log_file_path=None) -# with open('compare_result.txt', 'w') as f: -# with redirect_stdout(f): -# recursive_comparison(retZeep, retSuds, prefix = []) -recursive_comparison(retZeep, retSuds, prefix = []) +# print("Zeep", retZeep['user'][0]['project'][0]) +# print("Suds", retSuds['user'][0]['project'][0]) -# print(retSuds) \ No newline at end of file +# print("Zeep", retZeep['user'][0]) +# print("Suds", retSuds['user'][0]) \ No newline at end of file From 6d38069ceb69dd576070743c682709494a3bbb17 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Wed, 17 Apr 2024 14:43:12 +0200 Subject: [PATCH 020/129] reformulated comparison examples closer to test-like, added comments, added dict-sorting by key feature for easier comparison of Zeep and Suds --- .../examples/compare_zeep_suds_pagination.py | 91 +++++++++++++++++++ ...eep_suds.py => compare_zeep_suds_query.py} | 8 +- bfabric/examples/read_paginated_single.py | 53 ----------- bfabric/examples/suds_to_listdict.py | 14 --- bfabric/examples/zeep_debug.py | 68 ++++++++++++++ bfabric/src/dict_helper.py | 7 ++ bfabric/src/iter_helper.py | 34 +++++++ bfabric/src/result_container.py | 42 ++++++++- 8 files changed, 241 insertions(+), 76 deletions(-) create mode 100644 bfabric/examples/compare_zeep_suds_pagination.py rename bfabric/examples/{compare_raw_xml_zeep_suds.py => compare_zeep_suds_query.py} (97%) delete mode 100644 bfabric/examples/read_paginated_single.py delete mode 100644 bfabric/examples/suds_to_listdict.py create mode 100644 bfabric/examples/zeep_debug.py create mode 100644 bfabric/src/dict_helper.py diff --git a/bfabric/examples/compare_zeep_suds_pagination.py b/bfabric/examples/compare_zeep_suds_pagination.py new file mode 100644 index 00000000..88332b6a --- /dev/null +++ b/bfabric/examples/compare_zeep_suds_pagination.py @@ -0,0 +1,91 @@ +import os +import pandas as pd + +from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth +from bfabric.src.pandas_helper import list_dict_to_df + +''' +This will eventually become a test that will compare Zeep and Suds paginated output +Strategy: +1. Make a query for 300 entries from user for both Zeep and Suds +2. Concatenate all entries into a dataframe, transforming all hierarchical non-basic types to a string +3. For all practical purposes, the resulting dataframes should be the same + +Observations: +* There are mismatches in the fields of "project" and "formerproject", where about half of projects are not + correctly parsed by Zeep. 
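
# The dataframe flattening in step 2 relies on list_dict_to_df, which unions
# all keys into columns, leaves NaN where a key is missing and stringifies
# non-basic values; a toy illustration:

from bfabric.src.pandas_helper import list_dict_to_df

df = list_dict_to_df([{'cat': 1, 'dog': 2}, {'cat': 3, 'mouse': ['a', 'b']}])
print(df)   # row 0 has NaN for 'mouse'; the list value appears as "['a', 'b']"
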
+''' + + +def report_test_result(rez: bool, prefix: str): + if rez: + print("--", prefix, "test passed --") + else: + print("--", prefix, "test failed --") + +def _calc_query(config, auth, engine, endpoint): + print("Sending query via", engine) + b = Bfabric(config, auth, engine=engine) + + response_class = b.read(endpoint, {}, max_results=300, idonly=False, includedeletableupdateable=True) + response_dict = response_class.to_list_dict(drop_empty=True, drop_underscores_suds=True, + have_sort_responses=True) + return list_dict_to_df(response_dict) + +def _set_partition_test(a, b) -> bool: + aSet = set(a) + bSet = set(b) + shared = aSet.intersection(bSet) + unique1 = aSet - bSet + unique2 = bSet - aSet + + print("Shared:", shared) + print("Unique(1):", unique1) + print("Unique(2):", unique2) + + # Test passes if there are no entities unique to only one of the sets + return (len(unique1) == 0) and (len(unique2) == 0) + +def dataframe_pagination_test(config, auth, endpoint, use_cached: bool = False, store_cached: bool = True): + pwd_zeep = "tmp_zeep_" + endpoint + ".csv" + pwd_suds = "tmp_suds_" + endpoint + ".csv" + + if use_cached and os.path.isfile(pwd_zeep) and os.path.isfile(pwd_suds): + print("Reading cached dataframes for", endpoint) + resp_df_suds = pd.read_csv(pwd_zeep) + resp_df_zeep = pd.read_csv(pwd_suds) + else: + print("Running queries from scratch for", endpoint) + resp_df_suds = _calc_query(config, auth, BfabricAPIEngineType.SUDS, endpoint) + resp_df_zeep = _calc_query(config, auth, BfabricAPIEngineType.ZEEP, endpoint) + if store_cached: + resp_df_suds.to_csv(pwd_zeep) + resp_df_zeep.to_csv(pwd_suds) + + # Rename suds to remove underscores + resp_df_suds.rename(columns={"_id": "id", "_classname": "classname"}, inplace=True) + + suds_cols = list(sorted(resp_df_suds.columns)) + zeep_cols = list(sorted(resp_df_zeep.columns)) + + # Report + set_test_result = _set_partition_test(suds_cols, zeep_cols) + report_test_result(set_test_result, "set") + if not set_test_result: + return False + + match_test_result = True + for col_name in suds_cols: + if not resp_df_suds[col_name].equals(resp_df_zeep[col_name]): + print('------- Mismatch in: ', col_name, '-------') + print('Suds', list(resp_df_suds[col_name])) + print('Zeep', list(resp_df_zeep[col_name])) + match_test_result = False + + return match_test_result + + +config, auth = get_system_auth() + +result = dataframe_pagination_test(config, auth, 'user', use_cached=False, store_cached=True) +report_test_result(result, "pagination") diff --git a/bfabric/examples/compare_raw_xml_zeep_suds.py b/bfabric/examples/compare_zeep_suds_query.py similarity index 97% rename from bfabric/examples/compare_raw_xml_zeep_suds.py rename to bfabric/examples/compare_zeep_suds_query.py index 9f498642..2b99e395 100644 --- a/bfabric/examples/compare_raw_xml_zeep_suds.py +++ b/bfabric/examples/compare_zeep_suds_query.py @@ -177,14 +177,14 @@ def parsed_data_match_test(auth, config, endpoint, query, drop_empty: bool = Tru print("-- failed --") -# parsed_data_match_test(auth, config, 'user', {'id': 9026}, drop_empty=True, drop_underscores_suds=True, -# log_file_path=None) +parsed_data_match_test(auth, config, 'user', {'id': 9026}, drop_empty=True, drop_underscores_suds=True, + log_file_path=None) # # parsed_data_match_test(auth, config, 'user', {}, drop_empty=True, drop_underscores_suds=True, # log_file_path=None) -parsed_data_match_test(auth, config, 'run', {}, drop_empty=True, drop_underscores_suds=True, - log_file_path=None) +# parsed_data_match_test(auth, 
config, 'run', {}, drop_empty=True, drop_underscores_suds=True, +# log_file_path=None) # print("Zeep", retZeep['user'][0]['project'][0]) # print("Suds", retSuds['user'][0]['project'][0]) diff --git a/bfabric/examples/read_paginated_single.py b/bfabric/examples/read_paginated_single.py deleted file mode 100644 index 683388d6..00000000 --- a/bfabric/examples/read_paginated_single.py +++ /dev/null @@ -1,53 +0,0 @@ -import pandas as pd - -from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth -from bfabric.src.pandas_helper import list_dict_to_df - -''' -Alpha-version of a system test, which would eventually make -basic queries to both Zeep and SUDS and test that the parsed responses match -''' - - -config, auth = get_system_auth() - -def calc_query(engine): - print("Sending query via", engine) - b = Bfabric(config, auth, engine=engine) - - responseClass = b.read('user', {}, max_results=300, idonly=False, includedeletableupdateable=True) - responseDict = responseClass.to_list_dict() - return list_dict_to_df(responseDict) - -def set_partition(a, b) -> None: - aSet = set(a) - bSet = set(b) - print("Shared:", aSet.intersection(bSet)) - print("Unique(1):", aSet - bSet) - print("Unique(2):", bSet - aSet) - -# respDFSuds = calc_query(BfabricAPIEngineType.SUDS) -# respDFZeep = calc_query(BfabricAPIEngineType.ZEEP) -# -# respDFSuds.to_csv("tmp_suds.csv") -# respDFZeep.to_csv("tmp_zeep.csv") -respDFSuds = pd.read_csv("tmp_suds.csv") -respDFZeep = pd.read_csv("tmp_zeep.csv") - -# Rename suds to remove underscores -respDFSuds.rename(columns={"_id": "id", "_classname": "classname"}, inplace=True) - -sudsCols = list(sorted(respDFSuds.columns)) -zeepCols = list(sorted(respDFZeep.columns)) - -set_partition(sudsCols, zeepCols) - -for colName in sudsCols: - if not respDFSuds[colName].equals(respDFZeep[colName]): - print('-------', colName, '-------') - # print('Suds', list(respDFSuds[colName])) - # print('Zeep', list(respDFZeep[colName])) - - -print(respDFSuds['order']) -print(respDFZeep['order']) \ No newline at end of file diff --git a/bfabric/examples/suds_to_listdict.py b/bfabric/examples/suds_to_listdict.py deleted file mode 100644 index 179b6277..00000000 --- a/bfabric/examples/suds_to_listdict.py +++ /dev/null @@ -1,14 +0,0 @@ -from bfabric.bfabric2 import Bfabric -from bfabric.src.suds_format import suds_asdict_recursive -from typing import List - - -def read_shallow(b: Bfabric, endpoint: str, obj: dict) -> List[dict]: - response = b.read(endpoint, obj) - response_dict = suds_asdict_recursive(response) - return response_dict[endpoint] - -b = Bfabric() -res_list_dict = read_shallow(b, 'user', {'login': 'fomins'}) - -print(res_list_dict) \ No newline at end of file diff --git a/bfabric/examples/zeep_debug.py b/bfabric/examples/zeep_debug.py new file mode 100644 index 00000000..feff6da9 --- /dev/null +++ b/bfabric/examples/zeep_debug.py @@ -0,0 +1,68 @@ +from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth +import zeep +from copy import deepcopy +from lxml import etree + +''' +Attempt to understand why Zeep does not correctly parse XML +* Problem 1: (minor) Zeep generates additional Null fields not available in XML, but likely (hypothetically) available in XSD +* Problem 2: (major) Zeep fails to parse parameters in some users. + +Effort: +[+] helpers.serialize_object is NOT the culprit, the parsed XMLResponse is already missing the values. 
+[-] Manipulating client.settings does not seem to affect the output + +Intermediate conclusions: +* Both behaviours are most likely internal bugs of Zeep. Unfortunately, developer does not respond to issues at the moment. +''' + + +def full_query(auth, query: dict, includedeletableupdateable: bool = False) -> dict: + thisQuery = deepcopy(query) + thisQuery['includedeletableupdateable'] = includedeletableupdateable + + return { + 'login': auth.login, + 'password': auth.password, + 'query': thisQuery + } + +def read_zeep(wsdl, fullQuery, raw=True): + client = zeep.Client(wsdl) + with client.settings(strict=False, raw_response=raw): + ret = client.service.read(fullQuery) + if raw: + return ret.content + else: + return ret + + +def read(auth, config, endpoint: str, query: dict, raw: bool = True): + wsdl = "".join((config.base_url, '/', endpoint, "?wsdl")) + fullQuery = full_query(auth, query) + return read_zeep(wsdl, fullQuery, raw=raw) + + +config, auth = get_system_auth() + +print('============== RAW ================') + +rez = read(auth, config, 'user', {'id': 9026}, raw = True) +root = etree.fromstring(rez) +print(etree.tostring(root, pretty_print=True).decode()) + +rez = read(auth, config, 'user', {'id': 9026}, raw = False) + +print('============== ORIG ================') +print(rez['user'][0]['project']) +print(rez['user'][0]['project']['id']) + +# trg = rez['project'] +# +# print('============== ORIG ================') +# print(trg) +# +# +# print('============== SERIAL ================') +# +# print(zeep.helpers.serialize_object(trg, target_cls=dict)) diff --git a/bfabric/src/dict_helper.py b/bfabric/src/dict_helper.py new file mode 100644 index 00000000..d6464e60 --- /dev/null +++ b/bfabric/src/dict_helper.py @@ -0,0 +1,7 @@ +def sort_dict(d: dict) -> dict: + """ + :param d: A dictionary + :return: A dictionary with items sorted by key. + Affects how the dictionary appears, when mapped to a string + """ + return dict(sorted(d.items())) diff --git a/bfabric/src/iter_helper.py b/bfabric/src/iter_helper.py index ff6b7297..e277aca5 100644 --- a/bfabric/src/iter_helper.py +++ b/bfabric/src/iter_helper.py @@ -1,5 +1,8 @@ from typing import Optional, Union from copy import deepcopy +from collections import OrderedDict + +from bfabric.src.dict_helper import sort_dict def _recursive_drop_empty(response_elem: Union[list, dict]) -> None: @@ -71,3 +74,34 @@ def map_response_element_keys(response: Union[list, dict], keymap: dict, response_filtered = deepcopy(response) if not inplace else response _recursive_map_keys(response_filtered, keymap) return response_filtered + +def _recursive_sort_dicts(response_elem) -> None: + """ + Iterates over all nested lists, dictionaries and basic values. Whenever a nested dictionary is found, it is sorted + by key by converting into OrderedDict and back + :param response_elem: One of the sub-objects in the hierarchical storage of the response. Initially the root + :return: Nothing + """ + if isinstance(response_elem, list): + for idx, el in enumerate(response_elem): + if isinstance(el, dict): + response_elem[idx] = sort_dict(el) + _recursive_sort_dicts(el) + elif isinstance(response_elem, dict): + for k, v in response_elem.items(): + if isinstance(v, dict): + response_elem[k] = sort_dict(v) + _recursive_sort_dicts(v) + +def sort_response_dicts(response: Union[list, dict], inplace: bool = True) -> Optional[Union[list, dict]]: + """ + Iterates over all nested lists, dictionaries and basic values. 
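
# The key-sorting below builds on sort_dict from dict_helper, which simply
# rebuilds a dict with its keys in sorted order:

from bfabric.src.dict_helper import sort_dict

print(sort_dict({'b': 2, 'a': 1, 'c': 3}))   # {'a': 1, 'b': 2, 'c': 3}
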
Whenever a nested dictionary is found, it is sorted + by key by converting into OrderedDict and back + :param response: A parsed query response, consisting of nested lists, dicts and basic types (int, str) + :param inplace: If true, will return nothing and edit the argument. Otherwise, will preserve the argument + and return an edited copy + :return: Nothing, or an edited response, depending on `inplace` + """ + response_filtered = deepcopy(response) if not inplace else response + _recursive_sort_dicts(response_filtered) + return response_filtered diff --git a/bfabric/src/result_container.py b/bfabric/src/result_container.py index b480a121..9e8b49fa 100644 --- a/bfabric/src/result_container.py +++ b/bfabric/src/result_container.py @@ -3,11 +3,13 @@ # this should become default behaviour in one of the future versions of python. Remove this import # once it is no longer necessary from __future__ import annotations - from enum import Enum -from bfabric.src.suds_format import suds_asdict_recursive + from zeep.helpers import serialize_object +from bfabric.src.suds_format import suds_asdict_recursive +from bfabric.src.iter_helper import drop_empty_response_elements, map_response_element_keys, sort_response_dicts + class BfabricResultType(Enum): LISTDICT = 1 @@ -15,6 +17,21 @@ class BfabricResultType(Enum): LISTZEEP = 3 +def _clean_result(rez: dict, drop_empty: bool = True, drop_underscores_suds: bool = True, + have_sort_responses: bool = False) -> dict: + if drop_empty: + drop_empty_response_elements(rez, inplace=True) + + if drop_underscores_suds: + map_response_element_keys(rez, + {'_id': 'id', '_classname': 'classname', '_projectid': 'projectid'}, + inplace=True) + if have_sort_responses: + sort_response_dicts(rez, inplace=True) + + return rez + + class ResultContainer: def __init__(self, results: list, result_type: BfabricResultType, total_pages_api: int = None): """ @@ -60,13 +77,28 @@ def append(self, other: ResultContainer) -> None: def total_pages_api(self): return self.total_pages_api - def to_list_dict(self): + def to_list_dict(self, drop_empty: bool = True, drop_underscores_suds: bool = True, + have_sort_responses: bool = False): match self.result_type: case BfabricResultType.LISTDICT: return self.results case BfabricResultType.LISTSUDS: - return [suds_asdict_recursive(v, convert_types=True) for v in self.results] + results = [] + for rez in self.results: + rez_parsed = suds_asdict_recursive(rez, convert_types=True) + rez_parsed = _clean_result(rez_parsed, drop_empty=drop_empty, + drop_underscores_suds=drop_underscores_suds, + have_sort_responses=have_sort_responses) + results += [rez_parsed] + return results case BfabricResultType.LISTZEEP: - return [dict(serialize_object(v)) for v in self.results] + results = [] + for rez in self.results: + rez_parsed = dict(serialize_object(rez, target_cls=dict)) + rez_parsed = _clean_result(rez_parsed, drop_empty=drop_empty, + drop_underscores_suds=False, # NOTE: Underscore problem specific to SUDS + have_sort_responses=have_sort_responses) + results += [rez_parsed] + return results case _: raise ValueError("Unexpected results type", self.result_type) From ece5b9b558e3cdb4296f74ca2f0c078ea6d89174 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Wed, 17 Apr 2024 15:02:57 +0200 Subject: [PATCH 021/129] minor naming improvement --- bfabric/examples/compare_zeep_suds_query.py | 12 +++++------ ...iter_helper.py => response_format_dict.py} | 16 +++++++-------- ...suds_format.py => response_format_suds.py} | 0 bfabric/src/result_container.py | 20 
+++++++++---------- 4 files changed, 24 insertions(+), 24 deletions(-) rename bfabric/src/{iter_helper.py => response_format_dict.py} (91%) rename bfabric/src/{suds_format.py => response_format_suds.py} (100%) diff --git a/bfabric/examples/compare_zeep_suds_query.py b/bfabric/examples/compare_zeep_suds_query.py index 2b99e395..fc426be5 100644 --- a/bfabric/examples/compare_zeep_suds_query.py +++ b/bfabric/examples/compare_zeep_suds_query.py @@ -9,8 +9,8 @@ import suds from bfabric.bfabric2 import get_system_auth -from bfabric.src.suds_format import suds_asdict_recursive -from bfabric.src.iter_helper import drop_empty_response_elements, map_response_element_keys +from bfabric.src.response_format_suds import suds_asdict_recursive +from bfabric.src.response_format_dict import drop_empty_elements, map_element_keys ''' This file is intended to eventually become a test to compare that Zeep and SUDS produce @@ -157,12 +157,12 @@ def parsed_data_match_test(auth, config, endpoint, query, drop_empty: bool = Tru retZeepDict, retSudsDict = calc_both(auth, config, endpoint, query, raw=False) if drop_empty: - drop_empty_response_elements(retZeepDict, inplace=True) - drop_empty_response_elements(retSudsDict, inplace=True) + drop_empty_elements(retZeepDict, inplace=True) + drop_empty_elements(retSudsDict, inplace=True) if drop_underscores_suds: - map_response_element_keys(retSudsDict, {'_id': 'id', '_classname': 'classname', '_projectid': 'projectid'}, - inplace=True) + map_element_keys(retSudsDict, {'_id': 'id', '_classname': 'classname', '_projectid': 'projectid'}, + inplace=True) if log_file_path is not None: with open(log_file_path, 'w') as f: diff --git a/bfabric/src/iter_helper.py b/bfabric/src/response_format_dict.py similarity index 91% rename from bfabric/src/iter_helper.py rename to bfabric/src/response_format_dict.py index e277aca5..8315ff5b 100644 --- a/bfabric/src/iter_helper.py +++ b/bfabric/src/response_format_dict.py @@ -25,7 +25,7 @@ def _recursive_drop_empty(response_elem: Union[list, dict]) -> None: for k in keys_to_delete: del response_elem[k] -def drop_empty_response_elements(response: Union[list, dict], inplace: bool = True) -> Optional[Union[list, dict]]: +def drop_empty_elements(response: Union[list, dict], inplace: bool = True) -> Optional[Union[list, dict]]: """ Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary value is encountered, that is either an empty list or None, the key-value pair gets deleted from the dictionary @@ -60,8 +60,8 @@ def _recursive_map_keys(response_elem, keymap: dict) -> None: response_elem[keymap[k]] = response_elem[k] # Copy old value to the new key del response_elem[k] # Delete old key -def map_response_element_keys(response: Union[list, dict], keymap: dict, - inplace: bool = True) -> Union[list, dict]: +def map_element_keys(response: Union[list, dict], keymap: dict, + inplace: bool = True) -> Union[list, dict]: """ Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary key is found for which the mapping is requested, that the key is renamed to the corresponding mapped one @@ -75,7 +75,7 @@ def map_response_element_keys(response: Union[list, dict], keymap: dict, _recursive_map_keys(response_filtered, keymap) return response_filtered -def _recursive_sort_dicts(response_elem) -> None: +def _recursive_sort_dicts_by_key(response_elem) -> None: """ Iterates over all nested lists, dictionaries and basic values. 
Whenever a nested dictionary is found, it is sorted by key by converting into OrderedDict and back @@ -86,14 +86,14 @@ def _recursive_sort_dicts(response_elem) -> None: for idx, el in enumerate(response_elem): if isinstance(el, dict): response_elem[idx] = sort_dict(el) - _recursive_sort_dicts(el) + _recursive_sort_dicts_by_key(el) elif isinstance(response_elem, dict): for k, v in response_elem.items(): if isinstance(v, dict): response_elem[k] = sort_dict(v) - _recursive_sort_dicts(v) + _recursive_sort_dicts_by_key(v) -def sort_response_dicts(response: Union[list, dict], inplace: bool = True) -> Optional[Union[list, dict]]: +def sort_dicts_by_key(response: Union[list, dict], inplace: bool = True) -> Optional[Union[list, dict]]: """ Iterates over all nested lists, dictionaries and basic values. Whenever a nested dictionary is found, it is sorted by key by converting into OrderedDict and back @@ -103,5 +103,5 @@ def sort_response_dicts(response: Union[list, dict], inplace: bool = True) -> Op :return: Nothing, or an edited response, depending on `inplace` """ response_filtered = deepcopy(response) if not inplace else response - _recursive_sort_dicts(response_filtered) + _recursive_sort_dicts_by_key(response_filtered) return response_filtered diff --git a/bfabric/src/suds_format.py b/bfabric/src/response_format_suds.py similarity index 100% rename from bfabric/src/suds_format.py rename to bfabric/src/response_format_suds.py diff --git a/bfabric/src/result_container.py b/bfabric/src/result_container.py index 9e8b49fa..257aa00a 100644 --- a/bfabric/src/result_container.py +++ b/bfabric/src/result_container.py @@ -7,8 +7,8 @@ from zeep.helpers import serialize_object -from bfabric.src.suds_format import suds_asdict_recursive -from bfabric.src.iter_helper import drop_empty_response_elements, map_response_element_keys, sort_response_dicts +from bfabric.src.response_format_suds import suds_asdict_recursive +import bfabric.src.response_format_dict as formatter class BfabricResultType(Enum): @@ -18,16 +18,16 @@ class BfabricResultType(Enum): def _clean_result(rez: dict, drop_empty: bool = True, drop_underscores_suds: bool = True, - have_sort_responses: bool = False) -> dict: + sort_responses: bool = False) -> dict: if drop_empty: - drop_empty_response_elements(rez, inplace=True) + formatter.drop_empty_elements(rez, inplace=True) if drop_underscores_suds: - map_response_element_keys(rez, + formatter.map_element_keys(rez, {'_id': 'id', '_classname': 'classname', '_projectid': 'projectid'}, inplace=True) - if have_sort_responses: - sort_response_dicts(rez, inplace=True) + if sort_responses: + formatter.sort_dicts_by_key(rez, inplace=True) return rez @@ -88,7 +88,7 @@ def to_list_dict(self, drop_empty: bool = True, drop_underscores_suds: bool = Tr rez_parsed = suds_asdict_recursive(rez, convert_types=True) rez_parsed = _clean_result(rez_parsed, drop_empty=drop_empty, drop_underscores_suds=drop_underscores_suds, - have_sort_responses=have_sort_responses) + sort_responses=have_sort_responses) results += [rez_parsed] return results case BfabricResultType.LISTZEEP: @@ -96,8 +96,8 @@ def to_list_dict(self, drop_empty: bool = True, drop_underscores_suds: bool = Tr for rez in self.results: rez_parsed = dict(serialize_object(rez, target_cls=dict)) rez_parsed = _clean_result(rez_parsed, drop_empty=drop_empty, - drop_underscores_suds=False, # NOTE: Underscore problem specific to SUDS - have_sort_responses=have_sort_responses) + drop_underscores_suds=False, # NOTE: Underscore problem specific to SUDS + 
sort_responses=have_sort_responses) results += [rez_parsed] return results case _: From de7992bc62f583489bd80c81b1f8e025e135a05e Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Wed, 17 Apr 2024 16:01:43 +0200 Subject: [PATCH 022/129] added bfabric.exists() to check if objects with requested ids exist. Modularized pagination iterator --- bfabric/bfabric2.py | 59 +++++++++++++++++++++++--------- bfabric/examples/exists_multi.py | 16 +++++++++ bfabric/src/paginator.py | 12 +++++++ 3 files changed, 70 insertions(+), 17 deletions(-) create mode 100644 bfabric/examples/exists_multi.py create mode 100644 bfabric/src/paginator.py diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index cc3f7927..21d4c22d 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -32,15 +32,13 @@ from bfabric.src.engine_suds import EngineSUDS from bfabric.src.engine_zeep import EngineZeep from bfabric.src.result_container import ResultContainer, BfabricResultType +from bfabric.src.paginator import page_iter, BFABRIC_QUERY_LIMIT from bfabric.bfabric_config import BfabricAuth, BfabricConfig, parse_bfabricrc_py class BfabricAPIEngineType(Enum): SUDS = 1 ZEEP = 2 -# Single page query limit for BFabric API (as of time of writing, adapt if it changes) -BFABRIC_QUERY_LIMIT = 100 - def get_system_auth(): path_bfabricrc = os.path.normpath(os.path.expanduser("~/.bfabricrc.py")) @@ -156,9 +154,9 @@ def read_multi(self, endpoint: str, obj: dict, multi_query_key: str, multi_query response_tot = ResultContainer([], self.result_type, total_pages_api = 0) obj_exteded = deepcopy(obj) # Make a copy of the query, not to make edits to the argument - for i in range(0, len(multi_query_vals), BFABRIC_QUERY_LIMIT): - # Limit the multi-query parameter to an acceptable chunk size - obj_exteded[multi_query_key] = multi_query_vals[i:i + BFABRIC_QUERY_LIMIT] + # Iterate over request chunks that fit into a single API page + for page_vals in page_iter(multi_query_vals): + obj_exteded[multi_query_key] = page_vals # TODO: Test what happens if there are multiple responses to each of the individual queries. # * What would happen? 
@@ -173,14 +171,12 @@ def read_multi(self, endpoint: str, obj: dict, multi_query_key: str, multi_query return response_tot def save_multi(self, endpoint: str, obj_lst: list, **kwargs) -> ResultContainer: - # We must account for the possibility that the number of query values exceeds the BFabric maximum, - # so we must split it into smaller chunks - response_tot = ResultContainer([], self.result_type, total_pages_api = 0) - for i in range(0, len(obj_lst), BFABRIC_QUERY_LIMIT): - obj_list_this = obj_lst[i:i + BFABRIC_QUERY_LIMIT] - response_this = self.save(endpoint, obj_list_this, **kwargs) - response_tot.append(response_this) + + # Iterate over request chunks that fit into a single API page + for page_objs in page_iter(obj_lst): + response_page = self.save(endpoint, page_objs, **kwargs) + response_tot.append(response_page) return response_tot @@ -191,9 +187,38 @@ def delete_multi(self, endpoint: str, id_list: list) -> ResultContainer: print('Warning, empty list provided for deletion, ignoring') return response_tot - for i in range(0, len(id_list), BFABRIC_QUERY_LIMIT): - id_list_this = id_list[i:i + BFABRIC_QUERY_LIMIT] - response_this = self.delete(endpoint, id_list_this) - response_tot.append(response_this) + # Iterate over request chunks that fit into a single API page + for page_ids in page_iter(id_list): + response_page = self.delete(endpoint, page_ids) + response_tot.append(response_page) return response_tot + + def exists(self, endpoint: str, id: Union[List, int]) -> Union[bool, List[bool]]: + """ + :param endpoint: endpoint + :param id: an id or a list of ids + :return: for each + """ + + # 1. Read data for this id + if isinstance(id, int): + results = self.read(endpoint, {'id': id}) + elif isinstance(id, list): + results = self.read_multi(endpoint, {}, 'id', id) + else: + raise ValueError("Unexpected data type", type(id)) + + # 2. Extract all the ids for which there was a response + result_ids = [] + for r in results.results: + if 'id' in r: + result_ids += [r['id']] + elif '_id' in r: # TODO: Remove this if SUDS bug is ever resolved + result_ids += [r['_id']] + + # 3. 
For each of the requested ids, return true if there was a response and false if there was not + if isinstance(id, int): + return id in result_ids + else: + return [id_this in result_ids for id_this in id] diff --git a/bfabric/examples/exists_multi.py b/bfabric/examples/exists_multi.py new file mode 100644 index 00000000..1b852a79 --- /dev/null +++ b/bfabric/examples/exists_multi.py @@ -0,0 +1,16 @@ +from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth +from bfabric.src.pandas_helper import list_dict_to_df + + +config, auth = get_system_auth() + +b1 = Bfabric(config, auth, engine = BfabricAPIEngineType.SUDS) +b2 = Bfabric(config, auth, engine = BfabricAPIEngineType.ZEEP) + +target_user_ids = [1,2,3,4,5, 12345] + +response1 = b1.exists("user", target_user_ids) +response2 = b2.exists("user", target_user_ids) + +print(response1) +print(response2) diff --git a/bfabric/src/paginator.py b/bfabric/src/paginator.py new file mode 100644 index 00000000..f1a183e9 --- /dev/null +++ b/bfabric/src/paginator.py @@ -0,0 +1,12 @@ +# Single page query limit for BFabric API (as of time of writing, adapt if it changes) +BFABRIC_QUERY_LIMIT = 100 + +def page_iter(objs: list, page_size: int = BFABRIC_QUERY_LIMIT) -> list: + """ + :param objs: A list of objects to provide to bfabric as part of a query + :param page_size: Number of objects per page + :return: An iterator over chunks that would be sent to bfabric, 1 chunk per query + """ + + for i in range(0, len(objs), page_size): + yield objs[i:i + page_size] From 250aaf02ba797319148142a04bb234a37cc73be2 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Wed, 17 Apr 2024 17:08:18 +0200 Subject: [PATCH 023/129] extended exists interface to arbitrary key. Makes sense to also query it by name --- bfabric/bfabric2.py | 35 +++++++++++++++++--------------- bfabric/examples/exists_multi.py | 26 +++++++++++++++++++----- bfabric/examples/save_delete.py | 21 +++++++++++++++++++ 3 files changed, 61 insertions(+), 21 deletions(-) create mode 100644 bfabric/examples/save_delete.py diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 21d4c22d..689674ad 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -194,31 +194,34 @@ def delete_multi(self, endpoint: str, id_list: list) -> ResultContainer: return response_tot - def exists(self, endpoint: str, id: Union[List, int]) -> Union[bool, List[bool]]: + def exists(self, endpoint: str, key: str, value: Union[List, Union[int, str]]) -> Union[bool, List[bool]]: """ :param endpoint: endpoint - :param id: an id or a list of ids - :return: for each + :param key: A key for the query (e.g. id or name) + :param value: A value or a list of values + :return: Return a single bool or a list of bools for each value + For each value, test if a key with that value is found in the API. """ + is_scalar = isinstance(value, (int, str)) # 1. Read data for this id - if isinstance(id, int): - results = self.read(endpoint, {'id': id}) - elif isinstance(id, list): - results = self.read_multi(endpoint, {}, 'id', id) + if is_scalar: + results = self.read(endpoint, {key: value}) + elif isinstance(value, list): + results = self.read_multi(endpoint, {}, key, value) else: - raise ValueError("Unexpected data type", type(id)) + raise ValueError("Unexpected data type", type(value)) # 2. 
Extract all the ids for which there was a response
-        result_ids = []
+        result_vals = []
         for r in results.results:
-            if 'id' in r:
-                result_ids += [r['id']]
-            elif '_id' in r:    # TODO: Remove this if SUDS bug is ever resolved
-                result_ids += [r['_id']]
+            if key in r:
+                result_vals += [r[key]]
+            elif '_' + key in r:    # TODO: Remove this if SUDS bug is ever resolved
+                result_vals += [r['_' + key]]
 
         # 3. For each of the requested ids, return true if there was a response and false if there was not
-        if isinstance(id, int):
-            return id in result_ids
+        if is_scalar:
+            return value in result_vals
         else:
-            return [id_this in result_ids for id_this in id]
+            return [val in result_vals for val in value]
diff --git a/bfabric/examples/exists_multi.py b/bfabric/examples/exists_multi.py
index 1b852a79..9696ca53 100644
--- a/bfabric/examples/exists_multi.py
+++ b/bfabric/examples/exists_multi.py
@@ -1,5 +1,4 @@
 from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth
-from bfabric.src.pandas_helper import list_dict_to_df
 
 
 config, auth = get_system_auth()
@@ -7,10 +6,27 @@
 b1 = Bfabric(config, auth, engine = BfabricAPIEngineType.SUDS)
 b2 = Bfabric(config, auth, engine = BfabricAPIEngineType.ZEEP)
 
-target_user_ids = [1,2,3,4,5, 12345]
 
-response1 = b1.exists("user", target_user_ids)
-response2 = b2.exists("user", target_user_ids)
+
+###################
+# Testing IDs
+###################
+
+# target_user_ids = [1,2,3,4,5, 12345]
+#
+# response1 = b1.exists("user", 'id', target_user_ids)
+# response2 = b2.exists("user", 'id', target_user_ids)
+#
+# print(response1)
+# print(response2)
+
+###################
+# Testing Names
+###################
+
+target_workunit_names = ['tomcat', 'tomcat2']
+
+response1 = b1.exists("workunit", 'name', target_workunit_names)
+response2 = b2.exists("workunit", 'name', target_workunit_names)
 
 print(response1)
-print(response2)
+print(response2)
\ No newline at end of file
diff --git a/bfabric/examples/save_delete.py b/bfabric/examples/save_delete.py
new file mode 100644
index 00000000..3dfa5bdd
--- /dev/null
+++ b/bfabric/examples/save_delete.py
@@ -0,0 +1,21 @@
+from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth
+
+
+config, auth = get_system_auth()
+
+b1 = Bfabric(config, auth, engine = BfabricAPIEngineType.SUDS)
+# b2 = Bfabric(config, auth, engine = BfabricAPIEngineType.ZEEP)
+
+workunit1 = {'name': 'tomcat', 'applicationid': 2, 'description': 'is warm and fluffy', 'containerid': 123}
+response = b1.save('workunit', workunit1)
+
+print(response.results[0])
+
+
+# target_user_ids = [1,2,3,4,5, 12345]
+#
+# response1 = b1.exists("user", target_user_ids)
+# response2 = b2.exists("user", target_user_ids)
+#
+# print(response1)
+# print(response2)
\ No newline at end of file
From 491c30ac141139a01253b3ab89fe2e85556d2458 Mon Sep 17 00:00:00 2001
From: Aleksejs Fomins
Date: Wed, 17 Apr 2024 17:45:44 +0200
Subject: [PATCH 024/129] experimenting with exists-save-delete pipeline

---
 bfabric/bfabric2.py             |  3 ++
 bfabric/examples/save_delete.py | 52 ++++++++++++++++++++++++++------
 2 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py
index 689674ad..4937dd7a 100755
--- a/bfabric/bfabric2.py
+++ b/bfabric/bfabric2.py
@@ -170,6 +170,9 @@ def read_multi(self, endpoint: str, obj: dict, multi_query_key: str, multi_query
 
         return response_tot
 
+    # TODO: This is likely useless. When saving multiple objects, they all have different fields. 
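+    #   (A purely illustrative, hypothetical obj_lst showing the problem:
+    #    [{'name': 'a', 'applicationid': 2}, {'name': 'b', 'containerid': 123}]
+    #    shares no common field layout, so a single bulk save query cannot describe both.)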
+ # One option would be to provide a dataframe, but it might struggle with nested dicts + # Likely best solution is to not provide this method, and let users run a for-loop themselves. def save_multi(self, endpoint: str, obj_lst: list, **kwargs) -> ResultContainer: response_tot = ResultContainer([], self.result_type, total_pages_api = 0) diff --git a/bfabric/examples/save_delete.py b/bfabric/examples/save_delete.py index 3dfa5bdd..1227f478 100644 --- a/bfabric/examples/save_delete.py +++ b/bfabric/examples/save_delete.py @@ -1,21 +1,53 @@ from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth +def find_delete_existing_objects_by_name(b: Bfabric, endpoint: str, name_list: list) -> None: + # 1. Check which objects exist + objs_exist = b1.exists(endpoint, 'name', name_list) + objs_exist_names = [name for i, name in enumerate(all_names) if objs_exist[i]] + if len(objs_exist_names) == 0: + print("No", endpoint, "exists") + else: + print("Already exist:", objs_exist_names) + + ids_to_delete = [] + for name in enumerate(objs_exist_names): + # 2.1 Get IDs of all existing workunits + response_dict = b.read(endpoint, {'name': name}).to_list_dict() + ids_this = [r['id'] for r in response_dict] + + print('--', name, 'exist with ids', ids_this) + ids_to_delete += ids_this + + # Delete + response_dict = b.delete(endpoint, ids_to_delete).to_list_dict() + print('Deletion results:', response_dict) + + +# TODO: Check if works with ZEEP +# TODO: Why id=1525 matches random name queries, but is not deleteable??? +# TODO: Adapt to tests config, auth = get_system_auth() b1 = Bfabric(config, auth, engine = BfabricAPIEngineType.SUDS) # b2 = Bfabric(config, auth, engine = BfabricAPIEngineType.ZEEP) -workunit1 = {'name': 'tomcat', 'applicationid': 2, 'description': 'is warm and fluffy', 'containerid': 123} -response = b1.save('workunit', workunit1) +endpoint = 'workunit' +workunit_names = ['MewThePokemon', 'TomMGM', 'MinkyLeChat'] +fake_name = 'SpikeTheDog' +all_names = workunit_names + [fake_name] + + +# 1. Find and delete any workunits with these names, if they already exist +find_delete_existing_objects_by_name(b1, endpoint, all_names) -print(response.results[0]) +# 2. Create some workunits +for name in workunit_names: + workunit1 = {'name': name, 'applicationid': 2, 'description': 'is warm and fluffy', 'containerid': 123} + response = b1.save('workunit', workunit1) + print(response.results[0]) +# 3. Find and delete any workunits with these names, now that they have been created +find_delete_existing_objects_by_name(b1, endpoint, all_names) -# target_user_ids = [1,2,3,4,5, 12345] -# -# response1 = b1.exists("user", target_user_ids) -# response2 = b2.exists("user", target_user_ids) -# -# print(response1) -# print(response2) \ No newline at end of file +# target_user_ids = [291792] From 9910300d95ed87702a37bbd865aa4b9a563f174d Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Fri, 19 Apr 2024 12:19:13 +0200 Subject: [PATCH 025/129] unit and integration tests. 
Not full coverage, but hopefully will do for now --- bfabric/examples/save_delete.py | 53 ------- bfabric/src/engine_zeep.py | 2 +- bfabric/src/pandas_helper.py | 11 -- bfabric/src/result_container.py | 2 +- bfabric/tests/integration/groundtruth.json | 144 ++++++++++++++++++ .../tests/integration/test_bfabric2_read.py | 82 ++++++++++ .../test_bfabric2_read_pagination.py | 57 +++++++ .../integration/test_bfabric2_save_delete.py | 98 ++++++++++++ bfabric/tests/unit/test_dict_helper.py | 15 ++ bfabric/tests/unit/test_math_helper.py | 15 ++ bfabric/tests/unit/test_paginator.py | 18 +++ bfabric/tests/unit/test_pandas_helper.py | 25 +++ .../tests/unit/test_response_format_dict.py | 34 +++++ bfabric/tests/unit/test_results_container.py | 46 ++++++ 14 files changed, 536 insertions(+), 66 deletions(-) delete mode 100644 bfabric/examples/save_delete.py create mode 100644 bfabric/tests/integration/groundtruth.json create mode 100755 bfabric/tests/integration/test_bfabric2_read.py create mode 100644 bfabric/tests/integration/test_bfabric2_read_pagination.py create mode 100644 bfabric/tests/integration/test_bfabric2_save_delete.py create mode 100644 bfabric/tests/unit/test_dict_helper.py create mode 100644 bfabric/tests/unit/test_math_helper.py create mode 100644 bfabric/tests/unit/test_paginator.py create mode 100644 bfabric/tests/unit/test_pandas_helper.py create mode 100644 bfabric/tests/unit/test_response_format_dict.py create mode 100644 bfabric/tests/unit/test_results_container.py diff --git a/bfabric/examples/save_delete.py b/bfabric/examples/save_delete.py deleted file mode 100644 index 1227f478..00000000 --- a/bfabric/examples/save_delete.py +++ /dev/null @@ -1,53 +0,0 @@ -from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth - -def find_delete_existing_objects_by_name(b: Bfabric, endpoint: str, name_list: list) -> None: - # 1. Check which objects exist - objs_exist = b1.exists(endpoint, 'name', name_list) - objs_exist_names = [name for i, name in enumerate(all_names) if objs_exist[i]] - - if len(objs_exist_names) == 0: - print("No", endpoint, "exists") - else: - print("Already exist:", objs_exist_names) - - ids_to_delete = [] - for name in enumerate(objs_exist_names): - # 2.1 Get IDs of all existing workunits - response_dict = b.read(endpoint, {'name': name}).to_list_dict() - ids_this = [r['id'] for r in response_dict] - - print('--', name, 'exist with ids', ids_this) - ids_to_delete += ids_this - - # Delete - response_dict = b.delete(endpoint, ids_to_delete).to_list_dict() - print('Deletion results:', response_dict) - - -# TODO: Check if works with ZEEP -# TODO: Why id=1525 matches random name queries, but is not deleteable??? -# TODO: Adapt to tests -config, auth = get_system_auth() - -b1 = Bfabric(config, auth, engine = BfabricAPIEngineType.SUDS) -# b2 = Bfabric(config, auth, engine = BfabricAPIEngineType.ZEEP) - -endpoint = 'workunit' -workunit_names = ['MewThePokemon', 'TomMGM', 'MinkyLeChat'] -fake_name = 'SpikeTheDog' -all_names = workunit_names + [fake_name] - - -# 1. Find and delete any workunits with these names, if they already exist -find_delete_existing_objects_by_name(b1, endpoint, all_names) - -# 2. Create some workunits -for name in workunit_names: - workunit1 = {'name': name, 'applicationid': 2, 'description': 'is warm and fluffy', 'containerid': 123} - response = b1.save('workunit', workunit1) - print(response.results[0]) - -# 3. 
Find and delete any workunits with these names, now that they have been created -find_delete_existing_objects_by_name(b1, endpoint, all_names) - -# target_user_ids = [291792] diff --git a/bfabric/src/engine_zeep.py b/bfabric/src/engine_zeep.py index 279a3eb0..59d8c8a1 100644 --- a/bfabric/src/engine_zeep.py +++ b/bfabric/src/engine_zeep.py @@ -47,7 +47,7 @@ def read(self, endpoint: str, obj: dict, page: int = 1, idonly: bool = False, full_query = dict(login=self.login, page=page, password=self.password, query=query, idonly=idonly) client = self._get_client(endpoint) - with client.settings(strict=False): + with client.settings(strict=False, xml_huge_tree=True, xsd_ignore_sequence_order=True): return client.service.read(full_query) def readid(self, endpoint: str, obj: dict, page: int = 1, includedeletableupdateable: bool = True): diff --git a/bfabric/src/pandas_helper.py b/bfabric/src/pandas_helper.py index 05711dd5..3eee7d5e 100644 --- a/bfabric/src/pandas_helper.py +++ b/bfabric/src/pandas_helper.py @@ -31,14 +31,3 @@ def list_dict_to_df(l: List[Dict]) -> pd.DataFrame: * All non-basic data types are converted to strings """ return pd.DataFrame([_stringify_dict(r) for r in l]) - - -if __name__ == "__main__": - example_list_dict = [ - {'cat': 1, 'dog': 2}, - {'cat': 3, 'mouse': ["a", "b"]}, - {'mouse': 5}, - {'cat': 1, 'dog': 2, 'mouse': 7}, - ] - - print(list_dict_to_df(example_list_dict)) diff --git a/bfabric/src/result_container.py b/bfabric/src/result_container.py index 257aa00a..66ea9303 100644 --- a/bfabric/src/result_container.py +++ b/bfabric/src/result_container.py @@ -74,7 +74,7 @@ def append(self, other: ResultContainer) -> None: else: self.total_pages_api = None - def total_pages_api(self): + def get_total_pages_api(self): return self.total_pages_api def to_list_dict(self, drop_empty: bool = True, drop_underscores_suds: bool = True, diff --git a/bfabric/tests/integration/groundtruth.json b/bfabric/tests/integration/groundtruth.json new file mode 100644 index 00000000..fc7ffbe4 --- /dev/null +++ b/bfabric/tests/integration/groundtruth.json @@ -0,0 +1,144 @@ + { + "project":[ + [ + { + "id":3000 + }, + { + "name":"FGCZ Internal" + } + ] + ], + "container":[ + [ + { + "id":3000 + }, + { + "name":"FGCZ Internal" + } + ] + ], + "application":[ + [ + { + "id":224 + }, + { + "name":"MaxQuant" + } + ] + ], + "workunit":[ + [ + { + "createdby":"gerritsb", + "name":"Lovorka SUZ12 control" + }, + { + "id":162, + "status":"AVAILABLE" + } + ] + ], + "sample":[ + [ + { + "id":190249 + }, + { + "name":"autoQC4L", + "type":"Biological Sample - Proteomics User Lab" + } + ] + ], + "annotation":[ + [ + { + "id":2710 + }, + { + "name":"A", + "type":"Grouping Var" + } + ] + ], + "resource":[ + [ + { + "filechecksum":"090a3f025d3ebbad75213e3d4886e17c" + }, + { + "name":"20190903_07_autoQC4L.raw", + "size":264773059 + } + ], + [ + { + "filechecksum":"090a3f02%" + }, + { + "name":"20190903_07_autoQC4L.raw", + "size":264773059, + "filechecksum":"090a3f025d3ebbad75213e3d4886e17c" + } + ] + ], + "user":[ + [ + { + "id":482 + }, + { + "login":"cpanse", + "city":"Zürich", + "zip":"8057" + } + ], + [ + { + "login":"cpanse" + }, + { + "id":482, + "city":"Zürich", + "zip":"8057" + } + ], + [ + { + "login":"mderrico" + }, + { + "id":7133, + "lastname":"d'Errico", + "city":"Zürich", + "zip":"8057" + } + ] + ], + "executable":[ + [ + { + "id":16375 + }, + { + "createdby":"cpanse", + "name":"yaml / Grid Engine executable", + "size":1593, + "context":"SUBMITTER" + } + ], + [ + { + "id":16374 + }, + { + 
"createdby":"cpanse", + "name":"yaml 004", + "size":953, + "context":"WRAPPERCREATOR" + } + ] + ] + } diff --git a/bfabric/tests/integration/test_bfabric2_read.py b/bfabric/tests/integration/test_bfabric2_read.py new file mode 100755 index 00000000..b69f024d --- /dev/null +++ b/bfabric/tests/integration/test_bfabric2_read.py @@ -0,0 +1,82 @@ +import json +import os +import unittest + +from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth + + +class BfabricTestCase(unittest.TestCase): + def __init__(self, *args, **kwargs): + super(BfabricTestCase, self).__init__(*args, **kwargs) + + # Load ground truth + path = os.path.join(os.path.dirname(__file__), "groundtruth.json") + with open(path) as json_file: + self.ground_truth = json.load(json_file) + + # Load config and authentification + self.config, self.auth = get_system_auth() + + # Init the engines + self.bfZeep = Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP) + self.bfSuds = Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS) + + def read(self, bf: Bfabric, endpoint: str): + """Executes read queries for `endpoint` and compares results with ground truth.""" + self.assertIn(endpoint, self.ground_truth) + for query, ground_truth in self.ground_truth[endpoint]: + res = bf.read(endpoint=endpoint, obj=query).to_list_dict() + + # print(query, res) + + self.assertEqual(len(res), 1) # Expecting only one query result in all cases + for gt_attr, gt_value in ground_truth.items(): + self.assertEqual(str(gt_value), str(res[0][gt_attr])) + + def _test_empty_project(self, bf: Bfabric): + res = bf.read(endpoint="project", obj={"name": "this project does not exist"}).to_list_dict() + self.assertEqual(res, []) + + def test_user(self): + self.read(self.bfSuds, "user") + self.read(self.bfZeep, "user") + + def test_container(self): + self.read(self.bfSuds, "container") + self.read(self.bfZeep, "container") + + def test_project(self): + self.read(self.bfSuds, "project") + # self.read(self.bfZeep, "project") # FIXME: Zeep does not parse name correctly for project queries + + def test_project_when_not_exists(self): + self._test_empty_project(self.bfZeep) + self._test_empty_project(self.bfSuds) + + def test_application(self): + self.read(self.bfSuds, "application") + self.read(self.bfZeep, "application") + + def test_sample(self): + self.read(self.bfSuds, "sample") + self.read(self.bfZeep, "sample") + + def test_workunit(self): + self.read(self.bfSuds, "workunit") + self.read(self.bfZeep, "workunit") + + def test_resource(self): + self.read(self.bfSuds, "resource") + self.read(self.bfZeep, "resource") + + def test_executable(self): + self.read(self.bfSuds, "executable") + self.read(self.bfZeep, "executable") + + def test_annotation(self): + self.read(self.bfSuds, "annotation") + self.read(self.bfZeep, "annotation") + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/bfabric/tests/integration/test_bfabric2_read_pagination.py b/bfabric/tests/integration/test_bfabric2_read_pagination.py new file mode 100644 index 00000000..15270fed --- /dev/null +++ b/bfabric/tests/integration/test_bfabric2_read_pagination.py @@ -0,0 +1,57 @@ +import unittest +import pandas as pd + +from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth +from bfabric.src.pandas_helper import list_dict_to_df + + +def _calc_query(config, auth, engine: BfabricAPIEngineType, endpoint: str, + max_results: int = 300) -> pd.DataFrame: + print("Sending query via", engine) + b = Bfabric(config, auth, 
engine=engine) + + response_class = b.read(endpoint, {}, max_results=max_results, idonly=False, includedeletableupdateable=True) + response_dict = response_class.to_list_dict(drop_empty=True, drop_underscores_suds=True, + have_sort_responses=True) + return list_dict_to_df(response_dict) + + +class BfabricTestCase(unittest.TestCase): + def __init__(self, *args, **kwargs): + super(BfabricTestCase, self).__init__(*args, **kwargs) + self.config, self.auth = get_system_auth() + + def test_composite_user(self): + endpoint = 'user' + max_results = 300 + + # Test SUDS + print("Testing if SUDS returns the requested number of entries") + resp_df_suds = _calc_query(self.config, self.auth, BfabricAPIEngineType.SUDS, endpoint, + max_results=max_results) + assert len(resp_df_suds.index) == max_results + + # Test ZEEP + print("Testing if ZEEP returns the requested number of entries") + resp_df_zeep = _calc_query(self.config, self.auth, BfabricAPIEngineType.ZEEP, endpoint, + max_results=max_results) + assert len(resp_df_zeep.index) == max_results + + # Rename suds to remove underscores + # resp_df_suds.rename(columns={"_id": "id", "_classname": "classname"}, inplace=True) + + # Test that columns are exactly the same + print("Testing if SUDS and ZEEP parsed responses have the same root fields") + suds_cols = list(sorted(resp_df_suds.columns)) + zeep_cols = list(sorted(resp_df_zeep.columns)) + assert suds_cols == zeep_cols + + print("Testing if SUDS and ZEEP responses are the same field by field") + mismatch_cols = [] + for col_name in suds_cols: + if not resp_df_suds[col_name].equals(resp_df_zeep[col_name]): + mismatch_cols += [col_name] + + # TODO: Make the test strict if Zeep bug is ever resolved. + assert mismatch_cols == ['formerproject', 'project'] + print("SUDS and ZEEP mismatch in", mismatch_cols, "(expected)") diff --git a/bfabric/tests/integration/test_bfabric2_save_delete.py b/bfabric/tests/integration/test_bfabric2_save_delete.py new file mode 100644 index 00000000..014fe12f --- /dev/null +++ b/bfabric/tests/integration/test_bfabric2_save_delete.py @@ -0,0 +1,98 @@ +from typing import Tuple +import unittest + +from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth + + +def _find_delete_existing_objects_by_name(b: Bfabric, endpoint: str, name_list: list) -> Tuple[list, list]: + """ + Checks if workunits with requested names exist. Attempts to delete the existing workunits + + :param b: Bfabric instance + :param endpoint: Endpoint + :param name_list: List of names to check + :return: Subset of workunit names that are found to exist, and deletion reports for those workunits + """ + + # 1. Check which objects exist + objs_exist = b.exists(endpoint, 'name', name_list) + objs_exist_names = [name for i, name in enumerate(name_list) if objs_exist[i]] + + if len(objs_exist_names) == 0: + print("No", endpoint, "exists") + return [], [] + else: + print("Already exist:", objs_exist_names) + + ids_to_delete = [] + for name in objs_exist_names: + # 2.1 Get IDs of all existing workunits + response_dict = b.read(endpoint, {'name': name}).to_list_dict() + ids_this = [r['id'] for r in response_dict] + + print('--', name, 'exist with ids', ids_this) + ids_to_delete += ids_this + + # Delete + delete_response_dict = b.delete(endpoint, ids_to_delete).to_list_dict() + print('Deletion results:', delete_response_dict) + + return objs_exist_names, delete_response_dict + +def _save_delete_workunit(b: Bfabric, verbose: bool = False) -> None: + """ + Integration test. 
Attempts to create some work units, then delete them.
+    - We check whether, after creation, the workunits with the target names are found in the API,
+        and the control workunit is not found (because it is not created)
+    - We check whether the deletion of the created workunits is successful
+
+    :param b: BFabric Instance
+    :param verbose: Verbosity
+    :return:
+    """
+
+    endpoint = 'workunit'
+    workunit_names = ['MewThePokemon', 'TomMGM', 'MinkyLeChat']
+    fake_name = 'SpikeTheDog'
+    all_names = workunit_names + [fake_name]
+
+    # 1. Find and delete any workunits with these names, if they already exist
+    print("Phase 1: Make sure to clean up workunits with target names, if they somehow already exist")
+    _find_delete_existing_objects_by_name(b, endpoint, all_names)
+
+    # 2. Create some workunits
+    print("Phase 2: Creating the target units")
+    new_ids = []
+    for name in workunit_names:
+        workunit1 = {'name': name, 'applicationid': 2, 'description': 'is warm and fluffy', 'containerid': 123}
+        response = b.save('workunit', workunit1).to_list_dict()  # We do the conversion to drop underscores in SUDS
+        if verbose:
+            print(response[0])
+
+        assert len(response) == 1, "Expected a single response from a single saved workunit"
+        new_ids += [response[0]['id']]
+
+    # 3. Find and delete any workunits with these names, now that they have been created
+    print("Phase 3: Finding and deleting the created work units, checking if they match expectation")
+    found_names, deleted_responses = _find_delete_existing_objects_by_name(b, endpoint, all_names)
+
+    assert found_names == workunit_names, "Expected the names found in the API to be the ones we just created"
+    for resp, trg_id in zip(deleted_responses, new_ids):
+        assert len(resp) == 1, "Deletion response format unexpected"
+        assert 'deletionreport' in resp, "Deletion response format unexpected"
+        assert resp['deletionreport'] == 'Workunit ' + str(
+            trg_id) + ' removed successfully.', "Deletion response format unexpected"
+
+
+class BfabricTestCase(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super(BfabricTestCase, self).__init__(*args, **kwargs)
+        self.config, self.auth = get_system_auth()
+
+    def test_zeep(self):
+        bZeep = Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP)
+        _save_delete_workunit(bZeep)
+
+    def test_suds(self):
+        bSuds = Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS)
+        _save_delete_workunit(bSuds)
diff --git a/bfabric/tests/unit/test_dict_helper.py b/bfabric/tests/unit/test_dict_helper.py
new file mode 100644
index 00000000..6a79e0b1
--- /dev/null
+++ b/bfabric/tests/unit/test_dict_helper.py
@@ -0,0 +1,15 @@
+import unittest
+
+import bfabric.src.dict_helper as dict_helper
+
+
+class BfabricTestCase(unittest.TestCase):
+    def test_sort_dict(self):
+        # Main purpose of dictionary sorting is that they appear consistent when printed
+        d = {'c': 5, 'b': 10}
+        d_sorted = dict_helper.sort_dict(d)
+        self.assertEqual(str(d_sorted), "{'b': 10, 'c': 5}")
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/bfabric/tests/unit/test_math_helper.py b/bfabric/tests/unit/test_math_helper.py
new file mode 100644
index 00000000..cc52b1e8
--- /dev/null
+++ b/bfabric/tests/unit/test_math_helper.py
@@ -0,0 +1,15 @@
+import unittest
+
+import bfabric.src.math_helper as math_helper
+
+
+class BfabricTestCase(unittest.TestCase):
+    def test_integer_division(self):
+        # Integer division should round up whenever there is a non-zero remainder
+        self.assertEqual(math_helper.div_int_ceil(120, 100), 2)
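+        # Worked example (a sketch; div_int_ceil is implemented via divmod):
+        # divmod(120, 100) == (1, 20), and the non-zero remainder bumps the
+        # quotient up by one, giving 1 + bool(20) == 2.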
+        self.assertEqual(math_helper.div_int_ceil(200, 100), 2)
+        self.assertEqual(math_helper.div_int_ceil(245, 100), 3)
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/bfabric/tests/unit/test_paginator.py b/bfabric/tests/unit/test_paginator.py
new file mode 100644
index 00000000..29655794
--- /dev/null
+++ b/bfabric/tests/unit/test_paginator.py
@@ -0,0 +1,18 @@
+import unittest
+
+import bfabric.src.paginator as paginator
+
+
+class BfabricTestCase(unittest.TestCase):
+    def test_page_iter(self):
+        # A list longer than one page should be split into full pages plus a final partial page
+        data = list(range(123))
+
+        rez = list(paginator.page_iter(data, page_size=100))
+        self.assertEqual(len(rez), 2)
+        self.assertEqual(rez[0], list(range(100)))
+        self.assertEqual(rez[1], list(range(100, 123)))
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/bfabric/tests/unit/test_pandas_helper.py b/bfabric/tests/unit/test_pandas_helper.py
new file mode 100644
index 00000000..ba4c1e01
--- /dev/null
+++ b/bfabric/tests/unit/test_pandas_helper.py
@@ -0,0 +1,25 @@
+import unittest
+import numpy as np
+
+import bfabric.src.pandas_helper as pandas_helper
+
+
+class BfabricTestCase(unittest.TestCase):
+    def test_list_dict_to_df(self):
+        # Dictionaries with mixed keys should map to a dataframe with the union of keys as columns
+        example_list_dict = [
+            {'cat': 1, 'dog': 2},
+            {'cat': 3, 'rat': ["a", "b"]},
+            {'rat': 5},
+            {'cat': 1, 'dog': 2, 'rat': 7},
+        ]
+
+        df = pandas_helper.list_dict_to_df(example_list_dict)
+        self.assertEqual(list(df.columns), ['cat', 'dog', 'rat'])
+        np.testing.assert_equal(list(df['cat']), [1, 3, np.nan, 1])
+        np.testing.assert_equal(list(df['dog']), [2, np.nan, np.nan, 2])
+        np.testing.assert_equal(list(df['rat']), [np.nan, "['a', 'b']", 5, 7])
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/bfabric/tests/unit/test_response_format_dict.py b/bfabric/tests/unit/test_response_format_dict.py
new file mode 100644
index 00000000..f0b8af7e
--- /dev/null
+++ b/bfabric/tests/unit/test_response_format_dict.py
@@ -0,0 +1,34 @@
+import unittest
+import numpy as np
+
+import bfabric.src.response_format_dict as response_format_dict
+
+
+class BfabricTestCase(unittest.TestCase):
+    def test_drop_empty_elements(self):
+        # Should delete all hierarchical instances of key-value pairs, where value is None or empty dict
+        input_list_dict = [{'a': [], 'b': [1, {'aa': 14, 'gg': None}], 'c': []}, {'zz': None, 'uu': 'cat'}]
+        target_list_dict = [{'b': [1, {'aa': 14}]}, {'uu': 'cat'}]
+
+        output_list_dict = response_format_dict.drop_empty_elements(input_list_dict, inplace=False)
+        self.assertEqual(output_list_dict, target_list_dict)
+
+    def test_map_element_keys(self):
+        # Main use is to delete underscores in specific keys
+        input_list_dict = [{'a': [], 'b': [1, {'_aa': 14, 'gg': None}], 'c': []}, {'zz': None, 'uu': 'cat'}]
+        target_list_dict = [{'a': [], 'b': [1, {'aa': 14, 'gg': None}], 'c': []}, {'zz': None, 'uu': 'cat'}]
+
+        output_list_dict = response_format_dict.map_element_keys(input_list_dict, {'_aa': 'aa'}, inplace=False)
+        self.assertEqual(output_list_dict, target_list_dict)
+
+    def test_sort_dicts_by_key(self):
+        # NOTE: The main purpose of sorting is to ensure consistent string representation
+        input_list_dict = [{'b': 1, 'a': 2, 'c': 3}, {'dog': 25, 'cat': [1,2,3]}]
+        target_list_dict = [{'a': 2, 'b': 1, 'c': 3}, {'cat': [1,2,3], 'dog': 25}]
+
+        output_list_dict = response_format_dict.sort_dicts_by_key(input_list_dict, inplace=False)
+        
self.assertEqual(str(output_list_dict), str(target_list_dict)) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/bfabric/tests/unit/test_results_container.py b/bfabric/tests/unit/test_results_container.py new file mode 100644 index 00000000..08981162 --- /dev/null +++ b/bfabric/tests/unit/test_results_container.py @@ -0,0 +1,46 @@ +import unittest + +import bfabric.src.result_container as result_container + + +# TODO: Add coverage for LISTSUDS and LISTZEEP +class BfabricTestCase(unittest.TestCase): + def __init__(self, *args, **kwargs): + super(BfabricTestCase, self).__init__(*args, **kwargs) + + self.c1 = result_container.ResultContainer([1,2,3], total_pages_api=1, + result_type=result_container.BfabricResultType.LISTDICT) + self.c2 = result_container.ResultContainer([4,5], total_pages_api=1, + result_type=result_container.BfabricResultType.LISTDICT) + + def test_str_repr(self): + self.assertEqual(str(self.c1), "[1, 2, 3]") + self.assertEqual(str(self.c2), "[4, 5]") + + self.assertEqual(self.c1.__repr__(), "[1, 2, 3]") + self.assertEqual(self.c2.__repr__(), "[4, 5]") + + def test_len(self): + self.assertEqual(len(self.c1), 3) + self.assertEqual(len(self.c2), 2) + + def test_get_item(self): + self.assertEqual(self.c1[2], 3) + self.assertEqual(self.c2[0], 4) + + def test_append(self): + c3 = result_container.ResultContainer(list(range(200, 400)), total_pages_api=2, + result_type=result_container.BfabricResultType.LISTDICT) + c3.append(self.c1) + + self.assertEqual(len(c3), 203) + self.assertEqual(c3.results, list(range(200, 400)) + [1,2,3]) + self.assertEqual(c3.get_total_pages_api(), 3) + + def test_to_list_dict(self): + # NOTE: For LISTDICT format, the conversion to listdict does nothing + self.assertEqual(self.c1.to_list_dict(), self.c1.results) + + +if __name__ == "__main__": + unittest.main(verbosity=2) From 8849dc0e536e59d77f4133931fc39f789823ea76 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Mon, 22 Apr 2024 10:44:55 +0200 Subject: [PATCH 026/129] Update bfabric/tests/unit/test_results_container.py Co-authored-by: Leonardo Schwarz --- bfabric/tests/unit/test_results_container.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bfabric/tests/unit/test_results_container.py b/bfabric/tests/unit/test_results_container.py index 08981162..ec3fe1f7 100644 --- a/bfabric/tests/unit/test_results_container.py +++ b/bfabric/tests/unit/test_results_container.py @@ -17,8 +17,8 @@ def test_str_repr(self): self.assertEqual(str(self.c1), "[1, 2, 3]") self.assertEqual(str(self.c2), "[4, 5]") - self.assertEqual(self.c1.__repr__(), "[1, 2, 3]") - self.assertEqual(self.c2.__repr__(), "[4, 5]") + self.assertEqual(repr(self.c1), "[1, 2, 3]") + self.assertEqual(repr(self.c2), "[4, 5]") def test_len(self): self.assertEqual(len(self.c1), 3) From 7fad60b7a679305b2852230a86d3cab30d991ba5 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Mon, 22 Apr 2024 10:46:06 +0200 Subject: [PATCH 027/129] test naming, moved dict_helper into same file --- bfabric/bfabric2.py | 6 +++--- bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py | 2 +- bfabric/scripts/fgcz_maxquant_wrapper.py | 12 ++++++------ bfabric/src/dict_helper.py | 7 ------- bfabric/src/math_helper.py | 9 --------- bfabric/src/response_format_dict.py | 8 +++++++- bfabric/src/result_container.py | 2 +- bfabric/tests/integration/test_bfabric2_read.py | 4 ++-- .../integration/test_bfabric2_read_pagination.py | 4 ++-- 
.../tests/integration/test_bfabric2_save_delete.py | 4 ++-- bfabric/tests/test_bfabric_executable.py | 4 ++-- bfabric/tests/test_bfabric_workunit.py | 10 +++++----- bfabric/tests/unit/test_dict_helper.py | 6 +++--- bfabric/tests/unit/test_math_helper.py | 2 +- bfabric/tests/unit/test_paginator.py | 2 +- bfabric/tests/unit/test_pandas_helper.py | 2 +- bfabric/tests/unit/test_response_format_dict.py | 2 +- bfabric/tests/unit/test_results_container.py | 6 +++--- 18 files changed, 41 insertions(+), 51 deletions(-) delete mode 100644 bfabric/src/dict_helper.py diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 4937dd7a..008b12da 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -166,7 +166,7 @@ def read_multi(self, endpoint: str, obj: dict, multi_query_key: str, multi_query # TODO: It is assumed that a user requesting multi_query always wants all of the pages. Can anybody think of # exceptions to this? response_this = self.read(endpoint, obj_exteded, max_results=None, readid=readid, **kwargs) - response_tot.append(response_this) + response_tot.extend(response_this) return response_tot @@ -179,7 +179,7 @@ def save_multi(self, endpoint: str, obj_lst: list, **kwargs) -> ResultContainer: # Iterate over request chunks that fit into a single API page for page_objs in page_iter(obj_lst): response_page = self.save(endpoint, page_objs, **kwargs) - response_tot.append(response_page) + response_tot.extend(response_page) return response_tot @@ -193,7 +193,7 @@ def delete_multi(self, endpoint: str, id_list: list) -> ResultContainer: # Iterate over request chunks that fit into a single API page for page_ids in page_iter(id_list): response_page = self.delete(endpoint, page_ids) - response_tot.append(response_page) + response_tot.extend(response_page) return response_tot diff --git a/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py b/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py index ff2d89ac..5ba0cb1c 100755 --- a/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py +++ b/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py @@ -129,7 +129,7 @@ def run(self): eFastaDatabase.attrib['path'] = "{}/{}".format(os.getcwd(), self.fasta) for s in self.samples: - eExperiment.append(self.getBiologicalSample(category=s, InputFile = self.zipfilename)) + eExperiment.extend(self.getBiologicalSample(category=s, InputFile = self.zipfilename)) xml.write('/dev/stdout' , pretty_print=True, xml_declaration=True, method='xml', encoding="UTF-8") diff --git a/bfabric/scripts/fgcz_maxquant_wrapper.py b/bfabric/scripts/fgcz_maxquant_wrapper.py index 2f9109d4..3d7bcb81 100755 --- a/bfabric/scripts/fgcz_maxquant_wrapper.py +++ b/bfabric/scripts/fgcz_maxquant_wrapper.py @@ -68,7 +68,7 @@ def generate_mqpar(self, xml_filename, xml_template): for a in value.split(","): estring = etree.Element("string") estring.text = a - element.append(estring) + element.extend(estring) pass else: print ("replacing xpath expression {} by {}.".format(query, value)) @@ -101,7 +101,7 @@ def generate_mqpar(self, xml_filename, xml_template): estring = etree.Element("string") estring.text = targetRawFile - element.append(estring) + element.extend(estring) element = xml_template.find("/experiments") if element is None: @@ -110,7 +110,7 @@ def generate_mqpar(self, xml_filename, xml_template): estring = etree.Element("string") estring.text = "{}".format(os.path.basename(input).replace(".raw", "").replace(".RAW", "")) ecount += 1 - element.append(estring) + element.extend(estring) element = xml_template.find("/fractions") if element is None: @@ 
-118,7 +118,7 @@ def generate_mqpar(self, xml_filename, xml_template): estring = etree.Element("short") estring.text = "32767" - element.append(estring) + element.extend(estring) element = xml_template.find("/ptms") if element is None: @@ -126,7 +126,7 @@ def generate_mqpar(self, xml_filename, xml_template): estring = etree.Element("boolean") estring.text = "false" - element.append(estring) + element.extend(estring) element = xml_template.find("/paramGroupIndices") if element is None: @@ -134,7 +134,7 @@ def generate_mqpar(self, xml_filename, xml_template): estring = etree.Element("int") estring.text = "0" - element.append(estring) + element.extend(estring) #return(xml_template) xml_template.write(xml_filename)#, pretty_print=True) diff --git a/bfabric/src/dict_helper.py b/bfabric/src/dict_helper.py deleted file mode 100644 index d6464e60..00000000 --- a/bfabric/src/dict_helper.py +++ /dev/null @@ -1,7 +0,0 @@ -def sort_dict(d: dict) -> dict: - """ - :param d: A dictionary - :return: A dictionary with items sorted by key. - Affects how the dictionary appears, when mapped to a string - """ - return dict(sorted(d.items())) diff --git a/bfabric/src/math_helper.py b/bfabric/src/math_helper.py index edc9400b..b55b8018 100644 --- a/bfabric/src/math_helper.py +++ b/bfabric/src/math_helper.py @@ -9,12 +9,3 @@ def div_int_ceil(n: int, d: int) -> int: """ q, r = divmod(n, d) return q + bool(r) - - - -if __name__ == "__main__": - print( - div_int_ceil(120, 100), - div_int_ceil(200, 100), - div_int_ceil(245, 100) - ) \ No newline at end of file diff --git a/bfabric/src/response_format_dict.py b/bfabric/src/response_format_dict.py index 8315ff5b..a16e8eeb 100644 --- a/bfabric/src/response_format_dict.py +++ b/bfabric/src/response_format_dict.py @@ -2,8 +2,14 @@ from copy import deepcopy from collections import OrderedDict -from bfabric.src.dict_helper import sort_dict +def sort_dict(d: dict) -> dict: + """ + :param d: A dictionary + :return: A dictionary with items sorted by key. + Affects how the dictionary appears, when mapped to a string + """ + return dict(sorted(d.items())) def _recursive_drop_empty(response_elem: Union[list, dict]) -> None: """ diff --git a/bfabric/src/result_container.py b/bfabric/src/result_container.py index 66ea9303..57d84ba9 100644 --- a/bfabric/src/result_container.py +++ b/bfabric/src/result_container.py @@ -58,7 +58,7 @@ def __str__(self): def __len__(self): return len(self.results) - def append(self, other: ResultContainer) -> None: + def extend(self, other: ResultContainer) -> None: """ Can merge results of two queries. 
This can happen if the engine splits a complicated query in two :param other: The other query results that should be appended to this diff --git a/bfabric/tests/integration/test_bfabric2_read.py b/bfabric/tests/integration/test_bfabric2_read.py index b69f024d..1754b59c 100755 --- a/bfabric/tests/integration/test_bfabric2_read.py +++ b/bfabric/tests/integration/test_bfabric2_read.py @@ -5,9 +5,9 @@ from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth -class BfabricTestCase(unittest.TestCase): +class BfabricTestRead(unittest.TestCase): def __init__(self, *args, **kwargs): - super(BfabricTestCase, self).__init__(*args, **kwargs) + super(BfabricTestRead, self).__init__(*args, **kwargs) # Load ground truth path = os.path.join(os.path.dirname(__file__), "groundtruth.json") diff --git a/bfabric/tests/integration/test_bfabric2_read_pagination.py b/bfabric/tests/integration/test_bfabric2_read_pagination.py index 15270fed..31d88200 100644 --- a/bfabric/tests/integration/test_bfabric2_read_pagination.py +++ b/bfabric/tests/integration/test_bfabric2_read_pagination.py @@ -16,9 +16,9 @@ def _calc_query(config, auth, engine: BfabricAPIEngineType, endpoint: str, return list_dict_to_df(response_dict) -class BfabricTestCase(unittest.TestCase): +class BfabricTestPagination(unittest.TestCase): def __init__(self, *args, **kwargs): - super(BfabricTestCase, self).__init__(*args, **kwargs) + super(BfabricTestPagination, self).__init__(*args, **kwargs) self.config, self.auth = get_system_auth() def test_composite_user(self): diff --git a/bfabric/tests/integration/test_bfabric2_save_delete.py b/bfabric/tests/integration/test_bfabric2_save_delete.py index 014fe12f..ac517788 100644 --- a/bfabric/tests/integration/test_bfabric2_save_delete.py +++ b/bfabric/tests/integration/test_bfabric2_save_delete.py @@ -84,9 +84,9 @@ def _save_delete_workunit(b: Bfabric, verbose: bool = False) -> None: trg_id) + ' removed successfully.', "Deletion response format unexpected" -class BfabricTestCase(unittest.TestCase): +class BfabricTestSaveDelete(unittest.TestCase): def __init__(self, *args, **kwargs): - super(BfabricTestCase, self).__init__(*args, **kwargs) + super(BfabricTestSaveDelete, self).__init__(*args, **kwargs) self.config, self.auth = get_system_auth() def test_zeep(self): diff --git a/bfabric/tests/test_bfabric_executable.py b/bfabric/tests/test_bfabric_executable.py index ca2bb294..00339636 100755 --- a/bfabric/tests/test_bfabric_executable.py +++ b/bfabric/tests/test_bfabric_executable.py @@ -47,7 +47,7 @@ def test_executable(self, filename=os.path.abspath(__file__)): 'description': 'unit test', 'applicationid': 61 }) - self.endpoint['workunit'].append(wu_res[0]) + self.endpoint['workunit'].extend(wu_res[0]) # print(json.dumps(wu_res, cls=bfabricEncoder, indent=2)) # save with open(filename, 'r') as f: @@ -73,7 +73,7 @@ def test_executable(self, filename=os.path.abspath(__file__)): #'masterexecutableid': 11871, 'base64': input_b64_executable } - self.endpoint['executable'].append(self.B.save_object('executable', query)[0]) + self.endpoint['executable'].extend(self.B.save_object('executable', query)[0]) # read for e in self.endpoint['executable']: diff --git a/bfabric/tests/test_bfabric_workunit.py b/bfabric/tests/test_bfabric_workunit.py index 105045f6..f12dad6e 100755 --- a/bfabric/tests/test_bfabric_workunit.py +++ b/bfabric/tests/test_bfabric_workunit.py @@ -50,7 +50,7 @@ def resource_save(self, filename, workunitid): 'description': content, 'workunitid': workunitid}) - 
self.endpoint['resource'].append(res[0])
+        self.endpoint['resource'].extend(res[0])
 
 
     def delete_endpoint_entries(self, endpoint=None):
@@ -78,7 +78,7 @@ def _01_executable_save(self, filename=os.path.abspath(__file__)):
 
         res = self.bfapp.save_object('executable', query)[0]
         print (res)
-        self.endpoint['executable'].append(res)
+        self.endpoint['executable'].extend(res)
 
     def _02_sample_save(self):
         sample_type = 'Biological Sample - Proteomics'
@@ -95,7 +95,7 @@ def _02_sample_save(self):
         })
 
         print(res[0])
-        self.endpoint['sample'].append(res[0])
+        self.endpoint['sample'].extend(res[0])
 
 
     def _03_application_save(self):
@@ -107,7 +107,7 @@ def _03_application_save(self):
 
         res = self.bfapp.save_object(endpoint='application', obj=query)
         print(json.dumps(res, cls=bfabricEncoder, indent=2))
-        self.endpoint['application'].append(res[0])
+        self.endpoint['application'].extend(res[0])
 
 
     def _04_workunit_save(self):
@@ -122,7 +122,7 @@ def _04_workunit_save(self):
             'description': '68b329da9893e34099c7d8ad5cb9c940',
             'applicationid': applicationid
         })
-        self.endpoint['workunit'].append(res[0])
+        self.endpoint['workunit'].extend(res[0])
         print(json.dumps(self.endpoint['workunit'], cls=bfabricEncoder, indent=2))
 
         self.resource_save(os.path.abspath(__file__), res[0]._id)
diff --git a/bfabric/tests/unit/test_dict_helper.py b/bfabric/tests/unit/test_dict_helper.py
index 6a79e0b1..7c663bcd 100644
--- a/bfabric/tests/unit/test_dict_helper.py
+++ b/bfabric/tests/unit/test_dict_helper.py
@@ -1,13 +1,13 @@
 import unittest
 
-import bfabric.src.dict_helper as dict_helper
+from bfabric.src.response_format_dict import sort_dict
 
 
-class BfabricTestCase(unittest.TestCase):
+class BfabricTestSortDict(unittest.TestCase):
     def test_sort_dict(self):
         # Main purpose of dictionary sorting is that they appear consistent when printed
         d = {'c': 5, 'b': 10}
-        d_sorted = dict_helper.sort_dict(d)
+        d_sorted = sort_dict(d)
         self.assertEqual(str(d_sorted), "{'b': 10, 'c': 5}")
 
 
diff --git a/bfabric/tests/unit/test_math_helper.py b/bfabric/tests/unit/test_math_helper.py
index cc52b1e8..dac57b4c 100644
--- a/bfabric/tests/unit/test_math_helper.py
+++ b/bfabric/tests/unit/test_math_helper.py
@@ -3,7 +3,7 @@
 import bfabric.src.math_helper as math_helper
 
 
-class BfabricTestCase(unittest.TestCase):
+class BfabricTestMath(unittest.TestCase):
     def test_integer_division(self):
         # Integer division should round up whenever there is a non-zero remainder
         self.assertEqual(math_helper.div_int_ceil(120, 100), 2)
diff --git a/bfabric/tests/unit/test_paginator.py b/bfabric/tests/unit/test_paginator.py
index 29655794..bb506692 100644
--- a/bfabric/tests/unit/test_paginator.py
+++ b/bfabric/tests/unit/test_paginator.py
@@ -3,7 +3,7 @@
 import bfabric.src.paginator as paginator
 
 
-class BfabricTestCase(unittest.TestCase):
+class BfabricTestBasicPagination(unittest.TestCase):
     def test_page_iter(self):
         # A list longer than one page should be split into full pages plus a final partial page
         data = list(range(123))
diff --git a/bfabric/tests/unit/test_pandas_helper.py b/bfabric/tests/unit/test_pandas_helper.py
index ba4c1e01..7dbac543 100644
--- a/bfabric/tests/unit/test_pandas_helper.py
+++ b/bfabric/tests/unit/test_pandas_helper.py
@@ -4,7 +4,7 @@
 import bfabric.src.pandas_helper as pandas_helper
 
 
-class BfabricTestCase(unittest.TestCase):
+class BfabricTestPandasHelper(unittest.TestCase):
     def test_list_dict_to_df(self):
         # Dictionaries with mixed keys should map to a dataframe with the union of keys as columns
         example_list_dict = [
diff --git a/bfabric/tests/unit/test_response_format_dict.py 
b/bfabric/tests/unit/test_response_format_dict.py index f0b8af7e..c33d4f50 100644 --- a/bfabric/tests/unit/test_response_format_dict.py +++ b/bfabric/tests/unit/test_response_format_dict.py @@ -4,7 +4,7 @@ import bfabric.src.response_format_dict as response_format_dict -class BfabricTestCase(unittest.TestCase): +class BfabricTestResponseFormatDict(unittest.TestCase): def test_drop_empty_elements(self): # Should delete all hierarchical instances of key-value pairs, where value is None or empty dict input_list_dict = [{'a': [], 'b': [1, {'aa': 14, 'gg': None}], 'c': []}, {'zz': None, 'uu': 'cat'}] diff --git a/bfabric/tests/unit/test_results_container.py b/bfabric/tests/unit/test_results_container.py index 08981162..b036771a 100644 --- a/bfabric/tests/unit/test_results_container.py +++ b/bfabric/tests/unit/test_results_container.py @@ -4,9 +4,9 @@ # TODO: Add coverage for LISTSUDS and LISTZEEP -class BfabricTestCase(unittest.TestCase): +class BfabricTestResultsContainer(unittest.TestCase): def __init__(self, *args, **kwargs): - super(BfabricTestCase, self).__init__(*args, **kwargs) + super(BfabricTestResultsContainer, self).__init__(*args, **kwargs) self.c1 = result_container.ResultContainer([1,2,3], total_pages_api=1, result_type=result_container.BfabricResultType.LISTDICT) @@ -31,7 +31,7 @@ def test_get_item(self): def test_append(self): c3 = result_container.ResultContainer(list(range(200, 400)), total_pages_api=2, result_type=result_container.BfabricResultType.LISTDICT) - c3.append(self.c1) + c3.extend(self.c1) self.assertEqual(len(c3), 203) self.assertEqual(c3.results, list(range(200, 400)) + [1,2,3]) From 6d0e283f440fce9d54a27698b8c1fb0a66d6444d Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Mon, 22 Apr 2024 10:48:08 +0200 Subject: [PATCH 028/129] Update bfabric/bfabric2.py Co-authored-by: Leonardo Schwarz --- bfabric/bfabric2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 4937dd7a..ff5890ab 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -152,7 +152,7 @@ def read_multi(self, endpoint: str, obj: dict, multi_query_key: str, multi_query """ response_tot = ResultContainer([], self.result_type, total_pages_api = 0) - obj_exteded = deepcopy(obj) # Make a copy of the query, not to make edits to the argument + obj_extended = deepcopy(obj) # Make a copy of the query, not to make edits to the argument # Iterate over request chunks that fit into a single API page for page_vals in page_iter(multi_query_vals): From f4d2840cc7b33ef8d799b7a49da612d2e998836e Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Mon, 22 Apr 2024 10:48:45 +0200 Subject: [PATCH 029/129] removed save_multi --- bfabric/bfabric2.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 008b12da..bd69f86a 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -170,18 +170,18 @@ def read_multi(self, endpoint: str, obj: dict, multi_query_key: str, multi_query return response_tot - # TODO: This is likely useless. When saving multiple objects, they all have different fields. + # NOTE: Save-multi method is likely useless. When saving multiple objects, they all have different fields. # One option would be to provide a dataframe, but it might struggle with nested dicts # Likely best solution is to not provide this method, and let users run a for-loop themselves. 
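+    # A minimal sketch of that user-side loop (illustrative only; `client` and
+    # `objs` are hypothetical names):
+    #
+    #     results = []
+    #     for obj in objs:
+    #         results.extend(client.save(endpoint, obj).to_list_dict())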
- def save_multi(self, endpoint: str, obj_lst: list, **kwargs) -> ResultContainer: - response_tot = ResultContainer([], self.result_type, total_pages_api = 0) - - # Iterate over request chunks that fit into a single API page - for page_objs in page_iter(obj_lst): - response_page = self.save(endpoint, page_objs, **kwargs) - response_tot.extend(response_page) - - return response_tot + # def save_multi(self, endpoint: str, obj_lst: list, **kwargs) -> ResultContainer: + # response_tot = ResultContainer([], self.result_type, total_pages_api = 0) + # + # # Iterate over request chunks that fit into a single API page + # for page_objs in page_iter(obj_lst): + # response_page = self.save(endpoint, page_objs, **kwargs) + # response_tot.extend(response_page) + # + # return response_tot def delete_multi(self, endpoint: str, id_list: list) -> ResultContainer: response_tot = ResultContainer([], self.result_type, total_pages_api=0) From 64bb5f4b61534664a4dc28646ce723b75cbec590 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Mon, 22 Apr 2024 11:10:17 +0200 Subject: [PATCH 030/129] Update bfabric/tests/integration/test_bfabric2_read_pagination.py Co-authored-by: Leonardo Schwarz --- bfabric/tests/integration/test_bfabric2_read_pagination.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bfabric/tests/integration/test_bfabric2_read_pagination.py b/bfabric/tests/integration/test_bfabric2_read_pagination.py index 15270fed..f7b3b3af 100644 --- a/bfabric/tests/integration/test_bfabric2_read_pagination.py +++ b/bfabric/tests/integration/test_bfabric2_read_pagination.py @@ -42,8 +42,8 @@ def test_composite_user(self): # Test that columns are exactly the same print("Testing if SUDS and ZEEP parsed responses have the same root fields") - suds_cols = list(sorted(resp_df_suds.columns)) - zeep_cols = list(sorted(resp_df_zeep.columns)) + suds_cols = sorted(resp_df_suds.columns) + zeep_cols = sorted(resp_df_zeep.columns) assert suds_cols == zeep_cols print("Testing if SUDS and ZEEP responses are the same field by field") From 9be4615513b52a0afa0f51b9c8273f25c038371d Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Mon, 22 Apr 2024 11:10:35 +0200 Subject: [PATCH 031/129] naming and subtest for read test --- .../tests/integration/test_bfabric2_read.py | 68 ++++++++++--------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/bfabric/tests/integration/test_bfabric2_read.py b/bfabric/tests/integration/test_bfabric2_read.py index 1754b59c..b0d7fc7d 100755 --- a/bfabric/tests/integration/test_bfabric2_read.py +++ b/bfabric/tests/integration/test_bfabric2_read.py @@ -6,9 +6,7 @@ class BfabricTestRead(unittest.TestCase): - def __init__(self, *args, **kwargs): - super(BfabricTestRead, self).__init__(*args, **kwargs) - + def setUp(self, *args, **kwargs): # Load ground truth path = os.path.join(os.path.dirname(__file__), "groundtruth.json") with open(path) as json_file: @@ -18,64 +16,68 @@ def __init__(self, *args, **kwargs): self.config, self.auth = get_system_auth() # Init the engines - self.bfZeep = Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP) - self.bfSuds = Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS) + self.clients = { + "zeep": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP), + "suds": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS) + } - def read(self, bf: Bfabric, endpoint: str): + def read(self, engine: str, endpoint: str): """Executes read queries 
for `endpoint` and compares results with ground truth.""" - self.assertIn(endpoint, self.ground_truth) - for query, ground_truth in self.ground_truth[endpoint]: - res = bf.read(endpoint=endpoint, obj=query).to_list_dict() + with self.subTest(engine=engine): + bf = self.clients[engine] + self.assertIn(endpoint, self.ground_truth) + for query, ground_truth in self.ground_truth[endpoint]: + res = bf.read(endpoint=endpoint, obj=query).to_list_dict() - # print(query, res) + # print(query, res) - self.assertEqual(len(res), 1) # Expecting only one query result in all cases - for gt_attr, gt_value in ground_truth.items(): - self.assertEqual(str(gt_value), str(res[0][gt_attr])) + self.assertEqual(len(res), 1) # Expecting only one query result in all cases + for gt_attr, gt_value in ground_truth.items(): + self.assertEqual(str(gt_value), str(res[0][gt_attr])) def _test_empty_project(self, bf: Bfabric): res = bf.read(endpoint="project", obj={"name": "this project does not exist"}).to_list_dict() self.assertEqual(res, []) def test_user(self): - self.read(self.bfSuds, "user") - self.read(self.bfZeep, "user") + self.read("suds", "user") + self.read("zeep", "user") def test_container(self): - self.read(self.bfSuds, "container") - self.read(self.bfZeep, "container") + self.read("suds", "container") + self.read("zeep", "container") def test_project(self): - self.read(self.bfSuds, "project") - # self.read(self.bfZeep, "project") # FIXME: Zeep does not parse name correctly for project queries + self.read("suds", "project") + # self.read("zeep", "project") # FIXME: Zeep does not parse name correctly for project queries def test_project_when_not_exists(self): - self._test_empty_project(self.bfZeep) - self._test_empty_project(self.bfSuds) + self._test_empty_project("zeep") + self._test_empty_project("suds") def test_application(self): - self.read(self.bfSuds, "application") - self.read(self.bfZeep, "application") + self.read("suds", "application") + self.read("zeep", "application") def test_sample(self): - self.read(self.bfSuds, "sample") - self.read(self.bfZeep, "sample") + self.read("suds", "sample") + self.read("zeep", "sample") def test_workunit(self): - self.read(self.bfSuds, "workunit") - self.read(self.bfZeep, "workunit") + self.read("suds", "workunit") + self.read("zeep", "workunit") def test_resource(self): - self.read(self.bfSuds, "resource") - self.read(self.bfZeep, "resource") + self.read("suds", "resource") + self.read("zeep", "resource") def test_executable(self): - self.read(self.bfSuds, "executable") - self.read(self.bfZeep, "executable") + self.read("suds", "executable") + self.read("zeep", "executable") def test_annotation(self): - self.read(self.bfSuds, "annotation") - self.read(self.bfZeep, "annotation") + self.read("suds", "annotation") + self.read("zeep", "annotation") if __name__ == "__main__": From f6d415f5ea931373119686809772edb888297208 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Mon, 22 Apr 2024 11:10:46 +0200 Subject: [PATCH 032/129] Update bfabric/tests/integration/test_bfabric2_read_pagination.py Co-authored-by: Leonardo Schwarz --- bfabric/tests/integration/test_bfabric2_read_pagination.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bfabric/tests/integration/test_bfabric2_read_pagination.py b/bfabric/tests/integration/test_bfabric2_read_pagination.py index f7b3b3af..b56bd172 100644 --- a/bfabric/tests/integration/test_bfabric2_read_pagination.py +++ 
b/bfabric/tests/integration/test_bfabric2_read_pagination.py @@ -17,8 +17,7 @@ def _calc_query(config, auth, engine: BfabricAPIEngineType, endpoint: str, class BfabricTestCase(unittest.TestCase): - def __init__(self, *args, **kwargs): - super(BfabricTestCase, self).__init__(*args, **kwargs) + def setUp(self): self.config, self.auth = get_system_auth() def test_composite_user(self): From e81c70d9166a4ec80ea45b80ab29b696637d5bbb Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Mon, 22 Apr 2024 11:10:58 +0200 Subject: [PATCH 033/129] Update bfabric/tests/integration/test_bfabric2_save_delete.py Co-authored-by: Leonardo Schwarz --- bfabric/tests/integration/test_bfabric2_save_delete.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bfabric/tests/integration/test_bfabric2_save_delete.py b/bfabric/tests/integration/test_bfabric2_save_delete.py index 014fe12f..5078f519 100644 --- a/bfabric/tests/integration/test_bfabric2_save_delete.py +++ b/bfabric/tests/integration/test_bfabric2_save_delete.py @@ -85,8 +85,7 @@ def _save_delete_workunit(b: Bfabric, verbose: bool = False) -> None: class BfabricTestCase(unittest.TestCase): - def __init__(self, *args, **kwargs): - super(BfabricTestCase, self).__init__(*args, **kwargs) + def setUp(self): self.config, self.auth = get_system_auth() def test_zeep(self): From 6a6891968efde3d8130cf1a9e8e6e78c76b4bcaa Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Mon, 22 Apr 2024 11:11:06 +0200 Subject: [PATCH 034/129] Update bfabric/tests/unit/test_results_container.py Co-authored-by: Leonardo Schwarz --- bfabric/tests/unit/test_results_container.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bfabric/tests/unit/test_results_container.py b/bfabric/tests/unit/test_results_container.py index ec3fe1f7..6b3c6a77 100644 --- a/bfabric/tests/unit/test_results_container.py +++ b/bfabric/tests/unit/test_results_container.py @@ -5,8 +5,7 @@ # TODO: Add coverage for LISTSUDS and LISTZEEP class BfabricTestCase(unittest.TestCase): - def __init__(self, *args, **kwargs): - super(BfabricTestCase, self).__init__(*args, **kwargs) + def setUp(self): self.c1 = result_container.ResultContainer([1,2,3], total_pages_api=1, result_type=result_container.BfabricResultType.LISTDICT) From 8f6f5a54a277da7e1b76397588ae5c21766b04d4 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Mon, 22 Apr 2024 11:28:11 +0200 Subject: [PATCH 035/129] minor naming --- bfabric/src/result_container.py | 13 +++++++------ bfabric/tests/integration/test_bfabric2_read.py | 3 ++- bfabric/tests/unit/test_results_container.py | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/bfabric/src/result_container.py b/bfabric/src/result_container.py index 57d84ba9..4e37d61f 100644 --- a/bfabric/src/result_container.py +++ b/bfabric/src/result_container.py @@ -44,7 +44,7 @@ def __init__(self, results: list, result_type: BfabricResultType, total_pages_ap """ self.results = results self.result_type = result_type - self.total_pages_api = total_pages_api + self._total_pages_api = total_pages_api def __getitem__(self, idx: int): return self.results[idx] @@ -69,13 +69,14 @@ def extend(self, other: ResultContainer) -> None: raise ValueError("Attempting to merge results of two different types", self.result_type, other.result_type) self.results += other.results - if (self.total_pages_api is not None) and (other.total_pages_api is not None): - self.total_pages_api 
+= other.total_pages_api
+        if (self._total_pages_api is not None) and (other._total_pages_api is not None):
+            self._total_pages_api += other._total_pages_api
         else:
-            self.total_pages_api = None
+            self._total_pages_api = None
 
-    def get_total_pages_api(self):
-        return self.total_pages_api
+    @property
+    def total_pages_api(self):
+        return self._total_pages_api
 
     def to_list_dict(self, drop_empty: bool = True, drop_underscores_suds: bool = True,
                      have_sort_responses: bool = False):
diff --git a/bfabric/tests/integration/test_bfabric2_read.py b/bfabric/tests/integration/test_bfabric2_read.py
index b0d7fc7d..a3189bb4 100755
--- a/bfabric/tests/integration/test_bfabric2_read.py
+++ b/bfabric/tests/integration/test_bfabric2_read.py
@@ -35,7 +35,8 @@ def read(self, engine: str, endpoint: str):
         for gt_attr, gt_value in ground_truth.items():
             self.assertEqual(str(gt_value), str(res[0][gt_attr]))
 
-    def _test_empty_project(self, bf: Bfabric):
+    def _test_empty_project(self, engine: str):
+        bf = self.clients[engine]
         res = bf.read(endpoint="project", obj={"name": "this project does not exist"}).to_list_dict()
         self.assertEqual(res, [])
 
diff --git a/bfabric/tests/unit/test_results_container.py b/bfabric/tests/unit/test_results_container.py
index 11295920..98d5cd81 100644
--- a/bfabric/tests/unit/test_results_container.py
+++ b/bfabric/tests/unit/test_results_container.py
@@ -35,7 +35,7 @@ def test_append(self):
 
         self.assertEqual(len(c3), 203)
         self.assertEqual(c3.results, list(range(200, 400)) + [1,2,3])
-        self.assertEqual(c3.get_total_pages_api(), 3)
+        self.assertEqual(c3.total_pages_api, 3)
 
     def test_to_list_dict(self):
         # NOTE: For LISTDICT format, the conversion to listdict does nothing

From 98337b6cf8d25eef451a7bd46aee93988b037e7e Mon Sep 17 00:00:00 2001
From: Aleksejs Fomins
Date: Mon, 22 Apr 2024 13:29:22 +0200
Subject: [PATCH 036/129] hack to enable Zeep to read from samples

---
 bfabric/src/engine_zeep.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/bfabric/src/engine_zeep.py b/bfabric/src/engine_zeep.py
index 59d8c8a1..01afdd48 100644
--- a/bfabric/src/engine_zeep.py
+++ b/bfabric/src/engine_zeep.py
@@ -5,19 +5,22 @@
 
 
 # TODO: Check if this is a bug of BFabric or Zeep. Specifically, see if the same call to bFabricPy has the same bug
-def _zeep_query_append_skipped(query: dict, skipped_keys: list) -> dict:
+def _zeep_query_append_skipped(query: dict, skipped_keys: list, inplace: bool = False, overwrite: bool = False) -> dict:
     """
     This function is used to fix a buggy behaviour of Zeep/BFabric. Specifically, Zeep does not return correct
     query results if some of the optional parameters are not mentioned in the query.
 
     :param query: Original query
-    :param skipped_keys: Optional keys to skip
+    :param skipped_keys: Optional keys to skip 
+    :param inplace: Whether to change the argument, or make a new copy to return
+    :param overwrite: Whether to overwrite the key if it is already present in the query
     :return: Adds optional keys to query as skipped values. 
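+
+    A minimal usage sketch (hypothetical query values):
+        _zeep_query_append_skipped({'id': 1}, ['includefamily'])
+        # -> {'id': 1, 'includefamily': zeep.xsd.SkipValue}
+    Each missing optional key is set to zeep.xsd.SkipValue, which tells Zeep not to serialize that
+    element, instead of rejecting the request because a seemingly mandatory parameter is absent.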
""" - queryThis = query.copy() + query_this = query.copy() if not inplace else query for key in skipped_keys: - queryThis[key] = zeep.xsd.SkipValue - return queryThis + if overwrite or (key not in query_this.keys()): + query_this[key] = zeep.xsd.SkipValue + return query_this class EngineZeep(object): @@ -44,6 +47,12 @@ def read(self, endpoint: str, obj: dict, page: int = 1, idonly: bool = False, query = copy.deepcopy(obj) query['includedeletableupdateable'] = includedeletableupdateable + # FIXME: Hacks for the cases where Zeep thinks a parameter is compulsory and it is actually not + if endpoint == 'sample': + excl_keys = ['includefamily', 'includeassociations', 'includeplates', 'includeresources', 'includeruns', + 'includechildren', 'includeparents', 'includereplacements'] + _zeep_query_append_skipped(query, excl_keys, inplace=True, overwrite=False) + full_query = dict(login=self.login, page=page, password=self.password, query=query, idonly=idonly) client = self._get_client(endpoint) From 0e11c5fd90ec76ee37e83ae08a671ed64ec42752 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Mon, 22 Apr 2024 13:32:03 +0200 Subject: [PATCH 037/129] added deepcopy to Zeep Hack for potential hierarchical queries --- bfabric/src/engine_zeep.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfabric/src/engine_zeep.py b/bfabric/src/engine_zeep.py index 01afdd48..9257d3a5 100644 --- a/bfabric/src/engine_zeep.py +++ b/bfabric/src/engine_zeep.py @@ -16,7 +16,7 @@ def _zeep_query_append_skipped(query: dict, skipped_keys: list, inplace: bool = :param overwrite: Whether to overwrite the key if it is already present in the query :return: Adds optional keys to query as skipped values. """ - query_this = query.copy() if not inplace else query + query_this = copy.deepcopy(query) if not inplace else query for key in skipped_keys: if overwrite or (key not in query_this.keys()): query_this[key] = zeep.xsd.SkipValue From b32f2c1ff1d7b97227b91c875756691555f4354f Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Tue, 23 Apr 2024 11:46:20 +0200 Subject: [PATCH 038/129] migrated bfabricpy to .yml config files --- bfabric/bfabric.py | 51 ++++--- bfabric/bfabric_config.py | 166 ++++++++++++++++------ bfabric/tests/test_bfabric_functional.py | 2 +- bfabric/tests/unit/example_config.yml | 20 +++ bfabric/tests/unit/test_bfabric_config.py | 112 +++++++++------ 5 files changed, 245 insertions(+), 106 deletions(-) create mode 100644 bfabric/tests/unit/example_config.yml diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index ad03121d..45a684ed 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -26,7 +26,7 @@ import sys from pprint import pprint -from bfabric.bfabric_config import BfabricAuth, BfabricConfig, parse_bfabricrc_py +from bfabric.bfabric_config import BfabricAuth, BfabricConfig, read_bfabricrc_py from suds.client import Client from suds.wsdl import Service @@ -84,33 +84,46 @@ class Bfabric(object): def warning(self, msg): sys.stderr.write("\033[93m{}\033[0m\n".format(msg)) - def __init__(self, login=None, password=None, webbase=None, externaljobid=None, bfabricrc=None, verbose=False): + def __init__(self, login: str = None, password: str = None, webbase: str = None, externaljobid=None, + config_path: str = None, config_env: str = None, optional_auth: bool = False, verbose: bool = False): self.verbose = verbose self.cl = {} self.verbose = False self.query_counter = 0 - bfabricrc = bfabricrc or os.path.normpath(os.path.expanduser("~/.bfabricrc.py")) - if not os.path.isfile(bfabricrc): - 
self.warning("could not find '.bfabricrc.py' file in home directory.") - self.config = BfabricConfig(base_url=webbase) + # Get default path config file path + config_path = config_path or os.path.normpath(os.path.expanduser("~/.bfabricpy.yml")) + + # Use the provided config data from arguments instead of the file + if not os.path.isfile(config_path): + self.warning("could not find '.bfabricpy.yml' file in home directory.") + self.config = BfabricConfig(webbase=webbase) self.auth = BfabricAuth(login=login, password=password) + + # Load config from file, override some of the fields with the provided ones else: - with open(bfabricrc, "r", encoding="utf-8") as file: - config, auth = parse_bfabricrc_py(file) - self.config = config.with_overrides(base_url=webbase) - self.auth = auth if login is None and password is None else BfabricAuth(login=login, password=password) + config, auth = read_bfabricrc_py(config_path, config_env=config_env, optional_auth=optional_auth) + self.config = config.with_overrides(webbase=webbase) + if (login is not None) and (password is not None): + self.auth = BfabricAuth(login=login, password=password) + elif (login is None) and (password is None): + self.auth = auth + else: + raise IOError("Must provide both username and password, or neither.") - if not self.auth.login or not self.auth.password: - raise ValueError("login or password missing") + if not self.config.webbase: + raise ValueError("webbase missing") + if not optional_auth: + if not self.auth or not self.auth.login or not self.auth.password: + raise ValueError("Authentification not initialized but required") + + msg = f"\033[93m--- webbase {self.config.webbase}; login; {self.auth.login} ---\033[0m\n" + sys.stderr.write(msg) if self.verbose: pprint(self.config) - msg = f"\033[93m--- webbase {self.config.base_url}; login; {self.auth.login} ---\033[0m\n" - sys.stderr.write(msg) - def read_object(self, endpoint, obj, page=1, plain=False, idonly=False): """ A generic method which can connect to any endpoint, e.g., workunit, project, order, @@ -189,7 +202,7 @@ def upload_file(self, filename, workunitid): def _get_service(self, endpoint: str) -> Service: """Returns a `suds.client.Service` object for the given endpoint name.""" if endpoint not in self.cl: - self.cl[endpoint] = Client(f"{self.config.base_url}/{endpoint}?wsdl", cache=None) + self.cl[endpoint] = Client(f"{self.config.webbase}/{endpoint}?wsdl", cache=None) return self.cl[endpoint].service def _perform_request( @@ -835,12 +848,12 @@ def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style= sample_id = self.get_sampleid(int(resource_iterator._id)) _resource_sample = {'resource_id': int(resource_iterator._id), - 'resource_url': "{0}/userlab/show-resource.html?id={1}".format(self.config.base_url,resource_iterator._id)} + 'resource_url': "{0}/userlab/show-resource.html?id={1}".format(self.config.webbase, resource_iterator._id)} if not sample_id is None: _resource_sample['sample_id'] = int(sample_id) - _resource_sample['sample_url'] = "{0}/userlab/show-sample.html?id={1}".format(self.config.base_url, sample_id) + _resource_sample['sample_url'] = "{0}/userlab/show-sample.html?id={1}".format(self.config.webbase, sample_id) resource_ids[_application_name].append(_resource_sample) except: @@ -935,7 +948,7 @@ def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style= }, 'workunit_id': int(workunit._id), 'workunit_createdby': str(workunit.createdby), - 'workunit_url': 
"{0}/userlab/show-workunit.html?workunitId={1}".format(self.config.base_url, workunit._id), + 'workunit_url': "{0}/userlab/show-workunit.html?workunitId={1}".format(self.config.webbase, workunit._id), 'external_job_id': int(yaml_workunit_externaljob._id), 'order_id': order_id, 'project_id': project_id, diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index a8e2cc2e..e511b440 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -6,6 +6,9 @@ import os from typing import Optional, Dict, Tuple import dataclasses +# from configparser import ConfigParser +import yaml +from pathlib import Path @dataclasses.dataclass(frozen=True) @@ -27,73 +30,146 @@ class BfabricConfig: """Holds the configuration for the B-Fabric client for connecting to particular instance of B-Fabric. Attributes: - base_url (optional): The API base url + webbase (optional): The API base url application_ids (optional): Map of application names to ids. job_notification_emails (optional): Space-separated list of email addresses to notify when a job finishes. """ - base_url: str = "https://fgcz-bfabric.uzh.ch/bfabric" + webbase: str = "https://fgcz-bfabric.uzh.ch/bfabric" application_ids: Dict[str, int] = dataclasses.field(default_factory=dict) job_notification_emails: str = "" def with_overrides( self, - base_url: Optional[str] = None, + webbase: Optional[str] = None, application_ids: Optional[Dict[str, int]] = None, ) -> BfabricConfig: """Returns a copy of the configuration with new values applied, if they are not None.""" return BfabricConfig( - base_url=base_url if base_url is not None else self.base_url, + webbase=webbase if webbase is not None else self.webbase, application_ids=application_ids if application_ids is not None else self.application_ids, ) -def parse_bfabricrc_py(file: io.FileIO) -> Tuple[BfabricConfig, Optional[BfabricAuth]]: +''' +NOTE: BFabricPy expects a bfabricpy.yml of the format, as seen in bfabricPy/tests/unit/example_config.yml +* The general field always has to be present +* There may be any number of environments, and they may have arbitrary names. Here, they are called PRODUCTION and TEST +* Must specify correct login, password and webbase for each environment. +* application and job_notification_emails fields are optional +* The default environment will be selected as follows: + - First, parser will check if the optional argument `config_env` is provided directly to the parser function + - If not, secondly, the parser will check if the environment variable `BFABRICPY_CONFIG_ENV` is declared + - If not, finally, the parser will select the default_config specified in [GENERAL] of the .bfabricpy.ini file +''' + + +def _read_config_env_as_dict(config_path: str, config_env: str = None) -> Tuple[str, dict]: + """ + Reads and partially parses a bfabricpy.yml file + :param config_path: Path to the configuration file. It is assumed that it exists + :param config_env: Specific environment to parse. If not provided, it is deduced from an environment variable + or the config file itself. 
+ :return: Returns a target environment name, and the corresponding data from bfabricpy.yml file as a dictionary + """ + """Parses a .bfabricrc.py file and returns a tuple of BfabricConfig and BfabricAuth objects.""" - values = {} - file_path = os.path.realpath(file.name) logger = logging.getLogger(__name__) - logger.info(f"Reading configuration from: {file_path}") - - for line in file: - if line.startswith("#"): - continue - - key, _, value = [part.strip() for part in line.partition("=")] - if key not in [ - "_PASSWD", - "_LOGIN", - "_WEBBASE", - "_APPLICATION", - "_JOB_NOTIFICATION_EMAILS", - ]: - continue - - # In case of multiple definitions, the first rule counts! - if key not in values: - if key in ["_APPLICATION"]: - try: - values[key] = json.loads(value) - except json.JSONDecodeError as e: - raise ValueError( - f"While reading {file_path}. '{key}' is not a valid JSON string." - ) from e - else: - # to make it downward compatible; so we replace quotes in login and password - values[key] = value.replace('"', "").replace("'", "") + logger.info(f"Reading configuration from: {config_path}") + + # Read the config file + config_dict = yaml.safe_load(Path(config_path).read_text()) + + # config = ConfigParser() + # config.read(config_path) + # config_dict = {s: dict(config.items(s)) for s in config.sections()} + + if "GENERAL" not in config_dict: + raise IOError("Config file must have a general section") + if 'default_config' not in config_dict['GENERAL']: + raise IOError("Config file must provide a default environment") + config_env_default = config_dict['GENERAL']['default_config'] + + # Determine which environment we will use + # By default, use the one provided by config_env + if config_env is None: + # Try to find a relevant + config_env = os.getenv("BFABRICPY_CONFIG_ENV") + if config_env is None: + logger.log(20, "BFABRICPY_CONFIG_ENV not found, using default environment " + config_env_default) + config_env = config_env_default else: - logger.warning(f"While reading {file_path}. 
'{key}' is already set.") - - args = dict( - base_url=values.get("_WEBBASE"), - application_ids=values.get("_APPLICATION"), - job_notification_emails=values.get("_JOB_NOTIFICATION_EMAILS"), - ) - config = BfabricConfig(**{k: v for k, v in args.items() if v is not None}) - if "_LOGIN" in values and "_PASSWD" in values: - auth = BfabricAuth(login=values["_LOGIN"], password=values["_PASSWD"]) + logger.log(20, "found BFABRICPY_CONFIG_ENV = " + config_env) else: + logger.log(20, "config environment specified explicitly as " + config_env) + + if config_env not in config_dict: + raise IOError("The requested config environment", config_env, "is not present in the config file") + + return config_env, config_dict[config_env] + +def _have_all_keys(d: dict, l: list) -> bool: + """True if all elements in list l are present as keys in dict d, otherwise false""" + return all([k in d for k in l]) + +def _parse_dict(d: dict, mandatory_keys: list, optional_keys: list = None, error_prefix: str = None): + """ + Returns a copy of an existing dictionary, only keeping mandatory and optional keys + If a mandatory key is not found, an exception is raised + :param d: Starting dictionary + :param mandatory_keys: A list of mandatory keys + :param optional_keys: A list of optional keys + :param error_prefix: A string to print if a mandatory key is not found + :return: Copy of a starting dictionary, only containing mandatory and optional keys + """ + d_rez = {} + + # Get all mandatory fields, and complain if not found + for k in mandatory_keys: + if k in d: + d_rez[k] = d[k] + else: + raise ValueError(error_prefix + k) + + # Get all optional fields + if optional_keys is not None: + for k in optional_keys: + if k in d: + d_rez[k] = d[k] + + # Ignore all other fields + return d_rez + +def read_bfabricrc_py(config_path: str, config_env: str = None, + optional_auth: bool = False) -> Tuple[BfabricConfig, Optional[BfabricAuth]]: + """ + Reads bfabricpy.yml file, parses it, extracting authentication and configuration data + :param config_path: Path to the configuration file. It is assumed the file exists + :param config_env: Configuration environment to use. If not given, it is deduced. + :param optional_auth: Whether authentication is optional. 
+ If not, both login and password must be present in the config file, otherwise an exception is thrown + If yes, missing login and password would result in authentication class being None, but no exception + :return: Configuration and Authentication class instances + """ + + + config_env_final, config_dict = _read_config_env_as_dict(config_path, config_env=config_env) + + error_prefix = "Config environment " + config_env_final + " does not have a compulsory field: " + + # Parse authentification + if optional_auth and not _have_all_keys(config_dict, ['login', 'password']): + # Allow returning None auth if enabled auth = None + else: + auth_dict = _parse_dict(config_dict, ['login', 'password'], error_prefix=error_prefix) + auth = BfabricAuth(**auth_dict) + + # Parse config + config_dict = _parse_dict(config_dict, ['webbase'], optional_keys=['application_ids', 'job_notification_emails'], + error_prefix=error_prefix) + config = BfabricConfig(**config_dict) + return config, auth diff --git a/bfabric/tests/test_bfabric_functional.py b/bfabric/tests/test_bfabric_functional.py index c29261be..02b91de3 100755 --- a/bfabric/tests/test_bfabric_functional.py +++ b/bfabric/tests/test_bfabric_functional.py @@ -41,7 +41,7 @@ def test_wrappercreator_submitter(self): self.assertEqual(B.auth.login, 'pfeeder', msg) msg = "This test case requires a bfabric test system!" - self.assertIn("bfabric-test", B.config.base_url, msg) + self.assertIn("bfabric-test", B.config.webbase, msg) # TODO # create input resource diff --git a/bfabric/tests/unit/example_config.yml b/bfabric/tests/unit/example_config.yml new file mode 100644 index 00000000..ba218cbb --- /dev/null +++ b/bfabric/tests/unit/example_config.yml @@ -0,0 +1,20 @@ +GENERAL: + default_config: PRODUCTION + +PRODUCTION: + login: my_epic_production_login + password: my_secret_production_password + webbase: https://mega-production-server.uzh.ch/myprod + +TEST: + login: my_epic_test_login + password: my_secret_test_password + webbase: https://mega-test-server.uzh.ch/mytest + application_ids: + Proteomics/CAT_123: 7 + Proteomics/DOG_552: 6 + Proteomics/DUCK_666: 12 + job_notification_emails: john.snow@fgcz.uzh.ch billy.the.kid@fgcz.ethz.ch + +STANDBY: + webbase: https://standby-server.uzh.ch/mystandby \ No newline at end of file diff --git a/bfabric/tests/unit/test_bfabric_config.py b/bfabric/tests/unit/test_bfabric_config.py index bf04eef5..a310a868 100644 --- a/bfabric/tests/unit/test_bfabric_config.py +++ b/bfabric/tests/unit/test_bfabric_config.py @@ -1,7 +1,8 @@ +import os import io import unittest -from bfabric.bfabric_config import BfabricConfig, BfabricAuth, parse_bfabricrc_py +from bfabric.bfabric_config import BfabricConfig, BfabricAuth, read_bfabricrc_py class TestBfabricAuth(unittest.TestCase): @@ -19,76 +20,105 @@ def test_str(self): class TestBfabricConfig(unittest.TestCase): def setUp(self): self.config = BfabricConfig( - base_url="url", + webbase="url", application_ids={"app": 1}, ) def test_with_overrides(self): new_config = self.config.with_overrides( - base_url="new_url", + webbase="new_url", application_ids={"new": 2}, ) - self.assertEqual("new_url", new_config.base_url) + self.assertEqual("new_url", new_config.webbase) self.assertEqual({"new": 2}, new_config.application_ids) - self.assertEqual("url", self.config.base_url) + self.assertEqual("url", self.config.webbase) self.assertEqual({"app": 1}, self.config.application_ids) def test_with_replaced_when_none(self): - new_config = self.config.with_overrides(base_url=None, application_ids=None) 
- self.assertEqual("url", new_config.base_url) + new_config = self.config.with_overrides(webbase=None, application_ids=None) + self.assertEqual("url", new_config.webbase) self.assertEqual({"app": 1}, new_config.application_ids) - self.assertEqual("url", self.config.base_url) + self.assertEqual("url", self.config.webbase) self.assertEqual({"app": 1}, self.config.application_ids) - def test_read_bfabricrc_py(self): - input_text = ( - "# Some comment\n" - "_LOGIN = login\n" - "_PASSWD = 'user'\n" - "_UKNOWNKEY = 'value'\n" - "# Another comment\n" - """_WEBBASE = "url"\n""" - """_APPLICATION = {"app": 1}\n""" - """_JOB_NOTIFICATION_EMAILS = "email1 email2"\n""" - ) - file = io.StringIO(input_text) - setattr(file, "name", "/file") + # Testing default initialization + # TODO: Test that logging is consistent with initialization + def test_read_yml_bypath_default(self): + # Ensure environment variable is not available, and the default is environment is loaded + os.environ.pop('BFABRICPY_CONFIG_ENV', None) + + config, auth = read_bfabricrc_py('example_config.yml') # Should deduce + self.assertEqual("my_epic_production_login", auth.login) + self.assertEqual("my_secret_production_password", auth.password) + self.assertEqual("https://mega-production-server.uzh.ch/myprod", config.webbase) + + # Testing environment variable initialization + # TODO: Test that logging is consistent with default config + def test_read_yml_bypath_environment_variable(self): + # Explicitly set the environment variable for this process + os.environ["BFABRICPY_CONFIG_ENV"] = "TEST" + + config, auth = read_bfabricrc_py('example_config.yml') # Should deduce + self.assertEqual("my_epic_test_login", auth.login) + self.assertEqual("my_secret_test_password", auth.password) + self.assertEqual("https://mega-test-server.uzh.ch/mytest", config.webbase) + + # Testing explicit initialization, as well as extra fields (application_ids, job_notification_emails) + # TODO: Test that logging is consistent with default config + def test_read_yml_bypath_allfields(self): with self.assertLogs(level="INFO") as log_context: - config, auth = parse_bfabricrc_py(file) - self.assertEqual("login", auth.login) - self.assertEqual("user", auth.password) - self.assertEqual("url", config.base_url) - self.assertEqual({"app": 1}, config.application_ids) - self.assertEqual("email1 email2", config.job_notification_emails) - self.assertEqual( - [ - "INFO:bfabric.bfabric_config:Reading configuration from: /file" - ], - log_context.output, - ) + config, auth = read_bfabricrc_py('example_config.yml', config_env='TEST') - def test_read_bfabricrc_py_when_empty(self): - input_text = "" - file = io.StringIO(input_text) - setattr(file, "name", "/file") + # # Testing log + # self.assertEqual( + # [ + # "INFO:bfabric.bfabric_config:Reading configuration from: example_config.yml" + # "INFO:bfabric.bfabric_config:config environment specified explicitly as TEST" + # ], + # log_context.output, + # ) + + self.assertEqual("my_epic_test_login", auth.login) + self.assertEqual("my_secret_test_password", auth.password) + self.assertEqual("https://mega-test-server.uzh.ch/mytest", config.webbase) + + applications_dict_ground_truth = { + 'Proteomics/CAT_123': 7, + 'Proteomics/DOG_552': 6, + 'Proteomics/DUCK_666': 12 + } + + job_notification_emails_ground_truth = "john.snow@fgcz.uzh.ch billy.the.kid@fgcz.ethz.ch" + + self.assertEqual(applications_dict_ground_truth, config.application_ids) + self.assertEqual(job_notification_emails_ground_truth, config.job_notification_emails) + + # 
Testing that we can load webbase without authentication if correctly requested + def test_read_yml_when_empty_optional(self): with self.assertLogs(level="INFO"): - config, auth = parse_bfabricrc_py(file) + config, auth = read_bfabricrc_py('example_config.yml', config_env='STANDBY', optional_auth=True) + self.assertIsNone(auth) - self.assertEqual("https://fgcz-bfabric.uzh.ch/bfabric", config.base_url) + self.assertEqual("https://standby-server.uzh.ch/mystandby", config.webbase) self.assertEqual({}, config.application_ids) self.assertEqual("", config.job_notification_emails) + # Test that missing authentication will raise an error if required + def test_read_yml_when_empty_mandatory(self): + with self.assertRaises(ValueError): + read_bfabricrc_py('example_config.yml', config_env='STANDBY', optional_auth=False) + def test_repr(self): rep = repr(self.config) self.assertEqual( - "BfabricConfig(base_url='url', application_ids={'app': 1}, job_notification_emails='')", + "BfabricConfig(webbase='url', application_ids={'app': 1}, job_notification_emails='')", rep, ) def test_str(self): rep = str(self.config) self.assertEqual( - "BfabricConfig(base_url='url', application_ids={'app': 1}, job_notification_emails='')", + "BfabricConfig(webbase='url', application_ids={'app': 1}, job_notification_emails='')", rep, ) From d0d77799d24d9057f44a12e982b9549e62469ce1 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Tue, 23 Apr 2024 11:57:26 +0200 Subject: [PATCH 039/129] docstring for bfabric class --- bfabric/bfabric.py | 12 ++++++++++++ bfabric/bfabric_config.py | 4 ++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index 45a684ed..d0916550 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -86,6 +86,18 @@ def warning(self, msg): def __init__(self, login: str = None, password: str = None, webbase: str = None, externaljobid=None, config_path: str = None, config_env: str = None, optional_auth: bool = False, verbose: bool = False): + """ + :param login: Login string for overriding config file + :param password: Password for overriding config file + :param webbase: Webbase for overriding config file + :param externaljobid: ? + :param config_path: Path to the config file, in case it is different from default + :param config_env: Which config environment to use. Can also specify via environment variable or use + default in the config file (at your own risk) + :param optional_auth: Whether authentification is optional. If yes, missing authentification will be ignored, + otherwise an exception will be raised + :param verbose: Verbosity (TODO: resolve potential redundancy with logger) + """ self.verbose = verbose self.cl = {} diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index e511b440..fbbd2b3f 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -54,7 +54,7 @@ def with_overrides( ''' -NOTE: BFabricPy expects a bfabricpy.yml of the format, as seen in bfabricPy/tests/unit/example_config.yml +NOTE: BFabricPy expects a .bfabricpy.yml of the format, as seen in bfabricPy/tests/unit/example_config.yml * The general field always has to be present * There may be any number of environments, and they may have arbitrary names. Here, they are called PRODUCTION and TEST * Must specify correct login, password and webbase for each environment. 
@@ -62,7 +62,7 @@ def with_overrides( * The default environment will be selected as follows: - First, parser will check if the optional argument `config_env` is provided directly to the parser function - If not, secondly, the parser will check if the environment variable `BFABRICPY_CONFIG_ENV` is declared - - If not, finally, the parser will select the default_config specified in [GENERAL] of the .bfabricpy.ini file + - If not, finally, the parser will select the default_config specified in [GENERAL] of the .bfabricpy.yml file ''' From c407a88e9904521d8fac814a87dc9234c8a4739a Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Tue, 23 Apr 2024 14:48:22 +0200 Subject: [PATCH 040/129] added authentication to the actual bfabric2 class, fixed a few minor bugs introduced by previous revision --- bfabric/bfabric.py | 4 +- bfabric/bfabric2.py | 64 +++++++++++++++---- bfabric/bfabric_config.py | 4 +- .../examples/compare_zeep_suds_pagination.py | 2 +- bfabric/examples/compare_zeep_suds_query.py | 14 ++-- bfabric/examples/exists_multi.py | 2 +- bfabric/examples/zeep_debug.py | 14 ++-- .../tests/integration/test_bfabric2_read.py | 2 +- .../test_bfabric2_read_pagination.py | 2 +- .../integration/test_bfabric2_save_delete.py | 2 +- bfabric/tests/unit/test_bfabric_config.py | 12 ++-- 11 files changed, 82 insertions(+), 40 deletions(-) diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index a8216331..67b453ad 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -26,7 +26,7 @@ import sys from pprint import pprint -from bfabric.bfabric_config import BfabricAuth, BfabricConfig, read_bfabricrc_py +from bfabric.bfabric_config import BfabricAuth, BfabricConfig, read_bfabricpy_yml from suds.client import Client from suds.wsdl import Service @@ -115,7 +115,7 @@ def __init__(self, login: str = None, password: str = None, webbase: str = None, # Load config from file, override some of the fields with the provided ones else: - config, auth = read_bfabricrc_py(config_path, config_env=config_env, optional_auth=optional_auth) + config, auth = read_bfabricpy_yml(config_path, config_env=config_env, optional_auth=optional_auth) self.config = config.with_overrides(webbase=webbase) if (login is not None) and (password is not None): self.auth = BfabricAuth(login=login, password=password) diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 305f01c7..7743841e 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -24,6 +24,8 @@ """ import os +import sys +from pprint import pprint from enum import Enum from copy import deepcopy from typing import Union, List, Optional @@ -33,20 +35,60 @@ from bfabric.src.engine_zeep import EngineZeep from bfabric.src.result_container import ResultContainer, BfabricResultType from bfabric.src.paginator import page_iter, BFABRIC_QUERY_LIMIT -from bfabric.bfabric_config import BfabricAuth, BfabricConfig, parse_bfabricrc_py +from bfabric.bfabric_config import BfabricAuth, BfabricConfig, read_bfabricpy_yml class BfabricAPIEngineType(Enum): SUDS = 1 ZEEP = 2 -def get_system_auth(): - path_bfabricrc = os.path.normpath(os.path.expanduser("~/.bfabricrc.py")) - if not os.path.isfile(path_bfabricrc): - raise IOError("Config file not found:", path_bfabricrc) +def get_system_auth(login: str = None, password: str = None, webbase: str = None, externaljobid=None, + config_path: str = None, config_env: str = None, optional_auth: bool = False, verbose: bool = False): + """ + :param login: Login string for overriding config file + :param password: Password for overriding config file + 
:param webbase: Webbase for overriding config file + :param externaljobid: ? + :param config_path: Path to the config file, in case it is different from default + :param config_env: Which config environment to use. Can also specify via environment variable or use + default in the config file (at your own risk) + :param optional_auth: Whether authentification is optional. If yes, missing authentification will be ignored, + otherwise an exception will be raised + :param verbose: Verbosity (TODO: resolve potential redundancy with logger) + """ + + # Get default path config file path + config_path = config_path or os.path.normpath(os.path.expanduser("~/.bfabricpy.yml")) + + # Use the provided config data from arguments instead of the file + if not os.path.isfile(config_path): + # TODO: Convert to log + print("Warning: could not find '.bfabricpy.yml' file in home directory.") + config = BfabricConfig(webbase=webbase) + auth = BfabricAuth(login=login, password=password) + + # Load config from file, override some of the fields with the provided ones + else: + config, auth = read_bfabricpy_yml(config_path, config_env=config_env, optional_auth=optional_auth) + config = config.with_overrides(webbase=webbase) + if (login is not None) and (password is not None): + auth = BfabricAuth(login=login, password=password) + elif (login is None) and (password is None): + auth = auth + else: + raise IOError("Must provide both username and password, or neither.") + + if not config.webbase: + raise ValueError("webbase missing") + if not optional_auth: + if not auth or not auth.login or not auth.password: + raise ValueError("Authentification not initialized but required") + + msg = f"\033[93m--- webbase {config.webbase}; login; {auth.login} ---\033[0m\n" + sys.stderr.write(msg) - with open(path_bfabricrc, "r", encoding="utf-8") as file: - config, auth = parse_bfabricrc_py(file) + if verbose: + pprint(config) return config, auth @@ -66,10 +108,10 @@ def __init__(self, config: BfabricConfig, auth: BfabricAuth, self.query_counter = 0 if engine == BfabricAPIEngineType.SUDS: - self.engine = EngineSUDS(auth.login, auth.password, config.base_url) + self.engine = EngineSUDS(auth.login, auth.password, config.webbase) self.result_type = BfabricResultType.LISTSUDS elif engine == BfabricAPIEngineType.ZEEP: - self.engine = EngineZeep(auth.login, auth.password, config.base_url) + self.engine = EngineZeep(auth.login, auth.password, config.webbase) self.result_type = BfabricResultType.LISTZEEP else: raise ValueError("Unexpected engine", BfabricAPIEngineType) @@ -156,7 +198,7 @@ def read_multi(self, endpoint: str, obj: dict, multi_query_key: str, multi_query # Iterate over request chunks that fit into a single API page for page_vals in page_iter(multi_query_vals): - obj_exteded[multi_query_key] = page_vals + obj_extended[multi_query_key] = page_vals # TODO: Test what happens if there are multiple responses to each of the individual queries. # * What would happen? @@ -165,7 +207,7 @@ def read_multi(self, endpoint: str, obj: dict, multi_query_key: str, multi_query # automatically? If yes, perhaps we don't need this method at all? # TODO: It is assumed that a user requesting multi_query always wants all of the pages. Can anybody think of # exceptions to this? 
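             # Illustrative sketch (hypothetical numbers, assuming page_iter() chunks by BFABRIC_QUERY_LIMIT == 100):
             # a multi-query over 250 ids would be split into sublists of 100, 100 and 50, so the loop would issue
             # three read() calls below and concatenate their results into response_tot.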
- response_this = self.read(endpoint, obj_exteded, max_results=None, readid=readid, **kwargs) + response_this = self.read(endpoint, obj_extended, max_results=None, readid=readid, **kwargs) response_tot.extend(response_this) return response_tot diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index fbbd2b3f..0dda3337 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -142,8 +142,8 @@ def _parse_dict(d: dict, mandatory_keys: list, optional_keys: list = None, error # Ignore all other fields return d_rez -def read_bfabricrc_py(config_path: str, config_env: str = None, - optional_auth: bool = False) -> Tuple[BfabricConfig, Optional[BfabricAuth]]: +def read_bfabricpy_yml(config_path: str, config_env: str = None, + optional_auth: bool = False) -> Tuple[BfabricConfig, Optional[BfabricAuth]]: """ Reads bfabricpy.yml file, parses it, extracting authentication and configuration data :param config_path: Path to the configuration file. It is assumed the file exists diff --git a/bfabric/examples/compare_zeep_suds_pagination.py b/bfabric/examples/compare_zeep_suds_pagination.py index 88332b6a..9c901275 100644 --- a/bfabric/examples/compare_zeep_suds_pagination.py +++ b/bfabric/examples/compare_zeep_suds_pagination.py @@ -85,7 +85,7 @@ def dataframe_pagination_test(config, auth, endpoint, use_cached: bool = False, return match_test_result -config, auth = get_system_auth() +config, auth = get_system_auth(config_env="TEST") result = dataframe_pagination_test(config, auth, 'user', use_cached=False, store_cached=True) report_test_result(result, "pagination") diff --git a/bfabric/examples/compare_zeep_suds_query.py b/bfabric/examples/compare_zeep_suds_query.py index fc426be5..fe9e412a 100644 --- a/bfabric/examples/compare_zeep_suds_query.py +++ b/bfabric/examples/compare_zeep_suds_query.py @@ -8,7 +8,7 @@ import zeep import suds -from bfabric.bfabric2 import get_system_auth +from bfabric.bfabric2 import get_system_auth, BfabricAuth, BfabricConfig from bfabric.src.response_format_suds import suds_asdict_recursive from bfabric.src.response_format_dict import drop_empty_elements, map_element_keys @@ -47,7 +47,7 @@ def read_suds(wsdl, fullQuery, raw=True): else: return suds_asdict_recursive(ret, convert_types=True) -def full_query(auth, query: dict, includedeletableupdateable: bool = False) -> dict: +def full_query(auth: BfabricAuth, query: dict, includedeletableupdateable: bool = False) -> dict: thisQuery = deepcopy(query) thisQuery['includedeletableupdateable'] = includedeletableupdateable @@ -57,8 +57,8 @@ def full_query(auth, query: dict, includedeletableupdateable: bool = False) -> d 'query': thisQuery } -def calc_both(auth, config, endpoint: str, query: dict, raw: bool = True): - wsdl = "".join((config.base_url, '/', endpoint, "?wsdl")) +def calc_both(auth: BfabricAuth, config: BfabricConfig, endpoint: str, query: dict, raw: bool = True): + wsdl = "".join((config.webbase, '/', endpoint, "?wsdl")) fullQuery = full_query(auth, query) retZeep = read_zeep(wsdl, fullQuery, raw=raw) retSuds = read_suds(wsdl, fullQuery, raw=raw) @@ -69,7 +69,7 @@ def calc_both(auth, config, endpoint: str, query: dict, raw: bool = True): # Raw XML tests ###################### -def raw_test(auth, config, endpoint, query): +def raw_test(auth: BfabricAuth, config: BfabricConfig, endpoint, query): print("Testing raw XML match for", endpoint, query) retZeep, retSuds = calc_both(auth, config, endpoint, query, raw=True) assert len(retZeep) == len(retSuds) @@ -77,7 +77,7 @@ def raw_test(auth, config, 
endpoint, query): print('-- passed --') -config, auth = get_system_auth() +config, auth = get_system_auth(config_env="TEST") # raw_test(auth, config, 'user', {'id': 9026}) # raw_test(auth, config, 'user', {}) @@ -167,7 +167,7 @@ def parsed_data_match_test(auth, config, endpoint, query, drop_empty: bool = Tru if log_file_path is not None: with open(log_file_path, 'w') as f: with redirect_stdout(f): - recursive_comparison(retZeepDict, retSudsDict, prefix = []) + matched = recursive_comparison(retZeepDict, retSudsDict, prefix = []) else: matched = recursive_comparison(retZeepDict, retSudsDict, prefix=[]) diff --git a/bfabric/examples/exists_multi.py b/bfabric/examples/exists_multi.py index 9696ca53..b1ba7469 100644 --- a/bfabric/examples/exists_multi.py +++ b/bfabric/examples/exists_multi.py @@ -1,7 +1,7 @@ from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth -config, auth = get_system_auth() +config, auth = get_system_auth(config_env="TEST") b1 = Bfabric(config, auth, engine = BfabricAPIEngineType.SUDS) b2 = Bfabric(config, auth, engine = BfabricAPIEngineType.ZEEP) diff --git a/bfabric/examples/zeep_debug.py b/bfabric/examples/zeep_debug.py index feff6da9..fd9b9b37 100644 --- a/bfabric/examples/zeep_debug.py +++ b/bfabric/examples/zeep_debug.py @@ -1,4 +1,4 @@ -from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth +from bfabric.bfabric2 import get_system_auth, BfabricAuth, BfabricConfig import zeep from copy import deepcopy from lxml import etree @@ -17,7 +17,7 @@ ''' -def full_query(auth, query: dict, includedeletableupdateable: bool = False) -> dict: +def full_query(auth: BfabricAuth, query: dict, includedeletableupdateable: bool = False) -> dict: thisQuery = deepcopy(query) thisQuery['includedeletableupdateable'] = includedeletableupdateable @@ -37,21 +37,21 @@ def read_zeep(wsdl, fullQuery, raw=True): return ret -def read(auth, config, endpoint: str, query: dict, raw: bool = True): - wsdl = "".join((config.base_url, '/', endpoint, "?wsdl")) +def read(auth: BfabricAuth, config: BfabricConfig, endpoint: str, query: dict, raw: bool = True): + wsdl = "".join((config.webbase, '/', endpoint, "?wsdl")) fullQuery = full_query(auth, query) return read_zeep(wsdl, fullQuery, raw=raw) -config, auth = get_system_auth() +bfconfig, bfauth = get_system_auth(config_env="TEST") print('============== RAW ================') -rez = read(auth, config, 'user', {'id': 9026}, raw = True) +rez = read(bfauth, bfconfig, 'user', {'id': 9026}, raw = True) root = etree.fromstring(rez) print(etree.tostring(root, pretty_print=True).decode()) -rez = read(auth, config, 'user', {'id': 9026}, raw = False) +rez = read(bfauth, bfconfig, 'user', {'id': 9026}, raw = False) print('============== ORIG ================') print(rez['user'][0]['project']) diff --git a/bfabric/tests/integration/test_bfabric2_read.py b/bfabric/tests/integration/test_bfabric2_read.py index a3189bb4..19fa5a86 100755 --- a/bfabric/tests/integration/test_bfabric2_read.py +++ b/bfabric/tests/integration/test_bfabric2_read.py @@ -13,7 +13,7 @@ def setUp(self, *args, **kwargs): self.ground_truth = json.load(json_file) # Load config and authentification - self.config, self.auth = get_system_auth() + self.config, self.auth = get_system_auth(config_env="TEST") # Init the engines self.clients = { diff --git a/bfabric/tests/integration/test_bfabric2_read_pagination.py b/bfabric/tests/integration/test_bfabric2_read_pagination.py index 0e1ff85f..f894d910 100644 --- 
a/bfabric/tests/integration/test_bfabric2_read_pagination.py +++ b/bfabric/tests/integration/test_bfabric2_read_pagination.py @@ -18,7 +18,7 @@ def _calc_query(config, auth, engine: BfabricAPIEngineType, endpoint: str, class BfabricTestPagination(unittest.TestCase): def setUp(self): - self.config, self.auth = get_system_auth() + self.config, self.auth = get_system_auth(config_env="TEST") def test_composite_user(self): endpoint = 'user' diff --git a/bfabric/tests/integration/test_bfabric2_save_delete.py b/bfabric/tests/integration/test_bfabric2_save_delete.py index 8567b74e..e8236d52 100644 --- a/bfabric/tests/integration/test_bfabric2_save_delete.py +++ b/bfabric/tests/integration/test_bfabric2_save_delete.py @@ -86,7 +86,7 @@ def _save_delete_workunit(b: Bfabric, verbose: bool = False) -> None: class BfabricTestSaveDelete(unittest.TestCase): def setUp(self): - self.config, self.auth = get_system_auth() + self.config, self.auth = get_system_auth(config_env="TEST") def test_zeep(self): bZeep = Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP) diff --git a/bfabric/tests/unit/test_bfabric_config.py b/bfabric/tests/unit/test_bfabric_config.py index a310a868..8b74e569 100644 --- a/bfabric/tests/unit/test_bfabric_config.py +++ b/bfabric/tests/unit/test_bfabric_config.py @@ -2,7 +2,7 @@ import io import unittest -from bfabric.bfabric_config import BfabricConfig, BfabricAuth, read_bfabricrc_py +from bfabric.bfabric_config import BfabricConfig, BfabricAuth, read_bfabricpy_yml class TestBfabricAuth(unittest.TestCase): @@ -47,7 +47,7 @@ def test_read_yml_bypath_default(self): # Ensure environment variable is not available, and the default is environment is loaded os.environ.pop('BFABRICPY_CONFIG_ENV', None) - config, auth = read_bfabricrc_py('example_config.yml') # Should deduce + config, auth = read_bfabricpy_yml('example_config.yml') # Should deduce self.assertEqual("my_epic_production_login", auth.login) self.assertEqual("my_secret_production_password", auth.password) self.assertEqual("https://mega-production-server.uzh.ch/myprod", config.webbase) @@ -58,7 +58,7 @@ def test_read_yml_bypath_environment_variable(self): # Explicitly set the environment variable for this process os.environ["BFABRICPY_CONFIG_ENV"] = "TEST" - config, auth = read_bfabricrc_py('example_config.yml') # Should deduce + config, auth = read_bfabricpy_yml('example_config.yml') # Should deduce self.assertEqual("my_epic_test_login", auth.login) self.assertEqual("my_secret_test_password", auth.password) self.assertEqual("https://mega-test-server.uzh.ch/mytest", config.webbase) @@ -67,7 +67,7 @@ def test_read_yml_bypath_environment_variable(self): # TODO: Test that logging is consistent with default config def test_read_yml_bypath_allfields(self): with self.assertLogs(level="INFO") as log_context: - config, auth = read_bfabricrc_py('example_config.yml', config_env='TEST') + config, auth = read_bfabricpy_yml('example_config.yml', config_env='TEST') # # Testing log # self.assertEqual( @@ -96,7 +96,7 @@ def test_read_yml_bypath_allfields(self): # Testing that we can load webbase without authentication if correctly requested def test_read_yml_when_empty_optional(self): with self.assertLogs(level="INFO"): - config, auth = read_bfabricrc_py('example_config.yml', config_env='STANDBY', optional_auth=True) + config, auth = read_bfabricpy_yml('example_config.yml', config_env='STANDBY', optional_auth=True) self.assertIsNone(auth) self.assertEqual("https://standby-server.uzh.ch/mystandby", config.webbase) @@ -106,7 +106,7 @@ 
def test_read_yml_when_empty_optional(self): # Test that missing authentication will raise an error if required def test_read_yml_when_empty_mandatory(self): with self.assertRaises(ValueError): - read_bfabricrc_py('example_config.yml', config_env='STANDBY', optional_auth=False) + read_bfabricpy_yml('example_config.yml', config_env='STANDBY', optional_auth=False) def test_repr(self): rep = repr(self.config) From f4506ff398ff72d27d16622b788e2c6a632732f1 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Tue, 23 Apr 2024 14:55:31 +0200 Subject: [PATCH 041/129] renamed webbase to base_url everywhere --- bfabric/bfabric.py | 22 +++++++++--------- bfabric/bfabric_config.py | 12 +++++----- bfabric/scripts/bfabric_flask.py | 6 ++--- bfabric/tests/test_bfabric_functional.py | 2 +- bfabric/tests/unit/example_config.yml | 6 ++--- bfabric/tests/unit/test_bfabric_config.py | 28 +++++++++++------------ 6 files changed, 38 insertions(+), 38 deletions(-) diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index d0916550..becc39ff 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -84,12 +84,12 @@ class Bfabric(object): def warning(self, msg): sys.stderr.write("\033[93m{}\033[0m\n".format(msg)) - def __init__(self, login: str = None, password: str = None, webbase: str = None, externaljobid=None, + def __init__(self, login: str = None, password: str = None, base_url: str = None, externaljobid=None, config_path: str = None, config_env: str = None, optional_auth: bool = False, verbose: bool = False): """ :param login: Login string for overriding config file :param password: Password for overriding config file - :param webbase: Webbase for overriding config file + :param base_url: Base url of the BFabric server for overriding config file :param externaljobid: ? :param config_path: Path to the config file, in case it is different from default :param config_env: Which config environment to use. 
Can also specify via environment variable or use @@ -110,13 +110,13 @@ def __init__(self, login: str = None, password: str = None, webbase: str = None, # Use the provided config data from arguments instead of the file if not os.path.isfile(config_path): self.warning("could not find '.bfabricpy.yml' file in home directory.") - self.config = BfabricConfig(webbase=webbase) + self.config = BfabricConfig(base_url=base_url) self.auth = BfabricAuth(login=login, password=password) # Load config from file, override some of the fields with the provided ones else: config, auth = read_bfabricrc_py(config_path, config_env=config_env, optional_auth=optional_auth) - self.config = config.with_overrides(webbase=webbase) + self.config = config.with_overrides(base_url=base_url) if (login is not None) and (password is not None): self.auth = BfabricAuth(login=login, password=password) elif (login is None) and (password is None): @@ -124,13 +124,13 @@ def __init__(self, login: str = None, password: str = None, webbase: str = None, else: raise IOError("Must provide both username and password, or neither.") - if not self.config.webbase: - raise ValueError("webbase missing") + if not self.config.base_url: + raise ValueError("base server url missing") if not optional_auth: if not self.auth or not self.auth.login or not self.auth.password: raise ValueError("Authentification not initialized but required") - msg = f"\033[93m--- webbase {self.config.webbase}; login; {self.auth.login} ---\033[0m\n" + msg = f"\033[93m--- base_url {self.config.base_url}; login; {self.auth.login} ---\033[0m\n" sys.stderr.write(msg) if self.verbose: @@ -214,7 +214,7 @@ def upload_file(self, filename, workunitid): def _get_service(self, endpoint: str) -> Service: """Returns a `suds.client.Service` object for the given endpoint name.""" if endpoint not in self.cl: - self.cl[endpoint] = Client(f"{self.config.webbase}/{endpoint}?wsdl", cache=None) + self.cl[endpoint] = Client(f"{self.config.base_url}/{endpoint}?wsdl", cache=None) return self.cl[endpoint].service def _perform_request( @@ -860,12 +860,12 @@ def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style= sample_id = self.get_sampleid(int(resource_iterator._id)) _resource_sample = {'resource_id': int(resource_iterator._id), - 'resource_url': "{0}/userlab/show-resource.html?id={1}".format(self.config.webbase, resource_iterator._id)} + 'resource_url': "{0}/userlab/show-resource.html?id={1}".format(self.config.base_url, resource_iterator._id)} if not sample_id is None: _resource_sample['sample_id'] = int(sample_id) - _resource_sample['sample_url'] = "{0}/userlab/show-sample.html?id={1}".format(self.config.webbase, sample_id) + _resource_sample['sample_url'] = "{0}/userlab/show-sample.html?id={1}".format(self.config.base_url, sample_id) resource_ids[_application_name].append(_resource_sample) except: @@ -960,7 +960,7 @@ def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style= }, 'workunit_id': int(workunit._id), 'workunit_createdby': str(workunit.createdby), - 'workunit_url': "{0}/userlab/show-workunit.html?workunitId={1}".format(self.config.webbase, workunit._id), + 'workunit_url': "{0}/userlab/show-workunit.html?workunitId={1}".format(self.config.base_url, workunit._id), 'external_job_id': int(yaml_workunit_externaljob._id), 'order_id': order_id, 'project_id': project_id, diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index fbbd2b3f..75f430b1 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -30,23 
+30,23 @@ class BfabricConfig: """Holds the configuration for the B-Fabric client for connecting to particular instance of B-Fabric. Attributes: - webbase (optional): The API base url + base_url (optional): The API base url application_ids (optional): Map of application names to ids. job_notification_emails (optional): Space-separated list of email addresses to notify when a job finishes. """ - webbase: str = "https://fgcz-bfabric.uzh.ch/bfabric" + base_url: str = "https://fgcz-bfabric.uzh.ch/bfabric" application_ids: Dict[str, int] = dataclasses.field(default_factory=dict) job_notification_emails: str = "" def with_overrides( self, - webbase: Optional[str] = None, + base_url: Optional[str] = None, application_ids: Optional[Dict[str, int]] = None, ) -> BfabricConfig: """Returns a copy of the configuration with new values applied, if they are not None.""" return BfabricConfig( - webbase=webbase if webbase is not None else self.webbase, + base_url=base_url if base_url is not None else self.base_url, application_ids=application_ids if application_ids is not None else self.application_ids, @@ -57,7 +57,7 @@ def with_overrides( NOTE: BFabricPy expects a .bfabricpy.yml of the format, as seen in bfabricPy/tests/unit/example_config.yml * The general field always has to be present * There may be any number of environments, and they may have arbitrary names. Here, they are called PRODUCTION and TEST -* Must specify correct login, password and webbase for each environment. +* Must specify correct login, password and base_url for each environment. * application and job_notification_emails fields are optional * The default environment will be selected as follows: - First, parser will check if the optional argument `config_env` is provided directly to the parser function @@ -168,7 +168,7 @@ def read_bfabricrc_py(config_path: str, config_env: str = None, auth = BfabricAuth(**auth_dict) # Parse config - config_dict = _parse_dict(config_dict, ['webbase'], optional_keys=['application_ids', 'job_notification_emails'], + config_dict = _parse_dict(config_dict, ['base_url'], optional_keys=['application_ids', 'job_notification_emails'], error_prefix=error_prefix) config = BfabricConfig(**config_dict) diff --git a/bfabric/scripts/bfabric_flask.py b/bfabric/scripts/bfabric_flask.py index b49e9bff..b20dfeab 100755 --- a/bfabric/scripts/bfabric_flask.py +++ b/bfabric/scripts/bfabric_flask.py @@ -430,9 +430,9 @@ def query(): print ("PASSWORD CLEARTEXT", content['webservicepassword']) - bf = bfabric.Bfabric(login=content['login'], - password=content['webservicepassword'], - webbase='http://fgcz-bfabric.uzh.ch/bfabric') + bf = bfabric.Bfabric(login=content['login'], + password=content['webservicepassword'], + base_url='http://fgcz-bfabric.uzh.ch/bfabric') for i in content.keys(): print ("{}\t{}".format(i, content[i])) diff --git a/bfabric/tests/test_bfabric_functional.py b/bfabric/tests/test_bfabric_functional.py index 02b91de3..c29261be 100755 --- a/bfabric/tests/test_bfabric_functional.py +++ b/bfabric/tests/test_bfabric_functional.py @@ -41,7 +41,7 @@ def test_wrappercreator_submitter(self): self.assertEqual(B.auth.login, 'pfeeder', msg) msg = "This test case requires a bfabric test system!" 
- self.assertIn("bfabric-test", B.config.webbase, msg) + self.assertIn("bfabric-test", B.config.base_url, msg) # TODO # create input resource diff --git a/bfabric/tests/unit/example_config.yml b/bfabric/tests/unit/example_config.yml index ba218cbb..75e7eaec 100644 --- a/bfabric/tests/unit/example_config.yml +++ b/bfabric/tests/unit/example_config.yml @@ -4,12 +4,12 @@ GENERAL: PRODUCTION: login: my_epic_production_login password: my_secret_production_password - webbase: https://mega-production-server.uzh.ch/myprod + base_url: https://mega-production-server.uzh.ch/myprod TEST: login: my_epic_test_login password: my_secret_test_password - webbase: https://mega-test-server.uzh.ch/mytest + base_url: https://mega-test-server.uzh.ch/mytest application_ids: Proteomics/CAT_123: 7 Proteomics/DOG_552: 6 @@ -17,4 +17,4 @@ TEST: job_notification_emails: john.snow@fgcz.uzh.ch billy.the.kid@fgcz.ethz.ch STANDBY: - webbase: https://standby-server.uzh.ch/mystandby \ No newline at end of file + base_url: https://standby-server.uzh.ch/mystandby \ No newline at end of file diff --git a/bfabric/tests/unit/test_bfabric_config.py b/bfabric/tests/unit/test_bfabric_config.py index a310a868..6f3f916e 100644 --- a/bfabric/tests/unit/test_bfabric_config.py +++ b/bfabric/tests/unit/test_bfabric_config.py @@ -20,25 +20,25 @@ def test_str(self): class TestBfabricConfig(unittest.TestCase): def setUp(self): self.config = BfabricConfig( - webbase="url", + base_url="url", application_ids={"app": 1}, ) def test_with_overrides(self): new_config = self.config.with_overrides( - webbase="new_url", + base_url="new_url", application_ids={"new": 2}, ) - self.assertEqual("new_url", new_config.webbase) + self.assertEqual("new_url", new_config.base_url) self.assertEqual({"new": 2}, new_config.application_ids) - self.assertEqual("url", self.config.webbase) + self.assertEqual("url", self.config.base_url) self.assertEqual({"app": 1}, self.config.application_ids) def test_with_replaced_when_none(self): - new_config = self.config.with_overrides(webbase=None, application_ids=None) - self.assertEqual("url", new_config.webbase) + new_config = self.config.with_overrides(base_url=None, application_ids=None) + self.assertEqual("url", new_config.base_url) self.assertEqual({"app": 1}, new_config.application_ids) - self.assertEqual("url", self.config.webbase) + self.assertEqual("url", self.config.base_url) self.assertEqual({"app": 1}, self.config.application_ids) # Testing default initialization @@ -50,7 +50,7 @@ def test_read_yml_bypath_default(self): config, auth = read_bfabricrc_py('example_config.yml') # Should deduce self.assertEqual("my_epic_production_login", auth.login) self.assertEqual("my_secret_production_password", auth.password) - self.assertEqual("https://mega-production-server.uzh.ch/myprod", config.webbase) + self.assertEqual("https://mega-production-server.uzh.ch/myprod", config.base_url) # Testing environment variable initialization # TODO: Test that logging is consistent with default config @@ -61,7 +61,7 @@ def test_read_yml_bypath_environment_variable(self): config, auth = read_bfabricrc_py('example_config.yml') # Should deduce self.assertEqual("my_epic_test_login", auth.login) self.assertEqual("my_secret_test_password", auth.password) - self.assertEqual("https://mega-test-server.uzh.ch/mytest", config.webbase) + self.assertEqual("https://mega-test-server.uzh.ch/mytest", config.base_url) # Testing explicit initialization, as well as extra fields (application_ids, job_notification_emails) # TODO: Test that logging is consistent 
with default config @@ -80,7 +80,7 @@ def test_read_yml_bypath_allfields(self): self.assertEqual("my_epic_test_login", auth.login) self.assertEqual("my_secret_test_password", auth.password) - self.assertEqual("https://mega-test-server.uzh.ch/mytest", config.webbase) + self.assertEqual("https://mega-test-server.uzh.ch/mytest", config.base_url) applications_dict_ground_truth = { 'Proteomics/CAT_123': 7, @@ -93,13 +93,13 @@ def test_read_yml_bypath_allfields(self): self.assertEqual(applications_dict_ground_truth, config.application_ids) self.assertEqual(job_notification_emails_ground_truth, config.job_notification_emails) - # Testing that we can load webbase without authentication if correctly requested + # Testing that we can load base_url without authentication if correctly requested def test_read_yml_when_empty_optional(self): with self.assertLogs(level="INFO"): config, auth = read_bfabricrc_py('example_config.yml', config_env='STANDBY', optional_auth=True) self.assertIsNone(auth) - self.assertEqual("https://standby-server.uzh.ch/mystandby", config.webbase) + self.assertEqual("https://standby-server.uzh.ch/mystandby", config.base_url) self.assertEqual({}, config.application_ids) self.assertEqual("", config.job_notification_emails) @@ -111,14 +111,14 @@ def test_read_yml_when_empty_mandatory(self): def test_repr(self): rep = repr(self.config) self.assertEqual( - "BfabricConfig(webbase='url', application_ids={'app': 1}, job_notification_emails='')", + "BfabricConfig(base_url='url', application_ids={'app': 1}, job_notification_emails='')", rep, ) def test_str(self): rep = str(self.config) self.assertEqual( - "BfabricConfig(webbase='url', application_ids={'app': 1}, job_notification_emails='')", + "BfabricConfig(base_url='url', application_ids={'app': 1}, job_notification_emails='')", rep, ) From 06bef1d3a9b988c4dac3b38a0292b5bdf192750d Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:06:40 +0200 Subject: [PATCH 042/129] Update bfabric/bfabric_config.py Co-authored-by: Leonardo Schwarz --- bfabric/bfabric_config.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index 75f430b1..aa85e265 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -82,10 +82,6 @@ def _read_config_env_as_dict(config_path: str, config_env: str = None) -> Tuple[ # Read the config file config_dict = yaml.safe_load(Path(config_path).read_text()) - # config = ConfigParser() - # config.read(config_path) - # config_dict = {s: dict(config.items(s)) for s in config.sections()} - if "GENERAL" not in config_dict: raise IOError("Config file must have a general section") if 'default_config' not in config_dict['GENERAL']: From d20cc47de58ce17d1eff1f9ebf20063854455c1b Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:06:50 +0200 Subject: [PATCH 043/129] Update bfabric/bfabric_config.py Co-authored-by: Leonardo Schwarz --- bfabric/bfabric_config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index aa85e265..23d5800e 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -6,7 +6,6 @@ import os from typing import Optional, Dict, Tuple import dataclasses -# from configparser import ConfigParser import yaml from pathlib import Path From 0edef123120f2c266799c251eab4f2efb5d1dba1 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins 
<29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:07:16 +0200 Subject: [PATCH 044/129] Update bfabric/bfabric_config.py Co-authored-by: Leonardo Schwarz --- bfabric/bfabric_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index 23d5800e..6127337e 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -93,7 +93,7 @@ def _read_config_env_as_dict(config_path: str, config_env: str = None) -> Tuple[ # Try to find a relevant config_env = os.getenv("BFABRICPY_CONFIG_ENV") if config_env is None: - logger.log(20, "BFABRICPY_CONFIG_ENV not found, using default environment " + config_env_default) + logger.info(f"BFABRICPY_CONFIG_ENV not found, using default environment {config_env_default}") config_env = config_env_default else: logger.log(20, "found BFABRICPY_CONFIG_ENV = " + config_env) From 9292abdfe17a9c76900c996521811db9eb215ffc Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:07:23 +0200 Subject: [PATCH 045/129] Update bfabric/bfabric_config.py Co-authored-by: Leonardo Schwarz --- bfabric/bfabric_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index 6127337e..461630be 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -96,7 +96,7 @@ def _read_config_env_as_dict(config_path: str, config_env: str = None) -> Tuple[ logger.info(f"BFABRICPY_CONFIG_ENV not found, using default environment {config_env_default}") config_env = config_env_default else: - logger.log(20, "found BFABRICPY_CONFIG_ENV = " + config_env) + logger.info(f"found BFABRICPY_CONFIG_ENV = {config_env}") else: logger.log(20, "config environment specified explicitly as " + config_env) From f8b55d901a932f289624d3ceffecfd345dc84016 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:07:29 +0200 Subject: [PATCH 046/129] Update bfabric/bfabric_config.py Co-authored-by: Leonardo Schwarz --- bfabric/bfabric_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index 461630be..d2952555 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -98,7 +98,7 @@ def _read_config_env_as_dict(config_path: str, config_env: str = None) -> Tuple[ else: logger.info(f"found BFABRICPY_CONFIG_ENV = {config_env}") else: - logger.log(20, "config environment specified explicitly as " + config_env) + logger.info(f"config environment specified explicitly as {config_env}") if config_env not in config_dict: raise IOError("The requested config environment", config_env, "is not present in the config file") From fd6d47ce16ae94dcfeb75ead3ff22b87d4bc0866 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Tue, 23 Apr 2024 15:08:56 +0200 Subject: [PATCH 047/129] renamed bfabric2 from webbase to base_url --- bfabric/bfabric2.py | 20 ++++++++++---------- bfabric/examples/compare_zeep_suds_query.py | 2 +- bfabric/examples/zeep_debug.py | 2 +- bfabric/src/engine_suds.py | 6 +++--- bfabric/src/engine_zeep.py | 6 +++--- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 7743841e..4f5f84b1 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -42,12 +42,12 @@ class BfabricAPIEngineType(Enum): ZEEP = 2 -def get_system_auth(login: str = None, 
password: str = None, webbase: str = None, externaljobid=None, - config_path: str = None, config_env: str = None, optional_auth: bool = False, verbose: bool = False): +def get_system_auth(login: str = None, password: str = None, base_url: str = None, externaljobid=None, + config_path: str = None, config_env: str = None, optional_auth: bool = False, verbose: bool = False): """ :param login: Login string for overriding config file :param password: Password for overriding config file - :param webbase: Webbase for overriding config file + :param base_url: Base server url for overriding config file :param externaljobid: ? :param config_path: Path to the config file, in case it is different from default :param config_env: Which config environment to use. Can also specify via environment variable or use @@ -64,13 +64,13 @@ def get_system_auth(login: str = None, password: str = None, webbase: str = None if not os.path.isfile(config_path): # TODO: Convert to log print("Warning: could not find '.bfabricpy.yml' file in home directory.") - config = BfabricConfig(webbase=webbase) + config = BfabricConfig(base_url=base_url) auth = BfabricAuth(login=login, password=password) # Load config from file, override some of the fields with the provided ones else: config, auth = read_bfabricpy_yml(config_path, config_env=config_env, optional_auth=optional_auth) - config = config.with_overrides(webbase=webbase) + config = config.with_overrides(base_url=base_url) if (login is not None) and (password is not None): auth = BfabricAuth(login=login, password=password) elif (login is None) and (password is None): @@ -78,13 +78,13 @@ def get_system_auth(login: str = None, password: str = None, webbase: str = None else: raise IOError("Must provide both username and password, or neither.") - if not config.webbase: - raise ValueError("webbase missing") + if not config.base_url: + raise ValueError("base_url missing") if not optional_auth: if not auth or not auth.login or not auth.password: raise ValueError("Authentification not initialized but required") - msg = f"\033[93m--- webbase {config.webbase}; login; {auth.login} ---\033[0m\n" + msg = f"\033[93m--- base_url {config.base_url}; login; {auth.login} ---\033[0m\n" sys.stderr.write(msg) if verbose: @@ -108,10 +108,10 @@ def __init__(self, config: BfabricConfig, auth: BfabricAuth, self.query_counter = 0 if engine == BfabricAPIEngineType.SUDS: - self.engine = EngineSUDS(auth.login, auth.password, config.webbase) + self.engine = EngineSUDS(auth.login, auth.password, config.base_url) self.result_type = BfabricResultType.LISTSUDS elif engine == BfabricAPIEngineType.ZEEP: - self.engine = EngineZeep(auth.login, auth.password, config.webbase) + self.engine = EngineZeep(auth.login, auth.password, config.base_url) self.result_type = BfabricResultType.LISTZEEP else: raise ValueError("Unexpected engine", BfabricAPIEngineType) diff --git a/bfabric/examples/compare_zeep_suds_query.py b/bfabric/examples/compare_zeep_suds_query.py index fe9e412a..885ad073 100644 --- a/bfabric/examples/compare_zeep_suds_query.py +++ b/bfabric/examples/compare_zeep_suds_query.py @@ -58,7 +58,7 @@ def full_query(auth: BfabricAuth, query: dict, includedeletableupdateable: bool } def calc_both(auth: BfabricAuth, config: BfabricConfig, endpoint: str, query: dict, raw: bool = True): - wsdl = "".join((config.webbase, '/', endpoint, "?wsdl")) + wsdl = "".join((config.base_url, '/', endpoint, "?wsdl")) fullQuery = full_query(auth, query) retZeep = read_zeep(wsdl, fullQuery, raw=raw) retSuds = read_suds(wsdl, 
fullQuery, raw=raw) diff --git a/bfabric/examples/zeep_debug.py b/bfabric/examples/zeep_debug.py index fd9b9b37..a5225576 100644 --- a/bfabric/examples/zeep_debug.py +++ b/bfabric/examples/zeep_debug.py @@ -38,7 +38,7 @@ def read_zeep(wsdl, fullQuery, raw=True): def read(auth: BfabricAuth, config: BfabricConfig, endpoint: str, query: dict, raw: bool = True): - wsdl = "".join((config.webbase, '/', endpoint, "?wsdl")) + wsdl = "".join((config.base_url, '/', endpoint, "?wsdl")) fullQuery = full_query(auth, query) return read_zeep(wsdl, fullQuery, raw=raw) diff --git a/bfabric/src/engine_suds.py b/bfabric/src/engine_suds.py index 7e3e6c2c..e37ba21f 100644 --- a/bfabric/src/engine_suds.py +++ b/bfabric/src/engine_suds.py @@ -7,16 +7,16 @@ class EngineSUDS(object): """B-Fabric API SUDS Engine""" - def __init__(self, login: str, password: str, webbase: str): + def __init__(self, login: str, password: str, base_url: str): self.cl = {} self.login = login self.password = password - self.webbase = webbase + self.base_url = base_url def _get_client(self, endpoint: str): try: if not endpoint in self.cl: - wsdl = "".join((self.webbase, '/', endpoint, "?wsdl")) + wsdl = "".join((self.base_url, '/', endpoint, "?wsdl")) self.cl[endpoint] = Client(wsdl, cache=None) return self.cl[endpoint] except Exception as e: diff --git a/bfabric/src/engine_zeep.py b/bfabric/src/engine_zeep.py index 9257d3a5..98dfe51b 100644 --- a/bfabric/src/engine_zeep.py +++ b/bfabric/src/engine_zeep.py @@ -26,16 +26,16 @@ def _zeep_query_append_skipped(query: dict, skipped_keys: list, inplace: bool = class EngineZeep(object): """B-Fabric API SUDS Engine""" - def __init__(self, login: str, password: str, webbase: str): + def __init__(self, login: str, password: str, base_url: str): self.cl = {} self.login = login self.password = password - self.webbase = webbase + self.base_url = base_url def _get_client(self, endpoint: str): try: if not endpoint in self.cl: - wsdl = "".join((self.webbase, '/', endpoint, "?wsdl")) + wsdl = "".join((self.base_url, '/', endpoint, "?wsdl")) self.cl[endpoint] = zeep.Client(wsdl) return self.cl[endpoint] except Exception as e: From 78b1e0948d5cabb23a4eb6326b8805de1061accb Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:11:11 +0200 Subject: [PATCH 048/129] Update bfabric/bfabric_config.py Co-authored-by: Leonardo Schwarz --- bfabric/bfabric_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index d2952555..380903fa 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -101,7 +101,7 @@ def _read_config_env_as_dict(config_path: str, config_env: str = None) -> Tuple[ logger.info(f"config environment specified explicitly as {config_env}") if config_env not in config_dict: - raise IOError("The requested config environment", config_env, "is not present in the config file") + raise IOError(f"The requested config environment {config_env} is not present in the config file") return config_env, config_dict[config_env] From 9fc88bd7cb08c7948bfda0bd8a707c58e83253c4 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:14:30 +0200 Subject: [PATCH 049/129] Update bfabric/bfabric_config.py Co-authored-by: Leonardo Schwarz --- bfabric/bfabric_config.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/bfabric/bfabric_config.py 
b/bfabric/bfabric_config.py index 380903fa..649ebda4 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -119,20 +119,11 @@ def _parse_dict(d: dict, mandatory_keys: list, optional_keys: list = None, error :param error_prefix: A string to print if a mandatory key is not found :return: Copy of a starting dictionary, only containing mandatory and optional keys """ - d_rez = {} - - # Get all mandatory fields, and complain if not found - for k in mandatory_keys: - if k in d: - d_rez[k] = d[k] - else: - raise ValueError(error_prefix + k) - - # Get all optional fields - if optional_keys is not None: - for k in optional_keys: - if k in d: - d_rez[k] = d[k] + missing_keys = set(mandatory_keys) - set(d) + if missing_keys: + raise ValueError(f"{error_prefix}{missing_keys}") + result_keys = set(mandatory_keys) + set(optional_keys or []) + d_rez = {d[k] for k in result_keys} # Ignore all other fields return d_rez From 7c1882858b0ce7a4a147480f4de392c4b2565bb2 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:15:04 +0200 Subject: [PATCH 050/129] Update bfabric/bfabric_config.py Co-authored-by: Leonardo Schwarz --- bfabric/bfabric_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index 649ebda4..5876af37 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -109,7 +109,7 @@ def _have_all_keys(d: dict, l: list) -> bool: """True if all elements in list l are present as keys in dict d, otherwise false""" return all([k in d for k in l]) -def _parse_dict(d: dict, mandatory_keys: list, optional_keys: list = None, error_prefix: str = None): +def _parse_dict(d: dict, mandatory_keys: list, optional_keys: list = None, error_prefix: str = " "): """ Returns a copy of an existing dictionary, only keeping mandatory and optional keys If a mandatory key is not found, an exception is raised From 135c09ff738b8308c4e71a515f3d838c0bee4419 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:16:27 +0200 Subject: [PATCH 051/129] Update bfabric/bfabric_config.py Co-authored-by: Leonardo Schwarz --- bfabric/bfabric_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index 5876af37..f7574f4e 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -143,7 +143,7 @@ def read_bfabricrc_py(config_path: str, config_env: str = None, config_env_final, config_dict = _read_config_env_as_dict(config_path, config_env=config_env) - error_prefix = "Config environment " + config_env_final + " does not have a compulsory field: " + error_prefix = f"Config environment {config_env_final} does not have a compulsory field: " # Parse authentification if optional_auth and not _have_all_keys(config_dict, ['login', 'password']): From 3b3bc14832cbb92ccdb88ae27548be96199ac127 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins <29147544+aleksejs-fomins@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:20:13 +0200 Subject: [PATCH 052/129] Update bfabric/bfabric_config.py Co-authored-by: Leonardo Schwarz --- bfabric/bfabric_config.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index f7574f4e..ee3be6c4 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -73,8 +73,6 @@ def _read_config_env_as_dict(config_path: str, 
config_env: str = None) -> Tuple[ or the config file itself. :return: Returns a target environment name, and the corresponding data from bfabricpy.yml file as a dictionary """ - - """Parses a .bfabricrc.py file and returns a tuple of BfabricConfig and BfabricAuth objects.""" logger = logging.getLogger(__name__) logger.info(f"Reading configuration from: {config_path}") From 21d9cd01f1779c976d471164efb4b5490e2aeb47 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Tue, 23 Apr 2024 15:21:04 +0200 Subject: [PATCH 053/129] revision changes --- bfabric/bfabric.py | 4 ++-- bfabric/bfabric_config.py | 28 +++++++++++------------ bfabric/tests/unit/test_bfabric_config.py | 12 +++++----- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index becc39ff..aca84691 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -26,7 +26,7 @@ import sys from pprint import pprint -from bfabric.bfabric_config import BfabricAuth, BfabricConfig, read_bfabricrc_py +from bfabric.bfabric_config import BfabricAuth, BfabricConfig, read_config from suds.client import Client from suds.wsdl import Service @@ -115,7 +115,7 @@ def __init__(self, login: str = None, password: str = None, base_url: str = None # Load config from file, override some of the fields with the provided ones else: - config, auth = read_bfabricrc_py(config_path, config_env=config_env, optional_auth=optional_auth) + config, auth = read_config(config_path, config_env=config_env, optional_auth=optional_auth) self.config = config.with_overrides(base_url=base_url) if (login is not None) and (password is not None): self.auth = BfabricAuth(login=login, password=password) diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index d2952555..c04a0b26 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -52,19 +52,6 @@ def with_overrides( ) -''' -NOTE: BFabricPy expects a .bfabricpy.yml of the format, as seen in bfabricPy/tests/unit/example_config.yml -* The general field always has to be present -* There may be any number of environments, and they may have arbitrary names. Here, they are called PRODUCTION and TEST -* Must specify correct login, password and base_url for each environment. -* application and job_notification_emails fields are optional -* The default environment will be selected as follows: - - First, parser will check if the optional argument `config_env` is provided directly to the parser function - - If not, secondly, the parser will check if the environment variable `BFABRICPY_CONFIG_ENV` is declared - - If not, finally, the parser will select the default_config specified in [GENERAL] of the .bfabricpy.yml file -''' - - def _read_config_env_as_dict(config_path: str, config_env: str = None) -> Tuple[str, dict]: """ Reads and partially parses a bfabricpy.yml file @@ -137,8 +124,8 @@ def _parse_dict(d: dict, mandatory_keys: list, optional_keys: list = None, error # Ignore all other fields return d_rez -def read_bfabricrc_py(config_path: str, config_env: str = None, - optional_auth: bool = False) -> Tuple[BfabricConfig, Optional[BfabricAuth]]: +def read_config(config_path: str, config_env: str = None, + optional_auth: bool = False) -> Tuple[BfabricConfig, Optional[BfabricAuth]]: """ Reads bfabricpy.yml file, parses it, extracting authentication and configuration data :param config_path: Path to the configuration file. 
It is assumed the file exists @@ -147,6 +134,17 @@ def read_bfabricrc_py(config_path: str, config_env: str = None, If not, both login and password must be present in the config file, otherwise an exception is thrown If yes, missing login and password would result in authentication class being None, but no exception :return: Configuration and Authentication class instances + + + NOTE: BFabricPy expects a .bfabricpy.yml of the format, as seen in bfabricPy/tests/unit/example_config.yml + * The general field always has to be present + * There may be any number of environments, and they may have arbitrary names. Here, they are called PRODUCTION and TEST + * Must specify correct login, password and base_url for each environment. + * application and job_notification_emails fields are optional + * The default environment will be selected as follows: + - First, parser will check if the optional argument `config_env` is provided directly to the parser function + - If not, secondly, the parser will check if the environment variable `BFABRICPY_CONFIG_ENV` is declared + - If not, finally, the parser will select the default_config specified in [GENERAL] of the .bfabricpy.yml file """ diff --git a/bfabric/tests/unit/test_bfabric_config.py b/bfabric/tests/unit/test_bfabric_config.py index 6f3f916e..16803e47 100644 --- a/bfabric/tests/unit/test_bfabric_config.py +++ b/bfabric/tests/unit/test_bfabric_config.py @@ -2,7 +2,7 @@ import io import unittest -from bfabric.bfabric_config import BfabricConfig, BfabricAuth, read_bfabricrc_py +from bfabric.bfabric_config import BfabricConfig, BfabricAuth, read_config class TestBfabricAuth(unittest.TestCase): @@ -47,7 +47,7 @@ def test_read_yml_bypath_default(self): # Ensure environment variable is not available, and the default is environment is loaded os.environ.pop('BFABRICPY_CONFIG_ENV', None) - config, auth = read_bfabricrc_py('example_config.yml') # Should deduce + config, auth = read_config('example_config.yml') # Should deduce self.assertEqual("my_epic_production_login", auth.login) self.assertEqual("my_secret_production_password", auth.password) self.assertEqual("https://mega-production-server.uzh.ch/myprod", config.base_url) @@ -58,7 +58,7 @@ def test_read_yml_bypath_environment_variable(self): # Explicitly set the environment variable for this process os.environ["BFABRICPY_CONFIG_ENV"] = "TEST" - config, auth = read_bfabricrc_py('example_config.yml') # Should deduce + config, auth = read_config('example_config.yml') # Should deduce self.assertEqual("my_epic_test_login", auth.login) self.assertEqual("my_secret_test_password", auth.password) self.assertEqual("https://mega-test-server.uzh.ch/mytest", config.base_url) @@ -67,7 +67,7 @@ def test_read_yml_bypath_environment_variable(self): # TODO: Test that logging is consistent with default config def test_read_yml_bypath_allfields(self): with self.assertLogs(level="INFO") as log_context: - config, auth = read_bfabricrc_py('example_config.yml', config_env='TEST') + config, auth = read_config('example_config.yml', config_env='TEST') # # Testing log # self.assertEqual( @@ -96,7 +96,7 @@ def test_read_yml_bypath_allfields(self): # Testing that we can load base_url without authentication if correctly requested def test_read_yml_when_empty_optional(self): with self.assertLogs(level="INFO"): - config, auth = read_bfabricrc_py('example_config.yml', config_env='STANDBY', optional_auth=True) + config, auth = read_config('example_config.yml', config_env='STANDBY', optional_auth=True) self.assertIsNone(auth) 
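For quick reference, a minimal sketch of how the renamed `read_config` helper is consumed; the config file path below is an illustrative assumption (the unit tests ship their own `example_config.yml`):

```python
# Minimal sketch: loading configuration and (optional) authentication.
from bfabric.bfabric_config import read_config

# With no config_env argument and no BFABRICPY_CONFIG_ENV variable set, the
# default_config entry of the GENERAL section selects the environment.
config, auth = read_config("example_config.yml")
print(config.base_url)

# Environments without credentials require optional_auth=True; auth is then None.
config, auth = read_config("example_config.yml", config_env="STANDBY", optional_auth=True)
print(auth)  # None
```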
self.assertEqual("https://standby-server.uzh.ch/mystandby", config.base_url) @@ -106,7 +106,7 @@ def test_read_yml_when_empty_optional(self): # Test that missing authentication will raise an error if required def test_read_yml_when_empty_mandatory(self): with self.assertRaises(ValueError): - read_bfabricrc_py('example_config.yml', config_env='STANDBY', optional_auth=False) + read_config('example_config.yml', config_env='STANDBY', optional_auth=False) def test_repr(self): rep = repr(self.config) From c12d289aff4b17f128d400cf1d721071789b12b4 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Tue, 23 Apr 2024 15:24:19 +0200 Subject: [PATCH 054/129] warning for old config file --- bfabric/bfabric.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index aca84691..be2412cf 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -107,6 +107,11 @@ def __init__(self, login: str = None, password: str = None, base_url: str = None # Get default path config file path config_path = config_path or os.path.normpath(os.path.expanduser("~/.bfabricpy.yml")) + # TODO: Convert to an exception when this branch becomes main + config_path_old = config_path or os.path.normpath(os.path.expanduser("~/.bfabricrc.py")) + if os.path.isfile(config_path): + self.warning("WARNING! The old .bfabricrc.py was found in the home directory. Delete and make sure to use the new .bfabricpy.yml") + # Use the provided config data from arguments instead of the file if not os.path.isfile(config_path): self.warning("could not find '.bfabricpy.yml' file in home directory.") From 3ba61590a5f925e5ca79829f0260d5628a4ccfad Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Tue, 23 Apr 2024 15:43:16 +0200 Subject: [PATCH 055/129] fixed a few bugs in _parse_dict --- bfabric/bfabric_config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index 5dc4a9e5..2494f408 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -94,7 +94,7 @@ def _have_all_keys(d: dict, l: list) -> bool: """True if all elements in list l are present as keys in dict d, otherwise false""" return all([k in d for k in l]) -def _parse_dict(d: dict, mandatory_keys: list, optional_keys: list = None, error_prefix: str = " "): +def _parse_dict(d: dict, mandatory_keys: list, optional_keys: list = None, error_prefix: str = " ") -> dict: """ Returns a copy of an existing dictionary, only keeping mandatory and optional keys If a mandatory key is not found, an exception is raised @@ -107,8 +107,8 @@ def _parse_dict(d: dict, mandatory_keys: list, optional_keys: list = None, error missing_keys = set(mandatory_keys) - set(d) if missing_keys: raise ValueError(f"{error_prefix}{missing_keys}") - result_keys = set(mandatory_keys) + set(optional_keys or []) - d_rez = {d[k] for k in result_keys} + result_keys = set(mandatory_keys) | set(optional_keys or []) + d_rez = {k: d[k] for k in result_keys if k in d} # Ignore all other fields return d_rez From 1a7dd24347e274d01fd4a6393d87bccad3af616b Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Tue, 23 Apr 2024 16:55:25 +0200 Subject: [PATCH 056/129] minor test if config file has .yml extension --- bfabric/bfabric2.py | 2 +- bfabric/bfabric_config.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index e8636028..1c74ac7f 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -63,7 +63,7 @@ def get_system_auth(login: str = 
None, password: str = None, base_url: str = Non # Use the provided config data from arguments instead of the file if not os.path.isfile(config_path): # TODO: Convert to log - print("Warning: could not find '.bfabricpy.yml' file in home directory.") + print(f"Warning: could not find the config file in {config_path}") config = BfabricConfig(base_url=base_url) auth = BfabricAuth(login=login, password=password) diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index c545d781..3982c438 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -63,6 +63,9 @@ def _read_config_env_as_dict(config_path: str, config_env: str = None) -> Tuple[ logger = logging.getLogger(__name__) logger.info(f"Reading configuration from: {config_path}") + if os.path.splitext(config_path)[1] != '.yml': + raise IOError(f"Expected config file with .yml extension, got {config_path}") + # Read the config file config_dict = yaml.safe_load(Path(config_path).read_text()) From 1d6e7b6ee366e21bdde3e0a81be9f96a1dcc1bec Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Wed, 24 Apr 2024 10:43:31 +0200 Subject: [PATCH 057/129] throw exception if explicitly-provided path to the config file is wrong --- bfabric/bfabric2.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 1c74ac7f..39d2085c 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -57,13 +57,18 @@ def get_system_auth(login: str = None, password: str = None, base_url: str = Non :param verbose: Verbosity (TODO: resolve potential redundancy with logger) """ - # Get default path config file path - config_path = config_path or os.path.normpath(os.path.expanduser("~/.bfabricpy.yml")) + have_config_path = config_path is not None + if not have_config_path: + # Get default path config file path + config_path = os.path.normpath(os.path.expanduser("~/.bfabricpy.yml")) # Use the provided config data from arguments instead of the file if not os.path.isfile(config_path): + if have_config_path: + # NOTE: If user explicitly specifies a path to a wrong config file, this has to be an exception + raise IOError(f"Explicitly specified config file does not exist: {config_path}") # TODO: Convert to log - print(f"Warning: could not find the config file in {config_path}") + print(f"Warning: could not find the config file in the default location: {config_path}") config = BfabricConfig(base_url=base_url) auth = BfabricAuth(login=login, password=password) From c32cbc3271a7e31b7a31304c2a7c49784e12757b Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 24 Apr 2024 13:24:30 +0200 Subject: [PATCH 058/129] Handle default `None` parameters in BfabricConfig --- .gitignore | 2 +- README.md | 15 +++++++--- bfabric/bfabric_config.py | 35 +++++++++++++++++++---- bfabric/tests/unit/test_bfabric_config.py | 12 ++++++++ 4 files changed, 53 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index 5c4e49d1..3d81f558 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ .idea/ +__pycache__ bfabric.egg-info/ -bfabric/__pycache__/ bfabric/scripts/query_result.txt dist/ diff --git a/README.md b/README.md index 423430f3..0db87501 100644 --- a/README.md +++ b/README.md @@ -35,10 +35,17 @@ python3 setup.py install --user ## Configuration ```{bash} -cat ~/.bfabricrc.py -_WEBBASE="https://fgcz-bfabric-test.uzh.ch/bfabric" -_LOGIN="yourBfabricLogin" -_PASSWD='yourBfabricWebPassword' +cat ~/.bfabricpy.yml +``` + +```{yaml} +GENERAL: + default_config: PRODUCTION + +PRODUCTION: + 
login: yourBfabricLogin + password: yourBfabricWebPassword + base_url: https://fgcz-bfabric.uzh.ch/bfabric ``` ## CheatSheet diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index 3982c438..f55a8b65 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -1,7 +1,5 @@ from __future__ import annotations -import io -import json import logging import os from typing import Optional, Dict, Tuple @@ -24,7 +22,6 @@ def __str__(self): return repr(self) -@dataclasses.dataclass(frozen=True) class BfabricConfig: """Holds the configuration for the B-Fabric client for connecting to particular instance of B-Fabric. @@ -34,9 +31,30 @@ class BfabricConfig: job_notification_emails (optional): Space-separated list of email addresses to notify when a job finishes. """ - base_url: str = "https://fgcz-bfabric.uzh.ch/bfabric" - application_ids: Dict[str, int] = dataclasses.field(default_factory=dict) - job_notification_emails: str = "" + def __init__( + self, + base_url: Optional[str] = None, + application_ids: Optional[Dict[str, int]] = None, + job_notification_emails: Optional[str] = None + ): + self._base_url = base_url or "https://fgcz-bfabric.uzh.ch/bfabric" + self._application_ids = application_ids or {} + self._job_notification_emails = job_notification_emails or "" + + @property + def base_url(self) -> str: + """The API base url.""" + return self._base_url + + @property + def application_ids(self) -> Dict[str, int]: + """Map of known application names to ids.""" + return self._application_ids + + @property + def job_notification_emails(self) -> str: + """Space-separated list of email addresses to notify when a job finishes.""" + return self._job_notification_emails def with_overrides( self, @@ -51,6 +69,11 @@ def with_overrides( else self.application_ids, ) + def __repr__(self): + return ( + f"BfabricConfig(base_url={repr(self.base_url)}, application_ids={repr(self.application_ids)}, " + f"job_notification_emails={repr(self.job_notification_emails)})" + ) def _read_config_env_as_dict(config_path: str, config_env: str = None) -> Tuple[str, dict]: """ diff --git a/bfabric/tests/unit/test_bfabric_config.py b/bfabric/tests/unit/test_bfabric_config.py index 16803e47..2c5fbbe1 100644 --- a/bfabric/tests/unit/test_bfabric_config.py +++ b/bfabric/tests/unit/test_bfabric_config.py @@ -24,6 +24,18 @@ def setUp(self): application_ids={"app": 1}, ) + def test_default_params_when_omitted(self): + config = BfabricConfig() + self.assertEqual("https://fgcz-bfabric.uzh.ch/bfabric", config.base_url) + self.assertEqual({}, config.application_ids) + self.assertEqual("", config.job_notification_emails) + + def test_default_params_when_specified(self): + config = BfabricConfig(base_url=None, application_ids=None, job_notification_emails=None) + self.assertEqual("https://fgcz-bfabric.uzh.ch/bfabric", config.base_url) + self.assertEqual({}, config.application_ids) + self.assertEqual("", config.job_notification_emails) + def test_with_overrides(self): new_config = self.config.with_overrides( base_url="new_url", From 33127e9c503c5567a42a3af341e74eed303160c4 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 24 Apr 2024 14:41:40 +0200 Subject: [PATCH 059/129] Detect error responses in ResultContainer --- bfabric/bfabric2.py | 51 ++++++++++++++----- bfabric/src/engine_suds.py | 4 +- bfabric/src/engine_zeep.py | 6 +-- bfabric/src/errors.py | 7 +++ bfabric/src/result_container.py | 16 +++++- .../tests/integration/test_bfabric2_read.py | 22 ++++++-- 
bfabric/tests/unit/test_results_container.py | 2 +- 7 files changed, 85 insertions(+), 23 deletions(-) create mode 100644 bfabric/src/errors.py diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 39d2085c..4e05c671 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -36,6 +36,8 @@ from bfabric.src.result_container import ResultContainer, BfabricResultType from bfabric.src.paginator import page_iter, BFABRIC_QUERY_LIMIT from bfabric.bfabric_config import BfabricAuth, BfabricConfig, read_config +from bfabric.src.errors import BfabricRequestError + class BfabricAPIEngineType(Enum): SUDS = 1 @@ -52,7 +54,7 @@ def get_system_auth(login: str = None, password: str = None, base_url: str = Non :param config_path: Path to the config file, in case it is different from default :param config_env: Which config environment to use. Can also specify via environment variable or use default in the config file (at your own risk) - :param optional_auth: Whether authentification is optional. If yes, missing authentification will be ignored, + :param optional_auth: Whether authentication is optional. If yes, missing authentication will be ignored, otherwise an exception will be raised :param verbose: Verbosity (TODO: resolve potential redundancy with logger) """ @@ -128,7 +130,7 @@ def _read_method(self, readid: bool, endpoint: str, obj: dict, page: int = 1, ** else: return self.engine.read(endpoint, obj, page=page, **kwargs) - def read(self, endpoint: str, obj: dict, max_results: Optional[int] = 100, readid: bool = False, + def read(self, endpoint: str, obj: dict, max_results: Optional[int] = 100, readid: bool = False, check: bool = True, **kwargs) -> ResultContainer: """ Make a read query to the engine. Determine the number of pages. Make calls for every page, concatenate @@ -141,17 +143,24 @@ def read(self, endpoint: str, obj: dict, max_results: Optional[int] = 100, readi come in blocks, and there is little overhead to providing results over integer number of pages. :param readid: whether to use reading by ID. Currently only available for engine=SUDS TODO: Test the extent to which this method works. Add safeguards + :param check: whether to check for errors in the response :return: List of responses, packaged in the results container """ # Get the first page. 
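As a hedged illustration of the `check` flag and error reporting introduced in this commit (the endpoint and query values are placeholders, and a configured TEST environment is assumed):

```python
# Sketch: reading with manual error inspection instead of the default check=True,
# which would raise a RuntimeError via assert_success() on failure.
from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth

config, auth = get_system_auth(config_env="TEST")
client = Bfabric(config, auth, engine=BfabricAPIEngineType.SUDS)

result = client.read(endpoint="user", obj={"login": "my_login"}, check=False)
if result.is_success:
    print(result.to_list_dict())
else:
    print(result.errors)  # list of BfabricRequestError instances
```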
# NOTE: According to old interface, this is equivalent to plain=True response = self._read_method(readid, endpoint, obj, page=1, **kwargs) - n_pages = response["numberofpages"] + try: + n_pages = response["numberofpages"] + except AttributeError: + n_pages = 0 # Return empty list if nothing found if not n_pages: - return ResultContainer([], self.result_type, total_pages_api=0) + result = ResultContainer([], self.result_type, total_pages_api=0, errors=self._get_response_errors(response)) + if check: + result.assert_success() + return result # Get results from other pages as well, if need be # Only load as many pages as user has interest in @@ -161,21 +170,32 @@ def read(self, endpoint: str, obj: dict, max_results: Optional[int] = 100, readi n_pages_trg = min(n_pages, div_int_ceil(max_results, BFABRIC_QUERY_LIMIT)) # NOTE: Page numbering starts at 1 - response_list = response[endpoint] + response_items = response[endpoint] + errors = [] for i_page in range(2, n_pages_trg + 1): print('-- reading page', i_page, 'of', n_pages) - response_list += self._read_method(readid, endpoint, obj, page=i_page, **kwargs)[endpoint] + response = self._read_method(readid, endpoint, obj, page=i_page, **kwargs) + errors += self._get_response_errors(response) + response_items += response[endpoint] - return ResultContainer(response_list, self.result_type, total_pages_api=n_pages) + result = ResultContainer(response_items, self.result_type, total_pages_api=n_pages, errors=errors) + if check: + result.assert_success() + return result - def save(self, endpoint: str, obj: dict, **kwargs) -> ResultContainer: + def save(self, endpoint: str, obj: dict, check: bool = True, **kwargs) -> ResultContainer: results = self.engine.save(endpoint, obj, **kwargs) - return ResultContainer(results[endpoint], self.result_type) + result = ResultContainer(results[endpoint], self.result_type, errors=self._get_response_errors(results)) + if check: + result.assert_success() + return result - def delete(self, endpoint: str, id: Union[List, int]) -> ResultContainer: + def delete(self, endpoint: str, id: Union[List, int], check: bool = True) -> ResultContainer: results = self.engine.delete(endpoint, id) - return ResultContainer(results[endpoint], self.result_type) - + result = ResultContainer(results[endpoint], self.result_type, errors=self._get_response_errors(results)) + if check: + result.assert_success() + return result ############################ # Multi-query functionality @@ -275,3 +295,10 @@ def exists(self, endpoint: str, key: str, value: Union[List, Union[int, str]]) - return key in result_vals else: return [val in result_vals for val in value] + + def _get_response_errors(self, response) -> List[BfabricRequestError]: + """Returns reported errors from the response.""" + if getattr(response, "errorreport", None): + return [BfabricRequestError(response.errorreport)] + else: + return [] diff --git a/bfabric/src/engine_suds.py b/bfabric/src/engine_suds.py index e37ba21f..0e07b23e 100644 --- a/bfabric/src/engine_suds.py +++ b/bfabric/src/engine_suds.py @@ -4,7 +4,7 @@ from suds.client import Client -class EngineSUDS(object): +class EngineSUDS: """B-Fabric API SUDS Engine""" def __init__(self, login: str, password: str, base_url: str): @@ -15,7 +15,7 @@ def __init__(self, login: str, password: str, base_url: str): def _get_client(self, endpoint: str): try: - if not endpoint in self.cl: + if endpoint not in self.cl: wsdl = "".join((self.base_url, '/', endpoint, "?wsdl")) self.cl[endpoint] = Client(wsdl, cache=None) return 
self.cl[endpoint] diff --git a/bfabric/src/engine_zeep.py b/bfabric/src/engine_zeep.py index 98dfe51b..e799c4e7 100644 --- a/bfabric/src/engine_zeep.py +++ b/bfabric/src/engine_zeep.py @@ -23,8 +23,8 @@ def _zeep_query_append_skipped(query: dict, skipped_keys: list, inplace: bool = return query_this -class EngineZeep(object): - """B-Fabric API SUDS Engine""" +class EngineZeep: + """B-Fabric API Zeep Engine""" def __init__(self, login: str, password: str, base_url: str): self.cl = {} @@ -34,7 +34,7 @@ def __init__(self, login: str, password: str, base_url: str): def _get_client(self, endpoint: str): try: - if not endpoint in self.cl: + if endpoint not in self.cl: wsdl = "".join((self.base_url, '/', endpoint, "?wsdl")) self.cl[endpoint] = zeep.Client(wsdl) return self.cl[endpoint] diff --git a/bfabric/src/errors.py b/bfabric/src/errors.py new file mode 100644 index 00000000..dad94f77 --- /dev/null +++ b/bfabric/src/errors.py @@ -0,0 +1,7 @@ +class BfabricRequestError: + """An error that is returned by the server in response to a full request.""" + def __init__(self, message: str): + self.message = message + + def __repr__(self): + return f"RequestError(message={repr(self.message)})" diff --git a/bfabric/src/result_container.py b/bfabric/src/result_container.py index 4e37d61f..bd8efa4e 100644 --- a/bfabric/src/result_container.py +++ b/bfabric/src/result_container.py @@ -33,7 +33,7 @@ def _clean_result(rez: dict, drop_empty: bool = True, drop_underscores_suds: boo class ResultContainer: - def __init__(self, results: list, result_type: BfabricResultType, total_pages_api: int = None): + def __init__(self, results: list, result_type: BfabricResultType, total_pages_api: int = None, errors: list = None): """ :param results: List of BFabric query results :param result_type: Format of each result (All must be of the same format) @@ -45,6 +45,7 @@ def __init__(self, results: list, result_type: BfabricResultType, total_pages_ap self.results = results self.result_type = result_type self._total_pages_api = total_pages_api + self._errors = errors or [] def __getitem__(self, idx: int): return self.results[idx] @@ -58,6 +59,18 @@ def __str__(self): def __len__(self): return len(self.results) + def assert_success(self): + if not self.is_success: + raise RuntimeError("Query was not successful", self._errors) + + @property + def is_success(self) -> bool: + return len(self._errors) == 0 + + @property + def errors(self) -> list: + return self._errors + def extend(self, other: ResultContainer) -> None: """ Can merge results of two queries. 
This can happen if the engine splits a complicated query in two @@ -69,6 +82,7 @@ def extend(self, other: ResultContainer) -> None: raise ValueError("Attempting to merge results of two different types", self.result_type, other.result_type) self.results += other.results + self._errors += other.errors if (self._total_pages_api is not None) and (other._total_pages_api is not None): self._total_pages_api += other._total_pages_api else: diff --git a/bfabric/tests/integration/test_bfabric2_read.py b/bfabric/tests/integration/test_bfabric2_read.py index 19fa5a86..bd0377b7 100755 --- a/bfabric/tests/integration/test_bfabric2_read.py +++ b/bfabric/tests/integration/test_bfabric2_read.py @@ -3,6 +3,7 @@ import unittest from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth +from bfabric_config import BfabricAuth class BfabricTestRead(unittest.TestCase): @@ -12,7 +13,7 @@ def setUp(self, *args, **kwargs): with open(path) as json_file: self.ground_truth = json.load(json_file) - # Load config and authentification + # Load config and authentication self.config, self.auth = get_system_auth(config_env="TEST") # Init the engines @@ -36,9 +37,10 @@ def read(self, engine: str, endpoint: str): self.assertEqual(str(gt_value), str(res[0][gt_attr])) def _test_empty_project(self, engine: str): - bf = self.clients[engine] - res = bf.read(endpoint="project", obj={"name": "this project does not exist"}).to_list_dict() - self.assertEqual(res, []) + with self.subTest(engine=engine): + bf = self.clients[engine] + res = bf.read(endpoint="project", obj={"name": "this project does not exist"}).to_list_dict() + self.assertEqual(res, []) def test_user(self): self.read("suds", "user") @@ -80,6 +82,18 @@ def test_annotation(self): self.read("suds", "annotation") self.read("zeep", "annotation") + def test_invalid_auth(self): + auth = BfabricAuth(login=self.auth.login, password="invalid_password") + clients = { + "zeep": Bfabric(self.config, auth, engine=BfabricAPIEngineType.ZEEP), + "suds": Bfabric(self.config, auth, engine=BfabricAPIEngineType.SUDS) + } + for engine, bf in clients.items(): + with self.subTest(engine=engine): + with self.assertRaises(RuntimeError) as e: + bf.read(endpoint="workunit", obj={}) + self.assertIn("Invalid login or password. Could not login.", str(e.exception)) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/bfabric/tests/unit/test_results_container.py b/bfabric/tests/unit/test_results_container.py index 4e428436..5657a9f7 100644 --- a/bfabric/tests/unit/test_results_container.py +++ b/bfabric/tests/unit/test_results_container.py @@ -34,7 +34,7 @@ def test_append(self): self.assertEqual(len(c3), 203) self.assertEqual(c3.results, list(range(200, 400)) + [1,2,3]) - self.assertEqual(c3.total_pages_api(), 3) + self.assertEqual(c3.total_pages_api, 3) def test_to_list_dict(self): # NOTE: For LISTDICT format, the conversion to listdict does nothing From 25e39c8e90a11673814a41cb96f839272f9b6718 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Thu, 25 Apr 2024 15:22:00 +0200 Subject: [PATCH 060/129] added integration tests for bad queries. added common handling of bad endpoints. 
extended use BfabricRequestError to correctly handle bad delete queries and multiple responses --- bfabric/bfabric2.py | 16 ++--- bfabric/src/engine_suds.py | 9 ++- bfabric/src/engine_zeep.py | 13 +++- bfabric/src/errors.py | 20 +++++- .../integration/test_bfabric2_bad_requests.py | 61 +++++++++++++++++++ .../tests/integration/test_bfabric2_read.py | 2 +- 6 files changed, 105 insertions(+), 16 deletions(-) create mode 100755 bfabric/tests/integration/test_bfabric2_bad_requests.py diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 4e05c671..81a61979 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -36,7 +36,7 @@ from bfabric.src.result_container import ResultContainer, BfabricResultType from bfabric.src.paginator import page_iter, BFABRIC_QUERY_LIMIT from bfabric.bfabric_config import BfabricAuth, BfabricConfig, read_config -from bfabric.src.errors import BfabricRequestError +from bfabric.src.errors import get_response_errors class BfabricAPIEngineType(Enum): @@ -157,7 +157,7 @@ def read(self, endpoint: str, obj: dict, max_results: Optional[int] = 100, readi # Return empty list if nothing found if not n_pages: - result = ResultContainer([], self.result_type, total_pages_api=0, errors=self._get_response_errors(response)) + result = ResultContainer([], self.result_type, total_pages_api=0, errors=get_response_errors(response, endpoint)) if check: result.assert_success() return result @@ -175,7 +175,7 @@ def read(self, endpoint: str, obj: dict, max_results: Optional[int] = 100, readi for i_page in range(2, n_pages_trg + 1): print('-- reading page', i_page, 'of', n_pages) response = self._read_method(readid, endpoint, obj, page=i_page, **kwargs) - errors += self._get_response_errors(response) + errors += get_response_errors(response, endpoint) response_items += response[endpoint] result = ResultContainer(response_items, self.result_type, total_pages_api=n_pages, errors=errors) @@ -185,14 +185,14 @@ def read(self, endpoint: str, obj: dict, max_results: Optional[int] = 100, readi def save(self, endpoint: str, obj: dict, check: bool = True, **kwargs) -> ResultContainer: results = self.engine.save(endpoint, obj, **kwargs) - result = ResultContainer(results[endpoint], self.result_type, errors=self._get_response_errors(results)) + result = ResultContainer(results[endpoint], self.result_type, errors=get_response_errors(results, endpoint)) if check: result.assert_success() return result def delete(self, endpoint: str, id: Union[List, int], check: bool = True) -> ResultContainer: results = self.engine.delete(endpoint, id) - result = ResultContainer(results[endpoint], self.result_type, errors=self._get_response_errors(results)) + result = ResultContainer(results[endpoint], self.result_type, errors=get_response_errors(results, endpoint)) if check: result.assert_success() return result @@ -296,9 +296,3 @@ def exists(self, endpoint: str, key: str, value: Union[List, Union[int, str]]) - else: return [val in result_vals for val in value] - def _get_response_errors(self, response) -> List[BfabricRequestError]: - """Returns reported errors from the response.""" - if getattr(response, "errorreport", None): - return [BfabricRequestError(response.errorreport)] - else: - return [] diff --git a/bfabric/src/engine_suds.py b/bfabric/src/engine_suds.py index 0e07b23e..b04d9b33 100644 --- a/bfabric/src/engine_suds.py +++ b/bfabric/src/engine_suds.py @@ -2,6 +2,9 @@ import copy from suds.client import Client +from suds import MethodNotFound + +from bfabric.src.errors import BfabricRequestError class 
EngineSUDS: @@ -51,7 +54,11 @@ def save(self, endpoint: str, obj: dict): query = {'login': self.login, 'password': self.password, endpoint: obj} client = self._get_client(endpoint) - return client.service.save(query) + try: + res = client.service.save(query) + except MethodNotFound as e: + raise BfabricRequestError(f"SUDS failed to find save method for the {endpoint} endpoint.") from e + return res def delete(self, endpoint: str, id: Union[int, List]): if isinstance(id, list) and len(id) == 0: diff --git a/bfabric/src/engine_zeep.py b/bfabric/src/engine_zeep.py index e799c4e7..0a9a13ef 100644 --- a/bfabric/src/engine_zeep.py +++ b/bfabric/src/engine_zeep.py @@ -3,6 +3,8 @@ import zeep import copy +from bfabric.src.errors import BfabricRequestError + # TODO: Check if this is a bug of BFabric or Zeep. Specifically, see if the same call to bFabricPy has the same bug def _zeep_query_append_skipped(query: dict, skipped_keys: list, inplace: bool = False, overwrite: bool = False) -> dict: @@ -70,8 +72,15 @@ def save(self, endpoint: str, obj: dict, skipped_keys: list = None): query = _zeep_query_append_skipped(query, skipped_keys) client = self._get_client(endpoint) - with client.settings(strict=False): - return client.service.save(query) + + try: + with client.settings(strict=False): + res = client.service.save(query) + except AttributeError as e: + if repr(e) == '''AttributeError("Service has no operation 'save'")''': + raise BfabricRequestError(f"ZEEP failed to find save method for the {endpoint} endpoint.") from e + raise e + return res def delete(self, endpoint: str, id: Union[int, List]): if isinstance(id, list) and len(id) == 0: diff --git a/bfabric/src/errors.py b/bfabric/src/errors.py index dad94f77..269ba924 100644 --- a/bfabric/src/errors.py +++ b/bfabric/src/errors.py @@ -1,7 +1,25 @@ -class BfabricRequestError: +from typing import List + + +class BfabricRequestError(Exception): """An error that is returned by the server in response to a full request.""" def __init__(self, message: str): self.message = message def __repr__(self): return f"RequestError(message={repr(self.message)})" + +# TODO: Also test for response-level errors +def get_response_errors(response, endpoint: str) -> List[BfabricRequestError]: + """ + :param response: A raw response to a query from an underlying engine + :param endpoint: The target endpoint + :return: A list of errors for each query result, if that result failed + Thus, a successful query would result in an empty list + """ + if getattr(response, "errorreport", None): + return [BfabricRequestError(response.errorreport)] + elif endpoint in response: + return [BfabricRequestError(r.errorreport) for r in response[endpoint] if getattr(r, "errorreport", None)] + else: + return [] diff --git a/bfabric/tests/integration/test_bfabric2_bad_requests.py b/bfabric/tests/integration/test_bfabric2_bad_requests.py new file mode 100755 index 00000000..146dbf62 --- /dev/null +++ b/bfabric/tests/integration/test_bfabric2_bad_requests.py @@ -0,0 +1,61 @@ +import json +import os +import unittest + +from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth +from bfabric.src.errors import BfabricRequestError + + +class BfabricTestBadRequest(unittest.TestCase): + def setUp(self, *args, **kwargs): + # Load ground truth + path = os.path.join(os.path.dirname(__file__), "groundtruth.json") + with open(path) as json_file: + self.ground_truth = json.load(json_file) + + # Load config and authentication + self.config, self.auth = get_system_auth(config_env="TEST") + + 
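A small self-contained sketch of how `get_response_errors` interprets responses; plain dicts and `SimpleNamespace` objects stand in here for the SUDS/Zeep response objects used in practice:

```python
# Sketch: get_response_errors with mocked responses (real callers pass engine objects).
from types import SimpleNamespace
from bfabric.src.errors import get_response_errors

# A clean per-item response: no errorreport anywhere -> no errors collected.
ok = {"user": [SimpleNamespace(errorreport=None)]}
assert get_response_errors(ok, endpoint="user") == []

# A failed item surfaces as one BfabricRequestError per offending result.
bad = {"user": [SimpleNamespace(errorreport="no such user")]}
errors = get_response_errors(bad, endpoint="user")
assert len(errors) == 1 and errors[0].message == "no such user"
```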
# Init the engines + self.clients = { + "zeep": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP), + "suds": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS) + } + + def _test_non_existing_read(self, engine_name: str): + # NOTE: Currently a bad read request simply returns no matches, but does not throw errors + res = self.clients[engine_name].read('user', {'id': 'cat'}).to_list_dict() + self.assertEqual([], res) + + def _test_forbidden_save(self, engine_name: str): + # Test what happens if we save to an endpoint that does not accept saving + self.assertRaises( + BfabricRequestError, + self.clients[engine_name].save, + 'project', + {'name': 'TheForbiddenPlan'} + ) + + def _test_wrong_delete(self, engine_name: str): + self.assertRaises( + RuntimeError, + self.clients[engine_name].delete, + 'workunit', + 101010101010101 + ) + + def test_non_existing_read(self): + self._test_non_existing_read("suds") + self._test_non_existing_read("zeep") + + def test_forbidden_save(self): + self._test_forbidden_save("suds") + self._test_forbidden_save("zeep") + + def test_wrong_delete(self): + self._test_wrong_delete("suds") + self._test_wrong_delete("zeep") + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/bfabric/tests/integration/test_bfabric2_read.py b/bfabric/tests/integration/test_bfabric2_read.py index bd0377b7..5d4b5ae4 100755 --- a/bfabric/tests/integration/test_bfabric2_read.py +++ b/bfabric/tests/integration/test_bfabric2_read.py @@ -3,7 +3,7 @@ import unittest from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth -from bfabric_config import BfabricAuth +from bfabric.bfabric_config import BfabricAuth class BfabricTestRead(unittest.TestCase): From 56e248202da680c6eab8b9b666e5ab6014b47dbd Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 25 Apr 2024 17:08:12 +0200 Subject: [PATCH 061/129] Minor changes --- bfabric/src/engine_zeep.py | 2 +- .../integration/test_bfabric2_bad_requests.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/bfabric/src/engine_zeep.py b/bfabric/src/engine_zeep.py index 0a9a13ef..88413f2d 100644 --- a/bfabric/src/engine_zeep.py +++ b/bfabric/src/engine_zeep.py @@ -77,7 +77,7 @@ def save(self, endpoint: str, obj: dict, skipped_keys: list = None): with client.settings(strict=False): res = client.service.save(query) except AttributeError as e: - if repr(e) == '''AttributeError("Service has no operation 'save'")''': + if e.args[0] == "Service has no operation 'save'": raise BfabricRequestError(f"ZEEP failed to find save method for the {endpoint} endpoint.") from e raise e return res diff --git a/bfabric/tests/integration/test_bfabric2_bad_requests.py b/bfabric/tests/integration/test_bfabric2_bad_requests.py index 146dbf62..287e969a 100755 --- a/bfabric/tests/integration/test_bfabric2_bad_requests.py +++ b/bfabric/tests/integration/test_bfabric2_bad_requests.py @@ -7,7 +7,7 @@ class BfabricTestBadRequest(unittest.TestCase): - def setUp(self, *args, **kwargs): + def setUp(self): # Load ground truth path = os.path.join(os.path.dirname(__file__), "groundtruth.json") with open(path) as json_file: @@ -44,16 +44,22 @@ def _test_wrong_delete(self, engine_name: str): 101010101010101 ) - def test_non_existing_read(self): + def test_non_existing_read_when_suds(self): self._test_non_existing_read("suds") + + def test_non_existing_read_when_zeep(self): self._test_non_existing_read("zeep") - def test_forbidden_save(self): + def test_forbidden_save_when_suds(self): 
self._test_forbidden_save("suds") + + def test_forbidden_save_when_zeep(self): self._test_forbidden_save("zeep") - def test_wrong_delete(self): + def test_wrong_delete_when_suds(self): self._test_wrong_delete("suds") + + def test_wrong_delete_when_zeep(self): self._test_wrong_delete("zeep") From 1922635257a9d68991c6203537c040f4a0f9446f Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Fri, 26 Apr 2024 16:09:13 +0200 Subject: [PATCH 062/129] added zeep as a requirement --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 8e4d062a..09f5d537 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ Flask==2.2.5 PyYAML>=3.11 suds-py3>=1.4.1 slugify +zeep>=4.2.1 \ No newline at end of file From 501a4b7a8ab4f79e07f015a8ebd908ec09989a0f Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Mon, 29 Apr 2024 15:45:17 +0200 Subject: [PATCH 063/129] fixed minor bug, added exists test --- bfabric/bfabric2.py | 2 +- .../tests/integration/test_bfabric2_exists.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 bfabric/tests/integration/test_bfabric2_exists.py diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 81a61979..0ffe92e4 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -292,7 +292,7 @@ def exists(self, endpoint: str, key: str, value: Union[List, Union[int, str]]) - # 3. For each of the requested ids, return true if there was a response and false if there was not if is_scalar: - return key in result_vals + return value in result_vals else: return [val in result_vals for val in value] diff --git a/bfabric/tests/integration/test_bfabric2_exists.py b/bfabric/tests/integration/test_bfabric2_exists.py new file mode 100644 index 00000000..b1a36748 --- /dev/null +++ b/bfabric/tests/integration/test_bfabric2_exists.py @@ -0,0 +1,19 @@ +import unittest + +from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth + + +class BfabricTestExists(unittest.TestCase): + def setUp(self): + self.config, self.auth = get_system_auth(config_env="TEST") + + def _test_single_exists(self, engine: BfabricAPIEngineType): + bf = Bfabric(self.config, self.auth, engine=engine) + res = bf.exists('dataset', 'id', 31024) + self.assertEqual(res, True) + + def test_zeep(self): + self._test_single_exists(engine=BfabricAPIEngineType.ZEEP) + + def test_suds(self): + self._test_single_exists(engine=BfabricAPIEngineType.SUDS) From e0c2b68ee0743a59efcf43cbfafe858c02e96abf Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Mon, 29 Apr 2024 15:46:56 +0200 Subject: [PATCH 064/129] minor --- bfabric/tests/integration/test_bfabric2_exists.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfabric/tests/integration/test_bfabric2_exists.py b/bfabric/tests/integration/test_bfabric2_exists.py index b1a36748..7ae74718 100644 --- a/bfabric/tests/integration/test_bfabric2_exists.py +++ b/bfabric/tests/integration/test_bfabric2_exists.py @@ -9,7 +9,7 @@ def setUp(self): def _test_single_exists(self, engine: BfabricAPIEngineType): bf = Bfabric(self.config, self.auth, engine=engine) - res = bf.exists('dataset', 'id', 31024) + res = bf.exists('dataset', 'id', 30721) # Take ID which is the same as in production self.assertEqual(res, True) def test_zeep(self): From e2679d19087f5a9e3d6c7136a68d7146f70a85cd Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 30 Apr 2024 16:30:39 +0200 Subject: [PATCH 065/129] Python 3.9 compatibility --- bfabric/src/result_container.py | 52 
++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/bfabric/src/result_container.py b/bfabric/src/result_container.py index bd8efa4e..1da4cb5b 100644 --- a/bfabric/src/result_container.py +++ b/bfabric/src/result_container.py @@ -94,26 +94,32 @@ def total_pages_api(self): def to_list_dict(self, drop_empty: bool = True, drop_underscores_suds: bool = True, have_sort_responses: bool = False): - match self.result_type: - case BfabricResultType.LISTDICT: - return self.results - case BfabricResultType.LISTSUDS: - results = [] - for rez in self.results: - rez_parsed = suds_asdict_recursive(rez, convert_types=True) - rez_parsed = _clean_result(rez_parsed, drop_empty=drop_empty, - drop_underscores_suds=drop_underscores_suds, - sort_responses=have_sort_responses) - results += [rez_parsed] - return results - case BfabricResultType.LISTZEEP: - results = [] - for rez in self.results: - rez_parsed = dict(serialize_object(rez, target_cls=dict)) - rez_parsed = _clean_result(rez_parsed, drop_empty=drop_empty, - drop_underscores_suds=False, # NOTE: Underscore problem specific to SUDS - sort_responses=have_sort_responses) - results += [rez_parsed] - return results - case _: - raise ValueError("Unexpected results type", self.result_type) + """ + Converts the results to a list of dictionaries. + :param drop_empty: If True, empty attributes will be removed from the results + :param drop_underscores_suds: If True, leading underscores will be removed from the keys of the results + :param have_sort_responses: If True, keys of dictionaries in the response will be sorted. + TODO what about the order of items in the list? + """ + if self.result_type == BfabricResultType.LISTDICT: + return self.results + elif self.result_type == BfabricResultType.LISTSUDS: + results = [] + for rez in self.results: + rez_parsed = suds_asdict_recursive(rez, convert_types=True) + rez_parsed = _clean_result(rez_parsed, drop_empty=drop_empty, + drop_underscores_suds=drop_underscores_suds, + sort_responses=have_sort_responses) + results += [rez_parsed] + return results + elif self.result_type == BfabricResultType.LISTZEEP: + results = [] + for rez in self.results: + rez_parsed = dict(serialize_object(rez, target_cls=dict)) + rez_parsed = _clean_result(rez_parsed, drop_empty=drop_empty, + drop_underscores_suds=False, # NOTE: Underscore problem specific to SUDS + sort_responses=have_sort_responses) + results += [rez_parsed] + return results + else: + raise ValueError("Unexpected results type", self.result_type) From 301bc547b99ef12f881b4617f9b19436d9e6fd10 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 30 Apr 2024 17:16:13 +0200 Subject: [PATCH 066/129] fix some test failures --- bfabric/bfabric_config.py | 6 +++--- bfabric/tests/unit/test_bfabric_config.py | 15 ++++++++------- bfabric/tests/unit/test_response_format_dict.py | 2 -- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index f55a8b65..c5ae3b17 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -2,7 +2,7 @@ import logging import os -from typing import Optional, Dict, Tuple +from typing import Optional, Dict, Tuple, Union import dataclasses import yaml from pathlib import Path @@ -75,7 +75,7 @@ def __repr__(self): f"job_notification_emails={repr(self.job_notification_emails)})" ) -def _read_config_env_as_dict(config_path: str, config_env: str = None) -> Tuple[str, dict]: +def _read_config_env_as_dict(config_path: Union[str, Path], 
config_env: str = None) -> Tuple[str, dict]: """ Reads and partially parses a bfabricpy.yml file :param config_path: Path to the configuration file. It is assumed that it exists @@ -139,7 +139,7 @@ def _parse_dict(d: dict, mandatory_keys: list, optional_keys: list = None, error # Ignore all other fields return d_rez -def read_config(config_path: str, config_env: str = None, +def read_config(config_path: Union[str, Path], config_env: str = None, optional_auth: bool = False) -> Tuple[BfabricConfig, Optional[BfabricAuth]]: """ Reads bfabricpy.yml file, parses it, extracting authentication and configuration data diff --git a/bfabric/tests/unit/test_bfabric_config.py b/bfabric/tests/unit/test_bfabric_config.py index 2c5fbbe1..c0389ac8 100644 --- a/bfabric/tests/unit/test_bfabric_config.py +++ b/bfabric/tests/unit/test_bfabric_config.py @@ -1,6 +1,6 @@ import os -import io import unittest +from pathlib import Path from bfabric.bfabric_config import BfabricConfig, BfabricAuth, read_config @@ -23,6 +23,7 @@ def setUp(self): base_url="url", application_ids={"app": 1}, ) + self.example_config_path = Path(__file__).parent / "example_config.yml" def test_default_params_when_omitted(self): config = BfabricConfig() @@ -59,7 +60,7 @@ def test_read_yml_bypath_default(self): # Ensure environment variable is not available, and the default is environment is loaded os.environ.pop('BFABRICPY_CONFIG_ENV', None) - config, auth = read_config('example_config.yml') # Should deduce + config, auth = read_config(self.example_config_path) self.assertEqual("my_epic_production_login", auth.login) self.assertEqual("my_secret_production_password", auth.password) self.assertEqual("https://mega-production-server.uzh.ch/myprod", config.base_url) @@ -70,16 +71,16 @@ def test_read_yml_bypath_environment_variable(self): # Explicitly set the environment variable for this process os.environ["BFABRICPY_CONFIG_ENV"] = "TEST" - config, auth = read_config('example_config.yml') # Should deduce + config, auth = read_config(self.example_config_path) self.assertEqual("my_epic_test_login", auth.login) self.assertEqual("my_secret_test_password", auth.password) self.assertEqual("https://mega-test-server.uzh.ch/mytest", config.base_url) # Testing explicit initialization, as well as extra fields (application_ids, job_notification_emails) # TODO: Test that logging is consistent with default config - def test_read_yml_bypath_allfields(self): + def test_read_yml_bypath_all_fields(self): with self.assertLogs(level="INFO") as log_context: - config, auth = read_config('example_config.yml', config_env='TEST') + config, auth = read_config(self.example_config_path, config_env='TEST') # # Testing log # self.assertEqual( @@ -108,7 +109,7 @@ def test_read_yml_bypath_allfields(self): # Testing that we can load base_url without authentication if correctly requested def test_read_yml_when_empty_optional(self): with self.assertLogs(level="INFO"): - config, auth = read_config('example_config.yml', config_env='STANDBY', optional_auth=True) + config, auth = read_config(self.example_config_path, config_env='STANDBY', optional_auth=True) self.assertIsNone(auth) self.assertEqual("https://standby-server.uzh.ch/mystandby", config.base_url) @@ -118,7 +119,7 @@ def test_read_yml_when_empty_optional(self): # Test that missing authentication will raise an error if required def test_read_yml_when_empty_mandatory(self): with self.assertRaises(ValueError): - read_config('example_config.yml', config_env='STANDBY', optional_auth=False) + read_config(self.example_config_path, 
config_env='STANDBY', optional_auth=False) def test_repr(self): rep = repr(self.config) diff --git a/bfabric/tests/unit/test_response_format_dict.py b/bfabric/tests/unit/test_response_format_dict.py index c33d4f50..fca47804 100644 --- a/bfabric/tests/unit/test_response_format_dict.py +++ b/bfabric/tests/unit/test_response_format_dict.py @@ -1,6 +1,4 @@ import unittest -import numpy as np - import bfabric.src.response_format_dict as response_format_dict From 5457adb6f02719c82e7c2da02868b67b7dee769f Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 2 May 2024 09:01:00 +0200 Subject: [PATCH 067/129] Migrate to pyproject.toml (#76) - This migrates the project configuration to `pyproject.toml`. - In a next step we can update the folder structure as well. - Scripts can be added once we add a `main` function to them, which will be done in separate PRs. --- MANIFEST.in | 2 -- bfabric/__init__.py | 4 ++- bfabric/_version.py | 1 - pyproject.toml | 57 +++++++++++++++++++++++++++++++++++++ requirements.txt | 5 ---- setup.py | 68 --------------------------------------------- 6 files changed, 60 insertions(+), 77 deletions(-) delete mode 100644 MANIFEST.in delete mode 100644 bfabric/_version.py create mode 100644 pyproject.toml delete mode 100644 requirements.txt delete mode 100755 setup.py diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 95084b27..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -recursive-include bfabric/scripts bfabric_delete.py bfabric_flask.py bfabric_read.py bfabric_save_resource.py -include README.md setup.py requirements.txt diff --git a/bfabric/__init__.py b/bfabric/__init__.py index 506f6d64..71ef6d82 100755 --- a/bfabric/__init__.py +++ b/bfabric/__init__.py @@ -1,4 +1,6 @@ -from ._version import __version__ +import importlib.metadata + +__version__ = importlib.metadata.version("bfabric") name = "bfabricPy" alias = "suds-py3" diff --git a/bfabric/_version.py b/bfabric/_version.py deleted file mode 100644 index 49638273..00000000 --- a/bfabric/_version.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "0.13.8" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..9739f951 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,57 @@ +[build-system] +requires = ["setuptools >= 61.0"] +build-backend = "setuptools.build_meta" + +[tool.setuptools] +py-modules = ["bfabric"] + +[project] +name = "bfabric" +description = "Python client for the B-Fabric WSDL API" +version = "0.13.8" +license = { text = "GPL-3.0" } +authors = [ + {name = "Christian Panse", email = "cp@fgcz.ethz.ch"}, + {name = "Leonardo Schwarz", email = "leonardo.schwarz@fgcz.ethz.ch"}, + {name = "Aleksejs Fomins"}, + {name = "Marco Schmidt"}, + {name = "Maria d'Errico"}, + {name = "Witold Eryk Wolski"} +] +requires-python = ">=3.9" +dependencies = [ + "suds >= 1.1.2", + "PyYAML >= 6.0", + "Flask == 2.2.5", + "zeep >= 4.2.1", + "pandas >= 2.2.2" +] + +[project.urls] +Homepage = "https://github.com/fgcz/bfabricPy" +Repository = "https://github.com/fgcz/bfabricPy" + +[project.scripts] +#bfabric_flask="bfabric.scripts.bfabric_flask:main" +#bfabric_feeder_resource_autoQC="bfabric.scripts.bfabric_feeder_resource_autoQC:main" +#bfabric_list_not_existing_storage_directories="bfabric.scripts.bfabric_list_not_existing_storage_directories:main" +#bfabric_list_not_available_proteomics_workunits="bfabric.scripts.bfabric_list_not_available_proteomics_workunits:main" +#bfabric_upload_resource="bfabric.scripts.bfabric_upload_resource:main" 
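+# Each entry maps a console command to a "module:function" entry point; the
+# entries stay commented out until the referenced script defines a main()
+# function.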
+#bfabric_logthis="bfabric.scripts.bfabric_logthis:main" +#bfabric_setResourceStatus_available="bfabric.scripts.bfabric_setResourceStatus_available:main" +#bfabric_setExternalJobStatus_done="bfabric.scripts.bfabric_setExternalJobStatus_done:main" +#bfabric_setWorkunitStatus_available="bfabric.scripts.bfabric_setWorkunitStatus_available:main" +#bfabric_setWorkunitStatus_processing="bfabric.scripts.bfabric_setWorkunitStatus_processing:main" +#bfabric_setWorkunitStatus_failed="bfabric.scripts.bfabric_setWorkunitStatus_failed:main" +#bfabric_delete="bfabric.scripts.bfabric_delete:main" +#bfabric_read"="bfabric.scripts.bfabric_read:main" +#bfabric_read_samples_of_workunit="bfabric.scripts.bfabric_read_samples_of_workunit:main" +#bfabric_read_samples_from_dataset="bfabric.scripts.bfabric_read_samples_from_dataset:main" +#bfabric_save_csv2dataset="bfabric.scripts.bfabric_save_csv2dataset:main" +#bfabric_save_dataset2csv="bfabric.scripts.bfabric_save_dataset2csv:main" +#bfabric_save_fasta="bfabric.scripts.bfabric_save_fasta:main" +#bfabric_save_importresource_sample="bfabric.scripts.bfabric_save_importresource_sample:main" +#bfabric_save_link_to_workunit="bfabric.scripts.bfabric_save_link_to_workunit:main" +#bfabric_save_resource="bfabric.scripts.bfabric_save_resource:main" +#bfabric_save_workunit_attribute="bfabric.scripts.bfabric_save_workunit_attribute:main" +#bfabric_save_workflowstep="bfabric.scripts.bfabric_save_workflowstep:main" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 09f5d537..00000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -Flask==2.2.5 -PyYAML>=3.11 -suds-py3>=1.4.1 -slugify -zeep>=4.2.1 \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100755 index 5698ba2b..00000000 --- a/setup.py +++ /dev/null @@ -1,68 +0,0 @@ -""" - -B-Fabric Appliaction Interface using WSDL - -The code contains classes for wrapper_creator and submitter. - -Ensure that this file is available on the bfabric exec host. - -Copyright (C) 2014-2024 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. - -Authors: - Christian Panse - Maria d'Errico - -Licensed under GPL version 3 - -""" - -from setuptools import setup, find_packages -import os - -with open('requirements.txt') as f: - INSTALL_REQUIRES = f.read().splitlines() -ver_file = os.path.join('bfabric', '_version.py') -with open(ver_file) as f: - exec(f.read()) - -VERSION = __version__ - -setup(name = 'bfabric', - version = VERSION, - description = """ -B-Fabric Appliaction Interface using WSDL. The code contains classes for wrapper_creator and submitter. 
-""", - url = 'git@github.com:fgcz/bfabricPy.git ', - author = 'Christian Panse', - author_email = 'cp@fgcz.ethz.ch', - license = 'GPLv3 / apache 2.0', - packages = ['bfabric'], - python_requires = ">=3.9", - install_requires = INSTALL_REQUIRES, - scripts = [ - 'bfabric/scripts/bfabric_flask.py', - 'bfabric/scripts/bfabric_feeder_resource_autoQC.py', - 'bfabric/scripts/bfabric_list_not_existing_storage_directories.py', - 'bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py', - 'bfabric/scripts/bfabric_upload_resource.py', - 'bfabric/scripts/bfabric_logthis.py', - 'bfabric/scripts/bfabric_setResourceStatus_available.py', - 'bfabric/scripts/bfabric_setExternalJobStatus_done.py', - 'bfabric/scripts/bfabric_setWorkunitStatus_available.py', - 'bfabric/scripts/bfabric_setWorkunitStatus_processing.py', - 'bfabric/scripts/bfabric_setWorkunitStatus_failed.py', - 'bfabric/scripts/bfabric_delete.py', - 'bfabric/scripts/bfabric_read.py', - 'bfabric/scripts/bfabric_read_samples_of_workunit.py', - 'bfabric/scripts/bfabric_read_samples_from_dataset.py', - 'bfabric/scripts/bfabric_save_csv2dataset.py', - 'bfabric/scripts/bfabric_save_dataset2csv.py', - 'bfabric/scripts/bfabric_save_fasta.py', - 'bfabric/scripts/bfabric_save_importresource_sample.py', - 'bfabric/scripts/bfabric_save_link_to_workunit.py', - 'bfabric/scripts/bfabric_save_resource.py', - 'bfabric/scripts/bfabric_save_workunit_attribute.py', - 'bfabric/scripts/bfabric_save_workflowstep.py' - ], - zip_safe=True) - From dca1cc0403f2695aa427cc80c76d44cb7d504840 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 2 May 2024 09:48:45 +0200 Subject: [PATCH 068/129] Extract wrapper creator code (#78) The code itself has not been changed, nor does it use the new API. --- bfabric/__init__.py | 6 +- bfabric/bfabric.py | 709 ------------------ .../scripts/bfabric_demo_register_resource.py | 3 +- bfabric/scripts/bfabric_logthis.py | 3 +- .../bfabric_setExternalJobStatus_done.py | 3 +- .../bfabric_setResourceStatus_available.py | 3 +- .../bfabric_setWorkunitStatus_available.py | 3 +- .../bfabric_setWorkunitStatus_failed.py | 3 +- .../bfabric_setWorkunitStatus_processing.py | 4 +- bfabric/tests/test_bfabric_functional.py | 6 +- bfabric/wrapper_creator/__init__.py | 0 .../wrapper_creator/bfabric_external_job.py | 82 ++ bfabric/wrapper_creator/bfabric_feeder.py | 48 ++ bfabric/wrapper_creator/bfabric_submitter.py | 281 +++++++ .../bfabric_wrapper_creator.py | 315 ++++++++ 15 files changed, 748 insertions(+), 721 deletions(-) create mode 100644 bfabric/wrapper_creator/__init__.py create mode 100644 bfabric/wrapper_creator/bfabric_external_job.py create mode 100644 bfabric/wrapper_creator/bfabric_feeder.py create mode 100644 bfabric/wrapper_creator/bfabric_submitter.py create mode 100644 bfabric/wrapper_creator/bfabric_wrapper_creator.py diff --git a/bfabric/__init__.py b/bfabric/__init__.py index 71ef6d82..69898973 100755 --- a/bfabric/__init__.py +++ b/bfabric/__init__.py @@ -44,7 +44,7 @@ application = 217 from bfabric.bfabric import Bfabric -from bfabric.bfabric import BfabricWrapperCreator -from bfabric.bfabric import BfabricSubmitter -from bfabric.bfabric import BfabricFeeder +from bfabric.wrapper_creator.bfabric_wrapper_creator import BfabricWrapperCreator +from bfabric.wrapper_creator.bfabric_submitter import BfabricSubmitter +from bfabric.wrapper_creator.bfabric_feeder import BfabricFeeder from bfabric.bfabric_config import BfabricConfig diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index 04f9f980..49218389 100755 --- 
a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -30,10 +30,8 @@ from suds.client import Client from suds.wsdl import Service -import hashlib import os import base64 -import datetime import logging.config logging.config.dictConfig({ @@ -59,9 +57,6 @@ # } }) -import bfabric.gridengine as gridengine -import bfabric.slurm as slurm - class bfabricEncoder(json.JSONEncoder): """ @@ -293,710 +288,6 @@ def get_sampleid(self, resourceid=None): self.warning("fetching sampleid of resource.workunitid = {} failed.".format(resource.workunit._id)) return (None) -class BfabricFeeder(Bfabric): - """ - this class is used for reporting 'resource' status - """ - - def report_resource(self, resourceid): - """ - this function determines the 'md5 checksum', 'the file size', - and set the status of the resource available. - - this is gonna executed on the storage host - - """ - res = self.read_object('resource', {'id': resourceid})[0] - print (res) - - if not hasattr(res, 'storage'): - return -1 - - storage = self.read_object('storage', {'id': res.storage._id})[0] - - filename = "{0}/{1}".format(storage.basepath, res.relativepath) - - if os.path.isfile(filename): - try: - fmd5 = hashlib.md5(open(filename, 'rb').read()).hexdigest() - print ("md5sum ({}) = {}".format(filename, fmd5)) - - fsize = int(os.path.getsize(filename)) + 1 - print ("size ({}) = {}".format(filename, fsize)) - - - return self.save_object('resource', {'id': resourceid, - 'size': fsize, - 'status': 'available', - 'filechecksum': fmd5}) - except: - print ("computing md5 failed") - # print ("{} {}".format(Exception, err)) - raise - - return self.save_object('resource', {'id': resourceid, 'status': 'failed'}) - - -class BfabricExternalJob(Bfabric): - """ - ExternalJobs can use logging. - if you have a valid externaljobid use this class instead of - using Bfabric. 
- - - TODO check if an external job id is provided - """ - externaljobid = None - - def __init__(self, login=None, password=None, externaljobid=None): - super(BfabricExternalJob, self).__init__(login, password) - if not externaljobid: - print("Error: no externaljobid provided.") - raise - else: - self.externaljobid = externaljobid - - print(("BfabricExternalJob externaljobid={}".format(self.externaljobid))) - - def logger(self, msg): - if self.externaljobid: - super(BfabricExternalJob, self).save_object('externaljob', {'id': self.externaljobid, 'logthis': str(msg)}) - else: - print((str(msg))) - - def save_object(self, endpoint, obj, debug=None): - res = super(BfabricExternalJob, self).save_object(endpoint, obj, debug) - jsonres = json.dumps(res, cls=bfabricEncoder, sort_keys=True, indent=2) - self.logger('saved ' + endpoint + '=' + str(jsonres)) - return res - - def get_workunitid_of_externaljob(self): - print(("DEBUG get_workunitid_of_externaljob self.externaljobid={}".format(self.externaljobid))) - res = self.read_object(endpoint='externaljob', obj={'id': self.externaljobid})[0] - print(res) - print("DEBUG END") - workunit_id = None - try: - workunit_id = res.cliententityid - print(("workunitid={}".format(workunit_id))) - except: - pass - return workunit_id - - def get_application_name(self): - workunitid = self.get_workunitid_of_externaljob() - if workunitid is None: - raise ValueError("no workunit available for the given externaljobid.") - workunit = self.read_object(endpoint='workunit', obj={'id': workunitid})[0] - if workunit is None: - raise ValueError("ERROR: no workunit available for the given externaljobid.") - assert isinstance(workunit._id, int) - application = self.read_object('application', obj={'id': workunit.application._id})[0] - return application.name.replace(' ', '_') - - - def get_executable_of_externaljobid(self): - """ - It takes as input an `externaljobid` and fetches the the `executables` - out of the bfabric system using wsdl into a file. - returns a list of executables. - - todo: this function should check if base64 is provided or - just a program. - """ - workunitid = self.get_workunitid_of_externaljob() - if workunitid is None: - return None - - executables = list() - for executable in self.read_object(endpoint='executable', obj={'workunitid': workunitid}): - if hasattr(executable, 'base64'): - executables.append(executable) - - return executables if len(executables) > 0 else None - - -class BfabricSubmitter(): - """ - the class is used by the submitter which is executed by the bfabric system. 
- """ - - (G, B) = (None, None) - - workunitid = None - workunit = None - parameters = None - execfilelist = [] - slurm_dict = {"MaxQuant_textfiles_sge" : {'partition': "prx", 'nodelist': "fgcz-r-033", 'memory':"1G"}, - "fragpipe" : {'partition': "prx", 'nodelist': "fgcz-r-033", 'memory':"256G"}, - "MaxQuant" : {'partition': "maxquant", 'nodelist': "fgcz-r-033", 'memory':"4G"}, - "scaffold_generic" : {'partition': "scaffold", 'nodelist': "fgcz-r-033", 'memory':"256G"}, - "MSstats dataProcess" : {'partition': "prx", 'nodelist': "fgcz-r-033", 'memory':"64G"}, - "MaxQuant_sampleSizeEstimation" : {'partition': "prx", 'nodelist': "fgcz-r-028", 'memory': "2G"}, - "ProteomeDiscovererQC" : {'partition': "prx", 'nodelist': "fgcz-r-035", 'memory': "2G"} - } - - def __init__(self, login=None, password=None, externaljobid=None, - user='*', node="PRX@fgcz-r-018", partition="prx", nodelist="fgcz-r-028", memory="10G", SCHEDULEROOT='/export/bfabric/bfabric/', scheduler="GridEngine"): - """ - :rtype : object - """ - self.B = BfabricExternalJob(login=login, password=password, externaljobid=externaljobid) - self.partition = partition - self.nodelist = nodelist - self.memory = memory - self.SCHEDULEROOT = SCHEDULEROOT - self.user = user - self.scheduler = scheduler - - print(self.B.auth.login) - print(self.B.externaljobid) - - self.workunitid = self.B.get_workunitid_of_externaljob() - - try: - self.workunit = self.B.read_object(endpoint='workunit', obj={'id': self.workunitid})[0] - except: - print ("ERROR: could not fetch workunit while calling constructor in BfabricSubmitter.") - raise - - - try: - self.parameters = [self.B.read_object(endpoint='parameter', obj={'id': x._id})[0] for x in self.workunit.parameter] - except: - self.parameters = list() - print ("Warning: could not fetch parameter.") - - partition = [x for x in self.parameters if x.key == "partition"] - nodelist = [x for x in self.parameters if x.key == "nodelist"] - memory = [x for x in self.parameters if x.key == "memory"] - application_name = self.B.get_application_name() - - if len(partition) > 0 and len(nodelist) > 0 and len(memory)>0: - self.partition = partition[0].value - self.nodelist = nodelist[0].value - self.memory = memory[0].value - elif "queue" in [x.key for x in self.parameters] and application_name in self.slurm_dict: - # Temporary check for old workunit previously run with SGE - self.partition = self.slurm_dict[application_name]['partition'] - self.nodelist = self.slurm_dict[application_name]['nodelist'] - self.memory = self.slurm_dict[application_name]['memory'] - else: - pass - - print(("partition={0}".format(self.partition))) - print(("nodelist={0}".format(self.nodelist))) - print(("memory={0}".format(self.memory))) - print("__init__ DONE") - - - def submit_gridengine(self, script="/tmp/runme.bash", arguments=""): - - GE = gridengine.GridEngine(user=self.user, queue=self.queue, GRIDENGINEROOT=self.SCHEDULEROOT) - - print(script) - print((type(script))) - resQsub = GE.qsub(script=script, arguments=arguments) - - self.B.logger("{}".format(resQsub)) - - - def submit_slurm(self, script="/tmp/runme.bash", arguments=""): - - SL = slurm.SLURM(user=self.user, SLURMROOT=self.SCHEDULEROOT) - - print(script) - print((type(script))) - resSbatch = SL.sbatch(script=script, arguments=arguments) - - self.B.logger("{}".format(resSbatch)) - - - def compose_bash_script(self, configuration=None, configuration_parser=lambda x: yaml.safe_load(x)): - """ - composes the bash script which is executed by the submitter (sun grid engine). 
- as argument it takes a configuration file, e.g., yaml, xml, json, or whatsoever, and a parser function. - - it returns a str object containing the code. - - :rtype : str - """ - - - #assert isinstance(configuration, str) - - try: - config = configuration_parser(configuration) - except: - raise ValueError("error: parsing configuration content failed.") - - - _cmd_template = """#!/bin/bash -# Maria d'Errico -# Christian Panse -# 2020-09-28 -# 2020-09-29 -# https://GitHub.com/fgcz/bfabricPy/ -# Slurm -#SBATCH --partition={0} -#SBATCH --nodelist={11} -#SBATCH -n 1 -#SBATCH -N 1 -#SBATCH --cpus-per-task=1 -#SBATCH --mem-per-cpu={12} -#SBATCH -e {1} -#SBATCH -o {2} -#SBATCH --job-name=WU{10} -#SBATCH --workdir=/home/bfabric -#SBATCH --export=ALL,HOME=/home/bfabric - -# Grid Engine Parameters -#$ -q {0}&{11} -#$ -e {1} -#$ -o {2} - - -set -e -set -o pipefail - -export EMAIL="{job_notification_emails}" -export EXTERNALJOB_ID={3} -export RESSOURCEID_OUTPUT={4} -export RESSOURCEID_STDOUT_STDERR="{5} {6}" -export OUTPUT="{7}" -export WORKUNIT_ID="{10}" -STAMP=`/bin/date +%Y%m%d%H%M`.$$.$JOB_ID -TEMPDIR="/home/bfabric/prx" - -_OUTPUT=`echo $OUTPUT | cut -d"," -f1` -test $? -eq 0 && _OUTPUTHOST=`echo $_OUTPUT | cut -d":" -f1` -test $? -eq 0 && _OUTPUTPATH=`echo $_OUTPUT | cut -d":" -f2` -test $? -eq 0 && _OUTPUTPATH=`dirname $_OUTPUTPATH` -test $? -eq 0 && ssh $_OUTPUTHOST "mkdir -p $_OUTPUTPATH" -test $? -eq 0 && echo $$ > $TEMPDIR/$$ -test $? -eq 0 && scp $TEMPDIR/$$ $OUTPUT - -if [ $? -eq 1 ]; -then - echo "writting to output url failed!"; - exit 1; -fi - -# job configuration set by B-Fabrics wrapper_creator executable -# application parameter/configuration -cat > $TEMPDIR/config_WU$WORKUNIT_ID.yaml < $TEMPDIR/$JOB_ID.bash - - (who am i; hostname; uptime; echo $0; pwd; ps;) \ - | mutt -s "JOB_ID=$JOB_ID WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID" $EMAIL \ - -a $TEMPDIR/$JOB_ID.bash $TEMPDIR/config_WU$WORKUNIT_ID.yaml -fi -# exit 0 - -# run the application -test -f $TEMPDIR/config_WU$WORKUNIT_ID.yaml && {9} $TEMPDIR/config_WU$WORKUNIT_ID.yaml - - -if [ $? -eq 0 ]; -then - ssh fgcz-r-035.uzh.ch "bfabric_setResourceStatus_available.py $RESSOURCEID_OUTPUT" \ - | mutt -s "JOB_ID=$JOB_ID WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID DONE" $EMAIL - - bfabric_save_workflowstep.py $WORKUNIT_ID - bfabric_setExternalJobStatus_done.py $EXTERNALJOB_ID - bfabric_setWorkunitStatus_available.py $WORKUNIT_ID - echo $? 
-else - echo "application failed" - mutt -s "JOB_ID=$JOB_ID WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID failed" $EMAIL < /dev/null - bfabric_setResourceStatus_available.py $RESSOURCEID_STDOUT_STDERR $RESSOURCEID; - exit 1; -fi - -# should be available also as zero byte files -bfabric_setResourceStatus_available.py $RESSOURCEID_STDOUT_STDERR - - -exit 0 -""".format(self.partition, - config['job_configuration']['stderr']['url'], - config['job_configuration']['stdout']['url'], - config['job_configuration']['external_job_id'], - config['job_configuration']['output']['resource_id'], - config['job_configuration']['stderr']['resource_id'], - config['job_configuration']['stdout']['resource_id'], - ",".join(config['application']['output']), - configuration, - config['job_configuration']['executable'], - config['job_configuration']['workunit_id'], - self.nodelist, - self.memory, - job_notification_emails=self.B.config.job_notification_emails) - - return _cmd_template - - - def submitter_yaml(self): - """ - implements the default submitter - - the function fetches the yaml base64 configuration file linked to the external job id out of the B-Fabric - system. Since the file can not be stagged to the LRMS as argument, we copy the yaml file into the bash script - and stage it on execution the application. - - TODO(cp): create the output url before the application is started. - - return None - """ - - # foreach (executable in external job): - for executable in self.B.get_executable_of_externaljobid(): - self.B.logger("executable = {0}".format(executable)) - - try: - content = base64.b64decode(executable.base64.encode()).decode() - except: - raise ValueError("error: decoding executable.base64 failed.") - - - print(content) - _cmd_template = self.compose_bash_script(configuration=content, - configuration_parser=lambda x: yaml.safe_load(x)) - - _bash_script_filename = "/home/bfabric/prx/workunitid-{0}_externaljobid-{1}_executableid-{2}.bash"\ - .format(self.B.get_workunitid_of_externaljob(), self.B.externaljobid, executable._id) - - with open(_bash_script_filename, 'w') as f: - f.write(_cmd_template) - - if self.scheduler=="GridEngine" : - self.submit_gridengine(_bash_script_filename) - else: - self.submit_slurm(_bash_script_filename) - self.execfilelist.append(_bash_script_filename) - - - res = self.B.save_object(endpoint='externaljob', - obj={'id': self.B.externaljobid, 'status': 'done'}) - def get_job_script(self): - return self.execfilelist - - -class BfabricWrapperCreator(BfabricExternalJob): - """ - the class is used for the wrapper_creator which is executed by the bfabtic system - (non batch) so each resource is processed seperate - """ - - (externaljobid_submitter, workunit_executableid) = (None, None) - - def get_externaljobid_yaml_workunit(self): - return self.externaljobid_yaml_workunit - - def uploadGridEngineScript(self, para={'INPUTHOST': 'fgcz-r-035.uzh.ch'}): - """ - the methode creates and uploads an executebale. - """ - - self.warning( - "This python method is superfluously and will be removed. 
Please use the write_yaml method of the BfabricWrapperCreato class.") - - _cmd_template = """#!/bin/bash -# $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/bfabric.py $ -# $Id: bfabric.py 3000 2017-08-18 14:18:30Z cpanse $ -# Christian Panse -#$ -q PRX@fgcz-r-028 -#$ -e {1} -#$ -o {2} - -set -e -set -o pipefail - - -# debug -hostname -uptime -echo $0 -pwd - -# variables to be set by the wrapper_creator executable -{0} - - -# create output directory -ssh $SSHARGS $OUTPUTHOST "mkdir -p $OUTPUTPATH" || exit 1 - -# staging input and output data and proc -ssh $SSHARGS $INPUTHOST "cat $INPUTPATH/$INPUTFILE" \\ -| $APPLICATION --inputfile $INPUTFILE --ssh "$OUTPUTHOST:$OUTPUTPATH/$OUTPUTFILE" \\ -&& bfabric_setResourceStatus_available.py $RESSOURCEID \\ -&& bfabric_setExternalJobStatus_done.py $EXTERNALJOBID \\ -|| exit 1 - -exit 0 -""".format("\n".join(sorted(['%s="%s"' % (key, info) for key, info in para.iteritems()])), para['STDERR'], - para['STDOUT']) - - resExecutable = self.save_object('executable', {'name': os.path.basename(para['APPLICATION']) + "_executable", - 'context': 'WORKUNIT', - 'parameter': None, - 'description': "This script should run as 'bfabric' user in the FGCZ compute infrastructure.", - 'workunitid': para['WORKUNITID'], - 'base64': base64.b64encode(_cmd_template), - 'version': 0.2}) - - return (resExecutable) - - def get_executableid(self): - return (self.workunit_executableid) - - def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style=False, encoding=None)): - """ - This method writes all related parameters into a yaml file which is than upload as base64 encoded - file into the b-fabric system. - - if the method does not excepted at the end it reports also the status of the external_job. - - TODO(cp): make this function more generic so that it can also export xml, json, yaml, ... 
- """ - - # Inherits all parameters of the application executable out of B-Fabric to create an executable script - workunitid = self.get_workunitid_of_externaljob() - - if workunitid is None: - raise ValueError("no workunit available for the given externaljobid.") - - workunit = self.read_object(endpoint='workunit', obj={'id': workunitid})[0] - if workunit is None: - raise ValueError("ERROR: no workunit available for the given externaljobid.") - - assert isinstance(workunit._id, int) - - application = self.read_object('application', obj={'id': workunit.application._id})[0] - # TODO(cp): rename to application_execuatbel - workunit_executable = self.read_object('executable', obj={'id': workunit.applicationexecutable._id})[0] - try: - self.workunit_executableid = workunit_executable._id - except: - self.workunit_executableid = None - - # Get container details - container = workunit.container - fastasequence = "" - if container._classname=="order": - order = self.read_object('order', obj={'id': container._id})[0] - order_id = order._id - if "project" in order: - project_id = order.project._id - else: - project_id = None - if "fastasequence" in order: - fastasequence = "\n".join([x.strip() for x in str(order.fastasequence).split("\r")]) - else: - order_id = None - project_id = container._id - - today = datetime.date.today() - - # merge all information into the executable script - _output_storage = self.read_object('storage', obj={'id': application.storage._id})[0] - - _output_relative_path = "p{0}/bfabric/{1}/{2}/{3}/workunit_{4}/".format( - container._id, - application.technology.replace(' ', '_'), - application.name.replace(' ', '_'), - today.strftime('%Y/%Y-%m/%Y-%m-%d/'), - workunitid) - - # Setup the log_storage to SlurmLog with id 13 - _log_storage = self.read_object('storage', obj={'id': 13})[0] - - #_cmd_applicationList = [workunit_executable.program] - - application_parameter = {} - - if not getattr(workunit, "parameter", None) is None: - for para in workunit.parameter: - parameter = self.read_object('parameter', obj={'id': para._id}) - if parameter: - for p in parameter: - try: - application_parameter["{}".format(p.key)] = "{}".format(p.value) - except: - application_parameter["{}".format(p.key)] = "" - - try: - input_resources = [x._id for x in workunit.inputresource] - input_resources = [self.read_object(endpoint='resource', obj={'id': x})[0] for x in input_resources] - except: - print("no input resources found. 
continue with empty list.") - input_resources = [] - - - # query all urls and ids of the input resources - resource_urls = dict() - resource_ids = dict() - - for resource_iterator in input_resources: - try: - _appication_id = self.read_object(endpoint='workunit', - obj={'id': resource_iterator.workunit._id})[0].application._id - - _application_name = "{0}".format(self.read_object('application', obj={'id': _appication_id})[0].name) - - _storage = self.read_object('storage', {'id': resource_iterator.storage._id})[0] - - _inputUrl = "bfabric@{0}:/{1}/{2}".format(_storage.host, _storage.basepath, resource_iterator.relativepath) - - if not _application_name in resource_urls: - resource_urls[_application_name] = [] - resource_ids[_application_name] = [] - - resource_urls[_application_name].append(_inputUrl) - - sample_id = self.get_sampleid(int(resource_iterator._id)) - - _resource_sample = {'resource_id': int(resource_iterator._id), - 'resource_url': "{0}/userlab/show-resource.html?id={1}".format(self.config.base_url, resource_iterator._id)} - - - if not sample_id is None: - _resource_sample['sample_id'] = int(sample_id) - _resource_sample['sample_url'] = "{0}/userlab/show-sample.html?id={1}".format(self.config.base_url, sample_id) - - resource_ids[_application_name].append(_resource_sample) - except: - print ("resource_iterator failed. continue ...") - pass - - - # create resources for output, stderr, stdout - _ressource_output = self.save_object('resource', { - 'name': "{0} {1} - resource".format(application.name, len(input_resources)), - 'workunitid': workunit._id, - 'storageid': int(application.storage._id), - 'relativepath': _output_relative_path})[0] - - - print(_ressource_output) - _output_filename = "{0}.{1}".format(_ressource_output._id, application.outputfileformat) - # we want to include the resource._id into the filename - _ressource_output = self.save_object('resource', - {'id': int(_ressource_output._id), - 'relativepath': "{0}/{1}".format(_output_relative_path, _output_filename)})[0] - - print (_ressource_output) - _resource_stderr = self.save_object('resource', { - 'name': 'slurm_stderr', - 'workunitid': int(workunit._id), - 'storageid': _log_storage._id, - 'relativepath': "/workunitid-{0}_resourceid-{1}.err".format(workunit._id, _ressource_output._id)})[0] - - _resource_stdout = self.save_object('resource', { - 'name': 'slurm_stdout', - 'workunitid': workunit._id, - 'storageid': _log_storage._id, - 'relativepath': "/workunitid-{0}_resourceid-{1}.out".format(workunit._id, _ressource_output._id)})[0] - - - # Creates the workunit executable - # The config includes the externaljobid: the yaml_workunit_externaljob has to be created before it. - # The yaml_workunit_externaljob cannot be created without specifying an executableid: - # a yaml_workunit_executable is thus created before the config definition in order to provide - # the correct executableid to the yaml_workunit_externaljob. - # However this yaml_workunit_executable has to be updated later to include 'base64': base64.b64encode(config_serialized.encode()).decode() - yaml_workunit_executable = self.save_object('executable', {'name': 'job configuration (executable) in YAML', - 'context': 'WORKUNIT', - 'workunitid': workunit._id, - 'description': "This is a job configuration as YAML base64 encoded. 
It is configured to be executed by the B-Fabric yaml submitter."})[0] - print(yaml_workunit_executable) - - yaml_workunit_externaljob = self.save_object('externaljob', - {"workunitid": workunit._id, - 'status': 'new', - 'executableid' : yaml_workunit_executable._id, - 'action': "WORKUNIT"})[0] - print(yaml_workunit_externaljob) - assert isinstance(yaml_workunit_externaljob._id, int) - self.externaljobid_yaml_workunit = int(yaml_workunit_externaljob._id) - print(("XXXXXXX self.externaljobid_yaml_workunit ={} XXXXXXX".format(self.externaljobid_yaml_workunit))) - - _output_url = "bfabric@{0}:{1}{2}/{3}".format(_output_storage.host, - _output_storage.basepath, - _output_relative_path, - _output_filename) - - try: - query_obj = {'id': workunit.inputdataset._id} - inputdataset = self.read_object(endpoint='dataset', obj=query_obj)[0] - inputdataset_json = json.dumps(inputdataset, cls=bfabricEncoder, sort_keys=True, indent=2) - inputdataset = json.loads(inputdataset_json) - except: - inputdataset = None - - # Compose configuration structure - config = { - 'job_configuration': { - 'executable': "{}".format(workunit_executable.program), - 'inputdataset': inputdataset, - 'input': resource_ids, - 'output': { - 'protocol': 'scp', - 'resource_id': int(_ressource_output._id), - 'ssh_args': "-o StrictHostKeyChecking=no -2 -l bfabric -x" - }, - 'stderr': { - 'protocol': 'file', - 'resource_id': int(_resource_stderr._id) , - 'url': "{0}/workunitid-{1}_resourceid-{2}.err".format(_log_storage.basepath, workunit._id, _ressource_output._id) - }, - 'stdout': { - 'protocol': 'file', - 'resource_id': int(_resource_stdout._id), - 'url': "{0}/workunitid-{1}_resourceid-{2}.out".format(_log_storage.basepath, workunit._id, _ressource_output._id) - }, - 'workunit_id': int(workunit._id), - 'workunit_createdby': str(workunit.createdby), - 'workunit_url': "{0}/userlab/show-workunit.html?workunitId={1}".format(self.config.base_url, workunit._id), - 'external_job_id': int(yaml_workunit_externaljob._id), - 'order_id': order_id, - 'project_id': project_id, - 'fastasequence': fastasequence - }, - 'application' : { - 'protocol': 'scp', - 'parameters': application_parameter, - 'input': resource_urls, - 'output': [_output_url] - } - } - - config_serialized = data_serializer(config) - print(config_serialized) - - yaml_workunit_executable = self.save_object('executable', {'id': yaml_workunit_executable._id, - 'base64': base64.b64encode(config_serialized.encode()).decode(), - 'version': "{}".format(10)})[0] - print(yaml_workunit_executable) - - # The WrapperCreator executable is successful, and the status of the its external job is set to done, - # which triggers B-Fabric to create an external job for the submitter executable. 
- - wrapper_creator_externaljob = self.save_object(endpoint='externaljob', - obj={'id': self.externaljobid, 'status': 'done'}) - - print(("\n\nquery_counter={0}".format(self.query_counter))) - - - if __name__ == "__main__": bfapp = Bfabric(verbose=True) diff --git a/bfabric/scripts/bfabric_demo_register_resource.py b/bfabric/scripts/bfabric_demo_register_resource.py index 4fa290fd..b824c3bb 100755 --- a/bfabric/scripts/bfabric_demo_register_resource.py +++ b/bfabric/scripts/bfabric_demo_register_resource.py @@ -12,11 +12,12 @@ import time import sys import bfabric +import bfabric.wrapper_creator.bfabric_feeder def main(): BFABRICSTORAGEID = 2 - bfapp = bfabric.BfabricFeeder() + bfapp = bfabric.wrapper_creator.bfabric_feeder.BfabricFeeder() # create workunit wuobj = { 'applicationid': 155, diff --git a/bfabric/scripts/bfabric_logthis.py b/bfabric/scripts/bfabric_logthis.py index 0684e0b0..fb01298b 100755 --- a/bfabric/scripts/bfabric_logthis.py +++ b/bfabric/scripts/bfabric_logthis.py @@ -17,10 +17,11 @@ from random import randint from time import sleep +import bfabric.wrapper_creator.bfabric_feeder if __name__ == "__main__": if len(sys.argv) > 1: - B = bfabric.BfabricFeeder() + B = bfabric.wrapper_creator.bfabric_feeder.BfabricFeeder() try: externaljobid, msg = (int(sys.argv[1]), sys.argv[2]) except: diff --git a/bfabric/scripts/bfabric_setExternalJobStatus_done.py b/bfabric/scripts/bfabric_setExternalJobStatus_done.py index 73d0581c..4727252e 100755 --- a/bfabric/scripts/bfabric_setExternalJobStatus_done.py +++ b/bfabric/scripts/bfabric_setExternalJobStatus_done.py @@ -19,9 +19,10 @@ import sys import bfabric +import bfabric.wrapper_creator.bfabric_feeder if __name__ == "__main__": - bfapp = bfabric.BfabricFeeder() + bfapp = bfabric.wrapper_creator.bfabric_feeder.BfabricFeeder() if len(sys.argv) > 1: for i in range(1, len(sys.argv)): diff --git a/bfabric/scripts/bfabric_setResourceStatus_available.py b/bfabric/scripts/bfabric_setResourceStatus_available.py index f72c94ed..d68b2c02 100755 --- a/bfabric/scripts/bfabric_setResourceStatus_available.py +++ b/bfabric/scripts/bfabric_setResourceStatus_available.py @@ -22,10 +22,11 @@ from random import randint from time import sleep +import bfabric.wrapper_creator.bfabric_feeder if __name__ == "__main__": if len(sys.argv) > 1: - bfapp = bfabric.BfabricFeeder() + bfapp = bfabric.wrapper_creator.bfabric_feeder.BfabricFeeder() for i in range(1, len(sys.argv)): sleep(randint(2, 20)) diff --git a/bfabric/scripts/bfabric_setWorkunitStatus_available.py b/bfabric/scripts/bfabric_setWorkunitStatus_available.py index e1f9d67d..ca5f0290 100755 --- a/bfabric/scripts/bfabric_setWorkunitStatus_available.py +++ b/bfabric/scripts/bfabric_setWorkunitStatus_available.py @@ -22,10 +22,11 @@ from random import randint from time import sleep +import bfabric.wrapper_creator.bfabric_feeder if __name__ == "__main__": if len(sys.argv) > 1: - B = bfabric.BfabricFeeder() + B = bfabric.wrapper_creator.bfabric_feeder.BfabricFeeder() res = B.save_object(endpoint='workunit', obj={'id': int(sys.argv[1]), 'status': 'available'}) B.print_json(res) diff --git a/bfabric/scripts/bfabric_setWorkunitStatus_failed.py b/bfabric/scripts/bfabric_setWorkunitStatus_failed.py index c71e6ab0..847e0840 100755 --- a/bfabric/scripts/bfabric_setWorkunitStatus_failed.py +++ b/bfabric/scripts/bfabric_setWorkunitStatus_failed.py @@ -19,10 +19,11 @@ from random import randint from time import sleep +import bfabric.wrapper_creator.bfabric_feeder if __name__ == "__main__": if len(sys.argv) > 1: - 
bfapp = bfabric.BfabricFeeder() + bfapp = bfabric.wrapper_creator.bfabric_feeder.BfabricFeeder() workunitid = int(sys.argv[1]) print("workunitit={}".format(workunitid)) diff --git a/bfabric/scripts/bfabric_setWorkunitStatus_processing.py b/bfabric/scripts/bfabric_setWorkunitStatus_processing.py index 57f4948b..91ff96f1 100755 --- a/bfabric/scripts/bfabric_setWorkunitStatus_processing.py +++ b/bfabric/scripts/bfabric_setWorkunitStatus_processing.py @@ -20,8 +20,10 @@ from random import randint from time import sleep +import bfabric.wrapper_creator.bfabric_feeder + if __name__ == "__main__": if len(sys.argv) > 1: - B = bfabric.BfabricFeeder() + B = bfabric.wrapper_creator.bfabric_feeder.BfabricFeeder() res = B.save_object(endpoint='workunit', obj={'id': int(sys.argv[1]), 'status': 'processing'}) B.print_json(res) diff --git a/bfabric/tests/test_bfabric_functional.py b/bfabric/tests/test_bfabric_functional.py index c29261be..14c44e2f 100755 --- a/bfabric/tests/test_bfabric_functional.py +++ b/bfabric/tests/test_bfabric_functional.py @@ -15,6 +15,8 @@ import logging import time +import bfabric.wrapper_creator.bfabric_submitter +import bfabric.wrapper_creator.bfabric_wrapper_creator logging.basicConfig(filename="test_functional.log", filemode='a', @@ -153,7 +155,7 @@ def test_wrappercreator_submitter(self): ## this information is contained in the application definition try: - W = bfabric.BfabricWrapperCreator(externaljobid=externaljobid_wc) + W = wrapper_creator.bfabric_wrapper_creator.BfabricWrapperCreator(externaljobid=externaljobid_wc) W.write_yaml() # TODO(cp): write getter of execuableid except: @@ -188,7 +190,7 @@ def test_wrappercreator_submitter(self): logging.info("Executing the Submitter executable: function submitter_yaml from BfabricSubmitter") # Submitter executable is supposed to download all workunit executables and submit them. # When finished successfully, the status of its external job is set to done, else to failed. - S = bfabric.BfabricSubmitter(externaljobid=externaljobid_submitter, SCHEDULEROOT="/usr/", scheduler="Slurm") + S = wrapper_creator.bfabric_submitter.BfabricSubmitter(externaljobid=externaljobid_submitter, SCHEDULEROOT="/usr/", scheduler="Slurm") ## this information is contained in the application definition try: S.submitter_yaml() diff --git a/bfabric/wrapper_creator/__init__.py b/bfabric/wrapper_creator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bfabric/wrapper_creator/bfabric_external_job.py b/bfabric/wrapper_creator/bfabric_external_job.py new file mode 100644 index 00000000..7d30683c --- /dev/null +++ b/bfabric/wrapper_creator/bfabric_external_job.py @@ -0,0 +1,82 @@ +import json + +from bfabric.bfabric import Bfabric, bfabricEncoder + + +class BfabricExternalJob(Bfabric): + """ + ExternalJobs can use logging. + if you have a valid externaljobid use this class instead of + using Bfabric. 
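+    Note that every save_object call is mirrored into the external job log
+    (externaljob.logthis) as JSON, so B-Fabric keeps a trace of what the
+    job wrote.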
+ + + TODO check if an external job id is provided + """ + externaljobid = None + + def __init__(self, login=None, password=None, externaljobid=None): + super(BfabricExternalJob, self).__init__(login, password) + if not externaljobid: + print("Error: no externaljobid provided.") + raise + else: + self.externaljobid = externaljobid + + print(("BfabricExternalJob externaljobid={}".format(self.externaljobid))) + + def logger(self, msg): + if self.externaljobid: + super(BfabricExternalJob, self).save_object('externaljob', {'id': self.externaljobid, 'logthis': str(msg)}) + else: + print((str(msg))) + + def save_object(self, endpoint, obj, debug=None): + res = super(BfabricExternalJob, self).save_object(endpoint, obj, debug) + jsonres = json.dumps(res, cls=bfabricEncoder, sort_keys=True, indent=2) + self.logger('saved ' + endpoint + '=' + str(jsonres)) + return res + + def get_workunitid_of_externaljob(self): + print(("DEBUG get_workunitid_of_externaljob self.externaljobid={}".format(self.externaljobid))) + res = self.read_object(endpoint='externaljob', obj={'id': self.externaljobid})[0] + print(res) + print("DEBUG END") + workunit_id = None + try: + workunit_id = res.cliententityid + print(("workunitid={}".format(workunit_id))) + except: + pass + return workunit_id + + def get_application_name(self): + workunitid = self.get_workunitid_of_externaljob() + if workunitid is None: + raise ValueError("no workunit available for the given externaljobid.") + workunit = self.read_object(endpoint='workunit', obj={'id': workunitid})[0] + if workunit is None: + raise ValueError("ERROR: no workunit available for the given externaljobid.") + assert isinstance(workunit._id, int) + application = self.read_object('application', obj={'id': workunit.application._id})[0] + return application.name.replace(' ', '_') + + + def get_executable_of_externaljobid(self): + """ + It takes as input an `externaljobid` and fetches the the `executables` + out of the bfabric system using wsdl into a file. + returns a list of executables. + + todo: this function should check if base64 is provided or + just a program. + """ + workunitid = self.get_workunitid_of_externaljob() + if workunitid is None: + return None + + executables = list() + for executable in self.read_object(endpoint='executable', obj={'workunitid': workunitid}): + if hasattr(executable, 'base64'): + executables.append(executable) + + return executables if len(executables) > 0 else None diff --git a/bfabric/wrapper_creator/bfabric_feeder.py b/bfabric/wrapper_creator/bfabric_feeder.py new file mode 100644 index 00000000..1888807b --- /dev/null +++ b/bfabric/wrapper_creator/bfabric_feeder.py @@ -0,0 +1,48 @@ +import hashlib +import os + +from bfabric import Bfabric + + +class BfabricFeeder(Bfabric): + """ + this class is used for reporting 'resource' status + """ + + def report_resource(self, resourceid): + """ + this function determines the 'md5 checksum', 'the file size', + and set the status of the resource available. 
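+        If the file is not found on the storage host, the resource status
+        is set to 'failed' instead.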
+ + this is gonna executed on the storage host + + """ + res = self.read_object('resource', {'id': resourceid})[0] + print (res) + + if not hasattr(res, 'storage'): + return -1 + + storage = self.read_object('storage', {'id': res.storage._id})[0] + + filename = "{0}/{1}".format(storage.basepath, res.relativepath) + + if os.path.isfile(filename): + try: + fmd5 = hashlib.md5(open(filename, 'rb').read()).hexdigest() + print ("md5sum ({}) = {}".format(filename, fmd5)) + + fsize = int(os.path.getsize(filename)) + 1 + print ("size ({}) = {}".format(filename, fsize)) + + + return self.save_object('resource', {'id': resourceid, + 'size': fsize, + 'status': 'available', + 'filechecksum': fmd5}) + except: + print ("computing md5 failed") + # print ("{} {}".format(Exception, err)) + raise + + return self.save_object('resource', {'id': resourceid, 'status': 'failed'}) diff --git a/bfabric/wrapper_creator/bfabric_submitter.py b/bfabric/wrapper_creator/bfabric_submitter.py new file mode 100644 index 00000000..0838f05a --- /dev/null +++ b/bfabric/wrapper_creator/bfabric_submitter.py @@ -0,0 +1,281 @@ +import base64 + +import yaml + +import bfabric.gridengine as gridengine +import bfabric.slurm as slurm +from bfabric.wrapper_creator.bfabric_external_job import BfabricExternalJob + + +class BfabricSubmitter: + """ + the class is used by the submitter which is executed by the bfabric system. + """ + + (G, B) = (None, None) + + workunitid = None + workunit = None + parameters = None + execfilelist = [] + slurm_dict = {"MaxQuant_textfiles_sge" : {'partition': "prx", 'nodelist': "fgcz-r-033", 'memory':"1G"}, + "fragpipe" : {'partition': "prx", 'nodelist': "fgcz-r-033", 'memory':"256G"}, + "MaxQuant" : {'partition': "maxquant", 'nodelist': "fgcz-r-033", 'memory':"4G"}, + "scaffold_generic" : {'partition': "scaffold", 'nodelist': "fgcz-r-033", 'memory':"256G"}, + "MSstats dataProcess" : {'partition': "prx", 'nodelist': "fgcz-r-033", 'memory':"64G"}, + "MaxQuant_sampleSizeEstimation" : {'partition': "prx", 'nodelist': "fgcz-r-028", 'memory': "2G"}, + "ProteomeDiscovererQC" : {'partition': "prx", 'nodelist': "fgcz-r-035", 'memory': "2G"} + } + + def __init__(self, login=None, password=None, externaljobid=None, + user='*', node="PRX@fgcz-r-018", partition="prx", nodelist="fgcz-r-028", memory="10G", SCHEDULEROOT='/export/bfabric/bfabric/', scheduler="GridEngine"): + """ + :rtype : object + """ + self.B = BfabricExternalJob(login=login, password=password, externaljobid=externaljobid) + self.partition = partition + self.nodelist = nodelist + self.memory = memory + self.SCHEDULEROOT = SCHEDULEROOT + self.user = user + self.scheduler = scheduler + + print(self.B.auth.login) + print(self.B.externaljobid) + + self.workunitid = self.B.get_workunitid_of_externaljob() + + try: + self.workunit = self.B.read_object(endpoint='workunit', obj={'id': self.workunitid})[0] + except: + print ("ERROR: could not fetch workunit while calling constructor in BfabricSubmitter.") + raise + + + try: + self.parameters = [self.B.read_object(endpoint='parameter', obj={'id': x._id})[0] for x in self.workunit.parameter] + except: + self.parameters = list() + print ("Warning: could not fetch parameter.") + + partition = [x for x in self.parameters if x.key == "partition"] + nodelist = [x for x in self.parameters if x.key == "nodelist"] + memory = [x for x in self.parameters if x.key == "memory"] + application_name = self.B.get_application_name() + + if len(partition) > 0 and len(nodelist) > 0 and len(memory)>0: + self.partition = 
partition[0].value + self.nodelist = nodelist[0].value + self.memory = memory[0].value + elif "queue" in [x.key for x in self.parameters] and application_name in self.slurm_dict: + # Temporary check for old workunit previously run with SGE + self.partition = self.slurm_dict[application_name]['partition'] + self.nodelist = self.slurm_dict[application_name]['nodelist'] + self.memory = self.slurm_dict[application_name]['memory'] + else: + pass + + print(("partition={0}".format(self.partition))) + print(("nodelist={0}".format(self.nodelist))) + print(("memory={0}".format(self.memory))) + print("__init__ DONE") + + + def submit_gridengine(self, script="/tmp/runme.bash", arguments=""): + + GE = gridengine.GridEngine(user=self.user, queue=self.queue, GRIDENGINEROOT=self.SCHEDULEROOT) + + print(script) + print((type(script))) + resQsub = GE.qsub(script=script, arguments=arguments) + + self.B.logger("{}".format(resQsub)) + + + def submit_slurm(self, script="/tmp/runme.bash", arguments=""): + + SL = slurm.SLURM(user=self.user, SLURMROOT=self.SCHEDULEROOT) + + print(script) + print((type(script))) + resSbatch = SL.sbatch(script=script, arguments=arguments) + + self.B.logger("{}".format(resSbatch)) + + + def compose_bash_script(self, configuration=None, configuration_parser=lambda x: yaml.safe_load(x)): + """ + composes the bash script which is executed by the submitter (sun grid engine). + as argument it takes a configuration file, e.g., yaml, xml, json, or whatsoever, and a parser function. + + it returns a str object containing the code. + + :rtype : str + """ + + + #assert isinstance(configuration, str) + + try: + config = configuration_parser(configuration) + except: + raise ValueError("error: parsing configuration content failed.") + + + _cmd_template = """#!/bin/bash +# Maria d'Errico +# Christian Panse +# 2020-09-28 +# 2020-09-29 +# https://GitHub.com/fgcz/bfabricPy/ +# Slurm +#SBATCH --partition={0} +#SBATCH --nodelist={11} +#SBATCH -n 1 +#SBATCH -N 1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem-per-cpu={12} +#SBATCH -e {1} +#SBATCH -o {2} +#SBATCH --job-name=WU{10} +#SBATCH --workdir=/home/bfabric +#SBATCH --export=ALL,HOME=/home/bfabric + +# Grid Engine Parameters +#$ -q {0}&{11} +#$ -e {1} +#$ -o {2} + + +set -e +set -o pipefail + +export EMAIL="{job_notification_emails}" +export EXTERNALJOB_ID={3} +export RESSOURCEID_OUTPUT={4} +export RESSOURCEID_STDOUT_STDERR="{5} {6}" +export OUTPUT="{7}" +export WORKUNIT_ID="{10}" +STAMP=`/bin/date +%Y%m%d%H%M`.$$.$JOB_ID +TEMPDIR="/home/bfabric/prx" + +_OUTPUT=`echo $OUTPUT | cut -d"," -f1` +test $? -eq 0 && _OUTPUTHOST=`echo $_OUTPUT | cut -d":" -f1` +test $? -eq 0 && _OUTPUTPATH=`echo $_OUTPUT | cut -d":" -f2` +test $? -eq 0 && _OUTPUTPATH=`dirname $_OUTPUTPATH` +test $? -eq 0 && ssh $_OUTPUTHOST "mkdir -p $_OUTPUTPATH" +test $? -eq 0 && echo $$ > $TEMPDIR/$$ +test $? -eq 0 && scp $TEMPDIR/$$ $OUTPUT + +if [ $? -eq 1 ]; +then + echo "writting to output url failed!"; + exit 1; +fi + +# job configuration set by B-Fabrics wrapper_creator executable +# application parameter/configuration +cat > $TEMPDIR/config_WU$WORKUNIT_ID.yaml < $TEMPDIR/$JOB_ID.bash + + (who am i; hostname; uptime; echo $0; pwd; ps;) \ + | mutt -s "JOB_ID=$JOB_ID WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID" $EMAIL \ + -a $TEMPDIR/$JOB_ID.bash $TEMPDIR/config_WU$WORKUNIT_ID.yaml +fi +# exit 0 + +# run the application +test -f $TEMPDIR/config_WU$WORKUNIT_ID.yaml && {9} $TEMPDIR/config_WU$WORKUNIT_ID.yaml + + +if [ $? 
 -eq 0 ];
+then
+  ssh fgcz-r-035.uzh.ch "bfabric_setResourceStatus_available.py $RESSOURCEID_OUTPUT" \
+  | mutt -s "JOB_ID=$JOB_ID WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID DONE" $EMAIL
+
+  bfabric_save_workflowstep.py $WORKUNIT_ID
+  bfabric_setExternalJobStatus_done.py $EXTERNALJOB_ID
+  bfabric_setWorkunitStatus_available.py $WORKUNIT_ID
+  echo $?
+else
+  echo "application failed"
+  mutt -s "JOB_ID=$JOB_ID WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID failed" $EMAIL < /dev/null
+  bfabric_setResourceStatus_available.py $RESSOURCEID_STDOUT_STDERR $RESSOURCEID;
+  exit 1;
+fi
+
+# should be available also as zero byte files
+bfabric_setResourceStatus_available.py $RESSOURCEID_STDOUT_STDERR
+
+
+exit 0
+""".format(self.partition,
+           config['job_configuration']['stderr']['url'],
+           config['job_configuration']['stdout']['url'],
+           config['job_configuration']['external_job_id'],
+           config['job_configuration']['output']['resource_id'],
+           config['job_configuration']['stderr']['resource_id'],
+           config['job_configuration']['stdout']['resource_id'],
+           ",".join(config['application']['output']),
+           configuration,
+           config['job_configuration']['executable'],
+           config['job_configuration']['workunit_id'],
+           self.nodelist,
+           self.memory,
+           job_notification_emails=self.B.config.job_notification_emails)
+
+        return _cmd_template
+
+
+    def submitter_yaml(self):
+        """
+        implements the default submitter
+
+        the function fetches the yaml base64 configuration file linked to the external job id out of the B-Fabric
+        system. Since the file cannot be staged to the LRMS as an argument, we copy the yaml file into the bash script
+        and stage it when executing the application.
+
+        TODO(cp): create the output url before the application is started.
+
+        return None
+        """
+
+        # foreach (executable in external job):
+        for executable in self.B.get_executable_of_externaljobid():
+            self.B.logger("executable = {0}".format(executable))
+
+            try:
+                content = base64.b64decode(executable.base64.encode()).decode()
+            except Exception:
+                raise ValueError("error: decoding executable.base64 failed.")
+
+            print(content)
+            _cmd_template = self.compose_bash_script(configuration=content,
+                                                     configuration_parser=lambda x: yaml.safe_load(x))
+
+            _bash_script_filename = "/home/bfabric/prx/workunitid-{0}_externaljobid-{1}_executableid-{2}.bash"\
+                .format(self.B.get_workunitid_of_externaljob(), self.B.externaljobid, executable._id)
+
+            with open(_bash_script_filename, 'w') as f:
+                f.write(_cmd_template)
+
+            if self.scheduler == "GridEngine":
+                self.submit_gridengine(_bash_script_filename)
+            else:
+                self.submit_slurm(_bash_script_filename)
+            self.execfilelist.append(_bash_script_filename)
+
+        res = self.B.save_object(endpoint='externaljob',
+                                 obj={'id': self.B.externaljobid, 'status': 'done'})
+
+    def get_job_script(self):
+        return self.execfilelist
diff --git a/bfabric/wrapper_creator/bfabric_wrapper_creator.py b/bfabric/wrapper_creator/bfabric_wrapper_creator.py
new file mode 100644
index 00000000..3d3e11ca
--- /dev/null
+++ b/bfabric/wrapper_creator/bfabric_wrapper_creator.py
@@ -0,0 +1,315 @@
+import base64
+import datetime
+import json
+import os
+
+import yaml
+
+from bfabric.bfabric import bfabricEncoder
+from bfabric.wrapper_creator.bfabric_external_job import BfabricExternalJob
+
+
+class BfabricWrapperCreator(BfabricExternalJob):
+    """
+    this class is used for the wrapper_creator, which is executed by the bfabric system
+    (non batch), so each resource is processed separately
+    """
+
+    (externaljobid_submitter, workunit_executableid) = 
 (None, None)
+
+    def get_externaljobid_yaml_workunit(self):
+        return self.externaljobid_yaml_workunit
+
+    def uploadGridEngineScript(self, para={'INPUTHOST': 'fgcz-r-035.uzh.ch'}):
+        """
+        this method creates and uploads an executable.
+        """
+
+        self.warning(
+            "This python method is superfluous and will be removed. Please use the write_yaml method of the BfabricWrapperCreator class.")
+
+        _cmd_template = """#!/bin/bash
+# $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/bfabric.py $
+# $Id: bfabric.py 3000 2017-08-18 14:18:30Z cpanse $
+# Christian Panse
+#$ -q PRX@fgcz-r-028
+#$ -e {1}
+#$ -o {2}
+
+set -e
+set -o pipefail
+
+
+# debug
+hostname
+uptime
+echo $0
+pwd
+
+# variables to be set by the wrapper_creator executable
+{0}
+
+
+# create output directory
+ssh $SSHARGS $OUTPUTHOST "mkdir -p $OUTPUTPATH" || exit 1
+
+# staging input and output data and proc
+ssh $SSHARGS $INPUTHOST "cat $INPUTPATH/$INPUTFILE" \\
+| $APPLICATION --inputfile $INPUTFILE --ssh "$OUTPUTHOST:$OUTPUTPATH/$OUTPUTFILE" \\
+&& bfabric_setResourceStatus_available.py $RESSOURCEID \\
+&& bfabric_setExternalJobStatus_done.py $EXTERNALJOBID \\
+|| exit 1
+
+exit 0
+""".format("\n".join(sorted(['%s="%s"' % (key, info) for key, info in para.items()])), para['STDERR'],
+           para['STDOUT'])
+
+        resExecutable = self.save_object('executable', {'name': os.path.basename(para['APPLICATION']) + "_executable",
+                                                        'context': 'WORKUNIT',
+                                                        'parameter': None,
+                                                        'description': "This script should run as 'bfabric' user in the FGCZ compute infrastructure.",
+                                                        'workunitid': para['WORKUNITID'],
+                                                        'base64': base64.b64encode(_cmd_template.encode()).decode(),
+                                                        'version': 0.2})
+
+        return resExecutable
+
+    def get_executableid(self):
+        return self.workunit_executableid
+
+    def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style=False, encoding=None)):
+        """
+        This method writes all related parameters into a yaml file, which is then uploaded as a base64 encoded
+        file into the b-fabric system.
+
+        if the method does not raise an exception, it also reports the status of the external_job at the end.
+
+        TODO(cp): make this function more generic so that it can also export xml, json, yaml, ...
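+
+        The resulting document has roughly the following shape (illustrative
+        excerpt only; the complete set of keys is the `config` dictionary
+        composed further below):
+
+            job_configuration:
+              executable: ...
+              input: {...}
+              output: {protocol: scp, resource_id: ..., ssh_args: ...}
+              stderr: {protocol: file, resource_id: ..., url: ...}
+              stdout: {protocol: file, resource_id: ..., url: ...}
+              workunit_id: ...
+              external_job_id: ...
+            application:
+              protocol: scp
+              parameters: {...}
+              input: {...}
+              output: [...]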
+ """ + + # Inherits all parameters of the application executable out of B-Fabric to create an executable script + workunitid = self.get_workunitid_of_externaljob() + + if workunitid is None: + raise ValueError("no workunit available for the given externaljobid.") + + workunit = self.read_object(endpoint='workunit', obj={'id': workunitid})[0] + if workunit is None: + raise ValueError("ERROR: no workunit available for the given externaljobid.") + + assert isinstance(workunit._id, int) + + application = self.read_object('application', obj={'id': workunit.application._id})[0] + # TODO(cp): rename to application_execuatbel + workunit_executable = self.read_object('executable', obj={'id': workunit.applicationexecutable._id})[0] + try: + self.workunit_executableid = workunit_executable._id + except: + self.workunit_executableid = None + + # Get container details + container = workunit.container + fastasequence = "" + if container._classname=="order": + order = self.read_object('order', obj={'id': container._id})[0] + order_id = order._id + if "project" in order: + project_id = order.project._id + else: + project_id = None + if "fastasequence" in order: + fastasequence = "\n".join([x.strip() for x in str(order.fastasequence).split("\r")]) + else: + order_id = None + project_id = container._id + + today = datetime.date.today() + + # merge all information into the executable script + _output_storage = self.read_object('storage', obj={'id': application.storage._id})[0] + + _output_relative_path = "p{0}/bfabric/{1}/{2}/{3}/workunit_{4}/".format( + container._id, + application.technology.replace(' ', '_'), + application.name.replace(' ', '_'), + today.strftime('%Y/%Y-%m/%Y-%m-%d/'), + workunitid) + + # Setup the log_storage to SlurmLog with id 13 + _log_storage = self.read_object('storage', obj={'id': 13})[0] + + #_cmd_applicationList = [workunit_executable.program] + + application_parameter = {} + + if not getattr(workunit, "parameter", None) is None: + for para in workunit.parameter: + parameter = self.read_object('parameter', obj={'id': para._id}) + if parameter: + for p in parameter: + try: + application_parameter["{}".format(p.key)] = "{}".format(p.value) + except: + application_parameter["{}".format(p.key)] = "" + + try: + input_resources = [x._id for x in workunit.inputresource] + input_resources = [self.read_object(endpoint='resource', obj={'id': x})[0] for x in input_resources] + except: + print("no input resources found. 
continue with empty list.") + input_resources = [] + + + # query all urls and ids of the input resources + resource_urls = dict() + resource_ids = dict() + + for resource_iterator in input_resources: + try: + _appication_id = self.read_object(endpoint='workunit', + obj={'id': resource_iterator.workunit._id})[0].application._id + + _application_name = "{0}".format(self.read_object('application', obj={'id': _appication_id})[0].name) + + _storage = self.read_object('storage', {'id': resource_iterator.storage._id})[0] + + _inputUrl = "bfabric@{0}:/{1}/{2}".format(_storage.host, _storage.basepath, resource_iterator.relativepath) + + if not _application_name in resource_urls: + resource_urls[_application_name] = [] + resource_ids[_application_name] = [] + + resource_urls[_application_name].append(_inputUrl) + + sample_id = self.get_sampleid(int(resource_iterator._id)) + + _resource_sample = {'resource_id': int(resource_iterator._id), + 'resource_url': "{0}/userlab/show-resource.html?id={1}".format(self.config.base_url, resource_iterator._id)} + + + if not sample_id is None: + _resource_sample['sample_id'] = int(sample_id) + _resource_sample['sample_url'] = "{0}/userlab/show-sample.html?id={1}".format(self.config.base_url, sample_id) + + resource_ids[_application_name].append(_resource_sample) + except: + print ("resource_iterator failed. continue ...") + pass + + + # create resources for output, stderr, stdout + _ressource_output = self.save_object('resource', { + 'name': "{0} {1} - resource".format(application.name, len(input_resources)), + 'workunitid': workunit._id, + 'storageid': int(application.storage._id), + 'relativepath': _output_relative_path})[0] + + + print(_ressource_output) + _output_filename = "{0}.{1}".format(_ressource_output._id, application.outputfileformat) + # we want to include the resource._id into the filename + _ressource_output = self.save_object('resource', + {'id': int(_ressource_output._id), + 'relativepath': "{0}/{1}".format(_output_relative_path, _output_filename)})[0] + + print (_ressource_output) + _resource_stderr = self.save_object('resource', { + 'name': 'slurm_stderr', + 'workunitid': int(workunit._id), + 'storageid': _log_storage._id, + 'relativepath': "/workunitid-{0}_resourceid-{1}.err".format(workunit._id, _ressource_output._id)})[0] + + _resource_stdout = self.save_object('resource', { + 'name': 'slurm_stdout', + 'workunitid': workunit._id, + 'storageid': _log_storage._id, + 'relativepath': "/workunitid-{0}_resourceid-{1}.out".format(workunit._id, _ressource_output._id)})[0] + + + # Creates the workunit executable + # The config includes the externaljobid: the yaml_workunit_externaljob has to be created before it. + # The yaml_workunit_externaljob cannot be created without specifying an executableid: + # a yaml_workunit_executable is thus created before the config definition in order to provide + # the correct executableid to the yaml_workunit_externaljob. + # However this yaml_workunit_executable has to be updated later to include 'base64': base64.b64encode(config_serialized.encode()).decode() + yaml_workunit_executable = self.save_object('executable', {'name': 'job configuration (executable) in YAML', + 'context': 'WORKUNIT', + 'workunitid': workunit._id, + 'description': "This is a job configuration as YAML base64 encoded. 
It is configured to be executed by the B-Fabric yaml submitter."})[0] + print(yaml_workunit_executable) + + yaml_workunit_externaljob = self.save_object('externaljob', + {"workunitid": workunit._id, + 'status': 'new', + 'executableid' : yaml_workunit_executable._id, + 'action': "WORKUNIT"})[0] + print(yaml_workunit_externaljob) + assert isinstance(yaml_workunit_externaljob._id, int) + self.externaljobid_yaml_workunit = int(yaml_workunit_externaljob._id) + print(("XXXXXXX self.externaljobid_yaml_workunit ={} XXXXXXX".format(self.externaljobid_yaml_workunit))) + + _output_url = "bfabric@{0}:{1}{2}/{3}".format(_output_storage.host, + _output_storage.basepath, + _output_relative_path, + _output_filename) + + try: + query_obj = {'id': workunit.inputdataset._id} + inputdataset = self.read_object(endpoint='dataset', obj=query_obj)[0] + inputdataset_json = json.dumps(inputdataset, cls=bfabricEncoder, sort_keys=True, indent=2) + inputdataset = json.loads(inputdataset_json) + except: + inputdataset = None + + # Compose configuration structure + config = { + 'job_configuration': { + 'executable': "{}".format(workunit_executable.program), + 'inputdataset': inputdataset, + 'input': resource_ids, + 'output': { + 'protocol': 'scp', + 'resource_id': int(_ressource_output._id), + 'ssh_args': "-o StrictHostKeyChecking=no -2 -l bfabric -x" + }, + 'stderr': { + 'protocol': 'file', + 'resource_id': int(_resource_stderr._id) , + 'url': "{0}/workunitid-{1}_resourceid-{2}.err".format(_log_storage.basepath, workunit._id, _ressource_output._id) + }, + 'stdout': { + 'protocol': 'file', + 'resource_id': int(_resource_stdout._id), + 'url': "{0}/workunitid-{1}_resourceid-{2}.out".format(_log_storage.basepath, workunit._id, _ressource_output._id) + }, + 'workunit_id': int(workunit._id), + 'workunit_createdby': str(workunit.createdby), + 'workunit_url': "{0}/userlab/show-workunit.html?workunitId={1}".format(self.config.base_url, workunit._id), + 'external_job_id': int(yaml_workunit_externaljob._id), + 'order_id': order_id, + 'project_id': project_id, + 'fastasequence': fastasequence + }, + 'application' : { + 'protocol': 'scp', + 'parameters': application_parameter, + 'input': resource_urls, + 'output': [_output_url] + } + } + + config_serialized = data_serializer(config) + print(config_serialized) + + yaml_workunit_executable = self.save_object('executable', {'id': yaml_workunit_executable._id, + 'base64': base64.b64encode(config_serialized.encode()).decode(), + 'version': "{}".format(10)})[0] + print(yaml_workunit_executable) + + # The WrapperCreator executable is successful, and the status of the its external job is set to done, + # which triggers B-Fabric to create an external job for the submitter executable. 
+ + wrapper_creator_externaljob = self.save_object(endpoint='externaljob', + obj={'id': self.externaljobid, 'status': 'done'}) + + print(("\n\nquery_counter={0}".format(self.query_counter))) From 74c60a0a53be70157ebc7d0ac727dbdf639f5f03 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 3 May 2024 11:54:01 +0200 Subject: [PATCH 069/129] Update script bfabric_list_not_available_proteomics_workunits.py --- ...list_not_available_proteomics_workunits.py | 120 ++++++++++++------ pyproject.toml | 3 +- 2 files changed, 80 insertions(+), 43 deletions(-) diff --git a/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py b/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py index 56e01f49..face1688 100755 --- a/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py +++ b/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py @@ -1,57 +1,93 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- """ Copyright (C) 2023 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. -Lists applications that are not available on bfabric. +Lists proteomics workunits that are not available on bfabric. Author: Christian Panse Licensed under GPL version 3 """ +from __future__ import annotations -import sys -import bfabric - +from argparse import ArgumentParser from datetime import datetime, timedelta +from typing import Any + +from rich.console import Console +from rich.table import Column, Table + +from bfabric.bfabric2 import Bfabric, get_system_auth + + +def render_output(workunits_by_status: dict[str, list[dict[str, Any]]]) -> None: + """Renders the output as a table.""" + table = Table( + Column("AID", no_wrap=True), + Column("WUID", no_wrap=True), + Column("Created", no_wrap=True), + Column("Status", no_wrap=True), + Column("Created by", no_wrap=True, max_width=12), + Column("Name", no_wrap=False), + ) + + for status, workunits in workunits_by_status.items(): + workunits = [ + x for x in workunits if x["createdby"] not in ["gfeeder", "itfeeder"] + ] + status_color = { + "Pending": "yellow", + "Processing": "blue", + "Failed": "red", + }.get(status, "black") + + for wu in workunits: + app_url = f"https://fgcz-bfabric.uzh.ch/bfabric/application/show.html?id={wu['application']['id']}" + wu_url = f"https://fgcz-bfabric.uzh.ch/bfabric/workunit/show.html?id={wu['id']}&tab=details" + table.add_row( + f"[link={app_url}]A{wu['application']['id']:3}[/link]", + f"[link={wu_url}]WU{wu['id']}[/link]", + wu["created"], + f"[{status_color}]{status}[/{status_color}]", + wu["createdby"], + wu["name"], + ) + + console = Console() + console.print(table) + + +def list_not_available_proteomics_workunits(date_cutoff: datetime) -> None: + """Lists proteomics work units that are not available on bfabric.""" + Console(stderr=True).print( + f"--- list not available proteomics work units created after {date_cutoff}---", + style="bright_yellow", + ) + + workunits_by_status = {} + client = Bfabric(*get_system_auth()) + for status in ["Pending", "Processing", "Failed"]: + workunits_by_status[status] = client.read( + endpoint="workunit", + obj={"status": status, "createdafter": date_cutoff}, + ).to_list_dict() + + render_output(workunits_by_status) + + +def main() -> None: + """Parses the command line arguments and calls `list_not_available_proteomics_workunits`.""" + parser = ArgumentParser( + description="Lists proteomics work units that are not available on bfabric." 
+ ) + parser.add_argument( + "--max-age", type=int, help="Max age of work units in days", default=14 + ) + args = parser.parse_args() + date_cutoff = datetime.today() - timedelta(days=args.max_age) + list_not_available_proteomics_workunits(date_cutoff) -def print_color_msg(msg, color="93"): - msg = "\033[{color}m--- {} ---\033[0m\n".format(msg, color=color) - sys.stderr.write(msg) - -def render_output(wu): - wu = list(filter(lambda x: x.createdby not in ["gfeeder", "itfeeder"], wu)) - - cm = {"PENDING" : "\033[33mPending \033[0m", - "PROCESSING": "\033[34mProcessing\033[0m", - "FAILED" : "\033[31mFailed \033[0m"} - - for x in wu: - if x.status in cm: - statuscol = cm[x.status] - else: - statuscol = "\033[36m{} \033[0m".format(x.status) - print("A{aid:3} WU{wuid} {cdate} {status} {createdby:12} {name}" - .format(status = statuscol, - cdate = x.created, - wuid = x._id, - createdby = x.createdby, - name = x.name, - aid = x.application._id)) if __name__ == "__main__": - B = bfabric.Bfabric() - d = datetime.today() - timedelta(days=14) - - print_color_msg("list not available proteomics workunits created after {}".format(d)) - - for status in ['Pending', 'Processing', 'Failed']: - pwu = B.read_object(endpoint = 'workunit', - obj = {'status': status, 'createdafter': d}, - plain = True, - page = 1) - try: - render_output(pwu.workunit) - except: - pass + main() diff --git a/pyproject.toml b/pyproject.toml index 9739f951..3ac6ae7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ dependencies = [ "suds >= 1.1.2", "PyYAML >= 6.0", "Flask == 2.2.5", + "rich >= 13.7.1", "zeep >= 4.2.1", "pandas >= 2.2.2" ] @@ -35,7 +36,7 @@ Repository = "https://github.com/fgcz/bfabricPy" #bfabric_flask="bfabric.scripts.bfabric_flask:main" #bfabric_feeder_resource_autoQC="bfabric.scripts.bfabric_feeder_resource_autoQC:main" #bfabric_list_not_existing_storage_directories="bfabric.scripts.bfabric_list_not_existing_storage_directories:main" -#bfabric_list_not_available_proteomics_workunits="bfabric.scripts.bfabric_list_not_available_proteomics_workunits:main" +"bfabric_list_not_available_proteomics_workunits.py"="bfabric.scripts.bfabric_list_not_available_proteomics_workunits:main" #bfabric_upload_resource="bfabric.scripts.bfabric_upload_resource:main" #bfabric_logthis="bfabric.scripts.bfabric_logthis:main" #bfabric_setResourceStatus_available="bfabric.scripts.bfabric_setResourceStatus_available:main" From 1b13f10b6f12eb6552058f9a51ebe4142b80840e Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Fri, 3 May 2024 14:53:37 +0200 Subject: [PATCH 070/129] adapt 'make install' to toml --- Makefile | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index a8837c76..bc423fdc 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -test: test_workunit test_read +test: test_read test_workunit: python3 -m unittest -v bfabric/tests/test_bfabric_workunit.py @@ -6,12 +6,8 @@ test_workunit: test_read: cd bfabric/tests && python3 -m unittest -v test_bfabric_read.py -install: test build - sudo pip3 install dist/bfabric*.gz -e . - -build: clean - python3 setup.py sdist - +install: test + pip install -e . 
clean: rm -vf dist/* From e0f795998edf11fb510eda0744e3e6ae28487f1d Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 3 May 2024 15:22:32 +0200 Subject: [PATCH 071/129] correct url, better version message --- bfabric/bfabric2.py | 95 +++++++++++++------ ...list_not_available_proteomics_workunits.py | 23 ++--- bfabric/src/cli_formatting.py | 11 +++ 3 files changed, 87 insertions(+), 42 deletions(-) create mode 100644 bfabric/src/cli_formatting.py diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 0ffe92e4..645cf2cf 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """B-Fabric Application Interface using WSDL The code contains classes for wrapper_creator and submitter. @@ -22,21 +20,24 @@ History The python3 library first appeared in 2014. """ - import os -import sys -from pprint import pprint -from enum import Enum from copy import deepcopy +from datetime import datetime +from enum import Enum +from pprint import pprint from typing import Union, List, Optional -from bfabric.src.math_helper import div_int_ceil +from rich.console import Console + +from bfabric import __version__ as PACKAGE_VERSION +from bfabric.bfabric_config import BfabricAuth, BfabricConfig, read_config +from bfabric.src.cli_formatting import HostnameHighlighter, DEFAULT_THEME from bfabric.src.engine_suds import EngineSUDS from bfabric.src.engine_zeep import EngineZeep -from bfabric.src.result_container import ResultContainer, BfabricResultType -from bfabric.src.paginator import page_iter, BFABRIC_QUERY_LIMIT -from bfabric.bfabric_config import BfabricAuth, BfabricConfig, read_config from bfabric.src.errors import get_response_errors +from bfabric.src.math_helper import div_int_ceil +from bfabric.src.paginator import page_iter, BFABRIC_QUERY_LIMIT +from bfabric.src.result_container import ResultContainer, BfabricResultType class BfabricAPIEngineType(Enum): @@ -91,9 +92,6 @@ def get_system_auth(login: str = None, password: str = None, base_url: str = Non if not auth or not auth.login or not auth.password: raise ValueError("Authentification not initialized but required") - msg = f"\033[93m--- base_url {config.base_url}; login; {auth.login} ---\033[0m\n" - sys.stderr.write(msg) - if verbose: pprint(config) @@ -103,16 +101,20 @@ def get_system_auth(login: str = None, password: str = None, base_url: str = Non # TODO: What does idonly do for SUDS? Does it make sense for Zeep? # TODO: What does includedeletableupdateable do for Zeep? Does it make sense for Suds? # TODO: How to deal with save-skip fields in Zeep? Does it happen in SUDS? 
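A minimal sketch of how the class below is constructed with either engine (the configuration environment name is a hypothetical placeholder):

    from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth

    config, auth = get_system_auth(config_env="PRODUCTION")  # "PRODUCTION" is illustrative
    client = Bfabric(config, auth)                           # SUDS engine by default
    client_zeep = Bfabric(config, auth, engine=BfabricAPIEngineType.ZEEP)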
-class Bfabric(object): - """B-Fabric python3 module - Implements read and save object methods for B-Fabric wsdl interface - """ - - def __init__(self, config: BfabricConfig, auth: BfabricAuth, - engine: BfabricAPIEngineType = BfabricAPIEngineType.SUDS, verbose: bool = False): - +class Bfabric: + """Bfabric client class, providing general functionality for interaction with the B-Fabric API.""" + + def __init__( + self, + config: BfabricConfig, + auth: Optional[BfabricAuth], + engine: BfabricAPIEngineType = BfabricAPIEngineType.SUDS, + verbose: bool = False + ): self.verbose = verbose self.query_counter = 0 + self._config = config + self._auth = auth if engine == BfabricAPIEngineType.SUDS: self.engine = EngineSUDS(auth.login, auth.password, config.base_url) @@ -121,14 +123,24 @@ def __init__(self, config: BfabricConfig, auth: BfabricAuth, self.engine = EngineZeep(auth.login, auth.password, config.base_url) self.result_type = BfabricResultType.LISTZEEP else: - raise ValueError("Unexpected engine", BfabricAPIEngineType) + raise ValueError(f"Unexpected engine: {engine}") - def _read_method(self, readid: bool, endpoint: str, obj: dict, page: int = 1, **kwargs): - if readid: - # https://fgcz-bfabric.uzh.ch/wiki/tiki-index.php?page=endpoint.workunit#Web_Method_readid_ - return self.engine.readid(endpoint, obj, page=page, **kwargs) - else: - return self.engine.read(endpoint, obj, page=page, **kwargs) + if self.verbose: + self.print_version_message() + + @property + def config(self) -> BfabricConfig: + """Returns the config object.""" + return self._config + + @property + def auth(self) -> BfabricAuth: + """Returns the auth object. + :raises ValueError: If authentication is not available + """ + if self._auth is None: + raise ValueError("Authentication not available") + return self._auth def read(self, endpoint: str, obj: dict, max_results: Optional[int] = 100, readid: bool = False, check: bool = True, **kwargs) -> ResultContainer: @@ -141,6 +153,8 @@ def read(self, endpoint: str, obj: dict, max_results: Optional[int] = 100, readi are read or expected number of results has been reached. If None, load all available pages. NOTE: max_results will be rounded upwards to the nearest multiple of BFABRIC_QUERY_LIMIT, because results come in blocks, and there is little overhead to providing results over integer number of pages. + :param offset: the number of elements to skip before starting to return results (useful for pagination, default + is 0 which means no skipping) :param readid: whether to use reading by ID. Currently only available for engine=SUDS TODO: Test the extent to which this method works. 
Add safeguards :param check: whether to check for errors in the response @@ -197,6 +211,13 @@ def delete(self, endpoint: str, id: Union[List, int], check: bool = True) -> Res result.assert_success() return result + def _read_method(self, readid: bool, endpoint: str, obj: dict, page: int = 1, **kwargs): + if readid: + # https://fgcz-bfabric.uzh.ch/wiki/tiki-index.php?page=endpoint.workunit#Web_Method_readid_ + return self.engine.readid(endpoint, obj, page=page, **kwargs) + else: + return self.engine.read(endpoint, obj, page=page, **kwargs) + ############################ # Multi-query functionality ############################ @@ -296,3 +317,21 @@ def exists(self, endpoint: str, key: str, value: Union[List, Union[int, str]]) - else: return [val in result_vals for val in value] + def get_version_message(self) -> str: + """Returns the version message as a string.""" + year = datetime.now().year + engine_name = self.engine.__class__.__name__ + base_url = self.config.base_url + user_name = f"U={self._auth.login if self._auth else None}" + return ( + f"--- bfabricPy v{PACKAGE_VERSION} ({engine_name}, {base_url}, {user_name}) ---\n" + f"--- Copyright (C) 2014-{year} Functional Genomics Center Zurich ---" + ) + + def print_version_message(self, stderr: bool = True) -> None: + """Prints the version message to the console. + :param stderr: Whether to print to stderr (True, default) or stdout (False) + """ + console = Console(stderr=stderr, highlighter=HostnameHighlighter(), theme=DEFAULT_THEME) + console.print(self.get_version_message(), style="bright_yellow") + diff --git a/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py b/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py index face1688..51c7d137 100755 --- a/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py +++ b/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py @@ -18,10 +18,11 @@ from rich.console import Console from rich.table import Column, Table +from bfabric import BfabricConfig from bfabric.bfabric2 import Bfabric, get_system_auth -def render_output(workunits_by_status: dict[str, list[dict[str, Any]]]) -> None: +def render_output(workunits_by_status: dict[str, list[dict[str, Any]]], config: BfabricConfig) -> None: """Renders the output as a table.""" table = Table( Column("AID", no_wrap=True), @@ -33,9 +34,7 @@ def render_output(workunits_by_status: dict[str, list[dict[str, Any]]]) -> None: ) for status, workunits in workunits_by_status.items(): - workunits = [ - x for x in workunits if x["createdby"] not in ["gfeeder", "itfeeder"] - ] + workunits = [x for x in workunits if x["createdby"] not in ["gfeeder", "itfeeder"]] status_color = { "Pending": "yellow", "Processing": "blue", @@ -43,8 +42,8 @@ def render_output(workunits_by_status: dict[str, list[dict[str, Any]]]) -> None: }.get(status, "black") for wu in workunits: - app_url = f"https://fgcz-bfabric.uzh.ch/bfabric/application/show.html?id={wu['application']['id']}" - wu_url = f"https://fgcz-bfabric.uzh.ch/bfabric/workunit/show.html?id={wu['id']}&tab=details" + app_url = f"{config.base_url}/application/show.html?id={wu['application']['id']}" + wu_url = f"{config.base_url}/workunit/show.html?id={wu['id']}&tab=details" table.add_row( f"[link={app_url}]A{wu['application']['id']:3}[/link]", f"[link={wu_url}]WU{wu['id']}[/link]", @@ -60,30 +59,26 @@ def render_output(workunits_by_status: dict[str, list[dict[str, Any]]]) -> None: def list_not_available_proteomics_workunits(date_cutoff: datetime) -> None: """Lists proteomics 
work units that are not available on bfabric.""" + client = Bfabric(*get_system_auth(), verbose=True) Console(stderr=True).print( f"--- list not available proteomics work units created after {date_cutoff}---", style="bright_yellow", ) workunits_by_status = {} - client = Bfabric(*get_system_auth()) for status in ["Pending", "Processing", "Failed"]: workunits_by_status[status] = client.read( endpoint="workunit", obj={"status": status, "createdafter": date_cutoff}, ).to_list_dict() - render_output(workunits_by_status) + render_output(workunits_by_status, config=client.config) def main() -> None: """Parses the command line arguments and calls `list_not_available_proteomics_workunits`.""" - parser = ArgumentParser( - description="Lists proteomics work units that are not available on bfabric." - ) - parser.add_argument( - "--max-age", type=int, help="Max age of work units in days", default=14 - ) + parser = ArgumentParser(description="Lists proteomics work units that are not available on bfabric.") + parser.add_argument("--max-age", type=int, help="Max age of work units in days", default=14) args = parser.parse_args() date_cutoff = datetime.today() - timedelta(days=args.max_age) list_not_available_proteomics_workunits(date_cutoff) diff --git a/bfabric/src/cli_formatting.py b/bfabric/src/cli_formatting.py new file mode 100644 index 00000000..aa41f0a7 --- /dev/null +++ b/bfabric/src/cli_formatting.py @@ -0,0 +1,11 @@ +from rich.highlighter import RegexHighlighter +from rich.theme import Theme + + +class HostnameHighlighter(RegexHighlighter): + """Highlights hostnames in URLs.""" + base_style = "bfabric." + highlights = [r"https://(?P[^.]+)"] + + +DEFAULT_THEME = Theme({"bfabric.hostname": "bold red"}) From 04b9252583ec3034c03ab4d8e7fc32721e6f91ea Mon Sep 17 00:00:00 2001 From: Christian Panse Date: Tue, 7 May 2024 09:25:18 +0200 Subject: [PATCH 072/129] cosmetics Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bc423fdc..ea370964 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ test_workunit: test_read: cd bfabric/tests && python3 -m unittest -v test_bfabric_read.py -install: test +install: pip install -e . 
clean: From 76daee1d7343173428a6e3519e22d2218afc9d7a Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 7 May 2024 10:03:37 +0200 Subject: [PATCH 073/129] Extend PR Checks (#83) - Use `uv` - Adds: - Code style (does not need to pass) - License check --- .github/actions/setup-bfabricpy/action.yml | 17 +++++++++++ .github/workflows/run_unit_tests.yml | 33 +++++++++++++++++----- Makefile | 5 ++++ pyproject.toml | 23 +++++++++++++++ 4 files changed, 71 insertions(+), 7 deletions(-) create mode 100644 .github/actions/setup-bfabricpy/action.yml diff --git a/.github/actions/setup-bfabricpy/action.yml b/.github/actions/setup-bfabricpy/action.yml new file mode 100644 index 00000000..0543a337 --- /dev/null +++ b/.github/actions/setup-bfabricpy/action.yml @@ -0,0 +1,17 @@ +name: "Setup bfabricPy" +inputs: + python-version: + description: "Python version to use" + required: true +runs: + using: "composite" + steps: + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + - name: Install bfabricPy + shell: bash + run: | + pip install uv + uv pip install --system ".[dev]" diff --git a/.github/workflows/run_unit_tests.yml b/.github/workflows/run_unit_tests.yml index 85543fd4..aebbd7ba 100644 --- a/.github/workflows/run_unit_tests.yml +++ b/.github/workflows/run_unit_tests.yml @@ -1,4 +1,4 @@ -name: unit tests +name: PR Checks on: push: @@ -8,15 +8,34 @@ on: workflow_dispatch: jobs: - build: + unit_tests: + name: Unit Tests runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 + - uses: actions/checkout@v4 + - uses: ./.github/actions/setup-bfabricpy with: python-version: 3.9 - - name: Install bfabricPy - run: python -m pip install . - name: Run unit tests run: python -m unittest discover -s bfabric/tests/unit -p 'test_*.py' + code_style: + name: Code Style + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/setup-bfabricpy + with: + python-version: 3.9 + - name: Check code with ruff + run: + ruff bfabric || true + license_check: + name: License Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/setup-bfabricpy + with: + python-version: 3.9 + - name: Check licenses + run: licensecheck diff --git a/Makefile b/Makefile index ea370964..fa96778b 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,5 @@ +.PHONY: install install_dev build clean + test: test_read test_workunit: @@ -9,6 +11,9 @@ test_read: install: pip install -e . 
+install_dev: + pip install -e ".[dev]" + clean: rm -vf dist/* diff --git a/pyproject.toml b/pyproject.toml index 3ac6ae7c..843b1844 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,14 @@ dependencies = [ "pandas >= 2.2.2" ] +[project.optional-dependencies] +dev = [ + "black", + "isort", + "ruff", + "licensecheck" +] + [project.urls] Homepage = "https://github.com/fgcz/bfabricPy" Repository = "https://github.com/fgcz/bfabricPy" @@ -56,3 +64,18 @@ Repository = "https://github.com/fgcz/bfabricPy" #bfabric_save_resource="bfabric.scripts.bfabric_save_resource:main" #bfabric_save_workunit_attribute="bfabric.scripts.bfabric_save_workunit_attribute:main" #bfabric_save_workflowstep="bfabric.scripts.bfabric_save_workflowstep:main" + +[tool.black] +line-length = 120 +target-version = ["py39"] + +[tool.ruff] +line-length = 120 +indent-width = 4 +target-version = "py39" + +[tool.ruff.lint] +select = ["D103", "E", "F", "ANN", "PTH", "UP", "BLE", "SIM"] + +[tool.licensecheck] +using = "PEP631" From 2afcd0ccc260e4612a9ab231b58787af61add71b Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 7 May 2024 11:28:41 +0200 Subject: [PATCH 074/129] Move auth logic to Bfabric class (#79) - Make managing the `BfabricAuth` object the responsibility of the `Bfabric` class. This allows us to implement the contextmanager, which will be useful for the REST-proxy server. - Notably this shouldn't change anything about code that already uses the new class. - Rename `BfabricConfig.with_overrides` to `BfabricConfig.copy_with` for clarity. - Some unit tests for the Bfabric class, I will add more as I go. --- bfabric/bfabric2.py | 37 +++++++--- bfabric/bfabric_config.py | 4 +- bfabric/src/engine_suds.py | 89 ++++++++++++----------- bfabric/src/engine_zeep.py | 19 +++-- bfabric/tests/unit/test_bfabric.py | 59 +++++++++++++++ bfabric/tests/unit/test_bfabric_config.py | 8 +- 6 files changed, 145 insertions(+), 71 deletions(-) create mode 100644 bfabric/tests/unit/test_bfabric.py diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 645cf2cf..07ecaa62 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -21,6 +21,8 @@ The python3 library first appeared in 2014. """ import os +import sys +from contextlib import contextmanager from copy import deepcopy from datetime import datetime from enum import Enum @@ -45,13 +47,12 @@ class BfabricAPIEngineType(Enum): ZEEP = 2 -def get_system_auth(login: str = None, password: str = None, base_url: str = None, externaljobid=None, +def get_system_auth(login: str = None, password: str = None, base_url: str = None, config_path: str = None, config_env: str = None, optional_auth: bool = False, verbose: bool = False): """ :param login: Login string for overriding config file :param password: Password for overriding config file :param base_url: Base server url for overriding config file - :param externaljobid: ? :param config_path: Path to the config file, in case it is different from default :param config_env: Which config environment to use. 
Can also specify via environment variable or use default in the config file (at your own risk) @@ -78,7 +79,7 @@ def get_system_auth(login: str = None, password: str = None, base_url: str = Non # Load config from file, override some of the fields with the provided ones else: config, auth = read_config(config_path, config_env=config_env, optional_auth=optional_auth) - config = config.with_overrides(base_url=base_url) + config = config.copy_with(base_url=base_url) if (login is not None) and (password is not None): auth = BfabricAuth(login=login, password=password) elif (login is None) and (password is None): @@ -117,10 +118,10 @@ def __init__( self._auth = auth if engine == BfabricAPIEngineType.SUDS: - self.engine = EngineSUDS(auth.login, auth.password, config.base_url) + self.engine = EngineSUDS(base_url=config.base_url) self.result_type = BfabricResultType.LISTSUDS elif engine == BfabricAPIEngineType.ZEEP: - self.engine = EngineZeep(auth.login, auth.password, config.base_url) + self.engine = EngineZeep(base_url=config.base_url) self.result_type = BfabricResultType.LISTZEEP else: raise ValueError(f"Unexpected engine: {engine}") @@ -142,11 +143,23 @@ def auth(self) -> BfabricAuth: raise ValueError("Authentication not available") return self._auth + @contextmanager + def with_auth(self, auth: BfabricAuth): + """Context manager that temporarily (within the scope of the context) sets the authentication for + the Bfabric object to the provided value. This is useful when authenticating multiple users, to avoid accidental + use of the wrong credentials. + """ + old_auth = self._auth + self._auth = auth + try: + yield + finally: + self._auth = old_auth + def read(self, endpoint: str, obj: dict, max_results: Optional[int] = 100, readid: bool = False, check: bool = True, **kwargs) -> ResultContainer: - """ - Make a read query to the engine. Determine the number of pages. Make calls for every page, concatenate - results. + """Reads objects from the specified endpoint that match all specified attributes in `obj`. + By setting `max_results` it is possible to change the number of results that are returned. :param endpoint: endpoint :param obj: query dictionary :param max_results: cap on the number of results to query. 
The code will keep reading pages until all pages @@ -198,14 +211,14 @@ def read(self, endpoint: str, obj: dict, max_results: Optional[int] = 100, readi return result def save(self, endpoint: str, obj: dict, check: bool = True, **kwargs) -> ResultContainer: - results = self.engine.save(endpoint, obj, **kwargs) + results = self.engine.save(endpoint, obj, auth=self.auth, **kwargs) result = ResultContainer(results[endpoint], self.result_type, errors=get_response_errors(results, endpoint)) if check: result.assert_success() return result def delete(self, endpoint: str, id: Union[List, int], check: bool = True) -> ResultContainer: - results = self.engine.delete(endpoint, id) + results = self.engine.delete(endpoint, id, auth=self.auth) result = ResultContainer(results[endpoint], self.result_type, errors=get_response_errors(results, endpoint)) if check: result.assert_success() @@ -214,9 +227,9 @@ def delete(self, endpoint: str, id: Union[List, int], check: bool = True) -> Res def _read_method(self, readid: bool, endpoint: str, obj: dict, page: int = 1, **kwargs): if readid: # https://fgcz-bfabric.uzh.ch/wiki/tiki-index.php?page=endpoint.workunit#Web_Method_readid_ - return self.engine.readid(endpoint, obj, page=page, **kwargs) + return self.engine.readid(endpoint, obj, auth=self.auth, page=page, **kwargs) else: - return self.engine.read(endpoint, obj, page=page, **kwargs) + return self.engine.read(endpoint, obj, auth=self.auth, page=page, **kwargs) ############################ # Multi-query functionality diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index c5ae3b17..50b27ab2 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -25,7 +25,7 @@ def __str__(self): class BfabricConfig: """Holds the configuration for the B-Fabric client for connecting to particular instance of B-Fabric. - Attributes: + Parameters: base_url (optional): The API base url application_ids (optional): Map of application names to ids. job_notification_emails (optional): Space-separated list of email addresses to notify when a job finishes. 
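A small usage sketch of the `copy_with` method renamed in the next hunk (the test-instance URL is a hypothetical value):

    config = BfabricConfig(base_url="https://fgcz-bfabric.uzh.ch/bfabric", application_ids={"app": 1})
    test_config = config.copy_with(base_url="https://fgcz-bfabric-test.uzh.ch/bfabric")
    assert test_config.application_ids == {"app": 1}  # fields that are not overridden are carried over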
@@ -56,7 +56,7 @@ def job_notification_emails(self) -> str: """Space-separated list of email addresses to notify when a job finishes.""" return self._job_notification_emails - def with_overrides( + def copy_with( self, base_url: Optional[str] = None, application_ids: Optional[Dict[str, int]] = None, diff --git a/bfabric/src/engine_suds.py b/bfabric/src/engine_suds.py index b04d9b33..a3148b05 100644 --- a/bfabric/src/engine_suds.py +++ b/bfabric/src/engine_suds.py @@ -1,71 +1,74 @@ -from typing import Union, List +from __future__ import annotations + import copy +from typing import Any -from suds.client import Client from suds import MethodNotFound +from suds.client import Client +from suds.serviceproxy import ServiceProxy +from bfabric.bfabric_config import BfabricAuth from bfabric.src.errors import BfabricRequestError class EngineSUDS: """B-Fabric API SUDS Engine""" - def __init__(self, login: str, password: str, base_url: str): + def __init__(self, base_url: str) -> None: self.cl = {} - self.login = login - self.password = password self.base_url = base_url - def _get_client(self, endpoint: str): - try: - if endpoint not in self.cl: - wsdl = "".join((self.base_url, '/', endpoint, "?wsdl")) - self.cl[endpoint] = Client(wsdl, cache=None) - return self.cl[endpoint] - except Exception as e: - print(e) - raise - - def read(self, endpoint: str, obj: dict, page: int = 1, idonly: bool = False, - includedeletableupdateable: bool = False): - """ - A generic method which can connect to any endpoint, e.g., workunit, project, order, - externaljob, etc, and returns the object with the requested id. - obj is a python dictionary which contains all the attributes of the endpoint - for the "query". + def read( + self, + endpoint: str, + obj: dict[str, Any], + auth: BfabricAuth, + page: int = 1, + idonly: bool = False, + includedeletableupdateable: bool = False, + ): + """Reads the requested `obj` from `endpoint`. + :param endpoint: the endpoint to read, e.g. `workunit`, `project`, `order`, `externaljob`, etc. + :param obj: a python dictionary which contains all the attribute values that have to match + :param auth: the authentication handle of the user performing the request + :param page: the page number to read + :param idonly: whether to return only the ids of the objects + :param includedeletableupdateable: TODO """ query = copy.deepcopy(obj) - query['includedeletableupdateable'] = includedeletableupdateable - - full_query = dict(login=self.login, page=page, password=self.password, query=query, - idonly=idonly) + query["includedeletableupdateable"] = includedeletableupdateable - client = self._get_client(endpoint) - return client.service.read(full_query) + full_query = dict(login=auth.login, page=page, password=auth.password, query=query, idonly=idonly) + service = self._get_suds_service(endpoint) + return service.read(full_query) # TODO: How is client.service.readid different from client.service.read. Do we need this method? 
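+    # For reference, a direct `read` call looks roughly as follows
+    # (hypothetical values; normally `Bfabric.read` performs this call):
+    #   engine = EngineSUDS(base_url="https://fgcz-bfabric.uzh.ch/bfabric")
+    #   engine.read("sample", {"id": 1234}, auth=BfabricAuth(login="x", password="y"))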
- def readid(self, endpoint: str, obj: dict, page: int = 1): - query = dict(login=self.login, page=page, password=self.password, query=obj) - - client = self._get_client(endpoint) - return client.service.readid(query) - - def save(self, endpoint: str, obj: dict): - query = {'login': self.login, 'password': self.password, endpoint: obj} - - client = self._get_client(endpoint) + def readid(self, endpoint: str, obj: dict, auth: BfabricAuth, page: int = 1): + query = dict(login=auth.login, page=page, password=auth.password, query=obj) + service = self._get_suds_service(endpoint) + return service.readid(query) + + def save(self, endpoint: str, obj: dict, auth: BfabricAuth): + query = {"login": auth.login, "password": auth.password, endpoint: obj} + service = self._get_suds_service(endpoint) try: - res = client.service.save(query) + res = service.save(query) except MethodNotFound as e: raise BfabricRequestError(f"SUDS failed to find save method for the {endpoint} endpoint.") from e return res - def delete(self, endpoint: str, id: Union[int, List]): + def delete(self, endpoint: str, id: int | list[int], auth: BfabricAuth): if isinstance(id, list) and len(id) == 0: print("Warning, attempted to delete an empty list, ignoring") return [] - query = {'login': self.login, 'password': self.password, 'id': id} + query = {"login": auth.login, "password": auth.password, "id": id} + service = self._get_suds_service(endpoint) + return service.delete(query) - client = self._get_client(endpoint) - return client.service.delete(query) + def _get_suds_service(self, endpoint: str) -> ServiceProxy: + """Returns a SUDS service for the given endpoint. Reuses existing instances when possible.""" + if endpoint not in self.cl: + wsdl = "".join((self.base_url, "/", endpoint, "?wsdl")) + self.cl[endpoint] = Client(wsdl, cache=None) + return self.cl[endpoint].service diff --git a/bfabric/src/engine_zeep.py b/bfabric/src/engine_zeep.py index 88413f2d..57e628ed 100644 --- a/bfabric/src/engine_zeep.py +++ b/bfabric/src/engine_zeep.py @@ -3,6 +3,7 @@ import zeep import copy +from bfabric.bfabric_config import BfabricAuth from bfabric.src.errors import BfabricRequestError @@ -28,10 +29,8 @@ def _zeep_query_append_skipped(query: dict, skipped_keys: list, inplace: bool = class EngineZeep: """B-Fabric API Zeep Engine""" - def __init__(self, login: str, password: str, base_url: str): + def __init__(self, base_url: str): self.cl = {} - self.login = login - self.password = password self.base_url = base_url def _get_client(self, endpoint: str): @@ -44,7 +43,7 @@ def _get_client(self, endpoint: str): print(e) raise - def read(self, endpoint: str, obj: dict, page: int = 1, idonly: bool = False, + def read(self, endpoint: str, obj: dict, auth: BfabricAuth, page: int = 1, idonly: bool = False, includedeletableupdateable: bool = False): query = copy.deepcopy(obj) query['includedeletableupdateable'] = includedeletableupdateable @@ -55,17 +54,17 @@ def read(self, endpoint: str, obj: dict, page: int = 1, idonly: bool = False, 'includechildren', 'includeparents', 'includereplacements'] _zeep_query_append_skipped(query, excl_keys, inplace=True, overwrite=False) - full_query = dict(login=self.login, page=page, password=self.password, query=query, idonly=idonly) + full_query = dict(login=auth.login, page=page, password=auth.password, query=query, idonly=idonly) client = self._get_client(endpoint) with client.settings(strict=False, xml_huge_tree=True, xsd_ignore_sequence_order=True): return client.service.read(full_query) - def readid(self, 
endpoint: str, obj: dict, page: int = 1, includedeletableupdateable: bool = True): + def readid(self, endpoint: str, obj: dict, auth: BfabricAuth, page: int = 1, includedeletableupdateable: bool = True): raise NotImplementedError("Attempted to use a method `readid` of Zeep, which does not exist") - def save(self, endpoint: str, obj: dict, skipped_keys: list = None): - query = {'login': self.login, 'password': self.password, endpoint: obj} + def save(self, endpoint: str, obj: dict, auth: BfabricAuth, skipped_keys: list = None): + query = {'login': auth.login, 'password': auth.password, endpoint: obj} # If necessary, add skipped keys to the query if skipped_keys is not None: @@ -82,12 +81,12 @@ def save(self, endpoint: str, obj: dict, skipped_keys: list = None): raise e return res - def delete(self, endpoint: str, id: Union[int, List]): + def delete(self, endpoint: str, id: Union[int, List], auth: BfabricAuth): if isinstance(id, list) and len(id) == 0: print("Warning, attempted to delete an empty list, ignoring") return [] - query = {'login': self.login, 'password': self.password, 'id': id} + query = {'login': auth.login, 'password': auth.password, 'id': id} client = self._get_client(endpoint) return client.service.delete(query) diff --git a/bfabric/tests/unit/test_bfabric.py b/bfabric/tests/unit/test_bfabric.py new file mode 100644 index 00000000..23a2b178 --- /dev/null +++ b/bfabric/tests/unit/test_bfabric.py @@ -0,0 +1,59 @@ +import unittest +from functools import cached_property +from unittest.mock import MagicMock + +from bfabric import BfabricConfig +from bfabric.bfabric2 import BfabricAPIEngineType, Bfabric +from bfabric.src.engine_suds import EngineSUDS + + +class TestBfabric(unittest.TestCase): + def setUp(self): + self.mock_config = MagicMock(name="mock_config", spec=BfabricConfig) + self.mock_auth = None + self.mock_engine_type = BfabricAPIEngineType.SUDS + self.mock_engine = MagicMock(name="mock_engine", spec=EngineSUDS) + + @cached_property + def mock_bfabric(self) -> Bfabric: + return Bfabric(config=self.mock_config, auth=self.mock_auth, engine=self.mock_engine_type) + + def test_query_counter(self): + self.assertEqual(0, self.mock_bfabric.query_counter) + + def test_config(self): + self.assertEqual(self.mock_config, self.mock_bfabric.config) + + def test_auth_when_missing(self): + with self.assertRaises(ValueError) as error: + _ = self.mock_bfabric.auth + self.assertIn("Authentication not available", str(error.exception)) + + def test_auth_when_provided(self): + self.mock_auth = MagicMock(name="mock_auth") + self.assertEqual(self.mock_auth, self.mock_bfabric.auth) + + def test_with_auth(self): + mock_old_auth = MagicMock(name="mock_old_auth") + mock_new_auth = MagicMock(name="mock_new_auth") + self.mock_auth = mock_old_auth + with self.mock_bfabric.with_auth(mock_new_auth): + self.assertEqual(mock_new_auth, self.mock_bfabric.auth) + self.assertEqual(mock_old_auth, self.mock_bfabric.auth) + + def test_with_auth_when_exception(self): + mock_old_auth = MagicMock(name="mock_old_auth") + mock_new_auth = MagicMock(name="mock_new_auth") + self.mock_auth = mock_old_auth + try: + with self.mock_bfabric.with_auth(mock_new_auth): + raise ValueError("Test exception") + except ValueError: + pass + self.assertEqual(mock_old_auth, self.mock_bfabric.auth) + + # TODO further unit tests + + +if __name__ == "__main__": + unittest.main() diff --git a/bfabric/tests/unit/test_bfabric_config.py b/bfabric/tests/unit/test_bfabric_config.py index c0389ac8..c51c802a 100644 --- 
a/bfabric/tests/unit/test_bfabric_config.py +++ b/bfabric/tests/unit/test_bfabric_config.py @@ -37,8 +37,8 @@ def test_default_params_when_specified(self): self.assertEqual({}, config.application_ids) self.assertEqual("", config.job_notification_emails) - def test_with_overrides(self): - new_config = self.config.with_overrides( + def test_copy_with_overrides(self): + new_config = self.config.copy_with( base_url="new_url", application_ids={"new": 2}, ) @@ -47,8 +47,8 @@ def test_with_overrides(self): self.assertEqual("url", self.config.base_url) self.assertEqual({"app": 1}, self.config.application_ids) - def test_with_replaced_when_none(self): - new_config = self.config.with_overrides(base_url=None, application_ids=None) + def test_copy_with_replaced_when_none(self): + new_config = self.config.copy_with(base_url=None, application_ids=None) self.assertEqual("url", new_config.base_url) self.assertEqual({"app": 1}, new_config.application_ids) self.assertEqual("url", self.config.base_url) From 8f5635b34602b9576af8037dde4628053a20f3f0 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 7 May 2024 12:02:09 +0200 Subject: [PATCH 075/129] add config field server_timezone (#84) --- bfabric/bfabric_config.py | 127 +++++++++++++--------- bfabric/src/errors.py | 17 ++- bfabric/tests/unit/example_config.yml | 1 + bfabric/tests/unit/test_bfabric_config.py | 30 +++-- 4 files changed, 107 insertions(+), 68 deletions(-) diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index 50b27ab2..b57dc86b 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -1,12 +1,14 @@ from __future__ import annotations +import dataclasses import logging import os -from typing import Optional, Dict, Tuple, Union -import dataclasses -import yaml from pathlib import Path +import yaml + +from bfabric.src.errors import BfabricConfigError + @dataclasses.dataclass(frozen=True) class BfabricAuth: @@ -15,10 +17,10 @@ class BfabricAuth: login: str password: str - def __repr__(self): + def __repr__(self) -> str: return f"BfabricAuth(login={repr(self.login)}, password=...)" - def __str__(self): + def __str__(self) -> str: return repr(self) @@ -29,17 +31,20 @@ class BfabricConfig: base_url (optional): The API base url application_ids (optional): Map of application names to ids. job_notification_emails (optional): Space-separated list of email addresses to notify when a job finishes. 
+        server_timezone (optional): Timezone name of the server (used for queries)
    """

    def __init__(
        self,
-        base_url: Optional[str] = None,
-        application_ids: Optional[Dict[str, int]] = None,
-        job_notification_emails: Optional[str] = None
-    ):
+        base_url: str | None = None,
+        application_ids: dict[str, int] | None = None,
+        job_notification_emails: str | None = None,
+        server_timezone: str = "Europe/Zurich",
+    ) -> None:
        self._base_url = base_url or "https://fgcz-bfabric.uzh.ch/bfabric"
        self._application_ids = application_ids or {}
        self._job_notification_emails = job_notification_emails or ""
+        self._server_timezone = server_timezone

    @property
    def base_url(self) -> str:
@@ -47,7 +52,7 @@ def base_url(self) -> str:
        return self._base_url

    @property
-    def application_ids(self) -> Dict[str, int]:
+    def application_ids(self) -> dict[str, int]:
        """Map of known application names to ids."""
        return self._application_ids

@@ -56,26 +61,33 @@ def job_notification_emails(self) -> str:
        """Space-separated list of email addresses to notify when a job finishes."""
        return self._job_notification_emails

+    @property
+    def server_timezone(self) -> str:
+        """Timezone name of the server (used for queries)."""
+        return self._server_timezone
+
    def copy_with(
        self,
-        base_url: Optional[str] = None,
-        application_ids: Optional[Dict[str, int]] = None,
+        base_url: str | None = None,
+        application_ids: dict[str, int] | None = None,
    ) -> BfabricConfig:
        """Returns a copy of the configuration with new values applied, if they are not None."""
        return BfabricConfig(
            base_url=base_url if base_url is not None else self.base_url,
-            application_ids=application_ids
-            if application_ids is not None
-            else self.application_ids,
+            application_ids=(application_ids if application_ids is not None else self.application_ids),
+            job_notification_emails=self.job_notification_emails,
+            server_timezone=self.server_timezone,
        )

-    def __repr__(self):
+    def __repr__(self) -> str:
        return (
            f"BfabricConfig(base_url={repr(self.base_url)}, application_ids={repr(self.application_ids)}, "
-            f"job_notification_emails={repr(self.job_notification_emails)})"
+            f"job_notification_emails={repr(self.job_notification_emails)}, "
+            f"server_timezone={repr(self.server_timezone)})"
        )

-def _read_config_env_as_dict(config_path: Union[str, Path], config_env: str = None) -> Tuple[str, dict]:
+
+def _read_config_env_as_dict(config_path: Path, config_env: str | None = None) -> tuple[str, dict]:
    """
    Reads and partially parses a bfabricpy.yml file
    :param config_path: Path to the configuration file.
It is assumed that it exists @@ -86,39 +98,50 @@ def _read_config_env_as_dict(config_path: Union[str, Path], config_env: str = No logger = logging.getLogger(__name__) logger.info(f"Reading configuration from: {config_path}") - if os.path.splitext(config_path)[1] != '.yml': - raise IOError(f"Expected config file with .yml extension, got {config_path}") + if config_path.suffix != ".yml": + raise OSError(f"Expected config file with .yml extension, got {config_path}") # Read the config file - config_dict = yaml.safe_load(Path(config_path).read_text()) + config_dict = yaml.safe_load(config_path.read_text()) - if "GENERAL" not in config_dict: - raise IOError("Config file must have a general section") - if 'default_config' not in config_dict['GENERAL']: - raise IOError("Config file must provide a default environment") - config_env_default = config_dict['GENERAL']['default_config'] + if "default_config" not in config_dict.get("GENERAL", {}): + raise BfabricConfigError("Config file must provide a `default_config` in the `GENERAL` section") + config_env_default = config_dict["GENERAL"]["default_config"] # Determine which environment we will use # By default, use the one provided by config_env - if config_env is None: - # Try to find a relevant + config_env = _select_config_env( + explicit_config_env=config_env, config_file_default_config=config_env_default, logger=logger + ) + if config_env not in config_dict: + raise BfabricConfigError(f"The requested config environment {config_env} is not present in the config file") + + return config_env, config_dict[config_env] + + +def _select_config_env(explicit_config_env: str | None, config_file_default_config: str, logger: logging.Logger) -> str: + """Selects the appropriate configuration environment to use, based on the provided arguments. + :param explicit_config_env: Explicitly provided configuration environment to use (i.e. 
from a function argument) + :param config_file_default_config: Default configuration environment to use, as specified in the config file + :param logger: Logger to use for output + """ + if explicit_config_env is None: config_env = os.getenv("BFABRICPY_CONFIG_ENV") if config_env is None: - logger.info(f"BFABRICPY_CONFIG_ENV not found, using default environment {config_env_default}") - config_env = config_env_default + logger.info(f"BFABRICPY_CONFIG_ENV not found, using default environment {config_file_default_config}") + config_env = config_file_default_config else: logger.info(f"found BFABRICPY_CONFIG_ENV = {config_env}") else: + config_env = explicit_config_env logger.info(f"config environment specified explicitly as {config_env}") + return config_env - if config_env not in config_dict: - raise IOError(f"The requested config environment {config_env} is not present in the config file") - return config_env, config_dict[config_env] +def _have_all_keys(dict_: dict, expected_keys: list) -> bool: + """Returns True if all elements in list l are present as keys in dict d, otherwise false""" + return all(k in dict_ for k in expected_keys) -def _have_all_keys(d: dict, l: list) -> bool: - """True if all elements in list l are present as keys in dict d, otherwise false""" - return all([k in d for k in l]) def _parse_dict(d: dict, mandatory_keys: list, optional_keys: list = None, error_prefix: str = " ") -> dict: """ @@ -132,27 +155,27 @@ def _parse_dict(d: dict, mandatory_keys: list, optional_keys: list = None, error """ missing_keys = set(mandatory_keys) - set(d) if missing_keys: - raise ValueError(f"{error_prefix}{missing_keys}") + raise BfabricConfigError(f"{error_prefix}{missing_keys}") result_keys = set(mandatory_keys) | set(optional_keys or []) d_rez = {k: d[k] for k in result_keys if k in d} # Ignore all other fields return d_rez -def read_config(config_path: Union[str, Path], config_env: str = None, - optional_auth: bool = False) -> Tuple[BfabricConfig, Optional[BfabricAuth]]: + +def read_config( + config_path: str | Path, + config_env: str = None, +) -> tuple[BfabricConfig, BfabricAuth | None]: """ Reads bfabricpy.yml file, parses it, extracting authentication and configuration data :param config_path: Path to the configuration file. It is assumed the file exists :param config_env: Configuration environment to use. If not given, it is deduced. - :param optional_auth: Whether authentication is optional. - If not, both login and password must be present in the config file, otherwise an exception is thrown - If yes, missing login and password would result in authentication class being None, but no exception :return: Configuration and Authentication class instances NOTE: BFabricPy expects a .bfabricpy.yml of the format, as seen in bfabricPy/tests/unit/example_config.yml * The general field always has to be present - * There may be any number of environments, and they may have arbitrary names. Here, they are called PRODUCTION and TEST + * There may be any number of environments, with arbitrary names. Here, they are called PRODUCTION and TEST * Must specify correct login, password and base_url for each environment. 
* application and job_notification_emails fields are optional * The default environment will be selected as follows: @@ -161,22 +184,24 @@ def read_config(config_path: Union[str, Path], config_env: str = None, - If not, finally, the parser will select the default_config specified in [GENERAL] of the .bfabricpy.yml file """ - - config_env_final, config_dict = _read_config_env_as_dict(config_path, config_env=config_env) + config_env_final, config_dict = _read_config_env_as_dict(Path(config_path), config_env=config_env) error_prefix = f"Config environment {config_env_final} does not have a compulsory field: " - # Parse authentification - if optional_auth and not _have_all_keys(config_dict, ['login', 'password']): - # Allow returning None auth if enabled + # Parse authentication + if not _have_all_keys(config_dict, ["login", "password"]): auth = None else: - auth_dict = _parse_dict(config_dict, ['login', 'password'], error_prefix=error_prefix) + auth_dict = _parse_dict(config_dict, ["login", "password"], error_prefix=error_prefix) auth = BfabricAuth(**auth_dict) # Parse config - config_dict = _parse_dict(config_dict, ['base_url'], optional_keys=['application_ids', 'job_notification_emails'], - error_prefix=error_prefix) + config_dict = _parse_dict( + config_dict, + ["base_url"], + optional_keys=["application_ids", "job_notification_emails", "server_timezone"], + error_prefix=error_prefix, + ) config = BfabricConfig(**config_dict) return config, auth diff --git a/bfabric/src/errors.py b/bfabric/src/errors.py index 269ba924..c2259a4e 100644 --- a/bfabric/src/errors.py +++ b/bfabric/src/errors.py @@ -1,18 +1,25 @@ -from typing import List +from __future__ import annotations class BfabricRequestError(Exception): """An error that is returned by the server in response to a full request.""" - def __init__(self, message: str): + + def __init__(self, message: str) -> None: self.message = message - def __repr__(self): + def __repr__(self) -> str: return f"RequestError(message={repr(self.message)})" + +class BfabricConfigError(RuntimeError): + """An error that is raised when the configuration is invalid.""" + pass + + # TODO: Also test for response-level errors -def get_response_errors(response, endpoint: str) -> List[BfabricRequestError]: +def get_response_errors(response, endpoint: str) -> list[BfabricRequestError]: """ - :param response: A raw response to a query from an underlying engine + :param response: A raw response to a query from an underlying engine :param endpoint: The target endpoint :return: A list of errors for each query result, if that result failed Thus, a successful query would result in an empty list diff --git a/bfabric/tests/unit/example_config.yml b/bfabric/tests/unit/example_config.yml index 75e7eaec..2f2d1134 100644 --- a/bfabric/tests/unit/example_config.yml +++ b/bfabric/tests/unit/example_config.yml @@ -15,6 +15,7 @@ TEST: Proteomics/DOG_552: 6 Proteomics/DUCK_666: 12 job_notification_emails: john.snow@fgcz.uzh.ch billy.the.kid@fgcz.ethz.ch + server_timezone: UTC STANDBY: base_url: https://standby-server.uzh.ch/mystandby \ No newline at end of file diff --git a/bfabric/tests/unit/test_bfabric_config.py b/bfabric/tests/unit/test_bfabric_config.py index c51c802a..3e7dfbd0 100644 --- a/bfabric/tests/unit/test_bfabric_config.py +++ b/bfabric/tests/unit/test_bfabric_config.py @@ -2,7 +2,7 @@ import unittest from pathlib import Path -from bfabric.bfabric_config import BfabricConfig, BfabricAuth, read_config +from bfabric.bfabric_config import BfabricAuth, BfabricConfig, read_config 
class TestBfabricAuth(unittest.TestCase): @@ -22,6 +22,7 @@ def setUp(self): self.config = BfabricConfig( base_url="url", application_ids={"app": 1}, + server_timezone="t/z", ) self.example_config_path = Path(__file__).parent / "example_config.yml" @@ -58,7 +59,7 @@ def test_copy_with_replaced_when_none(self): # TODO: Test that logging is consistent with initialization def test_read_yml_bypath_default(self): # Ensure environment variable is not available, and the default is environment is loaded - os.environ.pop('BFABRICPY_CONFIG_ENV', None) + os.environ.pop("BFABRICPY_CONFIG_ENV", None) config, auth = read_config(self.example_config_path) self.assertEqual("my_epic_production_login", auth.login) @@ -80,7 +81,7 @@ def test_read_yml_bypath_environment_variable(self): # TODO: Test that logging is consistent with default config def test_read_yml_bypath_all_fields(self): with self.assertLogs(level="INFO") as log_context: - config, auth = read_config(self.example_config_path, config_env='TEST') + config, auth = read_config(self.example_config_path, config_env="TEST") # # Testing log # self.assertEqual( @@ -96,42 +97,47 @@ def test_read_yml_bypath_all_fields(self): self.assertEqual("https://mega-test-server.uzh.ch/mytest", config.base_url) applications_dict_ground_truth = { - 'Proteomics/CAT_123': 7, - 'Proteomics/DOG_552': 6, - 'Proteomics/DUCK_666': 12 + "Proteomics/CAT_123": 7, + "Proteomics/DOG_552": 6, + "Proteomics/DUCK_666": 12, } job_notification_emails_ground_truth = "john.snow@fgcz.uzh.ch billy.the.kid@fgcz.ethz.ch" self.assertEqual(applications_dict_ground_truth, config.application_ids) self.assertEqual(job_notification_emails_ground_truth, config.job_notification_emails) + self.assertEqual("UTC", config.server_timezone) # Testing that we can load base_url without authentication if correctly requested def test_read_yml_when_empty_optional(self): with self.assertLogs(level="INFO"): - config, auth = read_config(self.example_config_path, config_env='STANDBY', optional_auth=True) + config, auth = read_config(self.example_config_path, config_env="STANDBY") self.assertIsNone(auth) self.assertEqual("https://standby-server.uzh.ch/mystandby", config.base_url) self.assertEqual({}, config.application_ids) self.assertEqual("", config.job_notification_emails) + self.assertEqual("Europe/Zurich", config.server_timezone) + # TODO delete if no mandatory fields are reintroduced # Test that missing authentication will raise an error if required - def test_read_yml_when_empty_mandatory(self): - with self.assertRaises(ValueError): - read_config(self.example_config_path, config_env='STANDBY', optional_auth=False) + #def test_read_yml_when_empty_mandatory(self): + # with self.assertRaises(BfabricConfigError): + # read_config(self.example_config_path, config_env="STANDBY") def test_repr(self): rep = repr(self.config) self.assertEqual( - "BfabricConfig(base_url='url', application_ids={'app': 1}, job_notification_emails='')", + "BfabricConfig(base_url='url', application_ids={'app': 1}, " + "job_notification_emails='', server_timezone='t/z')", rep, ) def test_str(self): rep = str(self.config) self.assertEqual( - "BfabricConfig(base_url='url', application_ids={'app': 1}, job_notification_emails='')", + "BfabricConfig(base_url='url', application_ids={'app': 1}, " + "job_notification_emails='', server_timezone='t/z')", rep, ) From 1472332a875a383819c06735f2aa5c8912b5ff61 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 7 May 2024 14:08:35 +0200 Subject: [PATCH 076/129] new pagination interface, more 
stable reads, Bfabric.from_config (#85)

- New pagination logic:
  - Specify the max number of items to return
  - Specify an offset of items to skip
- Stable reading
  - Reading passes a `createdbefore` query field (if it's not part of the query) to ensure consistent reads/pagination in the presence of insertions into the database. Deletions are not handled and might require an API feature.
  - It will need to be tested further before releasing, but the idea is that this is a lot more flexible moving forward.
- New `Bfabric.from_config`: arguably a bit redundant with the `get_system_auth` method, but it should cover 99% of use cases and be easier to use. The existing usage is not changed in this PR yet; a short usage sketch is included after the `from_config` hunk below.
---
 bfabric/bfabric2.py                  | 180 +++++++++++++++++++--------
 bfabric/src/paginator.py             |  40 +++++-
 bfabric/tests/unit/test_bfabric.py   |  99 ++++++++++++++-
 bfabric/tests/unit/test_paginator.py |  64 ++++++++++
 4 files changed, 328 insertions(+), 55 deletions(-)

diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py
index 07ecaa62..c6dc6379 100755
--- a/bfabric/bfabric2.py
+++ b/bfabric/bfabric2.py
@@ -20,26 +20,29 @@ History
 The python3 library first appeared in 2014.
 """
+from __future__ import annotations
+
+import base64
+import logging
 import os
-import sys
 from contextlib import contextmanager
 from copy import deepcopy
 from datetime import datetime
 from enum import Enum
 from pprint import pprint
-from typing import Union, List, Optional
+from typing import Any, Literal, ContextManager
+from zoneinfo import ZoneInfo

 from rich.console import Console

 from bfabric import __version__ as PACKAGE_VERSION
 from bfabric.bfabric_config import BfabricAuth, BfabricConfig, read_config
-from bfabric.src.cli_formatting import HostnameHighlighter, DEFAULT_THEME
+from bfabric.src.cli_formatting import DEFAULT_THEME, HostnameHighlighter
 from bfabric.src.engine_suds import EngineSUDS
 from bfabric.src.engine_zeep import EngineZeep
 from bfabric.src.errors import get_response_errors
-from bfabric.src.math_helper import div_int_ceil
-from bfabric.src.paginator import page_iter, BFABRIC_QUERY_LIMIT
-from bfabric.src.result_container import ResultContainer, BfabricResultType
+from bfabric.src.paginator import BFABRIC_QUERY_LIMIT, compute_requested_pages, page_iter
+from bfabric.src.result_container import BfabricResultType, ResultContainer


 class BfabricAPIEngineType(Enum):
@@ -47,8 +50,15 @@ class BfabricAPIEngineType(Enum):
     ZEEP = 2


-def get_system_auth(login: str = None, password: str = None, base_url: str = None,
-                    config_path: str = None, config_env: str = None, optional_auth: bool = False, verbose: bool = False):
+def get_system_auth(
+    login: str | None = None,
+    password: str | None = None,
+    base_url: str | None = None,
+    config_path: str | None = None,
+    config_env: str | None = None,
+    optional_auth: bool = True,
+    verbose: bool = False,
+) -> tuple[BfabricConfig, BfabricAuth | None]:
     """
     :param login: Login string for overriding config file
     :param password: Password for overriding config file
@@ -70,22 +80,25 @@ def get_system_auth(login: str = None, password: str = None, base_url: str = Non
     if not os.path.isfile(config_path):
         if have_config_path:
             # NOTE: If user explicitly specifies a path to a wrong config file, this has to be an exception
-            raise IOError(f"Explicitly specified config file does not exist: {config_path}")
+            raise OSError(f"Explicitly specified config file does not exist: {config_path}")
         # TODO: Convert to log
         print(f"Warning: could not find the config file in the default location: {config_path}")
        config =
BfabricConfig(base_url=base_url) - auth = BfabricAuth(login=login, password=password) + if login is None and password is None: + auth = None + else: + auth = BfabricAuth(login=login, password=password) # Load config from file, override some of the fields with the provided ones else: - config, auth = read_config(config_path, config_env=config_env, optional_auth=optional_auth) + config, auth = read_config(config_path, config_env=config_env) config = config.copy_with(base_url=base_url) if (login is not None) and (password is not None): auth = BfabricAuth(login=login, password=password) elif (login is None) and (password is None): auth = auth else: - raise IOError("Must provide both username and password, or neither.") + raise OSError("Must provide both username and password, or neither.") if not config.base_url: raise ValueError("base_url missing") @@ -108,14 +121,15 @@ class Bfabric: def __init__( self, config: BfabricConfig, - auth: Optional[BfabricAuth], + auth: BfabricAuth | None, engine: BfabricAPIEngineType = BfabricAPIEngineType.SUDS, - verbose: bool = False - ): + verbose: bool = False, + ) -> None: self.verbose = verbose self.query_counter = 0 self._config = config self._auth = auth + self._zone_info = ZoneInfo(config.server_timezone) if engine == BfabricAPIEngineType.SUDS: self.engine = EngineSUDS(base_url=config.base_url) @@ -129,6 +143,29 @@ def __init__( if self.verbose: self.print_version_message() + @classmethod + def from_config( + cls, + config_env: str | None = None, + auth: BfabricAuth | Literal["config"] | None = "config", + engine: BfabricAPIEngineType = BfabricAPIEngineType.SUDS, + verbose: bool = False, + ) -> Bfabric: + """Returns a new Bfabric instance, configured with the user configuration file. + If the `config_env` is specified then it will be used, if it is not specified the default environment will be + determined by checking the following in order (picking the first one that is found): + - The `BFABRICPY_CONFIG_ENV` environment variable + - The `default_config` field in the config file "GENERAL" section + :param config_env: Configuration environment to use. If not given, it is deduced as described above. + :param auth: Authentication to use. If "config" is given, the authentication will be read from the config file. + If it is set to None, no authentication will be used. + :param engine: Engine to use for the API. Default is SUDS. + :param verbose: Print a system info message to standard error console + """ + config, auth_config = get_system_auth(config_env=config_env) + auth_used: BfabricAuth | None = auth_config if auth == "config" else auth + return cls(config, auth_used, engine=engine, verbose=verbose) + @property def config(self) -> BfabricConfig: """Returns the config object.""" @@ -144,7 +181,7 @@ def auth(self) -> BfabricAuth: return self._auth @contextmanager - def with_auth(self, auth: BfabricAuth): + def with_auth(self, auth: BfabricAuth) -> ContextManager[Bfabric]: """Context manager that temporarily (within the scope of the context) sets the authentication for the Bfabric object to the provided value. This is useful when authenticating multiple users, to avoid accidental use of the wrong credentials. 
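A minimal usage sketch of the interface introduced above (the config environment name, endpoint and query values below are hypothetical):

    from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType

    # Build a client from ~/.bfabricpy.yml; authentication is read from the
    # config file by default (auth="config").
    client = Bfabric.from_config(config_env="TEST", engine=BfabricAPIEngineType.SUDS)

    # Read at most 10 samples of one container, skipping the first 50 matches.
    # A `createdbefore` timestamp is added automatically, so pagination stays
    # consistent if new items are inserted while reading.
    results = client.read("sample", {"containerid": 3000}, max_results=10, offset=50)
    print(results.to_list_dict())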
@@ -156,8 +193,15 @@ def with_auth(self, auth: BfabricAuth): finally: self._auth = old_auth - def read(self, endpoint: str, obj: dict, max_results: Optional[int] = 100, readid: bool = False, check: bool = True, - **kwargs) -> ResultContainer: + def read( + self, + endpoint: str, + obj: dict[str, Any], + max_results: int | None = 100, + offset: int = 0, + readid: bool = False, + check: bool = True, + ) -> ResultContainer: """Reads objects from the specified endpoint that match all specified attributes in `obj`. By setting `max_results` it is possible to change the number of results that are returned. :param endpoint: endpoint @@ -173,71 +217,104 @@ def read(self, endpoint: str, obj: dict, max_results: Optional[int] = 100, readi :param check: whether to check for errors in the response :return: List of responses, packaged in the results container """ + # Ensure stability + obj = self._add_query_timestamp(obj) # Get the first page. # NOTE: According to old interface, this is equivalent to plain=True - response = self._read_method(readid, endpoint, obj, page=1, **kwargs) + response, errors = self._read_page(readid, endpoint, obj, page=1) + try: - n_pages = response["numberofpages"] + n_available_pages = response["numberofpages"] except AttributeError: - n_pages = 0 + n_available_pages = 0 # Return empty list if nothing found - if not n_pages: - result = ResultContainer([], self.result_type, total_pages_api=0, errors=get_response_errors(response, endpoint)) + if not n_available_pages: + result = ResultContainer( + [], self.result_type, total_pages_api=0, errors=get_response_errors(response, endpoint) + ) if check: result.assert_success() return result # Get results from other pages as well, if need be - # Only load as many pages as user has interest in - if max_results is None: - n_pages_trg = n_pages - else: - n_pages_trg = min(n_pages, div_int_ceil(max_results, BFABRIC_QUERY_LIMIT)) + requested_pages, initial_offset = compute_requested_pages( + n_page_total=n_available_pages, + n_item_per_page=BFABRIC_QUERY_LIMIT, + n_item_offset=offset, + n_item_return_max=max_results, + ) + logging.info(f"Requested pages: {requested_pages}") # NOTE: Page numbering starts at 1 - response_items = response[endpoint] - errors = [] - for i_page in range(2, n_pages_trg + 1): - print('-- reading page', i_page, 'of', n_pages) - response = self._read_method(readid, endpoint, obj, page=i_page, **kwargs) - errors += get_response_errors(response, endpoint) - response_items += response[endpoint] - - result = ResultContainer(response_items, self.result_type, total_pages_api=n_pages, errors=errors) + response_items = [] + page_offset = initial_offset + for i_iter, i_page in enumerate(requested_pages): + if not (i_iter == 0 and i_page == 1): + print("-- reading page", i_page, "of", n_available_pages) + response, errors_page = self._read_page(readid, endpoint, obj, page=i_page) + errors += errors_page + + response_items += response[endpoint][page_offset:] + page_offset = 0 + + result = ResultContainer(response_items, self.result_type, total_pages_api=n_available_pages, errors=errors) if check: result.assert_success() return result - def save(self, endpoint: str, obj: dict, check: bool = True, **kwargs) -> ResultContainer: - results = self.engine.save(endpoint, obj, auth=self.auth, **kwargs) + def _add_query_timestamp(self, query: dict[str, Any]) -> dict[str, Any]: + """Adds the current time as a createdbefore timestamp to the query, if there is no time in the query already. 
+ This ensures pagination will be robust to insertion of new items during the query. + If a time is already present, it will be left as is, but a warning will be printed if it is in the future as + the query will not be robust to insertion of new items. + Note that this does not ensure robustness against deletion of items. + """ + server_time = datetime.now(self._zone_info) + if "createdbefore" in query: + query_time = datetime.fromisoformat(query["createdbefore"]) + if query_time > server_time: + logging.warning( + f"Warning: Query timestamp is in the future: {query_time}. " + "This will not be robust to insertion of new items." + ) + return query + else: + return {**query, "createdbefore": server_time.strftime("%Y-%m-%dT%H:%M:%S")} + + def save(self, endpoint: str, obj: dict, check: bool = True) -> ResultContainer: + results = self.engine.save(endpoint, obj, auth=self.auth) result = ResultContainer(results[endpoint], self.result_type, errors=get_response_errors(results, endpoint)) if check: result.assert_success() return result - def delete(self, endpoint: str, id: Union[List, int], check: bool = True) -> ResultContainer: + def delete(self, endpoint: str, id: int | list[int], check: bool = True) -> ResultContainer: results = self.engine.delete(endpoint, id, auth=self.auth) result = ResultContainer(results[endpoint], self.result_type, errors=get_response_errors(results, endpoint)) if check: result.assert_success() return result - def _read_method(self, readid: bool, endpoint: str, obj: dict, page: int = 1, **kwargs): + def _read_page(self, readid: bool, endpoint: str, query: dict[str, Any], page: int = 1): + """Reads the specified page of objects from the specified endpoint that match the query.""" if readid: # https://fgcz-bfabric.uzh.ch/wiki/tiki-index.php?page=endpoint.workunit#Web_Method_readid_ - return self.engine.readid(endpoint, obj, auth=self.auth, page=page, **kwargs) + response = self.engine.readid(endpoint, query, auth=self.auth, page=page) else: - return self.engine.read(endpoint, obj, auth=self.auth, page=page, **kwargs) + response = self.engine.read(endpoint, query, auth=self.auth, page=page) + + return response, get_response_errors(response, endpoint) ############################ # Multi-query functionality ############################ # TODO: Is this scope sufficient? Is there ever more than one multi-query parameter, and/or not at the root of dict? - def read_multi(self, endpoint: str, obj: dict, multi_query_key: str, multi_query_vals: list, - readid: bool = False, **kwargs) -> ResultContainer: + def read_multi( + self, endpoint: str, obj: dict, multi_query_key: str, multi_query_vals: list, readid: bool = False + ) -> ResultContainer: """ Makes a 1-parameter multi-query (there is 1 parameter that takes a list of values) Since the API only allows BFABRIC_QUERY_LIMIT queries per page, split the list into chunks before querying @@ -252,7 +329,7 @@ def read_multi(self, endpoint: str, obj: dict, multi_query_key: str, multi_query NOTE: It is assumed that there is only 1 response for each value. """ - response_tot = ResultContainer([], self.result_type, total_pages_api = 0) + response_tot = ResultContainer([], self.result_type, total_pages_api=0) obj_extended = deepcopy(obj) # Make a copy of the query, not to make edits to the argument # Iterate over request chunks that fit into a single API page @@ -266,7 +343,7 @@ def read_multi(self, endpoint: str, obj: dict, multi_query_key: str, multi_query # automatically? If yes, perhaps we don't need this method at all? 
# TODO: It is assumed that a user requesting multi_query always wants all of the pages. Can anybody think of # exceptions to this? - response_this = self.read(endpoint, obj_extended, max_results=None, readid=readid, **kwargs) + response_this = self.read(endpoint, obj_extended, max_results=None, readid=readid) response_tot.extend(response_this) return response_tot @@ -288,7 +365,7 @@ def delete_multi(self, endpoint: str, id_list: list) -> ResultContainer: response_tot = ResultContainer([], self.result_type, total_pages_api=0) if len(id_list) == 0: - print('Warning, empty list provided for deletion, ignoring') + print("Warning, empty list provided for deletion, ignoring") return response_tot # Iterate over request chunks that fit into a single API page @@ -298,7 +375,7 @@ def delete_multi(self, endpoint: str, id_list: list) -> ResultContainer: return response_tot - def exists(self, endpoint: str, key: str, value: Union[List, Union[int, str]]) -> Union[bool, List[bool]]: + def exists(self, endpoint: str, key: str, value: list[int | str] | int | str) -> bool | list[bool]: """ :param endpoint: endpoint :param key: A key for the query (e.g. id or name) @@ -321,8 +398,8 @@ def exists(self, endpoint: str, key: str, value: Union[List, Union[int, str]]) - for r in results.results: if key in r: result_vals += [r[key]] - elif '_' + key in r: # TODO: Remove this if SUDS bug is ever resolved - result_vals += [r['_' + key]] + elif "_" + key in r: # TODO: Remove this if SUDS bug is ever resolved + result_vals += [r["_" + key]] # 3. For each of the requested ids, return true if there was a response and false if there was not if is_scalar: @@ -347,4 +424,3 @@ def print_version_message(self, stderr: bool = True) -> None: """ console = Console(stderr=stderr, highlighter=HostnameHighlighter(), theme=DEFAULT_THEME) console.print(self.get_version_message(), style="bright_yellow") - diff --git a/bfabric/src/paginator.py b/bfabric/src/paginator.py index f1a183e9..f20312b3 100644 --- a/bfabric/src/paginator.py +++ b/bfabric/src/paginator.py @@ -1,6 +1,11 @@ +from __future__ import annotations + +import math + # Single page query limit for BFabric API (as of time of writing, adapt if it changes) BFABRIC_QUERY_LIMIT = 100 + def page_iter(objs: list, page_size: int = BFABRIC_QUERY_LIMIT) -> list: """ :param objs: A list of objects to provide to bfabric as part of a query @@ -9,4 +14,37 @@ def page_iter(objs: list, page_size: int = BFABRIC_QUERY_LIMIT) -> list: """ for i in range(0, len(objs), page_size): - yield objs[i:i + page_size] + yield objs[i : i + page_size] + + +def compute_requested_pages( + n_page_total: int, + n_item_per_page: int, + n_item_offset: int, + n_item_return_max: int | None, +) -> tuple[list[int], int]: + """Returns the page indices that need to be requested to get all requested items. + :param n_page_total: Total number of pages available + :param n_item_per_page: Number of items per page + :param n_item_offset: Number of items to skip from the beginning + :param n_item_return_max: Maximum number of items to return + :return: + - list of page indices that need to be requested + - initial page offset (0-based), i.e. 
the i-th item from which onwards to retain results + """ + # B-Fabric API uses 1-based indexing for pages + index_start = 1 + + # Determine the page indices to request + # If n_item_return_max is not provided, we will return all items + if n_item_return_max is None: + n_item_return_max = n_page_total * n_item_per_page + + # Determine the page indices to request + idx_max_return = math.ceil((n_item_return_max + n_item_offset) / n_item_per_page) + idx_arr = [idx + index_start for idx in range(n_item_offset // n_item_per_page, min(n_page_total, idx_max_return))] + + # Determine the initial offset on the first page + initial_offset = min(n_item_offset, n_item_return_max) % n_item_per_page + + return idx_arr, initial_offset diff --git a/bfabric/tests/unit/test_bfabric.py b/bfabric/tests/unit/test_bfabric.py index 23a2b178..58c2db03 100644 --- a/bfabric/tests/unit/test_bfabric.py +++ b/bfabric/tests/unit/test_bfabric.py @@ -1,6 +1,9 @@ +import datetime +import logging import unittest from functools import cached_property -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch, ANY +from zoneinfo import ZoneInfo from bfabric import BfabricConfig from bfabric.bfabric2 import BfabricAPIEngineType, Bfabric @@ -10,6 +13,7 @@ class TestBfabric(unittest.TestCase): def setUp(self): self.mock_config = MagicMock(name="mock_config", spec=BfabricConfig) + self.mock_config.server_timezone = "Pacific/Kiritimati" self.mock_auth = None self.mock_engine_type = BfabricAPIEngineType.SUDS self.mock_engine = MagicMock(name="mock_engine", spec=EngineSUDS) @@ -18,6 +22,42 @@ def setUp(self): def mock_bfabric(self) -> Bfabric: return Bfabric(config=self.mock_config, auth=self.mock_auth, engine=self.mock_engine_type) + @patch("bfabric.bfabric2.get_system_auth") + def test_from_config_when_no_args(self, mock_get_system_auth): + mock_config = MagicMock(name="mock_config", server_timezone="Pacific/Kiritimati") + mock_auth = MagicMock(name="mock_auth") + mock_get_system_auth.return_value = (mock_config, mock_auth) + client = Bfabric.from_config() + self.assertIsInstance(client, Bfabric) + self.assertEqual(mock_config, client.config) + self.assertEqual(mock_auth, client.auth) + mock_get_system_auth.assert_called_once_with(config_env=None) + + @patch("bfabric.bfabric2.get_system_auth") + def test_from_config_when_explicit_auth(self, mock_get_system_auth): + mock_config = MagicMock(name="mock_config", server_timezone="Pacific/Kiritimati") + mock_auth = MagicMock(name="mock_auth") + mock_config_auth = MagicMock(name="mock_config_auth") + mock_get_system_auth.return_value = (mock_config, mock_config_auth) + client = Bfabric.from_config(config_env="TestingEnv", auth=mock_auth) + self.assertIsInstance(client, Bfabric) + self.assertEqual(mock_config, client.config) + self.assertEqual(mock_auth, client.auth) + mock_get_system_auth.assert_called_once_with(config_env="TestingEnv") + + @patch("bfabric.bfabric2.get_system_auth") + def test_from_config_when_none_auth(self, mock_get_system_auth): + mock_config = MagicMock(name="mock_config", server_timezone="Pacific/Kiritimati") + mock_auth = MagicMock(name="mock_auth") + mock_get_system_auth.return_value = (mock_config, mock_auth) + client = Bfabric.from_config(config_env="TestingEnv", auth=None) + self.assertIsInstance(client, Bfabric) + self.assertEqual(mock_config, client.config) + with self.assertRaises(ValueError) as error: + _ = client.auth + self.assertIn("Authentication not available", str(error.exception)) + 
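+        # get_system_auth is still consulted for the configuration, even though auth was explicitly set to None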
mock_get_system_auth.assert_called_once_with(config_env="TestingEnv") + def test_query_counter(self): self.assertEqual(0, self.mock_bfabric.query_counter) @@ -52,7 +92,62 @@ def test_with_auth_when_exception(self): pass self.assertEqual(mock_old_auth, self.mock_bfabric.auth) - # TODO further unit tests + @patch("bfabric.bfabric2.datetime") + def test_add_query_timestamp_when_not_present(self, module_datetime): + module_datetime.now.return_value = datetime.datetime(2020, 1, 2, 3, 4, 5) + query = self.mock_bfabric._add_query_timestamp( {"a": "b", "c": 1}) + self.assertDictEqual( + {"a": "b", "c": 1, 'createdbefore': '2020-01-02T03:04:05'}, + query, + ) + module_datetime.now.assert_called_once_with(ZoneInfo('Pacific/Kiritimati')) + + @patch("bfabric.bfabric2.datetime") + def test_add_query_timestamp_when_set_and_past(self, module_datetime): + module_datetime.now.return_value = datetime.datetime(2020, 1, 2, 3, 4, 5) + module_datetime.fromisoformat = datetime.datetime.fromisoformat + query_before = {"a": "b", "createdbefore": "2019-12-31T23:59:59"} + # TODO once py3.10 is available, use assertNoLogs + query = self.mock_bfabric._add_query_timestamp(query_before) + self.assertDictEqual( + {"a": "b", "createdbefore": "2019-12-31T23:59:59"}, + query, + ) + module_datetime.now.assert_called_once_with(ZoneInfo('Pacific/Kiritimati')) + + @patch("bfabric.bfabric2.datetime") + def test_add_query_timestamp_when_set_and_future(self, module_datetime): + module_datetime.now.return_value = datetime.datetime(2020, 1, 2, 3, 4, 5) + module_datetime.fromisoformat = datetime.datetime.fromisoformat + query_before = {"a": "b", "createdbefore": "2020-01-02T03:04:06"} + with self.assertLogs(level=logging.WARNING) as logs: + query = self.mock_bfabric._add_query_timestamp(query_before) + self.assertDictEqual( + {"a": "b", "createdbefore": "2020-01-02T03:04:06"}, + query, + ) + self.assertEqual(1, len(logs.output)) + self.assertIn("Query timestamp is in the future: 2020-01-02 03:04:06", logs.output[0]) + + def test_get_version_message(self): + self.mock_config.base_url = "dummy_url" + message = self.mock_bfabric.get_version_message() + lines = message.split("\n") + self.assertEqual(2, len(lines)) + # first line + pattern = r"--- bfabricPy v\d+\.\d+\.\d+ \(EngineSUDS, dummy_url, U=None\) ---" + self.assertRegex(lines[0], pattern) + # second line + year = datetime.datetime.now().year + self.assertEqual(f"--- Copyright (C) 2014-{year} Functional Genomics Center Zurich ---", lines[1]) + + @patch("bfabric.bfabric2.Console") + @patch.object(Bfabric, "get_version_message") + def test_print_version_message(self, method_get_version_message, mock_console): + mock_stderr = MagicMock(name="mock_stderr") + self.mock_bfabric.print_version_message(stderr=mock_stderr) + mock_console.assert_called_once_with(stderr=mock_stderr, highlighter=ANY, theme=ANY) + mock_console.return_value.print.assert_called_once_with(method_get_version_message.return_value, style="bright_yellow") if __name__ == "__main__": diff --git a/bfabric/tests/unit/test_paginator.py b/bfabric/tests/unit/test_paginator.py index bb506692..8618b82e 100644 --- a/bfabric/tests/unit/test_paginator.py +++ b/bfabric/tests/unit/test_paginator.py @@ -13,6 +13,70 @@ def test_page_iter(self): self.assertEqual(rez[0], list(range(100))) self.assertEqual(rez[1], list(range(100, 123))) + def test_compute_requested_pages_when_no_offset(self): + pages, init_offset = paginator.compute_requested_pages( + n_page_total=5, n_item_per_page=3, n_item_offset=0, n_item_return_max=None + ) + 
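+        # 5 pages of 3 items each, no offset and no cap: all five pages are requested, starting from the first item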
self.assertListEqual([1, 2, 3, 4, 5], pages) + self.assertEqual(0, init_offset) + + def test_compute_requested_pages_when_offset_2(self): + pages, init_offset = paginator.compute_requested_pages( + n_page_total=5, n_item_per_page=3, n_item_offset=2, n_item_return_max=None + ) + self.assertListEqual([1, 2, 3, 4, 5], pages) + self.assertEqual(2, init_offset) + + def test_compute_requested_pages_when_offset_3(self): + pages, init_offset = paginator.compute_requested_pages( + n_page_total=5, n_item_per_page=3, n_item_offset=3, n_item_return_max=None + ) + self.assertListEqual([2, 3, 4, 5], pages) + self.assertEqual(0, init_offset) + + def test_compute_requested_pages_when_offset_4(self): + pages, init_offset = paginator.compute_requested_pages( + n_page_total=5, n_item_per_page=3, n_item_offset=4, n_item_return_max=None + ) + self.assertListEqual([2, 3, 4, 5], pages) + self.assertEqual(1, init_offset) + + def test_compute_requested_pages_when_offset_6(self): + pages, init_offset = paginator.compute_requested_pages( + n_page_total=5, n_item_per_page=3, n_item_offset=6, n_item_return_max=None + ) + self.assertListEqual([3, 4, 5], pages) + self.assertEqual(0, init_offset) + + def test_compute_requested_pages_when_offset_out_of_bounds(self): + # TODO maybe it should yield an error? + pages, init_offset = paginator.compute_requested_pages( + n_page_total=5, n_item_per_page=3, n_item_offset=100, n_item_return_max=None + ) + self.assertListEqual([], pages) + self.assertEqual(0, init_offset) + + def test_compute_requested_pages_when_max(self): + pages, init_offset = paginator.compute_requested_pages( + n_page_total=5, n_item_per_page=3, n_item_offset=0, n_item_return_max=10 + ) + self.assertListEqual([1, 2, 3, 4], pages) + self.assertEqual(0, init_offset) + + def test_compute_requested_pages_when_max_9(self): + pages, init_offset = paginator.compute_requested_pages( + n_page_total=5, n_item_per_page=3, n_item_offset=0, n_item_return_max=9 + ) + self.assertListEqual([1, 2, 3], pages) + self.assertEqual(0, init_offset) + + def test_compute_requested_pages_when_max_6(self): + pages, init_offset = paginator.compute_requested_pages( + n_page_total=5, n_item_per_page=3, n_item_offset=0, n_item_return_max=6 + ) + self.assertListEqual([1, 2], pages) + self.assertEqual(0, init_offset) + if __name__ == "__main__": unittest.main(verbosity=2) From 45e2c4af36499bd8d7942cb4544d3b389b365f33 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 7 May 2024 14:17:49 +0200 Subject: [PATCH 077/129] Deprecate some scripts --- bfabric/deprecated_scripts/__init__.py | 0 .../bfabric_create_bfabricrc.py | 0 .../{scripts => deprecated_scripts}/bfabric_csv2dataset.py | 0 .../bfabric_demo_register_resource.py | 5 ----- .../bfabric_feeder_importresource.py | 0 .../bfabric_list_executables.py | 0 .../bfabric_list_proteomics_projects.py | 0 .../{scripts => deprecated_scripts}/bfabric_read_dataset.py | 0 .../bfabric_read_sample_of_order.py | 0 .../bfabric_sample_graph_traversal.py | 0 bfabric/{scripts => deprecated_scripts}/bfabric_save.py | 0 .../bfabric_save_customattributes.py | 0 .../{scripts => deprecated_scripts}/bfabric_save_dataset.py | 0 .../bfabric_save_importresource.py | 0 .../bfabric_save_qcloud2_annotation.py | 0 .../{scripts => deprecated_scripts}/bfabric_save_resource.py | 0 .../bfabric_save_resource_description.py | 0 .../bfabric_submitter_yaml.py | 0 .../bfabric_upload_wrapper_creator_executable.py | 0 bfabric/{scripts => deprecated_scripts}/demo_config.yaml | 0 .../{scripts => 
deprecated_scripts}/fgcz_pd_rpc_client.py | 0 .../{scripts => deprecated_scripts}/fgcz_pd_rpc_server.py | 0 bfabric/{scripts => deprecated_scripts}/fgcz_pd_wrapper.py | 0 bfabric/scripts/__init__.py | 0 24 files changed, 5 deletions(-) create mode 100644 bfabric/deprecated_scripts/__init__.py rename bfabric/{scripts => deprecated_scripts}/bfabric_create_bfabricrc.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_csv2dataset.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_demo_register_resource.py (97%) rename bfabric/{scripts => deprecated_scripts}/bfabric_feeder_importresource.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_list_executables.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_list_proteomics_projects.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_read_dataset.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_read_sample_of_order.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_sample_graph_traversal.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_save.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_save_customattributes.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_save_dataset.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_save_importresource.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_save_qcloud2_annotation.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_save_resource.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_save_resource_description.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_submitter_yaml.py (100%) rename bfabric/{scripts => deprecated_scripts}/bfabric_upload_wrapper_creator_executable.py (100%) rename bfabric/{scripts => deprecated_scripts}/demo_config.yaml (100%) rename bfabric/{scripts => deprecated_scripts}/fgcz_pd_rpc_client.py (100%) rename bfabric/{scripts => deprecated_scripts}/fgcz_pd_rpc_server.py (100%) rename bfabric/{scripts => deprecated_scripts}/fgcz_pd_wrapper.py (100%) create mode 100644 bfabric/scripts/__init__.py diff --git a/bfabric/deprecated_scripts/__init__.py b/bfabric/deprecated_scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bfabric/scripts/bfabric_create_bfabricrc.py b/bfabric/deprecated_scripts/bfabric_create_bfabricrc.py similarity index 100% rename from bfabric/scripts/bfabric_create_bfabricrc.py rename to bfabric/deprecated_scripts/bfabric_create_bfabricrc.py diff --git a/bfabric/scripts/bfabric_csv2dataset.py b/bfabric/deprecated_scripts/bfabric_csv2dataset.py similarity index 100% rename from bfabric/scripts/bfabric_csv2dataset.py rename to bfabric/deprecated_scripts/bfabric_csv2dataset.py diff --git a/bfabric/scripts/bfabric_demo_register_resource.py b/bfabric/deprecated_scripts/bfabric_demo_register_resource.py similarity index 97% rename from bfabric/scripts/bfabric_demo_register_resource.py rename to bfabric/deprecated_scripts/bfabric_demo_register_resource.py index b824c3bb..dc352156 100755 --- a/bfabric/scripts/bfabric_demo_register_resource.py +++ b/bfabric/deprecated_scripts/bfabric_demo_register_resource.py @@ -6,11 +6,6 @@ # $Date: 2017-06-12 12:55:55 +0200 (Mon, 12 Jun 2017) $ - -import os -import re -import time -import sys import bfabric import bfabric.wrapper_creator.bfabric_feeder diff --git a/bfabric/scripts/bfabric_feeder_importresource.py b/bfabric/deprecated_scripts/bfabric_feeder_importresource.py similarity index 100% 
rename from bfabric/scripts/bfabric_feeder_importresource.py rename to bfabric/deprecated_scripts/bfabric_feeder_importresource.py diff --git a/bfabric/scripts/bfabric_list_executables.py b/bfabric/deprecated_scripts/bfabric_list_executables.py similarity index 100% rename from bfabric/scripts/bfabric_list_executables.py rename to bfabric/deprecated_scripts/bfabric_list_executables.py diff --git a/bfabric/scripts/bfabric_list_proteomics_projects.py b/bfabric/deprecated_scripts/bfabric_list_proteomics_projects.py similarity index 100% rename from bfabric/scripts/bfabric_list_proteomics_projects.py rename to bfabric/deprecated_scripts/bfabric_list_proteomics_projects.py diff --git a/bfabric/scripts/bfabric_read_dataset.py b/bfabric/deprecated_scripts/bfabric_read_dataset.py similarity index 100% rename from bfabric/scripts/bfabric_read_dataset.py rename to bfabric/deprecated_scripts/bfabric_read_dataset.py diff --git a/bfabric/scripts/bfabric_read_sample_of_order.py b/bfabric/deprecated_scripts/bfabric_read_sample_of_order.py similarity index 100% rename from bfabric/scripts/bfabric_read_sample_of_order.py rename to bfabric/deprecated_scripts/bfabric_read_sample_of_order.py diff --git a/bfabric/scripts/bfabric_sample_graph_traversal.py b/bfabric/deprecated_scripts/bfabric_sample_graph_traversal.py similarity index 100% rename from bfabric/scripts/bfabric_sample_graph_traversal.py rename to bfabric/deprecated_scripts/bfabric_sample_graph_traversal.py diff --git a/bfabric/scripts/bfabric_save.py b/bfabric/deprecated_scripts/bfabric_save.py similarity index 100% rename from bfabric/scripts/bfabric_save.py rename to bfabric/deprecated_scripts/bfabric_save.py diff --git a/bfabric/scripts/bfabric_save_customattributes.py b/bfabric/deprecated_scripts/bfabric_save_customattributes.py similarity index 100% rename from bfabric/scripts/bfabric_save_customattributes.py rename to bfabric/deprecated_scripts/bfabric_save_customattributes.py diff --git a/bfabric/scripts/bfabric_save_dataset.py b/bfabric/deprecated_scripts/bfabric_save_dataset.py similarity index 100% rename from bfabric/scripts/bfabric_save_dataset.py rename to bfabric/deprecated_scripts/bfabric_save_dataset.py diff --git a/bfabric/scripts/bfabric_save_importresource.py b/bfabric/deprecated_scripts/bfabric_save_importresource.py similarity index 100% rename from bfabric/scripts/bfabric_save_importresource.py rename to bfabric/deprecated_scripts/bfabric_save_importresource.py diff --git a/bfabric/scripts/bfabric_save_qcloud2_annotation.py b/bfabric/deprecated_scripts/bfabric_save_qcloud2_annotation.py similarity index 100% rename from bfabric/scripts/bfabric_save_qcloud2_annotation.py rename to bfabric/deprecated_scripts/bfabric_save_qcloud2_annotation.py diff --git a/bfabric/scripts/bfabric_save_resource.py b/bfabric/deprecated_scripts/bfabric_save_resource.py similarity index 100% rename from bfabric/scripts/bfabric_save_resource.py rename to bfabric/deprecated_scripts/bfabric_save_resource.py diff --git a/bfabric/scripts/bfabric_save_resource_description.py b/bfabric/deprecated_scripts/bfabric_save_resource_description.py similarity index 100% rename from bfabric/scripts/bfabric_save_resource_description.py rename to bfabric/deprecated_scripts/bfabric_save_resource_description.py diff --git a/bfabric/scripts/bfabric_submitter_yaml.py b/bfabric/deprecated_scripts/bfabric_submitter_yaml.py similarity index 100% rename from bfabric/scripts/bfabric_submitter_yaml.py rename to bfabric/deprecated_scripts/bfabric_submitter_yaml.py diff --git 
a/bfabric/scripts/bfabric_upload_wrapper_creator_executable.py b/bfabric/deprecated_scripts/bfabric_upload_wrapper_creator_executable.py
similarity index 100%
rename from bfabric/scripts/bfabric_upload_wrapper_creator_executable.py
rename to bfabric/deprecated_scripts/bfabric_upload_wrapper_creator_executable.py
diff --git a/bfabric/scripts/demo_config.yaml b/bfabric/deprecated_scripts/demo_config.yaml
similarity index 100%
rename from bfabric/scripts/demo_config.yaml
rename to bfabric/deprecated_scripts/demo_config.yaml
diff --git a/bfabric/scripts/fgcz_pd_rpc_client.py b/bfabric/deprecated_scripts/fgcz_pd_rpc_client.py
similarity index 100%
rename from bfabric/scripts/fgcz_pd_rpc_client.py
rename to bfabric/deprecated_scripts/fgcz_pd_rpc_client.py
diff --git a/bfabric/scripts/fgcz_pd_rpc_server.py b/bfabric/deprecated_scripts/fgcz_pd_rpc_server.py
similarity index 100%
rename from bfabric/scripts/fgcz_pd_rpc_server.py
rename to bfabric/deprecated_scripts/fgcz_pd_rpc_server.py
diff --git a/bfabric/scripts/fgcz_pd_wrapper.py b/bfabric/deprecated_scripts/fgcz_pd_wrapper.py
similarity index 100%
rename from bfabric/scripts/fgcz_pd_wrapper.py
rename to bfabric/deprecated_scripts/fgcz_pd_wrapper.py
diff --git a/bfabric/scripts/__init__.py b/bfabric/scripts/__init__.py
new file mode 100644
index 00000000..e69de29b

From 707bc7b63e9be7006c0209910baab453240bee53 Mon Sep 17 00:00:00 2001
From: Leonardo Schwarz
Date: Tue, 7 May 2024 14:33:11 +0200
Subject: [PATCH 078/129] Merge pull request #88 from fgcz/protinf-flask

Refactor flask server

- Use the new API
- Provides access to the new pagination API. (breaking)
- Remove/deactivate some of the old API
---
 bfabric/scripts/bfabric_flask.py | 651 ++++++++++++-------------------
 pyproject.toml                   |   2 +-
 2 files changed, 256 insertions(+), 397 deletions(-)

diff --git a/bfabric/scripts/bfabric_flask.py b/bfabric/scripts/bfabric_flask.py
index b20dfeab..20343be4 100755
--- a/bfabric/scripts/bfabric_flask.py
+++ b/bfabric/scripts/bfabric_flask.py
@@ -1,7 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: latin1 -*-
-
-
 """
 This script is intended to be used as a REST SOAP proxy.

Of note, do not forget to rerun the flask service after modification!
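A minimal example request (a sketch: it assumes the proxy runs on localhost:5000
and that `login` / `webservicepassword` are valid B-Fabric credentials):

    import requests

    body = {
        "endpoint": "user",
        "login": "my_login",
        "webservicepassword": "my_password",
        "query": {"login": "my_login"},
        "page_offset": 0,
        "page_max_results": 10,
    }
    print(requests.post("http://localhost:5000/read", json=body).json())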
""" - -import base64 +from __future__ import annotations +import os import json -from flask import Flask, jsonify, request -from flask.json import JSONEncoder -from slugify import slugify -from os.path import exists - -import bfabric - import logging import logging.handlers -from flask.logging import default_handler - +from pathlib import Path +from typing import Any -def create_logger(name="bfabric11_flask", address=("fgcz-ms.uzh.ch", 514)): - """ - create a logger object - """ - syslog_handler = logging.handlers.SysLogHandler(address=address) - formatter = logging.Formatter('%(name)s %(message)s') - syslog_handler.setFormatter(formatter) +from flask import Flask, Response, jsonify, request - logger = logging.getLogger(name) - logger.setLevel(20) - logger.addHandler(syslog_handler) +from bfabric.bfabric2 import Bfabric +from bfabric.bfabric_config import BfabricAuth - return logger - -logger = create_logger() - -class BfabricJSONEncoder(JSONEncoder): - """ - enables to serialize (jsonify) bfabric wsdl objects - """ - - def default(self, obj): - try: - iterable = iter(obj) - except TypeError: - pass - else: - return(dict(iterable)) - - return JSONEncoder.default(self, obj) - - -address=("fgcz-ms.uzh.ch", 514) -name="bfabric_flask" -formatter = logging.Formatter('%(name)s %(message)s') - -syslog_handler = logging.handlers.SysLogHandler(address=address) -syslog_handler.setFormatter(formatter) - +if "BFABRICPY_CONFIG_ENV" not in os.environ: + # Set the environment to the name of the PROD config section to use + os.environ["BFABRICPY_CONFIG_ENV"] = "TEST" +DEFAULT_LOGGER_NAME = "bfabric13_flask" +logger = logging.getLogger(DEFAULT_LOGGER_NAME) app = Flask(__name__) +client = Bfabric.from_config(auth=None, verbose=True) -app.json_encoder = BfabricJSONEncoder -bfapp = bfabric.Bfabric() - -inlcude_child_extracts = True - -@app.route('/read', methods=['GET', 'POST']) -def read(): - idonly = None - try: - content = json.loads(request.data) - except: - return jsonify({'error': 'could not get POST content.'}) - - try: - # TODO(cp): check if meaningful page - page = content['page'][0] - print("page = ", page) - except: - logger.info("set page to 1.") - page = 1 - - try: - # TODO(cp): check if meaningful page - idonly = content['idonly'][0] - print("idonly = ", idonly) - except: - idonly = False - try: - webservicepassword = content['webservicepassword'][0].replace("\t", "") - login = content['login'][0] - # logger.info("debug {}".format(webservicepassword)) - - bf = bfabric.Bfabric(login=login, password=webservicepassword) - res = bf.read_object(endpoint=content['endpoint'][0], obj=content['query'], plain=True, page=page, idonly=idonly) - logger.info("'{}' login success query {} ...".format(login, content['query'])) - except: - logger.info("'{}' query failed ...".format(login)) - return jsonify({'status': 'jsonify failed: bfabric python module.'}) - - try: - return jsonify({'res': res}) - except: - logger.info("'{}' query failed ...".format(login)) - return jsonify({'status': 'jsonify failed'}) +def get_request_auth(request_data: dict[str, Any]) -> BfabricAuth: + """Extracts the login and password from a JSON request body. 
Assumes it has been filtered beforehand.""" + webservicepassword = request_data["webservicepassword"].replace("\t", "") + login = request_data["login"] + return BfabricAuth(login=login, password=webservicepassword) -""" -generic query interface for read interface +@app.errorhandler(Exception) +def handle_unknown_exception(e: Exception) -> Response: + """Handles exceptions which are not handled by a more specific handler.""" + logger.error("Unknown exception", exc_info=e) + return jsonify({"error": f"unknown exception occurred: {e}"}) -example (assumes the proxy runs on localhost): -R> rv <- POST('http://localhost:5000/query', - body = toJSON(list(login = login, - webservicepassword = webservicepassword, - query = 'resource', - containerid = 3000, - applicationid = 205)), - encode = 'json') - -R> rv <- content(rv) +@app.errorhandler(json.JSONDecodeError) +def handle_json_decode_error(e: json.JSONDecodeError) -> Response: + """Handles JSON decode errors.""" + logger.error("JSON decode error", exc_info=e) + return jsonify({"error": "could not parse JSON request content"}) -TODO(cp@fgcz.ethz.ch): also provide an argument for the webbase -""" -@app.route('/q', methods=['GET', 'POST']) -def q(): - try: - content = json.loads(request.data) - except: - return jsonify({'error': 'could not get POST content.'}) - try: - # TODO(cp): check if meaningful page - page = content['page'][0] - except: - logger.info("set page to 1.") - page = 1 +class InvalidRequestContent(RuntimeError): + """Raised when the request content is invalid.""" - # TODO(cp): more finetuning on paging - try: - webservicepassword = content['webservicepassword'][0].replace("\t", "") - login = content['login'][0] - #logger.info("debug {}".format(webservicepassword)) - - - bf = bfabric.Bfabric(login=login, password=webservicepassword) - res = bf.read_object(endpoint=content['endpoint'][0], obj=content['query'], page=page) - logger.info("'{}' login success query {} ...".format(login, content['query'])) - except: - logger.info("'{}' login failed ...".format(login)) - return jsonify({'status': 'jsonify failed: bfabric python module.'}) + def __init__(self, missing_fields: list[str]) -> None: + super().__init__(f"missing fields: {missing_fields}") - try: - return jsonify({'res': res}) - except: - logger.info("'{}' query failed ...".format(login)) - return jsonify({'status': 'jsonify failed'}) +@app.errorhandler(InvalidRequestContent) +def handle_invalid_request_content(e: InvalidRequestContent) -> Response: + """Handles invalid request content errors.""" + logger.error("Invalid request content", exc_info=e) + return jsonify({"error": f"invalid request content: {e}"}) -@app.route('/s', methods=['GET', 'POST']) -def s(): +def get_fields(required_fields: list[str], optional_fields: dict[str, Any]) -> dict[str, Any]: + """Extracts fields from a JSON request body. All `required_fields` must be present, or an error will be raised + indicating the missing fields. The optional fields are filled with the default values if not present. 
+ :param required_fields: list of required fields + :param optional_fields: dictionary of optional fields with default values + :return: dictionary of all field values, if all required fields are present + """ + available_fields = request.json.keys() + missing_fields = set(required_fields) - set(available_fields) + if missing_fields: + raise InvalidRequestContent(sorted(missing_fields)) + else: + required_values = {field: request.json[field] for field in required_fields} + optional_values = {field: request.json.get(field, default) for field, default in optional_fields.items()} + return {**required_values, **optional_values} + + +@app.route("/read", methods=["POST"]) +def read() -> Response: + """Reads data from a particular B-Fabric endpoint matching a query.""" + params = get_fields( + required_fields=["endpoint", "login", "webservicepassword"], + optional_fields={"query": {}, "page_offset": 0, "page_max_results": 100}, + ) + query = params["query"] + page_offset = params["page_offset"] + page_max_results = params["page_max_results"] + endpoint = params["endpoint"] + auth = get_request_auth(params) + + logger.info(f"'{auth.login}' /read {page_offset=}, {page_max_results=}, {query=}") try: - content = json.loads(request.data) - except: - msg = 'could not get POST content.' - print("Exception: {}".format(msg)) - return jsonify({'error': msg}) + with client.with_auth(auth): + client.print_version_message() + res = client.read( + endpoint=endpoint, + obj=query, + offset=page_offset, + max_results=page_max_results, + ) + logger.info(f"'{auth.login}' login success query {query} ...") + except Exception: + logger.exception(f"'{auth.login}' query failed ...") + return jsonify({"status": "jsonify failed: bfabric python module."}) try: - webservicepassword = content['webservicepassword'][0].replace("\t", "") - login = content['login'][0] - except: - msg = 'Could not extract login|webservicepassword.' - print("Exception: {}".format(msg)) - return jsonify({'error': msg}) + return jsonify({"res": res.to_list_dict()}) + except Exception: + logger.exception(f"'{auth.login}' query failed ...") + return jsonify({"status": "jsonify failed"}) - try: - endpoint = content['endpoint'][0] - except: - msg = 'Could not extract endpoint.' - print("Exception: {}".format(msg)) - return jsonify({'error': msg}) - try: - query = content['query'] - except: - msg = 'Could not extract query.' 
- print("Exception: {}".format(msg)) - return jsonify({'error': msg}) +@app.route("/save", methods=["POST"]) +def save() -> Response: + """Saves data to a particular B-Fabric endpoint.""" + params = get_fields(required_fields=["endpoint", "query", "login", "webservicepassword"], optional_fields={}) + endpoint = params["endpoint"] + query = params["query"] + auth = get_request_auth(params) try: - print("Calling constructor and save method using login", login) - bf = bfabric.Bfabric(login=login, password=webservicepassword) - res = bf.save_object(endpoint=endpoint, obj=content['query']) - - logger.info("'{}' login success save method ...".format(login)) - except: - logger.info("save method failed for login {}.".format(login)) - return jsonify({'status': 'jsonify failed: bfabric python module.'}) + with client.with_auth(auth): + res = client.save(endpoint=endpoint, obj=query) + logger.info(f"'{auth.login}' login success save method ...") + except Exception: + logger.exception(f"save method failed for login {auth.login}.") + return jsonify({"status": "jsonify failed: bfabric python module."}) try: - return jsonify({'res': res}) - except: - return jsonify({'status': 'jsonify failed'}) - -def dfs__(extract_id): - stack = list() - visited = dict() - stack.append(extract_id) - - extract_dict = dict() - - while len(stack) > 0: - o = stack.pop() - visited[u] = True - + return jsonify({"res": res.to_list_dict()}) + except Exception: + return jsonify({"status": "jsonify failed"}) + + +@app.route("/add_resource", methods=["POST"]) +def add_resource() -> Response: + """Adds a resource to a workunit.""" + params = get_fields( + required_fields=[ + "name", + "workunitdescription", + "containerid", + "applicationid", + "base64", + "resourcename", + "login", + "webservicepassword", + ], + optional_fields={}, + ) + auth = get_request_auth(params) + + # Save the workunit + with client.with_auth(auth): + res = client.save( + "workunit", + { + "name": params["name"], + "description": params["workunitdescription"], + "containerid": params["containerid"], + "applicationid": params["applicationid"], + }, + ).to_list_dict() + logger.info(res) + + workunit_id = res[0]["id"] + logger.info(f"workunit_id = {workunit_id}") + + with client.with_auth(auth): + client.save( + "resource", + { + "base64": params["base64"], + "name": params["resourcename"], + "workunitid": workunit_id, + }, + ) + client.save("workunit", {"id": workunit_id, "status": "available"}) - extract = bfapp.read_object(endpoint='extract', obj={'id': u}) - extract_dict[u] = extract[0] - - try: - for child_extract in extract[0].childextract: - if (child_extract._id not in visited): - - stack.append(child_extract._id) - - except: - pass - - return extract_dict + return jsonify(dict(workunit_id=workunit_id)) -#def wsdl_sample(containerid): +# @app.route("/add_dataset/", methods=["GET", "POST"]) +# def add_dataset(containerid): # try: -# return map(lambda x: {'id': x._id, 'name': x.name}, -# bfapp.read_object(endpoint='sample', obj={'containerid': containerid})) +# queue_content = json.loads(request.data) # except: -# pass - -def compose_ms_queue_dataset(jsoncontent, workunitid, containerid): - obj = {} - try: - obj['name'] = 'generated through http://fgcz-s-028.uzh.ch:8080/queue_generator/' - obj['workunitid'] = workunitid - obj['containerid'] = containerid - obj['attribute'] = [ - {'name': 'File Name', 'position':1, 'type':'String'}, - {'name': 'Condition', 'position':2, 'type':'String'}, - {'name': 'Path', 'position': 3}, - {'name': 'Position', 
'position': 4}, - {'name': 'Inj Vol', 'position': 5, 'type': 'numeric'}, - {'name': 'ExtractID', 'position': 6, 'type': 'extract'} ] - - obj['item'] = list() - - for idx in range(0, len(jsoncontent)): - obj['item'].append({'field': map(lambda x: {'attributeposition': x + 1, 'value': jsoncontent[idx][x]}, range(0, len(jsoncontent[idx]))), 'position': idx + 1}) - - except: - pass - - return obj - - -@app.route('/add_resource', methods=['POST']) -def add_resource(): - try: - queue_content = json.loads(request.data) - print (queue_content) - print("--") - except: - print ("failed: could not get POST content") - return jsonify({'error': 'could not get POST content.'}) - - res = bfapp.save_object('workunit', {'name': queue_content['name'], - 'description': "{}".format(queue_content['workunitdescription'][0]), - 'containerid': queue_content['containerid'], - 'applicationid': queue_content['applicationid'] - }) - print (res) - - workunit_id = res[0]._id - - print (workunit_id) - - res = bfapp.save_object('resource', {'base64': queue_content['base64'], - 'name': queue_content['resourcename'], - 'workunitid': workunit_id}) - - res = bfapp.save_object('workunit', {'id': workunit_id, 'status': 'available'}) - - return jsonify(dict(workunit_id=workunit_id)) - -@app.route('/add_dataset/', methods=['GET', 'POST']) -def add_dataset(containerid): - try: - queue_content = json.loads(request.data) - except: - return jsonify({'error': 'could not get POST content.'}) - - try: - obj = {} - obj['name'] = 'autogenerated dataset by http://fgcz-s-028.uzh.ch:8080/queue_generator/' - obj['containerid'] = containerid - obj['attribute'] = [ {'name':'File Name', 'position':1, 'type':'String'}, - {'name':'Path', 'position':2}, - {'name':'Position', 'position':3}, - {'name':'Inj Vol', 'position':4, 'type':'numeric'}, - {'name':'ExtractID', 'position':5, 'type':'extract'} ] - - obj['item'] = list() - - for idx in range(0, len(queue_content)): - obj['item']\ - .append({'field': map(lambda x: {'attributeposition': x + 1, 'value': queue_content[idx][x]}, range(0, len(queue_content[idx]))), 'position': idx + 1}) - - print (obj) - - except: - return jsonify({'error': 'composing bfabric object failed.'}) - - try: - res = bfapp.save_object(endpoint='dataset', obj=obj)[0] - print ("added dataset {} to bfabric.".format(res._id)) - return (jsonify({'id':res._id})) - - except: - print(res) - return jsonify({'error': 'beaming dataset to bfabric failed.'}) - - - -# @deprecated("Use read instead") -@app.route('/user/', methods=['GET']) -def get_user(containerid): - - users = bfapp.read_object(endpoint='user', obj={'containerid': containerid}) - #not users or - if not users or len(users) == 0: - return jsonify({'error': 'no resources found.'}) - # abort(404) - - return jsonify({'user': users}) +# return jsonify({"error": "could not get POST content."}) +# +# try: +# obj = {} +# obj["name"] = "autogenerated dataset by http://fgcz-s-028.uzh.ch:8080/queue_generator/" +# obj["containerid"] = containerid +# obj["attribute"] = [ +# {"name": "File Name", "position": 1, "type": "String"}, +# {"name": "Path", "position": 2}, +# {"name": "Position", "position": 3}, +# {"name": "Inj Vol", "position": 4, "type": "numeric"}, +# {"name": "ExtractID", "position": 5, "type": "extract"}, +# ] +# +# obj["item"] = list() +# +# for idx in range(0, len(queue_content)): +# obj["item"].append( +# { +# "field": map( +# lambda x: { +# "attributeposition": x + 1, +# "value": queue_content[idx][x], +# }, +# range(0, len(queue_content[idx])), +# ), +# "position": 
idx + 1, +# } +# ) +# +# print(obj) +# +# except: +# return jsonify({"error": "composing bfabric object failed."}) +# +# try: +# res = bfapp.save_object(endpoint="dataset", obj=obj)[0] +# print("added dataset {} to bfabric.".format(res._id)) +# return jsonify({"id": res._id}) +# +# except: +# print(res) +# return jsonify({"error": "beaming dataset to bfabric failed."}) + + +# @app.route("/zip_resource_of_workunitid/", methods=["GET"]) +# def get_zip_resources_of_workunit(workunitid): +# res = map( +# lambda x: x.relativepath, +# bfapp.read_object(endpoint="resource", obj={"workunitid": workunitid}), +# ) +# print(res) +# res = filter(lambda x: x.endswith(".zip"), res) +# return jsonify(res) + +# @app.route("/addworkunit", methods=["GET", "POST"]) +# def add_workunit(): +# appid = request.args.get("appid", None) +# pid = request.args.get("pid", None) +# rname = request.args.get("rname", None) +# +# try: +# content = json.loads(request.data) +# # print content +# except: +# return jsonify({"error": "could not get POST content.", "appid": appid}) +# +# resource_base64 = content["base64"] +# # base64.b64encode(content) +# print(resource_base64) +# +# return jsonify({"rv": "ok"}) -# @deprecated("Use read instead") -@app.route('/sample/', methods=['GET']) -def get_all_sample(containerid): +def setup_logger_prod(name: str = DEFAULT_LOGGER_NAME, address: tuple[str, int] = ("fgcz-ms.uzh.ch", 514)) -> None: + """Sets up the production logger.""" + syslog_handler = logging.handlers.SysLogHandler(address=address) + formatter = logging.Formatter("%(name)s %(message)s") + syslog_handler.setFormatter(formatter) - samples = [] - rv = list(map(lambda p: bfapp.read_object(endpoint='sample', obj={'containerid': containerid}, page=p), range(1,10))) - rv = list(map(lambda x: [] if x is None else x, rv)) - for el in rv: samples.extend(el) + logger = logging.getLogger(name) + logger.setLevel(logging.INFO) + logger.addHandler(syslog_handler) + return logger - try: - annotationDict = {} - for annotationId in filter(lambda x: x is not None, set(map(lambda x: x.groupingvar._id if "groupingvar" in x else None, samples))): - print (annotationId) - annotation = bfapp.read_object(endpoint='annotation', obj={'id': annotationId}) - annotationDict[annotationId] = annotation[0].name - except: - pass - - for sample in samples: - try: - sample['condition'] = annotationDict[sample.groupingvar._id] - except: - sample['condition'] = None - - if len(samples) == 0: - return jsonify({'error': 'no extract found.'}) - # abort(404) - - return jsonify({'samples': samples}) -""" -example -curl http://localhost:5000/zip_resource_of_workunitid/154547 -""" -@app.route('/zip_resource_of_workunitid/', methods=['GET']) -def get_zip_resources_of_workunit(workunitid): - res = map(lambda x: x.relativepath, bfapp.read_object(endpoint='resource', obj={'workunitid': workunitid})) - print (res) - res = filter(lambda x: x.endswith(".zip"), res) - return jsonify(res) +def setup_logger_debug(name: str = DEFAULT_LOGGER_NAME) -> None: + """Sets up the debug logger.""" + logger = logging.getLogger(name) + logger.setLevel(logging.DEBUG) + return logger -@app.route('/query', methods=['GET', 'POST']) -def query(): - try: - content = json.loads(request.data) - except: - return jsonify({'error': 'could not get POST content.', 'appid': appid}) - - print ("PASSWORD CLEARTEXT", content['webservicepassword']) - - bf = bfabric.Bfabric(login=content['login'], - password=content['webservicepassword'], - base_url='http://fgcz-bfabric.uzh.ch/bfabric') - - for i in 
content.keys(): - print ("{}\t{}".format(i, content[i])) - - if 'containerid' in content: - workunits = bf.read_object(endpoint='workunit', - obj={'applicationid': content['applicationid'], - 'containerid': content['containerid']}) - print (workunits) - return jsonify({'workunits': map(lambda x: x._id, workunits)}) - #elif 'query' in content and "{}".format(content['query']) is 'project': +def main() -> None: + """Starts the server, auto-detecting production mode if SSL keys are present.""" + ssl_key_pub = Path("/etc/ssl/fgcz-host.pem") + ssl_key_priv = Path("/etc/ssl/private/fgcz-host_key.pem") + if ssl_key_pub.exists() and ssl_key_priv.exists(): + setup_logger_prod() + app.run( + debug=False, + host="0.0.0.0", + port=5001, + ssl_context=( + str(ssl_key_pub), + str(ssl_key_priv), + ), + ) else: - user = bf.read_object(endpoint='user', obj={'login': content['login']})[0] - projects = map(lambda x: x._id, user.project) - return jsonify({'projects': projects}) - - return jsonify({'error': 'could not process query'}) - -@app.route('/addworkunit', methods=['GET', 'POST']) -def add_workunit(): - appid = request.args.get('appid', None) - pid = request.args.get('pid', None) - rname = request.args.get('rname', None) - - try: - content = json.loads(request.data) - # print content - except: - return jsonify({'error': 'could not get POST content.', 'appid': appid}) - - resource_base64 = content['base64'] - #base64.b64encode(content) - print (resource_base64) + setup_logger_debug() + app.run(debug=False, host="127.0.0.1", port=5000) - return jsonify({'rv': 'ok'}) -if __name__ == '__main__': - if exists('/etc/ssl/fgcz-host.pem') and exists('/etc/ssl/private/fgcz-host_key.pem'): - app.run(debug=False, host="0.0.0.0", port=5001, ssl_context=('/etc/ssl/fgcz-host.pem', '/etc/ssl/private/fgcz-host_key.pem')) - else: - app.run(debug=False, host="127.0.0.1", port=5000) +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 843b1844..bd2e8088 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ Homepage = "https://github.com/fgcz/bfabricPy" Repository = "https://github.com/fgcz/bfabricPy" [project.scripts] -#bfabric_flask="bfabric.scripts.bfabric_flask:main" +"bfabric_flask.py"="bfabric.scripts.bfabric_flask:main" #bfabric_feeder_resource_autoQC="bfabric.scripts.bfabric_feeder_resource_autoQC:main" #bfabric_list_not_existing_storage_directories="bfabric.scripts.bfabric_list_not_existing_storage_directories:main" "bfabric_list_not_available_proteomics_workunits.py"="bfabric.scripts.bfabric_list_not_available_proteomics_workunits:main" From 1810b6437aedd62c76c54e2e5984160df8b2d357 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Wed, 8 May 2024 11:13:38 +0200 Subject: [PATCH 079/129] added another hack for Zeep save, fixed minor read_pagination test bug, refactored save_delete test --- bfabric/src/engine_zeep.py | 16 ++-- .../test_bfabric2_read_pagination.py | 2 +- .../integration/test_bfabric2_save_delete.py | 92 +++++++++---------- 3 files changed, 57 insertions(+), 53 deletions(-) diff --git a/bfabric/src/engine_zeep.py b/bfabric/src/engine_zeep.py index 57e628ed..d895337f 100644 --- a/bfabric/src/engine_zeep.py +++ b/bfabric/src/engine_zeep.py @@ -63,18 +63,22 @@ def read(self, endpoint: str, obj: dict, auth: BfabricAuth, page: int = 1, idonl def readid(self, endpoint: str, obj: dict, auth: BfabricAuth, page: int = 1, includedeletableupdateable: bool = True): raise NotImplementedError("Attempted to use a method `readid` of Zeep, which does not exist") - def 
save(self, endpoint: str, obj: dict, auth: BfabricAuth, skipped_keys: list = None): - query = {'login': auth.login, 'password': auth.password, endpoint: obj} + def save(self, endpoint: str, obj: dict, auth: BfabricAuth): + query = copy.deepcopy(obj) + + # FIXME: Hacks for the cases where Zeep thinks a parameter is compulsory and it is actually not + if endpoint == 'resource': + excl_keys = ['name', 'sampleid', 'storageid', 'workunitid', 'relativepath'] + _zeep_query_append_skipped(query, excl_keys, inplace=True, overwrite=False) + - # If necessary, add skipped keys to the query - if skipped_keys is not None: - query = _zeep_query_append_skipped(query, skipped_keys) + full_query = {'login': auth.login, 'password': auth.password, endpoint: query} client = self._get_client(endpoint) try: with client.settings(strict=False): - res = client.service.save(query) + res = client.service.save(full_query) except AttributeError as e: if e.args[0] == "Service has no operation 'save'": raise BfabricRequestError(f"ZEEP failed to find save method for the {endpoint} endpoint.") from e diff --git a/bfabric/tests/integration/test_bfabric2_read_pagination.py b/bfabric/tests/integration/test_bfabric2_read_pagination.py index f894d910..979c3f47 100644 --- a/bfabric/tests/integration/test_bfabric2_read_pagination.py +++ b/bfabric/tests/integration/test_bfabric2_read_pagination.py @@ -10,7 +10,7 @@ def _calc_query(config, auth, engine: BfabricAPIEngineType, endpoint: str, print("Sending query via", engine) b = Bfabric(config, auth, engine=engine) - response_class = b.read(endpoint, {}, max_results=max_results, idonly=False, includedeletableupdateable=True) + response_class = b.read(endpoint, {}, max_results=max_results) response_dict = response_class.to_list_dict(drop_empty=True, drop_underscores_suds=True, have_sort_responses=True) return list_dict_to_df(response_dict) diff --git a/bfabric/tests/integration/test_bfabric2_save_delete.py b/bfabric/tests/integration/test_bfabric2_save_delete.py index e8236d52..2774744c 100644 --- a/bfabric/tests/integration/test_bfabric2_save_delete.py +++ b/bfabric/tests/integration/test_bfabric2_save_delete.py @@ -39,59 +39,59 @@ def _find_delete_existing_objects_by_name(b: Bfabric, endpoint: str, name_list: return objs_exist_names, delete_response_dict -def _save_delete_workunit(b: Bfabric, verbose: bool = False) -> None: - """ - Integration test. Attempts to create some work units, then delete them. - - We check whether, after creation, the workunits with the target names are found in the API, - and the control workunit is not found (because it is not created) - - We check whether the deletion of the created workunits is successful - - :param b: BFabric Instance - :param verbose: Verbosity - :return: - """ - - endpoint = 'workunit' - workunit_names = ['MewThePokemon', 'TomMGM', 'MinkyLeChat'] - fake_name = 'SpikeTheDog' - all_names = workunit_names + [fake_name] - - # 1. Find and delete any workunits with these names, if they already exist - print("Phase 1: Make sure to clean up workunits with target names, if they somehow already exist") - _find_delete_existing_objects_by_name(b, endpoint, all_names) - - # 2. 
Create some workunits - print("Phase 2: Creating the target units") - new_ids = [] - for name in workunit_names: - workunit1 = {'name': name, 'applicationid': 2, 'description': 'is warm and fluffy', 'containerid': 123} - response = b.save('workunit', workunit1).to_list_dict() # We do the conversion to drop underscores in SUDS - if verbose: - print(response[0]) - - assert len(response) == 1, "Expected a single response from a single saved workunit" - new_ids += [response[0]['id']] - - # 3. Find and delete any workunits with these names, now that they have been created - print("Phase 3: Finding and deleting the created work units, checking if they match expectation") - found_names, deleted_responses = _find_delete_existing_objects_by_name(b, endpoint, all_names) - - assert found_names == workunit_names, "Expected the names found in the API to be the ones we just created" - for resp, trg_id in zip(deleted_responses, new_ids): - assert len(resp) == 1, "Deletion response format unexpected" - assert 'deletionreport' in resp, "Deletion response format unexpected" - assert resp['deletionreport'] == 'Workunit ' + str( - trg_id) + ' removed successfully.', "Deletion response format unexpected" - class BfabricTestSaveDelete(unittest.TestCase): def setUp(self): self.config, self.auth = get_system_auth(config_env="TEST") + def _save_delete_workunit(self, b: Bfabric, verbose: bool = False) -> None: + """ + Integration test. Attempts to create some work units, then delete them. + - We check whether, after creation, the workunits with the target names are found in the API, + and the control workunit is not found (because it is not created) + - We check whether the deletion of the created workunits is successful + + :param b: BFabric Instance + :param verbose: Verbosity + :return: + """ + + endpoint = 'workunit' + workunit_names = ['MewThePokemon', 'TomMGM', 'MinkyLeChat'] + fake_name = 'SpikeTheDog' + all_names = workunit_names + [fake_name] + + # 1. Find and delete any workunits with these names, if they already exist + print("Phase 1: Make sure to clean up workunits with target names, if they somehow already exist") + _find_delete_existing_objects_by_name(b, endpoint, all_names) + + # 2. Create some workunits + print("Phase 2: Creating the target units") + new_ids = [] + for name in workunit_names: + workunit1 = {'name': name, 'applicationid': 2, 'description': 'is warm and fluffy', 'containerid': 123} + response = b.save('workunit', workunit1).to_list_dict() # We do the conversion to drop underscores in SUDS + if verbose: + print(response[0]) + + self.assertEqual(len(response), 1, msg="Expected a single response from a single saved workunit") + new_ids += [response[0]['id']] + + # 3. 
Find and delete any workunits with these names, now that they have been created + print("Phase 3: Finding and deleting the created work units, checking if they match expectation") + found_names, deleted_responses = _find_delete_existing_objects_by_name(b, endpoint, all_names) + + self.assertEqual(found_names, workunit_names, msg="Expected the names found in the API to be the ones we just created") + for resp, trg_id in zip(deleted_responses, new_ids): + self.assertEqual(len(resp), 1, msg="Deletion response format unexpected") + self.assertIn('deletionreport', resp, msg="Deletion response format unexpected") + self.assertEqual(resp['deletionreport'], 'Workunit ' + str(trg_id) + ' removed successfully.', + msg="Deletion response format unexpected") + def test_zeep(self): bZeep = Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP) - _save_delete_workunit(bZeep) + self._save_delete_workunit(bZeep, verbose=True) def test_suds(self): bSuds = Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS) - _save_delete_workunit(bSuds) + self._save_delete_workunit(bSuds, verbose=True) From 3bc2fd951c684f7ad73e15d9cc6c07cf7e982bda Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Wed, 8 May 2024 12:47:18 +0200 Subject: [PATCH 080/129] timestamp safeguard temporarily disabled until bug fixed. Adjusted container for save_delete test to be usable by different logins --- bfabric/bfabric2.py | 2 +- bfabric/tests/integration/test_bfabric2_save_delete.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index c6dc6379..e5cbe0f2 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -218,7 +218,7 @@ def read( :return: List of responses, packaged in the results container """ # Ensure stability - obj = self._add_query_timestamp(obj) + # obj = self._add_query_timestamp(obj) # Get the first page. # NOTE: According to old interface, this is equivalent to plain=True diff --git a/bfabric/tests/integration/test_bfabric2_save_delete.py b/bfabric/tests/integration/test_bfabric2_save_delete.py index 2774744c..c31be6f3 100644 --- a/bfabric/tests/integration/test_bfabric2_save_delete.py +++ b/bfabric/tests/integration/test_bfabric2_save_delete.py @@ -69,7 +69,7 @@ def _save_delete_workunit(self, b: Bfabric, verbose: bool = False) -> None: print("Phase 2: Creating the target units") new_ids = [] for name in workunit_names: - workunit1 = {'name': name, 'applicationid': 2, 'description': 'is warm and fluffy', 'containerid': 123} + workunit1 = {'name': name, 'applicationid': 2, 'description': 'is warm and fluffy', 'containerid': 3000} response = b.save('workunit', workunit1).to_list_dict() # We do the conversion to drop underscores in SUDS if verbose: print(response[0]) From 981a17b93abec16d43b2ed312719464cde308196 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Wed, 8 May 2024 13:43:06 +0200 Subject: [PATCH 081/129] added explicit idonly to read() and read_multi() --- bfabric/bfabric2.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index e5cbe0f2..4b26b503 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -201,6 +201,7 @@ def read( offset: int = 0, readid: bool = False, check: bool = True, + idonly: bool = False ) -> ResultContainer: """Reads objects from the specified endpoint that match all specified attributes in `obj`. By setting `max_results` it is possible to change the number of results that are returned. 
@@ -215,6 +216,7 @@ def read( :param readid: whether to use reading by ID. Currently only available for engine=SUDS TODO: Test the extent to which this method works. Add safeguards :param check: whether to check for errors in the response + :param idonly: whether to return only the ids of the objects :return: List of responses, packaged in the results container """ # Ensure stability @@ -222,7 +224,7 @@ def read( # Get the first page. # NOTE: According to old interface, this is equivalent to plain=True - response, errors = self._read_page(readid, endpoint, obj, page=1) + response, errors = self._read_page(readid, endpoint, obj, page=1, idonly=idonly) try: n_available_pages = response["numberofpages"] @@ -253,7 +255,7 @@ def read( for i_iter, i_page in enumerate(requested_pages): if not (i_iter == 0 and i_page == 1): print("-- reading page", i_page, "of", n_available_pages) - response, errors_page = self._read_page(readid, endpoint, obj, page=i_page) + response, errors_page = self._read_page(readid, endpoint, obj, page=i_page, idonly=idonly) errors += errors_page response_items += response[endpoint][page_offset:] @@ -297,13 +299,13 @@ def delete(self, endpoint: str, id: int | list[int], check: bool = True) -> Resu result.assert_success() return result - def _read_page(self, readid: bool, endpoint: str, query: dict[str, Any], page: int = 1): + def _read_page(self, readid: bool, endpoint: str, query: dict[str, Any], idonly: bool = False, page: int = 1): """Reads the specified page of objects from the specified endpoint that match the query.""" if readid: # https://fgcz-bfabric.uzh.ch/wiki/tiki-index.php?page=endpoint.workunit#Web_Method_readid_ response = self.engine.readid(endpoint, query, auth=self.auth, page=page) else: - response = self.engine.read(endpoint, query, auth=self.auth, page=page) + response = self.engine.read(endpoint, query, auth=self.auth, page=page, idonly=idonly) return response, get_response_errors(response, endpoint) @@ -313,7 +315,8 @@ def _read_page(self, readid: bool, endpoint: str, query: dict[str, Any], page: i # TODO: Is this scope sufficient? Is there ever more than one multi-query parameter, and/or not at the root of dict? def read_multi( - self, endpoint: str, obj: dict, multi_query_key: str, multi_query_vals: list, readid: bool = False + self, endpoint: str, obj: dict, multi_query_key: str, multi_query_vals: list, readid: bool = False, + idonly: bool = False ) -> ResultContainer: """ Makes a 1-parameter multi-query (there is 1 parameter that takes a list of values) @@ -324,6 +327,7 @@ def read_multi( :param multi_query_vals: list of values for which the multi-query is performed :param readid: whether to use reading by ID. Currently only available for engine=SUDS TODO: Test the extent to which this method works. Add safeguards + :param idonly: whether to return only the ids of the objects :return: List of responses, packaged in the results container NOTE: It is assumed that there is only 1 response for each value. @@ -343,7 +347,7 @@ def read_multi( # automatically? If yes, perhaps we don't need this method at all? # TODO: It is assumed that a user requesting multi_query always wants all of the pages. Can anybody think of # exceptions to this? 
- response_this = self.read(endpoint, obj_extended, max_results=None, readid=readid) + response_this = self.read(endpoint, obj_extended, max_results=None, readid=readid, idonly=idonly) response_tot.extend(response_this) return response_tot From d6f00d87126a75d86897c2112ed28f3b53835f63 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 8 May 2024 16:02:52 +0200 Subject: [PATCH 082/129] Update csv2dataset and dataset2csv (#87) - Ports the CSV import/export scripts to the new API - Correctly handle characters like commas. --- bfabric/scripts/bfabric_save_csv2dataset.py | 140 ++++++++++-------- bfabric/scripts/bfabric_save_dataset2csv.py | 84 ++++++----- .../integration/integration_test_helper.py | 18 +++ bfabric/tests/integration/scripts/__init__.py | 0 .../scripts/test_save_csv2dataset.py | 119 +++++++++++++++ .../scripts/test_save_dataset2csv.py | 47 ++++++ pyproject.toml | 7 +- 7 files changed, 312 insertions(+), 103 deletions(-) create mode 100644 bfabric/tests/integration/integration_test_helper.py create mode 100644 bfabric/tests/integration/scripts/__init__.py create mode 100644 bfabric/tests/integration/scripts/test_save_csv2dataset.py create mode 100644 bfabric/tests/integration/scripts/test_save_dataset2csv.py diff --git a/bfabric/scripts/bfabric_save_csv2dataset.py b/bfabric/scripts/bfabric_save_csv2dataset.py index cc67048f..d730814a 100755 --- a/bfabric/scripts/bfabric_save_csv2dataset.py +++ b/bfabric/scripts/bfabric_save_csv2dataset.py @@ -1,5 +1,4 @@ #!/usr/bin/python3 - """ Author: Maria d'Errico @@ -27,71 +26,86 @@ Usage: bfabric_save_csv2dataset.py [-h] --csvfile CSVFILE --name NAME --containerid int [--workunitid int] """ +from __future__ import annotations -import sys -from bfabric import Bfabric -import csv - -def csv2json(csvFilePath): - obj = {} - obj["item"] = [] - obj["attribute"] = [] - types = {int: "Integer", str: "String", float: "Float"} - # Open the csv file in read mode and create a file object - with open(csvFilePath, encoding='utf-8') as csv_file: - # Creating the DictReader iterator - csv_reader = csv.DictReader(csv_file) - nrow = 0 - # Read individual rows of the csv file as a dictionary - for row in csv_reader: - nrow = nrow + 1 - fields = [] - for attr in range(0, len(list(row.keys()))): - if nrow == 1: - # Fill in attributes info - attr_type = type(list(row.values())[attr]) - entry = {"name": list(row.keys())[attr], "position": attr+1, - "type": types[attr_type]} - obj["attribute"].append(entry) - else: - pass - # Fill in values info - field = {"attributeposition": attr+1, - "value": list(row.values())[attr]} - fields.append(field) - item = {"field": fields, "position": nrow} - obj["item"].append(item) - return(obj) - -def main(csv_file, dataset_name, container_id, workunit_id = None): - bfapp = Bfabric() - obj = csv2json(csv_file) - obj['name'] = dataset_name - obj['containerid'] = container_id - if workunit_id is not None: - obj['workunitid'] = workunit_id +import argparse +from pathlib import Path + +import polars as pl + +from bfabric.bfabric2 import Bfabric + + +def polars_to_bfabric_type(dtype: pl.DataType) -> str | None: + if str(dtype).startswith("Int"): + return "Integer" + elif str(dtype).startswith("String"): + return "String" + elif str(dtype).startswith("Float"): + return "Float" else: - pass - endpoint = 'dataset' - res = bfapp.save_object(endpoint=endpoint, obj=obj) - print(res[0]) + return "String" -if __name__ == "__main__": - import argparse - parser = argparse.ArgumentParser(description='Create a B-Fabric dataset') - 
parser.add_argument('--csvfile', required=True, - help='the path to the csv file to be uploaded as dataset') - parser.add_argument('--name', required=True, - help='dataset name as a string') - parser.add_argument('--containerid', metavar='int', required=True, - help='container id') - parser.add_argument('--workunitid', metavar='int', required=False, - help='workunit id') +def polars_to_bfabric_dataset(data: pl.DataFrame) -> dict[str, list[dict[str, int | str | float]]]: + attributes = [ + {"name": col, "position": i + 1, "type": polars_to_bfabric_type(data[col].dtype)} + for i, col in enumerate(data.columns) + ] + items = [ + { + "field": [{"attributeposition": i_field + 1, "value": value} for i_field, value in enumerate(row)], + "position": i_row + 1, + } + for i_row, row in enumerate(data.iter_rows()) + ] + return {"attribute": attributes, "item": items} + + +def bfabric_save_csv2dataset( + client: Bfabric, + csv_file: Path, + dataset_name: str, + container_id: int, + workunit_id: int | None, + sep: str, + has_header: bool, +) -> None: + """Creates a dataset in B-Fabric from a csv file.""" + data = pl.read_csv(csv_file, separator=sep, has_header=has_header) + obj = polars_to_bfabric_dataset(data) + obj["name"] = dataset_name + obj["containerid"] = container_id + if workunit_id is not None: + obj["workunitid"] = workunit_id + endpoint = "dataset" + res = client.save(endpoint=endpoint, obj=obj) + print(res.to_list_dict()[0]) + + +def main() -> None: + """Parses command line arguments and calls `bfabric_save_csv2dataset`.""" + client = Bfabric.from_config(verbose=True) + parser = argparse.ArgumentParser(description="Create a B-Fabric dataset") + parser.add_argument( + "--csvfile", required=True, help="the path to the csv file to be uploaded as dataset", type=Path + ) + parser.add_argument("--name", required=True, help="dataset name as a string") + parser.add_argument("--containerid", type=int, required=True, help="container id") + parser.add_argument("--workunitid", type=int, required=False, help="workunit id") + parser.add_argument("--sep", type=str, default=",", help="the separator to use in the csv file e.g. 
',' or '\\t'") + parser.add_argument("--no-header", action="store_false", dest="has_header", help="the csv file has no header") args = parser.parse_args() - if args.workunitid is None: - main(csv_file = args.csvfile, dataset_name = args.name, container_id = args.containerid) - else: - main(csv_file = args.csvfile, dataset_name = args.name, container_id = args.containerid, - workunit_id = args.workunitid) + bfabric_save_csv2dataset( + client=client, + csv_file=args.csvfile, + dataset_name=args.name, + container_id=args.containerid, + workunit_id=args.workunitid, + sep=args.sep, + has_header=args.has_header, + ) + +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_save_dataset2csv.py b/bfabric/scripts/bfabric_save_dataset2csv.py index a583d9a1..ab2df541 100755 --- a/bfabric/scripts/bfabric_save_dataset2csv.py +++ b/bfabric/scripts/bfabric_save_dataset2csv.py @@ -1,5 +1,4 @@ #!/usr/bin/python3 - """ Author: Maria d'Errico @@ -14,46 +13,57 @@ Usage: bfabric_save_dataset2csv.py [-h] --id DATASET_ID [--dir SCRATCHDIR] Example: bfabric_save_dataset2csv.py --id 32335 && cat dataset.csv """ +from __future__ import annotations + +import argparse +from pathlib import Path + +import polars as pl + +from bfabric.bfabric2 import Bfabric -import sys -from bfabric import Bfabric - -def dataset2csv(ds, outputfile, sep=","): - # ds.attribute contains the list of columns name - with open(outputfile, "w") as f: - f.write("{}\n".format(sep.join(map(lambda x: x.name, ds.attribute)))) - for i in ds.item: - # sort values based on the columns order in attributeposition - for x in i.field: - if not hasattr(x,"value") or x.value == None: - x.value = '' - fields = [(x.value, x.attributeposition) for x in i.field] - fields.sort(key=lambda y: int(y[1])) - f.write("{}\n".format(sep.join([t[0] for t in fields]))) - - -def main(dataset_id, scratchdir): - bfapp = Bfabric() - try: - query_obj = {'id': dataset_id} - ds = bfapp.read_object(endpoint='dataset', obj=query_obj)[0] - except: - print("No input dataset found") - raise +def dataset2csv(dataset: dict, output_path: Path, sep: str) -> None: + """Writes the `dataset` content to csv file at `output_path`.""" + column_names = [x["name"] for x in dataset["attribute"]] + data = [] + for item in dataset["item"]: + row_values = [x.get("value") for x in item["field"]] + data.append(dict(zip(column_names, row_values))) + df = pl.DataFrame(data) + df.write_csv(output_path, separator=sep) + + +def bfabric_save_dataset2csv(client: Bfabric, dataset_id: int, out_dir: Path, sep: str) -> None: + """Saves the dataset with id `dataset_id` to a csv file at `out_dir`.""" + results = client.read(endpoint="dataset", obj={"id": dataset_id}).to_list_dict() + if not results: + raise RuntimeError(f"No dataset found with id '{dataset_id}'") + dataset = results[0] + output_path = out_dir / "dataset.csv" try: - dataset2csv(ds, "{}/dataset.csv".format(scratchdir)) - except: - print("The writing process to '{}'/dataset.csv failed.".format(scratchdir)) + dataset2csv(dataset, output_path=output_path, sep=sep) + except Exception: + print(f"The writing process to '{output_path}' failed.") raise -if __name__ == "__main__": - import argparse - parser = argparse.ArgumentParser(description='Save a B-Fabric dataset to a csv file') - parser.add_argument('--id', metavar='int', required=True, - help='dataset id') - parser.add_argument('--dir', required=False, default='./', - help='the path to the directory where to save the csv file') +def main() -> None: + """Parses arguments and calls 
`bfabric_save_dataset2csv`.""" + client = Bfabric.from_config(verbose=True) + parser = argparse.ArgumentParser(description="Save a B-Fabric dataset to a csv file") + parser.add_argument("--id", metavar="int", required=True, help="dataset id", type=int) + parser.add_argument( + "--dir", + required=False, + type=Path, + default=".", + help="the path to the directory where to save the csv file", + ) + parser.add_argument("--sep", default=",", help="the separator to use in the csv file e.g. ',' or '\\t'") args = parser.parse_args() - main(scratchdir = args.dir, dataset_id = args.id) + bfabric_save_dataset2csv(client=client, out_dir=args.dir, dataset_id=args.id, sep=args.sep) + + +if __name__ == "__main__": + main() diff --git a/bfabric/tests/integration/integration_test_helper.py b/bfabric/tests/integration/integration_test_helper.py new file mode 100644 index 00000000..66a4a1df --- /dev/null +++ b/bfabric/tests/integration/integration_test_helper.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from bfabric.bfabric2 import Bfabric + + +class DeleteEntities: + def __init__(self, client: Bfabric, created_entities: list[tuple[str, int]]): + self.client = client + self.created_entities = created_entities + + def __call__(self): + errors = [] + for entity_type, entity_id in self.created_entities: + errors += self.client.delete(entity_type, entity_id, check=False).errors + if errors: + print("Error deleting entities:", errors) + else: + print("Successfully deleted entities:", self.created_entities) diff --git a/bfabric/tests/integration/scripts/__init__.py b/bfabric/tests/integration/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bfabric/tests/integration/scripts/test_save_csv2dataset.py b/bfabric/tests/integration/scripts/test_save_csv2dataset.py new file mode 100644 index 00000000..26bff4fa --- /dev/null +++ b/bfabric/tests/integration/scripts/test_save_csv2dataset.py @@ -0,0 +1,119 @@ +from __future__ import annotations +import time +import unittest +import datetime +from pathlib import Path +from tempfile import TemporaryDirectory + +import polars as pl +from bfabric.bfabric2 import Bfabric +from bfabric.scripts.bfabric_save_csv2dataset import bfabric_save_csv2dataset +from bfabric.tests.integration.integration_test_helper import DeleteEntities + + +class TestSaveCsv2Dataset(unittest.TestCase): + def setUp(self): + self.mock_client = Bfabric.from_config(config_env="TEST", verbose=True) + self.created_entities = [] + self.addCleanup(DeleteEntities(self.mock_client, self.created_entities)) + + self.sample_data = pl.DataFrame( + [ + { + "Normal": "just a normal string", + "Comma": "contains,some,commas,,,", + "Backslash": "testing\\backslash/support", + "Apostrophe": 'Lot\'s"of"apostrophes', + } + ] + ) + + def test_save_csv2dataset(self): + timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + with TemporaryDirectory() as work_dir: + work_dir = Path(work_dir) + sample_file = work_dir / "sample_table.csv" + self.sample_data.write_csv(sample_file) + + bfabric_save_csv2dataset( + self.mock_client, + csv_file=sample_file, + dataset_name=f"test_dataset {timestamp}", + container_id=3000, + workunit_id=None, + sep=",", + has_header=True, + ) + + # check the result + time.sleep(1) + response = self.mock_client.read("dataset", {"name": f"test_dataset {timestamp}"}).to_list_dict()[0] + self.created_entities.append(("dataset", response["id"])) + + expected_attribute = [ + {"name": "Normal", "position": "1", "type": "String"}, + {"name": "Comma", "position": 
"2", "type": "String"}, + {"name": "Backslash", "position": "3", "type": "String"}, + {"name": "Apostrophe", "position": "4", "type": "String"}, + ] + self.assertListEqual(expected_attribute, response["attribute"]) + + expected_item = [ + { + "field": [ + {"attributeposition": "1", "value": "just a normal string"}, + {"attributeposition": "2", "value": "contains,some,commas,,,"}, + {"attributeposition": "3", "value": "testing\\backslash/support"}, + {"attributeposition": "4", "value": 'Lot\'s"of"apostrophes'}, + ], + "position": "1", + } + ] + self.assertListEqual(expected_item, response["item"]) + + def test_save_csv2dataset_no_header(self): + timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + with TemporaryDirectory() as work_dir: + work_dir = Path(work_dir) + sample_file = work_dir / "sample_table.csv" + self.sample_data.write_csv(sample_file, include_header=False) + + bfabric_save_csv2dataset( + self.mock_client, + csv_file=sample_file, + dataset_name=f"test_dataset {timestamp}", + container_id=3000, + workunit_id=None, + sep=",", + has_header=False, + ) + + # check the result + time.sleep(1) + response = self.mock_client.read("dataset", {"name": f"test_dataset {timestamp}"}).to_list_dict()[0] + self.created_entities.append(("dataset", response["id"])) + + expected_attribute = [ + {"name": "Column_1", "position": "1", "type": "String"}, + {"name": "Column_2", "position": "2", "type": "String"}, + {"name": "Column_3", "position": "3", "type": "String"}, + {"name": "Column_4", "position": "4", "type": "String"}, + ] + self.assertListEqual(expected_attribute, response["attribute"]) + + expected_item = [ + { + "field": [ + {"attributeposition": "1", "value": "just a normal string"}, + {"attributeposition": "2", "value": "contains,some,commas,,,"}, + {"attributeposition": "3", "value": "testing\\backslash/support"}, + {"attributeposition": "4", "value": 'Lot\'s"of"apostrophes'}, + ], + "position": "1", + } + ] + self.assertListEqual(expected_item, response["item"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/bfabric/tests/integration/scripts/test_save_dataset2csv.py b/bfabric/tests/integration/scripts/test_save_dataset2csv.py new file mode 100644 index 00000000..4821bcf0 --- /dev/null +++ b/bfabric/tests/integration/scripts/test_save_dataset2csv.py @@ -0,0 +1,47 @@ +import unittest +from pathlib import Path +from tempfile import TemporaryDirectory +import polars.testing +import polars as pl + +from bfabric.bfabric2 import Bfabric +from bfabric.scripts.bfabric_save_dataset2csv import bfabric_save_dataset2csv + + +class TestSaveDataset2Csv(unittest.TestCase): + def setUp(self): + self.mock_client = Bfabric.from_config(config_env="TEST", verbose=True) + self.dataset_id = 46184 + + def test_save_dataset2csv(self): + with TemporaryDirectory() as temp_dir: + out_dir = Path(temp_dir) + bfabric_save_dataset2csv(self.mock_client, self.dataset_id, out_dir, sep=",") + + expected_lines = [ + r"Normal,Comma,Backslash,Apostrophe", + r"""just a normal string,"contains,some,commas,,,",testing\backslash/support,"Lot's""of""apostrophes""" + '"', + ] + + out_file = out_dir / "dataset.csv" + actual_lines = out_file.read_text().splitlines() + + self.assertListEqual(expected_lines, actual_lines) + + df = pl.read_csv(out_file) + expected_df = pl.DataFrame( + [ + { + "Normal": "just a normal string", + "Comma": "contains,some,commas,,,", + "Backslash": "testing\\backslash/support", + "Apostrophe": 'Lot\'s"of"apostrophes', + } + ] + ) + pl.testing.assert_frame_equal(expected_df, df) + 
+ +if __name__ == "__main__": + unittest.main() diff --git a/pyproject.toml b/pyproject.toml index bd2e8088..efccefe6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,8 @@ dependencies = [ "Flask == 2.2.5", "rich >= 13.7.1", "zeep >= 4.2.1", - "pandas >= 2.2.2" + "pandas >= 2.2.2", + "polars >= 0.20.25", ] [project.optional-dependencies] @@ -56,8 +57,8 @@ Repository = "https://github.com/fgcz/bfabricPy" #bfabric_read"="bfabric.scripts.bfabric_read:main" #bfabric_read_samples_of_workunit="bfabric.scripts.bfabric_read_samples_of_workunit:main" #bfabric_read_samples_from_dataset="bfabric.scripts.bfabric_read_samples_from_dataset:main" -#bfabric_save_csv2dataset="bfabric.scripts.bfabric_save_csv2dataset:main" -#bfabric_save_dataset2csv="bfabric.scripts.bfabric_save_dataset2csv:main" +"bfabric_save_csv2dataset.py"="bfabric.scripts.bfabric_save_csv2dataset:main" +"bfabric_save_dataset2csv.py"="bfabric.scripts.bfabric_save_dataset2csv:main" #bfabric_save_fasta="bfabric.scripts.bfabric_save_fasta:main" #bfabric_save_importresource_sample="bfabric.scripts.bfabric_save_importresource_sample:main" #bfabric_save_link_to_workunit="bfabric.scripts.bfabric_save_link_to_workunit:main" From 1f68300b907d0fa865698c9e4a4f5ee842d405e3 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Wed, 8 May 2024 16:33:18 +0200 Subject: [PATCH 083/129] Refactor read scripts (#86) Refactors some of the protinf read scripts. --- bfabric/scripts/bfabric_list.py | 1 - ...list_not_available_proteomics_workunits.py | 4 +- bfabric/scripts/bfabric_read.py | 194 ++++++++++-------- .../bfabric_read_samples_from_dataset.py | 62 +++--- .../bfabric_read_samples_of_workunit.py | 75 +++---- .../tests/integration/scripts/test_read.py | 41 ++++ pyproject.toml | 6 +- 7 files changed, 209 insertions(+), 174 deletions(-) delete mode 120000 bfabric/scripts/bfabric_list.py create mode 100644 bfabric/tests/integration/scripts/test_read.py diff --git a/bfabric/scripts/bfabric_list.py b/bfabric/scripts/bfabric_list.py deleted file mode 120000 index 5084efd0..00000000 --- a/bfabric/scripts/bfabric_list.py +++ /dev/null @@ -1 +0,0 @@ -bfabric_read.py \ No newline at end of file diff --git a/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py b/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py index 51c7d137..1a4f6ce2 100755 --- a/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py +++ b/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py @@ -19,7 +19,7 @@ from rich.table import Column, Table from bfabric import BfabricConfig -from bfabric.bfabric2 import Bfabric, get_system_auth +from bfabric.bfabric2 import Bfabric def render_output(workunits_by_status: dict[str, list[dict[str, Any]]], config: BfabricConfig) -> None: @@ -59,7 +59,7 @@ def render_output(workunits_by_status: dict[str, list[dict[str, Any]]], config: def list_not_available_proteomics_workunits(date_cutoff: datetime) -> None: """Lists proteomics work units that are not available on bfabric.""" - client = Bfabric(*get_system_auth(), verbose=True) + client = Bfabric.from_config(verbose=True) Console(stderr=True).print( f"--- list not available proteomics work units created after {date_cutoff}---", style="bright_yellow", diff --git a/bfabric/scripts/bfabric_read.py b/bfabric/scripts/bfabric_read.py index 15bcbd30..19733e19 100755 --- a/bfabric/scripts/bfabric_read.py +++ b/bfabric/scripts/bfabric_read.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """B-Fabric command line reader Copyright: 
@@ -15,95 +13,115 @@ See also: http://fgcz-bfabric.uzh.ch/bfabric/executable?wsdl """ - -import signal -import sys +from __future__ import annotations +import argparse +import json import time -import bfabric - - -def signal_handler(signal, frame): - print('You pressed Ctrl+C!') - sys.exit(0) - -signal.signal(signal.SIGINT, signal_handler) - -def print_color_msg(msg, color = "93"): - sys.stderr.write(f"\033[{color}m--- {msg} ---\033[0m\n") - -def usage(): - print(__doc__) - print("usage:\n") - msg = f"\t{sys.argv[0]} " - print(msg) - msg = "\t{} \n\n".format(sys.argv[0]) - print(msg) - print("valid endpoints are: [{}]\n\n".format(",\n\t ".join(bfabric.endpoints))) - print("example:") - msg = "\t{} user login cpanse\n\n".format(sys.argv[0]) - print(msg) - -if __name__ == "__main__": - B = bfabric.Bfabric(verbose=False) - - sys.stderr.write(bfabric.msg) - - query_obj = {} - - try: - endpoint = sys.argv[1] - except: - usage() - sys.exit(1) - - if len(sys.argv) == 4: - attribute = sys.argv[2] - name = sys.argv[3] - query_obj[attribute] = name - - if endpoint in bfabric.endpoints: - print_color_msg(f"query = {query_obj}") - start_time = time.time() - res = B.read_object(endpoint = endpoint, obj = query_obj) - end_time = time.time() - - if res is None: - print_color_msg("Empty result set or invalid query.", color=95) - sys.exit(0) - - try: - res = sorted(res, key=lambda x: x._id) - except: - print_color_msg("sorting failed.") - - try: - # print json object - if len(res) < 2: - print(res[0]) - except Exception as e: - print_color_msg(f"invalid query. {e}.", color=95) - sys.exit(1) - - try: - print_color_msg("possible attributes are: {}.".format((", ".join([at[0] for at in res[0]])))) - except Exception as e: - print_color_msg(f"Exception: {e}") - - for x in res: - try: - print(f"{x._id}\t{x.createdby}\t{x.modified}\t{x.name}\t{x.groupingvar.name}") - except Exception as e: - print(f"{x._id}\t{x.createdby}\t{x.modified}") +import yaml +from typing import Any +from rich.console import Console +from rich.table import Table +import bfabric +from bfabric import BfabricConfig +from bfabric.bfabric2 import Bfabric + + +def bfabric_read( + client: Bfabric, endpoint: str, attribute: str | None, value: str | None, output_format: str +) -> None: + """Reads one or several items from a B-Fabric endpoint and prints them.""" + if attribute is not None and value is None: + message = "value must be provided if attribute is provided" + raise ValueError(message) + + query_obj = {attribute: value} if value is not None else {} + console_info = Console(style="bright_yellow", stderr=True) + console_info.print(f"--- query = {query_obj} ---") + console_out = Console() + + start_time = time.time() + results = client.read(endpoint=endpoint, obj=query_obj) + end_time = time.time() + res = sorted(results.to_list_dict(drop_empty=False), key=lambda x: x["id"]) + if res: + possible_attributes = sorted(set(res[0].keys())) + console_info.print(f"--- possible attributes = {possible_attributes} ---") + + output_format = _determine_output_format(console_out=console_out, output_format=output_format, n_results=len(res)) + console_info.print(f"--- output format = {output_format} ---") + + if output_format == "json": + print(json.dumps(res, indent=2)) + elif output_format == "yaml": + print(yaml.dump(res)) + elif output_format == "table_tsv": + _print_table_tsv(res) + elif output_format == "table_rich": + _print_table_rich(client.config, console_out, endpoint, res) else: - print_color_msg("The first argument must be a valid endpoint.", 
color=95) - usage() - sys.exit(1) + raise ValueError(f"output format {output_format} not supported") + + console_info.print(f"--- number of query result items = {len(res)} ---") + console_info.print(f"--- query time = {end_time - start_time:.2f} seconds ---") + + +def _print_table_rich(config: BfabricConfig, console_out: Console, endpoint: str, res: list[dict[str, Any]]) -> None: + """Prints the results as a rich table to the console.""" + table = Table("Id", "Created By", "Modified", "Name", "Grouping Var") + for x in res: + entry_url = f"{config.base_url}/{endpoint}/show.html?id={x['id']}" + table.add_row( + f"[link={entry_url}]{x['id']}[/link]", + str(x["createdby"]), + str(x["modified"]), + str(x["name"]), + str(x.get("groupingvar", {}).get("name", "")), + ) + console_out.print(table) + + +def _print_table_tsv(res: list[dict[str, Any]]) -> None: + """Prints the results as a tab-separated table, using the original cols this script returned.""" + for x in res: + try: + print(f'{x["id"]}\t{x["createdby"]}\t{x["modified"]}\t{x["name"]}\t{x["groupingvar"]["name"]}') + except (KeyError, TypeError): + print(f'{x["id"]}\t{x["createdby"]}\t{x["modified"]}') + + +def _determine_output_format(console_out: Console, output_format: str, n_results: int) -> str: + """Returns the format to use, based on the number of results, and whether the output is an interactive console. + If the format is already set to a concrete value instead of "auto", it will be returned unchanged. + """ + if output_format == "auto": + if n_results < 2: + output_format = "json" + elif console_out.is_interactive: + output_format = "table_rich" + else: + output_format = "table_tsv" + return output_format + + +def main() -> None: + """Parses command line arguments and calls `bfabric_read`.""" + client = Bfabric.from_config(verbose=True) + parser = argparse.ArgumentParser() + parser.add_argument( + "--format", + help="output format", + choices=["json", "yaml", "table_tsv", "table_rich", "auto"], + default="auto", + dest="output_format", + ) + parser.add_argument("endpoint", help="endpoint to query", choices=bfabric.endpoints) + parser.add_argument("attribute", help="attribute to query for", nargs="?") + parser.add_argument("value", help="value to query for", nargs="?") + args = parser.parse_args() + bfabric_read(client=client, **vars(args)) - try: - print_color_msg(f"number of query result items = {len(res)}") - except: - pass - print_color_msg(f"query time = {round(end_time - start_time, 2)} seconds") +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_read_samples_from_dataset.py b/bfabric/scripts/bfabric_read_samples_from_dataset.py index 77b4f92b..b3e2bf51 100755 --- a/bfabric/scripts/bfabric_read_samples_from_dataset.py +++ b/bfabric/scripts/bfabric_read_samples_from_dataset.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """ Author: Maria d'Errico @@ -16,43 +14,45 @@ Usage: bfabric_read_samples_from_dataset.py datasetid """ +import argparse +from bfabric.bfabric2 import Bfabric -import sys -import os -import csv -import bfabric +def get_table_row(client: Bfabric, relative_path: str) -> tuple[str, int, str, str, str]: + """Returns the row of the table with the information of the resource with the given relative path.""" + resource = client.read(endpoint="resource", obj={"relativepath": relative_path}).to_list_dict()[0] + sample = client.read(endpoint="sample", obj={"id": resource["sample"]["id"]}).to_list_dict()[0] + groupingvar = (sample.get("groupingvar") or {}).get("name") or "" 
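+    # not every sample has a "groupingvar" (it may be missing or None), hence the empty-string fallback above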
+ return resource["workunit"]["id"], resource["id"], resource["name"], sample["name"], groupingvar -B = bfabric.Bfabric() +def bfabric_read_samples_from_dataset(dataset_id: int) -> None: + """Prints the workunit id, inputresource id, inputresource name, sample name and groupingvar name for each resource + in the dataset with the given id.""" + client = Bfabric.from_config(verbose=True) + dataset = client.read(endpoint="dataset", obj={"id": dataset_id}).to_list_dict()[0] -def read_dataset(dataset_id): - ds = B.read_object(endpoint="dataset", obj={'id': dataset_id})[0] - return ds + positions = [a["position"] for a in dataset["attribute"] if a["name"] == "Relative Path"] + if not positions: + raise ValueError(f"No 'Relative Path' attribute found in the dataset {dataset_id}") + relative_path_position = positions[0] -def get_table(relativepath): - res = B.read_object(endpoint='resource', obj={'relativepath': relativepath})[0] - sample = B.read_object(endpoint='sample', obj={'id': res.sample._id})[0] - try: - groupingvar = sample.groupingvar.name - except: - groupingvar = "" - pass - return res.workunit._id, res._id, res.name, sample.name, groupingvar + print("\t".join(["workunit.id", "inputresource.id", "inputresource.name", "sample.name", "groupingvar.name"])) + for item in dataset["item"]: + relative_path = [ + field["value"] for field in item["field"] if field["attributeposition"] == relative_path_position + ][0] + workunitid, resourceid, resourcename, samplename, groupingvar = get_table_row(client, relative_path) + print(f"{workunitid}\t{resourceid}\t{resourcename}\t{samplename}\t{groupingvar}") -def run(dataset_id): - ds = read_dataset(dataset_id) - attributeposition = [x.position for x in ds.attribute if x.name == "Relative Path"][0] - print ("{}\t{}\t{}\t{}\t{}".format('workunit.id', 'inputresource.id', 'inputresource.name', 'sample.name', 'groupingvar.name')) - for i in ds.item: - for x in i.field: - if hasattr(x, "value") and x.attributeposition == attributeposition: - workunitid, resourceid, resourcename, samplename, groupingvar = get_table(x.value) - print ("{}\t{}\t{}\t{}\t{}".format(workunitid, resourceid, resourcename, samplename, groupingvar)) +def main() -> None: + """Parses the command line arguments and calls the function bfabric_read_samples_from_dataset.""" + parser = argparse.ArgumentParser() + parser.add_argument("dataset_id", type=int) + args = parser.parse_args() + bfabric_read_samples_from_dataset(dataset_id=args.dataset_id) if __name__ == "__main__": - dataset_id = int(sys.argv[1]) - run(dataset_id) - + main() diff --git a/bfabric/scripts/bfabric_read_samples_of_workunit.py b/bfabric/scripts/bfabric_read_samples_of_workunit.py index 1841949f..4fb00815 100755 --- a/bfabric/scripts/bfabric_read_samples_of_workunit.py +++ b/bfabric/scripts/bfabric_read_samples_of_workunit.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """ Copyright (C) 2022 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
@@ -15,64 +13,43 @@
 Usage example: bfabric_read_samples_of_workunit.py 278175
 """
-
-import signal
-import sys
+import argparse
 import time
-import bfabric
-
-
-def signal_handler(signal, frame):
-    print('You pressed Ctrl+C!')
-    sys.exit(0)
-
-signal.signal(signal.SIGINT, signal_handler)
+from rich.console import Console
 
-def print_color_msg(msg, color="93"):
-    msg = "\033[{color}m--- {} ---\033[0m\n".format(msg, color=color)
-    sys.stderr.write(msg)
-
-def usage():
-    print("usage:\n")
-    msg = "\t{} ".format(sys.argv[0])
-    print(msg)
-
-
-if __name__ == "__main__":
+from bfabric.bfabric2 import Bfabric
 
-    try:
-        if len(sys.argv) == 2:
-            workunitid = sys.argv[1]
-    except:
-        raise
-
-    B = bfabric.Bfabric(verbose=False)
-    sys.stderr.write(bfabric.msg)
+def bfabric_read_samples_of_workunit(workunit_id: int) -> None:
+    """Reads the samples of the specified workunit and prints the results to stdout."""
+    client = Bfabric.from_config(verbose=True)
     start_time = time.time()
+    res_workunit = client.read(endpoint="workunit", obj={"id": workunit_id}).to_list_dict()[0]
+    input_resource_ids = [x["id"] for x in res_workunit.get("inputresource", [])]
+    input_resources = client.read(endpoint="resource", obj={"id": input_resource_ids}).to_list_dict()
+    input_resources_name = [(r["id"], r["name"]) for r in input_resources]
 
-    res = B.read_object(endpoint="workunit", obj={'id': workunitid})
-
-    inputresources = list(map(lambda x: B.read_object(endpoint="resource", obj={'id': x._id})[0], res[0].inputresource))
-
-    inputresourcesname = list(map(lambda x: (x._id, x.name), inputresources))
-
-    samples = list(map(lambda x: B.read_object(endpoint="sample", obj={'id': x.sample._id})[0], inputresources))
+    samples = client.read(endpoint="sample", obj={"id": [x["sample"]["id"] for x in input_resources]}).to_list_dict()
+    groupingvars = [(s["id"], s["name"], (s.get("groupingvar") or {}).get("name", "NA")) for s in samples]
+    print("\t".join(["workunit_id", "inputresource_id", "inputresource_name", "sample_name", "groupingvar_name"]))
+    for i in zip(input_resources_name, groupingvars):
+        print("\t".join([str(workunit_id), str(i[0][0]), i[0][1], i[1][1], i[1][2]]))
 
-    # no x.groupingvar.name defined
-    try:
-        groupingvars = list(map(lambda x: (x._id, x.name, x.groupingvar.name), samples))
-    except:
-        groupingvars = list(map(lambda x: (x._id, x.name, "NA"), samples))
+    end_time = time.time()
+    Console(stderr=True).print(f"--- query time = {end_time - start_time:.2f} seconds ---", style="bright_yellow")
 
-    print ("{}\t{}\t{}\t{}\t{}".format('workunit.id', 'inputresource.id', 'inputresource.name', 'sample.name', 'groupingvar.name'))
-    for i in zip(inputresourcesname, groupingvars):
-        print ("{}\t{}\t{}\t{}\t{}".format(workunitid, i[0][0], i[0][1], i[1][1], i[1][2]))
 
+def main() -> None:
+    """Parses the command line arguments and calls `bfabric_read_samples_of_workunit`."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("workunit_id", type=int, help="workunit id")
+    args = parser.parse_args()
+    bfabric_read_samples_of_workunit(workunit_id=args.workunit_id)
 
-    end_time = time.time()
-    print_color_msg("query time = {} seconds".format(round(end_time - start_time, 2)))
 
-    sys.exit(0)
+if __name__ == "__main__":
+    main()
diff --git a/bfabric/tests/integration/scripts/test_read.py b/bfabric/tests/integration/scripts/test_read.py
new file mode 100644
index 00000000..33f8f4c4
--- /dev/null
+++ b/bfabric/tests/integration/scripts/test_read.py
@@ -0,0 +1,41 @@
+import contextlib
+import json
+import unittest
+from io import StringIO
+
+import yaml
+
+from bfabric.bfabric2 import Bfabric
+from bfabric.scripts.bfabric_read import bfabric_read
+from bfabric.tests.integration.integration_test_helper import DeleteEntities
+
+
+class TestRead(unittest.TestCase):
+    def setUp(self):
+        self.client = Bfabric.from_config(config_env="TEST")
+        self.delete_entities = DeleteEntities(client=self.client, created_entities=[])
+        self.addCleanup(self.delete_entities)
+
+        self.example = {"endpoint": "resource"}
+
+    def test_read_json(self):
+        out = StringIO()
+        with contextlib.redirect_stdout(out):
+            bfabric_read(
+                client=self.client, endpoint=self.example["endpoint"], attribute=None, value=None, output_format="json"
+            )
+        parsed = json.loads(out.getvalue())
+        self.assertEqual(100, len(parsed))
+
+    def test_read_yaml(self):
+        out = StringIO()
+        with contextlib.redirect_stdout(out):
+            bfabric_read(
+                client=self.client, endpoint=self.example["endpoint"], attribute=None, value=None, output_format="yaml"
+            )
+        parsed = yaml.safe_load(out.getvalue())
+        self.assertEqual(100, len(parsed))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/pyproject.toml b/pyproject.toml
index efccefe6..b7ecf1f3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -54,9 +54,9 @@ Repository = "https://github.com/fgcz/bfabricPy"
 #bfabric_setWorkunitStatus_processing="bfabric.scripts.bfabric_setWorkunitStatus_processing:main"
 #bfabric_setWorkunitStatus_failed="bfabric.scripts.bfabric_setWorkunitStatus_failed:main"
 #bfabric_delete="bfabric.scripts.bfabric_delete:main"
-#bfabric_read"="bfabric.scripts.bfabric_read:main"
-#bfabric_read_samples_of_workunit="bfabric.scripts.bfabric_read_samples_of_workunit:main"
-#bfabric_read_samples_from_dataset="bfabric.scripts.bfabric_read_samples_from_dataset:main"
+"bfabric_read.py"="bfabric.scripts.bfabric_read:main"
+"bfabric_read_samples_of_workunit.py"="bfabric.scripts.bfabric_read_samples_of_workunit:main"
+"bfabric_read_samples_from_dataset.py"="bfabric.scripts.bfabric_read_samples_from_dataset:main"
 "bfabric_save_csv2dataset.py"="bfabric.scripts.bfabric_save_csv2dataset:main"
 "bfabric_save_dataset2csv.py"="bfabric.scripts.bfabric_save_dataset2csv:main"
 #bfabric_save_fasta="bfabric.scripts.bfabric_save_fasta:main"
From 59c1724b371fc325873f134af00c2f89c423ba5b Mon Sep 17 00:00:00 2001
From: Leonardo Schwarz
Date: Wed, 8 May 2024 17:06:04 +0200
Subject: [PATCH 084/129] Refactor upload scripts (#92)

Refactors bfabric/scripts/bfabric_upload_resource.py and
bfabric/scripts/bfabric_upload_submitter_executable.py; however, the latter
was not tested.
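
For reference, the new `Bfabric.upload_resource` helper introduced by this
patch base64-encodes the file content and saves it as a `resource` attached to
an existing workunit. A minimal usage sketch (the workunit id below is a
placeholder, not part of this patch):

    from bfabric.bfabric2 import Bfabric

    client = Bfabric.from_config(verbose=True)
    # assumes a workunit with id 123 already exists
    result = client.upload_resource(
        resource_name="hello.txt",
        content=b"Hello, World!",
        workunit_id=123,
    )
    print(result.to_list_dict()[0]["id"])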
--- bfabric/bfabric2.py | 22 +++ bfabric/scripts/bfabric_upload_resource.py | 33 ++-- .../bfabric_upload_submitter_executable.py | 165 +++++++++--------- .../scripts/test_upload_resource.py | 78 +++++++++ bfabric/tests/unit/test_bfabric.py | 30 +++- pyproject.toml | 2 +- 6 files changed, 230 insertions(+), 100 deletions(-) create mode 100644 bfabric/tests/integration/scripts/test_upload_resource.py diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 4b26b503..62cbc7a3 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -299,6 +299,28 @@ def delete(self, endpoint: str, id: int | list[int], check: bool = True) -> Resu result.assert_success() return result + def upload_resource( + self, resource_name: str, content: bytes, workunit_id: int, check: bool = True + ) -> ResultContainer: + """Uploads a resource to B-Fabric, only intended for relatively small files that will be tracked by B-Fabric + and not one of the dedicated experimental data stores. + :param resource_name: the name of the resource to create (the same name can only exist once per workunit) + :param content: the content of the resource as bytes + :param workunit_id: the workunit ID to which the resource belongs + :param check: whether to check for errors in the response + """ + content_encoded = base64.b64encode(content).decode() + return self.save( + endpoint="resource", + obj={ + "base64": content_encoded, + "name": resource_name, + "description": "base64 encoded file", + "workunitid": workunit_id, + }, + check=check, + ) + def _read_page(self, readid: bool, endpoint: str, query: dict[str, Any], idonly: bool = False, page: int = 1): """Reads the specified page of objects from the specified endpoint that match the query.""" if readid: diff --git a/bfabric/scripts/bfabric_upload_resource.py b/bfabric/scripts/bfabric_upload_resource.py index fe8c6192..0228dec6 100755 --- a/bfabric/scripts/bfabric_upload_resource.py +++ b/bfabric/scripts/bfabric_upload_resource.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """ Copyright (C) 2017,2020 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
@@ -12,15 +10,28 @@ this script takes a blob file and a workunit id as input and adds the file as resource to bfabric """ +import argparse +import json +from pathlib import Path + +from bfabric.bfabric2 import Bfabric + + +def bfabric_upload_resource(client: Bfabric, filename: Path, workunit_id: int) -> None: + """Uploads the specified file to the workunit with the name of the file as resource name.""" + result = client.upload_resource(resource_name=filename.name, content=filename.read_bytes(), workunit_id=workunit_id) + print(json.dumps(result.to_list_dict(), indent=2)) + + +def main() -> None: + """Parses the command line arguments and calls `bfabric_upload_resource`.""" + client = Bfabric.from_config(verbose=True) + parser = argparse.ArgumentParser() + parser.add_argument("filename", help="filename", type=Path) + parser.add_argument("workunitid", help="workunitid", type=int) + args = parser.parse_args() + bfabric_upload_resource(client=client, filename=args.filename, workunit_id=args.workunitid) -import sys -import os -from bfabric import Bfabric if __name__ == "__main__": - if len(sys.argv) == 3 and os.path.isfile(sys.argv[1]): - B = Bfabric() - B.print_json(B.upload_file(filename = sys.argv[1], workunitid = int(sys.argv[2]))) - else: - print("usage:\nbfabric_upload_resource.py ") - sys.exit(1) + main() diff --git a/bfabric/scripts/bfabric_upload_submitter_executable.py b/bfabric/scripts/bfabric_upload_submitter_executable.py index f44d9117..8aa6ecad 100755 --- a/bfabric/scripts/bfabric_upload_submitter_executable.py +++ b/bfabric/scripts/bfabric_upload_submitter_executable.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """ Uploader for B-Fabric """ @@ -30,7 +28,7 @@ # # # Example of use: -# +# # For bfabric.__version__ < 0.10.22 # # ./bfabric_upload_submitter_executable.py bfabric_executable_submitter_functionalTest.py gridengine --name "Dummy - yaml / Grid Engine executable" --description "Dummy submitter for the bfabric functional test using Grid Engine." 
@@ -45,100 +43,101 @@ # ./bfabric_upload_submitter_executable.py bfabric_executable_submitter_functionalTest.py slurm --name "Dummy_-_yaml___Slurm_executable" --description "test new submitter's parameters" # -import os -import sys -import base64 -from bfabric import Bfabric import argparse +import base64 + +import yaml + +from bfabric.bfabric2 import Bfabric -SVN="$HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/scripts/bfabric_upload_submitter_executable.py $" - -def setup(argv=sys.argv[1:]): - argparser = argparse.ArgumentParser(description="Arguments for new submitter executable.\nFor more details run: ./bfabric_upload_submitter_executable.py --help") - argparser.add_argument('filename', type=str, help="Bash executable of the submitter") - argparser.add_argument('engine', type=str, choices=['slurm', 'gridengine'], help="Valid engines for job handling are: slurm, gridengine") - argparser.add_argument('--name', type=str, help="Name of the submitter", required=False) - argparser.add_argument('--description', type=str, help="Description about the submitter", required=False) - if len(sys.argv) < 3: - argparser.print_help(sys.stderr) - sys.exit(1) - options = argparser.parse_args() - return options - -def main(options): + +def main_upload_submitter_executable(options) -> None: executableFileName = options.filename engine = options.engine - bfapp = Bfabric() + client = Bfabric.from_config(verbose=True) - with open(executableFileName, 'r') as f: + with open(executableFileName) as f: executable = f.read() - attr = { 'context': 'SUBMITTER', - 'parameter': [{'modifiable': 'true', - 'required': 'true', - 'type':'STRING'}, - {'modifiable': 'true', - 'required': 'true', - 'type':'STRING'}, - {'modifiable': 'true', - 'required': 'true', - 'type':'STRING'}], - 'masterexecutableid': 11871, - 'status': 'available', - 'enabled': 'true', - 'valid': 'true', - 'base64': base64.b64encode(executable.encode()).decode() } + attr = { + "context": "SUBMITTER", + "parameter": [ + {"modifiable": "true", "required": "true", "type": "STRING"}, + {"modifiable": "true", "required": "true", "type": "STRING"}, + {"modifiable": "true", "required": "true", "type": "STRING"}, + ], + "masterexecutableid": 11871, + "status": "available", + "enabled": "true", + "valid": "true", + "base64": base64.b64encode(executable.encode()).decode(), + } if engine == "slurm": - attr['name'] = 'yaml / Slurm executable' - attr['parameter'][0]['description'] = 'Which Slurm partition should be used.' - attr['parameter'][0]['enumeration'] = ['prx','maxquant','scaffold','mascot'] - attr['parameter'][0]['key'] = 'partition' - attr['parameter'][0]['label'] = 'partition' - attr['parameter'][0]['value'] = 'prx' - attr['parameter'][1]['description'] = 'Which Slurm nodelist should be used.' - attr['parameter'][1]['enumeration'] = ['fgcz-r-[035,028]','fgcz-r-035','fgcz-r-033','fgcz-r-028','fgcz-r-018'] - attr['parameter'][1]['key'] = 'nodelist' - attr['parameter'][1]['label'] = 'nodelist' - attr['parameter'][1]['value'] = 'fgcz-r-[035,028]' - attr['parameter'][2]['description'] = 'Which Slurm memory should be used.' - attr['parameter'][2]['enumeration'] = ['10G','50G','128G','256G','512G','960G'] - attr['parameter'][2]['key'] = 'memory' - attr['parameter'][2]['label'] = 'memory' - attr['parameter'][2]['value'] = '10G' - attr['version'] = 1.02 - attr['description'] = 'Stage the yaml config file to application using Slurm.' 
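+        # Slurm variant: the three STRING parameters configured below map to
+        # partition, nodelist and memory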
+ attr["name"] = "yaml / Slurm executable" + attr["parameter"][0]["description"] = "Which Slurm partition should be used." + attr["parameter"][0]["enumeration"] = ["prx", "maxquant", "scaffold", "mascot"] + attr["parameter"][0]["key"] = "partition" + attr["parameter"][0]["label"] = "partition" + attr["parameter"][0]["value"] = "prx" + attr["parameter"][1]["description"] = "Which Slurm nodelist should be used." + attr["parameter"][1]["enumeration"] = [ + "fgcz-r-[035,028]", + "fgcz-r-035", + "fgcz-r-033", + "fgcz-r-028", + "fgcz-r-018", + ] + attr["parameter"][1]["key"] = "nodelist" + attr["parameter"][1]["label"] = "nodelist" + attr["parameter"][1]["value"] = "fgcz-r-[035,028]" + attr["parameter"][2]["description"] = "Which Slurm memory should be used." + attr["parameter"][2]["enumeration"] = ["10G", "50G", "128G", "256G", "512G", "960G"] + attr["parameter"][2]["key"] = "memory" + attr["parameter"][2]["label"] = "memory" + attr["parameter"][2]["value"] = "10G" + attr["version"] = 1.02 + attr["description"] = "Stage the yaml config file to application using Slurm." elif engine == "gridengine": - attr['name'] = 'yaml / Grid Engine executable' - attr['parameter'][0]['description'] = 'Which Grid Engine partition should be used.' - attr['parameter'][0]['enumeration'] = 'PRX' - attr['parameter'][0]['key'] = 'partition' - attr['parameter'][0]['label'] = 'partition' - attr['parameter'][0]['value'] = 'PRX' - attr['parameter'][1]['description'] = 'Which Grid Engine node should be used.' - attr['parameter'][1]['enumeration'] = ['fgcz-r-033','fgcz-r-028','fgcz-r-018'] - attr['parameter'][1]['key'] = 'nodelist' - attr['parameter'][1]['label'] = 'nodelist' - attr['parameter'][1]['value'] = 'fgcz-r-028' - attr['version'] = 1.00 - attr['description'] = 'Stage the yaml config file to an application using Grid Engine.' + attr["name"] = "yaml / Grid Engine executable" + attr["parameter"][0]["description"] = "Which Grid Engine partition should be used." + attr["parameter"][0]["enumeration"] = "PRX" + attr["parameter"][0]["key"] = "partition" + attr["parameter"][0]["label"] = "partition" + attr["parameter"][0]["value"] = "PRX" + attr["parameter"][1]["description"] = "Which Grid Engine node should be used." + attr["parameter"][1]["enumeration"] = ["fgcz-r-033", "fgcz-r-028", "fgcz-r-018"] + attr["parameter"][1]["key"] = "nodelist" + attr["parameter"][1]["label"] = "nodelist" + attr["parameter"][1]["value"] = "fgcz-r-028" + attr["version"] = 1.00 + attr["description"] = "Stage the yaml config file to an application using Grid Engine." 
     if options.name:
-        attr['name'] = options.name
-    else:
-        pass
+        attr["name"] = options.name
     if options.description:
-        attr['description'] = options.description
-    else:
-        pass
-
-    res = bfapp.save_object('executable', attr)
-
-    bfapp.print_yaml(res)
+        attr["description"] = options.description
+
+    res = client.save("executable", attr)
+    print(yaml.dump(res))
+
+
+def main() -> None:
+    """Parses command line arguments and calls `main_upload_submitter_executable`."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("filename", type=str, help="Bash executable of the submitter")
+    parser.add_argument(
+        "engine",
+        type=str,
+        choices=["slurm", "gridengine"],
+        help="Valid engines for job handling are: slurm, gridengine",
+    )
+    parser.add_argument("--name", type=str, help="Name of the submitter", required=False)
+    parser.add_argument("--description", type=str, help="Description about the submitter", required=False)
+    options = parser.parse_args()
+    main_upload_submitter_executable(options)
 
 
 if __name__ == "__main__":
-    options = setup()
-    main(options)
-
+    main()
diff --git a/bfabric/tests/integration/scripts/test_upload_resource.py b/bfabric/tests/integration/scripts/test_upload_resource.py
new file mode 100644
index 00000000..7e99a282
--- /dev/null
+++ b/bfabric/tests/integration/scripts/test_upload_resource.py
@@ -0,0 +1,78 @@
+import contextlib
+import datetime
+import hashlib
+import json
+import unittest
+from io import StringIO
+from pathlib import Path
+from tempfile import TemporaryDirectory
+
+from bfabric.bfabric2 import Bfabric
+from bfabric.scripts.bfabric_upload_resource import bfabric_upload_resource
+from bfabric.tests.integration.integration_test_helper import DeleteEntities
+
+
+class TestUploadResource(unittest.TestCase):
+    def setUp(self):
+        self.client = Bfabric.from_config(config_env="TEST", verbose=True)
+        self.delete_results = DeleteEntities(client=self.client, created_entities=[])
+        self.addCleanup(self.delete_results)
+        self.container_id = 3000
+
+        self.ts = datetime.datetime.now().isoformat()
+
+    def _create_workunit(self):
+        # create workunit
+        workunit = self.client.save(
+            "workunit", {"containerid": self.container_id, "name": f"Testing {self.ts}", "applicationid": 1}
+        ).to_list_dict()[0]
+        self.delete_results.created_entities.append(("workunit", workunit["id"]))
+        return workunit["id"]
+
+    def test_upload_resource(self):
+        with TemporaryDirectory() as work_dir:
+            work_dir = Path(work_dir)
+            file = work_dir / "test.txt"
+            file.write_text("Hello World!")
+
+            workunit_id = self._create_workunit()
+
+            # upload resource
+            out_text = StringIO()
+            with contextlib.redirect_stdout(out_text):
+                bfabric_upload_resource(client=self.client, filename=file, workunit_id=workunit_id)
+            resp = json.loads(out_text.getvalue())[0]
+
+            # expected checksum
+            expected_checksum = hashlib.md5(file.read_bytes()).hexdigest()
+
+            # check resource
+            resource = self.client.read("resource", {"id": resp["id"]}).to_list_dict()[0]
+            self.assertEqual(file.name, resource["name"])
+            self.assertEqual("base64 encoded file", resource["description"])
+            self.assertEqual(expected_checksum, resource["filechecksum"])
+
+    def test_upload_resource_when_already_exists(self):
+        with TemporaryDirectory() as work_dir:
+            work_dir = Path(work_dir)
+            file = work_dir / "test.txt"
+            file.write_text("Hello World!")
+
+            workunit_id = self._create_workunit()
+
+            # upload resource
+            out_text = StringIO()
+            with contextlib.redirect_stdout(out_text):
+                bfabric_upload_resource(client=self.client, filename=file, workunit_id=workunit_id)
+            resp = json.loads(out_text.getvalue())[0]
+            self.assertEqual(workunit_id, resp["workunit"]["id"])
+
+            # upload resource again
+            with self.assertRaises(RuntimeError) as error:
+                bfabric_upload_resource(client=self.client, filename=file, workunit_id=workunit_id)
+
+            self.assertIn("Resource with the specified attribute combination already exists", str(error.exception))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/bfabric/tests/unit/test_bfabric.py b/bfabric/tests/unit/test_bfabric.py
index 58c2db03..937d731e 100644
--- a/bfabric/tests/unit/test_bfabric.py
+++ b/bfabric/tests/unit/test_bfabric.py
@@ -95,12 +95,12 @@ def test_with_auth_when_exception(self):
     @patch("bfabric.bfabric2.datetime")
     def test_add_query_timestamp_when_not_present(self, module_datetime):
         module_datetime.now.return_value = datetime.datetime(2020, 1, 2, 3, 4, 5)
-        query = self.mock_bfabric._add_query_timestamp( {"a": "b", "c": 1})
+        query = self.mock_bfabric._add_query_timestamp({"a": "b", "c": 1})
         self.assertDictEqual(
-            {"a": "b", "c": 1, 'createdbefore': '2020-01-02T03:04:05'},
+            {"a": "b", "c": 1, "createdbefore": "2020-01-02T03:04:05"},
             query,
         )
-        module_datetime.now.assert_called_once_with(ZoneInfo('Pacific/Kiritimati'))
+        module_datetime.now.assert_called_once_with(ZoneInfo("Pacific/Kiritimati"))
 
     @patch("bfabric.bfabric2.datetime")
     def test_add_query_timestamp_when_set_and_past(self, module_datetime):
@@ -113,7 +113,7 @@ def test_add_query_timestamp_when_set_and_past(self, module_datetime):
             {"a": "b", "createdbefore": "2019-12-31T23:59:59"},
             query,
         )
-        module_datetime.now.assert_called_once_with(ZoneInfo('Pacific/Kiritimati'))
+        module_datetime.now.assert_called_once_with(ZoneInfo("Pacific/Kiritimati"))
 
     @patch("bfabric.bfabric2.datetime")
     def test_add_query_timestamp_when_set_and_future(self, module_datetime):
@@ -129,6 +129,24 @@ def test_add_query_timestamp_when_set_and_future(self, module_datetime):
         self.assertEqual(1, len(logs.output))
         self.assertIn("Query timestamp is in the future: 2020-01-02 03:04:06", logs.output[0])
 
+    @patch.object(Bfabric, "save")
+    def test_upload_resource(self, method_save):
+        resource_name = "hello_world.txt"
+        content = b"Hello, World!"
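+        # note: base64.b64encode(b"Hello, World!") yields b"SGVsbG8sIFdvcmxkIQ==",
+        # the encoded value expected in the mocked save call below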
+ workunit_id = 123 + check = MagicMock(name="check") + self.mock_bfabric.upload_resource(resource_name, content, workunit_id, check) + method_save.assert_called_once_with( + endpoint="resource", + obj={ + "base64": "SGVsbG8sIFdvcmxkIQ==", + "workunitid": 123, + "name": "hello_world.txt", + "description": "base64 encoded file", + }, + check=check, + ) + def test_get_version_message(self): self.mock_config.base_url = "dummy_url" message = self.mock_bfabric.get_version_message() @@ -147,7 +165,9 @@ def test_print_version_message(self, method_get_version_message, mock_console): mock_stderr = MagicMock(name="mock_stderr") self.mock_bfabric.print_version_message(stderr=mock_stderr) mock_console.assert_called_once_with(stderr=mock_stderr, highlighter=ANY, theme=ANY) - mock_console.return_value.print.assert_called_once_with(method_get_version_message.return_value, style="bright_yellow") + mock_console.return_value.print.assert_called_once_with( + method_get_version_message.return_value, style="bright_yellow" + ) if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index b7ecf1f3..78b1b2ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ Repository = "https://github.com/fgcz/bfabricPy" #bfabric_feeder_resource_autoQC="bfabric.scripts.bfabric_feeder_resource_autoQC:main" #bfabric_list_not_existing_storage_directories="bfabric.scripts.bfabric_list_not_existing_storage_directories:main" "bfabric_list_not_available_proteomics_workunits.py"="bfabric.scripts.bfabric_list_not_available_proteomics_workunits:main" -#bfabric_upload_resource="bfabric.scripts.bfabric_upload_resource:main" +"bfabric_upload_resource.py"="bfabric.scripts.bfabric_upload_resource:main" #bfabric_logthis="bfabric.scripts.bfabric_logthis:main" #bfabric_setResourceStatus_available="bfabric.scripts.bfabric_setResourceStatus_available:main" #bfabric_setExternalJobStatus_done="bfabric.scripts.bfabric_setExternalJobStatus_done:main" From a2c5f64ffb9a0e5b4aba3044bc6024e3a697fafb Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 10 May 2024 08:44:58 +0200 Subject: [PATCH 085/129] Remove add_query_timestamp logic This was a flawed attempt at introducing some consistency on the API. As we discussed earlier this week, there might be a simpler solution to achieve the same result, and there are some general limitations on the consistency that is achieved by this. --- bfabric/bfabric2.py | 24 ------------ bfabric/bfabric_config.py | 14 +------ bfabric/tests/unit/example_config.yml | 1 - bfabric/tests/unit/test_bfabric.py | 46 ++--------------------- bfabric/tests/unit/test_bfabric_config.py | 7 +--- 5 files changed, 7 insertions(+), 85 deletions(-) diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 62cbc7a3..5be114b3 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -31,7 +31,6 @@ from enum import Enum from pprint import pprint from typing import Any, Literal, ContextManager -from zoneinfo import ZoneInfo from rich.console import Console @@ -129,7 +128,6 @@ def __init__( self.query_counter = 0 self._config = config self._auth = auth - self._zone_info = ZoneInfo(config.server_timezone) if engine == BfabricAPIEngineType.SUDS: self.engine = EngineSUDS(base_url=config.base_url) @@ -219,9 +217,6 @@ def read( :param idonly: whether to return only the ids of the objects :return: List of responses, packaged in the results container """ - # Ensure stability - # obj = self._add_query_timestamp(obj) - # Get the first page. 
# NOTE: According to old interface, this is equivalent to plain=True response, errors = self._read_page(readid, endpoint, obj, page=1, idonly=idonly) @@ -266,25 +261,6 @@ def read( result.assert_success() return result - def _add_query_timestamp(self, query: dict[str, Any]) -> dict[str, Any]: - """Adds the current time as a createdbefore timestamp to the query, if there is no time in the query already. - This ensures pagination will be robust to insertion of new items during the query. - If a time is already present, it will be left as is, but a warning will be printed if it is in the future as - the query will not be robust to insertion of new items. - Note that this does not ensure robustness against deletion of items. - """ - server_time = datetime.now(self._zone_info) - if "createdbefore" in query: - query_time = datetime.fromisoformat(query["createdbefore"]) - if query_time > server_time: - logging.warning( - f"Warning: Query timestamp is in the future: {query_time}. " - "This will not be robust to insertion of new items." - ) - return query - else: - return {**query, "createdbefore": server_time.strftime("%Y-%m-%dT%H:%M:%S")} - def save(self, endpoint: str, obj: dict, check: bool = True) -> ResultContainer: results = self.engine.save(endpoint, obj, auth=self.auth) result = ResultContainer(results[endpoint], self.result_type, errors=get_response_errors(results, endpoint)) diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index b57dc86b..a462a99c 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -31,7 +31,6 @@ class BfabricConfig: base_url (optional): The API base url application_ids (optional): Map of application names to ids. job_notification_emails (optional): Space-separated list of email addresses to notify when a job finishes. 
- server_timezone (optional): Timezone name of the server (used for queries) """ def __init__( @@ -39,12 +38,10 @@ def __init__( base_url: str | None = None, application_ids: dict[str, int] = None, job_notification_emails: str | None = None, - server_timezone: str = "Europe/Zurich", ) -> None: self._base_url = base_url or "https://fgcz-bfabric.uzh.ch/bfabric" self._application_ids = application_ids or {} self._job_notification_emails = job_notification_emails or "" - self._server_timezone = server_timezone @property def base_url(self) -> str: @@ -61,11 +58,6 @@ def job_notification_emails(self) -> str: """Space-separated list of email addresses to notify when a job finishes.""" return self._job_notification_emails - @property - def server_timezone(self) -> str: - """Timezone name of the server (used for queries).""" - return self._server_timezone - def copy_with( self, base_url: str | None = None, @@ -76,14 +68,12 @@ def copy_with( base_url=base_url if base_url is not None else self.base_url, application_ids=(application_ids if application_ids is not None else self.application_ids), job_notification_emails=self.job_notification_emails, - server_timezone=self.server_timezone, ) def __repr__(self) -> str: return ( f"BfabricConfig(base_url={repr(self.base_url)}, application_ids={repr(self.application_ids)}, " - f"job_notification_emails={repr(self.job_notification_emails)}, " - f"server_timezone={repr(self.server_timezone)})" + f"job_notification_emails={repr(self.job_notification_emails)})" ) @@ -199,7 +189,7 @@ def read_config( config_dict = _parse_dict( config_dict, ["base_url"], - optional_keys=["application_ids", "job_notification_emails", "server_timezone"], + optional_keys=["application_ids", "job_notification_emails"], error_prefix=error_prefix, ) config = BfabricConfig(**config_dict) diff --git a/bfabric/tests/unit/example_config.yml b/bfabric/tests/unit/example_config.yml index 2f2d1134..75e7eaec 100644 --- a/bfabric/tests/unit/example_config.yml +++ b/bfabric/tests/unit/example_config.yml @@ -15,7 +15,6 @@ TEST: Proteomics/DOG_552: 6 Proteomics/DUCK_666: 12 job_notification_emails: john.snow@fgcz.uzh.ch billy.the.kid@fgcz.ethz.ch - server_timezone: UTC STANDBY: base_url: https://standby-server.uzh.ch/mystandby \ No newline at end of file diff --git a/bfabric/tests/unit/test_bfabric.py b/bfabric/tests/unit/test_bfabric.py index 937d731e..1892ac34 100644 --- a/bfabric/tests/unit/test_bfabric.py +++ b/bfabric/tests/unit/test_bfabric.py @@ -1,9 +1,7 @@ import datetime -import logging import unittest from functools import cached_property from unittest.mock import MagicMock, patch, ANY -from zoneinfo import ZoneInfo from bfabric import BfabricConfig from bfabric.bfabric2 import BfabricAPIEngineType, Bfabric @@ -13,7 +11,6 @@ class TestBfabric(unittest.TestCase): def setUp(self): self.mock_config = MagicMock(name="mock_config", spec=BfabricConfig) - self.mock_config.server_timezone = "Pacific/Kiritimati" self.mock_auth = None self.mock_engine_type = BfabricAPIEngineType.SUDS self.mock_engine = MagicMock(name="mock_engine", spec=EngineSUDS) @@ -24,7 +21,7 @@ def mock_bfabric(self) -> Bfabric: @patch("bfabric.bfabric2.get_system_auth") def test_from_config_when_no_args(self, mock_get_system_auth): - mock_config = MagicMock(name="mock_config", server_timezone="Pacific/Kiritimati") + mock_config = MagicMock(name="mock_config") mock_auth = MagicMock(name="mock_auth") mock_get_system_auth.return_value = (mock_config, mock_auth) client = Bfabric.from_config() @@ -35,7 +32,7 @@ def 
test_from_config_when_no_args(self, mock_get_system_auth): @patch("bfabric.bfabric2.get_system_auth") def test_from_config_when_explicit_auth(self, mock_get_system_auth): - mock_config = MagicMock(name="mock_config", server_timezone="Pacific/Kiritimati") + mock_config = MagicMock(name="mock_config") mock_auth = MagicMock(name="mock_auth") mock_config_auth = MagicMock(name="mock_config_auth") mock_get_system_auth.return_value = (mock_config, mock_config_auth) @@ -47,7 +44,7 @@ def test_from_config_when_explicit_auth(self, mock_get_system_auth): @patch("bfabric.bfabric2.get_system_auth") def test_from_config_when_none_auth(self, mock_get_system_auth): - mock_config = MagicMock(name="mock_config", server_timezone="Pacific/Kiritimati") + mock_config = MagicMock(name="mock_config") mock_auth = MagicMock(name="mock_auth") mock_get_system_auth.return_value = (mock_config, mock_auth) client = Bfabric.from_config(config_env="TestingEnv", auth=None) @@ -92,43 +89,6 @@ def test_with_auth_when_exception(self): pass self.assertEqual(mock_old_auth, self.mock_bfabric.auth) - @patch("bfabric.bfabric2.datetime") - def test_add_query_timestamp_when_not_present(self, module_datetime): - module_datetime.now.return_value = datetime.datetime(2020, 1, 2, 3, 4, 5) - query = self.mock_bfabric._add_query_timestamp({"a": "b", "c": 1}) - self.assertDictEqual( - {"a": "b", "c": 1, "createdbefore": "2020-01-02T03:04:05"}, - query, - ) - module_datetime.now.assert_called_once_with(ZoneInfo("Pacific/Kiritimati")) - - @patch("bfabric.bfabric2.datetime") - def test_add_query_timestamp_when_set_and_past(self, module_datetime): - module_datetime.now.return_value = datetime.datetime(2020, 1, 2, 3, 4, 5) - module_datetime.fromisoformat = datetime.datetime.fromisoformat - query_before = {"a": "b", "createdbefore": "2019-12-31T23:59:59"} - # TODO once py3.10 is available, use assertNoLogs - query = self.mock_bfabric._add_query_timestamp(query_before) - self.assertDictEqual( - {"a": "b", "createdbefore": "2019-12-31T23:59:59"}, - query, - ) - module_datetime.now.assert_called_once_with(ZoneInfo("Pacific/Kiritimati")) - - @patch("bfabric.bfabric2.datetime") - def test_add_query_timestamp_when_set_and_future(self, module_datetime): - module_datetime.now.return_value = datetime.datetime(2020, 1, 2, 3, 4, 5) - module_datetime.fromisoformat = datetime.datetime.fromisoformat - query_before = {"a": "b", "createdbefore": "2020-01-02T03:04:06"} - with self.assertLogs(level=logging.WARNING) as logs: - query = self.mock_bfabric._add_query_timestamp(query_before) - self.assertDictEqual( - {"a": "b", "createdbefore": "2020-01-02T03:04:06"}, - query, - ) - self.assertEqual(1, len(logs.output)) - self.assertIn("Query timestamp is in the future: 2020-01-02 03:04:06", logs.output[0]) - @patch.object(Bfabric, "save") def test_upload_resource(self, method_save): resource_name = "hello_world.txt" diff --git a/bfabric/tests/unit/test_bfabric_config.py b/bfabric/tests/unit/test_bfabric_config.py index 3e7dfbd0..f79d8be6 100644 --- a/bfabric/tests/unit/test_bfabric_config.py +++ b/bfabric/tests/unit/test_bfabric_config.py @@ -22,7 +22,6 @@ def setUp(self): self.config = BfabricConfig( base_url="url", application_ids={"app": 1}, - server_timezone="t/z", ) self.example_config_path = Path(__file__).parent / "example_config.yml" @@ -106,7 +105,6 @@ def test_read_yml_bypath_all_fields(self): self.assertEqual(applications_dict_ground_truth, config.application_ids) self.assertEqual(job_notification_emails_ground_truth, config.job_notification_emails) - 
self.assertEqual("UTC", config.server_timezone) # Testing that we can load base_url without authentication if correctly requested def test_read_yml_when_empty_optional(self): @@ -117,7 +115,6 @@ def test_read_yml_when_empty_optional(self): self.assertEqual("https://standby-server.uzh.ch/mystandby", config.base_url) self.assertEqual({}, config.application_ids) self.assertEqual("", config.job_notification_emails) - self.assertEqual("Europe/Zurich", config.server_timezone) # TODO delete if no mandatory fields are reintroduced # Test that missing authentication will raise an error if required @@ -129,7 +126,7 @@ def test_repr(self): rep = repr(self.config) self.assertEqual( "BfabricConfig(base_url='url', application_ids={'app': 1}, " - "job_notification_emails='', server_timezone='t/z')", + "job_notification_emails='')", rep, ) @@ -137,7 +134,7 @@ def test_str(self): rep = str(self.config) self.assertEqual( "BfabricConfig(base_url='url', application_ids={'app': 1}, " - "job_notification_emails='', server_timezone='t/z')", + "job_notification_emails='')", rep, ) From c79cf1ff77c3b8ccb39365c93c63a2e020ad76f8 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 10 May 2024 09:04:23 +0200 Subject: [PATCH 086/129] Refactor status scripts (#89) The scripts used to report job status --- bfabric/scripts/bfabric_logthis.py | 39 +++++++-------- .../bfabric_setExternalJobStatus_done.py | 42 +++++++++------- .../bfabric_setResourceStatus_available.py | 48 ++++++++++--------- bfabric/scripts/bfabric_setWorkunitStatus.py | 39 +++++++++++++++ .../bfabric_setWorkunitStatus_available.py | 32 ------------- .../bfabric_setWorkunitStatus_failed.py | 33 ------------- .../bfabric_setWorkunitStatus_processing.py | 29 ----------- .../integration/integration_test_helper.py | 13 ++++- pyproject.toml | 15 +++--- 9 files changed, 126 insertions(+), 164 deletions(-) create mode 100755 bfabric/scripts/bfabric_setWorkunitStatus.py delete mode 100755 bfabric/scripts/bfabric_setWorkunitStatus_available.py delete mode 100755 bfabric/scripts/bfabric_setWorkunitStatus_failed.py delete mode 100755 bfabric/scripts/bfabric_setWorkunitStatus_processing.py diff --git a/bfabric/scripts/bfabric_logthis.py b/bfabric/scripts/bfabric_logthis.py index fb01298b..512be559 100755 --- a/bfabric/scripts/bfabric_logthis.py +++ b/bfabric/scripts/bfabric_logthis.py @@ -1,31 +1,28 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - -""" -set status of a resource of a given external job -input -""" - # Copyright (C) 2023 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
# # Author: # Christian Panse +from __future__ import annotations +import argparse -import sys -import bfabric +from bfabric.bfabric2 import Bfabric -from random import randint -from time import sleep -import bfabric.wrapper_creator.bfabric_feeder +def bfabric_logthis(client: Bfabric, external_job_id: int, message: str) -> None: + """Logs a message for an external job.""" + client.save("externaljob", {"id": external_job_id, "logthis": message}) -if __name__ == "__main__": - if len(sys.argv) > 1: - B = bfabric.wrapper_creator.bfabric_feeder.BfabricFeeder() - try: - externaljobid, msg = (int(sys.argv[1]), sys.argv[2]) - except: - raise ("Usage: bfabric_logthis.py ") - rv = B.save_object('externaljob', {'id': externaljobid, 'logthis': msg}) - # B.print_json(rv) +def main() -> None: + """Parses the command line arguments and calls `bfabric_logthis`.""" + client = Bfabric.from_config() + parser = argparse.ArgumentParser(description="log message of external job") + parser.add_argument("external_job_id", type=int, help="external job id") + parser.add_argument("message", type=str, help="message") + args = vars(parser.parse_args()) + bfabric_logthis(client=client, **args) + + +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_setExternalJobStatus_done.py b/bfabric/scripts/bfabric_setExternalJobStatus_done.py index 4727252e..96e1498d 100755 --- a/bfabric/scripts/bfabric_setExternalJobStatus_done.py +++ b/bfabric/scripts/bfabric_setExternalJobStatus_done.py @@ -1,9 +1,11 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """ set status of a resource of a given resource id """ +from __future__ import annotations +import argparse + +from bfabric.bfabric2 import Bfabric # Copyright (C) 2014 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
 #
@@ -14,21 +16,27 @@
 #
 # Licensed under GPL version 3
 #
-# $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/scripts/bfabric_setExternalJobStatus_done.py $
-# $Id: bfabric_setExternalJobStatus_done.py 2996 2017-08-18 12:11:17Z cpanse $
-import sys
-import bfabric
-import bfabric.wrapper_creator.bfabric_feeder
+
+def set_external_job_status_done(client: Bfabric, external_job_id: list[int]) -> None:
+    """Sets the status of the specified external jobs to 'done'."""
+    for job_id in external_job_id:
+        try:
+            res = client.save("externaljob", {"id": job_id, "status": "done"}).to_list_dict()
+            print(res)
+        except Exception:
+            print(f"failed to set externaljob with id={job_id} 'done'.")
+            raise
+
+
+def main() -> None:
+    """Parses command line arguments and calls `set_external_job_status_done`."""
+    parser = argparse.ArgumentParser(description="set external job status to 'done'")
+    parser.add_argument("external_job_id", type=int, help="external job id", nargs="+")
+    args = parser.parse_args()
+    client = Bfabric.from_config(verbose=True)
+    set_external_job_status_done(client, args.external_job_id)
+
 
 if __name__ == "__main__":
-    bfapp = bfabric.wrapper_creator.bfabric_feeder.BfabricFeeder()
-
-    if len(sys.argv) > 1:
-        for i in range(1, len(sys.argv)):
-            try:
-                res = bfapp.save_object('externaljob', {'id':int(sys.argv[i]), 'status':'done'})
-                print(res)
-            except:
-                print("failed to set externaljob with id={} 'available'.".format(int(sys.argv[i])))
-                raise
+    main()
diff --git a/bfabric/scripts/bfabric_setResourceStatus_available.py b/bfabric/scripts/bfabric_setResourceStatus_available.py
index d68b2c02..7f0ec07e 100755
--- a/bfabric/scripts/bfabric_setResourceStatus_available.py
+++ b/bfabric/scripts/bfabric_setResourceStatus_available.py
@@ -1,9 +1,13 @@
-#!/usr/bin/env python3
-# -*- coding: latin1 -*-
-
+#!/usr/bin/env python3
 """
 set status of a resource of a given resource id
 """
+from __future__ import annotations
+
+import argparse
+
+from bfabric.bfabric2 import Bfabric
+
 
 # Copyright (C) 2014 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved.
 #
@@ -13,28 +17,26 @@
 #
 # Licensed under GPL version 3
 #
-# $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/fgcz_bfabric_setResourceStatus_available.py $
-# $Id: fgcz_bfabric_setResourceStatus_available.py 2397 2016-09-06 07:04:35Z cpanse $
-import sys
-import bfabric
-from random import randint
-from time import sleep
+def set_resource_status_available(client: Bfabric, resource_ids: list[int]) -> None:
+    """Sets the status of the specified resources to 'available'."""
+    for resource_id in resource_ids:
+        try:
+            res = client.save("resource", {"id": resource_id, "status": "available"}).to_list_dict()
+            print(res)
+        except Exception:
+            print(f"failed to set resourceid {resource_id} 'available'.")
+            raise
+
+def main() -> None:
+    """Parses command line arguments and calls `set_resource_status_available`."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("resource_id", type=int, help="resource id", nargs="+")
+    args = parser.parse_args()
+    client = Bfabric.from_config(verbose=True)
+    set_resource_status_available(client, args.resource_id)
-
-import bfabric.wrapper_creator.bfabric_feeder
 
 if __name__ == "__main__":
-    if len(sys.argv) > 1:
-        bfapp = bfabric.wrapper_creator.bfabric_feeder.BfabricFeeder()
-
-        for i in range(1, len(sys.argv)):
-            sleep(randint(2, 20))
-            try:
-                print(bfapp.report_resource(resourceid=int(sys.argv[i])))
-            except:
-                print( "failed to set resourceid {} 'available'.".format(int(sys.argv[i])))
-                raise
-    else:
-        print("Invalid argument: no resourceid is provided")
-        sys.exit(0)
+    main()
diff --git a/bfabric/scripts/bfabric_setWorkunitStatus.py b/bfabric/scripts/bfabric_setWorkunitStatus.py
new file mode 100755
index 00000000..670caf65
--- /dev/null
+++ b/bfabric/scripts/bfabric_setWorkunitStatus.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+import argparse
+import json
+
+from bfabric.bfabric2 import Bfabric
+
+# Copyright (C) 2014 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved.
+#
+# Authors:
+#   Marco Schmidt
+#   Christian Panse
+#
+# Licensed under GPL version 3
+#
+
+
+def main_generic(result_status: str) -> None:
+    """Main function for setting workunit status to `result_status`."""
+    parser = argparse.ArgumentParser(description=f"Sets workunit status to '{result_status}'")
+    parser.add_argument("workunit_id", type=int, help="workunit id")
+    args = parser.parse_args()
+    client = Bfabric.from_config(verbose=True)
+    res = client.save("workunit", {"id": args.workunit_id, "status": result_status})
+    print(json.dumps(res.to_list_dict(), indent=2))
+
+
+def main_available() -> None:
+    """Calls `main_generic` with 'available' as argument."""
+    main_generic("available")
+
+
+def main_failed() -> None:
+    """Calls `main_generic` with 'failed' as argument."""
+    main_generic("failed")
+
+
+def main_processing() -> None:
+    """Calls `main_generic` with 'processing' as argument."""
+    main_generic("processing")
diff --git a/bfabric/scripts/bfabric_setWorkunitStatus_available.py b/bfabric/scripts/bfabric_setWorkunitStatus_available.py
deleted file mode 100755
index ca5f0290..00000000
--- a/bfabric/scripts/bfabric_setWorkunitStatus_available.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/python
-# -*- coding: latin1 -*-
-
-"""
-set status of a resource of a given resource id
-"""
-
-# Copyright (C) 2014 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved.
-# -# Authors: -# Marco Schmidt -# Christian Panse -# -# Licensed under GPL version 3 -# -# $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/fgcz_bfabric_setResourceStatus_available.py $ -# $Id: fgcz_bfabric_setResourceStatus_available.py 2397 2016-09-06 07:04:35Z cpanse $ - -import sys -import bfabric - -from random import randint -from time import sleep - -import bfabric.wrapper_creator.bfabric_feeder - -if __name__ == "__main__": - if len(sys.argv) > 1: - B = bfabric.wrapper_creator.bfabric_feeder.BfabricFeeder() - - res = B.save_object(endpoint='workunit', obj={'id': int(sys.argv[1]), 'status': 'available'}) - B.print_json(res) diff --git a/bfabric/scripts/bfabric_setWorkunitStatus_failed.py b/bfabric/scripts/bfabric_setWorkunitStatus_failed.py deleted file mode 100755 index 847e0840..00000000 --- a/bfabric/scripts/bfabric_setWorkunitStatus_failed.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/python -# -*- coding: latin1 -*- - -""" -set status of a resource of a given resource id -""" - -# Copyright (C) 2021 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. -# -# Authors: -# Christian Panse -# Maria - -# 2021-02-02 - -import sys -import bfabric - -from random import randint -from time import sleep - -import bfabric.wrapper_creator.bfabric_feeder - -if __name__ == "__main__": - if len(sys.argv) > 1: - bfapp = bfabric.wrapper_creator.bfabric_feeder.BfabricFeeder() - - workunitid = int(sys.argv[1]) - print("workunitit={}".format(workunitid)) - - res = bfapp.save_object(endpoint='workunit', obj={'id': workunitid, 'status': 'failed'}) - bfapp.print_json(res) - print ("alive") diff --git a/bfabric/scripts/bfabric_setWorkunitStatus_processing.py b/bfabric/scripts/bfabric_setWorkunitStatus_processing.py deleted file mode 100755 index 91ff96f1..00000000 --- a/bfabric/scripts/bfabric_setWorkunitStatus_processing.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/python -# -*- coding: latin1 -*- - -""" -set status of a resource of a given resource id -""" - -# Copyright (C) 2023 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
-#
-# Authors:
-#   Christian Panse
-#
-# Licensed under GPL version 3
-#
-# https://github.com/fgcz/bfabricPy/
-
-import sys
-import bfabric
-
-from random import randint
-from time import sleep
-
-import bfabric.wrapper_creator.bfabric_feeder
-
-if __name__ == "__main__":
-    if len(sys.argv) > 1:
-        B = bfabric.wrapper_creator.bfabric_feeder.BfabricFeeder()
-        res = B.save_object(endpoint='workunit', obj={'id': int(sys.argv[1]), 'status': 'processing'})
-        B.print_json(res)
diff --git a/bfabric/tests/integration/integration_test_helper.py b/bfabric/tests/integration/integration_test_helper.py
index 66a4a1df..89db1b3b 100644
--- a/bfabric/tests/integration/integration_test_helper.py
+++ b/bfabric/tests/integration/integration_test_helper.py
@@ -1,14 +1,17 @@
 from __future__ import annotations
 
+from typing import Any
+
 from bfabric.bfabric2 import Bfabric
 
 
 class DeleteEntities:
-    def __init__(self, client: Bfabric, created_entities: list[tuple[str, int]]):
+    def __init__(self, client: Bfabric, created_entities: list[tuple[str, int]] | None = None):
         self.client = client
-        self.created_entities = created_entities
+        self.created_entities = created_entities or []
 
     def __call__(self):
+        """Deletes all created entities."""
         errors = []
         for entity_type, entity_id in self.created_entities:
             errors += self.client.delete(entity_type, entity_id, check=False).errors
@@ -16,3 +19,9 @@ def __call__(self):
             print("Error deleting entities:", errors)
         else:
             print("Successfully deleted entities:", self.created_entities)
+
+    def register_entity(self, entity: dict[str, Any], entity_type: str | None = None):
+        """Registers an entity to be deleted when the test is torn down."""
+        if entity_type is None:
+            entity_type = entity["classname"]
+        self.created_entities.append((entity_type, entity["id"]))
diff --git a/pyproject.toml b/pyproject.toml
index 78b1b2ec..facc2073 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,7 +22,7 @@ requires-python = ">=3.9"
 dependencies = [
     "suds >= 1.1.2",
     "PyYAML >= 6.0",
-    "Flask == 2.2.5",
+    "Flask >= 3.0.3",
     "rich >= 13.7.1",
     "zeep >= 4.2.1",
     "pandas >= 2.2.2",
@@ -47,12 +47,12 @@ Repository = "https://github.com/fgcz/bfabricPy"
 #bfabric_feeder_resource_autoQC="bfabric.scripts.bfabric_feeder_resource_autoQC:main"
 #bfabric_list_not_existing_storage_directories="bfabric.scripts.bfabric_list_not_existing_storage_directories:main"
 "bfabric_list_not_available_proteomics_workunits.py"="bfabric.scripts.bfabric_list_not_available_proteomics_workunits:main"
 "bfabric_upload_resource.py"="bfabric.scripts.bfabric_upload_resource:main"
-#bfabric_logthis="bfabric.scripts.bfabric_logthis:main"
-#bfabric_setResourceStatus_available="bfabric.scripts.bfabric_setResourceStatus_available:main"
-#bfabric_setExternalJobStatus_done="bfabric.scripts.bfabric_setExternalJobStatus_done:main"
-#bfabric_setWorkunitStatus_available="bfabric.scripts.bfabric_setWorkunitStatus_available:main"
-#bfabric_setWorkunitStatus_processing="bfabric.scripts.bfabric_setWorkunitStatus_processing:main"
-#bfabric_setWorkunitStatus_failed="bfabric.scripts.bfabric_setWorkunitStatus_failed:main"
+"bfabric_logthis.py"="bfabric.scripts.bfabric_logthis:main"
+"bfabric_setResourceStatus_available.py"="bfabric.scripts.bfabric_setResourceStatus_available:main"
+"bfabric_setExternalJobStatus_done.py"="bfabric.scripts.bfabric_setExternalJobStatus_done:main"
+"bfabric_setWorkunitStatus_available.py"="bfabric.scripts.bfabric_setWorkunitStatus:main_available"
+"bfabric_setWorkunitStatus_processing.py"="bfabric.scripts.bfabric_setWorkunitStatus:main_processing"
+"bfabric_setWorkunitStatus_failed.py"="bfabric.scripts.bfabric_setWorkunitStatus_failed:main_failed" #bfabric_delete="bfabric.scripts.bfabric_delete:main" "bfabric_read.py"="bfabric.scripts.bfabric_read:main" "bfabric_read_samples_of_workunit.py"="bfabric.scripts.bfabric_read_samples_of_workunit:main" @@ -77,6 +77,7 @@ target-version = "py39" [tool.ruff.lint] select = ["D103", "E", "F", "ANN", "PTH", "UP", "BLE", "SIM"] +ignore = ["ANN101"] [tool.licensecheck] using = "PEP631" From 0b672465c9a9f4375ffa8cb1835b6c9f2f22d366 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 10 May 2024 09:07:23 +0200 Subject: [PATCH 087/129] Refactor save scripts (#91) --- bfabric/scripts/bfabric_save_fasta.py | 143 ++++++++-------- .../bfabric_save_importresource_sample.py | 161 +++++++++--------- .../scripts/bfabric_save_link_to_workunit.py | 43 ++--- bfabric/scripts/bfabric_save_workflowstep.py | 79 +++++---- .../bfabric_save_workunit_attribute.py | 48 +++--- pyproject.toml | 10 +- 6 files changed, 249 insertions(+), 235 deletions(-) diff --git a/bfabric/scripts/bfabric_save_fasta.py b/bfabric/scripts/bfabric_save_fasta.py index 3ebbb15c..69650e2e 100755 --- a/bfabric/scripts/bfabric_save_fasta.py +++ b/bfabric/scripts/bfabric_save_fasta.py @@ -1,74 +1,81 @@ -#!/usr/bin/python +#!/usr/bin/env python3 -import sys -import os -import yaml -#import xmlrpclib +# TODO this file was refactored without testing anything +# TODO this file was refactored without testing anything +import argparse import hashlib -from optparse import OptionParser -from bfabric import Bfabric +import json +import sys +from pathlib import Path -FASTAHTTPROOT="/fasta/" -BFABRICSTORAGEID = 2 -BFABRICAPPLIATIONID = 61 - -def save_fasta(containerid=1875, fasta_file="p1875_db10_20170817.fasta"): - bfapp = Bfabric() - - try: - print("reading stdin") - description = sys.stdin.read() - except: - print("reading from stdin failed.") - raise - - try: - md5 = hashlib.md5(open(fasta_file, 'rb').read()).hexdigest() - except: - print("computing file checksum failed.") - raise - - resource = bfapp.read_object(endpoint='resource', obj={'filechecksum': md5}) - - try: - print("resource(s) already exist.".format(resource[0]._id)) - resource = bfapp.save_object(endpoint='resource', obj={'id': resource[0]._id, 'description': description}) - print(resource) - return - except: - pass - - - try: - workunit = bfapp.save_object(endpoint='workunit', - obj={'name': "FASTA: {}".format(os.path.basename(fasta_file)), - 'containerid': containerid, - 'applicationid': BFABRICAPPLIATIONID}) - print (workunit) - except: - raise - - - obj = {'workunitid': workunit[0]._id, - 'filechecksum': md5, - 'relativepath': "{}{}".format(FASTAHTTPROOT, os.path.basename(fasta_file)), - 'name': os.path.basename(fasta_file), - 'size': os.path.getsize(fasta_file), - 'status': 'available', - 'description': description, - 'storageid': BFABRICSTORAGEID - } - - - resource = bfapp.save_object(endpoint='resource', obj=obj) - print(resource) - - workunit = bfapp.save_object(endpoint='workunit', - obj={'id': workunit[0]._id, 'status': 'available'}) - print (workunit) +from bfabric.bfabric2 import Bfabric -if __name__ == "__main__": - save_fasta(containerid=sys.argv[1], fasta_file=sys.argv[2]) +FASTAHTTPROOT = "/fasta/" +BFABRICSTORAGEID = 2 +BFABRIC_APPLICATION_ID = 61 + + +def save_fasta(container_id: int, fasta_file: Path) -> None: + """Save a fasta file to bfabric.""" + client = Bfabric.from_config(verbose=True) + + print("Reading description from stdin") + description = 
sys.stdin.read() + + if not fasta_file.exists(): + raise FileNotFoundError(fasta_file) + + with fasta_file.open("rb") as f: + md5 = hashlib.md5(f.read()).hexdigest() + + resources = client.read(endpoint="resource", obj={"filechecksum": md5}).to_list_dict() + if resources: + print("resource(s) already exist.") + # TODO this logic was mostly carried over from before, does it still make sense? + try: + resources = client.save(endpoint="resource", obj={"id": resources[0]["id"], "description": description}) + print(json.dumps(resources.to_list_dict(), indent=2)) + return + except Exception: + pass + + workunit = client.save( + endpoint="workunit", + obj={ + "name": f"FASTA: {fasta_file.name}", + "containerid": container_id, + # TODO make configurable if needed in the future + "applicationid": BFABRIC_APPLICATION_ID, + }, + ).to_list_dict() + print(json.dumps(workunit, indent=2)) + + obj = { + "workunitid": workunit[0]["id"], + "filechecksum": md5, + "relativepath": f"{FASTAHTTPROOT}{fasta_file.name}", + "name": fasta_file.name, + "size": fasta_file.stat().st_size, + "status": "available", + "description": description, + "storageid": BFABRICSTORAGEID, + } + + resource = client.save(endpoint="resource", obj=obj).to_list_dict() + print(json.dumps(resource, indent=2)) + + workunit = client.save(endpoint="workunit", obj={"id": workunit[0]._id, "status": "available"}).to_list_dict() + print(json.dumps(workunit, indent=2)) + + +def main() -> None: + """Parses command line arguments and calls `save_fasta`.""" + parser = argparse.ArgumentParser() + parser.add_argument("container_id", help="container_id", type=int) + parser.add_argument("fasta_file", help="fasta_file", type=Path) + args = parser.parse_args() + save_fasta(container_id=args.container_id, fasta_file=args.fasta_file) - #p#rint (workunit) +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_save_importresource_sample.py b/bfabric/scripts/bfabric_save_importresource_sample.py index 12611806..efb0ed28 100755 --- a/bfabric/scripts/bfabric_save_importresource_sample.py +++ b/bfabric/scripts/bfabric_save_importresource_sample.py @@ -1,6 +1,4 @@ #!/usr/bin/python3 -# -*- coding: latin1 -*- - """General Importresource Feeder for bfabric Author: @@ -14,38 +12,22 @@ History: The first version of the script appeared on Wed Oct 24 17:02:04 CEST 2012. """ +from __future__ import annotations - - +import logging +import logging.handlers import os import re -import time import sys -from bfabric import Bfabric - - -import logging, logging.handlers - -logger = logging.getLogger('sync_feeder') -hdlr_syslog = logging.handlers.SysLogHandler(address=("130.60.81.21", 514)) -formatter = logging.Formatter('%(name)s %(message)s', datefmt="%Y-%m-%d %H:%M:%S") -hdlr_syslog.setFormatter(formatter) -logger.addHandler(hdlr_syslog) -logger.setLevel(logging.INFO) +import time +from bfabric.bfabric2 import Bfabric -################################################################################ -bfabric_storageid = 2 -bfapp = Bfabric() +BFABRIC_STORAGE_ID = 2 -# maps the 'real world' to the BFabric application._id -if bfapp.config.application_ids is None: - raise RuntimeError("No bfapp.config.application_ids variable configured. 
check '~/.bfabricrc.py' file!") -print(bfapp.config.application_ids) -bfabric_application_ids = bfapp.config.application_ids -def save_importresource(line): - """ reads, splits and submit the input line to the bfabric system +def save_importresource(client: Bfabric, line: str) -> None: + """reads, splits and submit the input line to the bfabric system Input: a line containg md5sum;date;size;path @@ -57,70 +39,87 @@ def save_importresource(line): Output: True on success otherwise an exception raise """ - - _bfabric_applicationid = -1 - _bfabric_projectid = -1, - _file_size = -1 - _file_date = -1 - - # empty string / file - _md5 = "d41d8cd98f00b204e9800998ecf8427e" - - _sampleid = None + mdf5_checksum, file_date, file_size, file_path = line.split(";") + + # Format the timestamp for bfabric + file_date = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(int(file_date))) + + bfabric_application_ids = client.config.application_ids + if not bfabric_application_ids: + raise RuntimeError("No bfabric_application_ids configured. check '~/.bfabricpy.yml' file!") + + bfabric_application_id, bfabric_projectid = get_bfabric_application_and_project_id( + bfabric_application_ids, file_path + ) + + obj = { + "applicationid": bfabric_application_id, + "filechecksum": mdf5_checksum, + "containerid": bfabric_projectid, + "filedate": file_date, + "relativepath": file_path, + "name": os.path.basename(file_path), + "size": file_size, + "storageid": BFABRIC_STORAGE_ID, + } try: - (_md5, _file_date, _file_size, _file_path) = line.split(";") - except: - raise - + m = re.search( + r"p([0-9]+)\/(Proteomics\/[A-Z]+_[1-9])\/.*_\d\d\d_S([0-9][0-9][0-9][0-9][0-9][0-9]+)_.*(raw|zip)$", + file_path, + ) + print(f"found sampleid={m.group(3)} pattern") + obj["sampleid"] = int(m.group(3)) + except Exception: + pass + print(obj) + res = client.save(endpoint="importresource", obj=obj).to_list_dict() + print(res[0]) - # the timeformat bfabric understands - #_file_date = time.strftime("%FT%H:%M:%S-01:00",time.gmtime(int(_file_date))) - _file_date = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(int(_file_date))) +def get_bfabric_application_and_project_id(bfabric_application_ids: dict[str, int], file_path: str) -> tuple[int, int]: + """Returns the bfabric application id and project id for a given file path.""" # linear search through dictionary. first hit counts! - for i in bfabric_application_ids.keys(): + bfabric_applicationid = -1 + bfabric_projectid = (-1,) + for i in bfabric_application_ids: # first match counts! 
- if re.search(i, _file_path): - _bfabric_applicationid = bfabric_application_ids[i] - re_result = re.search(r"^p([0-9]+)\/.+", _file_path) - _bfabric_projectid = re_result.group(1) + if re.search(i, file_path): + bfabric_applicationid = bfabric_application_ids[i] + re_result = re.search(r"^p([0-9]+)\/.+", file_path) + bfabric_projectid = re_result.group(1) break - - if _bfabric_applicationid < 0: - logger.error("{0}; no bfabric application id.".format(_file_path)) - return - - obj = { 'applicationid':_bfabric_applicationid, - 'filechecksum':_md5, - 'containerid':_bfabric_projectid, - 'filedate':_file_date, - 'relativepath':_file_path, - 'name': os.path.basename(_file_path), - 'size':_file_size, - 'storageid': bfabric_storageid - } - - try: - m = re.search(r"p([0-9]+)\/(Proteomics\/[A-Z]+_[1-9])\/.*_\d\d\d_S([0-9][0-9][0-9][0-9][0-9][0-9]+)_.*(raw|zip)$", _file_path) - print ("found sampleid={} pattern".format(m.group(3))) - obj['sampleid'] = int(m.group(3)) - except: - pass - - - print (obj) - res = bfapp.save_object(endpoint='importresource', obj=obj) - print (res[0]) - -if __name__ == "__main__": - if sys.argv[1] == '-': - print ("reading from stdin ...") + if bfabric_applicationid < 0: + logger = logging.getLogger("sync_feeder") + logger.error(f"{file_path}; no bfabric application id.") + raise RuntimeError("no bfabric application id.") + return bfabric_applicationid, bfabric_projectid + + +def setup_logger() -> None: + """Sets up a logger for the script.""" + logger = logging.getLogger("sync_feeder") + hdlr_syslog = logging.handlers.SysLogHandler(address=("130.60.81.21", 514)) + formatter = logging.Formatter("%(name)s %(message)s", datefmt="%Y-%m-%d %H:%M:%S") + hdlr_syslog.setFormatter(formatter) + logger.addHandler(hdlr_syslog) + logger.setLevel(logging.INFO) + + +def main() -> None: + """Parses arguments and calls `save_importresource`.""" + setup_logger() + client = Bfabric.from_config(verbose=True) + if sys.argv[1] == "-": + print("reading from stdin ...") for input_line in sys.stdin: - save_importresource(input_line.rstrip()) - elif sys.argv[1] == '-h': + save_importresource(client, input_line.rstrip()) + elif sys.argv[1] == "-h": print(__doc__) else: - save_importresource(sys.argv[1]) + save_importresource(client, sys.argv[1]) + +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_save_link_to_workunit.py b/bfabric/scripts/bfabric_save_link_to_workunit.py index 53b58504..473dff3f 100755 --- a/bfabric/scripts/bfabric_save_link_to_workunit.py +++ b/bfabric/scripts/bfabric_save_link_to_workunit.py @@ -1,32 +1,33 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """ Copyright (C) 2023 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
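+
+Usage sketch (the example values are taken from the old usage message removed below):
+
+  bfabric_save_link_to_workunit.py 294156 'https://fgcz-shiny.uzh.ch/exploreDE_prot/?data=p3000/bfabric/Proteomics/SummarizedExperiment/2023/2023-09/2023-09-29/workunit_294156/2363303.rds' 'demo1 link'
+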
Christian Panse 20231011 """ +import argparse +import json -import sys -import os -from bfabric import Bfabric +from bfabric.bfabric2 import Bfabric -def save_link(wuid=294156, link="", name=""): - B = Bfabric() - rv = B.save_object('link', - obj={'name': name, - 'parentclassname': 'workunit', - 'parentid': wuid, - 'url': link}) - B.print_json(rv) +def save_link(workunit_id: int, url: str, name: str) -> None: + """Saves a link to a workunit.""" + client = Bfabric.from_config(verbose=True) + results = client.save( + endpoint="link", obj={"name": name, "parentclassname": "workunit", "parentid": workunit_id, "url": url} + ).to_list_dict() + print(json.dumps(results[0], indent=2)) -if __name__ == "__main__": - if len(sys.argv) == 4: - save_link(wuid=sys.argv[1], link=sys.argv[2], name=sys.argv[3]) - else: - print ("Usage:") - print ("{} ".format(sys.argv[0])) - print ("Example:") - print ("{} 294156 'https://fgcz-shiny.uzh.ch/exploreDE_prot/?data=p3000/bfabric/Proteomics/SummarizedExperiment/2023/2023-09/2023-09-29/workunit_294156/2363303.rds' 'demo1 link'".format(sys.argv[0])) +def main() -> None: + """Parses the command line arguments and calls `save_link`.""" + parser = argparse.ArgumentParser() + parser.add_argument("workunit_id", type=int, help="the workunit ID") + parser.add_argument("link", type=str, help="the url to save") + parser.add_argument("name", type=str, help="the name of the link") + args = parser.parse_args() + save_link(workunit_id=args.workunit_id, url=args.link, name=args.name) + + +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_save_workflowstep.py b/bfabric/scripts/bfabric_save_workflowstep.py index d90cc466..e9c6420a 100755 --- a/bfabric/scripts/bfabric_save_workflowstep.py +++ b/bfabric/scripts/bfabric_save_workflowstep.py @@ -15,58 +15,71 @@ Usage: bfabric_save_workflowstep.py 285507 """ +from __future__ import annotations -import sys -from bfabric import Bfabric +import argparse +from bfabric.bfabric2 import Bfabric -def main(workunit_id = None): - B = Bfabric() - workflowtemplatestep_ids = {224: 247, # MaxQuant - #295: 248, # FragPipe-RESOURCE - 314: 254, # DIANN - 255: 256, # maxquant_scaffold - 266: 258 # MaxQuant-sampleSizeEstimation - } - workflowtemplate_ids = {224: 59, # Proteomics Data analysis - #295: 59, - 314: 59, - 255: 60, # Proteomics Results - 266: 60 - } +def save_workflowstep(workunit_id: int | None = None) -> None: + """Creates an analysis workflow step for a given workunit id.""" + client = Bfabric.from_config(verbose=True) + workflowtemplatestep_ids = { + 224: 247, # MaxQuant + # 295: 248, # FragPipe-RESOURCE + 314: 254, # DIANN + 255: 256, # maxquant_scaffold + 266: 258, # MaxQuant-sampleSizeEstimation + } + workflowtemplate_ids = { + 224: 59, # Proteomics Data analysis + # 295: 59, + 314: 59, + 255: 60, # Proteomics Results + 266: 60, + } - workunit = B.read_object("workunit", obj={"id": workunit_id})[0] + workunit = client.read("workunit", obj={"id": workunit_id}).to_list_dict()[0] application_id = workunit["application"]["_id"] container_id = workunit["container"]["_id"] if application_id in workflowtemplatestep_ids and application_id in workflowtemplate_ids: - workflows = B.read_object("workflow", obj={"containerid": container_id}) + workflows = client.read("workflow", obj={"containerid": container_id}).to_list_dict() # if workflows is None, no workflow is available - > create a new one daw_id = -1 - if workflows is not None: + if workflows: # check if the corresponding workflow exists (template id 59) for item in 
workflows: - if item["workflowtemplate"]["_id"] == workflowtemplate_ids[application_id]: - daw_id = item["_id"] + if item["workflowtemplate"]["id"] == workflowtemplate_ids[application_id]: + daw_id = item["id"] break - else: - pass # case when no workflows are available (workflows == None) if daw_id == -1: - daw = B.save_object("workflow", obj={"containerid": container_id, "workflowtemplateid": workflowtemplate_ids[application_id]}) + daw = client.save( + "workflow", + obj={"containerid": container_id, "workflowtemplateid": workflowtemplate_ids[application_id]}, + ) daw_id = daw[0]["_id"] - res = B.save_object("workflowstep", obj = {"workflowid": daw_id, "workflowtemplatestepid": workflowtemplatestep_ids[application_id], "workunitid": workunit_id}) + res = client.save( + "workflowstep", + obj={ + "workflowid": daw_id, + "workflowtemplatestepid": workflowtemplatestep_ids[application_id], + "workunitid": workunit_id, + }, + ).to_list_dict() print(res[0]) - else: - pass -if __name__ == "__main__": - import argparse - parser = argparse.ArgumentParser(description='Create an analysis workflow step') - parser.add_argument('workunitid', metavar='workunitid', type=int, - help='workunit id') + +def main() -> None: + """Parses command line args and calls `save_workflowstep`.""" + parser = argparse.ArgumentParser(description="Create an analysis workflow step") + parser.add_argument("workunitid", metavar="workunitid", type=int, help="workunit id") args = parser.parse_args() - main(workunit_id = args.workunitid) + save_workflowstep(workunit_id=args.workunitid) + +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_save_workunit_attribute.py b/bfabric/scripts/bfabric_save_workunit_attribute.py index 481a30c3..8bbc4845 100755 --- a/bfabric/scripts/bfabric_save_workunit_attribute.py +++ b/bfabric/scripts/bfabric_save_workunit_attribute.py @@ -1,6 +1,4 @@ #!/usr/bin/python -# -*- coding: latin1 -*- - """ Copyright (C) 2021 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
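+
+Usage sketch (the values are illustrative; this call would set the status
+attribute of workunit 294156 to available):
+
+  bfabric_save_workunit_attribute.py 294156 status available
+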
@@ -12,32 +10,28 @@ """ +import argparse +import json + +from bfabric.bfabric2 import Bfabric + + +def bfabric_save_workunit_attribute(workunit_id: int, attribute: str, value: str) -> None: + """Sets the specified attribute to the specified value for the specified workunit.""" + client = Bfabric.from_config(verbose=True) + result = client.save(endpoint="workunit", obj={"id": workunit_id, attribute: value}).to_list_dict() + print(json.dumps(result[0], indent=2)) + -import os -import sys -import bfabric -import datetime +def main() -> None: + """Parses the command line arguments and calls `bfabric_save_workunit_attribute`.""" + parser = argparse.ArgumentParser() + parser.add_argument("workunit_id", type=int, help="the workunit ID") + parser.add_argument("attribute", type=str, help="the attribute to save") + parser.add_argument("value", type=str, help="the value to save") + args = vars(parser.parse_args()) + bfabric_save_workunit_attribute(**args) -def usage(): - print("usage:\n") - msg = "\t{} ".format(sys.argv[0]) - print(msg) if __name__ == "__main__": - B = bfabric.Bfabric() - - query_obj = {} - - try: - workunitID = sys.argv[1] - attribute = sys.argv[2] - value = sys.argv[3] - query_obj["id"] = workunitID - query_obj[attribute] = value - except: - usage() - sys.exit(1) - - - res = B.save_object(endpoint='workunit', obj=query_obj) - print(res) + main() diff --git a/pyproject.toml b/pyproject.toml index facc2073..f8447c1e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,12 +59,12 @@ Repository = "https://github.com/fgcz/bfabricPy" "bfabric_read_samples_from_dataset.py"="bfabric.scripts.bfabric_read_samples_from_dataset:main" "bfabric_save_csv2dataset.py"="bfabric.scripts.bfabric_save_csv2dataset:main" "bfabric_save_dataset2csv.py"="bfabric.scripts.bfabric_save_dataset2csv:main" -#bfabric_save_fasta="bfabric.scripts.bfabric_save_fasta:main" -#bfabric_save_importresource_sample="bfabric.scripts.bfabric_save_importresource_sample:main" -#bfabric_save_link_to_workunit="bfabric.scripts.bfabric_save_link_to_workunit:main" +"bfabric_save_fasta.py"="bfabric.scripts.bfabric_save_fasta:main" +"bfabric_save_importresource_sample.py"="bfabric.scripts.bfabric_save_importresource_sample:main" +"bfabric_save_link_to_workunit.py"="bfabric.scripts.bfabric_save_link_to_workunit:main" #bfabric_save_resource="bfabric.scripts.bfabric_save_resource:main" -#bfabric_save_workunit_attribute="bfabric.scripts.bfabric_save_workunit_attribute:main" -#bfabric_save_workflowstep="bfabric.scripts.bfabric_save_workflowstep:main" +"bfabric_save_workunit_attribute.py"="bfabric.scripts.bfabric_save_workunit_attribute:main" +"bfabric_save_workflowstep.py"="bfabric.scripts.bfabric_save_workflowstep:main" [tool.black] line-length = 120 From ebbefd4be1201cc72953cf001f8b4cf1627f1649 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 10 May 2024 09:37:23 +0200 Subject: [PATCH 088/129] Replace bfabric.py by bfabric2.py To ensure a smooth transition for new code, the existing bfabric2.py module is kept with reexports of its old contents. 
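In practice, existing imports such as "from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth" keep working for now, but emit a DeprecationWarning that points users to importing from the bfabric package directly.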
The old code was renamed to bfabric_legacy.py --- bfabric/__init__.py | 11 +- bfabric/bfabric.py | 641 +++++++++++------- bfabric/bfabric2.py | 431 +----------- bfabric/bfabric_legacy.py | 244 +++++++ .../examples/compare_zeep_suds_pagination.py | 3 +- bfabric/examples/compare_zeep_suds_query.py | 12 +- bfabric/examples/exists_multi.py | 4 +- bfabric/examples/zeep_debug.py | 3 +- bfabric/scripts/bfabric_delete.py | 4 +- .../scripts/bfabric_feeder_resource_autoQC.py | 4 +- bfabric/scripts/bfabric_flask.py | 3 +- ...list_not_available_proteomics_workunits.py | 3 +- ...c_list_not_existing_storage_directories.py | 5 +- bfabric/scripts/bfabric_logthis.py | 2 +- bfabric/scripts/bfabric_read.py | 3 +- .../bfabric_read_samples_from_dataset.py | 2 +- .../bfabric_read_samples_of_workunit.py | 2 +- bfabric/scripts/bfabric_save_csv2dataset.py | 2 +- bfabric/scripts/bfabric_save_dataset2csv.py | 2 +- bfabric/scripts/bfabric_save_fasta.py | 2 +- .../bfabric_save_importresource_sample.py | 2 +- .../scripts/bfabric_save_link_to_workunit.py | 2 +- bfabric/scripts/bfabric_save_workflowstep.py | 2 +- bfabric/scripts/bfabric_save_workunit.py | 3 +- .../bfabric_save_workunit_attribute.py | 2 +- .../bfabric_setExternalJobStatus_done.py | 3 +- .../bfabric_setResourceStatus_available.py | 2 +- bfabric/scripts/bfabric_setWorkunitStatus.py | 3 +- bfabric/scripts/bfabric_upload_resource.py | 2 +- .../bfabric_upload_submitter_executable.py | 2 +- bfabric/tests/__test_bfabric.py | 4 +- .../integration/integration_test_helper.py | 2 +- .../tests/integration/scripts/test_read.py | 2 +- .../scripts/test_save_csv2dataset.py | 2 +- .../scripts/test_save_dataset2csv.py | 2 +- .../scripts/test_upload_resource.py | 2 +- .../integration/test_bfabric2_bad_requests.py | 3 +- .../tests/integration/test_bfabric2_exists.py | 3 +- .../tests/integration/test_bfabric2_read.py | 4 +- .../test_bfabric2_read_pagination.py | 3 +- .../integration/test_bfabric2_save_delete.py | 3 +- bfabric/tests/test_bfabric_executable.py | 5 +- bfabric/tests/test_bfabric_functional.py | 3 +- bfabric/tests/test_bfabric_read.py | 3 +- bfabric/tests/test_bfabric_sample.py | 4 +- bfabric/tests/test_bfabric_workunit.py | 5 +- bfabric/tests/unit/test_bfabric.py | 11 +- .../wrapper_creator/bfabric_external_job.py | 4 +- bfabric/wrapper_creator/bfabric_feeder.py | 4 +- .../bfabric_wrapper_creator.py | 2 +- 50 files changed, 720 insertions(+), 757 deletions(-) create mode 100644 bfabric/bfabric_legacy.py diff --git a/bfabric/__init__.py b/bfabric/__init__.py index 69898973..6e5d4f6c 100755 --- a/bfabric/__init__.py +++ b/bfabric/__init__.py @@ -2,12 +2,9 @@ __version__ = importlib.metadata.version("bfabric") -name = "bfabricPy" -alias = "suds-py3" +from bfabric.bfabric import Bfabric, BfabricAPIEngineType +from bfabric.bfabric_config import BfabricAuth, BfabricConfig -msg = "\033[93m{} version {} (2023-11-03) -- \"{}\"\ - \nCopyright (C) 2014-2023 Functional Genomics Center Zurich\033[0m\n\n"\ - .format(name, __version__, alias) endpoints = sorted([ 'annotation', @@ -43,8 +40,8 @@ container = project application = 217 -from bfabric.bfabric import Bfabric + +from bfabric.bfabric_legacy import BfabricLegacy from bfabric.wrapper_creator.bfabric_wrapper_creator import BfabricWrapperCreator from bfabric.wrapper_creator.bfabric_submitter import BfabricSubmitter from bfabric.wrapper_creator.bfabric_feeder import BfabricFeeder -from bfabric.bfabric_config import BfabricConfig diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index 49218389..2f9c459e 100755 --- 
a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -1,12 +1,6 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """B-Fabric Application Interface using WSDL -The code contains classes for wrapper_creator and submitter. - -Ensure that this file is available on the bfabric exec host. - Copyright (C) 2014 - 2024 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. Licensed under GPL version 3 @@ -14,280 +8,419 @@ Authors: Marco Schmidt Christian Panse - - -History - The python3 library first appeared in 2014. + Leonardo Schwarz + Aleksejs Fomins """ -from typing import Dict, Any - -import yaml -import json -import sys -from pprint import pprint - -from bfabric.bfabric_config import BfabricAuth, BfabricConfig, read_config -from suds.client import Client -from suds.wsdl import Service +from __future__ import annotations -import os import base64 -import logging.config - -logging.config.dictConfig({ - 'version': 1, - 'formatters': { - 'verbose': { - 'format': 'DEBUG %(name)s: %(message)s' - } - }, - 'handlers': { - 'console': { - 'level': 'DEBUG', - 'class': 'logging.StreamHandler', - 'formatter': 'verbose', - }, - }, - # 'loggers': { - # 'zeep.transports': { - # 'level': 'DEBUG', - # 'propagate': True, - # 'handlers': ['console'], - # }, - # } -}) - - -class bfabricEncoder(json.JSONEncoder): - """ - Implements json encoder for the Bfabric.print_json method - """ - def default(self, o): - try: - return dict(o) - except TypeError: - pass - else: - return list(o) - return JSONEncoder.default(self, o) - - -class Bfabric(object): - """B-Fabric python3 module - Implements read and save object methods for B-Fabric wsdl interface - """ - def warning(self, msg): - sys.stderr.write("\033[93m{}\033[0m\n".format(msg)) - - def __init__(self, login: str = None, password: str = None, base_url: str = None, externaljobid=None, - config_path: str = None, config_env: str = None, optional_auth: bool = False, verbose: bool = False): - """ - :param login: Login string for overriding config file - :param password: Password for overriding config file - :param base_url: Base url of the BFabric server for overriding config file - :param externaljobid: ? - :param config_path: Path to the config file, in case it is different from default - :param config_env: Which config environment to use. Can also specify via environment variable or use - default in the config file (at your own risk) - :param optional_auth: Whether authentification is optional. If yes, missing authentification will be ignored, - otherwise an exception will be raised - :param verbose: Verbosity (TODO: resolve potential redundancy with logger) - """ +import importlib.metadata +import logging +import os +from contextlib import contextmanager +from copy import deepcopy +from datetime import datetime +from enum import Enum +from pprint import pprint +from typing import Literal, ContextManager, Any + +from rich.console import Console + +from bfabric.bfabric_config import BfabricAuth, read_config +from bfabric.bfabric_config import BfabricConfig +from bfabric.src.cli_formatting import HostnameHighlighter, DEFAULT_THEME +from bfabric.src.engine_suds import EngineSUDS +from bfabric.src.engine_zeep import EngineZeep +from bfabric.src.errors import get_response_errors +from bfabric.src.paginator import compute_requested_pages, BFABRIC_QUERY_LIMIT, page_iter +from bfabric.src.result_container import BfabricResultType, ResultContainer + + +class BfabricAPIEngineType(Enum): + SUDS = 1 + ZEEP = 2 + + +# TODO: What does idonly do for SUDS? 
Does it make sense for Zeep? +# TODO: What does includedeletableupdateable do for Zeep? Does it make sense for Suds? +# TODO: How to deal with save-skip fields in Zeep? Does it happen in SUDS? +class Bfabric: + """Bfabric client class, providing general functionality for interaction with the B-Fabric API.""" + + def __init__( + self, + config: BfabricConfig, + auth: BfabricAuth | None, + engine: BfabricAPIEngineType = BfabricAPIEngineType.SUDS, + verbose: bool = False, + ) -> None: self.verbose = verbose - - self.cl = {} - self.verbose = False self.query_counter = 0 - - # Get default path config file path - config_path = config_path or os.path.normpath(os.path.expanduser("~/.bfabricpy.yml")) - - # TODO: Convert to an exception when this branch becomes main - config_path_old = config_path or os.path.normpath(os.path.expanduser("~/.bfabricrc.py")) - if os.path.isfile(config_path): - self.warning("WARNING! The old .bfabricrc.py was found in the home directory. Delete and make sure to use the new .bfabricpy.yml") - - # Use the provided config data from arguments instead of the file - if not os.path.isfile(config_path): - self.warning("could not find '.bfabricpy.yml' file in home directory.") - self.config = BfabricConfig(base_url=base_url) - self.auth = BfabricAuth(login=login, password=password) - - # Load config from file, override some of the fields with the provided ones + self._config = config + self._auth = auth + + if engine == BfabricAPIEngineType.SUDS: + self.engine = EngineSUDS(base_url=config.base_url) + self.result_type = BfabricResultType.LISTSUDS + elif engine == BfabricAPIEngineType.ZEEP: + self.engine = EngineZeep(base_url=config.base_url) + self.result_type = BfabricResultType.LISTZEEP else: - config, auth = read_config(config_path, config_env=config_env, optional_auth=optional_auth) - self.config = config.with_overrides(base_url=base_url) - if (login is not None) and (password is not None): - self.auth = BfabricAuth(login=login, password=password) - elif (login is None) and (password is None): - self.auth = auth - else: - raise IOError("Must provide both username and password, or neither.") - - if not self.config.base_url: - raise ValueError("base server url missing") - if not optional_auth: - if not self.auth or not self.auth.login or not self.auth.password: - raise ValueError("Authentification not initialized but required") - - msg = f"\033[93m--- base_url {self.config.base_url}; login; {self.auth.login} ---\033[0m\n" - sys.stderr.write(msg) + raise ValueError(f"Unexpected engine: {engine}") if self.verbose: - pprint(self.config) - - def read_object(self, endpoint, obj, page=1, plain=False, idonly=False): - """ - A generic method which can connect to any endpoint, e.g., workunit, project, order, - externaljob, etc, and returns the object with the requested id. - obj is a python dictionary which contains all the attributes of the endpoint - for the "query". - """ - return self._perform_request( - endpoint=endpoint, - method="read", - plain=plain, - params=dict(query=obj, idonly=idonly, page=page) - ) - - def readid_object(self, endpoint, obj, page=1, plain=False): + self.print_version_message() + + @classmethod + def from_config( + cls, + config_env: str | None = None, + auth: BfabricAuth | Literal["config"] | None = "config", + engine: BfabricAPIEngineType = BfabricAPIEngineType.SUDS, + verbose: bool = False, + ) -> Bfabric: + """Returns a new Bfabric instance, configured with the user configuration file. 
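+        A minimal usage sketch (the environment name "TEST" is illustrative only):
+
+            client = Bfabric.from_config(config_env="TEST")
+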
+ If the `config_env` is specified then it will be used, if it is not specified the default environment will be + determined by checking the following in order (picking the first one that is found): + - The `BFABRICPY_CONFIG_ENV` environment variable + - The `default_config` field in the config file "GENERAL" section + :param config_env: Configuration environment to use. If not given, it is deduced as described above. + :param auth: Authentication to use. If "config" is given, the authentication will be read from the config file. + If it is set to None, no authentication will be used. + :param engine: Engine to use for the API. Default is SUDS. + :param verbose: Print a system info message to standard error console """ - A generic method which can connect to any endpoint, e.g., workunit, project, order, - externaljob, etc, and returns the object with the requested id. - obj is a python dictionary which contains only the id of the endpoint for the "query". + config, auth_config = get_system_auth(config_env=config_env) + auth_used: BfabricAuth | None = auth_config if auth == "config" else auth + return cls(config, auth_used, engine=engine, verbose=verbose) + + @property + def config(self) -> BfabricConfig: + """Returns the config object.""" + return self._config + + @property + def auth(self) -> BfabricAuth: + """Returns the auth object. + :raises ValueError: If authentication is not available """ - return self._perform_request( - endpoint=endpoint, - method="readid", - plain=plain, - params=dict(query=obj, page=page) - ) - - def save_object(self, endpoint, obj, debug=None): + if self._auth is None: + raise ValueError("Authentication not available") + return self._auth + + @contextmanager + def with_auth(self, auth: BfabricAuth) -> ContextManager[Bfabric]: + """Context manager that temporarily (within the scope of the context) sets the authentication for + the Bfabric object to the provided value. This is useful when authenticating multiple users, to avoid accidental + use of the wrong credentials. """ - same as read_object above but uses the save method. + old_auth = self._auth + self._auth = auth + try: + yield + finally: + self._auth = old_auth + + def read( + self, + endpoint: str, + obj: dict[str, Any], + max_results: int | None = 100, + offset: int = 0, + readid: bool = False, + check: bool = True, + idonly: bool = False, + ) -> ResultContainer: + """Reads objects from the specified endpoint that match all specified attributes in `obj`. + By setting `max_results` it is possible to change the number of results that are returned. + :param endpoint: endpoint + :param obj: query dictionary + :param max_results: cap on the number of results to query. The code will keep reading pages until all pages + are read or expected number of results has been reached. If None, load all available pages. + NOTE: max_results will be rounded upwards to the nearest multiple of BFABRIC_QUERY_LIMIT, because results + come in blocks, and there is little overhead to providing results over integer number of pages. + :param offset: the number of elements to skip before starting to return results (useful for pagination, default + is 0 which means no skipping) + :param readid: whether to use reading by ID. Currently only available for engine=SUDS + TODO: Test the extent to which this method works. 
Add safeguards + :param check: whether to check for errors in the response + :param idonly: whether to return only the ids of the objects + :return: List of responses, packaged in the results container """ - return self._perform_request( - endpoint=endpoint, - method="save", - plain=debug is not None, - params={endpoint: obj} - ) + # Get the first page. + # NOTE: According to old interface, this is equivalent to plain=True + response, errors = self._read_page(readid, endpoint, obj, page=1, idonly=idonly) - def checkandinsert_object(self, endpoint, obj, debug=None): - """ - wsdl method to check iff dependencies are fulfilled - """ - # TODO This method was changed a while ago to use the "save"endpoint, which makes it functionally identical - # to the save_object method. Check if this was intended. - return self._perform_request( - endpoint=endpoint, - method="save", - plain=debug is not None, - params={endpoint: obj} + try: + n_available_pages = response["numberofpages"] + except AttributeError: + n_available_pages = 0 + + # Return empty list if nothing found + if not n_available_pages: + result = ResultContainer( + [], self.result_type, total_pages_api=0, errors=get_response_errors(response, endpoint) + ) + if check: + result.assert_success() + return result + + # Get results from other pages as well, if need be + requested_pages, initial_offset = compute_requested_pages( + n_page_total=n_available_pages, + n_item_per_page=BFABRIC_QUERY_LIMIT, + n_item_offset=offset, + n_item_return_max=max_results, ) - - def delete_object(self, endpoint, id=None, debug=None): + logging.info(f"Requested pages: {requested_pages}") + + # NOTE: Page numbering starts at 1 + response_items = [] + page_offset = initial_offset + for i_iter, i_page in enumerate(requested_pages): + if not (i_iter == 0 and i_page == 1): + print("-- reading page", i_page, "of", n_available_pages) + response, errors_page = self._read_page(readid, endpoint, obj, page=i_page, idonly=idonly) + errors += errors_page + + response_items += response[endpoint][page_offset:] + page_offset = 0 + + result = ResultContainer(response_items, self.result_type, total_pages_api=n_available_pages, errors=errors) + if check: + result.assert_success() + return result + + def save(self, endpoint: str, obj: dict, check: bool = True) -> ResultContainer: + results = self.engine.save(endpoint, obj, auth=self.auth) + result = ResultContainer(results[endpoint], self.result_type, errors=get_response_errors(results, endpoint)) + if check: + result.assert_success() + return result + + def delete(self, endpoint: str, id: int | list[int], check: bool = True) -> ResultContainer: + results = self.engine.delete(endpoint, id, auth=self.auth) + result = ResultContainer(results[endpoint], self.result_type, errors=get_response_errors(results, endpoint)) + if check: + result.assert_success() + return result + + def upload_resource( + self, resource_name: str, content: bytes, workunit_id: int, check: bool = True + ) -> ResultContainer: + """Uploads a resource to B-Fabric, only intended for relatively small files that will be tracked by B-Fabric + and not one of the dedicated experimental data stores. + :param resource_name: the name of the resource to create (the same name can only exist once per workunit) + :param content: the content of the resource as bytes + :param workunit_id: the workunit ID to which the resource belongs + :param check: whether to check for errors in the response """ - same as read_object above but uses the delete method. 
- """ - return self._perform_request( - endpoint=endpoint, - method="delete", - plain=debug is not None, - params=dict(id=id) + content_encoded = base64.b64encode(content).decode() + return self.save( + endpoint="resource", + obj={ + "base64": content_encoded, + "name": resource_name, + "description": "base64 encoded file", + "workunitid": workunit_id, + }, + check=check, ) - def upload_file(self, filename, workunitid): - with open(filename, 'rb') as f: - content = f.read() - - resource_base64 = base64.b64encode(content).decode() - - res = self.save_object('resource', {'base64': resource_base64, - 'name': os.path.basename(filename), - 'description': "base64 encoded file", - 'workunitid': workunitid}) - - return res - - def _get_service(self, endpoint: str) -> Service: - """Returns a `suds.client.Service` object for the given endpoint name.""" - if endpoint not in self.cl: - self.cl[endpoint] = Client(f"{self.config.base_url}/{endpoint}?wsdl", cache=None) - return self.cl[endpoint].service - - def _perform_request( - self, endpoint: str, method: str, plain: bool, params: Dict[str, Any] - ) -> Any: - """Performs a request to the given endpoint and returns the result.""" - self.query_counter += 1 - request_params = dict(login=self.auth.login, password=self.auth.password, **params) - service = self._get_service(endpoint=endpoint) - response = getattr(service, method)(request_params) - if plain: - return response - elif getattr(response, "entitiesonpage", None) == 0: - return [] - return getattr(response, endpoint) - - @staticmethod - def print_json(queryres=None): + def _read_page(self, readid: bool, endpoint: str, query: dict[str, Any], idonly: bool = False, page: int = 1): + """Reads the specified page of objects from the specified endpoint that match the query.""" + if readid: + # https://fgcz-bfabric.uzh.ch/wiki/tiki-index.php?page=endpoint.workunit#Web_Method_readid_ + response = self.engine.readid(endpoint, query, auth=self.auth, page=page) + else: + response = self.engine.read(endpoint, query, auth=self.auth, page=page, idonly=idonly) + + return response, get_response_errors(response, endpoint) + + ############################ + # Multi-query functionality + ############################ + + # TODO: Is this scope sufficient? Is there ever more than one multi-query parameter, and/or not at the root of dict? + def read_multi( + self, + endpoint: str, + obj: dict, + multi_query_key: str, + multi_query_vals: list, + readid: bool = False, + idonly: bool = False, + ) -> ResultContainer: """ - This method prints the query result as returned by ``read_object`` in JSON format. - - Parameter - --------- - - queryres : the object returned by ``read_object`` method. + Makes a 1-parameter multi-query (there is 1 parameter that takes a list of values) + Since the API only allows BFABRIC_QUERY_LIMIT queries per page, split the list into chunks before querying + :param endpoint: endpoint + :param obj: query dictionary + :param multi_query_key: key for which the multi-query is performed + :param multi_query_vals: list of values for which the multi-query is performed + :param readid: whether to use reading by ID. Currently only available for engine=SUDS + TODO: Test the extent to which this method works. Add safeguards + :param idonly: whether to return only the ids of the objects + :return: List of responses, packaged in the results container + + NOTE: It is assumed that there is only 1 response for each value. 
""" - if queryres is None: - raise TypeError("print_json() missing 1 required positional argument: please provide the output from read_object as parameter to print_json") - - res = json.dumps(queryres, cls=bfabricEncoder, sort_keys=True, indent=2) - print(res) - @staticmethod - def print_yaml(queryres=None): + response_tot = ResultContainer([], self.result_type, total_pages_api=0) + obj_extended = deepcopy(obj) # Make a copy of the query, not to make edits to the argument + + # Iterate over request chunks that fit into a single API page + for page_vals in page_iter(multi_query_vals): + obj_extended[multi_query_key] = page_vals + + # TODO: Test what happens if there are multiple responses to each of the individual queries. + # * What would happen? + # * What would happen if total number of responses would exceed 100 now? + # * What would happen if we naively made a multi-query with more than 100 values? Would API paginate + # automatically? If yes, perhaps we don't need this method at all? + # TODO: It is assumed that a user requesting multi_query always wants all of the pages. Can anybody think of + # exceptions to this? + response_this = self.read(endpoint, obj_extended, max_results=None, readid=readid, idonly=idonly) + response_tot.extend(response_this) + + return response_tot + + # NOTE: Save-multi method is likely useless. When saving multiple objects, they all have different fields. + # One option would be to provide a dataframe, but it might struggle with nested dicts + # Likely best solution is to not provide this method, and let users run a for-loop themselves. + # def save_multi(self, endpoint: str, obj_lst: list, **kwargs) -> ResultContainer: + # response_tot = ResultContainer([], self.result_type, total_pages_api = 0) + # + # # Iterate over request chunks that fit into a single API page + # for page_objs in page_iter(obj_lst): + # response_page = self.save(endpoint, page_objs, **kwargs) + # response_tot.extend(response_page) + # + # return response_tot + + def delete_multi(self, endpoint: str, id_list: list) -> ResultContainer: + response_tot = ResultContainer([], self.result_type, total_pages_api=0) + + if len(id_list) == 0: + print("Warning, empty list provided for deletion, ignoring") + return response_tot + + # Iterate over request chunks that fit into a single API page + for page_ids in page_iter(id_list): + response_page = self.delete(endpoint, page_ids) + response_tot.extend(response_page) + + return response_tot + + def exists(self, endpoint: str, key: str, value: list[int | str] | int | str) -> bool | list[bool]: """ - This method prints the query result as returned by ``read_object`` in YAML format. - - Parameter - --------- - - queryres : the object returned by ``read_object`` method. + :param endpoint: endpoint + :param key: A key for the query (e.g. id or name) + :param value: A value or a list of values + :return: Return a single bool or a list of bools for each value + For each value, test if a key with that value is found in the API. """ - if queryres is None: - raise TypeError("print_yaml() missing 1 required positional argument: please provide the output from read_object as parameter to print_yaml") + is_scalar = isinstance(value, (int, str)) - res_json = json.dumps(queryres, cls=bfabricEncoder, sort_keys=True) - res = yaml.dump(res_json, default_flow_style=False, encoding=None, default_style=None) - print(res) - - def get_sampleid(self, resourceid=None): - """ - determines the sample_id of a given resource_id. - it performs a recursive dfs. 
- TODO(cp): check if the method should be implemented using a stack + # 1. Read data for this id + if is_scalar: + results = self.read(endpoint, {key: value}) + elif isinstance(value, list): + results = self.read_multi(endpoint, {}, key, value) + else: + raise ValueError("Unexpected data type", type(value)) + + # 2. Extract all the ids for which there was a response + result_vals = [] + for r in results.results: + if key in r: + result_vals += [r[key]] + elif "_" + key in r: # TODO: Remove this if SUDS bug is ever resolved + result_vals += [r["_" + key]] + + # 3. For each of the requested ids, return true if there was a response and false if there was not + if is_scalar: + return value in result_vals + else: + return [val in result_vals for val in value] + + def get_version_message(self) -> str: + """Returns the version message as a string.""" + package_version = importlib.metadata.version("bfabric") + year = datetime.now().year + engine_name = self.engine.__class__.__name__ + base_url = self.config.base_url + user_name = f"U={self._auth.login if self._auth else None}" + return ( + f"--- bfabricPy v{package_version} ({engine_name}, {base_url}, {user_name}) ---\n" + f"--- Copyright (C) 2014-{year} Functional Genomics Center Zurich ---" + ) - :param resourceid: - :return: (int, int) + def print_version_message(self, stderr: bool = True) -> None: + """Prints the version message to the console. + :param stderr: Whether to print to stderr (True, default) or stdout (False) """ + console = Console(stderr=stderr, highlighter=HostnameHighlighter(), theme=DEFAULT_THEME) + console.print(self.get_version_message(), style="bright_yellow") + + +def get_system_auth( + login: str = None, + password: str = None, + base_url: str = None, + config_path: str = None, + config_env: str = None, + optional_auth: bool = True, + verbose: bool = False, +) -> tuple[BfabricConfig, BfabricAuth]: + """ + :param login: Login string for overriding config file + :param password: Password for overriding config file + :param base_url: Base server url for overriding config file + :param config_path: Path to the config file, in case it is different from default + :param config_env: Which config environment to use. Can also specify via environment variable or use + default in the config file (at your own risk) + :param optional_auth: Whether authentication is optional. 
If yes, missing authentication will be ignored, + otherwise an exception will be raised + :param verbose: Verbosity (TODO: resolve potential redundancy with logger) + """ - assert isinstance(resourceid, int) - - try: - resource = self.read_object('resource', obj={'id': resourceid})[0] - except: - return (None) + have_config_path = config_path is not None + if not have_config_path: + # Get default path config file path + config_path = os.path.normpath(os.path.expanduser("~/.bfabricpy.yml")) + + # Use the provided config data from arguments instead of the file + if not os.path.isfile(config_path): + if have_config_path: + # NOTE: If user explicitly specifies a path to a wrong config file, this has to be an exception + raise OSError(f"Explicitly specified config file does not exist: {config_path}") + # TODO: Convert to log + print(f"Warning: could not find the config file in the default location: {config_path}") + config = BfabricConfig(base_url=base_url) + if login is None and password is None: + auth = None + else: + auth = BfabricAuth(login=login, password=password) + + # Load config from file, override some of the fields with the provided ones + else: + config, auth = read_config(config_path, config_env=config_env) + config = config.copy_with(base_url=base_url) + if (login is not None) and (password is not None): + auth = BfabricAuth(login=login, password=password) + elif (login is None) and (password is None): + auth = auth + else: + raise OSError("Must provide both username and password, or neither.") - try: - workunit = self.read_object(endpoint='workunit', obj={'id': resource.workunit._id})[0] - return (self.get_sampleid(resourceid=int(workunit.inputresource[0]._id))) - except: - self.warning("fetching sampleid of resource.workunitid = {} failed.".format(resource.workunit._id)) - return (None) + if not config.base_url: + raise ValueError("base_url missing") + if not optional_auth: + if not auth or not auth.login or not auth.password: + raise ValueError("Authentification not initialized but required") + if verbose: + pprint(config) -if __name__ == "__main__": - bfapp = Bfabric(verbose=True) + return config, auth diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 5be114b3..2763985b 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -1,428 +1,5 @@ #!/usr/bin/env python3 -"""B-Fabric Application Interface using WSDL - -The code contains classes for wrapper_creator and submitter. - -Ensure that this file is available on the bfabric exec host. - -Copyright (C) 2014 - 2024 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. - -Licensed under GPL version 3 - -Original Authors: - Marco Schmidt - Christian Panse - -BFabric V2 Authors: - Leonardo Schwarz - Aleksejs Fomins - -History - The python3 library first appeared in 2014. 
-""" -from __future__ import annotations - -import base64 -import logging -import os -from contextlib import contextmanager -from copy import deepcopy -from datetime import datetime -from enum import Enum -from pprint import pprint -from typing import Any, Literal, ContextManager - -from rich.console import Console - -from bfabric import __version__ as PACKAGE_VERSION -from bfabric.bfabric_config import BfabricAuth, BfabricConfig, read_config -from bfabric.src.cli_formatting import DEFAULT_THEME, HostnameHighlighter -from bfabric.src.engine_suds import EngineSUDS -from bfabric.src.engine_zeep import EngineZeep -from bfabric.src.errors import get_response_errors -from bfabric.src.paginator import BFABRIC_QUERY_LIMIT, compute_requested_pages, page_iter -from bfabric.src.result_container import BfabricResultType, ResultContainer - - -class BfabricAPIEngineType(Enum): - SUDS = 1 - ZEEP = 2 - - -def get_system_auth( - login: str = None, - password: str = None, - base_url: str = None, - config_path: str = None, - config_env: str = None, - optional_auth: bool = True, - verbose: bool = False, -) -> tuple[BfabricConfig, BfabricAuth]: - """ - :param login: Login string for overriding config file - :param password: Password for overriding config file - :param base_url: Base server url for overriding config file - :param config_path: Path to the config file, in case it is different from default - :param config_env: Which config environment to use. Can also specify via environment variable or use - default in the config file (at your own risk) - :param optional_auth: Whether authentication is optional. If yes, missing authentication will be ignored, - otherwise an exception will be raised - :param verbose: Verbosity (TODO: resolve potential redundancy with logger) - """ - - have_config_path = config_path is not None - if not have_config_path: - # Get default path config file path - config_path = os.path.normpath(os.path.expanduser("~/.bfabricpy.yml")) - - # Use the provided config data from arguments instead of the file - if not os.path.isfile(config_path): - if have_config_path: - # NOTE: If user explicitly specifies a path to a wrong config file, this has to be an exception - raise OSError(f"Explicitly specified config file does not exist: {config_path}") - # TODO: Convert to log - print(f"Warning: could not find the config file in the default location: {config_path}") - config = BfabricConfig(base_url=base_url) - if login is None and password is None: - auth = None - else: - auth = BfabricAuth(login=login, password=password) - - # Load config from file, override some of the fields with the provided ones - else: - config, auth = read_config(config_path, config_env=config_env) - config = config.copy_with(base_url=base_url) - if (login is not None) and (password is not None): - auth = BfabricAuth(login=login, password=password) - elif (login is None) and (password is None): - auth = auth - else: - raise OSError("Must provide both username and password, or neither.") - - if not config.base_url: - raise ValueError("base_url missing") - if not optional_auth: - if not auth or not auth.login or not auth.password: - raise ValueError("Authentification not initialized but required") - - if verbose: - pprint(config) - - return config, auth - - -# TODO: What does idonly do for SUDS? Does it make sense for Zeep? -# TODO: What does includedeletableupdateable do for Zeep? Does it make sense for Suds? -# TODO: How to deal with save-skip fields in Zeep? Does it happen in SUDS? 
-class Bfabric: - """Bfabric client class, providing general functionality for interaction with the B-Fabric API.""" - - def __init__( - self, - config: BfabricConfig, - auth: BfabricAuth | None, - engine: BfabricAPIEngineType = BfabricAPIEngineType.SUDS, - verbose: bool = False, - ) -> None: - self.verbose = verbose - self.query_counter = 0 - self._config = config - self._auth = auth - - if engine == BfabricAPIEngineType.SUDS: - self.engine = EngineSUDS(base_url=config.base_url) - self.result_type = BfabricResultType.LISTSUDS - elif engine == BfabricAPIEngineType.ZEEP: - self.engine = EngineZeep(base_url=config.base_url) - self.result_type = BfabricResultType.LISTZEEP - else: - raise ValueError(f"Unexpected engine: {engine}") - - if self.verbose: - self.print_version_message() - - @classmethod - def from_config( - cls, - config_env: str | None = None, - auth: BfabricAuth | Literal["config"] | None = "config", - engine: BfabricAPIEngineType = BfabricAPIEngineType.SUDS, - verbose: bool = False, - ) -> Bfabric: - """Returns a new Bfabric instance, configured with the user configuration file. - If the `config_env` is specified then it will be used, if it is not specified the default environment will be - determined by checking the following in order (picking the first one that is found): - - The `BFABRICPY_CONFIG_ENV` environment variable - - The `default_config` field in the config file "GENERAL" section - :param config_env: Configuration environment to use. If not given, it is deduced as described above. - :param auth: Authentication to use. If "config" is given, the authentication will be read from the config file. - If it is set to None, no authentication will be used. - :param engine: Engine to use for the API. Default is SUDS. - :param verbose: Print a system info message to standard error console - """ - config, auth_config = get_system_auth(config_env=config_env) - auth_used: BfabricAuth | None = auth_config if auth == "config" else auth - return cls(config, auth_used, engine=engine, verbose=verbose) - - @property - def config(self) -> BfabricConfig: - """Returns the config object.""" - return self._config - - @property - def auth(self) -> BfabricAuth: - """Returns the auth object. - :raises ValueError: If authentication is not available - """ - if self._auth is None: - raise ValueError("Authentication not available") - return self._auth - - @contextmanager - def with_auth(self, auth: BfabricAuth) -> ContextManager[Bfabric]: - """Context manager that temporarily (within the scope of the context) sets the authentication for - the Bfabric object to the provided value. This is useful when authenticating multiple users, to avoid accidental - use of the wrong credentials. - """ - old_auth = self._auth - self._auth = auth - try: - yield - finally: - self._auth = old_auth - - def read( - self, - endpoint: str, - obj: dict[str, Any], - max_results: int | None = 100, - offset: int = 0, - readid: bool = False, - check: bool = True, - idonly: bool = False - ) -> ResultContainer: - """Reads objects from the specified endpoint that match all specified attributes in `obj`. - By setting `max_results` it is possible to change the number of results that are returned. - :param endpoint: endpoint - :param obj: query dictionary - :param max_results: cap on the number of results to query. The code will keep reading pages until all pages - are read or expected number of results has been reached. If None, load all available pages. 
- NOTE: max_results will be rounded upwards to the nearest multiple of BFABRIC_QUERY_LIMIT, because results - come in blocks, and there is little overhead to providing results over integer number of pages. - :param offset: the number of elements to skip before starting to return results (useful for pagination, default - is 0 which means no skipping) - :param readid: whether to use reading by ID. Currently only available for engine=SUDS - TODO: Test the extent to which this method works. Add safeguards - :param check: whether to check for errors in the response - :param idonly: whether to return only the ids of the objects - :return: List of responses, packaged in the results container - """ - # Get the first page. - # NOTE: According to old interface, this is equivalent to plain=True - response, errors = self._read_page(readid, endpoint, obj, page=1, idonly=idonly) - - try: - n_available_pages = response["numberofpages"] - except AttributeError: - n_available_pages = 0 - - # Return empty list if nothing found - if not n_available_pages: - result = ResultContainer( - [], self.result_type, total_pages_api=0, errors=get_response_errors(response, endpoint) - ) - if check: - result.assert_success() - return result - - # Get results from other pages as well, if need be - requested_pages, initial_offset = compute_requested_pages( - n_page_total=n_available_pages, - n_item_per_page=BFABRIC_QUERY_LIMIT, - n_item_offset=offset, - n_item_return_max=max_results, - ) - logging.info(f"Requested pages: {requested_pages}") - - # NOTE: Page numbering starts at 1 - response_items = [] - page_offset = initial_offset - for i_iter, i_page in enumerate(requested_pages): - if not (i_iter == 0 and i_page == 1): - print("-- reading page", i_page, "of", n_available_pages) - response, errors_page = self._read_page(readid, endpoint, obj, page=i_page, idonly=idonly) - errors += errors_page - - response_items += response[endpoint][page_offset:] - page_offset = 0 - - result = ResultContainer(response_items, self.result_type, total_pages_api=n_available_pages, errors=errors) - if check: - result.assert_success() - return result - - def save(self, endpoint: str, obj: dict, check: bool = True) -> ResultContainer: - results = self.engine.save(endpoint, obj, auth=self.auth) - result = ResultContainer(results[endpoint], self.result_type, errors=get_response_errors(results, endpoint)) - if check: - result.assert_success() - return result - - def delete(self, endpoint: str, id: int | list[int], check: bool = True) -> ResultContainer: - results = self.engine.delete(endpoint, id, auth=self.auth) - result = ResultContainer(results[endpoint], self.result_type, errors=get_response_errors(results, endpoint)) - if check: - result.assert_success() - return result - - def upload_resource( - self, resource_name: str, content: bytes, workunit_id: int, check: bool = True - ) -> ResultContainer: - """Uploads a resource to B-Fabric, only intended for relatively small files that will be tracked by B-Fabric - and not one of the dedicated experimental data stores. 
- :param resource_name: the name of the resource to create (the same name can only exist once per workunit) - :param content: the content of the resource as bytes - :param workunit_id: the workunit ID to which the resource belongs - :param check: whether to check for errors in the response - """ - content_encoded = base64.b64encode(content).decode() - return self.save( - endpoint="resource", - obj={ - "base64": content_encoded, - "name": resource_name, - "description": "base64 encoded file", - "workunitid": workunit_id, - }, - check=check, - ) - - def _read_page(self, readid: bool, endpoint: str, query: dict[str, Any], idonly: bool = False, page: int = 1): - """Reads the specified page of objects from the specified endpoint that match the query.""" - if readid: - # https://fgcz-bfabric.uzh.ch/wiki/tiki-index.php?page=endpoint.workunit#Web_Method_readid_ - response = self.engine.readid(endpoint, query, auth=self.auth, page=page) - else: - response = self.engine.read(endpoint, query, auth=self.auth, page=page, idonly=idonly) - - return response, get_response_errors(response, endpoint) - - ############################ - # Multi-query functionality - ############################ - - # TODO: Is this scope sufficient? Is there ever more than one multi-query parameter, and/or not at the root of dict? - def read_multi( - self, endpoint: str, obj: dict, multi_query_key: str, multi_query_vals: list, readid: bool = False, - idonly: bool = False - ) -> ResultContainer: - """ - Makes a 1-parameter multi-query (there is 1 parameter that takes a list of values) - Since the API only allows BFABRIC_QUERY_LIMIT queries per page, split the list into chunks before querying - :param endpoint: endpoint - :param obj: query dictionary - :param multi_query_key: key for which the multi-query is performed - :param multi_query_vals: list of values for which the multi-query is performed - :param readid: whether to use reading by ID. Currently only available for engine=SUDS - TODO: Test the extent to which this method works. Add safeguards - :param idonly: whether to return only the ids of the objects - :return: List of responses, packaged in the results container - - NOTE: It is assumed that there is only 1 response for each value. - """ - - response_tot = ResultContainer([], self.result_type, total_pages_api=0) - obj_extended = deepcopy(obj) # Make a copy of the query, not to make edits to the argument - - # Iterate over request chunks that fit into a single API page - for page_vals in page_iter(multi_query_vals): - obj_extended[multi_query_key] = page_vals - - # TODO: Test what happens if there are multiple responses to each of the individual queries. - # * What would happen? - # * What would happen if total number of responses would exceed 100 now? - # * What would happen if we naively made a multi-query with more than 100 values? Would API paginate - # automatically? If yes, perhaps we don't need this method at all? - # TODO: It is assumed that a user requesting multi_query always wants all of the pages. Can anybody think of - # exceptions to this? - response_this = self.read(endpoint, obj_extended, max_results=None, readid=readid, idonly=idonly) - response_tot.extend(response_this) - - return response_tot - - # NOTE: Save-multi method is likely useless. When saving multiple objects, they all have different fields. - # One option would be to provide a dataframe, but it might struggle with nested dicts - # Likely best solution is to not provide this method, and let users run a for-loop themselves. 
- # def save_multi(self, endpoint: str, obj_lst: list, **kwargs) -> ResultContainer: - # response_tot = ResultContainer([], self.result_type, total_pages_api = 0) - # - # # Iterate over request chunks that fit into a single API page - # for page_objs in page_iter(obj_lst): - # response_page = self.save(endpoint, page_objs, **kwargs) - # response_tot.extend(response_page) - # - # return response_tot - - def delete_multi(self, endpoint: str, id_list: list) -> ResultContainer: - response_tot = ResultContainer([], self.result_type, total_pages_api=0) - - if len(id_list) == 0: - print("Warning, empty list provided for deletion, ignoring") - return response_tot - - # Iterate over request chunks that fit into a single API page - for page_ids in page_iter(id_list): - response_page = self.delete(endpoint, page_ids) - response_tot.extend(response_page) - - return response_tot - - def exists(self, endpoint: str, key: str, value: list[int | str] | int | str) -> bool | list[bool]: - """ - :param endpoint: endpoint - :param key: A key for the query (e.g. id or name) - :param value: A value or a list of values - :return: Return a single bool or a list of bools for each value - For each value, test if a key with that value is found in the API. - """ - is_scalar = isinstance(value, (int, str)) - - # 1. Read data for this id - if is_scalar: - results = self.read(endpoint, {key: value}) - elif isinstance(value, list): - results = self.read_multi(endpoint, {}, key, value) - else: - raise ValueError("Unexpected data type", type(value)) - - # 2. Extract all the ids for which there was a response - result_vals = [] - for r in results.results: - if key in r: - result_vals += [r[key]] - elif "_" + key in r: # TODO: Remove this if SUDS bug is ever resolved - result_vals += [r["_" + key]] - - # 3. For each of the requested ids, return true if there was a response and false if there was not - if is_scalar: - return value in result_vals - else: - return [val in result_vals for val in value] - - def get_version_message(self) -> str: - """Returns the version message as a string.""" - year = datetime.now().year - engine_name = self.engine.__class__.__name__ - base_url = self.config.base_url - user_name = f"U={self._auth.login if self._auth else None}" - return ( - f"--- bfabricPy v{PACKAGE_VERSION} ({engine_name}, {base_url}, {user_name}) ---\n" - f"--- Copyright (C) 2014-{year} Functional Genomics Center Zurich ---" - ) - - def print_version_message(self, stderr: bool = True) -> None: - """Prints the version message to the console. 
-        :param stderr: Whether to print to stderr (True, default) or stdout (False)
-        """
-        console = Console(stderr=stderr, highlighter=HostnameHighlighter(), theme=DEFAULT_THEME)
-        console.print(self.get_version_message(), style="bright_yellow")
+import warnings
+warnings.warn("bfabric.bfabric2 module is deprecated, use bfabric instead", DeprecationWarning)
+# TODO deprecated - import from bfabric instead
+from bfabric.bfabric import Bfabric, BfabricAPIEngineType, get_system_auth
diff --git a/bfabric/bfabric_legacy.py b/bfabric/bfabric_legacy.py
new file mode 100644
index 00000000..cdd135d6
--- /dev/null
+++ b/bfabric/bfabric_legacy.py
@@ -0,0 +1,244 @@
+import base64
+import json
+import os
+import sys
+from pprint import pprint
+from typing import Dict, Any
+
+import yaml
+from suds.client import Client
+from suds.wsdl import Service
+
+from bfabric import BfabricConfig
+from bfabric.bfabric_config import BfabricAuth, read_config
+
+
+class BfabricLegacy(object):
+    """B-Fabric python3 module
+    Implements read and save object methods for the B-Fabric wsdl interface
+    """
+    def warning(self, msg):
+        sys.stderr.write("\033[93m{}\033[0m\n".format(msg))
+
+    def __init__(self, login: str = None, password: str = None, base_url: str = None, externaljobid=None,
+                 config_path: str = None, config_env: str = None, optional_auth: bool = False, verbose: bool = False):
+        """
+        :param login: Login string for overriding config file
+        :param password: Password for overriding config file
+        :param base_url: Base url of the BFabric server for overriding config file
+        :param externaljobid: ?
+        :param config_path: Path to the config file, in case it is different from default
+        :param config_env: Which config environment to use. Can also specify via environment variable or use
+            default in the config file (at your own risk)
+        :param optional_auth: Whether authentication is optional. If yes, missing authentication will be ignored,
+            otherwise an exception will be raised
+        :param verbose: Verbosity (TODO: resolve potential redundancy with logger)
+        """
+        self.verbose = verbose
+
+        self.cl = {}
+        self.query_counter = 0
+
+        # Get the default config file path
+        config_path = config_path or os.path.normpath(os.path.expanduser("~/.bfabricpy.yml"))
+
+        # TODO: Convert to an exception when this branch becomes main
+        config_path_old = os.path.normpath(os.path.expanduser("~/.bfabricrc.py"))
+        if os.path.isfile(config_path_old):
+            self.warning("WARNING! The old .bfabricrc.py was found in the home directory. Delete and make sure to use the new .bfabricpy.yml")
+
+        # Use the provided config data from arguments instead of the file
+        if not os.path.isfile(config_path):
+            self.warning("could not find '.bfabricpy.yml' file in home directory.")
+            self.config = BfabricConfig(base_url=base_url)
+            self.auth = BfabricAuth(login=login, password=password)
+
+        # Load config from file, override some of the fields with the provided ones
+        else:
+            config, auth = read_config(config_path, config_env=config_env, optional_auth=optional_auth)
+            self.config = config.with_overrides(base_url=base_url)
+            if (login is not None) and (password is not None):
+                self.auth = BfabricAuth(login=login, password=password)
+            elif (login is None) and (password is None):
+                self.auth = auth
+            else:
+                raise IOError("Must provide both username and password, or neither.")
+
+        if not self.config.base_url:
+            raise ValueError("base server url missing")
+        if not optional_auth:
+            if not self.auth or not self.auth.login or not self.auth.password:
+                raise ValueError("Authentication not initialized but required")
+
+        msg = f"\033[93m--- base_url {self.config.base_url}; login: {self.auth.login} ---\033[0m\n"
+        sys.stderr.write(msg)
+
+        if self.verbose:
+            pprint(self.config)
+
+    def read_object(self, endpoint, obj, page=1, plain=False, idonly=False):
+        """
+        A generic method which can connect to any endpoint, e.g., workunit, project, order,
+        externaljob, etc, and returns the object with the requested id.
+        obj is a python dictionary which contains all the attributes of the endpoint
+        for the "query".
+        """
+        return self._perform_request(
+            endpoint=endpoint,
+            method="read",
+            plain=plain,
+            params=dict(query=obj, idonly=idonly, page=page)
+        )
+
+    def readid_object(self, endpoint, obj, page=1, plain=False):
+        """
+        A generic method which can connect to any endpoint, e.g., workunit, project, order,
+        externaljob, etc, and returns the object with the requested id.
+        obj is a python dictionary which contains only the id of the endpoint for the "query".
+        """
+        return self._perform_request(
+            endpoint=endpoint,
+            method="readid",
+            plain=plain,
+            params=dict(query=obj, page=page)
+        )
+
+    def save_object(self, endpoint, obj, debug=None):
+        """
+        same as read_object above but uses the save method.
+        """
+        return self._perform_request(
+            endpoint=endpoint,
+            method="save",
+            plain=debug is not None,
+            params={endpoint: obj}
+        )
+
+    def checkandinsert_object(self, endpoint, obj, debug=None):
+        """
+        wsdl method to check if dependencies are fulfilled
+        """
+        # TODO This method was changed a while ago to use the "save" endpoint, which makes it functionally identical
+        # to the save_object method. Check if this was intended.
+        return self._perform_request(
+            endpoint=endpoint,
+            method="save",
+            plain=debug is not None,
+            params={endpoint: obj}
+        )
+
+    def delete_object(self, endpoint, id=None, debug=None):
+        """
+        same as read_object above but uses the delete method.
+ """ + return self._perform_request( + endpoint=endpoint, + method="delete", + plain=debug is not None, + params=dict(id=id) + ) + + def upload_file(self, filename, workunitid): + with open(filename, 'rb') as f: + content = f.read() + + resource_base64 = base64.b64encode(content).decode() + + res = self.save_object('resource', {'base64': resource_base64, + 'name': os.path.basename(filename), + 'description': "base64 encoded file", + 'workunitid': workunitid}) + + return res + + def _get_service(self, endpoint: str) -> Service: + """Returns a `suds.client.Service` object for the given endpoint name.""" + if endpoint not in self.cl: + self.cl[endpoint] = Client(f"{self.config.base_url}/{endpoint}?wsdl", cache=None) + return self.cl[endpoint].service + + def _perform_request( + self, endpoint: str, method: str, plain: bool, params: Dict[str, Any] + ) -> Any: + """Performs a request to the given endpoint and returns the result.""" + self.query_counter += 1 + request_params = dict(login=self.auth.login, password=self.auth.password, **params) + service = self._get_service(endpoint=endpoint) + response = getattr(service, method)(request_params) + if plain: + return response + elif getattr(response, "entitiesonpage", None) == 0: + return [] + return getattr(response, endpoint) + + @staticmethod + def print_json(queryres=None): + """ + This method prints the query result as returned by ``read_object`` in JSON format. + + Parameter + --------- + + queryres : the object returned by ``read_object`` method. + """ + if queryres is None: + raise TypeError("print_json() missing 1 required positional argument: please provide the output from read_object as parameter to print_json") + + res = json.dumps(queryres, cls=bfabricEncoder, sort_keys=True, indent=2) + print(res) + + @staticmethod + def print_yaml(queryres=None): + """ + This method prints the query result as returned by ``read_object`` in YAML format. + + Parameter + --------- + + queryres : the object returned by ``read_object`` method. + """ + if queryres is None: + raise TypeError("print_yaml() missing 1 required positional argument: please provide the output from read_object as parameter to print_yaml") + + res_json = json.dumps(queryres, cls=bfabricEncoder, sort_keys=True) + res = yaml.dump(res_json, default_flow_style=False, encoding=None, default_style=None) + print(res) + + def get_sampleid(self, resourceid=None): + """ + determines the sample_id of a given resource_id. + it performs a recursive dfs. 
+        TODO(cp): check if the method should be implemented using a stack
+
+        :param resourceid:
+        :return: (int, int)
+        """
+
+        assert isinstance(resourceid, int)
+
+        try:
+            resource = self.read_object('resource', obj={'id': resourceid})[0]
+        except Exception:
+            return None
+
+        try:
+            workunit = self.read_object(endpoint='workunit', obj={'id': resource.workunit._id})[0]
+            return self.get_sampleid(resourceid=int(workunit.inputresource[0]._id))
+        except Exception:
+            self.warning("fetching sampleid of resource.workunitid = {} failed.".format(resource.workunit._id))
+            return None
+
+
+class bfabricEncoder(json.JSONEncoder):
+    """
+    Implements the json encoder for the Bfabric.print_json method
+    """
+    def default(self, o):
+        try:
+            return dict(o)
+        except TypeError:
+            try:
+                return list(o)
+            except TypeError:
+                pass
+        return json.JSONEncoder.default(self, o)
diff --git a/bfabric/examples/compare_zeep_suds_pagination.py b/bfabric/examples/compare_zeep_suds_pagination.py
index 9c901275..ebb1ae57 100644
--- a/bfabric/examples/compare_zeep_suds_pagination.py
+++ b/bfabric/examples/compare_zeep_suds_pagination.py
@@ -1,7 +1,8 @@
 import os
 import pandas as pd
-from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth
+from bfabric import Bfabric, BfabricAPIEngineType
+from bfabric.bfabric import get_system_auth
 from bfabric.src.pandas_helper import list_dict_to_df
 
 '''
diff --git a/bfabric/examples/compare_zeep_suds_query.py b/bfabric/examples/compare_zeep_suds_query.py
index 885ad073..601b2cea 100644
--- a/bfabric/examples/compare_zeep_suds_query.py
+++ b/bfabric/examples/compare_zeep_suds_query.py
@@ -1,16 +1,14 @@
-import sys
 from collections import OrderedDict
-from copy import deepcopy
-from lxml import etree
-from pprint import pprint
 from contextlib import redirect_stdout
+from copy import deepcopy
 
-import zeep
 import suds
+import zeep
 
-from bfabric.bfabric2 import get_system_auth, BfabricAuth, BfabricConfig
-from bfabric.src.response_format_suds import suds_asdict_recursive
+from bfabric import BfabricAuth, BfabricConfig
+from bfabric.bfabric import get_system_auth
 from bfabric.src.response_format_dict import drop_empty_elements, map_element_keys
+from bfabric.src.response_format_suds import suds_asdict_recursive
 
 '''
 This file is intended to eventually become a test to compare that Zeep and SUDS produce
diff --git a/bfabric/examples/exists_multi.py b/bfabric/examples/exists_multi.py
index b1ba7469..70340138 100644
--- a/bfabric/examples/exists_multi.py
+++ b/bfabric/examples/exists_multi.py
@@ -1,5 +1,5 @@
-from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth
-
+from bfabric import BfabricAPIEngineType, Bfabric
+from bfabric.bfabric import get_system_auth
 
 config, auth = get_system_auth(config_env="TEST")
diff --git a/bfabric/examples/zeep_debug.py b/bfabric/examples/zeep_debug.py
index a5225576..acbdb3dc 100644
--- a/bfabric/examples/zeep_debug.py
+++ b/bfabric/examples/zeep_debug.py
@@ -1,4 +1,5 @@
-from bfabric.bfabric2 import get_system_auth, BfabricAuth, BfabricConfig
+from bfabric import BfabricAuth, BfabricConfig
+from bfabric.bfabric import get_system_auth
 import zeep
 from copy import deepcopy
 from lxml import etree
diff --git a/bfabric/scripts/bfabric_delete.py b/bfabric/scripts/bfabric_delete.py
index 643b8c4c..0ea030bb 100755
--- a/bfabric/scripts/bfabric_delete.py
+++ b/bfabric/scripts/bfabric_delete.py
@@ -20,10 +20,10 @@
 """
 import sys
-import bfabric
+import bfabric.bfabric_legacy
 
 if __name__ == "__main__":
-    bfapp = bfabric.Bfabric()
+    bfapp = bfabric.bfabric_legacy.BfabricLegacy()
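For context, the legacy client keeps the old call style of an endpoint name plus a query dictionary. A minimal sketch of a delete call with the client instantiated above (the id is a hypothetical placeholder):

    # Hypothetical usage of the legacy client:
    res = bfapp.delete_object(endpoint="workunit", id=123456)
    print(res)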
query_obj = {} diff --git a/bfabric/scripts/bfabric_feeder_resource_autoQC.py b/bfabric/scripts/bfabric_feeder_resource_autoQC.py index 841c9108..76a33446 100755 --- a/bfabric/scripts/bfabric_feeder_resource_autoQC.py +++ b/bfabric/scripts/bfabric_feeder_resource_autoQC.py @@ -18,7 +18,7 @@ import re import time import unittest -from bfabric import Bfabric +from bfabric.bfabric_legacy import BfabricLegacy class autoQC(): @@ -31,7 +31,7 @@ class autoQC(): config = yaml.load(file, Loader=yaml.FullLoader) bfabric_application_ids = config['applicationId'] - bfapp = Bfabric(verbose=False) + bfapp = BfabricLegacy(verbose=False) @property def getId(self, obj): diff --git a/bfabric/scripts/bfabric_flask.py b/bfabric/scripts/bfabric_flask.py index 20343be4..96f72104 100755 --- a/bfabric/scripts/bfabric_flask.py +++ b/bfabric/scripts/bfabric_flask.py @@ -41,8 +41,7 @@ from flask import Flask, Response, jsonify, request -from bfabric.bfabric2 import Bfabric -from bfabric.bfabric_config import BfabricAuth +from bfabric import Bfabric, BfabricAuth if "BFABRICPY_CONFIG_ENV" not in os.environ: diff --git a/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py b/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py index 1a4f6ce2..7f02f2c7 100755 --- a/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py +++ b/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py @@ -18,8 +18,7 @@ from rich.console import Console from rich.table import Column, Table -from bfabric import BfabricConfig -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric, BfabricConfig def render_output(workunits_by_status: dict[str, list[dict[str, Any]]], config: BfabricConfig) -> None: diff --git a/bfabric/scripts/bfabric_list_not_existing_storage_directories.py b/bfabric/scripts/bfabric_list_not_existing_storage_directories.py index ba26da36..f5b227af 100755 --- a/bfabric/scripts/bfabric_list_not_existing_storage_directories.py +++ b/bfabric/scripts/bfabric_list_not_existing_storage_directories.py @@ -15,8 +15,9 @@ import os -import bfabric -B = bfabric.Bfabric() +import bfabric.bfabric_legacy + +B = bfabric.bfabric_legacy.BfabricLegacy() ROOTDIR="/srv/www/htdocs/" diff --git a/bfabric/scripts/bfabric_logthis.py b/bfabric/scripts/bfabric_logthis.py index 512be559..91a58e04 100755 --- a/bfabric/scripts/bfabric_logthis.py +++ b/bfabric/scripts/bfabric_logthis.py @@ -6,7 +6,7 @@ from __future__ import annotations import argparse -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric def bfabric_logthis(client: Bfabric, external_job_id: int, message: str) -> None: diff --git a/bfabric/scripts/bfabric_read.py b/bfabric/scripts/bfabric_read.py index 19733e19..041be111 100755 --- a/bfabric/scripts/bfabric_read.py +++ b/bfabric/scripts/bfabric_read.py @@ -24,8 +24,7 @@ from rich.table import Table import bfabric -from bfabric import BfabricConfig -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric, BfabricConfig def bfabric_read( diff --git a/bfabric/scripts/bfabric_read_samples_from_dataset.py b/bfabric/scripts/bfabric_read_samples_from_dataset.py index b3e2bf51..e40d5ced 100755 --- a/bfabric/scripts/bfabric_read_samples_from_dataset.py +++ b/bfabric/scripts/bfabric_read_samples_from_dataset.py @@ -15,7 +15,7 @@ bfabric_read_samples_from_dataset.py datasetid """ import argparse -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric def get_table_row(client: Bfabric, relative_path: str) -> tuple[str, int, str, str, str]: diff --git 
a/bfabric/scripts/bfabric_read_samples_of_workunit.py b/bfabric/scripts/bfabric_read_samples_of_workunit.py index 4fb00815..2a69daad 100755 --- a/bfabric/scripts/bfabric_read_samples_of_workunit.py +++ b/bfabric/scripts/bfabric_read_samples_of_workunit.py @@ -18,7 +18,7 @@ from rich.console import Console -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric def bfabric_read_samples_of_workunit(workunit_id: int) -> None: diff --git a/bfabric/scripts/bfabric_save_csv2dataset.py b/bfabric/scripts/bfabric_save_csv2dataset.py index d730814a..1f880387 100755 --- a/bfabric/scripts/bfabric_save_csv2dataset.py +++ b/bfabric/scripts/bfabric_save_csv2dataset.py @@ -33,7 +33,7 @@ import polars as pl -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric def polars_to_bfabric_type(dtype: pl.DataType) -> str | None: diff --git a/bfabric/scripts/bfabric_save_dataset2csv.py b/bfabric/scripts/bfabric_save_dataset2csv.py index ab2df541..df24007f 100755 --- a/bfabric/scripts/bfabric_save_dataset2csv.py +++ b/bfabric/scripts/bfabric_save_dataset2csv.py @@ -20,7 +20,7 @@ import polars as pl -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric def dataset2csv(dataset: dict, output_path: Path, sep: str) -> None: diff --git a/bfabric/scripts/bfabric_save_fasta.py b/bfabric/scripts/bfabric_save_fasta.py index 69650e2e..4e2da311 100755 --- a/bfabric/scripts/bfabric_save_fasta.py +++ b/bfabric/scripts/bfabric_save_fasta.py @@ -8,7 +8,7 @@ import sys from pathlib import Path -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric FASTAHTTPROOT = "/fasta/" BFABRICSTORAGEID = 2 diff --git a/bfabric/scripts/bfabric_save_importresource_sample.py b/bfabric/scripts/bfabric_save_importresource_sample.py index efb0ed28..ca914409 100755 --- a/bfabric/scripts/bfabric_save_importresource_sample.py +++ b/bfabric/scripts/bfabric_save_importresource_sample.py @@ -21,7 +21,7 @@ import sys import time -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric BFABRIC_STORAGE_ID = 2 diff --git a/bfabric/scripts/bfabric_save_link_to_workunit.py b/bfabric/scripts/bfabric_save_link_to_workunit.py index 473dff3f..2dd7a22c 100755 --- a/bfabric/scripts/bfabric_save_link_to_workunit.py +++ b/bfabric/scripts/bfabric_save_link_to_workunit.py @@ -7,7 +7,7 @@ import argparse import json -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric def save_link(workunit_id: int, url: str, name: str) -> None: diff --git a/bfabric/scripts/bfabric_save_workflowstep.py b/bfabric/scripts/bfabric_save_workflowstep.py index e9c6420a..5884ea9e 100755 --- a/bfabric/scripts/bfabric_save_workflowstep.py +++ b/bfabric/scripts/bfabric_save_workflowstep.py @@ -19,7 +19,7 @@ import argparse -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric def save_workflowstep(workunit_id: int | None = None) -> None: diff --git a/bfabric/scripts/bfabric_save_workunit.py b/bfabric/scripts/bfabric_save_workunit.py index 0c66369b..cec04fcd 100755 --- a/bfabric/scripts/bfabric_save_workunit.py +++ b/bfabric/scripts/bfabric_save_workunit.py @@ -20,9 +20,10 @@ import bfabric import datetime +import bfabric.bfabric_legacy if __name__ == "__main__": - bfapp = bfabric.Bfabric() + bfapp = bfabric.bfabric_legacy.BfabricLegacy() workunit = bfapp.save_object(endpoint='workunit', obj={'name': 'MaxQuant report', 'projectid': '1000', 'applicationid': 217, 'status': 'available'}) diff --git a/bfabric/scripts/bfabric_save_workunit_attribute.py b/bfabric/scripts/bfabric_save_workunit_attribute.py index 
8bbc4845..cef3fbc5 100755 --- a/bfabric/scripts/bfabric_save_workunit_attribute.py +++ b/bfabric/scripts/bfabric_save_workunit_attribute.py @@ -13,7 +13,7 @@ import argparse import json -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric def bfabric_save_workunit_attribute(workunit_id: int, attribute: str, value: str) -> None: diff --git a/bfabric/scripts/bfabric_setExternalJobStatus_done.py b/bfabric/scripts/bfabric_setExternalJobStatus_done.py index 96e1498d..5e81f3d5 100755 --- a/bfabric/scripts/bfabric_setExternalJobStatus_done.py +++ b/bfabric/scripts/bfabric_setExternalJobStatus_done.py @@ -5,7 +5,8 @@ from __future__ import annotations import argparse -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric + # Copyright (C) 2014 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. # diff --git a/bfabric/scripts/bfabric_setResourceStatus_available.py b/bfabric/scripts/bfabric_setResourceStatus_available.py index 7f0ec07e..be94a6b6 100755 --- a/bfabric/scripts/bfabric_setResourceStatus_available.py +++ b/bfabric/scripts/bfabric_setResourceStatus_available.py @@ -6,7 +6,7 @@ import argparse -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric # Copyright (C) 2014 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. diff --git a/bfabric/scripts/bfabric_setWorkunitStatus.py b/bfabric/scripts/bfabric_setWorkunitStatus.py index 670caf65..719f33b9 100755 --- a/bfabric/scripts/bfabric_setWorkunitStatus.py +++ b/bfabric/scripts/bfabric_setWorkunitStatus.py @@ -2,7 +2,8 @@ import argparse import json -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric + # Copyright (C) 2014 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. # diff --git a/bfabric/scripts/bfabric_upload_resource.py b/bfabric/scripts/bfabric_upload_resource.py index 0228dec6..5302320e 100755 --- a/bfabric/scripts/bfabric_upload_resource.py +++ b/bfabric/scripts/bfabric_upload_resource.py @@ -14,7 +14,7 @@ import json from pathlib import Path -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric def bfabric_upload_resource(client: Bfabric, filename: Path, workunit_id: int) -> None: diff --git a/bfabric/scripts/bfabric_upload_submitter_executable.py b/bfabric/scripts/bfabric_upload_submitter_executable.py index 8aa6ecad..23d41af3 100755 --- a/bfabric/scripts/bfabric_upload_submitter_executable.py +++ b/bfabric/scripts/bfabric_upload_submitter_executable.py @@ -48,7 +48,7 @@ import yaml -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric def main_upload_submitter_executable(options) -> None: diff --git a/bfabric/tests/__test_bfabric.py b/bfabric/tests/__test_bfabric.py index dc505716..4960be54 100755 --- a/bfabric/tests/__test_bfabric.py +++ b/bfabric/tests/__test_bfabric.py @@ -6,13 +6,13 @@ """ import unittest -from bfabric import Bfabric +from bfabric import BfabricLegacy """ ssh localhost "cat > /tmp/bb.py && /usr/bin/python /tmp/bb.py" < PycharmProjects/untitled/bfabric_wsdl.py """ class BfabricTestCase(unittest.TestCase): - bfapp = Bfabric(verbose=True) + bfapp = BfabricLegacy(verbose=True) workunits = [] samples = [] diff --git a/bfabric/tests/integration/integration_test_helper.py b/bfabric/tests/integration/integration_test_helper.py index 89db1b3b..7a544d2b 100644 --- a/bfabric/tests/integration/integration_test_helper.py +++ b/bfabric/tests/integration/integration_test_helper.py @@ -2,7 +2,7 @@ from typing import Any -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric class 
DeleteEntities: diff --git a/bfabric/tests/integration/scripts/test_read.py b/bfabric/tests/integration/scripts/test_read.py index 33f8f4c4..f1636e47 100644 --- a/bfabric/tests/integration/scripts/test_read.py +++ b/bfabric/tests/integration/scripts/test_read.py @@ -5,7 +5,7 @@ import yaml -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric from bfabric.scripts.bfabric_read import bfabric_read from bfabric.tests.integration.integration_test_helper import DeleteEntities diff --git a/bfabric/tests/integration/scripts/test_save_csv2dataset.py b/bfabric/tests/integration/scripts/test_save_csv2dataset.py index 26bff4fa..89a17043 100644 --- a/bfabric/tests/integration/scripts/test_save_csv2dataset.py +++ b/bfabric/tests/integration/scripts/test_save_csv2dataset.py @@ -6,7 +6,7 @@ from tempfile import TemporaryDirectory import polars as pl -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric from bfabric.scripts.bfabric_save_csv2dataset import bfabric_save_csv2dataset from bfabric.tests.integration.integration_test_helper import DeleteEntities diff --git a/bfabric/tests/integration/scripts/test_save_dataset2csv.py b/bfabric/tests/integration/scripts/test_save_dataset2csv.py index 4821bcf0..096eafdc 100644 --- a/bfabric/tests/integration/scripts/test_save_dataset2csv.py +++ b/bfabric/tests/integration/scripts/test_save_dataset2csv.py @@ -4,7 +4,7 @@ import polars.testing import polars as pl -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric from bfabric.scripts.bfabric_save_dataset2csv import bfabric_save_dataset2csv diff --git a/bfabric/tests/integration/scripts/test_upload_resource.py b/bfabric/tests/integration/scripts/test_upload_resource.py index 7e99a282..8be63682 100644 --- a/bfabric/tests/integration/scripts/test_upload_resource.py +++ b/bfabric/tests/integration/scripts/test_upload_resource.py @@ -7,7 +7,7 @@ from pathlib import Path from tempfile import TemporaryDirectory -from bfabric.bfabric2 import Bfabric +from bfabric import Bfabric from bfabric.scripts.bfabric_upload_resource import bfabric_upload_resource from bfabric.tests.integration.integration_test_helper import DeleteEntities diff --git a/bfabric/tests/integration/test_bfabric2_bad_requests.py b/bfabric/tests/integration/test_bfabric2_bad_requests.py index 287e969a..ef7dbca9 100755 --- a/bfabric/tests/integration/test_bfabric2_bad_requests.py +++ b/bfabric/tests/integration/test_bfabric2_bad_requests.py @@ -2,7 +2,8 @@ import os import unittest -from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth +from bfabric import BfabricAPIEngineType, Bfabric +from bfabric.bfabric import get_system_auth from bfabric.src.errors import BfabricRequestError diff --git a/bfabric/tests/integration/test_bfabric2_exists.py b/bfabric/tests/integration/test_bfabric2_exists.py index 7ae74718..673726b3 100644 --- a/bfabric/tests/integration/test_bfabric2_exists.py +++ b/bfabric/tests/integration/test_bfabric2_exists.py @@ -1,6 +1,7 @@ import unittest -from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth +from bfabric import BfabricAPIEngineType, Bfabric +from bfabric.bfabric import get_system_auth class BfabricTestExists(unittest.TestCase): diff --git a/bfabric/tests/integration/test_bfabric2_read.py b/bfabric/tests/integration/test_bfabric2_read.py index 5d4b5ae4..fb307284 100755 --- a/bfabric/tests/integration/test_bfabric2_read.py +++ b/bfabric/tests/integration/test_bfabric2_read.py @@ -2,8 +2,8 @@ import os import unittest -from bfabric.bfabric2 import 
Bfabric, BfabricAPIEngineType, get_system_auth -from bfabric.bfabric_config import BfabricAuth +from bfabric import BfabricAPIEngineType, Bfabric, BfabricAuth +from bfabric.bfabric import get_system_auth class BfabricTestRead(unittest.TestCase): diff --git a/bfabric/tests/integration/test_bfabric2_read_pagination.py b/bfabric/tests/integration/test_bfabric2_read_pagination.py index 979c3f47..22801f2e 100644 --- a/bfabric/tests/integration/test_bfabric2_read_pagination.py +++ b/bfabric/tests/integration/test_bfabric2_read_pagination.py @@ -1,7 +1,8 @@ import unittest import pandas as pd -from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth +from bfabric import BfabricAPIEngineType, Bfabric +from bfabric.bfabric import get_system_auth from bfabric.src.pandas_helper import list_dict_to_df diff --git a/bfabric/tests/integration/test_bfabric2_save_delete.py b/bfabric/tests/integration/test_bfabric2_save_delete.py index c31be6f3..c7c53074 100644 --- a/bfabric/tests/integration/test_bfabric2_save_delete.py +++ b/bfabric/tests/integration/test_bfabric2_save_delete.py @@ -1,7 +1,8 @@ from typing import Tuple import unittest -from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth +from bfabric import BfabricAPIEngineType, Bfabric +from bfabric.bfabric import get_system_auth def _find_delete_existing_objects_by_name(b: Bfabric, endpoint: str, name_list: list) -> Tuple[list, list]: diff --git a/bfabric/tests/test_bfabric_executable.py b/bfabric/tests/test_bfabric_executable.py index 00339636..caa3349f 100755 --- a/bfabric/tests/test_bfabric_executable.py +++ b/bfabric/tests/test_bfabric_executable.py @@ -11,6 +11,9 @@ import os import json +import bfabric.bfabric_legacy + + class bfabricEncoder(json.JSONEncoder): def default(self, o): try: @@ -29,7 +32,7 @@ class BfabricTestCase(unittest.TestCase): def __init__(self, *args, **kwargs): super(BfabricTestCase, self).__init__(*args, **kwargs) - self.B = bfabric.Bfabric(verbose=False) + self.B = bfabric.bfabric_legacy.BfabricLegacy(verbose=False) for e in ['executable', 'sample', 'application', 'workunit', 'resource']: self.endpoint[e] = [] diff --git a/bfabric/tests/test_bfabric_functional.py b/bfabric/tests/test_bfabric_functional.py index 14c44e2f..709cd4af 100755 --- a/bfabric/tests/test_bfabric_functional.py +++ b/bfabric/tests/test_bfabric_functional.py @@ -15,6 +15,7 @@ import logging import time +import bfabric.bfabric_legacy import bfabric.wrapper_creator.bfabric_submitter import bfabric.wrapper_creator.bfabric_wrapper_creator @@ -35,7 +36,7 @@ def __init__(self, *args, **kwargs): def test_wrappercreator_submitter(self): logging.info("XXX start functional testing") - B = bfabric.Bfabric() + B = bfabric.bfabric_legacy.BfabricLegacy() logging.info("Running functional test on bfabricPy") diff --git a/bfabric/tests/test_bfabric_read.py b/bfabric/tests/test_bfabric_read.py index efaae2f1..1c46206e 100755 --- a/bfabric/tests/test_bfabric_read.py +++ b/bfabric/tests/test_bfabric_read.py @@ -9,6 +9,7 @@ import unittest import bfabric +import bfabric.bfabric_legacy class BfabricTestCaseReadEndPoints(unittest.TestCase): @@ -17,7 +18,7 @@ def setUpClass(cls): path = os.path.join(os.path.dirname(__file__), "groundtruth.json") with open(path) as json_file: cls.ground_truth = json.load(json_file) - cls.bfapp = bfabric.Bfabric(verbose=False) + cls.bfapp = bfabric.bfabric_legacy.BfabricLegacy(verbose=False) def read(self, endpoint): """Executes read queries for `endpoint` and compares results with ground truth.""" diff 
--git a/bfabric/tests/test_bfabric_sample.py b/bfabric/tests/test_bfabric_sample.py index 867b0b25..0f34b922 100755 --- a/bfabric/tests/test_bfabric_sample.py +++ b/bfabric/tests/test_bfabric_sample.py @@ -6,7 +6,7 @@ """ import unittest -from bfabric import Bfabric +from bfabric.bfabric_legacy import BfabricLegacy """ ssh localhost "cat > /tmp/bb.py && /usr/bin/python /tmp/bb.py" < PycharmProjects/untitled/bfabric_wsdl.py @@ -17,7 +17,7 @@ class BfabricTestCase(unittest.TestCase): samples = [] - bfapp = Bfabric(verbose=True) + bfapp = BfabricLegacy(verbose=True) def sample_save(self): print("SAVE SAMPLE") sample_type = 'Biological Sample - Proteomics' diff --git a/bfabric/tests/test_bfabric_workunit.py b/bfabric/tests/test_bfabric_workunit.py index f12dad6e..9ebd0ad2 100755 --- a/bfabric/tests/test_bfabric_workunit.py +++ b/bfabric/tests/test_bfabric_workunit.py @@ -12,6 +12,9 @@ import json import datetime +import bfabric.bfabric_legacy + + class bfabricEncoder(json.JSONEncoder): def default(self, o): try: @@ -30,7 +33,7 @@ class BfabricTestCase(unittest.TestCase): def __init__(self, *args, **kwargs): super(BfabricTestCase, self).__init__(*args, **kwargs) - self.bfapp = bfabric.Bfabric(verbose=False) + self.bfapp = bfabric.bfabric_legacy.BfabricLegacy(verbose=False) for e in ['executable', 'sample', 'application', 'workunit', 'resource']: self.endpoint[e] = [] diff --git a/bfabric/tests/unit/test_bfabric.py b/bfabric/tests/unit/test_bfabric.py index 1892ac34..33ca1aff 100644 --- a/bfabric/tests/unit/test_bfabric.py +++ b/bfabric/tests/unit/test_bfabric.py @@ -3,8 +3,7 @@ from functools import cached_property from unittest.mock import MagicMock, patch, ANY -from bfabric import BfabricConfig -from bfabric.bfabric2 import BfabricAPIEngineType, Bfabric +from bfabric import Bfabric, BfabricAPIEngineType, BfabricConfig from bfabric.src.engine_suds import EngineSUDS @@ -19,7 +18,7 @@ def setUp(self): def mock_bfabric(self) -> Bfabric: return Bfabric(config=self.mock_config, auth=self.mock_auth, engine=self.mock_engine_type) - @patch("bfabric.bfabric2.get_system_auth") + @patch("bfabric.bfabric.get_system_auth") def test_from_config_when_no_args(self, mock_get_system_auth): mock_config = MagicMock(name="mock_config") mock_auth = MagicMock(name="mock_auth") @@ -30,7 +29,7 @@ def test_from_config_when_no_args(self, mock_get_system_auth): self.assertEqual(mock_auth, client.auth) mock_get_system_auth.assert_called_once_with(config_env=None) - @patch("bfabric.bfabric2.get_system_auth") + @patch("bfabric.bfabric.get_system_auth") def test_from_config_when_explicit_auth(self, mock_get_system_auth): mock_config = MagicMock(name="mock_config") mock_auth = MagicMock(name="mock_auth") @@ -42,7 +41,7 @@ def test_from_config_when_explicit_auth(self, mock_get_system_auth): self.assertEqual(mock_auth, client.auth) mock_get_system_auth.assert_called_once_with(config_env="TestingEnv") - @patch("bfabric.bfabric2.get_system_auth") + @patch("bfabric.bfabric.get_system_auth") def test_from_config_when_none_auth(self, mock_get_system_auth): mock_config = MagicMock(name="mock_config") mock_auth = MagicMock(name="mock_auth") @@ -119,7 +118,7 @@ def test_get_version_message(self): year = datetime.datetime.now().year self.assertEqual(f"--- Copyright (C) 2014-{year} Functional Genomics Center Zurich ---", lines[1]) - @patch("bfabric.bfabric2.Console") + @patch("bfabric.bfabric.Console") @patch.object(Bfabric, "get_version_message") def test_print_version_message(self, method_get_version_message, mock_console): mock_stderr 
= MagicMock(name="mock_stderr") diff --git a/bfabric/wrapper_creator/bfabric_external_job.py b/bfabric/wrapper_creator/bfabric_external_job.py index 7d30683c..ede8f3a5 100644 --- a/bfabric/wrapper_creator/bfabric_external_job.py +++ b/bfabric/wrapper_creator/bfabric_external_job.py @@ -1,9 +1,9 @@ import json -from bfabric.bfabric import Bfabric, bfabricEncoder +from bfabric.bfabric_legacy import bfabricEncoder, BfabricLegacy -class BfabricExternalJob(Bfabric): +class BfabricExternalJob(BfabricLegacy): """ ExternalJobs can use logging. if you have a valid externaljobid use this class instead of diff --git a/bfabric/wrapper_creator/bfabric_feeder.py b/bfabric/wrapper_creator/bfabric_feeder.py index 1888807b..6052a66e 100644 --- a/bfabric/wrapper_creator/bfabric_feeder.py +++ b/bfabric/wrapper_creator/bfabric_feeder.py @@ -1,10 +1,10 @@ import hashlib import os -from bfabric import Bfabric +from bfabric.bfabric_legacy import BfabricLegacy -class BfabricFeeder(Bfabric): +class BfabricFeeder(BfabricLegacy): """ this class is used for reporting 'resource' status """ diff --git a/bfabric/wrapper_creator/bfabric_wrapper_creator.py b/bfabric/wrapper_creator/bfabric_wrapper_creator.py index 3d3e11ca..80cf30ad 100644 --- a/bfabric/wrapper_creator/bfabric_wrapper_creator.py +++ b/bfabric/wrapper_creator/bfabric_wrapper_creator.py @@ -5,7 +5,7 @@ import yaml -from bfabric.bfabric import bfabricEncoder +from bfabric.bfabric_legacy import bfabricEncoder from bfabric.wrapper_creator.bfabric_external_job import BfabricExternalJob From 077c3063fbe8e9d862b260935f46b72bc2e07a7c Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 10 May 2024 09:47:56 +0200 Subject: [PATCH 089/129] Move some remaining wrapper creator related code into the relevant folder --- bfabric/wrapper_creator/bfabric_submitter.py | 4 ++-- bfabric/{ => wrapper_creator}/gridengine.py | 0 bfabric/{ => wrapper_creator}/slurm.py | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename bfabric/{ => wrapper_creator}/gridengine.py (100%) rename bfabric/{ => wrapper_creator}/slurm.py (100%) diff --git a/bfabric/wrapper_creator/bfabric_submitter.py b/bfabric/wrapper_creator/bfabric_submitter.py index 0838f05a..97b94f5a 100644 --- a/bfabric/wrapper_creator/bfabric_submitter.py +++ b/bfabric/wrapper_creator/bfabric_submitter.py @@ -2,8 +2,8 @@ import yaml -import bfabric.gridengine as gridengine -import bfabric.slurm as slurm +import bfabric.wrapper_creator.gridengine as gridengine +import bfabric.wrapper_creator.slurm as slurm from bfabric.wrapper_creator.bfabric_external_job import BfabricExternalJob diff --git a/bfabric/gridengine.py b/bfabric/wrapper_creator/gridengine.py similarity index 100% rename from bfabric/gridengine.py rename to bfabric/wrapper_creator/gridengine.py diff --git a/bfabric/slurm.py b/bfabric/wrapper_creator/slurm.py similarity index 100% rename from bfabric/slurm.py rename to bfabric/wrapper_creator/slurm.py From 83dfaf571d3507c62ae3d25e6674bd8f8de9a24c Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 13 May 2024 09:47:32 +0200 Subject: [PATCH 090/129] Move result conversion into engines (#93) - Move result conversion responsibility into engine - Simplify ResultContainer.to_list_dict() interface to have only the `drop_empty` parameter - Sort by default - Remove leading underscore by default - Why: - Engines now return a type which we define, could be mocked in the future for testing purposes - `to_list_dict()` has less options because the defaults above are sensible and not too costly - Removing empty 
elements is a bit problematic for some use cases so it is optional --- bfabric/bfabric.py | 61 ++++---- bfabric/bfabric_config.py | 2 +- bfabric/engine/__init__.py | 0 bfabric/engine/engine_suds.py | 103 ++++++++++++++ bfabric/engine/engine_zeep.py | 134 ++++++++++++++++++ .../{src => engine}/response_format_suds.py | 11 +- bfabric/{src => }/errors.py | 0 .../examples/compare_zeep_suds_pagination.py | 19 +-- bfabric/examples/compare_zeep_suds_query.py | 2 +- bfabric/src/engine_suds.py | 74 ---------- bfabric/src/engine_zeep.py | 96 ------------- bfabric/src/response_format_dict.py | 14 +- bfabric/src/result_container.py | 121 +++++++--------- .../integration/test_bfabric2_bad_requests.py | 2 +- .../test_bfabric2_read_pagination.py | 16 +-- bfabric/tests/unit/test_bfabric.py | 2 +- bfabric/tests/unit/test_result_container.py | 89 ++++++++++++ bfabric/tests/unit/test_results_container.py | 45 ------ 18 files changed, 435 insertions(+), 356 deletions(-) create mode 100644 bfabric/engine/__init__.py create mode 100644 bfabric/engine/engine_suds.py create mode 100644 bfabric/engine/engine_zeep.py rename bfabric/{src => engine}/response_format_suds.py (85%) rename bfabric/{src => }/errors.py (100%) delete mode 100644 bfabric/src/engine_suds.py delete mode 100644 bfabric/src/engine_zeep.py create mode 100644 bfabric/tests/unit/test_result_container.py delete mode 100644 bfabric/tests/unit/test_results_container.py diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index 2f9c459e..00c56736 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -28,12 +28,11 @@ from bfabric.bfabric_config import BfabricAuth, read_config from bfabric.bfabric_config import BfabricConfig +from bfabric.engine.engine_suds import EngineSUDS +from bfabric.engine.engine_zeep import EngineZeep from bfabric.src.cli_formatting import HostnameHighlighter, DEFAULT_THEME -from bfabric.src.engine_suds import EngineSUDS -from bfabric.src.engine_zeep import EngineZeep -from bfabric.src.errors import get_response_errors from bfabric.src.paginator import compute_requested_pages, BFABRIC_QUERY_LIMIT, page_iter -from bfabric.src.result_container import BfabricResultType, ResultContainer +from bfabric.src.result_container import ResultContainer class BfabricAPIEngineType(Enum): @@ -61,10 +60,8 @@ def __init__( if engine == BfabricAPIEngineType.SUDS: self.engine = EngineSUDS(base_url=config.base_url) - self.result_type = BfabricResultType.LISTSUDS elif engine == BfabricAPIEngineType.ZEEP: self.engine = EngineZeep(base_url=config.base_url) - self.result_type = BfabricResultType.LISTZEEP else: raise ValueError(f"Unexpected engine: {engine}") @@ -149,21 +146,12 @@ def read( """ # Get the first page. 
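        # A worked example of the paging below (assuming the API limit of
        # 100 items per page): max_results=250 reads pages [1, 2, 3], and
        # get_first_n_results() trims the combined list back to 250 items.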
# NOTE: According to old interface, this is equivalent to plain=True - response, errors = self._read_page(readid, endpoint, obj, page=1, idonly=idonly) - - try: - n_available_pages = response["numberofpages"] - except AttributeError: - n_available_pages = 0 - - # Return empty list if nothing found + results = self._read_page(readid, endpoint, obj, page=1, idonly=idonly) + n_available_pages = results.total_pages_api if not n_available_pages: - result = ResultContainer( - [], self.result_type, total_pages_api=0, errors=get_response_errors(response, endpoint) - ) if check: - result.assert_success() - return result + results.assert_success() + return results.get_first_n_results(max_results) # Get results from other pages as well, if need be requested_pages, initial_offset = compute_requested_pages( @@ -176,34 +164,33 @@ def read( # NOTE: Page numbering starts at 1 response_items = [] + errors = results.errors page_offset = initial_offset for i_iter, i_page in enumerate(requested_pages): if not (i_iter == 0 and i_page == 1): print("-- reading page", i_page, "of", n_available_pages) - response, errors_page = self._read_page(readid, endpoint, obj, page=i_page, idonly=idonly) - errors += errors_page + results = self._read_page(readid, endpoint, obj, page=i_page, idonly=idonly) + errors += results.errors - response_items += response[endpoint][page_offset:] + response_items += results[page_offset:] page_offset = 0 - result = ResultContainer(response_items, self.result_type, total_pages_api=n_available_pages, errors=errors) + result = ResultContainer(response_items, total_pages_api=n_available_pages, errors=errors) if check: result.assert_success() - return result + return result.get_first_n_results(max_results) def save(self, endpoint: str, obj: dict, check: bool = True) -> ResultContainer: results = self.engine.save(endpoint, obj, auth=self.auth) - result = ResultContainer(results[endpoint], self.result_type, errors=get_response_errors(results, endpoint)) if check: - result.assert_success() - return result + results.assert_success() + return results def delete(self, endpoint: str, id: int | list[int], check: bool = True) -> ResultContainer: results = self.engine.delete(endpoint, id, auth=self.auth) - result = ResultContainer(results[endpoint], self.result_type, errors=get_response_errors(results, endpoint)) if check: - result.assert_success() - return result + results.assert_success() + return results def upload_resource( self, resource_name: str, content: bytes, workunit_id: int, check: bool = True @@ -227,15 +214,15 @@ def upload_resource( check=check, ) - def _read_page(self, readid: bool, endpoint: str, query: dict[str, Any], idonly: bool = False, page: int = 1): + def _read_page( + self, readid: bool, endpoint: str, query: dict[str, Any], idonly: bool = False, page: int = 1 + ) -> ResultContainer: """Reads the specified page of objects from the specified endpoint that match the query.""" if readid: # https://fgcz-bfabric.uzh.ch/wiki/tiki-index.php?page=endpoint.workunit#Web_Method_readid_ - response = self.engine.readid(endpoint, query, auth=self.auth, page=page) + return self.engine.readid(endpoint=endpoint, obj=query, auth=self.auth, page=page) else: - response = self.engine.read(endpoint, query, auth=self.auth, page=page, idonly=idonly) - - return response, get_response_errors(response, endpoint) + return self.engine.read(endpoint=endpoint, obj=query, auth=self.auth, page=page, idonly=idonly) ############################ # Multi-query functionality @@ -266,7 +253,7 @@ def read_multi( NOTE: 
It is assumed that there is only 1 response for each value. """ - response_tot = ResultContainer([], self.result_type, total_pages_api=0) + response_tot = ResultContainer([], total_pages_api=0) obj_extended = deepcopy(obj) # Make a copy of the query, not to make edits to the argument # Iterate over request chunks that fit into a single API page @@ -299,7 +286,7 @@ def read_multi( # return response_tot def delete_multi(self, endpoint: str, id_list: list) -> ResultContainer: - response_tot = ResultContainer([], self.result_type, total_pages_api=0) + response_tot = ResultContainer([], total_pages_api=0) if len(id_list) == 0: print("Warning, empty list provided for deletion, ignoring") diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index a462a99c..d3970dba 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -7,7 +7,7 @@ import yaml -from bfabric.src.errors import BfabricConfigError +from bfabric.errors import BfabricConfigError @dataclasses.dataclass(frozen=True) diff --git a/bfabric/engine/__init__.py b/bfabric/engine/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bfabric/engine/engine_suds.py b/bfabric/engine/engine_suds.py new file mode 100644 index 00000000..5b219a2d --- /dev/null +++ b/bfabric/engine/engine_suds.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +import copy +from typing import Any + +from suds import MethodNotFound +from suds.client import Client +from suds.serviceproxy import ServiceProxy + +from bfabric.bfabric_config import BfabricAuth +from bfabric.engine.response_format_suds import suds_asdict_recursive +from bfabric.errors import BfabricRequestError, get_response_errors +from bfabric.src.result_container import _clean_result, ResultContainer + + +class EngineSUDS: + """B-Fabric API SUDS Engine.""" + + def __init__(self, base_url: str, drop_underscores: bool = True) -> None: + self._cl = {} + self._base_url = base_url + self._drop_underscores = drop_underscores + + def read( + self, + endpoint: str, + obj: dict[str, Any], + auth: BfabricAuth, + page: int = 1, + idonly: bool = False, + includedeletableupdateable: bool = False, + ) -> ResultContainer: + """Reads the requested `obj` from `endpoint`. + :param endpoint: the endpoint to read, e.g. `workunit`, `project`, `order`, `externaljob`, etc. + :param obj: a python dictionary which contains all the attribute values that have to match + :param auth: the authentication handle of the user performing the request + :param page: the page number to read + :param idonly: whether to return only the ids of the objects + :param includedeletableupdateable: TODO + """ + query = copy.deepcopy(obj) + query["includedeletableupdateable"] = includedeletableupdateable + + full_query = dict(login=auth.login, page=page, password=auth.password, query=query, idonly=idonly) + service = self._get_suds_service(endpoint) + response = service.read(full_query) + return self._convert_results(response=response, endpoint=endpoint) + + # TODO: How is client.service.readid different from client.service.read. Do we need this method? 
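To make the engine boundary concrete, a hedged usage sketch (the base URL and query are placeholders, and `auth` is assumed to be a valid BfabricAuth):

    # Hypothetical, direct use of the SUDS engine:
    engine = EngineSUDS(base_url="https://fgcz-bfabric.uzh.ch/bfabric")
    result = engine.read(endpoint="sample", obj={"id": 1234}, auth=auth, page=1)
    print(result.to_list_dict())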
+ def readid(self, endpoint: str, query: dict, auth: BfabricAuth, page: int = 1) -> ResultContainer: + query = dict(login=auth.login, page=page, password=auth.password, query=query) + service = self._get_suds_service(endpoint) + response = service.readid(query) + return self._convert_results(response=response, endpoint=endpoint) + + def save(self, endpoint: str, obj: dict, auth: BfabricAuth) -> ResultContainer: + query = {"login": auth.login, "password": auth.password, endpoint: obj} + service = self._get_suds_service(endpoint) + try: + response = service.save(query) + except MethodNotFound as e: + raise BfabricRequestError(f"SUDS failed to find save method for the {endpoint} endpoint.") from e + return self._convert_results(response=response, endpoint=endpoint) + + def delete(self, endpoint: str, id: int | list[int], auth: BfabricAuth) -> ResultContainer: + if isinstance(id, list) and len(id) == 0: + print("Warning, attempted to delete an empty list, ignoring") + # TODO maybe use error here (and make sure it's consistent) + return ResultContainer([], total_pages_api=0) + + query = {"login": auth.login, "password": auth.password, "id": id} + service = self._get_suds_service(endpoint) + response = service.delete(query) + return self._convert_results(response=response, endpoint=endpoint) + + def _get_suds_service(self, endpoint: str) -> ServiceProxy: + """Returns a SUDS service for the given endpoint. Reuses existing instances when possible.""" + if endpoint not in self._cl: + wsdl = "".join((self._base_url, "/", endpoint, "?wsdl")) + self._cl[endpoint] = Client(wsdl, cache=None) + return self._cl[endpoint].service + + def _convert_results(self, response: Any, endpoint: str) -> ResultContainer: + try: + n_available_pages = response["numberofpages"] + except AttributeError: + n_available_pages = 0 + errors = get_response_errors(response, endpoint=endpoint) + if not hasattr(response, endpoint): + return ResultContainer([], total_pages_api=0, errors=errors) + # TODO up until here it's duplicated with engine_zeep + results = [] + for result in response[endpoint]: + result_parsed = suds_asdict_recursive(result, convert_types=True) + result_parsed = _clean_result( + result_parsed, + drop_underscores_suds=self._drop_underscores, + sort_responses=True, + ) + results += [result_parsed] + return ResultContainer( + results=results, total_pages_api=n_available_pages, errors=errors + ) diff --git a/bfabric/engine/engine_zeep.py b/bfabric/engine/engine_zeep.py new file mode 100644 index 00000000..a5444c70 --- /dev/null +++ b/bfabric/engine/engine_zeep.py @@ -0,0 +1,134 @@ +from __future__ import annotations +import copy +from typing import Any + +import zeep +from zeep.helpers import serialize_object + +from bfabric.bfabric_config import BfabricAuth +from bfabric.errors import BfabricRequestError, get_response_errors +from bfabric.src.result_container import ResultContainer, _clean_result + + +class EngineZeep: + """B-Fabric API Zeep Engine""" + + def __init__(self, base_url: str) -> None: + self._cl = {} + self._base_url = base_url + + def read( + self, + endpoint: str, + obj: dict, + auth: BfabricAuth, + page: int = 1, + idonly: bool = False, + includedeletableupdateable: bool = False, + ) -> ResultContainer: + query = copy.deepcopy(obj) + query["includedeletableupdateable"] = includedeletableupdateable + + # FIXME: Hacks for the cases where Zeep thinks a parameter is compulsory and it is actually not + if endpoint == "sample": + excl_keys = [ + "includefamily", + "includeassociations", + 
"includeplates", + "includeresources", + "includeruns", + "includechildren", + "includeparents", + "includereplacements", + ] + _zeep_query_append_skipped(query, excl_keys, inplace=True, overwrite=False) + + full_query = dict(login=auth.login, page=page, password=auth.password, query=query, idonly=idonly) + + client = self._get_client(endpoint) + with client.settings(strict=False, xml_huge_tree=True, xsd_ignore_sequence_order=True): + response = client.service.read(full_query) + return self._convert_results(response=response, endpoint=endpoint) + + def readid( + self, endpoint: str, obj: dict, auth: BfabricAuth, page: int = 1, includedeletableupdateable: bool = True + ) -> ResultContainer: + raise NotImplementedError("Attempted to use a method `readid` of Zeep, which does not exist") + + def save(self, endpoint: str, obj: dict, auth: BfabricAuth) -> ResultContainer: + query = copy.deepcopy(obj) + + # FIXME: Hacks for the cases where Zeep thinks a parameter is compulsory and it is actually not + if endpoint == "resource": + excl_keys = ["name", "sampleid", "storageid", "workunitid", "relativepath"] + _zeep_query_append_skipped(query, excl_keys, inplace=True, overwrite=False) + + full_query = {"login": auth.login, "password": auth.password, endpoint: query} + + client = self._get_client(endpoint) + + try: + with client.settings(strict=False): + response = client.service.save(full_query) + except AttributeError as e: + if e.args[0] == "Service has no operation 'save'": + raise BfabricRequestError(f"ZEEP failed to find save method for the {endpoint} endpoint.") from e + raise e + return self._convert_results(response=response, endpoint=endpoint) + + def delete(self, endpoint: str, id: int | list[int], auth: BfabricAuth) -> ResultContainer: + if isinstance(id, list) and len(id) == 0: + print("Warning, attempted to delete an empty list, ignoring") + # TODO maybe use error here (and make sure it's consistent) + return ResultContainer([], total_pages_api=0) + + query = {"login": auth.login, "password": auth.password, "id": id} + + client = self._get_client(endpoint) + response = client.service.delete(query) + return self._convert_results(response=response, endpoint=endpoint) + + def _get_client(self, endpoint: str) -> zeep.Client: + if endpoint not in self._cl: + wsdl = "".join((self._base_url, "/", endpoint, "?wsdl")) + self._cl[endpoint] = zeep.Client(wsdl) + return self._cl[endpoint] + + def _convert_results(self, response: Any, endpoint: str) -> ResultContainer: + try: + n_available_pages = response["numberofpages"] + except AttributeError: + n_available_pages = 0 + errors = get_response_errors(response, endpoint=endpoint) + if not hasattr(response, endpoint): + return ResultContainer([], total_pages_api=0, errors=errors) + # TODO up until here it's duplicated with engine_suds + results = [] + for result in response[endpoint]: + results_parsed = dict(serialize_object(result, target_cls=dict)) + results_parsed = _clean_result( + results_parsed, + drop_underscores_suds=False, # NOTE: Underscore problem specific to SUDS + sort_responses=True, + ) + results += [results_parsed] + return ResultContainer(results=results, total_pages_api=n_available_pages, errors=errors) + + +# TODO: Check if this is a bug of BFabric or Zeep. Specifically, see if the same call to bFabricPy has the same bug +def _zeep_query_append_skipped(query: dict, skipped_keys: list, inplace: bool = False, overwrite: bool = False) -> dict: + """ + This function is used to fix a buggy behaviour of Zeep/BFabric. 
Specifically, Zeep does not return correct + query results if some of the optional parameters are not mentioned in the query. + + :param query: Original query + :param skipped_keys: Optional keys to skip + :param inplace: Whether to change the argument, or make a new copy to return + :param overwrite: Whether to overwrite the key if it is already present in the query + :return: Adds optional keys to query as skipped values. + """ + query_this = copy.deepcopy(query) if not inplace else query + for key in skipped_keys: + if overwrite or (key not in query_this.keys()): + query_this[key] = zeep.xsd.SkipValue + return query_this diff --git a/bfabric/src/response_format_suds.py b/bfabric/engine/response_format_suds.py similarity index 85% rename from bfabric/src/response_format_suds.py rename to bfabric/engine/response_format_suds.py index 672d36fe..aaf64d54 100644 --- a/bfabric/src/response_format_suds.py +++ b/bfabric/engine/response_format_suds.py @@ -1,10 +1,10 @@ -# from collections import OrderedDict -from typing import Any, Union, List +from __future__ import annotations +from typing import Any from suds.sax.text import Text from suds.sudsobject import asdict -def convert_suds_type(item: Any) -> Union[int, str]: +def convert_suds_type(item: Any) -> int | str: """ Converts the suds type to an equivalent python type. There is, to my knowledge, only a single suds type which is currently ever return, namely 'Text'. Integers and doubles are already cast to their python equivalents and @@ -26,16 +26,15 @@ def suds_asdict_recursive(d, convert_types: bool = False) -> dict: """ out = {} for k, v in asdict(d).items(): - if hasattr(v, '__keylist__'): + if hasattr(v, "__keylist__"): out[k] = suds_asdict_recursive(v, convert_types=convert_types) elif isinstance(v, list): out[k] = [] for item in v: - if hasattr(item, '__keylist__'): + if hasattr(item, "__keylist__"): out[k].append(suds_asdict_recursive(item, convert_types=convert_types)) else: out[k].append(convert_suds_type(item) if convert_types else item) else: out[k] = convert_suds_type(v) if convert_types else v - # return OrderedDict(out) return out diff --git a/bfabric/src/errors.py b/bfabric/errors.py similarity index 100% rename from bfabric/src/errors.py rename to bfabric/errors.py diff --git a/bfabric/examples/compare_zeep_suds_pagination.py b/bfabric/examples/compare_zeep_suds_pagination.py index ebb1ae57..42235f68 100644 --- a/bfabric/examples/compare_zeep_suds_pagination.py +++ b/bfabric/examples/compare_zeep_suds_pagination.py @@ -1,11 +1,12 @@ import os + import pandas as pd from bfabric import Bfabric, BfabricAPIEngineType from bfabric.bfabric import get_system_auth from bfabric.src.pandas_helper import list_dict_to_df -''' +""" This will eventually become a test that will compare Zeep and Suds paginated output Strategy: 1. Make a query for 300 entries from user for both Zeep and Suds @@ -15,7 +16,7 @@ Observations: * There are mismatches in the fields of "project" and "formerproject", where about half of projects are not correctly parsed by Zeep. 
-''' +""" def report_test_result(rez: bool, prefix: str): @@ -24,15 +25,16 @@ def report_test_result(rez: bool, prefix: str): else: print("--", prefix, "test failed --") + def _calc_query(config, auth, engine, endpoint): print("Sending query via", engine) b = Bfabric(config, auth, engine=engine) response_class = b.read(endpoint, {}, max_results=300, idonly=False, includedeletableupdateable=True) - response_dict = response_class.to_list_dict(drop_empty=True, drop_underscores_suds=True, - have_sort_responses=True) + response_dict = response_class.to_list_dict(drop_empty=True, have_sort_responses=True) return list_dict_to_df(response_dict) + def _set_partition_test(a, b) -> bool: aSet = set(a) bSet = set(b) @@ -47,6 +49,7 @@ def _set_partition_test(a, b) -> bool: # Test passes if there are no entities unique to only one of the sets return (len(unique1) == 0) and (len(unique2) == 0) + def dataframe_pagination_test(config, auth, endpoint, use_cached: bool = False, store_cached: bool = True): pwd_zeep = "tmp_zeep_" + endpoint + ".csv" pwd_suds = "tmp_suds_" + endpoint + ".csv" @@ -78,9 +81,9 @@ def dataframe_pagination_test(config, auth, endpoint, use_cached: bool = False, match_test_result = True for col_name in suds_cols: if not resp_df_suds[col_name].equals(resp_df_zeep[col_name]): - print('------- Mismatch in: ', col_name, '-------') - print('Suds', list(resp_df_suds[col_name])) - print('Zeep', list(resp_df_zeep[col_name])) + print("------- Mismatch in: ", col_name, "-------") + print("Suds", list(resp_df_suds[col_name])) + print("Zeep", list(resp_df_zeep[col_name])) match_test_result = False return match_test_result @@ -88,5 +91,5 @@ def dataframe_pagination_test(config, auth, endpoint, use_cached: bool = False, config, auth = get_system_auth(config_env="TEST") -result = dataframe_pagination_test(config, auth, 'user', use_cached=False, store_cached=True) +result = dataframe_pagination_test(config, auth, "user", use_cached=False, store_cached=True) report_test_result(result, "pagination") diff --git a/bfabric/examples/compare_zeep_suds_query.py b/bfabric/examples/compare_zeep_suds_query.py index 601b2cea..73c05761 100644 --- a/bfabric/examples/compare_zeep_suds_query.py +++ b/bfabric/examples/compare_zeep_suds_query.py @@ -8,7 +8,7 @@ from bfabric import BfabricAuth, BfabricConfig from bfabric.bfabric import get_system_auth from bfabric.src.response_format_dict import drop_empty_elements, map_element_keys -from bfabric.src.response_format_suds import suds_asdict_recursive +from bfabric.engine.response_format_suds import suds_asdict_recursive ''' This file is intended to eventually become a test to compare that Zeep and SUDS produce diff --git a/bfabric/src/engine_suds.py b/bfabric/src/engine_suds.py deleted file mode 100644 index a3148b05..00000000 --- a/bfabric/src/engine_suds.py +++ /dev/null @@ -1,74 +0,0 @@ -from __future__ import annotations - -import copy -from typing import Any - -from suds import MethodNotFound -from suds.client import Client -from suds.serviceproxy import ServiceProxy - -from bfabric.bfabric_config import BfabricAuth -from bfabric.src.errors import BfabricRequestError - - -class EngineSUDS: - """B-Fabric API SUDS Engine""" - - def __init__(self, base_url: str) -> None: - self.cl = {} - self.base_url = base_url - - def read( - self, - endpoint: str, - obj: dict[str, Any], - auth: BfabricAuth, - page: int = 1, - idonly: bool = False, - includedeletableupdateable: bool = False, - ): - """Reads the requested `obj` from `endpoint`. 
- :param endpoint: the endpoint to read, e.g. `workunit`, `project`, `order`, `externaljob`, etc. - :param obj: a python dictionary which contains all the attribute values that have to match - :param auth: the authentication handle of the user performing the request - :param page: the page number to read - :param idonly: whether to return only the ids of the objects - :param includedeletableupdateable: TODO - """ - query = copy.deepcopy(obj) - query["includedeletableupdateable"] = includedeletableupdateable - - full_query = dict(login=auth.login, page=page, password=auth.password, query=query, idonly=idonly) - service = self._get_suds_service(endpoint) - return service.read(full_query) - - # TODO: How is client.service.readid different from client.service.read. Do we need this method? - def readid(self, endpoint: str, obj: dict, auth: BfabricAuth, page: int = 1): - query = dict(login=auth.login, page=page, password=auth.password, query=obj) - service = self._get_suds_service(endpoint) - return service.readid(query) - - def save(self, endpoint: str, obj: dict, auth: BfabricAuth): - query = {"login": auth.login, "password": auth.password, endpoint: obj} - service = self._get_suds_service(endpoint) - try: - res = service.save(query) - except MethodNotFound as e: - raise BfabricRequestError(f"SUDS failed to find save method for the {endpoint} endpoint.") from e - return res - - def delete(self, endpoint: str, id: int | list[int], auth: BfabricAuth): - if isinstance(id, list) and len(id) == 0: - print("Warning, attempted to delete an empty list, ignoring") - return [] - - query = {"login": auth.login, "password": auth.password, "id": id} - service = self._get_suds_service(endpoint) - return service.delete(query) - - def _get_suds_service(self, endpoint: str) -> ServiceProxy: - """Returns a SUDS service for the given endpoint. Reuses existing instances when possible.""" - if endpoint not in self.cl: - wsdl = "".join((self.base_url, "/", endpoint, "?wsdl")) - self.cl[endpoint] = Client(wsdl, cache=None) - return self.cl[endpoint].service diff --git a/bfabric/src/engine_zeep.py b/bfabric/src/engine_zeep.py deleted file mode 100644 index d895337f..00000000 --- a/bfabric/src/engine_zeep.py +++ /dev/null @@ -1,96 +0,0 @@ -from typing import Union, List - -import zeep -import copy - -from bfabric.bfabric_config import BfabricAuth -from bfabric.src.errors import BfabricRequestError - - -# TODO: Check if this is a bug of BFabric or Zeep. Specifically, see if the same call to bFabricPy has the same bug -def _zeep_query_append_skipped(query: dict, skipped_keys: list, inplace: bool = False, overwrite: bool = False) -> dict: - """ - This function is used to fix a buggy behaviour of Zeep/BFabric. Specifically, Zeep does not return correct - query results if some of the optional parameters are not mentioned in the query. - - :param query: Original query - :param skipped_keys: Optional keys to skip - :param inplace: Whether to change the argument, or make a new copy to return - :param overwrite: Whether to overwrite the key if it is already present in the query - :return: Adds optional keys to query as skipped values. 
- """ - query_this = copy.deepcopy(query) if not inplace else query - for key in skipped_keys: - if overwrite or (key not in query_this.keys()): - query_this[key] = zeep.xsd.SkipValue - return query_this - - -class EngineZeep: - """B-Fabric API Zeep Engine""" - - def __init__(self, base_url: str): - self.cl = {} - self.base_url = base_url - - def _get_client(self, endpoint: str): - try: - if endpoint not in self.cl: - wsdl = "".join((self.base_url, '/', endpoint, "?wsdl")) - self.cl[endpoint] = zeep.Client(wsdl) - return self.cl[endpoint] - except Exception as e: - print(e) - raise - - def read(self, endpoint: str, obj: dict, auth: BfabricAuth, page: int = 1, idonly: bool = False, - includedeletableupdateable: bool = False): - query = copy.deepcopy(obj) - query['includedeletableupdateable'] = includedeletableupdateable - - # FIXME: Hacks for the cases where Zeep thinks a parameter is compulsory and it is actually not - if endpoint == 'sample': - excl_keys = ['includefamily', 'includeassociations', 'includeplates', 'includeresources', 'includeruns', - 'includechildren', 'includeparents', 'includereplacements'] - _zeep_query_append_skipped(query, excl_keys, inplace=True, overwrite=False) - - full_query = dict(login=auth.login, page=page, password=auth.password, query=query, idonly=idonly) - - client = self._get_client(endpoint) - with client.settings(strict=False, xml_huge_tree=True, xsd_ignore_sequence_order=True): - return client.service.read(full_query) - - def readid(self, endpoint: str, obj: dict, auth: BfabricAuth, page: int = 1, includedeletableupdateable: bool = True): - raise NotImplementedError("Attempted to use a method `readid` of Zeep, which does not exist") - - def save(self, endpoint: str, obj: dict, auth: BfabricAuth): - query = copy.deepcopy(obj) - - # FIXME: Hacks for the cases where Zeep thinks a parameter is compulsory and it is actually not - if endpoint == 'resource': - excl_keys = ['name', 'sampleid', 'storageid', 'workunitid', 'relativepath'] - _zeep_query_append_skipped(query, excl_keys, inplace=True, overwrite=False) - - - full_query = {'login': auth.login, 'password': auth.password, endpoint: query} - - client = self._get_client(endpoint) - - try: - with client.settings(strict=False): - res = client.service.save(full_query) - except AttributeError as e: - if e.args[0] == "Service has no operation 'save'": - raise BfabricRequestError(f"ZEEP failed to find save method for the {endpoint} endpoint.") from e - raise e - return res - - def delete(self, endpoint: str, id: Union[int, List], auth: BfabricAuth): - if isinstance(id, list) and len(id) == 0: - print("Warning, attempted to delete an empty list, ignoring") - return [] - - query = {'login': auth.login, 'password': auth.password, 'id': id} - - client = self._get_client(endpoint) - return client.service.delete(query) diff --git a/bfabric/src/response_format_dict.py b/bfabric/src/response_format_dict.py index a16e8eeb..772c69d7 100644 --- a/bfabric/src/response_format_dict.py +++ b/bfabric/src/response_format_dict.py @@ -1,6 +1,5 @@ -from typing import Optional, Union from copy import deepcopy -from collections import OrderedDict +from typing import Optional, Union def sort_dict(d: dict) -> dict: @@ -11,6 +10,7 @@ def sort_dict(d: dict) -> dict: """ return dict(sorted(d.items())) + def _recursive_drop_empty(response_elem: Union[list, dict]) -> None: """ Iterates over all nested lists, dictionaries and basic values. 
Whenever a dictionary value is encountered, that is
@@ -31,6 +31,7 @@ def _recursive_drop_empty(response_elem: Union[list, dict]) -> None:
     for k in keys_to_delete:
         del response_elem[k]
 
+
 def drop_empty_elements(response: Union[list, dict], inplace: bool = True) -> Optional[Union[list, dict]]:
     """
     Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary value is encountered, that is
@@ -44,6 +45,7 @@ def drop_empty_elements(response: Union[list, dict], inplace: bool = True) -> Op
         _recursive_drop_empty(response_filtered)
         return response_filtered
 
+
 def _recursive_map_keys(response_elem, keymap: dict) -> None:
     """
     Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary key is found for which
@@ -64,10 +66,10 @@ def _recursive_map_keys(response_elem, keymap: dict) -> None:
 
     for k in keys_to_delete:
         response_elem[keymap[k]] = response_elem[k]  # Copy old value to the new key
-        del response_elem[k]                         # Delete old key
+        del response_elem[k]  # Delete old key
+
 
-def map_element_keys(response: Union[list, dict], keymap: dict,
-                     inplace: bool = True) -> Union[list, dict]:
+def map_element_keys(response: Union[list, dict], keymap: dict, inplace: bool = True) -> Union[list, dict]:
     """
     Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary key is found for which
     the mapping is requested, that key is renamed to the corresponding mapped one
@@ -81,6 +83,7 @@ def map_element_keys(response: Union[list, dict], keymap: dict,
         _recursive_map_keys(response_filtered, keymap)
         return response_filtered
 
+
 def _recursive_sort_dicts_by_key(response_elem) -> None:
     """
     Iterates over all nested lists, dictionaries and basic values. Whenever a nested dictionary is found, it is sorted
@@ -99,6 +102,7 @@ def _recursive_sort_dicts_by_key(response_elem) -> None:
             response_elem[k] = sort_dict(v)
             _recursive_sort_dicts_by_key(v)
 
+
 def sort_dicts_by_key(response: Union[list, dict], inplace: bool = True) -> Optional[Union[list, dict]]:
     """
     Iterates over all nested lists, dictionaries and basic values. Whenever a nested dictionary is found, it is sorted
diff --git a/bfabric/src/result_container.py b/bfabric/src/result_container.py
index 1da4cb5b..ccbb2c82 100644
--- a/bfabric/src/result_container.py
+++ b/bfabric/src/result_container.py
@@ -1,74 +1,61 @@
-# NOTE: This allows class type annotations inside the same class. According to
-# https://stackoverflow.com/questions/44798635/how-can-i-set-the-same-type-as-class-in-methods-parameter-following-pep484
-# this should become default behaviour in one of the future versions of python.
Remove this import
-# once it is no longer necessary
 from __future__ import annotations
 
-from enum import Enum
-from zeep.helpers import serialize_object
+import logging
+from typing import Any
 
-from bfabric.src.response_format_suds import suds_asdict_recursive
 import bfabric.src.response_format_dict as formatter
 
 
-class BfabricResultType(Enum):
-    LISTDICT = 1
-    LISTSUDS = 2
-    LISTZEEP = 3
-
-
-def _clean_result(rez: dict, drop_empty: bool = True, drop_underscores_suds: bool = True,
-                  sort_responses: bool = False) -> dict:
-    if drop_empty:
-        formatter.drop_empty_elements(rez, inplace=True)
-
-    if drop_underscores_suds:
-        formatter.map_element_keys(rez,
-                                   {'_id': 'id', '_classname': 'classname', '_projectid': 'projectid'},
-                                   inplace=True)
-    if sort_responses:
-        formatter.sort_dicts_by_key(rez, inplace=True)
-
-    return rez
-
-
 class ResultContainer:
-    def __init__(self, results: list, result_type: BfabricResultType, total_pages_api: int = None, errors: list = None):
+    """Container structure for query results."""
+
+    def __init__(
+        self, results: list[dict[str, Any]], total_pages_api: int | None = None, errors: list | None = None
+    ) -> None:
         """
         :param results: List of BFabric query results
-        :param result_type: Format of each result (All must be of the same format)
         :param total_pages_api: Maximal number of pages that were available for reading.
             NOTE: User may have requested to cap the total number of results. Thus, it may be of interest to know
             the (approximate) total number of results the API had for the query. The total number of results is
            somewhere between (max_pages - 1) * BFABRIC_QUERY_LIMIT and max_pages * BFABRIC_QUERY_LIMIT
+        :param errors: List of errors that occurred during the query (if any)
         """
         self.results = results
-        self.result_type = result_type
         self._total_pages_api = total_pages_api
         self._errors = errors or []
 
-    def __getitem__(self, idx: int):
+    def __getitem__(self, idx: int) -> dict[str, Any]:
        return self.results[idx]
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return self.__str__()
 
-    def __str__(self):
+    def __str__(self) -> str:
         return str(self.to_list_dict())
 
-    def __len__(self):
+    def __len__(self) -> int:
         return len(self.results)
 
-    def assert_success(self):
+    def get_first_n_results(self, n_results: int | None) -> ResultContainer:
+        """Returns a shallow copy of self, containing at most `n_results` results."""
+        if n_results is None:
+            return self
+        else:
+            return ResultContainer(self.results[:n_results], total_pages_api=self._total_pages_api, errors=self._errors)
+
+    def assert_success(self) -> None:
+        """Asserts that the query was successful. Raises a `RuntimeError` if it was not."""
         if not self.is_success:
             raise RuntimeError("Query was not successful", self._errors)
 
     @property
     def is_success(self) -> bool:
+        """Whether the query was successful."""
         return len(self._errors) == 0
 
     @property
     def errors(self) -> list:
+        """List of errors that occurred during the query.
An empty list means the query was successful.""" return self._errors def extend(self, other: ResultContainer) -> None: @@ -77,49 +64,41 @@ def extend(self, other: ResultContainer) -> None: :param other: The other query results that should be appended to this :return: """ - - if self.result_type != other.result_type: - raise ValueError("Attempting to merge results of two different types", self.result_type, other.result_type) - self.results += other.results self._errors += other.errors - if (self._total_pages_api is not None) and (other._total_pages_api is not None): - self._total_pages_api += other._total_pages_api - else: - self._total_pages_api = None + if self._total_pages_api != other.total_pages_api: + logging.warning( + f"Results observed with different total pages counts: " + f"{self._total_pages_api} != {other.total_pages_api}" + ) @property - def total_pages_api(self): + def total_pages_api(self) -> int | None: + """Number of pages available from the API.""" return self._total_pages_api - def to_list_dict(self, drop_empty: bool = True, drop_underscores_suds: bool = True, - have_sort_responses: bool = False): + def to_list_dict(self, drop_empty: bool = False) -> list[dict[str, Any]]: """ Converts the results to a list of dictionaries. :param drop_empty: If True, empty attributes will be removed from the results - :param drop_underscores_suds: If True, leading underscores will be removed from the keys of the results - :param have_sort_responses: If True, keys of dictionaries in the response will be sorted. - TODO what about the order of items in the list? """ - if self.result_type == BfabricResultType.LISTDICT: - return self.results - elif self.result_type == BfabricResultType.LISTSUDS: - results = [] - for rez in self.results: - rez_parsed = suds_asdict_recursive(rez, convert_types=True) - rez_parsed = _clean_result(rez_parsed, drop_empty=drop_empty, - drop_underscores_suds=drop_underscores_suds, - sort_responses=have_sort_responses) - results += [rez_parsed] - return results - elif self.result_type == BfabricResultType.LISTZEEP: - results = [] - for rez in self.results: - rez_parsed = dict(serialize_object(rez, target_cls=dict)) - rez_parsed = _clean_result(rez_parsed, drop_empty=drop_empty, - drop_underscores_suds=False, # NOTE: Underscore problem specific to SUDS - sort_responses=have_sort_responses) - results += [rez_parsed] - return results + if drop_empty: + return formatter.drop_empty_elements(self.results, inplace=False) else: - raise ValueError("Unexpected results type", self.result_type) + return self.results + + +def _clean_result(result: dict, drop_underscores_suds: bool = True, sort_responses: bool = False) -> dict: + """ + :param drop_underscores_suds: if True, the keys of the dictionaries in the response will have leading + underscores removed in some cases (relevant for SUDS) + :param sort_responses: the keys of the dictionaries in the response will be sorted (recursively) + """ + if drop_underscores_suds: + formatter.map_element_keys( + result, {"_id": "id", "_classname": "classname", "_projectid": "projectid"}, inplace=True + ) + if sort_responses: + formatter.sort_dicts_by_key(result, inplace=True) + + return result diff --git a/bfabric/tests/integration/test_bfabric2_bad_requests.py b/bfabric/tests/integration/test_bfabric2_bad_requests.py index ef7dbca9..a37d475e 100755 --- a/bfabric/tests/integration/test_bfabric2_bad_requests.py +++ b/bfabric/tests/integration/test_bfabric2_bad_requests.py @@ -4,7 +4,7 @@ from bfabric import BfabricAPIEngineType, Bfabric from 
bfabric.bfabric import get_system_auth -from bfabric.src.errors import BfabricRequestError +from bfabric.errors import BfabricRequestError class BfabricTestBadRequest(unittest.TestCase): diff --git a/bfabric/tests/integration/test_bfabric2_read_pagination.py b/bfabric/tests/integration/test_bfabric2_read_pagination.py index 22801f2e..0d02d589 100644 --- a/bfabric/tests/integration/test_bfabric2_read_pagination.py +++ b/bfabric/tests/integration/test_bfabric2_read_pagination.py @@ -6,14 +6,12 @@ from bfabric.src.pandas_helper import list_dict_to_df -def _calc_query(config, auth, engine: BfabricAPIEngineType, endpoint: str, - max_results: int = 300) -> pd.DataFrame: +def _calc_query(config, auth, engine: BfabricAPIEngineType, endpoint: str, max_results: int = 300) -> pd.DataFrame: print("Sending query via", engine) b = Bfabric(config, auth, engine=engine) response_class = b.read(endpoint, {}, max_results=max_results) - response_dict = response_class.to_list_dict(drop_empty=True, drop_underscores_suds=True, - have_sort_responses=True) + response_dict = response_class.to_list_dict(drop_empty=True) return list_dict_to_df(response_dict) @@ -22,19 +20,17 @@ def setUp(self): self.config, self.auth = get_system_auth(config_env="TEST") def test_composite_user(self): - endpoint = 'user' + endpoint = "user" max_results = 300 # Test SUDS print("Testing if SUDS returns the requested number of entries") - resp_df_suds = _calc_query(self.config, self.auth, BfabricAPIEngineType.SUDS, endpoint, - max_results=max_results) + resp_df_suds = _calc_query(self.config, self.auth, BfabricAPIEngineType.SUDS, endpoint, max_results=max_results) assert len(resp_df_suds.index) == max_results # Test ZEEP print("Testing if ZEEP returns the requested number of entries") - resp_df_zeep = _calc_query(self.config, self.auth, BfabricAPIEngineType.ZEEP, endpoint, - max_results=max_results) + resp_df_zeep = _calc_query(self.config, self.auth, BfabricAPIEngineType.ZEEP, endpoint, max_results=max_results) assert len(resp_df_zeep.index) == max_results # Rename suds to remove underscores @@ -53,5 +49,5 @@ def test_composite_user(self): mismatch_cols += [col_name] # TODO: Make the test strict if Zeep bug is ever resolved. 
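(Aside: a small illustration of the `drop_empty` flag that `_calc_query` above
passes to `to_list_dict`; the values mirror the expectations pinned down by the
new `test_result_container.py` later in this series.)

    container = ResultContainer([{"a": None, "b": 1, "c": []}, {"a": 2, "b": 3, "c": None}])
    container.to_list_dict(drop_empty=True)   # -> [{"b": 1}, {"a": 2, "b": 3}]
    container.to_list_dict(drop_empty=False)  # -> the results, unchanged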
- assert mismatch_cols == ['formerproject', 'project'] + self.assertListEqual(["formerproject", "project"], mismatch_cols) print("SUDS and ZEEP mismatch in", mismatch_cols, "(expected)") diff --git a/bfabric/tests/unit/test_bfabric.py b/bfabric/tests/unit/test_bfabric.py index 33ca1aff..f43e9c2d 100644 --- a/bfabric/tests/unit/test_bfabric.py +++ b/bfabric/tests/unit/test_bfabric.py @@ -4,7 +4,7 @@ from unittest.mock import MagicMock, patch, ANY from bfabric import Bfabric, BfabricAPIEngineType, BfabricConfig -from bfabric.src.engine_suds import EngineSUDS +from bfabric.engine.engine_suds import EngineSUDS class TestBfabric(unittest.TestCase): diff --git a/bfabric/tests/unit/test_result_container.py b/bfabric/tests/unit/test_result_container.py new file mode 100644 index 00000000..738cd58d --- /dev/null +++ b/bfabric/tests/unit/test_result_container.py @@ -0,0 +1,89 @@ +import logging +import unittest + +from bfabric.src.result_container import ResultContainer + + +class BfabricTestResultContainer(unittest.TestCase): + def setUp(self): + self.res1 = ResultContainer([1, 2, 3], total_pages_api=1) + self.res2 = ResultContainer([4, 5], total_pages_api=1) + self.res_with_empty = ResultContainer( + [{"a": None, "b": 1, "c": []}, {"a": 2, "b": 3, "c": None}] + ) + + def test_str(self): + self.assertEqual("[1, 2, 3]", str(self.res1)) + self.assertEqual("[4, 5]", str(self.res2)) + + def test_repr(self): + self.assertEqual("[1, 2, 3]", str(self.res1)) + self.assertEqual("[4, 5]", str(self.res2)) + + def test_len(self): + self.assertEqual(3, len(self.res1)) + self.assertEqual(2, len(self.res2)) + + def test_getitem(self): + self.assertEqual(3, self.res1[2]) + self.assertEqual(4, self.res2[0]) + + def test_get_first_n_results_when_available(self): + res3 = self.res1.get_first_n_results(2) + self.assertEqual(2, len(res3)) + self.assertEqual([1, 2], res3.results) + + def test_get_first_n_results_when_not_available(self): + res3 = self.res1.get_first_n_results(4) + self.assertEqual(3, len(res3)) + self.assertEqual([1, 2, 3], res3.results) + + def test_get_first_n_results_when_none(self): + res3 = self.res1.get_first_n_results(None) + self.assertEqual(3, len(res3)) + self.assertEqual([1, 2, 3], res3.results) + + def test_assert_success_when_success(self): + self.res1.assert_success() + + def test_assert_success_when_error(self): + self.res1.errors.append("MockedError") + with self.assertRaises(RuntimeError) as error: + self.res1.assert_success() + self.assertEqual("('Query was not successful', ['MockedError'])", str(error.exception)) + + def test_extend_when_same_lengths(self): + res1 = ResultContainer([{"a": 1}, {"a": 2}], total_pages_api=5) + res2 = ResultContainer([{"b": 3}, {"b": 4}], total_pages_api=5) + res1.extend(res2) + self.assertEqual(4, len(res1)) + self.assertEqual([{"a": 1}, {"a": 2}, {"b": 3}, {"b": 4}], res1.results) + self.assertEqual(5, res1.total_pages_api) + + def test_extend_when_different_lengths(self): + res3 = ResultContainer( + list(range(200, 400)), + total_pages_api=2, + ) + with self.assertLogs(level=logging.WARNING) as error: + res3.extend(self.res1) + + self.assertEqual(203, len(res3)) + self.assertEqual(list(range(200, 400)) + [1, 2, 3], res3.results) + self.assertEqual(2, res3.total_pages_api) + self.assertIn("Results observed with different total pages counts: 2 != 1", str(error)) + + def test_to_list_dict_when_not_drop_empty(self): + expected = [{"a": None, "b": 1, "c": []}, {"a": 2, "b": 3, "c": None}] + with self.subTest(case="default"): + self.assertListEqual(expected, 
self.res_with_empty.to_list_dict()) + with self.subTest(case="explicit"): + self.assertListEqual(expected, self.res_with_empty.to_list_dict(drop_empty=False)) + + def test_to_list_dict_when_drop_empty(self): + expected = [{"b": 1}, {"a": 2, "b": 3}] + self.assertListEqual(expected, self.res_with_empty.to_list_dict(drop_empty=True)) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/bfabric/tests/unit/test_results_container.py b/bfabric/tests/unit/test_results_container.py deleted file mode 100644 index 5657a9f7..00000000 --- a/bfabric/tests/unit/test_results_container.py +++ /dev/null @@ -1,45 +0,0 @@ -import unittest - -import bfabric.src.result_container as result_container - - -# TODO: Add coverage for LISTSUDS and LISTZEEP -class BfabricTestResultsContainer(unittest.TestCase): - def setUp(self): - - self.c1 = result_container.ResultContainer([1,2,3], total_pages_api=1, - result_type=result_container.BfabricResultType.LISTDICT) - self.c2 = result_container.ResultContainer([4,5], total_pages_api=1, - result_type=result_container.BfabricResultType.LISTDICT) - - def test_str_repr(self): - self.assertEqual(str(self.c1), "[1, 2, 3]") - self.assertEqual(str(self.c2), "[4, 5]") - - self.assertEqual(repr(self.c1), "[1, 2, 3]") - self.assertEqual(repr(self.c2), "[4, 5]") - - def test_len(self): - self.assertEqual(len(self.c1), 3) - self.assertEqual(len(self.c2), 2) - - def test_get_item(self): - self.assertEqual(self.c1[2], 3) - self.assertEqual(self.c2[0], 4) - - def test_append(self): - c3 = result_container.ResultContainer(list(range(200, 400)), total_pages_api=2, - result_type=result_container.BfabricResultType.LISTDICT) - c3.extend(self.c1) - - self.assertEqual(len(c3), 203) - self.assertEqual(c3.results, list(range(200, 400)) + [1,2,3]) - self.assertEqual(c3.total_pages_api, 3) - - def test_to_list_dict(self): - # NOTE: For LISTDICT format, the conversion to listdict does nothing - self.assertEqual(self.c1.to_list_dict(), self.c1.results) - - -if __name__ == "__main__": - unittest.main(verbosity=2) From 4190f5e96ab41031e744574ceaaf9961ebbd03f1 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 13 May 2024 09:51:57 +0200 Subject: [PATCH 091/129] Move some files to bfabric.results module --- bfabric/bfabric.py | 2 +- bfabric/engine/engine_suds.py | 2 +- bfabric/engine/engine_zeep.py | 2 +- bfabric/examples/compare_zeep_suds_pagination.py | 2 +- bfabric/examples/compare_zeep_suds_query.py | 2 +- bfabric/results/__init__.py | 0 bfabric/{src => results}/pandas_helper.py | 0 bfabric/{src => results}/response_format_dict.py | 0 bfabric/{src => results}/result_container.py | 2 +- bfabric/tests/integration/test_bfabric2_read_pagination.py | 2 +- bfabric/tests/unit/test_dict_helper.py | 2 +- bfabric/tests/unit/test_pandas_helper.py | 2 +- bfabric/tests/unit/test_response_format_dict.py | 2 +- bfabric/tests/unit/test_result_container.py | 2 +- 14 files changed, 11 insertions(+), 11 deletions(-) create mode 100644 bfabric/results/__init__.py rename bfabric/{src => results}/pandas_helper.py (100%) rename bfabric/{src => results}/response_format_dict.py (100%) rename bfabric/{src => results}/result_container.py (98%) diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index 00c56736..4a336538 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -32,7 +32,7 @@ from bfabric.engine.engine_zeep import EngineZeep from bfabric.src.cli_formatting import HostnameHighlighter, DEFAULT_THEME from bfabric.src.paginator import compute_requested_pages, BFABRIC_QUERY_LIMIT, 
page_iter -from bfabric.src.result_container import ResultContainer +from bfabric.results.result_container import ResultContainer class BfabricAPIEngineType(Enum): diff --git a/bfabric/engine/engine_suds.py b/bfabric/engine/engine_suds.py index 5b219a2d..8db2c96f 100644 --- a/bfabric/engine/engine_suds.py +++ b/bfabric/engine/engine_suds.py @@ -10,7 +10,7 @@ from bfabric.bfabric_config import BfabricAuth from bfabric.engine.response_format_suds import suds_asdict_recursive from bfabric.errors import BfabricRequestError, get_response_errors -from bfabric.src.result_container import _clean_result, ResultContainer +from bfabric.results.result_container import _clean_result, ResultContainer class EngineSUDS: diff --git a/bfabric/engine/engine_zeep.py b/bfabric/engine/engine_zeep.py index a5444c70..2f32094f 100644 --- a/bfabric/engine/engine_zeep.py +++ b/bfabric/engine/engine_zeep.py @@ -7,7 +7,7 @@ from bfabric.bfabric_config import BfabricAuth from bfabric.errors import BfabricRequestError, get_response_errors -from bfabric.src.result_container import ResultContainer, _clean_result +from bfabric.results.result_container import ResultContainer, _clean_result class EngineZeep: diff --git a/bfabric/examples/compare_zeep_suds_pagination.py b/bfabric/examples/compare_zeep_suds_pagination.py index 42235f68..1e1be1a1 100644 --- a/bfabric/examples/compare_zeep_suds_pagination.py +++ b/bfabric/examples/compare_zeep_suds_pagination.py @@ -4,7 +4,7 @@ from bfabric import Bfabric, BfabricAPIEngineType from bfabric.bfabric import get_system_auth -from bfabric.src.pandas_helper import list_dict_to_df +from bfabric.results.pandas_helper import list_dict_to_df """ This will eventually become a test that will compare Zeep and Suds paginated output diff --git a/bfabric/examples/compare_zeep_suds_query.py b/bfabric/examples/compare_zeep_suds_query.py index 73c05761..09dbc1a3 100644 --- a/bfabric/examples/compare_zeep_suds_query.py +++ b/bfabric/examples/compare_zeep_suds_query.py @@ -7,7 +7,7 @@ from bfabric import BfabricAuth, BfabricConfig from bfabric.bfabric import get_system_auth -from bfabric.src.response_format_dict import drop_empty_elements, map_element_keys +from bfabric.results.response_format_dict import drop_empty_elements, map_element_keys from bfabric.engine.response_format_suds import suds_asdict_recursive ''' diff --git a/bfabric/results/__init__.py b/bfabric/results/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bfabric/src/pandas_helper.py b/bfabric/results/pandas_helper.py similarity index 100% rename from bfabric/src/pandas_helper.py rename to bfabric/results/pandas_helper.py diff --git a/bfabric/src/response_format_dict.py b/bfabric/results/response_format_dict.py similarity index 100% rename from bfabric/src/response_format_dict.py rename to bfabric/results/response_format_dict.py diff --git a/bfabric/src/result_container.py b/bfabric/results/result_container.py similarity index 98% rename from bfabric/src/result_container.py rename to bfabric/results/result_container.py index ccbb2c82..88f9d5cd 100644 --- a/bfabric/src/result_container.py +++ b/bfabric/results/result_container.py @@ -3,7 +3,7 @@ import logging from typing import Any -import bfabric.src.response_format_dict as formatter +import bfabric.results.response_format_dict as formatter class ResultContainer: diff --git a/bfabric/tests/integration/test_bfabric2_read_pagination.py b/bfabric/tests/integration/test_bfabric2_read_pagination.py index 0d02d589..8b3e2f32 100644 --- 
a/bfabric/tests/integration/test_bfabric2_read_pagination.py +++ b/bfabric/tests/integration/test_bfabric2_read_pagination.py @@ -3,7 +3,7 @@ from bfabric import BfabricAPIEngineType, Bfabric from bfabric.bfabric import get_system_auth -from bfabric.src.pandas_helper import list_dict_to_df +from bfabric.results.pandas_helper import list_dict_to_df def _calc_query(config, auth, engine: BfabricAPIEngineType, endpoint: str, max_results: int = 300) -> pd.DataFrame: diff --git a/bfabric/tests/unit/test_dict_helper.py b/bfabric/tests/unit/test_dict_helper.py index 7c663bcd..e9df1595 100644 --- a/bfabric/tests/unit/test_dict_helper.py +++ b/bfabric/tests/unit/test_dict_helper.py @@ -1,6 +1,6 @@ import unittest -from bfabric.src.response_format_dict import sort_dict +from bfabric.results.response_format_dict import sort_dict class BfabricTestSortDict(unittest.TestCase): diff --git a/bfabric/tests/unit/test_pandas_helper.py b/bfabric/tests/unit/test_pandas_helper.py index 7dbac543..d8c9f91b 100644 --- a/bfabric/tests/unit/test_pandas_helper.py +++ b/bfabric/tests/unit/test_pandas_helper.py @@ -1,7 +1,7 @@ import unittest import numpy as np -import bfabric.src.pandas_helper as pandas_helper +import bfabric.results.pandas_helper as pandas_helper class BfabricTestPandasHelper(unittest.TestCase): diff --git a/bfabric/tests/unit/test_response_format_dict.py b/bfabric/tests/unit/test_response_format_dict.py index fca47804..054be593 100644 --- a/bfabric/tests/unit/test_response_format_dict.py +++ b/bfabric/tests/unit/test_response_format_dict.py @@ -1,5 +1,5 @@ import unittest -import bfabric.src.response_format_dict as response_format_dict +import bfabric.results.response_format_dict as response_format_dict class BfabricTestResponseFormatDict(unittest.TestCase): diff --git a/bfabric/tests/unit/test_result_container.py b/bfabric/tests/unit/test_result_container.py index 738cd58d..6bdd26d9 100644 --- a/bfabric/tests/unit/test_result_container.py +++ b/bfabric/tests/unit/test_result_container.py @@ -1,7 +1,7 @@ import logging import unittest -from bfabric.src.result_container import ResultContainer +from bfabric.results.result_container import ResultContainer class BfabricTestResultContainer(unittest.TestCase): From 8ed48389ae86a74dea146a3b68a2e4572700c570 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 13 May 2024 09:54:21 +0200 Subject: [PATCH 092/129] Rename remaining src module contents --- bfabric/bfabric.py | 4 ++-- bfabric/{src => }/cli_formatting.py | 0 bfabric/tests/unit/test_math_helper.py | 2 +- bfabric/tests/unit/test_paginator.py | 2 +- bfabric/utils/__init__.py | 0 bfabric/{src => utils}/math_helper.py | 0 bfabric/{src => utils}/paginator.py | 0 7 files changed, 4 insertions(+), 4 deletions(-) rename bfabric/{src => }/cli_formatting.py (100%) create mode 100644 bfabric/utils/__init__.py rename bfabric/{src => utils}/math_helper.py (100%) rename bfabric/{src => utils}/paginator.py (100%) diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index 4a336538..c7df955a 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -30,8 +30,8 @@ from bfabric.bfabric_config import BfabricConfig from bfabric.engine.engine_suds import EngineSUDS from bfabric.engine.engine_zeep import EngineZeep -from bfabric.src.cli_formatting import HostnameHighlighter, DEFAULT_THEME -from bfabric.src.paginator import compute_requested_pages, BFABRIC_QUERY_LIMIT, page_iter +from bfabric.cli_formatting import HostnameHighlighter, DEFAULT_THEME +from bfabric.utils.paginator import compute_requested_pages, 
BFABRIC_QUERY_LIMIT, page_iter from bfabric.results.result_container import ResultContainer diff --git a/bfabric/src/cli_formatting.py b/bfabric/cli_formatting.py similarity index 100% rename from bfabric/src/cli_formatting.py rename to bfabric/cli_formatting.py diff --git a/bfabric/tests/unit/test_math_helper.py b/bfabric/tests/unit/test_math_helper.py index dac57b4c..0f81be22 100644 --- a/bfabric/tests/unit/test_math_helper.py +++ b/bfabric/tests/unit/test_math_helper.py @@ -1,6 +1,6 @@ import unittest -import bfabric.src.math_helper as math_helper +import bfabric.utils.math_helper as math_helper class BfabricTestMath(unittest.TestCase): diff --git a/bfabric/tests/unit/test_paginator.py b/bfabric/tests/unit/test_paginator.py index 8618b82e..c65a40fb 100644 --- a/bfabric/tests/unit/test_paginator.py +++ b/bfabric/tests/unit/test_paginator.py @@ -1,6 +1,6 @@ import unittest -import bfabric.src.paginator as paginator +import bfabric.utils.paginator as paginator class BfabricTestBasicPagination(unittest.TestCase): diff --git a/bfabric/utils/__init__.py b/bfabric/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bfabric/src/math_helper.py b/bfabric/utils/math_helper.py similarity index 100% rename from bfabric/src/math_helper.py rename to bfabric/utils/math_helper.py diff --git a/bfabric/src/paginator.py b/bfabric/utils/paginator.py similarity index 100% rename from bfabric/src/paginator.py rename to bfabric/utils/paginator.py From 8461e0c57d0cda992a8d8c6ed30303f503c8839f Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 13 May 2024 10:39:01 +0200 Subject: [PATCH 093/129] Reformat files --- bfabric/__init__.py | 58 ++--- bfabric/bfabric2.py | 1 + bfabric/bfabric_legacy.py | 89 ++++--- bfabric/cli_formatting.py | 1 + bfabric/engine/engine_suds.py | 4 +- bfabric/errors.py | 1 + bfabric/examples/compare_zeep_suds_query.py | 58 +++-- bfabric/examples/exists_multi.py | 12 +- bfabric/examples/zeep_debug.py | 27 +-- bfabric/results/pandas_helper.py | 2 + bfabric/scripts/bfabric_delete.py | 7 +- .../scripts/bfabric_feeder_resource_autoQC.py | 222 +++++++++--------- ...c_list_not_existing_storage_directories.py | 8 +- bfabric/scripts/bfabric_read.py | 4 +- bfabric/scripts/bfabric_save_workunit.py | 8 +- .../bfabric_setResourceStatus_available.py | 1 + .../integration/test_bfabric2_bad_requests.py | 18 +- .../tests/integration/test_bfabric2_exists.py | 2 +- .../tests/integration/test_bfabric2_read.py | 4 +- .../integration/test_bfabric2_save_delete.py | 35 +-- bfabric/tests/unit/test_bfabric_config.py | 8 +- bfabric/tests/unit/test_dict_helper.py | 2 +- bfabric/tests/unit/test_pandas_helper.py | 16 +- .../tests/unit/test_response_format_dict.py | 14 +- bfabric/tests/unit/test_result_container.py | 4 +- bfabric/utils/math_helper.py | 2 - 26 files changed, 309 insertions(+), 299 deletions(-) diff --git a/bfabric/__init__.py b/bfabric/__init__.py index 6e5d4f6c..664403fc 100755 --- a/bfabric/__init__.py +++ b/bfabric/__init__.py @@ -6,34 +6,36 @@ from bfabric.bfabric_config import BfabricAuth, BfabricConfig -endpoints = sorted([ - 'annotation', - 'application', - 'attachement', - 'barcodes', - 'charge', - 'comment', - 'container', - 'dataset', - 'executable', - 'externaljob', - 'groupingvar', - 'importresource', - 'instrument', - 'instrumentevent', - 'mail', - 'order', - 'parameter', - 'plate', - 'project', - 'resource', - 'sample', - 'storage', - 'user', - 'workflow', - 'workflowstep', - 'workunit' -]) +endpoints = sorted( + [ + "annotation", + "application", + 
"attachement", + "barcodes", + "charge", + "comment", + "container", + "dataset", + "executable", + "externaljob", + "groupingvar", + "importresource", + "instrument", + "instrumentevent", + "mail", + "order", + "parameter", + "plate", + "project", + "resource", + "sample", + "storage", + "user", + "workflow", + "workflowstep", + "workunit", + ] +) # for unit tests project = 403 diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py index 2763985b..869842f7 100755 --- a/bfabric/bfabric2.py +++ b/bfabric/bfabric2.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import warnings + warnings.warn("bfabric.bfabric2 module is deprecated, use bfabric instead", DeprecationWarning) # TODO deprecated - import from bfabric instead from bfabric.bfabric import Bfabric, BfabricAPIEngineType, get_system_auth diff --git a/bfabric/bfabric_legacy.py b/bfabric/bfabric_legacy.py index cdd135d6..ce49cd59 100644 --- a/bfabric/bfabric_legacy.py +++ b/bfabric/bfabric_legacy.py @@ -17,11 +17,21 @@ class BfabricLegacy(object): """B-Fabric python3 module Implements read and save object methods for B-Fabric wsdl interface """ + def warning(self, msg): sys.stderr.write("\033[93m{}\033[0m\n".format(msg)) - def __init__(self, login: str = None, password: str = None, base_url: str = None, externaljobid=None, - config_path: str = None, config_env: str = None, optional_auth: bool = False, verbose: bool = False): + def __init__( + self, + login: str = None, + password: str = None, + base_url: str = None, + externaljobid=None, + config_path: str = None, + config_env: str = None, + optional_auth: bool = False, + verbose: bool = False, + ): """ :param login: Login string for overriding config file :param password: Password for overriding config file @@ -46,7 +56,9 @@ def __init__(self, login: str = None, password: str = None, base_url: str = None # TODO: Convert to an exception when this branch becomes main config_path_old = config_path or os.path.normpath(os.path.expanduser("~/.bfabricrc.py")) if os.path.isfile(config_path): - self.warning("WARNING! The old .bfabricrc.py was found in the home directory. Delete and make sure to use the new .bfabricpy.yml") + self.warning( + "WARNING! The old .bfabricrc.py was found in the home directory. Delete and make sure to use the new .bfabricpy.yml" + ) # Use the provided config data from arguments instead of the file if not os.path.isfile(config_path): @@ -85,10 +97,7 @@ def read_object(self, endpoint, obj, page=1, plain=False, idonly=False): for the "query". """ return self._perform_request( - endpoint=endpoint, - method="read", - plain=plain, - params=dict(query=obj, idonly=idonly, page=page) + endpoint=endpoint, method="read", plain=plain, params=dict(query=obj, idonly=idonly, page=page) ) def readid_object(self, endpoint, obj, page=1, plain=False): @@ -97,23 +106,13 @@ def readid_object(self, endpoint, obj, page=1, plain=False): externaljob, etc, and returns the object with the requested id. obj is a python dictionary which contains only the id of the endpoint for the "query". """ - return self._perform_request( - endpoint=endpoint, - method="readid", - plain=plain, - params=dict(query=obj, page=page) - ) + return self._perform_request(endpoint=endpoint, method="readid", plain=plain, params=dict(query=obj, page=page)) def save_object(self, endpoint, obj, debug=None): """ same as read_object above but uses the save method. 
""" - return self._perform_request( - endpoint=endpoint, - method="save", - plain=debug is not None, - params={endpoint: obj} - ) + return self._perform_request(endpoint=endpoint, method="save", plain=debug is not None, params={endpoint: obj}) def checkandinsert_object(self, endpoint, obj, debug=None): """ @@ -121,34 +120,29 @@ def checkandinsert_object(self, endpoint, obj, debug=None): """ # TODO This method was changed a while ago to use the "save"endpoint, which makes it functionally identical # to the save_object method. Check if this was intended. - return self._perform_request( - endpoint=endpoint, - method="save", - plain=debug is not None, - params={endpoint: obj} - ) + return self._perform_request(endpoint=endpoint, method="save", plain=debug is not None, params={endpoint: obj}) def delete_object(self, endpoint, id=None, debug=None): """ same as read_object above but uses the delete method. """ - return self._perform_request( - endpoint=endpoint, - method="delete", - plain=debug is not None, - params=dict(id=id) - ) + return self._perform_request(endpoint=endpoint, method="delete", plain=debug is not None, params=dict(id=id)) def upload_file(self, filename, workunitid): - with open(filename, 'rb') as f: + with open(filename, "rb") as f: content = f.read() resource_base64 = base64.b64encode(content).decode() - res = self.save_object('resource', {'base64': resource_base64, - 'name': os.path.basename(filename), - 'description': "base64 encoded file", - 'workunitid': workunitid}) + res = self.save_object( + "resource", + { + "base64": resource_base64, + "name": os.path.basename(filename), + "description": "base64 encoded file", + "workunitid": workunitid, + }, + ) return res @@ -158,9 +152,7 @@ def _get_service(self, endpoint: str) -> Service: self.cl[endpoint] = Client(f"{self.config.base_url}/{endpoint}?wsdl", cache=None) return self.cl[endpoint].service - def _perform_request( - self, endpoint: str, method: str, plain: bool, params: Dict[str, Any] - ) -> Any: + def _perform_request(self, endpoint: str, method: str, plain: bool, params: Dict[str, Any]) -> Any: """Performs a request to the given endpoint and returns the result.""" self.query_counter += 1 request_params = dict(login=self.auth.login, password=self.auth.password, **params) @@ -183,7 +175,9 @@ def print_json(queryres=None): queryres : the object returned by ``read_object`` method. """ if queryres is None: - raise TypeError("print_json() missing 1 required positional argument: please provide the output from read_object as parameter to print_json") + raise TypeError( + "print_json() missing 1 required positional argument: please provide the output from read_object as parameter to print_json" + ) res = json.dumps(queryres, cls=bfabricEncoder, sort_keys=True, indent=2) print(res) @@ -199,7 +193,9 @@ def print_yaml(queryres=None): queryres : the object returned by ``read_object`` method. 
""" if queryres is None: - raise TypeError("print_yaml() missing 1 required positional argument: please provide the output from read_object as parameter to print_yaml") + raise TypeError( + "print_yaml() missing 1 required positional argument: please provide the output from read_object as parameter to print_yaml" + ) res_json = json.dumps(queryres, cls=bfabricEncoder, sort_keys=True) res = yaml.dump(res_json, default_flow_style=False, encoding=None, default_style=None) @@ -218,22 +214,23 @@ def get_sampleid(self, resourceid=None): assert isinstance(resourceid, int) try: - resource = self.read_object('resource', obj={'id': resourceid})[0] + resource = self.read_object("resource", obj={"id": resourceid})[0] except: - return (None) + return None try: - workunit = self.read_object(endpoint='workunit', obj={'id': resource.workunit._id})[0] - return (self.get_sampleid(resourceid=int(workunit.inputresource[0]._id))) + workunit = self.read_object(endpoint="workunit", obj={"id": resource.workunit._id})[0] + return self.get_sampleid(resourceid=int(workunit.inputresource[0]._id)) except: self.warning("fetching sampleid of resource.workunitid = {} failed.".format(resource.workunit._id)) - return (None) + return None class bfabricEncoder(json.JSONEncoder): """ Implements json encoder for the Bfabric.print_json method """ + def default(self, o): try: return dict(o) diff --git a/bfabric/cli_formatting.py b/bfabric/cli_formatting.py index aa41f0a7..b8acc7ff 100644 --- a/bfabric/cli_formatting.py +++ b/bfabric/cli_formatting.py @@ -4,6 +4,7 @@ class HostnameHighlighter(RegexHighlighter): """Highlights hostnames in URLs.""" + base_style = "bfabric." highlights = [r"https://(?P[^.]+)"] diff --git a/bfabric/engine/engine_suds.py b/bfabric/engine/engine_suds.py index 8db2c96f..519a9803 100644 --- a/bfabric/engine/engine_suds.py +++ b/bfabric/engine/engine_suds.py @@ -98,6 +98,4 @@ def _convert_results(self, response: Any, endpoint: str) -> ResultContainer: sort_responses=True, ) results += [result_parsed] - return ResultContainer( - results=results, total_pages_api=n_available_pages, errors=errors - ) + return ResultContainer(results=results, total_pages_api=n_available_pages, errors=errors) diff --git a/bfabric/errors.py b/bfabric/errors.py index c2259a4e..556f7899 100644 --- a/bfabric/errors.py +++ b/bfabric/errors.py @@ -13,6 +13,7 @@ def __repr__(self) -> str: class BfabricConfigError(RuntimeError): """An error that is raised when the configuration is invalid.""" + pass diff --git a/bfabric/examples/compare_zeep_suds_query.py b/bfabric/examples/compare_zeep_suds_query.py index 09dbc1a3..c510fbaa 100644 --- a/bfabric/examples/compare_zeep_suds_query.py +++ b/bfabric/examples/compare_zeep_suds_query.py @@ -10,7 +10,7 @@ from bfabric.results.response_format_dict import drop_empty_elements, map_element_keys from bfabric.engine.response_format_suds import suds_asdict_recursive -''' +""" This file is intended to eventually become a test to compare that Zeep and SUDS produce the same or at least comparable output for the same requests. Important features * Test if raw XML matches @@ -26,7 +26,8 @@ - Zeep generates additional keywords not present in XML, all of them have values None or empty list - Zeep misses some important keywords like 'id' and 'projectid' inside of nested XML, such as user->project. This behaviour is inconsistent, and only affects a fraction of users. 
-'''
+"""
+
 
 def read_zeep(wsdl, fullQuery, raw=True):
     client = zeep.Client(wsdl)
@@ -37,6 +38,7 @@ def read_zeep(wsdl, fullQuery, raw=True):
     else:
         return dict(zeep.helpers.serialize_object(ret, target_cls=dict))
 
+
 def read_suds(wsdl, fullQuery, raw=True):
     client = suds.client.Client(wsdl, cache=None, retxml=raw)
     ret = client.service.read(fullQuery)
@@ -45,18 +47,16 @@ def read_suds(wsdl, fullQuery, raw=True):
     else:
         return suds_asdict_recursive(ret, convert_types=True)
 
+
 def full_query(auth: BfabricAuth, query: dict, includedeletableupdateable: bool = False) -> dict:
     thisQuery = deepcopy(query)
-    thisQuery['includedeletableupdateable'] = includedeletableupdateable
+    thisQuery["includedeletableupdateable"] = includedeletableupdateable
+
+    return {"login": auth.login, "password": auth.password, "query": thisQuery}
 
-    return {
-        'login': auth.login,
-        'password': auth.password,
-        'query': thisQuery
-    }
 
 def calc_both(auth: BfabricAuth, config: BfabricConfig, endpoint: str, query: dict, raw: bool = True):
-    wsdl = "".join((config.base_url, '/', endpoint, "?wsdl"))
+    wsdl = "".join((config.base_url, "/", endpoint, "?wsdl"))
     fullQuery = full_query(auth, query)
     retZeep = read_zeep(wsdl, fullQuery, raw=raw)
     retSuds = read_suds(wsdl, fullQuery, raw=raw)
@@ -67,12 +67,13 @@ def calc_both(auth: BfabricAuth, config: BfabricConfig, endpoint: str, query: di
 # Raw XML tests
 ######################
 
+
 def raw_test(auth: BfabricAuth, config: BfabricConfig, endpoint, query):
     print("Testing raw XML match for", endpoint, query)
     retZeep, retSuds = calc_both(auth, config, endpoint, query, raw=True)
     assert len(retZeep) == len(retSuds)
     assert retZeep == retSuds
-    print('-- passed --')
+    print("-- passed --")
 
 
 config, auth = get_system_auth(config_env="TEST")
@@ -88,6 +89,7 @@
 # Parsed dict comparison
 ######################
 
+
 # Find the set of all basic types used in the nested container (made of dicts, ordered dicts and lists)
 def recursive_get_types(generic_container) -> set:
     if isinstance(generic_container, (dict, OrderedDict)):
@@ -105,8 +107,8 @@ def basic_data_type_match_test(auth, config, endpoint, query):
     retZeepDict, retSudsDict = calc_both(auth, config, endpoint, query, raw=False)
     typesZeep = recursive_get_types(retZeepDict)
     typesSuds = recursive_get_types(retSudsDict)
-    print('Zeep', typesZeep)
-    print('Suds', typesSuds)
+    print("Zeep", typesZeep)
+    print("Suds", typesSuds)
 
 
 # basic_data_type_match_test(auth, config, 'user', {'id': 9026})
@@ -124,19 +126,19 @@ def recursive_comparison(generic_container1, generic_container2, prefix: list) -
         allKeys = set(list(generic_container1.keys()) + list(generic_container2.keys()))
         for k in allKeys:
             if k not in generic_container1:
-                print(prefix, "Not in 1: ", k, '=', generic_container2[k])
+                print(prefix, "Not in 1: ", k, "=", generic_container2[k])
                 print("- 1:", generic_container1)
                 print("- 2:", generic_container2)
                 matched = False
             elif k not in generic_container2:
-                print(prefix, "Not in 2: ", k, '=', generic_container1[k])
+                print(prefix, "Not in 2: ", k, "=", generic_container1[k])
                 matched = False
             else:
                 matched_recursive = recursive_comparison(generic_container1[k], generic_container2[k], prefix + [k])
                 matched = matched and matched_recursive
     elif isinstance(generic_container1, list):
         if len(generic_container1) != len(generic_container2):
-            print(prefix, "length", len(generic_container1), '!=', len(generic_container2))
+            print(prefix, "length", len(generic_container1), "!=",
len(generic_container2)) matched = False else: for i, (el1, el2) in enumerate(zip(generic_container1, generic_container2)): @@ -149,8 +151,16 @@ def recursive_comparison(generic_container1, generic_container2, prefix: list) - return matched -def parsed_data_match_test(auth, config, endpoint, query, drop_empty: bool = True, - drop_underscores_suds: bool = True, log_file_path: str = None): + +def parsed_data_match_test( + auth, + config, + endpoint, + query, + drop_empty: bool = True, + drop_underscores_suds: bool = True, + log_file_path: str = None, +): print("Testing parsed data match for", endpoint, query) retZeepDict, retSudsDict = calc_both(auth, config, endpoint, query, raw=False) @@ -159,13 +169,12 @@ def parsed_data_match_test(auth, config, endpoint, query, drop_empty: bool = Tru drop_empty_elements(retSudsDict, inplace=True) if drop_underscores_suds: - map_element_keys(retSudsDict, {'_id': 'id', '_classname': 'classname', '_projectid': 'projectid'}, - inplace=True) + map_element_keys(retSudsDict, {"_id": "id", "_classname": "classname", "_projectid": "projectid"}, inplace=True) if log_file_path is not None: - with open(log_file_path, 'w') as f: + with open(log_file_path, "w") as f: with redirect_stdout(f): - matched = recursive_comparison(retZeepDict, retSudsDict, prefix = []) + matched = recursive_comparison(retZeepDict, retSudsDict, prefix=[]) else: matched = recursive_comparison(retZeepDict, retSudsDict, prefix=[]) @@ -175,8 +184,9 @@ def parsed_data_match_test(auth, config, endpoint, query, drop_empty: bool = Tru print("-- failed --") -parsed_data_match_test(auth, config, 'user', {'id': 9026}, drop_empty=True, drop_underscores_suds=True, - log_file_path=None) +parsed_data_match_test( + auth, config, "user", {"id": 9026}, drop_empty=True, drop_underscores_suds=True, log_file_path=None +) # # parsed_data_match_test(auth, config, 'user', {}, drop_empty=True, drop_underscores_suds=True, # log_file_path=None) @@ -188,4 +198,4 @@ def parsed_data_match_test(auth, config, endpoint, query, drop_empty: bool = Tru # print("Suds", retSuds['user'][0]['project'][0]) # print("Zeep", retZeep['user'][0]) -# print("Suds", retSuds['user'][0]) \ No newline at end of file +# print("Suds", retSuds['user'][0]) diff --git a/bfabric/examples/exists_multi.py b/bfabric/examples/exists_multi.py index 70340138..c20c6555 100644 --- a/bfabric/examples/exists_multi.py +++ b/bfabric/examples/exists_multi.py @@ -3,8 +3,8 @@ config, auth = get_system_auth(config_env="TEST") -b1 = Bfabric(config, auth, engine = BfabricAPIEngineType.SUDS) -b2 = Bfabric(config, auth, engine = BfabricAPIEngineType.ZEEP) +b1 = Bfabric(config, auth, engine=BfabricAPIEngineType.SUDS) +b2 = Bfabric(config, auth, engine=BfabricAPIEngineType.ZEEP) ################### @@ -23,10 +23,10 @@ # Testing Names ################### -target_workunit_names = ['tomcat', 'tomcat2'] +target_workunit_names = ["tomcat", "tomcat2"] -response1 = b1.exists("workunit", 'name', target_workunit_names) -response2 = b2.exists("workunit", 'name', target_workunit_names) +response1 = b1.exists("workunit", "name", target_workunit_names) +response2 = b2.exists("workunit", "name", target_workunit_names) print(response1) -print(response2) \ No newline at end of file +print(response2) diff --git a/bfabric/examples/zeep_debug.py b/bfabric/examples/zeep_debug.py index acbdb3dc..33ebdcab 100644 --- a/bfabric/examples/zeep_debug.py +++ b/bfabric/examples/zeep_debug.py @@ -4,7 +4,7 @@ from copy import deepcopy from lxml import etree -''' +""" Attempt to understand why Zeep 
does not correctly parse XML * Problem 1: (minor) Zeep generates additional Null fields not available in XML, but likely (hypothetically) available in XSD * Problem 2: (major) Zeep fails to parse parameters in some users. @@ -15,18 +15,15 @@ Intermediate conclusions: * Both behaviours are most likely internal bugs of Zeep. Unfortunately, developer does not respond to issues at the moment. -''' +""" def full_query(auth: BfabricAuth, query: dict, includedeletableupdateable: bool = False) -> dict: thisQuery = deepcopy(query) - thisQuery['includedeletableupdateable'] = includedeletableupdateable + thisQuery["includedeletableupdateable"] = includedeletableupdateable + + return {"login": auth.login, "password": auth.password, "query": thisQuery} - return { - 'login': auth.login, - 'password': auth.password, - 'query': thisQuery - } def read_zeep(wsdl, fullQuery, raw=True): client = zeep.Client(wsdl) @@ -39,24 +36,24 @@ def read_zeep(wsdl, fullQuery, raw=True): def read(auth: BfabricAuth, config: BfabricConfig, endpoint: str, query: dict, raw: bool = True): - wsdl = "".join((config.base_url, '/', endpoint, "?wsdl")) + wsdl = "".join((config.base_url, "/", endpoint, "?wsdl")) fullQuery = full_query(auth, query) return read_zeep(wsdl, fullQuery, raw=raw) bfconfig, bfauth = get_system_auth(config_env="TEST") -print('============== RAW ================') +print("============== RAW ================") -rez = read(bfauth, bfconfig, 'user', {'id': 9026}, raw = True) +rez = read(bfauth, bfconfig, "user", {"id": 9026}, raw=True) root = etree.fromstring(rez) print(etree.tostring(root, pretty_print=True).decode()) -rez = read(bfauth, bfconfig, 'user', {'id': 9026}, raw = False) +rez = read(bfauth, bfconfig, "user", {"id": 9026}, raw=False) -print('============== ORIG ================') -print(rez['user'][0]['project']) -print(rez['user'][0]['project']['id']) +print("============== ORIG ================") +print(rez["user"][0]["project"]) +print(rez["user"][0]["project"]["id"]) # trg = rez['project'] # diff --git a/bfabric/results/pandas_helper.py b/bfabric/results/pandas_helper.py index 3eee7d5e..2c76180e 100644 --- a/bfabric/results/pandas_helper.py +++ b/bfabric/results/pandas_helper.py @@ -15,6 +15,7 @@ def _stringify(a: Any) -> Any: else: return a + def _stringify_dict(d: dict) -> dict: """ :param d: A dictionary @@ -22,6 +23,7 @@ def _stringify_dict(d: dict) -> dict: """ return {k: _stringify(v) for k, v in d.items()} + def list_dict_to_df(l: List[Dict]) -> pd.DataFrame: """ :param l: A list of dictionaries diff --git a/bfabric/scripts/bfabric_delete.py b/bfabric/scripts/bfabric_delete.py index 0ea030bb..9d668a80 100755 --- a/bfabric/scripts/bfabric_delete.py +++ b/bfabric/scripts/bfabric_delete.py @@ -26,8 +26,8 @@ bfapp = bfabric.bfabric_legacy.BfabricLegacy() query_obj = {} - - print (len(sys.argv)) + + print(len(sys.argv)) endpoint = sys.argv[1] @@ -37,7 +37,6 @@ if endpoint in bfabric.endpoints: res = bfapp.delete_object(endpoint=endpoint, id=id) for i in res: - print (i) + print(i) else: raise "1st argument must be a valid endpoint." 
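(Aside: the reformat above keeps this legacy script's behaviour as-is, but note
two latent issues in the unchanged lines: raising a string is a TypeError on
Python 3, and `id` is unbound when no second argument is given. A possible
follow-up fix, assuming the current command-line interface stays:)

    if len(sys.argv) != 3:
        raise ValueError("usage: bfabric_delete.py <endpoint> <id>")
    endpoint, id = sys.argv[1], int(sys.argv[2])
    if endpoint not in bfabric.endpoints:
        raise ValueError("1st argument must be a valid endpoint.")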
- diff --git a/bfabric/scripts/bfabric_feeder_resource_autoQC.py b/bfabric/scripts/bfabric_feeder_resource_autoQC.py index 76a33446..e30d0f72 100755 --- a/bfabric/scripts/bfabric_feeder_resource_autoQC.py +++ b/bfabric/scripts/bfabric_feeder_resource_autoQC.py @@ -21,25 +21,26 @@ from bfabric.bfabric_legacy import BfabricLegacy -class autoQC(): +class autoQC: """ - feeder for autoQC raw files + feeder for autoQC raw files """ + bfabric_storageid = 2 - configfile = os.path.normpath("{0}/{1}".format(os.path.expanduser('~'), r'.bfabricrc.yaml')) - with open(configfile, 'r') as file: + configfile = os.path.normpath("{0}/{1}".format(os.path.expanduser("~"), r".bfabricrc.yaml")) + with open(configfile, "r") as file: config = yaml.load(file, Loader=yaml.FullLoader) - bfabric_application_ids = config['applicationId'] + bfabric_application_ids = config["applicationId"] bfapp = BfabricLegacy(verbose=False) @property def getId(self, obj): - print ("==============") - print (obj) + print("==============") + print(obj) try: - print ("DEBGUG obj: {}".format(obj[0]._id)) + print("DEBGUG obj: {}".format(obj[0]._id)) return int(obj[0]._id) except: raise @@ -56,47 +57,51 @@ def sample_check(self, projectid, name): """ try: - res = self.bfapp.read_object(endpoint='sample', - obj={'containerid': projectid, 'name': name}) + res = self.bfapp.read_object(endpoint="sample", obj={"containerid": projectid, "name": name}) except: print(res) raise + sample_type = "Biological Sample - Proteomics" + + query_autoQC01 = { + "name": "{}".format(name), + "type": sample_type, + "containerid": projectid, + "species": "Bos taurus", + "groupingvar": "A", + "samplingdate": "2018-11-15", + "description": "core4life standard: sample BSA + iRT 1:800", + } + + query_autoQC4L = { + "name": "{}".format(name), + "type": sample_type, + "containerid": projectid, + "species": "n/a", + "groupingvar": "A", + "samplingdate": "2018-11-15", + "description": "core4life standard: 6 x 5 LC-MS/MS Peptide Reference Mix", + } - sample_type = 'Biological Sample - Proteomics' - - query_autoQC01 = {'name': "{}".format(name), - 'type': sample_type, - 'containerid': projectid, - 'species': "Bos taurus", - 'groupingvar': "A", - 'samplingdate': "2018-11-15", - 'description': 'core4life standard: sample BSA + iRT 1:800'} - - query_autoQC4L = {'name': "{}".format(name), - 'type': sample_type, - 'containerid': projectid, - 'species': "n/a", - 'groupingvar': "A", - 'samplingdate': "2018-11-15", - 'description': 'core4life standard: 6 x 5 LC-MS/MS Peptide Reference Mix'} - - query_lipidQC01 = {'name': "{}".format(name), - 'type': 'Biological Sample - Metabolomics', - 'containerid': projectid, - 'species': "n/a", - 'extractionprotocolannotation': "n/a", - 'organismpart': "n/a", - 'compoundclass': "Lipids", - 'description': 'Lipidmix containing 2uM of FFA, BA, LPC. positive mode, C18.'} + query_lipidQC01 = { + "name": "{}".format(name), + "type": "Biological Sample - Metabolomics", + "containerid": projectid, + "species": "n/a", + "extractionprotocolannotation": "n/a", + "organismpart": "n/a", + "compoundclass": "Lipids", + "description": "Lipidmix containing 2uM of FFA, BA, LPC. 
positive mode, C18.", + } if res is None: - if name == 'autoQC4L': - res = self.bfapp.save_object(endpoint='sample', obj=query_autoQC4L) - elif name == 'autoQC01': - res = self.bfapp.save_object(endpoint='sample', obj=query_autoQC01) - elif name == 'lipidQC01': - res = self.bfapp.save_object(endpoint='sample', obj=query_lipidQC01) + if name == "autoQC4L": + res = self.bfapp.save_object(endpoint="sample", obj=query_autoQC4L) + elif name == "autoQC01": + res = self.bfapp.save_object(endpoint="sample", obj=query_autoQC01) + elif name == "lipidQC01": + res = self.bfapp.save_object(endpoint="sample", obj=query_lipidQC01) print(res) print(res[0]) @@ -113,9 +118,9 @@ def workunit_check(self, projectid, name, applicationid): :return: int WUID """ - query = {'projectid': projectid, 'name': name, 'applicationid': applicationid} + query = {"projectid": projectid, "name": name, "applicationid": applicationid} try: - res = self.bfapp.read_object(endpoint='workunit', obj=query) + res = self.bfapp.read_object(endpoint="workunit", obj=query) except: raise @@ -126,28 +131,34 @@ def workunit_check(self, projectid, name, applicationid): listed below. """ - if name == 'autoQC4L': - links = ['http://fgcz-ms.uzh.ch/~cpanse/autoQC4L.html', - 'http://fgcz-ms-shiny.uzh.ch:8080/bfabric_rawDiag/', - 'http://qcloud.crg.eu', - 'https://panoramaweb.org'] - elif name == 'autoQC01': - links = ['http://fgcz-ms.uzh.ch/~cpanse/autoQC01.html', - 'http://fgcz-ms-shiny.uzh.ch:8080/bfabric_rawDiag/', - 'http://qcloud.crg.eu', - 'https://panoramaweb.org'] - elif name == 'lipidQC01': + if name == "autoQC4L": + links = [ + "http://fgcz-ms.uzh.ch/~cpanse/autoQC4L.html", + "http://fgcz-ms-shiny.uzh.ch:8080/bfabric_rawDiag/", + "http://qcloud.crg.eu", + "https://panoramaweb.org", + ] + elif name == "autoQC01": + links = [ + "http://fgcz-ms.uzh.ch/~cpanse/autoQC01.html", + "http://fgcz-ms-shiny.uzh.ch:8080/bfabric_rawDiag/", + "http://qcloud.crg.eu", + "https://panoramaweb.org", + ] + elif name == "lipidQC01": description = "Contains automatic registered quality control (QC) measurements, positive mode." 
- links = ['http://fgcz-ms.uzh.ch/~cpanse/lipidQC01.html'] + links = ["http://fgcz-ms.uzh.ch/~cpanse/lipidQC01.html"] if res is None: - query = {'projectid': projectid, 'name': name, - 'applicationid': applicationid, - 'description': description, - 'link': links} + query = { + "projectid": projectid, + "name": name, + "applicationid": applicationid, + "description": description, + "link": links, + } - res = self.bfapp.save_object(endpoint='workunit', - obj=query) + res = self.bfapp.save_object(endpoint="workunit", obj=query) else: pass @@ -172,36 +183,34 @@ def resource_check(self, projectid, name, workunitid, filename, filedate, size, _file_date = time.strftime("%FT%H:%M:%S-01:00", time.gmtime(int(filedate))) query = { - 'filechecksum': md5, - 'workunitid': workunitid, - 'projectid': projectid, + "filechecksum": md5, + "workunitid": workunitid, + "projectid": projectid, } try: - res = self.bfapp.read_object(endpoint='resource', obj=query) + res = self.bfapp.read_object(endpoint="resource", obj=query) except: raise if res is None: query = { - 'workunitid': workunitid, - 'sampleid': sampleid, - 'filechecksum': md5, - 'relativepath': filename, - 'name': os.path.basename(filename), - 'status': 'available', - 'size': size, - 'storageid': self.bfabric_storageid + "workunitid": workunitid, + "sampleid": sampleid, + "filechecksum": md5, + "relativepath": filename, + "name": os.path.basename(filename), + "status": "available", + "size": size, + "storageid": self.bfabric_storageid, } - res = self.bfapp.save_object(endpoint='resource', obj=query) + res = self.bfapp.save_object(endpoint="resource", obj=query) - query = {'id': workunitid, 'status': 'available'} - res2 = self.bfapp.save_object(endpoint='workunit', obj=query) + query = {"id": workunitid, "status": "available"} + res2 = self.bfapp.save_object(endpoint="workunit", obj=query) return res[0]._id - - def feed(self, line): """ feeds one line example: @@ -209,51 +218,52 @@ def feed(self, line): :return: """ - try: (_md5, _file_date, _file_size, filename) = line.split(";") except Exception as err: return try: - m = re.search(r"p([0-9]+)\/((Metabolomics|Proteomics)\/[A-Z]+_[1-9])\/.*(autoQC01|autoQC4L|lipidQC01).+raw$", - filename) + m = re.search( + r"p([0-9]+)\/((Metabolomics|Proteomics)\/[A-Z]+_[1-9])\/.*(autoQC01|autoQC4L|lipidQC01).+raw$", filename + ) projectid = m.group(1) applicationid = self.bfabric_application_ids[m.group(2)] autoQCType = m.group(4) - except Exception as err: - print ("# no match '{}'.".format(filename)) + print("# no match '{}'.".format(filename)) return - print ("{}\t{}\t{}\n".format(projectid, applicationid, autoQCType)) + print("{}\t{}\t{}\n".format(projectid, applicationid, autoQCType)) try: sampleid = self.sample_check(projectid, name=autoQCType) sys.exit(0) - #print sampleid + # print sampleid workunitid = self.workunit_check(projectid, name=autoQCType, applicationid=applicationid) - #print "WUID={}".format(workunitid) - - resourceid = self.resource_check(projectid=projectid, name=os.path.basename(filename), - workunitid=workunitid, - filename=filename, - filedate=_file_date, - size=_file_size, - md5=_md5, - sampleid=sampleid) + # print "WUID={}".format(workunitid) + + resourceid = self.resource_check( + projectid=projectid, + name=os.path.basename(filename), + workunitid=workunitid, + filename=filename, + filedate=_file_date, + size=_file_size, + md5=_md5, + sampleid=sampleid, + ) # sampleid=0 - print ("p{p}\tA{A}\t{filename}\tS{S}\tWU{WU}\tR{R}".format(p=projectid, - A=applicationid, - filename=filename, - 
S=sampleid, - WU=workunitid, - R=resourceid)) + print( + "p{p}\tA{A}\t{filename}\tS{S}\tWU{WU}\tR{R}".format( + p=projectid, A=applicationid, filename=filename, S=sampleid, WU=workunitid, R=resourceid + ) + ) except Exception as err: - print('# Failed to register to bfabric: {}'.format(err)) + print("# Failed to register to bfabric: {}".format(err)) class TestCaseAutoQC(unittest.TestCase): @@ -261,6 +271,7 @@ class TestCaseAutoQC(unittest.TestCase): python -m unittest bfabric_feeder_resource_autoQC """ + BF = autoQC() def setUp(self): @@ -268,13 +279,14 @@ def setUp(self): def test_feed(self): line = "61cf7e172713344bdf6ebe5b1ed61d99;1549963879;306145606;p2928/Proteomics/QEXACTIVEHF_2/ciuffar_20190211_190211_TNF_PRM_rT_again_AQUA_LHration/20190211_013_autoQC4L.raw" - #self.BF.feed(line) + # self.BF.feed(line) line = "efdf5e375d6e0e4e4abf9c2b3e1e97d5;1542134408;59129652;p1000/Proteomics/QEXACTIVEHF_2/tobiasko_20181113/20181113_003_autoQC01.raw" - #self.BF.feed(line) + # self.BF.feed(line) line = "d0412c1aae029d21bb261c1e4c682ea9;1549441215;207803452;p2947/Metabolomics/QEXACTIVE_3/sstreb_20190206_o5292/p2947_o5292_20190205_FFA_BA_LPC_2um_lipidQC01_1.raw" self.BF.feed(line) -if __name__ == '__main__': + +if __name__ == "__main__": BF = autoQC() for input_line in sys.stdin: diff --git a/bfabric/scripts/bfabric_list_not_existing_storage_directories.py b/bfabric/scripts/bfabric_list_not_existing_storage_directories.py index f5b227af..1784a8ef 100755 --- a/bfabric/scripts/bfabric_list_not_existing_storage_directories.py +++ b/bfabric/scripts/bfabric_list_not_existing_storage_directories.py @@ -19,16 +19,16 @@ B = bfabric.bfabric_legacy.BfabricLegacy() -ROOTDIR="/srv/www/htdocs/" +ROOTDIR = "/srv/www/htdocs/" + def listNotExistingStorageDirs(technologyid=2): - rv = B.read_object('container', {'technologyid': technologyid}) + rv = B.read_object("container", {"technologyid": technologyid}) containerIDs = list(set(map(lambda x: x._id, rv))) - for cid in containerIDs: if not os.path.isdir("{}/p{}".format(ROOTDIR, cid)): - print (cid) + print(cid) listNotExistingStorageDirs(technologyid=2) diff --git a/bfabric/scripts/bfabric_read.py b/bfabric/scripts/bfabric_read.py index 041be111..5b22e16c 100755 --- a/bfabric/scripts/bfabric_read.py +++ b/bfabric/scripts/bfabric_read.py @@ -27,9 +27,7 @@ from bfabric import Bfabric, BfabricConfig -def bfabric_read( - client: Bfabric, endpoint: str, attribute: str | None, value: str | None, output_format: str -) -> None: +def bfabric_read(client: Bfabric, endpoint: str, attribute: str | None, value: str | None, output_format: str) -> None: """Reads one or several items from a B-Fabric endpoint and prints them.""" if attribute is not None and value is None: message = "value must be provided if attribute is provided" diff --git a/bfabric/scripts/bfabric_save_workunit.py b/bfabric/scripts/bfabric_save_workunit.py index cec04fcd..ce2bc502 100755 --- a/bfabric/scripts/bfabric_save_workunit.py +++ b/bfabric/scripts/bfabric_save_workunit.py @@ -25,6 +25,8 @@ if __name__ == "__main__": bfapp = bfabric.bfabric_legacy.BfabricLegacy() - - workunit = bfapp.save_object(endpoint='workunit', obj={'name': 'MaxQuant report', 'projectid': '1000', 'applicationid': 217, 'status': 'available'}) - print (workunit) + workunit = bfapp.save_object( + endpoint="workunit", + obj={"name": "MaxQuant report", "projectid": "1000", "applicationid": 217, "status": "available"}, + ) + print(workunit) diff --git a/bfabric/scripts/bfabric_setResourceStatus_available.py 
b/bfabric/scripts/bfabric_setResourceStatus_available.py index be94a6b6..45c678c8 100755 --- a/bfabric/scripts/bfabric_setResourceStatus_available.py +++ b/bfabric/scripts/bfabric_setResourceStatus_available.py @@ -29,6 +29,7 @@ def set_resource_status_available(client: Bfabric, resource_id: list[int]) -> No print(f"failed to set resourceid {resource_id} 'available'.") raise + def main() -> None: """Parses command line arguments and calls `set_resource_status_available`.""" parser = argparse.ArgumentParser() diff --git a/bfabric/tests/integration/test_bfabric2_bad_requests.py b/bfabric/tests/integration/test_bfabric2_bad_requests.py index a37d475e..0e1c0f42 100755 --- a/bfabric/tests/integration/test_bfabric2_bad_requests.py +++ b/bfabric/tests/integration/test_bfabric2_bad_requests.py @@ -20,30 +20,20 @@ def setUp(self): # Init the engines self.clients = { "zeep": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP), - "suds": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS) + "suds": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS), } def _test_non_existing_read(self, engine_name: str): # NOTE: Currently a bad read request simply returns no matches, but does not throw errors - res = self.clients[engine_name].read('user', {'id': 'cat'}).to_list_dict() + res = self.clients[engine_name].read("user", {"id": "cat"}).to_list_dict() self.assertEqual([], res) def _test_forbidden_save(self, engine_name: str): # Test what happens if we save to an endpoint that does not accept saving - self.assertRaises( - BfabricRequestError, - self.clients[engine_name].save, - 'project', - {'name': 'TheForbiddenPlan'} - ) + self.assertRaises(BfabricRequestError, self.clients[engine_name].save, "project", {"name": "TheForbiddenPlan"}) def _test_wrong_delete(self, engine_name: str): - self.assertRaises( - RuntimeError, - self.clients[engine_name].delete, - 'workunit', - 101010101010101 - ) + self.assertRaises(RuntimeError, self.clients[engine_name].delete, "workunit", 101010101010101) def test_non_existing_read_when_suds(self): self._test_non_existing_read("suds") diff --git a/bfabric/tests/integration/test_bfabric2_exists.py b/bfabric/tests/integration/test_bfabric2_exists.py index 673726b3..b4c025b0 100644 --- a/bfabric/tests/integration/test_bfabric2_exists.py +++ b/bfabric/tests/integration/test_bfabric2_exists.py @@ -10,7 +10,7 @@ def setUp(self): def _test_single_exists(self, engine: BfabricAPIEngineType): bf = Bfabric(self.config, self.auth, engine=engine) - res = bf.exists('dataset', 'id', 30721) # Take ID which is the same as in production + res = bf.exists("dataset", "id", 30721) # Take ID which is the same as in production self.assertEqual(res, True) def test_zeep(self): diff --git a/bfabric/tests/integration/test_bfabric2_read.py b/bfabric/tests/integration/test_bfabric2_read.py index fb307284..50117080 100755 --- a/bfabric/tests/integration/test_bfabric2_read.py +++ b/bfabric/tests/integration/test_bfabric2_read.py @@ -19,7 +19,7 @@ def setUp(self, *args, **kwargs): # Init the engines self.clients = { "zeep": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP), - "suds": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS) + "suds": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS), } def read(self, engine: str, endpoint: str): @@ -86,7 +86,7 @@ def test_invalid_auth(self): auth = BfabricAuth(login=self.auth.login, password="invalid_password") clients = { "zeep": Bfabric(self.config, auth, 
engine=BfabricAPIEngineType.ZEEP), - "suds": Bfabric(self.config, auth, engine=BfabricAPIEngineType.SUDS) + "suds": Bfabric(self.config, auth, engine=BfabricAPIEngineType.SUDS), } for engine, bf in clients.items(): with self.subTest(engine=engine): diff --git a/bfabric/tests/integration/test_bfabric2_save_delete.py b/bfabric/tests/integration/test_bfabric2_save_delete.py index c7c53074..f93a1fd3 100644 --- a/bfabric/tests/integration/test_bfabric2_save_delete.py +++ b/bfabric/tests/integration/test_bfabric2_save_delete.py @@ -16,7 +16,7 @@ def _find_delete_existing_objects_by_name(b: Bfabric, endpoint: str, name_list: """ # 1. Check which objects exist - objs_exist = b.exists(endpoint, 'name', name_list) + objs_exist = b.exists(endpoint, "name", name_list) objs_exist_names = [name for i, name in enumerate(name_list) if objs_exist[i]] if len(objs_exist_names) == 0: @@ -28,15 +28,15 @@ def _find_delete_existing_objects_by_name(b: Bfabric, endpoint: str, name_list: ids_to_delete = [] for name in objs_exist_names: # 2.1 Get IDs of all existing workunits - response_dict = b.read(endpoint, {'name': name}).to_list_dict() - ids_this = [r['id'] for r in response_dict] + response_dict = b.read(endpoint, {"name": name}).to_list_dict() + ids_this = [r["id"] for r in response_dict] - print('--', name, 'exist with ids', ids_this) + print("--", name, "exist with ids", ids_this) ids_to_delete += ids_this # Delete delete_response_dict = b.delete(endpoint, ids_to_delete).to_list_dict() - print('Deletion results:', delete_response_dict) + print("Deletion results:", delete_response_dict) return objs_exist_names, delete_response_dict @@ -57,9 +57,9 @@ def _save_delete_workunit(self, b: Bfabric, verbose: bool = False) -> None: :return: """ - endpoint = 'workunit' - workunit_names = ['MewThePokemon', 'TomMGM', 'MinkyLeChat'] - fake_name = 'SpikeTheDog' + endpoint = "workunit" + workunit_names = ["MewThePokemon", "TomMGM", "MinkyLeChat"] + fake_name = "SpikeTheDog" all_names = workunit_names + [fake_name] # 1. Find and delete any workunits with these names, if they already exist @@ -70,24 +70,29 @@ def _save_delete_workunit(self, b: Bfabric, verbose: bool = False) -> None: print("Phase 2: Creating the target units") new_ids = [] for name in workunit_names: - workunit1 = {'name': name, 'applicationid': 2, 'description': 'is warm and fluffy', 'containerid': 3000} - response = b.save('workunit', workunit1).to_list_dict() # We do the conversion to drop underscores in SUDS + workunit1 = {"name": name, "applicationid": 2, "description": "is warm and fluffy", "containerid": 3000} + response = b.save("workunit", workunit1).to_list_dict() # We do the conversion to drop underscores in SUDS if verbose: print(response[0]) self.assertEqual(len(response), 1, msg="Expected a single response from a single saved workunit") - new_ids += [response[0]['id']] + new_ids += [response[0]["id"]] # 3. 
Find and delete any workunits with these names, now that they have been created print("Phase 3: Finding and deleting the created work units, checking if they match expectation") found_names, deleted_responses = _find_delete_existing_objects_by_name(b, endpoint, all_names) - self.assertEqual(found_names, workunit_names, msg="Expected the names found in the API to be the ones we just created") + self.assertEqual( + found_names, workunit_names, msg="Expected the names found in the API to be the ones we just created" + ) for resp, trg_id in zip(deleted_responses, new_ids): self.assertEqual(len(resp), 1, msg="Deletion response format unexpected") - self.assertIn('deletionreport', resp, msg="Deletion response format unexpected") - self.assertEqual(resp['deletionreport'], 'Workunit ' + str(trg_id) + ' removed successfully.', - msg="Deletion response format unexpected") + self.assertIn("deletionreport", resp, msg="Deletion response format unexpected") + self.assertEqual( + resp["deletionreport"], + "Workunit " + str(trg_id) + " removed successfully.", + msg="Deletion response format unexpected", + ) def test_zeep(self): bZeep = Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP) diff --git a/bfabric/tests/unit/test_bfabric_config.py b/bfabric/tests/unit/test_bfabric_config.py index f79d8be6..dd10e6a2 100644 --- a/bfabric/tests/unit/test_bfabric_config.py +++ b/bfabric/tests/unit/test_bfabric_config.py @@ -118,23 +118,21 @@ def test_read_yml_when_empty_optional(self): # TODO delete if no mandatory fields are reintroduced # Test that missing authentication will raise an error if required - #def test_read_yml_when_empty_mandatory(self): + # def test_read_yml_when_empty_mandatory(self): # with self.assertRaises(BfabricConfigError): # read_config(self.example_config_path, config_env="STANDBY") def test_repr(self): rep = repr(self.config) self.assertEqual( - "BfabricConfig(base_url='url', application_ids={'app': 1}, " - "job_notification_emails='')", + "BfabricConfig(base_url='url', application_ids={'app': 1}, " "job_notification_emails='')", rep, ) def test_str(self): rep = str(self.config) self.assertEqual( - "BfabricConfig(base_url='url', application_ids={'app': 1}, " - "job_notification_emails='')", + "BfabricConfig(base_url='url', application_ids={'app': 1}, " "job_notification_emails='')", rep, ) diff --git a/bfabric/tests/unit/test_dict_helper.py b/bfabric/tests/unit/test_dict_helper.py index e9df1595..ee136824 100644 --- a/bfabric/tests/unit/test_dict_helper.py +++ b/bfabric/tests/unit/test_dict_helper.py @@ -6,7 +6,7 @@ class BfabricTestSortDict(unittest.TestCase): def test_sort_dict(self): # Main purpose of dictionary sorting is that they appear consistent when printed - d = {'c': 5, 'b': 10} + d = {"c": 5, "b": 10} d_sorted = sort_dict(d) self.assertEqual(str(d_sorted), "{'b': 10, 'c': 5}") diff --git a/bfabric/tests/unit/test_pandas_helper.py b/bfabric/tests/unit/test_pandas_helper.py index d8c9f91b..a65fcba3 100644 --- a/bfabric/tests/unit/test_pandas_helper.py +++ b/bfabric/tests/unit/test_pandas_helper.py @@ -8,17 +8,17 @@ class BfabricTestPandasHelper(unittest.TestCase): def test_list_dict_to_df(self): # Main purpose of dictionary sorting is that they appear consistent when printed example_list_dict = [ - {'cat': 1, 'dog': 2}, - {'cat': 3, 'rat': ["a", "b"]}, - {'rat': 5}, - {'cat': 1, 'dog': 2, 'rat': 7}, + {"cat": 1, "dog": 2}, + {"cat": 3, "rat": ["a", "b"]}, + {"rat": 5}, + {"cat": 1, "dog": 2, "rat": 7}, ] df = pandas_helper.list_dict_to_df(example_list_dict) - 
self.assertEqual(list(df.columns), ['cat', 'dog', 'rat']) - np.testing.assert_equal(list(df['cat']), [1, 3, np.nan, 1]) - np.testing.assert_equal(list(df['dog']), [2, np.nan, np.nan, 2]) - np.testing.assert_equal(list(df['rat']), [np.nan, "['a', 'b']", 5, 7]) + self.assertEqual(list(df.columns), ["cat", "dog", "rat"]) + np.testing.assert_equal(list(df["cat"]), [1, 3, np.nan, 1]) + np.testing.assert_equal(list(df["dog"]), [2, np.nan, np.nan, 2]) + np.testing.assert_equal(list(df["rat"]), [np.nan, "['a', 'b']", 5, 7]) if __name__ == "__main__": diff --git a/bfabric/tests/unit/test_response_format_dict.py b/bfabric/tests/unit/test_response_format_dict.py index 054be593..35c312b3 100644 --- a/bfabric/tests/unit/test_response_format_dict.py +++ b/bfabric/tests/unit/test_response_format_dict.py @@ -5,24 +5,24 @@ class BfabricTestResponseFormatDict(unittest.TestCase): def test_drop_empty_elements(self): # Should delete all hierarchical instances of key-value pairs, where value is None or empty dict - input_list_dict = [{'a': [], 'b': [1, {'aa': 14, 'gg': None}], 'c': []}, {'zz': None, 'uu': 'cat'}] - target_list_dict = [{'b': [1, {'aa': 14}]}, {'uu': 'cat'}] + input_list_dict = [{"a": [], "b": [1, {"aa": 14, "gg": None}], "c": []}, {"zz": None, "uu": "cat"}] + target_list_dict = [{"b": [1, {"aa": 14}]}, {"uu": "cat"}] output_list_dict = response_format_dict.drop_empty_elements(input_list_dict, inplace=False) self.assertEqual(output_list_dict, target_list_dict) def test_map_element_keys(self): # Main use is to delete underscores in specific keys - input_list_dict = [{'a': [], 'b': [1, {'_aa': 14, 'gg': None}], 'c': []}, {'zz': None, 'uu': 'cat'}] - target_list_dict = [{'a': [], 'b': [1, {'aa': 14, 'gg': None}], 'c': []}, {'zz': None, 'uu': 'cat'}] + input_list_dict = [{"a": [], "b": [1, {"_aa": 14, "gg": None}], "c": []}, {"zz": None, "uu": "cat"}] + target_list_dict = [{"a": [], "b": [1, {"aa": 14, "gg": None}], "c": []}, {"zz": None, "uu": "cat"}] - output_list_dict = response_format_dict.map_element_keys(input_list_dict, {'_aa': 'aa'}, inplace=False) + output_list_dict = response_format_dict.map_element_keys(input_list_dict, {"_aa": "aa"}, inplace=False) self.assertEqual(output_list_dict, target_list_dict) def test_sort_dicts_by_key(self): # NOTE: The main purpose of sorting is to ensure consistent string representation - input_list_dict = [{'b': 1, 'a': 2, 'c': 3}, {'dog': 25, 'cat': [1,2,3]}] - target_list_dict = [{'a': 2, 'b': 1, 'c': 3}, {'cat': [1,2,3], 'dog': 25}] + input_list_dict = [{"b": 1, "a": 2, "c": 3}, {"dog": 25, "cat": [1, 2, 3]}] + target_list_dict = [{"a": 2, "b": 1, "c": 3}, {"cat": [1, 2, 3], "dog": 25}] output_list_dict = response_format_dict.sort_dicts_by_key(input_list_dict, inplace=False) self.assertEqual(str(output_list_dict), str(target_list_dict)) diff --git a/bfabric/tests/unit/test_result_container.py b/bfabric/tests/unit/test_result_container.py index 6bdd26d9..8ced19ab 100644 --- a/bfabric/tests/unit/test_result_container.py +++ b/bfabric/tests/unit/test_result_container.py @@ -8,9 +8,7 @@ class BfabricTestResultContainer(unittest.TestCase): def setUp(self): self.res1 = ResultContainer([1, 2, 3], total_pages_api=1) self.res2 = ResultContainer([4, 5], total_pages_api=1) - self.res_with_empty = ResultContainer( - [{"a": None, "b": 1, "c": []}, {"a": 2, "b": 3, "c": None}] - ) + self.res_with_empty = ResultContainer([{"a": None, "b": 1, "c": []}, {"a": 2, "b": 3, "c": None}]) def test_str(self): self.assertEqual("[1, 2, 3]", str(self.res1)) diff --git 
a/bfabric/utils/math_helper.py b/bfabric/utils/math_helper.py
index b55b8018..7e20278f 100644
--- a/bfabric/utils/math_helper.py
+++ b/bfabric/utils/math_helper.py
@@ -1,5 +1,3 @@
-
-
 def div_int_ceil(n: int, d: int) -> int:
     """
     :param n: Numerator

From f5aab811c0d3ac3a056aecd94ed256fc4d42be94 Mon Sep 17 00:00:00 2001
From: Aleksejs Fomins
Date: Mon, 13 May 2024 16:03:53 +0200
Subject: [PATCH 094/129] Integration test for time range queries

---
 bfabric/engine/engine_zeep.py                 |  6 +-
 .../test_bfabric2_time_inequality_query.py    | 91 +++++++++++++++++++
 2 files changed, 96 insertions(+), 1 deletion(-)
 create mode 100755 bfabric/tests/integration/test_bfabric2_time_inequality_query.py

diff --git a/bfabric/engine/engine_zeep.py b/bfabric/engine/engine_zeep.py
index 2f32094f..b6db0284 100644
--- a/bfabric/engine/engine_zeep.py
+++ b/bfabric/engine/engine_zeep.py
@@ -115,7 +115,11 @@ def _convert_results(self, response: Any, endpoint: str) -> ResultContainer:
         return ResultContainer(results=results, total_pages_api=n_available_pages, errors=errors)
 
 
-# TODO: Check if this is a bug of BFabric or Zeep. Specifically, see if the same call to bFabricPy has the same bug
+# TODO: The reason why Zeep requires certain values to be explicitly skipped remains unclear.
+#   To the best of our current understanding, the fields are actually optional, but because of some differences in
+#   formatting they appear to Zeep as compulsory. The current workaround is the one suggested by the Zeep
+#   developers, but it is a hack and should ideally be handled inside Zeep itself.
+#   If the developers of Zeep ever resume its maintenance, it would make sense to revisit this workaround.
 def _zeep_query_append_skipped(query: dict, skipped_keys: list, inplace: bool = False, overwrite: bool = False) -> dict:
     """
     This function is used to fix a buggy behaviour of Zeep/BFabric. Specifically, Zeep does not return correct
diff --git a/bfabric/tests/integration/test_bfabric2_time_inequality_query.py b/bfabric/tests/integration/test_bfabric2_time_inequality_query.py
new file mode 100755
index 00000000..91264f07
--- /dev/null
+++ b/bfabric/tests/integration/test_bfabric2_time_inequality_query.py
@@ -0,0 +1,91 @@
+import unittest
+from datetime import datetime, timedelta
+
+
+from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth
+
+
+class BfabricTestTimeInequalityQuery(unittest.TestCase):
+    def setUp(self):
+        # Load config and authentication
+        self.config, self.auth = get_system_auth(config_env="TEST")
+
+        # Init the engines
+        self.clients = {
+            "zeep": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP),
+            "suds": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS)
+        }
+
+    def _test_created_before_after(self, engine: str):
+        with self.subTest(engine=engine):
+            n_resources = 50
+            bf = self.clients[engine]
+
+            # 0. Create a workunit
+            query = {
+                'name': 'CatPetter9000',
+                'applicationid': 1,
+                'containerid': 3000,
+                'description': 'Best cat petter ever',
+            }
+            res = bf.save('workunit', query).to_list_dict()
+            self.assertIsNotNone(res)
+            self.assertEqual(len(res), 1)
+            self.assertIn('id', res[0])
+            workunit_id = res[0]['id']
+
+            # 1. Create a bunch of resources
+            resource_ids = []
+            resource_created = []
+            for i_resource in range(n_resources):
+                query = {
+                    'name': 'kitten_' + str(i_resource),
+                    # 'sampleid': 1,
+                    'filechecksum': 0,
+                    'relativepath': '/catpath/kitten_' + str(i_resource),
+                    'size': 0,
+                    'status': "pending",
+                    'storageid': 1,
+                    'workunitid': workunit_id
+                }
+
+                res = bf.save('resource', query).to_list_dict()
+                self.assertIsNotNone(res)
+                self.assertEqual(len(res), 1)
+                self.assertIn('id', res[0])
+                self.assertIn('created', res[0])
+
+                resource_ids += [res[0]['id']]
+                resource_created += [datetime.fromisoformat(res[0]['created'])]
+
+            # 2. attempt to find the resources we just created by datetime
+            # NOTE: 'createdbefore' is padded by one second so the newest resource is not lost to boundary effects
+            query = {
+                'workunitid': workunit_id,
+                'createdbefore': str(max(resource_created) + timedelta(seconds=1)),
+                'createdafter': str(min(resource_created)),
+            }
+            results = bf.read('resource', query, idonly=True).to_list_dict()
+
+            # 3. delete all created resources. Do this before the assertions, so as not to leave undeleted resources behind if possible
+            bf.delete('resource', resource_ids)
+            bf.delete('workunit', workunit_id)
+
+            # 4. Check that the found resources are the ones we created
+            # NOTE: We might find more resources if somebody created resources at the same time as us.
+            #    Hence, we are testing for a subset, not an exact match
+            resource_ids_found = [r['id'] for r in results]
+            isSubset = set(resource_ids).issubset(set(resource_ids_found))
+            # if not isSubset:
+            #     print(min(resource_ids), max(resource_ids), set(resource_ids) - set(resource_ids_found), set(resource_ids_found) - set(resource_ids))
+
+            self.assertTrue(isSubset)
+
+
+    def test_created(self):
+        self._test_created_before_after("suds")
+        self._test_created_before_after("zeep")
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)

From cd6b179c5be3227ce0c86f8fbf056fd10809e5af Mon Sep 17 00:00:00 2001
From: Leonardo Schwarz
Date: Thu, 16 May 2024 13:52:58 +0200
Subject: [PATCH 095/129] update project files

---
 .github/actions/setup-bfabricpy/action.yml |  1 +
 .github/workflows/run_unit_tests.yml       | 10 ++++++++
 Makefile                                   |  2 +-
 README.md                                  | 27 +++++++++++++++-------
 pyproject.toml                             |  6 ++---
 5 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/.github/actions/setup-bfabricpy/action.yml b/.github/actions/setup-bfabricpy/action.yml
index 0543a337..42de3bea 100644
--- a/.github/actions/setup-bfabricpy/action.yml
+++ b/.github/actions/setup-bfabricpy/action.yml
@@ -1,4 +1,5 @@
 name: "Setup bfabricPy"
+description: "Set up bfabricPy for use in GitHub Actions"
 inputs:
   python-version:
     description: "Python version to use"
diff --git a/.github/workflows/run_unit_tests.yml b/.github/workflows/run_unit_tests.yml
index aebbd7ba..a18af677 100644
--- a/.github/workflows/run_unit_tests.yml
+++ b/.github/workflows/run_unit_tests.yml
@@ -29,6 +29,16 @@ jobs:
       - name: Check code with ruff
         run: ruff bfabric || true
 
+  list_todos:
+    name: List TODOs
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - run: sudo apt-get install -y ripgrep
+        name: Install ripgrep
+      - run:
+          rg -n TODO bfabric
+        name: List TODOs
 license_check:
     name: License Check
     runs-on: ubuntu-latest
diff --git a/Makefile b/Makefile
index fa96778b..c6d3000e 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@ install:
 	pip install -e .
install_dev: - pip install -e ".[dev]" + pip install -e ".[dev]" clean: rm -vf dist/* diff --git a/README.md b/README.md index 0db87501..c0d990c9 100644 --- a/README.md +++ b/README.md @@ -15,24 +15,35 @@ For more advanced users the *bfabricPy* package also provides a powerful query i ![bfabricPy-read](https://user-images.githubusercontent.com/4901987/65025926-db77c900-d937-11e9-8c92-f2412d6793ee.gif) [see also #14](https://github.com/fgcz/bfabricPy/issues/14) -## Requirements -- install current stable Debian Linux release (any current BSD like or Microsoft OS will do) +## Install +There are many ways to install Python packages. +Generally it's recommended to use some type of virtual environment manager, like [conda](https://docs.conda.io/en/latest/), [uv](https://github.com/astral-sh/uv), or Python's [venv](https://docs.python.org/3/library/venv.html). Then the following commands work. +If you don't, you might need to specify `--user` to the pip commands, so they get installed into the user's Python package directory. + +To use bfabricPy a normal installation is good enough: +```{bash} +pip install git+https://github.com/fgcz/bfabricPy.git +``` -- install the python3 package as follows: +As a user: (i.e. a regular install, files will be used from your current directory instead of properly installing a copy of it) ```{bash} -git clone git@github.com:fgcz/bfabricPy.git \ - && cd bfabricPy +# variant 1) clone to a folder +git clone https://github.com/fgcz/bfabricPy.git && cd bfabricPy +pip install . + +# variant 2) direct install from GitHub +pip install git+https://github.com/fgcz/bfabricPy.git ``` -## Install +As a bfabricPy developer: (i.e. an editable install) ```{bash} -python3 setup.py install --user +pip install -e ".[dev]" ``` -## Configuration +## Configuration [outdated] ```{bash} cat ~/.bfabricpy.yml diff --git a/pyproject.toml b/pyproject.toml index f8447c1e..4277722e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ Repository = "https://github.com/fgcz/bfabricPy" [project.scripts] "bfabric_flask.py"="bfabric.scripts.bfabric_flask:main" #bfabric_feeder_resource_autoQC="bfabric.scripts.bfabric_feeder_resource_autoQC:main" -#bfabric_list_not_existing_storage_directories="bfabric.scripts.bfabric_list_not_existing_storage_directories:main" +"bfabric_list_not_existing_storage_directories.py"="bfabric.scripts.bfabric_list_not_existing_storage_directories:main" "bfabric_list_not_available_proteomics_workunits.py"="bfabric.scripts.bfabric_list_not_available_proteomics_workunits:main" "bfabric_upload_resource.py"="bfabric.scripts.bfabric_upload_resource:main" "bfabric_logthis.py"="bfabric.scripts.bfabric_logthis:main" @@ -53,7 +53,7 @@ Repository = "https://github.com/fgcz/bfabricPy" "bfabric_setWorkunitStatus_available.py"="bfabric.scripts.bfabric_setWorkunitStatus_generic:main_available" "bfabric_setWorkunitStatus_processing.py"="bfabric.scripts.bfabric_setWorkunitStatus_processing:main_processing" "bfabric_setWorkunitStatus_failed.py"="bfabric.scripts.bfabric_setWorkunitStatus_failed:main_failed" -#bfabric_delete="bfabric.scripts.bfabric_delete:main" +"bfabric_delete.py"="bfabric.scripts.bfabric_delete:main" "bfabric_read.py"="bfabric.scripts.bfabric_read:main" "bfabric_read_samples_of_workunit.py"="bfabric.scripts.bfabric_read_samples_of_workunit:main" "bfabric_read_samples_from_dataset.py"="bfabric.scripts.bfabric_read_samples_from_dataset:main" @@ -76,7 +76,7 @@ indent-width = 4 target-version = "py39" [tool.ruff.lint] -select = ["D103", "E", "F", "ANN", "PTH", 
"UP", "BLE", "SIM"] +select = ["ANN", "BLE", "D103", "E", "F", "PLW", "PTH", "SIM", "UP"] ignore = ["ANN101"] [tool.licensecheck] From 8e6a6e101d3013297605d8c8b1f5edf591eb4d46 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 16 May 2024 13:53:45 +0200 Subject: [PATCH 096/129] add polars conversion convenience method --- bfabric/results/result_container.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/bfabric/results/result_container.py b/bfabric/results/result_container.py index 88f9d5cd..8ae2c1f9 100644 --- a/bfabric/results/result_container.py +++ b/bfabric/results/result_container.py @@ -1,10 +1,13 @@ from __future__ import annotations import logging -from typing import Any +from typing import Any, TYPE_CHECKING import bfabric.results.response_format_dict as formatter +if TYPE_CHECKING: + import polars + class ResultContainer: """Container structure for query results.""" @@ -87,6 +90,13 @@ def to_list_dict(self, drop_empty: bool = False) -> list[dict[str, Any]]: else: return self.results + def to_polars(self, drop_empty: bool = False) -> polars.DataFrame: + """Returns the results as a polars DataFrame. + :param drop_empty: If True, empty attributes will be removed from the results + """ + import polars + return polars.DataFrame(self.to_list_dict(drop_empty=drop_empty)) + def _clean_result(result: dict, drop_underscores_suds: bool = True, sort_responses: bool = False) -> dict: """ From 35fbfcf90ff68f7c4cbdc04aee641eda206bcd66 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 16 May 2024 13:54:45 +0200 Subject: [PATCH 097/129] update some more proteomics scripts --- bfabric/scripts/bfabric_delete.py | 43 +-- bfabric/scripts/bfabric_feeder_mascot.py | 324 +++++++++--------- .../scripts/bfabric_feeder_resource_autoQC.py | 105 +++--- ...c_list_not_existing_storage_directories.py | 28 +- .../bfabric_save_importresource_sample.py | 40 ++- bfabric/scripts/bfabric_save_workunit.py | 32 -- .../scripts/fgcz_maxquant_scaffold-wrapper.py | 92 ++--- bfabric/scripts/fgcz_maxquant_wrapper.py | 140 ++++---- 8 files changed, 369 insertions(+), 435 deletions(-) delete mode 100755 bfabric/scripts/bfabric_save_workunit.py diff --git a/bfabric/scripts/bfabric_delete.py b/bfabric/scripts/bfabric_delete.py index 9d668a80..a1ee7681 100755 --- a/bfabric/scripts/bfabric_delete.py +++ b/bfabric/scripts/bfabric_delete.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """ Copyright (C) 2014 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
@@ -9,34 +7,29 @@ Christian Panse Licensed under GPL version 3 - -$HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/scripts/bfabric_delete.py $ -$Id: bfabric_delete.py 2525 2016-10-17 09:52:59Z cpanse $ - - - -http://fgcz-bfabric.uzh.ch/bfabric/executable?wsdl - """ +import argparse +import json -import sys -import bfabric.bfabric_legacy +import bfabric +from bfabric import Bfabric -if __name__ == "__main__": - bfapp = bfabric.bfabric_legacy.BfabricLegacy() - query_obj = {} +def bfabric_delete(client: Bfabric, endpoint: str, id: int) -> None: + """Deletes the object with id `id` from the `endpoint`.""" + res = client.delete(endpoint=endpoint, id=id).to_list_dict() + print(json.dumps(res, indent=2)) - print(len(sys.argv)) - endpoint = sys.argv[1] +def main() -> None: + """Parses arguments and calls `bfabric_delete`.""" + client = Bfabric.from_config(verbose=True) + parser = argparse.ArgumentParser() + parser.add_argument("endpoint", help="endpoint", choices=bfabric.endpoints) + parser.add_argument("id", help="id", type=int) + args = parser.parse_args() + bfabric_delete(client=client, endpoint=args.endpoint, id=args.id) - if len(sys.argv) == 3: - id = sys.argv[2] - if endpoint in bfabric.endpoints: - res = bfapp.delete_object(endpoint=endpoint, id=id) - for i in res: - print(i) - else: - raise "1st argument must be a valid endpoint." +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/bfabric/scripts/bfabric_feeder_mascot.py b/bfabric/scripts/bfabric_feeder_mascot.py index b599b997..e0eb201e 100755 --- a/bfabric/scripts/bfabric_feeder_mascot.py +++ b/bfabric/scripts/bfabric_feeder_mascot.py @@ -1,12 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """ -# $HeadURL: https://fgcz-svn.uzh.ch/repos/fgcz/computer/fgcz-s-018/bfabric-feeder/fgcz_dataFeederMascot.py $ -# $Id: fgcz_dataFeederMascot.py 9097 2021-02-05 15:38:38Z cpanse $ -# $Date: 2021-02-05 16:38:38 +0100 (Fri, 05 Feb 2021) $ - - # Author 2012-10-08 Christian Panse 2012-10-10 Christian Panse @@ -24,114 +17,109 @@ 3 */2 * * 1-6 nice -19 /usr/local/fgcz-s-018/bfabric-feeder/run_fgcz_dataFeederMascot.bash 7 2>&1 >/dev/null */7 5-22 * * 1-5 nice -19 /usr/local/fgcz-s-018/bfabric-feeder/run_fgcz_dataFeederMascot.bash 1 2>&1 >/dev/null """ +from __future__ import annotations +import argparse +import hashlib +import itertools +import json import os import re import sys import urllib -import hashlib -import getopt -from suds.client import Client +from collections import Counter from datetime import datetime -import json -import itertools -import http.client -http.client.HTTPConnection._http_vsn = 10 -http.client.HTTPConnection._http_vsn_str = 'HTTP/1.0' +from pathlib import Path +from typing import Any + +from suds.client import Client -workuniturl = 'http://fgcz-bfabric.uzh.ch/bfabric/workunit?wsdl' +workuniturl = "http://fgcz-bfabric.uzh.ch/bfabric/workunit?wsdl" clientWorkUnit = Client(workuniturl) -BFLOGIN = 'pfeeder' -BFPASSWORD = '!ForYourEyesOnly!' +BFLOGIN = "pfeeder" +BFPASSWORD = "!ForYourEyesOnly!" 
-DB = dict() -DBfilename = "{}/mascot.json".format(os.getenv("HOME")) -DBwritten = False +DB = {} +DBfilename = Path.home() / "mascot.json" try: - DB = json.load(open(DBfilename)) - print("Read {len} data items from {name} using {size:.1f} GBytes.".format(len=len(DB), - name=DBfilename, - size=sum(map(lambda x: int(x['resource']['size']), DB.values())) / (1024 * 1024 * 1024))) -except: - print("loading '{}' failed".format(DBfilename)) + with DBfilename.open() as file: + DB = json.load(file) + print( + "Read {len} data items from {name} using {size:.1f} GBytes.".format( + len=len(DB), + name=DBfilename, + size=sum(map(lambda x: int(x["resource"]["size"]), DB.values())) / (1024 * 1024 * 1024), + ) + ) +except OSError: + print(f"loading '{DBfilename}' failed") pass -def signal_handler(signal, frame): - print(("sys exit 1; signal=" + str(signal) + "; frame=" + str(frame))) - sys.exit(1) - - -# TODO(cp): read .bfabricrc.py -def read_bfabricrc(): - with open(os.environ['HOME'] + "/.bfabricrc") as myfile: - for line in myfile: - return (line.strip()) - - -def query_mascot_result(f): - global DBwritten - regex2 = re.compile(".*.+/(data/.+\.dat)$") - regex2Result = regex2.match(f) - if True: - print("{} input>".format(datetime.now())) - print("\t{}".format(f)) - if f in DB: - print("\thit") - wu = DB[f] - if 'workunitid' in wu: - print("\tdat file {} already registered as workunit id {}. continue ...".format(f, wu['workunitid'])) - return - else: - print('\tno workunitid found') +def query_mascot_result(file_path: str) -> bool: + db_written = False + print(f"{datetime.now()} input>") + print(f"\t{file_path}") + if file_path in DB: + print("\thit") + wu = DB[file_path] + if "workunitid" in wu: + print( + "\tdat file {} already registered as workunit id {}. continue ...".format(file_path, wu["workunitid"]) + ) + return else: - print("\tparsing mascot result file '{}'...".format(f)) - wu = parse_mascot_result_file(f) - print("\tupdating cache '{}' file ...".format(DBfilename)) - DBwritten = True - DB[f] = wu - - if len(wu['inputresource']) > 0: - if re.search("autoQC4L", wu['name']) or re.search("autoQC01", wu['name']): - print("WARNING This script ignores autoQC based mascot dat file {}.".format(f)) - return - - print("\tquerying bfabric ...") - - # jsut in case - if 'errorreport' in wu: - del (wu['errorreport']) - - try: - resultClientWorkUnit = clientWorkUnit.service.checkandinsert( - dict(login=BFLOGIN, password=BFPASSWORD, workunit=wu)) - except ValueError: - print("Exception {}".format(ValueError)) - raise - - try: - rv = resultClientWorkUnit.workunit[0] - except ValueError: - print("Exception {}".format(ValueError)) - raise - - print("{} output>".format(datetime.now())) - if 'errorreport' in rv: - print("\tfound errorreport '{}'.".format(rv['errorreport'])) - - if '_id' in rv: - wu['workunitid'] = rv['_id'] - print("\tfound workunitid'{}'.".format(wu['workunitid'])) - DB[f] = wu - DBwritten = True - - if not '_id' in rv and not 'errorreport' in rv: - print("something went wrong.") - raise - # print(resultClientWorkUnit) - # print("exception for file {} with error {}".format(f, e)) - return + print("\tno workunitid found") + else: + print(f"\tparsing mascot result file '{file_path}'...") + wu = parse_mascot_result_file(file_path) + print(f"\tupdating cache '{DBfilename}' file ...") + db_written = True + DB[file_path] = wu + + if len(wu["inputresource"]) > 0: + if re.search("autoQC4L", wu["name"]) or re.search("autoQC01", wu["name"]): + print(f"WARNING This script ignores autoQC based mascot dat 
file {file_path}.") + return + + print("\tquerying bfabric ...") + + # just in case + if "errorreport" in wu: + del wu["errorreport"] + + try: + resultClientWorkUnit = clientWorkUnit.service.checkandinsert( + dict(login=BFLOGIN, password=BFPASSWORD, workunit=wu) + ) + except ValueError: + print(f"Exception {ValueError}") + raise + + try: + rv = resultClientWorkUnit.workunit[0] + except ValueError: + print(f"Exception {ValueError}") + raise + + print(f"{datetime.now()} output>") + if "errorreport" in rv: + print("\tfound errorreport '{}'.".format(rv["errorreport"])) + + if "_id" in rv: + wu["workunitid"] = rv["_id"] + print("\tfound workunitid'{}'.".format(wu["workunitid"])) + DB[file_path] = wu + db_written = True + + if "_id" not in rv and "errorreport" not in rv: + print("something went wrong.") + raise + # print(resultClientWorkUnit) + # print("exception for file {} with error {}".format(f, e)) + + return db_written """ @@ -185,35 +173,33 @@ def query_mascot_result(f): """ -def parse_mascot_result_file(f): - +def parse_mascot_result_file(file_path: str) -> dict[str, Any]: # Getting the current date and time - print("{} DEBUG parse_mascot_result_file".format(datetime.now())) + print(f"{datetime.now()} DEBUG parse_mascot_result_file") regex0 = re.compile("^title=.*(p([0-9]+).+Proteomics.*(raw|RAW|wiff)).*") - regex3 = re.compile("^(FILE|COM|release|USERNAME|USERID|TOL|TOLU|ITOL|ITOLU|MODS|IT_MODS|CHARGE|INSTRUMENT|QUANTITATION|DECOY)=(.+)$") - - # control_chars = ''.join(map(chr, [range(0x00, 0x20) , range(0x7f, 0xa0)])) - control_chars = ''.join(map(chr, itertools.chain(range(0x00, 0x20), range(0x7f, 0xa0)))) + regex3 = re.compile( + "^(FILE|COM|release|USERNAME|USERID|TOL|TOLU|ITOL|ITOLU|MODS|IT_MODS|CHARGE|INSTRUMENT|QUANTITATION|DECOY)=(.+)$" + ) - control_char_re = re.compile('[%s]' % re.escape(control_chars)) + control_chars = "".join(map(chr, itertools.chain(range(0x00, 0x20), range(0x7F, 0xA0)))) + control_char_re = re.compile(f"[{re.escape(control_chars)}]") line_count = 0 - meta_data_dict = dict(COM='', FILE='', release='', relativepath=f.replace('/usr/local/mascot/', '')) + meta_data_dict = dict(COM="", FILE="", release="", relativepath=file_path.replace("/usr/local/mascot/", "")) inputresourceHitHash = dict() inputresourceList = list() md5 = hashlib.md5() project = -1 desc = "" - with open(f) as dat: + with Path(file_path).open() as dat: for line in dat: line_count = line_count + 1 md5.update(line.encode()) # check if the first character of the line is a 't' for title to save regex time - if line[0] == 't': - # result = regex0.match(urllib.url2pathname(line.strip()).replace('\\', "/").replace("//", "/")) - result = regex0.match(urllib.parse.unquote(line.strip()).replace('\\', "/").replace("//", "/")) - if result and not result.group(1) in inputresourceHitHash: + if line[0] == "t": + result = regex0.match(urllib.parse.unquote(line.strip()).replace("\\", "/").replace("//", "/")) + if result and result.group(1) not in inputresourceHitHash: inputresourceHitHash[result.group(1)] = result.group(2) inputresourceList.append(dict(storageid=2, relativepath=result.group(1))) project = result.group(2) @@ -228,78 +214,78 @@ def parse_mascot_result_file(f): desc = desc + result.group(1) + "=" + result.group(2) + "; " meta_data_dict[result.group(1)] = result.group(2) - desc = desc.encode('ascii', errors='ignore') - - name = "{}; {}".format(meta_data_dict['COM'], os.path.basename(meta_data_dict['relativepath']))[:255] - + desc = desc.encode("ascii", errors="ignore") + name = 
f"{meta_data_dict['COM']}; {os.path.basename(meta_data_dict['relativepath'])}"[:255] rv = dict( applicationid=19, containerid=project, - name=control_char_re.sub('', name), - description=control_char_re.sub('', desc.decode()), + name=control_char_re.sub("", name), + description=control_char_re.sub("", desc.decode()), inputresource=inputresourceList, resource=dict( - name=meta_data_dict['relativepath'], + name=meta_data_dict["relativepath"], storageid=4, - status='available', - relativepath=meta_data_dict['relativepath'], - size=os.path.getsize(f), - filechecksum=md5.hexdigest() - ) + status="available", + relativepath=meta_data_dict["relativepath"], + size=os.path.getsize(file_path), + filechecksum=md5.hexdigest(), + ), ) - #TODO + # TODO - print("{}".format(datetime.now())) + print(f"{datetime.now()}") print(rv) print("DEBUG END") - return (rv) - + return rv -def printFrequency(S): - count = dict() - for x in S: - if x in count: - count[x] = count[x] + 1 - else: - count[x] = 1 - for key in sorted(count.keys(), key=lambda key: int(key)): - print("p{}\t{}".format(key, count[key])) +def print_project_frequency(project_numbers: list[int | str]) -> None: + """Prints the frequency of the project numbers in the list, assuming they are either integers or strings of + individual integers.""" + count = Counter(project_numbers) + for key in sorted(count.keys(), key=int): + print(f"p{key}\t{count[key]}") -def statistics(): +def print_statistics() -> None: + """Prints statistics about the provided database.""" print("Statistics ...") - print("len(DB)\t=\t{}".format(len(DB))) - printFrequency(map(lambda x: x['containerid'], DB.values())) - print("file size\t=\t{} GBytes".format(sum(map(lambda x: int(x['resource']['size']), DB.values())) / (1024 * 1024 * 1024))) + print(f"len(DB)\t=\t{len(DB)}") + print_project_frequency(map(lambda x: x["containerid"], DB.values())) + print( + "file size\t=\t{} GBytes".format( + sum(map(lambda x: int(x["resource"]["size"]), DB.values())) / (1024 * 1024 * 1024) + ) + ) - # printFrequency(map(lambda x: x['description'].split(";"), DB.values())) - # print(json.dumps(list(DB.values())[100], indent=4)) +def main() -> None: + """Parses the CLI arguments and calls the appropriate functions.""" + parser = argparse.ArgumentParser() + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("--stdin", action="store_true", help="read file names from stdin") + group.add_argument("--file", type=str, help="processes the provided file") + parser.add_argument("--statistics", action="store_true", help="print statistics") -if __name__ == "__main__": - BFPASSWORD = read_bfabricrc() - try: - opts, args = getopt.getopt(sys.argv[1:], "f:s", ["file=", "stdin", "statistics"]) - except getopt.GetoptError as err: - print(str(err)) - sys.exit(2) - - for o, value in opts: - if o == "--stdin": - print("reading file names from stdin ...") - for f in sys.stdin.readlines(): - query_mascot_result(f.strip()) - elif o == "--file" or o == '-f': - print("processesing", value, "...") - query_mascot_result(value) - elif o == "--statistics" or o == '-s': - statistics() - sys.exit(0) - -if DBwritten: - print("dumping json file '{}' ...".format(DBfilename)) - json.dump(DB, open(DBfilename, 'w'), sort_keys=True, indent=4) - sys.exit(0) + args = parser.parse_args() + + db_written = False + if args.stdin: + print("reading file names from stdin ...") + for filename in sys.stdin.readlines(): + db_written = query_mascot_result(filename.strip()) or db_written + elif args.file: + 
print("processesing", args.file, "...") + db_written = query_mascot_result(args.file) + if args.statistics: + print_statistics() + if db_written: + print(f"dumping json file '{DBfilename}' ...") + with DBfilename.open("w") as file: + json.dump(DB, file, sort_keys=True, indent=4) + + +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_feeder_resource_autoQC.py b/bfabric/scripts/bfabric_feeder_resource_autoQC.py index e30d0f72..51c86188 100755 --- a/bfabric/scripts/bfabric_feeder_resource_autoQC.py +++ b/bfabric/scripts/bfabric_feeder_resource_autoQC.py @@ -12,60 +12,49 @@ | ./bfabric/scripts/bfabric_feeder_resource_autoQC.py """ -import sys import os -import yaml import re +import sys import time import unittest -from bfabric.bfabric_legacy import BfabricLegacy + +from bfabric import Bfabric -class autoQC: +class AutoQC: """ feeder for autoQC raw files """ - bfabric_storageid = 2 - configfile = os.path.normpath("{0}/{1}".format(os.path.expanduser("~"), r".bfabricrc.yaml")) - with open(configfile, "r") as file: - config = yaml.load(file, Loader=yaml.FullLoader) - bfabric_application_ids = config["applicationId"] - - bfapp = BfabricLegacy(verbose=False) + def __init__(self): + self.bfabric_storageid = 2 + self.client = Bfabric.from_config(verbose=True) + self.bfabric_application_ids = self.client.config.application_ids @property - def getId(self, obj): + def get_id(self, obj) -> int: print("==============") print(obj) + print(f"DEBGUG obj: {obj[0]._id}") + return int(obj[0]._id) - try: - print("DEBGUG obj: {}".format(obj[0]._id)) - return int(obj[0]._id) - except: - raise - - def __init__(self): - pass - - def sample_check(self, projectid, name): + def sample_check(self, projectid: int, name: str): """ checks wether a S exists or not. if not the S is created. :param projectid: :param name: :return: SID """ - try: - res = self.bfapp.read_object(endpoint="sample", obj={"containerid": projectid, "name": name}) - except: + res = self.client.read(endpoint="sample", obj={"containerid": projectid, "name": name}).to_list_dict() + except Exception: print(res) raise sample_type = "Biological Sample - Proteomics" query_autoQC01 = { - "name": "{}".format(name), + "name": f"{name}", "type": sample_type, "containerid": projectid, "species": "Bos taurus", @@ -75,7 +64,7 @@ def sample_check(self, projectid, name): } query_autoQC4L = { - "name": "{}".format(name), + "name": f"{name}", "type": sample_type, "containerid": projectid, "species": "n/a", @@ -85,7 +74,7 @@ def sample_check(self, projectid, name): } query_lipidQC01 = { - "name": "{}".format(name), + "name": f"{name}", "type": "Biological Sample - Metabolomics", "containerid": projectid, "species": "n/a", @@ -95,19 +84,19 @@ def sample_check(self, projectid, name): "description": "Lipidmix containing 2uM of FFA, BA, LPC. 
positive mode, C18.", } - if res is None: + if not res: if name == "autoQC4L": - res = self.bfapp.save_object(endpoint="sample", obj=query_autoQC4L) + res = self.client.save(endpoint="sample", obj=query_autoQC4L).to_list_dict() elif name == "autoQC01": - res = self.bfapp.save_object(endpoint="sample", obj=query_autoQC01) + res = self.client.save(endpoint="sample", obj=query_autoQC01).to_list_dict() elif name == "lipidQC01": - res = self.bfapp.save_object(endpoint="sample", obj=query_lipidQC01) + res = self.client.save(endpoint="sample", obj=query_lipidQC01).to_list_dict() print(res) print(res[0]) - return res[0]._id + return res[0]["id"] - def workunit_check(self, projectid, name, applicationid): + def workunit_check(self, projectid: int, name: str, applicationid: int): """ checks wether a WU exists or not. if not the WU is created. @@ -119,10 +108,7 @@ def workunit_check(self, projectid, name, applicationid): """ query = {"projectid": projectid, "name": name, "applicationid": applicationid} - try: - res = self.bfapp.read_object(endpoint="workunit", obj=query) - except: - raise + res = self.client.read(endpoint="workunit", obj=query).to_list_dict() description = """ contains automatic registered quality control (QC) @@ -149,7 +135,7 @@ def workunit_check(self, projectid, name, applicationid): description = "Contains automatic registered quality control (QC) measurements, positive mode." links = ["http://fgcz-ms.uzh.ch/~cpanse/lipidQC01.html"] - if res is None: + if not res: query = { "projectid": projectid, "name": name, @@ -158,14 +144,11 @@ def workunit_check(self, projectid, name, applicationid): "link": links, } - res = self.bfapp.save_object(endpoint="workunit", obj=query) - - else: - pass + res = self.client.save(endpoint="workunit", obj=query).to_list_dict() - return res[0]._id + return res[0]["id"] - def resource_check(self, projectid, name, workunitid, filename, filedate, size, md5, sampleid): + def resource_check(self, projectid: int, name: str, workunitid: int, filename: str, filedate, size, md5, sampleid): """ checks wether a R exists or not. if not the R is created. 
:param projectid: @@ -187,12 +170,9 @@ def resource_check(self, projectid, name, workunitid, filename, filedate, size, "workunitid": workunitid, "projectid": projectid, } - try: - res = self.bfapp.read_object(endpoint="resource", obj=query) - except: - raise + res = self.client.read(endpoint="resource", obj=query).to_list_dict() - if res is None: + if not res: query = { "workunitid": workunitid, "sampleid": sampleid, @@ -204,12 +184,12 @@ def resource_check(self, projectid, name, workunitid, filename, filedate, size, "storageid": self.bfabric_storageid, } - res = self.bfapp.save_object(endpoint="resource", obj=query) + res = self.client.save(endpoint="resource", obj=query).to_list_dict() query = {"id": workunitid, "status": "available"} - res2 = self.bfapp.save_object(endpoint="workunit", obj=query) + res2 = self.client.save(endpoint="workunit", obj=query).to_list_dict() - return res[0]._id + return res[0]["id"] def feed(self, line): """ @@ -220,7 +200,7 @@ def feed(self, line): try: (_md5, _file_date, _file_size, filename) = line.split(";") - except Exception as err: + except Exception: return try: @@ -232,11 +212,11 @@ def feed(self, line): applicationid = self.bfabric_application_ids[m.group(2)] autoQCType = m.group(4) - except Exception as err: - print("# no match '{}'.".format(filename)) + except Exception: + print(f"# no match '{filename}'.") return - print("{}\t{}\t{}\n".format(projectid, applicationid, autoQCType)) + print(f"{projectid}\t{applicationid}\t{autoQCType}\n") try: sampleid = self.sample_check(projectid, name=autoQCType) @@ -258,12 +238,10 @@ def feed(self, line): # sampleid=0 print( - "p{p}\tA{A}\t{filename}\tS{S}\tWU{WU}\tR{R}".format( - p=projectid, A=applicationid, filename=filename, S=sampleid, WU=workunitid, R=resourceid - ) + f"p{projectid}\tA{applicationid}\t{filename}\tS{sampleid}\tWU{workunitid}\tR{resourceid}" ) except Exception as err: - print("# Failed to register to bfabric: {}".format(err)) + print(f"# Failed to register to bfabric: {err}") class TestCaseAutoQC(unittest.TestCase): @@ -272,7 +250,7 @@ class TestCaseAutoQC(unittest.TestCase): """ - BF = autoQC() + BF = AutoQC() def setUp(self): pass @@ -287,7 +265,6 @@ def test_feed(self): if __name__ == "__main__": - - BF = autoQC() + BF = AutoQC() for input_line in sys.stdin: BF.feed(input_line.rstrip()) diff --git a/bfabric/scripts/bfabric_list_not_existing_storage_directories.py b/bfabric/scripts/bfabric_list_not_existing_storage_directories.py index 1784a8ef..4262148a 100755 --- a/bfabric/scripts/bfabric_list_not_existing_storage_directories.py +++ b/bfabric/scripts/bfabric_list_not_existing_storage_directories.py @@ -1,6 +1,4 @@ #!/usr/bin/python3 -# -*- coding: latin1 -*- - """ Copyright (C) 2020 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
@@ -12,24 +10,28 @@ http://fgcz-bfabric.uzh.ch/bfabric/executable?wsdl """ +from __future__ import annotations import os - -import bfabric.bfabric_legacy - -B = bfabric.bfabric_legacy.BfabricLegacy() +from bfabric import Bfabric ROOTDIR = "/srv/www/htdocs/" -def listNotExistingStorageDirs(technologyid=2): - rv = B.read_object("container", {"technologyid": technologyid}) - containerIDs = list(set(map(lambda x: x._id, rv))) +def list_not_existing_storage_dirs(client: Bfabric, technologyid: int = 2) -> None: + results = client.read(endpoint="container", obj={"technologyid": technologyid}).to_list_dict() + container_ids = sorted({x["id"] for x in results}) - for cid in containerIDs: - if not os.path.isdir("{}/p{}".format(ROOTDIR, cid)): + for cid in container_ids: + if not os.path.isdir(os.path.join(ROOTDIR, f"p{cid}")): print(cid) -listNotExistingStorageDirs(technologyid=2) -listNotExistingStorageDirs(technologyid=4) +def main() -> None: + client = Bfabric.from_config(verbose=True) + list_not_existing_storage_dirs(client=client, technologyid=2) + list_not_existing_storage_dirs(client=client, technologyid=4) + + +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_save_importresource_sample.py b/bfabric/scripts/bfabric_save_importresource_sample.py index ca914409..53f25753 100755 --- a/bfabric/scripts/bfabric_save_importresource_sample.py +++ b/bfabric/scripts/bfabric_save_importresource_sample.py @@ -1,4 +1,5 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 +# TODO add integration test (with and without sample id) """General Importresource Feeder for bfabric Author: @@ -20,6 +21,7 @@ import re import sys import time +import json from bfabric import Bfabric @@ -39,7 +41,7 @@ def save_importresource(client: Bfabric, line: str) -> None: Output: True on success otherwise an exception raise """ - mdf5_checksum, file_date, file_size, file_path = line.split(";") + md5_checksum, file_date, file_size, file_path = line.split(";") # Format the timestamp for bfabric file_date = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(int(file_date))) @@ -54,7 +56,7 @@ def save_importresource(client: Bfabric, line: str) -> None: obj = { "applicationid": bfabric_application_id, - "filechecksum": mdf5_checksum, + "filechecksum": md5_checksum, "containerid": bfabric_projectid, "filedate": file_date, "relativepath": file_path, @@ -63,19 +65,29 @@ def save_importresource(client: Bfabric, line: str) -> None: "storageid": BFABRIC_STORAGE_ID, } - try: - m = re.search( - r"p([0-9]+)\/(Proteomics\/[A-Z]+_[1-9])\/.*_\d\d\d_S([0-9][0-9][0-9][0-9][0-9][0-9]+)_.*(raw|zip)$", - file_path, - ) - print(f"found sampleid={m.group(3)} pattern") - obj["sampleid"] = int(m.group(3)) - except Exception: - pass + match = re.search( + r"p([0-9]+)\/(Proteomics\/[A-Z]+_[1-9])\/.*_\d\d\d_S([0-9][0-9][0-9][0-9][0-9][0-9]+)_.*(raw|zip)$", + file_path, + ) + if match: + print(f"found sampleid={match.group(3)} pattern") + obj["sampleid"] = int(match.group(3)) print(obj) - res = client.save(endpoint="importresource", obj=obj).to_list_dict() - print(res[0]) + res = client.save(endpoint="importresource", obj=obj) + print(json.dumps(res, indent=2)) + + +def get_sample_id_from_path(file_path: str) -> int | None: + match = re.search( + r"p([0-9]+)\/(Proteomics\/[A-Z]+_[1-9])\/.*_\d\d\d_S([0-9][0-9][0-9][0-9][0-9][0-9]+)_.*(raw|zip)$", + file_path, + ) + if match: + print(f"found sampleid={match.group(3)} pattern") + return int(match.group(3)) + else: + return None def get_bfabric_application_and_project_id(bfabric_application_ids: 
dict[str, int], file_path: str) -> tuple[int, int]: diff --git a/bfabric/scripts/bfabric_save_workunit.py b/bfabric/scripts/bfabric_save_workunit.py deleted file mode 100755 index ce2bc502..00000000 --- a/bfabric/scripts/bfabric_save_workunit.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/python -# -*- coding: latin1 -*- - -""" - -Copyright (C) 2016 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. - -Author: - Christian Panse - -Licensed under GPL version 3 - -$HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/scripts/bfabric_save_workunit.py $ -$Id: bfabric_save_workunit.py 2956 2017-08-09 07:14:59Z cpanse $ - -""" - -import os -import sys -import bfabric -import datetime - -import bfabric.bfabric_legacy - -if __name__ == "__main__": - bfapp = bfabric.bfabric_legacy.BfabricLegacy() - - workunit = bfapp.save_object( - endpoint="workunit", - obj={"name": "MaxQuant report", "projectid": "1000", "applicationid": 217, "status": "available"}, - ) - print(workunit) diff --git a/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py b/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py index 5ba0cb1c..d003af46 100755 --- a/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py +++ b/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py @@ -19,8 +19,10 @@ import yaml from io import StringIO, BytesIO from optparse import OptionParser + # import unittest + class FgczMaxQuantScaffold: """ input: @@ -38,27 +40,26 @@ def __init__(self, yamlfilename=None, zipfilename=None): print("ERROR: no such file '{0}'".format(zipfilename)) sys.exit(1) - self.zipfilename = zipfilename - with open(yamlfilename, 'r') as f: + with open(yamlfilename, "r") as f: content = f.read() self.config = yaml.load(content, Loader=yaml.FullLoader) try: - self.fasta = os.path.basename(self.config['application']['parameters']['/fastaFiles/FastaFileInfo/fastaFilePath']) + self.fasta = os.path.basename( + self.config["application"]["parameters"]["/fastaFiles/FastaFileInfo/fastaFilePath"] + ) except: raise - L = [value for values in self.config['application']['input'].values() for value in values] - - self.samples = list(map(lambda x: os.path.basename(x).replace('.raw', ''), L)) + L = [value for values in self.config["application"]["input"].values() for value in values] + self.samples = list(map(lambda x: os.path.basename(x).replace(".raw", ""), L)) + def getBiologicalSample(selfs, InputFile=None, category="***BASENAME***"): - def getBiologicalSample(selfs, InputFile = None, category = '***BASENAME***'): - - scaffold_BiologicalSample = ''' + scaffold_BiologicalSample = """ WU192418/output-WU192418.zip - ''' + """ pBioSample = etree.XML(scaffold_BiologicalSample) @@ -78,18 +79,17 @@ def getBiologicalSample(selfs, InputFile = None, category = '***BASENAME***'): if eInputFile is None: raise TypeError - eInputFile.text = '{}'.format(InputFile) - eInputFile.attrib['maxQuantExperiment'] = "{}".format(category) + eInputFile.text = "{}".format(InputFile) + eInputFile.attrib["maxQuantExperiment"] = "{}".format(category) eBiologicalSample = eInputFile.getparent() - eBiologicalSample.attrib['category'] = "{}".format(category) - eBiologicalSample.attrib['name'] = "{}".format(category) - - return(pBioSample) + eBiologicalSample.attrib["category"] = "{}".format(category) + eBiologicalSample.attrib["name"] = "{}".format(category) + return pBioSample def getScaffold(selfs): - xml = ''' + xml = """ -''' +""" pxml = etree.parse(StringIO(xml)) - #pxml = etree.XML(xml) - return(pxml) - - + # pxml = etree.XML(xml) + return 
pxml def run(self): xml = self.getScaffold() - eExperiment = xml.find('/Experiment') - eFastaDatabase = xml.find('/Experiment/FastaDatabase') - eFastaDatabase.attrib['path'] = "{}/{}".format(os.getcwd(), self.fasta) + eExperiment = xml.find("/Experiment") + eFastaDatabase = xml.find("/Experiment/FastaDatabase") + eFastaDatabase.attrib["path"] = "{}/{}".format(os.getcwd(), self.fasta) for s in self.samples: - eExperiment.extend(self.getBiologicalSample(category=s, InputFile = self.zipfilename)) + eExperiment.extend(self.getBiologicalSample(category=s, InputFile=self.zipfilename)) + xml.write("/dev/stdout", pretty_print=True, xml_declaration=True, method="xml", encoding="UTF-8") - xml.write('/dev/stdout' , pretty_print=True, xml_declaration=True, method='xml', encoding="UTF-8") if __name__ == "__main__": - parser = OptionParser(usage="usage: %prog -y -z ", - version="%prog 1.0") - - parser.add_option("-y", "--yaml", - type='string', - action="store", - dest="yaml_filename", - default="/Users/cp/WU199270.yaml ", - help="config file.yaml") - - parser.add_option("-z", "--zip", - type='string', - action="store", - dest="zip_filename", - default="output-WU199270.zip", - help="config file.yaml") + parser = OptionParser( + usage="usage: %prog -y -z ", + version="%prog 1.0", + ) + + parser.add_option( + "-y", + "--yaml", + type="string", + action="store", + dest="yaml_filename", + default="/Users/cp/WU199270.yaml ", + help="config file.yaml", + ) + + parser.add_option( + "-z", + "--zip", + type="string", + action="store", + dest="zip_filename", + default="output-WU199270.zip", + help="config file.yaml", + ) (options, args) = parser.parse_args() driver = FgczMaxQuantScaffold(yamlfilename=options.yaml_filename, zipfilename=options.zip_filename) diff --git a/bfabric/scripts/fgcz_maxquant_wrapper.py b/bfabric/scripts/fgcz_maxquant_wrapper.py index 3d7bcb81..6c147ed7 100755 --- a/bfabric/scripts/fgcz_maxquant_wrapper.py +++ b/bfabric/scripts/fgcz_maxquant_wrapper.py @@ -1,6 +1,4 @@ #!/usr/bin/python3 -# -*- coding: latin1 -*- - # Copyright (C) 2017, 2018 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
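Both MaxQuant helpers in this series lean on the same lxml idiom: locate an element by path, then rewrite its text or attributes before serializing. A standalone sketch (the tag names and path value are hypothetical, not taken from the wrappers):

```python
from lxml import etree

# Build a tiny tree, then rewrite it the way the wrappers do.
root = etree.fromstring("<Experiment><FastaDatabase/></Experiment>")
root.find("FastaDatabase").attrib["path"] = "/tmp/test.fasta"  # hypothetical path
etree.SubElement(root, "BiologicalSample", category="demo", name="demo")
print(etree.tostring(root, pretty_print=True).decode())
```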
# # Authors: @@ -8,23 +6,16 @@ # # Licensed under GPL version 3 # -# $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/scripts/fgcz_pd_wrapper.py $ -# $Id: fgcz_pd_wrapper.py 2992 2017-08-17 13:37:36Z cpanse $ -import logging -import logging.handlers import os -import pprint -import re import sys -import time -import urllib +from io import StringIO from optparse import OptionParser -from lxml import etree -import yaml from pathlib import Path -import hashlib -from io import StringIO, BytesIO + +import yaml +from lxml import etree + # import warnings """ @@ -33,69 +24,67 @@ """ -import unittest class FgczMaxQuantConfig: """ - input: - QEXACTIVE_2: - - bfabric@fgczdata.fgcz-net.unizh.ch://srv/www/htdocs//p1946/Proteomics/QEXACTIVE_2/paolo_20150811_course/20150811_01_Fetuin40fmol.raw - - bfabric@fgczdata.fgcz-net.unizh.ch://srv/www/htdocs//p1946/Proteomics/QEXACTIVE_2/paolo_20150811_course/20150811_02_YPG1.raw - output: + input: + QEXACTIVE_2: + - bfabric@fgczdata.fgcz-net.unizh.ch://srv/www/htdocs//p1946/Proteomics/QEXACTIVE_2/paolo_20150811_course/20150811_01_Fetuin40fmol.raw + - bfabric@fgczdata.fgcz-net.unizh.ch://srv/www/htdocs//p1946/Proteomics/QEXACTIVE_2/paolo_20150811_course/20150811_02_YPG1.raw + output: """ config = None scratchdir = None - def __init__(self, config=None, scratch = "/scratch/MAXQUANT/"): + def __init__(self, config=None, scratch="/scratch/MAXQUANT/"): if config: self.config = config - self.scratchdir = Path("{0}/WU{1}".format(scratch, self.config['job_configuration']['workunit_id'])) + self.scratchdir = Path("{0}/WU{1}".format(scratch, self.config["job_configuration"]["workunit_id"])) if not os.path.isdir(self.scratchdir): - print ("no scratch dir '{0}'.".format(self.scratchdir)) + print("no scratch dir '{0}'.".format(self.scratchdir)) # raise SystemError def generate_mqpar(self, xml_filename, xml_template): - """ PARAMETER """ - for query, value in self.config['application']['parameters'].items(): + """PARAMETER""" + for query, value in self.config["application"]["parameters"].items(): element = xml_template.find(query) if element is not None: if value == "None": - element.text = '' + element.text = "" elif query == "/parameterGroups/parameterGroup/variableModifications": for a in value.split(","): - estring = etree.Element("string") - estring.text = a - element.extend(estring) + estring = etree.Element("string") + estring.text = a + element.extend(estring) pass else: - print ("replacing xpath expression {} by {}.".format(query, value)) + print("replacing xpath expression {} by {}.".format(query, value)) element.text = value - ecount = 0; + ecount = 0 """ INPUT """ - for query, value in self.config['application']['input'].items(): - for input in self.config['application']['input'][query]: + for query, value in self.config["application"]["input"].items(): + for input in self.config["application"]["input"][query]: element = xml_template.find("/filePaths") if element is None: raise TypeError - host, file = input.split(":") - print ("{}\t{}".format(os.path.basename(input), file)) + print("{}\t{}".format(os.path.basename(input), file)) if not os.path.isfile(file): print("'{}' do not exists.".format(file)) - #raise SystemError + # raise SystemError targetRawFile = "{}/{}".format(self.scratchdir, os.path.basename(input)) if not os.path.islink(targetRawFile): try: - os.symlink(file, targetRawFile) + os.symlink(file, targetRawFile) except: print("linking '{}' failed.".format(file)) @@ -136,15 +125,14 @@ def generate_mqpar(self, 
xml_filename, xml_template): estring.text = "0" element.extend(estring) - #return(xml_template) - xml_template.write(xml_filename)#, pretty_print=True) - + # return(xml_template) + xml_template.write(xml_filename) # , pretty_print=True) def run(self): - pass + pass -mqpar_templ_xml =''' +mqpar_templ_xml = """ test.fasta @@ -484,42 +472,43 @@ def run(self): -''' +""" if __name__ == "__main__": - parser = OptionParser(usage="usage: %prog -y ", - version="%prog 1.0") - - parser.add_option("-y", "--yaml", - type='string', - action="store", - dest="yaml_filename", - default=None, - help="config file.yaml") - - parser.add_option("-x", "--xml", - type='string', - action="store", - dest="xml_filename", - default=None, - help="MaxQuant mqpar xml parameter filename.") - - parser.add_option("-t", "--xmltemplate", - type='string', - action="store", - dest="xml_template_filename", - default=None, - help="MaxQuant mqpar template xml parameter filename.") + parser = OptionParser(usage="usage: %prog -y ", version="%prog 1.0") + + parser.add_option( + "-y", "--yaml", type="string", action="store", dest="yaml_filename", default=None, help="config file.yaml" + ) + + parser.add_option( + "-x", + "--xml", + type="string", + action="store", + dest="xml_filename", + default=None, + help="MaxQuant mqpar xml parameter filename.", + ) + + parser.add_option( + "-t", + "--xmltemplate", + type="string", + action="store", + dest="xml_template_filename", + default=None, + help="MaxQuant mqpar template xml parameter filename.", + ) (options, args) = parser.parse_args() if not os.path.isfile(options.yaml_filename): - print ("ERROR: no such file '{0}'".format(options.yaml_filename)) + print("ERROR: no such file '{0}'".format(options.yaml_filename)) sys.exit(1) try: - with open(options.yaml_filename, 'r') as f: - content = f.read() - job_config = yaml.load(content) + with open(options.yaml_filename, "r") as f: + job_config = yaml.safe_load(f) if options.xml_template_filename is None: try: @@ -527,23 +516,24 @@ def run(self): except: raise else: - with open(options.xml_template_filename, 'r') as f: + with open(options.xml_template_filename, "r") as f: mqpartree = etree.parse(f) - MQC = FgczMaxQuantConfig(config = job_config, scratch="d:/scratch/") + MQC = FgczMaxQuantConfig(config=job_config, scratch="d:/scratch/") output = MQC.generate_mqpar(options.xml_filename, xml_template=mqpartree) except: - print ("ERROR: exit 1") + print("ERROR: exit 1") raise - import unittest """ python3 -m unittest fgcz_maxquant_wrapper.py """ + + class TestFgczMaxQuantConfig(unittest.TestCase): def test_xml(self): input_WU181492_yaml = """ @@ -631,7 +621,7 @@ def test_xml(self): """ - job_config = yaml.load(input_WU181492_yaml) + job_config = yaml.safe_load(input_WU181492_yaml) mqpartree = etree.parse(StringIO(mqpar_templ_xml)) MQC = FgczMaxQuantConfig(config=job_config) From a020bd74bf5373ffe0cd1e7fdadbbe5b075c6159 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 16 May 2024 14:06:50 +0200 Subject: [PATCH 098/129] compatibility change --- bfabric/tests/integration/test_bfabric2_save_delete.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bfabric/tests/integration/test_bfabric2_save_delete.py b/bfabric/tests/integration/test_bfabric2_save_delete.py index f93a1fd3..2596857e 100644 --- a/bfabric/tests/integration/test_bfabric2_save_delete.py +++ b/bfabric/tests/integration/test_bfabric2_save_delete.py @@ -35,7 +35,7 @@ def _find_delete_existing_objects_by_name(b: Bfabric, endpoint: str, name_list: ids_to_delete += 
ids_this # Delete - delete_response_dict = b.delete(endpoint, ids_to_delete).to_list_dict() + delete_response_dict = b.delete(endpoint, ids_to_delete).to_list_dict(drop_empty=True) print("Deletion results:", delete_response_dict) return objs_exist_names, delete_response_dict From db32df1c84e41a578971631395f8c6a91fc541d1 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 16 May 2024 14:13:26 +0200 Subject: [PATCH 099/129] add docstrings, typehints, __init__ --- bfabric/results/response_format_dict.py | 20 +++++++++---------- bfabric/tests/__init__.py | 0 bfabric/tests/integration/__init__.py | 0 .../integration/integration_test_helper.py | 10 +++++++--- 4 files changed, 17 insertions(+), 13 deletions(-) create mode 100644 bfabric/tests/__init__.py create mode 100644 bfabric/tests/integration/__init__.py diff --git a/bfabric/results/response_format_dict.py b/bfabric/results/response_format_dict.py index 772c69d7..47ce298a 100644 --- a/bfabric/results/response_format_dict.py +++ b/bfabric/results/response_format_dict.py @@ -1,17 +1,17 @@ +from __future__ import annotations from copy import deepcopy -from typing import Optional, Union def sort_dict(d: dict) -> dict: - """ + """Returns a copy of the dictionary with items sorted by key. + Affects how the dictionary appears, when mapped to a string. :param d: A dictionary :return: A dictionary with items sorted by key. - Affects how the dictionary appears, when mapped to a string """ return dict(sorted(d.items())) -def _recursive_drop_empty(response_elem: Union[list, dict]) -> None: +def _recursive_drop_empty(response_elem: list | dict) -> None: """ Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary value is encountered, that is either an empty list or None, the key-value pair gets deleted from the dictionary @@ -32,7 +32,7 @@ def _recursive_drop_empty(response_elem: Union[list, dict]) -> None: del response_elem[k] -def drop_empty_elements(response: Union[list, dict], inplace: bool = True) -> Optional[Union[list, dict]]: +def drop_empty_elements(response: list | dict, inplace: bool = True) -> list | dict | None: """ Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary value is encountered, that is either an empty list or None, the key-value pair gets deleted from the dictionary @@ -46,7 +46,7 @@ def drop_empty_elements(response: Union[list, dict], inplace: bool = True) -> Op return response_filtered -def _recursive_map_keys(response_elem, keymap: dict) -> None: +def _recursive_map_keys(response_elem: list | dict, keymap: dict) -> None: """ Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary key is found for which the mapping is requested, that the key is renamed to the corresponding mapped one @@ -61,7 +61,7 @@ def _recursive_map_keys(response_elem, keymap: dict) -> None: keys_to_delete = [] # NOTE: Avoid deleting keys inside iterator, may break iterator for k, v in response_elem.items(): _recursive_map_keys(v, keymap) - if k in keymap.keys(): + if k in keymap: keys_to_delete += [k] for k in keys_to_delete: @@ -69,7 +69,7 @@ def _recursive_map_keys(response_elem, keymap: dict) -> None: del response_elem[k] # Delete old key -def map_element_keys(response: Union[list, dict], keymap: dict, inplace: bool = True) -> Union[list, dict]: +def map_element_keys(response: list | dict, keymap: dict, inplace: bool = True) -> list | dict: """ Iterates over all nested lists, dictionaries and basic values. 
Whenever a dictionary key is found for which the mapping is requested, the key is renamed
    to the corresponding mapped one
@@ -84,7 +84,7 @@ def map_element_keys(response: Union[list, dict], keymap: dict, inplace: bool =
     return response_filtered


-def _recursive_sort_dicts_by_key(response_elem) -> None:
+def _recursive_sort_dicts_by_key(response_elem: list | dict) -> None:
     """
     Iterates over all nested lists, dictionaries and basic values.
     Whenever a nested dictionary is found, it is sorted by key by converting into OrderedDict and back
@@ -103,7 +103,7 @@ def _recursive_sort_dicts_by_key(response_elem) -> None:
             _recursive_sort_dicts_by_key(v)


-def sort_dicts_by_key(response: Union[list, dict], inplace: bool = True) -> Optional[Union[list, dict]]:
+def sort_dicts_by_key(response: list | dict, inplace: bool = True) -> list | dict | None:
     """
     Iterates over all nested lists, dictionaries and basic values.
     Whenever a nested dictionary is found, it is sorted by key by converting into OrderedDict and back
diff --git a/bfabric/tests/__init__.py b/bfabric/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/bfabric/tests/integration/__init__.py b/bfabric/tests/integration/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/bfabric/tests/integration/integration_test_helper.py b/bfabric/tests/integration/integration_test_helper.py
index 7a544d2b..83ed0314 100644
--- a/bfabric/tests/integration/integration_test_helper.py
+++ b/bfabric/tests/integration/integration_test_helper.py
@@ -6,11 +6,15 @@
 class DeleteEntities:
-    def __init__(self, client: Bfabric, created_entities: list[tuple[str, int]] | None = None):
+    """Deletes entities that were registered when a test is torn down.
+    Please use `self.addCleanup` to ensure that the entities are deleted even if the test fails.
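The class is meant to be combined with `unittest`'s `addCleanup`, as the docstring notes; a sketch of the intended pattern (the saved workunit fields are hypothetical):

```python
import unittest

from bfabric import Bfabric
from bfabric.tests.integration.integration_test_helper import DeleteEntities


class ExampleTest(unittest.TestCase):
    def setUp(self):
        self.client = Bfabric.from_config("TEST")
        self.delete_entities = DeleteEntities(self.client)
        # runs even if the test fails, deleting everything registered below
        self.addCleanup(self.delete_entities)

    def test_save(self):
        obj = {"name": "test", "containerid": 3000, "applicationid": 1}
        res = self.client.save("workunit", obj).to_list_dict()
        self.delete_entities.register_entity(res[0], entity_type="workunit")
```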
+ """ + + def __init__(self, client: Bfabric, created_entities: list[tuple[str, int]] | None = None) -> None: self.client = client self.created_entities = created_entities or [] - def __call__(self): + def __call__(self) -> None: """Deletes all created entities.""" errors = [] for entity_type, entity_id in self.created_entities: @@ -20,7 +24,7 @@ def __call__(self): else: print("Successfully deleted entities:", self.created_entities) - def register_entity(self, entity: dict[str, Any], entity_type: str | None = None): + def register_entity(self, entity: dict[str, Any], entity_type: str | None = None) -> None: """Registers an entity to be deleted when the test is torn down.""" if entity_type is None: entity_type = entity["classname"] From eddf274ddd10a5dc01e4f9071b83a97af5ce4d90 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 16 May 2024 14:18:15 +0200 Subject: [PATCH 100/129] small changes in tests --- .../integration/test_bfabric2_bad_requests.py | 12 ++++-------- .../tests/integration/test_bfabric2_read.py | 18 +++++++----------- .../test_bfabric2_read_pagination.py | 6 ++++++ bfabric/tests/unit/test_bfabric_config.py | 4 ++-- 4 files changed, 19 insertions(+), 21 deletions(-) diff --git a/bfabric/tests/integration/test_bfabric2_bad_requests.py b/bfabric/tests/integration/test_bfabric2_bad_requests.py index 0e1c0f42..a45f45ed 100755 --- a/bfabric/tests/integration/test_bfabric2_bad_requests.py +++ b/bfabric/tests/integration/test_bfabric2_bad_requests.py @@ -2,8 +2,7 @@ import os import unittest -from bfabric import BfabricAPIEngineType, Bfabric -from bfabric.bfabric import get_system_auth +from bfabric import Bfabric, BfabricAPIEngineType from bfabric.errors import BfabricRequestError @@ -14,13 +13,10 @@ def setUp(self): with open(path) as json_file: self.ground_truth = json.load(json_file) - # Load config and authentication - self.config, self.auth = get_system_auth(config_env="TEST") - - # Init the engines + # Create clients self.clients = { - "zeep": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP), - "suds": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS), + "zeep": Bfabric.from_config("TEST", engine=BfabricAPIEngineType.ZEEP), + "suds": Bfabric.from_config("TEST", engine=BfabricAPIEngineType.SUDS), } def _test_non_existing_read(self, engine_name: str): diff --git a/bfabric/tests/integration/test_bfabric2_read.py b/bfabric/tests/integration/test_bfabric2_read.py index 50117080..5fb6ff6a 100755 --- a/bfabric/tests/integration/test_bfabric2_read.py +++ b/bfabric/tests/integration/test_bfabric2_read.py @@ -2,8 +2,7 @@ import os import unittest -from bfabric import BfabricAPIEngineType, Bfabric, BfabricAuth -from bfabric.bfabric import get_system_auth +from bfabric import Bfabric, BfabricAuth, BfabricAPIEngineType class BfabricTestRead(unittest.TestCase): @@ -13,13 +12,10 @@ def setUp(self, *args, **kwargs): with open(path) as json_file: self.ground_truth = json.load(json_file) - # Load config and authentication - self.config, self.auth = get_system_auth(config_env="TEST") - - # Init the engines + # Create clients self.clients = { - "zeep": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP), - "suds": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS), + "zeep": Bfabric.from_config("TEST", engine=BfabricAPIEngineType.ZEEP), + "suds": Bfabric.from_config("TEST", engine=BfabricAPIEngineType.SUDS), } def read(self, engine: str, endpoint: str): @@ -83,10 +79,10 @@ def test_annotation(self): self.read("zeep", 
"annotation") def test_invalid_auth(self): - auth = BfabricAuth(login=self.auth.login, password="invalid_password") + auth = BfabricAuth(login=self.clients["suds"].auth.login, password="invalid_password") clients = { - "zeep": Bfabric(self.config, auth, engine=BfabricAPIEngineType.ZEEP), - "suds": Bfabric(self.config, auth, engine=BfabricAPIEngineType.SUDS), + "zeep": Bfabric.from_config("TEST", auth, engine=BfabricAPIEngineType.ZEEP), + "suds": Bfabric.from_config("TEST", auth, engine=BfabricAPIEngineType.SUDS), } for engine, bf in clients.items(): with self.subTest(engine=engine): diff --git a/bfabric/tests/integration/test_bfabric2_read_pagination.py b/bfabric/tests/integration/test_bfabric2_read_pagination.py index 8b3e2f32..135b76d6 100644 --- a/bfabric/tests/integration/test_bfabric2_read_pagination.py +++ b/bfabric/tests/integration/test_bfabric2_read_pagination.py @@ -1,3 +1,4 @@ +import logging import unittest import pandas as pd @@ -51,3 +52,8 @@ def test_composite_user(self): # TODO: Make the test strict if Zeep bug is ever resolved. self.assertListEqual(["formerproject", "project"], mismatch_cols) print("SUDS and ZEEP mismatch in", mismatch_cols, "(expected)") + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + unittest.main() diff --git a/bfabric/tests/unit/test_bfabric_config.py b/bfabric/tests/unit/test_bfabric_config.py index dd10e6a2..ce976a44 100644 --- a/bfabric/tests/unit/test_bfabric_config.py +++ b/bfabric/tests/unit/test_bfabric_config.py @@ -125,14 +125,14 @@ def test_read_yml_when_empty_optional(self): def test_repr(self): rep = repr(self.config) self.assertEqual( - "BfabricConfig(base_url='url', application_ids={'app': 1}, " "job_notification_emails='')", + "BfabricConfig(base_url='url', application_ids={'app': 1}, job_notification_emails='')", rep, ) def test_str(self): rep = str(self.config) self.assertEqual( - "BfabricConfig(base_url='url', application_ids={'app': 1}, " "job_notification_emails='')", + "BfabricConfig(base_url='url', application_ids={'app': 1}, job_notification_emails='')", rep, ) From e6530ea2830a909a37b9873312a0ae65fd95fe7e Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 16 May 2024 14:21:31 +0200 Subject: [PATCH 101/129] documentation, clarify return_id_only parameter the other one does not exist anymore, and the name was renamed for clarity --- bfabric/bfabric.py | 73 +++---- bfabric/engine/engine_suds.py | 34 ++-- bfabric/engine/engine_zeep.py | 32 ++- .../examples/compare_zeep_suds_pagination.py | 2 +- .../test_bfabric2_time_inequality_query.py | 57 +++--- bfabric/tests/unit/test_bfabric.py | 187 +++++++++++++++++- 6 files changed, 294 insertions(+), 91 deletions(-) diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index c7df955a..137eddd7 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -28,23 +28,29 @@ from bfabric.bfabric_config import BfabricAuth, read_config from bfabric.bfabric_config import BfabricConfig +from bfabric.cli_formatting import HostnameHighlighter, DEFAULT_THEME from bfabric.engine.engine_suds import EngineSUDS from bfabric.engine.engine_zeep import EngineZeep -from bfabric.cli_formatting import HostnameHighlighter, DEFAULT_THEME -from bfabric.utils.paginator import compute_requested_pages, BFABRIC_QUERY_LIMIT, page_iter from bfabric.results.result_container import ResultContainer +from bfabric.utils.paginator import compute_requested_pages, BFABRIC_QUERY_LIMIT, page_iter class BfabricAPIEngineType(Enum): + """Choice of engine to use.""" + SUDS = 1 ZEEP = 2 -# TODO: What 
does idonly do for SUDS? Does it make sense for Zeep? -# TODO: What does includedeletableupdateable do for Zeep? Does it make sense for Suds? # TODO: How to deal with save-skip fields in Zeep? Does it happen in SUDS? class Bfabric: - """Bfabric client class, providing general functionality for interaction with the B-Fabric API.""" + """Bfabric client class, providing general functionality for interaction with the B-Fabric API. + Use `Bfabric.from_config` to create a new instance. + :param config: Configuration object + :param auth: Authentication object (if `None`, it has to be provided using the `with_auth` context manager) + :param engine: Engine to use for the API. Default is SUDS. + :param verbose: Print a system info message to standard error console + """ def __init__( self, @@ -53,7 +59,6 @@ def __init__( engine: BfabricAPIEngineType = BfabricAPIEngineType.SUDS, verbose: bool = False, ) -> None: - self.verbose = verbose self.query_counter = 0 self._config = config self._auth = auth @@ -65,7 +70,7 @@ def __init__( else: raise ValueError(f"Unexpected engine: {engine}") - if self.verbose: + if verbose: self.print_version_message() @classmethod @@ -124,29 +129,27 @@ def read( obj: dict[str, Any], max_results: int | None = 100, offset: int = 0, - readid: bool = False, check: bool = True, - idonly: bool = False, + return_id_only: bool = False, ) -> ResultContainer: - """Reads objects from the specified endpoint that match all specified attributes in `obj`. + """Reads from the specified endpoint matching all specified attributes in `obj`. By setting `max_results` it is possible to change the number of results that are returned. - :param endpoint: endpoint - :param obj: query dictionary + :param endpoint: the endpoint to read from, e.g. "sample" + :param obj: a dictionary containing the query, for every field multiple possible values can be provided, the + final query requires the condition for each field to be met :param max_results: cap on the number of results to query. The code will keep reading pages until all pages are read or expected number of results has been reached. If None, load all available pages. NOTE: max_results will be rounded upwards to the nearest multiple of BFABRIC_QUERY_LIMIT, because results come in blocks, and there is little overhead to providing results over integer number of pages. :param offset: the number of elements to skip before starting to return results (useful for pagination, default is 0 which means no skipping) - :param readid: whether to use reading by ID. Currently only available for engine=SUDS - TODO: Test the extent to which this method works. Add safeguards - :param check: whether to check for errors in the response - :param idonly: whether to return only the ids of the objects + :param check: whether to raise an error if the response is not successful + :param return_id_only: whether to return only the ids of the found objects :return: List of responses, packaged in the results container """ # Get the first page. 
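The page planning inside `read` is delegated to `compute_requested_pages`; the keyword signature below matches the unit tests added later in this series, while the concrete numbers are hypothetical:

```python
from bfabric.utils.paginator import compute_requested_pages

# Which API pages must be fetched, and where in the first fetched page do the
# requested items start? Here: 5 pages of 100 items exist, and we want up to
# 120 items beginning at item 30.
requested_pages, page_offset = compute_requested_pages(
    n_page_total=5,
    n_item_per_page=100,
    n_item_offset=30,
    n_item_return_max=120,
)
print(requested_pages, page_offset)
```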
# NOTE: According to old interface, this is equivalent to plain=True - results = self._read_page(readid, endpoint, obj, page=1, idonly=idonly) + results = self.engine.read(endpoint=endpoint, obj=obj, auth=self.auth, page=1, return_id_only=return_id_only) n_available_pages = results.total_pages_api if not n_available_pages: if check: @@ -169,7 +172,9 @@ def read( for i_iter, i_page in enumerate(requested_pages): if not (i_iter == 0 and i_page == 1): print("-- reading page", i_page, "of", n_available_pages) - results = self._read_page(readid, endpoint, obj, page=i_page, idonly=idonly) + results = self.engine.read( + endpoint=endpoint, obj=obj, auth=self.auth, page=i_page, return_id_only=return_id_only + ) errors += results.errors response_items += results[page_offset:] @@ -180,14 +185,26 @@ def read( result.assert_success() return result.get_first_n_results(max_results) - def save(self, endpoint: str, obj: dict, check: bool = True) -> ResultContainer: - results = self.engine.save(endpoint, obj, auth=self.auth) + def save(self, endpoint: str, obj: dict[str, Any], check: bool = True) -> ResultContainer: + """Saves the provided object to the specified endpoint. + :param endpoint: the endpoint to save to, e.g. "sample" + :param obj: the object to save + :param check: whether to raise an error if the response is not successful + :return a ResultContainer describing the saved object if successful + """ + results = self.engine.save(endpoint=endpoint, obj=obj, auth=self.auth) if check: results.assert_success() return results def delete(self, endpoint: str, id: int | list[int], check: bool = True) -> ResultContainer: - results = self.engine.delete(endpoint, id, auth=self.auth) + """Deletes the object with the specified ID from the specified endpoint. + :param endpoint: the endpoint to delete from, e.g. "sample" + :param id: the ID of the object to delete + :param check: whether to raise an error if the response is not successful + :return a ResultContainer describing the deleted object if successful + """ + results = self.engine.delete(endpoint=endpoint, id=id, auth=self.auth) if check: results.assert_success() return results @@ -214,16 +231,6 @@ def upload_resource( check=check, ) - def _read_page( - self, readid: bool, endpoint: str, query: dict[str, Any], idonly: bool = False, page: int = 1 - ) -> ResultContainer: - """Reads the specified page of objects from the specified endpoint that match the query.""" - if readid: - # https://fgcz-bfabric.uzh.ch/wiki/tiki-index.php?page=endpoint.workunit#Web_Method_readid_ - return self.engine.readid(endpoint=endpoint, obj=query, auth=self.auth, page=page) - else: - return self.engine.read(endpoint=endpoint, obj=query, auth=self.auth, page=page, idonly=idonly) - ############################ # Multi-query functionality ############################ @@ -267,7 +274,7 @@ def read_multi( # automatically? If yes, perhaps we don't need this method at all? # TODO: It is assumed that a user requesting multi_query always wants all of the pages. Can anybody think of # exceptions to this? 
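The multi-query loop above chunks its values with `page_iter`; a minimal sketch, assuming the default chunk size equals `BFABRIC_QUERY_LIMIT` (100):

```python
from bfabric.utils.paginator import BFABRIC_QUERY_LIMIT, page_iter

ids = list(range(250))  # hypothetical value list longer than one API page
for chunk in page_iter(ids):
    assert len(chunk) <= BFABRIC_QUERY_LIMIT
    print(len(chunk))  # 100, 100, 50 under the stated assumption
```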
- response_this = self.read(endpoint, obj_extended, max_results=None, readid=readid, idonly=idonly) + response_this = self.read(endpoint, obj_extended, max_results=None, return_id_only=idonly) response_tot.extend(response_this) return response_tot @@ -405,7 +412,7 @@ def get_system_auth( raise ValueError("base_url missing") if not optional_auth: if not auth or not auth.login or not auth.password: - raise ValueError("Authentification not initialized but required") + raise ValueError("Authentication not initialized but required") if verbose: pprint(config) diff --git a/bfabric/engine/engine_suds.py b/bfabric/engine/engine_suds.py index 519a9803..724e4442 100644 --- a/bfabric/engine/engine_suds.py +++ b/bfabric/engine/engine_suds.py @@ -27,33 +27,32 @@ def read( obj: dict[str, Any], auth: BfabricAuth, page: int = 1, - idonly: bool = False, - includedeletableupdateable: bool = False, + return_id_only: bool = False, + include_deletable_and_updatable_fields: bool = False, ) -> ResultContainer: """Reads the requested `obj` from `endpoint`. - :param endpoint: the endpoint to read, e.g. `workunit`, `project`, `order`, `externaljob`, etc. - :param obj: a python dictionary which contains all the attribute values that have to match + :param endpoint: the endpoint to read from, e.g. "sample" + :param obj: a dictionary containing the query, for every field multiple possible values can be provided, the + final query requires the condition for each field to be met :param auth: the authentication handle of the user performing the request :param page: the page number to read - :param idonly: whether to return only the ids of the objects - :param includedeletableupdateable: TODO + :param return_id_only: whether to return only the ids of the objects + :param include_deletable_and_updatable_fields: whether to include the deletable and updatable fields """ query = copy.deepcopy(obj) - query["includedeletableupdateable"] = includedeletableupdateable + query["includedeletableupdateable"] = include_deletable_and_updatable_fields - full_query = dict(login=auth.login, page=page, password=auth.password, query=query, idonly=idonly) + full_query = dict(login=auth.login, page=page, password=auth.password, query=query, idonly=return_id_only) service = self._get_suds_service(endpoint) response = service.read(full_query) return self._convert_results(response=response, endpoint=endpoint) - # TODO: How is client.service.readid different from client.service.read. Do we need this method? - def readid(self, endpoint: str, query: dict, auth: BfabricAuth, page: int = 1) -> ResultContainer: - query = dict(login=auth.login, page=page, password=auth.password, query=query) - service = self._get_suds_service(endpoint) - response = service.readid(query) - return self._convert_results(response=response, endpoint=endpoint) - def save(self, endpoint: str, obj: dict, auth: BfabricAuth) -> ResultContainer: + """Saves the provided object to the specified endpoint. + :param endpoint: the endpoint to save to, e.g. 
"sample" + :param obj: the object to save + :param auth: the authentication handle of the user performing the request + """ query = {"login": auth.login, "password": auth.password, endpoint: obj} service = self._get_suds_service(endpoint) try: @@ -63,6 +62,11 @@ def save(self, endpoint: str, obj: dict, auth: BfabricAuth) -> ResultContainer: return self._convert_results(response=response, endpoint=endpoint) def delete(self, endpoint: str, id: int | list[int], auth: BfabricAuth) -> ResultContainer: + """Deletes the object with the specified ID from the specified endpoint. + :param endpoint: the endpoint to delete from, e.g. "sample" + :param id: the ID of the object to delete + :param auth: the authentication handle of the user performing the request + """ if isinstance(id, list) and len(id) == 0: print("Warning, attempted to delete an empty list, ignoring") # TODO maybe use error here (and make sure it's consistent) diff --git a/bfabric/engine/engine_zeep.py b/bfabric/engine/engine_zeep.py index b6db0284..b631da0d 100644 --- a/bfabric/engine/engine_zeep.py +++ b/bfabric/engine/engine_zeep.py @@ -23,11 +23,20 @@ def read( obj: dict, auth: BfabricAuth, page: int = 1, - idonly: bool = False, - includedeletableupdateable: bool = False, + return_id_only: bool = False, + include_deletable_and_updatable_fields: bool = False, ) -> ResultContainer: + """Reads the requested `obj` from `endpoint`. + :param endpoint: the endpoint to read from, e.g. "sample" + :param obj: a dictionary containing the query, for every field multiple possible values can be provided, the + final query requires the condition for each field to be met + :param auth: the authentication handle of the user performing the request + :param page: the page number to read + :param return_id_only: whether to return only the ids of the objects + :param include_deletable_and_updatable_fields: whether to include the deletable and updatable fields + """ query = copy.deepcopy(obj) - query["includedeletableupdateable"] = includedeletableupdateable + query["includedeletableupdateable"] = include_deletable_and_updatable_fields # FIXME: Hacks for the cases where Zeep thinks a parameter is compulsory and it is actually not if endpoint == "sample": @@ -43,19 +52,19 @@ def read( ] _zeep_query_append_skipped(query, excl_keys, inplace=True, overwrite=False) - full_query = dict(login=auth.login, page=page, password=auth.password, query=query, idonly=idonly) + full_query = dict(login=auth.login, page=page, password=auth.password, query=query, idonly=return_id_only) client = self._get_client(endpoint) with client.settings(strict=False, xml_huge_tree=True, xsd_ignore_sequence_order=True): response = client.service.read(full_query) return self._convert_results(response=response, endpoint=endpoint) - def readid( - self, endpoint: str, obj: dict, auth: BfabricAuth, page: int = 1, includedeletableupdateable: bool = True - ) -> ResultContainer: - raise NotImplementedError("Attempted to use a method `readid` of Zeep, which does not exist") - def save(self, endpoint: str, obj: dict, auth: BfabricAuth) -> ResultContainer: + """Saves the provided object to the specified endpoint. + :param endpoint: the endpoint to save to, e.g. 
"sample" + :param obj: the object to save + :param auth: the authentication handle of the user performing the request + """ query = copy.deepcopy(obj) # FIXME: Hacks for the cases where Zeep thinks a parameter is compulsory and it is actually not @@ -77,6 +86,11 @@ def save(self, endpoint: str, obj: dict, auth: BfabricAuth) -> ResultContainer: return self._convert_results(response=response, endpoint=endpoint) def delete(self, endpoint: str, id: int | list[int], auth: BfabricAuth) -> ResultContainer: + """Deletes the object with the specified ID from the specified endpoint. + :param endpoint: the endpoint to delete from, e.g. "sample" + :param id: the ID of the object to delete + :param auth: the authentication handle of the user performing the request + """ if isinstance(id, list) and len(id) == 0: print("Warning, attempted to delete an empty list, ignoring") # TODO maybe use error here (and make sure it's consistent) diff --git a/bfabric/examples/compare_zeep_suds_pagination.py b/bfabric/examples/compare_zeep_suds_pagination.py index 1e1be1a1..041f2137 100644 --- a/bfabric/examples/compare_zeep_suds_pagination.py +++ b/bfabric/examples/compare_zeep_suds_pagination.py @@ -30,7 +30,7 @@ def _calc_query(config, auth, engine, endpoint): print("Sending query via", engine) b = Bfabric(config, auth, engine=engine) - response_class = b.read(endpoint, {}, max_results=300, idonly=False, includedeletableupdateable=True) + response_class = b.read(endpoint, {}, max_results=300, return_id_only=False, includedeletableupdateable=True) response_dict = response_class.to_list_dict(drop_empty=True, have_sort_responses=True) return list_dict_to_df(response_dict) diff --git a/bfabric/tests/integration/test_bfabric2_time_inequality_query.py b/bfabric/tests/integration/test_bfabric2_time_inequality_query.py index 91264f07..437496b6 100755 --- a/bfabric/tests/integration/test_bfabric2_time_inequality_query.py +++ b/bfabric/tests/integration/test_bfabric2_time_inequality_query.py @@ -13,7 +13,7 @@ def setUp(self): # Init the engines self.clients = { "zeep": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP), - "suds": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS) + "suds": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS), } def _test_created_before_after(self, engine: str): @@ -23,65 +23,64 @@ def _test_created_before_after(self, engine: str): # 0. Create a workunit query = { - 'name': 'CatPetter9000', - 'applicationid': 1, - 'containerid': 3000, - 'description': 'Best cat petter ever', + "name": "CatPetter9000", + "applicationid": 1, + "containerid": 3000, + "description": "Best cat petter ever", } - res = bf.save('workunit', query).to_list_dict() + res = bf.save("workunit", query).to_list_dict() self.assertIsNotNone(res) - self.assertEquals(len(res), 1) - self.assertIn('id', res[0]) - workunit_id = res[0]['id'] + self.assertEqual(1, len(res)) + self.assertIn("id", res[0]) + workunit_id = res[0]["id"] # 1. 
Create a bunch of resources resource_ids = [] resource_created = [] for i_resource in range(n_resources): query = { - 'name': 'kitten_' + str(i_resource), + "name": "kitten_" + str(i_resource), # 'sampleid': 1, - 'filechecksum': 0, - 'relativepath': '/catpath/kitten_' + str(i_resource), - 'size': 0, - 'status': "pending", - 'storageid': 1, - 'workunitid': workunit_id + "filechecksum": 0, + "relativepath": "/catpath/kitten_" + str(i_resource), + "size": 0, + "status": "pending", + "storageid": 1, + "workunitid": workunit_id, } - res = bf.save('resource', query).to_list_dict() + res = bf.save("resource", query).to_list_dict() self.assertIsNotNone(res) self.assertEquals(len(res), 1) - self.assertIn('id', res[0]) - self.assertIn('created', res[0]) + self.assertIn("id", res[0]) + self.assertIn("created", res[0]) - resource_ids += [res[0]['id']] - resource_created += [datetime.fromisoformat(res[0]['created'])] + resource_ids += [res[0]["id"]] + resource_created += [datetime.fromisoformat(res[0]["created"])] # 2. attempt to find the resources we just created by datetime # NOTE: query = { - 'workunitid': workunit_id, - 'createdbefore': str(max(resource_created) + timedelta(seconds=1)), - 'createdafter': str(min(resource_created)), + "workunitid": workunit_id, + "createdbefore": str(max(resource_created) + timedelta(seconds=1)), + "createdafter": str(min(resource_created)), } - results = bf.read('resource', query, idonly = True).to_list_dict() + results = bf.read("resource", query, return_id_only=True).to_list_dict() # 3. delete all created resources. Do this before test not to leave undeleted resources behind if possible - bf.delete('resource', resource_ids) - bf.delete('workunit', workunit_id) + bf.delete("resource", resource_ids) + bf.delete("workunit", workunit_id) # 4. 
Check that the found resources are the ones we created # NOTE: We might find more resources, if somebody created resources at the same time as us # Hence, we are testing for a subset, not an exact match - resource_ids_found = [r['id'] for r in results] + resource_ids_found = [r["id"] for r in results] isSubset = set(resource_ids).issubset(set(resource_ids_found)) # if not isSubset: # print(min(resource_ids), max(resource_ids), set(resource_ids) - set(resource_ids_found), set(resource_ids_found) - set(resource_ids)) self.assertTrue(isSubset) - def test_created(self): self._test_created_before_after("suds") self._test_created_before_after("zeep") diff --git a/bfabric/tests/unit/test_bfabric.py b/bfabric/tests/unit/test_bfabric.py index f43e9c2d..355114be 100644 --- a/bfabric/tests/unit/test_bfabric.py +++ b/bfabric/tests/unit/test_bfabric.py @@ -1,7 +1,7 @@ import datetime import unittest from functools import cached_property -from unittest.mock import MagicMock, patch, ANY +from unittest.mock import MagicMock, patch, ANY, call from bfabric import Bfabric, BfabricAPIEngineType, BfabricConfig from bfabric.engine.engine_suds import EngineSUDS @@ -19,7 +19,8 @@ def mock_bfabric(self) -> Bfabric: return Bfabric(config=self.mock_config, auth=self.mock_auth, engine=self.mock_engine_type) @patch("bfabric.bfabric.get_system_auth") - def test_from_config_when_no_args(self, mock_get_system_auth): + @patch("bfabric.bfabric.EngineSUDS") + def test_from_config_when_no_args(self, _mock_engine_suds, mock_get_system_auth): mock_config = MagicMock(name="mock_config") mock_auth = MagicMock(name="mock_auth") mock_get_system_auth.return_value = (mock_config, mock_auth) @@ -30,7 +31,8 @@ def test_from_config_when_no_args(self, mock_get_system_auth): mock_get_system_auth.assert_called_once_with(config_env=None) @patch("bfabric.bfabric.get_system_auth") - def test_from_config_when_explicit_auth(self, mock_get_system_auth): + @patch("bfabric.bfabric.EngineSUDS") + def test_from_config_when_explicit_auth(self, _mock_engine_suds, mock_get_system_auth): mock_config = MagicMock(name="mock_config") mock_auth = MagicMock(name="mock_auth") mock_config_auth = MagicMock(name="mock_config_auth") @@ -42,7 +44,8 @@ def test_from_config_when_explicit_auth(self, mock_get_system_auth): mock_get_system_auth.assert_called_once_with(config_env="TestingEnv") @patch("bfabric.bfabric.get_system_auth") - def test_from_config_when_none_auth(self, mock_get_system_auth): + @patch("bfabric.bfabric.EngineSUDS") + def test_from_config_when_none_auth(self, _mock_engine_suds, mock_get_system_auth): mock_config = MagicMock(name="mock_config") mock_auth = MagicMock(name="mock_auth") mock_get_system_auth.return_value = (mock_config, mock_auth) @@ -54,6 +57,50 @@ def test_from_config_when_none_auth(self, mock_get_system_auth): self.assertIn("Authentication not available", str(error.exception)) mock_get_system_auth.assert_called_once_with(config_env="TestingEnv") + @patch("bfabric.bfabric.get_system_auth") + @patch("bfabric.bfabric.EngineSUDS") + def test_from_config_when_engine_suds(self, mock_engine_suds, mock_get_system_auth): + mock_config = MagicMock(name="mock_config") + mock_auth = MagicMock(name="mock_auth") + mock_get_system_auth.return_value = (mock_config, mock_auth) + client = Bfabric.from_config(engine=BfabricAPIEngineType.SUDS) + + self.assertIsInstance(client, Bfabric) + self.assertEqual(mock_config, client.config) + self.assertEqual(mock_auth, client.auth) + self.assertEqual(mock_engine_suds.return_value, client.engine) + 
mock_get_system_auth.assert_called_once_with(config_env=None) + + mock_engine_suds.assert_called_once_with(base_url=mock_config.base_url) + self.assertEqual(mock_engine_suds.return_value, client.engine) + + @patch("bfabric.bfabric.get_system_auth") + @patch("bfabric.bfabric.EngineZeep") + def test_from_config_when_engine_zeep(self, mock_engine_zeep, mock_get_system_auth): + mock_config = MagicMock(name="mock_config") + mock_auth = MagicMock(name="mock_auth") + mock_get_system_auth.return_value = (mock_config, mock_auth) + client = Bfabric.from_config(engine=BfabricAPIEngineType.ZEEP) + + self.assertIsInstance(client, Bfabric) + self.assertEqual(mock_config, client.config) + self.assertEqual(mock_auth, client.auth) + self.assertEqual(mock_engine_zeep.return_value, client.engine) + mock_get_system_auth.assert_called_once_with(config_env=None) + + mock_engine_zeep.assert_called_once_with(base_url=mock_config.base_url) + self.assertEqual(mock_engine_zeep.return_value, client.engine) + + @patch.object(Bfabric, "print_version_message") + @patch("bfabric.bfabric.get_system_auth") + @patch("bfabric.bfabric.EngineSUDS") + def test_from_config_when_verbose(self, _mock_engine_suds, mock_get_system_auth, mock_print_version_message): + mock_config = MagicMock(name="mock_config") + mock_auth = MagicMock(name="mock_auth") + mock_get_system_auth.return_value = (mock_config, mock_auth) + client = Bfabric.from_config(verbose=True) + mock_print_version_message.assert_called_once_with() + def test_query_counter(self): self.assertEqual(0, self.mock_bfabric.query_counter) @@ -88,6 +135,138 @@ def test_with_auth_when_exception(self): pass self.assertEqual(mock_old_auth, self.mock_bfabric.auth) + def test_read_when_no_pages_available_and_check(self): + self.mock_auth = MagicMock(name="mock_auth") + with patch.object(self.mock_bfabric, "engine") as mock_engine: + mock_result = MagicMock(name="mock_result", total_pages_api=0, assert_success=MagicMock()) + mock_engine.read.return_value = mock_result + result = self.mock_bfabric.read(endpoint="mock_endpoint", obj="mock_obj") + self.assertEqual(mock_result.get_first_n_results.return_value, result) + mock_engine.read.assert_called_once_with( + endpoint="mock_endpoint", obj="mock_obj", auth=self.mock_auth, page=1, return_id_only=False + ) + mock_result.assert_success.assert_called_once_with() + mock_result.get_first_n_results.assert_called_once_with(100) + + @patch("bfabric.bfabric.compute_requested_pages") + def test_read_when_pages_available_and_check(self, mock_compute_requested_pages): + self.mock_auth = MagicMock(name="mock_auth") + with patch.object(self.mock_bfabric, "engine") as mock_engine: + mock_page_results = [ + MagicMock( + name="mock_page_result_1", + assert_success=MagicMock(), + total_pages_api=3, + errors=[], + ), + MagicMock( + name="mock_page_result_2", + assert_success=MagicMock(), + total_pages_api=3, + errors=[], + ), + MagicMock( + name="mock_page_result_3", + assert_success=MagicMock(), + total_pages_api=3, + errors=[], + ), + ] + mock_page_results[0].__getitem__.side_effect = lambda i: [1, 2, 3, 4, 5][i] + mock_page_results[1].__getitem__.side_effect = lambda i: [6, 7, 8, 9, 10][i] + mock_page_results[2].__getitem__.side_effect = lambda i: [11, 12, 13, 14, 15][i] + + mock_engine.read.side_effect = lambda **kwargs: mock_page_results[kwargs["page"] - 1] + mock_compute_requested_pages.return_value = ([1, 2], 4) + + result = self.mock_bfabric.read(endpoint="mock_endpoint", obj="mock_obj") + + mock_compute_requested_pages.assert_called_once_with( + 
n_page_total=3, + n_item_per_page=100, + n_item_offset=0, + n_item_return_max=100, + ) + self.assertListEqual([], result.errors) + self.assertListEqual( + [ + call.read( + endpoint="mock_endpoint", obj="mock_obj", auth=self.mock_auth, page=1, return_id_only=False + ), + call.read( + endpoint="mock_endpoint", obj="mock_obj", auth=self.mock_auth, page=2, return_id_only=False + ), + ], + mock_engine.mock_calls, + ) + self.assertEqual(6, len(result)) + self.assertEqual(5, result[0]) + self.assertEqual(10, result[5]) + + def test_save_when_no_auth(self): + endpoint = "test_endpoint" + obj = {"key": "value"} + with patch.object(self.mock_bfabric, "engine") as mock_engine: + with self.assertRaises(ValueError) as error: + self.mock_bfabric.save(endpoint, obj) + self.assertEqual("Authentication not available", str(error.exception)) + mock_engine.save.assert_not_called() + + def test_save_when_auth_and_check_false(self): + endpoint = "test_endpoint" + obj = {"key": "value"} + self.mock_auth = MagicMock(name="mock_auth") + method_assert_success = MagicMock(name="method_assert_success") + with patch.object(self.mock_bfabric, "engine") as mock_engine: + mock_engine.save.return_value.assert_success = method_assert_success + result = self.mock_bfabric.save(endpoint, obj, check=False) + self.assertEqual(mock_engine.save.return_value, result) + method_assert_success.assert_not_called() + mock_engine.save.assert_called_once_with(endpoint=endpoint, obj=obj, auth=self.mock_auth) + + def test_save_when_auth_and_check_true(self): + endpoint = "test_endpoint" + obj = {"key": "value"} + self.mock_auth = MagicMock(name="mock_auth") + method_assert_success = MagicMock(name="method_assert_success") + with patch.object(self.mock_bfabric, "engine") as mock_engine: + mock_engine.save.return_value.assert_success = method_assert_success + result = self.mock_bfabric.save(endpoint, obj) + self.assertEqual(mock_engine.save.return_value, result) + method_assert_success.assert_called_once_with() + mock_engine.save.assert_called_once_with(endpoint=endpoint, obj=obj, auth=self.mock_auth) + + def test_delete_when_no_auth(self): + endpoint = "test_endpoint" + obj = {"key": "value"} + with patch.object(self.mock_bfabric, "engine") as mock_engine: + with self.assertRaises(ValueError) as error: + self.mock_bfabric.delete(endpoint, obj) + self.assertEqual("Authentication not available", str(error.exception)) + mock_engine.delete.assert_not_called() + + def test_delete_when_auth_and_check_false(self): + endpoint = "test_endpoint" + self.mock_auth = MagicMock(name="mock_auth") + method_assert_success = MagicMock(name="method_assert_success") + with patch.object(self.mock_bfabric, "engine") as mock_engine: + mock_engine.delete.return_value.assert_success = method_assert_success + result = self.mock_bfabric.delete(endpoint=endpoint, id=10, check=False) + self.assertEqual(mock_engine.delete.return_value, result) + method_assert_success.assert_not_called() + mock_engine.delete.assert_called_once_with(endpoint=endpoint, id=10, auth=self.mock_auth) + + def test_delete_when_auth_and_check_true(self): + endpoint = "test_endpoint" + self.mock_auth = MagicMock(name="mock_auth") + method_assert_success = MagicMock(name="method_assert_success") + with patch.object(self.mock_bfabric, "engine") as mock_engine: + mock_engine.delete.return_value.assert_success = method_assert_success + result = self.mock_bfabric.delete(endpoint=endpoint, id=10) + self.assertEqual(mock_engine.delete.return_value, result) + 
method_assert_success.assert_called_once_with() + mock_engine.delete.assert_called_once_with(endpoint=endpoint, id=10, auth=self.mock_auth) + @patch.object(Bfabric, "save") def test_upload_resource(self, method_save): resource_name = "hello_world.txt" From a45675d22f09fa43eeb95467f6177c11aa0f199a Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 16 May 2024 15:54:31 +0200 Subject: [PATCH 102/129] Extract multiquery (#94) As there remain several important questions to be answered about some MultiQuery functionality, this PR will move these methods into a separate module to prevent too much of the code getting entangled again until we have something that will be maintained long term. ```python client.read_multi(...) ``` becomes ```python from bfabric.experimental.multi_query import MultiQuery MultiQuery(client).read_multi(...) ``` --- bfabric/bfabric.py | 127 +++--------------- bfabric/examples/exists_multi.py | 10 +- bfabric/experimental/__init__.py | 0 bfabric/experimental/multi_query.py | 118 ++++++++++++++++ .../tests/integration/test_bfabric2_exists.py | 12 +- .../integration/test_bfabric2_save_delete.py | 3 +- pyproject.toml | 2 +- 7 files changed, 151 insertions(+), 121 deletions(-) create mode 100644 bfabric/experimental/__init__.py create mode 100644 bfabric/experimental/multi_query.py diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index 137eddd7..11c4d634 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -18,7 +18,6 @@ import logging import os from contextlib import contextmanager -from copy import deepcopy from datetime import datetime from enum import Enum from pprint import pprint @@ -32,7 +31,7 @@ from bfabric.engine.engine_suds import EngineSUDS from bfabric.engine.engine_zeep import EngineZeep from bfabric.results.result_container import ResultContainer -from bfabric.utils.paginator import compute_requested_pages, BFABRIC_QUERY_LIMIT, page_iter +from bfabric.utils.paginator import compute_requested_pages, BFABRIC_QUERY_LIMIT class BfabricAPIEngineType(Enum): @@ -209,6 +208,23 @@ def delete(self, endpoint: str, id: int | list[int], check: bool = True) -> Resu results.assert_success() return results + def exists( + self, endpoint: str, key: str, value: int | str, query: dict[str, Any] | None = None, check: bool = True + ) -> bool: + """Returns whether an object with the specified key-value pair exists in the specified endpoint. + Further conditions can be specified in the query. + :param endpoint: the endpoint to check, e.g. "sample" + :param key: the key to check, e.g. "id" + :param value: the value to check, e.g. 123 + :param query: additional query conditions (optional) + :param check: whether to raise an error if the response is not successful + """ + query = query or {} + results = self.read( + endpoint=endpoint, obj={**query, key: value}, max_results=1, check=check, return_id_only=True + ) + return len(results) > 0 + def upload_resource( self, resource_name: str, content: bytes, workunit_id: int, check: bool = True ) -> ResultContainer: @@ -231,113 +247,6 @@ def upload_resource( check=check, ) - ############################ - # Multi-query functionality - ############################ - - # TODO: Is this scope sufficient? Is there ever more than one multi-query parameter, and/or not at the root of dict? 
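A usage sketch for the `exists` helper added above (the endpoint, name, and container id are hypothetical):

```python
from bfabric import Bfabric

client = Bfabric.from_config("TEST")
# True if at least one workunit with this name exists in the given container
if client.exists("workunit", "name", "tomcat", query={"containerid": 3000}):
    print("workunit already exists")
```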
- def read_multi( - self, - endpoint: str, - obj: dict, - multi_query_key: str, - multi_query_vals: list, - readid: bool = False, - idonly: bool = False, - ) -> ResultContainer: - """ - Makes a 1-parameter multi-query (there is 1 parameter that takes a list of values) - Since the API only allows BFABRIC_QUERY_LIMIT queries per page, split the list into chunks before querying - :param endpoint: endpoint - :param obj: query dictionary - :param multi_query_key: key for which the multi-query is performed - :param multi_query_vals: list of values for which the multi-query is performed - :param readid: whether to use reading by ID. Currently only available for engine=SUDS - TODO: Test the extent to which this method works. Add safeguards - :param idonly: whether to return only the ids of the objects - :return: List of responses, packaged in the results container - - NOTE: It is assumed that there is only 1 response for each value. - """ - - response_tot = ResultContainer([], total_pages_api=0) - obj_extended = deepcopy(obj) # Make a copy of the query, not to make edits to the argument - - # Iterate over request chunks that fit into a single API page - for page_vals in page_iter(multi_query_vals): - obj_extended[multi_query_key] = page_vals - - # TODO: Test what happens if there are multiple responses to each of the individual queries. - # * What would happen? - # * What would happen if total number of responses would exceed 100 now? - # * What would happen if we naively made a multi-query with more than 100 values? Would API paginate - # automatically? If yes, perhaps we don't need this method at all? - # TODO: It is assumed that a user requesting multi_query always wants all of the pages. Can anybody think of - # exceptions to this? - response_this = self.read(endpoint, obj_extended, max_results=None, return_id_only=idonly) - response_tot.extend(response_this) - - return response_tot - - # NOTE: Save-multi method is likely useless. When saving multiple objects, they all have different fields. - # One option would be to provide a dataframe, but it might struggle with nested dicts - # Likely best solution is to not provide this method, and let users run a for-loop themselves. - # def save_multi(self, endpoint: str, obj_lst: list, **kwargs) -> ResultContainer: - # response_tot = ResultContainer([], self.result_type, total_pages_api = 0) - # - # # Iterate over request chunks that fit into a single API page - # for page_objs in page_iter(obj_lst): - # response_page = self.save(endpoint, page_objs, **kwargs) - # response_tot.extend(response_page) - # - # return response_tot - - def delete_multi(self, endpoint: str, id_list: list) -> ResultContainer: - response_tot = ResultContainer([], total_pages_api=0) - - if len(id_list) == 0: - print("Warning, empty list provided for deletion, ignoring") - return response_tot - - # Iterate over request chunks that fit into a single API page - for page_ids in page_iter(id_list): - response_page = self.delete(endpoint, page_ids) - response_tot.extend(response_page) - - return response_tot - - def exists(self, endpoint: str, key: str, value: list[int | str] | int | str) -> bool | list[bool]: - """ - :param endpoint: endpoint - :param key: A key for the query (e.g. id or name) - :param value: A value or a list of values - :return: Return a single bool or a list of bools for each value - For each value, test if a key with that value is found in the API. - """ - is_scalar = isinstance(value, (int, str)) - - # 1. 
Read data for this id - if is_scalar: - results = self.read(endpoint, {key: value}) - elif isinstance(value, list): - results = self.read_multi(endpoint, {}, key, value) - else: - raise ValueError("Unexpected data type", type(value)) - - # 2. Extract all the ids for which there was a response - result_vals = [] - for r in results.results: - if key in r: - result_vals += [r[key]] - elif "_" + key in r: # TODO: Remove this if SUDS bug is ever resolved - result_vals += [r["_" + key]] - - # 3. For each of the requested ids, return true if there was a response and false if there was not - if is_scalar: - return value in result_vals - else: - return [val in result_vals for val in value] - def get_version_message(self) -> str: """Returns the version message as a string.""" package_version = importlib.metadata.version("bfabric") diff --git a/bfabric/examples/exists_multi.py b/bfabric/examples/exists_multi.py index c20c6555..0e322350 100644 --- a/bfabric/examples/exists_multi.py +++ b/bfabric/examples/exists_multi.py @@ -1,10 +1,12 @@ from bfabric import BfabricAPIEngineType, Bfabric from bfabric.bfabric import get_system_auth +from bfabric.experimental.multi_query import MultiQuery + config, auth = get_system_auth(config_env="TEST") -b1 = Bfabric(config, auth, engine=BfabricAPIEngineType.SUDS) -b2 = Bfabric(config, auth, engine=BfabricAPIEngineType.ZEEP) +b1 = MultiQuery(Bfabric(config, auth, engine=BfabricAPIEngineType.SUDS)) +b2 = MultiQuery(Bfabric(config, auth, engine=BfabricAPIEngineType.ZEEP)) ################### @@ -25,8 +27,8 @@ target_workunit_names = ["tomcat", "tomcat2"] -response1 = b1.exists("workunit", "name", target_workunit_names) -response2 = b2.exists("workunit", "name", target_workunit_names) +response1 = b1.exists_multi("workunit", "name", target_workunit_names) +response2 = b2.exists_multi("workunit", "name", target_workunit_names) print(response1) print(response2) diff --git a/bfabric/experimental/__init__.py b/bfabric/experimental/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bfabric/experimental/multi_query.py b/bfabric/experimental/multi_query.py new file mode 100644 index 00000000..5f772ddb --- /dev/null +++ b/bfabric/experimental/multi_query.py @@ -0,0 +1,118 @@ +from __future__ import annotations + +from copy import deepcopy + +from bfabric.results.result_container import ResultContainer +from bfabric.utils.paginator import page_iter +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from bfabric.bfabric import Bfabric + + +class MultiQuery: + """Some advanced functionality that supports paginating over a list of conditions that is larger than the 100 + conditions limit of the API. + This functionality might eventually be merged into the main Bfabric class but will probably be subject to some + breaking changes and is not as thoroughly tested as the main classes functionality. + """ + def __init__(self, client: Bfabric) -> None: + self._client = client + + # TODO: Is this scope sufficient? Is there ever more than one multi-query parameter, and/or not at the root of dict? 
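+    # For orientation, a sketch of the chunking this class builds on: page_iter
+    # splits a long list of values into API-sized chunks (the sizes below assume
+    # the default page size of BFABRIC_QUERY_LIMIT, i.e. 100, and are
+    # illustrative):
+    #
+    #     from bfabric.utils.paginator import page_iter
+    #     for chunk in page_iter(list(range(250))):
+    #         print(len(chunk))  # -> 100, 100, 50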
+ def read_multi( + self, + endpoint: str, + obj: dict, + multi_query_key: str, + multi_query_vals: list, + return_id_only: bool = False, + ) -> ResultContainer: + """Performs a 1-parameter multi-query (there is 1 parameter that takes a list of values) + Since the API only allows BFABRIC_QUERY_LIMIT queries per page, split the list into chunks before querying + :param endpoint: endpoint + :param obj: query dictionary + :param multi_query_key: key for which the multi-query is performed + :param multi_query_vals: list of values for which the multi-query is performed + :param return_id_only: whether to return only the ids of the objects + :return: List of responses, packaged in the results container + + NOTE: It is assumed that there is only 1 response for each value. + """ + # TODO add `check` parameter + response_tot = ResultContainer([], total_pages_api=0) + obj_extended = deepcopy(obj) # Make a copy of the query, not to make edits to the argument + + # Iterate over request chunks that fit into a single API page + for page_vals in page_iter(multi_query_vals): + obj_extended[multi_query_key] = page_vals + + # TODO: Test what happens if there are multiple responses to each of the individual queries. + # * What would happen? + # * What would happen if total number of responses would exceed 100 now? + # * What would happen if we naively made a multi-query with more than 100 values? Would API paginate + # automatically? If yes, perhaps we don't need this method at all? + # TODO: It is assumed that a user requesting multi_query always wants all of the pages. Can anybody think of + # exceptions to this? + response_this = self._client.read(endpoint, obj_extended, max_results=None, return_id_only=return_id_only) + response_tot.extend(response_this) + + return response_tot + + # NOTE: Save-multi method is likely useless. When saving multiple objects, they all have different fields. + # One option would be to provide a dataframe, but it might struggle with nested dicts + # Likely best solution is to not provide this method, and let users run a for-loop themselves. + # def save_multi(self, endpoint: str, obj_lst: list, **kwargs) -> ResultContainer: + # response_tot = ResultContainer([], self.result_type, total_pages_api = 0) + # + # # Iterate over request chunks that fit into a single API page + # for page_objs in page_iter(obj_lst): + # response_page = self.save(endpoint, page_objs, **kwargs) + # response_tot.extend(response_page) + # + # return response_tot + + def delete_multi(self, endpoint: str, id_list: list[int]) -> ResultContainer: + """Deletes multiple objects from `endpoint` by their ids.""" + # TODO document and test error handling + # TODO add `check` parameter + response_tot = ResultContainer([], total_pages_api=0) + + if not id_list: + print("Warning, empty list provided for deletion, ignoring") + return response_tot + + # Iterate over request chunks that fit into a single API page + for page_ids in page_iter(id_list): + response_page = self._client.delete(endpoint, page_ids) + response_tot.extend(response_page) + + return response_tot + + def exists_multi(self, endpoint: str, key: str, value: list[int | str] | int | str) -> bool | list[bool]: + """ + :param endpoint: endpoint + :param key: A key for the query (e.g. id or name) + :param value: A value or a list of values + :return: Return a single bool or a list of bools for each value + For each value, test if a key with that value is found in the API. 
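+
+        A sketch of the intended behaviour (ids and results are illustrative):
+
+            >>> MultiQuery(client).exists_multi("workunit", "id", [42, 43, 999999999])
+            [True, True, False]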
+ """ + is_scalar = isinstance(value, (int, str)) + if is_scalar: + return self._client.exists(endpoint=endpoint, key=key, value=value, check=True) + elif not isinstance(value, list): + raise ValueError("Unexpected data type", type(value)) + + # 1. Read data for this id + results = self.read_multi(endpoint, {}, key, value) + + # 2. Extract all the ids for which there was a response + result_vals = [] + for r in results.results: + if key in r: + result_vals += [r[key]] + elif "_" + key in r: # TODO: Remove this if SUDS bug is ever resolved + result_vals += [r["_" + key]] + + # 3. For each of the requested ids, return true if there was a response and false if there was not + return [val in result_vals for val in value] diff --git a/bfabric/tests/integration/test_bfabric2_exists.py b/bfabric/tests/integration/test_bfabric2_exists.py index b4c025b0..d3a69c75 100644 --- a/bfabric/tests/integration/test_bfabric2_exists.py +++ b/bfabric/tests/integration/test_bfabric2_exists.py @@ -1,16 +1,12 @@ import unittest from bfabric import BfabricAPIEngineType, Bfabric -from bfabric.bfabric import get_system_auth class BfabricTestExists(unittest.TestCase): - def setUp(self): - self.config, self.auth = get_system_auth(config_env="TEST") - def _test_single_exists(self, engine: BfabricAPIEngineType): - bf = Bfabric(self.config, self.auth, engine=engine) - res = bf.exists("dataset", "id", 30721) # Take ID which is the same as in production + client = Bfabric.from_config("TEST", engine=engine) + res = client.exists("dataset", "id", 30721) self.assertEqual(res, True) def test_zeep(self): @@ -18,3 +14,7 @@ def test_zeep(self): def test_suds(self): self._test_single_exists(engine=BfabricAPIEngineType.SUDS) + + +if __name__ == "__main__": + pass diff --git a/bfabric/tests/integration/test_bfabric2_save_delete.py b/bfabric/tests/integration/test_bfabric2_save_delete.py index 2596857e..4026ac5b 100644 --- a/bfabric/tests/integration/test_bfabric2_save_delete.py +++ b/bfabric/tests/integration/test_bfabric2_save_delete.py @@ -3,6 +3,7 @@ from bfabric import BfabricAPIEngineType, Bfabric from bfabric.bfabric import get_system_auth +from bfabric.experimental.multi_query import MultiQuery def _find_delete_existing_objects_by_name(b: Bfabric, endpoint: str, name_list: list) -> Tuple[list, list]: @@ -16,7 +17,7 @@ def _find_delete_existing_objects_by_name(b: Bfabric, endpoint: str, name_list: """ # 1. 
Check which objects exist - objs_exist = b.exists(endpoint, "name", name_list) + objs_exist = MultiQuery(b).exists_multi(endpoint, "name", name_list) objs_exist_names = [name for i, name in enumerate(name_list) if objs_exist[i]] if len(objs_exist_names) == 0: diff --git a/pyproject.toml b/pyproject.toml index 4277722e..1608406f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,7 +76,7 @@ indent-width = 4 target-version = "py39" [tool.ruff.lint] -select = ["ANN", "BLE", "D103", "E", "F", "PLW", "PTH", "SIM", "UP"] +select = ["ANN", "BLE", "D103", "E", "F", "PLW", "PTH", "SIM", "UP", "TCH"] ignore = ["ANN101"] [tool.licensecheck] From 4fa8debaacab48b349e2060070305a9e1814d6ae Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 16 May 2024 16:37:38 +0200 Subject: [PATCH 103/129] Fix pyproject.toml for installation --- pyproject.toml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1608406f..45e0c3e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,8 +2,10 @@ requires = ["setuptools >= 61.0"] build-backend = "setuptools.build_meta" -[tool.setuptools] -py-modules = ["bfabric"] +[tool.setuptools.packages.find] +include = ["bfabric*"] # package names should match these glob patterns (["*"] by default) +#exclude = ["bfabric.tests*"] # exclude packages matching these glob patterns (empty by default) +#namespaces = false # to disable scanning PEP 420 namespaces (true by default) [project] name = "bfabric" @@ -27,6 +29,7 @@ dependencies = [ "zeep >= 4.2.1", "pandas >= 2.2.2", "polars >= 0.20.25", + "setuptools" ] [project.optional-dependencies] From 9eb7ea609bfcca85001bfac6f873a00c06c3e3c2 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 16 May 2024 17:15:24 +0200 Subject: [PATCH 104/129] make the github actions more strict --- .github/workflows/run_unit_tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run_unit_tests.yml b/.github/workflows/run_unit_tests.yml index a18af677..d4df8852 100644 --- a/.github/workflows/run_unit_tests.yml +++ b/.github/workflows/run_unit_tests.yml @@ -17,7 +17,8 @@ jobs: with: python-version: 3.9 - name: Run unit tests - run: python -m unittest discover -s bfabric/tests/unit -p 'test_*.py' + # Note: we use cd to double-check that the installation actually worked + run: cd bfabric/tests && python -m unittest discover -s ./unit -p 'test_*.py' code_style: name: Code Style runs-on: ubuntu-latest From 76faf79940d09f451c26e4f2f44eba164a0d4e7d Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 16 May 2024 17:23:27 +0200 Subject: [PATCH 105/129] clean up pyproject.toml --- .gitignore | 1 + pyproject.toml | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 3d81f558..d6656746 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ __pycache__ bfabric.egg-info/ bfabric/scripts/query_result.txt +build/ dist/ diff --git a/pyproject.toml b/pyproject.toml index 45e0c3e5..143ed6ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,14 +3,12 @@ requires = ["setuptools >= 61.0"] build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] -include = ["bfabric*"] # package names should match these glob patterns (["*"] by default) -#exclude = ["bfabric.tests*"] # exclude packages matching these glob patterns (empty by default) -#namespaces = false # to disable scanning PEP 420 namespaces (true by default) +include = ["bfabric*"] [project] name = "bfabric" description = "Python client 
for the B-Fabric WSDL API" -version = "0.13.8" +version = "0.13.9" license = { text = "GPL-3.0" } authors = [ {name = "Christian Panse", email = "cp@fgcz.ethz.ch"}, From 7bf2caa43d95dd396781ea12d082eb4e0034cb06 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 16 May 2024 17:24:55 +0200 Subject: [PATCH 106/129] move some type-checking only imports --- bfabric/engine/engine_suds.py | 8 +++++--- bfabric/engine/engine_zeep.py | 6 ++++-- bfabric/tests/integration/integration_test_helper.py | 6 ++++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/bfabric/engine/engine_suds.py b/bfabric/engine/engine_suds.py index 724e4442..258d0f1d 100644 --- a/bfabric/engine/engine_suds.py +++ b/bfabric/engine/engine_suds.py @@ -1,17 +1,19 @@ from __future__ import annotations import copy -from typing import Any +from typing import Any, TYPE_CHECKING from suds import MethodNotFound from suds.client import Client -from suds.serviceproxy import ServiceProxy -from bfabric.bfabric_config import BfabricAuth from bfabric.engine.response_format_suds import suds_asdict_recursive from bfabric.errors import BfabricRequestError, get_response_errors from bfabric.results.result_container import _clean_result, ResultContainer +if TYPE_CHECKING: + from suds.serviceproxy import ServiceProxy + from bfabric.bfabric_config import BfabricAuth + class EngineSUDS: """B-Fabric API SUDS Engine.""" diff --git a/bfabric/engine/engine_zeep.py b/bfabric/engine/engine_zeep.py index b631da0d..dec76780 100644 --- a/bfabric/engine/engine_zeep.py +++ b/bfabric/engine/engine_zeep.py @@ -1,14 +1,16 @@ from __future__ import annotations import copy -from typing import Any +from typing import Any, TYPE_CHECKING import zeep from zeep.helpers import serialize_object -from bfabric.bfabric_config import BfabricAuth from bfabric.errors import BfabricRequestError, get_response_errors from bfabric.results.result_container import ResultContainer, _clean_result +if TYPE_CHECKING: + from bfabric.bfabric_config import BfabricAuth + class EngineZeep: """B-Fabric API Zeep Engine""" diff --git a/bfabric/tests/integration/integration_test_helper.py b/bfabric/tests/integration/integration_test_helper.py index 83ed0314..85e20da2 100644 --- a/bfabric/tests/integration/integration_test_helper.py +++ b/bfabric/tests/integration/integration_test_helper.py @@ -1,8 +1,10 @@ from __future__ import annotations -from typing import Any +from typing import Any, TYPE_CHECKING -from bfabric import Bfabric + +if TYPE_CHECKING: + from bfabric import Bfabric class DeleteEntities: From 697899aef5408b84f5ad7cd7a50141835a80ef00 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Thu, 16 May 2024 17:29:56 +0200 Subject: [PATCH 107/129] test for multi-methods --- .../integration/test_bfabric2_save_delete.py | 4 +- .../tests/integration/test_multi_methods.py | 76 +++++++++++++++++++ 2 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 bfabric/tests/integration/test_multi_methods.py diff --git a/bfabric/tests/integration/test_bfabric2_save_delete.py b/bfabric/tests/integration/test_bfabric2_save_delete.py index 4026ac5b..4bb09692 100644 --- a/bfabric/tests/integration/test_bfabric2_save_delete.py +++ b/bfabric/tests/integration/test_bfabric2_save_delete.py @@ -71,8 +71,8 @@ def _save_delete_workunit(self, b: Bfabric, verbose: bool = False) -> None: print("Phase 2: Creating the target units") new_ids = [] for name in workunit_names: - workunit1 = {"name": name, "applicationid": 2, "description": "is warm and fluffy", "containerid": 3000} - 
response = b.save("workunit", workunit1).to_list_dict()  # We do the conversion to drop underscores in SUDS
+        query = {"name": name, "applicationid": 2, "description": "is warm and fluffy", "containerid": 3000}
+        response = b.save("workunit", query).to_list_dict()  # We do the conversion to drop underscores in SUDS

         if verbose:
             print(response[0])
diff --git a/bfabric/tests/integration/test_multi_methods.py b/bfabric/tests/integration/test_multi_methods.py
new file mode 100644
index 00000000..286c7910
--- /dev/null
+++ b/bfabric/tests/integration/test_multi_methods.py
@@ -0,0 +1,76 @@
+import json
+import os
+import unittest
+
+from bfabric import Bfabric, BfabricAPIEngineType
+from bfabric.experimental.multi_query import MultiQuery
+
+
+class BfabricTestMulti(unittest.TestCase):
+    def setUp(self, *args, **kwargs):
+        # Load ground truth
+        path = os.path.join(os.path.dirname(__file__), "groundtruth.json")
+        with open(path) as json_file:
+            self.ground_truth = json.load(json_file)
+
+        # Create clients
+        self.clients = {
+            "zeep": Bfabric.from_config("TEST", engine=BfabricAPIEngineType.ZEEP),
+            "suds": Bfabric.from_config("TEST", engine=BfabricAPIEngineType.SUDS),
+        }
+
+    def _test_multi_read_delete(self, engine: str):
+        """
+        Create many workunits
+        * Test if reading multiple of those workunits works
+        * Test if exists on multiple workunits works
+        * Test if deleting multiple workunits works
+        """
+        with self.subTest(engine=engine):
+            bf: Bfabric = self.clients[engine]
+            mq = MultiQuery(bf)
+
+            # 1. Create a bunch of workunits
+            # Note: we create more than 100, to make sure pagination works correctly
+            n_units = 105
+            workunit_ids = []
+            for i in range(n_units):
+                query = {"name": "fancy_workunit_"+str(i), "applicationid": 2, "description": "is very fancy", "containerid": 3000}
+                res = bf.save("workunit", query).to_list_dict()
+                self.assertEqual(len(res), 1)
+                self.assertIn("id", res[0])
+                workunit_ids += [res[0]['id']]
+
+
+            #2. TODO: Make sure that the results are indeed read correctly, not just read
+            res = mq.read_multi('workunit', {}, 'id', workunit_ids, return_id_only=True)
+
+            #3. Check if correct ones exist and fake one does not
+            res = mq.exists_multi('workunit', 'id', workunit_ids + [10101010101010])
+            self.assertEqual(len(res), n_units + 1)
+            self.assertTrue(all(res[:n_units]))
+            self.assertFalse(res[n_units])
+
+            # 4. Delete all workunits at the same time
+            res = mq.delete_multi('workunit', workunit_ids)
+            self.assertEqual(len(res), n_units)
+
+
+    # TODO: Implement me
+    def _test_multi_read_complex(self, engine: str):
+        """
+        The main idea is to test how the B-Fabric API behaves when it is given multiple values for the same field,
+        where for each value there can be more than one result.
+        * e.g. for 'id' there is only one result, but for 'status' there could be many
+        * a test could try to get all files with {'status': ['archived', 'archiving']} that have been recently created,
+        such that in total there are more than 100 results.
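+        * a sketch of such a call (endpoint and values are illustrative, not a tested query):
+          bf.read("resource", {"status": ["archived", "archiving"]}, max_results=None)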
+ """ + pass + + def test_multi_delete(self): + self._test_multi_read_delete("suds") + self._test_multi_read_delete("zeep") + + +if __name__ == "__main__": + unittest.main(verbosity=2) From 9ae1ac9446526f2bcb3102c02f994543bded845b Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 17 May 2024 08:07:30 +0200 Subject: [PATCH 108/129] set total_pages_api to `None` and avoid warnings in multiquery use case --- bfabric/experimental/multi_query.py | 6 +- bfabric/results/result_container.py | 13 ++-- .../tests/integration/test_multi_methods.py | 65 ++++++++++--------- 3 files changed, 44 insertions(+), 40 deletions(-) diff --git a/bfabric/experimental/multi_query.py b/bfabric/experimental/multi_query.py index 5f772ddb..4ecdf4fa 100644 --- a/bfabric/experimental/multi_query.py +++ b/bfabric/experimental/multi_query.py @@ -55,7 +55,7 @@ def read_multi( # TODO: It is assumed that a user requesting multi_query always wants all of the pages. Can anybody think of # exceptions to this? response_this = self._client.read(endpoint, obj_extended, max_results=None, return_id_only=return_id_only) - response_tot.extend(response_this) + response_tot.extend(response_this, reset_total_pages_api=True) return response_tot @@ -68,7 +68,7 @@ def read_multi( # # Iterate over request chunks that fit into a single API page # for page_objs in page_iter(obj_lst): # response_page = self.save(endpoint, page_objs, **kwargs) - # response_tot.extend(response_page) + # response_tot.extend(response_page, reset_total_pages_api=True # # return response_tot @@ -85,7 +85,7 @@ def delete_multi(self, endpoint: str, id_list: list[int]) -> ResultContainer: # Iterate over request chunks that fit into a single API page for page_ids in page_iter(id_list): response_page = self._client.delete(endpoint, page_ids) - response_tot.extend(response_page) + response_tot.extend(response_page, reset_total_pages_api=True) return response_tot diff --git a/bfabric/results/result_container.py b/bfabric/results/result_container.py index 8ae2c1f9..4a7e69cd 100644 --- a/bfabric/results/result_container.py +++ b/bfabric/results/result_container.py @@ -61,15 +61,16 @@ def errors(self) -> list: """List of errors that occurred during the query. An empty list means the query was successful.""" return self._errors - def extend(self, other: ResultContainer) -> None: - """ - Can merge results of two queries. This can happen if the engine splits a complicated query in two - :param other: The other query results that should be appended to this - :return: + def extend(self, other: ResultContainer, reset_total_pages_api: bool = False) -> None: + """Merges the results of `other` into this container. 
+        :param other: The container whose elements to append to the end of this container
+        :param reset_total_pages_api: If True, the total_pages_api attribute will be reset to None
         """
         self.results += other.results
         self._errors += other.errors
-        if self._total_pages_api != other.total_pages_api:
+        if reset_total_pages_api:
+            self._total_pages_api = None
+        elif self._total_pages_api != other.total_pages_api:
             logging.warning(
                 f"Results observed with different total pages counts: "
                 f"{self._total_pages_api} != {other.total_pages_api}"
diff --git a/bfabric/tests/integration/test_multi_methods.py b/bfabric/tests/integration/test_multi_methods.py
index 286c7910..db7c03f0 100644
--- a/bfabric/tests/integration/test_multi_methods.py
+++ b/bfabric/tests/integration/test_multi_methods.py
@@ -1,6 +1,6 @@
 import json
-import os
 import unittest
+from pathlib import Path

 from bfabric import Bfabric, BfabricAPIEngineType
 from bfabric.experimental.multi_query import MultiQuery
@@ -9,8 +9,8 @@
 class BfabricTestMulti(unittest.TestCase):
     def setUp(self, *args, **kwargs):
         # Load ground truth
-        path = os.path.join(os.path.dirname(__file__), "groundtruth.json")
-        with open(path) as json_file:
+        path = Path(__file__).parent / "groundtruth.json"
+        with path.open() as json_file:
             self.ground_truth = json.load(json_file)

         # Create clients
@@ -20,41 +20,42 @@ def setUp(self, *args, **kwargs):
         }

     def _test_multi_read_delete(self, engine: str):
-        """
-        Create many workunits
+        """Creates many workunits
         * Test if reading multiple of those workunits works
         * Test if exists on multiple workunits works
         * Test if deleting multiple workunits works
         """
-        with self.subTest(engine=engine):
-            bf: Bfabric = self.clients[engine]
-            mq = MultiQuery(bf)
-
-            # 1. Create a bunch of workunits
-            # Note: we create more than 100, to make sure pagination works correctly
-            n_units = 105
-            workunit_ids = []
-            for i in range(n_units):
-                query = {"name": "fancy_workunit_"+str(i), "applicationid": 2, "description": "is very fancy", "containerid": 3000}
-                res = bf.save("workunit", query).to_list_dict()
-                self.assertEqual(len(res), 1)
-                self.assertIn("id", res[0])
-                workunit_ids += [res[0]['id']]
+        bf: Bfabric = self.clients[engine]
+        mq = MultiQuery(bf)
+        # 1. Create a bunch of workunits
+        # Note: we create more than 100, to make sure pagination works correctly
+        n_units = 105
+        workunit_ids = []
+        for i in range(n_units):
+            query = {
+                "name": f"fancy_workunit_{i}",
+                "applicationid": 2,
+                "description": "is very fancy",
+                "containerid": 3000,
+            }
+            res = bf.save("workunit", query).to_list_dict()
+            self.assertEqual(len(res), 1)
+            self.assertIn("id", res[0])
+            workunit_ids += [res[0]["id"]]

-            #2. TODO: Make sure that the results are indeed read correctly, not just read
-            res = mq.read_multi('workunit', {}, 'id', workunit_ids, return_id_only=True)
+        # 2. TODO: Make sure that the results are indeed read correctly, not just read
+        res = mq.read_multi("workunit", {}, "id", workunit_ids, return_id_only=True)

-            #3. Check if correct ones exist and fake one does not
-            res = mq.exists_multi('workunit', 'id', workunit_ids + [10101010101010])
-            self.assertEqual(len(res), n_units + 1)
-            self.assertTrue(all(res[:n_units]))
-            self.assertFalse(res[n_units])
-
-            # 4. Delete all workunits at the same time
-            res = mq.delete_multi('workunit', workunit_ids)
-            self.assertEqual(len(res), n_units)
+        # 3. Check if correct ones exist and fake one does not
+        res = mq.exists_multi("workunit", "id", workunit_ids + [10101010101010])
+        self.assertEqual(len(res), n_units + 1)
+        self.assertTrue(all(res[:n_units]))
+        self.assertFalse(res[n_units])

+        # 4. Delete all workunits at the same time
+        res = mq.delete_multi("workunit", workunit_ids)
+        self.assertEqual(len(res), n_units)

     # TODO: Implement me
     def _test_multi_read_complex(self, engine: str):
         """
@@ -67,8 +68,10 @@ def _test_multi_read_complex(self, engine: str):
         """
         pass

-    def test_multi_delete(self):
+    def test_multi_delete_when_suds(self):
         self._test_multi_read_delete("suds")
+
+    def test_multi_delete_when_zeep(self):
         self._test_multi_read_delete("zeep")


From cb1e01fdda2893846c381e8ebf5c5e4c2d8fd62c Mon Sep 17 00:00:00 2001
From: Leonardo Schwarz
Date: Fri, 17 May 2024 08:08:53 +0200
Subject: [PATCH 109/129] move the old integration tests

---
 bfabric/tests/old_integration/__init__.py                      | 0
 bfabric/tests/{ => old_integration}/__test_bfabric.py          | 0
 bfabric/tests/{ => old_integration}/groundtruth.json           | 0
 bfabric/tests/{ => old_integration}/test_bfabric_executable.py | 0
 bfabric/tests/{ => old_integration}/test_bfabric_functional.py | 0
 bfabric/tests/{ => old_integration}/test_bfabric_read.py       | 0
 bfabric/tests/{ => old_integration}/test_bfabric_sample.py     | 0
 bfabric/tests/{ => old_integration}/test_bfabric_workunit.py   | 0
 8 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 bfabric/tests/old_integration/__init__.py
 rename bfabric/tests/{ => old_integration}/__test_bfabric.py (100%)
 rename bfabric/tests/{ => old_integration}/groundtruth.json (100%)
 rename bfabric/tests/{ => old_integration}/test_bfabric_executable.py (100%)
 rename bfabric/tests/{ => old_integration}/test_bfabric_functional.py (100%)
 rename bfabric/tests/{ => old_integration}/test_bfabric_read.py (100%)
 rename bfabric/tests/{ => old_integration}/test_bfabric_sample.py (100%)
 rename bfabric/tests/{ => old_integration}/test_bfabric_workunit.py (100%)

diff --git a/bfabric/tests/old_integration/__init__.py b/bfabric/tests/old_integration/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/bfabric/tests/__test_bfabric.py b/bfabric/tests/old_integration/__test_bfabric.py
similarity index 100%
rename from bfabric/tests/__test_bfabric.py
rename to bfabric/tests/old_integration/__test_bfabric.py
diff --git a/bfabric/tests/groundtruth.json b/bfabric/tests/old_integration/groundtruth.json
similarity index 100%
rename from bfabric/tests/groundtruth.json
rename to bfabric/tests/old_integration/groundtruth.json
diff --git a/bfabric/tests/test_bfabric_executable.py b/bfabric/tests/old_integration/test_bfabric_executable.py
similarity index 100%
rename from bfabric/tests/test_bfabric_executable.py
rename to bfabric/tests/old_integration/test_bfabric_executable.py
diff --git a/bfabric/tests/test_bfabric_functional.py b/bfabric/tests/old_integration/test_bfabric_functional.py
similarity index 100%
rename from bfabric/tests/test_bfabric_functional.py
rename to bfabric/tests/old_integration/test_bfabric_functional.py
diff --git a/bfabric/tests/test_bfabric_read.py b/bfabric/tests/old_integration/test_bfabric_read.py
similarity index 100%
rename from bfabric/tests/test_bfabric_read.py
rename to bfabric/tests/old_integration/test_bfabric_read.py
diff --git a/bfabric/tests/test_bfabric_sample.py b/bfabric/tests/old_integration/test_bfabric_sample.py
similarity index 100%
rename from bfabric/tests/test_bfabric_sample.py
rename to
bfabric/tests/old_integration/test_bfabric_sample.py diff --git a/bfabric/tests/test_bfabric_workunit.py b/bfabric/tests/old_integration/test_bfabric_workunit.py similarity index 100% rename from bfabric/tests/test_bfabric_workunit.py rename to bfabric/tests/old_integration/test_bfabric_workunit.py From 62c72f2490bc84861b052b6f623539800cacf135 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 17 May 2024 08:13:44 +0200 Subject: [PATCH 110/129] move demo_config.yaml --- demo_config.yaml => bfabric/wrapper_creator/demo_config.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename demo_config.yaml => bfabric/wrapper_creator/demo_config.yaml (100%) diff --git a/demo_config.yaml b/bfabric/wrapper_creator/demo_config.yaml similarity index 100% rename from demo_config.yaml rename to bfabric/wrapper_creator/demo_config.yaml From 2a0d859a627ef52dd12728d8fb60b990bc52aae4 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 17 May 2024 08:19:32 +0200 Subject: [PATCH 111/129] clean up shebang line and encoding declaration --- bfabric/scripts/bfabric_executable_submitter_functionalTest.py | 1 - bfabric/scripts/bfabric_executable_submitter_gridengine.py | 1 - bfabric/scripts/bfabric_executable_submitter_slurm.py | 1 - bfabric/scripts/bfabric_executable_wrappercreator.py | 1 - bfabric/scripts/bfabric_feeder_resource_autoQC.py | 2 +- .../scripts/bfabric_list_not_existing_storage_directories.py | 2 +- bfabric/scripts/bfabric_save_csv2dataset.py | 2 +- bfabric/scripts/bfabric_save_dataset2csv.py | 2 +- bfabric/scripts/bfabric_save_workflowstep.py | 2 +- bfabric/scripts/bfabric_save_workunit_attribute.py | 2 +- bfabric/scripts/bfabric_wrapper_creator_yaml.py | 3 +-- bfabric/scripts/fgcz_maxquant_wrapper.py | 2 +- bfabric/tests/old_integration/__test_bfabric.py | 1 - bfabric/tests/old_integration/test_bfabric_executable.py | 1 - bfabric/tests/old_integration/test_bfabric_functional.py | 1 - bfabric/tests/old_integration/test_bfabric_sample.py | 1 - bfabric/tests/old_integration/test_bfabric_workunit.py | 1 - 17 files changed, 8 insertions(+), 18 deletions(-) diff --git a/bfabric/scripts/bfabric_executable_submitter_functionalTest.py b/bfabric/scripts/bfabric_executable_submitter_functionalTest.py index da5ce62c..c20a038f 100755 --- a/bfabric/scripts/bfabric_executable_submitter_functionalTest.py +++ b/bfabric/scripts/bfabric_executable_submitter_functionalTest.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- """ Submitter for B-Fabric functional test diff --git a/bfabric/scripts/bfabric_executable_submitter_gridengine.py b/bfabric/scripts/bfabric_executable_submitter_gridengine.py index 6c8859b4..149f90b9 100755 --- a/bfabric/scripts/bfabric_executable_submitter_gridengine.py +++ b/bfabric/scripts/bfabric_executable_submitter_gridengine.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- """ Submitter for B-Fabric diff --git a/bfabric/scripts/bfabric_executable_submitter_slurm.py b/bfabric/scripts/bfabric_executable_submitter_slurm.py index b2d25762..1d787ef2 100755 --- a/bfabric/scripts/bfabric_executable_submitter_slurm.py +++ b/bfabric/scripts/bfabric_executable_submitter_slurm.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- """ Submitter for B-Fabric diff --git a/bfabric/scripts/bfabric_executable_wrappercreator.py b/bfabric/scripts/bfabric_executable_wrappercreator.py index 42640a53..1004dccb 100755 --- a/bfabric/scripts/bfabric_executable_wrappercreator.py +++ b/bfabric/scripts/bfabric_executable_wrappercreator.py @@ -1,5 
+1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- """ A wrapper_creator for B-Fabric diff --git a/bfabric/scripts/bfabric_feeder_resource_autoQC.py b/bfabric/scripts/bfabric_feeder_resource_autoQC.py index 51c86188..111d0484 100755 --- a/bfabric/scripts/bfabric_feeder_resource_autoQC.py +++ b/bfabric/scripts/bfabric_feeder_resource_autoQC.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 """ feeds autoQC runs into bfabric diff --git a/bfabric/scripts/bfabric_list_not_existing_storage_directories.py b/bfabric/scripts/bfabric_list_not_existing_storage_directories.py index 4262148a..95f9534a 100755 --- a/bfabric/scripts/bfabric_list_not_existing_storage_directories.py +++ b/bfabric/scripts/bfabric_list_not_existing_storage_directories.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 """ Copyright (C) 2020 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. diff --git a/bfabric/scripts/bfabric_save_csv2dataset.py b/bfabric/scripts/bfabric_save_csv2dataset.py index 1f880387..212cad6f 100755 --- a/bfabric/scripts/bfabric_save_csv2dataset.py +++ b/bfabric/scripts/bfabric_save_csv2dataset.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 """ Author: Maria d'Errico diff --git a/bfabric/scripts/bfabric_save_dataset2csv.py b/bfabric/scripts/bfabric_save_dataset2csv.py index df24007f..81b39fbf 100755 --- a/bfabric/scripts/bfabric_save_dataset2csv.py +++ b/bfabric/scripts/bfabric_save_dataset2csv.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 """ Author: Maria d'Errico diff --git a/bfabric/scripts/bfabric_save_workflowstep.py b/bfabric/scripts/bfabric_save_workflowstep.py index 5884ea9e..81e20fe0 100755 --- a/bfabric/scripts/bfabric_save_workflowstep.py +++ b/bfabric/scripts/bfabric_save_workflowstep.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 """ Author: diff --git a/bfabric/scripts/bfabric_save_workunit_attribute.py b/bfabric/scripts/bfabric_save_workunit_attribute.py index cef3fbc5..e0a4b7fa 100755 --- a/bfabric/scripts/bfabric_save_workunit_attribute.py +++ b/bfabric/scripts/bfabric_save_workunit_attribute.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python3 """ Copyright (C) 2021 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. diff --git a/bfabric/scripts/bfabric_wrapper_creator_yaml.py b/bfabric/scripts/bfabric_wrapper_creator_yaml.py index 462d68c3..08adf5a4 100755 --- a/bfabric/scripts/bfabric_wrapper_creator_yaml.py +++ b/bfabric/scripts/bfabric_wrapper_creator_yaml.py @@ -1,5 +1,4 @@ -#!/usr/bin/python -# -*- coding: latin1 -*- +#!/usr/bin/env python3 """ A wrapper_creator for B-Fabric diff --git a/bfabric/scripts/fgcz_maxquant_wrapper.py b/bfabric/scripts/fgcz_maxquant_wrapper.py index 6c147ed7..eb250504 100755 --- a/bfabric/scripts/fgcz_maxquant_wrapper.py +++ b/bfabric/scripts/fgcz_maxquant_wrapper.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Copyright (C) 2017, 2018 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
# # Authors: diff --git a/bfabric/tests/old_integration/__test_bfabric.py b/bfabric/tests/old_integration/__test_bfabric.py index 4960be54..67c7c203 100755 --- a/bfabric/tests/old_integration/__test_bfabric.py +++ b/bfabric/tests/old_integration/__test_bfabric.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- """ unittest by diff --git a/bfabric/tests/old_integration/test_bfabric_executable.py b/bfabric/tests/old_integration/test_bfabric_executable.py index caa3349f..09e970bb 100755 --- a/bfabric/tests/old_integration/test_bfabric_executable.py +++ b/bfabric/tests/old_integration/test_bfabric_executable.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- """ unittest by diff --git a/bfabric/tests/old_integration/test_bfabric_functional.py b/bfabric/tests/old_integration/test_bfabric_functional.py index 709cd4af..e00081ef 100755 --- a/bfabric/tests/old_integration/test_bfabric_functional.py +++ b/bfabric/tests/old_integration/test_bfabric_functional.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- # Modified to use Slurm on November 9th 2020 diff --git a/bfabric/tests/old_integration/test_bfabric_sample.py b/bfabric/tests/old_integration/test_bfabric_sample.py index 0f34b922..9cd06163 100755 --- a/bfabric/tests/old_integration/test_bfabric_sample.py +++ b/bfabric/tests/old_integration/test_bfabric_sample.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- """ unittest by diff --git a/bfabric/tests/old_integration/test_bfabric_workunit.py b/bfabric/tests/old_integration/test_bfabric_workunit.py index 9ebd0ad2..1fded8b4 100755 --- a/bfabric/tests/old_integration/test_bfabric_workunit.py +++ b/bfabric/tests/old_integration/test_bfabric_workunit.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- """ unittest by From 494c6e754edeb40ae100c61eb43da5866506fd3e Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 17 May 2024 08:45:23 +0200 Subject: [PATCH 112/129] convenience re-export --- bfabric/experimental/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bfabric/experimental/__init__.py b/bfabric/experimental/__init__.py index e69de29b..eba22563 100644 --- a/bfabric/experimental/__init__.py +++ b/bfabric/experimental/__init__.py @@ -0,0 +1 @@ +from .multi_query import MultiQuery From af87c598a7539f8cdca79db9cd90e74771f43447 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 17 May 2024 08:46:10 +0200 Subject: [PATCH 113/129] reformat code --- .../bfabric_list_executables.py | 8 +- .../bfabric_read_dataset.py | 13 +- .../bfabric_read_sample_of_order.py | 2 +- .../bfabric_sample_graph_traversal.py | 79 ++--- .../bfabric_save_customattributes.py | 18 +- .../bfabric_save_qcloud2_annotation.py | 51 ++-- .../bfabric_save_resource.py | 64 ++-- bfabric/experimental/multi_query.py | 1 + bfabric/results/result_container.py | 1 + bfabric/scripts/bfabric_delete.py | 2 +- ...ric_executable_submitter_functionalTest.py | 26 +- ...bfabric_executable_submitter_gridengine.py | 26 +- .../bfabric_executable_submitter_slurm.py | 26 +- .../bfabric_executable_wrappercreator.py | 7 +- .../scripts/bfabric_feeder_resource_autoQC.py | 4 +- .../scripts/bfabric_wrapper_creator_yaml.py | 6 +- .../tests/old_integration/__test_bfabric.py | 60 ++-- .../test_bfabric_executable.py | 76 ++--- .../test_bfabric_functional.py | 159 ++++++---- .../old_integration/test_bfabric_sample.py | 35 ++- .../old_integration/test_bfabric_workunit.py | 144 ++++----- .../wrapper_creator/bfabric_external_job.py | 18 +- 
bfabric/wrapper_creator/bfabric_feeder.py | 28 +- bfabric/wrapper_creator/bfabric_submitter.py | 111 +++---- .../bfabric_wrapper_creator.py | 280 +++++++++++------- bfabric/wrapper_creator/gridengine.py | 32 +- bfabric/wrapper_creator/slurm.py | 29 +- 27 files changed, 725 insertions(+), 581 deletions(-) diff --git a/bfabric/deprecated_scripts/bfabric_list_executables.py b/bfabric/deprecated_scripts/bfabric_list_executables.py index 25ce1a1c..37cdcf7d 100755 --- a/bfabric/deprecated_scripts/bfabric_list_executables.py +++ b/bfabric/deprecated_scripts/bfabric_list_executables.py @@ -24,6 +24,8 @@ if __name__ == "__main__": bfapp = Bfabric() - res = bfapp.read_object(endpoint='executable', obj={}) - map(lambda x: sys.stdout.write("{}\t{}\t{}\t{}\t{}\n" - .format(x._id, x.createdby, x.modified, x.context, x.name)), res) + res = bfapp.read_object(endpoint="executable", obj={}) + map( + lambda x: sys.stdout.write("{}\t{}\t{}\t{}\t{}\n".format(x._id, x.createdby, x.modified, x.context, x.name)), + res, + ) diff --git a/bfabric/deprecated_scripts/bfabric_read_dataset.py b/bfabric/deprecated_scripts/bfabric_read_dataset.py index 1c9631ee..20767918 100755 --- a/bfabric/deprecated_scripts/bfabric_read_dataset.py +++ b/bfabric/deprecated_scripts/bfabric_read_dataset.py @@ -18,15 +18,18 @@ def signal_handler(signal, frame): - print('You pressed Ctrl+C!') + print("You pressed Ctrl+C!") sys.exit(0) + signal.signal(signal.SIGINT, signal_handler) + def print_color_msg(msg, color="93"): msg = "\033[{color}m--- {} ---\033[0m\n".format(msg, color=color) sys.stderr.write(msg) + def usage(): print("usage:\n") msg = "\t{} ".format(sys.argv[0]) @@ -34,14 +37,15 @@ def usage(): def dataset2csv(ds, sep="\t"): - print (type(ds.attribute)) + print(type(ds.attribute)) # print header - print (sep.join(map(lambda x: x.name, ds.attribute))) + print(sep.join(map(lambda x: x.name, ds.attribute))) # print values for i in ds.item: print(sep.join(map(lambda x: x.value, i.field))) + if __name__ == "__main__": bfapp = bfabric.Bfabric(verbose=False) @@ -50,12 +54,11 @@ def dataset2csv(ds, sep="\t"): query_obj = {} endpoint = "dataset" - if len(sys.argv) == 2: datasetid = sys.argv[1] start_time = time.time() - query_obj = {'id': '32003'} + query_obj = {"id": "32003"} print_color_msg("query = {}".format(query_obj)) res = bfapp.read_object(endpoint=endpoint, obj=query_obj) diff --git a/bfabric/deprecated_scripts/bfabric_read_sample_of_order.py b/bfabric/deprecated_scripts/bfabric_read_sample_of_order.py index 3744fc1b..dd3fe5ff 100644 --- a/bfabric/deprecated_scripts/bfabric_read_sample_of_order.py +++ b/bfabric/deprecated_scripts/bfabric_read_sample_of_order.py @@ -9,6 +9,7 @@ class bfabricEncoder(json.JSONEncoder): """ Implements json encoder for the Bfabric.print_json method """ + def default(self, o): try: return dict(o) @@ -23,4 +24,3 @@ def default(self, o): B = bfabric.Bfabric() - diff --git a/bfabric/deprecated_scripts/bfabric_sample_graph_traversal.py b/bfabric/deprecated_scripts/bfabric_sample_graph_traversal.py index 4d8b64ce..12de5240 100755 --- a/bfabric/deprecated_scripts/bfabric_sample_graph_traversal.py +++ b/bfabric/deprecated_scripts/bfabric_sample_graph_traversal.py @@ -36,7 +36,7 @@ class SampleGraph: # annotation.txt # data structure for keeping annotation.txt infos (de-multiplexed data) containing the tagging - #annotation = {} + # annotation = {} links = {} @@ -44,35 +44,40 @@ def __init__(self, annotation_template): self.annotation_template = annotation_template self.annotation = {} - def read_dataset(self, 
dataset_id): - ds = self.B.read_object(endpoint="dataset", obj={'id': dataset_id})[0] + ds = self.B.read_object(endpoint="dataset", obj={"id": dataset_id})[0] return ds def get_sampleID(self, relativepath): - res = self.B.read_object(endpoint='resource', obj={'relativepath': relativepath})[0] + res = self.B.read_object(endpoint="resource", obj={"relativepath": relativepath})[0] print("\t{} -> {}".format(res.sample._id, res._id)) return res.sample._id - def traverse(self, childSampleId): """ fill up the internal data structure for producing the manifest and annotation.txt files for each exp. """ - res = self.B.read_object(endpoint='sample', obj={'id': childSampleId}) + res = self.B.read_object(endpoint="sample", obj={"id": childSampleId}) childSample = res[0] - if "multiplexid" in childSample: + if "multiplexid" in childSample: # in this special case we reached last level keeping the tag - print ('''\t{} [shape=box label="{}\\n{}"];'''.format(childSample._id, childSample._id, childSample.multiplexid)) + print( + """\t{} [shape=box label="{}\\n{}"];""".format( + childSample._id, childSample._id, childSample.multiplexid + ) + ) try: self.annotation[childSample.multiplexid] = childSample.parent[0]._id except: - print("multiplexid {} for sample {} not in the annotation file template".format(childSample.multiplexid, childSample._id)) - + print( + "multiplexid {} for sample {} not in the annotation file template".format( + childSample.multiplexid, childSample._id + ) + ) - if 'parent' in childSample: + if "parent" in childSample: self.links[childSampleId] = [x._id for x in childSample.parent] for parent in childSample.parent: print("\t{} -> {}".format(parent._id, childSampleId)) @@ -80,9 +85,9 @@ def traverse(self, childSampleId): self.VISITED.append(parent._id) self.L.append(parent._id) - #print("\t# DEBUG = {}".format(len(self.L))) + # print("\t# DEBUG = {}".format(len(self.L))) - while (len(self.L) > 0): + while len(self.L) > 0: u = self.L[0] self.L.remove(u) self.traverse(u) @@ -93,69 +98,71 @@ def run(self, dataset_id): for i in ds.item: for x in i.field: if hasattr(x, "value") and x.attributeposition == attributeposition: - print ("# relativepath = {}".format(x.value)) + print("# relativepath = {}".format(x.value)) sampleID = self.get_sampleID(x.value) - print ("# inputSampleId = {}".format(sampleID)) + print("# inputSampleId = {}".format(sampleID)) self.annotation = self.annotation_template self.traverse(sampleID) experiment = self.links[sampleID] - if len(experiment)==1: + if len(experiment) == 1: self.write_annotation(experiment[0]) self.write_manifest(x.value, experiment[0]) else: - print("# Wrong inputSampleId, please check the sample ID {}, it should be after fractionation".format(sampleID)) + print( + "# Wrong inputSampleId, please check the sample ID {}, it should be after fractionation".format( + sampleID + ) + ) def write_annotation(self, experiment): dirname = str(experiment) if not os.path.isdir(dirname): print("# creating directory {}".format(dirname)) os.makedirs(dirname) - with open("./"+dirname+"/annotation.txt", "w") as f: - w = csv.writer(f, delimiter = '\t') + with open("./" + dirname + "/annotation.txt", "w") as f: + w = csv.writer(f, delimiter="\t") w.writerows(self.annotation.items()) else: pass def write_manifest(self, resource, experiment): filename = "manifest.fp-manifest" - pathtoresource = os.getcwd()+"/"+os.path.basename(resource) + pathtoresource = os.getcwd() + "/" + os.path.basename(resource) if not os.path.exists(filename): - with open (filename, "w") as f: - 
line = '\t'.join([pathtoresource, str(experiment), "", "", "DDA"]) + "\n" + with open(filename, "w") as f: + line = "\t".join([pathtoresource, str(experiment), "", "", "DDA"]) + "\n" f.write(line) else: - with open (filename, "a") as f: - line = '\t'.join([pathtoresource, str(experiment), "", "", "DDA"]) + "\n" + with open(filename, "a") as f: + line = "\t".join([pathtoresource, str(experiment), "", "", "DDA"]) + "\n" f.write(line) - if __name__ == "__main__": - dataset_id = 44384 #int(sys.argv[1]) - - infile = open(sys.argv[1], 'r') - annotation_template = {} + dataset_id = 44384 # int(sys.argv[1]) + + infile = open(sys.argv[1], "r") + annotation_template = {} for line in infile: line = line.strip() - content = line.split(' ', 1) - annotation_template.update({content[0]:content[1]}) + content = line.split(" ", 1) + annotation_template.update({content[0]: content[1]}) infile.close() # constructor - print ('''digraph G{\n\trankdir="LR";''') + print("""digraph G{\n\trankdir="LR";""") G = SampleGraph(annotation_template) G.run(dataset_id) - #for s in [461042, 461041, 461017]: + # for s in [461042, 461041, 461017]: # G.annotation = G.annotation_template.copy() # G.traverse(s) # G.write_annotation(s) # print("# {}".format(G.annotation)) # print("# {}".format(G.annotation_template)) - #print("# {}".format(G.links)) - - print ('''}''') + # print("# {}".format(G.links)) + print("""}""") """ diff --git a/bfabric/deprecated_scripts/bfabric_save_customattributes.py b/bfabric/deprecated_scripts/bfabric_save_customattributes.py index 064d978f..b60c2409 100755 --- a/bfabric/deprecated_scripts/bfabric_save_customattributes.py +++ b/bfabric/deprecated_scripts/bfabric_save_customattributes.py @@ -20,8 +20,9 @@ """ bf = bfabric.Bfabric(verbose=False) + def annotate(sampleid=None, name=None, value=None): - res = bf.read_object(endpoint='sample', obj={'id': sampleid}) + res = bf.read_object(endpoint="sample", obj={"id": sampleid}) try: customattribute = res[0].customattribute @@ -33,29 +34,30 @@ def annotate(sampleid=None, name=None, value=None): # there are no customattributes defined yet customattribute = [] - customattribute.append({'name': "{}".format(name), 'value': "{}".format(value)}) - res = bf.save_object(endpoint='sample', - obj={'id': sampleid, 'customattribute': customattribute}) + customattribute.append({"name": "{}".format(name), "value": "{}".format(value)}) + res = bf.save_object(endpoint="sample", obj={"id": sampleid, "customattribute": customattribute}) print(res[0]) -def process(filename = "/Users/cp/Desktop/annotation.csv", tryrun = True): + +def process(filename="/Users/cp/Desktop/annotation.csv", tryrun=True): with open(filename) as csv_file: - csv_reader = csv.reader(csv_file, delimiter=',') + csv_reader = csv.reader(csv_file, delimiter=",") count = 0 for row in csv_reader: if count == 0: colnames = row else: - #print("{}\t{}".format(count, row)) + # print("{}\t{}".format(count, row)) x = re.search(".*_[sS]([0-9]+)_.+", row[0]) if x is not None: print("sampleID={sample}".format(sample=x.group(1))) for idx in range(1, len(row)): - print ("\t{}={}".format(colnames[idx], row[idx])) + print("\t{}={}".format(colnames[idx], row[idx])) if tryrun is False: annotate(sampleid=x.group(1), name=colnames[idx], value=row[idx]) count = count + 1 + if __name__ == "__main__": process(tryrun=False) diff --git a/bfabric/deprecated_scripts/bfabric_save_qcloud2_annotation.py b/bfabric/deprecated_scripts/bfabric_save_qcloud2_annotation.py index a38d68f9..b1882bae 100755 --- 
a/bfabric/deprecated_scripts/bfabric_save_qcloud2_annotation.py +++ b/bfabric/deprecated_scripts/bfabric_save_qcloud2_annotation.py @@ -7,42 +7,45 @@ if __name__ == "__main__": B = bfabric.Bfabric(verbose=False) obj = {} - obj['name'] = 'qcloud2 annotaion test dataset by CP' - obj['containerid'] = 3000 - obj['attribute'] = [ - {'name': 'user_date', 'position':1}, - {'name': 'user_email', 'position':2}, - {'name': 'additional_information', 'position':3}, - {'name': 'problems', 'position':4}, - {'name': 'actions', 'position':5} + obj["name"] = "qcloud2 annotaion test dataset by CP" + obj["containerid"] = 3000 + obj["attribute"] = [ + {"name": "user_date", "position": 1}, + {"name": "user_email", "position": 2}, + {"name": "additional_information", "position": 3}, + {"name": "problems", "position": 4}, + {"name": "actions", "position": 5}, ] - obj['item'] = [] + obj["item"] = [] - with open('LUMOS_2.json') as json_file: + with open("LUMOS_2.json") as json_file: d = json.load(json_file) for i in range(len(d)): try: - problems = " | ".join([ "{} ({})".format(j['name'], j['qccv']) for j in d[i]['problems'] ]) + problems = " | ".join(["{} ({})".format(j["name"], j["qccv"]) for j in d[i]["problems"]]) except: - problems = '-' + problems = "-" try: - actions = " | ".join([ "{} ({})".format(j['name'], j['qccv']) for j in d[i]['actions'] ]) + actions = " | ".join(["{} ({})".format(j["name"], j["qccv"]) for j in d[i]["actions"]]) except: - actions = '-' + actions = "-" - it = {'field':[ - {'value': d[i]['user_date'], 'attributeposition':1}, - {'value': d[i]['user_email'], 'attributeposition':2}, - {'value': d[i]['additional_information'], 'attributeposition':3}, - {'value': problems, 'attributeposition':4}, - {'value': actions, 'attributeposition':5} - ], 'position': i + 1} - obj['item'].append(it) + it = { + "field": [ + {"value": d[i]["user_date"], "attributeposition": 1}, + {"value": d[i]["user_email"], "attributeposition": 2}, + {"value": d[i]["additional_information"], "attributeposition": 3}, + {"value": problems, "attributeposition": 4}, + {"value": actions, "attributeposition": 5}, + ], + "position": i + 1, + } + obj["item"].append(it) print(obj) - #res = B.save_object(endpoint='dataset', obj=obj) - #print (res[0]) + # res = B.save_object(endpoint='dataset', obj=obj) + # print (res[0]) """ curl --location --request GET 'https://api.qcloud2.crg.eu/annotations?start_date=2019-04-01&end_date=2021-10-03&labsystem_name=LUMOS_2' --header "Authorization: Bearer ${ACCESSTOKEN}" > LUMOS_2.json diff --git a/bfabric/deprecated_scripts/bfabric_save_resource.py b/bfabric/deprecated_scripts/bfabric_save_resource.py index a7435a31..a1559323 100755 --- a/bfabric/deprecated_scripts/bfabric_save_resource.py +++ b/bfabric/deprecated_scripts/bfabric_save_resource.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 -''' +""" author: Christian Panse 20200424-1300 @@ -22,7 +22,7 @@ && unzip -l ${resourcefile} \ | ./bfabric_save_resource.py -p 3000 -a 273 -r ${resourcefile} --stdin -''' +""" import sys @@ -37,12 +37,13 @@ BFABRICSTORAGEID = 2 + def save_resource(projectid=None, resourcefile=None, applicationid=None, read_stdin=False): bfapp = Bfabric() description = None - print ("DEBUG {}".format(read_stdin)) + print("DEBUG {}".format(read_stdin)) if read_stdin is True: try: print("reading stdin") @@ -52,60 +53,63 @@ def save_resource(projectid=None, resourcefile=None, applicationid=None, read_st raise try: - md5 = hashlib.md5(open(resourcefile, 'rb').read()).hexdigest() + md5 = hashlib.md5(open(resourcefile, 
"rb").read()).hexdigest() except: print("computing file checksum failed.") raise - resource = bfapp.read_object(endpoint='resource', obj={'filechecksum': md5}) + resource = bfapp.read_object(endpoint="resource", obj={"filechecksum": md5}) - try: + try: print("resource(s) already exist.".format(resource[0]._id)) - resource = bfapp.save_object(endpoint='resource', obj={'id': resource[0]._id, 'description': description}) + resource = bfapp.save_object(endpoint="resource", obj={"id": resource[0]._id, "description": description}) print(resource[0]) return except: pass - try: - workunit = bfapp.save_object(endpoint='workunit', - obj={'name': "{}".format(os.path.basename(resourcefile)), - 'projectid': projectid, - 'applicationid': applicationid}) + workunit = bfapp.save_object( + endpoint="workunit", + obj={ + "name": "{}".format(os.path.basename(resourcefile)), + "projectid": projectid, + "applicationid": applicationid, + }, + ) print(workunit) except: raise - - obj = {'workunitid': workunit[0]._id, - 'filechecksum': md5, - 'relativepath': "{}".format(resourcefile), - 'name': os.path.basename(resourcefile), - 'size': os.path.getsize(resourcefile), - 'status': 'available', - 'description': description, - 'storageid': BFABRICSTORAGEID - } - - - resource = bfapp.save_object(endpoint='resource', obj=obj)[0] + obj = { + "workunitid": workunit[0]._id, + "filechecksum": md5, + "relativepath": "{}".format(resourcefile), + "name": os.path.basename(resourcefile), + "size": os.path.getsize(resourcefile), + "status": "available", + "description": description, + "storageid": BFABRICSTORAGEID, + } + + resource = bfapp.save_object(endpoint="resource", obj=obj)[0] print(resource) - workunit = bfapp.save_object(endpoint='workunit', - obj={'id': workunit[0]._id, 'status': 'available'}) + workunit = bfapp.save_object(endpoint="workunit", obj={"id": workunit[0]._id, "status": "available"}) print(workunit) if __name__ == "__main__": - #resource_file = "/srv/www/htdocs/p3061/Proteomics/Analysis/fragpipe/cpanse_20200424/DS32024.zip" - #save_resource(projectid=3061, resource_file=resource_file, applicationid=274) + # resource_file = "/srv/www/htdocs/p3061/Proteomics/Analysis/fragpipe/cpanse_20200424/DS32024.zip" + # save_resource(projectid=3061, resource_file=resource_file, applicationid=274) (projectid, applicationid, resourefile) = (None, None, None) read_stdin = False try: - opts, args = getopt.getopt(sys.argv[1:],"hp:a:r:", ["help", "projectid=", "applicationid=", "resourcefile=", "stdin"]) + opts, args = getopt.getopt( + sys.argv[1:], "hp:a:r:", ["help", "projectid=", "applicationid=", "resourcefile=", "stdin"] + ) except getopt.GetoptError: usage() sys.exit(2) diff --git a/bfabric/experimental/multi_query.py b/bfabric/experimental/multi_query.py index 4ecdf4fa..13ef6638 100644 --- a/bfabric/experimental/multi_query.py +++ b/bfabric/experimental/multi_query.py @@ -16,6 +16,7 @@ class MultiQuery: This functionality might eventually be merged into the main Bfabric class but will probably be subject to some breaking changes and is not as thoroughly tested as the main classes functionality. 
""" + def __init__(self, client: Bfabric) -> None: self._client = client diff --git a/bfabric/results/result_container.py b/bfabric/results/result_container.py index 4a7e69cd..0a5c086c 100644 --- a/bfabric/results/result_container.py +++ b/bfabric/results/result_container.py @@ -96,6 +96,7 @@ def to_polars(self, drop_empty: bool = False) -> polars.DataFrame: :param drop_empty: If True, empty attributes will be removed from the results """ import polars + return polars.DataFrame(self.to_list_dict(drop_empty=drop_empty)) diff --git a/bfabric/scripts/bfabric_delete.py b/bfabric/scripts/bfabric_delete.py index a1ee7681..a29d2325 100755 --- a/bfabric/scripts/bfabric_delete.py +++ b/bfabric/scripts/bfabric_delete.py @@ -32,4 +32,4 @@ def main() -> None: if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/bfabric/scripts/bfabric_executable_submitter_functionalTest.py b/bfabric/scripts/bfabric_executable_submitter_functionalTest.py index c20a038f..fb814b63 100755 --- a/bfabric/scripts/bfabric_executable_submitter_functionalTest.py +++ b/bfabric/scripts/bfabric_executable_submitter_functionalTest.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 """ Submitter for B-Fabric functional test @@ -30,21 +30,24 @@ """ -#import os -#import sys +# import os +# import sys from optparse import OptionParser + def main(): - parser = OptionParser(usage="usage: %prog -j ", - version="%prog 1.0") + parser = OptionParser(usage="usage: %prog -j ", version="%prog 1.0") - parser.add_option("-j", "--externaljobid", - type='int', - action="store", - dest="externaljobid", - default=None, - help="external job id is required.") + parser.add_option( + "-j", + "--externaljobid", + type="int", + action="store", + dest="externaljobid", + default=None, + help="external job id is required.", + ) (options, args) = parser.parse_args() @@ -53,5 +56,6 @@ def main(): print("Dummy submitter xecutable defined for the bfabricPy functional test") + if __name__ == "__main__": main() diff --git a/bfabric/scripts/bfabric_executable_submitter_gridengine.py b/bfabric/scripts/bfabric_executable_submitter_gridengine.py index 149f90b9..28cf8ff4 100755 --- a/bfabric/scripts/bfabric_executable_submitter_gridengine.py +++ b/bfabric/scripts/bfabric_executable_submitter_gridengine.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 """ Submitter for B-Fabric @@ -30,22 +30,25 @@ """ -#import os -#import sys +# import os +# import sys from optparse import OptionParser from bfabric import BfabricSubmitter + def main(): - parser = OptionParser(usage="usage: %prog -j ", - version="%prog 1.0") + parser = OptionParser(usage="usage: %prog -j ", version="%prog 1.0") - parser.add_option("-j", "--externaljobid", - type='int', - action="store", - dest="externaljobid", - default=None, - help="external job id is required.") + parser.add_option( + "-j", + "--externaljobid", + type="int", + action="store", + dest="externaljobid", + default=None, + help="external job id is required.", + ) (options, args) = parser.parse_args() @@ -58,5 +61,6 @@ def main(): # TODO(cp): fix that # print(bfapp.query_counter) + if __name__ == "__main__": main() diff --git a/bfabric/scripts/bfabric_executable_submitter_slurm.py b/bfabric/scripts/bfabric_executable_submitter_slurm.py index 1d787ef2..c30f33e9 100755 --- a/bfabric/scripts/bfabric_executable_submitter_slurm.py +++ b/bfabric/scripts/bfabric_executable_submitter_slurm.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 """ Submitter for B-Fabric @@ 
-31,22 +31,25 @@ """ -#import os -#import sys +# import os +# import sys from optparse import OptionParser from bfabric import BfabricSubmitter + def main(): - parser = OptionParser(usage="usage: %prog -j ", - version="%prog 1.0") + parser = OptionParser(usage="usage: %prog -j ", version="%prog 1.0") - parser.add_option("-j", "--externaljobid", - type='int', - action="store", - dest="externaljobid", - default=None, - help="external job id is required.") + parser.add_option( + "-j", + "--externaljobid", + type="int", + action="store", + dest="externaljobid", + default=None, + help="external job id is required.", + ) (options, args) = parser.parse_args() @@ -59,5 +62,6 @@ def main(): # TODO(cp): fix that # print(bfapp.query_counter) + if __name__ == "__main__": main() diff --git a/bfabric/scripts/bfabric_executable_wrappercreator.py b/bfabric/scripts/bfabric_executable_wrappercreator.py index 1004dccb..eab039b6 100755 --- a/bfabric/scripts/bfabric_executable_wrappercreator.py +++ b/bfabric/scripts/bfabric_executable_wrappercreator.py @@ -20,7 +20,7 @@ # Licensed under GPL version 3 # # $HeadURL: http://fgcz-svn/repos/scripts/trunk/linux/bfabric/apps/python/wrapper_creator_yaml.py $ -# $Id: wrapper_creator_yaml.py 2397 2016-09-06 07:04:35Z cpanse $ +# $Id: wrapper_creator_yaml.py 2397 2016-09-06 07:04:35Z cpanse $ import os import sys @@ -28,13 +28,12 @@ if __name__ == "__main__": - externaljobid = -1 - if len(sys.argv) == 3 and sys.argv[1] == '-j' and int(sys.argv[2]) > 0: + if len(sys.argv) == 3 and sys.argv[1] == "-j" and int(sys.argv[2]) > 0: externaljobid = int(sys.argv[2]) else: - print("usage: " + sys.argv[0] + " -j ") + print("usage: " + sys.argv[0] + " -j ") sys.exit(1) bfapp = BfabricWrapperCreator(externaljobid=externaljobid) diff --git a/bfabric/scripts/bfabric_feeder_resource_autoQC.py b/bfabric/scripts/bfabric_feeder_resource_autoQC.py index 111d0484..d877605e 100755 --- a/bfabric/scripts/bfabric_feeder_resource_autoQC.py +++ b/bfabric/scripts/bfabric_feeder_resource_autoQC.py @@ -237,9 +237,7 @@ def feed(self, line): ) # sampleid=0 - print( - f"p{projectid}\tA{applicationid}\t{filename}\tS{sampleid}\tWU{workunitid}\tR{resourceid}" - ) + print(f"p{projectid}\tA{applicationid}\t{filename}\tS{sampleid}\tWU{workunitid}\tR{resourceid}") except Exception as err: print(f"# Failed to register to bfabric: {err}") diff --git a/bfabric/scripts/bfabric_wrapper_creator_yaml.py b/bfabric/scripts/bfabric_wrapper_creator_yaml.py index 08adf5a4..ab5b28c5 100755 --- a/bfabric/scripts/bfabric_wrapper_creator_yaml.py +++ b/bfabric/scripts/bfabric_wrapper_creator_yaml.py @@ -19,7 +19,7 @@ # Licensed under GPL version 3 # # $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/wrapper_creator_yaml.py $ -# $Id: wrapper_creator_yaml.py 2478 2016-09-26 09:46:53Z cpanse $ +# $Id: wrapper_creator_yaml.py 2478 2016-09-26 09:46:53Z cpanse $ import os import sys @@ -29,10 +29,10 @@ externaljobid = -1 - if len(sys.argv) == 3 and sys.argv[1] == '-j' and int(sys.argv[2]) > 0: + if len(sys.argv) == 3 and sys.argv[1] == "-j" and int(sys.argv[2]) > 0: externaljobid = int(sys.argv[2]) else: - print ("usage: {} -j ".format(sys.argv[0])) + print("usage: {} -j ".format(sys.argv[0])) sys.exit(1) bfapp = BfabricWrapperCreator(externaljobid=externaljobid) diff --git a/bfabric/tests/old_integration/__test_bfabric.py b/bfabric/tests/old_integration/__test_bfabric.py index 67c7c203..6243c681 100755 --- a/bfabric/tests/old_integration/__test_bfabric.py +++ 
b/bfabric/tests/old_integration/__test_bfabric.py @@ -10,21 +10,26 @@ """ ssh localhost "cat > /tmp/bb.py && /usr/bin/python /tmp/bb.py" < PycharmProjects/untitled/bfabric_wsdl.py """ + + class BfabricTestCase(unittest.TestCase): bfapp = BfabricLegacy(verbose=True) workunits = [] samples = [] - def workunit_save(self): print("WORKUNIT SAVE") - for name in ['test1', 'test2', 'test3']: - res = self.bfapp.save_object(endpoint='workunit', obj={'name': "unit test - {}".format(name), - 'containerid': 3000, - 'description': '68b329da9893e34099c7d8ad5cb9c940', - 'applicationid': 217 - }) + for name in ["test1", "test2", "test3"]: + res = self.bfapp.save_object( + endpoint="workunit", + obj={ + "name": "unit test - {}".format(name), + "containerid": 3000, + "description": "68b329da9893e34099c7d8ad5cb9c940", + "applicationid": 217, + }, + ) self.workunits.append(res[0]._id) print(res) @@ -32,42 +37,46 @@ def workunit_save(self): def workunit_read(self): print("WORKUNIT READ") - res = [self.bfapp.delete_object(endpoint='workunit', id=x)[0] for x in self.workunits] + res = [self.bfapp.delete_object(endpoint="workunit", id=x)[0] for x in self.workunits] print(res) self.assertEqual(len(res), len(self.workunits)) def workunit_delete(self): print("WORKUNIT DELETE") - res = [self.bfapp.delete_object(endpoint='workunit', id=x)[0] for x in self.workunits] + res = [self.bfapp.delete_object(endpoint="workunit", id=x)[0] for x in self.workunits] print(res) self.assertEqual(len(res), len(self.workunits)) def sample_save(self): print("SAVE SAMPLE") - sample_type = 'Biological Sample - Proteomics' + sample_type = "Biological Sample - Proteomics" species = "n/a" - for name in ['test1', 'test2', 'test3']: - res = self.bfapp.save_object(endpoint='sample', obj={'name': "unit test - {} - {}".format(name, sample_type), - 'containerid': 3000, - 'type' : sample_type, - 'species' : species, - 'samplingdate' : "2017-10-12", - 'groupingvar' : "A", - 'description': '68b329da9893e34099c7d8ad5cb9c940' - }) - - #print(res[0]._id) + for name in ["test1", "test2", "test3"]: + res = self.bfapp.save_object( + endpoint="sample", + obj={ + "name": "unit test - {} - {}".format(name, sample_type), + "containerid": 3000, + "type": sample_type, + "species": species, + "samplingdate": "2017-10-12", + "groupingvar": "A", + "description": "68b329da9893e34099c7d8ad5cb9c940", + }, + ) + + # print(res[0]._id) print("=== BEGIN DEBUG") for i in res: - print (i) + print(i) self.samples.append(res[0]._id) print("=== END DEBUG") def sample_delete(self): print("SAMPLE DELETE") print(self.samples) - res = [self.bfapp.delete_object(endpoint='sample', id=x)[0] for x in self.samples] - #res = [x for x in res if "removed successfully." in x.deletionreport] + res = [self.bfapp.delete_object(endpoint="sample", id=x)[0] for x in self.samples] + # res = [x for x in res if "removed successfully." 
in x.deletionreport] print(res) self.assertEqual(len(res), len(self.samples)) @@ -80,5 +89,6 @@ def test_sample(self): self.sample_save() self.sample_delete() -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/bfabric/tests/old_integration/test_bfabric_executable.py b/bfabric/tests/old_integration/test_bfabric_executable.py index 09e970bb..29f565e9 100755 --- a/bfabric/tests/old_integration/test_bfabric_executable.py +++ b/bfabric/tests/old_integration/test_bfabric_executable.py @@ -23,17 +23,17 @@ def default(self, o): return list(o) return JSONEncoder.default(self, o) + class BfabricTestCase(unittest.TestCase): endpoint = {} - def __init__(self, *args, **kwargs): super(BfabricTestCase, self).__init__(*args, **kwargs) self.B = bfabric.bfabric_legacy.BfabricLegacy(verbose=False) - for e in ['executable', 'sample', 'application', 'workunit', 'resource']: + for e in ["executable", "sample", "application", "workunit", "resource"]: self.endpoint[e] = [] def delete_endpoint_entries(self, endpoint=None): @@ -42,57 +42,61 @@ def delete_endpoint_entries(self, endpoint=None): res = [x for x in res if "removed successfully." in x.deletionreport] self.assertEqual(len(res), len(self.endpoint[endpoint])) - def test_executable(self, filename=os.path.abspath(__file__)): - wu_res = self.B.save_object(endpoint='workunit', obj={'name': "unit test - #{}.".format(1234), - 'containerid': 3000, - 'description': 'unit test', - 'applicationid': 61 - }) - self.endpoint['workunit'].extend(wu_res[0]) + wu_res = self.B.save_object( + endpoint="workunit", + obj={ + "name": "unit test - #{}.".format(1234), + "containerid": 3000, + "description": "unit test", + "applicationid": 61, + }, + ) + self.endpoint["workunit"].extend(wu_res[0]) # print(json.dumps(wu_res, cls=bfabricEncoder, indent=2)) # save - with open(filename, 'r') as f: + with open(filename, "r") as f: executable = f.read() - - #executable = "echo 'hello, world!'" + # executable = "echo 'hello, world!'" input_executable = executable - input_b64_executable = base64.b64encode(input_executable.encode()).decode() - - query = { 'name': 'unit test', - 'context': 'WORKUNIT', - 'parameter': {'modifiable': 'true', - 'description': 'will be ignored.', - 'key': 'argument1', - 'label': 'argument1', - 'required': 'true', - 'type':'string', - 'value': 'PRX@fgcz-r-028'}, - 'workunitid': wu_res[0]._id, - 'description': 'python3 unit test executable.', - #'masterexecutableid': 11871, - 'base64': input_b64_executable } - - self.endpoint['executable'].extend(self.B.save_object('executable', query)[0]) + input_b64_executable = base64.b64encode(input_executable.encode()).decode() + + query = { + "name": "unit test", + "context": "WORKUNIT", + "parameter": { + "modifiable": "true", + "description": "will be ignored.", + "key": "argument1", + "label": "argument1", + "required": "true", + "type": "string", + "value": "PRX@fgcz-r-028", + }, + "workunitid": wu_res[0]._id, + "description": "python3 unit test executable.", + #'masterexecutableid': 11871, + "base64": input_b64_executable, + } + + self.endpoint["executable"].extend(self.B.save_object("executable", query)[0]) # read - for e in self.endpoint['executable']: - res = self.B.read_object('executable', obj={'id': e._id}) + for e in self.endpoint["executable"]: + res = self.B.read_object("executable", obj={"id": e._id}) output_b64_executable = res[0].base64 output_executable = base64.b64decode(output_b64_executable.encode()).decode() - self.assertEqual(input_b64_executable, 
output_b64_executable)
         self.assertEqual(input_executable, output_executable)
 
         # delete
-        self.delete_endpoint_entries(endpoint='executable')
-        self.delete_endpoint_entries(endpoint='workunit')
+        self.delete_endpoint_entries(endpoint="executable")
+        self.delete_endpoint_entries(endpoint="workunit")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main(verbosity=2)
-
diff --git a/bfabric/tests/old_integration/test_bfabric_functional.py b/bfabric/tests/old_integration/test_bfabric_functional.py
index e00081ef..d6de2f50 100755
--- a/bfabric/tests/old_integration/test_bfabric_functional.py
+++ b/bfabric/tests/old_integration/test_bfabric_functional.py
@@ -18,21 +18,22 @@
 import bfabric.wrapper_creator.bfabric_submitter
 import bfabric.wrapper_creator.bfabric_wrapper_creator
 
-logging.basicConfig(filename="test_functional.log",
-                    filemode='a',
-                    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
-                    datefmt='%H:%M:%S',
-                    level=logging.DEBUG)
+logging.basicConfig(
+    filename="test_functional.log",
+    filemode="a",
+    format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
+    datefmt="%H:%M:%S",
+    level=logging.DEBUG,
+)
 
-class BfabricFunctionalTestCase(unittest.TestCase):
 
+class BfabricFunctionalTestCase(unittest.TestCase):
     externaljobid = 0
 
     def __init__(self, *args, **kwargs):
         super(BfabricFunctionalTestCase, self).__init__(*args, **kwargs)
 
-
     def test_wrappercreator_submitter(self):
         logging.info("XXX start functional testing")
         B = bfabric.bfabric_legacy.BfabricLegacy()
@@ -40,30 +41,31 @@ def test_wrappercreator_submitter(self):
         logging.info("Running functional test on bfabricPy")
 
         msg = "This test case requires user 'pfeeder'."
-        self.assertEqual(B.auth.login, 'pfeeder', msg)
+        self.assertEqual(B.auth.login, "pfeeder", msg)
 
         msg = "This test case requires a bfabric test system!"
         self.assertIn("bfabric-test", B.config.base_url, msg)
 
         # TODO
         # create input resource
-
         # 0. THIS IS ALL DONE PRIOR TO THE APPLICATION LAUNCH
         # 0.1
         logging.info("Creating a new executable for the test application")
-        executable = B.save_object("executable", obj={"name": "exec_func_test", "context": "APPLICATION", "program": "/usr/bin/wc"})
+        executable = B.save_object(
+            "executable", obj={"name": "exec_func_test", "context": "APPLICATION", "program": "/usr/bin/wc"}
+        )
         try:
             if executable[0].errorreport:
                 logging.error("Error while creating the executable")
-                logging.info('Errorreport present: {}'.format(executable[0].errorreport))
+                logging.info("Errorreport present: {}".format(executable[0].errorreport))
                 raise
         except:
-            logging.info('Executable successfully created')
+            logging.info("Executable successfully created")
 
         try:
             executableid = int(executable[0]._id)
             logging.info("executableid = {}".format(executableid))
         except:
-            logging.error('Error while getting the executable id')
+            logging.error("Error while getting the executable id")
 
         msg = "executableid should be a positive integer."
         self.assertTrue(executableid > 0, msg)
 
@@ -75,46 +77,63 @@ def test_wrappercreator_submitter(self):
         # The executable for submitterid=11 has been created in the test system running the following script:
         # ./bfabric_upload_submitter_executable.py bfabric_executable_submitter_functionalTest.py slurm --name "Dummy_-_yaml___Slurm_executable" --description "test new submitter's parameters"
         # Note that the executable bfabric_executable_submitter_functionalTest.py only prints "Dummy submitter executable defined for the bfabricPy functional test".
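Every entity the functional test creates goes through the same save-and-verify idiom: call save_object, inspect the first result for an errorreport, then extract a positive integer id. A minimal sketch of that idiom as a reusable helper is shown below; the suds-style result objects (with ._id and .errorreport) and the endpoint names come from the test above, while the helper save_and_get_id itself is hypothetical and not part of bfabricPy.

import logging


def save_and_get_id(client, endpoint: str, obj: dict) -> int:
    """Save an object via a BfabricLegacy-style client and return its id."""
    res = client.save_object(endpoint, obj=obj)
    first = res[0]
    # suds result objects carry an errorreport attribute when the save failed
    if getattr(first, "errorreport", None):
        logging.error("Error while creating %s: %s", endpoint, first.errorreport)
        raise RuntimeError(first.errorreport)
    entity_id = int(first._id)
    assert entity_id > 0, f"{endpoint} id should be a positive integer."
    logging.info("%s id = %s", endpoint, entity_id)
    return entity_id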
-        application = B.save_object("application", obj={"name": "appl_func_test", 'type': 'Analysis', 'technologyid': 2, 'description': "Application functional test", 'executableid': executableid, "wrappercreatorid": 8, "submitterid": 11, 'storageid': 1, 'outputfileformat': 'txt'})
-        try:
+        application = B.save_object(
+            "application",
+            obj={
+                "name": "appl_func_test",
+                "type": "Analysis",
+                "technologyid": 2,
+                "description": "Application functional test",
+                "executableid": executableid,
+                "wrappercreatorid": 8,
+                "submitterid": 11,
+                "storageid": 1,
+                "outputfileformat": "txt",
+            },
+        )
+        try:
             if application[0].errorreport:
                 logging.error("Error while creating the application")
-                logging.info('Errorreport present: {}'.format(application[0].errorreport))
+                logging.info("Errorreport present: {}".format(application[0].errorreport))
                 raise
         except:
-            logging.info('Application successfully created')
+            logging.info("Application successfully created")
 
         try:
             applicationid = int(application[0]._id)
             logging.info("applicationid = {}".format(applicationid))
         except:
-            logging.error('Error while getting the application id')
+            logging.error("Error while getting the application id")
             raise
 
         msg = "applicationid should be a positive integer."
         self.assertTrue(applicationid > 0, msg)
-
-        # 1. THIS CODE SNIPPET IS TRIGGERED BY THE BFABRIC SYSTEM AFTER THE USER RUN THE APPLICATION
 
+        # 1. THIS CODE SNIPPET IS TRIGGERED BY THE BFABRIC SYSTEM AFTER THE USER RUNS THE APPLICATION
         # 1.1
         logging.info("Creating new workunit connecting the test application executable to the execution environment")
-        workunit = B.save_object("workunit",
-                                 obj={"name": "unit test run - bfabricPy",
-                                      "status": "PENDING", 'containerid': 3061,
-                                      'applicationid': applicationid,
-                                      'description': "https://github.com/fgcz/bfabricPy/blob/iss27/bfabric/tests/test_bfabric_functional.py",
-                                      'inputdatasetid': 32428})
+        workunit = B.save_object(
+            "workunit",
+            obj={
+                "name": "unit test run - bfabricPy",
+                "status": "PENDING",
+                "containerid": 3061,
+                "applicationid": applicationid,
+                "description": "https://github.com/fgcz/bfabricPy/blob/iss27/bfabric/tests/test_bfabric_functional.py",
+                "inputdatasetid": 32428,
+            },
+        )
         try:
             if workunit[0].errorreport:
-                logging.error('Error while creating workunit')
-                logging.info('Errorreport present: {}'.format(workunit[0].errorreport))
+                logging.error("Error while creating workunit")
+                logging.info("Errorreport present: {}".format(workunit[0].errorreport))
                 raise
         except:
-            logging.info('Workunit successfully created')
+            logging.info("Workunit successfully created")
 
         try:
             workunitid = int(workunit[0]._id)
             logging.info("workunit = {}".format(workunitid))
         except:
-            logging.error('Error while getting the workunit id')
+            logging.error("Error while getting the workunit id")
             raise
 
         msg = "workunitid should be a positive integer."
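Steps 0.2 and 1.1 wire the freshly created application to a PENDING workunit; the PENDING status is what later makes B-Fabric trigger the wrapper creator and submitter external jobs. The sketch below condenses that wiring; the field names and status come from the calls above, but the numeric ids (technologyid, wrappercreatorid, submitterid, storageid, containerid, and the placeholder executableid) are values specific to this test system, not constants of the API.

import bfabric

B = bfabric.bfabric_legacy.BfabricLegacy()
executableid = 1  # placeholder: the id returned when the executable was saved in step 0.1

application = B.save_object("application", obj={
    "name": "appl_func_test",
    "type": "Analysis",
    "technologyid": 2,
    "executableid": executableid,
    "wrappercreatorid": 8,   # precomputed yaml wrapper creator in the test system
    "submitterid": 11,       # dummy functional-test submitter
    "storageid": 1,
    "outputfileformat": "txt",
})[0]

# a PENDING workunit attached to the application; B-Fabric reacts to this status
workunit = B.save_object("workunit", obj={
    "name": "unit test run - bfabricPy",
    "status": "PENDING",
    "containerid": 3061,
    "applicationid": int(application._id),
})[0]
print(f"workunit {workunit._id} created for application {application._id}")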
@@ -125,14 +144,17 @@ def test_wrappercreator_submitter(self): logging.info("Creating new externaljob for the WrapperCreator executable") # Here a precomputed test executable is replacing the wrappercreatorid in the application definition wrapper_creator_executableid = 16374 - externaljob_wc = B.save_object("externaljob", obj={'workunitid': workunitid, 'action': 'CREATE', 'executableid': wrapper_creator_executableid}) + externaljob_wc = B.save_object( + "externaljob", + obj={"workunitid": workunitid, "action": "CREATE", "executableid": wrapper_creator_executableid}, + ) try: if externaljob_wc[0].errorreport: - logging.error('Error while creating externaljob_wc') - logging.info('Errorreport present: {}'.format(externaljob_wc[0].errorreport)) + logging.error("Error while creating externaljob_wc") + logging.info("Errorreport present: {}".format(externaljob_wc[0].errorreport)) raise except: - logging.info('Externaljob_wc successfully created') + logging.info("Externaljob_wc successfully created") try: externaljobid_wc = int(externaljob_wc[0]._id) logging.info("externaljob = {}".format(externaljobid_wc)) @@ -163,13 +185,17 @@ def test_wrappercreator_submitter(self): logging.info("Checking if wrapper creator's externaljob with id={} was set to 'done'".format(externaljobid_wc)) try: - res = B.read_object('externaljob', {'id': externaljobid_wc, 'status':'DONE'}) - self.assertEqual(res[0].status, 'done', 'set externaljob id={} of wrapper creator failed.'.format(externaljobid_wc)) + res = B.read_object("externaljob", {"id": externaljobid_wc, "status": "DONE"}) + self.assertEqual( + res[0].status, "done", "set externaljob id={} of wrapper creator failed.".format(externaljobid_wc) + ) except: logging.error("Error while setting wrapper creator's externaljob status to done") # 2.3 - logging.info("Fetching the id of the yaml_workunit_externaljob in order to set it as DONE at the end of this functional test") + logging.info( + "Fetching the id of the yaml_workunit_externaljob in order to set it as DONE at the end of this functional test" + ) try: # The method W.get_externaljobid_yaml_workunit() returns the external job with Action=WORKUNIT externaljobid_yaml_workunit = W.get_externaljobid_yaml_workunit() @@ -181,7 +207,9 @@ def test_wrappercreator_submitter(self): # 3.1 logging.info("Fetching the submitter's externaljob automatically triggered by B-Fabric") try: - externaljobid_submitter = B.read_object('externaljob', {'cliententityid': workunitid, "action": "SUBMIT", 'cliententityclass': 'Workunit'})[0]._id + externaljobid_submitter = B.read_object( + "externaljob", {"cliententityid": workunitid, "action": "SUBMIT", "cliententityclass": "Workunit"} + )[0]._id logging.info("externaljobid for submitter is {}.".format(externaljobid_submitter)) except: logging.error("Error while fetching the id of the submitter's externaljob") @@ -190,7 +218,9 @@ def test_wrappercreator_submitter(self): logging.info("Executing the Submitter executable: function submitter_yaml from BfabricSubmitter") # Submitter executable is supposed to download all workunit executables and submit them. # When finished successfully, the status of its external job is set to done, else to failed. 
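After each stage the test sleeps for a fixed interval and then re-reads the externaljob to assert that its status has become "done". One way to make that wait explicit is a small polling helper, sketched below; read_object and the status values are taken from the test above, while wait_for_externaljob, its timeout, and its polling interval are assumptions of this sketch.

import time


def wait_for_externaljob(client, externaljobid: int, timeout: float = 60.0) -> str:
    """Poll an externaljob until it reaches a terminal status or the timeout expires."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        res = client.read_object("externaljob", {"id": externaljobid})
        status = str(res[0].status).lower()
        if status in ("done", "failed"):
            return status
        time.sleep(2)  # avoid hammering the SOAP service
    raise TimeoutError(f"externaljob {externaljobid} still pending after {timeout}s")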
- S = wrapper_creator.bfabric_submitter.BfabricSubmitter(externaljobid=externaljobid_submitter, SCHEDULEROOT="/usr/", scheduler="Slurm") + S = wrapper_creator.bfabric_submitter.BfabricSubmitter( + externaljobid=externaljobid_submitter, SCHEDULEROOT="/usr/", scheduler="Slurm" + ) ## this information is contained in the application definition try: S.submitter_yaml() @@ -201,10 +231,12 @@ def test_wrappercreator_submitter(self): time.sleep(10) logging.info("Checking if submitter's externaljob with id={} was set to 'done'".format(externaljobid_submitter)) try: - #res = B.read_object('externaljob', {'id': externaljobid_submitter, 'status': 'DONE'}) - res = B.read_object('externaljob', {'id': externaljobid_submitter}) + # res = B.read_object('externaljob', {'id': externaljobid_submitter, 'status': 'DONE'}) + res = B.read_object("externaljob", {"id": externaljobid_submitter}) logging.info("Status of externaljob for submitter {}".format(res[0].status)) - self.assertEqual(res[0].status, 'done', 'submitter externaljob with id={} failed.'.format(externaljobid_submitter)) + self.assertEqual( + res[0].status, "done", "submitter externaljob with id={} failed.".format(externaljobid_submitter) + ) except: logging.error("Error while setting submitter externaljob status to DONE") raise @@ -212,10 +244,16 @@ def test_wrappercreator_submitter(self): # 4. SETTING YAML_WORKUNIT_EXTERNALJOB TO DONE logging.info("Setting the yaml_workunit_externaljob created by the WrapperCreator to 'done'") try: - res = B.save_object(endpoint='externaljob', obj={'id': externaljobid_yaml_workunit, 'status': 'done'}) - logging.info("Checking if WORKUNIT's externaljob with id={} was set to 'done'".format(externaljobid_yaml_workunit)) - res = B.read_object('externaljob', {'id': externaljobid_yaml_workunit, 'status':'DONE'}) - self.assertEqual(res[0].status, 'done', 'yaml_workunit_externaljob with id={} failed.'.format(externaljobid_yaml_workunit)) + res = B.save_object(endpoint="externaljob", obj={"id": externaljobid_yaml_workunit, "status": "done"}) + logging.info( + "Checking if WORKUNIT's externaljob with id={} was set to 'done'".format(externaljobid_yaml_workunit) + ) + res = B.read_object("externaljob", {"id": externaljobid_yaml_workunit, "status": "DONE"}) + self.assertEqual( + res[0].status, + "done", + "yaml_workunit_externaljob with id={} failed.".format(externaljobid_yaml_workunit), + ) except: logging.error("Error while setting yaml_workunit externaljob status to done") @@ -231,11 +269,10 @@ def test_wrappercreator_submitter(self): time.sleep(1) logging.info("end processing job.") - logging.info("Deleting superfluous resources of test run workunit.") - res = B.read_object('workunit', {'id', workunit[0]._id})[0] + res = B.read_object("workunit", {"id", workunit[0]._id})[0] for i in res.resource: - resdel = B.delete_object('resource', i._id) + resdel = B.delete_object("resource", i._id) self.assertIn("removed successfully", resdel[0].deletionreport) logging.info("deleted resource id={}.".format(i._id)) @@ -247,44 +284,42 @@ def test_wrappercreator_submitter(self): # 6. 
THIS LINE IS CALLED WHEN THE APPLICATION IS DONE logging.info(f"set workunit {workunitid} status available.") - res = B.save_object('workunit', {'id': workunitid, 'status': 'available'}) + res = B.save_object("workunit", {"id": workunitid, "status": "available"}) logging.info("Cleanup for the python test: whatever is possible to be removed") logging.info(f"trying to delete executable {executableid} [expect to fail].") - res = B.delete_object('executable', executableid) + res = B.delete_object("executable", executableid) self.assertNotIn("removed successfully", res[0].deletionreport) logging.info(f"trying to delete application {applicationid} [expect to fail; since we have a workunit].") - res = B.delete_object('application', applicationid) + res = B.delete_object("application", applicationid) self.assertNotIn("removed successfully", res[0].deletionreport) - logging.info(f"trying to delete submitter externaljob {externaljobid_submitter} [expect to fail].") - res = B.delete_object('externaljob', externaljobid_submitter) + res = B.delete_object("externaljob", externaljobid_submitter) msg = "should fail" self.assertNotIn("removed successfully", res[0].deletionreport) logging.info(f"trying to delete workunit {workunitid}.") - res = B.delete_object('workunit', workunitid) + res = B.delete_object("workunit", workunitid) self.assertIn("removed successfully", res[0].deletionreport) logging.info(f"trying to delete submitter executable {executableid} [expect to fail].") - res = B.delete_object('executable', executableid) + res = B.delete_object("executable", executableid) self.assertNotIn("removed successfully", res[0].deletionreport) logging.info(f"trying to delete wrapper creator externaljob {externaljobid_wc} [expect to fail].") - res = B.delete_object('externaljob', externaljobid_wc) + res = B.delete_object("externaljob", externaljobid_wc) self.assertNotIn("removed successfully", res[0].deletionreport) logging.info(f"trying to delete application {applicationid}.") - res = B.delete_object('application', applicationid) + res = B.delete_object("application", applicationid) self.assertIn("removed successfully", res[0].deletionreport) -if __name__ == '__main__': +if __name__ == "__main__": suite = unittest.TestSuite() - suite.addTest(BfabricFunctionalTestCase('test_wrappercreator_submitter')) + suite.addTest(BfabricFunctionalTestCase("test_wrappercreator_submitter")) runner = unittest.TextTestRunner(verbosity=1) - runner.run(suite ) - + runner.run(suite) diff --git a/bfabric/tests/old_integration/test_bfabric_sample.py b/bfabric/tests/old_integration/test_bfabric_sample.py index 9cd06163..21b5236d 100755 --- a/bfabric/tests/old_integration/test_bfabric_sample.py +++ b/bfabric/tests/old_integration/test_bfabric_sample.py @@ -10,35 +10,41 @@ """ ssh localhost "cat > /tmp/bb.py && /usr/bin/python /tmp/bb.py" < PycharmProjects/untitled/bfabric_wsdl.py """ + + class BfabricTestCase(unittest.TestCase): workunits = [] samples = [] - bfapp = BfabricLegacy(verbose=True) + def sample_save(self): print("SAVE SAMPLE") - sample_type = 'Biological Sample - Proteomics' + sample_type = "Biological Sample - Proteomics" species = "n/a" for name in [1, 2, 3]: - res = self.bfapp.save_object(endpoint='sample', obj={'name': "unit test - #{} - {}".format(name, sample_type), - 'containerid': 3000, - 'type' : sample_type, - 'species' : species, - 'samplingdate' : "2017-10-12", - 'groupingvar' : "A", - 'description': '68b329da9893e34099c7d8ad5cb9c940' - }) + res = self.bfapp.save_object( + endpoint="sample", + obj={ + "name": 
"unit test - #{} - {}".format(name, sample_type), + "containerid": 3000, + "type": sample_type, + "species": species, + "samplingdate": "2017-10-12", + "groupingvar": "A", + "description": "68b329da9893e34099c7d8ad5cb9c940", + }, + ) for i in res: - print (i) - #self.samples.append(res[0].id) + print(i) + # self.samples.append(res[0].id) def sample_delete(self): print("SAMPLE DELETE") print(self.samples) - res = [self.bfapp.delete_object(endpoint='sample', id=x)[0] for x in self.samples] + res = [self.bfapp.delete_object(endpoint="sample", id=x)[0] for x in self.samples] res = [x for x in res if "removed successfully." in x.deletionreport] print(res) self.assertEqual(len(res), len(self.samples)) @@ -47,5 +53,6 @@ def test_sample(self): self.sample_save() # self.sample_delete() -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/bfabric/tests/old_integration/test_bfabric_workunit.py b/bfabric/tests/old_integration/test_bfabric_workunit.py index 1fded8b4..8f25cc07 100755 --- a/bfabric/tests/old_integration/test_bfabric_workunit.py +++ b/bfabric/tests/old_integration/test_bfabric_workunit.py @@ -24,21 +24,21 @@ def default(self, o): return list(o) return JSONEncoder.default(self, o) + class BfabricTestCase(unittest.TestCase): endpoint = {} - def __init__(self, *args, **kwargs): super(BfabricTestCase, self).__init__(*args, **kwargs) self.bfapp = bfabric.bfabric_legacy.BfabricLegacy(verbose=False) - for e in ['executable', 'sample', 'application', 'workunit', 'resource']: + for e in ["executable", "sample", "application", "workunit", "resource"]: self.endpoint[e] = [] def resource_save(self, filename, workunitid): - with open(filename, 'r') as f: + with open(filename, "r") as f: content = f.read() try: @@ -46,97 +46,109 @@ def resource_save(self, filename, workunitid): except: raise ("error: could not encode content") - res = self.bfapp.save_object('resource', - {'base64': resource_base64, - 'name': os.path.basename(filename), - 'description': content, - 'workunitid': workunitid}) - - self.endpoint['resource'].extend(res[0]) + res = self.bfapp.save_object( + "resource", + { + "base64": resource_base64, + "name": os.path.basename(filename), + "description": content, + "workunitid": workunitid, + }, + ) + self.endpoint["resource"].extend(res[0]) def delete_endpoint_entries(self, endpoint=None): - res = [ self.bfapp.delete_object(endpoint=endpoint, id=x._id)[0] for x in self.endpoint[endpoint] ] + res = [self.bfapp.delete_object(endpoint=endpoint, id=x._id)[0] for x in self.endpoint[endpoint]] print(json.dumps(res, cls=bfabricEncoder, indent=2)) res = [x for x in res if "removed successfully." 
in x.deletionreport] self.assertEqual(len(res), len(self.endpoint[endpoint])) def _01_executable_save(self, filename=os.path.abspath(__file__)): - with open(filename, 'r') as f: + with open(filename, "r") as f: executable = f.read() - query = { 'name': 'unit test', - 'context': 'APPLICATION', - 'parameter': {'modifiable': 'true', - 'description': 'will be ignored.', - 'key': 'argument1', - 'label': 'argument1', - 'required': 'true', - 'type':'string', - 'value': 'PRX@fgcz-r-028'}, - 'description': 'python3 unit test executable.', - #'masterexecutableid': 11871, - 'base64': base64.b64encode(executable.encode()) } - - res = self.bfapp.save_object('executable', query)[0] - print (res) - self.endpoint['executable'].extend(res) + query = { + "name": "unit test", + "context": "APPLICATION", + "parameter": { + "modifiable": "true", + "description": "will be ignored.", + "key": "argument1", + "label": "argument1", + "required": "true", + "type": "string", + "value": "PRX@fgcz-r-028", + }, + "description": "python3 unit test executable.", + #'masterexecutableid': 11871, + "base64": base64.b64encode(executable.encode()), + } + + res = self.bfapp.save_object("executable", query)[0] + print(res) + self.endpoint["executable"].extend(res) def _02_sample_save(self): - sample_type = 'Biological Sample - Proteomics' + sample_type = "Biological Sample - Proteomics" species = "n/a" for name in [1, 2, 3]: - res = self.bfapp.save_object(endpoint='sample', - obj={'name': "unit test - #{}; {} {}".format(name, sample_type, datetime.datetime.now()), - 'containerid': 3000, - 'type' : sample_type, - 'species' : species, - 'samplingdate' : "2017-10-12", - 'groupingvar' : "A", - 'description': '68b329da9893e34099c7d8ad5cb9c940' - }) + res = self.bfapp.save_object( + endpoint="sample", + obj={ + "name": "unit test - #{}; {} {}".format(name, sample_type, datetime.datetime.now()), + "containerid": 3000, + "type": sample_type, + "species": species, + "samplingdate": "2017-10-12", + "groupingvar": "A", + "description": "68b329da9893e34099c7d8ad5cb9c940", + }, + ) print(res[0]) - self.endpoint['sample'].extend(res[0]) - + self.endpoint["sample"].extend(res[0]) def _03_application_save(self): - query={'name': "unit test", - 'description': '68b329da9893e34099c7d8ad5cb9c940', - 'type': "Analysis", - 'technologyid' : 2 - } - - res = self.bfapp.save_object(endpoint='application', obj=query) + query = { + "name": "unit test", + "description": "68b329da9893e34099c7d8ad5cb9c940", + "type": "Analysis", + "technologyid": 2, + } + + res = self.bfapp.save_object(endpoint="application", obj=query) print(json.dumps(res, cls=bfabricEncoder, indent=2)) - self.endpoint['application'].extend(res[0]) - + self.endpoint["application"].extend(res[0]) def _04_workunit_save(self): queue = range(1, 4) try: - applicationid = self.endpoint['application'][0]._id + applicationid = self.endpoint["application"][0]._id except: applicationid = 61 for j in queue: - res = self.bfapp.save_object(endpoint='workunit', obj={'name': "unit test - #{}.".format(j), - 'containerid': bfabric.project, - 'description': '68b329da9893e34099c7d8ad5cb9c940', - 'applicationid': applicationid - }) - self.endpoint['workunit'].extend(res[0]) - print(json.dumps(self.endpoint['workunit'], cls=bfabricEncoder, indent=2)) + res = self.bfapp.save_object( + endpoint="workunit", + obj={ + "name": "unit test - #{}.".format(j), + "containerid": bfabric.project, + "description": "68b329da9893e34099c7d8ad5cb9c940", + "applicationid": applicationid, + }, + ) + 
self.endpoint["workunit"].extend(res[0]) + print(json.dumps(self.endpoint["workunit"], cls=bfabricEncoder, indent=2)) self.resource_save(os.path.abspath(__file__), res[0]._id) - #self.assertEqual(len(queue), len(self.workunits)) + # self.assertEqual(len(queue), len(self.workunits)) - def _98_statistics(self): print("\nsummary:") for k, v in self.endpoint.items(): try: res = [x._id for x in v] - print ("{}\n\t{}".format(k, [x._id for x in v])) + print("{}\n\t{}".format(k, [x._id for x in v])) except: pass @@ -147,13 +159,11 @@ def test_01(self): self._04_workunit_save() self._98_statistics() + self.delete_endpoint_entries(endpoint="executable") + self.delete_endpoint_entries(endpoint="sample") + self.delete_endpoint_entries(endpoint="workunit") + # self.delete_endpoint_entries(endpoint='application') - self.delete_endpoint_entries(endpoint='executable') - self.delete_endpoint_entries(endpoint='sample') - self.delete_endpoint_entries(endpoint='workunit') - #self.delete_endpoint_entries(endpoint='application') - -if __name__ == '__main__': +if __name__ == "__main__": unittest.main(verbosity=2) - diff --git a/bfabric/wrapper_creator/bfabric_external_job.py b/bfabric/wrapper_creator/bfabric_external_job.py index ede8f3a5..20642ddf 100644 --- a/bfabric/wrapper_creator/bfabric_external_job.py +++ b/bfabric/wrapper_creator/bfabric_external_job.py @@ -12,6 +12,7 @@ class BfabricExternalJob(BfabricLegacy): TODO check if an external job id is provided """ + externaljobid = None def __init__(self, login=None, password=None, externaljobid=None): @@ -26,19 +27,19 @@ def __init__(self, login=None, password=None, externaljobid=None): def logger(self, msg): if self.externaljobid: - super(BfabricExternalJob, self).save_object('externaljob', {'id': self.externaljobid, 'logthis': str(msg)}) + super(BfabricExternalJob, self).save_object("externaljob", {"id": self.externaljobid, "logthis": str(msg)}) else: print((str(msg))) def save_object(self, endpoint, obj, debug=None): res = super(BfabricExternalJob, self).save_object(endpoint, obj, debug) jsonres = json.dumps(res, cls=bfabricEncoder, sort_keys=True, indent=2) - self.logger('saved ' + endpoint + '=' + str(jsonres)) + self.logger("saved " + endpoint + "=" + str(jsonres)) return res def get_workunitid_of_externaljob(self): print(("DEBUG get_workunitid_of_externaljob self.externaljobid={}".format(self.externaljobid))) - res = self.read_object(endpoint='externaljob', obj={'id': self.externaljobid})[0] + res = self.read_object(endpoint="externaljob", obj={"id": self.externaljobid})[0] print(res) print("DEBUG END") workunit_id = None @@ -53,13 +54,12 @@ def get_application_name(self): workunitid = self.get_workunitid_of_externaljob() if workunitid is None: raise ValueError("no workunit available for the given externaljobid.") - workunit = self.read_object(endpoint='workunit', obj={'id': workunitid})[0] + workunit = self.read_object(endpoint="workunit", obj={"id": workunitid})[0] if workunit is None: raise ValueError("ERROR: no workunit available for the given externaljobid.") assert isinstance(workunit._id, int) - application = self.read_object('application', obj={'id': workunit.application._id})[0] - return application.name.replace(' ', '_') - + application = self.read_object("application", obj={"id": workunit.application._id})[0] + return application.name.replace(" ", "_") def get_executable_of_externaljobid(self): """ @@ -75,8 +75,8 @@ def get_executable_of_externaljobid(self): return None executables = list() - for executable in 
self.read_object(endpoint='executable', obj={'workunitid': workunitid}):
-            if hasattr(executable, 'base64'):
+        for executable in self.read_object(endpoint="executable", obj={"workunitid": workunitid}):
+            if hasattr(executable, "base64"):
                 executables.append(executable)
 
         return executables if len(executables) > 0 else None
diff --git a/bfabric/wrapper_creator/bfabric_feeder.py b/bfabric/wrapper_creator/bfabric_feeder.py
index 6052a66e..03e5d4c7 100644
--- a/bfabric/wrapper_creator/bfabric_feeder.py
+++ b/bfabric/wrapper_creator/bfabric_feeder.py
@@ -6,7 +6,7 @@ class BfabricFeeder(BfabricLegacy):
     """
-    this class is used for reporting 'resource' status 
+    this class is used for reporting 'resource' status
     """
 
     def report_resource(self, resourceid):
@@ -17,32 +17,30 @@ def report_resource(self, resourceid):
 
         this is going to be executed on the storage host
         """
-        res = self.read_object('resource', {'id': resourceid})[0]
-        print (res)
+        res = self.read_object("resource", {"id": resourceid})[0]
+        print(res)
 
-        if not hasattr(res, 'storage'):
+        if not hasattr(res, "storage"):
             return -1
 
-        storage = self.read_object('storage', {'id': res.storage._id})[0]
+        storage = self.read_object("storage", {"id": res.storage._id})[0]
 
         filename = "{0}/{1}".format(storage.basepath, res.relativepath)
 
         if os.path.isfile(filename):
             try:
-                fmd5 = hashlib.md5(open(filename, 'rb').read()).hexdigest()
-                print ("md5sum ({}) = {}".format(filename, fmd5))
+                fmd5 = hashlib.md5(open(filename, "rb").read()).hexdigest()
+                print("md5sum ({}) = {}".format(filename, fmd5))
 
                 fsize = int(os.path.getsize(filename)) + 1
-                print ("size ({}) = {}".format(filename, fsize))
+                print("size ({}) = {}".format(filename, fsize))
 
-
-                return self.save_object('resource', {'id': resourceid,
-                    'size': fsize,
-                    'status': 'available',
-                    'filechecksum': fmd5})
+                return self.save_object(
+                    "resource", {"id": resourceid, "size": fsize, "status": "available", "filechecksum": fmd5}
+                )
             except:
-                print ("computing md5 failed")
+                print("computing md5 failed")
                 # print ("{} {}".format(Exception, err))
                 raise
 
-        return self.save_object('resource', {'id': resourceid, 'status': 'failed'})
+        return self.save_object("resource", {"id": resourceid, "status": "failed"})
diff --git a/bfabric/wrapper_creator/bfabric_submitter.py b/bfabric/wrapper_creator/bfabric_submitter.py
index 97b94f5a..aab29ff0 100644
--- a/bfabric/wrapper_creator/bfabric_submitter.py
+++ b/bfabric/wrapper_creator/bfabric_submitter.py
@@ -12,23 +12,35 @@ class BfabricSubmitter:
     the class is used by the submitter which is executed by the bfabric system.
""" - (G, B) = (None, None) + (G, B) = (None, None) workunitid = None workunit = None parameters = None execfilelist = [] - slurm_dict = {"MaxQuant_textfiles_sge" : {'partition': "prx", 'nodelist': "fgcz-r-033", 'memory':"1G"}, - "fragpipe" : {'partition': "prx", 'nodelist': "fgcz-r-033", 'memory':"256G"}, - "MaxQuant" : {'partition': "maxquant", 'nodelist': "fgcz-r-033", 'memory':"4G"}, - "scaffold_generic" : {'partition': "scaffold", 'nodelist': "fgcz-r-033", 'memory':"256G"}, - "MSstats dataProcess" : {'partition': "prx", 'nodelist': "fgcz-r-033", 'memory':"64G"}, - "MaxQuant_sampleSizeEstimation" : {'partition': "prx", 'nodelist': "fgcz-r-028", 'memory': "2G"}, - "ProteomeDiscovererQC" : {'partition': "prx", 'nodelist': "fgcz-r-035", 'memory': "2G"} - } - - def __init__(self, login=None, password=None, externaljobid=None, - user='*', node="PRX@fgcz-r-018", partition="prx", nodelist="fgcz-r-028", memory="10G", SCHEDULEROOT='/export/bfabric/bfabric/', scheduler="GridEngine"): + slurm_dict = { + "MaxQuant_textfiles_sge": {"partition": "prx", "nodelist": "fgcz-r-033", "memory": "1G"}, + "fragpipe": {"partition": "prx", "nodelist": "fgcz-r-033", "memory": "256G"}, + "MaxQuant": {"partition": "maxquant", "nodelist": "fgcz-r-033", "memory": "4G"}, + "scaffold_generic": {"partition": "scaffold", "nodelist": "fgcz-r-033", "memory": "256G"}, + "MSstats dataProcess": {"partition": "prx", "nodelist": "fgcz-r-033", "memory": "64G"}, + "MaxQuant_sampleSizeEstimation": {"partition": "prx", "nodelist": "fgcz-r-028", "memory": "2G"}, + "ProteomeDiscovererQC": {"partition": "prx", "nodelist": "fgcz-r-035", "memory": "2G"}, + } + + def __init__( + self, + login=None, + password=None, + externaljobid=None, + user="*", + node="PRX@fgcz-r-018", + partition="prx", + nodelist="fgcz-r-028", + memory="10G", + SCHEDULEROOT="/export/bfabric/bfabric/", + scheduler="GridEngine", + ): """ :rtype : object """ @@ -46,32 +58,33 @@ def __init__(self, login=None, password=None, externaljobid=None, self.workunitid = self.B.get_workunitid_of_externaljob() try: - self.workunit = self.B.read_object(endpoint='workunit', obj={'id': self.workunitid})[0] + self.workunit = self.B.read_object(endpoint="workunit", obj={"id": self.workunitid})[0] except: - print ("ERROR: could not fetch workunit while calling constructor in BfabricSubmitter.") + print("ERROR: could not fetch workunit while calling constructor in BfabricSubmitter.") raise - try: - self.parameters = [self.B.read_object(endpoint='parameter', obj={'id': x._id})[0] for x in self.workunit.parameter] + self.parameters = [ + self.B.read_object(endpoint="parameter", obj={"id": x._id})[0] for x in self.workunit.parameter + ] except: self.parameters = list() - print ("Warning: could not fetch parameter.") + print("Warning: could not fetch parameter.") partition = [x for x in self.parameters if x.key == "partition"] nodelist = [x for x in self.parameters if x.key == "nodelist"] memory = [x for x in self.parameters if x.key == "memory"] application_name = self.B.get_application_name() - if len(partition) > 0 and len(nodelist) > 0 and len(memory)>0: + if len(partition) > 0 and len(nodelist) > 0 and len(memory) > 0: self.partition = partition[0].value self.nodelist = nodelist[0].value self.memory = memory[0].value elif "queue" in [x.key for x in self.parameters] and application_name in self.slurm_dict: # Temporary check for old workunit previously run with SGE - self.partition = self.slurm_dict[application_name]['partition'] - self.nodelist = 
self.slurm_dict[application_name]['nodelist'] - self.memory = self.slurm_dict[application_name]['memory'] + self.partition = self.slurm_dict[application_name]["partition"] + self.nodelist = self.slurm_dict[application_name]["nodelist"] + self.memory = self.slurm_dict[application_name]["memory"] else: pass @@ -80,7 +93,6 @@ def __init__(self, login=None, password=None, externaljobid=None, print(("memory={0}".format(self.memory))) print("__init__ DONE") - def submit_gridengine(self, script="/tmp/runme.bash", arguments=""): GE = gridengine.GridEngine(user=self.user, queue=self.queue, GRIDENGINEROOT=self.SCHEDULEROOT) @@ -91,7 +103,6 @@ def submit_gridengine(self, script="/tmp/runme.bash", arguments=""): self.B.logger("{}".format(resQsub)) - def submit_slurm(self, script="/tmp/runme.bash", arguments=""): SL = slurm.SLURM(user=self.user, SLURMROOT=self.SCHEDULEROOT) @@ -102,7 +113,6 @@ def submit_slurm(self, script="/tmp/runme.bash", arguments=""): self.B.logger("{}".format(resSbatch)) - def compose_bash_script(self, configuration=None, configuration_parser=lambda x: yaml.safe_load(x)): """ composes the bash script which is executed by the submitter (sun grid engine). @@ -113,15 +123,13 @@ def compose_bash_script(self, configuration=None, configuration_parser=lambda x: :rtype : str """ - - #assert isinstance(configuration, str) + # assert isinstance(configuration, str) try: config = configuration_parser(configuration) except: raise ValueError("error: parsing configuration content failed.") - _cmd_template = """#!/bin/bash # Maria d'Errico # Christian Panse @@ -217,24 +225,25 @@ def compose_bash_script(self, configuration=None, configuration_parser=lambda x: exit 0 -""".format(self.partition, - config['job_configuration']['stderr']['url'], - config['job_configuration']['stdout']['url'], - config['job_configuration']['external_job_id'], - config['job_configuration']['output']['resource_id'], - config['job_configuration']['stderr']['resource_id'], - config['job_configuration']['stdout']['resource_id'], - ",".join(config['application']['output']), - configuration, - config['job_configuration']['executable'], - config['job_configuration']['workunit_id'], - self.nodelist, - self.memory, - job_notification_emails=self.B.config.job_notification_emails) +""".format( + self.partition, + config["job_configuration"]["stderr"]["url"], + config["job_configuration"]["stdout"]["url"], + config["job_configuration"]["external_job_id"], + config["job_configuration"]["output"]["resource_id"], + config["job_configuration"]["stderr"]["resource_id"], + config["job_configuration"]["stdout"]["resource_id"], + ",".join(config["application"]["output"]), + configuration, + config["job_configuration"]["executable"], + config["job_configuration"]["workunit_id"], + self.nodelist, + self.memory, + job_notification_emails=self.B.config.job_notification_emails, + ) return _cmd_template - def submitter_yaml(self): """ implements the default submitter @@ -257,25 +266,25 @@ def submitter_yaml(self): except: raise ValueError("error: decoding executable.base64 failed.") - print(content) - _cmd_template = self.compose_bash_script(configuration=content, - configuration_parser=lambda x: yaml.safe_load(x)) + _cmd_template = self.compose_bash_script( + configuration=content, configuration_parser=lambda x: yaml.safe_load(x) + ) - _bash_script_filename = "/home/bfabric/prx/workunitid-{0}_externaljobid-{1}_executableid-{2}.bash"\ - .format(self.B.get_workunitid_of_externaljob(), self.B.externaljobid, executable._id) + _bash_script_filename 
= "/home/bfabric/prx/workunitid-{0}_externaljobid-{1}_executableid-{2}.bash".format( + self.B.get_workunitid_of_externaljob(), self.B.externaljobid, executable._id + ) - with open(_bash_script_filename, 'w') as f: + with open(_bash_script_filename, "w") as f: f.write(_cmd_template) - if self.scheduler=="GridEngine" : + if self.scheduler == "GridEngine": self.submit_gridengine(_bash_script_filename) else: self.submit_slurm(_bash_script_filename) self.execfilelist.append(_bash_script_filename) + res = self.B.save_object(endpoint="externaljob", obj={"id": self.B.externaljobid, "status": "done"}) - res = self.B.save_object(endpoint='externaljob', - obj={'id': self.B.externaljobid, 'status': 'done'}) def get_job_script(self): return self.execfilelist diff --git a/bfabric/wrapper_creator/bfabric_wrapper_creator.py b/bfabric/wrapper_creator/bfabric_wrapper_creator.py index 80cf30ad..819bd984 100644 --- a/bfabric/wrapper_creator/bfabric_wrapper_creator.py +++ b/bfabric/wrapper_creator/bfabric_wrapper_creator.py @@ -20,13 +20,14 @@ class BfabricWrapperCreator(BfabricExternalJob): def get_externaljobid_yaml_workunit(self): return self.externaljobid_yaml_workunit - def uploadGridEngineScript(self, para={'INPUTHOST': 'fgcz-r-035.uzh.ch'}): + def uploadGridEngineScript(self, para={"INPUTHOST": "fgcz-r-035.uzh.ch"}): """ the methode creates and uploads an executebale. """ self.warning( - "This python method is superfluously and will be removed. Please use the write_yaml method of the BfabricWrapperCreato class.") + "This python method is superfluously and will be removed. Please use the write_yaml method of the BfabricWrapperCreato class." + ) _cmd_template = """#!/bin/bash # $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/bfabric.py $ @@ -61,23 +62,31 @@ def uploadGridEngineScript(self, para={'INPUTHOST': 'fgcz-r-035.uzh.ch'}): || exit 1 exit 0 -""".format("\n".join(sorted(['%s="%s"' % (key, info) for key, info in para.iteritems()])), para['STDERR'], - para['STDOUT']) - - resExecutable = self.save_object('executable', {'name': os.path.basename(para['APPLICATION']) + "_executable", - 'context': 'WORKUNIT', - 'parameter': None, - 'description': "This script should run as 'bfabric' user in the FGCZ compute infrastructure.", - 'workunitid': para['WORKUNITID'], - 'base64': base64.b64encode(_cmd_template), - 'version': 0.2}) +""".format( + "\n".join(sorted(['%s="%s"' % (key, info) for key, info in para.iteritems()])), + para["STDERR"], + para["STDOUT"], + ) + + resExecutable = self.save_object( + "executable", + { + "name": os.path.basename(para["APPLICATION"]) + "_executable", + "context": "WORKUNIT", + "parameter": None, + "description": "This script should run as 'bfabric' user in the FGCZ compute infrastructure.", + "workunitid": para["WORKUNITID"], + "base64": base64.b64encode(_cmd_template), + "version": 0.2, + }, + ) - return (resExecutable) + return resExecutable def get_executableid(self): - return (self.workunit_executableid) + return self.workunit_executableid - def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style=False, encoding=None)): + def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style=False, encoding=None)): """ This method writes all related parameters into a yaml file which is than upload as base64 encoded file into the b-fabric system. 
@@ -93,15 +102,15 @@ def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style= if workunitid is None: raise ValueError("no workunit available for the given externaljobid.") - workunit = self.read_object(endpoint='workunit', obj={'id': workunitid})[0] + workunit = self.read_object(endpoint="workunit", obj={"id": workunitid})[0] if workunit is None: raise ValueError("ERROR: no workunit available for the given externaljobid.") assert isinstance(workunit._id, int) - application = self.read_object('application', obj={'id': workunit.application._id})[0] + application = self.read_object("application", obj={"id": workunit.application._id})[0] # TODO(cp): rename to application_execuatbel - workunit_executable = self.read_object('executable', obj={'id': workunit.applicationexecutable._id})[0] + workunit_executable = self.read_object("executable", obj={"id": workunit.applicationexecutable._id})[0] try: self.workunit_executableid = workunit_executable._id except: @@ -110,8 +119,8 @@ def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style= # Get container details container = workunit.container fastasequence = "" - if container._classname=="order": - order = self.read_object('order', obj={'id': container._id})[0] + if container._classname == "order": + order = self.read_object("order", obj={"id": container._id})[0] order_id = order._id if "project" in order: project_id = order.project._id @@ -126,25 +135,26 @@ def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style= today = datetime.date.today() # merge all information into the executable script - _output_storage = self.read_object('storage', obj={'id': application.storage._id})[0] + _output_storage = self.read_object("storage", obj={"id": application.storage._id})[0] _output_relative_path = "p{0}/bfabric/{1}/{2}/{3}/workunit_{4}/".format( container._id, - application.technology.replace(' ', '_'), - application.name.replace(' ', '_'), - today.strftime('%Y/%Y-%m/%Y-%m-%d/'), - workunitid) + application.technology.replace(" ", "_"), + application.name.replace(" ", "_"), + today.strftime("%Y/%Y-%m/%Y-%m-%d/"), + workunitid, + ) # Setup the log_storage to SlurmLog with id 13 - _log_storage = self.read_object('storage', obj={'id': 13})[0] + _log_storage = self.read_object("storage", obj={"id": 13})[0] - #_cmd_applicationList = [workunit_executable.program] + # _cmd_applicationList = [workunit_executable.program] application_parameter = {} if not getattr(workunit, "parameter", None) is None: for para in workunit.parameter: - parameter = self.read_object('parameter', obj={'id': para._id}) + parameter = self.read_object("parameter", obj={"id": para._id}) if parameter: for p in parameter: try: @@ -154,26 +164,28 @@ def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style= try: input_resources = [x._id for x in workunit.inputresource] - input_resources = [self.read_object(endpoint='resource', obj={'id': x})[0] for x in input_resources] + input_resources = [self.read_object(endpoint="resource", obj={"id": x})[0] for x in input_resources] except: print("no input resources found. 
continue with empty list.") input_resources = [] - # query all urls and ids of the input resources resource_urls = dict() resource_ids = dict() for resource_iterator in input_resources: try: - _appication_id = self.read_object(endpoint='workunit', - obj={'id': resource_iterator.workunit._id})[0].application._id + _appication_id = self.read_object(endpoint="workunit", obj={"id": resource_iterator.workunit._id})[ + 0 + ].application._id - _application_name = "{0}".format(self.read_object('application', obj={'id': _appication_id})[0].name) + _application_name = "{0}".format(self.read_object("application", obj={"id": _appication_id})[0].name) - _storage = self.read_object('storage', {'id': resource_iterator.storage._id})[0] + _storage = self.read_object("storage", {"id": resource_iterator.storage._id})[0] - _inputUrl = "bfabric@{0}:/{1}/{2}".format(_storage.host, _storage.basepath, resource_iterator.relativepath) + _inputUrl = "bfabric@{0}:/{1}/{2}".format( + _storage.host, _storage.basepath, resource_iterator.relativepath + ) if not _application_name in resource_urls: resource_urls[_application_name] = [] @@ -183,48 +195,66 @@ def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style= sample_id = self.get_sampleid(int(resource_iterator._id)) - _resource_sample = {'resource_id': int(resource_iterator._id), - 'resource_url': "{0}/userlab/show-resource.html?id={1}".format(self.config.base_url, resource_iterator._id)} - + _resource_sample = { + "resource_id": int(resource_iterator._id), + "resource_url": "{0}/userlab/show-resource.html?id={1}".format( + self.config.base_url, resource_iterator._id + ), + } if not sample_id is None: - _resource_sample['sample_id'] = int(sample_id) - _resource_sample['sample_url'] = "{0}/userlab/show-sample.html?id={1}".format(self.config.base_url, sample_id) + _resource_sample["sample_id"] = int(sample_id) + _resource_sample["sample_url"] = "{0}/userlab/show-sample.html?id={1}".format( + self.config.base_url, sample_id + ) resource_ids[_application_name].append(_resource_sample) except: - print ("resource_iterator failed. continue ...") + print("resource_iterator failed. 
continue ...") pass - # create resources for output, stderr, stdout - _ressource_output = self.save_object('resource', { - 'name': "{0} {1} - resource".format(application.name, len(input_resources)), - 'workunitid': workunit._id, - 'storageid': int(application.storage._id), - 'relativepath': _output_relative_path})[0] - + _ressource_output = self.save_object( + "resource", + { + "name": "{0} {1} - resource".format(application.name, len(input_resources)), + "workunitid": workunit._id, + "storageid": int(application.storage._id), + "relativepath": _output_relative_path, + }, + )[0] print(_ressource_output) _output_filename = "{0}.{1}".format(_ressource_output._id, application.outputfileformat) # we want to include the resource._id into the filename - _ressource_output = self.save_object('resource', - {'id': int(_ressource_output._id), - 'relativepath': "{0}/{1}".format(_output_relative_path, _output_filename)})[0] - - print (_ressource_output) - _resource_stderr = self.save_object('resource', { - 'name': 'slurm_stderr', - 'workunitid': int(workunit._id), - 'storageid': _log_storage._id, - 'relativepath': "/workunitid-{0}_resourceid-{1}.err".format(workunit._id, _ressource_output._id)})[0] - - _resource_stdout = self.save_object('resource', { - 'name': 'slurm_stdout', - 'workunitid': workunit._id, - 'storageid': _log_storage._id, - 'relativepath': "/workunitid-{0}_resourceid-{1}.out".format(workunit._id, _ressource_output._id)})[0] + _ressource_output = self.save_object( + "resource", + { + "id": int(_ressource_output._id), + "relativepath": "{0}/{1}".format(_output_relative_path, _output_filename), + }, + )[0] + print(_ressource_output) + _resource_stderr = self.save_object( + "resource", + { + "name": "slurm_stderr", + "workunitid": int(workunit._id), + "storageid": _log_storage._id, + "relativepath": "/workunitid-{0}_resourceid-{1}.err".format(workunit._id, _ressource_output._id), + }, + )[0] + + _resource_stdout = self.save_object( + "resource", + { + "name": "slurm_stdout", + "workunitid": workunit._id, + "storageid": _log_storage._id, + "relativepath": "/workunitid-{0}_resourceid-{1}.out".format(workunit._id, _ressource_output._id), + }, + )[0] # Creates the workunit executable # The config includes the externaljobid: the yaml_workunit_externaljob has to be created before it. @@ -232,30 +262,38 @@ def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style= # a yaml_workunit_executable is thus created before the config definition in order to provide # the correct executableid to the yaml_workunit_externaljob. # However this yaml_workunit_executable has to be updated later to include 'base64': base64.b64encode(config_serialized.encode()).decode() - yaml_workunit_executable = self.save_object('executable', {'name': 'job configuration (executable) in YAML', - 'context': 'WORKUNIT', - 'workunitid': workunit._id, - 'description': "This is a job configuration as YAML base64 encoded. It is configured to be executed by the B-Fabric yaml submitter."})[0] + yaml_workunit_executable = self.save_object( + "executable", + { + "name": "job configuration (executable) in YAML", + "context": "WORKUNIT", + "workunitid": workunit._id, + "description": "This is a job configuration as YAML base64 encoded. 
It is configured to be executed by the B-Fabric yaml submitter.", + }, + )[0] print(yaml_workunit_executable) - yaml_workunit_externaljob = self.save_object('externaljob', - {"workunitid": workunit._id, - 'status': 'new', - 'executableid' : yaml_workunit_executable._id, - 'action': "WORKUNIT"})[0] + yaml_workunit_externaljob = self.save_object( + "externaljob", + { + "workunitid": workunit._id, + "status": "new", + "executableid": yaml_workunit_executable._id, + "action": "WORKUNIT", + }, + )[0] print(yaml_workunit_externaljob) assert isinstance(yaml_workunit_externaljob._id, int) self.externaljobid_yaml_workunit = int(yaml_workunit_externaljob._id) print(("XXXXXXX self.externaljobid_yaml_workunit ={} XXXXXXX".format(self.externaljobid_yaml_workunit))) - _output_url = "bfabric@{0}:{1}{2}/{3}".format(_output_storage.host, - _output_storage.basepath, - _output_relative_path, - _output_filename) + _output_url = "bfabric@{0}:{1}{2}/{3}".format( + _output_storage.host, _output_storage.basepath, _output_relative_path, _output_filename + ) try: - query_obj = {'id': workunit.inputdataset._id} - inputdataset = self.read_object(endpoint='dataset', obj=query_obj)[0] + query_obj = {"id": workunit.inputdataset._id} + inputdataset = self.read_object(endpoint="dataset", obj=query_obj)[0] inputdataset_json = json.dumps(inputdataset, cls=bfabricEncoder, sort_keys=True, indent=2) inputdataset = json.loads(inputdataset_json) except: @@ -263,53 +301,65 @@ def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style= # Compose configuration structure config = { - 'job_configuration': { - 'executable': "{}".format(workunit_executable.program), - 'inputdataset': inputdataset, - 'input': resource_ids, - 'output': { - 'protocol': 'scp', - 'resource_id': int(_ressource_output._id), - 'ssh_args': "-o StrictHostKeyChecking=no -2 -l bfabric -x" + "job_configuration": { + "executable": "{}".format(workunit_executable.program), + "inputdataset": inputdataset, + "input": resource_ids, + "output": { + "protocol": "scp", + "resource_id": int(_ressource_output._id), + "ssh_args": "-o StrictHostKeyChecking=no -2 -l bfabric -x", }, - 'stderr': { - 'protocol': 'file', - 'resource_id': int(_resource_stderr._id) , - 'url': "{0}/workunitid-{1}_resourceid-{2}.err".format(_log_storage.basepath, workunit._id, _ressource_output._id) - }, - 'stdout': { - 'protocol': 'file', - 'resource_id': int(_resource_stdout._id), - 'url': "{0}/workunitid-{1}_resourceid-{2}.out".format(_log_storage.basepath, workunit._id, _ressource_output._id) - }, - 'workunit_id': int(workunit._id), - 'workunit_createdby': str(workunit.createdby), - 'workunit_url': "{0}/userlab/show-workunit.html?workunitId={1}".format(self.config.base_url, workunit._id), - 'external_job_id': int(yaml_workunit_externaljob._id), - 'order_id': order_id, - 'project_id': project_id, - 'fastasequence': fastasequence + "stderr": { + "protocol": "file", + "resource_id": int(_resource_stderr._id), + "url": "{0}/workunitid-{1}_resourceid-{2}.err".format( + _log_storage.basepath, workunit._id, _ressource_output._id + ), + }, + "stdout": { + "protocol": "file", + "resource_id": int(_resource_stdout._id), + "url": "{0}/workunitid-{1}_resourceid-{2}.out".format( + _log_storage.basepath, workunit._id, _ressource_output._id + ), + }, + "workunit_id": int(workunit._id), + "workunit_createdby": str(workunit.createdby), + "workunit_url": "{0}/userlab/show-workunit.html?workunitId={1}".format( + self.config.base_url, workunit._id + ), + "external_job_id": 
int(yaml_workunit_externaljob._id),
+                "order_id": order_id,
+                "project_id": project_id,
+                "fastasequence": fastasequence,
+            },
+            "application": {
+                "protocol": "scp",
+                "parameters": application_parameter,
+                "input": resource_urls,
+                "output": [_output_url],
             },
-            'application' : {
-                'protocol': 'scp',
-                'parameters': application_parameter,
-                'input': resource_urls,
-                'output': [_output_url]
-            }
         }
 
         config_serialized = data_serializer(config)
         print(config_serialized)
 
-        yaml_workunit_executable = self.save_object('executable', {'id': yaml_workunit_executable._id,
-                                                                   'base64': base64.b64encode(config_serialized.encode()).decode(),
-                                                                   'version': "{}".format(10)})[0]
+        yaml_workunit_executable = self.save_object(
+            "executable",
+            {
+                "id": yaml_workunit_executable._id,
+                "base64": base64.b64encode(config_serialized.encode()).decode(),
+                "version": "{}".format(10),
+            },
+        )[0]
         print(yaml_workunit_executable)
 
         # The WrapperCreator executable is successful, and the status of the its external job is set to done,
         # which triggers B-Fabric to create an external job for the submitter executable.
 
-        wrapper_creator_externaljob = self.save_object(endpoint='externaljob',
-                                                       obj={'id': self.externaljobid, 'status': 'done'})
+        wrapper_creator_externaljob = self.save_object(
+            endpoint="externaljob", obj={"id": self.externaljobid, "status": "done"}
+        )
 
         print(("\n\nquery_counter={0}".format(self.query_counter)))
diff --git a/bfabric/wrapper_creator/gridengine.py b/bfabric/wrapper_creator/gridengine.py
index ad6937bb..cd0e4fa5 100755
--- a/bfabric/wrapper_creator/gridengine.py
+++ b/bfabric/wrapper_creator/gridengine.py
@@ -37,20 +37,20 @@
 # $Date: 2016-09-23 16:55:50 +0200 (Fri, 23 Sep 2016) $
 # $Author: cpanse $
 
-__docformat__ = 'reStructuredText'
-__version__ = '$Revision: 2463 $'
-
+__docformat__ = "reStructuredText"
+__version__ = "$Revision: 2463 $"
 
 import os
 import subprocess
 
+
 class GridEngine(object):
     """
-    interface to Open Grid Sceduler qsub
+    interface to Open Grid Scheduler qsub
     """
 
-    def __init__(self, user='*', queue="PRX@fgcz-r-035", GRIDENGINEROOT='/export/bfabric/bfabric/'):
+    def __init__(self, user="*", queue="PRX@fgcz-r-035", GRIDENGINEROOT="/export/bfabric/bfabric/"):
        """
         Set up parameters for querying Grid Engine. 
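For orientation, the `GridEngine` wrapper reformatted here is driven roughly as in the following minimal sketch; the script path is a hypothetical placeholder, and `qsub()` returns the captured stdout of the `qsub` call, or `None` when the qsub binary or the script file is missing:

```{python}
from bfabric.wrapper_creator.gridengine import GridEngine

# Fire-and-forget submission to the default PRX queue; qsub() first checks
# that both the qsub binary and the job script exist on disk.
ge = GridEngine(queue="PRX@fgcz-r-035")
print(ge.qsub(script="/tmp/my_job.bash"))
```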
@@ -65,41 +65,37 @@ def __init__(self, user='*', queue="PRX@fgcz-r-035", GRIDENGINEROOT='/export/bfa def qsub(self, script, arguments=""): """ - if qsub and script are files do - qsub as fire and forget + if qsub and script are files do + qsub as fire and forget - todo: pass stderr and stdout file location as argument + todo: pass stderr and stdout file location as argument """ qsub_cmd = [self.qsubbin, "-q", self.queue, script, " ".join(arguments)] if not os.path.isfile(self.qsubbin): - print ("{0} can not be found.".format(self.qsubbin)) + print("{0} can not be found.".format(self.qsubbin)) return if not os.path.isfile(script): - print ("'{0}' - no such file.".format(script)) + print("'{0}' - no such file.".format(script)) return try: - qsub_process = subprocess.Popen( - qsub_cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - shell=False) + qsub_process = subprocess.Popen(qsub_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False) stdout, stderr = qsub_process.communicate() return stdout # except subprocess.CalledProcessError, ex: except: - #logging.error("Error running '%s': '%s'; exit code %d", str.join(' ', qstat_cmd), stderr, ex.returncode) + # logging.error("Error running '%s': '%s'; exit code %d", str.join(' ', qstat_cmd), stderr, ex.returncode) raise def main(): - print ("hello world!") + print("hello world!") pass -if __name__ == "__main__": +if __name__ == "__main__": main() diff --git a/bfabric/wrapper_creator/slurm.py b/bfabric/wrapper_creator/slurm.py index cf45eeb3..0eda4b8f 100755 --- a/bfabric/wrapper_creator/slurm.py +++ b/bfabric/wrapper_creator/slurm.py @@ -30,20 +30,20 @@ # limitations under the License. # -__docformat__ = 'reStructuredText' -#__version__ = '$Revision: 2463 $' - +__docformat__ = "reStructuredText" +# __version__ = '$Revision: 2463 $' import os import subprocess + class SLURM(object): """ - interface to Slurm sbatch + interface to Slurm sbatch """ - def __init__(self, user='*', SLURMROOT='/usr/'): + def __init__(self, user="*", SLURMROOT="/usr/"): """ Set up parameters for querying Slurm. 
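The `SLURM` wrapper below follows the same pattern; a minimal sketch (the script path is again hypothetical; as the next hunk shows, `sbatch()` returns the combined stdout and stderr of the submission as one decoded string):

```{python}
from bfabric.wrapper_creator.slurm import SLURM

# Submit a job script via sbatch; both the sbatch binary and the script
# must exist, otherwise sbatch() returns early without submitting.
slurm = SLURM(SLURMROOT="/usr/")
print(slurm.sbatch(script="/tmp/my_job.bash"))
```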
@@ -57,26 +57,19 @@ def __init__(self, user='*', SLURMROOT='/usr/'): def sbatch(self, script, arguments=""): """ - todo: pass stderr and stdout file location as argument + todo: pass stderr and stdout file location as argument """ sbatch_cmd = [self.sbatchbin, script, " ".join(arguments)] if not os.path.isfile(self.sbatchbin): - print ("{0} can not be found.".format(self.sbatchbin)) + print("{0} can not be found.".format(self.sbatchbin)) return if not os.path.isfile(script): - print ("'{0}' - no such file.".format(script)) + print("'{0}' - no such file.".format(script)) return - sbatch_process = subprocess.Popen( - sbatch_cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - shell=False) - result = [x.decode('utf-8') for x in sbatch_process.communicate()] - - return ''.join(result) - - + sbatch_process = subprocess.Popen(sbatch_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False) + result = [x.decode("utf-8") for x in sbatch_process.communicate()] + return "".join(result) From 27b459d69d8160fd1d2ea5532c55fdd5a27a0d5c Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 17 May 2024 08:48:04 +0200 Subject: [PATCH 114/129] code quality fixes --- bfabric/__init__.py | 20 +++++++------ bfabric/bfabric.py | 16 ++++------- bfabric/bfabric_legacy.py | 23 +++++++-------- bfabric/engine/engine_zeep.py | 2 +- bfabric/errors.py | 4 ++- .../examples/compare_zeep_suds_pagination.py | 2 +- bfabric/examples/compare_zeep_suds_query.py | 11 ++++---- bfabric/experimental/__init__.py | 2 ++ bfabric/results/pandas_helper.py | 7 +++-- ...ric_executable_submitter_functionalTest.py | 2 +- ...bfabric_executable_submitter_gridengine.py | 2 +- .../bfabric_executable_submitter_slurm.py | 2 +- .../bfabric_executable_wrappercreator.py | 1 - .../scripts/bfabric_feeder_resource_autoQC.py | 10 +++---- ...c_list_not_existing_storage_directories.py | 9 ++++-- bfabric/scripts/bfabric_save_csv2dataset.py | 2 ++ .../bfabric_save_importresource_sample.py | 1 + .../scripts/bfabric_wrapper_creator_yaml.py | 3 +- .../scripts/fgcz_maxquant_scaffold-wrapper.py | 21 +++++++------- bfabric/scripts/fgcz_maxquant_wrapper.py | 28 +++++++++---------- .../tests/old_integration/__test_bfabric.py | 2 +- pyproject.toml | 2 +- 22 files changed, 89 insertions(+), 83 deletions(-) diff --git a/bfabric/__init__.py b/bfabric/__init__.py index 664403fc..b43bdfda 100755 --- a/bfabric/__init__.py +++ b/bfabric/__init__.py @@ -1,11 +1,19 @@ import importlib.metadata - -__version__ = importlib.metadata.version("bfabric") - from bfabric.bfabric import Bfabric, BfabricAPIEngineType from bfabric.bfabric_config import BfabricAuth, BfabricConfig +__all__ = [ + "Bfabric", + "BfabricAPIEngineType", + "BfabricAuth", + "BfabricConfig", +] + + +__version__ = importlib.metadata.version("bfabric") + + endpoints = sorted( [ "annotation", @@ -41,9 +49,3 @@ project = 403 container = project application = 217 - - -from bfabric.bfabric_legacy import BfabricLegacy -from bfabric.wrapper_creator.bfabric_wrapper_creator import BfabricWrapperCreator -from bfabric.wrapper_creator.bfabric_submitter import BfabricSubmitter -from bfabric.wrapper_creator.bfabric_feeder import BfabricFeeder diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index 11c4d634..5a6ddf58 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -16,10 +16,10 @@ import base64 import importlib.metadata import logging -import os from contextlib import contextmanager from datetime import datetime from enum import Enum +from pathlib import Path from pprint import pprint from 
typing import Literal, ContextManager, Any
@@ -291,20 +291,17 @@ def get_system_auth(
     have_config_path = config_path is not None
     if not have_config_path:
         # Get default path config file path
-        config_path = os.path.normpath(os.path.expanduser("~/.bfabricpy.yml"))
+        config_path = Path("~/.bfabricpy.yml").expanduser()
 
     # Use the provided config data from arguments instead of the file
-    if not os.path.isfile(config_path):
+    if not config_path.is_file():
         if have_config_path:
             # NOTE: If user explicitly specifies a path to a wrong config file, this has to be an exception
             raise OSError(f"Explicitly specified config file does not exist: {config_path}")
         # TODO: Convert to log
         print(f"Warning: could not find the config file in the default location: {config_path}")
         config = BfabricConfig(base_url=base_url)
-        if login is None and password is None:
-            auth = None
-        else:
-            auth = BfabricAuth(login=login, password=password)
+        auth = None if login is None and password is None else BfabricAuth(login=login, password=password)
 
     # Load config from file, override some of the fields with the provided ones
     else:
@@ -319,9 +316,8 @@ def get_system_auth(
     if not config.base_url:
         raise ValueError("base_url missing")
-    if not optional_auth:
-        if not auth or not auth.login or not auth.password:
-            raise ValueError("Authentication not initialized but required")
+    if not optional_auth and (not auth or not auth.login or not auth.password):
+        raise ValueError("Authentication not initialized but required")
 
     if verbose:
         pprint(config)
diff --git a/bfabric/bfabric_legacy.py b/bfabric/bfabric_legacy.py
index ce49cd59..aa864324 100644
--- a/bfabric/bfabric_legacy.py
+++ b/bfabric/bfabric_legacy.py
@@ -1,9 +1,10 @@
+from __future__ import annotations
 import base64
 import json
 import os
 import sys
 from pprint import pprint
-from typing import Dict, Any
+from typing import Any
 
 import yaml
 from suds.client import Client
@@ -13,13 +14,13 @@
 from bfabric.bfabric_config import BfabricAuth, read_config
 
 
-class BfabricLegacy(object):
+class BfabricLegacy:
     """B-Fabric python3 module
     Implements read and save object methods for B-Fabric wsdl interface
     """
 
-    def warning(self, msg):
-        sys.stderr.write("\033[93m{}\033[0m\n".format(msg))
+    def warning(self, msg) -> None:
+        sys.stderr.write(f"\033[93m{msg}\033[0m\n")
 
     def __init__(
         self,
@@ -31,7 +32,7 @@ def __init__(
         config_env: str = None,
         optional_auth: bool = False,
         verbose: bool = False,
-    ):
+    ) -> None:
         """
         :param login: Login string for overriding config file
         :param password: Password for overriding config file
@@ -54,7 +55,7 @@ def __init__(
 
         config_path = config_path or os.path.normpath(os.path.expanduser("~/.bfabricpy.yml"))
         # TODO: Convert to an exception when this branch becomes main
-        config_path_old = config_path or os.path.normpath(os.path.expanduser("~/.bfabricrc.py"))
-        if os.path.isfile(config_path):
+        config_path_old = os.path.normpath(os.path.expanduser("~/.bfabricrc.py"))
+        if os.path.isfile(config_path_old):
             self.warning(
                 "WARNING! The old .bfabricrc.py was found in the home directory. 
Delete and make sure to use the new .bfabricpy.yml" @@ -75,7 +76,7 @@ def __init__( elif (login is None) and (password is None): self.auth = auth else: - raise IOError("Must provide both username and password, or neither.") + raise OSError("Must provide both username and password, or neither.") if not self.config.base_url: raise ValueError("base server url missing") @@ -152,7 +153,7 @@ def _get_service(self, endpoint: str) -> Service: self.cl[endpoint] = Client(f"{self.config.base_url}/{endpoint}?wsdl", cache=None) return self.cl[endpoint].service - def _perform_request(self, endpoint: str, method: str, plain: bool, params: Dict[str, Any]) -> Any: + def _perform_request(self, endpoint: str, method: str, plain: bool, params: dict[str, Any]) -> Any: """Performs a request to the given endpoint and returns the result.""" self.query_counter += 1 request_params = dict(login=self.auth.login, password=self.auth.password, **params) @@ -165,7 +166,7 @@ def _perform_request(self, endpoint: str, method: str, plain: bool, params: Dict return getattr(response, endpoint) @staticmethod - def print_json(queryres=None): + def print_json(queryres=None) -> None: """ This method prints the query result as returned by ``read_object`` in JSON format. @@ -183,7 +184,7 @@ def print_json(queryres=None): print(res) @staticmethod - def print_yaml(queryres=None): + def print_yaml(queryres=None) -> None: """ This method prints the query result as returned by ``read_object`` in YAML format. @@ -222,7 +223,7 @@ def get_sampleid(self, resourceid=None): workunit = self.read_object(endpoint="workunit", obj={"id": resource.workunit._id})[0] return self.get_sampleid(resourceid=int(workunit.inputresource[0]._id)) except: - self.warning("fetching sampleid of resource.workunitid = {} failed.".format(resource.workunit._id)) + self.warning(f"fetching sampleid of resource.workunitid = {resource.workunit._id} failed.") return None diff --git a/bfabric/engine/engine_zeep.py b/bfabric/engine/engine_zeep.py index dec76780..0e4cf752 100644 --- a/bfabric/engine/engine_zeep.py +++ b/bfabric/engine/engine_zeep.py @@ -149,6 +149,6 @@ def _zeep_query_append_skipped(query: dict, skipped_keys: list, inplace: bool = """ query_this = copy.deepcopy(query) if not inplace else query for key in skipped_keys: - if overwrite or (key not in query_this.keys()): + if overwrite or (key not in query_this): query_this[key] = zeep.xsd.SkipValue return query_this diff --git a/bfabric/errors.py b/bfabric/errors.py index 556f7899..28545748 100644 --- a/bfabric/errors.py +++ b/bfabric/errors.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import Any + class BfabricRequestError(Exception): """An error that is returned by the server in response to a full request.""" @@ -18,7 +20,7 @@ class BfabricConfigError(RuntimeError): # TODO: Also test for response-level errors -def get_response_errors(response, endpoint: str) -> list[BfabricRequestError]: +def get_response_errors(response: Any, endpoint: str) -> list[BfabricRequestError]: """ :param response: A raw response to a query from an underlying engine :param endpoint: The target endpoint diff --git a/bfabric/examples/compare_zeep_suds_pagination.py b/bfabric/examples/compare_zeep_suds_pagination.py index 041f2137..868a92c4 100644 --- a/bfabric/examples/compare_zeep_suds_pagination.py +++ b/bfabric/examples/compare_zeep_suds_pagination.py @@ -19,7 +19,7 @@ """ -def report_test_result(rez: bool, prefix: str): +def report_test_result(rez: bool, prefix: str) -> None: if rez: print("--", prefix, "test 
passed --") else: diff --git a/bfabric/examples/compare_zeep_suds_query.py b/bfabric/examples/compare_zeep_suds_query.py index c510fbaa..28d7f58c 100644 --- a/bfabric/examples/compare_zeep_suds_query.py +++ b/bfabric/examples/compare_zeep_suds_query.py @@ -68,7 +68,7 @@ def calc_both(auth: BfabricAuth, config: BfabricConfig, endpoint: str, query: di ###################### -def raw_test(auth: BfabricAuth, config: BfabricConfig, endpoint, query): +def raw_test(auth: BfabricAuth, config: BfabricConfig, endpoint, query) -> None: print("Testing raw XML match for", endpoint, query) retZeep, retSuds = calc_both(auth, config, endpoint, query, raw=True) assert len(retZeep) == len(retSuds) @@ -102,7 +102,7 @@ def recursive_get_types(generic_container) -> set: return {type(generic_container)} -def basic_data_type_match_test(auth, config, endpoint, query): +def basic_data_type_match_test(auth, config, endpoint, query) -> None: print("Testing data types for", endpoint, query) retZeepDict, retSudsDict = calc_both(auth, config, endpoint, query, raw=False) typesZeep = recursive_get_types(retZeepDict) @@ -160,7 +160,7 @@ def parsed_data_match_test( drop_empty: bool = True, drop_underscores_suds: bool = True, log_file_path: str = None, -): +) -> None: print("Testing parsed data match for", endpoint, query) retZeepDict, retSudsDict = calc_both(auth, config, endpoint, query, raw=False) @@ -172,9 +172,8 @@ def parsed_data_match_test( map_element_keys(retSudsDict, {"_id": "id", "_classname": "classname", "_projectid": "projectid"}, inplace=True) if log_file_path is not None: - with open(log_file_path, "w") as f: - with redirect_stdout(f): - matched = recursive_comparison(retZeepDict, retSudsDict, prefix=[]) + with open(log_file_path, "w") as f, redirect_stdout(f): + matched = recursive_comparison(retZeepDict, retSudsDict, prefix=[]) else: matched = recursive_comparison(retZeepDict, retSudsDict, prefix=[]) diff --git a/bfabric/experimental/__init__.py b/bfabric/experimental/__init__.py index eba22563..1ad6df74 100644 --- a/bfabric/experimental/__init__.py +++ b/bfabric/experimental/__init__.py @@ -1 +1,3 @@ from .multi_query import MultiQuery + +__all__ = ["MultiQuery"] diff --git a/bfabric/results/pandas_helper.py b/bfabric/results/pandas_helper.py index 2c76180e..beff67e9 100644 --- a/bfabric/results/pandas_helper.py +++ b/bfabric/results/pandas_helper.py @@ -1,5 +1,6 @@ +from __future__ import annotations import pandas as pd -from typing import Any, List, Dict +from typing import Any def _stringify(a: Any) -> Any: @@ -10,7 +11,7 @@ def _stringify(a: Any) -> Any: Convert variable to a string if it is of non-basic data type, otherwise keep it as it is TODO: Make a better separation between what is and what is not a basic data type """ - if isinstance(a, list) or isinstance(a, dict) or isinstance(a, tuple): + if isinstance(a, (list, dict, tuple)): return str(a) else: return a @@ -24,7 +25,7 @@ def _stringify_dict(d: dict) -> dict: return {k: _stringify(v) for k, v in d.items()} -def list_dict_to_df(l: List[Dict]) -> pd.DataFrame: +def list_dict_to_df(l: list[dict]) -> pd.DataFrame: """ :param l: A list of dictionaries :return: Pandas dataframe, where every list element is a new row diff --git a/bfabric/scripts/bfabric_executable_submitter_functionalTest.py b/bfabric/scripts/bfabric_executable_submitter_functionalTest.py index fb814b63..fbd3a543 100755 --- a/bfabric/scripts/bfabric_executable_submitter_functionalTest.py +++ b/bfabric/scripts/bfabric_executable_submitter_functionalTest.py @@ -35,7 +35,7 @@ from 
optparse import OptionParser -def main(): +def main() -> None: parser = OptionParser(usage="usage: %prog -j ", version="%prog 1.0") diff --git a/bfabric/scripts/bfabric_executable_submitter_gridengine.py b/bfabric/scripts/bfabric_executable_submitter_gridengine.py index 28cf8ff4..baef6e43 100755 --- a/bfabric/scripts/bfabric_executable_submitter_gridengine.py +++ b/bfabric/scripts/bfabric_executable_submitter_gridengine.py @@ -36,7 +36,7 @@ from bfabric import BfabricSubmitter -def main(): +def main() -> None: parser = OptionParser(usage="usage: %prog -j ", version="%prog 1.0") diff --git a/bfabric/scripts/bfabric_executable_submitter_slurm.py b/bfabric/scripts/bfabric_executable_submitter_slurm.py index c30f33e9..bba8a5a3 100755 --- a/bfabric/scripts/bfabric_executable_submitter_slurm.py +++ b/bfabric/scripts/bfabric_executable_submitter_slurm.py @@ -37,7 +37,7 @@ from bfabric import BfabricSubmitter -def main(): +def main() -> None: parser = OptionParser(usage="usage: %prog -j ", version="%prog 1.0") diff --git a/bfabric/scripts/bfabric_executable_wrappercreator.py b/bfabric/scripts/bfabric_executable_wrappercreator.py index eab039b6..5f1d7e02 100755 --- a/bfabric/scripts/bfabric_executable_wrappercreator.py +++ b/bfabric/scripts/bfabric_executable_wrappercreator.py @@ -22,7 +22,6 @@ # $HeadURL: http://fgcz-svn/repos/scripts/trunk/linux/bfabric/apps/python/wrapper_creator_yaml.py $ # $Id: wrapper_creator_yaml.py 2397 2016-09-06 07:04:35Z cpanse $ -import os import sys from bfabric import BfabricWrapperCreator diff --git a/bfabric/scripts/bfabric_feeder_resource_autoQC.py b/bfabric/scripts/bfabric_feeder_resource_autoQC.py index d877605e..6a8c1d4f 100755 --- a/bfabric/scripts/bfabric_feeder_resource_autoQC.py +++ b/bfabric/scripts/bfabric_feeder_resource_autoQC.py @@ -26,7 +26,7 @@ class AutoQC: feeder for autoQC raw files """ - def __init__(self): + def __init__(self) -> None: self.bfabric_storageid = 2 self.client = Bfabric.from_config(verbose=True) self.bfabric_application_ids = self.client.config.application_ids @@ -187,11 +187,11 @@ def resource_check(self, projectid: int, name: str, workunitid: int, filename: s res = self.client.save(endpoint="resource", obj=query).to_list_dict() query = {"id": workunitid, "status": "available"} - res2 = self.client.save(endpoint="workunit", obj=query).to_list_dict() + self.client.save(endpoint="workunit", obj=query).to_list_dict() return res[0]["id"] - def feed(self, line): + def feed(self, line) -> None: """ feeds one line example: :param line: @@ -250,10 +250,10 @@ class TestCaseAutoQC(unittest.TestCase): BF = AutoQC() - def setUp(self): + def setUp(self) -> None: pass - def test_feed(self): + def test_feed(self) -> None: line = "61cf7e172713344bdf6ebe5b1ed61d99;1549963879;306145606;p2928/Proteomics/QEXACTIVEHF_2/ciuffar_20190211_190211_TNF_PRM_rT_again_AQUA_LHration/20190211_013_autoQC4L.raw" # self.BF.feed(line) line = "efdf5e375d6e0e4e4abf9c2b3e1e97d5;1542134408;59129652;p1000/Proteomics/QEXACTIVEHF_2/tobiasko_20181113/20181113_003_autoQC01.raw" diff --git a/bfabric/scripts/bfabric_list_not_existing_storage_directories.py b/bfabric/scripts/bfabric_list_not_existing_storage_directories.py index 95f9534a..570ec630 100755 --- a/bfabric/scripts/bfabric_list_not_existing_storage_directories.py +++ b/bfabric/scripts/bfabric_list_not_existing_storage_directories.py @@ -11,23 +11,26 @@ http://fgcz-bfabric.uzh.ch/bfabric/executable?wsdl """ from __future__ import annotations -import os + +from pathlib import Path from bfabric import Bfabric -ROOTDIR = 
"/srv/www/htdocs/" +ROOTDIR = Path("/srv/www/htdocs/") def list_not_existing_storage_dirs(client: Bfabric, technologyid: int = 2) -> None: + """Lists not existing storage directories for a given technologyid.""" results = client.read(endpoint="container", obj={"technologyid": technologyid}).to_list_dict() container_ids = sorted({x["id"] for x in results}) for cid in container_ids: - if not os.path.isdir(os.path.join(ROOTDIR, f"p{cid}")): + if not (ROOTDIR / f"p{cid}").is_dir(): print(cid) def main() -> None: + """Parses CLI arguments and calls `list_not_existing_storage_dirs`.""" client = Bfabric.from_config(verbose=True) list_not_existing_storage_dirs(client=client, technologyid=2) list_not_existing_storage_dirs(client=client, technologyid=4) diff --git a/bfabric/scripts/bfabric_save_csv2dataset.py b/bfabric/scripts/bfabric_save_csv2dataset.py index 212cad6f..50df9e31 100755 --- a/bfabric/scripts/bfabric_save_csv2dataset.py +++ b/bfabric/scripts/bfabric_save_csv2dataset.py @@ -37,6 +37,7 @@ def polars_to_bfabric_type(dtype: pl.DataType) -> str | None: + """Returns the B-Fabric type for a given Polars data type, defaulting to String if no correspondence is found.""" if str(dtype).startswith("Int"): return "Integer" elif str(dtype).startswith("String"): @@ -48,6 +49,7 @@ def polars_to_bfabric_type(dtype: pl.DataType) -> str | None: def polars_to_bfabric_dataset(data: pl.DataFrame) -> dict[str, list[dict[str, int | str | float]]]: + """Converts a Polars DataFrame to a B-Fabric dataset representation.""" attributes = [ {"name": col, "position": i + 1, "type": polars_to_bfabric_type(data[col].dtype)} for i, col in enumerate(data.columns) diff --git a/bfabric/scripts/bfabric_save_importresource_sample.py b/bfabric/scripts/bfabric_save_importresource_sample.py index 53f25753..fa4eccc4 100755 --- a/bfabric/scripts/bfabric_save_importresource_sample.py +++ b/bfabric/scripts/bfabric_save_importresource_sample.py @@ -79,6 +79,7 @@ def save_importresource(client: Bfabric, line: str) -> None: def get_sample_id_from_path(file_path: str) -> int | None: + """Returns the sample id for a given file path, if it's present in the correct format.""" match = re.search( r"p([0-9]+)\/(Proteomics\/[A-Z]+_[1-9])\/.*_\d\d\d_S([0-9][0-9][0-9][0-9][0-9][0-9]+)_.*(raw|zip)$", file_path, diff --git a/bfabric/scripts/bfabric_wrapper_creator_yaml.py b/bfabric/scripts/bfabric_wrapper_creator_yaml.py index ab5b28c5..8e6fc953 100755 --- a/bfabric/scripts/bfabric_wrapper_creator_yaml.py +++ b/bfabric/scripts/bfabric_wrapper_creator_yaml.py @@ -21,7 +21,6 @@ # $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/wrapper_creator_yaml.py $ # $Id: wrapper_creator_yaml.py 2478 2016-09-26 09:46:53Z cpanse $ -import os import sys from bfabric import BfabricWrapperCreator @@ -32,7 +31,7 @@ if len(sys.argv) == 3 and sys.argv[1] == "-j" and int(sys.argv[2]) > 0: externaljobid = int(sys.argv[2]) else: - print("usage: {} -j ".format(sys.argv[0])) + print(f"usage: {sys.argv[0]} -j ") sys.exit(1) bfapp = BfabricWrapperCreator(externaljobid=externaljobid) diff --git a/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py b/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py index d003af46..2a50994d 100755 --- a/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py +++ b/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # Copyright (C) 2019 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
#
@@ -17,7 +16,7 @@
 import sys
 from lxml import etree
 import yaml
-from io import StringIO, BytesIO
+from io import StringIO
 from optparse import OptionParser
 
 # import unittest
@@ -34,14 +33,14 @@ class FgczMaxQuantScaffold:
     fasta = None
     samples = None
 
-    def __init__(self, yamlfilename=None, zipfilename=None):
+    def __init__(self, yamlfilename=None, zipfilename=None) -> None:
         if not os.path.isfile(zipfilename):
-            print("ERROR: no such file '{0}'".format(zipfilename))
+            print(f"ERROR: no such file '{zipfilename}'")
             sys.exit(1)
 
         self.zipfilename = zipfilename
 
-        with open(yamlfilename, "r") as f:
+        with open(yamlfilename) as f:
             content = f.read()
         self.config = yaml.load(content, Loader=yaml.FullLoader)
 
@@ -79,12 +78,12 @@ def getBiologicalSample(selfs, InputFile=None, category="***BASENAME***"):
         if eInputFile is None:
             raise TypeError
 
-        eInputFile.text = "{}".format(InputFile)
-        eInputFile.attrib["maxQuantExperiment"] = "{}".format(category)
+        eInputFile.text = f"{InputFile}"
+        eInputFile.attrib["maxQuantExperiment"] = f"{category}"
 
         eBiologicalSample = eInputFile.getparent()
-        eBiologicalSample.attrib["category"] = "{}".format(category)
-        eBiologicalSample.attrib["name"] = "{}".format(category)
+        eBiologicalSample.attrib["category"] = f"{category}"
+        eBiologicalSample.attrib["name"] = f"{category}"
 
         return pBioSample
 
@@ -119,12 +118,12 @@ def getScaffold(selfs):
         # pxml = etree.XML(xml)
         return pxml
 
-    def run(self):
+    def run(self) -> None:
         xml = self.getScaffold()
 
         eExperiment = xml.find("/Experiment")
         eFastaDatabase = xml.find("/Experiment/FastaDatabase")
-        eFastaDatabase.attrib["path"] = "{}/{}".format(os.getcwd(), self.fasta)
+        eFastaDatabase.attrib["path"] = f"{os.getcwd()}/{self.fasta}"
 
         for s in self.samples:
             eExperiment.extend(self.getBiologicalSample(category=s, InputFile=self.zipfilename))
diff --git a/bfabric/scripts/fgcz_maxquant_wrapper.py b/bfabric/scripts/fgcz_maxquant_wrapper.py
index eb250504..a0a44774 100755
--- a/bfabric/scripts/fgcz_maxquant_wrapper.py
+++ b/bfabric/scripts/fgcz_maxquant_wrapper.py
@@ -38,16 +38,16 @@ class FgczMaxQuantConfig:
     config = None
     scratchdir = None
 
-    def __init__(self, config=None, scratch="/scratch/MAXQUANT/"):
+    def __init__(self, config=None, scratch="/scratch/MAXQUANT/") -> None:
         if config:
             self.config = config
-            self.scratchdir = Path("{0}/WU{1}".format(scratch, self.config["job_configuration"]["workunit_id"]))
+            self.scratchdir = Path("{}/WU{}".format(scratch, self.config["job_configuration"]["workunit_id"]))
 
         if not os.path.isdir(self.scratchdir):
-            print("no scratch dir '{0}'.".format(self.scratchdir))
+            print(f"no scratch dir '{self.scratchdir}'.")
             # raise SystemError
 
-    def generate_mqpar(self, xml_filename, xml_template):
+    def generate_mqpar(self, xml_filename, xml_template) -> None:
         """PARAMETER"""
         for query, value in self.config["application"]["parameters"].items():
             element = xml_template.find(query)
@@ -61,7 +61,7 @@ def generate_mqpar(self, xml_filename, xml_template):
                 element.extend(estring)
                 pass
             else:
-                print("replacing xpath expression {} by {}.".format(query, value))
+                print(f"replacing xpath expression {query} by {value}.")
                 element.text = value
 
         ecount = 0
@@ -74,19 +74,19 @@ def generate_mqpar(self, xml_filename, xml_template):
 
             host, file = input.split(":")
 
-            print("{}\t{}".format(os.path.basename(input), file))
+            print(f"{os.path.basename(input)}\t{file}")
 
             if not os.path.isfile(file):
-                print("'{}' do not exists.".format(file))
+                print(f"'{file}' does not exist.")
                 # raise SystemError
 
-            targetRawFile = "{}/{}".format(self.scratchdir, 
os.path.basename(input)) + targetRawFile = f"{self.scratchdir}/{os.path.basename(input)}" if not os.path.islink(targetRawFile): try: os.symlink(file, targetRawFile) except: - print("linking '{}' failed.".format(file)) + print(f"linking '{file}' failed.") estring = etree.Element("string") estring.text = targetRawFile @@ -128,7 +128,7 @@ def generate_mqpar(self, xml_filename, xml_template): # return(xml_template) xml_template.write(xml_filename) # , pretty_print=True) - def run(self): + def run(self) -> None: pass @@ -504,10 +504,10 @@ def run(self): (options, args) = parser.parse_args() if not os.path.isfile(options.yaml_filename): - print("ERROR: no such file '{0}'".format(options.yaml_filename)) + print(f"ERROR: no such file '{options.yaml_filename}'") sys.exit(1) try: - with open(options.yaml_filename, "r") as f: + with open(options.yaml_filename) as f: job_config = yaml.safe_load(f) if options.xml_template_filename is None: @@ -516,7 +516,7 @@ def run(self): except: raise else: - with open(options.xml_template_filename, "r") as f: + with open(options.xml_template_filename) as f: mqpartree = etree.parse(f) MQC = FgczMaxQuantConfig(config=job_config, scratch="d:/scratch/") @@ -535,7 +535,7 @@ def run(self): class TestFgczMaxQuantConfig(unittest.TestCase): - def test_xml(self): + def test_xml(self) -> None: input_WU181492_yaml = """ application: input: diff --git a/bfabric/tests/old_integration/__test_bfabric.py b/bfabric/tests/old_integration/__test_bfabric.py index 6243c681..61bca83f 100755 --- a/bfabric/tests/old_integration/__test_bfabric.py +++ b/bfabric/tests/old_integration/__test_bfabric.py @@ -5,7 +5,7 @@ """ import unittest -from bfabric import BfabricLegacy +from bfabric.bfabric_legacy import BfabricLegacy """ ssh localhost "cat > /tmp/bb.py && /usr/bin/python /tmp/bb.py" < PycharmProjects/untitled/bfabric_wsdl.py diff --git a/pyproject.toml b/pyproject.toml index 143ed6ea..d0fcd25c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,7 +78,7 @@ target-version = "py39" [tool.ruff.lint] select = ["ANN", "BLE", "D103", "E", "F", "PLW", "PTH", "SIM", "UP", "TCH"] -ignore = ["ANN101"] +ignore = ["ANN101", "ANN102"] [tool.licensecheck] using = "PEP631" From e5deb2b01fc523b0cefe4e9359b46efadbf5064b Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 17 May 2024 09:53:30 +0200 Subject: [PATCH 115/129] add unit tests --- bfabric/engine/engine_suds.py | 7 ++-- bfabric/engine/engine_zeep.py | 7 ++-- bfabric/results/response_format_dict.py | 17 ++++++++++ bfabric/results/result_container.py | 16 ---------- bfabric/tests/unit/test_bfabric.py | 32 +++++++++++++++++++ .../tests/unit/test_response_format_dict.py | 10 ++++++ bfabric/tests/unit/test_result_container.py | 11 +++++-- 7 files changed, 76 insertions(+), 24 deletions(-) diff --git a/bfabric/engine/engine_suds.py b/bfabric/engine/engine_suds.py index 258d0f1d..f01271a5 100644 --- a/bfabric/engine/engine_suds.py +++ b/bfabric/engine/engine_suds.py @@ -8,7 +8,8 @@ from bfabric.engine.response_format_suds import suds_asdict_recursive from bfabric.errors import BfabricRequestError, get_response_errors -from bfabric.results.result_container import _clean_result, ResultContainer +from bfabric.results.result_container import ResultContainer +from bfabric.results.response_format_dict import clean_result if TYPE_CHECKING: from suds.serviceproxy import ServiceProxy @@ -98,10 +99,10 @@ def _convert_results(self, response: Any, endpoint: str) -> ResultContainer: results = [] for result in response[endpoint]: result_parsed = 
suds_asdict_recursive(result, convert_types=True) - result_parsed = _clean_result( + result_parsed = clean_result( result_parsed, drop_underscores_suds=self._drop_underscores, - sort_responses=True, + sort_keys=True, ) results += [result_parsed] return ResultContainer(results=results, total_pages_api=n_available_pages, errors=errors) diff --git a/bfabric/engine/engine_zeep.py b/bfabric/engine/engine_zeep.py index 0e4cf752..b91c120c 100644 --- a/bfabric/engine/engine_zeep.py +++ b/bfabric/engine/engine_zeep.py @@ -6,7 +6,8 @@ from zeep.helpers import serialize_object from bfabric.errors import BfabricRequestError, get_response_errors -from bfabric.results.result_container import ResultContainer, _clean_result +from bfabric.results.result_container import ResultContainer +from bfabric.results.response_format_dict import clean_result if TYPE_CHECKING: from bfabric.bfabric_config import BfabricAuth @@ -122,10 +123,10 @@ def _convert_results(self, response: Any, endpoint: str) -> ResultContainer: results = [] for result in response[endpoint]: results_parsed = dict(serialize_object(result, target_cls=dict)) - results_parsed = _clean_result( + results_parsed = clean_result( results_parsed, drop_underscores_suds=False, # NOTE: Underscore problem specific to SUDS - sort_responses=True, + sort_keys=True, ) results += [results_parsed] return ResultContainer(results=results, total_pages_api=n_available_pages, errors=errors) diff --git a/bfabric/results/response_format_dict.py b/bfabric/results/response_format_dict.py index 47ce298a..fb7e0cc5 100644 --- a/bfabric/results/response_format_dict.py +++ b/bfabric/results/response_format_dict.py @@ -1,4 +1,5 @@ from __future__ import annotations + from copy import deepcopy @@ -115,3 +116,19 @@ def sort_dicts_by_key(response: list | dict, inplace: bool = True) -> list | dic response_filtered = deepcopy(response) if not inplace else response _recursive_sort_dicts_by_key(response_filtered) return response_filtered + + +def clean_result(result: dict, drop_underscores_suds: bool = True, sort_keys: bool = False) -> dict: + """ + :param result: the response dictionary to clean + :param drop_underscores_suds: if True, the keys of the dictionaries in the response will have leading + underscores removed in some cases (relevant for SUDS) + :param sort_keys: the keys of the dictionaries in the response will be sorted (recursively) + """ + result = deepcopy(result) + if drop_underscores_suds: + map_element_keys(result, {"_id": "id", "_classname": "classname", "_projectid": "projectid"}, inplace=True) + if sort_keys: + sort_dicts_by_key(result, inplace=True) + + return result diff --git a/bfabric/results/result_container.py b/bfabric/results/result_container.py index 0a5c086c..b8bd3a48 100644 --- a/bfabric/results/result_container.py +++ b/bfabric/results/result_container.py @@ -98,19 +98,3 @@ def to_polars(self, drop_empty: bool = False) -> polars.DataFrame: import polars return polars.DataFrame(self.to_list_dict(drop_empty=drop_empty)) - - -def _clean_result(result: dict, drop_underscores_suds: bool = True, sort_responses: bool = False) -> dict: - """ - :param drop_underscores_suds: if True, the keys of the dictionaries in the response will have leading - underscores removed in some cases (relevant for SUDS) - :param sort_responses: the keys of the dictionaries in the response will be sorted (recursively) - """ - if drop_underscores_suds: - formatter.map_element_keys( - result, {"_id": "id", "_classname": "classname", "_projectid": "projectid"}, inplace=True - ) - if 
sort_responses: - formatter.sort_dicts_by_key(result, inplace=True) - - return result diff --git a/bfabric/tests/unit/test_bfabric.py b/bfabric/tests/unit/test_bfabric.py index 355114be..23e01d3e 100644 --- a/bfabric/tests/unit/test_bfabric.py +++ b/bfabric/tests/unit/test_bfabric.py @@ -267,6 +267,38 @@ def test_delete_when_auth_and_check_true(self): method_assert_success.assert_called_once_with() mock_engine.delete.assert_called_once_with(endpoint=endpoint, id=10, auth=self.mock_auth) + @patch.object(Bfabric, "read") + def test_exists_when_true(self, method_read): + method_read.return_value.__len__.return_value = 1 + self.assertTrue(self.mock_bfabric.exists(endpoint="test_endpoint", key="key", value="value")) + method_read.assert_called_once_with( + endpoint="test_endpoint", obj={"key": "value"}, max_results=1, check=True, return_id_only=True + ) + + @patch.object(Bfabric, "read") + def test_exists_when_true_and_extra_args(self, method_read): + method_read.return_value.__len__.return_value = 1 + self.assertTrue( + self.mock_bfabric.exists( + endpoint="test_endpoint", key="key", value="value", query={"extra": "arg"}, check=False + ) + ) + method_read.assert_called_once_with( + endpoint="test_endpoint", + obj={"key": "value", "extra": "arg"}, + max_results=1, + check=False, + return_id_only=True, + ) + + @patch.object(Bfabric, "read") + def test_exists_when_false(self, method_read): + method_read.return_value.__len__.return_value = 0 + self.assertFalse(self.mock_bfabric.exists(endpoint="test_endpoint", key="key", value="value")) + method_read.assert_called_once_with( + endpoint="test_endpoint", obj={"key": "value"}, max_results=1, check=True, return_id_only=True + ) + @patch.object(Bfabric, "save") def test_upload_resource(self, method_save): resource_name = "hello_world.txt" diff --git a/bfabric/tests/unit/test_response_format_dict.py b/bfabric/tests/unit/test_response_format_dict.py index 35c312b3..9fe1765b 100644 --- a/bfabric/tests/unit/test_response_format_dict.py +++ b/bfabric/tests/unit/test_response_format_dict.py @@ -1,4 +1,5 @@ import unittest + import bfabric.results.response_format_dict as response_format_dict @@ -27,6 +28,15 @@ def test_sort_dicts_by_key(self): output_list_dict = response_format_dict.sort_dicts_by_key(input_list_dict, inplace=False) self.assertEqual(str(output_list_dict), str(target_list_dict)) + def test_clean_result(self): + result_input = [{"b": 1, "a": 2, "_id": 3}, {"b": 4, "_id": 5, "a": 6}] + cleaned = response_format_dict.clean_result(result_input, drop_underscores_suds=True, sort_keys=True) + self.assertEqual(repr([{"a": 2, "b": 1, "id": 3}, {"a": 6, "b": 4, "id": 5}]), repr(cleaned)) + self.assertEqual( + repr([{"b": 1, "a": 2, "_id": 3}, {"b": 4, "_id": 5, "a": 6}]), + repr(result_input), + ) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/bfabric/tests/unit/test_result_container.py b/bfabric/tests/unit/test_result_container.py index 8ced19ab..6591fa89 100644 --- a/bfabric/tests/unit/test_result_container.py +++ b/bfabric/tests/unit/test_result_container.py @@ -1,6 +1,8 @@ import logging import unittest +import polars.testing + from bfabric.results.result_container import ResultContainer @@ -15,8 +17,8 @@ def test_str(self): self.assertEqual("[4, 5]", str(self.res2)) def test_repr(self): - self.assertEqual("[1, 2, 3]", str(self.res1)) - self.assertEqual("[4, 5]", str(self.res2)) + self.assertEqual("[1, 2, 3]", repr(self.res1)) + self.assertEqual("[4, 5]", repr(self.res2)) def test_len(self): self.assertEqual(3, len(self.res1)) 
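A `ResultContainer` wraps the list of result dictionaries returned by a query; the conversion exercised by the new `test_to_polars` case in the next hunk works roughly as in this sketch:

```{python}
from bfabric.results.result_container import ResultContainer

# Each result dictionary becomes one data-frame row; keys become columns.
res = ResultContainer([{"a": 1, "b": 2}, {"a": 3, "b": 4}])
print(res.to_polars())
```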
@@ -82,6 +84,11 @@ def test_to_list_dict_when_drop_empty(self):
         expected = [{"b": 1}, {"a": 2, "b": 3}]
         self.assertListEqual(expected, self.res_with_empty.to_list_dict(drop_empty=True))
 
+    def test_to_polars(self):
+        res = ResultContainer([{"a": 1, "b": 2}, {"a": 3, "b": 4}])
+        df = res.to_polars()
+        polars.testing.assert_frame_equal(polars.DataFrame({"a": [1, 3], "b": [2, 4]}), df)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
From e8d2e604f267c6b9c69028fdbaa69c8e23e278b5 Mon Sep 17 00:00:00 2001
From: Leonardo Schwarz <leonardo.schwarz@fgcz.ethz.ch>
Date: Fri, 17 May 2024 12:16:32 +0200
Subject: [PATCH 116/129] update README.md

---
 README.md | 122 ++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 76 insertions(+), 46 deletions(-)

diff --git a/README.md b/README.md
index c0d990c9..0d0af7b1 100644
--- a/README.md
+++ b/README.md
@@ -17,39 +17,60 @@ For more advanced users the *bfabricPy* package also provides a powerful query i
 
 ## Install
 
-There are many ways to install Python packages.
-Generally it's recommended to use some type of virtual environment manager, like [conda](https://docs.conda.io/en/latest/), [uv](https://github.com/astral-sh/uv), or Python's [venv](https://docs.python.org/3/library/venv.html). Then the following commands work.
-If you don't, you might need to specify `--user` to the pip commands, so they get installed into the user's Python package directory.
+The installation will vary depending on which of the use cases you want:
 
-To use bfabricPy a normal installation is good enough:
-```{bash}
-pip install git+https://github.com/fgcz/bfabricPy.git
+1. Use the command line scripts
+2. Use the Python API
+3. Develop on the package
+
+The command line scripts are currently included in all cases.
+
+### Command line scripts
+To use the command line scripts, it's recommended to install `bfabricPy` with [pipx](https://pipx.pypa.io/).
+If you don't have `pipx` installed, refer to the [pipx documentation](https://pipx.pypa.io/stable/installation/) for instructions.
+
+You can execute a command using a specific version of `bfabricPy` with the `pipx run` command.
+This command handles the dependencies of multiple concurrent installations:
+
+```bash
+pipx run --spec "git+https://github.com/fgcz/bfabricPy.git@0.13.8" bfabric_read.py --help
+```
+
+To install a specific version of bfabricPy on your system and make the command available without the `pipx run` prefix, use the following command:
+```bash
+pipx install "git+https://github.com/fgcz/bfabricPy.git@0.13.8"
+bfabric_read.py --help
+```
+
+### Python API
+If you're interested in using the Python API of `bfabricPy`, you have two options:
+
+#### 1. Configure it in your `pyproject.toml` file.
+```toml
+[project]
+dependencies = [
+    "bfabricPy @ git+https://github.com/fgcz/bfabricPy.git@main"
+]
+```
+
+#### 2. Install the `bfabricPy` package directly using pip.
+```bash
+pip install git+https://github.com/fgcz/bfabricPy.git
+```
+
+### Development
 As a bfabricPy developer: (i.e. 
an editable install)
 
 ```{bash}
 pip install -e ".[dev]"
 ```
 
-## Configuration [outdated]
-
-```{bash}
-cat ~/.bfabricpy.yml
-```
+## Configuration
+Create a file as follows: (note: the password is not your login password, but the web service password)
 
 ```{yaml}
+# ~/.bfabricpy.yml
+
 GENERAL:
   default_config: PRODUCTION
 
@@ -59,6 +80,15 @@ PRODUCTION:
   base_url: https://fgcz-bfabric.uzh.ch/bfabric
 ```
 
+You can also include an additional config for the TEST instance:
+
+```{yaml}
+TEST:
+  login: yourBfabricLogin
+  password: yourBfabricWebPassword
+  base_url: https://fgcz-bfabric-test.uzh.ch/bfabric
+```
+
 ## CheatSheet
 
 ### Read
 
@@ -83,41 +113,50 @@ bfabric_read.py workunit status failed
 bfabric_read.py resource filechecksum d41d8cd98f00b204e9800998ecf8427e
 ```
 
-call the `python3` interpreter and enter
+Using the Python API:
 
 ```{py}
-import bfabric
+from bfabric import Bfabric
 
-B = bfabric.Bfabric()
+client = Bfabric.from_config()
 
-user = B.read_object(endpoint = 'user', obj={'login': 'cpanse'})
-resource = B.read_object(endpoint = 'resource', obj={'id': 550327 })
+user = client.read(endpoint = 'user', obj={'login': 'cpanse'})
+resource = client.read(endpoint = 'resource', obj={'id': 550327 })
 ```
 
 ### save
 
-```
-rv = B.save_object('workunit', {'id': 254063, 'status': 'available'})
-B.print_json(rv)
-# print(rv)
+```{bash}
+bfabric_save_workunit_attribute.py 199387 status available
 ```
 
-### Command line code snippets
-
-remove pending workunits from the past
-```{bash}
- bfabric_read.py workunit status pending \
-   | awk '$2~/cpanse/ && $3~/2015/{print $1}'
-   | fgcz_bfabric_delete_workunits.py
+```{python}
+import json
+rv = client.save('workunit', {'id': 254063, 'status': 'available'})
+print(json.dumps(rv.to_list_dict(), indent=2))
 ```
 
-find empty resource files in bfabric
+### Command line code snippet
+Find empty resource files in bfabric
 
 ```{bash}
 bfabric_read.py resource filechecksum `md5sum < /dev/null | cut -c-32` \
   | cat -n \
   | tail
 ```
 
-## Examples
+## Testing
+Please be advised that integration tests will write to the `TEST` instance configured in your `~/.bfabricpy.yml` config file. 
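+
+A client pointed at the `TEST` instance can be constructed explicitly, for example (a sketch; `config_env` selects the corresponding section of `~/.bfabricpy.yml`):
+
+```{python}
+from bfabric import Bfabric
+
+client = Bfabric.from_config(config_env="TEST")
+```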
+ +Run unit tests: +```{bash} +python3 -m unittest discover -s "bfabric/tests/unit" +``` + +Run integration tests (see note above): +```{bash} +python3 -m unittest discover -s "bfabric/tests/integration" +``` + +## Examples [outdated] ### bash script generated by the yaml wrapper creator / submitter @@ -276,7 +315,7 @@ bfabric_read.py importresource \ done ``` -## Send an E-mail +## Send an E-mail [outdated] ``` # by CT,CP @@ -290,11 +329,7 @@ rv = B.save_object(endpoint = 'mail', # shown as mail for user id 482 ``` -## Testing -```{sh} -cd bfabric/tests/ && python3 -m unittest discover; echo $?; cd - -``` ## See also @@ -305,16 +340,13 @@ cd bfabric/tests/ && python3 -m unittest discover; echo $?; cd - ## FAQ - ### How to resolve ` Date: Fri, 17 May 2024 12:30:55 +0200 Subject: [PATCH 117/129] update README.md --- README.md | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 0d0af7b1..cef98382 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,17 @@ -![functionTest](https://github.com/fgcz/bfabricPy/workflows/functionalTest/badge.svg) +![unitTests](https://github.com/fgcz/bfabricPy/workflows/unit%20tests/badge.svg) [![EDBT'10](https://img.shields.io/badge/EDBT-10.1145%2F1739041.1739135-brightgreen)](https://doi.org/10.1145/1739041.1739135) [![JIB](https://img.shields.io/badge/JIB-10.1515%2Fjib.2022.0031-brightgreen)](https://doi.org/10.1515/jib-2022-0031) -![Downloads](https://img.shields.io/github/downloads/fgcz/bfabricPy/total) - - # bfabricPy - This package connects the [bfabric](https://fgcz-bfabric.uzh.ch/bfabric/) system to the [python](https://www.python.org/) and [R](https://cran.r-project.org/) world while providing a JSON and REST interface using [Flask](https://www.fullstackpython.com). The [bfabricShiny](https://github.com/cpanse/bfabricShiny) R package is an extension and provides code snippets and sample implementation for a seamless R shiny bfabric integration. For more advanced users the *bfabricPy* package also provides a powerful query interface on the command-line though using the provided scripts. - -![bfabricPy-read](https://user-images.githubusercontent.com/4901987/65025926-db77c900-d937-11e9-8c92-f2412d6793ee.gif) -[see also #14](https://github.com/fgcz/bfabricPy/issues/14) - - ## Install -The installation will vary depending on which of the use cases you want: +The package can be installed like any other Python package, so if you are familiar you might not need to read this section. +Currently, it's only available from GitHub. + +The best way to install the package depends on your use case, i.e. whether you want to: 1. Use the command line scripts 2. Use the Python API From 88ab710dcf386666e503cbec19fc3ea237fa5b78 Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Fri, 17 May 2024 14:03:13 +0200 Subject: [PATCH 118/129] config_path param --- bfabric/bfabric.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index 11c4d634..2a94a38d 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -76,6 +76,7 @@ def __init__( def from_config( cls, config_env: str | None = None, + config_path: str | None = None, auth: BfabricAuth | Literal["config"] | None = "config", engine: BfabricAPIEngineType = BfabricAPIEngineType.SUDS, verbose: bool = False, @@ -86,12 +87,13 @@ def from_config( - The `BFABRICPY_CONFIG_ENV` environment variable - The `default_config` field in the config file "GENERAL" section :param config_env: Configuration environment to use. 
If not given, it is deduced as described above. + :param config_path: Path to the config file, in case it is different from default :param auth: Authentication to use. If "config" is given, the authentication will be read from the config file. If it is set to None, no authentication will be used. :param engine: Engine to use for the API. Default is SUDS. :param verbose: Print a system info message to standard error console """ - config, auth_config = get_system_auth(config_env=config_env) + config, auth_config = get_system_auth(config_env=config_env, config_path=config_path) auth_used: BfabricAuth | None = auth_config if auth == "config" else auth return cls(config, auth_used, engine=engine, verbose=verbose) From eac4cffc4a2c041aa8a7313525240adbda4b2c1d Mon Sep 17 00:00:00 2001 From: Aleksejs Fomins Date: Fri, 17 May 2024 14:10:55 +0200 Subject: [PATCH 119/129] minor Path bug --- bfabric/bfabric.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index c308e390..88205f34 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -294,6 +294,9 @@ def get_system_auth( if not have_config_path: # Get default path config file path config_path = Path("~/.bfabricpy.yml").expanduser() + else: + config_path = Path(config_path).expanduser() + # Use the provided config data from arguments instead of the file if not config_path.is_file(): From f321a24859b23067bf42817602d9c9c9ab19071a Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 17 May 2024 14:57:22 +0200 Subject: [PATCH 120/129] flask: simplify error handling --- bfabric/scripts/bfabric_flask.py | 47 +++++++++++--------------------- 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/bfabric/scripts/bfabric_flask.py b/bfabric/scripts/bfabric_flask.py index 96f72104..4ff84afa 100755 --- a/bfabric/scripts/bfabric_flask.py +++ b/bfabric/scripts/bfabric_flask.py @@ -121,25 +121,17 @@ def read() -> Response: auth = get_request_auth(params) logger.info(f"'{auth.login}' /read {page_offset=}, {page_max_results=}, {query=}") - try: - with client.with_auth(auth): - client.print_version_message() - res = client.read( - endpoint=endpoint, - obj=query, - offset=page_offset, - max_results=page_max_results, - ) - logger.info(f"'{auth.login}' login success query {query} ...") - except Exception: - logger.exception(f"'{auth.login}' query failed ...") - return jsonify({"status": "jsonify failed: bfabric python module."}) - - try: - return jsonify({"res": res.to_list_dict()}) - except Exception: - logger.exception(f"'{auth.login}' query failed ...") - return jsonify({"status": "jsonify failed"}) + with client.with_auth(auth): + client.print_version_message() + res = client.read( + endpoint=endpoint, + obj=query, + offset=page_offset, + max_results=page_max_results, + ) + logger.info(f"'{auth.login}' login success query {query} ...") + + return jsonify({"res": res.to_list_dict()}) @app.route("/save", methods=["POST"]) @@ -150,18 +142,11 @@ def save() -> Response: query = params["query"] auth = get_request_auth(params) - try: - with client.with_auth(auth): - res = client.save(endpoint=endpoint, obj=query) - logger.info(f"'{auth.login}' login success save method ...") - except Exception: - logger.exception(f"save method failed for login {auth.login}.") - return jsonify({"status": "jsonify failed: bfabric python module."}) - - try: - return jsonify({"res": res.to_list_dict()}) - except Exception: - return jsonify({"status": "jsonify failed"}) + with client.with_auth(auth): + res = 
client.save(endpoint=endpoint, obj=query) + logger.info(f"'{auth.login}' login success save method ...") + + return jsonify({"res": res.to_list_dict()}) @app.route("/add_resource", methods=["POST"]) From 69599ed72794d8060acd093b9b83dbcea3dd151f Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 17 May 2024 15:04:11 +0200 Subject: [PATCH 121/129] adapt unit tests --- bfabric/tests/unit/test_bfabric.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bfabric/tests/unit/test_bfabric.py b/bfabric/tests/unit/test_bfabric.py index 23e01d3e..4ed44ecf 100644 --- a/bfabric/tests/unit/test_bfabric.py +++ b/bfabric/tests/unit/test_bfabric.py @@ -28,7 +28,7 @@ def test_from_config_when_no_args(self, _mock_engine_suds, mock_get_system_auth) self.assertIsInstance(client, Bfabric) self.assertEqual(mock_config, client.config) self.assertEqual(mock_auth, client.auth) - mock_get_system_auth.assert_called_once_with(config_env=None) + mock_get_system_auth.assert_called_once_with(config_env=None, config_path=None) @patch("bfabric.bfabric.get_system_auth") @patch("bfabric.bfabric.EngineSUDS") @@ -41,7 +41,7 @@ def test_from_config_when_explicit_auth(self, _mock_engine_suds, mock_get_system self.assertIsInstance(client, Bfabric) self.assertEqual(mock_config, client.config) self.assertEqual(mock_auth, client.auth) - mock_get_system_auth.assert_called_once_with(config_env="TestingEnv") + mock_get_system_auth.assert_called_once_with(config_env="TestingEnv", config_path=None) @patch("bfabric.bfabric.get_system_auth") @patch("bfabric.bfabric.EngineSUDS") @@ -55,7 +55,7 @@ def test_from_config_when_none_auth(self, _mock_engine_suds, mock_get_system_aut with self.assertRaises(ValueError) as error: _ = client.auth self.assertIn("Authentication not available", str(error.exception)) - mock_get_system_auth.assert_called_once_with(config_env="TestingEnv") + mock_get_system_auth.assert_called_once_with(config_env="TestingEnv", config_path=None) @patch("bfabric.bfabric.get_system_auth") @patch("bfabric.bfabric.EngineSUDS") @@ -69,7 +69,7 @@ def test_from_config_when_engine_suds(self, mock_engine_suds, mock_get_system_au self.assertEqual(mock_config, client.config) self.assertEqual(mock_auth, client.auth) self.assertEqual(mock_engine_suds.return_value, client.engine) - mock_get_system_auth.assert_called_once_with(config_env=None) + mock_get_system_auth.assert_called_once_with(config_env=None, config_path=None) mock_engine_suds.assert_called_once_with(base_url=mock_config.base_url) self.assertEqual(mock_engine_suds.return_value, client.engine) @@ -86,7 +86,7 @@ def test_from_config_when_engine_zeep(self, mock_engine_zeep, mock_get_system_au self.assertEqual(mock_config, client.config) self.assertEqual(mock_auth, client.auth) self.assertEqual(mock_engine_zeep.return_value, client.engine) - mock_get_system_auth.assert_called_once_with(config_env=None) + mock_get_system_auth.assert_called_once_with(config_env=None, config_path=None) mock_engine_zeep.assert_called_once_with(base_url=mock_config.base_url) self.assertEqual(mock_engine_zeep.return_value, client.engine) From 6f47ec0021aef5b06a13a7eb6d3edadfc6f97938 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 21 May 2024 10:57:39 +0200 Subject: [PATCH 122/129] ensure compatibility with recent changes - datetime in isoformat is now supported (including tz info) - small adaption to changed from_config method --- .../bfabric_list_not_available_proteomics_workunits.py | 2 +- bfabric/tests/integration/test_bfabric2_read.py | 4 ++-- 2 files 
changed, 3 insertions(+), 3 deletions(-) diff --git a/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py b/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py index 7f02f2c7..55fb9502 100755 --- a/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py +++ b/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py @@ -68,7 +68,7 @@ def list_not_available_proteomics_workunits(date_cutoff: datetime) -> None: for status in ["Pending", "Processing", "Failed"]: workunits_by_status[status] = client.read( endpoint="workunit", - obj={"status": status, "createdafter": date_cutoff}, + obj={"status": status, "createdafter": date_cutoff.isoformat()}, ).to_list_dict() render_output(workunits_by_status, config=client.config) diff --git a/bfabric/tests/integration/test_bfabric2_read.py b/bfabric/tests/integration/test_bfabric2_read.py index 5fb6ff6a..8069be45 100755 --- a/bfabric/tests/integration/test_bfabric2_read.py +++ b/bfabric/tests/integration/test_bfabric2_read.py @@ -81,8 +81,8 @@ def test_annotation(self): def test_invalid_auth(self): auth = BfabricAuth(login=self.clients["suds"].auth.login, password="invalid_password") clients = { - "zeep": Bfabric.from_config("TEST", auth, engine=BfabricAPIEngineType.ZEEP), - "suds": Bfabric.from_config("TEST", auth, engine=BfabricAPIEngineType.SUDS), + "zeep": Bfabric.from_config("TEST", auth=auth, engine=BfabricAPIEngineType.ZEEP), + "suds": Bfabric.from_config("TEST", auth=auth, engine=BfabricAPIEngineType.SUDS), } for engine, bf in clients.items(): with self.subTest(engine=engine): From 3c328e552b2a0f2c277b973f6ac81c199af669dd Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 21 May 2024 11:14:47 +0200 Subject: [PATCH 123/129] deactivate automatic functional test (to be migrated) --- .github/workflows/run_functional_test.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/run_functional_test.yml b/.github/workflows/run_functional_test.yml index 45a963f9..dea4b6df 100644 --- a/.github/workflows/run_functional_test.yml +++ b/.github/workflows/run_functional_test.yml @@ -5,12 +5,12 @@ name: functionalTest concurrency: functional-test-group on: - push: - branches: [ main ] - schedule: - - cron: "00 08 * * *" - pull_request: - branches: [ main ] + #push: + # branches: [ main ] + #schedule: + # - cron: "00 08 * * *" + #pull_request: + # branches: [ main ] workflow_dispatch: jobs: From fd30cfc1d68ebbcad8920a4bd6d3ede52361a490 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Tue, 21 May 2024 15:52:56 +0200 Subject: [PATCH 124/129] Add script to list the parameters of workunit runs --- bfabric/bfabric.py | 3 +- .../bfabric_list_workunit_parameters.py | 107 ++++++++++++++++++ pyproject.toml | 1 + 3 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 bfabric/scripts/bfabric_list_workunit_parameters.py diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py index 88205f34..9401956d 100755 --- a/bfabric/bfabric.py +++ b/bfabric/bfabric.py @@ -16,6 +16,7 @@ import base64 import importlib.metadata import logging +import sys from contextlib import contextmanager from datetime import datetime from enum import Enum @@ -172,7 +173,7 @@ def read( page_offset = initial_offset for i_iter, i_page in enumerate(requested_pages): if not (i_iter == 0 and i_page == 1): - print("-- reading page", i_page, "of", n_available_pages) + print(f"-- reading page {i_page} of {n_available_pages}", file=sys.stderr) results = self.engine.read( endpoint=endpoint, obj=obj, 
auth=self.auth, page=i_page, return_id_only=return_id_only ) diff --git a/bfabric/scripts/bfabric_list_workunit_parameters.py b/bfabric/scripts/bfabric_list_workunit_parameters.py new file mode 100644 index 00000000..c15b299f --- /dev/null +++ b/bfabric/scripts/bfabric_list_workunit_parameters.py @@ -0,0 +1,107 @@ +import argparse +import json +import sys + +import polars as pl +import rich + +from bfabric import Bfabric + + +def bfabric_list_workunit_parameters(client: Bfabric, application_id: int, max_workunits: int, format: str) -> None: + """Lists the workunit parameters of the provided application. + :param client: The Bfabric client to use. + :param application_id: The application ID to list the workunit parameters for. + :param max_workunits: The maximum number of workunits to fetch. + :param format: The output format to use. + """ + workunits_table_full = get_workunits_table_full(application_id, client, max_workunits) + workunits_table_explode = workunits_table_full.explode("parameter").with_columns( + parameter_id=pl.col("parameter").struct[1] + ) + parameter_table_wide = get_parameter_table(client, workunits_table_explode) + + merged_result = workunits_table_full[ + ["workunit_id", "created", "createdby", "name", "container_id", "inputdataset_id", "resource_ids"] + ].join(parameter_table_wide, on="workunit_id", how="left") + + print_results(format, merged_result) + + +def get_workunits_table_full(application_id: int, client: Bfabric, max_workunits: int) -> pl.DataFrame: + """Returns a table with the workunits for the specified application.""" + # read the workunit data + workunits_table_full = ( + client.read("workunit", {"applicationid": application_id}, max_results=max_workunits) + .to_polars() + .rename({"id": "workunit_id"}) + ) + # add some extra columns flattening the structure for the output + workunits_table_full = workunits_table_full.with_columns( + container_id=pl.col("container").struct[1], + resource_ids=pl.col("resource").map_elements( + lambda x: json.dumps([xx["id"] for xx in x]), return_dtype=pl.String + ), + ) + if "inputdataset" in workunits_table_full.columns: + workunits_table_full = workunits_table_full.with_columns( + inputdataset_id=pl.col("inputdataset").struct[1], + ) + else: + workunits_table_full = workunits_table_full.with_columns( + inputdataset_id=pl.lit(None) + ) + return workunits_table_full + + +def print_results(format: str, merged_result: pl.DataFrame) -> None: + """Prints the results to the console, in the requested format.""" + if format == "tsv": + print(merged_result.write_csv(file=None, separator="\t")) + elif format == "json": + print(merged_result.write_json(file=None)) + elif format == "pretty": + # use rich + rich_table = rich.table.Table() + for column in merged_result.columns: + rich_table.add_column(column) + for row in merged_result.iter_rows(): + rich_table.add_row(*map(str, row)) + console = rich.console.Console() + console.print(rich_table) + else: + raise ValueError("Unsupported format") + + +def get_parameter_table(client: Bfabric, workunits_table_explode: pl.DataFrame) -> pl.DataFrame: + """Returns a wide format table for the specified parameters, with the key `workunit_id` indicating the source.""" + # load the parameters table + collect = [] + for i_frame, frame in enumerate(workunits_table_explode.iter_slices(100)): + print(f"-- Reading parameters chunk {i_frame + 1} of {len(workunits_table_explode) // 100 + 1}", file=sys.stderr) + chunk = ( + client.read("parameter", {"id": 
frame["parameter_id"].to_list()}).to_polars().rename({"id": "parameter_id"}) + ) + collect.append(chunk) + parameter_table_full = pl.concat(collect, how="align")[["parameter_id", "key", "value"]] + # add workunit id to parameter table + parameter_table_full = parameter_table_full.join( + workunits_table_explode[["workunit_id", "parameter_id"]], on="parameter_id", how="left" + ) + # convert to wide format + return parameter_table_full.pivot(values="value", index="workunit_id", columns="key") + + +def main() -> None: + """Parses command line arguments and calls `bfabric_list_workunit_parameters`.""" + client = Bfabric.from_config(verbose=True) + parser = argparse.ArgumentParser() + parser.add_argument("application_id", type=int, help="The application ID to list the workunit parameters for.") + parser.add_argument("--max-workunits", type=int, help="The maximum number of workunits to fetch.", default=200) + parser.add_argument("--format", choices=["tsv", "json", "pretty"], default="tsv") + args = vars(parser.parse_args()) + bfabric_list_workunit_parameters(client, **args) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index d0fcd25c..52b451e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ Repository = "https://github.com/fgcz/bfabricPy" #bfabric_feeder_resource_autoQC="bfabric.scripts.bfabric_feeder_resource_autoQC:main" "bfabric_list_not_existing_storage_directories.py"="bfabric.scripts.bfabric_list_not_existing_storage_directories:main" "bfabric_list_not_available_proteomics_workunits.py"="bfabric.scripts.bfabric_list_not_available_proteomics_workunits:main" +"bfabric_list_workunit_parameters.py"="bfabric.scripts.bfabric_list_workunit_parameters:main" "bfabric_upload_resource.py"="bfabric.scripts.bfabric_upload_resource:main" "bfabric_logthis.py"="bfabric.scripts.bfabric_logthis:main" "bfabric_setResourceStatus_available.py"="bfabric.scripts.bfabric_setResourceStatus_available:main" From c52b1829c6b3fa4144cafc2e00ac4646c73d7152 Mon Sep 17 00:00:00 2001 From: Witold Wolski Date: Wed, 22 May 2024 11:14:31 +0200 Subject: [PATCH 125/129] polars-lts for m1 compbatibility --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 52b451e8..e3b3c5f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ dependencies = [ "rich >= 13.7.1", "zeep >= 4.2.1", "pandas >= 2.2.2", - "polars >= 0.20.25", + "polars-lts-cpu >= 0.20.25", "setuptools" ] From 92c3859379c8fb15e81e4d6918c0de955a03c329 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 23 May 2024 10:46:29 +0200 Subject: [PATCH 126/129] change version numbering --- README.md | 6 +++--- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index cef98382..4fd37d38 100644 --- a/README.md +++ b/README.md @@ -343,11 +343,11 @@ cd /Applications/Python 3.12 && ./Install\ Certificates.command ### How is the version numbering working? X.Y.Z -X is not used +X is used for major changes, that contain breaking changes -Y should be the bfabric release +Y should be the current bfabric release -Z increment for significant changes +Z is increased for feature releases, that should not break the API ### Howto cite? 
diff --git a/pyproject.toml b/pyproject.toml index e3b3c5f2..314f24cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ include = ["bfabric*"] [project] name = "bfabric" description = "Python client for the B-Fabric WSDL API" -version = "0.13.9" +version = "1.13.0" license = { text = "GPL-3.0" } authors = [ {name = "Christian Panse", email = "cp@fgcz.ethz.ch"}, From 2a9695e792490ebfdce3b088226819ff8ef95214 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 23 May 2024 10:57:26 +0200 Subject: [PATCH 127/129] make filename of downloaded csv configurable --- bfabric/scripts/bfabric_save_dataset2csv.py | 18 +++++++++++++----- .../scripts/test_save_dataset2csv.py | 7 +++++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/bfabric/scripts/bfabric_save_dataset2csv.py b/bfabric/scripts/bfabric_save_dataset2csv.py index 81b39fbf..5d3599fb 100755 --- a/bfabric/scripts/bfabric_save_dataset2csv.py +++ b/bfabric/scripts/bfabric_save_dataset2csv.py @@ -34,13 +34,15 @@ def dataset2csv(dataset: dict, output_path: Path, sep: str) -> None: df.write_csv(output_path, separator=sep) -def bfabric_save_dataset2csv(client: Bfabric, dataset_id: int, out_dir: Path, sep: str) -> None: - """Saves the dataset with id `dataset_id` to a csv file at `out_dir`.""" +def bfabric_save_dataset2csv(client: Bfabric, dataset_id: int, out_dir: Path, out_filename: Path, sep: str) -> None: + """Saves the dataset with id `dataset_id` to a csv file at `out_dir/out_filename` or `out_filename` if it's an + absolute path. + """ results = client.read(endpoint="dataset", obj={"id": dataset_id}).to_list_dict() if not results: raise RuntimeError(f"No dataset found with id '{dataset_id}'") dataset = results[0] - output_path = out_dir / "dataset.csv" + output_path = out_dir / out_filename try: dataset2csv(dataset, output_path=output_path, sep=sep) except Exception: @@ -55,14 +57,20 @@ def main() -> None: parser.add_argument("--id", metavar="int", required=True, help="dataset id", type=int) parser.add_argument( "--dir", - required=False, type=Path, default=".", help="the path to the directory where to save the csv file", ) + parser.add_argument( + "--file", + default="dataset.csv", + help="the name of the csv file to save the dataset content", + ) parser.add_argument("--sep", default=",", help="the separator to use in the csv file e.g. 
',' or '\\t'") args = parser.parse_args() - bfabric_save_dataset2csv(client=client, out_dir=args.dir, dataset_id=args.id, sep=args.sep) + bfabric_save_dataset2csv( + client=client, out_dir=args.dir, out_filename=args.file, dataset_id=args.id, sep=args.sep + ) if __name__ == "__main__": diff --git a/bfabric/tests/integration/scripts/test_save_dataset2csv.py b/bfabric/tests/integration/scripts/test_save_dataset2csv.py index 096eafdc..639a7268 100644 --- a/bfabric/tests/integration/scripts/test_save_dataset2csv.py +++ b/bfabric/tests/integration/scripts/test_save_dataset2csv.py @@ -1,8 +1,9 @@ import unittest from pathlib import Path from tempfile import TemporaryDirectory -import polars.testing + import polars as pl +import polars.testing from bfabric import Bfabric from bfabric.scripts.bfabric_save_dataset2csv import bfabric_save_dataset2csv @@ -16,7 +17,9 @@ def setUp(self): def test_save_dataset2csv(self): with TemporaryDirectory() as temp_dir: out_dir = Path(temp_dir) - bfabric_save_dataset2csv(self.mock_client, self.dataset_id, out_dir, sep=",") + bfabric_save_dataset2csv( + self.mock_client, self.dataset_id, out_dir, out_filename=Path("dataset.csv"), sep="," + ) expected_lines = [ r"Normal,Comma,Backslash,Apostrophe", From 708b1782c948a302ac887e2af82bfc13ae74ff67 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Thu, 23 May 2024 16:00:48 +0200 Subject: [PATCH 128/129] make ResultContainer iterable --- bfabric/results/result_container.py | 5 ++++- bfabric/tests/unit/test_result_container.py | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/bfabric/results/result_container.py b/bfabric/results/result_container.py index b8bd3a48..7f714b8c 100644 --- a/bfabric/results/result_container.py +++ b/bfabric/results/result_container.py @@ -1,7 +1,7 @@ from __future__ import annotations import logging -from typing import Any, TYPE_CHECKING +from typing import Any, TYPE_CHECKING, Iterable import bfabric.results.response_format_dict as formatter @@ -30,6 +30,9 @@ def __init__( def __getitem__(self, idx: int) -> dict[str, Any]: return self.results[idx] + def __iter__(self) -> Iterable[dict[str, Any]]: + return iter(self.results) + def __repr__(self) -> str: return self.__str__() diff --git a/bfabric/tests/unit/test_result_container.py b/bfabric/tests/unit/test_result_container.py index 6591fa89..6c24aa1e 100644 --- a/bfabric/tests/unit/test_result_container.py +++ b/bfabric/tests/unit/test_result_container.py @@ -20,6 +20,10 @@ def test_repr(self): self.assertEqual("[1, 2, 3]", repr(self.res1)) self.assertEqual("[4, 5]", repr(self.res2)) + def test_iter(self): + items = list(iter(self.res1)) + self.assertListEqual([1, 2, 3], items) + def test_len(self): self.assertEqual(3, len(self.res1)) self.assertEqual(2, len(self.res2)) From 8490b16358a6f8086051accc5b3829369e312998 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Fri, 24 May 2024 08:32:35 +0200 Subject: [PATCH 129/129] add a changelog --- docs/changelog.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 docs/changelog.md diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100644 index 00000000..171ac06f --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1,27 @@ +# Changelog +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
+
+Versioning currently follows `X.Y.Z`, where
+
+- `X` is used for major changes that contain breaking changes
+- `Y` should be the current bfabric release
+- `Z` is increased for feature releases that should not break the API
+
+## [1.13.0] - 2024-05-24
+This is a major release refactoring bfabricPy's API.
+
+### Changed
+- The `Bfabric` class operations now return `ResultContainer` objects.
+  - These provide a list-like interface to access individual items or iterate over them.
+  - Individual items are dictionaries, potentially nested, and no longer specific to suds/zeep.
+  - Convenience conversions, e.g. to a polars DataFrame, are available on the container.
+- Configuration is now defined in `~/.bfabricpy.yml` and supports multiple configurations, which can be selected with the `BFABRICPY_CONFIG_ENV` environment variable. Please consult the README for an example configuration.
+- Use `pyproject.toml` for package configuration.
+- Scripts have been refactored on a case-by-case basis.
+
+### Added
+- Zeep can be used instead of suds for SOAP communication.
+- `Bfabric` can be instantiated without authentication, which can be provided later. This is useful in a server setup.
+
+### Removed
+- Several old scripts have been moved into a `deprecated_scripts` folder.
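
PATCH 128 makes `ResultContainer` iterable, which rounds out the list-like interface described in the changelog above. A minimal usage sketch, assuming a valid `~/.bfabricpy.yml`; the endpoint and query values are placeholders:

```python
from bfabric import Bfabric

# Reads configuration (and credentials) from ~/.bfabricpy.yml; the active
# environment can be switched via the BFABRICPY_CONFIG_ENV environment variable.
client = Bfabric.from_config(verbose=True)

# read() returns a ResultContainer rather than raw suds/zeep objects.
results = client.read(endpoint="workunit", obj={"status": "Pending"}, max_results=10)

print(len(results))       # number of items
print(results[0])         # items are plain, possibly nested, dicts
for workunit in results:  # direct iteration, added in PATCH 128
    print(workunit["id"])

rows = results.to_list_dict()  # or materialize everything as a list of dicts
```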
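The "instantiated without authentication" bullet corresponds to the pattern `bfabric_flask.py` uses in PATCH 120: create one unauthenticated client and bind per-request credentials with `with_auth`. A sketch under the assumption that `BfabricAuth` is exported from the top-level `bfabric` package; the handler function and its arguments are hypothetical:

```python
from bfabric import Bfabric, BfabricAuth  # BfabricAuth export at package level assumed

# A client without credentials, e.g. for a long-running server process.
client = Bfabric.from_config(auth=None, verbose=True)


def handle_request(login: str, webservicepassword: str) -> list[dict]:
    # Hypothetical request handler: credentials are only bound for the
    # duration of the with-block, mirroring bfabric_flask.py.
    auth = BfabricAuth(login=login, password=webservicepassword)
    with client.with_auth(auth):
        return client.read(endpoint="workunit", obj={"createdby": login}).to_list_dict()
```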
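For readers of PATCH 124: the wide-format pivot at the end of `get_parameter_table` is the step that turns one row per (workunit, parameter) pair into one row per workunit, with one column per parameter key. A minimal polars sketch of just that step; the `pivot` call mirrors the script, but the table contents here are invented:

```python
import polars as pl

# Hypothetical long-format parameter table, one row per (workunit, parameter).
long_df = pl.DataFrame(
    {
        "workunit_id": [101, 101, 102],
        "key": ["fdr", "search_engine", "fdr"],
        "value": ["0.01", "comet", "0.05"],
    }
)

# Same call as in the script (polars 0.20.x signature, as pinned in pyproject.toml).
wide_df = long_df.pivot(values="value", index="workunit_id", columns="key")
print(wide_df)
# One row per workunit; workunit 102 never defined "search_engine", so that cell is null.
```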