From cc41e94c94b54d897c0e5b92f3fc16f8c9181dd7 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 2 Apr 2021 13:21:35 -0400 Subject: [PATCH 01/65] init branch --- docs/changelog.md | 3 +++ pipestat/_version.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 8419336f..e59f61c0 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.1.0] - unreleased + + ## [0.0.4] - 2021-04-02 ### Added - config validation diff --git a/pipestat/_version.py b/pipestat/_version.py index 81f0fdec..607f7a49 100644 --- a/pipestat/_version.py +++ b/pipestat/_version.py @@ -1 +1 @@ -__version__ = "0.0.4" +__version__ = "0.1.0-dev" From 9ac892646f98d35225807ec6dfebb02c46da9ae3 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 2 Apr 2021 15:27:48 -0400 Subject: [PATCH 02/65] update pre-commit cfg --- .pre-commit-config.yaml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 16cccb97..ab5489e2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,18 +4,17 @@ repos: hooks: - id: trailing-whitespace - id: check-yaml - - id: check-json - id: end-of-file-fixer - - id: name-tests-test - id: requirements-txt-fixer - id: trailing-whitespace - - repo: https://github.com/psf/black - rev: 20.8b1 - hooks: - - id: black - - repo: https://github.com/PyCQA/isort rev: 5.7.0 hooks: - id: isort + args: ["--profile", "black"] + + - repo: https://github.com/psf/black + rev: 20.8b1 + hooks: + - id: black From 06dfab2854b0d71c536383c785131af26ea13b94 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 2 Apr 2021 15:28:17 -0400 Subject: [PATCH 03/65] start ORM implementation --- pipestat/pipestat_orm.py | 552 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 552 insertions(+) create mode 100644 pipestat/pipestat_orm.py diff --git a/pipestat/pipestat_orm.py b/pipestat/pipestat_orm.py new file mode 100644 index 00000000..99556f52 --- /dev/null +++ b/pipestat/pipestat_orm.py @@ -0,0 +1,552 @@ +from contextlib import contextmanager +from copy import deepcopy +from logging import getLogger +from typing import Any, Dict, List, Optional, Union +from urllib.parse import quote_plus + +from attmap import PathExAttMap as PXAM +from jsonschema import validate +from sqlalchemy import Column, Float, ForeignKey, Integer, String, Table, create_engine +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import relationship, sessionmaker +from ubiquerg import create_lock, remove_lock +from yacman import YacAttMap + +from .const import * +from .exceptions import * +from .helpers import * + +_LOGGER = getLogger(PKG_NAME) + + +class PipestatManagerORM(dict): + """ + Pipestat standardizes reporting of pipeline results and + pipeline status management. It formalizes a way for pipeline developers + and downstream tools developers to communicate -- results produced by a + pipeline can easily and reliably become an input for downstream analyses. + The object exposes API for interacting with the results and + pipeline status and can be backed by either a YAML-formatted file + or a PostgreSQL database. 
+ """ + + def __init__( + self, + namespace: str = None, + record_identifier: str = None, + schema_path: str = None, + results_file_path: str = None, + database_only: bool = False, + config: Union[str, dict] = None, + status_schema_path: str = None, + flag_file_dir: str = None, + ): + """ + Initialize the object + + :param str namespace: namespace to report into. This will be the DB + table name if using DB as the object back-end + :param str record_identifier: record identifier to report for. This + creates a weak bound to the record, which can be overriden in + this object method calls + :param str schema_path: path to the output schema that formalizes + the results structure + :param str results_file_path: YAML file to report into, if file is + used as the object back-end + :param bool database_only: whether the reported data should not be + stored in the memory, but only in the database + :param str | dict config: path to the configuration file or a mapping + with the config file content + :param str status_schema_path: path to the status schema that formalizes + the status flags structure + """ + + def _check_cfg_key(cfg: dict, key: str) -> bool: + if key not in cfg: + _LOGGER.warning(f"Key '{key}' not found in config") + return False + return True + + def _mk_abs_via_cfg( + path: Optional[str], + cfg_path: Optional[str], + ) -> Optional[str]: + if path is None: + return path + assert isinstance(path, str), TypeError("Path is expected to be a str") + if os.path.isabs(path): + return path + if cfg_path is None: + rel_to_cwd = os.path.join(os.getcwd(), path) + if os.path.exists(rel_to_cwd) or os.access( + os.path.dirname(rel_to_cwd), os.W_OK + ): + return rel_to_cwd + raise OSError(f"Could not make this path absolute: {path}") + joined = os.path.join(os.path.dirname(cfg_path), path) + if os.path.isabs(joined): + return joined + raise OSError(f"Could not make this path absolute: {path}") + + def _select_value( + arg_name: str, + arg_value: Any, + cfg: dict, + strict: bool = True, + env_var: str = None, + ) -> Any: + if arg_value is not None: + return arg_value + if arg_name not in cfg or cfg[arg_name] is None: + if env_var is not None: + arg = os.getenv(env_var, None) + if arg is not None: + _LOGGER.debug(f"Value '{arg}' sourced from '{env_var}' env var") + return expandpath(arg) + if strict: + raise PipestatError( + f"Value for the required '{arg_name}' argument could not be" + f" determined. Provide it in the config or pass to the " + f"object constructor." 
+ ) + return + return cfg[arg_name] + + super(PipestatManagerORM, self).__init__() + self[CONFIG_KEY] = YacAttMap() + # read config or config data + config = config or os.getenv(ENV_VARS["config"]) + if config is not None: + if isinstance(config, str): + config = os.path.abspath(expandpath(config)) + self[CONFIG_KEY] = YacAttMap(filepath=config) + self._config_path = config + elif isinstance(config, dict): + self[CONFIG_KEY] = YacAttMap(entries=config) + self._config_path = None + else: + raise TypeError( + "database_config has to be either path to the " + "file to read or a dict" + ) + # validate config + cfg = self[CONFIG_KEY].to_dict(expand=True) + _, cfg_schema = read_yaml_data(CFG_SCHEMA, "config schema") + validate(cfg, cfg_schema) + + self[NAME_KEY] = _select_value( + "namespace", namespace, self[CONFIG_KEY], env_var=ENV_VARS["namespace"] + ) + self[RECORD_ID_KEY] = _select_value( + "record_identifier", + record_identifier, + self[CONFIG_KEY], + False, + ENV_VARS["record_identifier"], + ) + self[DB_ONLY_KEY] = database_only + # read results schema + self._schema_path = _select_value( + "schema_path", + schema_path, + self[CONFIG_KEY], + False, + env_var=ENV_VARS["schema"], + ) + if self._schema_path is not None: + _, self[SCHEMA_KEY] = read_yaml_data( + _mk_abs_via_cfg(self._schema_path, self.config_path), "schema" + ) + self.validate_schema() + # determine the highlighted results + self[HIGHLIGHTED_KEY] = [ + k + for k, v in self.schema.items() + if "highlight" in v and v["highlight"] is True + ] + if self[HIGHLIGHTED_KEY]: + assert isinstance(self[HIGHLIGHTED_KEY], list), TypeError( + f"highlighted results specification " + f"({self[HIGHLIGHTED_KEY]}) has to be a list" + ) + # read status schema + status_schema_path = ( + _mk_abs_via_cfg( + _select_value( + "status_schema_path", + status_schema_path, + self[CONFIG_KEY], + False, + env_var=ENV_VARS["status_schema"], + ), + self.config_path, + ) + or STATUS_SCHEMA + ) + self[STATUS_SCHEMA_SOURCE_KEY], self[STATUS_SCHEMA_KEY] = read_yaml_data( + status_schema_path, "status schema" + ) + # determine results file + results_file_path = _mk_abs_via_cfg( + _select_value( + "results_file_path", + results_file_path, + self[CONFIG_KEY], + False, + ENV_VARS["results_file"], + ), + self.config_path, + ) + if results_file_path: + if self[DB_ONLY_KEY]: + raise ValueError( + "Running in database only mode does not make " + "sense with a YAML file as a backend." 
+ ) + self[FILE_KEY] = results_file_path + self._init_results_file() + flag_file_dir = _select_value( + "flag_file_dir", flag_file_dir, self[CONFIG_KEY], False + ) or os.path.dirname(self.file) + self[STATUS_FILE_DIR] = _mk_abs_via_cfg(flag_file_dir, self.config_path) + elif CFG_DATABASE_KEY in self[CONFIG_KEY]: + if not all( + [ + _check_cfg_key(self[CONFIG_KEY][CFG_DATABASE_KEY], key) + for key in DB_CREDENTIALS + ] + ): + raise MissingConfigDataError( + "Must specify all database login " "credentials or result_file_path" + ) + self[DATA_KEY] = YacAttMap() + self._init_postgres_table() + # self._init_status_table() + else: + raise MissingConfigDataError( + "Must specify either database login " "credentials or a YAML file path" + ) + + def _get_flag_file( + self, record_identifier: str = None + ) -> Union[str, List[str], None]: + """ + Get path to the status flag file for the specified record + + :param str record_identifier: unique record identifier + :return str | list[str] | None: path to the status flag file + """ + from glob import glob + + r_id = self._strict_record_id(record_identifier) + if self.file is None: + return + if self.file is not None: + regex = os.path.join( + self[STATUS_FILE_DIR], f"{self.namespace}_{r_id}_*.flag" + ) + file_list = glob(regex) + if len(file_list) > 1: + _LOGGER.warning("Multiple flag files found") + return file_list + elif len(file_list) == 1: + return file_list[0] + else: + _LOGGER.debug("No flag files found") + return None + + @property + def record_count(self) -> int: + """ + Number of records reported + + :return int: number of records reported + """ + return ( + len(self.data[self.namespace]) + if self.file + else self._count_rows(self.namespace) + ) + + @property + def highlighted_results(self) -> List[str]: + """ + Highlighted results + + :return List[str]: a collection of highlighted results + """ + return self._get_attr(HIGHLIGHTED_KEY) or [] + + @property + def namespace(self) -> str: + """ + Namespace the object writes the results to + + :return str: namespace the object writes the results to + """ + return self._get_attr(NAME_KEY) + + @property + def record_identifier(self) -> str: + """ + Unique identifier of the record + + :return str: unique identifier of the record + """ + return self._get_attr(RECORD_ID_KEY) + + @property + def schema(self) -> Dict: + """ + Schema mapping + + :return dict: schema that formalizes the results structure + """ + return self._get_attr(SCHEMA_KEY) + + @property + def status_schema(self) -> Dict: + """ + Status schema mapping + + :return dict: schema that formalizes the pipeline status structure + """ + return self._get_attr(STATUS_SCHEMA_KEY) + + @property + def status_schema_source(self) -> Dict: + """ + Status schema source + + :return dict: source of the schema that formalizes + the pipeline status structure + """ + return self._get_attr(STATUS_SCHEMA_SOURCE_KEY) + + @property + def schema_path(self) -> str: + """ + Schema path + + :return str: path to the provided schema + """ + return self._schema_path + + @property + def config_path(self) -> str: + """ + Config path. 
None if the config was not provided or if provided
        as a mapping of the config contents

        :return str: path to the provided config
        """
        return getattr(self, "_config_path", None)

    @property
    def result_schemas(self) -> Dict:
        """
        Result schema mappings

        :return dict: schemas that formalize the structure of each result
            in a canonical jsonschema way
        """
        return self._get_attr(RES_SCHEMAS_KEY)

    @property
    def file(self) -> str:
        """
        File path that the object is reporting the results into

        :return str: file path that the object is reporting the results into
        """
        return self._get_attr(FILE_KEY)

    @property
    def data(self) -> YacAttMap:
        """
        Data object

        :return yacman.YacAttMap: the object that stores the reported data
        """
        return self._get_attr(DATA_KEY)

    @property
    @contextmanager
    def session(self):
        """
        Provide a transactional scope around a series of query
        operations, no commit afterwards.
        """
        if not self.is_db_connected():
            self.establish_postgres_connection_orm()
        with self[DB_SESSION_KEY]() as session:
            _LOGGER.debug("Created session")
            yield session
            _LOGGER.debug("Ending session")

    def _create_table_orm(self, table_name: str, schema: Dict[str, Any]):
        """
        Create a table

        :param str table_name: name of the table to create
        :param Dict[str, Any] schema: schema to base table creation on
        """
        tn = table_name or self.namespace
        attr_dict = dict(__tablename__=tn, id=Column(Integer, primary_key=True))
        for result_id, result_metadata in schema.items():
            col_type = SQL_CLASSES_BY_TYPE[result_metadata[SCHEMA_TYPE_KEY]]
            _LOGGER.info(f"Adding object: {result_id} of type: {str(col_type)}")
            attr_dict.update({result_id: Column(col_type)})
        _LOGGER.debug(f"Creating '{tn}' ORM with args: {attr_dict}")
        Base = declarative_base()
        type(tn, (Base,), attr_dict)
        Base.metadata.create_all(bind=self[DB_ENGINE_KEY])

    def establish_postgres_connection_orm(self) -> bool:
        """
        Establish PostgreSQL connection using the config data

        :return bool: whether the connection has been established successfully
        """
        if self.is_db_connected():
            raise PipestatDatabaseError("Connection is already established")

        self[DB_ENGINE_KEY] = create_engine(self.db_url, echo=True)
        self[DB_SESSION_KEY] = sessionmaker(bind=self[DB_ENGINE_KEY])

    def is_db_connected(self) -> bool:
        """
        Check whether a PostgreSQL connection has been established

        :return bool: whether the connection has been established
        """
        if self.file is not None:
            raise PipestatDatabaseError(
                f"The {self.__class__.__name__} object is not backed by a database"
            )
        if DB_SESSION_KEY in self and isinstance(self[DB_SESSION_KEY], sessionmaker):
            return True
        return False

    @property
    def db_url(self) -> str:
        """
        Database URL, generated based on config credentials

        :return str: database URL
        """
        try:
            creds = dict(
                name=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_NAME_KEY],
                user=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_USER_KEY],
                passwd=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PASSWORD_KEY],
                host=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY],
                port=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PORT_KEY],
                dialect=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_DIALECT_KEY],
            )
        except (KeyError, AttributeError) as e:
            raise PipestatDatabaseError(
                f"Could not determine database URL. Caught error: {str(e)}"
            )
        # parsed_creds = {k: quote_plus(v) for k, v in creds.items()}
        parsed_creds = creds
        return "{dialect}://{user}:{passwd}@{host}:{port}/{name}".format(**parsed_creds)

    def validate_schema(self) -> None:
        """
        Check schema for any possible issues

        :raises SchemaError: if any schema format issue is detected
        """

        def _recursively_replace_custom_types(s: dict) -> Dict:
            """
            Replace the custom types in pipestat schema with canonical types

            :param dict s: schema to replace types in
            :return dict: schema with types replaced
            """
            for k, v in s.items():
                assert SCHEMA_TYPE_KEY in v, SchemaError(
                    f"Result '{k}' is missing '{SCHEMA_TYPE_KEY}' key"
                )
                if v[SCHEMA_TYPE_KEY] == "object" and SCHEMA_PROP_KEY in s[k]:
                    _recursively_replace_custom_types(s[k][SCHEMA_PROP_KEY])
                if v[SCHEMA_TYPE_KEY] in CANONICAL_TYPES.keys():
                    s.setdefault(k, {})
                    s[k].setdefault(SCHEMA_PROP_KEY, {})
                    s[k][SCHEMA_PROP_KEY].update(
                        CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]][SCHEMA_PROP_KEY]
                    )
                    s[k].setdefault("required", [])
                    s[k]["required"].extend(
                        CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]]["required"]
                    )
                    s[k][SCHEMA_TYPE_KEY] = CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]][
                        SCHEMA_TYPE_KEY
                    ]
            return s

        schema = deepcopy(self.schema)
        _LOGGER.debug(f"Validating input schema")
        assert isinstance(schema, dict), SchemaError(
            f"The schema has to be a {dict().__class__.__name__}"
        )
        for col_name in RESERVED_COLNAMES:
            assert col_name not in schema.keys(), PipestatError(
                f"'{col_name}' is an identifier reserved by pipestat"
            )
        self[RES_SCHEMAS_KEY] = {}
        schema = _recursively_replace_custom_types(schema)
        self[RES_SCHEMAS_KEY] = schema

    def _init_postgres_table(self) -> bool:
        """
        Initialize a PostgreSQL table based on the provided schema,
        if it does not exist. Read the data stored in the database into the
        memory otherwise.
+ + :return bool: whether the table has been created + """ + if self.schema is None: + raise SchemaNotFoundError("initialize the database table") + if self._check_table_exists(table_name=self.namespace): + _LOGGER.debug(f"Table '{self.namespace}' already exists in the database") + if not self[DB_ONLY_KEY]: + self._table_to_dict() + return False + _LOGGER.info(f"Initializing '{self.namespace}' table in '{PKG_NAME}' database") + self._create_table_orm(table_name=self.namespace, schema=self.result_schemas) + return True + + def _init_status_table(self): + status_table_name = f"{self.namespace}_{STATUS}" + # self._create_status_type() + if not self._check_table_exists(table_name=status_table_name): + _LOGGER.info( + f"Initializing '{status_table_name}' table in " f"'{PKG_NAME}' database" + ) + self._create_table_orm( + table_name=status_table_name, schema=self.status_schema + ) + + def _get_attr(self, attr: str) -> Any: + """ + Safely get the name of the selected attribute of this object + + :param str attr: attr to select + :return: + """ + return self[attr] if attr in self else None + + def _check_table_exists(self, table_name: str) -> bool: + """ + Check if the specified table exists + + :param str table_name: table name to be checked + :return bool: whether the specified table exists + """ + from sqlalchemy import inspect + + with self.session as s: + return inspect(s.bind).has_table(table_name=table_name) From 179f6dcef35eedb413842eb3476f15b07e0d7d89 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 8 Apr 2021 09:37:29 -0400 Subject: [PATCH 04/65] fix db credentials parsing, other minor changes --- pipestat/const.py | 16 ++++++++++++++++ pipestat/pipestat_orm.py | 6 ++++-- tests/data/config.yaml | 1 + 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/pipestat/const.py b/pipestat/const.py index 3c72253a..1a0c97a0 100644 --- a/pipestat/const.py +++ b/pipestat/const.py @@ -1,5 +1,7 @@ import os +from sqlalchemy.types import ARRAY, JSON, Boolean, Float, Integer, String + PKG_NAME = "pipestat" LOCK_PREFIX = "lock." REPORT_CMD = "report" @@ -43,6 +45,7 @@ CFG_PORT_KEY = "port" CFG_PASSWORD_KEY = "password" CFG_USER_KEY = "user" +CFG_DIALECT_KEY = "dialect" # sqlite, mysql, postgresql, oracle, or mssql DB_CREDENTIALS = [ CFG_HOST_KEY, @@ -66,6 +69,8 @@ FILE_KEY = "_file" RECORD_ID_KEY = "_record_id" DB_CONNECTION_KEY = "_db_connnection" +DB_SESSION_KEY = "_db_session" +DB_ENGINE_KEY = "_db_engine" HIGHLIGHTED_KEY = "_highlighted" # schema keys @@ -127,6 +132,17 @@ "boolean": bool, } +SQL_CLASSES_BY_TYPE = { + "number": Float, + "integer": Integer, + "object": JSON, + "image": JSON, + "file": JSON, + "string": String, + "array": ARRAY(String), + "boolean": Boolean, +} + CFG_SCHEMA = os.path.join( os.path.dirname(os.path.abspath(__file__)), "schemas", "pipestat_config_schema.yaml" ) diff --git a/pipestat/pipestat_orm.py b/pipestat/pipestat_orm.py index 99556f52..6d8248df 100644 --- a/pipestat/pipestat_orm.py +++ b/pipestat/pipestat_orm.py @@ -413,6 +413,7 @@ def establish_postgres_connection_orm(self) -> bool: self[DB_ENGINE_KEY] = create_engine(self.db_url, echo=True) self[DB_SESSION_KEY] = sessionmaker(bind=self[DB_ENGINE_KEY]) + return True def is_db_connected(self) -> bool: """ @@ -448,8 +449,9 @@ def db_url(self) -> str: raise PipestatDatabaseError( f"Could not determine database URL. 
Caught error: {str(e)}" ) - # parsed_creds = {k: quote_plus(v) for k, v in creds.items()} - parsed_creds = creds + import urllib.parse + + parsed_creds = {k: urllib.parse.quote_plus(str(v)) for k, v in creds.items()} return "{dialect}://{user}:{passwd}@{host}:{port}/{name}".format(**parsed_creds) def validate_schema(self) -> None: diff --git a/tests/data/config.yaml b/tests/data/config.yaml index 1538ae9f..1e619592 100644 --- a/tests/data/config.yaml +++ b/tests/data/config.yaml @@ -2,6 +2,7 @@ namespace: test record_identifier: sample1 schema_path: sample_output_schema.yaml #$HOME/Desktop/sample_output_schema.yaml database: + dialect: postgresql name: pipestat-test user: postgres password: pipestat-password From 63f073f2548cdaae93d8111fb4e2de07eaf79818 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 8 Apr 2021 09:43:53 -0400 Subject: [PATCH 05/65] restrict DB dialects in schema --- pipestat/pipestat_orm.py | 4 +--- pipestat/schemas/pipestat_config_schema.yaml | 6 +++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pipestat/pipestat_orm.py b/pipestat/pipestat_orm.py index 6d8248df..e123ba47 100644 --- a/pipestat/pipestat_orm.py +++ b/pipestat/pipestat_orm.py @@ -449,9 +449,7 @@ def db_url(self) -> str: raise PipestatDatabaseError( f"Could not determine database URL. Caught error: {str(e)}" ) - import urllib.parse - - parsed_creds = {k: urllib.parse.quote_plus(str(v)) for k, v in creds.items()} + parsed_creds = {k: quote_plus(str(v)) for k, v in creds.items()} return "{dialect}://{user}:{passwd}@{host}:{port}/{name}".format(**parsed_creds) def validate_schema(self) -> None: diff --git a/pipestat/schemas/pipestat_config_schema.yaml b/pipestat/schemas/pipestat_config_schema.yaml index ee11bd51..03670227 100644 --- a/pipestat/schemas/pipestat_config_schema.yaml +++ b/pipestat/schemas/pipestat_config_schema.yaml @@ -1,6 +1,6 @@ description: "Pipestat configuration file schema; docs: http://pipestat.databio.org/en/latest/config/" properties: - name: + name: type: string pattern: "^\\S*$" description: "Namespace identifier with no whitespace" @@ -17,6 +17,10 @@ properties: database: type: object properties: + dialect: + type: string + enum: ["sqlite", "mysql", "postgresql", "oracle", "mssql"] + description: "Dialect names include the identifying name of the SQLAlchemy dialect, a name such as sqlite, mysql, postgresql, oracle, or mssql" name: type: string description: "DB name" From 96455c890e3396028c2b3e56df66168566b88a01 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 8 Apr 2021 11:00:54 -0400 Subject: [PATCH 06/65] status table initialization and schema processing --- pipestat/const.py | 4 ++++ pipestat/helpers.py | 18 ++++++++++++++++++ pipestat/pipestat_orm.py | 8 ++++---- pipestat/schemas/status_table_schema.yaml | 7 +++++++ 4 files changed, 33 insertions(+), 4 deletions(-) create mode 100644 pipestat/schemas/status_table_schema.yaml diff --git a/pipestat/const.py b/pipestat/const.py index 1a0c97a0..b8438c7d 100644 --- a/pipestat/const.py +++ b/pipestat/const.py @@ -149,3 +149,7 @@ STATUS_SCHEMA = os.path.join( os.path.dirname(os.path.abspath(__file__)), "schemas", "status_schema.yaml" ) + +STATUS_TABLE_SCHEMA = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "schemas", "status_table_schema.yaml" +) diff --git a/pipestat/helpers.py b/pipestat/helpers.py index 44af23a3..d4cda351 100644 --- a/pipestat/helpers.py +++ b/pipestat/helpers.py @@ -1,5 +1,6 @@ import logging from re import findall +from typing import Any, Dict import jsonschema 
from oyaml import safe_load @@ -11,6 +12,23 @@ _LOGGER = logging.getLogger(__name__) +def get_status_table_schema(status_schema: Dict[str, Any]) -> Dict[str, Any]: + """ + Update and return a status_table_schema based on user-provided status schema + + :param Dict[str, Any] status_schema: status schema provided by the user + :return Dict[str, Any]: status_schema status table scheme + to use as a base for status table generation + """ + defined_status_codes = list(status_schema.keys()) + _, status_table_schema = read_yaml_data( + path=STATUS_TABLE_SCHEMA, what="status table schema" + ) + status_table_schema["status"].update({"enum": defined_status_codes}) + _LOGGER.debug(f"Updated status table schema: {status_table_schema}") + return status_table_schema + + def schema_to_columns(schema): """ Get a list of database table columns from a schema diff --git a/pipestat/pipestat_orm.py b/pipestat/pipestat_orm.py index e123ba47..eac36a0f 100644 --- a/pipestat/pipestat_orm.py +++ b/pipestat/pipestat_orm.py @@ -221,7 +221,7 @@ def _select_value( ) self[DATA_KEY] = YacAttMap() self._init_postgres_table() - # self._init_status_table() + self._init_status_table() else: raise MissingConfigDataError( "Must specify either database login " "credentials or a YAML file path" @@ -395,7 +395,7 @@ def _create_table_orm(self, table_name: str, schema: Dict[str, Any]): attr_dict = dict(__tablename__=tn, id=Column(Integer, primary_key=True)) for result_id, result_metadata in schema.items(): col_type = SQL_CLASSES_BY_TYPE[result_metadata[SCHEMA_TYPE_KEY]] - _LOGGER.info(f"Adding object: {result_id} of type: {str(col_type)}") + _LOGGER.debug(f"Adding object: {result_id} of type: {str(col_type)}") attr_dict.update({result_id: Column(col_type)}) _LOGGER.debug(f"Creating '{tn}' ORM with args: {attr_dict}") Base = declarative_base() @@ -521,13 +521,13 @@ def _init_postgres_table(self) -> bool: def _init_status_table(self): status_table_name = f"{self.namespace}_{STATUS}" - # self._create_status_type() if not self._check_table_exists(table_name=status_table_name): _LOGGER.info( f"Initializing '{status_table_name}' table in " f"'{PKG_NAME}' database" ) self._create_table_orm( - table_name=status_table_name, schema=self.status_schema + table_name=status_table_name, + schema=get_status_table_schema(status_schema=self.status_schema), ) def _get_attr(self, attr: str) -> Any: diff --git a/pipestat/schemas/status_table_schema.yaml b/pipestat/schemas/status_table_schema.yaml new file mode 100644 index 00000000..8bbcf2e8 --- /dev/null +++ b/pipestat/schemas/status_table_schema.yaml @@ -0,0 +1,7 @@ +record_identifier: + type: string + description: "record identifier" +status: + # this is updated with an 'enum' based on the status_schema provided by the user + type: string + description: "status code, only statuses defined in the status_schema are defined" From efd06b16b24a48f4c379dc8ae10cb7dbe5f5300f Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 8 Apr 2021 13:22:59 -0400 Subject: [PATCH 07/65] multiple changes: - auto-generate repr for ORMs - store ORMs mapping in PipestatManager objs - add _count_rows method - status table --- pipestat/const.py | 2 ++ pipestat/pipestat_orm.py | 77 +++++++++++++++++++++++++++------------- 2 files changed, 55 insertions(+), 24 deletions(-) diff --git a/pipestat/const.py b/pipestat/const.py index b8438c7d..0b75a84d 100644 --- a/pipestat/const.py +++ b/pipestat/const.py @@ -64,6 +64,8 @@ STATUS_SCHEMA_SOURCE_KEY = "_status_schema_source" STATUS_FILE_DIR = "_status_file_dir" 
RES_SCHEMAS_KEY = "_result_schemas" +DB_BASE_KEY = "_declarative_base" +DB_ORMS_KEY = "_orms" DATA_KEY = "_data" NAME_KEY = "_name" FILE_KEY = "_file" diff --git a/pipestat/pipestat_orm.py b/pipestat/pipestat_orm.py index eac36a0f..585d479b 100644 --- a/pipestat/pipestat_orm.py +++ b/pipestat/pipestat_orm.py @@ -27,7 +27,7 @@ class PipestatManagerORM(dict): pipeline can easily and reliably become an input for downstream analyses. The object exposes API for interacting with the results and pipeline status and can be backed by either a YAML-formatted file - or a PostgreSQL database. + or a database. """ def __init__( @@ -219,8 +219,10 @@ def _select_value( raise MissingConfigDataError( "Must specify all database login " "credentials or result_file_path" ) + self[DB_ORMS_KEY] = {} + self[DB_BASE_KEY] = declarative_base() self[DATA_KEY] = YacAttMap() - self._init_postgres_table() + self._init_db_table() self._init_status_table() else: raise MissingConfigDataError( @@ -378,7 +380,7 @@ def session(self): operations, no commit afterwards. """ if not self.is_db_connected(): - self.establish_postgres_connection_orm() + self.establish_db_connection_orm() with self[DB_SESSION_KEY]() as session: _LOGGER.debug("Created session") yield session @@ -391,20 +393,33 @@ def _create_table_orm(self, table_name: str, schema: Dict[str, Any]): :param str table_name: name of the table to create :param Dict[str, Any] schema: schema to base table creation on """ + + def _auto_repr(x: Any) -> str: + """ + Auto-generated __repr__ fun + + :param Any x: + :return str: string object representation + """ + attr_strs = [ + f"{k}={str(v)}" for k, v in x.__dict__.items() if not k.startswith("_") + ] + return "<{}: {}>".format(x.__class__.__name__, ", ".join(attr_strs)) + tn = table_name or self.namespace attr_dict = dict(__tablename__=tn, id=Column(Integer, primary_key=True)) for result_id, result_metadata in schema.items(): col_type = SQL_CLASSES_BY_TYPE[result_metadata[SCHEMA_TYPE_KEY]] _LOGGER.debug(f"Adding object: {result_id} of type: {str(col_type)}") attr_dict.update({result_id: Column(col_type)}) + attr_dict.update({"__repr__": _auto_repr}) _LOGGER.debug(f"Creating '{tn}' ORM with args: {attr_dict}") - Base = declarative_base() - type(tn, (Base,), attr_dict) - Base.metadata.create_all(bind=self[DB_ENGINE_KEY]) + self[DB_ORMS_KEY][tn] = type(tn.capitalize(), (self[DB_BASE_KEY],), attr_dict) + self[DB_BASE_KEY].metadata.create_all(bind=self[DB_ENGINE_KEY]) - def establish_postgres_connection_orm(self) -> bool: + def establish_db_connection_orm(self) -> bool: """ - Establish PostgreSQL connection using the config data + Establish DB connection using the config data :return bool: whether the connection has been established successfully """ @@ -417,7 +432,7 @@ def establish_postgres_connection_orm(self) -> bool: def is_db_connected(self) -> bool: """ - Check whether a PostgreSQL connection has been established + Check whether a DB connection has been established :return bool: whether the connection has been established """ @@ -500,9 +515,9 @@ def _recursively_replace_custom_types(s: dict) -> Dict: schema = _recursively_replace_custom_types(schema) self[RES_SCHEMAS_KEY] = schema - def _init_postgres_table(self) -> bool: + def _init_db_table(self) -> bool: """ - Initialize a PostgreSQL table based on the provided schema, + Initialize a database table based on the provided schema, if it does not exist. Read the data stored in the database into the memory otherwise. 
@@ -510,25 +525,29 @@ def _init_postgres_table(self) -> bool: """ if self.schema is None: raise SchemaNotFoundError("initialize the database table") - if self._check_table_exists(table_name=self.namespace): - _LOGGER.debug(f"Table '{self.namespace}' already exists in the database") - if not self[DB_ONLY_KEY]: - self._table_to_dict() - return False + if not self.is_db_connected(): + self.establish_db_connection_orm() + # if self._check_table_exists(table_name=self.namespace): + # _LOGGER.debug(f"Table '{self.namespace}' already exists in the database") + # if not self[DB_ONLY_KEY]: + # self._table_to_dict() + # # return False _LOGGER.info(f"Initializing '{self.namespace}' table in '{PKG_NAME}' database") self._create_table_orm(table_name=self.namespace, schema=self.result_schemas) return True def _init_status_table(self): status_table_name = f"{self.namespace}_{STATUS}" - if not self._check_table_exists(table_name=status_table_name): - _LOGGER.info( - f"Initializing '{status_table_name}' table in " f"'{PKG_NAME}' database" - ) - self._create_table_orm( - table_name=status_table_name, - schema=get_status_table_schema(status_schema=self.status_schema), - ) + if not self.is_db_connected(): + self.establish_db_connection_orm() + # if not self._check_table_exists(table_name=status_table_name): + _LOGGER.debug( + f"Initializing '{status_table_name}' table in " f"'{PKG_NAME}' database" + ) + self._create_table_orm( + table_name=status_table_name, + schema=get_status_table_schema(status_schema=self.status_schema), + ) def _get_attr(self, attr: str) -> Any: """ @@ -550,3 +569,13 @@ def _check_table_exists(self, table_name: str) -> bool: with self.session as s: return inspect(s.bind).has_table(table_name=table_name) + + def _count_rows(self, table_name: str) -> int: + """ + Count rows in a selected table + + :param str table_name: table to count rows for + :return int: number of rows in the selected table + """ + with self.session as s: + return s.query(self[DB_ORMS_KEY][table_name].id).count() From 5a89679fa6c60286301c134c0cc1441b8e8a8da0 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 8 Apr 2021 14:50:36 -0400 Subject: [PATCH 08/65] add reporting skeleton, auto create record ID column --- pipestat/pipestat_orm.py | 75 ++++++++++++++++++++++- pipestat/schemas/status_table_schema.yaml | 3 - 2 files changed, 72 insertions(+), 6 deletions(-) diff --git a/pipestat/pipestat_orm.py b/pipestat/pipestat_orm.py index 585d479b..4eea2306 100644 --- a/pipestat/pipestat_orm.py +++ b/pipestat/pipestat_orm.py @@ -8,7 +8,7 @@ from jsonschema import validate from sqlalchemy import Column, Float, ForeignKey, Integer, String, Table, create_engine from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship, sessionmaker +from sqlalchemy.orm import DeclarativeMeta, relationship, sessionmaker from ubiquerg import create_lock, remove_lock from yacman import YacAttMap @@ -398,7 +398,7 @@ def _auto_repr(x: Any) -> str: """ Auto-generated __repr__ fun - :param Any x: + :param Any x: object to generate __repr__ method for :return str: string object representation """ attr_strs = [ @@ -407,7 +407,11 @@ def _auto_repr(x: Any) -> str: return "<{}: {}>".format(x.__class__.__name__, ", ".join(attr_strs)) tn = table_name or self.namespace - attr_dict = dict(__tablename__=tn, id=Column(Integer, primary_key=True)) + attr_dict = dict( + __tablename__=tn, + id=Column(Integer, primary_key=True), + record_identifier=Column(String, unique=True), + ) for result_id, result_metadata in 
schema.items(): col_type = SQL_CLASSES_BY_TYPE[result_metadata[SCHEMA_TYPE_KEY]] _LOGGER.debug(f"Adding object: {result_id} of type: {str(col_type)}") @@ -579,3 +583,68 @@ def _count_rows(self, table_name: str) -> int: """ with self.session as s: return s.query(self[DB_ORMS_KEY][table_name].id).count() + + def _get_orm(self, table_name: str = None) -> Any: + """ + Get an object relational mapper class + + :param str table_name: table name to get a class for + :return Any: Object relational mapper class + """ + if DB_ORMS_KEY not in self: + raise PipestatDatabaseError("Object relational mapper classes not defined") + tn = f"{table_name or self.namespace}" + if tn not in self[DB_ORMS_KEY]: + raise PipestatDatabaseError( + f"No object relational mapper class defined for table: {tn}" + ) + if not isinstance(self[DB_ORMS_KEY][tn], DeclarativeMeta): + raise PipestatDatabaseError( + f"Object relational mapper class for table '{tn}' is invalid" + ) + return self[DB_ORMS_KEY][tn] + + def check_record_exists( + self, record_identifier: str, table_name: str = None + ) -> bool: + """ + Check if the specified record exists in the table + + :param ste record_identifier: record to check for + :param str table_name: table name to check + :return bool: whether the record exists in the table + """ + with self.session as s: + return ( + s.query(self._get_orm(table_name).id) + .filter_by(record_identifier=record_identifier) + .first() + is not None + ) + + def _report( + self, value: Dict[str, Any], record_identifier: str, table_name: str = None + ) -> int: + """ + + + :param value: + :param record_identifier: + :param table_name: + :return: + """ + ORMClass = self._get_orm(table_name) + value.update({RECORD_ID: record_identifier}) + if not self.check_record_exists( + record_identifier=record_identifier, table_name=table_name + ): + x = ORMClass(**value) + with self.session as s: + s.add(x) + s.commit() + else: + with self.session as s: + s.query(ORMClass).filter( + getattr(ORMClass, RECORD_ID) == record_identifier + ).update(value) + s.commit() diff --git a/pipestat/schemas/status_table_schema.yaml b/pipestat/schemas/status_table_schema.yaml index 8bbcf2e8..530dfc07 100644 --- a/pipestat/schemas/status_table_schema.yaml +++ b/pipestat/schemas/status_table_schema.yaml @@ -1,6 +1,3 @@ -record_identifier: - type: string - description: "record identifier" status: # this is updated with an 'enum' based on the status_schema provided by the user type: string From 990925f3663f990df1682d566be4ba2757e497e8 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 8 Apr 2021 16:44:40 -0400 Subject: [PATCH 09/65] return row id when inserting/updating --- pipestat/pipestat_orm.py | 53 +++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/pipestat/pipestat_orm.py b/pipestat/pipestat_orm.py index 4eea2306..efbfbd8c 100644 --- a/pipestat/pipestat_orm.py +++ b/pipestat/pipestat_orm.py @@ -386,6 +386,23 @@ def session(self): yield session _LOGGER.debug("Ending session") + def _strict_record_id(self, forced_value: str = None) -> str: + """ + Get record identifier from the outer source or stored with this object + + :param str forced_value: return this value + :return str: record identifier + """ + if forced_value is not None: + return forced_value + if self.record_identifier is not None: + return self.record_identifier + raise PipestatError( + f"You must provide the record identifier you want to perform " + f"the action on. 
Either in the {self.__class__.__name__} " + f"constructor or as an argument to the method." + ) + def _create_table_orm(self, table_name: str, schema: Dict[str, Any]): """ Create a table @@ -610,7 +627,7 @@ def check_record_exists( """ Check if the specified record exists in the table - :param ste record_identifier: record to check for + :param str record_identifier: record to check for :param str table_name: table name to check :return bool: whether the record exists in the table """ @@ -622,29 +639,37 @@ def check_record_exists( is not None ) - def _report( - self, value: Dict[str, Any], record_identifier: str, table_name: str = None + def _report_db( + self, values: Dict[str, Any], record_identifier: str, table_name: str = None ) -> int: """ + Report a result to a database - - :param value: - :param record_identifier: - :param table_name: - :return: + :param Dict[str, Any] values: values to report + :param str record_identifier: record to report the result for + :param str table_name: name of the table to report the result in + :return : """ + record_identifier = self._strict_record_id(record_identifier) ORMClass = self._get_orm(table_name) - value.update({RECORD_ID: record_identifier}) + values.update({RECORD_ID: record_identifier}) if not self.check_record_exists( record_identifier=record_identifier, table_name=table_name ): - x = ORMClass(**value) + new_record = ORMClass(**values) with self.session as s: - s.add(x) + s.add(new_record) s.commit() + returned_id = new_record.id else: with self.session as s: - s.query(ORMClass).filter( - getattr(ORMClass, RECORD_ID) == record_identifier - ).update(value) + record_to_update = ( + s.query(ORMClass) + .filter(getattr(ORMClass, RECORD_ID) == record_identifier) + .first() + ) + for k, v in values.items(): + setattr(record_to_update, k, v) s.commit() + returned_id = record_to_update.id + return returned_id From 9c0547de51d70207ca3cf224d2d126e6b0385235 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 9 Apr 2021 13:31:33 -0400 Subject: [PATCH 10/65] add check_results_exist method --- pipestat/pipestat_orm.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/pipestat/pipestat_orm.py b/pipestat/pipestat_orm.py index efbfbd8c..0976b81f 100644 --- a/pipestat/pipestat_orm.py +++ b/pipestat/pipestat_orm.py @@ -639,6 +639,25 @@ def check_record_exists( is not None ) + def check_results_exist( + self, results: List[str], rid: str = None, table_name: str = None + ) -> List[str]: + """ + Check if the specified record exists in the table + + :param str rid: record to check for + :param List[str] results: results identifiers to check for + :return List[str]: results identifiers that exist + """ + table_name = table_name or self.namespace + with self.session as s: + record = ( + s.query(self._get_orm(table_name)) + .filter_by(record_identifier=rid) + .first() + ) + return [r for r in results if getattr(record, r, None) is not None] + def _report_db( self, values: Dict[str, Any], record_identifier: str, table_name: str = None ) -> int: @@ -648,7 +667,7 @@ def _report_db( :param Dict[str, Any] values: values to report :param str record_identifier: record to report the result for :param str table_name: name of the table to report the result in - :return : + :return int: updated/inserted row """ record_identifier = self._strict_record_id(record_identifier) ORMClass = self._get_orm(table_name) @@ -668,8 +687,8 @@ def _report_db( .filter(getattr(ORMClass, RECORD_ID) == record_identifier) .first() ) - for k, v 
in values.items(): - setattr(record_to_update, k, v) + for result_id, result_value in values.items(): + setattr(record_to_update, result_id, result_value) s.commit() returned_id = record_to_update.id return returned_id From 175f9c650cce684a0ae04858949e474db74746c8 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 9 Apr 2021 14:03:00 -0400 Subject: [PATCH 11/65] implement _retrieve_db method --- pipestat/pipestat_orm.py | 53 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/pipestat/pipestat_orm.py b/pipestat/pipestat_orm.py index 0976b81f..8b3f8244 100644 --- a/pipestat/pipestat_orm.py +++ b/pipestat/pipestat_orm.py @@ -643,13 +643,15 @@ def check_results_exist( self, results: List[str], rid: str = None, table_name: str = None ) -> List[str]: """ - Check if the specified record exists in the table + Check if the specified results exist in the table :param str rid: record to check for :param List[str] results: results identifiers to check for + :param str table_name: name of the table to search for results in :return List[str]: results identifiers that exist """ table_name = table_name or self.namespace + rid = self._strict_record_id(rid) with self.session as s: record = ( s.query(self._get_orm(table_name)) @@ -658,6 +660,55 @@ def check_results_exist( ) return [r for r in results if getattr(record, r, None) is not None] + def _retrieve_db( + self, + result_identifier: str = None, + record_identifier: str = None, + table_name: str = None, + ) -> Dict[str, Any]: + """ + Retrieve a result for a record. + + If no result ID specified, results for the entire record will + be returned. + + :param str record_identifier: unique identifier of the record + :param str result_identifier: name of the result to be retrieved + :param str table_name: name of the table to search for results in + :return Dict[str, any]: a single result or a mapping with all the results + reported for the record + """ + table_name = table_name or self.namespace + record_identifier = self._strict_record_id(record_identifier) + if result_identifier is not None: + existing = self.check_results_exist( + results=[result_identifier], + rid=record_identifier, + table_name=table_name, + ) + if not existing: + raise PipestatDatabaseError( + f"Result '{result_identifier}' not found for record " + f"'{record_identifier}'" + ) + + with self.session as s: + record = ( + s.query(self._get_orm(table_name)) + .filter_by(record_identifier=record_identifier) + .first() + ) + + if record is not None: + if result_identifier is not None: + return {result_identifier: getattr(record, result_identifier)} + return { + column: getattr(record, column) + for column in [c.name for c in record.__table__.columns] + if getattr(record, column, None) is not None + } + raise PipestatDatabaseError(f"Record '{record_identifier}' not found") + def _report_db( self, values: Dict[str, Any], record_identifier: str, table_name: str = None ) -> int: From a67bf393bebd315326de8121afc415fca4fd9e35 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 9 Apr 2021 15:07:37 -0400 Subject: [PATCH 12/65] implement _remove_db and check_result_exists methods --- pipestat/pipestat_orm.py | 74 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 2 deletions(-) diff --git a/pipestat/pipestat_orm.py b/pipestat/pipestat_orm.py index 8b3f8244..3de4625a 100644 --- a/pipestat/pipestat_orm.py +++ b/pipestat/pipestat_orm.py @@ -639,7 +639,7 @@ def check_record_exists( is not None ) - def 
check_results_exist( + def check_which_results_exist( self, results: List[str], rid: str = None, table_name: str = None ) -> List[str]: """ @@ -660,6 +660,26 @@ def check_results_exist( ) return [r for r in results if getattr(record, r, None) is not None] + def check_result_exists( + self, + result_identifier: str, + record_identifier: str = None, + table_name: str = None, + ) -> bool: + """ + Check if the result has been reported + + :param str record_identifier: unique identifier of the record + :param str result_identifier: name of the result to check + :param str table_name: name of the table to search for results in + :return bool: whether the specified result has been reported for the + indicated record in current namespace + """ + record_identifier = self._strict_record_id(record_identifier) + return self.check_which_results_exist( + results=[result_identifier], rid=record_identifier, table_name=table_name + ) + def _retrieve_db( self, result_identifier: str = None, @@ -681,7 +701,7 @@ def _retrieve_db( table_name = table_name or self.namespace record_identifier = self._strict_record_id(record_identifier) if result_identifier is not None: - existing = self.check_results_exist( + existing = self.check_which_results_exist( results=[result_identifier], rid=record_identifier, table_name=table_name, @@ -743,3 +763,53 @@ def _report_db( s.commit() returned_id = record_to_update.id return returned_id + + def _remove_db( + self, + record_identifier: str = None, + result_identifier: str = None, + table_name: str = None, + ) -> bool: + """ + Remove a result. + + If no result ID specified or last result is removed, the entire record + will be removed. + + :param str record_identifier: unique identifier of the record + :param str result_identifier: name of the result to be removed or None + if the record should be removed. 
+ :param str table_name: name of the table to report the result in + :return bool: whether the result has been removed + :raise PipestatDatabaseError: if either record or result specified are not found + """ + table_name = table_name or self.namespace + record_identifier = self._strict_record_id(record_identifier) + ORMClass = self._get_orm(table_name=table_name) + if self.check_record_exists( + record_identifier=record_identifier, table_name=table_name + ): + with self.session as s: + record = ( + s.query(ORMClass) + .filter(getattr(ORMClass, RECORD_ID) == record_identifier) + .first() + ) + if result_identifier is None: + # delete row + record.delete() + else: + # set the value to None + if not self.check_result_exists( + record_identifier=record_identifier, + result_identifier=result_identifier, + table_name=table_name, + ): + raise PipestatDatabaseError( + f"Result '{result_identifier}' not found for record " + f"'{record_identifier}'" + ) + setattr(record, result_identifier, None) + s.commit() + else: + raise PipestatDatabaseError(f"Record '{record_identifier}' not found") From 661ccadddb9df020515b0397184233e6f9d8a942 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 9 Apr 2021 15:37:57 -0400 Subject: [PATCH 13/65] implement status management --- pipestat/pipestat_orm.py | 155 ++++++++++++++++++++++++++++++++++----- 1 file changed, 138 insertions(+), 17 deletions(-) diff --git a/pipestat/pipestat_orm.py b/pipestat/pipestat_orm.py index 3de4625a..01f5dfb1 100644 --- a/pipestat/pipestat_orm.py +++ b/pipestat/pipestat_orm.py @@ -372,6 +372,29 @@ def data(self) -> YacAttMap: """ return self._get_attr(DATA_KEY) + @property + def db_url(self) -> str: + """ + Database URL, generated based on config credentials + + :return str: database URL + """ + try: + creds = dict( + name=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_NAME_KEY], + user=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_USER_KEY], + passwd=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PASSWORD_KEY], + host=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY], + port=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PORT_KEY], + dialect=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_DIALECT_KEY], + ) + except (KeyError, AttributeError) as e: + raise PipestatDatabaseError( + f"Could not determine database URL. Caught error: {str(e)}" + ) + parsed_creds = {k: quote_plus(str(v)) for k, v in creds.items()} + return "{dialect}://{user}:{passwd}@{host}:{port}/{name}".format(**parsed_creds) + @property @contextmanager def session(self): @@ -465,28 +488,126 @@ def is_db_connected(self) -> bool: return True return False - @property - def db_url(self) -> str: + def set_status(self, status_identifier: str, record_identifier: str = None) -> None: """ - Database URL, generated based on config credentials + Set pipeline run status. - :return str: database URL + The status identifier needs to match one of identifiers specified in + the status schema. A basic, ready to use, status schema is shipped with + this package. 
+ + :param str status_identifier: status to set, one of statuses defined + in the status schema + :param str record_identifier: record identifier to set the + pipeline status for """ - try: - creds = dict( - name=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_NAME_KEY], - user=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_USER_KEY], - passwd=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PASSWORD_KEY], - host=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY], - port=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PORT_KEY], - dialect=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_DIALECT_KEY], + r_id = self._strict_record_id(record_identifier) + known_status_identifiers = self.status_schema.keys() + if status_identifier not in known_status_identifiers: + raise PipestatError( + f"'{status_identifier}' is not a defined status identifier. " + f"These are allowed: {known_status_identifiers}" ) - except (KeyError, AttributeError) as e: - raise PipestatDatabaseError( - f"Could not determine database URL. Caught error: {str(e)}" + prev_status = self.get_status(r_id) + if self.file is not None: + if prev_status: + prev_flag_path = self.get_status_flag_path(prev_status, r_id) + os.remove(prev_flag_path) + flag_path = self.get_status_flag_path(status_identifier, r_id) + create_lock(flag_path) + with open(flag_path, "w") as f: + f.write(status_identifier) + remove_lock(flag_path) + else: + try: + self._report_db( + values={STATUS: status_identifier}, + record_identifier=r_id, + table_name=f"{self.namespace}_{STATUS}", + ) + except Exception as e: + _LOGGER.error(f"Could not insert into the status table. Exception: {e}") + raise + if prev_status: + _LOGGER.debug( + f"Changed status from '{prev_status}' to '{status_identifier}'" ) - parsed_creds = {k: quote_plus(str(v)) for k, v in creds.items()} - return "{dialect}://{user}:{passwd}@{host}:{port}/{name}".format(**parsed_creds) + + def get_status(self, record_identifier: str = None) -> Optional[str]: + """ + Get the current pipeline status + + :return str: status identifier, like 'running' + """ + r_id = self._strict_record_id(record_identifier) + if self.file is None: + try: + result = self._retrieve_db( + result_identifier=STATUS, + record_identifier=r_id, + table_name=f"{self.namespace}_{STATUS}", + ) + except PipestatDatabaseError: + return None + return result[STATUS] + else: + flag_file = self._get_flag_file(record_identifier=r_id) + if flag_file is not None: + assert isinstance(flag_file, str), TypeError( + "Flag file path is expected to be a str, were multiple flags found?" + ) + with open(flag_file, "r") as f: + status = f.read() + return status + _LOGGER.debug( + f"Could not determine status for '{r_id}' record. " + f"No flags found in: {self[STATUS_FILE_DIR]}" + ) + return None + + def clear_status( + self, record_identifier: str = None, flag_names: List[str] = None + ) -> List[str]: + """ + Remove status flags + + :param str record_identifier: name of the record to remove flags for + :param Iterable[str] flag_names: Names of flags to remove, optional; if + unspecified, all schema-defined flag names will be used. 
+ :return List[str]: Collection of names of flags removed + """ + r_id = self._strict_record_id(record_identifier) + if self.file is not None: + flag_names = flag_names or list(self.status_schema.keys()) + if isinstance(flag_names, str): + flag_names = [flag_names] + removed = [] + for f in flag_names: + path_flag_file = self.get_status_flag_path( + status_identifier=f, record_identifier=r_id + ) + try: + os.remove(path_flag_file) + except: + pass + else: + _LOGGER.info(f"Removed existing flag: {path_flag_file}") + removed.append(f) + return removed + else: + removed = self.get_status(r_id) + try: + self._remove_db( + record_identifier=r_id, + table_name=f"{self.namespace}_{STATUS}", + ) + except Exception as e: + _LOGGER.error( + f"Could not remove the status from the database. Exception: {e}" + ) + return [] + else: + return [removed] def validate_schema(self) -> None: """ From f5128deda6b8ebcb4f949cffb7c56c4cf64ad0a3 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 12 Apr 2021 13:41:34 -0400 Subject: [PATCH 14/65] fix versions --- requirements/requirements-all.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index e0d427df..ab2db6b7 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -3,5 +3,6 @@ jsonschema logmuse>=0.2.5 oyaml psycopg2-binary +sqlalchemy>=1.4.7 ubiquerg>=0.6.1 -yacman>=0.8.0 +yacman>=0.8.1 From 1b49f9e4254b4b85b011552c86e381070fce7d86 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 13 Apr 2021 09:49:45 -0400 Subject: [PATCH 15/65] implement file support, rename files, major refactoring --- docs_jupyter/python_api.ipynb | 21 +- pipestat/pipestat.py | 1367 ++++++++++++++++----------------- pipestat/pipestat_dirver.py | 1270 ++++++++++++++++++++++++++++++ pipestat/pipestat_orm.py | 936 ---------------------- 4 files changed, 1963 insertions(+), 1631 deletions(-) create mode 100644 pipestat/pipestat_dirver.py delete mode 100644 pipestat/pipestat_orm.py diff --git a/docs_jupyter/python_api.ipynb b/docs_jupyter/python_api.ipynb index f70bce46..77b349fd 100644 --- a/docs_jupyter/python_api.ipynb +++ b/docs_jupyter/python_api.ipynb @@ -22,6 +22,7 @@ "outputs": [], "source": [ "import pipestat\n", + "from pipestat.pipestat_orm import PipestatManagerORM as PipestatManager, SchemaNotFoundError\n", "from jsonschema import ValidationError" ] }, @@ -57,7 +58,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmpa6poai5_.yaml\n" + "/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmp9cozi7pc.yaml\n" ] } ], @@ -81,7 +82,7 @@ "metadata": {}, "outputs": [], "source": [ - "psm = pipestat.PipestatManager(\n", + "psm = PipestatManager(\n", " namespace=\"test\",\n", " record_identifier=\"sample1\",\n", " results_file_path=temp_file,\n", @@ -460,7 +461,7 @@ "metadata": {}, "outputs": [], "source": [ - "psm1 = pipestat.PipestatManager(\n", + "psm1 = PipestatManager(\n", " namespace=\"test\",\n", " record_identifier=\"sample1\",\n", " results_file_path=temp_file,\n", @@ -508,7 +509,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmpa6poai5_.yaml\n", + "/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmp9cozi7pc.yaml\n", "test:\n", " sample1:\n", " output_file:\n", @@ -828,7 +829,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmpf9m81e1n.yaml\n" + 
"/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmpsfzryntz.yaml\n" ] } ], @@ -838,7 +839,7 @@ "_, temp_file_highlight = mkstemp(suffix=\".yaml\")\n", "print(temp_file_highlight)\n", "\n", - "psm_highlight = pipestat.PipestatManager(\n", + "psm_highlight = PipestatManager(\n", " namespace=\"test_highlight\",\n", " record_identifier=\"sample1\",\n", " results_file_path=temp_file_highlight,\n", @@ -1035,7 +1036,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmpq81inuvn.yaml\n" + "/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmpxwy566e3.yaml\n" ] } ], @@ -1043,7 +1044,7 @@ "_, temp_file_no_schema = mkstemp(suffix=\".yaml\")\n", "print(temp_file_no_schema)\n", "\n", - "psm_no_schema = pipestat.PipestatManager(\n", + "psm_no_schema = PipestatManager(\n", " namespace=\"test_no_schema\", results_file_path=temp_file_no_schema\n", ")" ] @@ -1073,7 +1074,7 @@ "source": [ "try:\n", " psm_no_schema.report(record_identifier=\"sample1\", values={\"key\": \"val\"})\n", - "except pipestat.SchemaNotFoundError as e:\n", + "except SchemaNotFoundError as e:\n", " print(e)" ] }, @@ -1122,7 +1123,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.9.4" } }, "nbformat": 4, diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index f3f75262..f3149fcc 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -2,12 +2,13 @@ from copy import deepcopy from logging import getLogger from typing import Any, Dict, List, Optional, Union +from urllib.parse import quote_plus -import psycopg2 from attmap import PathExAttMap as PXAM from jsonschema import validate -from psycopg2.extensions import connection -from psycopg2.extras import DictCursor, Json +from sqlalchemy import Column, Float, ForeignKey, Integer, String, Table, create_engine +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import DeclarativeMeta, relationship, sessionmaker from ubiquerg import create_lock, remove_lock from yacman import YacAttMap @@ -18,25 +19,6 @@ _LOGGER = getLogger(PKG_NAME) -class LoggingCursor(psycopg2.extras.DictCursor): - """ - Logging db cursor - """ - - def execute(self, query, vars=None): - """ - Execute a database operation (query or command) and issue a debug - and info level log messages - - :param query: - :param vars: - :return: - """ - _LOGGER.debug(f"Executing query: {self.mogrify(query, vars)}") - super(LoggingCursor, self).execute(query=query, vars=vars) - _LOGGER.debug(f"Executed query: {self.query}") - - class PipestatManager(dict): """ Pipestat standardizes reporting of pipeline results and @@ -45,7 +27,7 @@ class PipestatManager(dict): pipeline can easily and reliably become an input for downstream analyses. The object exposes API for interacting with the results and pipeline status and can be backed by either a YAML-formatted file - or a PostgreSQL database. + or a database. 
""" def __init__( @@ -237,31 +219,16 @@ def _select_value( raise MissingConfigDataError( "Must specify all database login " "credentials or result_file_path" ) + self[DB_ORMS_KEY] = {} + self[DB_BASE_KEY] = declarative_base() self[DATA_KEY] = YacAttMap() - self._init_postgres_table() + self._init_db_table() self._init_status_table() else: raise MissingConfigDataError( "Must specify either database login " "credentials or a YAML file path" ) - def __str__(self): - """ - Generate string representation of the object - - :return str: string representation of the object - """ - res = f"{self.__class__.__name__} ({self.namespace})" - res += "\nBackend: {}".format( - f"file ({self.file})" if self.file else "PostgreSQL" - ) - res += f"\nResults schema source: {self.schema_path}" - res += f"\nStatus schema source: {self.status_schema_source}" - res += f"\nRecords count: {self.record_count}" - if self.highlighted_results: - res += f"\nHighlighted results: {', '.join(self.highlighted_results)}" - return res - def _get_flag_file( self, record_identifier: str = None ) -> Union[str, List[str], None]: @@ -290,15 +257,6 @@ def _get_flag_file( _LOGGER.debug("No flag files found") return None - @property - def highlighted_results(self) -> List[str]: - """ - Highlighted results - - :return List[str]: a collection of highlighted results - """ - return self._get_attr(HIGHLIGHTED_KEY) or [] - @property def record_count(self) -> int: """ @@ -312,6 +270,15 @@ def record_count(self) -> int: else self._count_rows(self.namespace) ) + @property + def highlighted_results(self) -> List[str]: + """ + Highlighted results + + :return List[str]: a collection of highlighted results + """ + return self._get_attr(HIGHLIGHTED_KEY) or [] + @property def namespace(self) -> str: """ @@ -406,302 +373,162 @@ def data(self) -> YacAttMap: return self._get_attr(DATA_KEY) @property - @contextmanager - def db_cursor(self): + def db_url(self) -> str: """ - Establish connection and get a PostgreSQL database cursor, - commit and close the connection afterwards + Database URL, generated based on config credentials - :return LoggingCursor: Database cursor object + :return str: database URL + :raise PipestatDatabaseError: if the object is not backed by a database """ + if self.file is not None: + raise PipestatDatabaseError( + "Can't determine database URL if the object is backed by a file" + ) try: - if not self.check_connection(): - self.establish_postgres_connection() - with self[DB_CONNECTION_KEY] as c, c.cursor( - cursor_factory=LoggingCursor - ) as cur: - yield cur - except Exception: - raise - finally: - self.close_postgres_connection() - - def get_status(self, record_identifier: str = None) -> Optional[str]: - """ - Get the current pipeline status - - :return str: status identifier, like 'running' - """ - r_id = self._strict_record_id(record_identifier) - if self.file is None: - with self.db_cursor as cur: - query = sql.SQL( - f"SELECT {STATUS} " - f"FROM {f'{self.namespace}_{STATUS}'} " - f"WHERE {RECORD_ID}=%s" - ) - cur.execute(query, (r_id,)) - result = cur.fetchone() - return result[0] if result is not None else None - else: - flag_file = self._get_flag_file(record_identifier=r_id) - if flag_file is not None: - assert isinstance(flag_file, str), TypeError( - "Flag file path is expected to be a str, were multiple flags found?" - ) - with open(flag_file, "r") as f: - status = f.read() - return status - _LOGGER.debug( - f"Could not determine status for '{r_id}' record. 
" - f"No flags found in: {self[STATUS_FILE_DIR]}" + creds = dict( + name=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_NAME_KEY], + user=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_USER_KEY], + passwd=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PASSWORD_KEY], + host=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY], + port=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PORT_KEY], + dialect=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_DIALECT_KEY], ) - return None - - def _get_attr(self, attr: str) -> Any: - """ - Safely get the name of the selected attribute of this object - - :param str attr: attr to select - :return: - """ - return self[attr] if attr in self else None + except (KeyError, AttributeError) as e: + raise PipestatDatabaseError( + f"Could not determine database URL. Caught error: {str(e)}" + ) + parsed_creds = {k: quote_plus(str(v)) for k, v in creds.items()} + return "{dialect}://{user}:{passwd}@{host}:{port}/{name}".format(**parsed_creds) - def _table_to_dict(self) -> None: + @property + @contextmanager + def session(self): """ - Create a dictionary from the database table data + Provide a transactional scope around a series of query + operations, no commit afterwards. """ - with self.db_cursor as cur: - cur.execute(f"SELECT * FROM {self.namespace}") - data = cur.fetchall() - _LOGGER.debug(f"Reading data from database for '{self.namespace}' namespace") - for record in data: - record_id = record[RECORD_ID] - for res_id, val in record.items(): - if val is not None: - self._report_data_element( - record_identifier=record_id, values={res_id: val} - ) + if not self.is_db_connected(): + self.establish_db_connection_orm() + with self[DB_SESSION_KEY]() as session: + _LOGGER.debug("Created session") + yield session + _LOGGER.debug("Ending session") - def _init_postgres_table(self) -> bool: + def _strict_record_id(self, forced_value: str = None) -> str: """ - Initialize a PostgreSQL table based on the provided schema, - if it does not exist. Read the data stored in the database into the - memory otherwise. 
+ Get record identifier from the outer source or stored with this object - :return bool: whether the table has been created + :param str forced_value: return this value + :return str: record identifier """ - if self.schema is None: - raise SchemaNotFoundError("initialize the database table") - if self._check_table_exists(table_name=self.namespace): - _LOGGER.debug(f"Table '{self.namespace}' already exists in the database") - if not self[DB_ONLY_KEY]: - self._table_to_dict() - return False - _LOGGER.info(f"Initializing '{self.namespace}' table in '{PKG_NAME}' database") - columns = FIXED_COLUMNS + schema_to_columns(schema=self.schema) - self._create_table(table_name=self.namespace, columns=columns) - return True - - # def _create_status_type(self): - # with self.db_cursor as cur: - # s = sql.SQL(f"SELECT exists (SELECT 1 FROM pg_type WHERE typname = '{STATUS}');") - # cur.execute(s) - # if cur.fetchone()[0]: - # return - # with self.db_cursor as cur: - # status_strs = [f"'{st_id}'" for st_id in self.status_schema.keys()] - # status_str = ", ".join(status_strs) - # s = sql.SQL(f"CREATE TYPE {STATUS} as enum({status_str});") - # cur.execute(s) - - def _init_status_table(self): - status_table_name = f"{self.namespace}_{STATUS}" - # self._create_status_type() - if not self._check_table_exists(table_name=status_table_name): - _LOGGER.info( - f"Initializing '{status_table_name}' table in " f"'{PKG_NAME}' database" - ) - self._create_table(status_table_name, STATUS_TABLE_COLUMNS) + if forced_value is not None: + return forced_value + if self.record_identifier is not None: + return self.record_identifier + raise PipestatError( + f"You must provide the record identifier you want to perform " + f"the action on. Either in the {self.__class__.__name__} " + f"constructor or as an argument to the method." + ) - def _create_table(self, table_name: str, columns: List[str]): + def _create_table_orm(self, table_name: str, schema: Dict[str, Any]): """ Create a table :param str table_name: name of the table to create - :param str | List[str] columns: columns definition list, - for instance: ['name VARCHAR(50) NOT NULL'] + :param Dict[str, Any] schema: schema to base table creation on """ - columns = mk_list_of_str(columns) - with self.db_cursor as cur: - s = sql.SQL(f"CREATE TABLE {table_name} ({','.join(columns)})") - cur.execute(s) - def _init_results_file(self) -> bool: - """ - Initialize YAML results file if it does not exist. - Read the data stored in the existing file into the memory otherwise. 
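
`_create_table_orm`, taking shape here, builds one model class per namespace at runtime with `type()`, since the columns depend on the results schema. A self-contained sketch of that pattern (table and column names invented; the in-memory SQLite engine is only for illustration):

```python
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()
attr_dict = dict(
    __tablename__="test",
    id=Column(Integer, primary_key=True),
    record_identifier=Column(String, unique=True),
    output_file=Column(String),  # one column per schema result
)
# the declarative metaclass turns this dict into a mapped class
Test = type("Test", (Base,), attr_dict)
engine = create_engine("sqlite://")
Base.metadata.create_all(bind=engine)
```
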
+ def _auto_repr(x: Any) -> str: + """ + Auto-generated __repr__ fun - :return bool: whether the file has been created - """ - if not os.path.exists(self.file): - _LOGGER.info(f"Initializing results file '{self.file}'") - data = YacAttMap(entries={self.namespace: "{}"}) - data.write(filepath=self.file) - data.make_readonly() - self[DATA_KEY] = data - return True - _LOGGER.debug(f"Reading data from '{self.file}'") - data = YacAttMap(filepath=self.file) - filtered = list(filter(lambda x: not x.startswith("_"), data.keys())) - if filtered and self.namespace not in filtered: - raise PipestatDatabaseError( - f"'{self.file}' is already used to report results for " - f"other namespace: {filtered[0]}" - ) - self[DATA_KEY] = data - return False + :param Any x: object to generate __repr__ method for + :return str: string object representation + """ + attr_strs = [ + f"{k}={str(v)}" for k, v in x.__dict__.items() if not k.startswith("_") + ] + return "<{}: {}>".format(x.__class__.__name__, ", ".join(attr_strs)) - def _check_table_exists(self, table_name: str) -> bool: - """ - Check if the specified table exists + tn = table_name or self.namespace + attr_dict = dict( + __tablename__=tn, + id=Column(Integer, primary_key=True), + record_identifier=Column(String, unique=True), + ) + for result_id, result_metadata in schema.items(): + col_type = SQL_CLASSES_BY_TYPE[result_metadata[SCHEMA_TYPE_KEY]] + _LOGGER.debug(f"Adding object: {result_id} of type: {str(col_type)}") + attr_dict.update({result_id: Column(col_type)}) + attr_dict.update({"__repr__": _auto_repr}) + _LOGGER.debug(f"Creating '{tn}' ORM with args: {attr_dict}") + self[DB_ORMS_KEY][tn] = type(tn.capitalize(), (self[DB_BASE_KEY],), attr_dict) + self[DB_BASE_KEY].metadata.create_all(bind=self[DB_ENGINE_KEY]) - :param str table_name: table name to be checked - :return bool: whether the specified table exists + def establish_db_connection_orm(self) -> bool: """ - with self.db_cursor as cur: - cur.execute( - "SELECT EXISTS(SELECT * FROM information_schema.tables " - "WHERE table_name=%s)", - (table_name,), - ) - return cur.fetchone()[0] + Establish DB connection using the config data - def _check_record( - self, condition_col: str, condition_val: str, table_name: str - ) -> bool: + :return bool: whether the connection has been established successfully """ - Check if the record matching the condition is in the table + if self.is_db_connected(): + raise PipestatDatabaseError("Connection is already established") - :param str condition_col: column to base the check on - :param str condition_val: value in the selected column - :param str table_name: name of the table ot check the record in - :return bool: whether any record matches the provided condition - """ - with self.db_cursor as cur: - statement = ( - f"SELECT EXISTS(SELECT 1 from {table_name} " - f"WHERE {condition_col}=%s)" - ) - cur.execute(statement, (condition_val,)) - return cur.fetchone()[0] + self[DB_ENGINE_KEY] = create_engine(self.db_url, echo=True) + self[DB_SESSION_KEY] = sessionmaker(bind=self[DB_ENGINE_KEY]) + return True - def _count_rows(self, table_name: str) -> int: + def is_db_connected(self) -> bool: """ - Count rows in a selected table + Check whether a DB connection has been established - :param str table_name: table to count rows for - :return int: number of rows in the selected table + :return bool: whether the connection has been established """ - with self.db_cursor as cur: - statement = sql.SQL("SELECT COUNT(*) FROM {}").format( - sql.Identifier(table_name) + if 
self.file is not None: + raise PipestatDatabaseError( + f"The {self.__class__.__name__} object is not backed by a database" ) - cur.execute(statement) - return cur.fetchall()[0][0] - - def _report_postgres( - self, value: Dict[str, Any], record_identifier: str, table_name: str = None - ) -> int: - """ - Check if record with this record identifier in table, create new record - if not (INSERT), update the record if yes (UPDATE). - - Currently supports just one column at a time. + if DB_SESSION_KEY in self and isinstance(self[DB_SESSION_KEY], sessionmaker): + return True + return False - :param str record_identifier: unique identifier of the record, value to - in 'record_identifier' column to look for to determine if the record - already exists in the table - :param dict value: a mapping of pair of table column names and - respective values to be inserted to the database - :return int: id of the row just inserted + def set_status(self, status_identifier: str, record_identifier: str = None) -> None: """ - table_name = table_name or self.namespace - if not self._check_record( - condition_col=RECORD_ID, - condition_val=record_identifier, - table_name=table_name, - ): - with self.db_cursor as cur: - cur.execute( - f"INSERT INTO {table_name} ({RECORD_ID}) VALUES (%s)", - (record_identifier,), - ) - # prep a list of SQL objects with column-named value placeholders - columns = sql.SQL(",").join( - [ - sql.SQL("{}=%({})s").format(sql.Identifier(k), sql.SQL(k)) - for k in list(value.keys()) - ] - ) - # construct the query template to execute - query = sql.SQL("UPDATE {n} SET {c} WHERE {id}=%({id})s RETURNING id").format( - n=sql.Identifier(table_name), c=columns, id=sql.SQL(RECORD_ID) - ) - # preprocess the values, dict -> Json - values = {k: Json(v) if isinstance(v, dict) else v for k, v in value.items()} - # add record_identifier column, which is specified outside of values - values.update({RECORD_ID: record_identifier}) - with self.db_cursor as cur: - cur.execute(query, values) - return cur.fetchone()[0] + Set pipeline run status. - def clear_status( - self, record_identifier: str = None, flag_names: List[str] = None - ) -> List[str]: - """ - Remove status flags + The status identifier needs to match one of identifiers specified in + the status schema. A basic, ready to use, status schema is shipped with + this package. - :param str record_identifier: name of the record to remove flags for - :param Iterable[str] flag_names: Names of flags to remove, optional; if - unspecified, all schema-defined flag names will be used. - :return List[str]: Collection of names of flags removed + :param str status_identifier: status to set, one of statuses defined + in the status schema + :param str record_identifier: record identifier to set the + pipeline status for """ r_id = self._strict_record_id(record_identifier) + known_status_identifiers = self.status_schema.keys() + if status_identifier not in known_status_identifiers: + raise PipestatError( + f"'{status_identifier}' is not a defined status identifier. 
" + f"These are allowed: {known_status_identifiers}" + ) + prev_status = self.get_status(r_id) if self.file is not None: - flag_names = flag_names or list(self.status_schema.keys()) - if isinstance(flag_names, str): - flag_names = [flag_names] - removed = [] - for f in flag_names: - path_flag_file = self.get_status_flag_path( - status_identifier=f, record_identifier=r_id - ) - try: - os.remove(path_flag_file) - except: - pass - else: - _LOGGER.info(f"Removed existing flag: {path_flag_file}") - removed.append(f) - return removed + self._set_status_file( + status_identifier=status_identifier, + record_identifier=r_id, + prev_status=prev_status, + ) else: - removed = self.get_status(r_id) - status_table_name = f"{self.namespace}_{STATUS}" - with self.db_cursor as cur: - try: - cur.execute( - f"DELETE FROM {status_table_name} WHERE " - f"{RECORD_ID}='{r_id}'" - ) - except Exception as e: - _LOGGER.error( - f"Could not remove the status from the " - f"database. Exception: {e}" - ) - return [] - else: - return [removed] + self._set_status_db( + status_identifier=status_identifier, + record_identifier=r_id, + ) + if prev_status: + _LOGGER.debug( + f"Changed status from '{prev_status}' to '{status_identifier}'" + ) def get_status_flag_path( self, status_identifier: str, record_identifier=None @@ -723,125 +550,476 @@ def get_status_flag_path( self[STATUS_FILE_DIR], f"{self.namespace}_{r_id}_{status_identifier}.flag" ) - def set_status(self, status_identifier: str, record_identifier: str = None) -> None: - """ - Set pipeline run status. + def _set_status_file( + self, + status_identifier: str, + record_identifier: str, + prev_status: Optional[str] = None, + ) -> None: + if prev_status is not None: + prev_flag_path = self.get_status_flag_path(prev_status, record_identifier) + os.remove(prev_flag_path) + flag_path = self.get_status_flag_path(status_identifier, record_identifier) + create_lock(flag_path) + with open(flag_path, "w") as f: + f.write(status_identifier) + remove_lock(flag_path) + + def _set_status_db( + self, + status_identifier: str, + record_identifier: str, + ) -> None: + try: + self._report_db( + values={STATUS: status_identifier}, + record_identifier=record_identifier, + table_name=f"{self.namespace}_{STATUS}", + ) + except Exception as e: + _LOGGER.error(f"Could not insert into the status table. Exception: {e}") + raise - The status identifier needs to match one of identifiers specified in - the status schema. A basic, ready to use, status schema is shipped with - this package. + def get_status(self, record_identifier: str = None) -> Optional[str]: + """ + Get the current pipeline status - :param str status_identifier: status to set, one of statuses defined - in the status schema - :param str record_identifier: record identifier to set the - pipeline status for + :return str: status identifier, like 'running' """ r_id = self._strict_record_id(record_identifier) - known_status_identifiers = self.status_schema.keys() - if status_identifier not in known_status_identifiers: - raise PipestatError( - f"'{status_identifier}' is not a defined status identifier. 
" - f"These are allowed: {known_status_identifiers}" - ) - prev_status = self.get_status(r_id) - if self.file is not None: - if prev_status: - prev_flag_path = self.get_status_flag_path(prev_status, r_id) - os.remove(prev_flag_path) - flag_path = self.get_status_flag_path(status_identifier, r_id) - create_lock(flag_path) - with open(flag_path, "w") as f: - f.write(status_identifier) - remove_lock(flag_path) + if self.file is None: + return self._get_status_db(record_identifier=r_id) else: - try: - self._report_postgres( - value={STATUS: status_identifier}, - record_identifier=r_id, - table_name=f"{self.namespace}_{STATUS}", - ) - except Exception as e: - _LOGGER.error( - f"Could not insert into the status table. " f"Exception: {e}" - ) - raise - if prev_status: - _LOGGER.debug( - f"Changed status from '{prev_status}' to '{status_identifier}'" + return self._get_status_file(record_identifier=r_id) + + def _get_status_file(self, record_identifier: str) -> Optional[str]: + r_id = self._strict_record_id(record_identifier) + flag_file = self._get_flag_file(record_identifier=record_identifier) + if flag_file is not None: + assert isinstance(flag_file, str), TypeError( + "Flag file path is expected to be a str, were multiple flags found?" ) + with open(flag_file, "r") as f: + status = f.read() + return status + _LOGGER.debug( + f"Could not determine status for '{r_id}' record. " + f"No flags found in: {self[STATUS_FILE_DIR]}" + ) + return None - def check_result_exists(self, result_identifier, record_identifier=None): - """ - Check if the result has been reported + def _get_status_db(self, record_identifier: str) -> Optional[str]: + try: + result = self._retrieve_db( + result_identifier=STATUS, + record_identifier=record_identifier, + table_name=f"{self.namespace}_{STATUS}", + ) + except PipestatDatabaseError: + return None + return result[STATUS] - :param str record_identifier: unique identifier of the record - :param str result_identifier: name of the result to check - :return bool: whether the specified result has been reported for the - indicated record in current namespace + def clear_status( + self, record_identifier: str = None, flag_names: List[str] = None + ) -> List[Union[str, None]]: """ - record_identifier = self._strict_record_id(record_identifier) - return self._check_which_results_exist( - results=[result_identifier], rid=record_identifier - ) + Remove status flags - def _check_which_results_exist( - self, results: List[str], rid: str = None - ) -> List[str]: + :param str record_identifier: name of the record to remove flags for + :param Iterable[str] flag_names: Names of flags to remove, optional; if + unspecified, all schema-defined flag names will be used. 
+ :return List[str]: Collection of names of flags removed """ - Check which results have been reported - - :param str rid: unique identifier of the record + r_id = self._strict_record_id(record_identifier) + if self.file is not None: + return self._clear_status_file( + record_identifier=r_id, flag_names=flag_names + ) + else: + return self._clear_status_db(record_identifier=r_id) + + def _clear_status_file( + self, record_identifier: str = None, flag_names: List[str] = None + ) -> List[Union[str, None]]: + flag_names = flag_names or list(self.status_schema.keys()) + if isinstance(flag_names, str): + flag_names = [flag_names] + removed = [] + for f in flag_names: + path_flag_file = self.get_status_flag_path( + status_identifier=f, record_identifier=record_identifier + ) + try: + os.remove(path_flag_file) + except: + pass + else: + _LOGGER.info(f"Removed existing flag: {path_flag_file}") + removed.append(f) + return removed + + def _clear_status_db(self, record_identifier: str = None) -> List[Union[str, None]]: + removed = self.get_status(record_identifier) + try: + self._remove_db( + record_identifier=record_identifier, + table_name=f"{self.namespace}_{STATUS}", + ) + except Exception as e: + _LOGGER.error( + f"Could not remove the status from the database. Exception: {e}" + ) + return [] + else: + return [removed] + + def validate_schema(self) -> None: + """ + Check schema for any possible issues + + :raises SchemaError: if any schema format issue is detected + """ + + def _recursively_replace_custom_types(s: dict) -> Dict: + """ + Replace the custom types in pipestat schema with canonical types + + :param dict s: schema to replace types in + :return dict: schema with types replaced + """ + for k, v in s.items(): + assert SCHEMA_TYPE_KEY in v, SchemaError( + f"Result '{k}' is missing '{SCHEMA_TYPE_KEY}' key" + ) + if v[SCHEMA_TYPE_KEY] == "object" and SCHEMA_PROP_KEY in s[k]: + _recursively_replace_custom_types(s[k][SCHEMA_PROP_KEY]) + if v[SCHEMA_TYPE_KEY] in CANONICAL_TYPES.keys(): + s.setdefault(k, {}) + s[k].setdefault(SCHEMA_PROP_KEY, {}) + s[k][SCHEMA_PROP_KEY].update( + CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]][SCHEMA_PROP_KEY] + ) + s[k].setdefault("required", []) + s[k]["required"].extend( + CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]]["required"] + ) + s[k][SCHEMA_TYPE_KEY] = CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]][ + SCHEMA_TYPE_KEY + ] + return s + + schema = deepcopy(self.schema) + _LOGGER.debug(f"Validating input schema") + assert isinstance(schema, dict), SchemaError( + f"The schema has to be a {dict().__class__.__name__}" + ) + for col_name in RESERVED_COLNAMES: + assert col_name not in schema.keys(), PipestatError( + f"'{col_name}' is an identifier reserved by pipestat" + ) + self[RES_SCHEMAS_KEY] = {} + schema = _recursively_replace_custom_types(schema) + self[RES_SCHEMAS_KEY] = schema + + def _init_results_file(self) -> bool: + """ + Initialize YAML results file if it does not exist. + Read the data stored in the existing file into the memory otherwise. 
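
`_recursively_replace_custom_types` above rewrites pipestat-specific result types into canonical jsonschema before the schemas are stored. A toy illustration with a stand-in `CANONICAL_TYPES` entry (the real mapping ships with the package constants):

```python
schema = {"my_image": {"type": "image"}}
CANONICAL_TYPES = {  # stand-in for the package-level mapping
    "image": {
        "type": "object",
        "properties": {"path": {"type": "string"}, "title": {"type": "string"}},
        "required": ["path", "title"],
    }
}
result = schema["my_image"]
canonical = CANONICAL_TYPES[result["type"]]
result.setdefault("properties", {}).update(canonical["properties"])
result.setdefault("required", []).extend(canonical["required"])
result["type"] = canonical["type"]
# schema["my_image"]["type"] is now 'object', with path/title required
```
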
+ + :return bool: whether the file has been created + """ + if not os.path.exists(self.file): + _LOGGER.info(f"Initializing results file '{self.file}'") + data = YacAttMap(entries={self.namespace: "{}"}) + data.write(filepath=self.file) + data.make_readonly() + self[DATA_KEY] = data + return True + _LOGGER.debug(f"Reading data from '{self.file}'") + data = YacAttMap(filepath=self.file) + filtered = list(filter(lambda x: not x.startswith("_"), data.keys())) + if filtered and self.namespace not in filtered: + raise PipestatDatabaseError( + f"'{self.file}' is already used to report results for " + f"other namespace: {filtered[0]}" + ) + self[DATA_KEY] = data + return False + + def _init_db_table(self) -> bool: + """ + Initialize a database table based on the provided schema, + if it does not exist. Read the data stored in the database into the + memory otherwise. + + :return bool: whether the table has been created + """ + if self.schema is None: + raise SchemaNotFoundError("initialize the database table") + if not self.is_db_connected(): + self.establish_db_connection_orm() + # if self._check_table_exists(table_name=self.namespace): + # _LOGGER.debug(f"Table '{self.namespace}' already exists in the database") + # if not self[DB_ONLY_KEY]: + # self._table_to_dict() + # # return False + _LOGGER.info(f"Initializing '{self.namespace}' table in '{PKG_NAME}' database") + self._create_table_orm(table_name=self.namespace, schema=self.result_schemas) + return True + + def _init_status_table(self): + status_table_name = f"{self.namespace}_{STATUS}" + if not self.is_db_connected(): + self.establish_db_connection_orm() + # if not self._check_table_exists(table_name=status_table_name): + _LOGGER.debug( + f"Initializing '{status_table_name}' table in " f"'{PKG_NAME}' database" + ) + self._create_table_orm( + table_name=status_table_name, + schema=get_status_table_schema(status_schema=self.status_schema), + ) + + def _get_attr(self, attr: str) -> Any: + """ + Safely get the name of the selected attribute of this object + + :param str attr: attr to select + :return: + """ + return self[attr] if attr in self else None + + def _check_table_exists(self, table_name: str) -> bool: + """ + Check if the specified table exists + + :param str table_name: table name to be checked + :return bool: whether the specified table exists + """ + from sqlalchemy import inspect + + with self.session as s: + return inspect(s.bind).has_table(table_name=table_name) + + def _count_rows(self, table_name: str) -> int: + """ + Count rows in a selected table + + :param str table_name: table to count rows for + :return int: number of rows in the selected table + """ + with self.session as s: + return s.query(self[DB_ORMS_KEY][table_name].id).count() + + def _get_orm(self, table_name: str = None) -> Any: + """ + Get an object relational mapper class + + :param str table_name: table name to get a class for + :return Any: Object relational mapper class + """ + if DB_ORMS_KEY not in self: + raise PipestatDatabaseError("Object relational mapper classes not defined") + tn = f"{table_name or self.namespace}" + if tn not in self[DB_ORMS_KEY]: + raise PipestatDatabaseError( + f"No object relational mapper class defined for table: {tn}" + ) + if not isinstance(self[DB_ORMS_KEY][tn], DeclarativeMeta): + raise PipestatDatabaseError( + f"Object relational mapper class for table '{tn}' is invalid" + ) + return self[DB_ORMS_KEY][tn] + + def check_record_exists( + self, record_identifier: str, table_name: str = None + ) -> bool: + """ + Check if the 
specified record exists in the table + + :param str record_identifier: record to check for + :param str table_name: table name to check + :return bool: whether the record exists in the table + """ + if self.file is None: + with self.session as s: + return ( + s.query(self._get_orm(table_name).id) + .filter_by(record_identifier=record_identifier) + .first() + is not None + ) + else: + if ( + self.namespace in self.data + and record_identifier in self.data[table_name] + ): + return True + return False + + def check_which_results_exist( + self, results: List[str], rid: str = None + ) -> List[str]: + """ + Check which results have been reported + + :param str rid: unique identifier of the record :param List[str] results: names of the results to check :return List[str]: whether the specified result has been reported for the indicated record in current namespace """ rid = self._strict_record_id(rid) - existing = [] - for r in results: - if not self[DB_ONLY_KEY]: + if self.file is None: + existing = self._check_which_results_exist_db(results=results, rid=rid) + else: + existing = [] + for r in results: if ( self.namespace in self.data and rid in self.data[self.namespace] and r in self.data[self.namespace][rid] ): existing.append(r) - else: - with self.db_cursor as cur: - try: - cur.execute( - f"SELECT {r} FROM {self.namespace} WHERE {RECORD_ID}=%s", - (rid,), - ) - except Exception: - continue - else: - res = cur.fetchone() - if res is not None and res[0] is not None: - existing.append(r) return existing - def check_record_exists(self, record_identifier: str = None) -> bool: + def _check_which_results_exist_db( + self, results: List[str], rid: str = None, table_name: str = None + ) -> List[str]: """ - Check if the record exists + Check if the specified results exist in the table + + :param str rid: record to check for + :param List[str] results: results identifiers to check for + :param str table_name: name of the table to search for results in + :return List[str]: results identifiers that exist + """ + table_name = table_name or self.namespace + rid = self._strict_record_id(rid) + with self.session as s: + record = ( + s.query(self._get_orm(table_name)) + .filter_by(record_identifier=rid) + .first() + ) + return [r for r in results if getattr(record, r, None) is not None] + + def check_result_exists( + self, + result_identifier: str, + record_identifier: str = None, + ) -> bool: + """ + Check if the result has been reported :param str record_identifier: unique identifier of the record - :return bool: whether the record exists + :param str result_identifier: name of the result to check + :return bool: whether the specified result has been reported for the + indicated record in current namespace """ record_identifier = self._strict_record_id(record_identifier) - if self[DB_ONLY_KEY]: - with self.db_cursor as cur: - cur.execute( - f"SELECT exists(SELECT 1 from {self.namespace} " - f"WHERE {RECORD_ID}=%s)", - (record_identifier,), + return ( + len( + self.check_which_results_exist( + results=[result_identifier], + rid=record_identifier, ) - return cur.fetchone() - if ( - self.namespace in self.data - and record_identifier in self.data[self.namespace] - ): - return True - return False + ) + > 0 + ) + + def retrieve( + self, record_identifier: str = None, result_identifier: str = None + ) -> Union[Any, Dict[str, Any]]: + """ + Retrieve a result for a record. + + If no result ID specified, results for the entire record will + be returned. 
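
The DB-side existence checks reduce to `filter_by` lookups on the dynamically built model. Reusing the `Test` model and `engine` from the earlier sketch:

```python
from sqlalchemy.orm import sessionmaker

SessionFactory = sessionmaker(bind=engine)
with SessionFactory() as s:
    exists = (
        s.query(Test.id).filter_by(record_identifier="sample1").first() is not None
    )
```
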
+ + :param str record_identifier: unique identifier of the record + :param str result_identifier: name of the result to be retrieved + :return any | Dict[str, any]: a single result or a mapping with all the + results reported for the record + """ + r_id = self._strict_record_id(record_identifier) + if self.file is None: + return self._retrieve_db( + result_identifier=result_identifier, record_identifier=r_id + ) + else: + if r_id not in self.data[self.namespace]: + raise PipestatDatabaseError(f"Record '{r_id}' not found") + if result_identifier is None: + return self.data[self.namespace][r_id].to_dict() + if result_identifier not in self.data[self.namespace][r_id]: + raise PipestatDatabaseError( + f"Result '{result_identifier}' not found for record '{r_id}'" + ) + return self.data[self.namespace][r_id][result_identifier] + + def _retrieve_db( + self, + result_identifier: str = None, + record_identifier: str = None, + table_name: str = None, + ) -> Dict[str, Any]: + """ + Retrieve a result for a record. + + If no result ID specified, results for the entire record will + be returned. + + :param str record_identifier: unique identifier of the record + :param str result_identifier: name of the result to be retrieved + :param str table_name: name of the table to search for results in + :return Dict[str, any]: a single result or a mapping with all the results + reported for the record + """ + table_name = table_name or self.namespace + record_identifier = self._strict_record_id(record_identifier) + if result_identifier is not None: + existing = self.check_which_results_exist( + results=[result_identifier], + rid=record_identifier, + table_name=table_name, + ) + if not existing: + raise PipestatDatabaseError( + f"Result '{result_identifier}' not found for record " + f"'{record_identifier}'" + ) + + with self.session as s: + record = ( + s.query(self._get_orm(table_name)) + .filter_by(record_identifier=record_identifier) + .first() + ) + + if record is not None: + if result_identifier is not None: + return {result_identifier: getattr(record, result_identifier)} + return { + column: getattr(record, column) + for column in [c.name for c in record.__table__.columns] + if getattr(record, column, None) is not None + } + raise PipestatDatabaseError(f"Record '{record_identifier}' not found") + + def assert_results_defined(self, results: List[str]) -> None: + """ + Assert provided list of results is defined in the schema + + :param List[str] results: list of results to + check for existence in the schema + :raises SchemaError: if any of the results is not defined in the schema + """ + known_results = self.result_schemas.keys() + for r in results: + assert r in known_results, SchemaError( + f"'{r}' is not a known result. Results defined in the " + f"schema are: {list(known_results)}." 
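
Usage of `retrieve` is unchanged by the refactor: with a result identifier it returns a single value, without one it returns the whole record (the result name follows the notebook example):

```python
psm.retrieve(record_identifier="sample1", result_identifier="output_file")
psm.retrieve(record_identifier="sample1")  # mapping of all reported results
```
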
+            )
 
     def report(
         self,
@@ -873,12 +1051,12 @@ def report(
             "There is no way to return the updated object ID while using "
             "results file as the object backend"
         )
-        updated_ids = False
         if self.schema is None:
             raise SchemaNotFoundError("report results")
+        updated_ids = False
         result_identifiers = list(values.keys())
         self.assert_results_defined(results=result_identifiers)
-        existing = self._check_which_results_exist(
+        existing = self.check_which_results_exist(
             rid=record_identifier, results=result_identifiers
         )
         if existing:
@@ -903,12 +1081,12 @@
                 self.data.make_readonly()
         else:
             try:
-                updated_ids = self._report_postgres(
-                    record_identifier=record_identifier, value=values
+                updated_ids = self._report_db(
+                    record_identifier=record_identifier, values=values
                 )
             except Exception as e:
                 _LOGGER.error(
-                    f"Could not insert the result into the database. " f"Exception: {e}"
+                    f"Could not insert the result into the database. Exception: {e}"
                 )
             if not self[DB_ONLY_KEY]:
                 for r in result_identifiers:
@@ -922,6 +1100,41 @@
             )
         return True if not return_id else updated_ids
 
+    def _report_db(
+        self, values: Dict[str, Any], record_identifier: str, table_name: str = None
+    ) -> int:
+        """
+        Report a result to a database
+
+        :param Dict[str, Any] values: values to report
+        :param str record_identifier: record to report the result for
+        :param str table_name: name of the table to report the result in
+        :return int: id of the updated/inserted row
+        """
+        record_identifier = self._strict_record_id(record_identifier)
+        ORMClass = self._get_orm(table_name)
+        values.update({RECORD_ID: record_identifier})
+        if not self.check_record_exists(
+            record_identifier=record_identifier, table_name=table_name
+        ):
+            new_record = ORMClass(**values)
+            with self.session as s:
+                s.add(new_record)
+                s.commit()
+                returned_id = new_record.id
+        else:
+            with self.session as s:
+                record_to_update = (
+                    s.query(ORMClass)
+                    .filter(getattr(ORMClass, RECORD_ID) == record_identifier)
+                    .first()
+                )
+                for result_id, result_value in values.items():
+                    setattr(record_to_update, result_id, result_value)
+                s.commit()
+                returned_id = record_to_update.id
+        return returned_id
+
     def _report_data_element(
         self, record_identifier: str, values: Dict[str, Any]
     ) -> None:
@@ -932,7 +1145,7 @@ def _report_data_element(
         hierarchical mapping structure if needed.
 
         :param str record_identifier: unique identifier of the record
-        :param Dict[str,Any] values: dict of results identifiers and values
+        :param Dict[str, Any] values: dict of results identifiers and values
             to be reported
         """
         self[DATA_KEY].setdefault(self.namespace, PXAM())
@@ -940,104 +1153,10 @@
         for res_id, val in values.items():
             self[DATA_KEY][self.namespace][record_identifier][res_id] = val
 
-    def select(
-        self,
-        columns: Union[str, List[str]] = None,
-        condition: str = None,
-        condition_val: str = None,
-        offset: int = None,
-        limit: int = None,
-    ) -> List[psycopg2.extras.DictRow]:
-        """
-        Get all the contents from the selected table, possibly restricted by
-        the provided condition. 
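
And the corresponding `report` call, which routes to `_report_db` for a database back-end; the reported value mirrors the file-type result from the notebook:

```python
psm.report(
    record_identifier="sample1",
    values={"output_file": {"path": "/tmp/out.txt", "title": "Output file"}},
)
```
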
- - :param str | List[str] columns: columns to select - :param str condition: condition to restrict the results - with, will be appended to the end of the SELECT statement and - safely populated with 'condition_val', - for example: `"id=%s"` - :param list condition_val: values to fill the placeholder - in 'condition' with - :param int offset: number of records to be skipped - :param int limit: max number of records to be returned - :return List[psycopg2.extras.DictRow]: all table contents - """ - if self.file: - raise NotImplementedError( - "Selection is not supported on objects backed by results files." - " Use 'retrieve' method instead." - ) - condition, condition_val = preprocess_condition_pair(condition, condition_val) - if not columns: - columns = sql.SQL("*") - else: - columns = sql.SQL(",").join( - [sql.Identifier(x) for x in mk_list_of_str(columns)] - ) - statement = sql.SQL("SELECT {} FROM {}").format( - columns, sql.Identifier(self.namespace) - ) - if condition: - statement += sql.SQL(" WHERE ") - statement += condition - statement = paginate_query(statement, offset, limit) - with self.db_cursor as cur: - cur.execute(query=statement, vars=condition_val) - result = cur.fetchall() - return result - - def retrieve( - self, record_identifier: str = None, result_identifier: str = None - ) -> Union[Any, Dict[str, Any]]: - """ - Retrieve a result for a record. - - If no result ID specified, results for the entire record will - be returned. - - :param str record_identifier: unique identifier of the record - :param str result_identifier: name of the result to be retrieved - :return any | Dict[str, any]: a single result or a mapping with all the - results reported for the record - """ - record_identifier = self._strict_record_id(record_identifier) - if self[DB_ONLY_KEY]: - if result_identifier is not None: - existing = self._check_which_results_exist( - results=[result_identifier], rid=record_identifier - ) - if not existing: - raise PipestatDatabaseError( - f"Result '{result_identifier}' not found for record " - f"'{record_identifier}'" - ) - with self.db_cursor as cur: - query = sql.SQL( - f"SELECT {result_identifier or '*'} " - f"FROM {self.namespace} WHERE {RECORD_ID}=%s" - ) - cur.execute(query, (record_identifier,)) - result = cur.fetchall() - if len(result) > 0: - if result_identifier is None: - return {k: v for k, v in dict(result[0]).items() if v is not None} - return dict(result[0])[result_identifier] - raise PipestatDatabaseError(f"Record '{record_identifier}' not found") - else: - if record_identifier not in self.data[self.namespace]: - raise PipestatDatabaseError(f"Record '{record_identifier}' not found") - if result_identifier is None: - return self.data[self.namespace][record_identifier].to_dict() - if result_identifier not in self.data[self.namespace][record_identifier]: - raise PipestatDatabaseError( - f"Result '{result_identifier}' not found for record " - f"'{record_identifier}'" - ) - return self.data[self.namespace][record_identifier][result_identifier] - def remove( - self, record_identifier: str = None, result_identifier: str = None + self, + record_identifier: str = None, + result_identifier: str = None, ) -> bool: """ Remove a result. @@ -1050,221 +1169,99 @@ def remove( if the record should be removed. 
:return bool: whether the result has been removed """ - record_identifier = self._strict_record_id(record_identifier) + r_id = self._strict_record_id(record_identifier) rm_record = True if result_identifier is None else False - if not self.check_record_exists(record_identifier): - _LOGGER.error(f"Record '{record_identifier}' not found") - return False - if result_identifier and not self.check_result_exists( - result_identifier, record_identifier + if not self.check_record_exists( + record_identifier=r_id, table_name=self.namespace ): - _LOGGER.error( - f"'{result_identifier}' has not been reported for " - f"'{record_identifier}'" - ) + _LOGGER.error(f"Record '{r_id}' not found") + return False + if result_identifier and not self.check_result_exists(result_identifier, r_id): + _LOGGER.error(f"'{result_identifier}' has not been reported for '{r_id}'") return False if self.file: self.data.make_writable() if not self[DB_ONLY_KEY]: if rm_record: - _LOGGER.info(f"Removing '{record_identifier}' record") - del self[DATA_KEY][self.namespace][record_identifier] + _LOGGER.info(f"Removing '{r_id}' record") + del self[DATA_KEY][self.namespace][r_id] else: - val_backup = self[DATA_KEY][self.namespace][record_identifier][ - result_identifier - ] - del self[DATA_KEY][self.namespace][record_identifier][result_identifier] + val_backup = self[DATA_KEY][self.namespace][r_id][result_identifier] + del self[DATA_KEY][self.namespace][r_id][result_identifier] _LOGGER.info( f"Removed result '{result_identifier}' for record " - f"'{record_identifier}' from '{self.namespace}' namespace" + f"'{r_id}' from '{self.namespace}' namespace" ) - if not self[DATA_KEY][self.namespace][record_identifier]: + if not self[DATA_KEY][self.namespace][r_id]: _LOGGER.info( - f"Last result removed for '{record_identifier}'. " - f"Removing the record" + f"Last result removed for '{r_id}'. " f"Removing the record" ) - del self[DATA_KEY][self.namespace][record_identifier] + del self[DATA_KEY][self.namespace][r_id] rm_record = True if self.file: self.data.write() self.data.make_readonly() if self.file is None: - if rm_record: - try: - with self.db_cursor as cur: - cur.execute( - f"DELETE FROM {self.namespace} WHERE " - f"{RECORD_ID}='{record_identifier}'" - ) - except Exception as e: - _LOGGER.error( - f"Could not remove the result from the " - f"database. Exception: {e}" - ) - self[DATA_KEY][self.namespace].setdefault(record_identifier, PXAM()) - raise - return True try: - with self.db_cursor as cur: - cur.execute( - f"UPDATE {self.namespace} SET {result_identifier}=null " - f"WHERE {RECORD_ID}='{record_identifier}'" - ) + self._remove_db( + record_identifier=r_id, + result_identifier=None if rm_record else result_identifier, + ) except Exception as e: _LOGGER.error( - f"Could not remove the result from the database. " f"Exception: {e}" + f"Could not remove the result from the database. 
Exception: {e}" ) - if not self[DB_ONLY_KEY]: - self[DATA_KEY][self.namespace][record_identifier][ - result_identifier - ] = val_backup + if not self[DB_ONLY_KEY] and not rm_record: + self[DATA_KEY][self.namespace][r_id][result_identifier] = val_backup raise return True - def validate_schema(self) -> None: - """ - Check schema for any possible issues - - :raises SchemaError: if any schema format issue is detected - """ - - def _recursively_replace_custom_types(s: dict) -> Dict: - """ - Replace the custom types in pipestat schema with canonical types - - :param dict s: schema to replace types in - :return dict: schema with types replaced - """ - for k, v in s.items(): - assert SCHEMA_TYPE_KEY in v, SchemaError( - f"Result '{k}' is missing '{SCHEMA_TYPE_KEY}' key" - ) - if v[SCHEMA_TYPE_KEY] == "object" and SCHEMA_PROP_KEY in s[k]: - _recursively_replace_custom_types(s[k][SCHEMA_PROP_KEY]) - if v[SCHEMA_TYPE_KEY] in CANONICAL_TYPES.keys(): - s.setdefault(k, {}) - s[k].setdefault(SCHEMA_PROP_KEY, {}) - s[k][SCHEMA_PROP_KEY].update( - CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]][SCHEMA_PROP_KEY] - ) - s[k].setdefault("required", []) - s[k]["required"].extend( - CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]]["required"] - ) - s[k][SCHEMA_TYPE_KEY] = CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]][ - SCHEMA_TYPE_KEY - ] - return s - - schema = deepcopy(self.schema) - _LOGGER.debug(f"Validating input schema") - assert isinstance(schema, dict), SchemaError( - f"The schema has to be a {dict().__class__.__name__}" - ) - for col_name in RESERVED_COLNAMES: - assert col_name not in schema.keys(), PipestatError( - f"'{col_name}' is an identifier reserved by pipestat" - ) - self[RES_SCHEMAS_KEY] = {} - schema = _recursively_replace_custom_types(schema) - self[RES_SCHEMAS_KEY] = schema - - def assert_results_defined(self, results: List[str]) -> None: - """ - Assert provided list of results is defined in the schema - - :param List[str] results: list of results to - check for existence in the schema - :raises SchemaError: if any of the results is not defined in the schema + def _remove_db( + self, + record_identifier: str = None, + result_identifier: str = None, + table_name: str = None, + ) -> bool: """ - known_results = self.result_schemas.keys() - for r in results: - assert r in known_results, SchemaError( - f"'{r}' is not a known result. Results defined in the " - f"schema are: {list(known_results)}." - ) + Remove a result. - def check_connection(self) -> bool: - """ - Check whether a PostgreSQL connection has been established + If no result ID specified or last result is removed, the entire record + will be removed. - :return bool: whether the connection has been established + :param str record_identifier: unique identifier of the record + :param str result_identifier: name of the result to be removed or None + if the record should be removed. 
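
`remove` keeps its public behavior: it drops a single result, or the whole record once the last result is gone:

```python
psm.remove(record_identifier="sample1", result_identifier="output_file")
psm.remove(record_identifier="sample1")  # removes the entire record
```
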
+ :param str table_name: name of the table to report the result in + :return bool: whether the result has been removed + :raise PipestatDatabaseError: if either record or result specified are not found """ - if self.file is not None: - raise PipestatDatabaseError( - f"The {self.__class__.__name__} object " f"is not backed by a database" - ) - if DB_CONNECTION_KEY in self and isinstance( - self[DB_CONNECTION_KEY], psycopg2.extensions.connection + table_name = table_name or self.namespace + record_identifier = self._strict_record_id(record_identifier) + ORMClass = self._get_orm(table_name=table_name) + if self.check_record_exists( + record_identifier=record_identifier, table_name=table_name ): - return True - return False - - def establish_postgres_connection(self, suppress: bool = False) -> bool: - """ - Establish PostgreSQL connection using the config data - - :param bool suppress: whether to suppress any connection errors - :return bool: whether the connection has been established successfully - """ - if self.check_connection(): - raise PipestatDatabaseError( - f"Connection is already established: " - f"{self[DB_CONNECTION_KEY].info.host}" - ) - try: - self[DB_CONNECTION_KEY] = psycopg2.connect( - dbname=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_NAME_KEY], - user=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_USER_KEY], - password=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PASSWORD_KEY], - host=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY], - port=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PORT_KEY], - ) - except psycopg2.Error as e: - _LOGGER.error( - f"Could not connect to: " - f"{self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY]}" - ) - _LOGGER.info(f"Caught error: {e}") - if suppress: - return False - raise + with self.session as s: + record = ( + s.query(ORMClass) + .filter(getattr(ORMClass, RECORD_ID) == record_identifier) + .first() + ) + if result_identifier is None: + # delete row + record.delete() + else: + # set the value to None + if not self.check_result_exists( + record_identifier=record_identifier, + result_identifier=result_identifier, + table_name=table_name, + ): + raise PipestatDatabaseError( + f"Result '{result_identifier}' not found for record " + f"'{record_identifier}'" + ) + setattr(record, result_identifier, None) + s.commit() else: - _LOGGER.debug( - f"Established connection with PostgreSQL: " - f"{self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY]}" - ) - return True - - def close_postgres_connection(self) -> None: - """ - Close connection and remove client bound - """ - if not self.check_connection(): - raise PipestatDatabaseError( - f"The connection has not been established: " - f"{self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY]}" - ) - self[DB_CONNECTION_KEY].close() - del self[DB_CONNECTION_KEY] - _LOGGER.debug( - f"Closed connection with PostgreSQL: " - f"{self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY]}" - ) - - def _strict_record_id(self, forced_value: str = None) -> str: - """ - Get record identifier from the outer source or stored with this object - - :param str forced_value: return this value - :return str: record identifier - """ - if forced_value is not None: - return forced_value - if self.record_identifier is not None: - return self.record_identifier - raise PipestatError( - f"You must provide the record identifier you want to perform " - f"the action on. Either in the {self.__class__.__name__} " - f"constructor or as an argument to the method." 
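
Note that with the ORM, deleting a whole row goes through the session rather than the mapped instance: `Session.delete()` marks an instance for deletion, while `Query.delete()` is the bulk variant. A minimal sketch reusing the earlier `Test` model and `SessionFactory`:

```python
with SessionFactory() as s:
    record = s.query(Test).filter_by(record_identifier="sample1").first()
    if record is not None:
        s.delete(record)  # schedule the row for deletion
        s.commit()
```
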
- ) + raise PipestatDatabaseError(f"Record '{record_identifier}' not found") diff --git a/pipestat/pipestat_dirver.py b/pipestat/pipestat_dirver.py new file mode 100644 index 00000000..ea9cdb42 --- /dev/null +++ b/pipestat/pipestat_dirver.py @@ -0,0 +1,1270 @@ +from contextlib import contextmanager +from copy import deepcopy +from logging import getLogger +from typing import Any, Dict, List, Optional, Union + +import psycopg2 +from attmap import PathExAttMap as PXAM +from jsonschema import validate +from psycopg2.extensions import connection +from psycopg2.extras import DictCursor, Json +from ubiquerg import create_lock, remove_lock +from yacman import YacAttMap + +from .const import * +from .exceptions import * +from .helpers import * + +_LOGGER = getLogger(PKG_NAME) + + +class LoggingCursor(psycopg2.extras.DictCursor): + """ + Logging db cursor + """ + + def execute(self, query, vars=None): + """ + Execute a database operation (query or command) and issue a debug + and info level log messages + + :param query: + :param vars: + :return: + """ + _LOGGER.debug(f"Executing query: {self.mogrify(query, vars)}") + super(LoggingCursor, self).execute(query=query, vars=vars) + _LOGGER.debug(f"Executed query: {self.query}") + + +class PipestatManagerDirver(dict): + """ + Pipestat standardizes reporting of pipeline results and + pipeline status management. It formalizes a way for pipeline developers + and downstream tools developers to communicate -- results produced by a + pipeline can easily and reliably become an input for downstream analyses. + The object exposes API for interacting with the results and + pipeline status and can be backed by either a YAML-formatted file + or a PostgreSQL database. + """ + + def __init__( + self, + namespace: str = None, + record_identifier: str = None, + schema_path: str = None, + results_file_path: str = None, + database_only: bool = False, + config: Union[str, dict] = None, + status_schema_path: str = None, + flag_file_dir: str = None, + ): + """ + Initialize the object + + :param str namespace: namespace to report into. This will be the DB + table name if using DB as the object back-end + :param str record_identifier: record identifier to report for. 
This
+            creates a weak bond to the record, which can be overridden in
+            this object's method calls
+        :param str schema_path: path to the output schema that formalizes
+            the results structure
+        :param str results_file_path: YAML file to report into, if file is
+            used as the object back-end
+        :param bool database_only: whether the reported data should not be
+            stored in the memory, but only in the database
+        :param str | dict config: path to the configuration file or a mapping
+            with the config file content
+        :param str status_schema_path: path to the status schema that formalizes
+            the status flags structure
+        """
+
+        def _check_cfg_key(cfg: dict, key: str) -> bool:
+            if key not in cfg:
+                _LOGGER.warning(f"Key '{key}' not found in config")
+                return False
+            return True
+
+        def _mk_abs_via_cfg(
+            path: Optional[str],
+            cfg_path: Optional[str],
+        ) -> Optional[str]:
+            if path is None:
+                return path
+            assert isinstance(path, str), TypeError("Path is expected to be a str")
+            if os.path.isabs(path):
+                return path
+            if cfg_path is None:
+                rel_to_cwd = os.path.join(os.getcwd(), path)
+                if os.path.exists(rel_to_cwd) or os.access(
+                    os.path.dirname(rel_to_cwd), os.W_OK
+                ):
+                    return rel_to_cwd
+                raise OSError(f"Could not make this path absolute: {path}")
+            joined = os.path.join(os.path.dirname(cfg_path), path)
+            if os.path.isabs(joined):
+                return joined
+            raise OSError(f"Could not make this path absolute: {path}")
+
+        def _select_value(
+            arg_name: str,
+            arg_value: Any,
+            cfg: dict,
+            strict: bool = True,
+            env_var: str = None,
+        ) -> Any:
+            if arg_value is not None:
+                return arg_value
+            if arg_name not in cfg or cfg[arg_name] is None:
+                if env_var is not None:
+                    arg = os.getenv(env_var, None)
+                    if arg is not None:
+                        _LOGGER.debug(f"Value '{arg}' sourced from '{env_var}' env var")
+                        return expandpath(arg)
+                if strict:
+                    raise PipestatError(
+                        f"Value for the required '{arg_name}' argument could not be"
+                        f" determined. Provide it in the config or pass to the "
+                        f"object constructor." 
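
`_mk_abs_via_cfg` resolves relative paths against the config file's directory when a config is given; the core of that resolution is just:

```python
import os

cfg_path = "/home/user/project/pipestat_config.yaml"  # hypothetical config location
os.path.join(os.path.dirname(cfg_path), "schema.yaml")
# '/home/user/project/schema.yaml'
```
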
+ ) + return + return cfg[arg_name] + + super(PipestatManagerDirver, self).__init__() + self[CONFIG_KEY] = YacAttMap() + # read config or config data + config = config or os.getenv(ENV_VARS["config"]) + if config is not None: + if isinstance(config, str): + config = os.path.abspath(expandpath(config)) + self[CONFIG_KEY] = YacAttMap(filepath=config) + self._config_path = config + elif isinstance(config, dict): + self[CONFIG_KEY] = YacAttMap(entries=config) + self._config_path = None + else: + raise TypeError( + "database_config has to be either path to the " + "file to read or a dict" + ) + # validate config + cfg = self[CONFIG_KEY].to_dict(expand=True) + _, cfg_schema = read_yaml_data(CFG_SCHEMA, "config schema") + validate(cfg, cfg_schema) + + self[NAME_KEY] = _select_value( + "namespace", namespace, self[CONFIG_KEY], env_var=ENV_VARS["namespace"] + ) + self[RECORD_ID_KEY] = _select_value( + "record_identifier", + record_identifier, + self[CONFIG_KEY], + False, + ENV_VARS["record_identifier"], + ) + self[DB_ONLY_KEY] = database_only + # read results schema + self._schema_path = _select_value( + "schema_path", + schema_path, + self[CONFIG_KEY], + False, + env_var=ENV_VARS["schema"], + ) + if self._schema_path is not None: + _, self[SCHEMA_KEY] = read_yaml_data( + _mk_abs_via_cfg(self._schema_path, self.config_path), "schema" + ) + self.validate_schema() + # determine the highlighted results + self[HIGHLIGHTED_KEY] = [ + k + for k, v in self.schema.items() + if "highlight" in v and v["highlight"] is True + ] + if self[HIGHLIGHTED_KEY]: + assert isinstance(self[HIGHLIGHTED_KEY], list), TypeError( + f"highlighted results specification " + f"({self[HIGHLIGHTED_KEY]}) has to be a list" + ) + # read status schema + status_schema_path = ( + _mk_abs_via_cfg( + _select_value( + "status_schema_path", + status_schema_path, + self[CONFIG_KEY], + False, + env_var=ENV_VARS["status_schema"], + ), + self.config_path, + ) + or STATUS_SCHEMA + ) + self[STATUS_SCHEMA_SOURCE_KEY], self[STATUS_SCHEMA_KEY] = read_yaml_data( + status_schema_path, "status schema" + ) + # determine results file + results_file_path = _mk_abs_via_cfg( + _select_value( + "results_file_path", + results_file_path, + self[CONFIG_KEY], + False, + ENV_VARS["results_file"], + ), + self.config_path, + ) + if results_file_path: + if self[DB_ONLY_KEY]: + raise ValueError( + "Running in database only mode does not make " + "sense with a YAML file as a backend." 
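
Highlighting is driven by the output schema itself: any result carrying `highlight: true` ends up in the highlighted set, exactly as the constructor logic above filters it. In dict form:

```python
schema = {
    "log_file": {"type": "file", "highlight": True},
    "output_file": {"type": "file"},
}
highlighted = [k for k, v in schema.items() if v.get("highlight") is True]
# ['log_file']
```
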
+ ) + self[FILE_KEY] = results_file_path + self._init_results_file() + flag_file_dir = _select_value( + "flag_file_dir", flag_file_dir, self[CONFIG_KEY], False + ) or os.path.dirname(self.file) + self[STATUS_FILE_DIR] = _mk_abs_via_cfg(flag_file_dir, self.config_path) + elif CFG_DATABASE_KEY in self[CONFIG_KEY]: + if not all( + [ + _check_cfg_key(self[CONFIG_KEY][CFG_DATABASE_KEY], key) + for key in DB_CREDENTIALS + ] + ): + raise MissingConfigDataError( + "Must specify all database login " "credentials or result_file_path" + ) + self[DATA_KEY] = YacAttMap() + self._init_postgres_table() + self._init_status_table() + else: + raise MissingConfigDataError( + "Must specify either database login " "credentials or a YAML file path" + ) + + def __str__(self): + """ + Generate string representation of the object + + :return str: string representation of the object + """ + res = f"{self.__class__.__name__} ({self.namespace})" + res += "\nBackend: {}".format( + f"file ({self.file})" if self.file else "PostgreSQL" + ) + res += f"\nResults schema source: {self.schema_path}" + res += f"\nStatus schema source: {self.status_schema_source}" + res += f"\nRecords count: {self.record_count}" + if self.highlighted_results: + res += f"\nHighlighted results: {', '.join(self.highlighted_results)}" + return res + + def _get_flag_file( + self, record_identifier: str = None + ) -> Union[str, List[str], None]: + """ + Get path to the status flag file for the specified record + + :param str record_identifier: unique record identifier + :return str | list[str] | None: path to the status flag file + """ + from glob import glob + + r_id = self._strict_record_id(record_identifier) + if self.file is None: + return + if self.file is not None: + regex = os.path.join( + self[STATUS_FILE_DIR], f"{self.namespace}_{r_id}_*.flag" + ) + file_list = glob(regex) + if len(file_list) > 1: + _LOGGER.warning("Multiple flag files found") + return file_list + elif len(file_list) == 1: + return file_list[0] + else: + _LOGGER.debug("No flag files found") + return None + + @property + def highlighted_results(self) -> List[str]: + """ + Highlighted results + + :return List[str]: a collection of highlighted results + """ + return self._get_attr(HIGHLIGHTED_KEY) or [] + + @property + def record_count(self) -> int: + """ + Number of records reported + + :return int: number of records reported + """ + return ( + len(self.data[self.namespace]) + if self.file + else self._count_rows(self.namespace) + ) + + @property + def namespace(self) -> str: + """ + Namespace the object writes the results to + + :return str: namespace the object writes the results to + """ + return self._get_attr(NAME_KEY) + + @property + def record_identifier(self) -> str: + """ + Unique identifier of the record + + :return str: unique identifier of the record + """ + return self._get_attr(RECORD_ID_KEY) + + @property + def schema(self) -> Dict: + """ + Schema mapping + + :return dict: schema that formalizes the results structure + """ + return self._get_attr(SCHEMA_KEY) + + @property + def status_schema(self) -> Dict: + """ + Status schema mapping + + :return dict: schema that formalizes the pipeline status structure + """ + return self._get_attr(STATUS_SCHEMA_KEY) + + @property + def status_schema_source(self) -> Dict: + """ + Status schema source + + :return dict: source of the schema that formalizes + the pipeline status structure + """ + return self._get_attr(STATUS_SCHEMA_SOURCE_KEY) + + @property + def schema_path(self) -> str: + """ + Schema path + + :return str: 
path to the provided schema + """ + return self._schema_path + + @property + def config_path(self) -> str: + """ + Config path. None if the config was not provided or if provided + as a mapping of the config contents + + :return str: path to the provided config + """ + return getattr(self, "_config_path", None) + + @property + def result_schemas(self) -> Dict: + """ + Result schema mappings + + :return dict: schemas that formalize the structure of each result + in a canonical jsonschema way + """ + return self._get_attr(RES_SCHEMAS_KEY) + + @property + def file(self) -> str: + """ + File path that the object is reporting the results into + + :return str: file path that the object is reporting the results into + """ + return self._get_attr(FILE_KEY) + + @property + def data(self) -> YacAttMap: + """ + Data object + + :return yacman.YacAttMap: the object that stores the reported data + """ + return self._get_attr(DATA_KEY) + + @property + @contextmanager + def db_cursor(self): + """ + Establish connection and get a PostgreSQL database cursor, + commit and close the connection afterwards + + :return LoggingCursor: Database cursor object + """ + try: + if not self.check_connection(): + self.establish_postgres_connection() + with self[DB_CONNECTION_KEY] as c, c.cursor( + cursor_factory=LoggingCursor + ) as cur: + yield cur + except Exception: + raise + finally: + self.close_postgres_connection() + + def get_status(self, record_identifier: str = None) -> Optional[str]: + """ + Get the current pipeline status + + :return str: status identifier, like 'running' + """ + r_id = self._strict_record_id(record_identifier) + if self.file is None: + with self.db_cursor as cur: + query = sql.SQL( + f"SELECT {STATUS} " + f"FROM {f'{self.namespace}_{STATUS}'} " + f"WHERE {RECORD_ID}=%s" + ) + cur.execute(query, (r_id,)) + result = cur.fetchone() + return result[0] if result is not None else None + else: + flag_file = self._get_flag_file(record_identifier=r_id) + if flag_file is not None: + assert isinstance(flag_file, str), TypeError( + "Flag file path is expected to be a str, were multiple flags found?" + ) + with open(flag_file, "r") as f: + status = f.read() + return status + _LOGGER.debug( + f"Could not determine status for '{r_id}' record. " + f"No flags found in: {self[STATUS_FILE_DIR]}" + ) + return None + + def _get_attr(self, attr: str) -> Any: + """ + Safely get the name of the selected attribute of this object + + :param str attr: attr to select + :return: + """ + return self[attr] if attr in self else None + + def _table_to_dict(self) -> None: + """ + Create a dictionary from the database table data + """ + with self.db_cursor as cur: + cur.execute(f"SELECT * FROM {self.namespace}") + data = cur.fetchall() + _LOGGER.debug(f"Reading data from database for '{self.namespace}' namespace") + for record in data: + record_id = record[RECORD_ID] + for res_id, val in record.items(): + if val is not None: + self._report_data_element( + record_identifier=record_id, values={res_id: val} + ) + + def _init_postgres_table(self) -> bool: + """ + Initialize a PostgreSQL table based on the provided schema, + if it does not exist. Read the data stored in the database into the + memory otherwise. 
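
`db_cursor` hides the whole connect-use-commit-close cycle from callers. The same pattern in isolation, as a generic psycopg2 sketch (the DSN is a placeholder):

    from contextlib import contextmanager

    import psycopg2

    @contextmanager
    def db_cursor(dsn="dbname=pipestat-test user=postgres"):  # placeholder DSN
        conn = psycopg2.connect(dsn)
        try:
            # the outer 'with' commits on success and rolls back on error
            with conn, conn.cursor() as cur:
                yield cur
        finally:
            conn.close()
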
+ + :return bool: whether the table has been created + """ + if self.schema is None: + raise SchemaNotFoundError("initialize the database table") + if self._check_table_exists(table_name=self.namespace): + _LOGGER.debug(f"Table '{self.namespace}' already exists in the database") + if not self[DB_ONLY_KEY]: + self._table_to_dict() + return False + _LOGGER.info(f"Initializing '{self.namespace}' table in '{PKG_NAME}' database") + columns = FIXED_COLUMNS + schema_to_columns(schema=self.schema) + self._create_table(table_name=self.namespace, columns=columns) + return True + + # def _create_status_type(self): + # with self.db_cursor as cur: + # s = sql.SQL(f"SELECT exists (SELECT 1 FROM pg_type WHERE typname = '{STATUS}');") + # cur.execute(s) + # if cur.fetchone()[0]: + # return + # with self.db_cursor as cur: + # status_strs = [f"'{st_id}'" for st_id in self.status_schema.keys()] + # status_str = ", ".join(status_strs) + # s = sql.SQL(f"CREATE TYPE {STATUS} as enum({status_str});") + # cur.execute(s) + + def _init_status_table(self): + status_table_name = f"{self.namespace}_{STATUS}" + # self._create_status_type() + if not self._check_table_exists(table_name=status_table_name): + _LOGGER.info( + f"Initializing '{status_table_name}' table in " f"'{PKG_NAME}' database" + ) + self._create_table(status_table_name, STATUS_TABLE_COLUMNS) + + def _create_table(self, table_name: str, columns: List[str]): + """ + Create a table + + :param str table_name: name of the table to create + :param str | List[str] columns: columns definition list, + for instance: ['name VARCHAR(50) NOT NULL'] + """ + columns = mk_list_of_str(columns) + with self.db_cursor as cur: + s = sql.SQL(f"CREATE TABLE {table_name} ({','.join(columns)})") + cur.execute(s) + + def _init_results_file(self) -> bool: + """ + Initialize YAML results file if it does not exist. + Read the data stored in the existing file into the memory otherwise. 
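
`_create_table` simply joins the column definitions into one DDL statement. For a schema with a single float result, the composed statement would look roughly like the sketch below; the exact fixed columns live in `FIXED_COLUMNS`, so the first two entries here are assumptions:

    columns = [
        "id BIGSERIAL PRIMARY KEY",       # assumed FIXED_COLUMNS content
        "record_identifier TEXT UNIQUE",  # assumed FIXED_COLUMNS content
        "log10_pvalue FLOAT",             # derived from the results schema
    ]
    statement = f"CREATE TABLE test ({','.join(columns)})"
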
+
+        :return bool: whether the file has been created
+        """
+        if not os.path.exists(self.file):
+            _LOGGER.info(f"Initializing results file '{self.file}'")
+            data = YacAttMap(entries={self.namespace: "{}"})
+            data.write(filepath=self.file)
+            data.make_readonly()
+            self[DATA_KEY] = data
+            return True
+        _LOGGER.debug(f"Reading data from '{self.file}'")
+        data = YacAttMap(filepath=self.file)
+        filtered = list(filter(lambda x: not x.startswith("_"), data.keys()))
+        if filtered and self.namespace not in filtered:
+            raise PipestatDatabaseError(
+                f"'{self.file}' is already used to report results for "
+                f"other namespace: {filtered[0]}"
+            )
+        self[DATA_KEY] = data
+        return False
+
+    def _check_table_exists(self, table_name: str) -> bool:
+        """
+        Check if the specified table exists
+
+        :param str table_name: table name to be checked
+        :return bool: whether the specified table exists
+        """
+        with self.db_cursor as cur:
+            cur.execute(
+                "SELECT EXISTS(SELECT * FROM information_schema.tables "
+                "WHERE table_name=%s)",
+                (table_name,),
+            )
+            return cur.fetchone()[0]
+
+    def _check_record(
+        self, condition_col: str, condition_val: str, table_name: str
+    ) -> bool:
+        """
+        Check if the record matching the condition is in the table
+
+        :param str condition_col: column to base the check on
+        :param str condition_val: value in the selected column
+        :param str table_name: name of the table to check the record in
+        :return bool: whether any record matches the provided condition
+        """
+        with self.db_cursor as cur:
+            statement = (
+                f"SELECT EXISTS(SELECT 1 from {table_name} "
+                f"WHERE {condition_col}=%s)"
+            )
+            cur.execute(statement, (condition_val,))
+            return cur.fetchone()[0]
+
+    def _count_rows(self, table_name: str) -> int:
+        """
+        Count rows in a selected table
+
+        :param str table_name: table to count rows for
+        :return int: number of rows in the selected table
+        """
+        with self.db_cursor as cur:
+            statement = sql.SQL("SELECT COUNT(*) FROM {}").format(
+                sql.Identifier(table_name)
+            )
+            cur.execute(statement)
+            return cur.fetchall()[0][0]
+
+    def _report_postgres(
+        self, value: Dict[str, Any], record_identifier: str, table_name: str = None
+    ) -> int:
+        """
+        Check if a record with this record identifier is in the table; create
+        a new record if not (INSERT), update the record if yes (UPDATE).
+
+        Currently supports just one column at a time.
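
With the file backend, the YAML initialized above grows one mapping per record. After reporting a single result for `sample1` in the `test` namespace, the file would contain roughly the following (values illustrative):

    test:
      sample1:
        log10_pvalue: 8.2
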
+ + :param str record_identifier: unique identifier of the record, value to + in 'record_identifier' column to look for to determine if the record + already exists in the table + :param dict value: a mapping of pair of table column names and + respective values to be inserted to the database + :return int: id of the row just inserted + """ + table_name = table_name or self.namespace + if not self._check_record( + condition_col=RECORD_ID, + condition_val=record_identifier, + table_name=table_name, + ): + with self.db_cursor as cur: + cur.execute( + f"INSERT INTO {table_name} ({RECORD_ID}) VALUES (%s)", + (record_identifier,), + ) + # prep a list of SQL objects with column-named value placeholders + columns = sql.SQL(",").join( + [ + sql.SQL("{}=%({})s").format(sql.Identifier(k), sql.SQL(k)) + for k in list(value.keys()) + ] + ) + # construct the query template to execute + query = sql.SQL("UPDATE {n} SET {c} WHERE {id}=%({id})s RETURNING id").format( + n=sql.Identifier(table_name), c=columns, id=sql.SQL(RECORD_ID) + ) + # preprocess the values, dict -> Json + values = {k: Json(v) if isinstance(v, dict) else v for k, v in value.items()} + # add record_identifier column, which is specified outside of values + values.update({RECORD_ID: record_identifier}) + with self.db_cursor as cur: + cur.execute(query, values) + return cur.fetchone()[0] + + def clear_status( + self, record_identifier: str = None, flag_names: List[str] = None + ) -> List[str]: + """ + Remove status flags + + :param str record_identifier: name of the record to remove flags for + :param Iterable[str] flag_names: Names of flags to remove, optional; if + unspecified, all schema-defined flag names will be used. + :return List[str]: Collection of names of flags removed + """ + r_id = self._strict_record_id(record_identifier) + if self.file is not None: + flag_names = flag_names or list(self.status_schema.keys()) + if isinstance(flag_names, str): + flag_names = [flag_names] + removed = [] + for f in flag_names: + path_flag_file = self.get_status_flag_path( + status_identifier=f, record_identifier=r_id + ) + try: + os.remove(path_flag_file) + except: + pass + else: + _LOGGER.info(f"Removed existing flag: {path_flag_file}") + removed.append(f) + return removed + else: + removed = self.get_status(r_id) + status_table_name = f"{self.namespace}_{STATUS}" + with self.db_cursor as cur: + try: + cur.execute( + f"DELETE FROM {status_table_name} WHERE " + f"{RECORD_ID}='{r_id}'" + ) + except Exception as e: + _LOGGER.error( + f"Could not remove the status from the " + f"database. Exception: {e}" + ) + return [] + else: + return [removed] + + def get_status_flag_path( + self, status_identifier: str, record_identifier=None + ) -> str: + """ + Get the path to the status file flag + + :param str status_identifier: one of the defined status IDs in schema + :param str record_identifier: unique record ID, optional if + specified in the object constructor + :return str: absolute path to the flag file or None if object is + backed by a DB + """ + if self.file is None: + # DB as the backend + return + r_id = self._strict_record_id(record_identifier) + return os.path.join( + self[STATUS_FILE_DIR], f"{self.namespace}_{r_id}_{status_identifier}.flag" + ) + + def set_status(self, status_identifier: str, record_identifier: str = None) -> None: + """ + Set pipeline run status. + + The status identifier needs to match one of identifiers specified in + the status schema. A basic, ready to use, status schema is shipped with + this package. 
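
`_report_postgres` is a two-step upsert: make sure the row exists with a bare INSERT, then UPDATE the named columns and return the row id. Stripped of the psycopg2 SQL composition, the flow reduces to this sketch (assumes a cursor `cur` and a table named `test`):

    def upsert(cur, rid, values, table="test"):
        # step 1: create the row if this record identifier is not present yet
        cur.execute(
            f"SELECT EXISTS(SELECT 1 FROM {table} WHERE record_identifier=%s)", (rid,)
        )
        if not cur.fetchone()[0]:
            cur.execute(f"INSERT INTO {table} (record_identifier) VALUES (%s)", (rid,))
        # step 2: update the reported columns and hand back the row id
        cols = ",".join(f"{k}=%({k})s" for k in values)
        cur.execute(
            f"UPDATE {table} SET {cols} "
            f"WHERE record_identifier=%(record_identifier)s RETURNING id",
            {**values, "record_identifier": rid},
        )
        return cur.fetchone()[0]
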
+ + :param str status_identifier: status to set, one of statuses defined + in the status schema + :param str record_identifier: record identifier to set the + pipeline status for + """ + r_id = self._strict_record_id(record_identifier) + known_status_identifiers = self.status_schema.keys() + if status_identifier not in known_status_identifiers: + raise PipestatError( + f"'{status_identifier}' is not a defined status identifier. " + f"These are allowed: {known_status_identifiers}" + ) + prev_status = self.get_status(r_id) + if self.file is not None: + if prev_status: + prev_flag_path = self.get_status_flag_path(prev_status, r_id) + os.remove(prev_flag_path) + flag_path = self.get_status_flag_path(status_identifier, r_id) + create_lock(flag_path) + with open(flag_path, "w") as f: + f.write(status_identifier) + remove_lock(flag_path) + else: + try: + self._report_postgres( + value={STATUS: status_identifier}, + record_identifier=r_id, + table_name=f"{self.namespace}_{STATUS}", + ) + except Exception as e: + _LOGGER.error( + f"Could not insert into the status table. " f"Exception: {e}" + ) + raise + if prev_status: + _LOGGER.debug( + f"Changed status from '{prev_status}' to '{status_identifier}'" + ) + + def check_result_exists(self, result_identifier, record_identifier=None): + """ + Check if the result has been reported + + :param str record_identifier: unique identifier of the record + :param str result_identifier: name of the result to check + :return bool: whether the specified result has been reported for the + indicated record in current namespace + """ + record_identifier = self._strict_record_id(record_identifier) + return self._check_which_results_exist( + results=[result_identifier], rid=record_identifier + ) + + def _check_which_results_exist( + self, results: List[str], rid: str = None + ) -> List[str]: + """ + Check which results have been reported + + :param str rid: unique identifier of the record + :param List[str] results: names of the results to check + :return List[str]: whether the specified result has been reported for the + indicated record in current namespace + """ + rid = self._strict_record_id(rid) + existing = [] + for r in results: + if not self[DB_ONLY_KEY]: + if ( + self.namespace in self.data + and rid in self.data[self.namespace] + and r in self.data[self.namespace][rid] + ): + existing.append(r) + else: + with self.db_cursor as cur: + try: + cur.execute( + f"SELECT {r} FROM {self.namespace} WHERE {RECORD_ID}=%s", + (rid,), + ) + except Exception: + continue + else: + res = cur.fetchone() + if res is not None and res[0] is not None: + existing.append(r) + return existing + + def check_record_exists(self, record_identifier: str = None) -> bool: + """ + Check if the record exists + + :param str record_identifier: unique identifier of the record + :return bool: whether the record exists + """ + record_identifier = self._strict_record_id(record_identifier) + if self[DB_ONLY_KEY]: + with self.db_cursor as cur: + cur.execute( + f"SELECT exists(SELECT 1 from {self.namespace} " + f"WHERE {RECORD_ID}=%s)", + (record_identifier,), + ) + return cur.fetchone() + if ( + self.namespace in self.data + and record_identifier in self.data[self.namespace] + ): + return True + return False + + def report( + self, + values: Dict[str, Any], + record_identifier: str = None, + force_overwrite: bool = False, + strict_type: bool = True, + return_id: bool = False, + ) -> Union[bool, int]: + """ + Report a result. 
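
Status management in practice, assuming the bundled status schema defines the usual identifiers like `running` and `completed` (an assumption about its content):

    psm.set_status("running", record_identifier="sample1")
    psm.get_status("sample1")                      # 'running'
    # the file backend swaps flags on disk, e.g. test_sample1_running.flag
    psm.clear_status(record_identifier="sample1")  # ['running']
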
+ + :param Dict[str, any] values: dictionary of result-value pairs + :param str record_identifier: unique identifier of the record, value + in 'record_identifier' column to look for to determine if the record + already exists + :param bool force_overwrite: whether to overwrite the existing record + :param bool strict_type: whether the type of the reported values should + remain as is. Pipestat would attempt to convert to the + schema-defined one otherwise + :param bool return_id: PostgreSQL IDs of the records that have been + updated. Not available with results file as backend + :return bool | int: whether the result has been reported or the ID of + the updated record in the table, if requested + """ + record_identifier = self._strict_record_id(record_identifier) + if return_id and self.file is not None: + raise NotImplementedError( + "There is no way to return the updated object ID while using " + "results file as the object backend" + ) + if self.schema is None: + raise SchemaNotFoundError("report results") + updated_ids = False + result_identifiers = list(values.keys()) + self.assert_results_defined(results=result_identifiers) + existing = self._check_which_results_exist( + rid=record_identifier, results=result_identifiers + ) + if existing: + _LOGGER.warning( + f"These results exist for '{record_identifier}': {existing}" + ) + if not force_overwrite: + return False + _LOGGER.info(f"Overwriting existing results: {existing}") + for r in result_identifiers: + validate_type( + value=values[r], schema=self.result_schemas[r], strict_type=strict_type + ) + if self.file is not None: + self.data.make_writable() + if not self[DB_ONLY_KEY]: + self._report_data_element( + record_identifier=record_identifier, values=values + ) + if self.file is not None: + self.data.write() + self.data.make_readonly() + else: + try: + updated_ids = self._report_postgres( + record_identifier=record_identifier, value=values + ) + except Exception as e: + _LOGGER.error( + f"Could not insert the result into the database. " f"Exception: {e}" + ) + if not self[DB_ONLY_KEY]: + for r in result_identifiers: + del self[DATA_KEY][self.namespace][record_identifier][r] + raise + nl = "\n" + rep_strs = [f"{k}: {v}" for k, v in values.items()] + _LOGGER.info( + f"Reported records for '{record_identifier}' in '{self.namespace}' " + f"namespace:{nl} - {(nl + ' - ').join(rep_strs)}" + ) + return True if not return_id else updated_ids + + def _report_data_element( + self, record_identifier: str, values: Dict[str, Any] + ) -> None: + """ + Update the value of a result in a current namespace. + + This method overwrites any existing data and creates the required + hierarchical mapping structure if needed. + + :param str record_identifier: unique identifier of the record + :param Dict[str, Any] values: dict of results identifiers and values + to be reported + """ + self[DATA_KEY].setdefault(self.namespace, PXAM()) + self[DATA_KEY][self.namespace].setdefault(record_identifier, PXAM()) + for res_id, val in values.items(): + self[DATA_KEY][self.namespace][record_identifier][res_id] = val + + def select( + self, + columns: Union[str, List[str]] = None, + condition: str = None, + condition_val: str = None, + offset: int = None, + limit: int = None, + ) -> List[psycopg2.extras.DictRow]: + """ + Get all the contents from the selected table, possibly restricted by + the provided condition. 
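
A typical reporting round trip, assuming `log10_pvalue` is declared in the schema used above:

    psm.report(values={"log10_pvalue": 8.2}, record_identifier="sample1")  # True
    psm.report(values={"log10_pvalue": 8.3}, record_identifier="sample1")  # False; exists
    psm.report(
        values={"log10_pvalue": 8.3},
        record_identifier="sample1",
        force_overwrite=True,
    )                                                                      # True
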
+ + :param str | List[str] columns: columns to select + :param str condition: condition to restrict the results + with, will be appended to the end of the SELECT statement and + safely populated with 'condition_val', + for example: `"id=%s"` + :param list condition_val: values to fill the placeholder + in 'condition' with + :param int offset: number of records to be skipped + :param int limit: max number of records to be returned + :return List[psycopg2.extras.DictRow]: all table contents + """ + if self.file: + raise NotImplementedError( + "Selection is not supported on objects backed by results files." + " Use 'retrieve' method instead." + ) + condition, condition_val = preprocess_condition_pair(condition, condition_val) + if not columns: + columns = sql.SQL("*") + else: + columns = sql.SQL(",").join( + [sql.Identifier(x) for x in mk_list_of_str(columns)] + ) + statement = sql.SQL("SELECT {} FROM {}").format( + columns, sql.Identifier(self.namespace) + ) + if condition: + statement += sql.SQL(" WHERE ") + statement += condition + statement = paginate_query(statement, offset, limit) + with self.db_cursor as cur: + cur.execute(query=statement, vars=condition_val) + result = cur.fetchall() + return result + + def retrieve( + self, record_identifier: str = None, result_identifier: str = None + ) -> Union[Any, Dict[str, Any]]: + """ + Retrieve a result for a record. + + If no result ID specified, results for the entire record will + be returned. + + :param str record_identifier: unique identifier of the record + :param str result_identifier: name of the result to be retrieved + :return any | Dict[str, any]: a single result or a mapping with all the + results reported for the record + """ + record_identifier = self._strict_record_id(record_identifier) + if self[DB_ONLY_KEY]: + if result_identifier is not None: + existing = self._check_which_results_exist( + results=[result_identifier], rid=record_identifier + ) + if not existing: + raise PipestatDatabaseError( + f"Result '{result_identifier}' not found for record " + f"'{record_identifier}'" + ) + with self.db_cursor as cur: + query = sql.SQL( + f"SELECT {result_identifier or '*'} " + f"FROM {self.namespace} WHERE {RECORD_ID}=%s" + ) + cur.execute(query, (record_identifier,)) + result = cur.fetchall() + if len(result) > 0: + if result_identifier is None: + return {k: v for k, v in dict(result[0]).items() if v is not None} + return dict(result[0])[result_identifier] + raise PipestatDatabaseError(f"Record '{record_identifier}' not found") + else: + if record_identifier not in self.data[self.namespace]: + raise PipestatDatabaseError(f"Record '{record_identifier}' not found") + if result_identifier is None: + return self.data[self.namespace][record_identifier].to_dict() + if result_identifier not in self.data[self.namespace][record_identifier]: + raise PipestatDatabaseError( + f"Result '{result_identifier}' not found for record " + f"'{record_identifier}'" + ) + return self.data[self.namespace][record_identifier][result_identifier] + + def remove( + self, record_identifier: str = None, result_identifier: str = None + ) -> bool: + """ + Remove a result. + + If no result ID specified or last result is removed, the entire record + will be removed. + + :param str record_identifier: unique identifier of the record + :param str result_identifier: name of the result to be removed or None + if the record should be removed. 
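
`select` composes a parameterized, optionally paginated SELECT and is available only with the DB backend, while `retrieve` works with either backend; a usage sketch:

    # DB backend only
    psm.select(
        columns=["record_identifier", "log10_pvalue"],
        condition="log10_pvalue>%s",  # placeholder filled safely server-side
        condition_val=[5],
        limit=10,
    )
    # either backend: one result, or the whole record as a dict
    psm.retrieve(record_identifier="sample1", result_identifier="log10_pvalue")
    psm.retrieve(record_identifier="sample1")
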
+        :return bool: whether the result has been removed
+        """
+        record_identifier = self._strict_record_id(record_identifier)
+        rm_record = True if result_identifier is None else False
+        if not self.check_record_exists(record_identifier):
+            _LOGGER.error(f"Record '{record_identifier}' not found")
+            return False
+        if result_identifier and not self.check_result_exists(
+            result_identifier, record_identifier
+        ):
+            _LOGGER.error(
+                f"'{result_identifier}' has not been reported for "
+                f"'{record_identifier}'"
+            )
+            return False
+        if self.file:
+            self.data.make_writable()
+        if not self[DB_ONLY_KEY]:
+            if rm_record:
+                _LOGGER.info(f"Removing '{record_identifier}' record")
+                del self[DATA_KEY][self.namespace][record_identifier]
+            else:
+                val_backup = self[DATA_KEY][self.namespace][record_identifier][
+                    result_identifier
+                ]
+                del self[DATA_KEY][self.namespace][record_identifier][result_identifier]
+                _LOGGER.info(
+                    f"Removed result '{result_identifier}' for record "
+                    f"'{record_identifier}' from '{self.namespace}' namespace"
+                )
+                if not self[DATA_KEY][self.namespace][record_identifier]:
+                    _LOGGER.info(
+                        f"Last result removed for '{record_identifier}'. "
+                        f"Removing the record"
+                    )
+                    del self[DATA_KEY][self.namespace][record_identifier]
+                    rm_record = True
+        if self.file:
+            self.data.write()
+            self.data.make_readonly()
+        if self.file is None:
+            if rm_record:
+                try:
+                    with self.db_cursor as cur:
+                        cur.execute(
+                            f"DELETE FROM {self.namespace} WHERE "
+                            f"{RECORD_ID}='{record_identifier}'"
+                        )
+                except Exception as e:
+                    _LOGGER.error(
+                        f"Could not remove the result from the "
+                        f"database. Exception: {e}"
+                    )
+                    self[DATA_KEY][self.namespace].setdefault(record_identifier, PXAM())
+                    raise
+                return True
+            try:
+                with self.db_cursor as cur:
+                    cur.execute(
+                        f"UPDATE {self.namespace} SET {result_identifier}=null "
+                        f"WHERE {RECORD_ID}='{record_identifier}'"
+                    )
+            except Exception as e:
+                _LOGGER.error(
+                    f"Could not remove the result from the database. " f"Exception: {e}"
+                )
+                if not self[DB_ONLY_KEY]:
+                    self[DATA_KEY][self.namespace][record_identifier][
+                        result_identifier
+                    ] = val_backup
+                raise
+            return True
+
+    def validate_schema(self) -> None:
+        """
+        Check schema for any possible issues
+
+        :raises SchemaError: if any schema format issue is detected
+        """
+
+        def _recursively_replace_custom_types(s: dict) -> Dict:
+            """
+            Replace the custom types in pipestat schema with canonical types
+
+            :param dict s: schema to replace types in
+            :return dict: schema with types replaced
+            """
+            for k, v in s.items():
+                assert SCHEMA_TYPE_KEY in v, SchemaError(
+                    f"Result '{k}' is missing '{SCHEMA_TYPE_KEY}' key"
+                )
+                if v[SCHEMA_TYPE_KEY] == "object" and SCHEMA_PROP_KEY in s[k]:
+                    _recursively_replace_custom_types(s[k][SCHEMA_PROP_KEY])
+                if v[SCHEMA_TYPE_KEY] in CANONICAL_TYPES.keys():
+                    s.setdefault(k, {})
+                    s[k].setdefault(SCHEMA_PROP_KEY, {})
+                    s[k][SCHEMA_PROP_KEY].update(
+                        CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]][SCHEMA_PROP_KEY]
+                    )
+                    s[k].setdefault("required", [])
+                    s[k]["required"].extend(
+                        CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]]["required"]
+                    )
+                    s[k][SCHEMA_TYPE_KEY] = CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]][
+                        SCHEMA_TYPE_KEY
+                    ]
+            return s
+
+        schema = deepcopy(self.schema)
+        _LOGGER.debug(f"Validating input schema")
+        assert isinstance(schema, dict), SchemaError(
+            f"The schema has to be a {dict().__class__.__name__}"
+        )
+        for col_name in RESERVED_COLNAMES:
+            assert col_name not in schema.keys(), PipestatError(
+                f"'{col_name}' is an identifier reserved by pipestat"
+            )
+        self[RES_SCHEMAS_KEY] = {}
+        schema = _recursively_replace_custom_types(schema)
+        self[RES_SCHEMAS_KEY] = schema
+
+    def assert_results_defined(self, results: List[str]) -> None:
+        """
+        Assert provided list of results is defined in the schema
+
+        :param List[str] results: list of results to
+            check for existence in the schema
+        :raises SchemaError: if any of the results is not defined in the schema
+        """
+        known_results = self.result_schemas.keys()
+        for r in results:
+            assert r in known_results, SchemaError(
+                f"'{r}' is not a known result. Results defined in the "
+                f"schema are: {list(known_results)}."
+ ) + + def check_connection(self) -> bool: + """ + Check whether a PostgreSQL connection has been established + + :return bool: whether the connection has been established + """ + if self.file is not None: + raise PipestatDatabaseError( + f"The {self.__class__.__name__} object " f"is not backed by a database" + ) + if DB_CONNECTION_KEY in self and isinstance( + self[DB_CONNECTION_KEY], psycopg2.extensions.connection + ): + return True + return False + + def establish_postgres_connection(self, suppress: bool = False) -> bool: + """ + Establish PostgreSQL connection using the config data + + :param bool suppress: whether to suppress any connection errors + :return bool: whether the connection has been established successfully + """ + if self.check_connection(): + raise PipestatDatabaseError( + f"Connection is already established: " + f"{self[DB_CONNECTION_KEY].info.host}" + ) + try: + self[DB_CONNECTION_KEY] = psycopg2.connect( + dbname=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_NAME_KEY], + user=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_USER_KEY], + password=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PASSWORD_KEY], + host=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY], + port=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PORT_KEY], + ) + except psycopg2.Error as e: + _LOGGER.error( + f"Could not connect to: " + f"{self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY]}" + ) + _LOGGER.info(f"Caught error: {e}") + if suppress: + return False + raise + else: + _LOGGER.debug( + f"Established connection with PostgreSQL: " + f"{self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY]}" + ) + return True + + def close_postgres_connection(self) -> None: + """ + Close connection and remove client bound + """ + if not self.check_connection(): + raise PipestatDatabaseError( + f"The connection has not been established: " + f"{self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY]}" + ) + self[DB_CONNECTION_KEY].close() + del self[DB_CONNECTION_KEY] + _LOGGER.debug( + f"Closed connection with PostgreSQL: " + f"{self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY]}" + ) + + def _strict_record_id(self, forced_value: str = None) -> str: + """ + Get record identifier from the outer source or stored with this object + + :param str forced_value: return this value + :return str: record identifier + """ + if forced_value is not None: + return forced_value + if self.record_identifier is not None: + return self.record_identifier + raise PipestatError( + f"You must provide the record identifier you want to perform " + f"the action on. Either in the {self.__class__.__name__} " + f"constructor or as an argument to the method." 
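
The credentials read here come from the `database` section of the config file; its expected shape is roughly the following YAML (key names inferred from the `CFG_*_KEY` constants, values are placeholders):

    database:
      name: pipestat-test
      user: postgres
      password: pipestat-password
      host: localhost
      port: 5432
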
+ ) diff --git a/pipestat/pipestat_orm.py b/pipestat/pipestat_orm.py deleted file mode 100644 index 01f5dfb1..00000000 --- a/pipestat/pipestat_orm.py +++ /dev/null @@ -1,936 +0,0 @@ -from contextlib import contextmanager -from copy import deepcopy -from logging import getLogger -from typing import Any, Dict, List, Optional, Union -from urllib.parse import quote_plus - -from attmap import PathExAttMap as PXAM -from jsonschema import validate -from sqlalchemy import Column, Float, ForeignKey, Integer, String, Table, create_engine -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import DeclarativeMeta, relationship, sessionmaker -from ubiquerg import create_lock, remove_lock -from yacman import YacAttMap - -from .const import * -from .exceptions import * -from .helpers import * - -_LOGGER = getLogger(PKG_NAME) - - -class PipestatManagerORM(dict): - """ - Pipestat standardizes reporting of pipeline results and - pipeline status management. It formalizes a way for pipeline developers - and downstream tools developers to communicate -- results produced by a - pipeline can easily and reliably become an input for downstream analyses. - The object exposes API for interacting with the results and - pipeline status and can be backed by either a YAML-formatted file - or a database. - """ - - def __init__( - self, - namespace: str = None, - record_identifier: str = None, - schema_path: str = None, - results_file_path: str = None, - database_only: bool = False, - config: Union[str, dict] = None, - status_schema_path: str = None, - flag_file_dir: str = None, - ): - """ - Initialize the object - - :param str namespace: namespace to report into. This will be the DB - table name if using DB as the object back-end - :param str record_identifier: record identifier to report for. 
This - creates a weak bound to the record, which can be overriden in - this object method calls - :param str schema_path: path to the output schema that formalizes - the results structure - :param str results_file_path: YAML file to report into, if file is - used as the object back-end - :param bool database_only: whether the reported data should not be - stored in the memory, but only in the database - :param str | dict config: path to the configuration file or a mapping - with the config file content - :param str status_schema_path: path to the status schema that formalizes - the status flags structure - """ - - def _check_cfg_key(cfg: dict, key: str) -> bool: - if key not in cfg: - _LOGGER.warning(f"Key '{key}' not found in config") - return False - return True - - def _mk_abs_via_cfg( - path: Optional[str], - cfg_path: Optional[str], - ) -> Optional[str]: - if path is None: - return path - assert isinstance(path, str), TypeError("Path is expected to be a str") - if os.path.isabs(path): - return path - if cfg_path is None: - rel_to_cwd = os.path.join(os.getcwd(), path) - if os.path.exists(rel_to_cwd) or os.access( - os.path.dirname(rel_to_cwd), os.W_OK - ): - return rel_to_cwd - raise OSError(f"Could not make this path absolute: {path}") - joined = os.path.join(os.path.dirname(cfg_path), path) - if os.path.isabs(joined): - return joined - raise OSError(f"Could not make this path absolute: {path}") - - def _select_value( - arg_name: str, - arg_value: Any, - cfg: dict, - strict: bool = True, - env_var: str = None, - ) -> Any: - if arg_value is not None: - return arg_value - if arg_name not in cfg or cfg[arg_name] is None: - if env_var is not None: - arg = os.getenv(env_var, None) - if arg is not None: - _LOGGER.debug(f"Value '{arg}' sourced from '{env_var}' env var") - return expandpath(arg) - if strict: - raise PipestatError( - f"Value for the required '{arg_name}' argument could not be" - f" determined. Provide it in the config or pass to the " - f"object constructor." 
- ) - return - return cfg[arg_name] - - super(PipestatManagerORM, self).__init__() - self[CONFIG_KEY] = YacAttMap() - # read config or config data - config = config or os.getenv(ENV_VARS["config"]) - if config is not None: - if isinstance(config, str): - config = os.path.abspath(expandpath(config)) - self[CONFIG_KEY] = YacAttMap(filepath=config) - self._config_path = config - elif isinstance(config, dict): - self[CONFIG_KEY] = YacAttMap(entries=config) - self._config_path = None - else: - raise TypeError( - "database_config has to be either path to the " - "file to read or a dict" - ) - # validate config - cfg = self[CONFIG_KEY].to_dict(expand=True) - _, cfg_schema = read_yaml_data(CFG_SCHEMA, "config schema") - validate(cfg, cfg_schema) - - self[NAME_KEY] = _select_value( - "namespace", namespace, self[CONFIG_KEY], env_var=ENV_VARS["namespace"] - ) - self[RECORD_ID_KEY] = _select_value( - "record_identifier", - record_identifier, - self[CONFIG_KEY], - False, - ENV_VARS["record_identifier"], - ) - self[DB_ONLY_KEY] = database_only - # read results schema - self._schema_path = _select_value( - "schema_path", - schema_path, - self[CONFIG_KEY], - False, - env_var=ENV_VARS["schema"], - ) - if self._schema_path is not None: - _, self[SCHEMA_KEY] = read_yaml_data( - _mk_abs_via_cfg(self._schema_path, self.config_path), "schema" - ) - self.validate_schema() - # determine the highlighted results - self[HIGHLIGHTED_KEY] = [ - k - for k, v in self.schema.items() - if "highlight" in v and v["highlight"] is True - ] - if self[HIGHLIGHTED_KEY]: - assert isinstance(self[HIGHLIGHTED_KEY], list), TypeError( - f"highlighted results specification " - f"({self[HIGHLIGHTED_KEY]}) has to be a list" - ) - # read status schema - status_schema_path = ( - _mk_abs_via_cfg( - _select_value( - "status_schema_path", - status_schema_path, - self[CONFIG_KEY], - False, - env_var=ENV_VARS["status_schema"], - ), - self.config_path, - ) - or STATUS_SCHEMA - ) - self[STATUS_SCHEMA_SOURCE_KEY], self[STATUS_SCHEMA_KEY] = read_yaml_data( - status_schema_path, "status schema" - ) - # determine results file - results_file_path = _mk_abs_via_cfg( - _select_value( - "results_file_path", - results_file_path, - self[CONFIG_KEY], - False, - ENV_VARS["results_file"], - ), - self.config_path, - ) - if results_file_path: - if self[DB_ONLY_KEY]: - raise ValueError( - "Running in database only mode does not make " - "sense with a YAML file as a backend." 
- ) - self[FILE_KEY] = results_file_path - self._init_results_file() - flag_file_dir = _select_value( - "flag_file_dir", flag_file_dir, self[CONFIG_KEY], False - ) or os.path.dirname(self.file) - self[STATUS_FILE_DIR] = _mk_abs_via_cfg(flag_file_dir, self.config_path) - elif CFG_DATABASE_KEY in self[CONFIG_KEY]: - if not all( - [ - _check_cfg_key(self[CONFIG_KEY][CFG_DATABASE_KEY], key) - for key in DB_CREDENTIALS - ] - ): - raise MissingConfigDataError( - "Must specify all database login " "credentials or result_file_path" - ) - self[DB_ORMS_KEY] = {} - self[DB_BASE_KEY] = declarative_base() - self[DATA_KEY] = YacAttMap() - self._init_db_table() - self._init_status_table() - else: - raise MissingConfigDataError( - "Must specify either database login " "credentials or a YAML file path" - ) - - def _get_flag_file( - self, record_identifier: str = None - ) -> Union[str, List[str], None]: - """ - Get path to the status flag file for the specified record - - :param str record_identifier: unique record identifier - :return str | list[str] | None: path to the status flag file - """ - from glob import glob - - r_id = self._strict_record_id(record_identifier) - if self.file is None: - return - if self.file is not None: - regex = os.path.join( - self[STATUS_FILE_DIR], f"{self.namespace}_{r_id}_*.flag" - ) - file_list = glob(regex) - if len(file_list) > 1: - _LOGGER.warning("Multiple flag files found") - return file_list - elif len(file_list) == 1: - return file_list[0] - else: - _LOGGER.debug("No flag files found") - return None - - @property - def record_count(self) -> int: - """ - Number of records reported - - :return int: number of records reported - """ - return ( - len(self.data[self.namespace]) - if self.file - else self._count_rows(self.namespace) - ) - - @property - def highlighted_results(self) -> List[str]: - """ - Highlighted results - - :return List[str]: a collection of highlighted results - """ - return self._get_attr(HIGHLIGHTED_KEY) or [] - - @property - def namespace(self) -> str: - """ - Namespace the object writes the results to - - :return str: namespace the object writes the results to - """ - return self._get_attr(NAME_KEY) - - @property - def record_identifier(self) -> str: - """ - Unique identifier of the record - - :return str: unique identifier of the record - """ - return self._get_attr(RECORD_ID_KEY) - - @property - def schema(self) -> Dict: - """ - Schema mapping - - :return dict: schema that formalizes the results structure - """ - return self._get_attr(SCHEMA_KEY) - - @property - def status_schema(self) -> Dict: - """ - Status schema mapping - - :return dict: schema that formalizes the pipeline status structure - """ - return self._get_attr(STATUS_SCHEMA_KEY) - - @property - def status_schema_source(self) -> Dict: - """ - Status schema source - - :return dict: source of the schema that formalizes - the pipeline status structure - """ - return self._get_attr(STATUS_SCHEMA_SOURCE_KEY) - - @property - def schema_path(self) -> str: - """ - Schema path - - :return str: path to the provided schema - """ - return self._schema_path - - @property - def config_path(self) -> str: - """ - Config path. 
None if the config was not provided or if provided - as a mapping of the config contents - - :return str: path to the provided config - """ - return getattr(self, "_config_path", None) - - @property - def result_schemas(self) -> Dict: - """ - Result schema mappings - - :return dict: schemas that formalize the structure of each result - in a canonical jsonschema way - """ - return self._get_attr(RES_SCHEMAS_KEY) - - @property - def file(self) -> str: - """ - File path that the object is reporting the results into - - :return str: file path that the object is reporting the results into - """ - return self._get_attr(FILE_KEY) - - @property - def data(self) -> YacAttMap: - """ - Data object - - :return yacman.YacAttMap: the object that stores the reported data - """ - return self._get_attr(DATA_KEY) - - @property - def db_url(self) -> str: - """ - Database URL, generated based on config credentials - - :return str: database URL - """ - try: - creds = dict( - name=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_NAME_KEY], - user=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_USER_KEY], - passwd=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PASSWORD_KEY], - host=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY], - port=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PORT_KEY], - dialect=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_DIALECT_KEY], - ) - except (KeyError, AttributeError) as e: - raise PipestatDatabaseError( - f"Could not determine database URL. Caught error: {str(e)}" - ) - parsed_creds = {k: quote_plus(str(v)) for k, v in creds.items()} - return "{dialect}://{user}:{passwd}@{host}:{port}/{name}".format(**parsed_creds) - - @property - @contextmanager - def session(self): - """ - Provide a transactional scope around a series of query - operations, no commit afterwards. - """ - if not self.is_db_connected(): - self.establish_db_connection_orm() - with self[DB_SESSION_KEY]() as session: - _LOGGER.debug("Created session") - yield session - _LOGGER.debug("Ending session") - - def _strict_record_id(self, forced_value: str = None) -> str: - """ - Get record identifier from the outer source or stored with this object - - :param str forced_value: return this value - :return str: record identifier - """ - if forced_value is not None: - return forced_value - if self.record_identifier is not None: - return self.record_identifier - raise PipestatError( - f"You must provide the record identifier you want to perform " - f"the action on. Either in the {self.__class__.__name__} " - f"constructor or as an argument to the method." 
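
The `db_url` property percent-encodes each credential before templating, so passwords with special characters survive URL assembly; the same logic standalone:

    from urllib.parse import quote_plus

    creds = dict(dialect="postgresql", user="postgres", passwd="p@ss:word",
                 host="localhost", port=5432, name="pipestat-test")
    url = "{dialect}://{user}:{passwd}@{host}:{port}/{name}".format(
        **{k: quote_plus(str(v)) for k, v in creds.items()}
    )
    # 'postgresql://postgres:p%40ss%3Aword@localhost:5432/pipestat-test'
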
- ) - - def _create_table_orm(self, table_name: str, schema: Dict[str, Any]): - """ - Create a table - - :param str table_name: name of the table to create - :param Dict[str, Any] schema: schema to base table creation on - """ - - def _auto_repr(x: Any) -> str: - """ - Auto-generated __repr__ fun - - :param Any x: object to generate __repr__ method for - :return str: string object representation - """ - attr_strs = [ - f"{k}={str(v)}" for k, v in x.__dict__.items() if not k.startswith("_") - ] - return "<{}: {}>".format(x.__class__.__name__, ", ".join(attr_strs)) - - tn = table_name or self.namespace - attr_dict = dict( - __tablename__=tn, - id=Column(Integer, primary_key=True), - record_identifier=Column(String, unique=True), - ) - for result_id, result_metadata in schema.items(): - col_type = SQL_CLASSES_BY_TYPE[result_metadata[SCHEMA_TYPE_KEY]] - _LOGGER.debug(f"Adding object: {result_id} of type: {str(col_type)}") - attr_dict.update({result_id: Column(col_type)}) - attr_dict.update({"__repr__": _auto_repr}) - _LOGGER.debug(f"Creating '{tn}' ORM with args: {attr_dict}") - self[DB_ORMS_KEY][tn] = type(tn.capitalize(), (self[DB_BASE_KEY],), attr_dict) - self[DB_BASE_KEY].metadata.create_all(bind=self[DB_ENGINE_KEY]) - - def establish_db_connection_orm(self) -> bool: - """ - Establish DB connection using the config data - - :return bool: whether the connection has been established successfully - """ - if self.is_db_connected(): - raise PipestatDatabaseError("Connection is already established") - - self[DB_ENGINE_KEY] = create_engine(self.db_url, echo=True) - self[DB_SESSION_KEY] = sessionmaker(bind=self[DB_ENGINE_KEY]) - return True - - def is_db_connected(self) -> bool: - """ - Check whether a DB connection has been established - - :return bool: whether the connection has been established - """ - if self.file is not None: - raise PipestatDatabaseError( - f"The {self.__class__.__name__} object is not backed by a database" - ) - if DB_SESSION_KEY in self and isinstance(self[DB_SESSION_KEY], sessionmaker): - return True - return False - - def set_status(self, status_identifier: str, record_identifier: str = None) -> None: - """ - Set pipeline run status. - - The status identifier needs to match one of identifiers specified in - the status schema. A basic, ready to use, status schema is shipped with - this package. - - :param str status_identifier: status to set, one of statuses defined - in the status schema - :param str record_identifier: record identifier to set the - pipeline status for - """ - r_id = self._strict_record_id(record_identifier) - known_status_identifiers = self.status_schema.keys() - if status_identifier not in known_status_identifiers: - raise PipestatError( - f"'{status_identifier}' is not a defined status identifier. " - f"These are allowed: {known_status_identifiers}" - ) - prev_status = self.get_status(r_id) - if self.file is not None: - if prev_status: - prev_flag_path = self.get_status_flag_path(prev_status, r_id) - os.remove(prev_flag_path) - flag_path = self.get_status_flag_path(status_identifier, r_id) - create_lock(flag_path) - with open(flag_path, "w") as f: - f.write(status_identifier) - remove_lock(flag_path) - else: - try: - self._report_db( - values={STATUS: status_identifier}, - record_identifier=r_id, - table_name=f"{self.namespace}_{STATUS}", - ) - except Exception as e: - _LOGGER.error(f"Could not insert into the status table. 
Exception: {e}") - raise - if prev_status: - _LOGGER.debug( - f"Changed status from '{prev_status}' to '{status_identifier}'" - ) - - def get_status(self, record_identifier: str = None) -> Optional[str]: - """ - Get the current pipeline status - - :return str: status identifier, like 'running' - """ - r_id = self._strict_record_id(record_identifier) - if self.file is None: - try: - result = self._retrieve_db( - result_identifier=STATUS, - record_identifier=r_id, - table_name=f"{self.namespace}_{STATUS}", - ) - except PipestatDatabaseError: - return None - return result[STATUS] - else: - flag_file = self._get_flag_file(record_identifier=r_id) - if flag_file is not None: - assert isinstance(flag_file, str), TypeError( - "Flag file path is expected to be a str, were multiple flags found?" - ) - with open(flag_file, "r") as f: - status = f.read() - return status - _LOGGER.debug( - f"Could not determine status for '{r_id}' record. " - f"No flags found in: {self[STATUS_FILE_DIR]}" - ) - return None - - def clear_status( - self, record_identifier: str = None, flag_names: List[str] = None - ) -> List[str]: - """ - Remove status flags - - :param str record_identifier: name of the record to remove flags for - :param Iterable[str] flag_names: Names of flags to remove, optional; if - unspecified, all schema-defined flag names will be used. - :return List[str]: Collection of names of flags removed - """ - r_id = self._strict_record_id(record_identifier) - if self.file is not None: - flag_names = flag_names or list(self.status_schema.keys()) - if isinstance(flag_names, str): - flag_names = [flag_names] - removed = [] - for f in flag_names: - path_flag_file = self.get_status_flag_path( - status_identifier=f, record_identifier=r_id - ) - try: - os.remove(path_flag_file) - except: - pass - else: - _LOGGER.info(f"Removed existing flag: {path_flag_file}") - removed.append(f) - return removed - else: - removed = self.get_status(r_id) - try: - self._remove_db( - record_identifier=r_id, - table_name=f"{self.namespace}_{STATUS}", - ) - except Exception as e: - _LOGGER.error( - f"Could not remove the status from the database. 
Exception: {e}" - ) - return [] - else: - return [removed] - - def validate_schema(self) -> None: - """ - Check schema for any possible issues - - :raises SchemaError: if any schema format issue is detected - """ - - def _recursively_replace_custom_types(s: dict) -> Dict: - """ - Replace the custom types in pipestat schema with canonical types - - :param dict s: schema to replace types in - :return dict: schema with types replaced - """ - for k, v in s.items(): - assert SCHEMA_TYPE_KEY in v, SchemaError( - f"Result '{k}' is missing '{SCHEMA_TYPE_KEY}' key" - ) - if v[SCHEMA_TYPE_KEY] == "object" and SCHEMA_PROP_KEY in s[k]: - _recursively_replace_custom_types(s[k][SCHEMA_PROP_KEY]) - if v[SCHEMA_TYPE_KEY] in CANONICAL_TYPES.keys(): - s.setdefault(k, {}) - s[k].setdefault(SCHEMA_PROP_KEY, {}) - s[k][SCHEMA_PROP_KEY].update( - CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]][SCHEMA_PROP_KEY] - ) - s[k].setdefault("required", []) - s[k]["required"].extend( - CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]]["required"] - ) - s[k][SCHEMA_TYPE_KEY] = CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]][ - SCHEMA_TYPE_KEY - ] - return s - - schema = deepcopy(self.schema) - _LOGGER.debug(f"Validating input schema") - assert isinstance(schema, dict), SchemaError( - f"The schema has to be a {dict().__class__.__name__}" - ) - for col_name in RESERVED_COLNAMES: - assert col_name not in schema.keys(), PipestatError( - f"'{col_name}' is an identifier reserved by pipestat" - ) - self[RES_SCHEMAS_KEY] = {} - schema = _recursively_replace_custom_types(schema) - self[RES_SCHEMAS_KEY] = schema - - def _init_db_table(self) -> bool: - """ - Initialize a database table based on the provided schema, - if it does not exist. Read the data stored in the database into the - memory otherwise. - - :return bool: whether the table has been created - """ - if self.schema is None: - raise SchemaNotFoundError("initialize the database table") - if not self.is_db_connected(): - self.establish_db_connection_orm() - # if self._check_table_exists(table_name=self.namespace): - # _LOGGER.debug(f"Table '{self.namespace}' already exists in the database") - # if not self[DB_ONLY_KEY]: - # self._table_to_dict() - # # return False - _LOGGER.info(f"Initializing '{self.namespace}' table in '{PKG_NAME}' database") - self._create_table_orm(table_name=self.namespace, schema=self.result_schemas) - return True - - def _init_status_table(self): - status_table_name = f"{self.namespace}_{STATUS}" - if not self.is_db_connected(): - self.establish_db_connection_orm() - # if not self._check_table_exists(table_name=status_table_name): - _LOGGER.debug( - f"Initializing '{status_table_name}' table in " f"'{PKG_NAME}' database" - ) - self._create_table_orm( - table_name=status_table_name, - schema=get_status_table_schema(status_schema=self.status_schema), - ) - - def _get_attr(self, attr: str) -> Any: - """ - Safely get the name of the selected attribute of this object - - :param str attr: attr to select - :return: - """ - return self[attr] if attr in self else None - - def _check_table_exists(self, table_name: str) -> bool: - """ - Check if the specified table exists - - :param str table_name: table name to be checked - :return bool: whether the specified table exists - """ - from sqlalchemy import inspect - - with self.session as s: - return inspect(s.bind).has_table(table_name=table_name) - - def _count_rows(self, table_name: str) -> int: - """ - Count rows in a selected table - - :param str table_name: table to count rows for - :return int: number of rows in the selected table - """ - with 
self.session as s: - return s.query(self[DB_ORMS_KEY][table_name].id).count() - - def _get_orm(self, table_name: str = None) -> Any: - """ - Get an object relational mapper class - - :param str table_name: table name to get a class for - :return Any: Object relational mapper class - """ - if DB_ORMS_KEY not in self: - raise PipestatDatabaseError("Object relational mapper classes not defined") - tn = f"{table_name or self.namespace}" - if tn not in self[DB_ORMS_KEY]: - raise PipestatDatabaseError( - f"No object relational mapper class defined for table: {tn}" - ) - if not isinstance(self[DB_ORMS_KEY][tn], DeclarativeMeta): - raise PipestatDatabaseError( - f"Object relational mapper class for table '{tn}' is invalid" - ) - return self[DB_ORMS_KEY][tn] - - def check_record_exists( - self, record_identifier: str, table_name: str = None - ) -> bool: - """ - Check if the specified record exists in the table - - :param str record_identifier: record to check for - :param str table_name: table name to check - :return bool: whether the record exists in the table - """ - with self.session as s: - return ( - s.query(self._get_orm(table_name).id) - .filter_by(record_identifier=record_identifier) - .first() - is not None - ) - - def check_which_results_exist( - self, results: List[str], rid: str = None, table_name: str = None - ) -> List[str]: - """ - Check if the specified results exist in the table - - :param str rid: record to check for - :param List[str] results: results identifiers to check for - :param str table_name: name of the table to search for results in - :return List[str]: results identifiers that exist - """ - table_name = table_name or self.namespace - rid = self._strict_record_id(rid) - with self.session as s: - record = ( - s.query(self._get_orm(table_name)) - .filter_by(record_identifier=rid) - .first() - ) - return [r for r in results if getattr(record, r, None) is not None] - - def check_result_exists( - self, - result_identifier: str, - record_identifier: str = None, - table_name: str = None, - ) -> bool: - """ - Check if the result has been reported - - :param str record_identifier: unique identifier of the record - :param str result_identifier: name of the result to check - :param str table_name: name of the table to search for results in - :return bool: whether the specified result has been reported for the - indicated record in current namespace - """ - record_identifier = self._strict_record_id(record_identifier) - return self.check_which_results_exist( - results=[result_identifier], rid=record_identifier, table_name=table_name - ) - - def _retrieve_db( - self, - result_identifier: str = None, - record_identifier: str = None, - table_name: str = None, - ) -> Dict[str, Any]: - """ - Retrieve a result for a record. - - If no result ID specified, results for the entire record will - be returned. 
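
The classes `_get_orm` hands back are never written out as source; `_create_table_orm` assembles them at runtime with `type()`. A self-contained sketch of that trick, using an in-memory SQLite engine instead of PostgreSQL, followed by the add-or-mutate upsert that `_report_db` implements:

    from sqlalchemy import Column, Float, Integer, String, create_engine
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.orm import sessionmaker

    Base = declarative_base()
    attrs = {
        "__tablename__": "test",
        "id": Column(Integer, primary_key=True),
        "record_identifier": Column(String, unique=True),
        "log10_pvalue": Column(Float),  # one column per schema result
    }
    Test = type("Test", (Base,), attrs)  # dynamically built ORM class
    engine = create_engine("sqlite://")  # in-memory DB just for the sketch
    Base.metadata.create_all(bind=engine)

    with sessionmaker(bind=engine)() as s:
        rec = s.query(Test).filter_by(record_identifier="sample1").first()
        if rec is None:
            s.add(Test(record_identifier="sample1", log10_pvalue=8.2))
        else:
            rec.log10_pvalue = 8.2
        s.commit()
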
- - :param str record_identifier: unique identifier of the record - :param str result_identifier: name of the result to be retrieved - :param str table_name: name of the table to search for results in - :return Dict[str, any]: a single result or a mapping with all the results - reported for the record - """ - table_name = table_name or self.namespace - record_identifier = self._strict_record_id(record_identifier) - if result_identifier is not None: - existing = self.check_which_results_exist( - results=[result_identifier], - rid=record_identifier, - table_name=table_name, - ) - if not existing: - raise PipestatDatabaseError( - f"Result '{result_identifier}' not found for record " - f"'{record_identifier}'" - ) - - with self.session as s: - record = ( - s.query(self._get_orm(table_name)) - .filter_by(record_identifier=record_identifier) - .first() - ) - - if record is not None: - if result_identifier is not None: - return {result_identifier: getattr(record, result_identifier)} - return { - column: getattr(record, column) - for column in [c.name for c in record.__table__.columns] - if getattr(record, column, None) is not None - } - raise PipestatDatabaseError(f"Record '{record_identifier}' not found") - - def _report_db( - self, values: Dict[str, Any], record_identifier: str, table_name: str = None - ) -> int: - """ - Report a result to a database - - :param Dict[str, Any] values: values to report - :param str record_identifier: record to report the result for - :param str table_name: name of the table to report the result in - :return int: updated/inserted row - """ - record_identifier = self._strict_record_id(record_identifier) - ORMClass = self._get_orm(table_name) - values.update({RECORD_ID: record_identifier}) - if not self.check_record_exists( - record_identifier=record_identifier, table_name=table_name - ): - new_record = ORMClass(**values) - with self.session as s: - s.add(new_record) - s.commit() - returned_id = new_record.id - else: - with self.session as s: - record_to_update = ( - s.query(ORMClass) - .filter(getattr(ORMClass, RECORD_ID) == record_identifier) - .first() - ) - for result_id, result_value in values.items(): - setattr(record_to_update, result_id, result_value) - s.commit() - returned_id = record_to_update.id - return returned_id - - def _remove_db( - self, - record_identifier: str = None, - result_identifier: str = None, - table_name: str = None, - ) -> bool: - """ - Remove a result. - - If no result ID specified or last result is removed, the entire record - will be removed. - - :param str record_identifier: unique identifier of the record - :param str result_identifier: name of the result to be removed or None - if the record should be removed. 
- :param str table_name: name of the table to report the result in - :return bool: whether the result has been removed - :raise PipestatDatabaseError: if either record or result specified are not found - """ - table_name = table_name or self.namespace - record_identifier = self._strict_record_id(record_identifier) - ORMClass = self._get_orm(table_name=table_name) - if self.check_record_exists( - record_identifier=record_identifier, table_name=table_name - ): - with self.session as s: - record = ( - s.query(ORMClass) - .filter(getattr(ORMClass, RECORD_ID) == record_identifier) - .first() - ) - if result_identifier is None: - # delete row - record.delete() - else: - # set the value to None - if not self.check_result_exists( - record_identifier=record_identifier, - result_identifier=result_identifier, - table_name=table_name, - ): - raise PipestatDatabaseError( - f"Result '{result_identifier}' not found for record " - f"'{record_identifier}'" - ) - setattr(record, result_identifier, None) - s.commit() - else: - raise PipestatDatabaseError(f"Record '{record_identifier}' not found") From 505c8308df460de1288c2f80ffda3244c6052d2b Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 13 Apr 2021 11:41:17 -0400 Subject: [PATCH 16/65] read DB to dict at init if requested --- docs_jupyter/python_api.ipynb | 10 +-- pipestat/pipestat.py | 115 +++++++++++++++++++++------------- tests/test_pipestat.py | 45 +------------ 3 files changed, 79 insertions(+), 91 deletions(-) diff --git a/docs_jupyter/python_api.ipynb b/docs_jupyter/python_api.ipynb index 77b349fd..aded8c5d 100644 --- a/docs_jupyter/python_api.ipynb +++ b/docs_jupyter/python_api.ipynb @@ -17,12 +17,12 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "import pipestat\n", - "from pipestat.pipestat_orm import PipestatManagerORM as PipestatManager, SchemaNotFoundError\n", + "from pipestat import PipestatManager, SchemaNotFoundError\n", "from jsonschema import ValidationError" ] }, @@ -51,14 +51,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 39, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmp9cozi7pc.yaml\n" + "/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmplgf49zfa.yaml\n" ] } ], @@ -78,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index f3149fcc..6f9d384d 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -229,33 +229,22 @@ def _select_value( "Must specify either database login " "credentials or a YAML file path" ) - def _get_flag_file( - self, record_identifier: str = None - ) -> Union[str, List[str], None]: + def __str__(self): """ - Get path to the status flag file for the specified record + Generate string representation of the object - :param str record_identifier: unique record identifier - :return str | list[str] | None: path to the status flag file + :return str: string representation of the object """ - from glob import glob - - r_id = self._strict_record_id(record_identifier) - if self.file is None: - return - if self.file is not None: - regex = os.path.join( - self[STATUS_FILE_DIR], f"{self.namespace}_{r_id}_*.flag" - ) - file_list = glob(regex) - if len(file_list) > 1: - _LOGGER.warning("Multiple flag files found") - return file_list - elif len(file_list) == 1: - return file_list[0] 
- else: - _LOGGER.debug("No flag files found") - return None + res = f"{self.__class__.__name__} ({self.namespace})" + res += "\nBackend: {}".format( + f"file ({self.file})" if self.file else "PostgreSQL" + ) + res += f"\nResults schema source: {self.schema_path}" + res += f"\nStatus schema source: {self.status_schema_source}" + res += f"\nRecords count: {self.record_count}" + if self.highlighted_results: + res += f"\nHighlighted results: {', '.join(self.highlighted_results)}" + return res @property def record_count(self) -> int: @@ -408,12 +397,40 @@ def session(self): operations, no commit afterwards. """ if not self.is_db_connected(): - self.establish_db_connection_orm() + self.establish_db_connection() with self[DB_SESSION_KEY]() as session: _LOGGER.debug("Created session") yield session _LOGGER.debug("Ending session") + def _get_flag_file( + self, record_identifier: str = None + ) -> Union[str, List[str], None]: + """ + Get path to the status flag file for the specified record + + :param str record_identifier: unique record identifier + :return str | list[str] | None: path to the status flag file + """ + from glob import glob + + r_id = self._strict_record_id(record_identifier) + if self.file is None: + return + if self.file is not None: + regex = os.path.join( + self[STATUS_FILE_DIR], f"{self.namespace}_{r_id}_*.flag" + ) + file_list = glob(regex) + if len(file_list) > 1: + _LOGGER.warning("Multiple flag files found") + return file_list + elif len(file_list) == 1: + return file_list[0] + else: + _LOGGER.debug("No flag files found") + return None + def _strict_record_id(self, forced_value: str = None) -> str: """ Get record identifier from the outer source or stored with this object @@ -466,7 +483,7 @@ def _auto_repr(x: Any) -> str: self[DB_ORMS_KEY][tn] = type(tn.capitalize(), (self[DB_BASE_KEY],), attr_dict) self[DB_BASE_KEY].metadata.create_all(bind=self[DB_ENGINE_KEY]) - def establish_db_connection_orm(self) -> bool: + def establish_db_connection(self) -> bool: """ Establish DB connection using the config data @@ -757,20 +774,33 @@ def _init_db_table(self) -> bool: if self.schema is None: raise SchemaNotFoundError("initialize the database table") if not self.is_db_connected(): - self.establish_db_connection_orm() - # if self._check_table_exists(table_name=self.namespace): - # _LOGGER.debug(f"Table '{self.namespace}' already exists in the database") - # if not self[DB_ONLY_KEY]: - # self._table_to_dict() - # # return False + self.establish_db_connection() _LOGGER.info(f"Initializing '{self.namespace}' table in '{PKG_NAME}' database") self._create_table_orm(table_name=self.namespace, schema=self.result_schemas) + if not self[DB_ONLY_KEY]: + self._table_to_dict() return True + def _table_to_dict(self) -> None: + """ + Create a dictionary from the database table data + """ + with self.session as s: + records = s.query(self._get_orm(self.namespace)).all() + _LOGGER.debug(f"Reading data from database for '{self.namespace}' namespace") + for record in records: + record_id = getattr(record, RECORD_ID) + for column in record.__table__.columns: + val = getattr(record, column.name, None) + if val is not None: + self._report_data_element( + record_identifier=record_id, values={column.name: val} + ) + def _init_status_table(self): status_table_name = f"{self.namespace}_{STATUS}" if not self.is_db_connected(): - self.establish_db_connection_orm() + self.establish_db_connection() # if not self._check_table_exists(table_name=status_table_name): _LOGGER.debug( f"Initializing '{status_table_name}' 
table in " f"'{PKG_NAME}' database" @@ -943,9 +973,12 @@ def retrieve( """ r_id = self._strict_record_id(record_identifier) if self.file is None: - return self._retrieve_db( + results = self._retrieve_db( result_identifier=result_identifier, record_identifier=r_id ) + if result_identifier is not None: + return results[result_identifier] + return results else: if r_id not in self.data[self.namespace]: raise PipestatDatabaseError(f"Record '{r_id}' not found") @@ -981,7 +1014,6 @@ def _retrieve_db( existing = self.check_which_results_exist( results=[result_identifier], rid=record_identifier, - table_name=table_name, ) if not existing: raise PipestatDatabaseError( @@ -1082,7 +1114,7 @@ def report( else: try: updated_ids = self._report_db( - record_identifier=record_identifier, value=values + record_identifier=record_identifier, values=values ) except Exception as e: _LOGGER.error( @@ -1242,26 +1274,23 @@ def _remove_db( record_identifier=record_identifier, table_name=table_name ): with self.session as s: - record = ( - s.query(ORMClass) - .filter(getattr(ORMClass, RECORD_ID) == record_identifier) - .first() + records = s.query(ORMClass).filter( + getattr(ORMClass, RECORD_ID) == record_identifier ) if result_identifier is None: # delete row - record.delete() + records.delete() else: # set the value to None if not self.check_result_exists( record_identifier=record_identifier, result_identifier=result_identifier, - table_name=table_name, ): raise PipestatDatabaseError( f"Result '{result_identifier}' not found for record " f"'{record_identifier}'" ) - setattr(record, result_identifier, None) + setattr(records.first(), result_identifier, None) s.commit() else: raise PipestatDatabaseError(f"Record '{record_identifier}' not found") diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index b8eef8df..9e87819f 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -39,10 +39,7 @@ def test_connection_checker(self, config_file_path, schema_file_path): schema_path=schema_file_path, namespace="test", ) - assert not pm.check_connection() - pm.establish_postgres_connection() - assert pm.check_connection() - pm.close_postgres_connection() + assert pm.is_db_connected() def test_connection_overwrite_error(self, config_file_path, schema_file_path): pm = PipestatManager( @@ -51,46 +48,8 @@ def test_connection_overwrite_error(self, config_file_path, schema_file_path): schema_path=schema_file_path, namespace="test", ) - pm.establish_postgres_connection() with pytest.raises(PipestatDatabaseError): - pm.establish_postgres_connection() - pm.close_postgres_connection() - - @pytest.mark.parametrize("suppress", [True, False]) - def test_connection_error(self, config_file_path, schema_file_path, suppress): - pm = PipestatManager( - config=config_file_path, - database_only=True, - schema_path=schema_file_path, - namespace="test", - ) - pm[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY] = "bogus_host" - if suppress: - assert not pm.establish_postgres_connection(suppress=suppress) - else: - with pytest.raises(psycopg2Error): - pm.establish_postgres_connection(suppress=suppress) - - def test_connection_closing(self, config_file_path, schema_file_path): - pm = PipestatManager( - config=config_file_path, - database_only=True, - schema_path=schema_file_path, - namespace="test", - ) - pm.establish_postgres_connection() - pm.close_postgres_connection() - assert not pm.check_connection() - - def test_connection_closing_closed(self, config_file_path, schema_file_path): - pm = PipestatManager( - 
config=config_file_path, - database_only=True, - schema_path=schema_file_path, - namespace="test", - ) - with pytest.raises(PipestatDatabaseError): - pm.close_postgres_connection() + pm.establish_db_connection() class TestPipestatManagerInstantiation: From 978f2831fa2c199a2538f07c59974cd85c24751c Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 14 Apr 2021 14:20:18 -0400 Subject: [PATCH 17/65] implement select --- pipestat/pipestat.py | 93 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 6f9d384d..8b26067c 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -1,9 +1,10 @@ from contextlib import contextmanager from copy import deepcopy from logging import getLogger -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Tuple, Union from urllib.parse import quote_plus +import sqlalchemy.orm from attmap import PathExAttMap as PXAM from jsonschema import validate from sqlalchemy import Column, Float, ForeignKey, Integer, String, Table, create_engine @@ -957,6 +958,96 @@ def check_result_exists( > 0 ) + def select( + self, + table_name: Optional[str] = None, + columns: Optional[List[str]] = None, + filter_condition: Optional[List[Tuple[str, str, Union[str, List[str]]]]] = None, + offset: Optional[int] = None, + limit: Optional[int] = None, + ) -> List[Any]: + """ + Perform a SELECT on the table, optionally filtering and paginating the results + + :param str table_name: name of the table to SELECT from + :param List[str] columns: columns to include in the result + :param [(key,operator,value)] filter_condition: e.g. [("id", "eq", 1)] operator list + - eq for == + - lt for < + - ge for >= + - in for in_ + - like for like + :param int offset: skip this number of rows + :param int limit: include this number of rows + """ + + def _dynamic_filter( + ORM: sqlalchemy.orm.DeclarativeMeta, + query: sqlalchemy.orm.Query, + filter_condition: List[Tuple[str, str, Union[str, List[str]]]], + ): + """ + Return filtered query based on condition. + + :param sqlalchemy.orm.DeclarativeMeta ORM: ORM class to build the query against + :param sqlalchemy.orm.Query query: query to apply the filters to + :param [(key,operator,value)] filter_condition: e.g.
[("id", "eq", 1)] operator list + - eq for == + - lt for < + - ge for >= + - in for in_ + - like for like + :return: query + """ + for raw in filter_condition: + try: + key, op, value = raw + except ValueError: + raise Exception("Invalid filter: %s" % raw) + column = getattr(ORM, key, None) + if column is None: + raise Exception("Invalid filter column: %s" % key) + if op == "in": + if isinstance(value, list): + filt = column.in_(value) + else: + filt = column.in_(value.split(",")) + else: + try: + attr = ( + list( + filter( + lambda e: hasattr(column, e % op), + ["%s", "%s_", "__%s__"], + ) + )[0] + % op + ) + except IndexError: + raise Exception(f"Invalid filter operator: {op}") + if value == "null": + value = None + filt = getattr(column, attr)(value) + query = query.filter(filt) + return query + + ORM = self._get_orm(table_name or self.namespace) + with self.session as s: + if columns is not None: + query = s.query(*[getattr(ORM, column) for column in columns]) + else: + query = s.query(ORM) + if filter_condition is not None: + query = _dynamic_filter( + ORM=ORM, query=query, filter_condition=filter_condition + ) + if isinstance(offset, int): + query = query.offset(offset) + if isinstance(limit, int): + query = query.limit(limit) + result = query.all() + return result + def retrieve( self, record_identifier: str = None, result_identifier: str = None ) -> Union[Any, Dict[str, Any]]: From 879d26160620e7bb1384a91814d6dfba8f7d8c92 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 14 Apr 2021 15:35:39 -0400 Subject: [PATCH 18/65] tests refactoring, add table_name parameter to methods --- pipestat/pipestat.py | 10 +- tests/test_db_only_mode.py | 86 +++++++++++ tests/test_init.py | 146 +++++++++++++++++++ tests/test_pipestat.py | 286 ------------------------------------- tests/test_status.py | 62 ++++++++ 5 files changed, 302 insertions(+), 288 deletions(-) create mode 100644 tests/test_db_only_mode.py create mode 100644 tests/test_init.py create mode 100644 tests/test_status.py diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 8b26067c..5b814480 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -889,7 +889,10 @@ def check_record_exists( return False def check_which_results_exist( - self, results: List[str], rid: str = None + self, + results: List[str], + rid: Optional[str] = None, + table_name: Optional[str] = None, ) -> List[str]: """ Check which results have been reported @@ -901,7 +904,9 @@ def check_which_results_exist( """ rid = self._strict_record_id(rid) if self.file is None: - existing = self._check_which_results_exist_db(results=results, rid=rid) + existing = self._check_which_results_exist_db( + results=results, rid=rid, table_name=table_name + ) else: existing = [] for r in results: @@ -1105,6 +1110,7 @@ def _retrieve_db( existing = self.check_which_results_exist( results=[result_identifier], rid=record_identifier, + table_name=table_name, ) if not existing: raise PipestatDatabaseError( diff --git a/tests/test_db_only_mode.py b/tests/test_db_only_mode.py new file mode 100644 index 00000000..660006ec --- /dev/null +++ b/tests/test_db_only_mode.py @@ -0,0 +1,86 @@ +import pytest + +from pipestat import PipestatManager +from pipestat.const import * + + +class TestDatabaseOnly: + @pytest.mark.parametrize( + "val", + [ + {"name_of_something": "test_name"}, + {"number_of_things": 1}, + {"percentage_of_things": 10.1}, + ], + ) + def test_report(self, val, config_file_path, schema_file_path, results_file_path): + REC_ID = "constant_record_id" + psm = 
PipestatManager( + schema_path=schema_file_path, + namespace="test", + record_identifier=REC_ID, + database_only=True, + config=config_file_path, + ) + psm.report(values=val) + assert len(psm.data) == 0 + val_name = list(val.keys())[0] + assert psm.select(filter_condition=[(val_name, "eq", str(val[val_name]))]) + + @pytest.mark.parametrize(["rec_id", "res_id"], [("sample2", "number_of_things")]) + @pytest.mark.parametrize("backend", ["db"]) + @pytest.mark.parametrize("limit", [1, 2, 3, 15555]) + def test_select_limit( + self, + rec_id, + res_id, + config_file_path, + results_file_path, + schema_file_path, + backend, + limit, + ): + args = dict( + schema_path=schema_file_path, namespace="test", config=config_file_path + ) + psm = PipestatManager(**args) + result = psm.select( + filter_condition=[(RECORD_ID, "eq", rec_id)], + columns=[res_id], + limit=limit, + ) + assert len(result) <= limit + + @pytest.mark.parametrize("backend", ["db"]) + @pytest.mark.parametrize("offset", [0, 1, 2, 3, 15555]) + def test_select_offset( + self, config_file_path, results_file_path, schema_file_path, backend, offset + ): + args = dict( + schema_path=schema_file_path, namespace="test", config=config_file_path + ) + psm = PipestatManager(**args) + result = psm.select(offset=offset) + print(result) + assert len(result) == max((psm.record_count - offset), 0) + + @pytest.mark.parametrize("backend", ["db"]) + @pytest.mark.parametrize( + ["offset", "limit"], [(0, 0), (0, 1), (0, 2), (0, 11111), (1, 1), (1, 0)] + ) + def test_select_pagination( + self, + config_file_path, + results_file_path, + schema_file_path, + backend, + offset, + limit, + ): + args = dict( + schema_path=schema_file_path, namespace="test", config=config_file_path + ) + psm = PipestatManager(**args) + result = psm.select(offset=offset, limit=limit) + print(result) + assert len(result) == min(max((psm.record_count - offset), 0), limit) diff --git a/tests/test_init.py b/tests/test_init.py new file mode 100644 index 00000000..1490a78a --- /dev/null +++ b/tests/test_init.py @@ -0,0 +1,146 @@ +from tempfile import mkdtemp + +import pytest +from yaml import dump + +from pipestat import PipestatManager +from pipestat.const import * +from pipestat.exceptions import * + + +class TestConnection: + def test_connection_checker(self, config_file_path, schema_file_path): + pm = PipestatManager( + config=config_file_path, + database_only=True, + schema_path=schema_file_path, + namespace="test", + ) + assert pm.is_db_connected() + + def test_connection_overwrite_error(self, config_file_path, schema_file_path): + pm = PipestatManager( + config=config_file_path, + database_only=True, + schema_path=schema_file_path, + namespace="test", + ) + with pytest.raises(PipestatDatabaseError): + pm.establish_db_connection() + + +class TestPipestatManagerInstantiation: + def test_obj_creation_file(self, schema_file_path, results_file_path): + """ Object constructor works with file as backend""" + assert isinstance( + PipestatManager( + namespace="test", + results_file_path=results_file_path, + schema_path=schema_file_path, + ), + PipestatManager, + ) + + def test_obj_creation_db(self, config_file_path): + """ Object constructor works with database as backend""" + assert isinstance(PipestatManager(config=config_file_path), PipestatManager) + + @pytest.mark.xfail(reason="schema is no longer required to init the object") + def test_schema_req(self, results_file_path): + """ + Object constructor raises exception if schema is not provided + """ + with pytest.raises(PipestatError): 
+ PipestatManager(namespace="test", results_file_path=results_file_path) + + def test_schema_recursive_custom_type_conversion( + self, recursive_schema_file_path, results_file_path + ): + psm = PipestatManager( + namespace="test", + results_file_path=results_file_path, + schema_path=recursive_schema_file_path, + ) + assert ( + "path" + in psm.result_schemas["output_file_in_object"]["properties"]["prop1"][ + "properties" + ] + ) + assert ( + "thumbnail_path" + in psm.result_schemas["output_file_in_object"]["properties"]["prop2"][ + "properties" + ] + ) + + def test_missing_cfg_data(self, schema_file_path): + """ Object constructor raises exception if cfg is missing data """ + tmp_pth = os.path.join(mkdtemp(), "res.yml") + with open(tmp_pth, "w") as file: + dump({"database": {"host": "localhost"}}, file) + with pytest.raises(MissingConfigDataError): + PipestatManager( + namespace="test", config=tmp_pth, schema_path=schema_file_path + ) + + def test_unknown_backend(self, schema_file_path): + """ Either db config or results file path needs to be provided """ + with pytest.raises(MissingConfigDataError): + PipestatManager(namespace="test", schema_path=schema_file_path) + + def test_create_results_file(self, schema_file_path): + """ Results file is created if a nonexistent path provided """ + tmp_res_file = os.path.join(mkdtemp(), "res.yml") + print(f"Temporary results file: {tmp_res_file}") + assert not os.path.exists(tmp_res_file) + PipestatManager( + namespace="test", + results_file_path=tmp_res_file, + schema_path=schema_file_path, + ) + assert os.path.exists(tmp_res_file) + + def test_use_other_namespace_file(self, schema_file_path): + """ Results file can be used with just one namespace """ + tmp_res_file = os.path.join(mkdtemp(), "res.yml") + print(f"Temporary results file: {tmp_res_file}") + assert not os.path.exists(tmp_res_file) + PipestatManager( + namespace="test", + results_file_path=tmp_res_file, + schema_path=schema_file_path, + ) + assert os.path.exists(tmp_res_file) + with pytest.raises(PipestatDatabaseError): + PipestatManager( + namespace="new_test", + results_file_path=tmp_res_file, + schema_path=schema_file_path, + ) + + @pytest.mark.parametrize("pth", [["/$HOME/path.yaml"], 1]) + def test_wrong_class_results_file(self, schema_file_path, pth): + """ Input string that is not a file path raises an informative error """ + with pytest.raises((TypeError, AssertionError)): + PipestatManager( + namespace="test", results_file_path=pth, schema_path=schema_file_path + ) + + def test_results_file_contents_loaded(self, results_file_path, schema_file_path): + """ Contents of the results file are present after loading """ + psm = PipestatManager( + namespace="test", + results_file_path=results_file_path, + schema_path=schema_file_path, + ) + assert "test" in psm.data + + def test_str_representation(self, results_file_path, schema_file_path): + """ Test string representation identifies number of records """ + psm = PipestatManager( + namespace="test", + results_file_path=results_file_path, + schema_path=schema_file_path, + ) + assert f"Records count: {len(psm.data[psm.namespace])}" in str(psm) diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index 9e87819f..f8c199d6 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -1,12 +1,7 @@ -import os from collections import Mapping -from tempfile import mkdtemp import pytest -from _pytest.monkeypatch import monkeypatch from jsonschema import ValidationError -from psycopg2 import Error as psycopg2Error -from yaml import 
dump from pipestat import PipestatManager from pipestat.const import * @@ -31,144 +26,6 @@ def is_in_file(fs, s, reverse=False): assert s in fh.read() -class TestConnection: - def test_connection_checker(self, config_file_path, schema_file_path): - pm = PipestatManager( - config=config_file_path, - database_only=True, - schema_path=schema_file_path, - namespace="test", - ) - assert pm.is_db_connected() - - def test_connection_overwrite_error(self, config_file_path, schema_file_path): - pm = PipestatManager( - config=config_file_path, - database_only=True, - schema_path=schema_file_path, - namespace="test", - ) - with pytest.raises(PipestatDatabaseError): - pm.establish_db_connection() - - -class TestPipestatManagerInstantiation: - def test_obj_creation_file(self, schema_file_path, results_file_path): - """ Object constructor works with file as backend""" - assert isinstance( - PipestatManager( - namespace="test", - results_file_path=results_file_path, - schema_path=schema_file_path, - ), - PipestatManager, - ) - - def test_obj_creation_db(self, config_file_path): - """ Object constructor works with database as backend""" - assert isinstance(PipestatManager(config=config_file_path), PipestatManager) - - @pytest.mark.xfail(reason="schema is no longer required to init the object") - def test_schema_req(self, results_file_path): - """ - Object constructor raises exception if schema is not provided - """ - with pytest.raises(PipestatError): - PipestatManager(namespace="test", results_file_path=results_file_path) - - def test_schema_recursive_custom_type_conversion( - self, recursive_schema_file_path, results_file_path - ): - psm = PipestatManager( - namespace="test", - results_file_path=results_file_path, - schema_path=recursive_schema_file_path, - ) - assert ( - "path" - in psm.result_schemas["output_file_in_object"]["properties"]["prop1"][ - "properties" - ] - ) - assert ( - "thumbnail_path" - in psm.result_schemas["output_file_in_object"]["properties"]["prop2"][ - "properties" - ] - ) - - def test_missing_cfg_data(self, schema_file_path): - """ Object constructor raises exception if cfg is missing data """ - tmp_pth = os.path.join(mkdtemp(), "res.yml") - with open(tmp_pth, "w") as file: - dump({"database": {"host": "localhost"}}, file) - with pytest.raises(MissingConfigDataError): - PipestatManager( - namespace="test", config=tmp_pth, schema_path=schema_file_path - ) - - def test_unknown_backend(self, schema_file_path): - """ Either db config or results file path needs to be provided """ - with pytest.raises(MissingConfigDataError): - PipestatManager(namespace="test", schema_path=schema_file_path) - - def test_create_results_file(self, schema_file_path): - """ Results file is created if a nonexistent path provided """ - tmp_res_file = os.path.join(mkdtemp(), "res.yml") - print(f"Temporary results file: {tmp_res_file}") - assert not os.path.exists(tmp_res_file) - PipestatManager( - namespace="test", - results_file_path=tmp_res_file, - schema_path=schema_file_path, - ) - assert os.path.exists(tmp_res_file) - - def test_use_other_namespace_file(self, schema_file_path): - """ Results file can be used with just one namespace """ - tmp_res_file = os.path.join(mkdtemp(), "res.yml") - print(f"Temporary results file: {tmp_res_file}") - assert not os.path.exists(tmp_res_file) - PipestatManager( - namespace="test", - results_file_path=tmp_res_file, - schema_path=schema_file_path, - ) - assert os.path.exists(tmp_res_file) - with pytest.raises(PipestatDatabaseError): - PipestatManager( - 
namespace="new_test", - results_file_path=tmp_res_file, - schema_path=schema_file_path, - ) - - @pytest.mark.parametrize("pth", [["/$HOME/path.yaml"], 1]) - def test_wrong_class_results_file(self, schema_file_path, pth): - """ Input string that is not a file path raises an informative error """ - with pytest.raises((TypeError, AssertionError)): - PipestatManager( - namespace="test", results_file_path=pth, schema_path=schema_file_path - ) - - def test_results_file_contents_loaded(self, results_file_path, schema_file_path): - """ Contents of the results file are present after loading """ - psm = PipestatManager( - namespace="test", - results_file_path=results_file_path, - schema_path=schema_file_path, - ) - assert "test" in psm.data - - def test_str_representation(self, results_file_path, schema_file_path): - """ Test string representation identifies number of records """ - psm = PipestatManager( - namespace="test", - results_file_path=results_file_path, - schema_path=schema_file_path, - ) - assert f"Records count: {len(psm.data[psm.namespace])}" in str(psm) - - class TestReporting: @pytest.mark.parametrize( ["rec_id", "val"], @@ -576,91 +433,6 @@ def test_remove( assert psm.remove(result_identifier=list(val.keys())[0]) -class TestDatabaseOnly: - @pytest.mark.parametrize( - "val", - [ - {"name_of_something": "test_name"}, - {"number_of_things": 1}, - {"percentage_of_things": 10.1}, - ], - ) - def test_report(self, val, config_file_path, schema_file_path, results_file_path): - REC_ID = "constant_record_id" - psm = PipestatManager( - schema_path=schema_file_path, - namespace="test", - record_identifier=REC_ID, - database_only=True, - config=config_file_path, - ) - psm.report(values=val) - assert len(psm.data) == 0 - val_name = list(val.keys())[0] - assert psm.select( - condition=val_name + "=%s", condition_val=[str(val[val_name])] - ) - - @pytest.mark.parametrize(["rec_id", "res_id"], [("sample2", "number_of_things")]) - @pytest.mark.parametrize("backend", ["db"]) - @pytest.mark.parametrize("limit", [1, 2, 3, 15555]) - def test_select_limit( - self, - rec_id, - res_id, - config_file_path, - results_file_path, - schema_file_path, - backend, - limit, - ): - args = dict( - schema_path=schema_file_path, namespace="test", config=config_file_path - ) - psm = PipestatManager(**args) - result = psm.select( - condition=f"{RECORD_ID}=%s", - condition_val=[rec_id], - columns=[res_id], - limit=limit, - ) - assert len(result) <= limit - - @pytest.mark.parametrize("backend", ["db"]) - @pytest.mark.parametrize("offset", [0, 1, 2, 3, 15555]) - def test_select_offset( - self, config_file_path, results_file_path, schema_file_path, backend, offset - ): - args = dict( - schema_path=schema_file_path, namespace="test", config=config_file_path - ) - psm = PipestatManager(**args) - result = psm.select(offset=offset) - print(result) - assert len(result) == max((psm.record_count - offset), 0) - - @pytest.mark.parametrize("backend", ["db"]) - @pytest.mark.parametrize( - ["offset", "limit"], [(0, 0), (0, 1), (0, 2), (0, 11111), (1, 1), (1, 0)] - ) - def test_select_pagination( - self, - config_file_path, - results_file_path, - schema_file_path, - backend, - offset, - limit, - ): - args = dict( - schema_path=schema_file_path, namespace="test", config=config_file_path - ) - psm = PipestatManager(**args) - result = psm.select(offset=offset, limit=limit) - print(result) - assert len(result) == min(max((psm.record_count - offset), 0), limit) - - class TestHighlighting: def test_highlighting_works(self, 
highlight_schema_file_path, results_file_path): """the highlighted results are sourced from the schema and only ones @@ -677,64 +449,6 @@ def test_highlighting_works(self, highlight_schema_file_path, results_file_path) assert psm.highlighted_results == schema_highlighted_results -class TestStatus: - def test_status_file_defult_location(self, schema_file_path, results_file_path): - """status file location is set to the results file dir - if not specified""" - psm = PipestatManager( - namespace="test", - results_file_path=results_file_path, - schema_path=schema_file_path, - ) - assert psm[STATUS_FILE_DIR] == os.path.dirname(psm.file) - - @pytest.mark.parametrize("backend", ["file", "db"]) - @pytest.mark.parametrize("status_id", ["running", "failed", "completed"]) - def test_status_not_configured( - self, schema_file_path, config_file_path, results_file_path, backend, status_id - ): - """ status management works even in case it has not been configured""" - args = dict(schema_path=schema_file_path, namespace="test") - backend_data = ( - {"config": config_file_path} - if backend == "db" - else {"results_file_path": results_file_path} - ) - args.update(backend_data) - psm = PipestatManager(**args) - psm.set_status(record_identifier="sample1", status_identifier=status_id) - assert psm.get_status(record_identifier="sample1") == status_id - - @pytest.mark.parametrize("backend", ["file", "db"]) - @pytest.mark.parametrize( - "status_id", ["running_custom", "failed_custom", "completed_custom"] - ) - def test_custom_status_schema( - self, - schema_file_path, - config_file_path, - results_file_path, - backend, - status_id, - custom_status_schema, - ): - """ status management works even in case it has not been configured""" - args = dict( - schema_path=schema_file_path, - namespace="test", - status_schema_path=custom_status_schema, - ) - backend_data = ( - {"config": config_file_path} - if backend == "db" - else {"results_file_path": results_file_path} - ) - args.update(backend_data) - psm = PipestatManager(**args) - psm.set_status(record_identifier="sample1", status_identifier=status_id) - assert psm.get_status(record_identifier="sample1") == status_id - - class TestEnvVars: def test_no_config(self, monkeypatch, results_file_path, schema_file_path): """ diff --git a/tests/test_status.py b/tests/test_status.py new file mode 100644 index 00000000..a8addf6f --- /dev/null +++ b/tests/test_status.py @@ -0,0 +1,62 @@ +import pytest + +from pipestat import PipestatManager +from pipestat.const import * + + +class TestStatus: + def test_status_file_defult_location(self, schema_file_path, results_file_path): + """status file location is set to the results file dir + if not specified""" + psm = PipestatManager( + namespace="test", + results_file_path=results_file_path, + schema_path=schema_file_path, + ) + assert psm[STATUS_FILE_DIR] == os.path.dirname(psm.file) + + @pytest.mark.parametrize("backend", ["file", "db"]) + @pytest.mark.parametrize("status_id", ["running", "failed", "completed"]) + def test_status_not_configured( + self, schema_file_path, config_file_path, results_file_path, backend, status_id + ): + """ status management works even in case it has not been configured""" + args = dict(schema_path=schema_file_path, namespace="test") + backend_data = ( + {"config": config_file_path} + if backend == "db" + else {"results_file_path": results_file_path} + ) + args.update(backend_data) + psm = PipestatManager(**args) + psm.set_status(record_identifier="sample1", status_identifier=status_id) + assert 
psm.get_status(record_identifier="sample1") == status_id + + @pytest.mark.parametrize("backend", ["file", "db"]) + @pytest.mark.parametrize( + "status_id", ["running_custom", "failed_custom", "completed_custom"] + ) + def test_custom_status_schema( + self, + schema_file_path, + config_file_path, + results_file_path, + backend, + status_id, + custom_status_schema, + ): + """ status management works even in case it has not been configured""" + args = dict( + schema_path=schema_file_path, + namespace="test", + status_schema_path=custom_status_schema, + ) + backend_data = ( + {"config": config_file_path} + if backend == "db" + else {"results_file_path": results_file_path} + ) + args.update(backend_data) + psm = PipestatManager(**args) + psm.set_status(record_identifier="sample1", status_identifier=status_id) + assert psm.get_status(record_identifier="sample1") == status_id From dd9f62feb4f9f6e6b820cfa2a8ed0acdf8b1c904 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 16 Apr 2021 09:26:36 -0400 Subject: [PATCH 19/65] fix deprecated import --- tests/test_pipestat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index f8c199d6..1822c9a1 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -1,4 +1,4 @@ -from collections import Mapping +from collections.abc import Mapping import pytest from jsonschema import ValidationError From f12ce065c3bf01e98f932f180ce7b2acafad60b6 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 16 Apr 2021 09:33:51 -0400 Subject: [PATCH 20/65] remove prev implementation --- pipestat/const.py | 21 +- pipestat/helpers.py | 20 - pipestat/pipestat_dirver.py | 1270 ------------------------------ tests/data/config_no_schema.yaml | 1 + 4 files changed, 2 insertions(+), 1310 deletions(-) delete mode 100644 pipestat/pipestat_dirver.py diff --git a/pipestat/const.py b/pipestat/const.py index 0b75a84d..09178d07 100644 --- a/pipestat/const.py +++ b/pipestat/const.py @@ -25,17 +25,6 @@ STATUS_GET_CMD: "Get status.", } -TABLE_COLS_BY_TYPE = { - "integer": "{} INT", - "number": "{} NUMERIC", - "string": "{} TEXT", - "boolean": "{} BOOLEAN", - "object": "{} JSONB", - "array": "{} TEXT[]", - "file": "{} JSONB", - "image": "{} JSONB", -} - DOC_URL = "http://pipestat.databio.org/en/latest/db_config/" # DB config keys @@ -53,6 +42,7 @@ CFG_PASSWORD_KEY, CFG_USER_KEY, CFG_NAME_KEY, + CFG_DIALECT_KEY, ] # object attribute names @@ -70,7 +60,6 @@ NAME_KEY = "_name" FILE_KEY = "_file" RECORD_ID_KEY = "_record_id" -DB_CONNECTION_KEY = "_db_connnection" DB_SESSION_KEY = "_db_session" DB_ENGINE_KEY = "_db_engine" HIGHLIGHTED_KEY = "_highlighted" @@ -86,14 +75,6 @@ RESERVED_COLNAMES = [ID, RECORD_ID] -FIXED_COLUMNS = [f"{ID} BIGSERIAL PRIMARY KEY", f"{RECORD_ID} TEXT UNIQUE NOT NULL"] - -STATUS_TABLE_COLUMNS = [ - f"{ID} BIGSERIAL PRIMARY KEY", - f"{RECORD_ID} TEXT UNIQUE NOT NULL", - f"{STATUS} TEXT", -] -# f"{STATUS} {STATUS}"] # custom type 'status' CANONICAL_TYPES = { "image": { "type": "object", diff --git a/pipestat/helpers.py b/pipestat/helpers.py index d4cda351..b24c0a0c 100644 --- a/pipestat/helpers.py +++ b/pipestat/helpers.py @@ -29,26 +29,6 @@ def get_status_table_schema(status_schema: Dict[str, Any]) -> Dict[str, Any]: return status_table_schema -def schema_to_columns(schema): - """ - Get a list of database table columns from a schema - - :param dict schema: schema to parse - :return list[str]: columns to inial ize database table with - """ - columns = [] - for colname, col_dict in 
schema.items(): - if col_dict[SCHEMA_TYPE_KEY] not in TABLE_COLS_BY_TYPE: - _LOGGER.warning( - f"'{col_dict[SCHEMA_TYPE_KEY]}' result type defined" - f" in schema is not supported" - ) - continue - columns.append(TABLE_COLS_BY_TYPE[col_dict[SCHEMA_TYPE_KEY]].format(colname)) - _LOGGER.info(f"Table columns created based on schema: {columns}") - return columns - - def validate_type(value, schema, strict_type=False): """ Validate reported result against a partial schema, in case of failure try diff --git a/pipestat/pipestat_dirver.py b/pipestat/pipestat_dirver.py deleted file mode 100644 index ea9cdb42..00000000 --- a/pipestat/pipestat_dirver.py +++ /dev/null @@ -1,1270 +0,0 @@ -from contextlib import contextmanager -from copy import deepcopy -from logging import getLogger -from typing import Any, Dict, List, Optional, Union - -import psycopg2 -from attmap import PathExAttMap as PXAM -from jsonschema import validate -from psycopg2.extensions import connection -from psycopg2.extras import DictCursor, Json -from ubiquerg import create_lock, remove_lock -from yacman import YacAttMap - -from .const import * -from .exceptions import * -from .helpers import * - -_LOGGER = getLogger(PKG_NAME) - - -class LoggingCursor(psycopg2.extras.DictCursor): - """ - Logging db cursor - """ - - def execute(self, query, vars=None): - """ - Execute a database operation (query or command) and issue a debug - and info level log messages - - :param query: - :param vars: - :return: - """ - _LOGGER.debug(f"Executing query: {self.mogrify(query, vars)}") - super(LoggingCursor, self).execute(query=query, vars=vars) - _LOGGER.debug(f"Executed query: {self.query}") - - -class PipestatManagerDirver(dict): - """ - Pipestat standardizes reporting of pipeline results and - pipeline status management. It formalizes a way for pipeline developers - and downstream tools developers to communicate -- results produced by a - pipeline can easily and reliably become an input for downstream analyses. - The object exposes API for interacting with the results and - pipeline status and can be backed by either a YAML-formatted file - or a PostgreSQL database. - """ - - def __init__( - self, - namespace: str = None, - record_identifier: str = None, - schema_path: str = None, - results_file_path: str = None, - database_only: bool = False, - config: Union[str, dict] = None, - status_schema_path: str = None, - flag_file_dir: str = None, - ): - """ - Initialize the object - - :param str namespace: namespace to report into. This will be the DB - table name if using DB as the object back-end - :param str record_identifier: record identifier to report for. 
This - creates a weak bound to the record, which can be overriden in - this object method calls - :param str schema_path: path to the output schema that formalizes - the results structure - :param str results_file_path: YAML file to report into, if file is - used as the object back-end - :param bool database_only: whether the reported data should not be - stored in the memory, but only in the database - :param str | dict config: path to the configuration file or a mapping - with the config file content - :param str status_schema_path: path to the status schema that formalizes - the status flags structure - """ - - def _check_cfg_key(cfg: dict, key: str) -> bool: - if key not in cfg: - _LOGGER.warning(f"Key '{key}' not found in config") - return False - return True - - def _mk_abs_via_cfg( - path: Optional[str], - cfg_path: Optional[str], - ) -> Optional[str]: - if path is None: - return path - assert isinstance(path, str), TypeError("Path is expected to be a str") - if os.path.isabs(path): - return path - if cfg_path is None: - rel_to_cwd = os.path.join(os.getcwd(), path) - if os.path.exists(rel_to_cwd) or os.access( - os.path.dirname(rel_to_cwd), os.W_OK - ): - return rel_to_cwd - raise OSError(f"Could not make this path absolute: {path}") - joined = os.path.join(os.path.dirname(cfg_path), path) - if os.path.isabs(joined): - return joined - raise OSError(f"Could not make this path absolute: {path}") - - def _select_value( - arg_name: str, - arg_value: Any, - cfg: dict, - strict: bool = True, - env_var: str = None, - ) -> Any: - if arg_value is not None: - return arg_value - if arg_name not in cfg or cfg[arg_name] is None: - if env_var is not None: - arg = os.getenv(env_var, None) - if arg is not None: - _LOGGER.debug(f"Value '{arg}' sourced from '{env_var}' env var") - return expandpath(arg) - if strict: - raise PipestatError( - f"Value for the required '{arg_name}' argument could not be" - f" determined. Provide it in the config or pass to the " - f"object constructor." 
- ) - return - return cfg[arg_name] - - super(PipestatManagerDirver, self).__init__() - self[CONFIG_KEY] = YacAttMap() - # read config or config data - config = config or os.getenv(ENV_VARS["config"]) - if config is not None: - if isinstance(config, str): - config = os.path.abspath(expandpath(config)) - self[CONFIG_KEY] = YacAttMap(filepath=config) - self._config_path = config - elif isinstance(config, dict): - self[CONFIG_KEY] = YacAttMap(entries=config) - self._config_path = None - else: - raise TypeError( - "database_config has to be either path to the " - "file to read or a dict" - ) - # validate config - cfg = self[CONFIG_KEY].to_dict(expand=True) - _, cfg_schema = read_yaml_data(CFG_SCHEMA, "config schema") - validate(cfg, cfg_schema) - - self[NAME_KEY] = _select_value( - "namespace", namespace, self[CONFIG_KEY], env_var=ENV_VARS["namespace"] - ) - self[RECORD_ID_KEY] = _select_value( - "record_identifier", - record_identifier, - self[CONFIG_KEY], - False, - ENV_VARS["record_identifier"], - ) - self[DB_ONLY_KEY] = database_only - # read results schema - self._schema_path = _select_value( - "schema_path", - schema_path, - self[CONFIG_KEY], - False, - env_var=ENV_VARS["schema"], - ) - if self._schema_path is not None: - _, self[SCHEMA_KEY] = read_yaml_data( - _mk_abs_via_cfg(self._schema_path, self.config_path), "schema" - ) - self.validate_schema() - # determine the highlighted results - self[HIGHLIGHTED_KEY] = [ - k - for k, v in self.schema.items() - if "highlight" in v and v["highlight"] is True - ] - if self[HIGHLIGHTED_KEY]: - assert isinstance(self[HIGHLIGHTED_KEY], list), TypeError( - f"highlighted results specification " - f"({self[HIGHLIGHTED_KEY]}) has to be a list" - ) - # read status schema - status_schema_path = ( - _mk_abs_via_cfg( - _select_value( - "status_schema_path", - status_schema_path, - self[CONFIG_KEY], - False, - env_var=ENV_VARS["status_schema"], - ), - self.config_path, - ) - or STATUS_SCHEMA - ) - self[STATUS_SCHEMA_SOURCE_KEY], self[STATUS_SCHEMA_KEY] = read_yaml_data( - status_schema_path, "status schema" - ) - # determine results file - results_file_path = _mk_abs_via_cfg( - _select_value( - "results_file_path", - results_file_path, - self[CONFIG_KEY], - False, - ENV_VARS["results_file"], - ), - self.config_path, - ) - if results_file_path: - if self[DB_ONLY_KEY]: - raise ValueError( - "Running in database only mode does not make " - "sense with a YAML file as a backend." 
- ) - self[FILE_KEY] = results_file_path - self._init_results_file() - flag_file_dir = _select_value( - "flag_file_dir", flag_file_dir, self[CONFIG_KEY], False - ) or os.path.dirname(self.file) - self[STATUS_FILE_DIR] = _mk_abs_via_cfg(flag_file_dir, self.config_path) - elif CFG_DATABASE_KEY in self[CONFIG_KEY]: - if not all( - [ - _check_cfg_key(self[CONFIG_KEY][CFG_DATABASE_KEY], key) - for key in DB_CREDENTIALS - ] - ): - raise MissingConfigDataError( - "Must specify all database login " "credentials or result_file_path" - ) - self[DATA_KEY] = YacAttMap() - self._init_postgres_table() - self._init_status_table() - else: - raise MissingConfigDataError( - "Must specify either database login " "credentials or a YAML file path" - ) - - def __str__(self): - """ - Generate string representation of the object - - :return str: string representation of the object - """ - res = f"{self.__class__.__name__} ({self.namespace})" - res += "\nBackend: {}".format( - f"file ({self.file})" if self.file else "PostgreSQL" - ) - res += f"\nResults schema source: {self.schema_path}" - res += f"\nStatus schema source: {self.status_schema_source}" - res += f"\nRecords count: {self.record_count}" - if self.highlighted_results: - res += f"\nHighlighted results: {', '.join(self.highlighted_results)}" - return res - - def _get_flag_file( - self, record_identifier: str = None - ) -> Union[str, List[str], None]: - """ - Get path to the status flag file for the specified record - - :param str record_identifier: unique record identifier - :return str | list[str] | None: path to the status flag file - """ - from glob import glob - - r_id = self._strict_record_id(record_identifier) - if self.file is None: - return - if self.file is not None: - regex = os.path.join( - self[STATUS_FILE_DIR], f"{self.namespace}_{r_id}_*.flag" - ) - file_list = glob(regex) - if len(file_list) > 1: - _LOGGER.warning("Multiple flag files found") - return file_list - elif len(file_list) == 1: - return file_list[0] - else: - _LOGGER.debug("No flag files found") - return None - - @property - def highlighted_results(self) -> List[str]: - """ - Highlighted results - - :return List[str]: a collection of highlighted results - """ - return self._get_attr(HIGHLIGHTED_KEY) or [] - - @property - def record_count(self) -> int: - """ - Number of records reported - - :return int: number of records reported - """ - return ( - len(self.data[self.namespace]) - if self.file - else self._count_rows(self.namespace) - ) - - @property - def namespace(self) -> str: - """ - Namespace the object writes the results to - - :return str: namespace the object writes the results to - """ - return self._get_attr(NAME_KEY) - - @property - def record_identifier(self) -> str: - """ - Unique identifier of the record - - :return str: unique identifier of the record - """ - return self._get_attr(RECORD_ID_KEY) - - @property - def schema(self) -> Dict: - """ - Schema mapping - - :return dict: schema that formalizes the results structure - """ - return self._get_attr(SCHEMA_KEY) - - @property - def status_schema(self) -> Dict: - """ - Status schema mapping - - :return dict: schema that formalizes the pipeline status structure - """ - return self._get_attr(STATUS_SCHEMA_KEY) - - @property - def status_schema_source(self) -> Dict: - """ - Status schema source - - :return dict: source of the schema that formalizes - the pipeline status structure - """ - return self._get_attr(STATUS_SCHEMA_SOURCE_KEY) - - @property - def schema_path(self) -> str: - """ - Schema path - - :return str: 
path to the provided schema - """ - return self._schema_path - - @property - def config_path(self) -> str: - """ - Config path. None if the config was not provided or if provided - as a mapping of the config contents - - :return str: path to the provided config - """ - return getattr(self, "_config_path", None) - - @property - def result_schemas(self) -> Dict: - """ - Result schema mappings - - :return dict: schemas that formalize the structure of each result - in a canonical jsonschema way - """ - return self._get_attr(RES_SCHEMAS_KEY) - - @property - def file(self) -> str: - """ - File path that the object is reporting the results into - - :return str: file path that the object is reporting the results into - """ - return self._get_attr(FILE_KEY) - - @property - def data(self) -> YacAttMap: - """ - Data object - - :return yacman.YacAttMap: the object that stores the reported data - """ - return self._get_attr(DATA_KEY) - - @property - @contextmanager - def db_cursor(self): - """ - Establish connection and get a PostgreSQL database cursor, - commit and close the connection afterwards - - :return LoggingCursor: Database cursor object - """ - try: - if not self.check_connection(): - self.establish_postgres_connection() - with self[DB_CONNECTION_KEY] as c, c.cursor( - cursor_factory=LoggingCursor - ) as cur: - yield cur - except Exception: - raise - finally: - self.close_postgres_connection() - - def get_status(self, record_identifier: str = None) -> Optional[str]: - """ - Get the current pipeline status - - :return str: status identifier, like 'running' - """ - r_id = self._strict_record_id(record_identifier) - if self.file is None: - with self.db_cursor as cur: - query = sql.SQL( - f"SELECT {STATUS} " - f"FROM {f'{self.namespace}_{STATUS}'} " - f"WHERE {RECORD_ID}=%s" - ) - cur.execute(query, (r_id,)) - result = cur.fetchone() - return result[0] if result is not None else None - else: - flag_file = self._get_flag_file(record_identifier=r_id) - if flag_file is not None: - assert isinstance(flag_file, str), TypeError( - "Flag file path is expected to be a str, were multiple flags found?" - ) - with open(flag_file, "r") as f: - status = f.read() - return status - _LOGGER.debug( - f"Could not determine status for '{r_id}' record. " - f"No flags found in: {self[STATUS_FILE_DIR]}" - ) - return None - - def _get_attr(self, attr: str) -> Any: - """ - Safely get the name of the selected attribute of this object - - :param str attr: attr to select - :return: - """ - return self[attr] if attr in self else None - - def _table_to_dict(self) -> None: - """ - Create a dictionary from the database table data - """ - with self.db_cursor as cur: - cur.execute(f"SELECT * FROM {self.namespace}") - data = cur.fetchall() - _LOGGER.debug(f"Reading data from database for '{self.namespace}' namespace") - for record in data: - record_id = record[RECORD_ID] - for res_id, val in record.items(): - if val is not None: - self._report_data_element( - record_identifier=record_id, values={res_id: val} - ) - - def _init_postgres_table(self) -> bool: - """ - Initialize a PostgreSQL table based on the provided schema, - if it does not exist. Read the data stored in the database into the - memory otherwise. 
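The ORM rewrite above (PATCH 16) keeps this contract: a database-backed manager creates the namespace table if needed and, unless `database_only=True`, reads existing table rows back into its in-memory mapping at construction time. A minimal sketch of that behavior; the config and schema file paths below are hypothetical placeholders:

```python
from pipestat import PipestatManager

# Database backend; both paths are hypothetical.
psm = PipestatManager(
    namespace="test",
    schema_path="output_schema.yaml",
    config="pipestat_config.yaml",
)
# database_only defaults to False, so rows already present in the
# 'test' table were read into memory at init:
print(psm.data)

# With database_only=True nothing is cached in memory:
psm_db = PipestatManager(
    namespace="test",
    schema_path="output_schema.yaml",
    config="pipestat_config.yaml",
    database_only=True,
)
assert len(psm_db.data) == 0
```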
- - :return bool: whether the table has been created - """ - if self.schema is None: - raise SchemaNotFoundError("initialize the database table") - if self._check_table_exists(table_name=self.namespace): - _LOGGER.debug(f"Table '{self.namespace}' already exists in the database") - if not self[DB_ONLY_KEY]: - self._table_to_dict() - return False - _LOGGER.info(f"Initializing '{self.namespace}' table in '{PKG_NAME}' database") - columns = FIXED_COLUMNS + schema_to_columns(schema=self.schema) - self._create_table(table_name=self.namespace, columns=columns) - return True - - # def _create_status_type(self): - # with self.db_cursor as cur: - # s = sql.SQL(f"SELECT exists (SELECT 1 FROM pg_type WHERE typname = '{STATUS}');") - # cur.execute(s) - # if cur.fetchone()[0]: - # return - # with self.db_cursor as cur: - # status_strs = [f"'{st_id}'" for st_id in self.status_schema.keys()] - # status_str = ", ".join(status_strs) - # s = sql.SQL(f"CREATE TYPE {STATUS} as enum({status_str});") - # cur.execute(s) - - def _init_status_table(self): - status_table_name = f"{self.namespace}_{STATUS}" - # self._create_status_type() - if not self._check_table_exists(table_name=status_table_name): - _LOGGER.info( - f"Initializing '{status_table_name}' table in " f"'{PKG_NAME}' database" - ) - self._create_table(status_table_name, STATUS_TABLE_COLUMNS) - - def _create_table(self, table_name: str, columns: List[str]): - """ - Create a table - - :param str table_name: name of the table to create - :param str | List[str] columns: columns definition list, - for instance: ['name VARCHAR(50) NOT NULL'] - """ - columns = mk_list_of_str(columns) - with self.db_cursor as cur: - s = sql.SQL(f"CREATE TABLE {table_name} ({','.join(columns)})") - cur.execute(s) - - def _init_results_file(self) -> bool: - """ - Initialize YAML results file if it does not exist. - Read the data stored in the existing file into the memory otherwise. 
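The file backend keeps the same initialize-or-load logic. A minimal sketch of reporting against a YAML results file, with hypothetical paths; the trailing comment shows roughly what the file holds afterwards, mirroring the namespace/record/result nesting used throughout:

```python
from pipestat import PipestatManager

psm = PipestatManager(
    namespace="test",
    record_identifier="sample1",
    schema_path="output_schema.yaml",   # hypothetical path
    results_file_path="results.yaml",   # created if it does not exist
)
psm.report(values={"number_of_things": 10})

# results.yaml now holds, roughly:
# test:
#   sample1:
#     number_of_things: 10
```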
- - :return bool: whether the file has been created - """ - if not os.path.exists(self.file): - _LOGGER.info(f"Initializing results file '{self.file}'") - data = YacAttMap(entries={self.namespace: "{}"}) - data.write(filepath=self.file) - data.make_readonly() - self[DATA_KEY] = data - return True - _LOGGER.debug(f"Reading data from '{self.file}'") - data = YacAttMap(filepath=self.file) - filtered = list(filter(lambda x: not x.startswith("_"), data.keys())) - if filtered and self.namespace not in filtered: - raise PipestatDatabaseError( - f"'{self.file}' is already used to report results for " - f"other namespace: {filtered[0]}" - ) - self[DATA_KEY] = data - return False - - def _check_table_exists(self, table_name: str) -> bool: - """ - Check if the specified table exists - - :param str table_name: table name to be checked - :return bool: whether the specified table exists - """ - with self.db_cursor as cur: - cur.execute( - "SELECT EXISTS(SELECT * FROM information_schema.tables " - "WHERE table_name=%s)", - (table_name,), - ) - return cur.fetchone()[0] - - def _check_record( - self, condition_col: str, condition_val: str, table_name: str - ) -> bool: - """ - Check if the record matching the condition is in the table - - :param str condition_col: column to base the check on - :param str condition_val: value in the selected column - :param str table_name: name of the table ot check the record in - :return bool: whether any record matches the provided condition - """ - with self.db_cursor as cur: - statement = ( - f"SELECT EXISTS(SELECT 1 from {table_name} " - f"WHERE {condition_col}=%s)" - ) - cur.execute(statement, (condition_val,)) - return cur.fetchone()[0] - - def _count_rows(self, table_name: str) -> int: - """ - Count rows in a selected table - - :param str table_name: table to count rows for - :return int: number of rows in the selected table - """ - with self.db_cursor as cur: - statement = sql.SQL("SELECT COUNT(*) FROM {}").format( - sql.Identifier(table_name) - ) - cur.execute(statement) - return cur.fetchall()[0][0] - - def _report_postgres( - self, value: Dict[str, Any], record_identifier: str, table_name: str = None - ) -> int: - """ - Check if record with this record identifier in table, create new record - if not (INSERT), update the record if yes (UPDATE). - - Currently supports just one column at a time. 
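For a single reported result, the INSERT-if-missing, UPDATE-otherwise flow described here reduces to two parameterized statements along these lines (the table and column names are illustrative only, not taken from a real schema):

```python
# Step 1: ensure a row for the record exists.
insert_stmt = "INSERT INTO test (record_identifier) VALUES (%s)"

# Step 2: set the result column on that row and get its id back.
update_stmt = (
    "UPDATE test SET number_of_things=%(number_of_things)s"
    " WHERE record_identifier=%(record_identifier)s RETURNING id"
)
```

Mapping values are wrapped in `psycopg2.extras.Json` before execution, which is how `dict`-typed results end up in the `JSONB` columns created from the schema.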
- - :param str record_identifier: unique identifier of the record, value to - in 'record_identifier' column to look for to determine if the record - already exists in the table - :param dict value: a mapping of pair of table column names and - respective values to be inserted to the database - :return int: id of the row just inserted - """ - table_name = table_name or self.namespace - if not self._check_record( - condition_col=RECORD_ID, - condition_val=record_identifier, - table_name=table_name, - ): - with self.db_cursor as cur: - cur.execute( - f"INSERT INTO {table_name} ({RECORD_ID}) VALUES (%s)", - (record_identifier,), - ) - # prep a list of SQL objects with column-named value placeholders - columns = sql.SQL(",").join( - [ - sql.SQL("{}=%({})s").format(sql.Identifier(k), sql.SQL(k)) - for k in list(value.keys()) - ] - ) - # construct the query template to execute - query = sql.SQL("UPDATE {n} SET {c} WHERE {id}=%({id})s RETURNING id").format( - n=sql.Identifier(table_name), c=columns, id=sql.SQL(RECORD_ID) - ) - # preprocess the values, dict -> Json - values = {k: Json(v) if isinstance(v, dict) else v for k, v in value.items()} - # add record_identifier column, which is specified outside of values - values.update({RECORD_ID: record_identifier}) - with self.db_cursor as cur: - cur.execute(query, values) - return cur.fetchone()[0] - - def clear_status( - self, record_identifier: str = None, flag_names: List[str] = None - ) -> List[str]: - """ - Remove status flags - - :param str record_identifier: name of the record to remove flags for - :param Iterable[str] flag_names: Names of flags to remove, optional; if - unspecified, all schema-defined flag names will be used. - :return List[str]: Collection of names of flags removed - """ - r_id = self._strict_record_id(record_identifier) - if self.file is not None: - flag_names = flag_names or list(self.status_schema.keys()) - if isinstance(flag_names, str): - flag_names = [flag_names] - removed = [] - for f in flag_names: - path_flag_file = self.get_status_flag_path( - status_identifier=f, record_identifier=r_id - ) - try: - os.remove(path_flag_file) - except: - pass - else: - _LOGGER.info(f"Removed existing flag: {path_flag_file}") - removed.append(f) - return removed - else: - removed = self.get_status(r_id) - status_table_name = f"{self.namespace}_{STATUS}" - with self.db_cursor as cur: - try: - cur.execute( - f"DELETE FROM {status_table_name} WHERE " - f"{RECORD_ID}='{r_id}'" - ) - except Exception as e: - _LOGGER.error( - f"Could not remove the status from the " - f"database. Exception: {e}" - ) - return [] - else: - return [removed] - - def get_status_flag_path( - self, status_identifier: str, record_identifier=None - ) -> str: - """ - Get the path to the status file flag - - :param str status_identifier: one of the defined status IDs in schema - :param str record_identifier: unique record ID, optional if - specified in the object constructor - :return str: absolute path to the flag file or None if object is - backed by a DB - """ - if self.file is None: - # DB as the backend - return - r_id = self._strict_record_id(record_identifier) - return os.path.join( - self[STATUS_FILE_DIR], f"{self.namespace}_{r_id}_{status_identifier}.flag" - ) - - def set_status(self, status_identifier: str, record_identifier: str = None) -> None: - """ - Set pipeline run status. - - The status identifier needs to match one of identifiers specified in - the status schema. A basic, ready to use, status schema is shipped with - this package. 
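The status API is symmetric across both backends. A short usage sketch, assuming a manager `psm` like the ones constructed above; `"running"` is one of the identifiers in the default status schema:

```python
psm.set_status(record_identifier="sample1", status_identifier="running")
assert psm.get_status(record_identifier="sample1") == "running"

# Flag files (or the status table row) can be removed again:
psm.clear_status(record_identifier="sample1")
```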
- - :param str status_identifier: status to set, one of statuses defined - in the status schema - :param str record_identifier: record identifier to set the - pipeline status for - """ - r_id = self._strict_record_id(record_identifier) - known_status_identifiers = self.status_schema.keys() - if status_identifier not in known_status_identifiers: - raise PipestatError( - f"'{status_identifier}' is not a defined status identifier. " - f"These are allowed: {known_status_identifiers}" - ) - prev_status = self.get_status(r_id) - if self.file is not None: - if prev_status: - prev_flag_path = self.get_status_flag_path(prev_status, r_id) - os.remove(prev_flag_path) - flag_path = self.get_status_flag_path(status_identifier, r_id) - create_lock(flag_path) - with open(flag_path, "w") as f: - f.write(status_identifier) - remove_lock(flag_path) - else: - try: - self._report_postgres( - value={STATUS: status_identifier}, - record_identifier=r_id, - table_name=f"{self.namespace}_{STATUS}", - ) - except Exception as e: - _LOGGER.error( - f"Could not insert into the status table. " f"Exception: {e}" - ) - raise - if prev_status: - _LOGGER.debug( - f"Changed status from '{prev_status}' to '{status_identifier}'" - ) - - def check_result_exists(self, result_identifier, record_identifier=None): - """ - Check if the result has been reported - - :param str record_identifier: unique identifier of the record - :param str result_identifier: name of the result to check - :return bool: whether the specified result has been reported for the - indicated record in current namespace - """ - record_identifier = self._strict_record_id(record_identifier) - return self._check_which_results_exist( - results=[result_identifier], rid=record_identifier - ) - - def _check_which_results_exist( - self, results: List[str], rid: str = None - ) -> List[str]: - """ - Check which results have been reported - - :param str rid: unique identifier of the record - :param List[str] results: names of the results to check - :return List[str]: whether the specified result has been reported for the - indicated record in current namespace - """ - rid = self._strict_record_id(rid) - existing = [] - for r in results: - if not self[DB_ONLY_KEY]: - if ( - self.namespace in self.data - and rid in self.data[self.namespace] - and r in self.data[self.namespace][rid] - ): - existing.append(r) - else: - with self.db_cursor as cur: - try: - cur.execute( - f"SELECT {r} FROM {self.namespace} WHERE {RECORD_ID}=%s", - (rid,), - ) - except Exception: - continue - else: - res = cur.fetchone() - if res is not None and res[0] is not None: - existing.append(r) - return existing - - def check_record_exists(self, record_identifier: str = None) -> bool: - """ - Check if the record exists - - :param str record_identifier: unique identifier of the record - :return bool: whether the record exists - """ - record_identifier = self._strict_record_id(record_identifier) - if self[DB_ONLY_KEY]: - with self.db_cursor as cur: - cur.execute( - f"SELECT exists(SELECT 1 from {self.namespace} " - f"WHERE {RECORD_ID}=%s)", - (record_identifier,), - ) - return cur.fetchone() - if ( - self.namespace in self.data - and record_identifier in self.data[self.namespace] - ): - return True - return False - - def report( - self, - values: Dict[str, Any], - record_identifier: str = None, - force_overwrite: bool = False, - strict_type: bool = True, - return_id: bool = False, - ) -> Union[bool, int]: - """ - Report a result. 
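Typical `report` usage, again assuming a manager `psm` with a bound record identifier and a schema defining `name_of_something`; an existing result is only replaced when `force_overwrite=True`:

```python
assert psm.report(values={"name_of_something": "test_name"})
# A second report of the same result is refused and returns False...
assert not psm.report(values={"name_of_something": "other_name"})
# ...unless overwriting is explicitly forced:
assert psm.report(
    values={"name_of_something": "other_name"}, force_overwrite=True
)
```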
- - :param Dict[str, any] values: dictionary of result-value pairs - :param str record_identifier: unique identifier of the record, value - in 'record_identifier' column to look for to determine if the record - already exists - :param bool force_overwrite: whether to overwrite the existing record - :param bool strict_type: whether the type of the reported values should - remain as is. Pipestat would attempt to convert to the - schema-defined one otherwise - :param bool return_id: PostgreSQL IDs of the records that have been - updated. Not available with results file as backend - :return bool | int: whether the result has been reported or the ID of - the updated record in the table, if requested - """ - record_identifier = self._strict_record_id(record_identifier) - if return_id and self.file is not None: - raise NotImplementedError( - "There is no way to return the updated object ID while using " - "results file as the object backend" - ) - if self.schema is None: - raise SchemaNotFoundError("report results") - updated_ids = False - result_identifiers = list(values.keys()) - self.assert_results_defined(results=result_identifiers) - existing = self._check_which_results_exist( - rid=record_identifier, results=result_identifiers - ) - if existing: - _LOGGER.warning( - f"These results exist for '{record_identifier}': {existing}" - ) - if not force_overwrite: - return False - _LOGGER.info(f"Overwriting existing results: {existing}") - for r in result_identifiers: - validate_type( - value=values[r], schema=self.result_schemas[r], strict_type=strict_type - ) - if self.file is not None: - self.data.make_writable() - if not self[DB_ONLY_KEY]: - self._report_data_element( - record_identifier=record_identifier, values=values - ) - if self.file is not None: - self.data.write() - self.data.make_readonly() - else: - try: - updated_ids = self._report_postgres( - record_identifier=record_identifier, value=values - ) - except Exception as e: - _LOGGER.error( - f"Could not insert the result into the database. " f"Exception: {e}" - ) - if not self[DB_ONLY_KEY]: - for r in result_identifiers: - del self[DATA_KEY][self.namespace][record_identifier][r] - raise - nl = "\n" - rep_strs = [f"{k}: {v}" for k, v in values.items()] - _LOGGER.info( - f"Reported records for '{record_identifier}' in '{self.namespace}' " - f"namespace:{nl} - {(nl + ' - ').join(rep_strs)}" - ) - return True if not return_id else updated_ids - - def _report_data_element( - self, record_identifier: str, values: Dict[str, Any] - ) -> None: - """ - Update the value of a result in a current namespace. - - This method overwrites any existing data and creates the required - hierarchical mapping structure if needed. - - :param str record_identifier: unique identifier of the record - :param Dict[str, Any] values: dict of results identifiers and values - to be reported - """ - self[DATA_KEY].setdefault(self.namespace, PXAM()) - self[DATA_KEY][self.namespace].setdefault(record_identifier, PXAM()) - for res_id, val in values.items(): - self[DATA_KEY][self.namespace][record_identifier][res_id] = val - - def select( - self, - columns: Union[str, List[str]] = None, - condition: str = None, - condition_val: str = None, - offset: int = None, - limit: int = None, - ) -> List[psycopg2.extras.DictRow]: - """ - Get all the contents from the selected table, possibly restricted by - the provided condition. 
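For the record, the condition interface documented below took a raw SQL snippet with `%s` placeholders plus a separate list of values. A sketch against the psycopg2-era backend; the column name and value are illustrative:

```python
# Old psycopg2-backed usage (this API is removed later in this series):
rows = psm.select(
    columns=["record_identifier", "number_of_things"],
    condition="number_of_things=%s",
    condition_val=[100],
    offset=0,
    limit=10,
)
```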
- - :param str | List[str] columns: columns to select - :param str condition: condition to restrict the results - with, will be appended to the end of the SELECT statement and - safely populated with 'condition_val', - for example: `"id=%s"` - :param list condition_val: values to fill the placeholder - in 'condition' with - :param int offset: number of records to be skipped - :param int limit: max number of records to be returned - :return List[psycopg2.extras.DictRow]: all table contents - """ - if self.file: - raise NotImplementedError( - "Selection is not supported on objects backed by results files." - " Use 'retrieve' method instead." - ) - condition, condition_val = preprocess_condition_pair(condition, condition_val) - if not columns: - columns = sql.SQL("*") - else: - columns = sql.SQL(",").join( - [sql.Identifier(x) for x in mk_list_of_str(columns)] - ) - statement = sql.SQL("SELECT {} FROM {}").format( - columns, sql.Identifier(self.namespace) - ) - if condition: - statement += sql.SQL(" WHERE ") - statement += condition - statement = paginate_query(statement, offset, limit) - with self.db_cursor as cur: - cur.execute(query=statement, vars=condition_val) - result = cur.fetchall() - return result - - def retrieve( - self, record_identifier: str = None, result_identifier: str = None - ) -> Union[Any, Dict[str, Any]]: - """ - Retrieve a result for a record. - - If no result ID specified, results for the entire record will - be returned. - - :param str record_identifier: unique identifier of the record - :param str result_identifier: name of the result to be retrieved - :return any | Dict[str, any]: a single result or a mapping with all the - results reported for the record - """ - record_identifier = self._strict_record_id(record_identifier) - if self[DB_ONLY_KEY]: - if result_identifier is not None: - existing = self._check_which_results_exist( - results=[result_identifier], rid=record_identifier - ) - if not existing: - raise PipestatDatabaseError( - f"Result '{result_identifier}' not found for record " - f"'{record_identifier}'" - ) - with self.db_cursor as cur: - query = sql.SQL( - f"SELECT {result_identifier or '*'} " - f"FROM {self.namespace} WHERE {RECORD_ID}=%s" - ) - cur.execute(query, (record_identifier,)) - result = cur.fetchall() - if len(result) > 0: - if result_identifier is None: - return {k: v for k, v in dict(result[0]).items() if v is not None} - return dict(result[0])[result_identifier] - raise PipestatDatabaseError(f"Record '{record_identifier}' not found") - else: - if record_identifier not in self.data[self.namespace]: - raise PipestatDatabaseError(f"Record '{record_identifier}' not found") - if result_identifier is None: - return self.data[self.namespace][record_identifier].to_dict() - if result_identifier not in self.data[self.namespace][record_identifier]: - raise PipestatDatabaseError( - f"Result '{result_identifier}' not found for record " - f"'{record_identifier}'" - ) - return self.data[self.namespace][record_identifier][result_identifier] - - def remove( - self, record_identifier: str = None, result_identifier: str = None - ) -> bool: - """ - Remove a result. - - If no result ID specified or last result is removed, the entire record - will be removed. - - :param str record_identifier: unique identifier of the record - :param str result_identifier: name of the result to be removed or None - if the record should be removed. 
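Retrieval and removal are symmetric operations; a minimal sketch reusing `psm`, with illustrative record and result names:

```python
# All results for a record, or a single named result.
all_results = psm.retrieve(record_identifier="sample1")
one_result = psm.retrieve(
    record_identifier="sample1", result_identifier="number_of_things"
)

# Remove one result; removing the last remaining result drops the record too.
psm.remove(record_identifier="sample1", result_identifier="number_of_things")

# Or drop the whole record at once (returns False if it no longer exists).
psm.remove(record_identifier="sample1")
```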
- :return bool: whether the result has been removed - """ - record_identifier = self._strict_record_id(record_identifier) - rm_record = True if result_identifier is None else False - if not self.check_record_exists(record_identifier): - _LOGGER.error(f"Record '{record_identifier}' not found") - return False - if result_identifier and not self.check_result_exists( - result_identifier, record_identifier - ): - _LOGGER.error( - f"'{result_identifier}' has not been reported for " - f"'{record_identifier}'" - ) - return False - if self.file: - self.data.make_writable() - if not self[DB_ONLY_KEY]: - if rm_record: - _LOGGER.info(f"Removing '{record_identifier}' record") - del self[DATA_KEY][self.namespace][record_identifier] - else: - val_backup = self[DATA_KEY][self.namespace][record_identifier][ - result_identifier - ] - del self[DATA_KEY][self.namespace][record_identifier][result_identifier] - _LOGGER.info( - f"Removed result '{result_identifier}' for record " - f"'{record_identifier}' from '{self.namespace}' namespace" - ) - if not self[DATA_KEY][self.namespace][record_identifier]: - _LOGGER.info( - f"Last result removed for '{record_identifier}'. " - f"Removing the record" - ) - del self[DATA_KEY][self.namespace][record_identifier] - rm_record = True - if self.file: - self.data.write() - self.data.make_readonly() - if self.file is None: - if rm_record: - try: - with self.db_cursor as cur: - cur.execute( - f"DELETE FROM {self.namespace} WHERE " - f"{RECORD_ID}='{record_identifier}'" - ) - except Exception as e: - _LOGGER.error( - f"Could not remove the result from the " - f"database. Exception: {e}" - ) - self[DATA_KEY][self.namespace].setdefault(record_identifier, PXAM()) - raise - return True - try: - with self.db_cursor as cur: - cur.execute( - f"UPDATE {self.namespace} SET {result_identifier}=null " - f"WHERE {RECORD_ID}='{record_identifier}'" - ) - except Exception as e: - _LOGGER.error( - f"Could not remove the result from the database. 
" f"Exception: {e}" - ) - if not self[DB_ONLY_KEY]: - self[DATA_KEY][self.namespace][record_identifier][ - result_identifier - ] = val_backup - raise - return True - - def validate_schema(self) -> None: - """ - Check schema for any possible issues - - :raises SchemaError: if any schema format issue is detected - """ - - def _recursively_replace_custom_types(s: dict) -> Dict: - """ - Replace the custom types in pipestat schema with canonical types - - :param dict s: schema to replace types in - :return dict: schema with types replaced - """ - for k, v in s.items(): - assert SCHEMA_TYPE_KEY in v, SchemaError( - f"Result '{k}' is missing '{SCHEMA_TYPE_KEY}' key" - ) - if v[SCHEMA_TYPE_KEY] == "object" and SCHEMA_PROP_KEY in s[k]: - _recursively_replace_custom_types(s[k][SCHEMA_PROP_KEY]) - if v[SCHEMA_TYPE_KEY] in CANONICAL_TYPES.keys(): - s.setdefault(k, {}) - s[k].setdefault(SCHEMA_PROP_KEY, {}) - s[k][SCHEMA_PROP_KEY].update( - CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]][SCHEMA_PROP_KEY] - ) - s[k].setdefault("required", []) - s[k]["required"].extend( - CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]]["required"] - ) - s[k][SCHEMA_TYPE_KEY] = CANONICAL_TYPES[v[SCHEMA_TYPE_KEY]][ - SCHEMA_TYPE_KEY - ] - return s - - schema = deepcopy(self.schema) - _LOGGER.debug(f"Validating input schema") - assert isinstance(schema, dict), SchemaError( - f"The schema has to be a {dict().__class__.__name__}" - ) - for col_name in RESERVED_COLNAMES: - assert col_name not in schema.keys(), PipestatError( - f"'{col_name}' is an identifier reserved by pipestat" - ) - self[RES_SCHEMAS_KEY] = {} - schema = _recursively_replace_custom_types(schema) - self[RES_SCHEMAS_KEY] = schema - - def assert_results_defined(self, results: List[str]) -> None: - """ - Assert provided list of results is defined in the schema - - :param List[str] results: list of results to - check for existence in the schema - :raises SchemaError: if any of the results is not defined in the schema - """ - known_results = self.result_schemas.keys() - for r in results: - assert r in known_results, SchemaError( - f"'{r}' is not a known result. Results defined in the " - f"schema are: {list(known_results)}." 
- ) - - def check_connection(self) -> bool: - """ - Check whether a PostgreSQL connection has been established - - :return bool: whether the connection has been established - """ - if self.file is not None: - raise PipestatDatabaseError( - f"The {self.__class__.__name__} object " f"is not backed by a database" - ) - if DB_CONNECTION_KEY in self and isinstance( - self[DB_CONNECTION_KEY], psycopg2.extensions.connection - ): - return True - return False - - def establish_postgres_connection(self, suppress: bool = False) -> bool: - """ - Establish PostgreSQL connection using the config data - - :param bool suppress: whether to suppress any connection errors - :return bool: whether the connection has been established successfully - """ - if self.check_connection(): - raise PipestatDatabaseError( - f"Connection is already established: " - f"{self[DB_CONNECTION_KEY].info.host}" - ) - try: - self[DB_CONNECTION_KEY] = psycopg2.connect( - dbname=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_NAME_KEY], - user=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_USER_KEY], - password=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PASSWORD_KEY], - host=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY], - port=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PORT_KEY], - ) - except psycopg2.Error as e: - _LOGGER.error( - f"Could not connect to: " - f"{self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY]}" - ) - _LOGGER.info(f"Caught error: {e}") - if suppress: - return False - raise - else: - _LOGGER.debug( - f"Established connection with PostgreSQL: " - f"{self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY]}" - ) - return True - - def close_postgres_connection(self) -> None: - """ - Close connection and remove client bound - """ - if not self.check_connection(): - raise PipestatDatabaseError( - f"The connection has not been established: " - f"{self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY]}" - ) - self[DB_CONNECTION_KEY].close() - del self[DB_CONNECTION_KEY] - _LOGGER.debug( - f"Closed connection with PostgreSQL: " - f"{self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY]}" - ) - - def _strict_record_id(self, forced_value: str = None) -> str: - """ - Get record identifier from the outer source or stored with this object - - :param str forced_value: return this value - :return str: record identifier - """ - if forced_value is not None: - return forced_value - if self.record_identifier is not None: - return self.record_identifier - raise PipestatError( - f"You must provide the record identifier you want to perform " - f"the action on. Either in the {self.__class__.__name__} " - f"constructor or as an argument to the method." 
- ) diff --git a/tests/data/config_no_schema.yaml b/tests/data/config_no_schema.yaml index a82d0c92..e9202da9 100644 --- a/tests/data/config_no_schema.yaml +++ b/tests/data/config_no_schema.yaml @@ -2,6 +2,7 @@ namespace: test record_identifier: sample1 #schema_path: sample_output_schema.yaml #$HOME/Desktop/sample_output_schema.yaml database: + dialect: postgresql name: pipestat-test user: postgres password: pipestat-password From b384c37aed04fc710f3b718f690ef73662eaef15 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 16 Apr 2021 13:54:32 -0400 Subject: [PATCH 21/65] accept a custom declarative base if needed, repr --- pipestat/pipestat.py | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 5b814480..b6ef451a 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -1,15 +1,15 @@ from contextlib import contextmanager from copy import deepcopy from logging import getLogger -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple, Union from urllib.parse import quote_plus import sqlalchemy.orm from attmap import PathExAttMap as PXAM from jsonschema import validate -from sqlalchemy import Column, Float, ForeignKey, Integer, String, Table, create_engine +from sqlalchemy import Column, create_engine from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import DeclarativeMeta, relationship, sessionmaker +from sqlalchemy.orm import DeclarativeMeta, sessionmaker from ubiquerg import create_lock, remove_lock from yacman import YacAttMap @@ -33,14 +33,15 @@ class PipestatManager(dict): def __init__( self, - namespace: str = None, - record_identifier: str = None, - schema_path: str = None, - results_file_path: str = None, - database_only: bool = False, - config: Union[str, dict] = None, - status_schema_path: str = None, - flag_file_dir: str = None, + namespace: Optional[str] = None, + record_identifier: Optional[str] = None, + schema_path: Optional[str] = None, + results_file_path: Optional[str] = None, + database_only: Optional[bool] = False, + config: Optional[Union[str, dict]] = None, + status_schema_path: Optional[str] = None, + flag_file_dir: Optional[str] = None, + custom_declarative_base: Optional[sqlalchemy.orm.DeclarativeMeta] = None, ): """ Initialize the object @@ -60,6 +61,8 @@ def __init__( with the config file content :param str status_schema_path: path to the status schema that formalizes the status flags structure + :param sqlalchemy.orm.DeclarativeMeta custom_declarative_base: a declarative base to + use for ORMs creation a new instance will be created if not provided """ def _check_cfg_key(cfg: dict, key: str) -> bool: @@ -218,16 +221,16 @@ def _select_value( ] ): raise MissingConfigDataError( - "Must specify all database login " "credentials or result_file_path" + "Must specify all database login credentials or result_file_path" ) self[DB_ORMS_KEY] = {} - self[DB_BASE_KEY] = declarative_base() + self[DB_BASE_KEY] = custom_declarative_base or declarative_base() self[DATA_KEY] = YacAttMap() self._init_db_table() self._init_status_table() else: raise MissingConfigDataError( - "Must specify either database login " "credentials or a YAML file path" + "Must specify either database login credentials or a YAML file path" ) def __str__(self): @@ -238,7 +241,9 @@ def __str__(self): """ res = f"{self.__class__.__name__} ({self.namespace})" res += "\nBackend: {}".format( - f"file ({self.file})" if 
self.file else "PostgreSQL" + f"file ({self.file})" + if self.file + else f"Database (dialect: {self[DB_ENGINE_KEY].dialect.name})" ) res += f"\nResults schema source: {self.schema_path}" res += f"\nStatus schema source: {self.status_schema_source}" @@ -1054,7 +1059,9 @@ def _dynamic_filter( return result def retrieve( - self, record_identifier: str = None, result_identifier: str = None + self, + record_identifier: Optional[str] = None, + result_identifier: Optional[str] = None, ) -> Union[Any, Dict[str, Any]]: """ Retrieve a result for a record. From 420bbfbfd5b638df0aeaef980fcf7cd05c606028 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 19 Apr 2021 09:23:20 -0400 Subject: [PATCH 22/65] enable selection of custom DBAPI drivers --- pipestat/const.py | 2 ++ pipestat/pipestat.py | 5 ++++- tests/data/config.yaml | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pipestat/const.py b/pipestat/const.py index 09178d07..acee68b6 100644 --- a/pipestat/const.py +++ b/pipestat/const.py @@ -35,6 +35,7 @@ CFG_PASSWORD_KEY = "password" CFG_USER_KEY = "user" CFG_DIALECT_KEY = "dialect" # sqlite, mysql, postgresql, oracle, or mssql +CFG_DRIVER_KEY = "driver" DB_CREDENTIALS = [ CFG_HOST_KEY, @@ -43,6 +44,7 @@ CFG_USER_KEY, CFG_NAME_KEY, CFG_DIALECT_KEY, + CFG_DRIVER_KEY, ] # object attribute names diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index b6ef451a..39d91417 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -387,13 +387,16 @@ def db_url(self) -> str: host=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_HOST_KEY], port=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_PORT_KEY], dialect=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_DIALECT_KEY], + driver=self[CONFIG_KEY][CFG_DATABASE_KEY][CFG_DRIVER_KEY], ) except (KeyError, AttributeError) as e: raise PipestatDatabaseError( f"Could not determine database URL. 
Caught error: {str(e)}" ) parsed_creds = {k: quote_plus(str(v)) for k, v in creds.items()} - return "{dialect}://{user}:{passwd}@{host}:{port}/{name}".format(**parsed_creds) + return "{dialect}+{driver}://{user}:{passwd}@{host}:{port}/{name}".format( + **parsed_creds + ) @property @contextmanager diff --git a/tests/data/config.yaml b/tests/data/config.yaml index 1e619592..7385e630 100644 --- a/tests/data/config.yaml +++ b/tests/data/config.yaml @@ -3,9 +3,10 @@ record_identifier: sample1 schema_path: sample_output_schema.yaml #$HOME/Desktop/sample_output_schema.yaml database: dialect: postgresql + driver: psycopg2 name: pipestat-test user: postgres password: pipestat-password - host: localhost + host: 127.0.0.1 port: 5432 #results_file_path: results_file.yaml #$HOME/Desktop/results.yaml From 4bbc6cbe3af228184902c701e986788a15b2c064 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 19 Apr 2021 10:24:51 -0400 Subject: [PATCH 23/65] make pipestat DB type agnostic --- pipestat/const.py | 4 +-- pipestat/helpers.py | 56 ++++++++++++++++++++++++++++++++++++++ pipestat/pipestat.py | 65 ++++++-------------------------------------- 3 files changed, 66 insertions(+), 59 deletions(-) diff --git a/pipestat/const.py b/pipestat/const.py index acee68b6..e72ea111 100644 --- a/pipestat/const.py +++ b/pipestat/const.py @@ -123,8 +123,8 @@ "object": JSON, "image": JSON, "file": JSON, - "string": String, - "array": ARRAY(String), + "string": String(500), + "array": JSON, "boolean": Boolean, } diff --git a/pipestat/helpers.py b/pipestat/helpers.py index b24c0a0c..dbd35ef7 100644 --- a/pipestat/helpers.py +++ b/pipestat/helpers.py @@ -163,3 +163,59 @@ def paginate_query(query, offset, limit): ) query += sql.SQL(f" LIMIT {limit}") return query + + +from typing import Dict, List, Optional, Tuple, Union + +from sqlalchemy.orm import DeclarativeMeta, Query + + +def dynamic_filter( + ORM: DeclarativeMeta, + query: Query, + filter_conditions: List[Tuple[str, str, Union[str, List[str]]]], +): + """ + Return filtered query based on condition. + + :param sqlalchemy.orm.DeclarativeMeta ORM: + :param sqlalchemy.orm.Query query: takes query + :param [(key,operator,value)] filter_conditions: e.g. 
[("id", "eq", 1)] operator list + - eq for == + - lt for < + - ge for >= + - in for in_ + - like for like + :return: query + """ + for raw in filter_conditions: + try: + key, op, value = raw + except ValueError: + raise Exception(f"Invalid filter: {raw}") + column = getattr(ORM, key, None) + if column is None: + raise Exception(f"Invalid filter column: {key}") + if op == "in": + if isinstance(value, list): + filt = column.in_(value) + else: + filt = column.in_(value.split(",")) + else: + try: + attr = ( + list( + filter( + lambda e: hasattr(column, e % op), + ["%s", "%s_", "__%s__"], + ) + )[0] + % op + ) + except IndexError: + raise Exception(f"Invalid filter operator: {op}") + if value == "null": + value = None + filt = getattr(column, attr)(value) + query = query.filter(filt) + return query diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 39d91417..73bff113 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -481,7 +481,7 @@ def _auto_repr(x: Any) -> str: attr_dict = dict( __tablename__=tn, id=Column(Integer, primary_key=True), - record_identifier=Column(String, unique=True), + record_identifier=Column(SQL_CLASSES_BY_TYPE["string"], unique=True), ) for result_id, result_metadata in schema.items(): col_type = SQL_CLASSES_BY_TYPE[result_metadata[SCHEMA_TYPE_KEY]] @@ -500,7 +500,6 @@ def establish_db_connection(self) -> bool: """ if self.is_db_connected(): raise PipestatDatabaseError("Connection is already established") - self[DB_ENGINE_KEY] = create_engine(self.db_url, echo=True) self[DB_SESSION_KEY] = sessionmaker(bind=self[DB_ENGINE_KEY]) return True @@ -975,7 +974,9 @@ def select( self, table_name: Optional[str] = None, columns: Optional[List[str]] = None, - filter_condition: Optional[List[Tuple[str, str, Union[str, List[str]]]]] = None, + filter_conditions: Optional[ + List[Tuple[str, str, Union[str, List[str]]]] + ] = None, offset: Optional[int] = None, limit: Optional[int] = None, ) -> List[Any]: @@ -984,7 +985,7 @@ def select( :param str table_name: name of the table to SELECT from :param List[str] columns: columns to include in the result - :param [(key,operator,value)] filter_condition: e.g. [("id", "eq", 1)] operator list + :param [(key,operator,value)] filter_conditions: e.g. [("id", "eq", 1)] operator list - eq for == - lt for < - ge for >= @@ -994,65 +995,15 @@ def select( :param int limit: include this number of rows """ - def _dynamic_filter( - ORM: sqlalchemy.orm.DeclarativeMeta, - query: sqlalchemy.orm.Query, - filter_condition: List[Tuple[str, str, Union[str, List[str]]]], - ): - """ - Return filtered query based on condition. - - :param sqlalchemy.orm.DeclarativeMeta ORM: - :param sqlalchemy.orm.Query query: takes query - :param [(key,operator,value)] filter_condition: e.g. 
[("id", "eq", 1)] operator list - - eq for == - - lt for < - - ge for >= - - in for in_ - - like for like - :return: query - """ - for raw in filter_condition: - try: - key, op, value = raw - except ValueError: - raise Exception("Invalid filter: %s" % raw) - column = getattr(ORM, key, None) - if column is None: - raise Exception("Invalid filter column: %s" % key) - if op == "in": - if isinstance(value, list): - filt = column.in_(value) - else: - filt = column.in_(value.split(",")) - else: - try: - attr = ( - list( - filter( - lambda e: hasattr(column, e % op), - ["%s", "%s_", "__%s__"], - ) - )[0] - % op - ) - except IndexError: - raise Exception(f"Invalid filter operator: {op}") - if value == "null": - value = None - filt = getattr(column, attr)(value) - query = query.filter(filt) - return query - ORM = self._get_orm(table_name or self.namespace) with self.session as s: if columns is not None: query = s.query(*[getattr(ORM, column) for column in columns]) else: query = s.query(ORM) - if filter_condition is not None: - query = _dynamic_filter( - ORM=ORM, query=query, filter_condition=filter_condition + if filter_conditions is not None: + query = dynamic_filter( + ORM=ORM, query=query, filter_conditions=filter_conditions ) if isinstance(offset, int): query = query.offset(offset) From ace33a8f07b95ba61d7dd24021421d9fd2aa4d11 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 20 Apr 2021 11:10:50 -0400 Subject: [PATCH 24/65] add JSONB querying support --- pipestat/const.py | 9 +++--- pipestat/helpers.py | 73 ++++++++++++++++++++++++++------------------ pipestat/pipestat.py | 19 ++++++++---- 3 files changed, 62 insertions(+), 39 deletions(-) diff --git a/pipestat/const.py b/pipestat/const.py index e72ea111..1772e5fd 100644 --- a/pipestat/const.py +++ b/pipestat/const.py @@ -1,5 +1,6 @@ import os +from sqlalchemy.dialects.postgresql.json import JSONB from sqlalchemy.types import ARRAY, JSON, Boolean, Float, Integer, String PKG_NAME = "pipestat" @@ -120,11 +121,11 @@ SQL_CLASSES_BY_TYPE = { "number": Float, "integer": Integer, - "object": JSON, - "image": JSON, - "file": JSON, + "object": JSONB, + "image": JSONB, + "file": JSONB, "string": String(500), - "array": JSON, + "array": JSONB, "boolean": Boolean, } diff --git a/pipestat/helpers.py b/pipestat/helpers.py index dbd35ef7..e51dd49e 100644 --- a/pipestat/helpers.py +++ b/pipestat/helpers.py @@ -3,6 +3,7 @@ from typing import Any, Dict import jsonschema +import sqlalchemy.orm from oyaml import safe_load from psycopg2 import sql from ubiquerg import expandpath @@ -173,8 +174,9 @@ def paginate_query(query, offset, limit): def dynamic_filter( ORM: DeclarativeMeta, query: Query, - filter_conditions: List[Tuple[str, str, Union[str, List[str]]]], -): + filter_conditions: Optional[List[Tuple[str, str, Union[str, List[str]]]]] = None, + json_filter_conditions: Optional[List[Tuple[str, str, str]]] = None, +) -> sqlalchemy.orm.Query: """ Return filtered query based on condition. @@ -186,36 +188,49 @@ def dynamic_filter( - ge for >= - in for in_ - like for like + :param [(col,key,value)] json_filter_conditions: conditions for JSONB column to query. + Only '==' is supported e.g. 
[("other", "genome", "hg38")] :return: query """ - for raw in filter_conditions: + + def _unpack_tripartite(x): try: - key, op, value = raw + e1, e2, e3 = x + return e1, e2, e3 except ValueError: - raise Exception(f"Invalid filter: {raw}") - column = getattr(ORM, key, None) - if column is None: - raise Exception(f"Invalid filter column: {key}") - if op == "in": - if isinstance(value, list): - filt = column.in_(value) + raise Exception(f"Invalid tripartite element: {x}") + + if filter_conditions is not None: + for filter_condition in filter_conditions: + key, op, value = _unpack_tripartite(filter_condition) + column = getattr(ORM, key, None) + if column is None: + raise Exception(f"Invalid filter column: {key}") + if op == "in": + if isinstance(value, list): + filt = column.in_(value) + else: + filt = column.in_(value.split(",")) else: - filt = column.in_(value.split(",")) - else: - try: - attr = ( - list( - filter( - lambda e: hasattr(column, e % op), - ["%s", "%s_", "__%s__"], - ) - )[0] - % op - ) - except IndexError: - raise Exception(f"Invalid filter operator: {op}") - if value == "null": - value = None - filt = getattr(column, attr)(value) - query = query.filter(filt) + try: + attr = ( + list( + filter( + lambda e: hasattr(column, e % op), + ["%s", "%s_", "__%s__"], + ) + )[0] + % op + ) + except IndexError: + raise Exception(f"Invalid filter operator: {op}") + if value == "null": + value = None + filt = getattr(column, attr)(value) + query = query.filter(filt) + + if json_filter_conditions is not None: + for json_filter_condition in json_filter_conditions: + col, key, value = _unpack_tripartite(json_filter_condition) + query = query.filter(getattr(ORM, col)[key].astext == value) return query diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 73bff113..79e0779f 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -977,20 +977,25 @@ def select( filter_conditions: Optional[ List[Tuple[str, str, Union[str, List[str]]]] ] = None, + json_filter_conditions: Optional[List[Tuple[str, str, str]]] = None, offset: Optional[int] = None, limit: Optional[int] = None, ) -> List[Any]: """ - Perform a SELECT on the table, filtering limited to a single condition + Perform a SELECT on the table :param str table_name: name of the table to SELECT from :param List[str] columns: columns to include in the result - :param [(key,operator,value)] filter_conditions: e.g. [("id", "eq", 1)] operator list + :param [(key,operator,value)] filter_conditions: e.g. [("id", "eq", 1)], operator list: - eq for == - lt for < - ge for >= - in for in_ - like for like + :param [(col,key,value)] json_filter_conditions: conditions for JSONB column to + query that include JSON column name, key withing the JSON object in that + column and the value to check the identity against. Therefore only '==' is + supported in non-nested checks, e.g. 
[("other", "genome", "hg38")] :param int offset: skip this number of rows :param int limit: include this number of rows """ @@ -1001,10 +1006,12 @@ def select( query = s.query(*[getattr(ORM, column) for column in columns]) else: query = s.query(ORM) - if filter_conditions is not None: - query = dynamic_filter( - ORM=ORM, query=query, filter_conditions=filter_conditions - ) + query = dynamic_filter( + ORM=ORM, + query=query, + filter_conditions=filter_conditions, + json_filter_conditions=json_filter_conditions, + ) if isinstance(offset, int): query = query.offset(offset) if isinstance(limit, int): From ca1df08fb6fb3ea24414782c4f5e7c3552594542 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 27 Apr 2021 15:15:51 -0400 Subject: [PATCH 25/65] store scoped_session as obj attr --- pipestat/const.py | 1 + pipestat/pipestat.py | 4 +++- tests/data/config_no_schema.yaml | 1 + tests/test_db_only_mode.py | 4 ++-- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pipestat/const.py b/pipestat/const.py index 1772e5fd..eac46bcd 100644 --- a/pipestat/const.py +++ b/pipestat/const.py @@ -64,6 +64,7 @@ FILE_KEY = "_file" RECORD_ID_KEY = "_record_id" DB_SESSION_KEY = "_db_session" +DB_SCOPED_SESSION_KEY = "_db_scoped_session" DB_ENGINE_KEY = "_db_engine" HIGHLIGHTED_KEY = "_highlighted" diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 79e0779f..8e19b635 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -9,7 +9,7 @@ from jsonschema import validate from sqlalchemy import Column, create_engine from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import DeclarativeMeta, sessionmaker +from sqlalchemy.orm import DeclarativeMeta, scoped_session, sessionmaker from ubiquerg import create_lock, remove_lock from yacman import YacAttMap @@ -482,6 +482,7 @@ def _auto_repr(x: Any) -> str: __tablename__=tn, id=Column(Integer, primary_key=True), record_identifier=Column(SQL_CLASSES_BY_TYPE["string"], unique=True), + query=self[DB_SCOPED_SESSION_KEY].query_property(), ) for result_id, result_metadata in schema.items(): col_type = SQL_CLASSES_BY_TYPE[result_metadata[SCHEMA_TYPE_KEY]] @@ -502,6 +503,7 @@ def establish_db_connection(self) -> bool: raise PipestatDatabaseError("Connection is already established") self[DB_ENGINE_KEY] = create_engine(self.db_url, echo=True) self[DB_SESSION_KEY] = sessionmaker(bind=self[DB_ENGINE_KEY]) + self[DB_SCOPED_SESSION_KEY] = scoped_session(self[DB_SESSION_KEY]) return True def is_db_connected(self) -> bool: diff --git a/tests/data/config_no_schema.yaml b/tests/data/config_no_schema.yaml index e9202da9..0248ec5a 100644 --- a/tests/data/config_no_schema.yaml +++ b/tests/data/config_no_schema.yaml @@ -3,6 +3,7 @@ record_identifier: sample1 #schema_path: sample_output_schema.yaml #$HOME/Desktop/sample_output_schema.yaml database: dialect: postgresql + driver: psycopg2 name: pipestat-test user: postgres password: pipestat-password diff --git a/tests/test_db_only_mode.py b/tests/test_db_only_mode.py index 660006ec..dd259f3a 100644 --- a/tests/test_db_only_mode.py +++ b/tests/test_db_only_mode.py @@ -25,7 +25,7 @@ def test_report(self, val, config_file_path, schema_file_path, results_file_path psm.report(values=val) assert len(psm.data) == 0 val_name = list(val.keys())[0] - assert psm.select(filter_condition=[(val_name, "eq", str(val[val_name]))]) + assert psm.select(filter_conditions=[(val_name, "eq", str(val[val_name]))]) @pytest.mark.parametrize(["rec_id", "res_id"], [("sample2", "number_of_things")]) 
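As a usage note on the renamed `filter_conditions` argument, combined with the `json_filter_conditions` support added in the previous patch, a database-backed query could look like the sketch below; the config path, column names, and values are illustrative:

```python
from pipestat import PipestatManager

psm = PipestatManager(
    namespace="test",
    schema_path="sample_output_schema.yaml",
    config="config.yaml",  # database-backed configuration
)

hits = psm.select(
    columns=["record_identifier"],
    filter_conditions=[("number_of_things", "eq", "100")],
    json_filter_conditions=[("output_file", "path", "/home/user/path.csv")],
    offset=0,
    limit=10,
)
```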
@pytest.mark.parametrize("backend", ["db"]) @@ -45,7 +45,7 @@ def test_select_limit( ) psm = PipestatManager(**args) result = psm.select( - filter_condition=[(RECORD_ID, "eq", rec_id)], + filter_conditions=[(RECORD_ID, "eq", rec_id)], columns=[res_id], limit=limit, ) From 294031e19281c8de689f9749bc7c0f7c95bf60cf Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 27 Apr 2021 15:18:58 -0400 Subject: [PATCH 26/65] reformat --- pipestat/cli.py | 2 +- pipestat/exceptions.py | 14 +++++++------- tests/test_init.py | 18 +++++++++--------- tests/test_status.py | 4 ++-- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pipestat/cli.py b/pipestat/cli.py index ab52c931..73bfe4f1 100644 --- a/pipestat/cli.py +++ b/pipestat/cli.py @@ -12,7 +12,7 @@ def main(): - """ Primary workflow """ + """Primary workflow""" from inspect import getdoc parser = logmuse.add_logging_options(build_argparser(getdoc(PipestatManager))) diff --git a/pipestat/exceptions.py b/pipestat/exceptions.py index 8d4f4522..6185aa39 100644 --- a/pipestat/exceptions.py +++ b/pipestat/exceptions.py @@ -16,20 +16,20 @@ class PipestatError(Exception): - """ Base exception type for this package """ + """Base exception type for this package""" __metaclass__ = abc.ABCMeta class SchemaError(PipestatError): - """ Schema error """ + """Schema error""" def __init__(self, msg): super(SchemaError, self).__init__(msg) class SchemaNotFoundError(SchemaError): - """ Schema not found error """ + """Schema not found error""" def __init__(self, msg): txt = ( @@ -40,7 +40,7 @@ def __init__(self, msg): class MissingConfigDataError(PipestatError): - """ Exception for invalid config file. """ + """Exception for invalid config file.""" def __init__(self, msg): spacing = " " if msg[-1] in ["?", ".", "\n"] else "; " @@ -49,14 +49,14 @@ def __init__(self, msg): class PipestatDatabaseError(PipestatError): - """ Database error """ + """Database error""" def __init__(self, msg): super(PipestatDatabaseError, self).__init__(msg) class InvalidTypeError(PipestatError): - """ Type of the reported value is not supported """ + """Type of the reported value is not supported""" def __init__(self, type): super(InvalidTypeError, self).__init__( @@ -66,7 +66,7 @@ def __init__(self, type): class IncompatibleClassError(PipestatError): - """ Class of the reported value is not supported """ + """Class of the reported value is not supported""" def __init__(self, cls, req_cls, type): super(IncompatibleClassError, self).__init__( diff --git a/tests/test_init.py b/tests/test_init.py index 1490a78a..e614c52e 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -31,7 +31,7 @@ def test_connection_overwrite_error(self, config_file_path, schema_file_path): class TestPipestatManagerInstantiation: def test_obj_creation_file(self, schema_file_path, results_file_path): - """ Object constructor works with file as backend""" + """Object constructor works with file as backend""" assert isinstance( PipestatManager( namespace="test", @@ -42,7 +42,7 @@ def test_obj_creation_file(self, schema_file_path, results_file_path): ) def test_obj_creation_db(self, config_file_path): - """ Object constructor works with database as backend""" + """Object constructor works with database as backend""" assert isinstance(PipestatManager(config=config_file_path), PipestatManager) @pytest.mark.xfail(reason="schema is no longer required to init the object") @@ -75,7 +75,7 @@ def test_schema_recursive_custom_type_conversion( ) def test_missing_cfg_data(self, schema_file_path): - """ Object 
constructor raises exception if cfg is missing data """ + """Object constructor raises exception if cfg is missing data""" tmp_pth = os.path.join(mkdtemp(), "res.yml") with open(tmp_pth, "w") as file: dump({"database": {"host": "localhost"}}, file) @@ -85,12 +85,12 @@ def test_missing_cfg_data(self, schema_file_path): ) def test_unknown_backend(self, schema_file_path): - """ Either db config or results file path needs to be provided """ + """Either db config or results file path needs to be provided""" with pytest.raises(MissingConfigDataError): PipestatManager(namespace="test", schema_path=schema_file_path) def test_create_results_file(self, schema_file_path): - """ Results file is created if a nonexistent path provided """ + """Results file is created if a nonexistent path provided""" tmp_res_file = os.path.join(mkdtemp(), "res.yml") print(f"Temporary results file: {tmp_res_file}") assert not os.path.exists(tmp_res_file) @@ -102,7 +102,7 @@ def test_create_results_file(self, schema_file_path): assert os.path.exists(tmp_res_file) def test_use_other_namespace_file(self, schema_file_path): - """ Results file can be used with just one namespace """ + """Results file can be used with just one namespace""" tmp_res_file = os.path.join(mkdtemp(), "res.yml") print(f"Temporary results file: {tmp_res_file}") assert not os.path.exists(tmp_res_file) @@ -121,14 +121,14 @@ def test_use_other_namespace_file(self, schema_file_path): @pytest.mark.parametrize("pth", [["/$HOME/path.yaml"], 1]) def test_wrong_class_results_file(self, schema_file_path, pth): - """ Input string that is not a file path raises an informative error """ + """Input string that is not a file path raises an informative error""" with pytest.raises((TypeError, AssertionError)): PipestatManager( namespace="test", results_file_path=pth, schema_path=schema_file_path ) def test_results_file_contents_loaded(self, results_file_path, schema_file_path): - """ Contents of the results file are present after loading """ + """Contents of the results file are present after loading""" psm = PipestatManager( namespace="test", results_file_path=results_file_path, @@ -137,7 +137,7 @@ def test_results_file_contents_loaded(self, results_file_path, schema_file_path) assert "test" in psm.data def test_str_representation(self, results_file_path, schema_file_path): - """ Test string representation identifies number of records """ + """Test string representation identifies number of records""" psm = PipestatManager( namespace="test", results_file_path=results_file_path, diff --git a/tests/test_status.py b/tests/test_status.py index a8addf6f..58fd3f86 100644 --- a/tests/test_status.py +++ b/tests/test_status.py @@ -20,7 +20,7 @@ def test_status_file_defult_location(self, schema_file_path, results_file_path): def test_status_not_configured( self, schema_file_path, config_file_path, results_file_path, backend, status_id ): - """ status management works even in case it has not been configured""" + """status management works even in case it has not been configured""" args = dict(schema_path=schema_file_path, namespace="test") backend_data = ( {"config": config_file_path} @@ -45,7 +45,7 @@ def test_custom_status_schema( status_id, custom_status_schema, ): - """ status management works even in case it has not been configured""" + """status management works even in case it has not been configured""" args = dict( schema_path=schema_file_path, namespace="test", From f3354acc1bf5b8b76cbc5a1c1dd0dfab1df83c11 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 29 
Apr 2021 08:31:33 -0400 Subject: [PATCH 27/65] add documentation to the columns --- pipestat/pipestat.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 8e19b635..44887790 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -481,13 +481,19 @@ def _auto_repr(x: Any) -> str: attr_dict = dict( __tablename__=tn, id=Column(Integer, primary_key=True), - record_identifier=Column(SQL_CLASSES_BY_TYPE["string"], unique=True), + record_identifier=Column( + SQL_CLASSES_BY_TYPE["string"], + unique=True, + doc="A unique identifier of the record", + ), query=self[DB_SCOPED_SESSION_KEY].query_property(), ) for result_id, result_metadata in schema.items(): col_type = SQL_CLASSES_BY_TYPE[result_metadata[SCHEMA_TYPE_KEY]] _LOGGER.debug(f"Adding object: {result_id} of type: {str(col_type)}") - attr_dict.update({result_id: Column(col_type)}) + attr_dict.update( + {result_id: Column(col_type, doc=result_metadata["description"])} + ) attr_dict.update({"__repr__": _auto_repr}) _LOGGER.debug(f"Creating '{tn}' ORM with args: {attr_dict}") self[DB_ORMS_KEY][tn] = type(tn.capitalize(), (self[DB_BASE_KEY],), attr_dict) From 8291db023ccff5d2ce3a451e7bde4604668430ac Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 29 Apr 2021 09:15:22 -0400 Subject: [PATCH 28/65] require description in results schema --- pipestat/const.py | 1 + pipestat/pipestat.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/pipestat/const.py b/pipestat/const.py index eac46bcd..c1e105fe 100644 --- a/pipestat/const.py +++ b/pipestat/const.py @@ -71,6 +71,7 @@ # schema keys SCHEMA_PROP_KEY = "properties" SCHEMA_TYPE_KEY = "type" +SCHEMA_DESC_KEY = "description" # DB column names ID = "id" diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 44887790..81597426 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -724,6 +724,9 @@ def _recursively_replace_custom_types(s: dict) -> Dict: assert SCHEMA_TYPE_KEY in v, SchemaError( f"Result '{k}' is missing '{SCHEMA_TYPE_KEY}' key" ) + assert SCHEMA_DESC_KEY in v, SchemaError( + f"Result '{k}' is missing '{SCHEMA_DESC_KEY}' key" + ) if v[SCHEMA_TYPE_KEY] == "object" and SCHEMA_PROP_KEY in s[k]: _recursively_replace_custom_types(s[k][SCHEMA_PROP_KEY]) if v[SCHEMA_TYPE_KEY] in CANONICAL_TYPES.keys(): From 821da6009d922870375268d0049919a0bbae669b Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 30 Apr 2021 13:50:45 -0400 Subject: [PATCH 29/65] move/update log --- pipestat/pipestat.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 81597426..a6def569 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -477,6 +477,9 @@ def _auto_repr(x: Any) -> str: ] return "<{}: {}>".format(x.__class__.__name__, ", ".join(attr_strs)) + _LOGGER.info( + f"Creating models for '{self.namespace}' table in '{PKG_NAME}' database" + ) tn = table_name or self.namespace attr_dict = dict( __tablename__=tn, @@ -794,7 +797,6 @@ def _init_db_table(self) -> bool: raise SchemaNotFoundError("initialize the database table") if not self.is_db_connected(): self.establish_db_connection() - _LOGGER.info(f"Initializing '{self.namespace}' table in '{PKG_NAME}' database") self._create_table_orm(table_name=self.namespace, schema=self.result_schemas) if not self[DB_ONLY_KEY]: self._table_to_dict() From 45d1cd27851f9c59e33a2aecaa8f6750cb9a51d1 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 4 May 2021 17:08:41 -0400 Subject: 
[PATCH 30/65] dont show DB engine logs by default --- pipestat/__init__.py | 2 +- pipestat/pipestat.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pipestat/__init__.py b/pipestat/__init__.py index 02cdabbb..57d036ff 100644 --- a/pipestat/__init__.py +++ b/pipestat/__init__.py @@ -9,4 +9,4 @@ __classes__ = ["PipestatManager"] __all__ = __classes__ -logmuse.init_logger("pipestat") +logmuse.init_logger(PKG_NAME) diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index a6def569..959e5df9 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -42,6 +42,7 @@ def __init__( status_schema_path: Optional[str] = None, flag_file_dir: Optional[str] = None, custom_declarative_base: Optional[sqlalchemy.orm.DeclarativeMeta] = None, + show_db_logs: bool = False, ): """ Initialize the object @@ -226,6 +227,7 @@ def _select_value( self[DB_ORMS_KEY] = {} self[DB_BASE_KEY] = custom_declarative_base or declarative_base() self[DATA_KEY] = YacAttMap() + self._show_db_logs = show_db_logs self._init_db_table() self._init_status_table() else: @@ -477,7 +479,7 @@ def _auto_repr(x: Any) -> str: ] return "<{}: {}>".format(x.__class__.__name__, ", ".join(attr_strs)) - _LOGGER.info( + _LOGGER.debug( f"Creating models for '{self.namespace}' table in '{PKG_NAME}' database" ) tn = table_name or self.namespace @@ -510,7 +512,7 @@ def establish_db_connection(self) -> bool: """ if self.is_db_connected(): raise PipestatDatabaseError("Connection is already established") - self[DB_ENGINE_KEY] = create_engine(self.db_url, echo=True) + self[DB_ENGINE_KEY] = create_engine(self.db_url, echo=self._show_db_logs) self[DB_SESSION_KEY] = sessionmaker(bind=self[DB_ENGINE_KEY]) self[DB_SCOPED_SESSION_KEY] = scoped_session(self[DB_SESSION_KEY]) return True From b69c0a138321bb428ef100ad60b86b60aed0198c Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 5 May 2021 13:56:40 -0400 Subject: [PATCH 31/65] dont store data in memory be default when using DB --- pipestat/pipestat.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 959e5df9..6a9cf7f2 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -37,7 +37,7 @@ def __init__( record_identifier: Optional[str] = None, schema_path: Optional[str] = None, results_file_path: Optional[str] = None, - database_only: Optional[bool] = False, + database_only: Optional[bool] = True, config: Optional[Union[str, dict]] = None, status_schema_path: Optional[str] = None, flag_file_dir: Optional[str] = None, @@ -203,11 +203,6 @@ def _select_value( self.config_path, ) if results_file_path: - if self[DB_ONLY_KEY]: - raise ValueError( - "Running in database only mode does not make " - "sense with a YAML file as a backend." 
- ) self[FILE_KEY] = results_file_path self._init_results_file() flag_file_dir = _select_value( @@ -1172,12 +1167,13 @@ def report( rid=record_identifier, results=result_identifiers ) if existing: + existing_str = ", ".join(existing) _LOGGER.warning( - f"These results exist for '{record_identifier}': {existing}" + f"These results exist for '{record_identifier}': {existing_str}" ) if not force_overwrite: return False - _LOGGER.info(f"Overwriting existing results: {existing}") + _LOGGER.info(f"Overwriting existing results: {existing_str}") for r in result_identifiers: validate_type( value=values[r], schema=self.result_schemas[r], strict_type=strict_type From fbdd3003c151fde8a62e1c70f247b1289f2d6427 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 5 May 2021 14:03:34 -0400 Subject: [PATCH 32/65] add descriptions to test properties --- tests/data/sample_output_schema_recursive.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/data/sample_output_schema_recursive.yaml b/tests/data/sample_output_schema_recursive.yaml index f398275d..74a281c8 100644 --- a/tests/data/sample_output_schema_recursive.yaml +++ b/tests/data/sample_output_schema_recursive.yaml @@ -7,8 +7,10 @@ output_file_in_object: type: object properties: prop1: + description: "This is an example file" type: file prop2: + description: "This is an example image" type: image description: "Object output" output_file: @@ -16,4 +18,4 @@ output_file: description: "This a path to the output file" output_image: type: image - description: "This a path to the output image" \ No newline at end of file + description: "This a path to the output image" From 0b11661245c08f5fc759ff62b7d95d7af2b6f83a Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 5 May 2021 14:08:02 -0400 Subject: [PATCH 33/65] update tests after chaging mem data behavior --- tests/test_pipestat.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py index 1822c9a1..3bfb272a 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -48,7 +48,7 @@ def test_report_basic( results_file_path, backend, ): - args = dict(schema_path=schema_file_path, namespace="test") + args = dict(schema_path=schema_file_path, namespace="test", database_only=False) backend_data = ( {"config": config_file_path} if backend == "db" @@ -115,7 +115,7 @@ def test_report_overwrite( results_file_path, backend, ): - args = dict(schema_path=schema_file_path, namespace="test") + args = dict(schema_path=schema_file_path, namespace="test", database_only=False) backend_data = ( {"config": config_file_path} if backend == "db" @@ -263,7 +263,7 @@ def test_remove_basic( schema_file_path, backend, ): - args = dict(schema_path=schema_file_path, namespace="test") + args = dict(schema_path=schema_file_path, namespace="test", database_only=False) backend_data = ( {"config": config_file_path} if backend == "db" @@ -279,7 +279,7 @@ def test_remove_basic( def test_remove_record( self, rec_id, schema_file_path, config_file_path, results_file_path, backend ): - args = dict(schema_path=schema_file_path, namespace="test") + args = dict(schema_path=schema_file_path, namespace="test", database_only=False) backend_data = ( {"config": config_file_path} if backend == "db" @@ -339,7 +339,7 @@ def test_last_result_removal_removes_record( results_file_path, backend, ): - args = dict(schema_path=schema_file_path, namespace="test") + args = dict(schema_path=schema_file_path, namespace="test", 
database_only=False)
         backend_data = (
             {"config": config_file_path}
             if backend == "db"
@@ -366,7 +366,10 @@ def test_report(
     ):
         REC_ID = "constant_record_id"
         args = dict(
-            schema_path=schema_file_path, namespace="test", record_identifier=REC_ID
+            schema_path=schema_file_path,
+            namespace="test",
+            record_identifier=REC_ID,
+            database_only=False,
         )
         backend_data = (
             {"config": config_file_path}

From 5e33a1313d40716d729e210bd751cae341064eef Mon Sep 17 00:00:00 2001
From: Michal Stolarczyk
Date: Fri, 7 May 2021 10:17:38 -0400
Subject: [PATCH 34/65] always save results in memory in case of file backend

---
 pipestat/pipestat.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py
index 6a9cf7f2..7912488f 100644
--- a/pipestat/pipestat.py
+++ b/pipestat/pipestat.py
@@ -203,6 +203,12 @@ def _select_value(
             self.config_path,
         )
         if results_file_path:
+            if self[DB_ONLY_KEY]:
+                _LOGGER.warning(
+                    "Running in database only mode does not make sense with a YAML file as a backend. "
+                    "Changing back to using memory."
+                )
+                self[DB_ONLY_KEY] = not self[DB_ONLY_KEY]
             self[FILE_KEY] = results_file_path
             self._init_results_file()
         flag_file_dir = _select_value(

From aadc4211e4e6c194ddbdaeebd48c5f057c90ad3c Mon Sep 17 00:00:00 2001
From: Michal Stolarczyk
Date: Mon, 10 May 2021 10:53:25 -0400
Subject: [PATCH 35/65] add pipestat configuration docs

---
 docs/configuration.md | 17 +++++++++++++++++
 pipestat/pipestat.py  |  2 +-
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 docs/configuration.md

diff --git a/docs/configuration.md b/docs/configuration.md
new file mode 100644
index 00000000..0ac6460f
--- /dev/null
+++ b/docs/configuration.md
@@ -0,0 +1,17 @@
+# Pipestat configuration
+
+Pipestat *requires* a few pieces of information to run:
+
+- a **namespace** to write into, for example the name of the pipeline
+- a path to the **schema** file that describes results that can be reported
+- **backend info**: either path to a YAML-formatted file or pipestat config with PostgreSQL database login credentials
+
+Apart from that, there are many other *optional* configuration points that have defaults. Please refer to the [environment variables reference](http://pipestat.databio.org/en/dev/env_vars/) to learn about the optional configuration options and their meaning.
+
+## Configuration sources
+
+Pipestat configuration can come from 3 sources, with the following priority:
+
+1. `PipestatManager` constructor
+2. Pipestat configuration file
+3. Environment variables

diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py
index 7912488f..be23e0fe 100644
--- a/pipestat/pipestat.py
+++ b/pipestat/pipestat.py
@@ -204,7 +204,7 @@ def _select_value(
         )
         if results_file_path:
             if self[DB_ONLY_KEY]:
-                _LOGGER.warning(
+                _LOGGER.debug(
                     "Running in database only mode does not make sense with a YAML file as a backend. "
                     "Changing back to using memory."
) From de5ab704da0d984ee2d146394c98dc79c14dc13a Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 10 May 2021 13:18:51 -0400 Subject: [PATCH 36/65] remove obsolete code, add tests, adjust exception class --- docs_jupyter/cli.ipynb | 35 +++++++-------- docs_jupyter/python_api.ipynb | 20 ++++----- mkdocs.yml | 5 ++- pipestat/helpers.py | 81 +++-------------------------------- tests/test_db_only_mode.py | 44 +++++++++++++++++++ 5 files changed, 79 insertions(+), 106 deletions(-) diff --git a/docs_jupyter/cli.ipynb b/docs_jupyter/cli.ipynb index 914ace6f..50115062 100644 --- a/docs_jupyter/cli.ipynb +++ b/docs_jupyter/cli.ipynb @@ -51,7 +51,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "version: 0.0.3\n", + "version: 0.1.0-dev\n", "usage: pipestat [-h] [--version] [--silent] [--verbosity V] [--logdev]\n", " {report,inspect,remove,retrieve,status} ...\n", "\n", @@ -77,7 +77,7 @@ "developers to communicate -- results produced by a pipeline can easily and\n", "reliably become an input for downstream analyses. The object exposes API for\n", "interacting with the results and pipeline status and can be backed by either a\n", - "YAML-formatted file or a PostgreSQL database.\n" + "YAML-formatted file or a database.\n" ] } ], @@ -116,8 +116,7 @@ " will be used. Currently set to:\n", " ../tests/data/sample_output_schema.yaml\n", " --status-schema ST Path to the status schema. Default will be used if not\n", - " provided: /Library/Frameworks/Python.framework/Versions/3.6\n", - " /lib/python3.6/site-\n", + " provided: /opt/homebrew/lib/python3.9/site-\n", " packages/pipestat/schemas/status_schema.yaml\n", " --flag-dir FD Path to the flag directory in case YAML file is the\n", " pipestat backend.\n", @@ -169,8 +168,7 @@ " will be used. Currently set to:\n", " ../tests/data/sample_output_schema.yaml\n", " --status-schema ST Path to the status schema. Default will be used if not\n", - " provided: /Library/Frameworks/Python.framework/Versions/3.6\n", - " /lib/python3.6/site-\n", + " provided: /opt/homebrew/lib/python3.9/site-\n", " packages/pipestat/schemas/status_schema.yaml\n", " --flag-dir FD Path to the flag directory in case YAML file is the\n", " pipestat backend.\n", @@ -217,8 +215,7 @@ " will be used. Currently set to:\n", " ../tests/data/sample_output_schema.yaml\n", " --status-schema ST Path to the status schema. Default will be used if not\n", - " provided: /Library/Frameworks/Python.framework/Versions/3.6\n", - " /lib/python3.6/site-\n", + " provided: /opt/homebrew/lib/python3.9/site-\n", " packages/pipestat/schemas/status_schema.yaml\n", " --flag-dir FD Path to the flag directory in case YAML file is the\n", " pipestat backend.\n", @@ -265,8 +262,8 @@ " be used. Currently set to:\n", " ../tests/data/sample_output_schema.yaml\n", " --status-schema ST Path to the status schema. Default will be used if not provided:\n", - " /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/\n", - " site-packages/pipestat/schemas/status_schema.yaml\n", + " /opt/homebrew/lib/python3.9/site-\n", + " packages/pipestat/schemas/status_schema.yaml\n", " --flag-dir FD Path to the flag directory in case YAML file is the pipestat\n", " backend.\n", " -d, --data Whether to display the data\n" @@ -335,8 +332,7 @@ " will be used. Currently set to:\n", " ../tests/data/sample_output_schema.yaml\n", " --status-schema ST Path to the status schema. 
Default will be used if not\n", - " provided: /Library/Frameworks/Python.framework/Versions/3.6\n", - " /lib/python3.6/site-\n", + " provided: /opt/homebrew/lib/python3.9/site-\n", " packages/pipestat/schemas/status_schema.yaml\n", " --flag-dir FD Path to the flag directory in case YAML file is the\n", " pipestat backend.\n", @@ -382,8 +378,7 @@ " will be used. Currently set to:\n", " ../tests/data/sample_output_schema.yaml\n", " --status-schema ST Path to the status schema. Default will be used if not\n", - " provided: /Library/Frameworks/Python.framework/Versions/3.6\n", - " /lib/python3.6/site-\n", + " provided: /opt/homebrew/lib/python3.9/site-\n", " packages/pipestat/schemas/status_schema.yaml\n", " --flag-dir FD Path to the flag directory in case YAML file is the\n", " pipestat backend.\n", @@ -518,9 +513,9 @@ "\n", "\n", "PipestatManager (test)\n", - "Backend: file (/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmp.Zid7BMd1)\n", + "Backend: file (/var/folders/z_/bkm4rp412zjctjm28b54tbvr0000gn/T/tmp.h5YnTVAA)\n", "Results schema source: ../tests/data/sample_output_schema.yaml\n", - "Status schema source: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pipestat/schemas/status_schema.yaml\n", + "Status schema source: /opt/homebrew/lib/python3.9/site-packages/pipestat/schemas/status_schema.yaml\n", "Records count: 1\n" ] } @@ -548,9 +543,9 @@ "\n", "\n", "PipestatManager (test)\n", - "Backend: file (/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmp.Zid7BMd1)\n", + "Backend: file (/var/folders/z_/bkm4rp412zjctjm28b54tbvr0000gn/T/tmp.h5YnTVAA)\n", "Results schema source: ../tests/data/sample_output_schema.yaml\n", - "Status schema source: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pipestat/schemas/status_schema.yaml\n", + "Status schema source: /opt/homebrew/lib/python3.9/site-packages/pipestat/schemas/status_schema.yaml\n", "Records count: 1\n", "\n", "Data:\n", @@ -655,9 +650,9 @@ "\n", "\n", "PipestatManager (test)\n", - "Backend: file (/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmp.Zid7BMd1)\n", + "Backend: file (/var/folders/z_/bkm4rp412zjctjm28b54tbvr0000gn/T/tmp.h5YnTVAA)\n", "Results schema source: ../tests/data/sample_output_schema.yaml\n", - "Status schema source: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pipestat/schemas/status_schema.yaml\n", + "Status schema source: /opt/homebrew/lib/python3.9/site-packages/pipestat/schemas/status_schema.yaml\n", "Records count: 1\n", "\n", "Data:\n", diff --git a/docs_jupyter/python_api.ipynb b/docs_jupyter/python_api.ipynb index aded8c5d..85d972a4 100644 --- a/docs_jupyter/python_api.ipynb +++ b/docs_jupyter/python_api.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -51,14 +51,14 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmplgf49zfa.yaml\n" + "/var/folders/z_/bkm4rp412zjctjm28b54tbvr0000gn/T/tmpylsi8onv.yaml\n" ] } ], @@ -78,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -380,7 +380,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "These results exist for 'sample1': ['output_file']\n" + "These results exist for 'sample1': output_file\n" ] }, { @@ -414,8 +414,8 @@ "name": 
"stderr", "output_type": "stream", "text": [ - "These results exist for 'sample1': ['output_file']\n", - "Overwriting existing results: ['output_file']\n", + "These results exist for 'sample1': output_file\n", + "Overwriting existing results: output_file\n", "Reported records for 'sample1' in 'test' namespace:\n", " - output_file: {'path': '/home/user/path_new.csv', 'title': 'new CSV file with some data'}\n" ] @@ -509,7 +509,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmp9cozi7pc.yaml\n", + "/var/folders/z_/bkm4rp412zjctjm28b54tbvr0000gn/T/tmpylsi8onv.yaml\n", "test:\n", " sample1:\n", " output_file:\n", @@ -829,7 +829,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmpsfzryntz.yaml\n" + "/var/folders/z_/bkm4rp412zjctjm28b54tbvr0000gn/T/tmplfz72deu.yaml\n" ] } ], @@ -1036,7 +1036,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/var/folders/3f/0wj7rs2144l9zsgxd3jn5nxc0000gn/T/tmpxwy566e3.yaml\n" + "/var/folders/z_/bkm4rp412zjctjm28b54tbvr0000gn/T/tmp8q7552ae.yaml\n" ] } ], diff --git a/mkdocs.yml b/mkdocs.yml index c76d9ec4..b4ebeaf7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -5,12 +5,13 @@ repo_url: http://github.com/pepkit/pipestat pypi_name: pipestat nav: - - Getting Started: + - Getting Started: - Introduction: README.md - Install and configure: install.md - How-to guides: - Use Python API: python_api.md - Use command line interface: cli.md + - Configure pipestat: configuration.md - Reference: - Configuration format : config.md - Environment variables: env_vars.md @@ -29,4 +30,4 @@ plugins: autodoc_build: "docs/autodoc_build" autodoc_package: "pipestat" no_top_level: true -- search \ No newline at end of file +- search diff --git a/pipestat/helpers.py b/pipestat/helpers.py index e51dd49e..71f60a30 100644 --- a/pipestat/helpers.py +++ b/pipestat/helpers.py @@ -1,11 +1,12 @@ import logging from re import findall -from typing import Any, Dict +from typing import Any, Dict, List, Optional, Tuple, Union import jsonschema import sqlalchemy.orm from oyaml import safe_load from psycopg2 import sql +from sqlalchemy.orm import DeclarativeMeta, Query from ubiquerg import expandpath from .const import * @@ -101,76 +102,6 @@ def mk_list_of_str(x): ) -def preprocess_condition_pair(condition, condition_val): - """ - Preprocess query condition and values to ensure sanity and compatibility - - :param str condition: condition string - :param tuple condition_val: values to populate condition string with - :return (psycopg2.sql.SQL, tuple): condition pair - """ - - def _check_semicolon(x): - """ - recursively check for semicolons in an object - - :param aby x: object to inspect - :raises ValueError: if semicolon detected - """ - if isinstance(x, str): - assert ";" not in x, ValueError( - f"semicolons are not permitted in condition values: '{str(x)}'" - ) - if isinstance(x, list): - list(map(lambda v: _check_semicolon(v), x)) - - if condition: - if not isinstance(condition, str): - raise TypeError("Condition has to be a string") - else: - _check_semicolon(condition) - placeholders = findall("%s", condition) - condition = sql.SQL(condition) - if not condition_val: - raise ValueError("condition provided but condition_val missing") - assert isinstance(condition_val, list), TypeError( - "condition_val has to be a list" - ) - condition_val = tuple(condition_val) - assert len(placeholders) == len(condition_val), ValueError( - f"Number of condition ({len(condition_val)}) 
values not equal "
-            f"number of placeholders in: {condition}"
-        )
-    return condition, condition_val
-
-
-def paginate_query(query, offset, limit):
-    """
-    Apply offset and limit to the query string
-
-    :param sql.SQL query: query string to apply limit and offset to
-    :param int offset: offset to apply; no. of records to skip
-    :param int limit: limit to apply; max no. of records to return
-    :return sql.SQL: a possibly paginated query
-    """
-    if offset is not None:
-        assert isinstance(offset, int), TypeError(
-            f"Provided offset ({offset}) must be an int"
-        )
-        query += sql.SQL(f" OFFSET {offset}")
-    if limit is not None:
-        assert isinstance(limit, int), TypeError(
-            f"Provided limit ({limit}) must be an int"
-        )
-        query += sql.SQL(f" LIMIT {limit}")
-    return query
-
-
-from typing import Dict, List, Optional, Tuple, Union
-
-from sqlalchemy.orm import DeclarativeMeta, Query
-
-
 def dynamic_filter(
     ORM: DeclarativeMeta,
     query: Query,
@@ -198,14 +129,16 @@ def _unpack_tripartite(x):
             e1, e2, e3 = x
             return e1, e2, e3
         except ValueError:
-            raise Exception(f"Invalid tripartite element: {x}")
+            raise ValueError(
+                f"Invalid filter value: {x}. The filters must be tripartite"
+            )
 
     if filter_conditions is not None:
         for filter_condition in filter_conditions:
             key, op, value = _unpack_tripartite(filter_condition)
             column = getattr(ORM, key, None)
             if column is None:
-                raise Exception(f"Invalid filter column: {key}")
+                raise ValueError(f"Selected filter column does not exist: {key}")
             if op == "in":
                 if isinstance(value, list):
                     filt = column.in_(value)
@@ -223,7 +156,7 @@ def _unpack_tripartite(x):
                     % op
                 )
             except IndexError:
-                raise Exception(f"Invalid filter operator: {op}")
+                raise ValueError(f"Invalid filter operator: {op}")
             if value == "null":
                 value = None
             filt = getattr(column, attr)(value)
diff --git a/tests/test_db_only_mode.py b/tests/test_db_only_mode.py
index dd259f3a..d2fffcd8 100644
--- a/tests/test_db_only_mode.py
+++ b/tests/test_db_only_mode.py
@@ -27,6 +27,50 @@ def test_report(self, val, config_file_path, schema_file_path, results_file_path
         val_name = list(val.keys())[0]
         assert psm.select(filter_conditions=[(val_name, "eq", str(val[val_name]))])
 
+    @pytest.mark.parametrize(["rec_id", "res_id"], [("sample2", "number_of_things")])
+    @pytest.mark.parametrize("backend", ["db"])
+    def test_select_invalid_filter_column(
+        self,
+        rec_id,
+        res_id,
+        config_file_path,
+        results_file_path,
+        schema_file_path,
+        backend,
+    ):
+        args = dict(
+            schema_path=schema_file_path, namespace="test", config=config_file_path
+        )
+        psm = PipestatManager(**args)
+        with pytest.raises(ValueError):
+            psm.select(
+                filter_conditions=[("bogus_column", "eq", rec_id)],
+                columns=[res_id],
+            )
+
+    @pytest.mark.parametrize(["rec_id", "res_id"], [("sample2", "number_of_things")])
+    @pytest.mark.parametrize("backend", ["db"])
+    @pytest.mark.parametrize("filter", [("column", "eq", 1), "a", [1, 2, 3]])
+    def test_select_invalid_filter_structure(
+        self,
+        rec_id,
+        res_id,
+        config_file_path,
+        results_file_path,
+        schema_file_path,
+        backend,
+        filter,
+    ):
+        args = dict(
+            schema_path=schema_file_path, namespace="test", config=config_file_path
+        )
+        psm = PipestatManager(**args)
+        with pytest.raises(ValueError):
+            psm.select(
+                filter_conditions=[filter],
+                columns=[res_id],
+            )
+
 @pytest.mark.parametrize(["rec_id", "res_id"], [("sample2", "number_of_things")])
 @pytest.mark.parametrize("backend", ["db"])
 @pytest.mark.parametrize("limit", [1, 2, 3, 15555])
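For illustration, here is a minimal sketch of how the reworked `ValueError`-based validation in `dynamic_filter` surfaces to `select()` callers, mirroring the tests above; it assumes a database-backed `PipestatManager` configured like the test fixtures (the config path below is hypothetical):

```python
from pipestat import PipestatManager

psm = PipestatManager(
    namespace="test",
    schema_path="tests/data/sample_output_schema.yaml",
    config="tests/data/config.yaml",  # hypothetical path to a DB config
)

# A well-formed filter condition is a (column, operator, value) triple:
matched = psm.select(filter_conditions=[("number_of_things", "eq", "1")])

# A filter that references a non-existent column now raises ValueError
# rather than a bare Exception:
try:
    psm.select(filter_conditions=[("bogus_column", "eq", "1")])
except ValueError as e:
    print(e)  # Selected filter column does not exist: bogus_column
```
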
From 906a04a99fa11ee69faae4be33dd6238f1c1206d Mon Sep 17 00:00:00 2001
From: Michal Stolarczyk
Date: Mon, 10 May 2021 13:27:34 -0400
Subject: [PATCH 37/65] update exception

---
 pipestat/helpers.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pipestat/helpers.py b/pipestat/helpers.py
index 71f60a30..34cfcdfd 100644
--- a/pipestat/helpers.py
+++ b/pipestat/helpers.py
@@ -126,11 +126,14 @@ def dynamic_filter(
 
     def _unpack_tripartite(x):
         try:
+            assert isinstance(x, Iterable) and not isinstance(x, str), TypeError(
+                "Wrong filter class, a non-string iterable is required"
+            )
             e1, e2, e3 = x
             return e1, e2, e3
-        except ValueError:
+        except Exception:
             raise ValueError(
-                f"Invalid filter value: {x}. The filters must be tripartite"
+                f"Invalid filter value: {x}. The filter must be a tripartite iterable"
             )
 
     if filter_conditions is not None:

From cdeddf0098a8035f5b77cabd8bbad9ff34c64511 Mon Sep 17 00:00:00 2001
From: Michal Stolarczyk
Date: Mon, 10 May 2021 13:30:17 -0400
Subject: [PATCH 38/65] update exception

---
 pipestat/helpers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipestat/helpers.py b/pipestat/helpers.py
index 34cfcdfd..942f9d75 100644
--- a/pipestat/helpers.py
+++ b/pipestat/helpers.py
@@ -126,8 +126,8 @@ def dynamic_filter(
 
     def _unpack_tripartite(x):
         try:
-            assert isinstance(x, Iterable) and not isinstance(x, str), TypeError(
-                "Wrong filter class, a non-string iterable is required"
+            assert isinstance(x, List) or isinstance(x, Tuple), TypeError(
+                "Wrong filter class, a List or Tuple is required"
             )
             e1, e2, e3 = x
             return e1, e2, e3

From dc03da4f54f6ae8f16383f2703c0ed33410b575c Mon Sep 17 00:00:00 2001
From: Michal Stolarczyk
Date: Tue, 11 May 2021 14:35:23 -0400
Subject: [PATCH 39/65] update docs

---
 docs/changelog.md              |  18 +++++-
 docs/config.md                 |  38 +++++------
 docs/contributing.md           |   4 +-
 docs/env_vars.md               |   3 +-
 docs/install.md                |   3 +-
 docs/pipestat_specification.md |  46 +++++--------
 docs/schema.md                 | 100 ---------------------------------
 7 files changed, 62 insertions(+), 150 deletions(-)
 delete mode 100644 docs/schema.md

diff --git a/docs/changelog.md b/docs/changelog.md
index e59f61c0..b00a5aa8 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -4,18 +4,32 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 
 ## [0.1.0] - unreleased
 
+**This update introduces some backwards-incompatible changes due to database interface redesign**
+
+### Changed
+
+- database interface type from a driver to an Object–relational mapping (ORM) approach
+
+### Added
+
+- results highlighting support
+- static typing
 
 ## [0.0.4] - 2021-04-02
+
 ### Added
+
 - config validation
 - typing in code
 
-
 ## [0.0.3] - 2021-03-12
+
 ### Added
 
-- possibility to initialize the `PipestatManager` object (or use the `pipestat status` CLI) with no results schema defined for pipeline status management; [Issue #1](https://github.com/pepkit/pipestat/issues/1) 
+- possibility to initialize the `PipestatManager` object (or use the `pipestat status` CLI) with no results schema defined for pipeline status management; [Issue #1](https://github.com/pepkit/pipestat/issues/1)
 
 ## [0.0.2] - 2021-02-22
 
+### Added
+
 - initial package release
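To make the changelog's driver-to-ORM note concrete, here is a rough sketch of what the redesign means for callers, with names taken from the diffs in this series (the config path is hypothetical, and the ORM lookup is an approximation of what the reworked helpers do internally):

```python
from pipestat import PipestatManager

psm = PipestatManager(namespace="test", config="pipestat_config.yaml")  # hypothetical config

# 0.0.x (driver approach): helpers such as the removed preprocess_condition_pair
# composed raw SQL like "SELECT * FROM test WHERE id=%s" for a psycopg2 cursor.

# 0.1.0 (ORM approach): the same lookup goes through a mapped class and a
# session, which is what the new dynamic_filter-based select() does internally.
TestORM = psm.get_orm("test")
with psm.session as s:
    record = s.query(TestORM).filter(TestORM.id == 1).first()
```
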
diff --git a/docs/config.md b/docs/config.md
index 21019931..a9a28755 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -8,28 +8,30 @@ schema_path:
 record_identifier:
 results_file_path: # either "results_file_path"
 database: # or DB login credentials
-    name:
-    user:
-    password:
-    host:
-    port:
+  name:
+  user:
+  password:
+  host:
+  port:
+  dialect:
+  driver:
 ```
 
 If both `results_file_path` and DB login credentials are provided, the YAML results file is given priority.
 
-Any of the settings specified in the configuration file, apart from the database login credentials, can be overwritten with the respectively named arguments in the `PipestatManager` object constructor, which implies the fact that the configuration file is *required* only if the the intended pipestat back-end is a database. 
+Any of the settings specified in the configuration file, apart from the database login credentials, can be overwritten with the respectively named arguments in the `PipestatManager` object constructor, which means the configuration file is *required* only if the intended pipestat back-end is a database.
 
 ## Example
 
 Suppose a [PostgreSQL](https://www.postgresql.org/) instance has been started in a container with the following command:
 
 ```console
-docker run -d
-  --name pipestat-postgres \
-  -p 5432:5432 \
+docker run -d \
+  --name pipestat-postgres \
+  -p 5432:5432 \
   -e POSTGRES_PASSWORD=b4fd34f^Fshdwede \
-  -e POSTGRES_USER=john \
-  -e POSTGRES_DB=pipestat-test \
+  -e POSTGRES_USER=john \
+  -e POSTGRES_DB=pipestat-test \
   -v postgres-data:/var/lib/postgresql/data postgres
 ```
 
 The configuration file should look like this:
 
 ```yaml
 namespace: my_pipeline
 schema_path: /path/to/schema.yaml
 database:
-  name: pipestat-test
-  user: john
-  password: b4fd34f^Fshdwede
-  host: localhost
-  port: 5432
-```
\ No newline at end of file
+  name: pipestat-test
+  user: john
+  password: b4fd34f^Fshdwede
+  host: 127.0.0.1
+  port: 5432
+  dialect: postgresql
+  driver: psycopg2
+```
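As a hedged aside on the new `dialect` and `driver` keys: pipestat assembles these credentials into a database URL internally (via the new `db_url` property), presumably in the standard SQLAlchemy form sketched below; treat the exact assembly as an approximation.

```python
from urllib.parse import quote_plus

creds = dict(
    dialect="postgresql",
    driver="psycopg2",
    user="john",
    password="b4fd34f^Fshdwede",
    host="127.0.0.1",
    port=5432,
    name="pipestat-test",
)
# Special characters in the password must be percent-encoded in a URL.
url = (
    f"{creds['dialect']}+{creds['driver']}://{creds['user']}:"
    f"{quote_plus(creds['password'])}@{creds['host']}:{creds['port']}/{creds['name']}"
)
print(url)  # postgresql+psycopg2://john:b4fd34f%5EFshdwede@127.0.0.1:5432/pipestat-test
```
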
diff --git a/docs/contributing.md b/docs/contributing.md
index e53b307a..d2e6cd6b 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -1,7 +1,7 @@
-## Contributing
+# Contributing
 
 We welcome contributions from the community.
 
-## Suggestions and feedback
+# Suggestions and feedback
 
 Please open an issue on the github issue tracker with suggestions, bug reports, or other feedback.
diff --git a/docs/env_vars.md b/docs/env_vars.md
index c68fc9ab..0dc32e51 100644
--- a/docs/env_vars.md
+++ b/docs/env_vars.md
@@ -4,7 +4,6 @@ Both the command line interface (CLI) and Python API support a collection of env
 
 Here is a list of the supported environment variables:
 
-
 | Environment variable | API argument | Description |
 |----------------------------|--------------------|-----------------------------------------------------------------------|
 | **PIPESTAT_NAMESPACE** | namespace | namespace to report into. This will be the DB table name if using DB as the object back-end |
 | **PIPESTAT_RECORD_ID** | record_identifier | record identifier to report for |
 | **PIPESTAT_CONFIG** | config | path to the configuration file or a mapping with the config file content |
 | **PIPESTAT_RESULTS_FILE** | results_file_path | YAML file to report into, if file is used as the object back-end |
 | **PIPESTAT_RESULTS_SCHEMA** | schema_path | path to the output schema that formalizes the results structure |
-| **PIPESTAT_STATUS_SCHEMA** | status_schema_path | path to the status schema that formalizes the status flags structure |
\ No newline at end of file
+| **PIPESTAT_STATUS_SCHEMA** | status_schema_path | path to the status schema that formalizes the status flags structure |
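Since every one of these variables mirrors a constructor argument, a fully environment-driven setup is possible; a minimal sketch follows (the file paths are hypothetical):

```python
import os

from pipestat import PipestatManager

os.environ["PIPESTAT_NAMESPACE"] = "test"
os.environ["PIPESTAT_RESULTS_FILE"] = "/tmp/results.yaml"  # hypothetical path
os.environ["PIPESTAT_RESULTS_SCHEMA"] = "tests/data/sample_output_schema.yaml"

# With the variables set, no explicit constructor arguments are needed;
# the values are sourced from the environment.
psm = PipestatManager()
```
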
diff --git a/docs/install.md b/docs/install.md
index 6ccde5e5..9282d686 100644
--- a/docs/install.md
+++ b/docs/install.md
@@ -8,6 +8,7 @@ Install pipestat from [GitHub releases](https://github.com/pepkit/pipestat/relea
 - `pip install --upgrade pipestat`: update in virtual environment.
 
 See if your install worked by calling `pipestat -h` on the command line. If the `pipestat` executable is not in your `$PATH`, append this to your `.bashrc` or `.profile` (or `.bash_profile` on macOS):
+
 ```console
 export PATH=~/.local/bin:$PATH
-```
\ No newline at end of file
+```
diff --git a/docs/pipestat_specification.md b/docs/pipestat_specification.md
index 61169abf..f3608619 100644
--- a/docs/pipestat_specification.md
+++ b/docs/pipestat_specification.md
@@ -6,11 +6,10 @@
 
 Pipelines, or workflows, are made from a set of commands that process input data and produce results. These results may take many forms, such as simple statistics, string variables, images, or processed data files. How do pipelines structure the results they produce? There is no standard structure for results, so usually, it's done differently for each pipeline. This restricts the portability of the outputs of pipelines, and makes it difficult to write software that can process results from a variety of different pipelines. As a result, each pipeline author usually writes dedicated report functions for each pipeline.
 
-Pipestat provides a formal specification for how a pipeline should structure its results. Therefore, any pipeline that follows the pipestat specification will record results in the same way. This makes it possible to build generic report software that can work with any pipestat-compatible pipeline, offloading the task of making pretty result reports to generic tools. 
+Pipestat provides a formal specification for how a pipeline should structure its results. Therefore, any pipeline that follows the pipestat specification will record results in the same way. This makes it possible to build generic report software that can work with any pipestat-compatible pipeline, offloading the task of making pretty result reports to generic tools.
 
 This document outlines the specification for pipestat results. If your pipeline stores results like this, then downstream tools that read pipestat results will be able to build nice summaries of your pipeline runs automatically. To write results according to this specification, you can use the reference implementation (the `pipestat` python package), or you can simply write your results to this specification using whatever system you like.
 
-
 # Terminology
 
 - *result*: An element produced by a pipeline. Results have defined data types, described herein.
@@ -33,13 +32,13 @@ Each *result* reported by a pipeline must have a specified data type. Pipestat i
 Importantly, pipestat extends the jsonschema vocabulary by adding two additional types, which are common results of a pipeline: `image` and `file`. These types require reporting objects with the following attributes:
 
-- `file`: 
-    - `path`: path to the reported file
-    - `title`: human readable description of the file
-- `image`: 
-    - `path`: path to the reported image, usually PDF
-    - `thumbnail`: path to the reported thumbnail, usually PNG or JPEG
-    - `title`: human readable description of the image
+- `file`:
+  - `path`: path to the reported file
+  - `title`: human readable description of the file
+- `image`:
+  - `path`: path to the reported image, usually PDF
+  - `thumbnail`: path to the reported thumbnail, usually PNG or JPEG
+  - `title`: human readable description of the image
 
 # Pipestat schema
 
@@ -60,11 +59,10 @@ The pipestat schema is a YAML-formatted file. The top level keys are the unique
 
 ```yaml
 result_identifier:
   type:
-``` 
+```
 
 Here, `result_identifier` can be whatever name you want to use to identify this result. Here's a simple schema example that showcases most of the supported types:
 
-
 ```yaml
 number_of_things:
   type: integer
   description: "Number of things"
 percentage_of_things:
   type: number
   description: "Percentage of things"
 name_of_something:
   type: string
   description: "Name of something"
 swtich_value:
   type: boolean
   description: "Is the switch on of off"
 collection_of_things:
   type: array
   description: "This store collection of values"
 output_object:
   type: object
   description: "Object output"
 output_file:
   type: file
   description: "This a path to the output file"
 output_image:
   type: image
   description: "This a path to the output image"
-``` 
+```
 
 Here's a more complex schema example that showcases some of the more advanced jsonschema features:
 
 ```yaml
 number_of_things:
   type: integer
   description: "Number of things, min 20, multiple of 10"
   multipleOf: 10
   minimum: 20
 name_of_something:
   type: string
   description: "Name of something, min len 2 characters"
   minLength: 2
 collection_of_things:
   type: array
   items:
     type: string
   description: "This store collection of strings"
@@ -114,18 +112,18 @@ output_object:
   type: object
   properties:
     property1:
       array:
         items:
           type: integer
     property2:
       type: boolean
   required:
     - property1
   description: "Object output with required array of integers and optional boolean"
-``` 
+```
 
 ## Results highlighting
 
-The pipestat specification allows to highlight results by adding `highlight: true` attribute under result identifier in the schema file. In the example below the `log_file` result will be highlighted. 
+The pipestat specification makes it possible to highlight results by adding the `highlight: true` attribute under a result identifier in the schema file. In the example below, the `log_file` result will be highlighted.
 
 ```yaml
 number_of_things:
   type: integer
   description: "Number of things"
 log_file:
   type: file
   highlight: true
 ```
 
 The highlighted results can be later retrieved by pipestat clients via `PipestatManager.highlighted_results` property, which simply returns a list of result identifiers.
 
 # Status schema
 
 Apart from results reporting, pipestat provides a robust pipeline status management system, which can be used to report pipeline status from within the pipeline and monitor the pipeline's status in other software. Status schema file defines the possible pipeline status identifiers and provides other metadata, like `description` or `color` for display purposes.
 
@@ -148,7 +146,6 @@ Apart from results reporting pipestat provides a robust pipeline status manageme
 Here's an example of the pipestat status schema, which at the same time is the default status schema shipped with the pipestat Python package:
 
-
 ```yaml
 running:
   description: "the pipeline is running"
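The status identifiers defined in such a schema are exactly what the status API accepts; a minimal sketch with a file-backed manager follows (the results file path is hypothetical, and per the changelog a results schema is not required for status management alone):

```python
from pipestat import PipestatManager

psm = PipestatManager(namespace="test", results_file_path="/tmp/results.yaml")

# Flag the record as running, then read the status back.
psm.set_status(status_identifier="running", record_identifier="sample1")
assert psm.get_status(record_identifier="sample1") == "running"
```
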
@@ -174,7 +171,7 @@ As depicted above the top-level attributes are the status identifiers. Within ea
 
 # Backends
 
 The pipestat specification describes two backend types for storing results: a [YAML-formatted file](https://yaml.org/spec/1.2/spec.html) or a [PostgreSQL database](https://www.postgresql.org/). This flexibility makes pipestat useful for a wide variety of use cases. Some users just need a simple text file for smaller-scale needs, which is convenient and universal, requiring no database infrastructure. For larger-scale systems, a database back-end is necessary. The pipestat specification provides a layer that spans the two possibilities, so that reports can be made in the same way, regardless of which back-end is used in a particular use case.
 
 By using the `pipestat` package to write results, the pipeline author need not be concerned with database connections or dealing with race-free file writing, as these tasks are already implemented. The user who runs the pipeline will simply configure the pipestat backend as required.
 
 ## YAML file
 
@@ -190,32 +187,29 @@ For the YAML file backend, each file represents a namespace. The file always beg
 my_namespace:
   record1:
     my_result: 10
-    my_result1: 
+    my_result1:
       key: "value1"
   record2:
     my_result: 3
-    my_result1: 
+    my_result1:
       key: "value2"
-``` 
+```
 
 A more concrete example would be:
 
-
 ```yaml
 rnaseq-pipe:
   patient1:
     duplicate_rate: 10
-    genomic_distribution: 
+    genomic_distribution:
       promoter: 15
       enhancer: 85
   patient2:
     duplicate_rate: 3
-    genomic_distribution: 
+    genomic_distribution:
       promoter: 30
       enhancer: 70
-``` 
-
-
+```
 
 ## PostgreSQL database
 
diff --git a/docs/schema.md b/docs/schema.md
deleted file mode 100644
index 2e9bb4c5..00000000
--- a/docs/schema.md
+++ /dev/null
@@ -1,100 +0,0 @@
-# Schema specification
-
-One of the *required* pipestat inputs is a schema file. **The schema specifies the results types and names that can be reported with pipestat.** As a pipeline developer, you create a schema to describe all of the important results to be recorded from your pipeline.
-
-Pipestat uses the schema as a base for creating a collection of self-contained result-specific [jsonschema schemas](https://json-schema.org/) that are used to **validate** the reported results prior to inserting into the database or saving in the YAML results file, depending on the selected backend.
-
-## Components
-
-Each schema is a YAML-formatted file composed of a set of self-contained result definitions. The top level keys are the unique result identifiers. The result definitions are jsonschema schemas. For a minimal schema, only the `type` attribute is required, which indicates the required type of the result to be reported. Please refer to the jsonschema documentation to learn more about the types and other attributes. This is an example of such component:
-
-```yaml
-result_identifier:
-  type:
-```
-
-Here, `result_identifier` can be whatever name you want to use to identify this result. Importantly, pipestat extends the jsonschema vocabulary by adding two additional types: `image` and `file`. 
These types require reporting objects with the following attributes: - -- `file`: - - `path`: path to the reported file - - `title`: human readable description of the file -- `image`: - - `path`: path to the reported image, usually PDF - - `thumbnail`: path to the reported thumbnail, usually PNG or JPEG - - `title`: human readable description of the image - -Therefore, in practice, a result of type `file` is equivalent to: - -```yaml -type: object -properties: - path: - type: string - title: - type: string -``` - -## Basic example - -Here's a simple schema example that showcases most of the supported types: - - -```yaml -number_of_things: - type: integer - description: "Number of things" -percentage_of_things: - type: number - description: "Percentage of things" -name_of_something: - type: string - description: "Name of something" -swtich_value: - type: boolean - description: "Is the switch on of off" -collection_of_things: - type: array - description: "This store collection of values" -output_object: - type: object - description: "Object output" -output_file: - type: file - description: "This a path to the output file" -output_image: - type: image - description: "This a path to the output image" -``` - -## More complex example - -Here's a more complex schema example that showcases some of the more advanced jsonschema features: - -```yaml -number_of_things: - type: integer - description: "Number of things, min 20, multiple of 10" - multipleOf: 10 - minimum: 20 -name_of_something: - type: string - description: "Name of something, min len 2 characters" - minLength: 2 -collection_of_things: - type: array - items: - type: string - description: "This store collection of strings" -output_object: - type: object - properties: - property1: - array: - items: - type: integer - property2: - type: boolean - required: - - property1 - description: "Object output with required array of integers and optional boolean" -``` From 1adfb5fe2c6df9eb2dc0f77b4a6e80c5755df6a3 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 14 May 2021 17:16:13 -0400 Subject: [PATCH 40/65] set up a system for schema-based DB column parametrization --- .gitignore | 1 + pipestat/pipestat.py | 21 ++++++++++++++++++- tests/data/sample_output_schema.yaml | 10 ++++++++- .../data/sample_output_schema_highlight.yaml | 10 ++++----- 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index fcf0f3c8..b07a8870 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ __pycache__ *.py[cod] *$py.class __pycache__/ +.vscode/ # C extensions *.so diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index be23e0fe..88768690 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -277,6 +277,19 @@ def highlighted_results(self) -> List[str]: """ return self._get_attr(HIGHLIGHTED_KEY) or [] + @property + def db_column_kwargs_by_result(self) -> Dict[str, Any]: + """ + Database column key word arguments for every result, sourced from the results schema + + :return Dict[str, Any]: key word arguments for every result + """ + return { + result_id: self.schema[result_id]["db_column"] + for result_id in self.schema.keys() + if "db_column" in self.schema[result_id] + } + @property def namespace(self) -> str: """ @@ -498,7 +511,13 @@ def _auto_repr(x: Any) -> str: col_type = SQL_CLASSES_BY_TYPE[result_metadata[SCHEMA_TYPE_KEY]] _LOGGER.debug(f"Adding object: {result_id} of type: {str(col_type)}") attr_dict.update( - {result_id: Column(col_type, doc=result_metadata["description"])} + { + result_id: Column( 
+ col_type, + doc=result_metadata["description"], + **self.db_column_kwargs_by_result.get(result_id, {}), + ) + } ) attr_dict.update({"__repr__": _auto_repr}) _LOGGER.debug(f"Creating '{tn}' ORM with args: {attr_dict}") diff --git a/tests/data/sample_output_schema.yaml b/tests/data/sample_output_schema.yaml index 392bb832..c7c8b79e 100644 --- a/tests/data/sample_output_schema.yaml +++ b/tests/data/sample_output_schema.yaml @@ -21,4 +21,12 @@ output_file: description: "This a path to the output file" output_image: type: image - description: "This a path to the output image" \ No newline at end of file + description: "This a path to the output image" +md5sum: + type: string + description: "MD5SUM of an object" + highlighted: true + db_column: + unique: true + nullable: false + \ No newline at end of file diff --git a/tests/data/sample_output_schema_highlight.yaml b/tests/data/sample_output_schema_highlight.yaml index e51fd0bd..3373f6e8 100644 --- a/tests/data/sample_output_schema_highlight.yaml +++ b/tests/data/sample_output_schema_highlight.yaml @@ -21,21 +21,21 @@ output_file: description: "This a path to the output file" output_image: type: image - highlight: false + highlighted: false description: "This a path to the output image" log: type: file - highlight: true + highlighted: true description: "The log file of the pipeline run" profile: type: file - highlight: true + highlighted: true description: "The profile of the pipeline run" commands: type: file - highlight: true + highlighted: true description: "The file with shell commands executed by this pipeline" version: type: string - highlight: true + highlighted: true description: "Pipeline version" \ No newline at end of file From dc554e0902ce2c497c163e7ef9705d75544e98f7 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 14 May 2021 17:19:12 -0400 Subject: [PATCH 41/65] fix keys, add const --- pipestat/const.py | 1 + pipestat/pipestat.py | 4 ++-- tests/data/sample_output_schema.yaml | 2 +- tests/data/sample_output_schema_highlight.yaml | 10 +++++----- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pipestat/const.py b/pipestat/const.py index c1e105fe..a4854b6c 100644 --- a/pipestat/const.py +++ b/pipestat/const.py @@ -67,6 +67,7 @@ DB_SCOPED_SESSION_KEY = "_db_scoped_session" DB_ENGINE_KEY = "_db_engine" HIGHLIGHTED_KEY = "_highlighted" +DB_COLUMN = "db_column" # schema keys SCHEMA_PROP_KEY = "properties" diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 88768690..6c9da8df 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -285,9 +285,9 @@ def db_column_kwargs_by_result(self) -> Dict[str, Any]: :return Dict[str, Any]: key word arguments for every result """ return { - result_id: self.schema[result_id]["db_column"] + result_id: self.schema[result_id][DB_COLUMN] for result_id in self.schema.keys() - if "db_column" in self.schema[result_id] + if DB_COLUMN in self.schema[result_id] } @property diff --git a/tests/data/sample_output_schema.yaml b/tests/data/sample_output_schema.yaml index c7c8b79e..b577d418 100644 --- a/tests/data/sample_output_schema.yaml +++ b/tests/data/sample_output_schema.yaml @@ -25,7 +25,7 @@ output_image: md5sum: type: string description: "MD5SUM of an object" - highlighted: true + highlight: true db_column: unique: true nullable: false diff --git a/tests/data/sample_output_schema_highlight.yaml b/tests/data/sample_output_schema_highlight.yaml index 3373f6e8..e51fd0bd 100644 --- a/tests/data/sample_output_schema_highlight.yaml +++ 
b/tests/data/sample_output_schema_highlight.yaml @@ -21,21 +21,21 @@ output_file: description: "This a path to the output file" output_image: type: image - highlighted: false + highlight: false description: "This a path to the output image" log: type: file - highlighted: true + highlight: true description: "The log file of the pipeline run" profile: type: file - highlighted: true + highlight: true description: "The profile of the pipeline run" commands: type: file - highlighted: true + highlight: true description: "The file with shell commands executed by this pipeline" version: type: string - highlighted: true + highlight: true description: "Pipeline version" \ No newline at end of file From 8f533f6026523662430d53c042e9bb915b79a336 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 17 May 2021 17:03:59 -0400 Subject: [PATCH 42/65] experimental: set up a system for schema-based relationships specification --- pipestat/const.py | 13 ++++++++- pipestat/pipestat.py | 66 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/pipestat/const.py b/pipestat/const.py index a4854b6c..b5868bfa 100644 --- a/pipestat/const.py +++ b/pipestat/const.py @@ -67,7 +67,18 @@ DB_SCOPED_SESSION_KEY = "_db_scoped_session" DB_ENGINE_KEY = "_db_engine" HIGHLIGHTED_KEY = "_highlighted" -DB_COLUMN = "db_column" +DB_COLUMN_KEY = "db_column" +DB_RELATIONSHIP_KEY = "relationship" +DB_RELATIONSHIP_NAME_KEY = "name" +DB_RELATIONSHIP_TABLE_KEY = "table" +DB_RELATIONSHIP_COL_KEY = "column" +DB_RELATIONSHIP_BACKREF_KEY = "backref" +DB_RELATIONSHIP_ELEMENTS = [ + DB_RELATIONSHIP_BACKREF_KEY, + DB_RELATIONSHIP_COL_KEY, + DB_RELATIONSHIP_NAME_KEY, + DB_RELATIONSHIP_TABLE_KEY, +] # schema keys SCHEMA_PROP_KEY = "properties" diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 6c9da8df..79ce0787 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -7,9 +7,15 @@ import sqlalchemy.orm from attmap import PathExAttMap as PXAM from jsonschema import validate -from sqlalchemy import Column, create_engine +from sqlalchemy import Column, ForeignKey, create_engine from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import DeclarativeMeta, scoped_session, sessionmaker +from sqlalchemy.orm import ( + DeclarativeMeta, + backref, + relationship, + scoped_session, + sessionmaker, +) from ubiquerg import create_lock, remove_lock from yacman import YacAttMap @@ -280,14 +286,46 @@ def highlighted_results(self) -> List[str]: @property def db_column_kwargs_by_result(self) -> Dict[str, Any]: """ - Database column key word arguments for every result, sourced from the results schema + Database column key word arguments for every result, + sourced from the results schema in the `db_column` section :return Dict[str, Any]: key word arguments for every result """ return { - result_id: self.schema[result_id][DB_COLUMN] + result_id: self.schema[result_id][DB_COLUMN_KEY] for result_id in self.schema.keys() - if DB_COLUMN in self.schema[result_id] + if DB_COLUMN_KEY in self.schema[result_id] + } + + @property + def db_column_relationships_by_result(self) -> Dict[str, str]: + """ + Database column relationships for every result, + sourced from the results schema in the `relationship` section + + *Note: this is an experimental feature* + + :return Dict[str, Dict[str, str]]: relationships for every result + """ + + def _validate_rel_section(result_id): + if not all( + [ + k in self.schema[result_id][DB_RELATIONSHIP_KEY].keys() + for k in 
DB_RELATIONSHIP_ELEMENTS
+                ]
+            ):
+                raise PipestatDatabaseError(
+                    f"Not all required {DB_RELATIONSHIP_KEY} settings ({DB_RELATIONSHIP_ELEMENTS}) were "
+                    f"provided for result: {result_id}"
+                )
+            return True
+
+        return {
+            result_id: self.schema[result_id][DB_RELATIONSHIP_KEY]
+            for result_id in self.schema.keys()
+            if DB_RELATIONSHIP_KEY in self.schema[result_id]
+            and _validate_rel_section(result_id)
+        }
 
     @property
@@ -510,11 +548,29 @@ def _auto_repr(x: Any) -> str:
         for result_id, result_metadata in schema.items():
             col_type = SQL_CLASSES_BY_TYPE[result_metadata[SCHEMA_TYPE_KEY]]
             _LOGGER.debug(f"Adding object: {result_id} of type: {str(col_type)}")
+
+            rel_info = self.db_column_relationships_by_result.get(result_id, {})
+            col_args = []
+            # if there is a relationship defined for this result, include it
+            if rel_info:
+                attr_dict.update(
+                    {
+                        rel_info["name"]: relationship(
+                            rel_info["table"].capitalize(),
+                            backref=backref(
+                                rel_info["backref"], uselist=True, cascade="delete,all"
+                            ),
+                        )
+                    }
+                )
+                col_args = [ForeignKey(f"{rel_info['table']}.{rel_info['column']}")]
+
             attr_dict.update(
                 {
                     result_id: Column(
                         col_type,
                         doc=result_metadata["description"],
+                        *col_args,
                         **self.db_column_kwargs_by_result.get(result_id, {}),
                     )
                 }

From ee4aeed1275b54d67789520940058621bd90e839 Mon Sep 17 00:00:00 2001
From: Michal Stolarczyk
Date: Tue, 18 May 2021 09:38:13 -0400
Subject: [PATCH 43/65] add textual filter support for db

---
 pipestat/pipestat.py | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py
index 79ce0787..ba98f958 100644
--- a/pipestat/pipestat.py
+++ b/pipestat/pipestat.py
@@ -8,6 +8,7 @@
 from attmap import PathExAttMap as PXAM
 from jsonschema import validate
 from sqlalchemy import Column, ForeignKey, create_engine
+from sqlalchemy import text
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import (
     DeclarativeMeta,
@@ -1194,6 +1195,48 @@ def _retrieve_db(
             }
         raise PipestatDatabaseError(f"Record '{record_identifier}' not found")
 
+    def select_txt(
+        self,
+        filter_templ: Optional[str] = "",
+        filter_params: Optional[Dict[str, Any]] = {},
+        table_name: Optional[str] = None,
+        offset: Optional[int] = None,
+        limit: Optional[int] = None,
+    ) -> List[Any]:
+        """
+        Execute a query with a textual filter. Returns all results.
+
+        To retrieve all table contents, leave the filter arguments out.
+        Table name defaults to the namespace
+
+        :param str filter_templ: filter template with value placeholders,
+            formatted as follows `id<:value and name=:name`
+        :param Dict[str, Any] filter_params: a mapping of keys specified in the `filter_templ`
+            to parameters that are supposed to replace the placeholders
+        :param str table_name: name of the table to query
+        :param int offset: skip this number of rows
+        :param int limit: include this number of rows
+        :return List[Any]: a list of matched records
+        """
+        if self.file:
+            raise PipestatDatabaseError(
+                f"The {self.__class__.__name__} object is not backed by a database. "
+                f"This operation is not supported for file backend."
+ ) + table_name = table_name or self.namespace + with self.session as s: + q = ( + s.query(self._get_orm(table_name)) + .filter(text(filter_templ)) + .params(**filter_params) + ) + if isinstance(offset, int): + q = q.offset(offset) + if isinstance(limit, int): + q = q.limit(limit) + results = q.all() + return results + def assert_results_defined(self, results: List[str]) -> None: """ Assert provided list of results is defined in the schema From 0932733cca9e1fd1bfd1d8e6c45445bbcca764ff Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 2 Jun 2021 13:57:20 -0400 Subject: [PATCH 44/65] add more docs, make public --- docs/api_docs.md | 185 ++++++++++++++++++++++++---------- docs_jupyter/python_api.ipynb | 105 ++++++++++++++++++- pipestat/pipestat.py | 24 ++--- 3 files changed, 245 insertions(+), 69 deletions(-) diff --git a/docs/api_docs.md b/docs/api_docs.md index 21f94abe..1eb7bc19 100644 --- a/docs/api_docs.md +++ b/docs/api_docs.md @@ -31,11 +31,11 @@ h4 .content { # Package `pipestat` Documentation ## Class `PipestatManager` -Pipestat standardizes reporting of pipeline results and pipeline status management. It formalizes a way for pipeline developers and downstream tools developers to communicate -- results produced by a pipeline can easily and reliably become an input for downstream analyses. The object exposes API for interacting with the results and pipeline status and can be backed by either a YAML-formatted file or a PostgreSQL database. +Pipestat standardizes reporting of pipeline results and pipeline status management. It formalizes a way for pipeline developers and downstream tools developers to communicate -- results produced by a pipeline can easily and reliably become an input for downstream analyses. The object exposes API for interacting with the results and pipeline status and can be backed by either a YAML-formatted file or a database. 
```python -def __init__(self, namespace=None, record_identifier=None, schema_path=None, results_file_path=None, database_only=False, config=None, status_schema_path=None, flag_file_dir=None) +def __init__(self, namespace: Optional[str]=None, record_identifier: Optional[str]=None, schema_path: Optional[str]=None, results_file_path: Optional[str]=None, database_only: Optional[bool]=True, config: Union[str, dict, NoneType]=None, status_schema_path: Optional[str]=None, flag_file_dir: Optional[str]=None, custom_declarative_base: Optional[sqlalchemy.orm.decl_api.DeclarativeMeta]=None, show_db_logs: bool=False) ``` Initialize the object @@ -48,18 +48,19 @@ Initialize the object - `database_only` (`bool`): whether the reported data should not bestored in the memory, but only in the database - `config` (`str | dict`): path to the configuration file or a mappingwith the config file content - `status_schema_path` (`str`): path to the status schema that formalizesthe status flags structure +- `custom_declarative_base` (`sqlalchemy.orm.DeclarativeMeta`): a declarative base touse for ORMs creation a new instance will be created if not provided ```python -def assert_results_defined(self, results) +def assert_results_defined(self, results: List[str]) -> None ``` Assert provided list of results is defined in the schema #### Parameters: -- `results` (`list[str]`): list of results tocheck for existence in the schema +- `results` (`List[str]`): list of results tocheck for existence in the schema #### Raises: @@ -70,54 +71,61 @@ Assert provided list of results is defined in the schema ```python -def check_connection(self) +def check_record_exists(self, record_identifier: str, table_name: str=None) -> bool ``` -Check whether a PostgreSQL connection has been established +Check if the specified record exists in the table +#### Parameters: + +- `record_identifier` (`str`): record to check for +- `table_name` (`str`): table name to check + + #### Returns: -- `bool`: whether the connection has been established +- `bool`: whether the record exists in the table ```python -def check_record_exists(self, record_identifier=None) +def check_result_exists(self, result_identifier: str, record_identifier: str=None) -> bool ``` -Check if the record exists +Check if the result has been reported #### Parameters: - `record_identifier` (`str`): unique identifier of the record +- `result_identifier` (`str`): name of the result to check #### Returns: -- `bool`: whether the record exists +- `bool`: whether the specified result has been reported for theindicated record in current namespace ```python -def check_result_exists(self, result_identifier, record_identifier=None) +def check_which_results_exist(self, results: List[str], rid: Optional[str]=None, table_name: Optional[str]=None) -> List[str] ``` -Check if the result has been reported +Check which results have been reported #### Parameters: -- `record_identifier` (`str`): unique identifier of the record -- `result_identifier` (`str`): name of the result to check +- `rid` (`str`): unique identifier of the record +- `results` (`List[str]`): names of the results to check #### Returns: -- `bool`: whether the specified result has been reported for theindicated record in current namespace +- `List[str]`: whether the specified result has been reported for theindicated record in current namespace ```python -def clear_status(self, record_identifier=None, flag_names=None) +def clear_status(self, record_identifier: str=None, flag_names: List[str]=None) -> List[Optional[str]] ``` Remove 
status flags @@ -129,16 +137,8 @@ Remove status flags #### Returns: -- `list[str]`: Collection of names of flags removed - - - +- `List[str]`: Collection of names of flags removed -```python -def close_postgres_connection(self) -``` - -Close connection and remove client bound @@ -167,27 +167,53 @@ Data object ```python -def db_cursor(self) +def db_column_kwargs_by_result(self) ``` -Establish connection and get a PostgreSQL database cursor, commit and close the connection afterwards +Database column key word arguments for every result, sourced from the results schema in the `db_column` section #### Returns: -- `LoggingCursor`: Database cursor object +- `Dict[str, Any]`: key word arguments for every result ```python -def establish_postgres_connection(self, suppress=False) +def db_column_relationships_by_result(self) ``` -Establish PostgreSQL connection using the config data -#### Parameters: +Database column relationships for every result, sourced from the results schema in the `relationship` section -- `suppress` (`bool`): whether to suppress any connection errors +*Note: this is an experimental feature* +#### Returns: +- `Dict[str, Dict[str, str]]`: relationships for every result + + + + +```python +def db_url(self) +``` + +Database URL, generated based on config credentials +#### Returns: + +- `str`: database URL + + +#### Raises: + +- `PipestatDatabaseError`: if the object is not backed by a database + + + + +```python +def establish_db_connection(self) -> bool +``` +Establish DB connection using the config data #### Returns: - `bool`: whether the connection has been established successfully @@ -208,7 +234,24 @@ File path that the object is reporting the results into ```python -def get_status(self, record_identifier=None) +def get_orm(self, table_name: str=None) -> Any +``` + +Get an object relational mapper class +#### Parameters: + +- `table_name` (`str`): table name to get a class for + + +#### Returns: + +- `Any`: Object relational mapper class + + + + +```python +def get_status(self, record_identifier: str=None) -> Optional[str] ``` Get the current pipeline status @@ -220,7 +263,7 @@ Get the current pipeline status ```python -def get_status_flag_path(self, status_identifier, record_identifier=None) +def get_status_flag_path(self, status_identifier: str, record_identifier=None) -> str ``` Get the path to the status file flag @@ -244,7 +287,19 @@ def highlighted_results(self) Highlighted results #### Returns: -- `list[str]`: a collection of highlighted results +- `List[str]`: a collection of highlighted results + + + + +```python +def is_db_connected(self) -> bool +``` + +Check whether a DB connection has been established +#### Returns: + +- `bool`: whether the connection has been established @@ -286,7 +341,7 @@ Unique identifier of the record ```python -def remove(self, record_identifier=None, result_identifier=None) +def remove(self, record_identifier: str=None, result_identifier: str=None) -> bool ``` Remove a result. @@ -307,13 +362,13 @@ will be removed. ```python -def report(self, values, record_identifier=None, force_overwrite=False, strict_type=True, return_id=False) +def report(self, values: Dict[str, Any], record_identifier: str=None, force_overwrite: bool=False, strict_type: bool=True, return_id: bool=False) -> Union[bool, int] ``` Report a result. 
#### Parameters: -- `values` (`dict[str, any]`): dictionary of result-value pairs +- `values` (`Dict[str, any]`): dictionary of result-value pairs - `record_identifier` (`str`): unique identifier of the record, valuein 'record_identifier' column to look for to determine if the record already exists - `force_overwrite` (`bool`): whether to overwrite the existing record - `strict_type` (`bool`): whether the type of the reported values shouldremain as is. Pipestat would attempt to convert to the schema-defined one otherwise @@ -340,7 +395,7 @@ Result schema mappings ```python -def retrieve(self, record_identifier=None, result_identifier=None) +def retrieve(self, record_identifier: Optional[str]=None, result_identifier: Optional[str]=None) -> Union[Any, Dict[str, Any]] ``` Retrieve a result for a record. @@ -355,7 +410,7 @@ be returned. #### Returns: -- `any | dict[str, any]`: a single result or a mapping with all theresults reported for the record +- `any | Dict[str, any]`: a single result or a mapping with all theresults reported for the record @@ -385,28 +440,56 @@ Schema path ```python -def select(self, columns=None, condition=None, condition_val=None, offset=None, limit=None) +def select(self, table_name: Optional[str]=None, columns: Optional[List[str]]=None, filter_conditions: Optional[List[Tuple[str, str, Union[str, List[str]]]]]=None, json_filter_conditions: Optional[List[Tuple[str, str, str]]]=None, offset: Optional[int]=None, limit: Optional[int]=None) -> List[Any] ``` -Get all the contents from the selected table, possibly restricted by the provided condition. +Perform a `SELECT` on the table #### Parameters: -- `columns` (`str | list[str]`): columns to select -- `condition` (`str`): condition to restrict the resultswith, will be appended to the end of the SELECT statement and safely populated with 'condition_val', for example: `"id=%s"` -- `condition_val` (`list`): values to fill the placeholderin 'condition' with -- `offset` (`int`): number of records to be skipped -- `limit` (`int`): max number of records to be returned +- `table_name` (`str`): name of the table to SELECT from +- `columns` (`List[str]`): columns to include in the result +- `filter_conditions` (`[(key,operator,value)]`): - eq for == - lt for < - ge for >= - in for in_ - like for like +- `json_filter_conditions` (`[(col,key,value)]`): conditions for JSONB column toquery that include JSON column name, key withing the JSON object in that column and the value to check the identity against. Therefore only '==' is supported in non-nested checks, e.g. [("other", "genome", "hg38")] +- `offset` (`int`): skip this number of rows +- `limit` (`int`): include this number of rows + + + + +```python +def select_txt(self, filter_templ: Optional[str]='', filter_params: Optional[Dict[str, Any]]={}, table_name: Optional[str]=None, offset: Optional[int]=None, limit: Optional[int]=None) -> List[Any] +``` + +Execute a query with a textual filter. Returns all results. + +To retrieve all table contents, leave the filter arguments out. 
+Table name defaults to the namespace +#### Parameters: + +- `filter_templ` (`str`): filter template with value placeholders,formatted as follows `id<:value and name=:name` +- `filter_params` (`Dict[str, Any]`): a mapping keys specified in the `filter_templ`to parameters that are supposed to replace the placeholders +- `table_name` (`str`): name of the table to query +- `offset` (`int`): skip this number of rows +- `limit` (`int`): include this number of rows #### Returns: -- `list[psycopg2.extras.DictRow]`: all table contents +- `List[Any]`: a list of matched records + + + + +```python +def session(self) +``` +Provide a transactional scope around a series of query operations, no commit afterwards. ```python -def set_status(self, status_identifier, record_identifier=None) +def set_status(self, status_identifier: str, record_identifier: str=None) -> None ``` Set pipeline run status. @@ -447,7 +530,7 @@ Status schema source ```python -def validate_schema(self) +def validate_schema(self) -> None ``` Check schema for any possible issues @@ -461,4 +544,4 @@ Check schema for any possible issues -*Version Information: `pipestat` v0.0.3-dev, generated by `lucidoc` v0.4.3* +*Version Information: `pipestat` v0.1.0-dev, generated by `lucidoc` v0.4.2* diff --git a/docs_jupyter/python_api.ipynb b/docs_jupyter/python_api.ipynb index 85d972a4..3fc17a69 100644 --- a/docs_jupyter/python_api.ipynb +++ b/docs_jupyter/python_api.ipynb @@ -1105,13 +1105,103 @@ "psm_no_schema.set_status(status_identifier=\"running\", record_identifier=\"sample1\")\n", "psm_no_schema.get_status(record_identifier=\"sample1\")" ] + }, + { + "source": [ + "## Extras\n", + "\n", + "Here are some additional convenience methods \n", + "\n", + "### Methods avaialable to a database-backed `PipestatManager`\n", + "\n", + "To make it possible to benefit from the performance gain provided by the database backend you may use the following `PipestatManager` methods or properties\n", + "\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "- `get_orm` method\n", + "\n", + "This way you can retrieve the object-relational mapper for the `PipestatManger` instance.\n", + "\n", + "\n", + "```python\n", + "TestORM = psm.get_orm(\"test\")\n", + "```\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "\n", + "- `session` property\n", + "\n", + "This is a universal way to get access to a database session. The `session` property is a context manager, so it needs to be used in a `with` block.\n", + "\n", + "Database queries of any kind can be performed with the `PipestatManager.session` property. Although, it requires knowledge of [`sqlalchemy`](https://www.sqlalchemy.org/) library. If you'd like to perform simple filtered queries the `select*` methods described below may be sufficient." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "```python\n", + "# get the object-relational mapper\n", + "TestORM = psm.get_orm(\"test\")\n", + "\n", + "# query the database\n", + "with psm.session as s:\n", + " results = s.query(TestORM).filter(TestORM.number_of_things > 1).all()\n", + "```\n", + "\n", + "For more information regarding database querying and ORM concepts please refer to the [`sqlalchemy` library documentation](https://www.sqlalchemy.org/)\n", + "\n", + "- `select` method\n", + "\n", + "This way you can perform a `SELECT` query on the results database. This is the safest way of performing custom `SELECT`s. 
Here are the avaialable operators:\n", + "- `eq` for ==\n", + "- `lt` for <\n", + "- `ge` for >=\n", + "- `in` for in_\n", + "- `like` for LIKE\n", + "\n", + "The operators need to be provided in a list filter condition tuples in the following way: `[(key,operator,value)]`, for example `[(\"id\", \"eq\", 1)]` would match the record with value `1` in `id` column. Multiple filter conditions are treated as `AND`.\n", + "\n", + "Columns of `JSONB` type are treated differently. The conditions for `JSONB` columns include JSON column name, key within the JSON object in that column and the value to check the identity against. Therefore only '==' operation is supported in non-nested checks, for example `[(\"other\", \"genome\", \"hg38\")]` would match the records with value `hg38` for `genome` key in the `other` (`JSONB`) column.\n", + "\n", + "```python\n", + "select(\n", + " filter_conditions=[(\"id\", \"eq\", 1)], \n", + " json_filter_conditions=[(\"other\", \"genome\", \"hg38\")], \n", + " offset=1, \n", + " limit=10,\n", + ")\n", + "```\n", + "\n", + "- `select_txt` method\n", + "\n", + "This way you can perform a more flexible, text based query `SELECT`. Please note the security shortcomings related to this method and do not pass the input text from external sources, which may result in SQL injections.\n", + "\n", + "```python\n", + "select_txt(\n", + " filter_templ=\"id<:value and name=:name\", \n", + " filter_params={\"value\": 10, \"name\": \"test\"}, \n", + " offset=1, \n", + " limit=10,\n", + ")\n", + "```" + ], + "cell_type": "markdown", + "metadata": {} } ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "name": "python395jvsc74a57bd0aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49", + "display_name": "Python 3.9.5 64-bit" }, "language_info": { "codemirror_mode": { @@ -1123,9 +1213,14 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.4" + "version": "3.9.5" + }, + "metadata": { + "interpreter": { + "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" + } } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index ba98f958..ca195460 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -7,8 +7,7 @@ import sqlalchemy.orm from attmap import PathExAttMap as PXAM from jsonschema import validate -from sqlalchemy import Column, ForeignKey, create_engine -from sqlalchemy import text +from sqlalchemy import Column, ForeignKey, create_engine, text from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import ( DeclarativeMeta, @@ -886,7 +885,7 @@ def _table_to_dict(self) -> None: Create a dictionary from the database table data """ with self.session as s: - records = s.query(self._get_orm(self.namespace)).all() + records = s.query(self.get_orm(self.namespace)).all() _LOGGER.debug(f"Reading data from database for '{self.namespace}' namespace") for record in records: record_id = getattr(record, RECORD_ID) @@ -941,7 +940,7 @@ def _count_rows(self, table_name: str) -> int: with self.session as s: return s.query(self[DB_ORMS_KEY][table_name].id).count() - def _get_orm(self, table_name: str = None) -> Any: + def get_orm(self, table_name: str = None) -> Any: """ Get an object relational mapper class @@ -974,7 +973,7 @@ def check_record_exists( if self.file is None: with self.session as s: return ( - s.query(self._get_orm(table_name).id) + 
s.query(self.get_orm(table_name).id) .filter_by(record_identifier=record_identifier) .first() is not None @@ -1032,7 +1031,7 @@ def _check_which_results_exist_db( rid = self._strict_record_id(rid) with self.session as s: record = ( - s.query(self._get_orm(table_name)) + s.query(self.get_orm(table_name)) .filter_by(record_identifier=rid) .first() ) @@ -1074,7 +1073,7 @@ def select( limit: Optional[int] = None, ) -> List[Any]: """ - Perform a SELECT on the table + Perform a `SELECT` on the table :param str table_name: name of the table to SELECT from :param List[str] columns: columns to include in the result @@ -1092,7 +1091,7 @@ def select( :param int limit: include this number of rows """ - ORM = self._get_orm(table_name or self.namespace) + ORM = self.get_orm(table_name or self.namespace) with self.session as s: if columns is not None: query = s.query(*[getattr(ORM, column) for column in columns]) @@ -1180,7 +1179,7 @@ def _retrieve_db( with self.session as s: record = ( - s.query(self._get_orm(table_name)) + s.query(self.get_orm(table_name)) .filter_by(record_identifier=record_identifier) .first() ) @@ -1223,10 +1222,9 @@ def select_txt( f"The {self.__class__.__name__} object is not backed by a database. " f"This operation is not supported for file backend." ) - table_name = table_name or self.namespace with self.session as s: q = ( - s.query(self._get_orm(table_name)) + s.query(self.get_orm(table_name or self.namespace)) .filter(text(filter_templ)) .params(**filter_params) ) @@ -1344,7 +1342,7 @@ def _report_db( :return int: updated/inserted row """ record_identifier = self._strict_record_id(record_identifier) - ORMClass = self._get_orm(table_name) + ORMClass = self.get_orm(table_name) values.update({RECORD_ID: record_identifier}) if not self.check_record_exists( record_identifier=record_identifier, table_name=table_name @@ -1469,7 +1467,7 @@ def _remove_db( """ table_name = table_name or self.namespace record_identifier = self._strict_record_id(record_identifier) - ORMClass = self._get_orm(table_name=table_name) + ORMClass = self.get_orm(table_name=table_name) if self.check_record_exists( record_identifier=record_identifier, table_name=table_name ): From fe7e4ed0c9ca2363af78835437ddbe3708f66e1f Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 3 Jun 2021 16:31:13 -0400 Subject: [PATCH 45/65] document db_column section in the result schema --- docs/pipestat_specification.md | 16 +++ docs_jupyter/python_api.ipynb | 220 ++++++++++++++++----------------- 2 files changed, 122 insertions(+), 114 deletions(-) diff --git a/docs/pipestat_specification.md b/docs/pipestat_specification.md index f3608619..a1a987e2 100644 --- a/docs/pipestat_specification.md +++ b/docs/pipestat_specification.md @@ -140,6 +140,22 @@ log_file: The highlighted results can be later retrieved by pipestat clients via `PipestatManager.highlighted_results` property, which simply returns a list of result identifiers. +## Database columns configuration (DB backend only) +If the `PipestatManager` object is backed by a database, the database columns can be easily configured using the results schema via `db_column` section. For example: + +```yaml +important_numeric_id: + type: integer + description: "An important ID that must be unique and always exist" + db_column: + unique: true + nullable: false +``` + +The values provided in the `db_column` section are passed to the `sqlalchemy.schema.Column` constructor. 
Therefore, please refer to [`sqlalchemy.Column` class constructor documentation](https://docs.sqlalchemy.org/en/14/core/metadata.html?highlight=column#sqlalchemy.schema.Column.__init__) to learn more about the keys that can be specified in this section. + +In the above example, the `important_numeric_id` result reported with the `PipestatManager` instance initialized with that schema will be forced to be always provided and unique across all records. + # Status schema Apart from results reporting pipestat provides a robust pipeline status management system, which can be used to report pipeline status from within the pipeline and monitor pipeline's status in other software. Status schema file defines the possible pipeline status identifiers and provides other metadata, like `description` or `color` for display purposes. diff --git a/docs_jupyter/python_api.ipynb b/docs_jupyter/python_api.ipynb index 3fc17a69..6907c65f 100644 --- a/docs_jupyter/python_api.ipynb +++ b/docs_jupyter/python_api.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 69, "metadata": {}, "outputs": [], "source": [ @@ -51,14 +51,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 70, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "/var/folders/z_/bkm4rp412zjctjm28b54tbvr0000gn/T/tmpylsi8onv.yaml\n" + "/var/folders/h8/8npwnh2s4rb8lr6hsy2ydrsh0000gp/T/tmpx1zhr9bi.yaml\n" ] } ], @@ -78,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 71, "metadata": {}, "outputs": [], "source": [ @@ -99,18 +99,18 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 72, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "'test'" ] }, - "execution_count": 4, "metadata": {}, - "output_type": "execute_result" + "execution_count": 72 } ], "source": [ @@ -126,18 +126,18 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 73, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "'sample1'" ] }, - "execution_count": 5, "metadata": {}, - "output_type": "execute_result" + "execution_count": 73 } ], "source": [ @@ -153,18 +153,18 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 74, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "YacAttMap: {}" ] }, - "execution_count": 6, "metadata": {}, - "output_type": "execute_result" + "execution_count": 74 } ], "source": [ @@ -194,10 +194,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 75, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "{'number_of_things': {'type': 'integer', 'description': 'Number of things'},\n", @@ -211,12 +212,15 @@ " 'output_file': {'type': 'file',\n", " 'description': 'This a path to the output file'},\n", " 'output_image': {'type': 'image',\n", - " 'description': 'This a path to the output image'}}" + " 'description': 'This a path to the output image'},\n", + " 'md5sum': {'type': 'string',\n", + " 'description': 'MD5SUM of an object',\n", + " 'highlight': True,\n", + " 'db_column': {'unique': True, 'nullable': False}}}" ] }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "execution_count": 75 } ], "source": [ @@ -232,10 +236,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 76, "metadata": {}, "outputs": [ { + 
"output_type": "execute_result", "data": { "text/plain": [ "{'type': 'object',\n", @@ -244,9 +249,8 @@ " 'required': ['path', 'title']}" ] }, - "execution_count": 8, "metadata": {}, - "output_type": "execute_result" + "execution_count": 76 } ], "source": [ @@ -263,24 +267,14 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 77, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "'title' is a required property\n", - "\n", - "Failed validating 'required' in schema:\n", - " {'description': 'This a path to the output file',\n", - " 'properties': {'path': {'type': 'string'},\n", - " 'title': {'type': 'string'}},\n", - " 'required': ['path', 'title'],\n", - " 'type': 'object'}\n", - "\n", - "On instance:\n", - " {'path': '/home/user/path.csv'}\n" + "'title' is a required property\n\nFailed validating 'required' in schema:\n {'description': 'This a path to the output file',\n 'properties': {'path': {'type': 'string'},\n 'title': {'type': 'string'}},\n 'required': ['path', 'title'],\n 'type': 'object'}\n\nOn instance:\n {'path': '/home/user/path.csv'}\n" ] } ], @@ -300,26 +294,25 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 78, "metadata": {}, "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ - "Reported records for 'sample1' in 'test' namespace:\n", - " - output_file: {'path': '/home/user/path.csv', 'title': 'CSV file with some data'}\n" + "Reported records for 'sample1' in 'test' namespace:\n - output_file: {'path': '/home/user/path.csv', 'title': 'CSV file with some data'}\n" ] }, { + "output_type": "execute_result", "data": { "text/plain": [ "True" ] }, - "execution_count": 10, "metadata": {}, - "output_type": "execute_result" + "execution_count": 78 } ], "source": [ @@ -342,10 +335,11 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 79, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "test:\n", @@ -355,9 +349,8 @@ " title: CSV file with some data" ] }, - "execution_count": 11, "metadata": {}, - "output_type": "execute_result" + "execution_count": 79 } ], "source": [ @@ -373,25 +366,25 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 80, "metadata": {}, "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "These results exist for 'sample1': output_file\n" ] }, { + "output_type": "execute_result", "data": { "text/plain": [ "False" ] }, - "execution_count": 12, "metadata": {}, - "output_type": "execute_result" + "execution_count": 80 } ], "source": [ @@ -407,12 +400,12 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 81, "metadata": {}, "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "These results exist for 'sample1': output_file\n", "Overwriting existing results: output_file\n", @@ -421,6 +414,7 @@ ] }, { + "output_type": "execute_result", "data": { "text/plain": [ "test:\n", @@ -430,9 +424,8 @@ " title: new CSV file with some data" ] }, - "execution_count": 13, "metadata": {}, - "output_type": "execute_result" + "execution_count": 81 } ], "source": [ @@ -457,7 +450,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 82, "metadata": {}, "outputs": [], "source": [ @@ -471,10 +464,11 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 83, "metadata": {}, "outputs": [ { + "output_type": "execute_result", 
"data": { "text/plain": [ "test:\n", @@ -484,9 +478,8 @@ " title: new CSV file with some data" ] }, - "execution_count": 15, "metadata": {}, - "output_type": "execute_result" + "execution_count": 83 } ], "source": [ @@ -502,14 +495,14 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 84, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "/var/folders/z_/bkm4rp412zjctjm28b54tbvr0000gn/T/tmpylsi8onv.yaml\n", + "/var/folders/h8/8npwnh2s4rb8lr6hsy2ydrsh0000gp/T/tmpx1zhr9bi.yaml\n", "test:\n", " sample1:\n", " output_file:\n", @@ -538,18 +531,18 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 85, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "{'type': 'integer', 'description': 'Number of things'}" ] }, - "execution_count": 17, "metadata": {}, - "output_type": "execute_result" + "execution_count": 85 } ], "source": [ @@ -558,26 +551,25 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 86, "metadata": {}, "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ - "Reported records for 'sample1' in 'test' namespace:\n", - " - number_of_things: 10\n" + "Reported records for 'sample1' in 'test' namespace:\n - number_of_things: 10\n" ] }, { + "output_type": "execute_result", "data": { "text/plain": [ "True" ] }, - "execution_count": 18, "metadata": {}, - "output_type": "execute_result" + "execution_count": 86 } ], "source": [ @@ -593,12 +585,12 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 87, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "int() argument must be a string, a bytes-like object or a number, not 'list'\n" ] @@ -622,10 +614,11 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 88, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "test:\n", @@ -636,9 +629,8 @@ " number_of_things: 10" ] }, - "execution_count": 20, "metadata": {}, - "output_type": "execute_result" + "execution_count": 88 } ], "source": [ @@ -658,18 +650,18 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 89, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "'10'" ] }, - "execution_count": 21, "metadata": {}, - "output_type": "execute_result" + "execution_count": 89 } ], "source": [ @@ -685,10 +677,11 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 90, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "{'output_file': {'path': '/home/user/path_new.csv',\n", @@ -696,9 +689,8 @@ " 'number_of_things': '10'}" ] }, - "execution_count": 22, "metadata": {}, - "output_type": "execute_result" + "execution_count": 90 } ], "source": [ @@ -721,25 +713,25 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 91, "metadata": {}, "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "Removed result 'number_of_things' for record 'sample1' from 'test' namespace\n" ] }, { + "output_type": "execute_result", "data": { "text/plain": [ "True" ] }, - "execution_count": 23, "metadata": {}, - "output_type": "execute_result" + "execution_count": 91 } ], "source": [ @@ -755,25 +747,25 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 92, "metadata": {}, "outputs": [ { - 
"name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "Removing 'sample1' record\n" ] }, { + "output_type": "execute_result", "data": { "text/plain": [ "True" ] }, - "execution_count": 24, "metadata": {}, - "output_type": "execute_result" + "execution_count": 92 } ], "source": [ @@ -789,7 +781,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 93, "metadata": { "pycharm": { "name": "#%%\n" @@ -797,14 +789,14 @@ }, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "test: OrderedDict()" ] }, - "execution_count": 25, "metadata": {}, - "output_type": "execute_result" + "execution_count": 93 } ], "source": [ @@ -822,14 +814,14 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 94, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "/var/folders/z_/bkm4rp412zjctjm28b54tbvr0000gn/T/tmplfz72deu.yaml\n" + "/var/folders/h8/8npwnh2s4rb8lr6hsy2ydrsh0000gp/T/tmp6gz01fu4.yaml\n" ] } ], @@ -856,10 +848,11 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 95, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "{'number_of_things': {'type': 'integer', 'description': 'Number of things'},\n", @@ -889,9 +882,8 @@ " 'description': 'Pipeline version'}}" ] }, - "execution_count": 27, "metadata": {}, - "output_type": "execute_result" + "execution_count": 95 } ], "source": [ @@ -907,18 +899,18 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 96, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "['log', 'profile', 'commands', 'version']" ] }, - "execution_count": 28, "metadata": {}, - "output_type": "execute_result" + "execution_count": 96 } ], "source": [ @@ -938,7 +930,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 97, "metadata": {}, "outputs": [], "source": [ @@ -954,18 +946,18 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 98, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "'running'" ] }, - "execution_count": 30, "metadata": {}, - "output_type": "execute_result" + "execution_count": 98 } ], "source": [ @@ -981,10 +973,11 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 99, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "{'running': {'description': 'the pipeline is running',\n", @@ -998,9 +991,8 @@ " 'color': [169, 169, 169]}}" ] }, - "execution_count": 31, "metadata": {}, - "output_type": "execute_result" + "execution_count": 99 } ], "source": [ @@ -1029,14 +1021,14 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 100, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "/var/folders/z_/bkm4rp412zjctjm28b54tbvr0000gn/T/tmp8q7552ae.yaml\n" + "/var/folders/h8/8npwnh2s4rb8lr6hsy2ydrsh0000gp/T/tmpm44tb0h2.yaml\n" ] } ], @@ -1060,12 +1052,12 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 101, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "Results schema not found. The schema is required to report results. 
It needs to be supplied to the object constructor.\n" ] @@ -1087,18 +1079,18 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 102, "metadata": {}, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "'running'" ] }, - "execution_count": 34, "metadata": {}, - "output_type": "execute_result" + "execution_count": 102 } ], "source": [ @@ -1223,4 +1215,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} From 20d9eda9e50f14f730190c8e61318106603fa85d Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 4 Jun 2021 14:58:49 -0400 Subject: [PATCH 46/65] reformat, remove db column params from test schema --- .github/workflows/run-pytest.yml | 2 +- .pre-commit-config.yaml | 6 +-- MANIFEST.in | 2 +- README.md | 4 +- codecov.yml | 2 +- docs/README.md | 4 +- docs/api_docs.md | 4 +- docs/autodoc_build/pipestat.md | 6 +-- docs/changelog.md | 1 + docs_jupyter/cli.md | 41 +++++++++---------- pipestat/pipestat.py | 2 +- pipestat/schemas/status_schema.yaml | 2 +- requirements/requirements-doc.txt | 6 +-- requirements/requirements-test.txt | 2 +- tests/data/custom_status_schema.yaml | 2 +- tests/data/sample_output_schema.yaml | 4 -- .../data/sample_output_schema_highlight.yaml | 2 +- update_api_docs.sh | 2 +- 18 files changed, 43 insertions(+), 51 deletions(-) diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index 216adf26..7fe43476 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -47,4 +47,4 @@ jobs: uses: codecov/codecov-action@v1 with: file: ./coverage.xml - name: py-${{ matrix.python-version }}-${{ matrix.os }} \ No newline at end of file + name: py-${{ matrix.python-version }}-${{ matrix.os }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ab5489e2..de8f7bdf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v4.0.1 hooks: - id: trailing-whitespace - id: check-yaml @@ -9,12 +9,12 @@ repos: - id: trailing-whitespace - repo: https://github.com/PyCQA/isort - rev: 5.7.0 + rev: 5.8.0 hooks: - id: isort args: ["--profile", "black"] - repo: https://github.com/psf/black - rev: 20.8b1 + rev: 21.5b2 hooks: - id: black diff --git a/MANIFEST.in b/MANIFEST.in index 19044bd6..be6e936b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,3 @@ include requirements/* include README.md -include pipestat/schemas/* \ No newline at end of file +include pipestat/schemas/* diff --git a/README.md b/README.md index ea931792..cec9c068 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ import pipestat psm = pipestat.PipestatManager() psm.report(values={"result_name": 1.1}) ``` - + ## Retrieve a result From command line: @@ -63,5 +63,3 @@ import pipestat psm = pipestat.PipestatManager() psm.retrieve(result_identifier="result_name") ``` - - diff --git a/codecov.yml b/codecov.yml index bc5f12fb..be72bc25 100644 --- a/codecov.yml +++ b/codecov.yml @@ -2,4 +2,4 @@ ignore: - "*/argparser.py" - "*/cli.py" - "*/__main__.py" - - "setup.py" \ No newline at end of file + - "setup.py" diff --git a/docs/README.md b/docs/README.md index ea931792..cec9c068 100644 --- a/docs/README.md +++ b/docs/README.md @@ -46,7 +46,7 @@ import pipestat psm = pipestat.PipestatManager() psm.report(values={"result_name": 1.1}) ``` - + ## Retrieve a result From command line: @@ -63,5 +63,3 @@ import pipestat psm = pipestat.PipestatManager() 
psm.retrieve(result_identifier="result_name") ``` - - diff --git a/docs/api_docs.md b/docs/api_docs.md index 1eb7bc19..005e2bb7 100644 --- a/docs/api_docs.md +++ b/docs/api_docs.md @@ -8,7 +8,7 @@ document.addEventListener('DOMContentLoaded', (event) => {