diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index e9ca8d10..2927f157 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -7,6 +7,7 @@ on: branches: [master] workflow_dispatch: inputs: null + jobs: pytest: strategy: diff --git a/README.md b/README.md index 4f8c41f7..14843395 100644 --- a/README.md +++ b/README.md @@ -4,100 +4,23 @@ pipestat
-# What is this? +Pipestat standardizes reporting of pipeline results. It provides 1) a standard specification for how pipeline outputs should be stored; and 2) an implementation to easily write results to that format from within Python or from the command line. A pipeline author defines all the outputs produced by a pipeline by writing a JSON-schema. The pipeline then uses pipestat to report pipeline outputs as the pipeline runs, either via the Python API or command line interface. The user configures results to be stored either in a [YAML-formatted file](https://yaml.org/spec/1.2/spec.html) or a [PostgreSQL database](https://www.postgresql.org/). -Pipestat standardizes reporting of pipeline results. It provides 1) a standard specification for how pipeline outputs should be stored; and 2) an implementation to easily write results to that format from within Python or from the command line. +See [Pipestat documentation](https://pipestat.databio.org) for complete details. -# How does it work? -A pipeline author defines all the outputs produced by a pipeline by writing a JSON-schema. The pipeline then uses pipestat to report pipeline outputs as the pipeline runs, either via the Python API or command line interface. The user configures results to be stored either in a [YAML-formatted file](https://yaml.org/spec/1.2/spec.html) or a [PostgreSQL database](https://www.postgresql.org/). The results are recorded according to the pipestat specification, in a standard, pipeline-agnostic way. This way, downstream software can use this specification to create universal tools for analyzing, monitoring, and visualizing pipeline results that will work with any pipeline or workflow. +## Developer tests +First you need a local demo instance of postgres running to test the database back-end. 
you can get one using docker matching the included config file like this: -# Quick start - -## Install pipestat - -```console -pip install pipestat -``` - -## Set environment variables (optional) - -```console -export PIPESTAT_RESULTS_SCHEMA=output_schema.yaml -export PIPESTAT_RECORD_ID=my_record -export PIPESTAT_RESULTS_FILE=results_file.yaml -export PIPESTAT_NAMESPACE=my_namespace -``` - -## Pipeline results reporting and retrieval - -### Report a result - -From command line: - -```console -pipestat report -i result_name -v 1.1 -``` - -From Python: - -```python -import pipestat - -psm = pipestat.PipestatManager() -psm.report(values={"result_name": 1.1}) -``` - -### Retrieve a result - -From command line: - -```console -pipestat retrieve -i result_name -``` - -From Python: - -```python -import pipestat - -psm = pipestat.PipestatManager() -psm.retrieve(result_identifier="result_name") ``` +docker run --rm -it -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=pipestat-password -e POSTGRES_DB=pipestat-test -p 5432:5432 postgres -## Pipeline status management - -## Set status - -From command line: - -```console -pipestat status set running ``` -From Python: - -```python -import pipestat +Then, run tests: -psm = pipestat.PipestatManager() -psm.set_status(status_identifier="running") ``` - -## Get status - -From command line: - -```console -pipestat status get +pytest ``` -From Python: - -```python -import pipestat - -psm = pipestat.PipestatManager() -psm.get_status() -``` diff --git a/docs/img/pipestat_logo.svg b/docs/img/pipestat_logo.svg index 06aefbd6..aaab9636 100644 --- a/docs/img/pipestat_logo.svg +++ b/docs/img/pipestat_logo.svg @@ -9,11 +9,11 @@ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" width="122mm" - height="53mm" - viewBox="0 0 432.28321 187.79531" + height="40mm" + viewBox="0 0 432.28321 141.73231" id="svg4818" version="1.1" - inkscape:version="1.0.1 (c497b03c, 2020-09-10)" 
+ inkscape:version="0.92.5 (2060ec1f9f, 2020-04-08)" sodipodi:docname="pipestat_logo.svg"> @@ -39,9 +39,9 @@ borderopacity="1.0" inkscape:pageopacity="0.0" inkscape:pageshadow="2" - inkscape:zoom="1.1933509" - inkscape:cx="245.97685" - inkscape:cy="0.40827274" + inkscape:zoom="0.59667545" + inkscape:cx="-328.69803" + inkscape:cy="52.482513" inkscape:document-units="px" inkscape:current-layer="layer1" showgrid="false" @@ -50,10 +50,10 @@ fit-margin-right="1" fit-margin-bottom="1" inkscape:document-rotation="0" - inkscape:window-width="1252" - inkscape:window-height="855" - inkscape:window-x="0" - inkscape:window-y="23" + inkscape:window-width="1802" + inkscape:window-height="1042" + inkscape:window-x="1563" + inkscape:window-y="450" inkscape:window-maximized="0" /> @@ -63,7 +63,7 @@ image/svg+xml - + @@ -71,49 +71,67 @@ inkscape:label="Layer 1" inkscape:groupmode="layer" id="layer1" - transform="translate(-442.41812,-347.24105)"> + transform="translate(-442.41812,-393.30403)"> - - - - - - - - - - + inkscape:connector-curvature="0" + id="path47" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:125.86335754px;line-height:1.25;font-family:Uroob;-inkscape-font-specification:Uroob;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;fill:#000000;fill-opacity:1;stroke:none;stroke-width:9.43975067;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" + d="m 566.22163,438.26134 q 3.99468,-2.88847 
8.91122,-2.88847 h 4.30197 q 7.80501,0 12.47572,4.48634 4.67071,4.48634 4.67071,12.41426 v 24.58269 q 0,12.90591 -8.6654,16.83914 -3.01138,1.35204 -7.25189,1.35204 h -6.76024 q -3.99469,0 -6.76024,-3.01138 v 30.05234 h -10.63201 l 0.0614,-86.71543 h 9.64871 z m 19.85052,37.79588 v -22.30879 q 0,-4.79362 -1.04477,-6.3915 -1.90515,-2.94992 -6.33004,-2.94992 h -5.28528 q -1.90515,0 -4.1176,1.35205 -2.15098,1.35204 -2.15098,3.19575 v 30.91272 q 0,1.84371 2.2739,3.99469 2.2739,2.08953 3.99468,2.08953 h 5.28528 q 6.08422,0 7.06752,-6.08422 0.24583,-1.78224 0.30729,-3.81031 z" /> + + + + + + + + + pipestat + diff --git a/docs/img/pipestat_logo_light.svg b/docs/img/pipestat_logo_light.svg new file mode 100644 index 00000000..b4cd86af --- /dev/null +++ b/docs/img/pipestat_logo_light.svg @@ -0,0 +1,137 @@ + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + pipestat + + + diff --git a/docs/img/pipestat_logo_old.svg b/docs/img/pipestat_logo_old.svg new file mode 100644 index 00000000..06aefbd6 --- /dev/null +++ b/docs/img/pipestat_logo_old.svg @@ -0,0 +1,119 @@ + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + diff --git a/pipestat/cli.py b/pipestat/cli.py index eb084208..b9826362 100644 --- a/pipestat/cli.py +++ b/pipestat/cli.py @@ -40,15 +40,11 @@ def main(): if psm.schema is None: raise SchemaNotFoundError(msg="report", cli=True) result_metadata = psm.schema[args.result_identifier] - if ( - result_metadata[SCHEMA_TYPE_KEY] - in [ - "object", - "image", - "file", - ] - and os.path.exists(expandpath(value)) - ): + if result_metadata[SCHEMA_TYPE_KEY] in [ + "object", + "image", + "file", + ] and os.path.exists(expandpath(value)): from json import load _LOGGER.info( diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 3b2f7f48..060069ea 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -471,10 +471,19 @@ def session(self): """ if not self.is_db_connected(): self.establish_db_connection() - with 
self[DB_SESSION_KEY]() as session: - _LOGGER.debug("Created session") + # with self[DB_SESSION_KEY]() as session: + session = self[DB_SESSION_KEY]() + _LOGGER.debug("Created session") + try: yield session - _LOGGER.debug("Ending session") + except: + _LOGGER.info("session.rollback") + session.rollback() + raise + finally: + _LOGGER.info("session.close") + session.close() + _LOGGER.debug("Ending session") def _get_flag_file( self, record_identifier: str = None @@ -896,7 +905,7 @@ def _table_to_dict(self) -> None: """ Create a dictionary from the database table data """ - with self.session as s: + with self[DB_SESSION_KEY]() as s: records = s.query(self.get_orm(self.namespace)).all() _LOGGER.debug(f"Reading data from database for '{self.namespace}' namespace") for record in records: @@ -937,7 +946,7 @@ def _check_table_exists(self, table_name: str) -> bool: """ from sqlalchemy import inspect - with self.session as s: + with self[DB_SESSION_KEY]() as s: return inspect(s.bind).has_table(table_name=table_name) def _count_rows(self, table_name: str) -> int: @@ -947,7 +956,7 @@ def _count_rows(self, table_name: str) -> int: :param str table_name: table to count rows for :return int: number of rows in the selected table """ - with self.session as s: + with self[DB_SESSION_KEY]() as s: return s.query(self[DB_ORMS_KEY][table_name].id).count() def get_orm(self, table_name: str = None) -> Any: @@ -981,7 +990,7 @@ def check_record_exists( :return bool: whether the record exists in the table """ if self.file is None: - with self.session as s: + with self[DB_SESSION_KEY]() as s: return ( s.query(self.get_orm(table_name).id) .filter_by(record_identifier=record_identifier) @@ -1039,7 +1048,7 @@ def _check_which_results_exist_db( """ table_name = table_name or self.namespace rid = self._strict_record_id(rid) - with self.session as s: + with self[DB_SESSION_KEY]() as s: record = ( s.query(self.get_orm(table_name)) .filter_by(record_identifier=rid) @@ -1102,7 +1111,7 @@ def select( 
""" ORM = self.get_orm(table_name or self.namespace) - with self.session as s: + with self[DB_SESSION_KEY]() as s: if columns is not None: query = s.query(*[getattr(ORM, column) for column in columns]) else: @@ -1129,7 +1138,7 @@ def select_distinct(self, table_name, columns) -> List[Any]: """ ORM = self.get_orm(table_name or self.namespace) - with self.session as s: + with self[DB_SESSION_KEY]() as s: query = s.query(*[getattr(ORM, column) for column in columns]) query = query.distinct() result = query.all() @@ -1202,7 +1211,7 @@ def _retrieve_db( f"'{record_identifier}'" ) - with self.session as s: + with self[DB_SESSION_KEY]() as s: record = ( s.query(self.get_orm(table_name)) .filter_by(record_identifier=record_identifier) @@ -1249,7 +1258,7 @@ def select_txt( f"This operation is not supported for file backend." ) ORM = self.get_orm(table_name or self.namespace) - with self.session as s: + with self[DB_SESSION_KEY]() as s: if columns is not None: q = ( s.query(*[getattr(ORM, column) for column in columns]) @@ -1378,12 +1387,12 @@ def _report_db( record_identifier=record_identifier, table_name=table_name ): new_record = ORMClass(**values) - with self.session as s: + with self[DB_SESSION_KEY]() as s: s.add(new_record) s.commit() returned_id = new_record.id else: - with self.session as s: + with self[DB_SESSION_KEY]() as s: record_to_update = ( s.query(ORMClass) .filter(getattr(ORMClass, RECORD_ID) == record_identifier) @@ -1501,7 +1510,7 @@ def _remove_db( if self.check_record_exists( record_identifier=record_identifier, table_name=table_name ): - with self.session as s: + with self[DB_SESSION_KEY]() as s: records = s.query(ORMClass).filter( getattr(ORMClass, RECORD_ID) == record_identifier ) diff --git a/setup.py b/setup.py index 5b437d28..672a2f86 100644 --- a/setup.py +++ b/setup.py @@ -19,9 +19,6 @@ # DEPENDENCIES.append(line.split("=")[0].rstrip("<>")) DEPENDENCIES.append(line) -# 2to3 -if sys.version_info >= (3,): - extra["use_2to3"] = True 
extra["install_requires"] = DEPENDENCIES @@ -73,7 +70,7 @@ def get_static(name, condition=None): ], keywords="project, metadata, bioinformatics, sequencing, ngs, workflow", url="https://github.com/pepkit/" + PACKAGE, - author=u"Michal Stolarczyk, Nathan Sheffield", + author="Michal Stolarczyk, Nathan Sheffield", license="BSD2", entry_points={ "console_scripts": ["pipestat = pipestat.__main__:main"],