From f852d04763d88b5ade5cf783562125a366218b3b Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 8 Feb 2024 13:20:52 -0700 Subject: [PATCH] calculate_rna_copy_counts --- qp_woltka/__init__.py | 20 ++++++++++-- qp_woltka/tests/test_woltka.py | 18 +++++++++- qp_woltka/woltka.py | 60 ++++++++++++++++++++++++++++++++++ scripts/start_woltka | 5 +-- 4 files changed, 98 insertions(+), 5 deletions(-) diff --git a/qp_woltka/__init__.py b/qp_woltka/__init__.py index 68ae5bc..6c6fe04 100644 --- a/qp_woltka/__init__.py +++ b/qp_woltka/__init__.py @@ -8,7 +8,8 @@ from qiita_client import QiitaPlugin, QiitaCommand -from .woltka import woltka, woltka_syndna, calculate_cell_counts +from .woltka import (woltka, woltka_syndna, calculate_cell_counts, + calculate_rna_copy_counts) from qp_woltka.util import generate_woltka_dflt_params, get_dbs, plugin_details from os import environ @@ -61,7 +62,7 @@ req_params, opt_params, outputs, dflt_param_set) plugin.register_command(syndna_cmd) -# Cell counts +# WGS cell counts req_params = { 'synDNA hits': ('artifact', ['BIOM']), 'Woltka per-genome': ('artifact', ['BIOM']) @@ -85,3 +86,18 @@ 'Calculate Cell Counts', "Calculate cell counts per-genome", calculate_cell_counts, req_params, opt_params, outputs, dflt_param_set) plugin.register_command(calculate_cell_counts_cmd) + + +# MTX calculate RNA copy counts +req_params = { + 'Woltka per-gene': ('artifact', ['BIOM']) +} +opt_params = {} +outputs = { + 'RNA copy counts': 'BIOM' +} +dflt_param_set = {} +calculate_rna_copy_counts_cmd = QiitaCommand( + 'RNA Copy Counts', "Calculate RNA copy counts per-gene", + calculate_rna_copy_counts, req_params, opt_params, outputs, dflt_param_set) +plugin.register_command(calculate_rna_copy_counts_cmd) diff --git a/qp_woltka/tests/test_woltka.py b/qp_woltka/tests/test_woltka.py index adba0ef..866e5ae 100644 --- a/qp_woltka/tests/test_woltka.py +++ b/qp_woltka/tests/test_woltka.py @@ -18,7 +18,7 @@ from qp_woltka import plugin from qp_woltka.woltka import ( woltka_to_array, woltka, woltka_syndna_to_array, woltka_syndna, - calculate_cell_counts) + calculate_cell_counts, calculate_rna_copy_counts) class WoltkaTests(PluginTestCase): @@ -615,6 +615,22 @@ def test_calculate_cell_counts(self): # Finally, adding a full test is close to impossible - too many steps. + def test_calculate_rna_copy_counts(self): + params = {'Woltka per-gene': 6} + job_id = 'my-job-id' + out_dir = mkdtemp() + self._clean_up_files.append(out_dir) + + # this should fail cause we don't have valid data + success, ainfo, msg = calculate_rna_copy_counts( + self.qclient, job_id, params, out_dir) + self.assertFalse(success) + self.assertEqual(msg, "The selected 'Woltka per-gene' artifact " + "doesn't look like one, did you select the " + "correct file?") + + # Finally, adding a full test is close to impossible - too many steps. + if __name__ == '__main__': main() diff --git a/qp_woltka/woltka.py b/qp_woltka/woltka.py index 57ec54e..70c2d64 100644 --- a/qp_woltka/woltka.py +++ b/qp_woltka/woltka.py @@ -16,6 +16,7 @@ import pandas as pd from pysyndna import fit_linear_regression_models_for_qiita from pysyndna import calc_ogu_cell_counts_per_g_of_sample_for_qiita +from pysyndna import calc_copies_of_ogu_orf_ssrna_per_g_sample_for_qiita from qp_woltka.util import search_by_filename @@ -631,3 +632,62 @@ def calculate_cell_counts(qclient, job_id, parameters, out_dir): 'Cell counts', 'BIOM', [(biom_fp, 'biom'), (log_fp, 'log')])] return True, ainfo, "" + + +def calculate_rna_copy_counts(qclient, job_id, parameters, out_dir): + """Run calc_copies_of_ogu_orf_ssrna_per_g_sample_for_qiita + + Parameters + ---------- + qclient : tgp.qiita_client.QiitaClient + The Qiita server client + job_id : str + The job id + parameters : dict + The parameter values to wolka syndna + out_dir : str + The path to the job's output directory + + Returns + ------- + bool, list, str + The results of the job + """ + + per_gene_id = parameters['Woltka per-gene'] + ainfo = qclient.get("/qiita_db/artifacts/%s/" % per_gene_id) + aparams = ainfo['processing_parameters'] + pg_fp = ainfo['files']['biom'][0]['filepath'] + + if 'Database' not in aparams or not pg_fp.endswith('per-gene.biom'): + error = ("The selected 'Woltka per-gene' artifact doesn't " + "look like one, did you select the correct file?") + return False, None, error + + pergene = load_table(pg_fp) + db_files = _process_database_files(aparams['Database']) + ogu_orf_coords_fp = db_files["gene_coordinates"] + + _, prep_info = qclient.artifact_and_preparation_files(per_gene_id) + + sample_info = qclient.get( + '/qiita_db/prep_template/%s/data/?sample_information=true' + % ainfo['prep_information'][0]) + sample_info = pd.DataFrame.from_dict( + sample_info['data'], orient='index') + sample_info.reset_index(names='sample_name', inplace=True) + + try: + output = calc_copies_of_ogu_orf_ssrna_per_g_sample_for_qiita( + sample_info, prep_info, pergene, ogu_orf_coords_fp) + except Exception as e: + return False, None, str(e) + + biom_fp = f'{out_dir}/rna_copy_counts.biom' + with biom_open(biom_fp, 'w') as f: + output['rna_copy_counts'].to_hdf5(f, f"RNA copy counts - {job_id}") + ainfo = [ + ArtifactInfo( + 'RNA Copy Counts', 'BIOM', [(biom_fp, 'biom')])] + + return True, ainfo, "" diff --git a/scripts/start_woltka b/scripts/start_woltka index 413ee7b..51dcf8a 100755 --- a/scripts/start_woltka +++ b/scripts/start_woltka @@ -35,13 +35,14 @@ def execute(url, job_id, out_dir): # these were defined in qp_woltka/__init.py__ while defining the # available commands for this plugin valid_commands = [ - 'Woltka v0.1.4', 'SynDNA Woltka', 'Calculate Cell Counts'] + 'Woltka v0.1.4', 'SynDNA Woltka', 'Calculate Cell Counts', + 'RNA Copy Counts'] # this if/elif is the current solution for # https://github.com/qiita-spots/qiita/issues/3340 if command not in valid_commands: raise ValueError(f'Not a valid command: "{command}"') - elif command == 'Calculate Cell Counts': + elif command in {'Calculate Cell Counts', 'RNA Copy Counts'}: plugin(url, job_id, out_dir) exit(0)