Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mv qclient.update_job_step #28

Merged
merged 9 commits into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions qp_woltka/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

from qiita_client import QiitaPlugin, QiitaCommand

from .woltka import woltka, woltka_syndna, calculate_cell_counts
from .woltka import (woltka, woltka_syndna, calculate_cell_counts,
calculate_rna_copy_counts)
from qp_woltka.util import generate_woltka_dflt_params, get_dbs, plugin_details
from os import environ

Expand Down Expand Up @@ -61,9 +62,9 @@
req_params, opt_params, outputs, dflt_param_set)
plugin.register_command(syndna_cmd)

# Cell counts
# WGS cell counts
req_params = {
'synDNA hits': ('artifact', ['BIOM']),
'SynDNA hits': ('artifact', ['BIOM']),
'Woltka per-genome': ('artifact', ['BIOM'])
}
opt_params = {
Expand All @@ -85,3 +86,18 @@
'Calculate Cell Counts', "Calculate cell counts per-genome",
calculate_cell_counts, req_params, opt_params, outputs, dflt_param_set)
plugin.register_command(calculate_cell_counts_cmd)


# MTX calculate RNA copy counts
# Builds and registers the 'Calculate RNA Copy Counts' command, which is
# executed by calculate_rna_copy_counts.
# The only required input is a BIOM artifact passed as 'Woltka per-gene'.
req_params = {
'Woltka per-gene': ('artifact', ['BIOM'])
}
# This command exposes no optional parameters.
opt_params = {}
# A single BIOM output; the 'RNA copy counts' name is the same one used by
# the ArtifactInfo that calculate_rna_copy_counts returns.
outputs = {
'RNA copy counts': 'BIOM'
}
# One default parameter set, named 'default', with no values in it.
dflt_param_set = {'default': {}}
# NOTE: the command name below is also listed in valid_commands in
# scripts/start_woltka, so the two need to be kept in sync.
calculate_rna_copy_counts_cmd = QiitaCommand(
'Calculate RNA Copy Counts', "Calculate RNA copy counts per-gene",
calculate_rna_copy_counts, req_params, opt_params, outputs, dflt_param_set)
plugin.register_command(calculate_rna_copy_counts_cmd)
24 changes: 20 additions & 4 deletions qp_woltka/tests/test_woltka.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from qp_woltka import plugin
from qp_woltka.woltka import (
woltka_to_array, woltka, woltka_syndna_to_array, woltka_syndna,
calculate_cell_counts)
calculate_cell_counts, calculate_rna_copy_counts)


class WoltkaTests(PluginTestCase):
Expand Down Expand Up @@ -557,7 +557,7 @@ def test_woltka_syndna_to_array(self):
self.assertCountEqual(ainfo, exp)

def test_calculate_cell_counts(self):
params = {'synDNA hits': 5, 'Woltka per-genome': 6,
params = {'SynDNA hits': 5, 'Woltka per-genome': 6,
'min_coverage': 1, 'read_length': 150,
'min_rsquared': 0.8}
job_id = 'my-job-id'
Expand All @@ -569,7 +569,7 @@ def test_calculate_cell_counts(self):
self.qclient, job_id, params, out_dir)
self.assertFalse(success)
self.assertEqual(msg, "No logs found, are you sure you selected the "
"correct artifact for 'synDNA hits'?")
"correct artifact for 'SynDNA hits'?")

# this should fail too, but now we are getting deeper into
# the validation
Expand Down Expand Up @@ -603,7 +603,7 @@ def test_calculate_cell_counts(self):
'type': "BIOM",
'name': "SynDNA Hits - Test",
'prep': pid}
params['synDNA hits'] = self.qclient.post(
params['SynDNA hits'] = self.qclient.post(
'/apitest/artifact/', data=data)['artifact']

success, ainfo, msg = calculate_cell_counts(
Expand All @@ -615,6 +615,22 @@ def test_calculate_cell_counts(self):

# Finally, adding a full test is close to impossible - too many steps.

def test_calculate_rna_copy_counts(self):
    # The job writes into a scratch directory that is registered for
    # removal during the test clean up.
    workdir = mkdtemp()
    self._clean_up_files.append(workdir)

    expected_msg = ("The selected 'Woltka per-gene' artifact "
                    "doesn't look like one, did you select the "
                    "correct file?")

    # Artifact 6 is not a valid 'Woltka per-gene' artifact, so the command
    # is expected to report a failure with the validation message.
    ok, _, message = calculate_rna_copy_counts(
        self.qclient, 'my-job-id', {'Woltka per-gene': 6}, workdir)
    self.assertFalse(ok)
    self.assertEqual(message, expected_msg)

    # Finally, adding a full test is close to impossible - too many steps.


if __name__ == '__main__':
main()
73 changes: 68 additions & 5 deletions qp_woltka/woltka.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import pandas as pd
from pysyndna import fit_linear_regression_models_for_qiita
from pysyndna import calc_ogu_cell_counts_per_g_of_sample_for_qiita
from pysyndna import calc_copies_of_ogu_orf_ssrna_per_g_sample_for_qiita

from qp_woltka.util import search_by_filename

Expand Down Expand Up @@ -570,18 +571,18 @@ def calculate_cell_counts(qclient, job_id, parameters, out_dir):
"""
error = ''
# let's get the syndna_id and prep in a single go
syndna_id = parameters['synDNA hits']
syndna_id = parameters['SynDNA hits']
syndna_files, prep = qclient.artifact_and_preparation_files(syndna_id)
if 'log' not in syndna_files.keys():
error = ("No logs found, are you sure you selected the correct "
"artifact for 'synDNA hits'?")
"artifact for 'SynDNA hits'?")
else:

lin_regress_by_sample_id_fp = [f for f in syndna_files['log']
if 'lin_regress_by_sample_id' in f]
if not lin_regress_by_sample_id_fp:
error = ("No 'lin_regress_by_sample_id' log found, are you sure "
" you selected the correct artifact for 'synDNA hits'?")
" you selected the correct artifact for 'SynDNA hits'?")
else:
lin_regress_by_sample_id_fp = lin_regress_by_sample_id_fp[0]

Expand Down Expand Up @@ -615,8 +616,8 @@ def calculate_cell_counts(qclient, job_id, parameters, out_dir):
output = calc_ogu_cell_counts_per_g_of_sample_for_qiita(
sample_info, prep, lin_regress_by_sample_id_fp,
ogu_counts_per_sample, ogu_lengths_fp,
parameters['read_length'], parameters['min_rsquared'],
parameters['min_rsquared'])
int(parameters['read_length']), float(parameters['min_coverage']),
float(parameters['min_rsquared']))
except Exception as e:
return False, None, str(e)

Expand All @@ -631,3 +632,65 @@ def calculate_cell_counts(qclient, job_id, parameters, out_dir):
'Cell counts', 'BIOM', [(biom_fp, 'biom'), (log_fp, 'log')])]

return True, ainfo, ""


def calculate_rna_copy_counts(qclient, job_id, parameters, out_dir):
    """Run calc_copies_of_ogu_orf_ssrna_per_g_sample_for_qiita

    Validates the selected 'Woltka per-gene' artifact, gathers its
    preparation and sample information, runs the pysyndna calculation and
    writes the resulting table and log into `out_dir`.

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values of the command; 'Woltka per-gene' must hold the
        id of the artifact to process
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    bool, list, str
        The results of the job: the success flag, the list of ArtifactInfo
        (None on failure) and the error message (empty on success)
    """
    error = ("The selected 'Woltka per-gene' artifact doesn't "
             "look like one, did you select the correct file?")

    per_gene_id = parameters['Woltka per-gene']
    ainfo = qclient.get("/qiita_db/artifacts/%s/" % per_gene_id)

    # A wrongly selected artifact may come back without processing
    # parameters (presumably None for artifacts not created by a command -
    # TODO confirm) or without any biom file; both cases must end in the
    # validation error below instead of a TypeError/KeyError/IndexError.
    aparams = ainfo.get('processing_parameters') or {}
    biom_files = (ainfo.get('files') or {}).get('biom') or []
    pg_fp = biom_files[0]['filepath'] if biom_files else ''

    if 'Database' not in aparams or not pg_fp.endswith('per-gene.biom'):
        return False, None, error

    pergene = load_table(pg_fp)
    db_files = _process_database_files(aparams['Database'])
    ogu_orf_coords_fp = db_files["gene_coordinates"]

    # only the preparation info is needed here, the files were read above
    _, prep_info = qclient.artifact_and_preparation_files(per_gene_id)

    sample_info = qclient.get(
        '/qiita_db/prep_template/%s/data/?sample_information=true'
        % ainfo['prep_information'][0])
    sample_info = pd.DataFrame.from_dict(
        sample_info['data'], orient='index')
    # the sample ids arrive as the index; expose them as 'sample_name'
    sample_info.reset_index(names='sample_name', inplace=True)

    # Any failure inside the calculation is reported back as the job error
    # message instead of propagating out of the command.
    try:
        output, log_msgs = calc_copies_of_ogu_orf_ssrna_per_g_sample_for_qiita(
            sample_info, prep_info, pergene, ogu_orf_coords_fp)
    except Exception as e:
        return False, None, str(e)

    log_fp = f'{out_dir}/rna_copy_counts.log'
    with open(log_fp, 'w') as f:
        f.write(''.join(log_msgs))
    biom_fp = f'{out_dir}/rna_copy_counts.biom'
    with biom_open(biom_fp, 'w') as f:
        output.to_hdf5(f, f"RNA copy counts - {job_id}")

    artifacts_info = [
        ArtifactInfo(
            'RNA copy counts', 'BIOM', [(biom_fp, 'biom'), (log_fp, 'log')])]

    return True, artifacts_info, ""
11 changes: 6 additions & 5 deletions scripts/start_woltka
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,23 @@ def execute(url, job_id, out_dir):
command = job_info['command']
parameters = job_info['parameters']

qclient.update_job_step(
job_id, "Step 1 of 4: Collecting info and generating submission")

# these were defined in qp_woltka/__init__.py while defining the
# available commands for this plugin
valid_commands = [
'Woltka v0.1.4', 'SynDNA Woltka', 'Calculate Cell Counts']
'Woltka v0.1.4', 'SynDNA Woltka', 'Calculate Cell Counts',
'Calculate RNA Copy Counts']

# this if/elif is the current solution for
# https://github.com/qiita-spots/qiita/issues/3340
if command not in valid_commands:
raise ValueError(f'Not a valid command: "{command}"')
elif command == 'Calculate Cell Counts':
elif command in {'Calculate Cell Counts', 'Calculate RNA Copy Counts'}:
plugin(url, job_id, out_dir)
exit(0)

qclient.update_job_step(
job_id, "Step 1 of 4: Collecting info and generating submission")

artifact_id = parameters['input']
del parameters['input']
files, prep = qclient.artifact_and_preparation_files(artifact_id)
Expand Down
Loading