diff --git a/README.md b/README.md index 9cdddb1..e632b16 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# VIRTUS : VIRal Transcript Usage Sensor v0.11 +# VIRTUS : VIRal Transcript Usage Sensor v1.0 Virus transcript detection and quantification using normal human RNAseq. VIRTUS is the first tool to detect viral transcripts considering their splicing event rather than the viral genome copy number. VIRTUS can be applied to both bulk RNAseq and single-cell RNAseq. The virus reference covers 763 viruses including SARS-CoV-2 (cause of COVID-19). The workflow is implemented by [Common Workflow Language](https://www.commonwl.org/) and [Rabix](https://rabix.io/). You can specify each parameter individually or give `yaml` or `json` file which describes all the parameter information. In detail, check [the CWL User Guide](http://www.commonwl.org/user_guide/) out. diff --git a/tool/mk_summary_virus_count/humanLog.final.out b/tool/mk_summary_virus_count/humanLog.final.out new file mode 100644 index 0000000..f53807f --- /dev/null +++ b/tool/mk_summary_virus_count/humanLog.final.out @@ -0,0 +1,37 @@ + Started job on | Feb 25 08:11:28 + Started mapping on | Feb 25 08:12:02 + Finished on | Feb 25 08:46:51 + Mapping speed, Million of reads per hour | 218.99 + + Number of input reads | 127074247 + Average input read length | 200 + UNIQUE READS: + Uniquely mapped reads number | 46789415 + Uniquely mapped reads % | 36.82% + Average mapped length | 196.83 + Number of splices: Total | 16249222 + Number of splices: Annotated (sjdb) | 0 + Number of splices: GT/AG | 15726296 + Number of splices: GC/AG | 127630 + Number of splices: AT/AC | 8313 + Number of splices: Non-canonical | 386983 + Mismatch rate per base, % | 0.35% + Deletion rate per base | 0.01% + Deletion average length | 1.91 + Insertion rate per base | 0.01% + Insertion average length | 1.62 + MULTI-MAPPING READS: + Number of reads mapped to multiple loci | 68574496 + % of reads mapped to multiple loci | 53.96% + Number of reads mapped to too many loci | 1040077 + % of reads mapped to too many loci | 0.82% + UNMAPPED READS: + Number of reads unmapped: too many mismatches | 0 + % of reads unmapped: too many mismatches | 0.00% + Number of reads unmapped: too short | 10608694 + % of reads unmapped: too short | 8.35% + Number of reads unmapped: other | 61565 + % of reads unmapped: other | 0.05% + CHIMERIC READS: + Number of chimeric reads | 0 + % of chimeric reads | 0.00% diff --git a/tool/mk_summary_virus_count/mk_summary_virus_count.cwl b/tool/mk_summary_virus_count/mk_summary_virus_count.cwl index 33f56ca..fbbf165 100644 --- a/tool/mk_summary_virus_count/mk_summary_virus_count.cwl +++ b/tool/mk_summary_virus_count/mk_summary_virus_count.cwl @@ -15,6 +15,10 @@ inputs: type: File inputBinding: position: 2 + - id: input_layout + type: string + inputBinding: + position: 4 outputs: - id: output type: File? @@ -23,4 +27,4 @@ outputs: label: mk_summary_virus_count requirements: - class: DockerRequirement - dockerPull: yyasumizu/mk_summary_virus_count + dockerPull: yyasumizu/mk_summary_virus_count:latest diff --git a/tool/mk_summary_virus_count/mk_summary_virus_count.job.yaml b/tool/mk_summary_virus_count/mk_summary_virus_count.job.yaml index de69ed4..7476640 100644 --- a/tool/mk_summary_virus_count/mk_summary_virus_count.job.yaml +++ b/tool/mk_summary_virus_count/mk_summary_virus_count.job.yaml @@ -1,8 +1,9 @@ input_STARLog: class: File path: >- - /Users/yasumizuyoshiaki/bioinformatics/EB_Virtect/donor1_day1/virusLog.final.out + /home/yyasumizu/Programs/VIRTUS/tool/mk_summary_virus_count/humanLog.final.out input_virus_count: class: File path: >- - /Users/yasumizuyoshiaki/bioinformatics/EB_Virtect/donor1_day1/virus_counts.txt + /home/yyasumizu/Programs/VIRTUS/tool/mk_summary_virus_count/virus_counts.txt +input_layout: PE diff --git a/tool/mk_summary_virus_count/mk_summary_virus_count.py b/tool/mk_summary_virus_count/mk_summary_virus_count.py index b1a3c13..e71d215 100644 --- a/tool/mk_summary_virus_count/mk_summary_virus_count.py +++ b/tool/mk_summary_virus_count/mk_summary_virus_count.py @@ -6,11 +6,15 @@ 2020.03.08 ex. -python mk_summary_virus_count.py output/donor1_day1/humanLog.final.out output/donor1_day1/virus_counts.txt +python mk_summary_virus_count.py \ + output/donor1_day1/humanLog.final.out \ + output/donor1_day1/virus_counts.txt \ + PE ''' input_STARLog = sys.argv[1] input_virus_count = sys.argv[2] +input_layout = sys.argv[3] df_STARLog = pd.read_csv(input_STARLog, sep='\t', header=None, index_col=0) num_reads = int(df_STARLog.loc[' Uniquely mapped reads number |', 1]) + \ @@ -18,6 +22,8 @@ df_virus_count = pd.read_csv(input_virus_count, delim_whitespace=True) df_virus_count.columns = ['num_hit', 'virus'] +if input_layout == 'PE': + df_virus_count['num_hit'] = df_virus_count['num_hit'] / 2 df_virus_count['rate_hit'] = df_virus_count['num_hit'] / num_reads df_virus_count = df_virus_count[['virus', 'num_hit', 'rate_hit']] df_virus_count = df_virus_count.sort_values(by='rate_hit', ascending=False) diff --git a/tool/mk_summary_virus_count/virus.counts.final.tsv b/tool/mk_summary_virus_count/virus.counts.final.tsv new file mode 100644 index 0000000..0f14d03 --- /dev/null +++ b/tool/mk_summary_virus_count/virus.counts.final.tsv @@ -0,0 +1,14 @@ +virus num_hit rate_hit +NC_007605.1_Human_herpesvirus_4_complete_wild_type_genome 83054.0 0.0007199305162253037 +NC_009823.1_Hepatitis_C_virus_genotype_2,_complete_genome 77433.0 0.0006712064399411701 +NC_004102.1_Hepatitis_C_virus_genotype_1,_complete_genome 48047.0 0.00041648206604229986 +NC_018382.1_Bat_hepevirus,_complete_genome 18969.0 0.0001644275045425601 +NC_009334.1_Human_herpesvirus_4,_complete_genome 16602.0 0.0001439098228907999 +NC_001672.1_Tick-borne_encephalitis_virus,_complete_genome 5169.0 4.480603990618869e-05 +NC_002645.1_Human_coronavirus_229E,_complete_genome 3029.0 2.6256044665476017e-05 +NC_001716.2_Human_herpesvirus_7,_complete_genome 2917.0 2.5285203793064888e-05 +NC_015521.1_Cutthroat_trout_virus,_complete_genome 1927.0 1.67036639387165e-05 +NC_009827.1_Hepatitis_C_virus_genotype_6,_complete_genome 1558.0 1.350508999300483e-05 +NC_022518.1_Human_endogenous_retrovirus_K113_complete_genome 1002.0 8.685558519249578e-06 +gi|12084981|lcl|HPV71REF.1|_Human_papillomavirus_71_(HPV71),_complete_genome 538.0 4.663503476403465e-06 +NC_003977.1_Hepatitis_B_virus,_complete_genome 243.0 2.1063779642491488e-06 diff --git a/tool/mk_summary_virus_count/virus_counts.txt b/tool/mk_summary_virus_count/virus_counts.txt new file mode 100644 index 0000000..6f1086a --- /dev/null +++ b/tool/mk_summary_virus_count/virus_counts.txt @@ -0,0 +1,14 @@ +23161329 * + 10338 NC_001672.1_Tick-borne_encephalitis_virus,_complete_genome + 5834 NC_001716.2_Human_herpesvirus_7,_complete_genome + 6058 NC_002645.1_Human_coronavirus_229E,_complete_genome + 486 NC_003977.1_Hepatitis_B_virus,_complete_genome + 96094 NC_004102.1_Hepatitis_C_virus_genotype_1,_complete_genome + 166108 NC_007605.1_Human_herpesvirus_4_complete_wild_type_genome + 33204 NC_009334.1_Human_herpesvirus_4,_complete_genome + 154866 NC_009823.1_Hepatitis_C_virus_genotype_2,_complete_genome + 3116 NC_009827.1_Hepatitis_C_virus_genotype_6,_complete_genome + 3854 NC_015521.1_Cutthroat_trout_virus,_complete_genome + 37938 NC_018382.1_Bat_hepevirus,_complete_genome + 2004 NC_022518.1_Human_endogenous_retrovirus_K113_complete_genome + 1076 gi|12084981|lcl|HPV71REF.1|_Human_papillomavirus_71_(HPV71),_complete_genome diff --git a/tool/star/star_index/star_index.cwl b/tool/star/star_index/star_index.cwl index d7bf09f..608e9b4 100644 --- a/tool/star/star_index/star_index.cwl +++ b/tool/star/star_index/star_index.cwl @@ -57,9 +57,6 @@ requirements: hints: - class: DockerRequirement dockerPull: 'quay.io/biocontainers/star:2.7.1a--0' -$schemas: - - 'https://schema.org/docs/schema_org_rdfa.html' - - 'http://edamontology.org/EDAM_1.18.owl' 's:author': - class: 's:Person' 's:email': 'mailto:inutano@gmail.com' diff --git a/tool/star/star_mapping-pe/star_mapping-pe.cwl b/tool/star/star_mapping-pe/star_mapping-pe.cwl index 9907235..31a58b1 100644 --- a/tool/star/star_mapping-pe/star_mapping-pe.cwl +++ b/tool/star/star_mapping-pe/star_mapping-pe.cwl @@ -172,9 +172,6 @@ requirements: hints: - class: DockerRequirement dockerPull: 'quay.io/biocontainers/star:2.7.1a--0' -$schemas: - - 'https://schema.org/docs/schema_org_rdfa.html' - - 'http://edamontology.org/EDAM_1.18.owl' 's:author': - class: 's:Person' 's:email': 'mailto:inutano@gmail.com' diff --git a/tool/star/star_mapping-se/star_mapping-se.cwl b/tool/star/star_mapping-se/star_mapping-se.cwl index 191c543..feafc84 100644 --- a/tool/star/star_mapping-se/star_mapping-se.cwl +++ b/tool/star/star_mapping-se/star_mapping-se.cwl @@ -166,9 +166,6 @@ requirements: hints: - class: DockerRequirement dockerPull: 'quay.io/biocontainers/star:2.7.1a--0' -$schemas: - - 'https://schema.org/docs/schema_org_rdfa.html' - - 'http://edamontology.org/EDAM_1.18.owl' 's:author': - class: 's:Person' 's:email': 'mailto:inutano@gmail.com' diff --git a/workflow/VIRTUS.PE.cwl b/workflow/VIRTUS.PE.cwl index f88703c..7c21c7e 100755 --- a/workflow/VIRTUS.PE.cwl +++ b/workflow/VIRTUS.PE.cwl @@ -3,7 +3,7 @@ class: Workflow cwlVersion: v1.0 id: VIRTUS.PE -doc: VIRTUS v0.11 +doc: VIRTUS v1.0 label: VIRTUS.PE $namespaces: sbg: 'https://www.sevenbridges.com/' @@ -292,6 +292,8 @@ steps: source: star_mapping_pe_human/mappingstats - id: input_virus_count source: mk_virus_count/virus_count + - id: input_layout + default: PE out: - id: output run: ../tool/mk_summary_virus_count/mk_summary_virus_count.cwl diff --git a/workflow/VIRTUS.PE.singlevirus.cwl b/workflow/VIRTUS.PE.singlevirus.cwl index b54834a..181f4bb 100755 --- a/workflow/VIRTUS.PE.singlevirus.cwl +++ b/workflow/VIRTUS.PE.singlevirus.cwl @@ -5,7 +5,7 @@ cwlVersion: v1.0 id: VIRTUS.PE.singlevirus doc: >- STAR mapping and salmon quantification for one specified virus. Prepare using - create_singlevirus.cwl beforehand. version 0.11 + create_singlevirus.cwl beforehand. version 1.0 label: VIRTUS.PE.singlevirus $namespaces: sbg: 'https://www.sevenbridges.com/' diff --git a/workflow/VIRTUS.SE.cwl b/workflow/VIRTUS.SE.cwl index ba1a7b3..bd5338f 100755 --- a/workflow/VIRTUS.SE.cwl +++ b/workflow/VIRTUS.SE.cwl @@ -3,7 +3,7 @@ class: Workflow cwlVersion: v1.0 id: VIRTUS.SE -doc: VIRTUS v0.11 +doc: VIRTUS v1.0 label: VIRTUS.SE $namespaces: sbg: 'https://www.sevenbridges.com/' @@ -157,6 +157,8 @@ steps: source: star_mapping_se/mappingstats - id: input_virus_count source: mk_virus_count/virus_count + - id: input_layout + default: SE out: - id: output run: ../tool/mk_summary_virus_count/mk_summary_virus_count.cwl diff --git a/workflow/VIRTUS.SE.singlevirus.cwl b/workflow/VIRTUS.SE.singlevirus.cwl index 35e541e..7ce4754 100755 --- a/workflow/VIRTUS.SE.singlevirus.cwl +++ b/workflow/VIRTUS.SE.singlevirus.cwl @@ -5,7 +5,7 @@ cwlVersion: v1.0 id: VIRTUS.PE.singlevirus doc: >- STAR mapping and salmon quantification for one specified virus. Prepare using - create_singlevirus.cwl beforehand. VIRTUS version 0.11 + create_singlevirus.cwl beforehand. VIRTUS version 1.0 label: VIRTUS.PE.singlevirus $namespaces: sbg: 'https://www.sevenbridges.com/' diff --git a/workflow/createindex.cwl b/workflow/createindex.cwl index 16d8041..d884a41 100755 --- a/workflow/createindex.cwl +++ b/workflow/createindex.cwl @@ -4,7 +4,7 @@ class: Workflow cwlVersion: v1.0 id: createindex label: CreateIndex -doc: VIRTUS v0.11 +doc: VIRTUS v1.0 $namespaces: sbg: 'https://www.sevenbridges.com/' inputs: diff --git a/workflow/createindex_singlevirus.cwl b/workflow/createindex_singlevirus.cwl index 14c6d76..4417e9f 100755 --- a/workflow/createindex_singlevirus.cwl +++ b/workflow/createindex_singlevirus.cwl @@ -4,7 +4,7 @@ class: Workflow cwlVersion: v1.0 id: createindex_singlevirus label: createindex_singlevirus -doc: VIRTUS v0.11 +doc: VIRTUS v1.0 $namespaces: sbg: 'https://www.sevenbridges.com/' inputs: diff --git a/workflow/rnaseq-star_index/rnaseq-star_index.cwl b/workflow/rnaseq-star_index/rnaseq-star_index.cwl index 6c20217..3c86971 100644 --- a/workflow/rnaseq-star_index/rnaseq-star_index.cwl +++ b/workflow/rnaseq-star_index/rnaseq-star_index.cwl @@ -85,9 +85,6 @@ steps: 'sbg:x': -233 'sbg:y': -35 requirements: [] -$schemas: - - 'https://schema.org/docs/schema_org_rdfa.html' - - 'http://edamontology.org/EDAM_1.18.owl' 's:author': - class: 's:Person' 's:email': 'mailto:inutano@gmail.com'