diff --git a/.gitignore b/.gitignore index 496ee2ca..b28de0b0 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,16 @@ -.DS_Store \ No newline at end of file +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc + +# PFR files +*.stdout +*.stderr + +# nf-test files +.nf-test/ +.nf-test.log \ No newline at end of file diff --git a/hic/FI1.hic b/hic/FI1.hic index 5b5315a9..a5b46ba9 100644 Binary files a/hic/FI1.hic and b/hic/FI1.hic differ diff --git a/hic/hicqc/SRR8238190.on.FI1_qc_report.pdf b/hic/hicqc/SRR8238190.on.FI1_qc_report.pdf new file mode 100644 index 00000000..26495928 Binary files /dev/null and b/hic/hicqc/SRR8238190.on.FI1_qc_report.pdf differ diff --git a/index.html b/index.html index 569e7287..caaa6621 100644 --- a/index.html +++ b/index.html @@ -214,6 +214,18 @@ .iframe-wrapper { text-align: center; + width: 90%; + margin-left: auto; + margin-right: auto; + margin-bottom: 32px; + } + + .iframe-wrapper-hic { + width: 700px; + height: 850px; + margin-left: auto; + margin-right: auto; + margin-bottom: 32px; } .tab { @@ -639,146 +651,211 @@
- AssemblyQC is a Nextflow pipeline which evaluates assembly quality with well established tools and presents - the results in a unified html report. -
-Reference:
-- Rashid, U., Wu, C., Shiller, J., Smith, K., Crowhurst, R., Davy, M., Chen, T.-H., Thomson, S., & Deng, C. - (2024). AssemblyQC: A NextFlow pipeline for evaluating assembly quality (1.4). Zenodo. - 10.5281/zenodo.10647870. GitHub. - https://github.com/Plant-Food-Research-Open/assemblyqc -
-+ AssemblyQC is a Nextflow pipeline which evaluates assembly quality with well established tools and presents + the results in a unified html report. +
+Reference:
++ Rashid, U., Wu, C., Shiller, J., Smith, K., Crowhurst, R., Davy, M., Chen, T.-H., Carvajal, I., Bailey, S., + Thomson, S., & Deng, C.H. + (2024). AssemblyQC: A Nextflow pipeline for reproducible reporting of assembly quality. Bioinformatics. + DOI 10.1093/bioinformatics/btae477. + GitHub https://github.com/Plant-Food-Research-Open/assemblyqc. +
+Only displaying parameters that differ from the pipeline defaults.
{
- "runName": "condescending_koch",
- "containerEngine": "apptainer",
- "launchDir": "/powerplant/workspace/hrauxr/assemblyqc",
- "workDir": "/powerplant/workspace/hrauxr/assemblyqc/work",
- "projectDir": "/powerplant/workspace/hrauxr/assemblyqc",
- "userName": "hrauxr",
- "profile": "pfr,apptainer,test_full",
- "input": "https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/assets/assemblysheet.csv",
+ "input": "https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/assets/assemblysheetv2.csv",
+ "outdir": "results",
+ "gfastats_skip": "false",
"ncbi_fcs_adaptor_skip": "false",
"ncbi_fcs_adaptor_empire": "euk",
"ncbi_fcs_gx_skip": "false",
"ncbi_fcs_gx_tax_id": "35717",
"ncbi_fcs_gx_db_path": "/workspace/ComparativeDataSources/NCBI/FCS/GX/r2023-01-24",
- "busco_skip": "false",
- "busco_mode": "geno",
- "busco_lineage_datasets": "fungi_odb10 hypocreales_odb10",
"tidk_skip": "false",
"tidk_repeat_seq": "TTTGGG",
+ "busco_skip": "false",
+ "busco_mode": "genome",
+ "busco_lineage_datasets": "fungi_odb10 hypocreales_odb10",
"lai_skip": "false",
"kraken2_skip": "false",
- "kraken2_db_path": "/workspace/ComparativeDataSources/kraken2db/k2_pluspfp_20230314",
- "hic": "test_data/SRR8238190_R{1,2}.fastq.gz",
+ "kraken2_db_path": "/workspace/ComparativeDataSources/kraken2db/k2_pluspfp_20240904",
+ "hic": "SRR8238190",
+ "merqury_skip": "false",
"synteny_skip": "false",
+ "synteny_mummer_skip": "false",
+ "synteny_plotsr_skip": "false",
"synteny_xref_assemblies": "https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/assets/xrefsheet.csv",
"config_profile_name": "Plant&Food profile",
- "config_profile_description": "Plant&Food profile using SLURM in combination with Apptainer"
+ "config_profile_description": "Plant&Food profile using SLURM in combination with Apptainer",
+ "runName": "disturbed_lamarck",
+ "containerEngine": "apptainer",
+ "launchDir": "/powerplant/workspace/hrauxr/assemblyqc",
+ "workDir": "/powerplant/workspace/hrauxr/assemblyqc/work",
+ "projectDir": "/powerplant/workspace/hrauxr/assemblyqc",
+ "userName": "hrauxr",
+ "profile": "pfr,apptainer,test_full"
}
Following is a non-exhaustive list of tools used to generate this report.
{
+ "FCS-adaptor": "0.5.0",
"KronaTools": "2.7.1",
"LTR_FINDER_parallel": "v1.1",
"LTR_HARVEST_parallel": "v1.1",
"LTR_retriever": "v2.9.9",
- "Nextflow": "23.04.4",
+ "Nextflow": "24.04.3",
"assemblathon_stats": "github/PlantandFoodResearch/assemblathon2-analysis/a93cba2",
- "av_screen_x": "0.4",
"awk": "1.3.4 20200120",
- "biopython": "1.75",
- "bundlelinks": "24Sep2013",
- "busco": "5.6.1",
- "bwa": "0.7.17-r1188",
+ "biopython": "1.75",
+ "busco": "5.7.1",
+ "bwa": "0.7.18-r1243-dirty",
"circos": "v0.69-8",
- "dnadiff": "1.3",
+ "curl": "8.5.0",
+ "dnadiff": "1.3",
"fastp": "0.23.4",
"fastqc": "0.12.1",
- "fcs_gx": "0.5",
+ "fcs_gx": "0.5.4",
"genometools": "1.6.5",
+ "gfastats": "1.3.6",
+ "gffread": "0.12.7",
"grep": "(GNU grep) 3.4",
- "gunzip": "1.10",
- "hic_qc.py": "0+unknown",
+ "gunzip": "1.10",
+ "hic_qc.py": "0+untagged.261.g6881c33",
"juicebox_scripts": "0.1.0",
"kraken2": "2.1.2",
"lai": "beta3.2",
"ltr_finder": "v1.07",
- "matlock": "20181227",
+ "matlock": "20181227",
+ "merqury": "1.3",
+ "meryl": "1.4.1",
+ "minimap2": "2.28-r1209",
"nucmer": "4.0.0rc1",
"pandas": "2.1.1",
"perl": "5.32.1",
- "pigz": "2.3.4",
- "plant-food-research-open/assemblyqc": "1.4",
- "py_fasta_validator": "0.6",
- "python": "3.8.13",
+ "pigz": "2.6",
+ "plant-food-research-open/assemblyqc": "v2.2.0",
+ "plotly": "5.20.0",
+ "plotsr": "1.1.1",
+ "py_fasta_validator": "0.6",
+ "python": "3.10.2",
"run-assembly-visualizer.sh": "18 July 2016",
"samblaster": "0.1.26",
- "samtools": "1.16.1",
+ "samtools": "1.21",
"sed": "(GNU sed) 4.7",
- "seqkit": "v2.6.1",
- "sort": "8.30",
+ "seqkit": "v2.8.0",
+ "sort": "8.30",
+ "sratools": "3.1.0",
+ "syri": "1.7.0",
"tidk": "0.2.41",
- "ubuntu": "20.04.6l",
- "yaml": "5.4.1"
+ "ubuntu": "20.04.6l"
}
FCS-adaptor detects adaptor and vector contamination in genome sequences.
@@ -786,22 +863,56 @@ -Version: 0.4
+Version: 0.5.0
Assembly | +Contaminated? | +
---|---|
+ FI1 + | ++ No + | +
No contamination detected.
FCS-GX detects contamination from foreign organisms in genome sequences.
Reference:
@@ -826,28 +941,60 @@ https://github.com/ncbi/fcs- Version: 0.5 + Version: 0.5.4
DB Version: 2023-01-24
- -Assembly | +Contaminated? | +
---|---|
+ FI1 + | ++ No + | +
A script to calculate a basic set of metrics from a genome assembly.
Reference:
@@ -980,7 +1131,151 @@A fast and exhaustive tool for summary statistics.
+Reference:
++ Giulio Formenti, Linelle Abueg, Angelo Brajuka, Nadolina Brajuka, Cristóbal Gallardo-Alba, Alice Giani, Olivier + Fedrigo, Erich D Jarvis, Gfastats: conversion, evaluation and manipulation of genome sequences using assembly + graphs, Bioinformatics, Volume 38, Issue 17, September 2022, Pages 4214–4216, + 10.1093/bioinformatics/btac460 +
++ Version: 1.3.6 +
+Stat | Value |
---|---|
Total scaffold length | 35023690 |
Average scaffold length | 4377961.25 |
Scaffold N50 | 6201951 |
Scaffold auN | 5781567.55 |
Scaffold L50 | 3 |
Largest scaffold | 7872678 |
Smallest scaffold | 52960 |
# contigs | 8 |
Total contig length | 35023690 |
Average contig length | 4377961.25 |
Contig N50 | 6201951 |
Contig auN | 5781567.55 |
Contig L50 | 3 |
Largest contig | 7872678 |
Smallest contig | 52960 |
# gaps in scaffolds | 0 |
Total gap length in scaffolds | 0 |
Average gap length in scaffolds | 0.00 |
Gap N50 in scaffolds | 0 |
Gap auN in scaffolds | 0.00 |
Gap L50 in scaffolds | 0 |
Largest gap in scaffolds | 0 |
Smallest gap in scaffolds | 0 |
Base composition (A:C:G:T) | 9857662:7662657:7645812:9857559 |
GC content % | 43.71 |
# soft-masked bases | 10431104 |
# segments | 8 |
Total segment length | 35023690 |
Average segment length | 4377961.25 |
# gaps | 0 |
# paths | 8 |
Scaffold N10 | 7872678 |
Scaffold N20 | 7872678 |
Scaffold N30 | 7605136 |
Scaffold N40 | 7605136 |
Scaffold N50 | 6201951 |
Scaffold N60 | 6201951 |
Scaffold N70 | 3434925 |
Scaffold N80 | 3417637 |
Scaffold N90 | 3252422 |
Scaffold N100 | 52960 |
Scaffold L10 | 1 |
Scaffold L20 | 1 |
Scaffold L30 | 2 |
Scaffold L40 | 2 |
Scaffold L50 | 3 |
Scaffold L60 | 3 |
Scaffold L70 | 4 |
Scaffold L80 | 5 |
Scaffold L90 | 6 |
Scaffold L100 | 8 |
Contig N10 | 7872678 |
Contig N20 | 7872678 |
Contig N30 | 7605136 |
Contig N40 | 7605136 |
Contig N50 | 6201951 |
Contig N60 | 6201951 |
Contig N70 | 3434925 |
Contig N80 | 3417637 |
Contig N90 | 3252422 |
Contig N100 | 52960 |
Contig L10 | 1 |
Contig L20 | 1 |
Contig L30 | 2 |
Contig L40 | 2 |
Contig L50 | 3 |
Contig L60 | 3 |
Contig L70 | 4 |
Contig L80 | 5 |
Contig L90 | 6 |
Contig L100 | 8 |
Gap N10 | 0 |
Gap N20 | 0 |
Gap N30 | 0 |
Gap N40 | 0 |
Gap N50 | 0 |
Gap N60 | 0 |
Gap N70 | 0 |
Gap N80 | 0 |
Gap N90 | 0 |
Gap N100 | 0 |
Gap L10 | 0 |
Gap L20 | 0 |
Gap L30 | 0 |
Gap L40 | 0 |
Gap L50 | 0 |
Gap L60 | 0 |
Gap L70 | 0 |
Gap L80 | 0 |
Gap L90 | 0 |
Gap L100 | 0 |
A tool to calculate a basic set of statistics about features contained in GFF3 files. @@ -1037,26 +1332,30 @@
BUSCO estimates the completeness and redundancy of processed genomic data based on universal single-copy @@ -1069,7 +1368,7 @@ viral genomes. arXiv:2106.11799 [q-bio] [Internet]. Available from: arxiv.org/abs/2106.11799
-Version: 5.6.1
+Version: 5.7.1
Dependency | Version |
---|---|
Dependency | Version |
hmmsearch | 3.1 |
bbtools | 39.01 |
metaeuk | 6.a5d39d9 |
hmmsearch | 3.1 |
bbtools | 39.01 |
metaeuk | 6.a5d39d9 |
python | sys.version_info(major=3, minor=7, micro=12, releaselevel='final', serial=0) |
Dependency | Version |
---|---|
Dependency | Version |
hmmsearch | 3.1 |
bbtools | 39.01 |
metaeuk | 6.a5d39d9 |
hmmsearch | 3.1 |
bbtools | 39.01 |
metaeuk | 6.a5d39d9 |
python | sys.version_info(major=3, minor=7, micro=12, releaselevel='final', serial=0) |
- A toolkit to identify and visualise telomeric repeats for the Darwin Tree of Life genomes. -
-Reference:
-- https://github.com/tolkit/telomeric-identifier -
-Version: 0.2.41
-- Searched sequence: - AACCCTAACCCTAACCCTAACCCT -
-+ BUSCO estimates the completeness and redundancy of processed genomic data based on universal single-copy + orthologs. GFFREAD is used to obtain protein sequences from assembly FASTA and annotation GFF3 files. +
+Reference:
++ Manni M., Berkeley M.R., Seppey M., Simao F.A., Zdobnov E.M. 2021. BUSCO update: novel and streamlined + workflows along with broader and deeper phylogenetic coverage for scoring of eukaryotic, prokaryotic, and + viral genomes. arXiv:2106.11799 [q-bio] [Internet]. Available from: + arxiv.org/abs/2106.11799 +
++ Pertea G, Pertea M. GFF Utilities: GffRead and GffCompare. F1000Res. 2020 Apr 28;9:ISCB Comm J-304. doi: 10.12688/f1000research.23297.2. PMID: + 32489650; PMCID: PMC7222033. +
+Version: 5.7.1 (BUSCO), 0.12.7 (GFFREAD)
+Annotation | +Lineage | +Percentages | +
---|---|---|
+ FI1 + | ++ fungi_odb10 + | +C:89.9%[S:89.4%,D:0.5%],F:0.8%,M:9.3%,n:758 | +
+ FI1 + | ++ hypocreales_odb10 + | +C:87.5%[S:87.4%,D:0.1%],F:0.6%,M:11.9%,n:4494 | +
Event | +Value | +
---|---|
Search Percentages | +C:89.9%[S:89.4%,D:0.5%],F:0.8%,M:9.3%,n:758 | +
Event | Frequency |
---|---|
Complete BUSCOs (C) | 682 |
Complete and single-copy BUSCOs (S) | 678 |
Complete and duplicated BUSCOs (D) | 4 |
Fragmented BUSCOs (F) | 6 |
Missing BUSCOs (M) | 70 |
Total BUSCO groups searched | 758 |
Parameter | +Value | +
---|---|
Version | +5.7.1 | +
Lineage created on | +2024-01-08 | +
mode | +proteins | +
predictor | +None | +
Dependency | Version |
---|---|
hmmsearch | 3.1 |
python | sys.version_info(major=3, minor=7, micro=12, releaselevel='final', serial=0) |
Event | +Value | +
---|---|
Search Percentages | +C:87.5%[S:87.4%,D:0.1%],F:0.6%,M:11.9%,n:4494 | +
Event | Frequency |
---|---|
Complete BUSCOs (C) | 3930 |
Complete and single-copy BUSCOs (S) | 3926 |
Complete and duplicated BUSCOs (D) | 4 |
Fragmented BUSCOs (F) | 27 |
Missing BUSCOs (M) | 537 |
Total BUSCO groups searched | 4494 |
Parameter | +Value | +
---|---|
Version | +5.7.1 | +
Lineage created on | +2024-01-08 | +
mode | +proteins | +
predictor | +None | +
Dependency | Version |
---|---|
hmmsearch | 3.1 |
python | sys.version_info(major=3, minor=7, micro=12, releaselevel='final', serial=0) |
+ A toolkit to identify and visualise telomeric repeats for the Darwin Tree of Life genomes. +
+Reference:
++ https://github.com/tolkit/telomeric-identifier +
+Version: 0.2.41
++ Searched sequence: + AACCCTAACCCTAACCCTAACCCT +
+LTR Assembly Index (LAI) is a reference-free genome metric that evaluates assembly continuity using LTR-RTs. @@ -1408,7 +1997,9 @@ Nucleic Acids Research, Volume 46, Issue 21, 30 November 2018, Page e126, 10.1093/nar/gky730
+Version: beta3.2
+Kraken2 assigns taxonomic labels to sequencing reads for metagenomics projects. @@ -1487,23 +2082,54 @@
- Hi-C contact mapping experiments measure the frequency of physical contact between loci in the genome. The - resulting dataset, called a “contact map,” is represented using a two-dimensional heatmap where the - intensity of each pixel indicates the frequency of contact between a pair of loci. -
-Reference:
-- Robinson JT, Turner D, Durand NC, Thorvaldsdóttir H, Mesirov JP, Aiden EL. Juicebox.js Provides a - Cloud-Based Visualization System for Hi-C Data. Cell Syst. 2018 Feb 28;6(2):256-258.e1. - 10.1016/j.cels.2018.01.001. Epub - 2018 Feb 7. PMID: 29428417; PMCID: PMC6047755. -
-Version: 2.4.3
-+ Hi-C contact mapping experiments measure the frequency of physical contact between loci in the genome. The + resulting dataset, called a “contact map,” is represented using a two-dimensional heatmap where the + intensity of each pixel indicates the frequency of contact between a pair of loci. +
+References:
+ ++ fastp Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics, + Volume 34, Issue 17, September 2018, Pages i884–i890, 10.1093/bioinformatics/bty560 +
+ ++ BWA Li, H. (2013). Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv preprint arXiv: 1303.3997. +
+ ++ SAMBLASTER Gregory G. Faust, Ira M. Hall, SAMBLASTER: fast duplicate marking and structural variant read extraction, + Bioinformatics, Volume 30, Issue 17, September 2014, Pages 2503–2505, 10.1093/bioinformatics/btu314 +
+ ++ SAMtools Petr Danecek, James K Bonfield, Jennifer Liddle, John Marshall, Valeriu Ohan, Martin O Pollard, Andrew Whitwham, + Thomas Keane, Shane A McCarthy, Robert M Davies, Heng Li, Twelve years of SAMtools and BCFtools, GigaScience, + Volume + 10, Issue 2, February 2021, giab008, 10.1093/gigascience/giab008 +
+ ++ Juicebox.js Robinson JT, Turner D, Durand NC, Thorvaldsdóttir H, Mesirov JP, Aiden EL. Juicebox.js Provides a + Cloud-Based Visualization System for Hi-C Data. Cell Syst. 2018 Feb 28;6(2):256-258.e1. + 10.1016/j.cels.2018.01.001. Epub + 2018 Feb 7. PMID: 29428417; PMCID: PMC6047755. +
+ +Version: 2.4.3
+Sequence labels and lengths
Sequence | Length |
---|---|
>CP031386.1 | 7605136 |
>CP031387.1 | 6201951 |
>CP031388.1 | 3252422 |
>CP031389.1 | 3434925 |
>CP031390.1 | 3417637 |
>CP031391.1 | 3185981 |
>CP031392.1 | 52960 |
HiC QC report
+fastp log
++Detecting adapter sequence for read1... +>Illumina TruSeq Adapter Read 1 +AGATCGGAAGAGCACACGTCTGAACTCCAGTCA + +Detecting adapter sequence for read2... +>Illumina TruSeq Adapter Read 2 +AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT + +Read1 before filtering: +total reads: 26408294 +total bases: 2112663520 +Q20 bases: 2033771115(96.2657%) +Q30 bases: 2001190936(94.7236%) + +Read2 before filtering: +total reads: 26408294 +total bases: 2112663520 +Q20 bases: 1978972764(93.6719%) +Q30 bases: 1933151999(91.5031%) + +Read1 after filtering: +total reads: 25405615 +total bases: 2031716126 +Q20 bases: 1966130766(96.7719%) +Q30 bases: 1937083561(95.3422%) + +Read2 after filtering: +total reads: 25405615 +total bases: 2031715879 +Q20 bases: 1935938955(95.2859%) +Q30 bases: 1897487240(93.3933%) + +Filtering result: +reads passed filter: 50811230 +reads failed due to low quality: 1584466 +reads failed due to too many N: 22150 +reads failed due to too short: 398742 +reads with adapter trimmed: 723915 +bases trimmed due to adapters: 37726206 + +Duplication rate: 7.78525% + +Insert size peak (evaluated by paired-end reads): 129 + +JSON report: SRR8238190.fastp.json +HTML report: SRR8238190.fastp.html + +fastp --in1 SRR8238190_1.fastq.gz --in2 SRR8238190_2.fastq.gz --out1 SRR8238190_1.fastp.fastq.gz --out2 SRR8238190_2.fastp.fastq.gz --json SRR8238190.fastp.json --html SRR8238190.fastp.html --failed_out SRR8238190.paired.fail.fastq.gz --unpaired1 SRR8238190_1.fail.fastq.gz --unpaired2 SRR8238190_2.fail.fastq.gz --thread 6 --detect_adapter_for_pe --qualified_quality_phred 20 --length_required 50 +fastp v0.23.4, time used: 104 seconds + ++
Circos facilitates the identification and analysis of similarities and differences arising from comparisons @@ -1557,7 +2267,7 @@ bundled together.
The genome-wide alignments are performed with MUMMER.
+References:
++ Krzywinski, M., Schein, J., Birol, I., Connors, J., Gascoyne, R., Horsman, D., ... & Marra, M. A. (2009). + Circos: an information aesthetic for comparative genomics. Genome research, 19(9), 1639-1645. + https://doi.org/10.1101/gr.092759.109 +
++ Version: 4.0.0rc1 (MUMMER) +
+Notes:
++ Plotsr generates high-quality visualisation of synteny and structural rearrangements between multiple genomes. For + this, it uses the genomic structural annotations between multiple chromosome-level assemblies. The genome-wide + alignments are performed with Minimap2. +
+References:
++ Goel M, Schneeberger K. 2022. plotsr: visualizing structural similarities and rearrangements between multiple + genomes. Bioinformatics. 2022 May 13;38(10):2922-2926. doi: 10.1093/bioinformatics/btac196. + PMID: 35561173; PMCID: PMC9113368. +
++ Goel M, Sun H, Jiao WB, Schneeberger K. 2019. SyRI: finding genomic rearrangements and local sequence differences + from whole-genome assemblies. Genome Biol. 2019 Dec 16;20(1):277. doi: 10.1186/s13059-019-1911-0. PMID: 31842948; + PMCID: PMC6913012. +
+ ++ Li H. 2021. New strategies to improve minimap2 alignment accuracy, Bioinformatics, Volume 37, Issue 23, December + 2021, Pages 4572–4574, doi: 10.1093/bioinformatics/btab705 +
+ ++ Versions: 1.1.1 + (PLOTSR), 1.7.0 (SYRI), 2.28-r1209 (MINIMAP2) +
++ Note: SyRI failed to detect structural rearrangements for the following comparisons: TT_2021a with reference to JAD. This may be due to known SyRI limitations. See: GitHub/Syri/Limitations +
Sequence labels
+Labels | JAD | TT_2021a | FI1 |
---|---|---|---|
Chr1 | JADWOS010000003.1 | CP083245.1 | CP031385.1 |
Chr2 | JADWOS010000004.1 | CP083246.1 | CP031386.1 |
Chr3 | JADWOS010000005.1 | CP083247.1 | CP031387.1 |
Chr4 | JADWOS010000006.1 | CP083248.1 | CP031388.1 |
Chr5 | JADWOS010000007.1 | CP083249.1 | CP031389.1 |
Chr6 | JADWOS010000008.1 | CP083250.1 | CP031390.1 |
Chr7 | JADWOS010000009.1 | CP083251.1 | CP031391.1 |
+Often, genome assembly projects have Illumina whole-genome sequencing reads available for + the assembled individual. The k-mer spectrum of this read set can be used for independently evaluating assembly + quality without the need for a high-quality reference. Merqury provides a set of tools for this purpose.
+References:
++ Rhie, A., Walenz, B.P., Koren, S. et al. Merqury: reference-free quality, completeness, and phasing assessment for + genome assemblies. Genome Biol 21, 245 (2020). doi: 10.1186/s13059-020-02134-9 +
++ Version: 1.3 +
+Completeness stats
+Assembly | Region | Found | Total | % Covered |
---|---|---|---|---|
FI1 | all | 26653235 | 26743412 | 99.6628 |
Consensus quality QV stats
+Assembly | No Support | Total | QV | Error % |
---|---|---|---|---|
FI1 | 3468 | 35023530 | 53.2648 | 4.71542e-06 |
Spectra-asm
+FI1 spectra-cn
+