diff --git a/.gitignore b/.gitignore index c43601dd..d3523a85 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,46 @@ +# Ignore Nextflow-specific files, including logs, cache, and temporary files .nextflow* + +# Ignore Nextflow's work directory where intermediate files are stored work/ -data/ + +# Ignore results directory, as this is typically generated output that can be large and recreated results/ + +# MacOS specific hidden file that stores folder view settings .DS_Store + testing/ testing* -*.pyc + dbs/ + node_modules/ + +# Optional: ignore any temporary files created by Python or text editors +__pycache__/ # Python cache directory +*.pyc +*.pyo # Python optimized bytecode files +*.pkl # Pickle files (often generated during data processing) + +# Ignore any virtual environment directories used to isolate Python dependencies +venv/ +env/ +*.venv/ + +# Ignore Jupyter Notebook checkpoints, if notebooks are used for analysis or reporting +.ipynb_checkpoints/ + +# Ignore any temporary, swap, or backup files created by editors like Vim or Emacs +*~ +*.swp +*.swo +*.bak + +.coverage +htmlcov/ +*.cover +reports/ +trace/ +.cache/ +logs/ diff --git a/tests/scripts/data/deduplication_script_test_input.gff b/tests/scripts/data/deduplication_script_test_input.gff new file mode 100644 index 00000000..56e37ac2 --- /dev/null +++ b/tests/scripts/data/deduplication_script_test_input.gff @@ -0,0 +1,116 @@ +##gff-version 3 +#fucP - BACUNI_03973 in reference has no gene name; BACUNI_03974 in reference has a gene name +contig_4 Prodigal:002006 gene 27409 27582 . + 0 ID=BU_ATCC8492_00038;Name=fucP_1;gene=fucP_1;locus_tag=BU_ATCC8492_00038;product=hypothetical protein;product_source=Prokka;Alias=BACUNI_03973;extra_copy_number=0 +contig_4 Prodigal:002006 mRNA 27409 27582 . + 0 ID=transcript:BU_ATCC8492_00038;Name=fucP_1;gene=fucP_1;locus_tag=BU_ATCC8492_00038;product=hypothetical protein;product_source=Prokka;Parent=BU_ATCC8492_00038 +contig_4 Prodigal:002006 exon 27409 27582 . + 0 ID=exon:BU_ATCC8492_00038;Name=fucP_1;gene=fucP_1;locus_tag=BU_ATCC8492_00038;product=hypothetical protein;product_source=Prokka;Parent=transcript:BU_ATCC8492_00038 +contig_4 Prodigal:002006 CDS 27409 27582 . + 0 ID=CDS:BU_ATCC8492_00038;Name=fucP_1;gene=fucP_1;locus_tag=BU_ATCC8492_00038;product=hypothetical protein;product_source=Prokka;Parent=transcript:BU_ATCC8492_00038 +contig_4 Prodigal:002006 gene 27622 30429 . + 0 ID=BU_ATCC8492_00039;Name=fucP_2;gene=fucP_2;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;Alias=BACUNI_03974;extra_copy_number=0 +contig_4 Prodigal:002006 mRNA 27622 30429 . + 0 ID=transcript:BU_ATCC8492_00039;Name=fucP_2;gene=fucP_2;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;Parent=BU_ATCC8492_00039 +contig_4 Prodigal:002006 exon 27622 30429 . + 0 ID=exon:BU_ATCC8492_00039;Name=fucP_2;gene=fucP_2;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;Parent=transcript:BU_ATCC8492_00039 +contig_4 Prodigal:002006 CDS 27622 30429 . + 0 ID=CDS:BU_ATCC8492_00039;Name=fucP_2;gene=fucP_2;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;Parent=transcript:BU_ATCC8492_00039 +#dnaG - one copy has an alias (present in reference), the other one doesn't have an alias +contig_4 Prodigal:002006 gene 30597 32888 . + 0 ID=BU_ATCC8492_00040;eC_number=3.2.1.-;Name=dnaG_1;gene=dnaG_1;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;Alias=BACUNI_01111;extra_copy_number=0 +contig_4 Prodigal:002006 mRNA 30597 32888 . + 0 ID=transcript:BU_ATCC8492_00040;eC_number=3.2.1.-;Name=dnaG_1;gene=dnaG_1;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;Parent=BU_ATCC8492_00040 +contig_4 Prodigal:002006 exon 30597 32888 . + 0 ID=exon:BU_ATCC8492_00040;eC_number=3.2.1.-;Name=dnaG_1;gene=dnaG_1;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;Parent=transcript:BU_ATCC8492_00040 +contig_4 Prodigal:002006 CDS 30597 32888 . + 0 ID=CDS:BU_ATCC8492_00040;eC_number=3.2.1.-;Name=dnaG_1;gene=dnaG_1;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;Parent=transcript:BU_ATCC8492_00040 +contig_4 Prodigal:002006 gene 30597 32888 . + 0 ID=BU_ATCC8492_00040;eC_number=3.2.1.-;Name=dnaG_2;gene=dnaG_2;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;extra_copy_number=0 +contig_4 Prodigal:002006 mRNA 30597 32888 . + 0 ID=transcript:BU_ATCC8492_00040;eC_number=3.2.1.-;Name=dnaG_2;gene=dnaG_2;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;Parent=BU_ATCC8492_00040 +contig_4 Prodigal:002006 exon 30597 32888 . + 0 ID=exon:BU_ATCC8492_00040;eC_number=3.2.1.-;Name=dnaG_2;gene=dnaG_2;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;Parent=transcript:BU_ATCC8492_00040 +contig_4 Prodigal:002006 CDS 30597 32888 . + 0 ID=CDS:BU_ATCC8492_00040;eC_number=3.2.1.-;Name=dnaG_2;gene=dnaG_2;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;Parent=transcript:BU_ATCC8492_00040 +#dnaA - copy 1 has no alias, alias of copy 2 has a unique gene name, alias of copy 3 has a different unique gene name +contig_4 Prodigal:002006 gene 27409 27582 . + 0 ID=BU_ATCC8492_00138;Name=dnaA_1;gene=dnaA_1;locus_tag=BU_ATCC8492_00038;product=hypothetical protein;product_source=Prokka +contig_4 Prodigal:002006 mRNA 27409 27582 . + 0 ID=transcript:BU_ATCC8492_00138;Name=dnaA_1;gene=dnaA_1;locus_tag=BU_ATCC8492_00038;product=hypothetical protein;product_source=Prokka;Parent=BU_ATCC8492_00038 +contig_4 Prodigal:002006 exon 27409 27582 . + 0 ID=exon:BU_ATCC8492_00138;Name=dnaA_1;gene=dnaA_1;locus_tag=BU_ATCC8492_00038;product=hypothetical protein;product_source=Prokka;Parent=transcript:BU_ATCC8492_00038 +contig_4 Prodigal:002006 CDS 27409 27582 . + 0 ID=CDS:BU_ATCC8492_00138;Name=dnaA_1;gene=dnaA_1;locus_tag=BU_ATCC8492_00038;product=hypothetical protein;product_source=Prokka;Parent=transcript:BU_ATCC8492_00038 +contig_4 Prodigal:002006 gene 27622 30429 . + 0 ID=BU_ATCC8492_00139;Name=dnaA_2;gene=dnaA_2;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;Alias=BACUNI_13974;extra_copy_number=0 +contig_4 Prodigal:002006 mRNA 27622 30429 . + 0 ID=transcript:BU_ATCC8492_00139;Name=dnaA_2;gene=dnaA_2;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;Parent=BU_ATCC8492_00039 +contig_4 Prodigal:002006 exon 27622 30429 . + 0 ID=exon:BU_ATCC8492_00139;Name=dnaA_2;gene=dnaA_2;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;Parent=transcript:BU_ATCC8492_00039 +contig_4 Prodigal:002006 CDS 27622 30429 . + 0 ID=CDS:BU_ATCC8492_00139;Name=dnaA_2;gene=dnaA_2;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;Parent=transcript:BU_ATCC8492_00039 +contig_4 Prodigal:002006 gene 30597 32888 . + 0 ID=BU_ATCC8492_00140;eC_number=3.2.1.-;Name=dnaA_3;gene=dnaA_3;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;Alias=BACUNI_13973;extra_copy_number=0 +contig_4 Prodigal:002006 mRNA 30597 32888 . + 0 ID=transcript:BU_ATCC8492_00140;eC_number=3.2.1.-;Name=dnaA_3;gene=dnaA_3;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;Parent=BU_ATCC8492_00040 +contig_4 Prodigal:002006 exon 30597 32888 . + 0 ID=exon:BU_ATCC8492_00140;eC_number=3.2.1.-;Name=dnaA_3;gene=dnaA_3;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;Parent=transcript:BU_ATCC8492_00040 +contig_4 Prodigal:002006 CDS 30597 32888 . + 0 ID=CDS:BU_ATCC8492_00140;eC_number=3.2.1.-;Name=dnaA_3;gene=dnaA_3;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;Parent=transcript:BU_ATCC8492_00040 +#lacZ - both aliases in the reference are also lacZ +contig_4 Prodigal:002006 gene 33041 35692 . + 0 ID=BU_ATCC8492_00041;eC_number=3.2.1.23;Name=lacZ_1;gene=lacZ_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_01687;locus_tag=BU_ATCC8492_00041;product=Beta-galactosidase;product_source=Prokka;eggNOG=411479.BACUNI_03971;Alias=BACUNI_03971;extra_copy_number=0 +contig_4 Prodigal:002006 mRNA 33041 35692 . + 0 ID=transcript:BU_ATCC8492_00041;eC_number=3.2.1.23;Name=lacZ_1;gene=lacZ_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_01687;locus_tag=BU_ATCC8492_00041;product=Beta-galactosidase;product_source=Prokka;eggNOG=411479.BACUNI_03971;Parent=BU_ATCC8492_00041 +contig_4 Prodigal:002006 exon 33041 35692 . + 0 ID=exon:BU_ATCC8492_00041;eC_number=3.2.1.23;Name=lacZ_1;gene=lacZ_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_01687;locus_tag=BU_ATCC8492_00041;product=Beta-galactosidase;product_source=Prokka;eggNOG=411479.BACUNI_03971;Parent=transcript:BU_ATCC8492_00041 +contig_4 Prodigal:002006 CDS 33041 35692 . + 0 ID=CDS:BU_ATCC8492_00041;eC_number=3.2.1.23;Name=lacZ_1;gene=lacZ_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_01687;locus_tag=BU_ATCC8492_00041;product=Beta-galactosidase;product_source=Prokka;eggNOG=411479.BACUNI_03971;Parent=transcript:BU_ATCC8492_00041 +contig_4 Prodigal:002006 gene 35740 37305 . + 0 ID=BU_ATCC8492_00042;eC_number=3.1.6.-;Name=lacZ_2;gene=lacZ_2;Dbxref=COG:COG3119,UniProt:A7V8Q1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8A2H2;locus_tag=BU_ATCC8492_00042;product=N-acetylgalactosamine-6-O-sulfatase;product_source=Prokka;eggNOG=411479.BACUNI_03970;Alias=BACUNI_03970;extra_copy_number=0 +contig_4 Prodigal:002006 mRNA 35740 37305 . + 0 ID=transcript:BU_ATCC8492_00042;eC_number=3.1.6.-;Name=lacZ_2;gene=lacZ_2;Dbxref=COG:COG3119,UniProt:A7V8Q1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8A2H2;locus_tag=BU_ATCC8492_00042;product=N-acetylgalactosamine-6-O-sulfatase;product_source=Prokka;eggNOG=411479.BACUNI_03970;Parent=BU_ATCC8492_00042 +contig_4 Prodigal:002006 exon 35740 37305 . + 0 ID=exon:BU_ATCC8492_00042;eC_number=3.1.6.-;Name=lacZ_2;gene=lacZ_2;Dbxref=COG:COG3119,UniProt:A7V8Q1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8A2H2;locus_tag=BU_ATCC8492_00042;product=N-acetylgalactosamine-6-O-sulfatase;product_source=Prokka;eggNOG=411479.BACUNI_03970;Parent=transcript:BU_ATCC8492_00042 +contig_4 Prodigal:002006 CDS 35740 37305 . + 0 ID=CDS:BU_ATCC8492_00042;eC_number=3.1.6.-;Name=lacZ_2;gene=lacZ_2;Dbxref=COG:COG3119,UniProt:A7V8Q1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8A2H2;locus_tag=BU_ATCC8492_00042;product=N-acetylgalactosamine-6-O-sulfatase;product_source=Prokka;eggNOG=411479.BACUNI_03970;Parent=transcript:BU_ATCC8492_00042 +#rcsC +contig_4 Prodigal:002006 gene 37353 38537 . + 0 ID=BU_ATCC8492_00043;locus_tag=BU_ATCC8492_00043;Name=rcsC_1;gene=rcsC_1;product=Protein of unknown function DUF2961;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03969;Alias=BACUNI_03969;extra_copy_number=0 +contig_4 Prodigal:002006 mRNA 37353 38537 . + 0 ID=transcript:BU_ATCC8492_00043;locus_tag=BU_ATCC8492_00043;Name=rcsC_1;gene=rcsC_1;product=Protein of unknown function DUF2961;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03969;Parent=BU_ATCC8492_00043 +contig_4 Prodigal:002006 exon 37353 38537 . + 0 ID=exon:BU_ATCC8492_00043;locus_tag=BU_ATCC8492_00043;Name=rcsC_1;gene=rcsC_1;product=Protein of unknown function DUF2961;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03969;Parent=transcript:BU_ATCC8492_00043 +contig_4 Prodigal:002006 CDS 37353 38537 . + 0 ID=CDS:BU_ATCC8492_00043;locus_tag=BU_ATCC8492_00043;Name=rcsC_1;gene=rcsC_1;product=Protein of unknown function DUF2961;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03969;Parent=transcript:BU_ATCC8492_00043 +contig_4 Prodigal:002006 gene 42873 45848 . + 0 ID=BU_ATCC8492_00047;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KPJ3;Name=rcsC_2;gene=rcsC_2;locus_tag=BU_ATCC8492_00047;product=TonB-dependent receptor P3;product_source=Prokka;eggNOG=411479.BACUNI_03965;Alias=BACUNI_03965;extra_copy_number=0 +contig_4 Prodigal:002006 mRNA 42873 45848 . + 0 ID=transcript:BU_ATCC8492_00047;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KPJ3;Name=rcsC_2;gene=rcsC_2;locus_tag=BU_ATCC8492_00047;product=TonB-dependent receptor P3;product_source=Prokka;eggNOG=411479.BACUNI_03965;Parent=BU_ATCC8492_00047 +contig_4 Prodigal:002006 exon 42873 45848 . + 0 ID=exon:BU_ATCC8492_00047;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KPJ3;Name=rcsC_2;gene=rcsC_2;locus_tag=BU_ATCC8492_00047;product=TonB-dependent receptor P3;product_source=Prokka;eggNOG=411479.BACUNI_03965;Parent=transcript:BU_ATCC8492_00047 +contig_4 Prodigal:002006 CDS 42873 45848 . + 0 ID=CDS:BU_ATCC8492_00047;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KPJ3;Name=rcsC_2;gene=rcsC_2;locus_tag=BU_ATCC8492_00047;product=TonB-dependent receptor P3;product_source=Prokka;eggNOG=411479.BACUNI_03965;Parent=transcript:BU_ATCC8492_00047 +contig_4 Prodigal:002006 gene 42873 45848 . + 0 ID=BU_ATCC8492_03960;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KPJ3;Name=rcsB;gene=rcsB;locus_tag=BU_ATCC8492_00047;product=TonB-dependent receptor P3;product_source=Prokka;eggNOG=411479.BACUNI_03965 +contig_4 Prodigal:002006 mRNA 42873 45848 . + 0 ID=transcript:BU_ATCC8492_03960;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KPJ3;Name=rcsB;gene=rcsB;locus_tag=BU_ATCC8492_00047;product=TonB-dependent receptor P3;product_source=Prokka;eggNOG=411479.BACUNI_03965;Parent=BU_ATCC8492_03960 +contig_4 Prodigal:002006 exon 42873 45848 . + 0 ID=exon:BU_ATCC8492_03960;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KPJ3;Name=rcsB;gene=rcsB;locus_tag=BU_ATCC8492_00047;product=TonB-dependent receptor P3;product_source=Prokka;eggNOG=411479.BACUNI_03965;Parent=transcript:BU_ATCC8492_03960 +contig_4 Prodigal:002006 CDS 42873 45848 . + 0 ID=CDS:BU_ATCC8492_03960;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KPJ3;Name=rcsB;gene=rcsB;locus_tag=BU_ATCC8492_00047;product=TonB-dependent receptor P3;product_source=Prokka;eggNOG=411479.BACUNI_03965;Parent=transcript:BU_ATCC8492_03960 +#susC +contig_4 Prodigal:002006 gene 45874 47382 . + 0 ID=BU_ATCC8492_11148;locus_tag=BU_ATCC8492_11148;Name=susC_1;gene=susC_1;product=SusD-like protein P2;product_source=Prokka;eggNOG=411479.BACUNI_03964;Alias=BACUNI_93964;extra_copy_number=0 +contig_4 Prodigal:002006 mRNA 45874 47382 . + 0 ID=transcript:BU_ATCC8492_11148;locus_tag=BU_ATCC8492_11148;Name=susC_1;gene=susC_1;product=SusD-like protein P2;product_source=Prokka;eggNOG=411479.BACUNI_03964;Parent=BU_ATCC8492_11148 +contig_4 Prodigal:002006 exon 45874 47382 . + 0 ID=exon:BU_ATCC8492_11148;locus_tag=BU_ATCC8492_11148;Name=susC_1;gene=susC_1;product=SusD-like protein P2;product_source=Prokka;eggNOG=411479.BACUNI_03964;Parent=transcript:BU_ATCC8492_11148 +contig_4 Prodigal:002006 CDS 45874 47382 . + 0 ID=CDS:BU_ATCC8492_11148;locus_tag=BU_ATCC8492_11148;Name=susC_1;gene=susC_1;product=SusD-like protein P2;product_source=Prokka;eggNOG=411479.BACUNI_03964;Parent=transcript:BU_ATCC8492_11148 +contig_4 Prodigal:002006 gene 47450 48781 . + 0 ID=BU_ATCC8492_11149;locus_tag=BU_ATCC8492_11149;Name=susC_2;gene=susC_2;product=Uncharacterised conserved protein UCP028431;product_source=InterPro(PIRSF);eggNOG=411479.BACUNI_03963 +contig_4 Prodigal:002006 mRNA 47450 48781 . + 0 ID=transcript:BU_ATCC8492_11149;locus_tag=BU_ATCC8492_11149;Name=susC_2;gene=susC_2;product=Uncharacterised conserved protein UCP028431;product_source=InterPro(PIRSF);eggNOG=411479.BACUNI_03963;Parent=BU_ATCC8492_11149 +contig_4 Prodigal:002006 exon 47450 48781 . + 0 ID=exon:BU_ATCC8492_11149;locus_tag=BU_ATCC8492_11149;Name=susC_2;gene=susC_2;product=Uncharacterised conserved protein UCP028431;product_source=InterPro(PIRSF);eggNOG=411479.BACUNI_03963;Parent=transcript:BU_ATCC8492_11149 +contig_4 Prodigal:002006 CDS 47450 48781 . + 0 ID=CDS:BU_ATCC8492_11149;locus_tag=BU_ATCC8492_11149;Name=susC_2;gene=susC_2;product=Uncharacterised conserved protein UCP028431;product_source=InterPro(PIRSF);eggNOG=411479.BACUNI_03963;Parent=transcript:BU_ATCC8492_11149 +#susD +contig_4 Prodigal:002006 gene 45874 47382 . + 0 ID=BU_ATCC8492_00048;locus_tag=BU_ATCC8492_00048;Name=susD_1;gene=susD_1;product=SusD-like protein P2;product_source=Prokka;eggNOG=411479.BACUNI_03964;Alias=BACUNI_93964;extra_copy_number=0 +contig_4 Prodigal:002006 mRNA 45874 47382 . + 0 ID=transcript:BU_ATCC8492_00048;locus_tag=BU_ATCC8492_00048;Name=susD_1;gene=susD_1;product=SusD-like protein P2;product_source=Prokka;eggNOG=411479.BACUNI_03964;Parent=BU_ATCC8492_00048 +contig_4 Prodigal:002006 exon 45874 47382 . + 0 ID=exon:BU_ATCC8492_00048;locus_tag=BU_ATCC8492_00048;Name=susD_1;gene=susD_1;product=SusD-like protein P2;product_source=Prokka;eggNOG=411479.BACUNI_03964;Parent=transcript:BU_ATCC8492_00048 +contig_4 Prodigal:002006 CDS 45874 47382 . + 0 ID=CDS:BU_ATCC8492_00048;locus_tag=BU_ATCC8492_00048;Name=susD_1;gene=susD_1;product=SusD-like protein P2;product_source=Prokka;eggNOG=411479.BACUNI_03964;Parent=transcript:BU_ATCC8492_00048 +contig_4 Prodigal:002006 gene 47450 48781 . + 0 ID=BU_ATCC8492_00049;locus_tag=BU_ATCC8492_00049;Name=susD_2;gene=susD_2;product=Uncharacterised conserved protein UCP028431;product_source=InterPro(PIRSF);eggNOG=411479.BACUNI_03963 +contig_4 Prodigal:002006 mRNA 47450 48781 . + 0 ID=transcript:BU_ATCC8492_00049;locus_tag=BU_ATCC8492_00049;Name=susD_2;gene=susD_2;product=Uncharacterised conserved protein UCP028431;product_source=InterPro(PIRSF);eggNOG=411479.BACUNI_03963;Parent=BU_ATCC8492_00049 +contig_4 Prodigal:002006 exon 47450 48781 . + 0 ID=exon:BU_ATCC8492_00049;locus_tag=BU_ATCC8492_00049;Name=susD_2;gene=susD_2;product=Uncharacterised conserved protein UCP028431;product_source=InterPro(PIRSF);eggNOG=411479.BACUNI_03963;Parent=transcript:BU_ATCC8492_00049 +contig_4 Prodigal:002006 CDS 47450 48781 . + 0 ID=CDS:BU_ATCC8492_00049;locus_tag=BU_ATCC8492_00049;Name=susD_2;gene=susD_2;product=Uncharacterised conserved protein UCP028431;product_source=InterPro(PIRSF);eggNOG=411479.BACUNI_03963;Parent=transcript:BU_ATCC8492_00049 +contig_4 Prodigal:002006 gene 48844 49695 . + 0 ID=BU_ATCC8492_00050;locus_tag=BU_ATCC8492_00050;Name=susD_3;gene=susD_3;product=Endonuclease/exonuclease/phosphatase;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03962 +contig_4 Prodigal:002006 mRNA 48844 49695 . + 0 ID=transcript:BU_ATCC8492_00050;locus_tag=BU_ATCC8492_00050;Name=susD_3;gene=susD_3;product=Endonuclease/exonuclease/phosphatase;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03962;Parent=BU_ATCC8492_00050 +contig_4 Prodigal:002006 exon 48844 49695 . + 0 ID=exon:BU_ATCC8492_00050;locus_tag=BU_ATCC8492_00050;Name=susD_3;gene=susD_3;product=Endonuclease/exonuclease/phosphatase;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03962;Parent=transcript:BU_ATCC8492_00050 +contig_4 Prodigal:002006 CDS 48844 49695 . + 0 ID=CDS:BU_ATCC8492_00050;locus_tag=BU_ATCC8492_00050;Name=susD_3;gene=susD_3;product=Endonuclease/exonuclease/phosphatase;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03962;Parent=transcript:BU_ATCC8492_00050 +#axe7A +contig_1 Prodigal:002006 gene 275640 276932 . + 0 ID=BU_ATCC8492_00246;eC_number=3.1.1.-;Name=axe7A_1;Dbxref=COG:COG3458,UniProt:A7V6D1;gene=axe7A_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:D5EXI2;locus_tag=BU_ATCC8492_00246;product=Acetyl esterase Axe7A;product_source=Prokka;eggNOG=411479.BACUNI_03144;cog=Q;pfam=PF05448;interpro=IPR008391,IPR029058,IPR039069;dbcan_prot_type=CAZyme;dbcan_prot_family=CE7_e10|CE7;substrate_dbcan-pul=pectin;substrate_dbcan-sub=N/A;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Alias=BACUNI_03144;extra_copy_number=0 +contig_1 Prodigal:002006 mRNA 275640 276932 . + 0 ID=transcript:BU_ATCC8492_00246;eC_number=3.1.1.-;Name=axe7A_1;Dbxref=COG:COG3458,UniProt:A7V6D1;gene=axe7A_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:D5EXI2;locus_tag=BU_ATCC8492_00246;product=Acetyl esterase Axe7A;product_source=Prokka;eggNOG=411479.BACUNI_03144;cog=Q;pfam=PF05448;interpro=IPR008391,IPR029058,IPR039069;dbcan_prot_type=CAZyme;dbcan_prot_family=CE7_e10|CE7;substrate_dbcan-pul=pectin;substrate_dbcan-sub=N/A;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BU_ATCC8492_00246 +contig_1 Prodigal:002006 exon 275640 276932 . + 0 ID=exon:BU_ATCC8492_00246;eC_number=3.1.1.-;Name=axe7A_1;Dbxref=COG:COG3458,UniProt:A7V6D1;gene=axe7A_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:D5EXI2;locus_tag=BU_ATCC8492_00246;product=Acetyl esterase Axe7A;product_source=Prokka;eggNOG=411479.BACUNI_03144;cog=Q;pfam=PF05448;interpro=IPR008391,IPR029058,IPR039069;dbcan_prot_type=CAZyme;dbcan_prot_family=CE7_e10|CE7;substrate_dbcan-pul=pectin;substrate_dbcan-sub=N/A;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_00246 +contig_1 Prodigal:002006 CDS 275640 276932 . + 0 ID=CDS:BU_ATCC8492_00246;eC_number=3.1.1.-;Name=axe7A_1;Dbxref=COG:COG3458,UniProt:A7V6D1;gene=axe7A_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:D5EXI2;locus_tag=BU_ATCC8492_00246;product=Acetyl esterase Axe7A;product_source=Prokka;eggNOG=411479.BACUNI_03144;cog=Q;pfam=PF05448;interpro=IPR008391,IPR029058,IPR039069;dbcan_prot_type=CAZyme;dbcan_prot_family=CE7_e10|CE7;substrate_dbcan-pul=pectin;substrate_dbcan-sub=N/A;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_00246 +contig_1 Prodigal:002006 gene 276947 278941 . + 0 ID=BU_ATCC8492_00247;eC_number=3.2.1.22;Name=axe7A_2;gene=axe7A_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8A6L0;locus_tag=BU_ATCC8492_00247;product=Retaining alpha-galactosidase;product_source=Prokka;eggNOG=411479.BACUNI_03143;cog=M;kegg=ko:K01187;pfam=PF10566,PF14508,PF14509;interpro=IPR013780,IPR013785,IPR014718,IPR017853,IPR019563,IPR029483,IPR029486;dbcan_prot_type=CAZyme;dbcan_prot_family=GH97_e1|GH97;substrate_dbcan-pul=pectin;substrate_dbcan-sub=N/A;uf_keyword=Hydrolase,Glycosidase;uf_ontology_term=GO:0016798,GO:0004553,GO:0003824,GO:0016787;Dbxref=UniProt:A7V6D0;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Alias=BACUNI_03144;extra_copy_number=1 +contig_1 Prodigal:002006 mRNA 276947 278941 . + 0 ID=transcript:BU_ATCC8492_00247;eC_number=3.2.1.22;Name=axe7A_2;gene=axe7A_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8A6L0;locus_tag=BU_ATCC8492_00247;product=Retaining alpha-galactosidase;product_source=Prokka;eggNOG=411479.BACUNI_03143;cog=M;kegg=ko:K01187;pfam=PF10566,PF14508,PF14509;interpro=IPR013780,IPR013785,IPR014718,IPR017853,IPR019563,IPR029483,IPR029486;dbcan_prot_type=CAZyme;dbcan_prot_family=GH97_e1|GH97;substrate_dbcan-pul=pectin;substrate_dbcan-sub=N/A;uf_keyword=Hydrolase,Glycosidase;uf_ontology_term=GO:0016798,GO:0004553,GO:0003824,GO:0016787;Dbxref=UniProt:A7V6D0;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BU_ATCC8492_00247 +contig_1 Prodigal:002006 exon 276947 278941 . + 0 ID=exon:BU_ATCC8492_00247;eC_number=3.2.1.22;Name=axe7A_2;gene=axe7A_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8A6L0;locus_tag=BU_ATCC8492_00247;product=Retaining alpha-galactosidase;product_source=Prokka;eggNOG=411479.BACUNI_03143;cog=M;kegg=ko:K01187;pfam=PF10566,PF14508,PF14509;interpro=IPR013780,IPR013785,IPR014718,IPR017853,IPR019563,IPR029483,IPR029486;dbcan_prot_type=CAZyme;dbcan_prot_family=GH97_e1|GH97;substrate_dbcan-pul=pectin;substrate_dbcan-sub=N/A;uf_keyword=Hydrolase,Glycosidase;uf_ontology_term=GO:0016798,GO:0004553,GO:0003824,GO:0016787;Dbxref=UniProt:A7V6D0;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_00247 +contig_1 Prodigal:002006 CDS 276947 278941 . + 0 ID=CDS:BU_ATCC8492_00247;eC_number=3.2.1.22;Name=axe7A_2;gene=axe7A_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8A6L0;locus_tag=BU_ATCC8492_00247;product=Retaining alpha-galactosidase;product_source=Prokka;eggNOG=411479.BACUNI_03143;cog=M;kegg=ko:K01187;pfam=PF10566,PF14508,PF14509;interpro=IPR013780,IPR013785,IPR014718,IPR017853,IPR019563,IPR029483,IPR029486;dbcan_prot_type=CAZyme;dbcan_prot_family=GH97_e1|GH97;substrate_dbcan-pul=pectin;substrate_dbcan-sub=N/A;uf_keyword=Hydrolase,Glycosidase;uf_ontology_term=GO:0016798,GO:0004553,GO:0003824,GO:0016787;Dbxref=UniProt:A7V6D0;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_00247 +contig_1 Prodigal:002006 gene 273563 275626 . + 0 ID=BU_ATCC8492_00245;Name=axe7A_3;gene=axe7A_3;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00245;product=Glycoside hydrolase superfamily protein;product_source=InterPro(SUPERFAMILY);eggNOG=411479.BACUNI_03145;cog=G;interpro=IPR013785,IPR017853;Dbxref=UniProt:A7V6D2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Alias=BACUNI_03145;extra_copy_number=0 +contig_1 Prodigal:002006 mRNA 273563 275626 . + 0 ID=transcript:BU_ATCC8492_00245;Name=axe7A_3;gene=axe7A_3;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00245;product=Glycoside hydrolase superfamily protein;product_source=InterPro(SUPERFAMILY);eggNOG=411479.BACUNI_03145;cog=G;interpro=IPR013785,IPR017853;Dbxref=UniProt:A7V6D2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BU_ATCC8492_00245 +contig_1 Prodigal:002006 exon 273563 275626 . + 0 ID=exon:BU_ATCC8492_00245;Name=axe7A_3;gene=axe7A_3;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00245;product=Glycoside hydrolase superfamily protein;product_source=InterPro(SUPERFAMILY);eggNOG=411479.BACUNI_03145;cog=G;interpro=IPR013785,IPR017853;Dbxref=UniProt:A7V6D2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_00245 +contig_1 Prodigal:002006 CDS 273563 275626 . + 0 ID=CDS:BU_ATCC8492_00245;Name=axe7A_3;gene=axe7A_3;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00245;product=Glycoside hydrolase superfamily protein;product_source=InterPro(SUPERFAMILY);eggNOG=411479.BACUNI_03145;cog=G;interpro=IPR013785,IPR017853;Dbxref=UniProt:A7V6D2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_00245 +#noc +contig_1 Prodigal:002006 gene 251336 253048 . - 0 ID=BU_ATCC8492_00224;Name=noc_1;gene=noc_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_00224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Alias=BACUNI_03176;extra_copy_number=0 +contig_1 Prodigal:002006 mRNA 251336 253048 . - 0 ID=transcript:BU_ATCC8492_00224;Name=noc_1;gene=noc_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_00224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BU_ATCC8492_00224 +contig_1 Prodigal:002006 exon 251336 253048 . - 0 ID=exon:BU_ATCC8492_00224;Name=noc_1;gene=noc_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_00224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_00224 +contig_1 Prodigal:002006 CDS 251336 253048 . - 0 ID=CDS:BU_ATCC8492_00224;Name=noc_1;gene=noc_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_00224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_00224 +contig_1 Prodigal:002006 gene 253199 253747 . - 0 ID=BU_ATCC8492_00225;Name=noc_2;gene=noc_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Alias=BACUNI_03175;extra_copy_number=0 +contig_1 Prodigal:002006 mRNA 253199 253747 . - 0 ID=transcript:BU_ATCC8492_00225;Name=noc_2;gene=noc_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BU_ATCC8492_00225 +contig_1 Prodigal:002006 exon 253199 253747 . - 0 ID=exon:BU_ATCC8492_00225;Name=noc_2;gene=noc_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_00225 +contig_1 Prodigal:002006 CDS 253199 253747 . - 0 ID=CDS:BU_ATCC8492_00225;Name=noc_2;gene=noc_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_00225 +#nod +contig_1 Prodigal:002006 gene 251336 253048 . - 0 ID=BU_ATCC8492_01224;Name=nod_1;gene=nod_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_01224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element +contig_1 Prodigal:002006 mRNA 251336 253048 . - 0 ID=transcript:BU_ATCC8492_01224;Name=nod_1;gene=nod_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_01224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BU_ATCC8492_01224 +contig_1 Prodigal:002006 exon 251336 253048 . - 0 ID=exon:BU_ATCC8492_01224;Name=nod_1;gene=nod_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_01224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_01224 +contig_1 Prodigal:002006 CDS 251336 253048 . - 0 ID=CDS:BU_ATCC8492_01224;Name=nod_1;gene=nod_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_01224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_01224 +contig_1 Prodigal:002006 gene 253199 253747 . - 0 ID=BU_ATCC8492_01225;Name=nod_2;gene=nod_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Alias=BACUNI_13175;extra_copy_number=0 +contig_1 Prodigal:002006 mRNA 253199 253747 . - 0 ID=transcript:BU_ATCC8492_01225;Name=nod_2;gene=nod_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BU_ATCC8492_01225 +contig_1 Prodigal:002006 exon 253199 253747 . - 0 ID=exon:BU_ATCC8492_01225;Name=nod_2;gene=nod_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_01225 +contig_1 Prodigal:002006 CDS 253199 253747 . - 0 ID=CDS:BU_ATCC8492_01225;Name=nod_2;gene=nod_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_01225 +#nos +contig_1 Prodigal:002006 gene 251336 253048 . - 0 ID=BU_ATCC8492_11224;Name=nos_1;gene=nos_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_11224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Alias=BACUNI_23176;extra_copy_number=0 +contig_1 Prodigal:002006 mRNA 251336 253048 . - 0 ID=transcript:BU_ATCC8492_11224;Name=nos_1;gene=nos_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_11224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BU_ATCC8492_11224 +contig_1 Prodigal:002006 exon 251336 253048 . - 0 ID=exon:BU_ATCC8492_11224;Name=nos_1;gene=nos_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_11224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_11224 +contig_1 Prodigal:002006 CDS 251336 253048 . - 0 ID=CDS:BU_ATCC8492_11224;Name=nos_1;gene=nos_1;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_11224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_11224 +contig_1 Prodigal:002006 gene 253199 253747 . - 0 ID=BU_ATCC8492_11225;Name=nos_2;gene=nos_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Alias=BACUNI_23175;extra_copy_number=0 +contig_1 Prodigal:002006 mRNA 253199 253747 . - 0 ID=transcript:BU_ATCC8492_11225;Name=nos_2;gene=nos_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BU_ATCC8492_11225 +contig_1 Prodigal:002006 exon 253199 253747 . - 0 ID=exon:BU_ATCC8492_11225;Name=nos_2;gene=nos_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_11225 +contig_1 Prodigal:002006 CDS 253199 253747 . - 0 ID=CDS:BU_ATCC8492_11225;Name=nos_2;gene=nos_2;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BU_ATCC8492_11225 diff --git a/tests/scripts/data/dummy_reference.gff b/tests/scripts/data/dummy_reference.gff new file mode 100644 index 00000000..7d559bcd --- /dev/null +++ b/tests/scripts/data/dummy_reference.gff @@ -0,0 +1,75 @@ +##gff-version 3 +# 2 genes below are both lacZ but with different BACUNIs; this should count as "unable to decide" +contig_4 Prodigal:002006 gene 33041 35692 . + 0 ID=gene:BACUNI_03971;eC_number=3.2.1.23;Name=lacZ;gene=lacZ;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_01687;locus_tag=BU_ATCC8492_00041;product=Beta-galactosidase;product_source=Prokka;eggNOG=411479.BACUNI_03971;cog=G;kegg=ko:K01190;pfam=PF00703,PF02836,PF02837,PF11721;interpro=IPR006101,IPR006102,IPR006103,IPR006104,IPR008979,IPR013783,IPR017853,IPR021720,IPR036156;dbcan_prot_type=CAZyme;dbcan_prot_family=CBM57|GH2_e35|CBM57_e12|GH2;substrate_dbcan-pul=alginate;substrate_dbcan-sub=hostglycan;uf_keyword=Hydrolase,Glycosidase;Dbxref=UniProt:A7V8Q2 +contig_4 Prodigal:002006 mRNA 33041 35692 . + 0 ID=transcript:BACUNI_03971;eC_number=3.2.1.23;Name=lacZ;gene=lacZ;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_01687;locus_tag=BU_ATCC8492_00041;product=Beta-galactosidase;product_source=Prokka;eggNOG=411479.BACUNI_03971;cog=G;kegg=ko:K01190;pfam=PF00703,PF02836,PF02837,PF11721;interpro=IPR006101,IPR006102,IPR006103,IPR006104,IPR008979,IPR013783,IPR017853,IPR021720,IPR036156;dbcan_prot_type=CAZyme;dbcan_prot_family=CBM57|GH2_e35|CBM57_e12|GH2;substrate_dbcan-pul=alginate;substrate_dbcan-sub=hostglycan;uf_keyword=Hydrolase,Glycosidase;Dbxref=UniProt:A7V8Q2;Parent=BACUNI_03971 +contig_4 Prodigal:002006 exon 33041 35692 . + 0 ID=exon:BACUNI_03971;eC_number=3.2.1.23;Name=lacZ;gene=lacZ;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_01687;locus_tag=BU_ATCC8492_00041;product=Beta-galactosidase;product_source=Prokka;eggNOG=411479.BACUNI_03971;cog=G;kegg=ko:K01190;pfam=PF00703,PF02836,PF02837,PF11721;interpro=IPR006101,IPR006102,IPR006103,IPR006104,IPR008979,IPR013783,IPR017853,IPR021720,IPR036156;dbcan_prot_type=CAZyme;dbcan_prot_family=CBM57|GH2_e35|CBM57_e12|GH2;substrate_dbcan-pul=alginate;substrate_dbcan-sub=hostglycan;uf_keyword=Hydrolase,Glycosidase;Dbxref=UniProt:A7V8Q2;Parent=transcript:BACUNI_03971 +contig_4 Prodigal:002006 CDS 33041 35692 . + 0 ID=CDS:BACUNI_03971;eC_number=3.2.1.23;Name=lacZ;gene=lacZ;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_01687;locus_tag=BU_ATCC8492_00041;product=Beta-galactosidase;product_source=Prokka;eggNOG=411479.BACUNI_03971;cog=G;kegg=ko:K01190;pfam=PF00703,PF02836,PF02837,PF11721;interpro=IPR006101,IPR006102,IPR006103,IPR006104,IPR008979,IPR013783,IPR017853,IPR021720,IPR036156;dbcan_prot_type=CAZyme;dbcan_prot_family=CBM57|GH2_e35|CBM57_e12|GH2;substrate_dbcan-pul=alginate;substrate_dbcan-sub=hostglycan;uf_keyword=Hydrolase,Glycosidase;Dbxref=UniProt:A7V8Q2;Parent=transcript:BACUNI_03971 +contig_4 Prodigal:002006 gene 35740 37305 . + 0 ID=gene:BACUNI_03970;eC_number=3.1.6.-;Name=lacZ;gene=lacZ;Dbxref=COG:COG3119,UniProt:A7V8Q1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8A2H2;locus_tag=BU_ATCC8492_00042;product=N-acetylgalactosamine-6-O-sulfatase;product_source=Prokka;eggNOG=411479.BACUNI_03970;cog=P;pfam=PF00884;interpro=IPR000917,IPR017850;uf_keyword=Hydrolase +contig_4 Prodigal:002006 mRNA 35740 37305 . + 0 ID=transcript:BACUNI_03970;eC_number=3.1.6.-;Name=lacZ;gene=lacZ;Dbxref=COG:COG3119,UniProt:A7V8Q1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8A2H2;locus_tag=BU_ATCC8492_00042;product=N-acetylgalactosamine-6-O-sulfatase;product_source=Prokka;eggNOG=411479.BACUNI_03970;cog=P;pfam=PF00884;interpro=IPR000917,IPR017850;uf_keyword=Hydrolase;Parent=BACUNI_03970 +contig_4 Prodigal:002006 exon 35740 37305 . + 0 ID=exon:BACUNI_03970;eC_number=3.1.6.-;Name=lacZ;gene=lacZ;Dbxref=COG:COG3119,UniProt:A7V8Q1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8A2H2;locus_tag=BU_ATCC8492_00042;product=N-acetylgalactosamine-6-O-sulfatase;product_source=Prokka;eggNOG=411479.BACUNI_03970;cog=P;pfam=PF00884;interpro=IPR000917,IPR017850;uf_keyword=Hydrolase;Parent=transcript:BACUNI_03970 +contig_4 Prodigal:002006 CDS 35740 37305 . + 0 ID=CDS:BACUNI_03970;eC_number=3.1.6.-;Name=lacZ;gene=lacZ;Dbxref=COG:COG3119,UniProt:A7V8Q1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q8A2H2;locus_tag=BU_ATCC8492_00042;product=N-acetylgalactosamine-6-O-sulfatase;product_source=Prokka;eggNOG=411479.BACUNI_03970;cog=P;pfam=PF00884;interpro=IPR000917,IPR017850;uf_keyword=Hydrolase;Parent=transcript:BACUNI_03970 +# fucP gene below is present in only one copy in reference, this should count as "replaced" +contig_4 Prodigal:002006 gene 27622 30429 . + 0 ID=gene:BACUNI_03974;Name=fucP;gene=fucP;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;interpro=IPR013783,IPR015943;Dbxref=UniProt:A7V8Q5 +contig_4 Prodigal:002006 mRNA 27622 30429 . + 0 ID=transcript:BACUNI_03974;Name=fucP;gene=fucP;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;interpro=IPR013783,IPR015943;Dbxref=UniProt:A7V8Q5;Parent=BU_ATCC8492_00039 +contig_4 Prodigal:002006 exon 27622 30429 . + 0 ID=exon:BACUNI_03974;Name=fucP;gene=fucP;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;interpro=IPR013783,IPR015943;Dbxref=UniProt:A7V8Q5;Parent=transcript:BU_ATCC8492_00039 +contig_4 Prodigal:002006 CDS 27622 30429 . + 0 ID=CDS:BACUNI_03974;Name=fucP;gene=fucP;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;interpro=IPR013783,IPR015943;Dbxref=UniProt:A7V8Q5;Parent=transcript:BU_ATCC8492_00039 +# below is a case where 2 BACUNI's are duplicates in Prokka but have different gene names in reference; this should count as "replaced" +contig_4 Prodigal:002006 gene 27622 30429 . + 0 ID=gene:BACUNI_13974;Name=dnaA;gene=dnaA;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;interpro=IPR013783,IPR015943;Dbxref=UniProt:A7V8Q5 +contig_4 Prodigal:002006 mRNA 27622 30429 . + 0 ID=transcript:BACUNI_13974;Name=dnaA;gene=dnaA;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;interpro=IPR013783,IPR015943;Dbxref=UniProt:A7V8Q5;Parent=BU_ATCC8492_00039 +contig_4 Prodigal:002006 exon 27622 30429 . + 0 ID=exon:BACUNI_13974;Name=dnaA;gene=dnaA;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;interpro=IPR013783,IPR015943;Dbxref=UniProt:A7V8Q5;Parent=transcript:BU_ATCC8492_00039 +contig_4 Prodigal:002006 CDS 27622 30429 . + 0 ID=CDS:BACUNI_13974;Name=dnaA;gene=dnaA;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00039;product=WD40/YVTN repeat-like-containing domain superfamily protein;product_source=InterPro(Gene3D);eggNOG=411479.BACUNI_03974;cog=R;interpro=IPR013783,IPR015943;Dbxref=UniProt:A7V8Q5;Parent=transcript:BU_ATCC8492_00039 +contig_4 Prodigal:002006 gene 30597 32888 . + 0 ID=gene:BACUNI_13973;eC_number=3.2.1.-;Name=dnaB;gene=dnaB;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KMH0;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;cog=G;kegg=ko:K05349;pfam=PF00933,PF01915,PF14310;interpro=IPR001764,IPR002772,IPR013783,IPR017853,IPR026891,IPR036881,IPR036962;dbcan_prot_type=CAZyme;dbcan_prot_family=GH3_e1|GH3;substrate_dbcan-pul=alginate;substrate_dbcan-sub=hostglycan;uf_keyword=Hydrolase;Dbxref=UniProt:A7V8Q3 +contig_4 Prodigal:002006 mRNA 30597 32888 . + 0 ID=transcript:BACUNI_13973;eC_number=3.2.1.-;Name=dnaB;gene=dnaB;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KMH0;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;cog=G;kegg=ko:K05349;pfam=PF00933,PF01915,PF14310;interpro=IPR001764,IPR002772,IPR013783,IPR017853,IPR026891,IPR036881,IPR036962;dbcan_prot_type=CAZyme;dbcan_prot_family=GH3_e1|GH3;substrate_dbcan-pul=alginate;substrate_dbcan-sub=hostglycan;uf_keyword=Hydrolase;Dbxref=UniProt:A7V8Q3;Parent=BU_ATCC8492_00040 +contig_4 Prodigal:002006 exon 30597 32888 . + 0 ID=exon:BACUNI_13973;eC_number=3.2.1.-;Name=dnaB;gene=dnaB;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KMH0;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;cog=G;kegg=ko:K05349;pfam=PF00933,PF01915,PF14310;interpro=IPR001764,IPR002772,IPR013783,IPR017853,IPR026891,IPR036881,IPR036962;dbcan_prot_type=CAZyme;dbcan_prot_family=GH3_e1|GH3;substrate_dbcan-pul=alginate;substrate_dbcan-sub=hostglycan;uf_keyword=Hydrolase;Dbxref=UniProt:A7V8Q3;Parent=transcript:BU_ATCC8492_00040 +contig_4 Prodigal:002006 CDS 30597 32888 . + 0 ID=CDS:BACUNI_13973;eC_number=3.2.1.-;Name=dnaB;gene=dnaB;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KMH0;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;cog=G;kegg=ko:K05349;pfam=PF00933,PF01915,PF14310;interpro=IPR001764,IPR002772,IPR013783,IPR017853,IPR026891,IPR036881,IPR036962;dbcan_prot_type=CAZyme;dbcan_prot_family=GH3_e1|GH3;substrate_dbcan-pul=alginate;substrate_dbcan-sub=hostglycan;uf_keyword=Hydrolase;Dbxref=UniProt:A7V8Q3;Parent=transcript:BU_ATCC8492_00040 +#dnaG - one copy has an alias, the other does not, so the underscore should be removed from the copy that has an alias; this should count as "replaced" +contig_4 Prodigal:002006 gene 30597 32888 . + 0 ID=gene:BACUNI_01111;eC_number=3.2.1.-;Name=dnaG;gene=dnaG;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;eggNOG=411479.BACUNI_03973 +contig_4 Prodigal:002006 mRNA 30597 32888 . + 0 ID=transcript:BACUNI_01111;eC_number=3.2.1.-;Name=dnaG;gene=dnaG;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;Parent=BACUNI_01111 +contig_4 Prodigal:002006 exon 30597 32888 . + 0 ID=exon:BACUNI_01111;eC_number=3.2.1.-;Name=dnaG;gene=dnaG;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;Parent=transcript:BACUNI_01111 +contig_4 Prodigal:002006 CDS 30597 32888 . + 0 ID=CDS:BACUNI_01111;eC_number=3.2.1.-;Name=dnaG;gene=dnaG;locus_tag=BU_ATCC8492_00040;product=Beta-xylosidase;product_source=Prokka;Parent=transcript:BACUNI_01111 +#rcsC - the two reference copies have different gene names but one of them (rcsB) is already present in the target in a different location; this should count as "unable to decide" +contig_4 Prodigal:002006 gene 37353 38537 . + 0 ID=gene:BACUNI_03969;locus_tag=BU_ATCC8492_00043;Name=rcsC;gene=rcsC;product=Protein of unknown function DUF2961;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03969 +contig_4 Prodigal:002006 mRNA 37353 38537 . + 0 ID=transcript:BACUNI_03969;locus_tag=BU_ATCC8492_00043;product=Protein of unknown function DUF2961;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03969;Parent=BACUNI_03969 +contig_4 Prodigal:002006 exon 37353 38537 . + 0 ID=exon:BACUNI_03969;locus_tag=BU_ATCC8492_00043;product=Protein of unknown function DUF2961;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03969;Parent=transcript:BACUNI_03969 +contig_4 Prodigal:002006 CDS 37353 38537 . + 0 ID=CDS:BACUNI_03969;locus_tag=BU_ATCC8492_00043;product=Protein of unknown function DUF2961;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03969;Parent=transcript:BACUNI_03969 +contig_4 Prodigal:002006 gene 42873 45848 . + 0 ID=gene:BACUNI_03965;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KPJ3;Name=rcsB;gene=rcsB;locus_tag=BU_ATCC8492_00047;product=TonB-dependent receptor P3;product_source=Prokka;eggNOG=411479.BACUNI_03965 +contig_4 Prodigal:002006 mRNA 42873 45848 . + 0 ID=transcript:BACUNI_03965;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KPJ3;locus_tag=BU_ATCC8492_00047;product=TonB-dependent receptor P3;product_source=Prokka;eggNOG=411479.BACUNI_03965;Parent=BACUNI_03965 +contig_4 Prodigal:002006 exon 42873 45848 . + 0 ID=exon:BACUNI_03965;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KPJ3;locus_tag=BU_ATCC8492_00047;product=TonB-dependent receptor P3;product_source=Prokka;eggNOG=411479.BACUNI_03965;Parent=transcript:BACUNI_03965 +contig_4 Prodigal:002006 CDS 42873 45848 . + 0 ID=CDS:BACUNI_03965;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:T2KPJ3;locus_tag=BU_ATCC8492_00047;product=TonB-dependent receptor P3;product_source=Prokka;eggNOG=411479.BACUNI_03965;Parent=transcript:BACUNI_03965 +#susC and susD - the same reference gene below is used to deduplicate 2 separate gene groups. Replacement is not possible and an earlier replacement should be undone +contig_4 Prodigal:002006 gene 45874 47382 . + 0 ID=gene:BACUNI_93964;locus_tag=BACUNI_93964;Name=susC;gene=susC;product=SusD-like protein P2;product_source=Prokka;eggNOG=411479.BACUNI_03964 +contig_4 Prodigal:002006 mRNA 45874 47382 . + 0 ID=transcript:BACUNI_93964;locus_tag=BACUNI_93964;Name=susC;gene=susC;product=SusD-like protein P2;product_source=Prokka;eggNOG=411479.BACUNI_03964;Parent=BACUNI_93964 +contig_4 Prodigal:002006 exon 45874 47382 . + 0 ID=exon:BACUNI_93964;locus_tag=BACUNI_93964;Name=susC;gene=susC;product=SusD-like protein P2;product_source=Prokka;eggNOG=411479.BACUNI_03964;Parent=transcript:BACUNI_93964 +contig_4 Prodigal:002006 CDS 45874 47382 . + 0 ID=CDS:BACUNI_93964;locus_tag=BACUNI_93964;Name=Name=susC;gene=susC;product=SusD-like protein P2;product_source=Prokka;eggNOG=411479.BACUNI_03964;Parent=transcript:BACUNI_93964 +#axe7A - the first reference gene ID (bacuni) occurs in target twice; the second alias has a different gene name. The different gene name should get replaced, the first two copies should retain the underscores +contig_1 Prodigal:002006 gene 275640 276932 . + 0 ID=gene:BACUNI_03144;eC_number=3.1.1.-;Name=axe7A;Dbxref=COG:COG3458,UniProt:A7V6D1;gene=axe7A;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:D5EXI2;locus_tag=BU_ATCC8492_00246;product=Acetyl esterase Axe7A;product_source=Prokka;eggNOG=411479.BACUNI_03144;cog=Q;pfam=PF05448;interpro=IPR008391,IPR029058,IPR039069;dbcan_prot_type=CAZyme;dbcan_prot_family=CE7_e10|CE7;substrate_dbcan-pul=pectin;substrate_dbcan-sub=N/A;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element +contig_1 Prodigal:002006 mRNA 275640 276932 . + 0 ID=transcript:BACUNI_03144;eC_number=3.1.1.-;Name=axe7A;Dbxref=COG:COG3458,UniProt:A7V6D1;gene=axe7A;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:D5EXI2;locus_tag=BU_ATCC8492_00246;product=Acetyl esterase Axe7A;product_source=Prokka;eggNOG=411479.BACUNI_03144;cog=Q;pfam=PF05448;interpro=IPR008391,IPR029058,IPR039069;dbcan_prot_type=CAZyme;dbcan_prot_family=CE7_e10|CE7;substrate_dbcan-pul=pectin;substrate_dbcan-sub=N/A;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BACUNI_03144 +contig_1 Prodigal:002006 exon 275640 276932 . + 0 ID=exon:BACUNI_03144;eC_number=3.1.1.-;Name=axe7A;Dbxref=COG:COG3458,UniProt:A7V6D1;gene=axe7A;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:D5EXI2;locus_tag=BU_ATCC8492_00246;product=Acetyl esterase Axe7A;product_source=Prokka;eggNOG=411479.BACUNI_03144;cog=Q;pfam=PF05448;interpro=IPR008391,IPR029058,IPR039069;dbcan_prot_type=CAZyme;dbcan_prot_family=CE7_e10|CE7;substrate_dbcan-pul=pectin;substrate_dbcan-sub=N/A;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BACUNI_03144 +contig_1 Prodigal:002006 CDS 275640 276932 . + 0 ID=CDS:BACUNI_03144;eC_number=3.1.1.-;Name=axe7A;Dbxref=COG:COG3458,UniProt:A7V6D1;gene=axe7A;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:D5EXI2;locus_tag=BU_ATCC8492_00246;product=Acetyl esterase Axe7A;product_source=Prokka;eggNOG=411479.BACUNI_03144;cog=Q;pfam=PF05448;interpro=IPR008391,IPR029058,IPR039069;dbcan_prot_type=CAZyme;dbcan_prot_family=CE7_e10|CE7;substrate_dbcan-pul=pectin;substrate_dbcan-sub=N/A;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BACUNI_03144 +contig_1 Prodigal:002006 gene 273563 275626 . + 0 ID=gene:BACUNI_03145;Name=axe7B;gene=axe7B;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00245;product=Glycoside hydrolase superfamily protein;product_source=InterPro(SUPERFAMILY);eggNOG=411479.BACUNI_03145;cog=G;interpro=IPR013785,IPR017853;Dbxref=UniProt:A7V6D2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element +contig_1 Prodigal:002006 mRNA 273563 275626 . + 0 ID=transcript:BACUNI_03145;Name=axe7B;gene=axe7B;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00245;product=Glycoside hydrolase superfamily protein;product_source=InterPro(SUPERFAMILY);eggNOG=411479.BACUNI_03145;cog=G;interpro=IPR013785,IPR017853;Dbxref=UniProt:A7V6D2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BACUNI_03145 +contig_1 Prodigal:002006 exon 273563 275626 . + 0 ID=exon:BACUNI_03145;Name=axe7B;gene=axe7B;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00245;product=Glycoside hydrolase superfamily protein;product_source=InterPro(SUPERFAMILY);eggNOG=411479.BACUNI_03145;cog=G;interpro=IPR013785,IPR017853;Dbxref=UniProt:A7V6D2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BACUNI_03145 +contig_1 Prodigal:002006 CDS 273563 275626 . + 0 ID=CDS:BACUNI_03145;Name=axe7B;gene=axe7B;inference=ab initio prediction:Prodigal:002006;locus_tag=BU_ATCC8492_00245;product=Glycoside hydrolase superfamily protein;product_source=InterPro(SUPERFAMILY);eggNOG=411479.BACUNI_03145;cog=G;interpro=IPR013785,IPR017853;Dbxref=UniProt:A7V6D2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BACUNI_03145 +#noc - one copy is known as noc, the other is known as a different gene, both should have underscores removed and the second renamed +contig_1 Prodigal:002006 gene 251336 253048 . - 0 ID=gene:BACUNI_03176;Name=noc;gene=noc;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_00224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element +contig_1 Prodigal:002006 mRNA 251336 253048 . - 0 ID=transcript:BACUNI_03176;Name=noc;gene=noc;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_00224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BACUNI_03176 +contig_1 Prodigal:002006 exon 251336 253048 . - 0 ID=exon:BACUNI_03176;Name=noc;gene=noc;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_00224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BACUNI_03176 +contig_1 Prodigal:002006 CDS 251336 253048 . - 0 ID=CDS:BACUNI_03176;Name=noc;gene=noc;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_00224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BACUNI_03176 +contig_1 Prodigal:002006 gene 253199 253747 . - 0 ID=gene:BACUNI_03175;Name=nof;gene=nof;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element +contig_1 Prodigal:002006 mRNA 253199 253747 . - 0 ID=transcript:BACUNI_03175;Name=nof;gene=nof;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BACUNI_03175 +contig_1 Prodigal:002006 exon 253199 253747 . - 0 ID=exon:BACUNI_03175;Name=nof;gene=nof;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BACUNI_03175 +contig_1 Prodigal:002006 CDS 253199 253747 . - 0 ID=CDS:BACUNI_03175;Name=nof;gene=nof;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BACUNI_03175 +#nod - one copy has no stable id assigned, the other is a different gene, both should have underscores removed and the second renamed +contig_1 Prodigal:002006 gene 253199 253747 . - 0 ID=gene:BACUNI_13175;Name=nog;gene=nog;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Alias=BACUNI_13175;extra_copy_number=0 +contig_1 Prodigal:002006 mRNA 253199 253747 . - 0 ID=transcript:BACUNI_13175;Name=nog;gene=nog;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BACUNI_13175 +contig_1 Prodigal:002006 exon 253199 253747 . - 0 ID=exon:BACUNI_13175;Name=nog;gene=nog;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BACUNI_13175 +contig_1 Prodigal:002006 CDS 253199 253747 . - 0 ID=CDS:BACUNI_13175;Name=nog;gene=nog;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BACUNI_13175 +#nos - each copy get a new gene name +contig_1 Prodigal:002006 gene 251336 253048 . - 0 ID=gene:BACUNI_23176;Name=noz;gene=noz;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_00224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element +contig_1 Prodigal:002006 mRNA 251336 253048 . - 0 ID=transcript:BACUNI_23176;Name=noz;gene=noz;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_00224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BACUNI_23176 +contig_1 Prodigal:002006 exon 251336 253048 . - 0 ID=exon:BACUNI_23176;Name=noz;gene=noz;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_00224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BACUNI_23176 +contig_1 Prodigal:002006 CDS 251336 253048 . - 0 ID=CDS:BACUNI_23176;Name=noz;gene=noz;inference=ab initio prediction:Prodigal:002006,protein motif:HAMAP:MF_02015;locus_tag=BU_ATCC8492_00224;product=Nucleoid occlusion protein;product_source=Prokka;eggNOG=411479.BACUNI_03176;cog=K;kegg=ko:K03497;pfam=PF02195;interpro=IPR003115,IPR004437,IPR036086;Dbxref=UniProt:A7V6G3;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BACUNI_23176 +contig_1 Prodigal:002006 gene 253199 253747 . - 0 ID=gene:BACUNI_23175;Name=noq;gene=noq;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element +contig_1 Prodigal:002006 mRNA 253199 253747 . - 0 ID=transcript:BACUNI_23175;Name=noq;gene=noq;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=BACUNI_23175 +contig_1 Prodigal:002006 exon 253199 253747 . - 0 ID=exon:BACUNI_23175;Name=noq;gene=noq;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BACUNI_23175 +contig_1 Prodigal:002006 CDS 253199 253747 . - 0 ID=CDS:BACUNI_23175;Name=noq;gene=noq;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P50838;locus_tag=BU_ATCC8492_00225;note=UPF0398 protein YpsA;product=YspA SLOG family protein;product_source=InterPro(Pfam);eggNOG=411479.BACUNI_03175;cog=S;pfam=PF06908;interpro=IPR010697;Dbxref=UniProt:A7V6G2;mge_id=BU_ATCC8492|contig_1:205218-296049;mge_types=integron,insertion_sequence,conjugative_element;Parent=transcript:BACUNI_23175