diff --git a/.github/workflows/gtf_tests.yml b/.github/workflows/gtf_tests.yml new file mode 100644 index 00000000..589de684 --- /dev/null +++ b/.github/workflows/gtf_tests.yml @@ -0,0 +1,133 @@ +name: GTF Comparison Tests + +on: + pull_request: + branches: [ "develop", "master" ] + paths: + - '3rd-party-tools/build-indices/**' + +jobs: + test: + runs-on: ubuntu-latest + + defaults: + run: + working-directory: 3rd-party-tools/build-indices + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pandas + + - name: Create output directories + run: | + mkdir -p test_output/comparison_files + mkdir -p test_data/reference_outputs + + - name: Verify test data + run: | + if [ ! -f "test_data/test1.gtf" ]; then + echo "Error: Required test file test_data/test1.gtf not found" + ls -la test_data/ + exit 1 + fi + if [ ! -f "Biotypes.tsv" ]; then + echo "Error: Required Biotypes.tsv file not found" + ls -la + exit 1 + fi + echo "Test files present:" + ls -l test_data/test1.gtf Biotypes.tsv + + - name: Run GTF modification and comparison + id: gtf_tests + env: + PYTHONPATH: ${{ github.workspace }}/3rd-party-tools/build-indices + run: | + # Run the unit tests + python -m unittest test_gtf_comparison.py -v + continue-on-error: true + + - name: Prepare artifacts + if: always() + run: | + # Create directory for all artifacts + mkdir -p artifact_output + + # Copy test input files + cp test_data/test1.gtf artifact_output/ + cp Biotypes.tsv artifact_output/ + + # Copy test outputs if they exist + if [ -d "test_output" ]; then + cp -r test_output/* artifact_output/ + fi + + # Copy reference outputs if they exist + if [ -d "test_data/reference_outputs" ]; then + mkdir -p artifact_output/reference_outputs + cp -r test_data/reference_outputs/* artifact_output/reference_outputs/ + fi + + # Create manifest + { + echo "GTF 
Test Artifacts" + echo "Generated: $(date)" + echo "" + echo "Test Input Files:" + ls -l artifact_output/test1.gtf artifact_output/Biotypes.tsv + echo "" + echo "Test Outputs:" + ls -R artifact_output/test_output/ 2>/dev/null || echo "No test outputs found" + echo "" + echo "Reference Outputs:" + ls -R artifact_output/reference_outputs/ 2>/dev/null || echo "No reference outputs found" + } > artifact_output/manifest.txt + + - name: Upload test artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: gtf-test-results + path: | + 3rd-party-tools/build-indices/artifact_output/**/* + 3rd-party-tools/build-indices/test_output/**/* + 3rd-party-tools/build-indices/test_data/reference_outputs/**/* + compression-level: 9 + retention-days: 14 + + - name: Check test results + if: always() + run: | + echo "=== Test Results Summary ===" + + # Check unit test results + if [ "${{ steps.gtf_tests.outcome }}" == "success" ]; then + echo "✅ GTF tests passed" + else + echo "❌ GTF tests failed" + + # Display difference report if it exists + if [ -f "test_output/comparison_files/difference_report.txt" ]; then + echo "" + echo "Differences found:" + cat test_output/comparison_files/difference_report.txt + fi + + # Display test summary if it exists + if [ -f "test_output/comparison_files/test_summary.txt" ]; then + echo "" + echo "Test Summary:" + cat test_output/comparison_files/test_summary.txt + fi + + exit 1 + fi \ No newline at end of file diff --git a/3rd-party-tools/build-indices/README.md b/3rd-party-tools/build-indices/README.md index b93d2dd5..d5f9d69b 100644 --- a/3rd-party-tools/build-indices/README.md +++ b/3rd-party-tools/build-indices/README.md @@ -2,10 +2,9 @@ ## Quick reference -Copy and paste to pull this image +Copy and paste to pull this image: #### `docker pull us.gcr.io/broad-gotc-prod/build-indices:1.0.0-2.7.10a-1663605340` -` - __What is this image:__ This image is a Debian-based custom image with STAR installed and pre-configured along with 
python scripts to build indices. - __What is STAR:__ Spliced Transcripts Alignment to a Reference (STAR) is a fast RNA-seq read mapper, with support for splice-junction and fusion read detection. STAR aligns reads by finding the Maximal Mappable Prefix (MMP) hits between reads (or read pairs) and the genome, using a Suffix Array index, [more info here](https://github.com/alexdobin/STAR). @@ -15,7 +14,7 @@ Copy and paste to pull this image Build_indices uses the following convention for versioning: -#### `us.gcr.io/broad-gotc-prod/build-indices:--` +#### `us.gcr.io/broad-gotc-prod/build-indices:--` We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP. @@ -28,7 +27,7 @@ $ docker inspect us.gcr.io/broad-gotc-prod/build-indices:1.0.0-2.7.10a-166360534 ## Usage -### Build_indices +### Build_indices Docker Container ```bash $ docker run --rm -it \ @@ -36,4 +35,124 @@ $ docker run --rm -it \ build-indices bash ``` -Then you can exec into the container and use STAR or any of the scripts accordingly. Alternatively, you can run one-off commands by passing the command as a docker run parameter. \ No newline at end of file +Then you can exec into the container and use STAR or any of the scripts accordingly. Alternatively, you can run one-off commands by passing the command as a docker run parameter. + +## GTF Comparison Tools + +This repository includes tools for comparing and testing GTF (Gene Transfer Format) file modifications. These tools ensure consistency in GTF processing and provide detailed comparison reports. 
+ +### Components + +#### Scripts +- `compare_gtfs.py` - Analyzes differences between two GTF files +- `test_gtf_comparison.py` - Unit tests for GTF comparison functionality +- `modify_gtf.py` - Script to modify GTF files + +#### Required Files +- `test_data/test1.gtf` - Test GTF file +- `Biotypes.tsv` - File containing allowed biotypes + +### Features + +The comparison tool analyzes: +- Structural differences in GTF fields +- Attribute differences, including: + - Reordered attributes + - Extra or missing attributes + - Different attribute values +- Gene-level differences +- Mitochondrial gene comparisons + +### Running GTF Comparison + +```bash +python compare_gtfs.py GTF1 GTF2 --output-prefix PREFIX +``` + +Example: +```bash +python compare_gtfs.py test_data/test1.gtf modified_output.gtf --output-prefix comparison +``` + +### Testing + +Run the test suite: +```bash +python -m unittest test_gtf_comparison.py -v +``` + +### GitHub Actions Integration + +Automated testing is configured via GitHub Actions: +- Runs comparison tests +- Generates reports +- Uploads test artifacts + +Configuration file: `.github/workflows/gtf_tests.yml` + +### Output Reports + +1. Structural Differences (`PREFIX_structural_diff.txt`): + - Row counts + - Field differences + - Sample comparisons + +2. Attribute Differences (`PREFIX_attribute_diff.txt`): + - Attribute summaries + - Detailed comparisons + - Value differences + +3. 
# ---------------------------------------------------------------------------
# Remainder of the README.md hunk that shares these source lines with the
# Python code below; kept verbatim (diff markers included) so nothing is lost.
# ---------------------------------------------------------------------------
# Gene Differences (`_gene_diff.txt`):
# + - Gene counts
# + - Unique gene lists
# + - MT gene analysis
# +
# +### Requirements
# +
# +- Python 3.x
# +- pandas
# +- Standard Python libraries
# +
# +Install dependencies:
# +```bash
# +pip install pandas
# +```
# +
# +### Directory Structure
# +
# +```
# +build-indices/
# +├── test_data/
# +│ ├── test1.gtf
# +│ └── reference_outputs/
# +├── test_output/
# +│ └── comparison_files/
# +├── compare_gtfs.py
# +├── test_gtf_comparison.py
# +├── modify_gtf.py
# +└── Biotypes.tsv
# +```
# +
# +### Error Handling
# +
# +The tools include comprehensive error handling for:
# +- Missing files
# +- Malformed GTF content
# +- Directory issues
# +- Attribute parsing errors
# +
# +### Contributing
# +
# +When modifying these tools:
# +1. Ensure all tests pass
# +2. Update test cases for new features
# +3. Maintain Docker compatibility
# +4. Update documentation
# +5. Follow GitHub Actions workflow requirements
# +
# +## Notes
# +
# +- GTF comparison is sensitive to format variations
# +- Docker container provides consistent environment
# +- All scripts are accessible within the container
# +- Use reference files for reliable testing
# \ No newline at end of file
# diff --git a/3rd-party-tools/build-indices/compare_gtfs.py b/3rd-party-tools/build-indices/compare_gtfs.py
# new file mode 100644
# index 00000000..f9701b67
# --- /dev/null
# +++ b/3rd-party-tools/build-indices/compare_gtfs.py
# @@ -0,0 +1,248 @@
#!/usr/bin/env python3
"""Compare two GTF files and write structural, attribute and gene-level reports."""

import argparse
import csv
import os
import sys
from collections import defaultdict

import pandas as pd

# The nine tab-separated fields of a GTF record, in file order.
_GTF_COLUMNS = ['seqname', 'source', 'feature', 'start', 'end', 'score', 'strand', 'frame', 'attributes']
# How many differing rows are quoted per column in the structural report.
_MAX_SAMPLE_ROWS = 5
# How many per-row attribute diffs are written out in full.
_MAX_DETAILED_DIFFS = 10


def ensure_output_directory(output_prefix):
    """Create the directory part of ``output_prefix`` if it has one.

    A bare prefix such as ``"gtf_comparison"`` has no directory component, so
    nothing is created and reports land in the current working directory.
    """
    output_dir = os.path.dirname(output_prefix)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)


def verify_input_files(gtf1_path, gtf2_path):
    """Check that both GTF paths point at existing regular files.

    Raises:
        FileNotFoundError: if either path is missing or is not a file
            (a directory is rejected here instead of failing later inside
            the parser with a less helpful message).
    """
    if not os.path.isfile(gtf1_path):
        raise FileNotFoundError(f"First GTF file not found: {gtf1_path}")
    if not os.path.isfile(gtf2_path):
        raise FileNotFoundError(f"Second GTF file not found: {gtf2_path}")


def _split_attribute_pairs(attr_str):
    """Split an attribute string on ';' separators that sit outside double quotes."""
    if attr_str.count('"') % 2:
        # Unbalanced quotes: quote tracking would be unreliable, so fall back
        # to a plain split rather than swallowing the rest of the line.
        return attr_str.split(';')

    pairs = []
    current = []
    in_quotes = False
    for char in attr_str:
        if char == '"':
            in_quotes = not in_quotes
        if char == ';' and not in_quotes:
            pairs.append(''.join(current))
            current = []
        else:
            current.append(char)
    pairs.append(''.join(current))
    return pairs


def parse_attributes(attr_str):
    """Parse GTF attributes (9th field) into a dictionary, normalizing the values.

    Keys and values are stripped of surrounding whitespace and values of their
    surrounding double quotes. A ';' inside a quoted value does not end the
    pair. Pairs without a value are ignored; if a key repeats, the last
    occurrence wins.

    Returns:
        dict mapping attribute key to value; empty for ``None``, ``""`` or any
        non-string input (e.g. a NaN cell).
    """
    attrs = {}
    if not attr_str or not isinstance(attr_str, str):
        return attrs

    for pair in _split_attribute_pairs(attr_str.strip()):
        key_value = pair.strip().split(None, 1)
        if len(key_value) != 2:
            continue
        key, value = key_value
        attrs[key.strip()] = value.strip().strip('"')
    return attrs


def get_attribute_differences(dict1, dict2):
    """Get detailed differences between two attribute dictionaries.

    Returns:
        A 4-tuple ``(extra_in_1, extra_in_2, different_values, same_values)``:
        keys only in ``dict1``, keys only in ``dict2``, shared keys whose
        values differ (mapped to a ``(value1, value2)`` tuple) and shared keys
        whose values match.
    """
    extra_in_1 = {k: v for k, v in dict1.items() if k not in dict2}
    extra_in_2 = {k: v for k, v in dict2.items() if k not in dict1}
    different_values = {}
    same_values = {}

    for key in dict1.keys() & dict2.keys():
        if dict1[key] != dict2[key]:
            different_values[key] = (dict1[key], dict2[key])
        else:
            same_values[key] = dict1[key]

    return extra_in_1, extra_in_2, different_values, same_values


def format_attribute_diff(extra_1, extra_2, diff_vals, row_num=None):
    """Format attribute differences for reporting.

    Sections with nothing to report are omitted; keys are listed in sorted
    order. When ``row_num`` is given, the text starts with a blank line and a
    ``Row N:`` heading.
    """
    lines = []
    if row_num is not None:
        lines.append(f"\nRow {row_num}:")

    if extra_1:
        lines.append(" Extra attributes in GTF1:")
        lines.extend(f" {k} \"{v}\"" for k, v in sorted(extra_1.items()))

    if extra_2:
        lines.append(" Extra attributes in GTF2:")
        lines.extend(f" {k} \"{v}\"" for k, v in sorted(extra_2.items()))

    if diff_vals:
        lines.append(" Different values:")
        lines.extend(
            f" {k}: GTF1=\"{v1}\", GTF2=\"{v2}\""
            for k, (v1, v2) in sorted(diff_vals.items())
        )

    return "\n".join(lines)


def _read_gtf(path):
    """Load the data rows of a GTF file as a DataFrame of raw strings.

    Every field is kept as text so identical files always compare equal:
    empty fields stay ``""`` instead of becoming NaN (NaN != NaN), and quotes
    are left untouched. Header lines are dropped by their leading '#'; the
    parser's ``comment`` option is deliberately not used because it would
    also cut a data line short at a '#' inside an attribute value.
    """
    try:
        frame = pd.read_csv(
            path,
            sep='\t',
            names=_GTF_COLUMNS,
            header=None,
            index_col=False,
            dtype=str,
            keep_default_na=False,
            quoting=csv.QUOTE_NONE,
        )
    except pd.errors.EmptyDataError:
        # A zero-byte file simply has no records.
        return pd.DataFrame(columns=_GTF_COLUMNS, dtype=str)

    is_header = frame['seqname'].str.startswith('#').astype(bool)
    return frame.loc[~is_header].reset_index(drop=True)


def _structural_report(gtf1, gtf2, common_rows):
    """Build the report lines for the first eight (non-attribute) columns."""
    report = [
        f"Total rows in GTF1: {len(gtf1)}",
        f"Total rows in GTF2: {len(gtf2)}",
        f"Row difference: {abs(len(gtf1) - len(gtf2))}",
    ]

    for col in _GTF_COLUMNS[:8]:
        left = gtf1[col].iloc[:common_rows]
        right = gtf2[col].iloc[:common_rows]
        mismatch = left != right
        differences = int(mismatch.sum())
        if not differences:
            continue
        report.append(f"Column '{col}' differs in {differences} rows (of common rows)")
        for idx in mismatch[mismatch].index[:_MAX_SAMPLE_ROWS]:
            report.append(f" Row {idx}: GTF1='{left.iloc[idx]}', GTF2='{right.iloc[idx]}'")
    return report


def _attribute_report(gtf1, gtf2, common_rows):
    """Compare the attribute column row by row.

    Returns:
        ``(summary, detailed)`` where ``summary`` maps each differing key to
        its per-category row counts and ``detailed`` is the list of formatted
        per-row difference texts.
    """
    summary = defaultdict(lambda: {'only_in_1': 0, 'only_in_2': 0, 'different_values': 0})
    detailed = []

    rows = zip(gtf1['attributes'].iloc[:common_rows], gtf2['attributes'].iloc[:common_rows])
    for row_num, (attr1, attr2) in enumerate(rows):
        extra_1, extra_2, diff_vals, _ = get_attribute_differences(
            parse_attributes(attr1), parse_attributes(attr2)
        )
        if not (extra_1 or extra_2 or diff_vals):
            continue

        detailed.append(format_attribute_diff(extra_1, extra_2, diff_vals, row_num))
        for k in extra_1:
            summary[k]['only_in_1'] += 1
        for k in extra_2:
            summary[k]['only_in_2'] += 1
        for k in diff_vals:
            summary[k]['different_values'] += 1
    return summary, detailed


def _collect_gene_ids(gtf):
    """Return the set of ``gene_id`` values found in a frame's attribute column."""
    gene_ids = set()
    for attr in gtf['attributes']:
        attrs = parse_attributes(attr)
        if 'gene_id' in attrs:
            gene_ids.add(attrs['gene_id'])
    return gene_ids


def _write_structural_report(path, struct_diff):
    """Write the structural report lines to ``path``."""
    with open(path, 'w', encoding='utf-8') as f:
        f.write("Structural Differences (first 8 fields):\n")
        f.writelines(f"{diff}\n" for diff in struct_diff)


def _write_attribute_report(path, attr_diff, detailed_diffs):
    """Write the per-key summary table and the first detailed row diffs to ``path``."""
    with open(path, 'w', encoding='utf-8') as f:
        f.write("Attribute Differences (9th field):\n\n")
        f.write(f"{'Attribute Key':<30} {'Only in GTF1':<15} {'Only in GTF2':<15} {'Different Values':<15}\n")
        f.write("-" * 75 + "\n")
        for key, counts in sorted(attr_diff.items()):
            if any(counts.values()):
                f.write(f"{key:<30} {counts['only_in_1']:<15} {counts['only_in_2']:<15} {counts['different_values']:<15}\n")

        if detailed_diffs:
            f.write("\nDetailed Attribute Differences:\n")
            f.write("-" * 30 + "\n")
            for diff in detailed_diffs[:_MAX_DETAILED_DIFFS]:
                f.write(f"{diff}\n")
            if len(detailed_diffs) > _MAX_DETAILED_DIFFS:
                f.write(f"\n... and {len(detailed_diffs) - _MAX_DETAILED_DIFFS} more differences\n")


def _write_gene_report(path, genes1, genes2):
    """Write gene-id set differences, including the ``MT-`` prefixed subset, to ``path``."""
    only_1 = genes1 - genes2
    only_2 = genes2 - genes1

    # NOTE(review): the mitochondrial subset is selected by a "MT-" prefix on
    # gene_id (not gene_name or the chrM seqname) - confirm this is intended.
    mt_genes1 = {g for g in genes1 if g.startswith("MT-")}
    mt_genes2 = {g for g in genes2 if g.startswith("MT-")}
    mt_only_1 = mt_genes1 - mt_genes2
    mt_only_2 = mt_genes2 - mt_genes1

    with open(path, 'w', encoding='utf-8') as f:
        f.write("Gene-level Differences:\n\n")
        f.write(f"Total genes in GTF1: {len(genes1)}\n")
        f.write(f"Total genes in GTF2: {len(genes2)}\n")
        f.write(f"Genes only in GTF1: {len(only_1)}\n")
        f.write(f"Genes only in GTF2: {len(only_2)}\n")
        f.write(f"Genes in both: {len(genes1 & genes2)}\n\n")

        f.write("Genes only in GTF1:\n")
        f.writelines(f"{gene}\n" for gene in sorted(only_1))

        f.write("\nGenes only in GTF2:\n")
        f.writelines(f"{gene}\n" for gene in sorted(only_2))

        f.write("\nMitochondrial Genes Comparison:\n")
        f.write(f"MT genes in GTF1: {len(mt_genes1)}\n")
        f.write(f"MT genes in GTF2: {len(mt_genes2)}\n")
        f.write(f"MT genes only in GTF1: {len(mt_only_1)}\n")
        f.write(f"MT genes only in GTF2: {len(mt_only_2)}\n")

        if mt_only_1:
            f.write("\nMT genes only in GTF1:\n")
            f.writelines(f"{gene}\n" for gene in sorted(mt_only_1))

        if mt_only_2:
            f.write("\nMT genes only in GTF2:\n")
            f.writelines(f"{gene}\n" for gene in sorted(mt_only_2))


def compare_gtfs(gtf1_path, gtf2_path, output_prefix):
    """Compare two GTF files and generate comparison reports.

    Rows are compared positionally (row N of one file against row N of the
    other) over the rows both files have. Three reports are written:
    ``<output_prefix>_structural_diff.txt``, ``<output_prefix>_attribute_diff.txt``
    and ``<output_prefix>_gene_diff.txt``. The gene report uses every row of
    each file, not just the common ones.

    Raises:
        FileNotFoundError: if either input path is not an existing file.
    """
    verify_input_files(gtf1_path, gtf2_path)
    ensure_output_directory(output_prefix)

    print("Reading GTF files...")
    gtf1 = _read_gtf(gtf1_path)
    gtf2 = _read_gtf(gtf2_path)
    min_len = min(len(gtf1), len(gtf2))

    print("\nAnalyzing structural differences (first 8 fields)...")
    struct_diff = _structural_report(gtf1, gtf2, min_len)

    print("\nAnalyzing attribute differences (9th field)...")
    attr_diff, detailed_diffs = _attribute_report(gtf1, gtf2, min_len)

    print("\nAnalyzing gene-level differences...")
    genes1 = _collect_gene_ids(gtf1)
    genes2 = _collect_gene_ids(gtf2)

    _write_structural_report(f"{output_prefix}_structural_diff.txt", struct_diff)
    _write_attribute_report(f"{output_prefix}_attribute_diff.txt", attr_diff, detailed_diffs)
    _write_gene_report(f"{output_prefix}_gene_diff.txt", genes1, genes2)


def main():
    """Command-line entry point: parse arguments, run the comparison, report the outputs.

    Exits with status 1 and an ``Error: ...`` message on stderr if the
    comparison fails.
    """
    parser = argparse.ArgumentParser(description="Compare two GTF files and analyze their differences")
    parser.add_argument("gtf1", help="First GTF file (e.g., from old script)")
    parser.add_argument("gtf2", help="Second GTF file (e.g., from new script)")
    parser.add_argument("--output-prefix", "-o", default="gtf_comparison",
                        help="Prefix for output files (default: gtf_comparison)")

    args = parser.parse_args()

    print(f"Comparing {args.gtf1} and {args.gtf2}")
    try:
        compare_gtfs(args.gtf1, args.gtf2, args.output_prefix)
    except Exception as e:
        # Top-level boundary: turn any failure into a message and a non-zero
        # exit status. sys.exit is used because the bare exit() helper is only
        # injected by the site module.
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)

    print(f"\nComparison complete. Check the following files for results:")
    print(f" {args.output_prefix}_structural_diff.txt")
    print(f" {args.output_prefix}_attribute_diff.txt")
    print(f" {args.output_prefix}_gene_diff.txt")


if __name__ == "__main__":
    main()
# \ No newline at end of file
# diff --git a/3rd-party-tools/build-indices/create_test_gtfs.py b/3rd-party-tools/build-indices/create_test_gtfs.py
# new file mode 100644
# index 00000000..d87d55a9
# --- /dev/null
# +++ b/3rd-party-tools/build-indices/create_test_gtfs.py
# @@ -0,0 +1,32 @@
# Generates the small GTF fixtures test1.gtf and test2.gtf.
import os


def create_test_gtfs(output_dir="test_data"):
    """Write two small GTF fixtures, ``test1.gtf`` and ``test2.gtf``.

    The second file differs from the first in the end coordinate of the first
    gene and in the gene_id / gene_name of the chrM gene. Existing files with
    the same names are overwritten.

    Args:
        output_dir: directory to write into; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Test GTF 1
    test_gtf1 = """#!genome-build GRCh38.p13
#!genome-version GRCh38
chr1\tHAVANA\tgene\t11869\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_name "DDX11L1";
chr1\tHAVANA\ttranscript\t11869\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000456328";
chr1\tENSEMBL\texon\t11869\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000456328";
chrM\tHAVANA\tgene\t577\t647\t.\t+\t.\tgene_id "MT-TF-1"; gene_name "MT-TF";"""

    # Test GTF 2 (with some differences)
    test_gtf2 = """#!genome-build GRCh38.p13
#!genome-version GRCh38
chr1\tHAVANA\tgene\t11869\t14410\t.\t+\t.\tgene_id "ENSG00000223972"; gene_name "DDX11L1";
chr1\tHAVANA\ttranscript\t11869\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000456328";
chr1\tENSEMBL\texon\t11869\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000456328";
chrM\tHAVANA\tgene\t577\t647\t.\t+\t.\tgene_id "MT-TF-2"; gene_name "MT-TF-modified";"""

    fixtures = (("test1.gtf", test_gtf1), ("test2.gtf", test_gtf2))
    for file_name, content in fixtures:
        with open(os.path.join(output_dir, file_name), "w", encoding="utf-8") as f:
            f.write(content)


if __name__ == "__main__":
    create_test_gtfs()
# \ No newline at end of file
# diff --git
a/3rd-party-tools/build-indices/test_data/reference_outputs/reference_modified.gtf b/3rd-party-tools/build-indices/test_data/reference_outputs/reference_modified.gtf new file mode 100644 index 00000000..db2352fa --- /dev/null +++ b/3rd-party-tools/build-indices/test_data/reference_outputs/reference_modified.gtf @@ -0,0 +1,82 @@ +#gtf-version 2.2 +#!genome-build mCalJa1.2.pat.X +#!genome-build-accession NCBI_Assembly:GCF_011100555.1 +#!annotation-date 03/02/2023 +#!annotation-source NCBI RefSeq GCF_011100555.1-RS_2023_03 +##gff-version 2 +##source-version rtracklayer 1.58.0 +##date 2023-09-11 +chr1 Gnomon gene 896221 897538 . + . gene_id "LOC100385748"; transcript_id ""; db_xref "GeneID:100385748"; description "heterogeneous nuclear ribonucleoprotein A1-like"; gbkey "Gene"; gene "LOC100385748"; gene_biotype "protein_coding"; gene_version "0"; transcript_version "0"; gene_name "LOC100385748" +chr1 Gnomon transcript 896221 897538 . + . gene_id "LOC100385748"; transcript_id "XM_035291264"; db_xref "GeneID:100385748"; gbkey "mRNA"; gene "LOC100385748"; model_evidence "Supporting evidence includes similarity to: 1 EST, 24 Proteins"; product "heterogeneous nuclear ribonucleoprotein A1-like"; transcript_biotype "mRNA"; gene_version "0"; transcript_version "2"; gene_name "LOC100385748" +chr1 Gnomon exon 896221 897538 . + . gene_id "LOC100385748"; transcript_id "XM_035291264"; db_xref "GeneID:100385748"; gene "LOC100385748"; model_evidence "Supporting evidence includes similarity to: 1 EST, 24 Proteins"; product "heterogeneous nuclear ribonucleoprotein A1-like"; transcript_biotype "mRNA"; exon_number "1"; gene_version "0"; transcript_version "2"; gene_name "LOC100385748" +chr1 Gnomon CDS 896273 897232 . 
+ 0 gene_id "LOC100385748"; transcript_id "XM_035291264"; db_xref "GeneID:100385748"; gbkey "CDS"; gene "LOC100385748"; product "heterogeneous nuclear ribonucleoprotein A1-like"; exon_number "1"; protein_id "XP_035147155.2"; gene_version "0"; transcript_version "2"; gene_name "LOC100385748" +chr1 Gnomon start_codon 896273 896275 . + 0 gene_id "LOC100385748"; transcript_id "XM_035291264"; db_xref "GeneID:100385748"; gbkey "CDS"; gene "LOC100385748"; product "heterogeneous nuclear ribonucleoprotein A1-like"; exon_number "1"; protein_id "XP_035147155.2"; gene_version "0"; transcript_version "2"; gene_name "LOC100385748" +chr1 Gnomon stop_codon 897233 897235 . + 0 gene_id "LOC100385748"; transcript_id "XM_035291264"; db_xref "GeneID:100385748"; gbkey "CDS"; gene "LOC100385748"; product "heterogeneous nuclear ribonucleoprotein A1-like"; exon_number "1"; protein_id "XP_035147155.2"; gene_version "0"; transcript_version "2"; gene_name "LOC100385748" +chr1 Gnomon gene 1168056 1182196 . - . gene_id "SLITRK6"; transcript_id ""; db_xref "GeneID:100386103"; description "SLIT and NTRK like family member 6"; gbkey "Gene"; gene "SLITRK6"; gene_biotype "protein_coding"; gene_version "0"; transcript_version "0"; gene_name "SLITRK6" +chr1 Gnomon transcript 1168056 1182196 . - . gene_id "SLITRK6"; transcript_id "XM_035292445"; db_xref "GeneID:100386103"; gbkey "mRNA"; gene "SLITRK6"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 40 ESTs, 3 Proteins, 1 long SRA read"; product "SLIT and NTRK like family member 6, transcript variant X2"; transcript_biotype "mRNA"; gene_version "0"; transcript_version "1"; gene_name "SLITRK6" +chr1 Gnomon exon 1182037 1182196 . - . 
gene_id "SLITRK6"; transcript_id "XM_035292445"; db_xref "GeneID:100386103"; gene "SLITRK6"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 40 ESTs, 3 Proteins, 1 long SRA read"; product "SLIT and NTRK like family member 6, transcript variant X2"; transcript_biotype "mRNA"; exon_number "1"; gene_version "0"; transcript_version "1"; gene_name "SLITRK6" +chr1 Gnomon exon 1174312 1174384 . - . gene_id "SLITRK6"; transcript_id "XM_035292445"; db_xref "GeneID:100386103"; gene "SLITRK6"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 40 ESTs, 3 Proteins, 1 long SRA read"; product "SLIT and NTRK like family member 6, transcript variant X2"; transcript_biotype "mRNA"; exon_number "2"; gene_version "0"; transcript_version "1"; gene_name "SLITRK6" +chr1 Gnomon exon 1168056 1171809 . - . gene_id "SLITRK6"; transcript_id "XM_035292445"; db_xref "GeneID:100386103"; gene "SLITRK6"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 40 ESTs, 3 Proteins, 1 long SRA read"; product "SLIT and NTRK like family member 6, transcript variant X2"; transcript_biotype "mRNA"; exon_number "3"; gene_version "0"; transcript_version "1"; gene_name "SLITRK6" +chr1 Gnomon CDS 1169266 1171785 . - 0 gene_id "SLITRK6"; transcript_id "XM_035292445"; db_xref "GeneID:100386103"; gbkey "CDS"; gene "SLITRK6"; product "SLIT and NTRK-like protein 6"; exon_number "3"; protein_id "XP_035148336.1"; gene_version "0"; transcript_version "1"; gene_name "SLITRK6" +chr1 Gnomon start_codon 1171783 1171785 . - 0 gene_id "SLITRK6"; transcript_id "XM_035292445"; db_xref "GeneID:100386103"; gbkey "CDS"; gene "SLITRK6"; product "SLIT and NTRK-like protein 6"; exon_number "3"; protein_id "XP_035148336.1"; gene_version "0"; transcript_version "1"; gene_name "SLITRK6" +chr1 Gnomon stop_codon 1169263 1169265 . 
- 0 gene_id "SLITRK6"; transcript_id "XM_035292445"; db_xref "GeneID:100386103"; gbkey "CDS"; gene "SLITRK6"; product "SLIT and NTRK-like protein 6"; exon_number "3"; protein_id "XP_035148336.1"; gene_version "0"; transcript_version "1"; gene_name "SLITRK6" +chr1 Gnomon transcript 1168056 1174909 . - . gene_id "SLITRK6"; transcript_id "XM_035292444"; db_xref "GeneID:100386103"; gbkey "mRNA"; gene "SLITRK6"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 6 mRNAs, 43 ESTs, 3 Proteins, 6 long SRA reads"; product "SLIT and NTRK like family member 6, transcript variant X1"; transcript_biotype "mRNA"; gene_version "0"; transcript_version "2"; gene_name "SLITRK6" +chr1 Gnomon exon 1174312 1174909 . - . gene_id "SLITRK6"; transcript_id "XM_035292444"; db_xref "GeneID:100386103"; gene "SLITRK6"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 6 mRNAs, 43 ESTs, 3 Proteins, 6 long SRA reads"; product "SLIT and NTRK like family member 6, transcript variant X1"; transcript_biotype "mRNA"; exon_number "1"; gene_version "0"; transcript_version "2"; gene_name "SLITRK6" +chr1 Gnomon exon 1168056 1171809 . - . gene_id "SLITRK6"; transcript_id "XM_035292444"; db_xref "GeneID:100386103"; gene "SLITRK6"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 6 mRNAs, 43 ESTs, 3 Proteins, 6 long SRA reads"; product "SLIT and NTRK like family member 6, transcript variant X1"; transcript_biotype "mRNA"; exon_number "2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK6" +chr1 Gnomon CDS 1169266 1171785 . 
- 0 gene_id "SLITRK6"; transcript_id "XM_035292444"; db_xref "GeneID:100386103"; gbkey "CDS"; gene "SLITRK6"; product "SLIT and NTRK-like protein 6"; exon_number "2"; protein_id "XP_035148335.1"; gene_version "0"; transcript_version "2"; gene_name "SLITRK6" +chr1 Gnomon start_codon 1171783 1171785 . - 0 gene_id "SLITRK6"; transcript_id "XM_035292444"; db_xref "GeneID:100386103"; gbkey "CDS"; gene "SLITRK6"; product "SLIT and NTRK-like protein 6"; exon_number "2"; protein_id "XP_035148335.1"; gene_version "0"; transcript_version "2"; gene_name "SLITRK6" +chr1 Gnomon stop_codon 1169263 1169265 . - 0 gene_id "SLITRK6"; transcript_id "XM_035292444"; db_xref "GeneID:100386103"; gbkey "CDS"; gene "SLITRK6"; product "SLIT and NTRK-like protein 6"; exon_number "2"; protein_id "XP_035148335.1"; gene_version "0"; transcript_version "2"; gene_name "SLITRK6" +chr1 Gnomon gene 1914431 1933100 . - . gene_id "LOC118152108"; transcript_id ""; db_xref "GeneID:118152108"; description "uncharacterized LOC118152108"; gbkey "Gene"; gene "LOC118152108"; gene_biotype "lncRNA"; gene_version "0"; transcript_version "0"; gene_name "LOC118152108" +chr1 Gnomon transcript 1914431 1933100 . - . gene_id "LOC118152108"; transcript_id "XR_004740463"; db_xref "GeneID:118152108"; gbkey "ncRNA"; gene "LOC118152108"; product "uncharacterized LOC118152108"; transcript_biotype "lnc_RNA"; gene_version "0"; transcript_version "2"; gene_name "LOC118152108" +chr1 Gnomon exon 1932925 1933100 . - . gene_id "LOC118152108"; transcript_id "XR_004740463"; db_xref "GeneID:118152108"; gene "LOC118152108"; product "uncharacterized LOC118152108"; transcript_biotype "lnc_RNA"; exon_number "1"; gene_version "0"; transcript_version "2"; gene_name "LOC118152108" +chr1 Gnomon exon 1925270 1925442 . - . 
gene_id "LOC118152108"; transcript_id "XR_004740463"; db_xref "GeneID:118152108"; gene "LOC118152108"; product "uncharacterized LOC118152108"; transcript_biotype "lnc_RNA"; exon_number "2"; gene_version "0"; transcript_version "2"; gene_name "LOC118152108" +chr1 Gnomon exon 1914431 1915036 . - . gene_id "LOC118152108"; transcript_id "XR_004740463"; db_xref "GeneID:118152108"; gene "LOC118152108"; product "uncharacterized LOC118152108"; transcript_biotype "lnc_RNA"; exon_number "3"; gene_version "0"; transcript_version "2"; gene_name "LOC118152108" +chr1 Gnomon gene 2672931 2765441 . - . gene_id "LOC103791423"; transcript_id ""; db_xref "GeneID:103791423"; description "uncharacterized LOC103791423"; gbkey "Gene"; gene "LOC103791423"; gene_biotype "lncRNA"; gene_version "0"; transcript_version "0"; gene_name "LOC103791423" +chr1 Gnomon transcript 2672931 2765441 . - . gene_id "LOC103791423"; transcript_id "XR_008474938"; db_xref "GeneID:103791423"; gbkey "ncRNA"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; product "uncharacterized LOC103791423, transcript variant X2"; transcript_biotype "lnc_RNA"; gene_version "0"; transcript_version "1"; gene_name "LOC103791423" +chr1 Gnomon exon 2763930 2765441 . - . gene_id "LOC103791423"; transcript_id "XR_008474938"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; product "uncharacterized LOC103791423, transcript variant X2"; transcript_biotype "lnc_RNA"; exon_number "1"; gene_version "0"; transcript_version "1"; gene_name "LOC103791423" +chr1 Gnomon exon 2698627 2698764 . - . 
gene_id "LOC103791423"; transcript_id "XR_008474938"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; product "uncharacterized LOC103791423, transcript variant X2"; transcript_biotype "lnc_RNA"; exon_number "2"; gene_version "0"; transcript_version "1"; gene_name "LOC103791423" +chr1 Gnomon exon 2695315 2695392 . - . gene_id "LOC103791423"; transcript_id "XR_008474938"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; product "uncharacterized LOC103791423, transcript variant X2"; transcript_biotype "lnc_RNA"; exon_number "3"; gene_version "0"; transcript_version "1"; gene_name "LOC103791423" +chr1 Gnomon exon 2675830 2675918 . - . gene_id "LOC103791423"; transcript_id "XR_008474938"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; product "uncharacterized LOC103791423, transcript variant X2"; transcript_biotype "lnc_RNA"; exon_number "4"; gene_version "0"; transcript_version "1"; gene_name "LOC103791423" +chr1 Gnomon exon 2672931 2674442 . - . gene_id "LOC103791423"; transcript_id "XR_008474938"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; product "uncharacterized LOC103791423, transcript variant X2"; transcript_biotype "lnc_RNA"; exon_number "5"; gene_version "0"; transcript_version "1"; gene_name "LOC103791423" +chr1 Gnomon transcript 2672931 2702221 . - . gene_id "LOC103791423"; transcript_id "XR_001910283"; db_xref "GeneID:103791423"; gbkey "ncRNA"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 long SRA read"; product "uncharacterized LOC103791423, transcript variant X1"; transcript_biotype "lnc_RNA"; gene_version "0"; transcript_version "3"; gene_name "LOC103791423" +chr1 Gnomon exon 2701785 2702221 . - . 
gene_id "LOC103791423"; transcript_id "XR_001910283"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 long SRA read"; product "uncharacterized LOC103791423, transcript variant X1"; transcript_biotype "lnc_RNA"; exon_number "1"; gene_version "0"; transcript_version "3"; gene_name "LOC103791423" +chr1 Gnomon exon 2698627 2698764 . - . gene_id "LOC103791423"; transcript_id "XR_001910283"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 long SRA read"; product "uncharacterized LOC103791423, transcript variant X1"; transcript_biotype "lnc_RNA"; exon_number "2"; gene_version "0"; transcript_version "3"; gene_name "LOC103791423" +chr1 Gnomon exon 2695315 2695392 . - . gene_id "LOC103791423"; transcript_id "XR_001910283"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 long SRA read"; product "uncharacterized LOC103791423, transcript variant X1"; transcript_biotype "lnc_RNA"; exon_number "3"; gene_version "0"; transcript_version "3"; gene_name "LOC103791423" +chr1 Gnomon exon 2675830 2675918 . - . gene_id "LOC103791423"; transcript_id "XR_001910283"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 long SRA read"; product "uncharacterized LOC103791423, transcript variant X1"; transcript_biotype "lnc_RNA"; exon_number "4"; gene_version "0"; transcript_version "3"; gene_name "LOC103791423" +chr1 Gnomon exon 2672931 2674442 . - . 
gene_id "LOC103791423"; transcript_id "XR_001910283"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 long SRA read"; product "uncharacterized LOC103791423, transcript variant X1"; transcript_biotype "lnc_RNA"; exon_number "5"; gene_version "0"; transcript_version "3"; gene_name "LOC103791423" +chr1 Gnomon gene 2927508 2935023 . + . gene_id "SLITRK5"; transcript_id ""; db_xref "GeneID:100387170"; description "SLIT and NTRK like family member 5"; gbkey "Gene"; gene "SLITRK5"; gene_biotype "protein_coding"; gene_version "0"; transcript_version "0"; gene_name "SLITRK5" +chr1 Gnomon transcript 2927508 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_035292447"; db_xref "GeneID:100387170"; gbkey "mRNA"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 ESTs, 7 Proteins, 43 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X2"; transcript_biotype "mRNA"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon exon 2927508 2927648 . + . gene_id "SLITRK5"; transcript_id "XM_035292447"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 ESTs, 7 Proteins, 43 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X2"; transcript_biotype "mRNA"; exon_number "1"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon exon 2929685 2929835 . + . 
gene_id "SLITRK5"; transcript_id "XM_035292447"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 ESTs, 7 Proteins, 43 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X2"; transcript_biotype "mRNA"; exon_number "2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon exon 2930809 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_035292447"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 ESTs, 7 Proteins, 43 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X2"; transcript_biotype "mRNA"; exon_number "3"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon CDS 2930817 2933687 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292447"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "3"; protein_id "XP_035148338.2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon start_codon 2930817 2930819 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292447"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "3"; protein_id "XP_035148338.2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon stop_codon 2933688 2933690 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292447"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "3"; protein_id "XP_035148338.2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon transcript 2927510 2935023 . + . 
gene_id "SLITRK5"; transcript_id "XM_017969928"; db_xref "GeneID:100387170"; gbkey "mRNA"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 11 ESTs, 7 Proteins, 45 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X1"; transcript_biotype "mRNA"; gene_version "0"; transcript_version "4"; gene_name "SLITRK5" +chr1 Gnomon exon 2927510 2927648 . + . gene_id "SLITRK5"; transcript_id "XM_017969928"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 11 ESTs, 7 Proteins, 45 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X1"; transcript_biotype "mRNA"; exon_number "1"; gene_version "0"; transcript_version "4"; gene_name "SLITRK5" +chr1 Gnomon exon 2930809 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_017969928"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 11 ESTs, 7 Proteins, 45 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X1"; transcript_biotype "mRNA"; exon_number "2"; gene_version "0"; transcript_version "4"; gene_name "SLITRK5" +chr1 Gnomon CDS 2930817 2933687 . + 0 gene_id "SLITRK5"; transcript_id "XM_017969928"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_017825417.3"; gene_version "0"; transcript_version "4"; gene_name "SLITRK5" +chr1 Gnomon start_codon 2930817 2930819 . 
+ 0 gene_id "SLITRK5"; transcript_id "XM_017969928"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_017825417.3"; gene_version "0"; transcript_version "4"; gene_name "SLITRK5" +chr1 Gnomon stop_codon 2933688 2933690 . + 0 gene_id "SLITRK5"; transcript_id "XM_017969928"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_017825417.3"; gene_version "0"; transcript_version "4"; gene_name "SLITRK5" +chr1 Gnomon transcript 2928117 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_035292457"; db_xref "GeneID:100387170"; gbkey "mRNA"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 6 mRNAs, 1 EST, 7 Proteins, 45 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X5"; transcript_biotype "mRNA"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon exon 2928117 2928250 . + . gene_id "SLITRK5"; transcript_id "XM_035292457"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 6 mRNAs, 1 EST, 7 Proteins, 45 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X5"; transcript_biotype "mRNA"; exon_number "1"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon exon 2930809 2935023 . + . 
gene_id "SLITRK5"; transcript_id "XM_035292457"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 6 mRNAs, 1 EST, 7 Proteins, 45 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X5"; transcript_biotype "mRNA"; exon_number "2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon CDS 2930817 2933687 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292457"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_035148348.2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon start_codon 2930817 2930819 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292457"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_035148348.2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon stop_codon 2933688 2933690 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292457"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_035148348.2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon transcript 2928123 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_035292451"; db_xref "GeneID:100387170"; gbkey "mRNA"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 Proteins, 42 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X3"; transcript_biotype "mRNA"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon exon 2928123 2928250 . + . 
gene_id "SLITRK5"; transcript_id "XM_035292451"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 Proteins, 42 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X3"; transcript_biotype "mRNA"; exon_number "1"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon exon 2929685 2929835 . + . gene_id "SLITRK5"; transcript_id "XM_035292451"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 Proteins, 42 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X3"; transcript_biotype "mRNA"; exon_number "2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon exon 2930809 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_035292451"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 Proteins, 42 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X3"; transcript_biotype "mRNA"; exon_number "3"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon CDS 2930817 2933687 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292451"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "3"; protein_id "XP_035148342.2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon start_codon 2930817 2930819 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292451"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "3"; protein_id "XP_035148342.2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon stop_codon 2933688 2933690 . 
+ 0 gene_id "SLITRK5"; transcript_id "XM_035292451"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "3"; protein_id "XP_035148342.2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon transcript 2928288 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_035292465"; db_xref "GeneID:100387170"; gbkey "mRNA"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 1 EST, 7 Proteins, 47 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X6"; transcript_biotype "mRNA"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon exon 2928288 2928982 . + . gene_id "SLITRK5"; transcript_id "XM_035292465"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 1 EST, 7 Proteins, 47 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X6"; transcript_biotype "mRNA"; exon_number "1"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon exon 2930809 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_035292465"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 1 EST, 7 Proteins, 47 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X6"; transcript_biotype "mRNA"; exon_number "2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon CDS 2930817 2933687 . 
+ 0 gene_id "SLITRK5"; transcript_id "XM_035292465"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_035148356.2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon start_codon 2930817 2930819 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292465"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_035148356.2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon stop_codon 2933688 2933690 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292465"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_035148356.2"; gene_version "0"; transcript_version "2"; gene_name "SLITRK5" +chr1 Gnomon transcript 2928289 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_054241421"; db_xref "GeneID:100387170"; gbkey "mRNA"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 1 EST, 7 Proteins, 42 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X4"; transcript_biotype "mRNA"; gene_version "0"; transcript_version "1"; gene_name "SLITRK5" +chr1 Gnomon exon 2928289 2928982 . + . gene_id "SLITRK5"; transcript_id "XM_054241421"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 1 EST, 7 Proteins, 42 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X4"; transcript_biotype "mRNA"; exon_number "1"; gene_version "0"; transcript_version "1"; gene_name "SLITRK5" +chr1 Gnomon exon 2929685 2929835 . + . 
gene_id "SLITRK5"; transcript_id "XM_054241421"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 1 EST, 7 Proteins, 42 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X4"; transcript_biotype "mRNA"; exon_number "2"; gene_version "0"; transcript_version "1"; gene_name "SLITRK5" diff --git a/3rd-party-tools/build-indices/test_data/test1.gtf b/3rd-party-tools/build-indices/test_data/test1.gtf new file mode 100644 index 00000000..e48424ce --- /dev/null +++ b/3rd-party-tools/build-indices/test_data/test1.gtf @@ -0,0 +1,100 @@ +#gtf-version 2.2 +#!genome-build mCalJa1.2.pat.X +#!genome-build-accession NCBI_Assembly:GCF_011100555.1 +#!annotation-date 03/02/2023 +#!annotation-source NCBI RefSeq GCF_011100555.1-RS_2023_03 +##gff-version 2 +##source-version rtracklayer 1.58.0 +##date 2023-09-11 +chr1 Gnomon gene 399914 559170 . - . gene_id "LOC118152095"; transcript_id ""; db_xref "GeneID:118152095"; description "glucose-6-phosphate 1-dehydrogenase-like"; gbkey "Gene"; gene "LOC118152095"; gene_biotype "transcribed_pseudogene"; pseudo "true"; +chr1 Gnomon transcript 399914 559170 . - . gene_id "LOC118152095"; transcript_id "XR_004740445.2"; db_xref "GeneID:118152095"; gbkey "misc_RNA"; gene "LOC118152095"; pseudo "true"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 19 ESTs, 17 Proteins, 1 long SRA read"; product "glucose-6-phosphate 1-dehydrogenase-like"; transcript_biotype "transcript"; +chr1 Gnomon exon 559033 559170 . - . 
gene_id "LOC118152095"; transcript_id "XR_004740445.2"; db_xref "GeneID:118152095"; gene "LOC118152095"; pseudo "true"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 19 ESTs, 17 Proteins, 1 long SRA read"; product "glucose-6-phosphate 1-dehydrogenase-like"; transcript_biotype "transcript"; exon_number "1"; +chr1 Gnomon exon 446522 446648 . - . gene_id "LOC118152095"; transcript_id "XR_004740445.2"; db_xref "GeneID:118152095"; gene "LOC118152095"; pseudo "true"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 19 ESTs, 17 Proteins, 1 long SRA read"; product "glucose-6-phosphate 1-dehydrogenase-like"; transcript_biotype "transcript"; exon_number "2"; +chr1 Gnomon exon 444272 444369 . - . gene_id "LOC118152095"; transcript_id "XR_004740445.2"; db_xref "GeneID:118152095"; gene "LOC118152095"; pseudo "true"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 19 ESTs, 17 Proteins, 1 long SRA read"; product "glucose-6-phosphate 1-dehydrogenase-like"; transcript_biotype "transcript"; exon_number "3"; +chr1 Gnomon exon 399914 406914 . - . gene_id "LOC118152095"; transcript_id "XR_004740445.2"; db_xref "GeneID:118152095"; gene "LOC118152095"; pseudo "true"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 19 ESTs, 17 Proteins, 1 long SRA read"; product "glucose-6-phosphate 1-dehydrogenase-like"; transcript_biotype "transcript"; exon_number "4"; +chr1 cmsearch gene 459917 460023 . - . gene_id "LOC118147983"; transcript_id ""; db_xref "GeneID:118147983"; description "U6 spliceosomal RNA"; gbkey "Gene"; gene "LOC118147983"; gene_biotype "snRNA"; +chr1 cmsearch transcript 459917 460023 . - . 
gene_id "LOC118147983"; transcript_id "XR_004734714.1"; db_xref "RFAM:RF00026"; gbkey "ncRNA"; gene "LOC118147983"; product "U6 spliceosomal RNA"; transcript_biotype "snRNA"; inference "COORDINATES: profile:INFERNAL:1.1.4"; +chr1 cmsearch exon 459917 460023 . - . gene_id "LOC118147983"; transcript_id "XR_004734714.1"; db_xref "RFAM:RF00026"; gene "LOC118147983"; product "U6 spliceosomal RNA"; transcript_biotype "snRNA"; exon_number "1"; inference "COORDINATES: profile:INFERNAL:1.1.4"; +chr1 Gnomon gene 558988 560838 . + . gene_id "LOC100395011"; transcript_id ""; db_xref "GeneID:100395011"; description "alpha-internexin-like"; gbkey "Gene"; gene "LOC100395011"; gene_biotype "pseudogene"; pseudo "true"; +chr1 cmsearch gene 580364 580504 . + . gene_id "LOC118147921"; transcript_id ""; db_xref "GeneID:118147921"; description "U4 spliceosomal RNA"; gbkey "Gene"; gene "LOC118147921"; gene_biotype "snRNA"; +chr1 cmsearch transcript 580364 580504 . + . gene_id "LOC118147921"; transcript_id "XR_004734623.1"; db_xref "RFAM:RF00015"; gbkey "ncRNA"; gene "LOC118147921"; product "U4 spliceosomal RNA"; transcript_biotype "snRNA"; inference "COORDINATES: profile:INFERNAL:1.1.4"; +chr1 cmsearch exon 580364 580504 . + . gene_id "LOC118147921"; transcript_id "XR_004734623.1"; db_xref "RFAM:RF00015"; gene "LOC118147921"; product "U4 spliceosomal RNA"; transcript_biotype "snRNA"; exon_number "1"; inference "COORDINATES: profile:INFERNAL:1.1.4"; +chr1 Gnomon gene 896221 897538 . + . gene_id "LOC100385748"; transcript_id ""; db_xref "GeneID:100385748"; description "heterogeneous nuclear ribonucleoprotein A1-like"; gbkey "Gene"; gene "LOC100385748"; gene_biotype "protein_coding"; +chr1 Gnomon transcript 896221 897538 . + . 
gene_id "LOC100385748"; transcript_id "XM_035291264.2"; db_xref "GeneID:100385748"; gbkey "mRNA"; gene "LOC100385748"; model_evidence "Supporting evidence includes similarity to: 1 EST, 24 Proteins"; product "heterogeneous nuclear ribonucleoprotein A1-like"; transcript_biotype "mRNA"; +chr1 Gnomon exon 896221 897538 . + . gene_id "LOC100385748"; transcript_id "XM_035291264.2"; db_xref "GeneID:100385748"; gene "LOC100385748"; model_evidence "Supporting evidence includes similarity to: 1 EST, 24 Proteins"; product "heterogeneous nuclear ribonucleoprotein A1-like"; transcript_biotype "mRNA"; exon_number "1"; +chr1 Gnomon CDS 896273 897232 . + 0 gene_id "LOC100385748"; transcript_id "XM_035291264.2"; db_xref "GeneID:100385748"; gbkey "CDS"; gene "LOC100385748"; product "heterogeneous nuclear ribonucleoprotein A1-like"; exon_number "1"; protein_id "XP_035147155.2"; +chr1 Gnomon start_codon 896273 896275 . + 0 gene_id "LOC100385748"; transcript_id "XM_035291264.2"; db_xref "GeneID:100385748"; gbkey "CDS"; gene "LOC100385748"; product "heterogeneous nuclear ribonucleoprotein A1-like"; exon_number "1"; protein_id "XP_035147155.2"; +chr1 Gnomon stop_codon 897233 897235 . + 0 gene_id "LOC100385748"; transcript_id "XM_035291264.2"; db_xref "GeneID:100385748"; gbkey "CDS"; gene "LOC100385748"; product "heterogeneous nuclear ribonucleoprotein A1-like"; exon_number "1"; protein_id "XP_035147155.2"; +chr1 cmsearch gene 950085 950190 . - . gene_id "LOC118148301"; transcript_id ""; db_xref "GeneID:118148301"; description "U6 spliceosomal RNA"; gbkey "Gene"; gene "LOC118148301"; gene_biotype "snRNA"; +chr1 cmsearch transcript 950085 950190 . - . gene_id "LOC118148301"; transcript_id "XR_004735066.2"; db_xref "RFAM:RF00026"; gbkey "ncRNA"; gene "LOC118148301"; product "U6 spliceosomal RNA"; transcript_biotype "snRNA"; inference "COORDINATES: profile:INFERNAL:1.1.4"; +chr1 cmsearch exon 950085 950190 . - . 
gene_id "LOC118148301"; transcript_id "XR_004735066.2"; db_xref "RFAM:RF00026"; gene "LOC118148301"; product "U6 spliceosomal RNA"; transcript_biotype "snRNA"; exon_number "1"; inference "COORDINATES: profile:INFERNAL:1.1.4"; +chr1 Gnomon gene 1168056 1182196 . - . gene_id "SLITRK6"; transcript_id ""; db_xref "GeneID:100386103"; description "SLIT and NTRK like family member 6"; gbkey "Gene"; gene "SLITRK6"; gene_biotype "protein_coding"; +chr1 Gnomon transcript 1168056 1182196 . - . gene_id "SLITRK6"; transcript_id "XM_035292445.1"; db_xref "GeneID:100386103"; gbkey "mRNA"; gene "SLITRK6"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 40 ESTs, 3 Proteins, 1 long SRA read"; product "SLIT and NTRK like family member 6, transcript variant X2"; transcript_biotype "mRNA"; +chr1 Gnomon exon 1182037 1182196 . - . gene_id "SLITRK6"; transcript_id "XM_035292445.1"; db_xref "GeneID:100386103"; gene "SLITRK6"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 40 ESTs, 3 Proteins, 1 long SRA read"; product "SLIT and NTRK like family member 6, transcript variant X2"; transcript_biotype "mRNA"; exon_number "1"; +chr1 Gnomon exon 1174312 1174384 . - . gene_id "SLITRK6"; transcript_id "XM_035292445.1"; db_xref "GeneID:100386103"; gene "SLITRK6"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 40 ESTs, 3 Proteins, 1 long SRA read"; product "SLIT and NTRK like family member 6, transcript variant X2"; transcript_biotype "mRNA"; exon_number "2"; +chr1 Gnomon exon 1168056 1171809 . - . 
gene_id "SLITRK6"; transcript_id "XM_035292445.1"; db_xref "GeneID:100386103"; gene "SLITRK6"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 40 ESTs, 3 Proteins, 1 long SRA read"; product "SLIT and NTRK like family member 6, transcript variant X2"; transcript_biotype "mRNA"; exon_number "3"; +chr1 Gnomon CDS 1169266 1171785 . - 0 gene_id "SLITRK6"; transcript_id "XM_035292445.1"; db_xref "GeneID:100386103"; gbkey "CDS"; gene "SLITRK6"; product "SLIT and NTRK-like protein 6"; exon_number "3"; protein_id "XP_035148336.1"; +chr1 Gnomon start_codon 1171783 1171785 . - 0 gene_id "SLITRK6"; transcript_id "XM_035292445.1"; db_xref "GeneID:100386103"; gbkey "CDS"; gene "SLITRK6"; product "SLIT and NTRK-like protein 6"; exon_number "3"; protein_id "XP_035148336.1"; +chr1 Gnomon stop_codon 1169263 1169265 . - 0 gene_id "SLITRK6"; transcript_id "XM_035292445.1"; db_xref "GeneID:100386103"; gbkey "CDS"; gene "SLITRK6"; product "SLIT and NTRK-like protein 6"; exon_number "3"; protein_id "XP_035148336.1"; +chr1 Gnomon transcript 1168056 1174909 . - . gene_id "SLITRK6"; transcript_id "XM_035292444.2"; db_xref "GeneID:100386103"; gbkey "mRNA"; gene "SLITRK6"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 6 mRNAs, 43 ESTs, 3 Proteins, 6 long SRA reads"; product "SLIT and NTRK like family member 6, transcript variant X1"; transcript_biotype "mRNA"; +chr1 Gnomon exon 1174312 1174909 . - . gene_id "SLITRK6"; transcript_id "XM_035292444.2"; db_xref "GeneID:100386103"; gene "SLITRK6"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 6 mRNAs, 43 ESTs, 3 Proteins, 6 long SRA reads"; product "SLIT and NTRK like family member 6, transcript variant X1"; transcript_biotype "mRNA"; exon_number "1"; +chr1 Gnomon exon 1168056 1171809 . - . 
gene_id "SLITRK6"; transcript_id "XM_035292444.2"; db_xref "GeneID:100386103"; gene "SLITRK6"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 6 mRNAs, 43 ESTs, 3 Proteins, 6 long SRA reads"; product "SLIT and NTRK like family member 6, transcript variant X1"; transcript_biotype "mRNA"; exon_number "2"; +chr1 Gnomon CDS 1169266 1171785 . - 0 gene_id "SLITRK6"; transcript_id "XM_035292444.2"; db_xref "GeneID:100386103"; gbkey "CDS"; gene "SLITRK6"; product "SLIT and NTRK-like protein 6"; exon_number "2"; protein_id "XP_035148335.1"; +chr1 Gnomon start_codon 1171783 1171785 . - 0 gene_id "SLITRK6"; transcript_id "XM_035292444.2"; db_xref "GeneID:100386103"; gbkey "CDS"; gene "SLITRK6"; product "SLIT and NTRK-like protein 6"; exon_number "2"; protein_id "XP_035148335.1"; +chr1 Gnomon stop_codon 1169263 1169265 . - 0 gene_id "SLITRK6"; transcript_id "XM_035292444.2"; db_xref "GeneID:100386103"; gbkey "CDS"; gene "SLITRK6"; product "SLIT and NTRK-like protein 6"; exon_number "2"; protein_id "XP_035148335.1"; +chr1 Gnomon gene 1569370 1570852 . + . gene_id "LOC100386454"; transcript_id ""; db_xref "GeneID:100386454"; description "probable ATP-dependent RNA helicase DDX6"; gbkey "Gene"; gene "LOC100386454"; gene_biotype "pseudogene"; pseudo "true"; +chr1 Gnomon gene 1914431 1933100 . - . gene_id "LOC118152108"; transcript_id ""; db_xref "GeneID:118152108"; description "uncharacterized LOC118152108"; gbkey "Gene"; gene "LOC118152108"; gene_biotype "lncRNA"; +chr1 Gnomon transcript 1914431 1933100 . - . gene_id "LOC118152108"; transcript_id "XR_004740463.2"; db_xref "GeneID:118152108"; gbkey "ncRNA"; gene "LOC118152108"; product "uncharacterized LOC118152108"; transcript_biotype "lnc_RNA"; +chr1 Gnomon exon 1932925 1933100 . - . 
gene_id "LOC118152108"; transcript_id "XR_004740463.2"; db_xref "GeneID:118152108"; gene "LOC118152108"; product "uncharacterized LOC118152108"; transcript_biotype "lnc_RNA"; exon_number "1"; +chr1 Gnomon exon 1925270 1925442 . - . gene_id "LOC118152108"; transcript_id "XR_004740463.2"; db_xref "GeneID:118152108"; gene "LOC118152108"; product "uncharacterized LOC118152108"; transcript_biotype "lnc_RNA"; exon_number "2"; +chr1 Gnomon exon 1914431 1915036 . - . gene_id "LOC118152108"; transcript_id "XR_004740463.2"; db_xref "GeneID:118152108"; gene "LOC118152108"; product "uncharacterized LOC118152108"; transcript_biotype "lnc_RNA"; exon_number "3"; +chr1 Gnomon gene 2607470 2607924 . - . gene_id "LOC100396820"; transcript_id ""; db_xref "GeneID:100396820"; description "protein lin-28 homolog A-like"; gbkey "Gene"; gene "LOC100396820"; gene_biotype "pseudogene"; pseudo "true"; +chr1 Gnomon gene 2672931 2765441 . - . gene_id "LOC103791423"; transcript_id ""; db_xref "GeneID:103791423"; description "uncharacterized LOC103791423"; gbkey "Gene"; gene "LOC103791423"; gene_biotype "lncRNA"; +chr1 Gnomon transcript 2672931 2765441 . - . gene_id "LOC103791423"; transcript_id "XR_008474938.1"; db_xref "GeneID:103791423"; gbkey "ncRNA"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; product "uncharacterized LOC103791423, transcript variant X2"; transcript_biotype "lnc_RNA"; +chr1 Gnomon exon 2763930 2765441 . - . gene_id "LOC103791423"; transcript_id "XR_008474938.1"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; product "uncharacterized LOC103791423, transcript variant X2"; transcript_biotype "lnc_RNA"; exon_number "1"; +chr1 Gnomon exon 2698627 2698764 . - . 
gene_id "LOC103791423"; transcript_id "XR_008474938.1"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; product "uncharacterized LOC103791423, transcript variant X2"; transcript_biotype "lnc_RNA"; exon_number "2"; +chr1 Gnomon exon 2695315 2695392 . - . gene_id "LOC103791423"; transcript_id "XR_008474938.1"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; product "uncharacterized LOC103791423, transcript variant X2"; transcript_biotype "lnc_RNA"; exon_number "3"; +chr1 Gnomon exon 2675830 2675918 . - . gene_id "LOC103791423"; transcript_id "XR_008474938.1"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; product "uncharacterized LOC103791423, transcript variant X2"; transcript_biotype "lnc_RNA"; exon_number "4"; +chr1 Gnomon exon 2672931 2674442 . - . gene_id "LOC103791423"; transcript_id "XR_008474938.1"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; product "uncharacterized LOC103791423, transcript variant X2"; transcript_biotype "lnc_RNA"; exon_number "5"; +chr1 Gnomon transcript 2672931 2702221 . - . gene_id "LOC103791423"; transcript_id "XR_001910283.3"; db_xref "GeneID:103791423"; gbkey "ncRNA"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 long SRA read"; product "uncharacterized LOC103791423, transcript variant X1"; transcript_biotype "lnc_RNA"; +chr1 Gnomon exon 2701785 2702221 . - . 
gene_id "LOC103791423"; transcript_id "XR_001910283.3"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 long SRA read"; product "uncharacterized LOC103791423, transcript variant X1"; transcript_biotype "lnc_RNA"; exon_number "1"; +chr1 Gnomon exon 2698627 2698764 . - . gene_id "LOC103791423"; transcript_id "XR_001910283.3"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 long SRA read"; product "uncharacterized LOC103791423, transcript variant X1"; transcript_biotype "lnc_RNA"; exon_number "2"; +chr1 Gnomon exon 2695315 2695392 . - . gene_id "LOC103791423"; transcript_id "XR_001910283.3"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 long SRA read"; product "uncharacterized LOC103791423, transcript variant X1"; transcript_biotype "lnc_RNA"; exon_number "3"; +chr1 Gnomon exon 2675830 2675918 . - . gene_id "LOC103791423"; transcript_id "XR_001910283.3"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 long SRA read"; product "uncharacterized LOC103791423, transcript variant X1"; transcript_biotype "lnc_RNA"; exon_number "4"; +chr1 Gnomon exon 2672931 2674442 . - . gene_id "LOC103791423"; transcript_id "XR_001910283.3"; db_xref "GeneID:103791423"; gene "LOC103791423"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 long SRA read"; product "uncharacterized LOC103791423, transcript variant X1"; transcript_biotype "lnc_RNA"; exon_number "5"; +chr1 Gnomon gene 2927508 2935023 . + . 
gene_id "SLITRK5"; transcript_id ""; db_xref "GeneID:100387170"; description "SLIT and NTRK like family member 5"; gbkey "Gene"; gene "SLITRK5"; gene_biotype "protein_coding"; +chr1 Gnomon transcript 2927508 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_035292447.2"; db_xref "GeneID:100387170"; gbkey "mRNA"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 ESTs, 7 Proteins, 43 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X2"; transcript_biotype "mRNA"; +chr1 Gnomon exon 2927508 2927648 . + . gene_id "SLITRK5"; transcript_id "XM_035292447.2"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 ESTs, 7 Proteins, 43 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X2"; transcript_biotype "mRNA"; exon_number "1"; +chr1 Gnomon exon 2929685 2929835 . + . gene_id "SLITRK5"; transcript_id "XM_035292447.2"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 ESTs, 7 Proteins, 43 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X2"; transcript_biotype "mRNA"; exon_number "2"; +chr1 Gnomon exon 2930809 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_035292447.2"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 ESTs, 7 Proteins, 43 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X2"; transcript_biotype "mRNA"; exon_number "3"; +chr1 Gnomon CDS 2930817 2933687 . 
+ 0 gene_id "SLITRK5"; transcript_id "XM_035292447.2"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "3"; protein_id "XP_035148338.2"; +chr1 Gnomon start_codon 2930817 2930819 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292447.2"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "3"; protein_id "XP_035148338.2"; +chr1 Gnomon stop_codon 2933688 2933690 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292447.2"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "3"; protein_id "XP_035148338.2"; +chr1 Gnomon transcript 2927510 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_017969928.4"; db_xref "GeneID:100387170"; gbkey "mRNA"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 11 ESTs, 7 Proteins, 45 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X1"; transcript_biotype "mRNA"; +chr1 Gnomon exon 2927510 2927648 . + . gene_id "SLITRK5"; transcript_id "XM_017969928.4"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 11 ESTs, 7 Proteins, 45 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X1"; transcript_biotype "mRNA"; exon_number "1"; +chr1 Gnomon exon 2930809 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_017969928.4"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 11 ESTs, 7 Proteins, 45 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X1"; transcript_biotype "mRNA"; exon_number "2"; +chr1 Gnomon CDS 2930817 2933687 . 
+ 0 gene_id "SLITRK5"; transcript_id "XM_017969928.4"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_017825417.3"; +chr1 Gnomon start_codon 2930817 2930819 . + 0 gene_id "SLITRK5"; transcript_id "XM_017969928.4"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_017825417.3"; +chr1 Gnomon stop_codon 2933688 2933690 . + 0 gene_id "SLITRK5"; transcript_id "XM_017969928.4"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_017825417.3"; +chr1 Gnomon transcript 2928117 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_035292457.2"; db_xref "GeneID:100387170"; gbkey "mRNA"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 6 mRNAs, 1 EST, 7 Proteins, 45 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X5"; transcript_biotype "mRNA"; +chr1 Gnomon exon 2928117 2928250 . + . gene_id "SLITRK5"; transcript_id "XM_035292457.2"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 6 mRNAs, 1 EST, 7 Proteins, 45 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X5"; transcript_biotype "mRNA"; exon_number "1"; +chr1 Gnomon exon 2930809 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_035292457.2"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 6 mRNAs, 1 EST, 7 Proteins, 45 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X5"; transcript_biotype "mRNA"; exon_number "2"; +chr1 Gnomon CDS 2930817 2933687 . 
+ 0 gene_id "SLITRK5"; transcript_id "XM_035292457.2"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_035148348.2"; +chr1 Gnomon start_codon 2930817 2930819 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292457.2"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_035148348.2"; +chr1 Gnomon stop_codon 2933688 2933690 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292457.2"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_035148348.2"; +chr1 Gnomon transcript 2928123 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_035292451.2"; db_xref "GeneID:100387170"; gbkey "mRNA"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 Proteins, 42 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X3"; transcript_biotype "mRNA"; +chr1 Gnomon exon 2928123 2928250 . + . gene_id "SLITRK5"; transcript_id "XM_035292451.2"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 Proteins, 42 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X3"; transcript_biotype "mRNA"; exon_number "1"; +chr1 Gnomon exon 2929685 2929835 . + . gene_id "SLITRK5"; transcript_id "XM_035292451.2"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 Proteins, 42 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X3"; transcript_biotype "mRNA"; exon_number "2"; +chr1 Gnomon exon 2930809 2935023 . + . 
gene_id "SLITRK5"; transcript_id "XM_035292451.2"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 7 Proteins, 42 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X3"; transcript_biotype "mRNA"; exon_number "3"; +chr1 Gnomon CDS 2930817 2933687 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292451.2"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "3"; protein_id "XP_035148342.2"; +chr1 Gnomon start_codon 2930817 2930819 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292451.2"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "3"; protein_id "XP_035148342.2"; +chr1 Gnomon stop_codon 2933688 2933690 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292451.2"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "3"; protein_id "XP_035148342.2"; +chr1 Gnomon transcript 2928288 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_035292465.2"; db_xref "GeneID:100387170"; gbkey "mRNA"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 1 EST, 7 Proteins, 47 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X6"; transcript_biotype "mRNA"; +chr1 Gnomon exon 2928288 2928982 . + . gene_id "SLITRK5"; transcript_id "XM_035292465.2"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 1 EST, 7 Proteins, 47 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X6"; transcript_biotype "mRNA"; exon_number "1"; +chr1 Gnomon exon 2930809 2935023 . + . 
gene_id "SLITRK5"; transcript_id "XM_035292465.2"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 1 EST, 7 Proteins, 47 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X6"; transcript_biotype "mRNA"; exon_number "2"; +chr1 Gnomon CDS 2930817 2933687 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292465.2"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_035148356.2"; +chr1 Gnomon start_codon 2930817 2930819 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292465.2"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_035148356.2"; +chr1 Gnomon stop_codon 2933688 2933690 . + 0 gene_id "SLITRK5"; transcript_id "XM_035292465.2"; db_xref "GeneID:100387170"; gbkey "CDS"; gene "SLITRK5"; product "SLIT and NTRK-like protein 5"; exon_number "2"; protein_id "XP_035148356.2"; +chr1 Gnomon transcript 2928289 2935023 . + . gene_id "SLITRK5"; transcript_id "XM_054241421.1"; db_xref "GeneID:100387170"; gbkey "mRNA"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 1 EST, 7 Proteins, 42 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X4"; transcript_biotype "mRNA"; +chr1 Gnomon exon 2928289 2928982 . + . gene_id "SLITRK5"; transcript_id "XM_054241421.1"; db_xref "GeneID:100387170"; gene "SLITRK5"; experiment "COORDINATES: polyA evidence [ECO:0006239]"; model_evidence "Supporting evidence includes similarity to: 1 mRNA, 1 EST, 7 Proteins, 42 long SRA reads"; product "SLIT and NTRK like family member 5, transcript variant X4"; transcript_biotype "mRNA"; exon_number "1"; +chr1 Gnomon exon 2929685 2929835 . + . 
import unittest
import os
import sys
import shutil
import subprocess
from compare_gtfs import compare_gtfs, parse_attributes


class TestGTFModification(unittest.TestCase):
    """Regression test for modify_gtf.py against a stored reference output.

    All paths are relative, so the tests must be run from the directory that
    contains modify_gtf.py, Biotypes.tsv and test_data/.
    """

    # Maximum number of differing lines per side shown in the summary file and
    # in the assertion message; the on-disk difference report is never capped.
    PREVIEW_LINES = 5

    @classmethod
    def setUpClass(cls):
        """Create the input, output and reference directories if missing."""
        for directory in [
            "test_data",
            "test_output",
            "test_output/comparison_files",
            "test_data/reference_outputs",
        ]:
            os.makedirs(directory, exist_ok=True)

    def setUp(self):
        """Start every test from an empty test_output tree."""
        if os.path.exists("test_output"):
            shutil.rmtree("test_output")
        os.makedirs("test_output")
        os.makedirs("test_output/comparison_files")

    def run_modify_gtf(self, input_gtf, output_gtf, biotypes_file):
        """Run modify_gtf.py in a subprocess and return the CompletedProcess.

        The script is invoked with the current interpreter. A non-zero exit
        status is not raised here; stderr/stdout are printed so the failure is
        visible in the test log and the caller decides how to react.
        """
        result = subprocess.run(
            [
                sys.executable,
                'modify_gtf.py',
                '-i', input_gtf,
                '-o', output_gtf,
                '-b', biotypes_file,
            ],
            capture_output=True,
            text=True,
        )

        if result.returncode != 0:
            print(f"Error output: {result.stderr}")
            print(f"Standard output: {result.stdout}")

        return result

    def compare_gtf_contents(self, file1, file2):
        """Return (lines only in file1, lines only in file2) after normalizing.

        Both files are reduced to sets of normalized lines, so line order and
        duplicated lines do not count as differences.
        """
        with open(file1) as f1, open(file2) as f2:
            lines1 = {self.normalize_gtf_line(line) for line in f1}
            lines2 = {self.normalize_gtf_line(line) for line in f2}

        return lines1 - lines2, lines2 - lines1

    def normalize_gtf_line(self, line):
        """Return a canonical form of one GTF line for comparison.

        Comment lines (starting with '#') and lines that do not have exactly
        nine tab-separated fields are returned stripped of surrounding
        whitespace. For nine-field records the attribute column is re-emitted
        with its keys in sorted order.
        """
        stripped = line.strip()
        if line.startswith('#'):
            return stripped

        fields = stripped.split('\t')
        if len(fields) != 9:
            return stripped

        # parse_attributes comes from compare_gtfs; it is used here as a
        # mapping of attribute key -> value (NOTE(review): confirm it never
        # returns repeated keys, which a mapping would collapse).
        attrs = parse_attributes(fields[8])
        fields[8] = '; '.join(f'{k} "{v}"' for k, v in sorted(attrs.items()))

        return '\t'.join(fields)

    def create_summary_file(self, test_input, new_output, comparison_results):
        """Write test_output/comparison_files/test_summary.txt.

        comparison_results is the already-formatted report text appended
        after the header naming the input and output GTF files.
        """
        summary_path = "test_output/comparison_files/test_summary.txt"
        with open(summary_path, 'w') as f:
            f.write("GTF Modification Test Summary\n")
            f.write("=========================\n\n")
            f.write(f"Input GTF: {test_input}\n")
            f.write(f"Output GTF: {new_output}\n\n")
            f.write("Comparison Results:\n")
            f.write("------------------\n")
            f.write(comparison_results)

    @staticmethod
    def _build_report(only_in_ref, only_in_new, limit=None):
        """Format the two difference sets as a list of report lines.

        With limit=None every differing line is listed. Otherwise at most
        `limit` lines per side are listed, followed by a count of the lines
        that were left out.
        """
        report = ["GTF Comparison Results:"]
        if not only_in_ref and not only_in_new:
            report.append("\nNo differences found between reference and new GTF files.")
            return report

        sections = (
            ("\nLines only in reference GTF:", "REF", only_in_ref),
            ("\nLines only in new GTF:", "NEW", only_in_new),
        )
        for heading, tag, lines in sections:
            if not lines:
                continue
            ordered = sorted(lines)
            shown = ordered if limit is None else ordered[:limit]
            report.append(heading)
            report.extend(f"{tag}: {line}" for line in shown)
            hidden = len(ordered) - len(shown)
            if hidden:
                report.append(f"... ({hidden} more)")
        return report

    def test_gtf_modification_against_reference(self):
        """Run modify_gtf.py on the test GTF and compare with the reference.

        If no reference output exists yet, the fresh output is stored as the
        reference and the test is skipped, so a first run never fails on the
        comparison.
        """
        test_input = "test_data/test1.gtf"
        reference_output = "test_data/reference_outputs/reference_modified.gtf"
        new_output = "test_output/new_modified.gtf"
        biotypes_file = "Biotypes.tsv"
        report_path = "test_output/comparison_files/difference_report.txt"

        # Verify input files exist
        self.assertTrue(os.path.exists(test_input), f"Test input GTF not found: {test_input}")
        self.assertTrue(os.path.exists(biotypes_file), f"Biotypes file not found: {biotypes_file}")

        # Run current version of modify_gtf
        result = self.run_modify_gtf(test_input, new_output, biotypes_file)
        print("Script output:", result.stdout)
        print("Script errors:", result.stderr)

        self.assertEqual(result.returncode, 0,
                         f"modify_gtf.py failed with error: {result.stderr}")

        # Verify the modified file was created
        self.assertTrue(os.path.exists(new_output),
                        f"Modified GTF file was not created: {new_output}")

        # If reference output doesn't exist, create it
        if not os.path.exists(reference_output):
            print("Creating reference output for the first time...")
            os.makedirs(os.path.dirname(reference_output), exist_ok=True)
            shutil.copy(new_output, reference_output)
            self.skipTest("Reference output created. Run tests again to compare.")

        # Input-vs-output comparison; compare_gtfs is expected to write the
        # three files checked just below, named from the given prefix.
        compare_gtfs(
            test_input,
            new_output,
            "test_output/comparison_files/comparison"
        )

        # Verify comparison output files were created
        expected_files = [
            "test_output/comparison_files/comparison_structural_diff.txt",
            "test_output/comparison_files/comparison_gene_diff.txt",
            "test_output/comparison_files/comparison_attribute_diff.txt"
        ]
        for path in expected_files:
            self.assertTrue(os.path.exists(path), f"Expected output file not created: {path}")

        # Compare normalized contents
        only_in_ref, only_in_new = self.compare_gtf_contents(reference_output, new_output)

        # The report on disk lists every differing line, because the failure
        # message sends the reader there for full details. The summary file
        # and the assertion message carry only a capped preview.
        full_report = self._build_report(only_in_ref, only_in_new)
        preview = self._build_report(only_in_ref, only_in_new, limit=self.PREVIEW_LINES)

        # Always write the report, also when there are no differences
        with open(report_path, 'w') as f:
            f.write("\n".join(full_report))

        # Create summary file
        self.create_summary_file(test_input, new_output, "\n".join(preview))

        # If there are differences, fail the test
        if only_in_ref or only_in_new:
            self.fail("\n".join(preview) +
                      "\n...\nSee test_output/comparison_files/difference_report.txt for full details")

    @classmethod
    def tearDownClass(cls):
        """Leave test_output in place so CI can upload it as an artifact."""
        # Don't clean up - let GitHub Actions collect the artifacts
        pass


if __name__ == '__main__':
    unittest.main()