From f6b5c00a2e62a6a4ea43bc259612d815fc481d31 Mon Sep 17 00:00:00 2001 From: Tobias Rausch Date: Fri, 7 Jun 2024 15:53:56 +0200 Subject: [PATCH] select feature --- src/gtf.h | 4 ++-- src/padlock.h | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/gtf.h b/src/gtf.h index ff53663..10a6985 100644 --- a/src/gtf.h +++ b/src/gtf.h @@ -129,7 +129,7 @@ namespace dicey { return 0; } std::string ft = *tokIter++; - if (ft == "exon") { // Select exons + if (ft == c.feature) { // Select exons if (tokIter != tokens.end()) { int32_t start = boost::lexical_cast(*tokIter++); int32_t end = boost::lexical_cast(*tokIter++); @@ -150,7 +150,7 @@ namespace dicey { Tokenizer kvTokens(keyval, sepKeyVal); Tokenizer::iterator kvTokensIt = kvTokens.begin(); std::string key = *kvTokensIt++; - if (key == "gene_id") { // Select gene_id + if (key == c.idname) { // Select gene_id // Protein-coding exon? bool includeExon = false; for(Tokenizer::iterator arIter = attrTokens.begin(); arIter != attrTokens.end(); ++arIter) { diff --git a/src/padlock.h b/src/padlock.h index 68c7fb3..1ac5a06 100644 --- a/src/padlock.h +++ b/src/padlock.h @@ -54,7 +54,9 @@ namespace dicey std::string ucscDB; std::string anchor; std::string spacerleft; - std::string spacerright; + std::string spacerright; + std::string feature; + std::string idname; std::set geneset; std::vector chrname; std::map nchr; @@ -264,7 +266,7 @@ namespace dicey rcfile << meta.dump() << ','; rcfile << "\"data\":{"; rcfile << "\"columns\": ["; - rcfile << "\"Gene\", \"Symbol\", \"Code\", \"Position\", \"UCSC\", \"Strand\", \"ExonCoordinates\", \"ProbeSeq\", \"SpacerLeft\", \"AnchorSeq\", \"BarcodeSeq\", \"SpacerRight\", \"PadlockSeq\", \"Arm1TM\", \"Arm2TM\", \"BarcodeTM\", \"ProbeTM\", \"Arm1GC\", \"Arm2GC\", \"BarcodeGC\", \"ProbeGC\""; + rcfile << "\"Gene\", \"Symbol\", \"Code\", \"Position\", \"UCSC\", \"Strand\", \"FeatureCoordinates\", \"ProbeSeq\", \"SpacerLeft\", \"AnchorSeq\", \"BarcodeSeq\", \"SpacerRight\", \"PadlockSeq\", \"Arm1TM\", \"Arm2TM\", \"BarcodeTM\", \"ProbeTM\", \"Arm1GC\", \"Arm2GC\", \"BarcodeGC\", \"ProbeGC\""; rcfile << "]," << std::endl; rcfile << "\"rows\": [" << std::endl; } @@ -272,7 +274,7 @@ namespace dicey // Outfile std::cout << '[' << boost::posix_time::to_simple_string(boost::posix_time::second_clock::local_time()) << "] " << "Compute padlocks" << std::endl; std::ofstream ofile(c.outfile.string().c_str()); - ofile << "Gene\tSymbol\tCode\tPosition\tUCSC\tStrand\tExonCoordinates\tProbeSeq\tSpacerLeft\tAnchorSeq\tBarcodeSeq\tSpacerRight\tPadlockSeq\tArm1TM\tArm2TM\tBarcodeTM\tProbeTM\tArm1GC\tArm2GC\tBarcodeGC\tProbeGC" << std::endl; + ofile << "Gene\tSymbol\tCode\tPosition\tUCSC\tStrand\tFeatureCoordinates\tProbeSeq\tSpacerLeft\tAnchorSeq\tBarcodeSeq\tSpacerRight\tPadlockSeq\tArm1TM\tArm2TM\tBarcodeTM\tProbeTM\tArm1GC\tArm2GC\tBarcodeGC\tProbeGC" << std::endl; // Parse chromosomes faidx_t* fai = fai_load(c.genome.string().c_str()); uint32_t targetlen = 2 * c.armlen; @@ -536,6 +538,8 @@ namespace dicey ("barcodes,b", boost::program_options::value(&c.barcodes), "FASTA barcode file") ("distance,d", boost::program_options::value(&c.distance)->default_value(1), "neighborhood distance") ("armlen,m", boost::program_options::value(&c.armlen)->default_value(20), "probe arm length") + ("attribute,u", boost::program_options::value(&c.idname)->default_value("gene_id"), "gtf/gff3 attribute") + ("feature,f", boost::program_options::value(&c.feature)->default_value("exon"), "gtf/gff3 feature") ("probe,p", "apply distance to entire probe, i.e., only one arm needs to be unique") ("overlapping,v", "allow overlapping probes") ;