diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..f6cb8ad --- /dev/null +++ b/.clang-format @@ -0,0 +1 @@ +BasedOnStyle: Google diff --git a/.gitignore b/.gitignore index 5e35646..fd3b9ca 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ cmake-build-debug test-example build +.vscode diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..8ce581f --- /dev/null +++ b/.travis.yml @@ -0,0 +1,19 @@ +language: cpp + +addons: + apt: + sources: + - boost-latest + - ubuntu-toolchain-r-test + packages: + - g++-4.9 + - libboost1.55-all-dev + +script: + - export CC="gcc-4.9" + - export CXX="g++-4.9" + - mkdir build + - cd build + - cmake .. + - make -j4 + - make test \ No newline at end of file diff --git a/common/parameters.cc b/common/parameters.cc index 0634a86..8e580ed 100644 --- a/common/parameters.cc +++ b/common/parameters.cc @@ -36,12 +36,12 @@ #include #include -using boost::lexical_cast; -using boost::format; using boost::assign::list_of; -using std::string; +using boost::format; +using boost::lexical_cast; using std::cerr; using std::endl; +using std::string; using std::vector; namespace po = boost::program_options; @@ -119,7 +119,7 @@ bool Parameters::Load(int argc, char **argv) { if (argc == 1) { std::cerr << usage << std::endl; - return false; + throw std::invalid_argument(""); } po::variables_map arg_map; diff --git a/common/parameters.h b/common/parameters.h index ac4f4b6..3484693 100644 --- a/common/parameters.h +++ b/common/parameters.h @@ -31,25 +31,29 @@ #include "genotyping/short_repeat_genotyper.h" class Outputs { -public: + public: Outputs(const std::string vcf_path, const std::string json_path, const std::string log_path); std::ostream &vcf() { return vcf_; } std::ostream &json() { return json_; } std::ostream &log() { return log_; } -private: + private: std::ofstream vcf_; std::ofstream json_; std::ofstream log_; }; class Parameters { -public: + public: const double kSmallestPossibleDepth = 5.0; Parameters() - : region_extension_len_(1000), min_wp_(0.90), min_baseq_(20), - min_anchor_mapq_(60), skip_unaligned_(false), depth_(0.0), + : region_extension_len_(1000), + min_wp_(0.90), + min_baseq_(20), + min_anchor_mapq_(60), + skip_unaligned_(false), + depth_(0.0), sex_(Sex::kFemale) {} bool Load(int argc, char **argv); std::string bam_path() const { return bam_path_; } @@ -73,7 +77,7 @@ class Parameters { bool depth_is_set() const { return depth_ >= kSmallestPossibleDepth; } Sex sex() const { return sex_; } -private: + private: std::string bam_path_; std::string genome_path_; // Maximum distance from a region to search for relevant reads. diff --git a/include/version.h b/include/version.h index 3c34d6b..d750fe8 100644 --- a/include/version.h +++ b/include/version.h @@ -24,4 +24,4 @@ #include -const std::string kProgramVersion = "Expansion Hunter v2.5.3"; +const std::string kProgramVersion = "Expansion Hunter v2.5.4"; diff --git a/src/expansion_hunter.cc b/src/expansion_hunter.cc index 3a528d2..169ddbe 100644 --- a/src/expansion_hunter.cc +++ b/src/expansion_hunter.cc @@ -51,14 +51,14 @@ #include "purity/purity.h" #include "rep_align/rep_align.h" -using std::unordered_set; -using std::map; -using std::vector; -using std::string; +using std::array; using std::cerr; using std::endl; +using std::map; using std::pair; -using std::array; +using std::string; +using std::unordered_set; +using std::vector; // Returns the length of the first read in a BAM file. size_t CalcReadLen(const string &bam_path) { @@ -189,14 +189,10 @@ void CacheAligns(BamFile *bam_file, const RepeatSpec &repeat_spec, &align_pairs); } - // Filling-in missing mates by jumping around the BAM. + // Filling-in missing mates by jumping around the BAM/CRAM file. cerr << TimeStamp() << ",\t[Filling in mates]" << endl; - if ((*bam_file).format() == BamFile::kBamFile) { - FillinMates(*bam_file, align_pairs, repeat_spec.units_shifts, 0.9, - ontarget_frag_names); - } else { - cerr << TimeStamp() << ",\t[Skipping filling in mates]" << endl; - } + FillinMates(*bam_file, align_pairs, repeat_spec.units_shifts, 0.9, + ontarget_frag_names); cerr << TimeStamp() << ",\t[Done filling in mates]" << endl; } @@ -385,14 +381,12 @@ void EstimateRepeatSizes(const Parameters ¶meters, haplotype_candidates.push_back( RepeatAllele(read_group.size, read_group.num_supporting_reads, ReadType::kSpanning)); - } else if (read_group.read_type == ReadType::kInrepeat) { flanking_size_counts[num_units_in_read] += read_group.num_supporting_reads; haplotype_candidates.push_back( RepeatAllele(num_units_in_read, read_group.num_supporting_reads, ReadType::kInrepeat)); - } else if (read_group.read_type == ReadType::kFlanking) { haplotype_candidates.push_back( RepeatAllele(read_group.size, read_group.num_supporting_reads, @@ -400,7 +394,6 @@ void EstimateRepeatSizes(const Parameters ¶meters, for (const auto &align : read_group.rep_aligns) { flanking_size_counts[align.size] += 1; } - } else { throw std::logic_error("Do not know how to deal with " + kReadTypeToString.at(read_group.read_type) + @@ -427,9 +420,10 @@ void EstimateRepeatSizes(const Parameters ¶meters, cerr << "]" << endl; if (haplotype_candidates.empty()) { - cerr << TimeStamp() - << ",\t[Skipping this region because no informative reads were found]" - << endl; + cerr + << TimeStamp() + << ",\t[Skipping this region because no informative reads were found]" + << endl; continue; } @@ -468,12 +462,13 @@ int main(int argc, char *argv[]) { cerr << kProgramVersion << endl; if (!parameters.Load(argc, argv)) { - return 1; + return 0; } - cerr << TimeStamp() << ",[Starting Logging for " << parameters.sample_name() << "]" << endl; + cerr << TimeStamp() << ",[Starting Logging for " << parameters.sample_name() + << "]" << endl; - Outputs outputs(parameters.vcf_path(), parameters.json_path(), + Outputs outputs(parameters.vcf_path(), parameters.json_path(), parameters.log_path()); map repeat_specs; @@ -498,7 +493,8 @@ int main(int argc, char *argv[]) { parameters.set_depth(depth); } - cerr << TimeStamp() << ",[Read length: " << parameters.read_len() << "]" << endl; + cerr << TimeStamp() << ",[Read length: " << parameters.read_len() << "]" + << endl; cerr << TimeStamp() << ",[Depth: " << parameters.depth() << "]" << endl; if (parameters.depth() < parameters.kSmallestPossibleDepth) { diff --git a/src/vcf_output.cc b/src/vcf_output.cc index 4821184..be14929 100644 --- a/src/vcf_output.cc +++ b/src/vcf_output.cc @@ -31,18 +31,17 @@ #include -using std::vector; -using std::ostream; -using std::string; -using std::map; using std::cerr; using std::endl; +using std::map; +using std::ostream; +using std::string; +using std::vector; void WriteVcf(const Parameters ¶meters, const std::map &repeat_specs, const std::vector &sample_findings, std::ostream &out) { - std::stringstream vcf_header, vcf_body; // clang-format off vcf_header << @@ -55,11 +54,11 @@ void WriteVcf(const Parameters ¶meters, "##INFO=\n" "##FORMAT=\n" "##FORMAT=\n" - "##FORMAT=\n" - "##FORMAT=\n" - "##FORMAT=\n" - "##FORMAT=\n" - "##FORMAT=\n"; + "##FORMAT=\n" + "##FORMAT=\n" + "##FORMAT=\n" + "##FORMAT=\n" + "##FORMAT=\n"; // clang-format on std::set alt_sizes; @@ -81,7 +80,6 @@ void WriteVcf(const Parameters ¶meters, sample_ad_fl, sample_ad_ir; for (const RepeatAllele allele : region_findings.genotype) { - const int allele_len = allele.size_ * unit_len; const string source = kReadTypeToString.at(allele.type_); @@ -141,17 +139,17 @@ void WriteVcf(const Parameters ¶meters, } } const Region ®ion = repeat_spec.target_region; - const string info = "SVTYPE=STR;END=" + std::to_string(region.end()) + - ";REF=" + std::to_string(reference_size) + ";RL=" + - std::to_string(reference_size * unit_len) + ";RU=" + - unit_encoding + ";REPID=" + region_id; + const string info = "END=" + std::to_string(region.end()) + + ";REF=" + std::to_string(reference_size) + + ";RL=" + std::to_string(reference_size * unit_len) + + ";RU=" + unit_encoding + ";REPID=" + region_id; if (alt.empty()) { alt = "."; } vcf_body << region.chrom() << "\t" << region.start() - 1 << "\t.\t" << ref_field << "\t" << alt << "\t.\tPASS\t" << info - << "\tGT:SO:CN:CI:AD_SP:AD_FL:AD_IR\t" << sample_gt << ":" + << "\tGT:SO:REPCN:REPCI:ADSP:ADFL:ADIR\t" << sample_gt << ":" << sample_so << ":" << sample_cn << ":" << sample_ci << ":" << sample_ad_sp << ":" << sample_ad_fl << ":" << sample_ad_ir << endl;