Skip to content

Commit

Permalink
Use functions to convert hits to positions.
Browse files Browse the repository at this point in the history
  • Loading branch information
haowenz authored and swiftgenomics committed Nov 13, 2022
1 parent 4ffcc6a commit a7bf985
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 49 deletions.
2 changes: 1 addition & 1 deletion src/chromap.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#include "temp_mapping.h"
#include "utils.h"

#define CHROMAP_VERSION "0.2.3-r430"
#define CHROMAP_VERSION "0.2.3-r431"

namespace chromap {

Expand Down
30 changes: 30 additions & 0 deletions src/hit.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#ifndef HIT_H_
#define HIT_H_

#include "strand.h"

namespace chromap {

inline static uint32_t GenerateSequenceIndex(uint64_t seed_hit) {
return (seed_hit >> 33);
}

inline static uint32_t GenerateSequencePosition(uint64_t seed_hit) {
return (seed_hit >> 1);
}

inline static Strand GenerateSequenceStrand(uint64_t seed_hit) {
if ((seed_hit & 1) == 0) {
return kPositive;
}
return kNegative;
}

inline static bool AreTwoHitsOnTheSameStrand(uint64_t seed_hit1,
uint64_t seed_hit2) {
return ((seed_hit1 & 1) == (seed_hit2 & 1));
}

} // namespace chromap

#endif // HIT_H_
54 changes: 31 additions & 23 deletions src/index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <algorithm>
#include <iostream>

#include "hit.h"
#include "minimizer_generator.h"

namespace chromap {
Expand Down Expand Up @@ -271,21 +272,22 @@ void Index::HeapMergeSeedHitLists(
}

uint64_t Index::GenerateCandidatePositionForSingleSeedHit(
uint64_t reference_seed_hit, uint32_t read_position,
const Strand &read_strand) const {
const uint64_t reference_id = reference_seed_hit >> 33;
const uint32_t reference_position = reference_seed_hit >> 1;
const Strand reference_strand =
(reference_seed_hit & 1) == 0 ? kPositive : kNegative;
uint64_t reference_seed_hit, uint64_t read_seed_hit) const {
const uint32_t reference_position =
GenerateSequencePosition(reference_seed_hit);

const uint32_t read_position = GenerateSequencePosition(read_seed_hit);

// For now we can't see the reference here. So let us don't validate
// this seed hit. Instead, we do it later some time when we check the
// candidates.
const uint32_t mapping_start_position =
read_strand == reference_strand
AreTwoHitsOnTheSameStrand(reference_seed_hit, read_seed_hit)
? reference_position - read_position
: reference_position + read_position - kmer_size_ + 1;

const uint64_t reference_id = GenerateSequenceIndex(reference_seed_hit);

const uint64_t candidate_position =
(reference_id << 32) | mapping_start_position;

Expand Down Expand Up @@ -331,19 +333,20 @@ int Index::CollectSeedHits(int max_seed_frequency,

const uint64_t reference_seed_hit = kh_value(lookup_table_, khash_iterator);

const uint32_t read_position = minimizers[mi].GetSequencePosition();
const Strand read_strand = minimizers[mi].GetSequenceStrand();
const uint64_t read_seed_hit = minimizers[mi].GetHit();
const uint32_t read_position = GenerateSequencePosition(read_seed_hit);
const Strand read_strand = GenerateSequenceStrand(read_seed_hit);

const bool is_reference_minimizer_single =
(kh_key(lookup_table_, khash_iterator) & 1) > 0;

if (is_reference_minimizer_single) {
const uint64_t candidate_position =
GenerateCandidatePositionForSingleSeedHit(reference_seed_hit,
read_position, read_strand);
read_seed_hit);

const Strand reference_strand =
(reference_seed_hit & 1) == 0 ? kPositive : kNegative;
GenerateSequenceStrand(reference_seed_hit);

if (read_strand == reference_strand) {
if (use_heap) {
Expand All @@ -370,12 +373,13 @@ int Index::CollectSeedHits(int max_seed_frequency,
occurrence_table_[occurrence_offset + oi];

const uint64_t candidate_position =
GenerateCandidatePositionForSingleSeedHit(
reference_seed_hit, read_position, read_strand);
GenerateCandidatePositionForSingleSeedHit(reference_seed_hit,
read_seed_hit);

const uint32_t reference_position = reference_seed_hit >> 1;
const uint32_t reference_position =
GenerateSequencePosition(reference_seed_hit);
const Strand reference_strand =
(reference_seed_hit & 1) == 0 ? kPositive : kNegative;
GenerateSequenceStrand(reference_seed_hit);

if (read_strand == reference_strand) {
if (use_heap) {
Expand Down Expand Up @@ -521,17 +525,20 @@ int Index::CollectSeedHitsFromRepetitiveReadWithMateInfo(
}

const uint64_t reference_seed_hit = kh_value(lookup_table_, khash_iterator);
const uint32_t read_position = minimizers[mi].GetSequencePosition();
const Strand read_strand = minimizers[mi].GetSequenceStrand();

const uint64_t read_seed_hit = minimizers[mi].GetHit();
const uint32_t read_position = GenerateSequencePosition(read_seed_hit);
const Strand read_strand = GenerateSequenceStrand(read_seed_hit);

const bool is_reference_minimizer_single =
(kh_key(lookup_table_, khash_iterator) & 1) > 0;

if (is_reference_minimizer_single) {
const uint64_t reference_id = reference_seed_hit >> 33;
const uint32_t reference_position = reference_seed_hit >> 1;
const uint64_t reference_id = GenerateSequenceIndex(reference_seed_hit);
const uint32_t reference_position =
GenerateSequencePosition(reference_seed_hit);
const Strand reference_strand =
(reference_seed_hit & 1) == 0 ? kPositive : kNegative;
GenerateSequenceStrand(reference_seed_hit);

if (read_strand == reference_strand) {
if (strand == kPositive) {
Expand Down Expand Up @@ -579,10 +586,11 @@ int Index::CollectSeedHitsFromRepetitiveReadWithMateInfo(
break;
}

const uint64_t reference_id = reference_seed_hit >> 33;
const uint32_t reference_position = reference_seed_hit >> 1;
const uint64_t reference_id = GenerateSequenceIndex(reference_seed_hit);
const uint32_t reference_position =
GenerateSequencePosition(reference_seed_hit);
const Strand reference_strand =
(reference_seed_hit & 1) == 0 ? kPositive : kNegative;
GenerateSequenceStrand(reference_seed_hit);

if (read_strand == reference_strand) {
if (strand == kPositive) {
Expand Down
3 changes: 1 addition & 2 deletions src/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,7 @@ class Index {
std::vector<uint64_t> &seed_hits) const;

uint64_t GenerateCandidatePositionForSingleSeedHit(
uint64_t reference_seed_hit, uint32_t read_position,
const Strand &read_strand) const;
uint64_t reference_seed_hit, uint64_t read_seed_hit) const;

protected:
int kmer_size_ = 0;
Expand Down
11 changes: 0 additions & 11 deletions src/minimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,6 @@ class Minimizer {

inline uint64_t GetHit() const { return hit_; }

inline uint32_t GetSequenceIndex() const { return (hit_ >> 33); }

inline uint32_t GetSequencePosition() const { return (hit_ >> 1); }

inline Strand GetSequenceStrand() const {
if ((hit_ & 1) == 0) {
return kPositive;
}
return kNegative;
}

inline bool operator<(const Minimizer &m) const {
if (hash_key_ < m.hash_key_) {
return true;
Expand Down
28 changes: 16 additions & 12 deletions src/mmcache.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef CHROMAP_CACHE_H_
#define CHROMAP_CACHE_H_

#include "hit.h"
#include "index.h"
#include "minimizer.h"

Expand Down Expand Up @@ -47,8 +48,9 @@ class mm_cache {
}
if (i >= size) {
for (i = 0; i < size - 1; ++i) {
if (cache.offsets[i] != ((int)minimizers[i + 1].GetSequencePosition()) -
((int)minimizers[i].GetSequencePosition()))
if (cache.offsets[i] !=
((int)GenerateSequencePosition(minimizers[i + 1].GetHit()) -
(int)GenerateSequencePosition(minimizers[i].GetHit())))
break;
}
if (i >= size - 1) direction = 1;
Expand All @@ -64,8 +66,8 @@ class mm_cache {
if (i >= size) {
for (i = 0, j = size - 1; i < size - 1; ++i, --j) {
if (cache.offsets[i] !=
((int)minimizers[j].GetSequencePosition()) -
((int)minimizers[j - 1].GetSequencePosition()))
((int)GenerateSequencePosition(minimizers[j].GetHit())) -
((int)GenerateSequencePosition(minimizers[j - 1].GetHit())))
break;
}

Expand Down Expand Up @@ -132,7 +134,7 @@ class mm_cache {
neg_candidates = cache[hidx].negative_candidates;
repetitive_seed_length = cache[hidx].repetitive_seed_length;
int size = pos_candidates.size();
int shift = (int)minimizers[0].GetSequencePosition();
int shift = (int)GenerateSequencePosition(minimizers[0].GetHit());
for (i = 0; i < size; ++i) {
uint64_t rid = pos_candidates[i].position >> 32;
int rpos = (int)pos_candidates[i].position;
Expand All @@ -146,9 +148,10 @@ class mm_cache {
int size = cache[hidx].negative_candidates.size();
// Start position of the last minimizer shoud equal the first minimizer's
// end position in rc "read".
int shift = read_len -
((int)minimizers[msize - 1].GetSequencePosition()) - 1 +
kmer_length - 1;
int shift =
read_len -
((int)GenerateSequencePosition(minimizers[msize - 1].GetHit())) - 1 +
kmer_length - 1;

pos_candidates = cache[hidx].negative_candidates;
for (i = 0; i < size; ++i) {
Expand Down Expand Up @@ -240,7 +243,7 @@ if (cache[hidx].finger_print_cnt_sum <= 5)
return;
}
int size = pos_candidates.size();
int shift = (int)minimizers[0].GetSequencePosition();
int shift = (int)GenerateSequencePosition(minimizers[0].GetHit());
// Do not cache if it is too near the start.
for (i = 0; i < size; ++i)
if ((int)pos_candidates[i].position < kmer_length + shift) {
Expand All @@ -252,7 +255,8 @@ if (cache[hidx].finger_print_cnt_sum <= 5)
size = neg_candidates.size();
for (i = 0; i < size; ++i)
if ((int)neg_candidates[i].position -
((int)minimizers[msize - 1].GetSequencePosition()) <
((int)GenerateSequencePosition(
minimizers[msize - 1].GetHit())) <
kmer_length + shift) {
cache[hidx].offsets.clear();
cache[hidx].strands.clear();
Expand All @@ -267,8 +271,8 @@ if (cache[hidx].finger_print_cnt_sum <= 5)
}
for (i = 0; i < msize - 1; ++i) {
cache[hidx].offsets[i] =
((int)minimizers[i + 1].GetSequencePosition()) -
((int)minimizers[i].GetSequencePosition());
((int)GenerateSequencePosition(minimizers[i + 1].GetHit())) -
((int)GenerateSequencePosition(minimizers[i].GetHit()));
}
std::vector<Candidate>().swap(cache[hidx].positive_candidates);
std::vector<Candidate>().swap(cache[hidx].negative_candidates);
Expand Down

0 comments on commit a7bf985

Please sign in to comment.