Skip to content

Commit

Permalink
Implement QCIPA scoring (fix for #18 )
Browse files Browse the repository at this point in the history
  • Loading branch information
dancsi committed Dec 23, 2017
1 parent 7c20a20 commit ed82f7c
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 27 deletions.
5 changes: 5 additions & 0 deletions src/fastscore/fastscore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "scoring/ScoringHelper.h"
#include "scoring/ScoringEnginePotapov.h"
#include "scoring/ScoringEngineBCIPA.h"
#include "scoring/ScoringEngineQCIPA.h"

#include "common/PeptideSet.h"
#include "common/SpecialMatrices.h"
Expand Down Expand Up @@ -94,6 +95,10 @@ int main(int argc, char **argv) {
ScoringHelper<ScoringEngineBCIPA> sc;
score_pairs(ps, sc, alignment, truncate, orientation, im, om, am);
}
else if (score_func == ScoringOptions::ScoreFunc::qcipa) {
ScoringHelper<ScoringEngineQCIPA> sc;
score_pairs(ps, sc, alignment, truncate, orientation, im, om, am);
}

return 0;
}
13 changes: 8 additions & 5 deletions src/fastscore/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ struct Options {
puts(R"(
USAGE: fastscore INPUT [OPTIONS...]
Available options:
--basename=PATH specify output base name
--max-heptad-displacement=NUM try shifting the peptides left and right by up to this many heptads
--alignment=LIST try these alignments
--truncate={0, 1} truncate the chains when aligning them, false by default
--basename=PATH specify output base name
--max-heptad-displacement=NUM try shifting the peptides left and right by up to this many heptads
--alignment=LIST try these alignments
--truncate={0, 1} truncate the chains when aligning them, false by default
--orientation={parallel, antiparallel, both}
--score-func={potapov, bcipa} choose scoring function
--score-func={potapov, bcipa, qcipa} choose scoring function
)");
exit(1);
}
Expand Down Expand Up @@ -103,6 +103,9 @@ Available options:
else if (score_func_str == "bcipa") {
score_func = ScoringOptions::ScoreFunc::bcipa;
}
else if (score_func_str == "qcipa") {
score_func = ScoringOptions::ScoreFunc::qcipa;
}
else {
print_usage_and_exit();
}
Expand Down
42 changes: 22 additions & 20 deletions src/scoring/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
set(SCORING_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/ScoringEngineBCIPA.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ScoringEnginePotapov.cpp
)

set(SCORING_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/ScoringEngineBCIPA.h
${CMAKE_CURRENT_SOURCE_DIR}/ScoringEnginePotapov.h
${CMAKE_CURRENT_SOURCE_DIR}/ScoringHelper.h
)

set(SCORING_ALL ${SCORING_SOURCES} ${SCORING_HEADERS})

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/scores.dat ${CMAKE_CURRENT_BINARY_DIR}/../scores.dat COPYONLY)

add_library(scoring ${SCORING_ALL})

target_include_directories(scoring PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/../common/)
target_link_libraries(scoring common ${EXTERNAL_LIBRARIES})

set(SCORING_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/ScoringEngineBCIPA.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ScoringEngineQCIPA.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ScoringEnginePotapov.cpp
)

set(SCORING_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/ScoringEngineBCIPA.h
${CMAKE_CURRENT_SOURCE_DIR}/ScoringEngineQCIPA.h
${CMAKE_CURRENT_SOURCE_DIR}/ScoringEnginePotapov.h
${CMAKE_CURRENT_SOURCE_DIR}/ScoringHelper.h
)

set(SCORING_ALL ${SCORING_SOURCES} ${SCORING_HEADERS})

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/scores.dat ${CMAKE_CURRENT_BINARY_DIR}/../scores.dat COPYONLY)

add_library(scoring ${SCORING_ALL})

target_include_directories(scoring PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/../common/)
target_link_libraries(scoring common ${EXTERNAL_LIBRARIES})

install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/scores.dat DESTINATION .)
2 changes: 1 addition & 1 deletion src/scoring/ScoringEngineBCIPA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ ScoringEngineBCIPA::ScoringEngineBCIPA() {
}

template<typename T, size_t N>
size_t array_length(T(&)[N])
inline size_t array_length(T(&)[N])
{
return N;
}
Expand Down
106 changes: 106 additions & 0 deletions src/scoring/ScoringEngineQCIPA.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#include "ScoringEngineQCIPA.h"

#include <algorithm>
#include <cctype>

ScoringEngineQCIPA::ScoringEngineQCIPA() {
}

template<typename T, size_t N>
inline size_t array_length(T(&)[N])
{
return N;
}

float ScoringEngineQCIPA::score(string_view chain1, string_view chain2) /*
* assumes sequences are aligned, starting with f position
* assumes uppercase characters (case sensitive!)
* all non-aa characters (including whitespace) assumed to take up space
in alignment, but contribute nothing to the score
*/
{
auto n1 = chain1.length();
auto n2 = chain2.length();

auto n = std::min(n1, n2);

int n_pairs = 0;
float hp_sum = 0, a_sum = 0, ge_sum;

for (int i = 0; i < n; i++)
//fgabcde register
//0123456
{
int reg = i % 7; //register index (0 = f)

if (!isupper(chain1[i]) || !isupper(chain2[i])) continue;

/* helical propensity contribution */
if ((hp_score(chain1[i])) != 0 && (hp_score(chain2[i]) != 0)) //only count paired residues ('-':'X' pairs contribute nothing)

This comment has been minimized.

Copy link
@ajasja

ajasja Dec 23, 2017

Collaborator

@dancsi Actually the helical propensity is just calculated from one chain, so this is unnecessary. (if the truncate is true, this never comes up, but if it is not, it should just sum all the weights for all the amino acid residues (ignoring -) in length.

This comment has been minimized.

Copy link
@dancsi

dancsi Dec 23, 2017

Author Owner

I think this is also taken from the reference implementation for bCIPA. This is the relevant segment

sub bCIPA {
...
    my $hp = bCIPAhp($seq1) + bCIPAhp($seq2);
    my $tm = 81.3256*$hp + -10.5716*$core + -4.7771*$el + -29.1320 - 273;
    return $tm;
}

sub bCIPAhp {
    my ($seq) = @_;

    my $hp = 0;
    my $count = 0;

    for( my $i = 0; $i < @$seq; $i++ )
    {
	next if( $$seq[$i] eq "-" );

	$hp += williams($$seq[$i]);
	$count++;
    }

    return $hp/$count;
}

This comment has been minimized.

Copy link
@dancsi

dancsi Dec 23, 2017

Author Owner

The part that is unclear is why are they dividing $hp by $count, as if they were computing some average helical propensity

This comment has been minimized.

Copy link
@ajasja

ajasja Dec 23, 2017

Collaborator

@dancsi It should just calculate for each chain separately. if ((hp_score(chain1[i])) != 0 && (hp_score(chain2[i]) != 0)) is the problem. the X in -:X should infact contribute to the propensity (in chain 2).

In the reference implementation you see that the propensity is calculated separately for each chain using bCIPAhp.

This comment has been minimized.

Copy link
@dancsi

dancsi Dec 23, 2017

Author Owner

Oh, that's true

This comment has been minimized.

Copy link
@ajasja

ajasja Dec 23, 2017

Collaborator

@dancsi

as if they were computing some average helical propensity

In fact it's exactly that:)

This comment has been minimized.

Copy link
@andy-brodnik

andy-brodnik via email Dec 24, 2017

Collaborator

This comment has been minimized.

Copy link
@dancsi

dancsi via email Dec 24, 2017

Author Owner
{
hp_sum += hp_score(chain1[i]) + hp_score(chain2[i]);
n_pairs++;
}

/* core contribution */
if (reg == 2) //sites a, a'
{
char c1 = chain1[i];
char c2 = chain2[i];
if (c1 > c2) std::swap(c1, c2);
if (c1 == 'I') {
if (c2 == 'I') {
a_sum += -1.75;
}
else if (c2 == 'N') {
a_sum += 11.78;
}
}
else if (c1 == 'N' && c2 == 'N') {
a_sum += -5.24;
}
}

/* electrostatic contribution */
else if (reg == 1) //g sites - interact with next e on opposite chain
{
if (i + 5 < n2)
{
if (!isupper(chain2[i + 5])) continue;
ge_sum += ge_score(chain1[i], chain2[i + 5]);
}
if (i + 5 < n1)
{
if (!isupper(chain1[i + 5])) continue;
ge_sum += ge_score(chain2[i], chain1[i + 5]);
}
}

This comment has been minimized.

Copy link
@ajasja

ajasja Dec 23, 2017

Collaborator

@dancsi The reg==6 and i-5 on the other chain is missing:)

On second thought they are not missing, since ge_sum += ge_score(chain2[i], chain1[i + 5]); covers the other part.

This comment has been minimized.

Copy link
@dancsi

dancsi Dec 23, 2017

Author Owner

I am not so sure, since for each g (reg==1), I am checking the e on the opposite side. As you can see, the condition 1+5<n is duplicated for both chains. This code is taken from the bCIPA reference implementation. A I missing something?

This comment has been minimized.

Copy link
@ajasja

ajasja Dec 23, 2017

Collaborator

No, you're right, it's fine.

}

return -(4.16 * hp_sum / n_pairs + a_sum + ge_sum + 30.18);
}

float ScoringEngineQCIPA::hp_score(const char c) {
if (!isupper(c)) return 0;
const float scores[] = { 1.41f, 0.66f, 0.99f, 1.59f, 1.16f, 0.43f, 1.05f, 1.09f, 1.23f, 1.34f, 1.30f, 0.76f, 0.34f, 1.27f, 1.21f, 0.57f, 0.76f, 0.98f, 1.02f, 0.74f };
return scores[detail::residue_code(c)];
}

float ScoringEngineQCIPA::ge_score(char c1, char c2)
{
float ret = 0;
if (c1 > c2) std::swap(c1, c2);
if (c1 == 'E') {
if (c2 == 'E') {
ret += -11.3;
}
else if (c2 == 'K') {
ret += -0.97;
}
}
else if (c1 == 'K' && c2 == 'K') {
ret += -76.22;
}

return ret;
}
16 changes: 16 additions & 0 deletions src/scoring/ScoringEngineQCIPA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "ScoringHelper.h"

struct ScoringEngineQCIPA {
using string_view = std::string_view;

using weights_t = std::array<float, 20 * 20>;
weights_t c_scores, es_scores;

ScoringEngineQCIPA();

float score(string_view chain1, string_view chain2);
float hp_score(const char c);
float ge_score(char c1, char c2);
};
2 changes: 1 addition & 1 deletion src/scoring/ScoringHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ namespace ScoringOptions {
{}
};

enum class ScoreFunc { potapov, bcipa };
enum class ScoreFunc { potapov, bcipa, qcipa };
}

template<typename ScoringEngine>
Expand Down

0 comments on commit ed82f7c

Please sign in to comment.