Skip to content

Commit

Permalink
New feature: tertiary interaction
Browse files Browse the repository at this point in the history
  • Loading branch information
khb7840 committed Apr 8, 2024
1 parent b75d633 commit ce7b23e
Show file tree
Hide file tree
Showing 7 changed files with 287 additions and 29 deletions.
30 changes: 13 additions & 17 deletions NOTE.md
Original file line number Diff line number Diff line change
@@ -1,28 +1,34 @@
# Development note

## TODOs 240404

IMPORTANT: BENCHMARK
- [ ] Setup module & script
- [ ] Build an index of PDB database
- [ ] TODO: Rebuild one with nbin_dist = 16, nbin_angle = 4
- [ ] Build an index of Swissprot
- [ ] Check if the query from other lab works or not
- [ ] Read MASTER, PDB realtime motif, pyscomotif on how they benchmarked
- [ ] TODO: check SCOP database
- [ ] Compare with pyscomotif
- [ ] TODO: IMPORTANT: Download and rerun pyscomotif

QUERYING
- [x] DONE: Comprehensive filtering parameters: node coverage, edge coverage, exact match, total match, grid count, check that all grids are nearby
- [ ] Allow different amino acid pairs
- [ ] TODO: Policies: Any, Exact, Same property
- [ ] Collect test query info / commands in QUERY.md

DEV
- [ ] Print node residue,
- [ ] CLI: polish grid related parameters
- [ ] CLI::index: Delete unnecessary parameters
- [x] DONE: CLI::query_pdb: filter how??
- [ ] CLI: polish logging
- [x] DONE: CLI::query_pdb: Extract functions and measure time
- [ ] CLI::query_pdb: Log the original query
- [ ] CLI::query_pdb: Output option
- [x] DONE: CLI::query_pdb: Print only the base name of the pdb file
- [x] DONE: CLI::index: Save only the base name of the pdb file (optional)
- [ ] CLI::query_pdb: Multiple queries by input file

INDEXING
- [x] DONE: Add an option to save indices with different schemes
- [x] 1. ID only
- [x] 2. ID + grid
- [ ] 3. ID + position

GEOMETRY
Expand All @@ -31,16 +37,6 @@ GEOMETRY

## TODOs

IMPORTANT: BENCHMARK
- [ ] Setup module & script
- [ ] Build an index of PDB database
- [ ] TODO: Rebuild one with nbin_dist = 16, nbin_angle = 4
- [ ] Build an index of Swissprot
- [ ] Check if the query from other lab works or not
- [ ] Read MASTER, PDB realtime motif, pyscomotif on how they benchmarked
- [ ] TODO: check SCOP database
- [ ] Compare with pyscomotif
- [ ] TODO: IMPORTANT: Download and rerun pyscomotif

DEV
- [ ] TODO: Split and extract ranking module
Expand Down
15 changes: 7 additions & 8 deletions src/cli/workflows/build_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,18 @@ Options:
-p, --pdbs <PDB_DIR> Directory containing PDB files
-y, --type <HASH_TYPE> Hash type to use (pdb, trrosetta, default)
-i, --index <INDEX_PATH> Path to save the index table
-m, --mode <MODE> Mode to index (default=0, 0: index only id, 1: index id and grid, 2: index id and position)
-m, --mode <MODE> Mode to index (default=id, id: index only id, grid: index id and grid, pos: index id and position)
-t, --threads <THREADS> Number of threads to use
-d, --distance <NBIN_DIST> Number of distance bins (default 0, zero means default)
-a, --angle <NBIN_ANGLE> Number of angle bins (default 0, zero means default)
-g, --grid <GRID_WIDTH> Grid width (default 30.0)
-c, --chunk <CHUNK_SIZE> Number of PDB files to index at once (default, max=65535)
-r, --recursive Index PDB files in subdirectories
-n, --max-residue <MAX_RES> Maximum number of residues in a PDB file (default=3000)
--idtype <ID_TYPE> ID type to use (pdb, uniprot, afdb, relpath, abspath, default=relpath)
-v, --verbose Print verbose messages
-h, --help Print this help menu
";
// TODO: ADD MONITOR_MEMORY AS A PARAMETER
// TODO: ADD NBIN_ANGLE, NBIN_DIST AS PARAMETERS

pub fn build_index(env: AppArgs) {
match env {
Expand Down Expand Up @@ -204,12 +203,12 @@ mod tests {
#[test]
fn test_build_index() {
let pdb_dir = "data/serine_peptidases_filtered";
let hash_type = "pdbtr";
let index_path = "data/serine_peptidases_pdbtr";
let index_mode = "grid";
let num_threads = 4;
let hash_type = "3di";
let index_path = "data/serine_peptidases_3di";
let index_mode = "id";
let num_threads = 1;
let num_bin_dist = 16;
let num_bin_angle = 4;
let num_bin_angle = 8;
let chunk_size = 30;
let max_residue = 3000;
let grid_width = 40.0;
Expand Down
8 changes: 6 additions & 2 deletions src/cli/workflows/query_pdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,11 @@ pub fn query_pdb(env: AppArgs) {
}
// Pad the filter to 6 entries: the first 4 default to 0, the remaining ones to u32::MAX
while match_count_filter.len() < 6 {
match_count_filter.push(0);
if match_count_filter.len() < 4 {
match_count_filter.push(0);
} else {
match_count_filter.push(u32::MAX as usize);
}
}

// Load index table
Expand Down Expand Up @@ -220,7 +224,7 @@ mod tests {
let pdb_path = String::from("data/serine_peptidases_filtered/4cha.pdb");
let query_string = String::from("B57,B102,C195");
let threads = 4;
let index_path = Some(String::from("data/serine_peptidases_pdbtr"));
let index_path = Some(String::from("data/serine_peptidases_3di"));
let exact_match = false;
let retrieve = false;
let dist_threshold = Some(String::from("0.5,1.0"));
Expand Down
45 changes: 45 additions & 0 deletions src/controller/feature.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@

use std::hash::Hash;

use crate::geometry::util::map_aa_to_u8;
use crate::structure::coordinate::calc_angle_point;
use crate::structure::core::CompactStructure;
use crate::geometry::core::{GeometricHash, HashType};
use crate::structure::grid::{get_grid_index_vector_from_compact, merge_id_with_grid, nearby};
Expand Down Expand Up @@ -110,6 +113,46 @@ pub fn get_single_feature(i: usize, j: usize, structure: &CompactStructure, hash
None
}
}
HashType::TertiaryInteraction => {
if i == 0 || j == 0 || i == structure.num_residues - 1 || j == structure.num_residues - 1 {
return None;
}
let ca_1i = structure.get_ca(i-1);
let ca_i = structure.get_ca(i);
let ca_i1 = structure.get_ca(i+1);
let ca_1j = structure.get_ca(j-1);
let ca_j = structure.get_ca(j);
let ca_j1 = structure.get_ca(j+1);

if ca_1i.is_none() || ca_i.is_none() || ca_i1.is_none() || ca_1j.is_none() || ca_j.is_none() || ca_j1.is_none() {
return None;
} else {
let ca_1i = ca_1i.unwrap();
let ca_i = ca_i.unwrap();
let ca_i1 = ca_i1.unwrap();
let ca_1j = ca_1j.unwrap();
let ca_j = ca_j.unwrap();
let ca_j1 = ca_j1.unwrap();
let u1 = ca_i.sub(&ca_1i).normalize();
let u2 = ca_i1.sub(&ca_i).normalize();
let u3 = ca_j.sub(&ca_1j).normalize();
let u4 = ca_j1.sub(&ca_j).normalize();
let u5 = ca_j.sub(&ca_i).normalize();
let phi_12 = u1.dot(&u2).acos();
let phi_34 = u3.dot(&u4).acos();
let phi_15 = u1.dot(&u5).acos();
let phi_35 = u3.dot(&u5).acos();
let phi_14 = u1.dot(&u4).acos();
let phi_23 = u2.dot(&u3).acos();
let phi_13 = u1.dot(&u3).acos();
let ca_dist = ca_i.distance(&ca_j);
let seq_dist = j as f32 - i as f32;
let feature = vec![
phi_12, phi_34, phi_15, phi_35, phi_14, phi_23, phi_13, ca_dist, seq_dist
];
Some(feature)
}
}
// append new hash type here
_ => {
None
Expand Down Expand Up @@ -222,6 +265,7 @@ impl HashType {
HashType::FoldDiscoDefault | HashType::Default32bit |
HashType::PointPairFeature => Some(vec![2]),
HashType::TrRosetta => Some(vec![0]),
HashType::TertiaryInteraction => Some(vec![7]),
_ => None
}
}
Expand All @@ -233,6 +277,7 @@ impl HashType {
HashType::FoldDiscoDefault | HashType::Default32bit => Some(vec![3, 4, 5, 6, 7]),
HashType::PointPairFeature => Some(vec![3, 4, 5]),
HashType::PDBTrRosetta => Some(vec![4, 5, 6]),
HashType::TertiaryInteraction => Some(vec![0, 1, 2, 3, 4, 5, 6]),
_ => None
}
}
Expand Down
39 changes: 39 additions & 0 deletions src/geometry/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pub enum HashType {
Default32bit,
PointPairFeature,
PDBTrRosetta,
TertiaryInteraction,
// append new hash type here
Other,
}
Expand All @@ -30,6 +31,7 @@ impl HashType {
5 => HashType::Default32bit,
6 => HashType::PointPairFeature,
7 => HashType::PDBTrRosetta,
8 => HashType::TertiaryInteraction,
// append new hash type here
_ => HashType::Other,
}
Expand All @@ -44,6 +46,7 @@ impl HashType {
"5" | "Default32bit" | "default32" => HashType::Default32bit,
"6" | "PointPairFeature" | "ppf" => HashType::PointPairFeature,
"7" | "PDBTrRosetta" | "pdbtr" => HashType::PDBTrRosetta,
"8" | "TertiaryInteraction" | "tertiary" | "3di" => HashType::TertiaryInteraction,
// append new hash type here
_ => HashType::Other,
}
Expand All @@ -58,6 +61,7 @@ impl HashType {
HashType::Default32bit => "Default32bit".to_string(),
HashType::PointPairFeature => "PointPairFeature".to_string(),
HashType::PDBTrRosetta => "PDBTrRosetta".to_string(),
HashType::TertiaryInteraction => "TertiaryInteraction".to_string(),
// append new hash type here
HashType::Other => "Other".to_string(),
}
Expand All @@ -72,6 +76,7 @@ impl HashType {
HashType::Default32bit => 32usize,
HashType::PointPairFeature => 32usize,
HashType::PDBTrRosetta => 32usize,
HashType::TertiaryInteraction => 32usize,
// append new hash type here
HashType::Other => 0usize,
}
Expand All @@ -97,6 +102,7 @@ impl HashType {
"Default32bit" => HashType::Default32bit,
"PointPairFeature" => HashType::PointPairFeature,
"PDBTrRosetta" => HashType::PDBTrRosetta,
"TertiaryInteraction" => HashType::TertiaryInteraction,
// append new hash type here
_ => HashType::Other,
};
Expand All @@ -120,6 +126,7 @@ mod tests {
HashType::Default32bit,
HashType::PointPairFeature,
HashType::PDBTrRosetta,
HashType::TertiaryInteraction,
// append new hash type here
];
for hash_type in hash_type_vec {
Expand All @@ -140,6 +147,7 @@ pub enum GeometricHash {
Default32bit(super::default_32bit::HashValue),
PointPairFeature(super::ppf::HashValue),
PDBTrRosetta(super::pdb_tr::HashValue),
TertiaryInteraction(super::tertiary_interaction::HashValue),
// append new hash type here
}

Expand Down Expand Up @@ -172,6 +180,9 @@ impl GeometricHash {
HashType::PDBTrRosetta => GeometricHash::PDBTrRosetta(
super::pdb_tr::HashValue::perfect_hash_default(feature)
),
HashType::TertiaryInteraction => GeometricHash::TertiaryInteraction(
super::tertiary_interaction::HashValue::perfect_hash_default(feature)
),
// append new hash type here
_ => panic!("Invalid hash type"),
}
Expand Down Expand Up @@ -221,6 +232,11 @@ impl GeometricHash {
feature, nbin_dist, nbin_angle
)
),
HashType::TertiaryInteraction => GeometricHash::TertiaryInteraction(
super::tertiary_interaction::HashValue::perfect_hash(
feature, nbin_dist, nbin_angle
)
),
// append new hash type here
_ => panic!("Invalid hash type"),
}
Expand All @@ -237,6 +253,7 @@ impl GeometricHash {
GeometricHash::Default32bit(hash) => hash.reverse_hash_default(),
GeometricHash::PointPairFeature(hash) => hash.reverse_hash_default(),
GeometricHash::PDBTrRosetta(hash) => hash.reverse_hash_default(),
GeometricHash::TertiaryInteraction(hash) => hash.reverse_hash_default(),
// append new hash type here
_ => panic!("Invalid hash type"),
}
Expand All @@ -252,6 +269,7 @@ impl GeometricHash {
GeometricHash::Default32bit(hash) => hash.reverse_hash(nbin_dist, nbin_angle),
GeometricHash::PointPairFeature(hash) => hash.reverse_hash(nbin_dist, nbin_angle),
GeometricHash::PDBTrRosetta(hash) => hash.reverse_hash(nbin_dist, nbin_angle),
GeometricHash::TertiaryInteraction(hash) => hash.reverse_hash(nbin_dist, nbin_angle),
// append new hash type here
_ => panic!("Invalid hash type"),
}
Expand All @@ -267,6 +285,7 @@ impl GeometricHash {
GeometricHash::Default32bit(hash) => hash.hash_type(),
GeometricHash::PointPairFeature(hash) => hash.hash_type(),
GeometricHash::PDBTrRosetta(hash) => hash.hash_type(),
GeometricHash::TertiaryInteraction(hash) => hash.hash_type(),
// append new hash type here
_ => panic!("Invalid hash type"),
}
Expand All @@ -292,6 +311,9 @@ impl GeometricHash {
HashType::PDBTrRosetta => GeometricHash::PDBTrRosetta(
super::pdb_tr::HashValue::from_u32(hashvalue)
),
HashType::TertiaryInteraction => GeometricHash::TertiaryInteraction(
super::tertiary_interaction::HashValue::from_u32(hashvalue)
),
// append new hash type here if it is encoded as u32
_ => panic!("Invalid hash type"),
}
Expand Down Expand Up @@ -323,6 +345,9 @@ impl GeometricHash {
HashType::PDBTrRosetta => GeometricHash::PDBTrRosetta(
super::pdb_tr::HashValue::from_u64(hashvalue)
),
HashType::TertiaryInteraction => GeometricHash::TertiaryInteraction(
super::tertiary_interaction::HashValue::from_u64(hashvalue)
),
// append new hash type here
_ => panic!("Invalid hash type"),
}
Expand All @@ -336,6 +361,7 @@ impl GeometricHash {
GeometricHash::PointPairFeature(hash) => hash.as_u32(),
GeometricHash::Default32bit(hash) => hash.as_u32(),
GeometricHash::PDBTrRosetta(hash) => hash.as_u32(),
GeometricHash::TertiaryInteraction(hash) => hash.as_u32(),
// append new hash type here
_ => panic!("Invalid hash type"),
}
Expand All @@ -350,6 +376,7 @@ impl GeometricHash {
GeometricHash::Default32bit(hash) => hash.as_u64(),
GeometricHash::PointPairFeature(hash) => hash.as_u64(),
GeometricHash::PDBTrRosetta(hash) => hash.as_u64(),
GeometricHash::TertiaryInteraction(hash) => hash.as_u64(),
// append new hash type here
_ => panic!("Invalid hash type"),
}
Expand Down Expand Up @@ -403,6 +430,12 @@ impl GeometricHash {
_ => panic!("Invalid hash type"),
}
}
/// Extracts the wrapped `tertiary_interaction::HashValue` from this hash.
///
/// Returns a clone of the inner value when `self` is the
/// `GeometricHash::TertiaryInteraction` variant.
///
/// # Panics
///
/// Panics with "Invalid hash type" when `self` is any other variant.
pub fn downcast_tertiary_interaction(&self) -> super::tertiary_interaction::HashValue {
    if let GeometricHash::TertiaryInteraction(inner) = self {
        return inner.clone();
    }
    panic!("Invalid hash type")
}
// append the downcast method for new hash type here

}
Expand Down Expand Up @@ -434,6 +467,9 @@ impl fmt::Debug for GeometricHash {
GeometricHash::PDBTrRosetta(hash) => {
write!(f, "PDBTrRosetta({:?})", hash)
},
GeometricHash::TertiaryInteraction(hash) => {
write!(f, "TertiaryInteraction({:?})", hash)
},
// append new hash type here
_ => panic!("Invalid hash type"),
}
Expand Down Expand Up @@ -467,6 +503,9 @@ impl fmt::Display for GeometricHash {
GeometricHash::PDBTrRosetta(hash) => {
write!(f, "PDBTrRosetta\t{:?}", hash)
},
GeometricHash::TertiaryInteraction(hash) => {
write!(f, "TertiaryInteraction\t{:?}", hash)
},
// append new hash type here
_ => panic!("Invalid hash type"),
}
Expand Down
3 changes: 2 additions & 1 deletion src/geometry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ pub mod pdb_halfmatch;
pub mod ppf;
pub mod trrosetta;
pub mod util;
pub mod pdb_tr;
pub mod pdb_tr;
pub mod tertiary_interaction;
Loading

0 comments on commit ce7b23e

Please sign in to comment.