Skip to content

Commit

Permalink
Create
Browse files Browse the repository at this point in the history
  • Loading branch information
fcyu committed Nov 2, 2015
0 parents commit 0d16926
Show file tree
Hide file tree
Showing 24 changed files with 2,081 additions and 0 deletions.
17 changes: 17 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Auto detect text files and perform LF normalization
* text=auto

# Custom for Visual Studio
*.cs diff=csharp

# Standard to msysgit
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain
18 changes: 18 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
*.class


# Package Files #
*.jar
*.war
*.ear

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*

*.class
nbactions.xml

target

.idea
*.iml
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# ExhaustiveCL
A fast and exhaustive tool for identifying cross-linked peptides.

## Usage
Usage: java -Xmx32g -jar /path/to/ExhaustiveCL.jar <parameter_file> <data_file> <result_dir>
<parameter_file>: parameter file. Can be downloaded along with ExhaustiveCL.
<data_file>: spectra data file (mzXML)
<result_dir>: result files' directory
example: java -Xmx32g -jar ExhaustiveCL.jar parameter.def data.mzxml result_dir/

## Demo data


## Cite
1 change: 1 addition & 0 deletions add_jramp
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mvn install:install-file -Dfile=D:\Dropbox\proteomics\API\jramp\stax\software\jrap_StAX_v5.2.jar -DgroupId=jramp -DartifactId=jramp -Dversion=5.2 -Dpackaging=jar -DgeneratePom=true
62 changes: 62 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build for ECL: compiles the Java sources and packages a single
     runnable jar that bundles all dependencies. -->
<modelVersion>4.0.0</modelVersion>

<groupId>proteomics</groupId>
<artifactId>ECL</artifactId>
<version>20151029</version>
<packaging>jar</packaging>

<name>ECL</name>
<url>http://maven.apache.org</url>

<!-- Sources are UTF-8 and are compiled for Java 1.7 (source and target). -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>

<dependencies>
<!-- SQLite JDBC driver. -->
<dependency>
<groupId>org.xerial</groupId>
<artifactId>sqlite-jdbc</artifactId>
<version>3.8.9.1</version>
</dependency>
<!-- No repository is declared for this artifact in this POM; the add_jramp
     script installs jrap_StAX_v5.2.jar into the local Maven repository
     under these coordinates (presumably required before building - confirm). -->
<dependency>
<groupId>jramp</groupId>
<artifactId>jramp</artifactId>
<version>5.2</version>
</dependency>
</dependencies>

<build>
<plugins>
<!-- Builds the "jar-with-dependencies" assembly during the package phase,
     with proteomics.SearchMain as the manifest main class. -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.4</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<mainClass>proteomics.SearchMain</mainClass>
</manifest>
</archive>
<!-- Keep the assembly id out of the produced jar's file name. -->
<appendAssemblyId>
false
</appendAssemblyId>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
252 changes: 252 additions & 0 deletions src/main/java/proteomics/Index/BuildIndex.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
package proteomics.Index;

import java.util.*;
import java.sql.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import theoSeq.*;
import proteomics.Validation.*;

/**
 * Builds the peptide-chain index used by the search.
 *
 * <p>The protein database is read through {@code DbTool}, every protein is cut into
 * peptide chains by {@code MassTool.buildChainSet}, a decoy chain is derived for each
 * target chain, and both sets are written to an in-memory SQLite table
 * ({@code peptide_chain}) which is finally backed up to {@code <db>.db} on disk.
 */
public class BuildIndex {

    /** Value of the {@code type} column for target chains. */
    private static final String TARGET = "1";
    /** Value of the {@code type} column for decoy chains. */
    private static final String DECOY = "0";

    /** {@code term_type}: the chain is neither a prefix nor a suffix of its protein. */
    private static final int TERM_TYPE_INTERNAL = 0;
    /** {@code term_type}: the protein sequence starts with the chain. */
    private static final int TERM_TYPE_START = 1;
    /** {@code term_type}: the protein sequence ends with the chain. */
    private static final int TERM_TYPE_END = 2;

    private static final String INSERT_CHAIN_SQL =
            "insert into peptide_chain (mass, peptide_sequence, protein_id, type, term_type) values (?, ?, ?, ?, ?);";

    private final double hatom_mass;
    private final double oatom_mass;
    private final float max_precursor_mass;
    private final int min_chain_length;
    private final int max_chain_length;
    private final String db_path;
    private final MassTool mass_tool_obj;
    private final Map<String, String> pro_seq_map;
    private final double nterm_mass;

    private final Map<String, Float> seq_mass_map = new HashMap<>();
    private final Map<String, Set<String>> seq_pro_map = new HashMap<>();
    // Target chains with L->I and K->Q substituted; used to reject decoys that collide with a target.
    private final Set<String> for_check_duplicate = new HashSet<>();
    private final Map<String, Integer> seq_term_type_map = new HashMap<>();
    private final Map<String, Float> decoy_seq_mass_map = new HashMap<>();
    private final Map<String, Set<String>> decoy_seq_pro_map = new HashMap<>();
    private final Map<String, Integer> decoy_seq_term_type_map = new HashMap<>();

    /////////////////////////////////public methods//////////////////////////////////////////////////////////////////

    /**
     * Reads the search parameters and the protein database and prepares the mass tool.
     *
     * @param parameter_map search parameters; the keys {@code max_precursor_mass},
     *                      {@code min_chain_length}, {@code max_chain_length}, {@code db},
     *                      {@code missed_cleavage} and {@code nterm_linkable} are read here
     * @param fix_mod_map   fixed modifications, handed to {@code MassTool} unchanged
     * @throws Exception if a numeric parameter cannot be parsed or a helper object fails to build
     */
    public BuildIndex(Map<String, String> parameter_map, Map<String, Double> fix_mod_map) throws Exception {
        // initialize parameters
        max_precursor_mass = Float.parseFloat(parameter_map.get("max_precursor_mass"));
        min_chain_length = Integer.parseInt(parameter_map.get("min_chain_length"));
        max_chain_length = Integer.parseInt(parameter_map.get("max_chain_length"));
        db_path = parameter_map.get("db");
        int missed_cleavage = Integer.parseInt(parameter_map.get("missed_cleavage"));
        int nterm_linkable = Integer.parseInt(parameter_map.get("nterm_linkable"));

        // read protein database
        DbTool db_tool_obj = new DbTool(db_path);
        pro_seq_map = db_tool_obj.returnSeqMap();

        // define a new MassTool object
        mass_tool_obj = new MassTool(missed_cleavage, fix_mod_map, nterm_linkable);
        Map<String, Double> mass_table = mass_tool_obj.returnMassTable();
        hatom_mass = mass_table.get("Hatom");
        oatom_mass = mass_table.get("Oatom");
        // NOTE(review): "Z" appears to be the N-terminal entry of the mass table - confirm in MassTool.
        nterm_mass = mass_table.get("Z");
    }

    /**
     * Builds the target and decoy chain maps, stores them in an in-memory SQLite database
     * and backs that database up to {@code <db>.db}.
     *
     * <p>Any {@link SQLException} is reported on {@code System.err} and terminates the JVM
     * with exit status 1.
     *
     * @param parameter_map not read by this method; kept for interface compatibility
     * @throws Exception if the SQLite driver class cannot be loaded
     */
    public void buildCLDb(Map<String, String> parameter_map) throws Exception {
        buildPepChainMap();
        buildDecoyPepChainMap();

        // Loading the class is enough: a JDBC driver registers itself in its static initializer.
        Class.forName("org.sqlite.JDBC");

        // try-with-resources closes the statement and the connection on every exit path.
        try (Connection index_conn = DriverManager.getConnection("jdbc:sqlite:" + ":memory:");
             Statement index_statement = index_conn.createStatement()) {
            // Create index SQL database
            index_statement.executeUpdate("create table peptide_chain (chain_index integer primary key asc, mass real not null, peptide_sequence text not null, protein_id text not null, type text not null, term_type integer not null);");
            index_statement.executeUpdate("create index mass on peptide_chain (mass);");
            index_statement.executeUpdate("create index seq on peptide_chain (peptide_sequence);");

            // Build peptide_chain table. It contains target and decoy sequences (targets first).
            insertChains(index_conn, seq_mass_map, seq_pro_map, seq_term_type_map, TARGET);
            insertChains(index_conn, decoy_seq_mass_map, decoy_seq_pro_map, decoy_seq_term_type_map, DECOY);

            // Backup SQL database to disk ("backup to" is a sqlite-jdbc command, not standard SQL).
            // NOTE(review): the path is not quoted, so a db path containing whitespace may not parse - confirm.
            index_statement.executeUpdate("backup to " + db_path + ".db");
        } catch (SQLException ex) {
            System.err.println("SQLException: " + ex.getMessage());
            System.exit(1);
        }
    }

    /**
     * @return the {@code MassTool} created by the constructor
     */
    public MassTool returnMassToolObj() {
        return mass_tool_obj;
    }

    //////////////////////////////////////////private methods////////////////////////////////////////////////////////

    /**
     * Inserts every chain of {@code pro_map} whose length lies within
     * [{@code min_chain_length}, {@code max_chain_length}] into {@code peptide_chain},
     * all in a single transaction.
     *
     * @param index_conn    open connection to the index database
     * @param mass_map      chain sequence -> mass
     * @param pro_map       chain sequence -> ids of the proteins it was found in
     * @param term_type_map chain sequence -> terminal type
     * @param type          {@link #TARGET} or {@link #DECOY}
     * @throws SQLException if any statement fails; nothing has been committed in that case
     */
    private void insertChains(Connection index_conn,
                              Map<String, Float> mass_map,
                              Map<String, Set<String>> pro_map,
                              Map<String, Integer> term_type_map,
                              String type) throws SQLException {
        index_conn.setAutoCommit(false);
        try (PreparedStatement insert_statement = index_conn.prepareStatement(INSERT_CHAIN_SQL)) {
            for (Map.Entry<String, Set<String>> entry : pro_map.entrySet()) {
                String seq = entry.getKey();
                if ((seq.length() < min_chain_length) || (seq.length() > max_chain_length)) {
                    continue;
                }

                insert_statement.setFloat(1, mass_map.get(seq));
                insert_statement.setString(2, seq);
                insert_statement.setString(3, joinProteinIds(entry.getValue()));
                insert_statement.setString(4, type);
                insert_statement.setInt(5, term_type_map.get(seq));
                insert_statement.executeUpdate();
            }
            index_conn.commit();
        }
        // Only reached after a successful commit; enabling auto-commit on a failed
        // transaction would commit the partial inserts.
        index_conn.setAutoCommit(true);
    }

    /**
     * Joins protein ids with {@code '&'}.
     *
     * <p>Built explicitly rather than by rewriting {@code Set.toString()}, so an id that
     * itself contains {@code ", "} is stored unmodified.
     */
    private static String joinProteinIds(Set<String> pro_id_set) {
        StringBuilder joined = new StringBuilder();
        for (String pro_id : pro_id_set) {
            if (joined.length() > 0) {
                joined.append('&');
            }
            joined.append(pro_id);
        }
        return joined.toString();
    }

    /**
     * Classifies where {@code seq} sits in {@code pro_seq}: protein start, protein end or
     * neither. The start check wins when both apply.
     */
    private static int termType(String pro_seq, String seq) {
        if (pro_seq.startsWith(seq)) {
            return TERM_TYPE_START;
        }
        if (pro_seq.endsWith(seq)) {
            return TERM_TYPE_END;
        }
        return TERM_TYPE_INTERNAL;
    }

    /**
     * Fills the target maps (mass, proteins, terminal type) and the duplicate-check set
     * from every protein in the database. Chains heavier than {@code max_precursor_mass}
     * are dropped.
     */
    private void buildPepChainMap() {
        for (Map.Entry<String, String> protein : pro_seq_map.entrySet()) {
            String pro_id = protein.getKey();
            String pro_seq = protein.getValue();
            for (String seq : mass_tool_obj.buildChainSet(pro_seq)) {
                float mass_temp = (float) mass_tool_obj.calResidueMass(seq) + (float) nterm_mass + 2 * (float) hatom_mass + (float) oatom_mass; // calMass just calculate the residue mass, so we should add a H2O
                if (mass_temp > max_precursor_mass) {
                    continue;
                }
                seq_mass_map.put(seq, mass_temp);

                // Add the sequence to the check set for decoy duplicate check
                String template_seq = seq.replace("L", "I"); // "L" and "I" have the same mass.
                template_seq = template_seq.replace("K", "Q"); // "K" and "Q" have the close mass.
                for_check_duplicate.add(template_seq);

                // NOTE(review): for a chain shared by several proteins, the protein visited
                // last decides the stored terminal type.
                seq_term_type_map.put(seq, termType(pro_seq, seq));

                Set<String> pro_list = seq_pro_map.get(seq);
                if (pro_list == null) {
                    pro_list = new HashSet<>();
                    seq_pro_map.put(seq, pro_list);
                }
                pro_list.add(pro_id);
            }
        }
    }

    /**
     * Derives one decoy chain per target chain by reversing it (see {@link #reverseSeq});
     * a trailing K or R stays at the end. Decoys that match a target chain once L/I and
     * K/Q are treated as equal are skipped.
     */
    private void buildDecoyPepChainMap() throws Exception {
        for (Map.Entry<String, Set<String>> entry : seq_pro_map.entrySet()) {
            String original_seq = entry.getKey();
            String decoy_seq;
            if (original_seq.endsWith("K") || original_seq.endsWith("R")) {
                int last = original_seq.length() - 1;
                decoy_seq = reverseSeq(original_seq.substring(0, last)) + original_seq.charAt(last);
            } else {
                decoy_seq = reverseSeq(original_seq);
            }

            // Check duplicate
            String new_decoy_seq = decoy_seq.replace("L", "I").replace("K", "Q");
            if (for_check_duplicate.contains(new_decoy_seq)) {
                // the decoy sequence is the same as the target sequence
                continue;
            }

            float decoy_mass_temp = seq_mass_map.get(original_seq);
            decoy_seq_mass_map.put(decoy_seq, decoy_mass_temp);

            // Only the first protein of the target chain is carried over to the decoy.
            String pro_id = entry.getValue().iterator().next();
            String pro_seq = pro_seq_map.get(pro_id);
            decoy_seq_term_type_map.put(decoy_seq, termType(pro_seq, original_seq));

            Set<String> decoy_pro_set = new HashSet<>();
            decoy_pro_set.add("DECOY_" + pro_id);
            decoy_seq_pro_map.put(decoy_seq, decoy_pro_set);
        }
    }

    /**
     * Reverses each stretch of residues between K residues while leaving every K at its
     * original position, e.g. {@code "ABKCD"} becomes {@code "BAKDC"}.
     *
     * @param sequence peptide sequence; may be empty
     * @return the sequence with every K-delimited segment reversed
     */
    private static String reverseSeq(String sequence) {
        int sequence_length = sequence.length();
        StringBuilder decoy_str = new StringBuilder(sequence_length);
        int segment_start = 0;
        while (segment_start < sequence_length) {
            int k_idx = sequence.indexOf('K', segment_start);
            int segment_end = (k_idx < 0) ? sequence_length : k_idx;

            // Reverse part sequence
            decoy_str.append(new StringBuilder(sequence.substring(segment_start, segment_end)).reverse());
            if (k_idx >= 0) {
                decoy_str.append('K');
            }
            segment_start = segment_end + 1;
        }
        return decoy_str.toString();
    }
}
Loading

0 comments on commit 0d16926

Please sign in to comment.