-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Uses the AMPSphere API to query the databases online
- Loading branch information
Showing
13 changed files
with
173 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import requests | ||
import pandas as pd | ||
|
||
|
||
def get_ampsphere_exact_match(seq, query_name):
    '''Get exact match from AMPSphere API

    Parameters
    ----------
    seq : str
        Sequence sent to the `sequence-match` endpoint.
    query_name : str
        Label used as the index of the single returned row.

    Returns
    -------
    pandas.DataFrame
        One row indexed by `query_name`. The columns are the keys of the
        JSON object returned by the API (assumed to be a JSON object, e.g.
        with a `result` field -- confirm against the API).
    '''
    URL = 'https://ampsphere-api.big-data-biology.org/v1/search/sequence-match'
    # Let requests build the query string so that every character of the
    # query is percent-encoded instead of being pasted raw into the URL.
    response = requests.get(URL, params={'query': seq})
    data = response.json()
    return pd.DataFrame.from_dict({query_name: data}, orient='index')
|
||
def get_ampsphere_mmseqs_match(seq, query_name):
    '''Get MMSeqs2 match from AMPSphere API

    Parameters
    ----------
    seq : str
        Sequence to search for.
    query_name : str
        Name used as the FASTA header of the submitted query.

    Returns
    -------
    pandas.DataFrame
        The records returned by the API, indexed by `query_identifier` and
        without the `alignment_strings` column. An empty DataFrame is
        returned when the API sends back an empty result, mirroring
        `get_ampsphere_hmmer_match`.
    '''
    # The endpoint receives the query as a one-record FASTA string.
    query = f'>{query_name}\n{seq}'
    URL = 'https://ampsphere-api.big-data-biology.org/v1/search/mmseqs'
    # Let requests build the query string: `query_name` is caller-supplied
    # and may contain characters ('&', '#', '+', '%', ...) that would
    # otherwise change the meaning of the URL.
    response = requests.get(URL, params={'query': query})
    data = response.json()
    if not data:
        # Without this guard the `drop` below raises KeyError, because an
        # empty frame has no `alignment_strings` column.
        return pd.DataFrame()
    return pd.DataFrame.from_dict(data)\
        .drop("alignment_strings", axis=1)\
        .set_index('query_identifier')
|
||
def get_ampsphere_hmmer_match(seq, query_name):
    '''Get HMMER match from AMPSphere API

    Parameters
    ----------
    seq : str
        Sequence to search for.
    query_name : str
        Name used as the FASTA header of the submitted query.

    Returns
    -------
    pandas.DataFrame
        The records returned by the API, indexed by `query_name`; an empty
        DataFrame when the API returns an empty result.
    '''
    # The endpoint receives the query as a one-record FASTA string.
    query = f'>{query_name}\n{seq}'
    URL = 'https://ampsphere-api.big-data-biology.org/v1/search/hmmer'
    # Let requests build the query string: `query_name` is caller-supplied
    # and may contain characters ('&', '#', '+', '%', ...) that would
    # otherwise change the meaning of the URL.
    response = requests.get(URL, params={'query': query})
    data = response.json()
    if not data:
        return pd.DataFrame()
    return pd.DataFrame.from_dict(data)\
        .set_index('query_name')
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from macrel import ampsphere | ||
|
||
# Query peptides shared by the tests below: seq1 is asserted to match
# AMP10.000_002 (exact and MMSeqs2 tests); seq2 is the HMMER test query.
seq1 = 'KRVKSFFKGYMRAIEINAALMYGYRPK' | ||
seq2 = 'GRVIGKQGRIAKAIRVVMRAAAVRVDEKVLVEID' | ||
def test_exact():
    '''Exact search: a known sequence is found, a modified one is not'''
    # Known sequence: the row is labelled with the query name and carries
    # the accession.
    found = ampsphere.get_ampsphere_exact_match(seq1, 'seq1')
    assert found.index[0] == 'seq1'
    assert found.iloc[0]['result'] == 'AMP10.000_002'

    # Same sequence with extra residues appended: a row still comes back,
    # but its result is None.
    missing = ampsphere.get_ampsphere_exact_match(seq1 + 'HELO', 'seq2')
    assert missing.index[0] == 'seq2'
    assert missing.iloc[0]['result'] is None
|
||
def test_mmseqs():
    '''MMSeqs2 search: the first hit for seq1 targets AMP10.000_002'''
    hits = ampsphere.get_ampsphere_mmseqs_match(seq1, 'seq1')
    first_label = hits.index[0]
    assert first_label == 'seq1'
    top_hit = hits.iloc[0]
    assert top_hit['target_identifier'] == 'AMP10.000_002'
|
||
def test_hmmer():
    '''HMMER search: the first row is labelled with the query name'''
    hits = ampsphere.get_ampsphere_hmmer_match(seq2, 'seq2')
    first_label = hits.index[0]
    assert first_label == 'seq2'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#!/usr/bin/env bash

# Abort as soon as any command fails.
set -o errexit

# Run the AMPSphere query on pep8.faa, with `out` as the output location.
macrel query-ampsphere --fasta pep8.faa --output out

# Decompress the exact-match table produced by the query.
gunzip out/macrel.out.ampsphere_exact.tsv.gz
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# AMPSphere query results (mode: exact) | ||
query result | ||
Query0 KKVKSIFKKALAMMGENEVKAWGIGIK AMP10.000_000 | ||
Query1 FFGIGQQEMTLEEIGDKFGLTRERVRQIKEKAIRRLRQSNRSKLLKSYLG AMP10.000_001 | ||
Query2 KRVKSFFKGYMRAIEINAALMYGYRPK AMP10.000_002 | ||
Query3 GRVIGKQGRIAKAIRVVMRAAAVRVDEKVLVEID AMP10.000_003 | ||
Query4 KLRKILKSMFNNYCKTFKDVPPGNMFR AMP10.000_004 | ||
Query5 AIFYVIKHISRKHFVSLQRYKIKEKM AMP10.000_005 | ||
Query6 LVRIISMVIAGVIIVYLVRWIDNFFSRYRK AMP10.000_006 | ||
Query7 HELOQFLTHIIFLLNLLKTLINHFS No_Hit |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
>Query0 | ||
KKVKSIFKKALAMMGENEVKAWGIGIK | ||
>Query1 | ||
FFGIGQQEMTLEEIGDKFGLTRERVRQIKEKAIRRLRQSNRSKLLKSYLG | ||
>Query2 | ||
KRVKSFFKGYMRAIEINAALMYGYRPK | ||
>Query3 | ||
GRVIGKQGRIAKAIRVVMRAAAVRVDEKVLVEID | ||
>Query4 | ||
KLRKILKSMFNNYCKTFKDVPPGNMFR | ||
>Query5 | ||
AIFYVIKHISRKHFVSLQRYKIKEKM | ||
>Query6 | ||
LVRIISMVIAGVIIVYLVRWIDNFFSRYRK | ||
>Query7 | ||
HELOQFLTHIIFLLNLLKTLINHFS |