Skip to content

Commit

Permalink
docker and streamlit 1.30 update
Browse files Browse the repository at this point in the history
  • Loading branch information
pgarrett-scripps committed Jan 19, 2024
1 parent d1e80b0 commit 0603c13
Showing 5 changed files with 112 additions and 68 deletions.
13 changes: 13 additions & 0 deletions .github/workflows/docker-image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Builds the repository's Docker image and publishes it to the registry on every push.
name: Publish Docker
on: [push]
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository so the Dockerfile and sources are available.
      - uses: actions/checkout@v3
      - name: Publish to Registry
        uses: elgohr/Publish-Docker-Github-Action@v4
        with:
          # Target image repository on the registry.
          name: pgarrettscripps/protein-cleaver-streamlit
          # Credentials are read from repository secrets, never hard-coded.
          username: ${{ secrets.DOCKER_HUB_USERNAME }}
          password: ${{ secrets.DOCKER_HUB_TOKEN }}
11 changes: 11 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Container image for the Streamlit application.
FROM python:3.11.7

WORKDIR /usr/src/app

# Copy only the requirements first so the dependency layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt ./

RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Document the port passed to --server.port below.
EXPOSE 8501

# NOTE(review): this commit edits app.py, yet the entry point here is home.py;
# confirm that home.py exists in the build context.
# Exec form runs streamlit directly (no /bin/sh wrapper), so it receives
# SIGTERM from `docker stop` and can shut down cleanly.
CMD ["streamlit", "run", "home.py", "--server.port", "8501"]
120 changes: 69 additions & 51 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import uuid
from collections import Counter
import random

import pandas as pd
import streamlit as st
@@ -14,39 +13,39 @@

from constants import *
from wiki import *
from util import make_clickable, generate_peptide_df, coverage_string, create_colorbar, generate_app_url, \
fetch_sequence_from_uniprot
from util import generate_peptide_df, coverage_string, create_colorbar, generate_app_url, fetch_sequence_from_uniprot, \
make_clickable

st.set_page_config(page_title="proteincleaver", page_icon=":knife:", layout="wide")

# Parse query parameters
params = st.experimental_get_query_params()
query_peptide_sequence = params.get('protein_sequence', [DEFAULT_PROTEIN_SEQUENCE])[0]
query_proteases = params.get('proteases', [';'.join(DEFAULT_PROTEASES)])[0].split(',')
query_custom_regex = params.get('custom_regex', [''])[0]
query_missed_cleavages = int(params.get('missed_cleavages', [DEFAULT_MISSED_CLEAVAGES])[0])
query_mass_type = params.get('mass_type', [DEFAULT_MASS_TYPE])[0]
query_min_peptide_len = int(params.get('min_peptide_len', [DEFAULT_MIN_PEPTIDE_LEN])[0])
query_max_peptide_len = int(params.get('max_peptide_len', [DEFAULT_MAX_PEPTIDE_LEN])[0])
query_min_mass = float(params.get('min_mass', [DEFAULT_MIN_PEPTIDE_MASS])[0])
query_max_mass = float(params.get('max_mass', [DEFAULT_MAX_PEPTIDE_MASS])[0])
query_semi_enzymatic = params.get('semi_enzymatic', ['False'])[0].lower() == 'true'
query_infer_charge = params.get('infer_charge', ['True'])[0].lower() == 'true'
query_min_charge = int(params.get('min_charge', [DEFAULT_MIN_CHARGE])[0])
query_max_charge = int(params.get('max_charge', [DEFAULT_MAX_CHARGE])[0])
query_min_mz = float(params.get('min_mz', [DEFAULT_MIN_MZ])[0])
query_max_mz = float(params.get('max_mz', [DEFAULT_MAX_MZ])[0])
query_remove_non_proteotypic = params.get('remove_non_proteotypic', ['False'])[0].lower() == 'true'
query_n_term_static_mod = float(params.get('n_term_static_mod', [0.0])[0])
query_c_term_static_mod = float(params.get('c_term_static_mod', [0.0])[0])
query_num_static_mods = int(params.get('num_static_mods', [DEFAULT_STATIC_MODS])[0])
query_n_term_var_mod = float(params.get('n_term_var_mod', [0.0])[0])
query_c_term_var_mod = float(params.get('c_term_var_mod', [0.0])[0])
query_max_var_mods = int(params.get('max_var_mods', [DEFAULT_MAX_VAR_MODS])[0])
query_num_variable_mods = int(params.get('num_variable_mods', [DEFAULT_VAR_MODS])[0])
query_static_mods_str = params.get('static_mods', ['C:57.02146'])[0]
params = st.query_params
query_peptide_sequence = params.get('protein_sequence', DEFAULT_PROTEIN_SEQUENCE)
query_proteases = params.get('proteases', ';'.join(DEFAULT_PROTEASES)).split(',')
query_custom_regex = params.get('custom_regex', '')
query_missed_cleavages = int(params.get('missed_cleavages', DEFAULT_MISSED_CLEAVAGES))
query_mass_type = params.get('mass_type', DEFAULT_MASS_TYPE)
query_min_peptide_len = int(params.get('min_peptide_len', DEFAULT_MIN_PEPTIDE_LEN))
query_max_peptide_len = int(params.get('max_peptide_len', DEFAULT_MAX_PEPTIDE_LEN))
query_min_mass = float(params.get('min_mass', DEFAULT_MIN_PEPTIDE_MASS))
query_max_mass = float(params.get('max_mass', DEFAULT_MAX_PEPTIDE_MASS))
query_semi_enzymatic = params.get('semi_enzymatic', 'False').lower() == 'true'
query_infer_charge = params.get('infer_charge', 'False').lower() == 'true'
query_min_charge = int(params.get('min_charge', DEFAULT_MIN_CHARGE))
query_max_charge = int(params.get('max_charge', DEFAULT_MAX_CHARGE))
query_min_mz = float(params.get('min_mz', DEFAULT_MIN_MZ))
query_max_mz = float(params.get('max_mz', DEFAULT_MAX_MZ))
query_remove_non_proteotypic = params.get('remove_non_proteotypic', 'False').lower() == 'true'
query_n_term_static_mod = float(params.get('n_term_static_mod', 0.0))
query_c_term_static_mod = float(params.get('c_term_static_mod', 0.0))
query_num_static_mods = int(params.get('num_static_mods', DEFAULT_STATIC_MODS))
query_n_term_var_mod = float(params.get('n_term_var_mod', 0.0))
query_c_term_var_mod = float(params.get('c_term_var_mod', 0.0))
query_max_var_mods = int(params.get('max_var_mods', DEFAULT_MAX_VAR_MODS))
query_num_variable_mods = int(params.get('num_variable_mods', DEFAULT_VAR_MODS))
query_static_mods_str = params.get('static_mods', 'C:57.02146')
query_static_mods = [(s.split(':')[0], float(s.split(':')[1])) for s in query_static_mods_str.split(';') if s]
query_variable_mods_str = params.get('variable_mods', [''])[0]
query_variable_mods_str = params.get('variable_mods', '')
query_variable_mods = [(s.split(':')[0], float(s.split(':')[1])) for s in query_variable_mods_str.split(';') if s]

# CSS to inject contained in a string
@@ -124,7 +123,8 @@
custom_regex = c2.text_input(label='(Additional) Custom protease',
value=query_custom_regex,
help='A custom regular expression to use for digestion. Will be used along with '
'selected proteases')
'selected proteases. For example a regex expression for trypsin would look like: '
'([KR])')

c1, c2 = st.columns(2)
missed_cleavages = c1.number_input(label='Max missed cleavages',
@@ -424,23 +424,25 @@ def add_variable_modification(r):

with t1:
st.header('Digestion Metrics')
c1, c2, c3, c4 = st.columns(4)
c1, c2, c3 = st.columns(3)
c1.metric('Total Peptides', len(df))
c2.metric('Semi Peptides', len(df[df['Semi']]))
c3.metric('Enzymatic Peptides', len(df[~df['Semi']]))
c4.metric('Unique Peptides', len(df['Sequence'].unique()))

st.subheader('Peptides')
clickable = st.checkbox('Peptide Fragmenter Links', value=False)

if clickable:
df_clickable = df.copy(deep=True)
df_clickable['Sequence'] = [make_clickable(peptide, mass_type) for peptide in
df_clickable['Sequence']]
st.caption('Click on a sequence to see the fragment ions!')
st.write(df_clickable.to_html(escape=False), unsafe_allow_html=True, use_container_width=True)
else:
st.dataframe(df, use_container_width=True)
df['Link'] = [make_clickable(peptide, mass_type) for peptide in df['Sequence']]

st.dataframe(
df,
column_config={
"Link": st.column_config.LinkColumn(
display_text="View Ions"),
},
hide_index=True,
)



with t2:
st.header('Cleavage & Coverage')
@@ -479,10 +481,16 @@ def add_variable_modification(r):

with t3:
st.header('Motif Analysis')
motif_regex = st.text_input('Motifs Regex', '(K)')
c1, c2, c3 = st.columns(3)
motif_regex = c1.text_input('Motifs Regex', '(K)')

if motif_regex:
def get_motif_sites(motif_regex, stripped_protein_sequence):
    """Find every occurrence of a motif in the protein sequence.

    Args:
        motif_regex: Regular expression describing the motif.
        stripped_protein_sequence: Sequence string that is searched.

    Returns:
        A list of match objects, one per hit; overlapping hits are included
        because the search runs with ``overlapped=True``.
    """
    # A bare `st.cache_data()` statement used to sit above this def. Called
    # like that it only builds a decorator and discards it, so nothing was
    # ever cached; the dead call is dropped and behavior is unchanged.
    # NOTE(review): if caching is wanted, `@st.cache_data` needs a
    # serializable return value - match objects may not qualify, so confirm
    # (or return plain start offsets) before decorating.
    return list(reg.finditer(motif_regex, stripped_protein_sequence, overlapped=True))

if motif_regex:
motif_sites = get_motif_sites(motif_regex, stripped_protein_sequence)

def count_motifs(row):
return sum([1 for site in motif_sites if row['Start'] <= site.start() < row['End']])
@@ -506,18 +514,28 @@ def count_motifs(row):
else:
motif_cov_array[i] = min(row[2], motif_cov_array[i])


min_moitifs = c2.number_input('Min Motifs', min_value=0, max_value=max(df['Motifs']), value=0)
max_motifs = c3.number_input('Max Motifs', min_value=0, max_value=max(df['Motifs']), value=max(df['Motifs']))
df = df[(df['Motifs'] >= min_moitifs) & (df['Motifs'] <= max_motifs)]

st.subheader('Peptides')
clickable2 = st.checkbox('Peptide Fragmenter Links', value=False, key=1)
if clickable2:
df_clickable = df.copy(deep=True)
df_clickable['Sequence'] = [make_clickable(peptide, mass_type) for peptide in df_clickable['Sequence']]
st.caption('Click on a sequence to see the fragment ions!')
st.write(df_clickable.to_html(escape=False), unsafe_allow_html=True, use_container_width=True)
else:
st.dataframe(df, use_container_width=True)

# Make the Link column the last column in the dataframe
df = df[[c for c in df if c not in ['Link']] + ['Link']]

st.dataframe(
df,
column_config={
"Link": st.column_config.LinkColumn(
display_text="View Ions"),
},
hide_index=True,
)

counter = Counter(df['Motifs'])


st.subheader('Motif Site Coverage', help='The color corresponds to the peptide with the fewest number of motif '
'matches (excluding 0 matches). Example: Lets assume that the first '
'site is covered by two peptides, the first with one match and the '
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
pandas==2.1.3
streamlit==1.28.2
peptacular==1.0.1
streamlit==1.30.0
peptacular==1.2.0
requests==2.31.0
matplotlib==3.7.2
numpy==1.25.1
32 changes: 17 additions & 15 deletions util.py
Original file line number Diff line number Diff line change
@@ -12,7 +12,7 @@
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib as mpl
from peptacular.term import add_n_term_modification, add_c_term_modification
from peptacular.term.modification import add_n_term_modification, add_c_term_modification

from constants import LINK

@@ -23,13 +23,6 @@ def fetch_sequence_from_uniprot(accession_number):
return response


def make_clickable(sequence, mass_type):
# target _blank to open new window
# extract clickable text to display for your link
link = LINK + f'?sequence={sequence}&mass_type={mass_type}'
return f'<a target="_blank" href="{link}">{sequence}</a>'


def generate_peptide_df(sequence: str, cleavage_sites: List, missed_cleavages: int, min_len: int,
max_len: int, semi_enzymatic: bool, static_mods: dict, min_mass: float, max_mass: float,
is_mono: bool, infer_charge: bool, min_charge: int, max_charge: int, min_mz: float,
@@ -60,7 +53,7 @@ def generate_peptide_df(sequence: str, cleavage_sites: List, missed_cleavages: i
df = df[(df['Len'] >= min_len) & (df['Len'] <= max_len)]

# Apply variable modifications to each sequence in the DataFrame
def apply_var_mods(sequence):
def apply_var_mods(sequence: str) -> str:

var_seqs = apply_variable_modifications(sequence, var_mods, max_var_mods)

@@ -92,15 +85,17 @@ def apply_var_mods(sequence):
# expand the sequence column into multiple rows (sequences are separated by ';')
df = df.assign(Sequence=df.Sequence.str.split(';')).explode('Sequence')

def apply_static_mods(sequence):
# Update var_mods dictionary based on conditions
def apply_static_mods(sequence: str) -> str:

sequence = apply_static_modifications(sequence, static_mods)

if n_term_static_mod:
var_mods.update({-1: n_term_static_mod})
sequence = add_n_term_modification(sequence, n_term_static_mod)

if c_term_static_mod:
var_mods.update({calculate_sequence_length(sequence): c_term_static_mod})
sequence = add_c_term_modification(sequence, c_term_static_mod)

# Apply variable modifications and join them with ';'
return apply_static_modifications(sequence, static_mods)
return sequence

df['Sequence'] = df['Sequence'].apply(apply_static_mods)

@@ -246,3 +241,10 @@ def generate_app_url(protein_id, protein_sequence, proteases, custom_regex, miss
}
query_string = '&'.join([f'{key}={value}' for key, value in params.items() if value is not None])
return f'{base_url}?{query_string}'


def make_clickable(sequence: str, mass_type) -> str:
    """Build the URL that a peptide's link should point to.

    Args:
        sequence: Peptide sequence sent as the ``sequence`` query parameter.
        mass_type: Value sent as the ``mass_type`` query parameter.

    Returns:
        ``LINK`` followed by ``?`` and the percent-encoded query string.
        Only the URL is returned; any link rendering is left to the caller.
    """
    # Imported locally so the function does not rely on the file's import header.
    from urllib.parse import urlencode

    # Percent-encode both values: characters such as '+', '&', '#' or spaces
    # inside a sequence would otherwise be misread by the receiving page or
    # cut the query string short.
    query = urlencode({'sequence': sequence, 'mass_type': mass_type})
    return f'{LINK}?{query}'

0 comments on commit 0603c13

Please sign in to comment.