From 4865289f4a12fedf89d016b6084e674de7481936 Mon Sep 17 00:00:00 2001 From: alexholehouse Date: Tue, 11 May 2021 01:15:43 -0500 Subject: [PATCH 1/3] Removed hidden .DS_Store file and updated gitignore so they'll be ignored in the future --- .gitignore | 1 + scripts/.DS_Store | Bin 6148 -> 0 bytes 2 files changed, 1 insertion(+) delete mode 100644 scripts/.DS_Store diff --git a/.gitignore b/.gitignore index 7428e9a..b2b3334 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ __pycache__/ # C extensions *.so +.DS_Store # Distribution / packaging .Python diff --git a/scripts/.DS_Store b/scripts/.DS_Store deleted file mode 100644 index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 Date: Tue, 11 May 2021 01:20:28 -0500 Subject: [PATCH 2/3] Removed some unecessary imports in uniprot_predictions and updated documentation slightly there re:function sig. --- metapredict/backend/uniprot_predictions.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/metapredict/backend/uniprot_predictions.py b/metapredict/backend/uniprot_predictions.py index 50f0af8..949c79a 100644 --- a/metapredict/backend/uniprot_predictions.py +++ b/metapredict/backend/uniprot_predictions.py @@ -1,18 +1,14 @@ # code for pulling down uniprot sequence for predictions -import os -import sys -import argparse import urllib3 -import csv -import protfasta - -from metapredict import meta - def fetch_sequence(uniprot_id): """ Function that returns the amino acid sequence by polling UniProt.com + Note that right now the test for success is a bit hap-hazard (looks for the + string "Sorry", which appears if the UniProt call fails. We probably want + something a bit more robust in the future... 
+ Parameters -------------- uniprot_id : str @@ -20,6 +16,9 @@ def fetch_sequence(uniprot_id): Returns ----------- + str or None: + If the call is successful, this returns the amino acid string. If not, it returns + None. """ From ded69137cf36919544dec207b97124a3bbdb3a91 Mon Sep 17 00:00:00 2001 From: alexholehouse Date: Tue, 11 May 2021 01:29:04 -0500 Subject: [PATCH 3/3] added sneaky performance testing function --- metapredict/__init__.py | 57 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/metapredict/__init__.py b/metapredict/__init__.py index a3fe759..cd196df 100644 --- a/metapredict/__init__.py +++ b/metapredict/__init__.py @@ -16,9 +16,66 @@ + + # Handle versioneer from ._version import get_versions versions = get_versions() __version__ = versions['version'] __git_revision__ = versions['full-revisionid'] del get_versions, versions + + +def print_performance(seq_len=500, num_seqs=100, verbose=True): + """ + Function that lets you test metapredict's performance on your local hardware. + + Parameters + -------------- + seq_len : int + Length of each random sequence to be tested. Default = 500. + + num_seqs : int + Number of sequences to compute over. Default = 100. + + verbose : bool + Flag which, if true, means the function prints a summary when finished. If + false simply returns an integer + + Returns + --------------- + int + Returns the nearest number of sequences-per-second metapredict is currently + predicting. For ref, on a spring 2020 MBP this value was ~10,000 sequences per + second. 
+ + """ + + # this is a bit bad but, only import random if this function is called + import random + import time + VALID_AMINO_ACIDS = ['A','C','D','E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y'] + + def genseq(n): + """ + Function that generates a random amino acid sequence of length n + """ + return "".join([random.choice(VALID_AMINO_ACIDS) for i in range(n)]) + + seqs = [] + for i in range(num_seqs): + seqs.append(genseq(seq_len)) + + start = time.time() + for i in seqs: + predict_disorder(i) + + end = time.time() + s_per_second = (seq_len*num_seqs)/(end - start) + + if verbose: + print('Predicting %i sequences per second!'%(s_per_second)) + + return s_per_second + +