Skip to content

Commit 4ee41be

Browse files
committed
fix: protein hashing
1 parent: 33be90c · commit: 4ee41be

File tree

2 files changed: 3 additions (+3) and 1 deletion (-1)

socialgene/base/molbio.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -149,6 +149,8 @@ def _standardize_sequence(self):
149149
amino acids, raising an error if an unknown character is found.
150150
"""
151151
self.sequence = self.sequence.upper()
152+
self.sequence = self.sequence.replace(" ", "")
153+
self.sequence = self.sequence.strip("*")
152154
if not all([i in self._amino_acids for i in set(self.sequence)]):
153155
log.error(self.sequence)
154156
raise ValueError("Unknown character/letter in protein sequence")

socialgene/hashing/hashing.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ def hash_aminos(input, **kwargs):
2626
# make sure everything is uppercase before hashing
2727
cleaned = input.upper()
2828
# remove all whitespace
29-
cleaned = "".join(cleaned.split())
29+
cleaned = cleaned.replace(" ", "")
3030
# remove "*" if it's at the beginning or end of the string but not in the middle
3131
cleaned = cleaned.strip("*")
3232
return hasher(input=cleaned, **kwargs)

0 commit comments

Comments
 (0)