Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions quickumls/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,12 +316,11 @@ def _get_all_matches(self, ngrams):
if not self.to_lowercase_flag and ngram_normalized.isupper() and not self.keep_uppercase:
ngram_normalized = ngram_normalized.lower()

prev_cui = None
ngram_cands = list(self.ss_db.get(ngram_normalized))

ngram_dict = {}
for match in ngram_cands:
cuisem_match = sorted(self.cuisem_db.get(match))
cuisem_match = self.cuisem_db.get(match)

match_similarity = toolbox.get_similarity(
x=ngram_normalized,
Expand Down
14 changes: 8 additions & 6 deletions quickumls/spacy_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,13 @@ def __init__(self, nlp, name = "medspacy_quickumls", quickumls_fp=None,
# umls_matches below which contains more information and enables overlapping
if not Span.has_extension("similarity"):
Span.set_extension('similarity', default = -1.0)
if not Span.has_extension("semtypes"):
if not Span.has_extension("semtypes"):
Span.set_extension('semtypes', default = -1.0)

# match objects are a set, since span objects with the same start/end keys
# would have the same values for custom attributes in spacy
if not Span.has_extension("umls_matches"):
Span.set_extension('umls_matches', default=set())
Span.set_extension('umls_matches', default=[])

@property
def result_type(self) -> str:
Expand Down Expand Up @@ -205,11 +205,13 @@ def __call__(self, doc):
span._.semtypes = ngram_match_dict['semtypes']

# let's create this more fully featured match object
umls_match = UmlsMatch(cui,
ngram_match_dict['semtypes'],
ngram_match_dict['similarity'])
umls_match = UmlsMatch(
cui,
ngram_match_dict['semtypes'],
ngram_match_dict['similarity'],
)

span._.umls_matches.add(umls_match)
span._.umls_matches.append(umls_match)

if self.result_type.lower() == "ents":
doc.ents = list(doc.ents) + [span]
Expand Down
2 changes: 1 addition & 1 deletion quickumls/toolbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ def get(self, term):
matches = (
(
cui,
pickle.loads(self.semtypes_db_get(db_key_encode(cui))),
list(pickle.loads(self.semtypes_db_get(db_key_encode(cui)))),
is_preferred
)
for cui, is_preferred in cuis
Expand Down
36 changes: 33 additions & 3 deletions quickumls/umls_match.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from typing import Set
from typing import Any, Dict, List
import srsly


class UmlsMatch:
    """A single UMLS concept match: its CUI, semantic types and similarity score."""

    def __init__(self,
                 cui: str,
                 semtypes: List[str],
                 similarity: float):
        """Instantiate UmlsMatch object

        Args:
            cui: UMLS controlled unique identifier (CUI) value (e.g., "C0243095")
            semtypes (List[str]): List of UMLS semantic types as Type Unique Identifier values (TUI)
                for this matched concept (e.g., "T203")
            similarity (float): Similarity score between match and UMLS concept
        """
        self.cui = cui
        self.semtypes = semtypes
        self.similarity = similarity

    def __repr__(self) -> str:
        # Format each field separately; interpolating a single tuple would
        # render as "UmlsMatch(('C1', \"['T1']\", '0.9'))".
        return (
            f"{type(self).__name__}("
            f"{self.cui!r}, {self.semtypes!r}, {self.similarity!r})"
        )

    def serialized_representation(self) -> Dict[str, Any]:
        """
        Returns the serialized representation of the UmlsMatch

        The result is a new dict mapping attribute names to their values, so
        adding, removing or rebinding keys on it does not affect this
        instance. The copy is shallow: the ``semtypes`` list object itself is
        shared with the instance.
        """
        return dict(vars(self))

    @classmethod
    def from_serialized_representation(
        cls, serialized_representation: Dict[str, Any]
    ) -> "UmlsMatch":
        """
        Creates the UmlsMatch from the serialized representation

        Args:
            serialized_representation: mapping whose keys are the keyword
                arguments of ``__init__`` (``cui``, ``semtypes``,
                ``similarity``), as produced by ``serialized_representation``.

        Returns:
            A new instance of the class this method is called on.
        """
        # Build through ``cls`` so subclasses get instances of their own type.
        return cls(**serialized_representation)

@srsly.msgpack_encoders("umls_match")
def serialize_context_graph(obj, chain=None):
    """Msgpack encoder hook, registered with srsly under the name "umls_match".

    NOTE(review): despite its name, this function handles ``UmlsMatch``
    objects, not context graphs.

    Args:
        obj: the object being encoded.
        chain: optional callable to delegate to when ``obj`` is not a
            ``UmlsMatch``.

    Returns:
        ``{"umls_match": <serialized representation>}`` for a ``UmlsMatch``;
        otherwise ``chain(obj)`` when ``chain`` is given, else ``obj`` unchanged.
    """
    if not isinstance(obj, UmlsMatch):
        # Not one of ours: hand it to the next handler, if there is one.
        if chain is None:
            return obj
        return chain(obj)

    payload = obj.serialized_representation()
    return {"umls_match": payload}


@srsly.msgpack_decoders("umls_match")
def deserialize_context_graph(obj, chain=None):
    """Msgpack decoder hook, registered with srsly under the name "umls_match".

    NOTE(review): despite its name, this function rebuilds ``UmlsMatch``
    objects, not context graphs.

    Args:
        obj: the decoded container to inspect; it must support the ``in``
            operator and, when it holds the "umls_match" key, item access.
        chain: optional callable to delegate to when ``obj`` has no
            "umls_match" key.

    Returns:
        A ``UmlsMatch`` built from ``obj["umls_match"]`` when that key is
        present; otherwise ``chain(obj)`` when ``chain`` is given, else
        ``obj`` unchanged.
    """
    if "umls_match" not in obj:
        # Not a tagged UmlsMatch payload: defer to the next handler, if any.
        if chain is None:
            return obj
        return chain(obj)

    payload = obj["umls_match"]
    return UmlsMatch.from_serialized_representation(payload)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ nltk>=3.3
medspacy_simstring>=2.1
unqlite>=0.8.1
pytest>=6
srsly>=2.4.8
six