Skip to content

Commit

Permalink
No issue. Enable sbert dependencies again and clean up deprecation wa…
Browse files Browse the repository at this point in the history
…rnings
  • Loading branch information
reckart committed Jan 7, 2024
1 parent 192e2cd commit 876d3e8
Show file tree
Hide file tree
Showing 10 changed files with 27 additions and 20 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,16 @@ listed and explained below:
<!--- * `doc` for being able to build the documentation % -->
* `dev` for being able to develop this package

Note that, depending on your system, it may be necessary to install certain system libraries manually
before activating the contrib dependencies. For example, on macOS, `libomp` is required by the `lightgbm`
contrib dependency:

brew install libomp

For example, to install the `contrib` dependencies, run:

pip install -e ".[contrib]"


## Starting a simple recommender

Expand Down
2 changes: 1 addition & 1 deletion ariadne/contrib/jieba.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
result = jieba.tokenize(cas.sofa_string)
for tk in result:
prediction = create_prediction(cas, layer, feature, tk[1], tk[2], tk[0])
cas.add_annotation(prediction)
cas.add(prediction)
4 changes: 2 additions & 2 deletions ariadne/contrib/sbert.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def fit(self, documents: List[TrainingDocument], layer: str, feature: str, proje
if label is None:
continue

sentences.append(cas.get_covered_text(sentence))
sentences.append(sentence.get_covered_text())
targets.append(label)

featurized_sentences = featurizer.featurize(sentences)
Expand All @@ -112,7 +112,7 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_

for sentence, featurized_sentence, label in zip(sentences, featurized_sentences, predictions):
prediction = create_prediction(cas, layer, feature, sentence.begin, sentence.end, label)
cas.add_annotation(prediction)
cas.add(prediction)

def _get_featurizer(self):
return CachedSentenceTransformer("distilbert-base-nli-mean-tokens")
4 changes: 2 additions & 2 deletions ariadne/contrib/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def fit(self, documents: List[TrainingDocument], layer: str, feature: str, proje
if label is None:
continue

sentences.append(cas.get_covered_text(sentence))
sentences.append(sentence.get_covered_text())
targets.append(label)

model = Pipeline([("vect", CountVectorizer()), ("tfidf", TfidfTransformer()), ("clf", MultinomialNB())])
Expand All @@ -79,7 +79,7 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
for sentence in cas.select(SENTENCE_TYPE):
predicted = model.predict([sentence.get_covered_text()])[0]
prediction = create_prediction(cas, layer, feature, sentence.begin, sentence.end, predicted)
cas.add_annotation(prediction)
cas.add(prediction)


# https://sklearn-crfsuite.readthedocs.io/en/latest/tutorial.html#let-s-use-conll-2002-data-to-build-a-ner-system
Expand Down
8 changes: 4 additions & 4 deletions ariadne/contrib/spacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def __init__(self, model_name: str, model_directory: Path = None):
def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_id: str, user_id: str):
# Extract the tokens from the CAS and create a spacy doc from it
cas_tokens = cas.select(TOKEN_TYPE)
words = [cas.get_covered_text(cas_token) for cas_token in cas_tokens]
words = [cas_token.get_covered_text() for cas_token in cas_tokens]

doc = Doc(self._model.vocab, words=words)

Expand All @@ -51,7 +51,7 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
end = cas_tokens[named_entity.end - 1].end
label = named_entity.label_
prediction = create_prediction(cas, layer, feature, begin, end, label)
cas.add_annotation(prediction)
cas.add(prediction)


class SpacyPosClassifier(Classifier):
Expand All @@ -66,7 +66,7 @@ def __init__(self, model_name: str):

def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_id: str, user_id: str):
# Extract the tokens from the CAS and create a spacy doc from it
words = [cas.get_covered_text(cas_token) for cas_token in cas.select(TOKEN_TYPE)]
words = [cas_token.get_covered_text() for cas_token in cas.select(TOKEN_TYPE)]

doc = Doc(self._model.vocab, words=words)

Expand All @@ -77,4 +77,4 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
# For every token, extract the POS tag and create an annotation in the CAS
for cas_token, spacy_token in zip(cas.select(TOKEN_TYPE), doc):
prediction = create_prediction(cas, layer, feature, cas_token.begin, cas_token.end, spacy_token.tag_)
cas.add_annotation(prediction)
cas.add(prediction)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
"nltk~=3.5",
"jieba~=0.42",
"sentence-transformers~=2.2.2",
# "lightgbm~=3.3.5",
"lightgbm~=4.2.0",
"diskcache~=5.2.1"
]

Expand Down
10 changes: 5 additions & 5 deletions tests/test_inception_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
def test_create_prediction():
typesystem = TypeSystem()
Span = typesystem.create_type("custom.Span")
typesystem.add_feature(Span, "inception_internal_predicted", "uima.cas.Boolean")
typesystem.add_feature(Span, "value", "uima.cas.String")
typesystem.add_feature(Span, "value_score", "uima.cas.Double")
typesystem.add_feature(Span, "value_score_explanation", "uima.cas.String")
typesystem.add_feature(Span, "value_auto_accept", "uima.cas.Boolean")
typesystem.create_feature(Span, "inception_internal_predicted", "uima.cas.Boolean")
typesystem.create_feature(Span, "value", "uima.cas.String")
typesystem.create_feature(Span, "value_score", "uima.cas.Double")
typesystem.create_feature(Span, "value_score_explanation", "uima.cas.String")
typesystem.create_feature(Span, "value_auto_accept", "uima.cas.Boolean")
cas = Cas(typesystem=typesystem)
prediction = create_prediction(
cas, "custom.Span", "value", 0, 4, "label", score=0.1, score_explanation="blah", auto_accept=True
Expand Down
4 changes: 2 additions & 2 deletions tests/test_jieba_segmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def _load_data() -> Cas:
cas = Cas()
cas.sofa_string = text.strip()
predicted_type = cas.typesystem.create_type(_PREDICTED_TYPE)
cas.typesystem.add_feature(predicted_type, _PREDICTED_FEATURE, "uima.cas.String")
cas.typesystem.add_feature(predicted_type, "inception_internal_predicted", "uima.cas.Boolean")
cas.typesystem.create_feature(predicted_type, _PREDICTED_FEATURE, "uima.cas.String")
cas.typesystem.create_feature(predicted_type, "inception_internal_predicted", "uima.cas.Boolean")

return cas
2 changes: 1 addition & 1 deletion tests/test_sbert_sentence_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import pytest

pytest.importorskip("lightgbm.LGBMClassifier")
#pytest.importorskip("lightgbm.LGBMClassifier")

from ariadne.contrib.sbert import SbertSentenceClassifier

Expand Down
4 changes: 2 additions & 2 deletions tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,6 @@ def build_typesystem() -> TypeSystem:
typesystem = TypeSystem()
typesystem.create_type(SENTENCE_TYPE)
PredictedType = typesystem.create_type(PREDICTED_TYPE)
typesystem.add_feature(PredictedType, PREDICTED_FEATURE, TYPE_NAME_STRING)
typesystem.add_feature(PredictedType, IS_PREDICTION, TYPE_NAME_BOOLEAN)
typesystem.create_feature(PredictedType, PREDICTED_FEATURE, TYPE_NAME_STRING)
typesystem.create_feature(PredictedType, IS_PREDICTION, TYPE_NAME_BOOLEAN)
return typesystem

0 comments on commit 876d3e8

Please sign in to comment.