No issue. Enable sbert dependencies again and clean up deprecation warnings
inception-project · Jan 7, 2024 · 876d3e8 · 876d3e8
1 parent 192e2cd
commit 876d3e8
Show file tree

Hide file tree

Showing 10 changed files with 27 additions and 20 deletions.
diff --git a/README.md b/README.md
@@ -24,9 +24,16 @@ listed and explained below:
 <!--- * `doc` for being able to build the documentation % -->
 * `dev` for being able to develop this package
 
+Note that depending on your system, it may be necessary to install certain system libraries manually
+before activating the contrib dependencies. For example on macOS, `libomp` is required by the `lightgbm`
+contrib dependency:
+
+    brew install libomp
+
 To install for example the `contrib` dependencies run:
 
     pip install -e ".[contrib]"
+
 
 ## Starting a simple recommender
 

diff --git a/ariadne/contrib/jieba.py b/ariadne/contrib/jieba.py
@@ -26,4 +26,4 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
         result = jieba.tokenize(cas.sofa_string)
         for tk in result:
             prediction = create_prediction(cas, layer, feature, tk[1], tk[2], tk[0])
-            cas.add_annotation(prediction)
+            cas.add(prediction)
diff --git a/ariadne/contrib/sbert.py b/ariadne/contrib/sbert.py
@@ -87,7 +87,7 @@ def fit(self, documents: List[TrainingDocument], layer: str, feature: str, proje
                 if label is None:
                     continue
 
-                sentences.append(cas.get_covered_text(sentence))
+                sentences.append(sentence.get_covered_text())
                 targets.append(label)
 
         featurized_sentences = featurizer.featurize(sentences)
@@ -112,7 +112,7 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
 
         for sentence, featurized_sentence, label in zip(sentences, featurized_sentences, predictions):
             prediction = create_prediction(cas, layer, feature, sentence.begin, sentence.end, label)
-            cas.add_annotation(prediction)
+            cas.add(prediction)
 
     def _get_featurizer(self):
         return CachedSentenceTransformer("distilbert-base-nli-mean-tokens")
diff --git a/ariadne/contrib/sklearn.py b/ariadne/contrib/sklearn.py
@@ -60,7 +60,7 @@ def fit(self, documents: List[TrainingDocument], layer: str, feature: str, proje
                 if label is None:
                     continue
 
-                sentences.append(cas.get_covered_text(sentence))
+                sentences.append(sentence.get_covered_text())
                 targets.append(label)
 
         model = Pipeline([("vect", CountVectorizer()), ("tfidf", TfidfTransformer()), ("clf", MultinomialNB())])
@@ -79,7 +79,7 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
         for sentence in cas.select(SENTENCE_TYPE):
             predicted = model.predict([sentence.get_covered_text()])[0]
             prediction = create_prediction(cas, layer, feature, sentence.begin, sentence.end, predicted)
-            cas.add_annotation(prediction)
+            cas.add(prediction)
 
 
 # https://sklearn-crfsuite.readthedocs.io/en/latest/tutorial.html#let-s-use-conll-2002-data-to-build-a-ner-system

diff --git a/ariadne/contrib/spacy.py b/ariadne/contrib/spacy.py
@@ -38,7 +38,7 @@ def __init__(self, model_name: str, model_directory: Path = None):
     def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_id: str, user_id: str):
         # Extract the tokens from the CAS and create a spacy doc from it
         cas_tokens = cas.select(TOKEN_TYPE)
-        words = [cas.get_covered_text(cas_token) for cas_token in cas_tokens]
+        words = [cas_token.get_covered_text() for cas_token in cas_tokens]
 
         doc = Doc(self._model.vocab, words=words)
 
@@ -51,7 +51,7 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
             end = cas_tokens[named_entity.end - 1].end
             label = named_entity.label_
             prediction = create_prediction(cas, layer, feature, begin, end, label)
-            cas.add_annotation(prediction)
+            cas.add(prediction)
 
 
 class SpacyPosClassifier(Classifier):
@@ -66,7 +66,7 @@ def __init__(self, model_name: str):
 
     def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_id: str, user_id: str):
         # Extract the tokens from the CAS and create a spacy doc from it
-        words = [cas.get_covered_text(cas_token) for cas_token in cas.select(TOKEN_TYPE)]
+        words = [cas_token.get_covered_text() for cas_token in cas.select(TOKEN_TYPE)]
 
         doc = Doc(self._model.vocab, words=words)
 
@@ -77,4 +77,4 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
         # For every token, extract the POS tag and create an annotation in the CAS
         for cas_token, spacy_token in zip(cas.select(TOKEN_TYPE), doc):
             prediction = create_prediction(cas, layer, feature, cas_token.begin, cas_token.end, spacy_token.tag_)
-            cas.add_annotation(prediction)
+            cas.add(prediction)
diff --git a/setup.py b/setup.py
@@ -47,7 +47,7 @@
     "nltk~=3.5",
     "jieba~=0.42",
     "sentence-transformers~=2.2.2",
-#    "lightgbm~=3.3.5",
+    "lightgbm~=4.2.0",
     "diskcache~=5.2.1"
 ]
 

diff --git a/tests/test_inception_util.py b/tests/test_inception_util.py
@@ -21,11 +21,11 @@
 def test_create_prediction():
     typesystem = TypeSystem()
     Span = typesystem.create_type("custom.Span")
-    typesystem.add_feature(Span, "inception_internal_predicted", "uima.cas.Boolean")
-    typesystem.add_feature(Span, "value", "uima.cas.String")
-    typesystem.add_feature(Span, "value_score", "uima.cas.Double")
-    typesystem.add_feature(Span, "value_score_explanation", "uima.cas.String")
-    typesystem.add_feature(Span, "value_auto_accept", "uima.cas.Boolean")
+    typesystem.create_feature(Span, "inception_internal_predicted", "uima.cas.Boolean")
+    typesystem.create_feature(Span, "value", "uima.cas.String")
+    typesystem.create_feature(Span, "value_score", "uima.cas.Double")
+    typesystem.create_feature(Span, "value_score_explanation", "uima.cas.String")
+    typesystem.create_feature(Span, "value_auto_accept", "uima.cas.Boolean")
     cas = Cas(typesystem=typesystem)
     prediction = create_prediction(
         cas, "custom.Span", "value", 0, 4, "label", score=0.1, score_explanation="blah", auto_accept=True

diff --git a/tests/test_jieba_segmenter.py b/tests/test_jieba_segmenter.py
@@ -56,7 +56,7 @@ def _load_data() -> Cas:
     cas = Cas()
     cas.sofa_string = text.strip()
     predicted_type = cas.typesystem.create_type(_PREDICTED_TYPE)
-    cas.typesystem.add_feature(predicted_type, _PREDICTED_FEATURE, "uima.cas.String")
-    cas.typesystem.add_feature(predicted_type, "inception_internal_predicted", "uima.cas.Boolean")
+    cas.typesystem.create_feature(predicted_type, _PREDICTED_FEATURE, "uima.cas.String")
+    cas.typesystem.create_feature(predicted_type, "inception_internal_predicted", "uima.cas.Boolean")
 
     return cas
diff --git a/tests/test_sbert_sentence_classifier.py b/tests/test_sbert_sentence_classifier.py
@@ -17,7 +17,7 @@
 
 import pytest
 
-pytest.importorskip("lightgbm.LGBMClassifier")
+#pytest.importorskip("lightgbm.LGBMClassifier")
 
 from ariadne.contrib.sbert import SbertSentenceClassifier
 

diff --git a/tests/util.py b/tests/util.py
@@ -97,6 +97,6 @@ def build_typesystem() -> TypeSystem:
     typesystem = TypeSystem()
     typesystem.create_type(SENTENCE_TYPE)
     PredictedType = typesystem.create_type(PREDICTED_TYPE)
-    typesystem.add_feature(PredictedType, PREDICTED_FEATURE, TYPE_NAME_STRING)
-    typesystem.add_feature(PredictedType, IS_PREDICTION, TYPE_NAME_BOOLEAN)
+    typesystem.create_feature(PredictedType, PREDICTED_FEATURE, TYPE_NAME_STRING)
+    typesystem.create_feature(PredictedType, IS_PREDICTION, TYPE_NAME_BOOLEAN)
     return typesystem