Merge pull request #50 from inception-project/feature/49-Add-arguments-for-score-explanation-etc-to-create_prediction

#49 - Add arguments for score, explanation, etc. to create_prediction
reckart authored Jan 6, 2024
2 parents 1deda1c + 4329bc0 commit 5f24285
Showing 13 changed files with 110 additions and 10 deletions.
42 changes: 42 additions & 0 deletions .github/workflows/run_tests.yml
@@ -0,0 +1,42 @@
name: Run Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        pip install --upgrade -e .[test]
        pip install flake8
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    - name: Run tests
      run: |
        pytest --cov=./ --cov-report=xml
    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v3
      with:
        file: ./coverage.xml
        env_vars: OS,PYTHON
        name: codecov-umbrella
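Editorial note, not part of the commit: the first flake8 pass above is the blocking one, restricted to serious defects (E9 syntax errors, F63/F7 misuse, F82 undefined names). A hypothetical snippet of the kind it rejects:

# Hypothetical example (not from the repository): the blocking flake8 pass
# (--select=E9,F63,F7,F82) fails the build on undefined names like this one.
def broken() -> int:
    return undefined_variable  # F821: undefined name 'undefined_variable'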
5 changes: 5 additions & 0 deletions Makefile
@@ -1,3 +1,8 @@
+PYTHON_FILES = tests
+
+test:
+	python -m pytest -m "not performance" tests/
+
 gunicorn:
 	gunicorn -w 4 -b 127.0.0.1:5000 --reload wsgi:app

4 changes: 2 additions & 2 deletions ariadne/contrib/adapters.py
@@ -23,7 +23,7 @@

 class AdapterSequenceTagger(Classifier):
     def __init__(self, base_model_name: str, adapter_name: str, labels: List[str], model_directory: Path = None):
-        """ Sequence Tagger using Adapters from https://adapterhub.ml .
+        """Sequence Tagger using Adapters from https://adapterhub.ml .

         As an example, to use it for POS tagging, one can use

@@ -137,7 +137,7 @@ def __init__(
         config: Optional[str] = None,
         model_directory: Path = None,
     ):
-        """ Sentence Classifier using Adapters from https://adapterhub.ml .
+        """Sentence Classifier using Adapters from https://adapterhub.ml .

         As an example, to use it to predict sentiment, one can use
41 changes: 40 additions & 1 deletion ariadne/contrib/inception_util.py
@@ -1,14 +1,53 @@
+from typing import Optional
+
 from cassis import Cas
 from cassis.typesystem import FeatureStructure
 
 SENTENCE_TYPE = "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence"
 TOKEN_TYPE = "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token"
 IS_PREDICTION = "inception_internal_predicted"
+FEATURE_NAME_SCORE_SUFFIX = "_score"
+FEATURE_NAME_SCORE_EXPLANATION_SUFFIX = "_score_explanation"
+FEATURE_NAME_AUTO_ACCEPT_MODE_SUFFIX = "_auto_accept"
 
 
-def create_prediction(cas: Cas, layer: str, feature: str, begin: int, end: int, label: str) -> FeatureStructure:
+def create_prediction(
+    cas: Cas,
+    layer: str,
+    feature: str,
+    begin: int,
+    end: int,
+    label: str,
+    score: Optional[int] = None,
+    score_explanation: Optional[str] = None,
+    auto_accept: Optional[bool] = None,
+) -> FeatureStructure:
+    """
+    Create a prediction
+
+    :param cas: the annotated document
+    :param layer: the layer on which to create the prediction
+    :param feature: the feature to predict
+    :param begin: the offset of the first character of the prediction
+    :param end: the offset of the first character after the prediction
+    :param label: the predicted label
+    :param score: the score
+    :param score_explanation: a rationale for the score / prediction
+    :param auto_accept: whether the prediction should be automatically accepted
+    :return: the prediction annotation
+    """
     AnnotationType = cas.typesystem.get_type(layer)
 
     fields = {"begin": begin, "end": end, IS_PREDICTION: True, feature: label}
     prediction = AnnotationType(**fields)
 
+    if score is not None:
+        prediction[f"{feature}{FEATURE_NAME_SCORE_SUFFIX}"] = score
+
+    if score_explanation is not None:
+        prediction[f"{feature}{FEATURE_NAME_SCORE_EXPLANATION_SUFFIX}"] = score_explanation
+
+    if auto_accept is not None:
+        prediction[f"{feature}{FEATURE_NAME_AUTO_ACCEPT_MODE_SUFFIX}"] = auto_accept
+
     return prediction
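To make the extended API concrete, here is a hypothetical caller; the layer name "custom.Span", its "value" feature, and the offsets are assumptions for illustration, not part of the commit. The optional arguments land on sibling features derived from the suffix constants above (e.g. "value_score"), and the predict signature mirrors the Classifier API visible in the simalign.py and stringmatcher.py hunks below.

# Hypothetical usage sketch: assumes the project's typesystem defines a layer
# "custom.Span" with a "value" feature plus the derived "_score",
# "_score_explanation" and "_auto_accept" features.
from ariadne.contrib.inception_util import create_prediction

def predict(self, cas, layer, feature, project_id, document_id, user_id):
    prediction = create_prediction(
        cas,
        layer,  # e.g. "custom.Span"
        feature,  # e.g. "value"
        begin=0,
        end=12,
        label="PERSON",
        score=1,  # stored on "value_score"
        score_explanation="exact gazetteer match",  # "value_score_explanation"
        auto_accept=True,  # "value_auto_accept"
    )
    cas.add_annotation(prediction)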
1 change: 0 additions & 1 deletion ariadne/contrib/simalign.py
@@ -14,7 +14,6 @@ def __init__(self):
         self._aligner = SentenceAligner(model="bert", token_type="bpe", matching_methods="mai")
 
     def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_id: str, user_id: str):
-
         sentences = cas.select(SENTENCE_TYPE)
 
         src_tokens = cas.select_covered("webanno.custom.Base", sentences[0])
2 changes: 1 addition & 1 deletion ariadne/contrib/stringmatcher.py
@@ -69,7 +69,7 @@ def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_
         m = Map.from_iter(items)
 
         # We iterate over all candidates and check whether they match
-        for (begin, end, term) in chain(
+        for begin, end, term in chain(
             self._generate_candidates(cas, 3), self._generate_candidates(cas, 2), self._generate_candidates(cas, 1)
         ):
             for mention, label_id in m.search(term=term, max_dist=2):
for mention, label_id in m.search(term=term, max_dist=2):
File renamed without changes.
File renamed without changes.
Empty file added tests/resources/__init__.py
Empty file.
6 changes: 5 additions & 1 deletion tests/test_adapter_recommender.py
@@ -1,6 +1,10 @@
 import pytest
 
+pytest.importorskip("transformers.AuthModelWithHeads")
+
 from pathlib import Path
 
-from ariadne.contrib import AdapterSequenceTagger
+from ariadne.contrib.adapters import AdapterSequenceTagger
+from ariadne.contrib.adapters import AdapterSentenceClassifier
 from tests.util import (
     load_obama,
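An editorial aside on the pattern introduced here: pytest.importorskip imports by module path and skips the whole test module when that import fails, which is why the guarded imports now sit below the call. A minimal sketch of the gating pattern (the module name is just an example):

import pytest

# Skip every test in this module (rather than erroring at collection time)
# when the optional dependency cannot be imported.
transformers = pytest.importorskip("transformers")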
4 changes: 3 additions & 1 deletion tests/test_sbert_sentence_classifier.py
@@ -2,7 +2,9 @@
 
 import pytest
 
-from ariadne.contrib import SbertSentenceClassifier
+pytest.importorskip("lightgbm.LGBMClassifier")
+
+from ariadne.contrib.sbert import SbertSentenceClassifier
 
 from tests.util import *
 
2 changes: 1 addition & 1 deletion tests/test_sklearn_sentence_classifier.py
@@ -1,6 +1,6 @@
 from pathlib import Path
 
-from ariadne.contrib import SklearnSentenceClassifier
+from ariadne.contrib.sklearn import SklearnSentenceClassifier
 
 from tests.util import *
 
13 changes: 11 additions & 2 deletions tests/util.py
@@ -59,10 +59,19 @@ def load_newsgroup_test_data() -> List[Cas]:
 
 
 def load_obama() -> Cas:
-    with open("data/INCEpTION_TypeSystem.xml", "rb") as f:
+    # https://stackoverflow.com/a/20885799
+    try:
+        import importlib.resources as pkg_resources
+    except ImportError:
+        # Try backported to PY<37 `importlib_resources`.
+        import importlib_resources as pkg_resources
+
+    from . import resources  # relative-import the *package* containing the resources
+
+    with pkg_resources.open_binary(resources, "INCEpTION_TypeSystem.xml") as f:
         typesystem = merge_typesystems(load_typesystem(f), build_typesystem())
 
-    with open("data/Wikipedia-Obama.xmi", "rb") as f:
+    with pkg_resources.open_binary(resources, "Wikipedia-Obama.xmi") as f:
         cas = load_cas_from_xmi(f, typesystem=typesystem)
 
     return cas
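The practical effect of this last change: load_obama() now resolves its typesystem and XMI through importlib.resources instead of a hard-coded data/ path, so the tests no longer depend on the current working directory. A minimal caller sketch:

# Minimal sketch: works from any working directory now that the XML/XMI files
# are packaged under tests/resources.
from tests.util import load_obama

cas = load_obama()
assert cas.sofa_string  # document text loaded from package data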
