add tests for LLM

baloise · Oct 30, 2024 · e6ba49c · e6ba49c
1 parent b678698
commit e6ba49c
Show file tree

Hide file tree

Showing 2 changed files with 40 additions and 2 deletions.
diff --git a/src/utils/llm.py b/src/utils/llm.py
@@ -8,7 +8,7 @@
 Task instructions: Analyze the text provided after 'Text to anonymize' carefully for all names of persons and places.
 For each name that you find evaluate whether it is a new name or just a repetition or variation of a name you have already found before.
 Names of persons are labeled as #person_1#, #person_2#, etc. Names of places are labeled as #place_1#, #place_2#, etc.
-Only return a json dictionary without any comments or markdown formatting around it.
+Only return a json dictionary without any comments or markdown formatting around it. Do not return keys without values.
 Example input: 'Tony Stark and Peter Parker walk through New York where Peter wants to show Tony the Broadway.'.
 Example output: {{"#person_1#": ["Tony Stark", "Tony"], "#person_2#": ["Peter Parker", "Peter"], "#place_1#": ["New York"], "#place_2#": ["Broadway"]}}
 Text to anonymize: {text}
@@ -31,4 +31,6 @@ def llm_find_entities(text, temperature=0, template=TEMPLATE, raw=False):
     result = chain.invoke({"text": text})
     if raw:
         return result
-    return {k: sorted(v, key=len, reverse=True) for k, v in json.loads(result).items()}
+
+    ret = {k: v for k, v in json.loads(result).items()}
+    return {k: set(v) for k, v in ret.items() if v}
diff --git a/tests/test_llm.py b/tests/test_llm.py
@@ -0,0 +1,36 @@
+import pytest
+from src.utils.llm import llm_find_entities
+
+def test_llm_find_entities_basic():
+    """The prompt's own example sentence should yield its example entities, with set values."""
+    text = "Tony Stark and Peter Parker walk through New York where Peter wants to show Tony the Broadway."
+    expected_output = {'#person_1#': {'Tony Stark', 'Tony'}, '#person_2#': {'Peter Parker', 'Peter'}, '#place_1#': {'New York'}, '#place_2#': {'Broadway'}}
+    result = llm_find_entities(text)
+    assert result == expected_output
+
+def test_llm_find_entities_no_entities():
+    """A sentence without person or place names should produce an empty mapping."""
+    plain_text = "This is a text without any names of persons or places."
+    entities = llm_find_entities(plain_text)
+    assert entities == {}
+
+def test_llm_find_entities_repeated_names():
+    """Names mentioned twice must be reported once; values are sets, as llm_find_entities returns."""
+    text = "Alice and Bob went to Wonderland. Alice met Bob at the Wonderland park."
+    expected_output = {
+        "#person_1#": {"Alice"}, "#person_2#": {"Bob"},
+        "#place_1#": {"Wonderland"}, "#place_2#": {"Wonderland park"},
+    }
+    result = llm_find_entities(text)
+    assert result == expected_output
+
+def test_llm_find_entities_raw_output():
+    """With raw=True the model response is returned as an unparsed string."""
+    text = "Tony Stark and Peter Parker walk through New York where Peter wants to show Tony the Broadway."
+    result = llm_find_entities(text, raw=True)
+    assert isinstance(result, str)
+    # Every entity named in the input must appear verbatim in the raw response.
+    assert "Tony Stark" in result
+    assert "Peter Parker" in result
+    assert "New York" in result
+    assert "Broadway" in result