Skip to content

Commit

Permalink
add tests for LLM
Browse files Browse the repository at this point in the history
  • Loading branch information
culmat committed Oct 30, 2024
1 parent b678698 commit e6ba49c
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 2 deletions.
6 changes: 4 additions & 2 deletions src/utils/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
Task instructions: Analyze the text provided after 'Text to anonymize' carefully for all names of persons and places.
For each name that you find evaluate whether it is a new name or just a repetition or variation of a name you have already found before.
Names of persons are labeled as #person_1#, #person_2#, etc. Names of places are labeled as #place_1#, #place_2#, etc.
Only return a json dictionary without any comments or markdown formatting around it.
Only return a json dictionary without any comments or markdown formatting around it. Do not return keys without values.
Example input: 'Tony Stark and Peter Parker walk through New York where Peter wants to show Tony the Broadway.'.
Example output: {{"#person_1#": ["Tony Stark", "Tony"], "#person_2#": ["Peter Parker", "Peter"], "#place_1#": ["New York"], "#place_2#": ["Broadway"]}}
Text to anonymize: {text}
Expand All @@ -31,4 +31,6 @@ def llm_find_entities(text, temperature=0, template=TEMPLATE, raw=False):
result = chain.invoke({"text": text})
if raw:
return result
return {k: sorted(v, key=len, reverse=True) for k, v in json.loads(result).items()}

ret = {k: v for k, v in json.loads(result).items()}
return {k: set(v) for k, v in ret.items() if v}
36 changes: 36 additions & 0 deletions tests/test_llm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import pytest
from src.utils.llm import llm_find_entities

def test_llm_find_entities_basic():
    """Check entity extraction on a sentence with two persons and two places.

    The expected values are sets, so the order in which the name variants
    come back does not matter for the comparison.
    """
    text = "Tony Stark and Peter Parker walk through New York where Peter wants to show Tony the Broadway."
    expected_output = {
        '#person_1#': {'Tony Stark', 'Tony'},
        '#person_2#': {'Peter Parker', 'Peter'},
        '#place_1#': {'New York'},
        '#place_2#': {'Broadway'},
    }
    result = llm_find_entities(text)
    # A single comparison is enough; the original repeated this assert verbatim.
    assert result == expected_output

def test_llm_find_entities_no_entities():
    """A sentence without person or place names should yield an empty dict."""
    found = llm_find_entities(
        "This is a text without any names of persons or places."
    )
    assert found == {}

def test_llm_find_entities_repeated_names():
    """Check that names repeated in the text are reported once per label.

    The expected values are sets so they compare like the set-valued
    results used by the other tests in this file. The original version
    built ``expected_output`` with lists and never asserted on it, so the
    test could not fail.
    """
    text = "Alice and Bob went to Wonderland. Alice met Bob at the Wonderland park."
    expected_output = {
        "#person_1#": {"Alice"},
        "#person_2#": {"Bob"},
        "#place_1#": {"Wonderland"},
        "#place_2#": {"Wonderland park"},
    }
    result = llm_find_entities(text)
    # NOTE(review): the exact labelling depends on the model's reply; confirm
    # this expectation is stable against the configured LLM.
    assert result == expected_output

def test_llm_find_entities_raw_output():
    """With ``raw=True`` the reply should be a string mentioning every name."""
    raw_reply = llm_find_entities(
        "Tony Stark and Peter Parker walk through New York where Peter wants to show Tony the Broadway.",
        raw=True,
    )
    # Echo the reply so it shows up in pytest's captured output on failure.
    print("result")
    print(raw_reply)
    assert isinstance(raw_reply, str)
    for expected_name in ("Tony Stark", "Peter Parker", "New York", "Broadway"):
        assert expected_name in raw_reply

0 comments on commit e6ba49c

Please sign in to comment.