-
Notifications
You must be signed in to change notification settings - Fork 7
/
test_spacy_fastlang.py
75 lines (54 loc) · 1.98 KB
/
test_spacy_fastlang.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import spacy
import os
import spacy_fastlang # noqa: F401 # pylint: disable=unused-import
# English sample sentence; the tests in this module expect it to be detected
# as "en" with a score of at least 0.8.
en_text = (
    "Life is like a box of chocolates. "
    "You never know what you're gonna get."
)

# Short snippet with trailing whitespace and a newline; the tests in this
# module expect its detection score to be below 0.5.
poor_quality_text = "Hi Mademoiselle \n"
def test_detect_doc_language():
    """Check that the English sample is reported as "en" with a score >= 0.8."""
    pipeline = spacy.blank("xx")
    pipeline.add_pipe("language_detector")

    parsed = pipeline(en_text)
    extensions = parsed._

    assert extensions.language == "en"
    assert extensions.language_score >= 0.8
def test_use_fallback_if_confidence_is_low():
    """Check the reported language for a low-scoring text once a threshold is set.

    With the default configuration the sample text is expected to score below
    0.5. With ``threshold`` configured to 0.5, the language is expected to be
    reported as "xx" while the score itself still stays below 0.5.
    """
    # Baseline: component added without any configuration.
    plain_pipeline = spacy.blank("xx")
    plain_pipeline.add_pipe("language_detector")
    plain_result = plain_pipeline(poor_quality_text)
    assert plain_result._.language_score < 0.5

    # Same input, but with an explicit threshold in the component config.
    threshold_config = {"threshold": 0.5}
    threshold_pipeline = spacy.blank("xx")
    threshold_pipeline.add_pipe("language_detector", config=threshold_config)
    threshold_result = threshold_pipeline(poor_quality_text)
    assert threshold_result._.language == "xx"
    assert threshold_result._.language_score < 0.5
def test_use_fallback_value_if_language_not_supported():
    """Check the result for English text when only "fr" is listed as supported.

    The language is expected to be reported as "xx", while the score is still
    expected to be at least 0.8.
    """
    french_only = {"supported_languages": ["fr"]}
    pipeline = spacy.blank("xx")
    pipeline.add_pipe("language_detector", config=french_only)

    parsed = pipeline(en_text)

    assert parsed._.language == "xx"
    assert parsed._.language_score >= 0.8
def test_use_custom_fallback():
    """Check that a configured ``default_language`` is reported under a 0.99 threshold.

    The component is configured with ``threshold`` 0.99 and
    ``default_language`` "fr"; for the English sample the language is expected
    to be "fr" while the score is still expected to be at least 0.8.
    """
    fallback_config = {"threshold": 0.99, "default_language": "fr"}
    pipeline = spacy.blank("xx")
    pipeline.add_pipe("language_detector", config=fallback_config)

    parsed = pipeline(en_text)

    assert parsed._.language == "fr"
    assert parsed._.language_score >= 0.8
def test_use_custom_model():
    """Check detection when ``model_path`` is passed explicitly in the config.

    The path points at ``spacy_fastlang/lid.176.ftz``, located relative to this
    test file. The English sample is expected to be reported as "en" with a
    score of at least 0.8.
    """
    # The first ".." cancels this file's own name and the second steps up one
    # directory; realpath then collapses the result into an absolute path.
    relative_parts = ("..", "..", "spacy_fastlang", "lid.176.ftz")
    model_file = os.path.realpath(os.path.join(__file__, *relative_parts))

    pipeline = spacy.blank("xx")
    pipeline.add_pipe("language_detector", config={"model_path": model_file})

    parsed = pipeline(en_text)

    assert parsed._.language == "en"
    assert parsed._.language_score >= 0.8
def test_batch_predictions():
    """Check that batch processing via ``nlp.pipe`` annotates every input text.

    Two copies of the English sample are streamed through the pipeline. One
    result is expected per input, each reported as "en" with a score of at
    least 0.8.
    """
    nlp = spacy.blank("xx")
    nlp.add_pipe("language_detector")

    texts = [en_text, en_text]
    # Materialize the stream so the result count can be checked: asserting
    # only inside a loop would pass vacuously if the stream yielded nothing.
    docs = list(nlp.pipe(texts))

    assert len(docs) == len(texts)
    for doc in docs:
        assert doc._.language == "en"
        assert doc._.language_score >= 0.8