-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathtests.py
107 lines (91 loc) · 4.36 KB
/
tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from farasa.pos import FarasaPOSTagger
from farasa.ner import FarasaNamedEntityRecognizer
from farasa.diacratizer import FarasaDiacritizer
from farasa.segmenter import FarasaSegmenter
from farasa.stemmer import FarasaStemmer
from farasa.spellchecker import FarasaSpellChecker
# Arabic passage passed to the segmenter, stemmer, POS tagger, named-entity
# recognizer and diacritizer calls in this script. Because the triple quotes
# sit on their own lines, the value starts and ends with a newline.
# NOTE(review): the URL below is presumably where the passage was taken
# from -- confirm.
# https://r12a.github.io/scripts/tutorial/summaries/arabic
sample = """
يُشار إلى أن اللغة العربية يتحدثها أكثر من 422 مليون نسمة ويتوزع متحدثوها في المنطقة المعروفة باسم الوطن العربي بالإضافة إلى العديد من المناطق الأخرى المجاورة مثل الأهواز وتركيا وتشاد والسنغال وإريتريا وغيرها. وهي اللغة الرابعة من لغات منظمة الأمم المتحدة الرسمية الست.
"""
# Short input handed only to the spell checker (see the spell_check call).
# NOTE(review): the text looks intentionally misspelled so the checker has
# something to correct -- confirm.
spellchecker_sample = """
هذا النص خاطؤ الكتابه
"""
"""
---------------------
non interactive mode
---------------------
"""
# Non-interactive smoke run: every Farasa wrapper is constructed without
# interactive=True, fed the sample text once, and its output is printed.
print("original sample:", sample)
print("----------------------------------------")
print("Farasa features, noninteractive mode.")
print("----------------------------------------")

# Segmentation.
segmenter = FarasaSegmenter()
segmented = segmenter.segment(sample)
print("sample segmented:", segmented)
print("----------------------------------------------")

# Stemming.
stemmer = FarasaStemmer()
stemmed = stemmer.stem(sample)
print("sample stemmed:", stemmed)
print("----------------------------------------------")

# POS tagging via tag().
pos_tagger = FarasaPOSTagger()
pos_tagged = pos_tagger.tag(sample)
print("sample POS Tagged", pos_tagged)
print("----------------------------------------------")

# POS tagging via tag_segments(), on a freshly constructed tagger. The names
# deliberately carry no "_interactive" suffix: this tagger is built without
# interactive=True, and the interactive section defines its own variables.
pos_segments_tagger = FarasaPOSTagger()
pos_tagged_segments = pos_segments_tagger.tag_segments(sample)
print("sample POS Tagged Segments", pos_tagged_segments)
print("----------------------------------------------")

# Named-entity recognition.
named_entity_recognizer = FarasaNamedEntityRecognizer()
named_entity_recognized = named_entity_recognizer.recognize(sample)
print("sample named entity recognized:", named_entity_recognized)
print("----------------------------------------------")

# Diacritization.
diacritizer = FarasaDiacritizer()
diacritized = diacritizer.diacritize(sample)
print("sample diacritized:", diacritized)
print("----------------------------------------------")

# Spell checking: the only wrapper here that is given an explicit binary_path,
# and the only one that runs on spellchecker_sample instead of sample.
spellchecker = FarasaSpellChecker(binary_path="downloaded_jars/SpellChecker.jar")
corrected = spellchecker.spell_check(spellchecker_sample)
print("spell checking sample:", spellchecker_sample)
print("sample spell checked:", corrected)
print("----------------------------------------------")
"""
---------------------
interactive mode
---------------------
"""
# Interactive smoke run: every wrapper is constructed with interactive=True,
# fed the sample text once, and its output is printed.
# NOTE(review): none of the interactive instances is explicitly shut down in
# this script -- check whether the farasa API expects a terminate/close call
# for objects created with interactive=True.
print("----------------------------------------")
print("Farasa features, interactive mode.")
print("----------------------------------------")

# Segmentation.
segmenter_interactive = FarasaSegmenter(interactive=True)
segmented_interactive = segmenter_interactive.segment(sample)
print("sample segmented (interactive):", segmented_interactive)
print("----------------------------------------------")

# Stemming.
stemmer_interactive = FarasaStemmer(interactive=True)
stemmed_interactive = stemmer_interactive.stem(sample)
print("sample stemmed (interactive):", stemmed_interactive)
print("----------------------------------------------")

# POS tagging via tag().
pos_tagger_interactive = FarasaPOSTagger(interactive=True)
pos_tagged_interactive = pos_tagger_interactive.tag(sample)
print("sample POS Tagged (interactive)", pos_tagged_interactive)
print("----------------------------------------------")

# POS tagging via tag_segments(), on a second tagger instance. It gets its own
# names so the tag() tagger and its result above are not overwritten.
pos_segments_tagger_interactive = FarasaPOSTagger(interactive=True)
pos_tagged_segments_interactive = pos_segments_tagger_interactive.tag_segments(sample)
print("sample POS Tagged Segments (interactive)", pos_tagged_segments_interactive)
print("----------------------------------------------")

# Named-entity recognition.
named_entity_recognizer_interactive = FarasaNamedEntityRecognizer(interactive=True)
named_entity_recognized_interactive = named_entity_recognizer_interactive.recognize(
    sample
)
print(
    "sample named entity recognized (interactive):", named_entity_recognized_interactive
)
print("----------------------------------------------")

# Diacritization.
diacritizer_interactive = FarasaDiacritizer(interactive=True)
diacritized_interactive = diacritizer_interactive.diacritize(sample)
print("sample diacritized (interactive):", diacritized_interactive)
print("----------------------------------------------")

# The script anticipates that building the spell checker this way may raise an
# AssertionError; if it does, the message is printed instead of aborting the run.
# NOTE(review): unlike the non-interactive call, no binary_path is passed
# here -- presumably that (or interactive mode itself) is what trips the
# assertion; confirm against the FarasaSpellChecker constructor.
try:
    spellchecker = FarasaSpellChecker(interactive=True)
except AssertionError as err:
    print(err)