Skip to content

Commit 1310b70

Browse files
committed
added match_order() function to parser class
- added match_order(text,preference=[]) to Parser Class - wrote test cases for match_order in test_parser - updated just_asking stemmer to work on continents - removed bladerunner.gif - updated README.md - increased version number to 1.1.6
1 parent f470c8f commit 1310b70

File tree

7 files changed

+69
-24
lines changed

7 files changed

+69
-24
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
.cache
33
*.bat
44
test.py
5+

README.md

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,3 @@ for line in husz:
194194
## Licensing
195195

196196
This project has **dual licensing**. You may use it either under the [GNU GPLv3 License](LICENSE.md) for Open Source ChatBot solutions and NLP Research purposes or [contact me](https://github.com/sedthh) about different licensing options for commercial use.
197-
198-
Feel free to add your own ChatBot to the [use case list](https://github.com/sedthh/lara-hungarian-nlp/wiki/Projects) because
199-
200-
![Every civilization was built off the back of a disposable workforce... But I can only make so many.](https://github.com/sedthh/lara-hungarian-nlp/blob/master/bladerunner.gif)

bladerunner.gif

-1.19 MB
Binary file not shown.

lara/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Lara - Linguistic Aim Recognizer API
44

55
__all__ = 'nlp','parser','stemmer','entities'
6-
__version__ = '1.1.5'
6+
__version__ = '1.1.6'
77
__version_info__ = tuple(int(num) for num in __version__.split('.'))
88

99
import sys

lara/parser.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,27 @@ def match_set(self, text=""):
270270
else:
271271
return set()
272272

273+
# Returns dictionary with N best matching intents with the highest value
274+
def match_best(self, text, n=1):
    """Return the ``n`` highest-scoring intents found in ``text``.

    Args:
        text: Input text that is handed to ``self.match``.
        n: Maximum number of intents to return. Values below one yield
            an empty dictionary.

    Returns:
        A dictionary mapping each selected intent to its score, inserted
        in descending score order. Empty when ``text`` is falsy, when
        ``n`` is below one, or when ``self.match`` reports no matches.
    """
    # A negative ``n`` used as a slice bound would drop items from the END
    # of the ranking instead of returning nothing, so reject it up front.
    if not text or n < 1:
        return {}
    # NOTE(review): ``self.match`` is presumably a dict of intent -> score;
    # only ``.get`` and iteration over its keys are relied on here.
    score = self.match(text)
    if not score:
        return {}
    # Slicing already clamps to the list length; no explicit min() needed.
    best_candidates = sorted(score, key=score.get, reverse=True)[:n]
    return {item: score[item] for item in best_candidates}
282+
283+
# Get best match based on preference hierarchy
284+
def match_order(self, text, preference=None):
    """Return a single matching intent, honouring a preference hierarchy.

    Args:
        text: Input text that is handed to ``self.match``.
        preference: Optional iterable of intent names, most preferred
            first. ``None`` (the default) means "no preference".

    Returns:
        The first entry of ``preference`` that is among the matched
        intents; otherwise the matched intent with the highest score.
        An empty string when ``text`` is falsy or nothing matched.
    """
    if not text:
        return ''
    # NOTE(review): ``self.match`` is presumably a dict of intent -> score;
    # only membership tests, iteration and ``.get`` are relied on here.
    score = self.match(text)
    if not score:
        return ''
    # ``None`` replaces the former mutable ``[]`` default; an empty tuple
    # keeps the loop a no-op when no preference was supplied.
    for item in preference or ():
        if item in score:
            return item
    # No preferred intent matched: fall back to the top-scoring one.
    return max(score, key=score.get)
293+
273294
# Remove matches from text
274295
def clean(self, text="", deep=False):
275296
if text:
@@ -413,16 +434,6 @@ def _match_pattern(self, text, item, is_clean=False, delete=False, deep=False):
413434
if delete:
414435
return text
415436
return (False,0)
416-
417-
# Returns dictionary with N best matching intents with the highest value
418-
def match_best(self, text, n=1):
419-
if text:
420-
score = self.match(text)
421-
if score:
422-
best_candidates = sorted(score, key=score.get, reverse=True)
423-
best_candidates = best_candidates[:(min(len(best_candidates),n))]
424-
return {item:score[item] for item in best_candidates}
425-
return {}
426437

427438
# Extract Class
428439
class Extract:

lara/stemmer.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# -*- coding: UTF-8 -*-
22

3+
import re
34
import lara.nlp
45

56
# a stemmer that's slightly better than random guessing
@@ -250,16 +251,19 @@ def just_asking(text):
250251
word = word[:-2]
251252
elif word[-1] == 't':
252253
if len(word)>4:
253-
if vh == 'magas':
254-
if word[-2] in ('e','é'):
255-
word = word[:-2]
256-
else:
257-
word = word[:-1]
258-
else:
259-
if word[-2] in ('a','á','o','ó'):
260-
word = word[:-2]
254+
if re.findall(r'(ameri[ck][aá]|eur[oó]p[aá]|eur[aá]zsi[aá]|afri[ck][aá]|[aá]zsi[aá])t', word, re.IGNORECASE):
255+
word = word[:-1]
256+
else:
257+
if vh == 'magas':
258+
if word[-2] in ('e','é'):
259+
word = word[:-2]
260+
else:
261+
word = word[:-1]
261262
else:
262-
word = word[:-1]
263+
if word[-2] in ('a','á','o','ó'):
264+
word = word[:-2]
265+
else:
266+
word = word[:-1]
263267
elif word[-1] == 'l':
264268
if word[-2] in ('o','ó','ö','ő'):
265269
if word[-3] in ('b','r','t'):

tests/test_parser.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,39 @@ def test_parser_intents_match_best(intent,text,best):
262262
result = test.match_best(text[i],i+1)
263263
assert best[i] == result
264264

265+
@pytest.mark.parametrize("intent,text,order,preference", [
    (
        {
            "alma"  : [{"stem":"alma","wordclass":"noun"}],
            "szed"  : [{"stem":"szed","wordclass":"verb"}],
            "körte" : [{"stem":"körte","wordclass":"noun"}]
        },
        [
            "Mikor szedjük le a pirosabb almákat?",
            "Mikor szedjük le a pirosabb almákat?",
            "Mikor szedjük le a pirosabb almákat?",
            "Mikor szedjük le a pirosabb almákat?"
        ],
        [
            ["körte"],
            ["körte","szed"],
            ["körte","alma"],
            ["alma","szed"],
        ],
        [
            "szed",
            "szed",
            "alma",
            "alma"
        ]
    ),
])
def test_parser_intents_match_order(intent,text,order,preference):
    """Check ``Intents.match_order`` against parallel fixture lists.

    ``text[i]`` is the input sentence, ``order[i]`` is the preference list
    passed to ``match_order`` and ``preference[i]`` is the intent that is
    expected back for that pair.
    """
    # The three lists are walked in lockstep; a length mismatch would make
    # zip() silently skip cases, so fail loudly instead.
    assert len(text) == len(order) == len(preference)
    test = parser.Intents(intent)
    for sentence, preferred, expected in zip(text, order, preference):
        result = test.match_order(sentence, preferred)
        assert expected == result
297+
265298
@pytest.mark.parametrize("intent,text,best", [
266299
(
267300
{

0 commit comments

Comments
 (0)