Added match_order() method to the Intents class in lara/parser.py

sedthh · sedthh · commit 1310b704dcce · 2018-05-11T17:15:34.000+02:00
- added match_order(text, preference=[]) to the Intents class in lara/parser.py and moved match_best() up beside it
- wrote test cases for match_order in test_parser
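- updated just_asking stemmer so continent names ending in "t" (Amerikát, Európát, Eurázsiát, Afrikát, Ázsiát) lose only the final "t" instead of the vowel plus "t"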
- removed bladerunner.gif
- updated README.md
- increased version number to 1.1.6
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,4 @@
 .cache
 *.bat
 test.py
+
diff --git a/README.md b/README.md
@@ -194,7 +194,3 @@ for line in husz:
 ## Licensing
 
 This project has **dual licensing**. You may use it either under the [GNU GPLv3 License](LICENSE.md) for Open Source ChatBot solutions and NLP Research purposes or [contact me](https://github.com/sedthh) about different licensing options for commercial use. 
-
-Feel free to add your own ChatBot to the [use case list](https://github.com/sedthh/lara-hungarian-nlp/wiki/Projects) because
-
-![Every civilization was built off the back of a disposable workforce... But I can only make so many.](https://github.com/sedthh/lara-hungarian-nlp/blob/master/bladerunner.gif)
diff --git a/bladerunner.gif b/bladerunner.gif
diff --git a/lara/__init__.py b/lara/__init__.py
@@ -3,7 +3,7 @@
 # Lara - Lingusitic Aim Recognizer API
 
 __all__				= 'nlp','parser','stemmer','entities'
-__version__ 		= '1.1.5'
+__version__ 		= '1.1.6'
 __version_info__	= tuple(int(num) for num in __version__.split('.'))
 
 import sys
diff --git a/lara/parser.py b/lara/parser.py
@@ -270,6 +270,27 @@ def match_set(self, text=""):
 		else:
 			return set()
 	
+	# Returns dictionary with N best matching intents with the highest value
+	def match_best(self, text, n=1):
+		if text:
+			score	= self.match(text)
+			if score:
+				best_candidates	= sorted(score, key=score.get, reverse=True)
+				best_candidates	= best_candidates[:(min(len(best_candidates),n))]
+				return {item:score[item] for item in best_candidates}
+		return {}
+		
+	# Get best match based on preference hierarchy
+	def match_order(self,text,preference=[]):
+		if text:
+			score	= self.match(text)
+			if score:
+				for item in preference:
+					if item in score:
+						return item
+				return max(score, key=score.get)
+		return ''
+	
 	# Remove matches from text
 	def clean(self, text="", deep=False):
 		if text:
@@ -413,16 +434,6 @@ def _match_pattern(self, text, item, is_clean=False, delete=False, deep=False):
 		if delete:
 			return text
 		return (False,0)
-		
-	# Returns dictionary with N best matching intents with the highest value
-	def match_best(self, text, n=1):
-		if text:
-			score	= self.match(text)
-			if score:
-				best_candidates	= sorted(score, key=score.get, reverse=True)
-				best_candidates	= best_candidates[:(min(len(best_candidates),n))]
-				return {item:score[item] for item in best_candidates}
-		return {}
 
 # Extract Class
 class Extract:
diff --git a/lara/stemmer.py b/lara/stemmer.py
@@ -1,5 +1,6 @@
 # -*- coding: UTF-8 -*-
 
+import re
 import lara.nlp
 
 # a stemmer that's slightly better than random guessing
@@ -250,16 +251,19 @@ def just_asking(text):
 								word	= word[:-2]
 				elif word[-1] == 't':
 					if len(word)>4:
-						if vh == 'magas':
-							if word[-2] in ('e','é'):
-								word	= word[:-2]
-							else:
-								word	= word[:-1]
-						else:
-							if word[-2] in ('a','á','o','ó'):
-								word	= word[:-2]
+						if re.findall(r'(ameri[ck][aá]|eur[oó]p[aá]|eur[aá]zsi[aá]|afri[ck][aá]|[aá]zsi[aá])t', word, re.IGNORECASE):
+							word	= word[:-1]
+						else:							
+							if vh == 'magas':
+								if word[-2] in ('e','é'):
+									word	= word[:-2]
+								else:
+									word	= word[:-1]
 							else:
-								word	= word[:-1]	
+								if word[-2] in ('a','á','o','ó'):
+									word	= word[:-2]
+								else:
+									word	= word[:-1]	
 				elif word[-1] == 'l':
 					if word[-2] in ('o','ó','ö','ő'):
 						if word[-3] in ('b','r','t'):
diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -262,6 +262,39 @@ def test_parser_intents_match_best(intent,text,best):
 		result	= test.match_best(text[i],i+1)
 		assert best[i] == result
 
+@pytest.mark.parametrize("intent,text,order,preference", [
+	(	
+		{
+			"alma"		: [{"stem":"alma","wordclass":"noun"}],
+			"szed"		: [{"stem":"szed","wordclass":"verb"}],
+			"körte"		: [{"stem":"körte","wordclass":"noun"}]
+		},
+		[
+			"Mikor szedjük le a pirosabb almákat?",
+			"Mikor szedjük le a pirosabb almákat?",
+			"Mikor szedjük le a pirosabb almákat?",
+			"Mikor szedjük le a pirosabb almákat?"
+		],
+		[
+			["körte"],
+			["körte","szed"],
+			["körte","alma"],
+			["alma","szed"],
+		],
+		[
+			"szed",
+			"szed",
+			"alma",
+			"alma"
+		]
+	),
+])
+def test_parser_intents_match_order(intent,text,order,preference):
+	test	= parser.Intents(intent)
+	for i in range(len(text)):
+		result	= test.match_order(text[i],order[i])
+		assert preference[i] == result
+		
 @pytest.mark.parametrize("intent,text,best", [
 	(	
 		{

-Original file line number
+Diff line change
 .cache
 *.bat
 test.py
++