Skip to content

Commit

Permalink
Merge pull request #560 from hellohaptik/develop
Browse files Browse the repository at this point in the history
Release develop to master
  • Loading branch information
saumilhaptik authored Oct 8, 2024
2 parents d49fba4 + 4594410 commit a95ee9f
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 18 deletions.
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1 +1 @@
* @hellohaptik/qa-approvers-ml-team
* @hellohaptik/qa-platform
12 changes: 10 additions & 2 deletions ner_v1/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,16 @@ def parse_post_request(request):
PARAMETER_PRIOR_RESULTS: request_data.get("predetected_values", [])
}

ner_logger.debug("parameters dict - {}".format(parameters_dict))

request_log_message = (
f"Request parameters: entity_name={parameters_dict['entity_name']}, "
f"fuzziness={parameters_dict['fuzziness']}, "
)
if isinstance(parameters_dict['message'], str):
request_log_message += f"message={parameters_dict['message']}"
elif isinstance(parameters_dict['message'], (list, tuple)):
request_log_message += f"message_count={len(parameters_dict['message'])}"

ner_logger.debug(request_log_message)
return parameters_dict


Expand Down
14 changes: 8 additions & 6 deletions ner_v1/chatbot/entity_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,12 +238,14 @@ def get_text(message, entity_name, structured_value, fallback_value, bot_message

ner_logger.debug("Predetected values: {}".format(predetected_values))
if isinstance(message, six.string_types):
ner_logger.debug(f"Detecting entities in message {message}")
entity_output = text_detector.detect(message=message,
structured_value=structured_value,
fallback_value=fallback_value,
bot_message=bot_message,
predetected_values=predetected_values)
elif isinstance(message, (list, tuple)):
ner_logger.debug(f"Detecting bulk entities in {len(message)} messages.")
entity_output = text_detector.detect_bulk(messages=message, fallback_values=fallback_value,
predetected_values=predetected_values)
else:
Expand Down Expand Up @@ -713,7 +715,7 @@ def get_shopping_size(message, entity_name, structured_value, fallback_value, bo
fallback_value=fallback_value, bot_message=bot_message)
print output
>> [{'detection': 'message', 'original_text': 'large', 'entity_value': {'value': u'L'}},
>> [{'detection': 'message', 'original_text': 'large', 'entity_value': {'value': u'L'}},
{'detection': 'message', 'original_text': '36', 'entity_value': {'value': '36'}}]
"""
Expand Down Expand Up @@ -1051,20 +1053,20 @@ def output_entity_dict_list(entity_value_list, original_text_list, detection_met
Args:
entity_value_list (list): list of entity values which are identified from given detection logic
original_text_list (list): list original values or actual values from message/structured_value
original_text_list (list): list original values or actual values from message/structured_value
which are identified
detection_method (str, optional): how the entity was detected
detection_method (str, optional): how the entity was detected
i.e. whether from message, structured_value
or fallback, verified from model or not.
defaults to None
detection_method_list(list, optional): list containing how each entity was detected in the entity_value list.
if provided, this argument will be used over detection method
defaults to None
defaults to None
Returns:
list of dict: list containing dictionaries, each containing entity_value, original_text and detection;
entity_value is in itself a dict with its keys varying from entity to entity
Example Output:
[
{
Expand Down
16 changes: 16 additions & 0 deletions ner_v2/detectors/pattern/phone_number/phone_number_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from __future__ import absolute_import

import re
import structlog

try:
import regex
Expand All @@ -16,6 +17,7 @@
from language_utilities.constant import ENGLISH_LANG, CHINESE_TRADITIONAL_LANG
from ner_v2.detectors.base_detector import BaseDetector
from ner_v2.detectors.numeral.number.number_detection import NumberDetector
ner_logger = structlog.getLogger('chatbot_ner')


class PhoneDetector(BaseDetector):
Expand Down Expand Up @@ -101,7 +103,20 @@ def detect_entity(self, text, **kwargs):
"""
self.text = " " + text.lower().strip() + " "
self.phone, self.original_phone_text = [], []

for match in phonenumbers.PhoneNumberMatcher(self.text, self.country_code, leniency=0):
try:
national_number_len = len(str(match.number.national_number))

# Get the national number and check whether its length is below 8 (including country code);
# exclude numbers that are too short to be a valid phone number (e.g., ticket numbers)
if national_number_len < 8:
self.original_phone_text.append(self.text)
continue
except Exception:
# Not logging exception object as structlog.exception() will print entire traceback
ner_logger.exception('Error in detect_entity function', text=self.text)

if match.number.country_code == phonenumbers.country_code_for_region(self.country_code):
self.phone.append(self.check_for_country_code(str(match.number.national_number)))
self.original_phone_text.append(self.text[match.start:match.end])
Expand All @@ -111,6 +126,7 @@ def detect_entity(self, text, **kwargs):
"value": str(match.number.national_number)})
self.original_phone_text.append(self.text[match.start:match.end])
self.phone, self.original_phone_text = self.check_for_alphas()

return self.phone, self.original_phone_text

def check_for_alphas(self):
Expand Down
16 changes: 9 additions & 7 deletions ner_v2/detectors/textual/elastic_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def get_multi_entity_results(self, entities, texts, fuzziness_threshold=1,
Example:
db = ElasticSearchDataStore()
entities = ['city', 'restaurant']
entities = [['city', 'restaurant']]
texts = ['I want to go to mumbai and eat at dominoes pizza',
' I want to go Jabalpur']
Expand Down Expand Up @@ -139,12 +139,14 @@ def get_multi_entity_results(self, entities, texts, fuzziness_threshold=1,
# this will be set to true only if at least one of the names mentioned in the log_results_for_entities list
# is present in the entities list
log_es_result = False
for entity_name in log_results_for_entities:
if entity_name in entities:
# if we find at least one entity name for which the es results need to be logged
# we set the value for the boolean and break the loop
log_es_result = True
break
if len(entities) > 0:
entities_list = entities[0]
for entity_name in log_results_for_entities:
if entity_name in entities_list:
# if we find at least one entity name for which the es results need to be logged
# we set the value for the boolean and break the loop
log_es_result = True
break
request_timeout = self._connection_settings.get('request_timeout', 20)
index_name = self._index_name

Expand Down
5 changes: 5 additions & 0 deletions ner_v2/detectors/textual/text_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,7 @@ def detect(self, message=None, **kwargs):
"""

res_list = self._get_single_text_detection_with_variants(message)
ner_logger.info(f"[detect] method res_list - {res_list}")
data_list = []

for index, res in enumerate(res_list):
Expand Down Expand Up @@ -668,6 +669,7 @@ def detect(self, message=None, **kwargs):

entities[entity] = out
data_list.append(entities)
ner_logger.info(f"[detect] method data_list - {data_list}")
return data_list

def detect_bulk(self, messages=None, **kwargs):
Expand Down Expand Up @@ -727,6 +729,7 @@ def detect_bulk(self, messages=None, **kwargs):
"""

res_list = self._get_bulk_text_detection_with_variants(messages)
ner_logger.info(f"[detect_bulk] method res_list - {res_list}")
data_list = []

for index, res in enumerate(res_list):
Expand Down Expand Up @@ -767,6 +770,8 @@ def detect_bulk(self, messages=None, **kwargs):

entities[entity] = out
data_list.append(entities)

ner_logger.info(f"[detect_bulk] method data_list - {data_list}")
return data_list

@staticmethod
Expand Down
4 changes: 2 additions & 2 deletions ner_v2/detectors/textual/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,10 @@ def get_detection(message, entity_dict, bot_message=None, language=ENGLISH_LANG,
if isinstance(message, six.string_types):
entity_output = text_detector.detect(message=message,
bot_message=bot_message)
ner_logger.debug(f'[Single Message Detection] Entity Output {entity_output}')
ner_logger.info(f'[Single Message Detection] Entity Output {entity_output}')
elif isinstance(message, (list, tuple)):
entity_output = text_detector.detect_bulk(messages=message)
ner_logger.debug(f'[Multiple Message Detection] Entity Output {entity_output}')
ner_logger.info(f'[Multiple Message Detection] Entity Output {entity_output}')
else:
raise TypeError('`message` argument must be either of type `str`, `unicode`, `list` or `tuple`.')

Expand Down

0 comments on commit a95ee9f

Please sign in to comment.