From f2046c82b0700a71311282e0efc68452916c22e5 Mon Sep 17 00:00:00 2001 From: Avishkar Gupta Date: Sat, 29 Jul 2023 08:08:45 +0000 Subject: [PATCH 01/13] Logging changes for training flow --- ner_v1/api.py | 12 ++++++++++-- ner_v1/chatbot/entity_detection.py | 14 ++++++++------ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/ner_v1/api.py b/ner_v1/api.py index fc52ffb7..d5225989 100644 --- a/ner_v1/api.py +++ b/ner_v1/api.py @@ -103,8 +103,16 @@ def parse_post_request(request): PARAMETER_PRIOR_RESULTS: request_data.get("predetected_values", []) } - ner_logger.debug("parameters dict - {}".format(parameters_dict)) - + request_log_message = ( + f"Request parameters: entity_name={parameters_dict['entity_name']}, " + f"fuzziness={parameters_dict['fuzziness']}, " + ) + if isinstance(parameters_dict['message'], str): + request_log_message += f"message={parameters_dict['message']}" + elif isinstance(parameters_dict['message'], (list, tuple)): + request_log_message += f"message_count={len(parameters_dict['message'])}" + + ner_logger.debug(request_log_message) return parameters_dict diff --git a/ner_v1/chatbot/entity_detection.py b/ner_v1/chatbot/entity_detection.py index d7bf3999..e06b4f1b 100644 --- a/ner_v1/chatbot/entity_detection.py +++ b/ner_v1/chatbot/entity_detection.py @@ -238,12 +238,14 @@ def get_text(message, entity_name, structured_value, fallback_value, bot_message ner_logger.debug("Predetected values: {}".format(predetected_values)) if isinstance(message, six.string_types): + ner_logger.debug(f"Detecting entities in message {message}") entity_output = text_detector.detect(message=message, structured_value=structured_value, fallback_value=fallback_value, bot_message=bot_message, predetected_values=predetected_values) elif isinstance(message, (list, tuple)): + ner_logger.debug(f"Detecting bulk entities in {len(message)} messages.") entity_output = text_detector.detect_bulk(messages=message, fallback_values=fallback_value, predetected_values=predetected_values) else: @@ -710,7 +712,7 @@ def get_shopping_size(message, entity_name, structured_value, fallback_value, bo fallback_value=fallback_value, bot_message=bot_message) print output - >> [{'detection': 'message', 'original_text': 'large', 'entity_value': {'value': u'L'}}, + >> [{'detection': 'message', 'original_text': 'large', 'entity_value': {'value': u'L'}}, {'detection': 'message', 'original_text': '36', 'entity_value': {'value': '36'}}] """ @@ -1048,20 +1050,20 @@ def output_entity_dict_list(entity_value_list, original_text_list, detection_met Args: entity_value_list (list): list of entity values which are identified from given detection logic - original_text_list (list): list original values or actual values from message/structured_value + original_text_list (list): list original values or actual values from message/structured_value which are identified - detection_method (str, optional): how the entity was detected + detection_method (str, optional): how the entity was detected i.e. whether from message, structured_value or fallback, verified from model or not. defaults to None detection_method_list(list, optional): list containing how each entity was detected in the entity_value list. if provided, this argument will be used over detection method - defaults to None - + defaults to None + Returns: list of dict: list containing dictionaries, each containing entity_value, original_text and detection; entity_value is in itself a dict with its keys varying from entity to entity - + Example Output: [ { From d99b806db24915452f28c51f557cf2553b8e4531 Mon Sep 17 00:00:00 2001 From: Saumil Shah Date: Mon, 11 Sep 2023 12:02:37 +0530 Subject: [PATCH 02/13] Update CODEOWNERS --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 766812c3..74790065 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -* @hellohaptik/qa-approvers-ml-team +* @hellohaptik/qa-platform From 9cbb1b230e9e6b78bbb21fe3891cb04cec440dab Mon Sep 17 00:00:00 2001 From: Ankur Jain Date: Mon, 30 Oct 2023 13:00:29 +0530 Subject: [PATCH 03/13] Updated logs for text entity (#552) --- ner_v2/detectors/textual/elastic_search.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/ner_v2/detectors/textual/elastic_search.py b/ner_v2/detectors/textual/elastic_search.py index 3bb3950b..89a7bb75 100644 --- a/ner_v2/detectors/textual/elastic_search.py +++ b/ner_v2/detectors/textual/elastic_search.py @@ -102,7 +102,7 @@ def get_multi_entity_results(self, entities, texts, fuzziness_threshold=1, Example: db = ElasticSearchDataStore() - entities = ['city', 'restaurant'] + entities = [['city', 'restaurant']] texts = ['I want to go to mumbai and eat at dominoes pizza', ' I want to go Jabalpur'] @@ -139,12 +139,14 @@ def get_multi_entity_results(self, entities, texts, fuzziness_threshold=1, # this will be set to true only if one of or all names mentioned in log_results_for_entities list # are present in the entities list log_es_result = False - for entity_name in log_results_for_entities: - if entity_name in entities: - # if we find at least one entity name for which the es results need to be logged - # we set the value for the boolean and break the loop - log_es_result = True - break + if len(entities) > 0: + entities_list = entities[0] + for entity_name in log_results_for_entities: + if entity_name in entities_list: + # if we find at least one entity name for which the es results need to be logged + # we set the value for the boolean and break the loop + log_es_result = True + break request_timeout = self._connection_settings.get('request_timeout', 20) index_name = self._index_name From ac8260a35ef19fb6949f4fa75954ca2029516664 Mon Sep 17 00:00:00 2001 From: Ankur Jain Date: Mon, 30 Oct 2023 13:52:20 +0530 Subject: [PATCH 04/13] Added logs for text entity detection (#554) --- ner_v2/detectors/textual/text_detection.py | 5 +++++ ner_v2/detectors/textual/utils.py | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ner_v2/detectors/textual/text_detection.py b/ner_v2/detectors/textual/text_detection.py index 68e1c76d..616984aa 100644 --- a/ner_v2/detectors/textual/text_detection.py +++ b/ner_v2/detectors/textual/text_detection.py @@ -617,6 +617,7 @@ def detect(self, message=None, **kwargs): """ res_list = self._get_single_text_detection_with_variants(message) + ner_logger.info(f"[detect] method res_list - {res_list}") data_list = [] for index, res in enumerate(res_list): @@ -668,6 +669,7 @@ def detect(self, message=None, **kwargs): entities[entity] = out data_list.append(entities) + ner_logger.info(f"[detect] method data_list - {data_list}") return data_list def detect_bulk(self, messages=None, **kwargs): @@ -727,6 +729,7 @@ def detect_bulk(self, messages=None, **kwargs): """ res_list = self._get_bulk_text_detection_with_variants(messages) + ner_logger.info(f"[detect_bulk] method res_list - {res_list}") data_list = [] for index, res in enumerate(res_list): @@ -767,6 +770,8 @@ def detect_bulk(self, messages=None, **kwargs): entities[entity] = out data_list.append(entities) + + ner_logger.info(f"[detect_bulk] method data_list - {data_list}") return data_list @staticmethod diff --git a/ner_v2/detectors/textual/utils.py b/ner_v2/detectors/textual/utils.py index d2ef62e2..4fb2cd15 100644 --- a/ner_v2/detectors/textual/utils.py +++ b/ner_v2/detectors/textual/utils.py @@ -78,10 +78,10 @@ def get_detection(message, entity_dict, bot_message=None, language=ENGLISH_LANG, if isinstance(message, six.string_types): entity_output = text_detector.detect(message=message, bot_message=bot_message) - ner_logger.debug(f'[Single Message Detection] Entity Output {entity_output}') + ner_logger.info(f'[Single Message Detection] Entity Output {entity_output}') elif isinstance(message, (list, tuple)): entity_output = text_detector.detect_bulk(messages=message) - ner_logger.debug(f'[Multiple Message Detection] Entity Output {entity_output}') + ner_logger.info(f'[Multiple Message Detection] Entity Output {entity_output}') else: raise TypeError('`message` argument must be either of type `str`, `unicode`, `list` or `tuple`.') From bb848dc607c039229c97e6e9b4a07cba9935b6e2 Mon Sep 17 00:00:00 2001 From: "Ganesh Deshmukh (GD)" Date: Mon, 23 Sep 2024 13:47:48 +0530 Subject: [PATCH 05/13] Updated entitiy-detection logic of phone_number --- .../phone_number/phone_number_detection.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py index ec8c722f..2c7140a2 100644 --- a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py +++ b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py @@ -183,7 +183,25 @@ def _text_list_for_detection(self, text=None): return : list[string] """ text = text or '' - phone_number_format_regex = r'[-(),.+\s{}]+' + """ + Three cases are not covered in this regex: + - 123-4567 + - (123) 4567 + - +1 (800) 555-5555 ext. 1 + + But below ones are covered: + - +91 98765 43210 + - +1-800-555-555 + - 123, 456.7890 + - +44 20 7123 4567 + - 123 + 456 7890 + - {123} {456} 7890 + + The pattern matches exactly 9 to 12 occurrences of characters that are either: [-(),.+\s{}] + """ + # phone_number_format_regex = (r'[-(),.+\s{}]+') ) # older logic + phone_number_format_regex = r'[-(),.+\s{}]{9,12}' # new logic - with string length between 9-12 + matches = self.language_number_detector.extract_digits_only(text, phone_number_format_regex, True, True) return matches From 40f03e51a75df99ecdd3e1926948a66e3d7d4a76 Mon Sep 17 00:00:00 2001 From: "Ganesh Deshmukh (GD)" Date: Mon, 23 Sep 2024 13:54:26 +0530 Subject: [PATCH 06/13] linter fix --- .../detectors/pattern/phone_number/phone_number_detection.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py index 2c7140a2..1079d842 100644 --- a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py +++ b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py @@ -199,8 +199,7 @@ def _text_list_for_detection(self, text=None): The pattern matches exactly 9 to 12 occurrences of characters that are either: [-(),.+\s{}] """ - # phone_number_format_regex = (r'[-(),.+\s{}]+') ) # older logic - phone_number_format_regex = r'[-(),.+\s{}]{9,12}' # new logic - with string length between 9-12 + phone_number_format_regex = r'[-(),.+\s{}]{9,12}' matches = self.language_number_detector.extract_digits_only(text, phone_number_format_regex, True, True) return matches From e9734de008d7646a22cc677499ea1190c4ce2941 Mon Sep 17 00:00:00 2001 From: "Ganesh Deshmukh (GD)" Date: Mon, 23 Sep 2024 13:55:56 +0530 Subject: [PATCH 07/13] lint fix --- ner_v2/detectors/pattern/phone_number/phone_number_detection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py index 1079d842..1f541de5 100644 --- a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py +++ b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py @@ -197,7 +197,7 @@ def _text_list_for_detection(self, text=None): - 123 + 456 7890 - {123} {456} 7890 - The pattern matches exactly 9 to 12 occurrences of characters that are either: [-(),.+\s{}] + The pattern matches exactly 9 to 12 occurrences of characters that are either one specified in square brackets """ phone_number_format_regex = r'[-(),.+\s{}]{9,12}' From c86c495a9ebfa3d2fef87372d53399a4db6dcf13 Mon Sep 17 00:00:00 2001 From: Ganesh Deshmukh <84504740+ganeshhaptik@users.noreply.github.com> Date: Tue, 24 Sep 2024 17:30:15 +0530 Subject: [PATCH 08/13] Revert "(Enhancement): Analytics - Updated regex for entitiy-detection logic of phone_number " --- .../phone_number/phone_number_detection.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py index 1f541de5..ec8c722f 100644 --- a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py +++ b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py @@ -183,24 +183,7 @@ def _text_list_for_detection(self, text=None): return : list[string] """ text = text or '' - """ - Three cases are not covered in this regex: - - 123-4567 - - (123) 4567 - - +1 (800) 555-5555 ext. 1 - - But below ones are covered: - - +91 98765 43210 - - +1-800-555-555 - - 123, 456.7890 - - +44 20 7123 4567 - - 123 + 456 7890 - - {123} {456} 7890 - - The pattern matches exactly 9 to 12 occurrences of characters that are either one specified in square brackets - """ - phone_number_format_regex = r'[-(),.+\s{}]{9,12}' - + phone_number_format_regex = r'[-(),.+\s{}]+' matches = self.language_number_detector.extract_digits_only(text, phone_number_format_regex, True, True) return matches From 111f8feb97d09821ac78b8ef62fcd3041132bd85 Mon Sep 17 00:00:00 2001 From: "Ganesh Deshmukh (GD)" Date: Fri, 4 Oct 2024 20:57:37 +0530 Subject: [PATCH 09/13] Get the national number and check its length is below 8 (including contry code) and exclude them --- .../pattern/phone_number/phone_number_detection.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py index ec8c722f..b236df21 100644 --- a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py +++ b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py @@ -101,7 +101,19 @@ def detect_entity(self, text, **kwargs): """ self.text = " " + text.lower().strip() + " " self.phone, self.original_phone_text = [], [] + for match in phonenumbers.PhoneNumberMatcher(self.text, self.country_code, leniency=0): + try: + national_number_len = len(str(match.number.national_number)) + + # Get the national number and check its length is below 8 (including contry code) and \ + # Exclude numbers that are too short to be a valid phone number (e.g., ticket numbers) + if national_number_len < 8: + self.original_phone_text.append(self.text) + continue + except Exception: + pass + if match.number.country_code == phonenumbers.country_code_for_region(self.country_code): self.phone.append(self.check_for_country_code(str(match.number.national_number))) self.original_phone_text.append(self.text[match.start:match.end]) @@ -111,6 +123,7 @@ def detect_entity(self, text, **kwargs): "value": str(match.number.national_number)}) self.original_phone_text.append(self.text[match.start:match.end]) self.phone, self.original_phone_text = self.check_for_alphas() + return self.phone, self.original_phone_text def check_for_alphas(self): From 9025a17ebb6df5f28de5e49b7deb5d187e4aad9a Mon Sep 17 00:00:00 2001 From: "Ganesh Deshmukh (GD)" Date: Mon, 7 Oct 2024 16:29:37 +0530 Subject: [PATCH 10/13] Added logger in details --- .../pattern/phone_number/phone_number_detection.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py index b236df21..1f624f8b 100644 --- a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py +++ b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py @@ -2,6 +2,7 @@ from __future__ import absolute_import import re +import structlog try: import regex @@ -16,6 +17,7 @@ from language_utilities.constant import ENGLISH_LANG, CHINESE_TRADITIONAL_LANG from ner_v2.detectors.base_detector import BaseDetector from ner_v2.detectors.numeral.number.number_detection import NumberDetector +ner_logger = structlog.getLogger('chatbot_ner') class PhoneDetector(BaseDetector): @@ -112,7 +114,9 @@ def detect_entity(self, text, **kwargs): self.original_phone_text.append(self.text) continue except Exception: - pass + # Not logging exception object as structlog.exception() will print entire traceback + ner_logger.exception('Error in detect_entity function', + phonenumbers_match_obj=match.__dict__, text=self.text) if match.number.country_code == phonenumbers.country_code_for_region(self.country_code): self.phone.append(self.check_for_country_code(str(match.number.national_number))) From aa18d5987b81ca30b4a35054fa7241d4c7db0898 Mon Sep 17 00:00:00 2001 From: "Ganesh Deshmukh (GD)" Date: Mon, 7 Oct 2024 19:17:17 +0530 Subject: [PATCH 11/13] adding temp-loggers --- .../pattern/phone_number/phone_number_detection.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py index 1f624f8b..7cb2195e 100644 --- a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py +++ b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py @@ -2,7 +2,7 @@ from __future__ import absolute_import import re -import structlog +# import structlog try: import regex @@ -17,7 +17,7 @@ from language_utilities.constant import ENGLISH_LANG, CHINESE_TRADITIONAL_LANG from ner_v2.detectors.base_detector import BaseDetector from ner_v2.detectors.numeral.number.number_detection import NumberDetector -ner_logger = structlog.getLogger('chatbot_ner') +# ner_logger = structlog.getLogger('chatbot_ner') class PhoneDetector(BaseDetector): @@ -110,13 +110,16 @@ def detect_entity(self, text, **kwargs): # Get the national number and check its length is below 8 (including contry code) and \ # Exclude numbers that are too short to be a valid phone number (e.g., ticket numbers) + print(f"\n detect_entity - national_number_len = {national_number_len}") + print(f"\n detect_entity - self.text = {self.text}") if national_number_len < 8: self.original_phone_text.append(self.text) continue - except Exception: + except Exception as e: + print(f"\n detect_entity got an error as {str(e)}") # Not logging exception object as structlog.exception() will print entire traceback - ner_logger.exception('Error in detect_entity function', - phonenumbers_match_obj=match.__dict__, text=self.text) + # ner_logger.exception('Error in detect_entity function', + # phonenumbers_match_obj=match.__dict__, text=self.text) if match.number.country_code == phonenumbers.country_code_for_region(self.country_code): self.phone.append(self.check_for_country_code(str(match.number.national_number))) From 0f3b4167cb0e5f46cab18fad2bd5fb1f84cfb624 Mon Sep 17 00:00:00 2001 From: "Ganesh Deshmukh (GD)" Date: Mon, 7 Oct 2024 22:53:18 +0530 Subject: [PATCH 12/13] Updated loggers and removed print statements --- .../pattern/phone_number/phone_number_detection.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py index 7cb2195e..1f624f8b 100644 --- a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py +++ b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py @@ -2,7 +2,7 @@ from __future__ import absolute_import import re -# import structlog +import structlog try: import regex @@ -17,7 +17,7 @@ from language_utilities.constant import ENGLISH_LANG, CHINESE_TRADITIONAL_LANG from ner_v2.detectors.base_detector import BaseDetector from ner_v2.detectors.numeral.number.number_detection import NumberDetector -# ner_logger = structlog.getLogger('chatbot_ner') +ner_logger = structlog.getLogger('chatbot_ner') class PhoneDetector(BaseDetector): @@ -110,16 +110,13 @@ def detect_entity(self, text, **kwargs): # Get the national number and check its length is below 8 (including contry code) and \ # Exclude numbers that are too short to be a valid phone number (e.g., ticket numbers) - print(f"\n detect_entity - national_number_len = {national_number_len}") - print(f"\n detect_entity - self.text = {self.text}") if national_number_len < 8: self.original_phone_text.append(self.text) continue - except Exception as e: - print(f"\n detect_entity got an error as {str(e)}") + except Exception: # Not logging exception object as structlog.exception() will print entire traceback - # ner_logger.exception('Error in detect_entity function', - # phonenumbers_match_obj=match.__dict__, text=self.text) + ner_logger.exception('Error in detect_entity function', + phonenumbers_match_obj=match.__dict__, text=self.text) if match.number.country_code == phonenumbers.country_code_for_region(self.country_code): self.phone.append(self.check_for_country_code(str(match.number.national_number))) From 4cccc1f7b986bb00bd7204a3bcc76a7faff970b9 Mon Sep 17 00:00:00 2001 From: "Ganesh Deshmukh (GD)" Date: Mon, 7 Oct 2024 23:42:41 +0530 Subject: [PATCH 13/13] removed one logger parameter which will have chance of null value --- .../detectors/pattern/phone_number/phone_number_detection.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py index 1f624f8b..90097ff4 100644 --- a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py +++ b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py @@ -115,8 +115,7 @@ def detect_entity(self, text, **kwargs): continue except Exception: # Not logging exception object as structlog.exception() will print entire traceback - ner_logger.exception('Error in detect_entity function', - phonenumbers_match_obj=match.__dict__, text=self.text) + ner_logger.exception('Error in detect_entity function', text=self.text) if match.number.country_code == phonenumbers.country_code_for_region(self.country_code): self.phone.append(self.check_for_country_code(str(match.number.national_number)))