From 46964e90b8e6fac6cb56c1134164278a0b4ab3d5 Mon Sep 17 00:00:00 2001 From: Jose Javier Merchante Date: Thu, 5 Oct 2023 10:16:35 +0200 Subject: [PATCH] [github] Fix get_identities method not returning all identities This commit fixes a bug that caused some identities not to be loaded into SortingHat and to be displayed as UNDEFINED in OpenSearch. The method `get_identities` was not returning the identities from `reviews_data`. Signed-off-by: Jose Javier Merchante --- grimoire_elk/enriched/github2.py | 25 +++++++++++-------- ...ndefined-identities-in-github-comments.yml | 9 +++++++ 2 files changed, 24 insertions(+), 10 deletions(-) create mode 100644 releases/unreleased/undefined-identities-in-github-comments.yml diff --git a/grimoire_elk/enriched/github2.py b/grimoire_elk/enriched/github2.py index fd7751fdc..d1c1d666e 100644 --- a/grimoire_elk/enriched/github2.py +++ b/grimoire_elk/enriched/github2.py @@ -134,15 +134,15 @@ def get_identities(self, item): category = item['category'] item = item['data'] - comments_attr = None if category == "issue": identity_types = ['user', 'assignee'] - comments_attr = 'comments_data' + comments_attrs = ['comments_data'] elif category == "pull_request": identity_types = ['user', 'merged_by'] - comments_attr = 'review_comments_data' + comments_attrs = ['review_comments_data', 'reviews_data'] else: identity_types = [] + comments_attrs = [] for identity in identity_types: identity_attr = identity + "_data" @@ -152,11 +152,12 @@ def get_identities(self, item): if user: yield user - comments = item.get(comments_attr, []) - for comment in comments: - user = self.get_sh_identity(comment['user_data']) - if user: - yield user + for comments_attr in comments_attrs: + comments = item.get(comments_attr, []) + for comment in comments: + user = self.get_sh_identity(comment['user_data']) + if user: + yield user def get_sh_identity(self, item, identity_field=None): identity = {} @@ -171,9 +172,13 @@ def get_sh_identity(self, item, 
identity_field=None): if not user: return identity - identity['name'] = user.get('name', user.get('login', None)) identity['email'] = user.get('email', None) - identity['username'] = user.get('username', user.get('login', None)) + identity['name'] = user.get('name', None) + if not identity['name']: + identity['name'] = user.get('login', None) + identity['username'] = user.get('username', None) + if not identity['username']: + identity['username'] = user.get('login', None) return identity diff --git a/releases/unreleased/undefined-identities-in-github-comments.yml b/releases/unreleased/undefined-identities-in-github-comments.yml new file mode 100644 index 000000000..130624162 --- /dev/null +++ b/releases/unreleased/undefined-identities-in-github-comments.yml @@ -0,0 +1,9 @@ +--- +title: Undefined identities in GitHub comments +category: fixed +author: Jose Javier Merchante +issue: null +notes: > + Fix a bug that causes certain identities from comments + to not be imported into SortingHat, resulting in them appearing + as UNDEFINED in OpenSearch.