From 2c4ea087cdfb52e76d76a300466263109011a857 Mon Sep 17 00:00:00 2001 From: Jose Javier Merchante Date: Fri, 8 Mar 2024 13:35:50 +0100 Subject: [PATCH 1/2] [kitsune] Include demography study in Kitsune This commit allows the demography study to be executed for the Kitsune backend (SUMO). It also renames the misspelled `mappping` class attribute to `mapping`. Signed-off-by: Jose Javier Merchante --- grimoire_elk/enriched/kitsune.py | 8 +++++- .../unreleased/kitsune-demography-study.yml | 7 +++++ tests/test_kitsune.py | 27 +++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 releases/unreleased/kitsune-demography-study.yml diff --git a/grimoire_elk/enriched/kitsune.py b/grimoire_elk/enriched/kitsune.py index 7bf2d4487..c5d8a872b 100644 --- a/grimoire_elk/enriched/kitsune.py +++ b/grimoire_elk/enriched/kitsune.py @@ -60,7 +60,13 @@ def get_elastic_mappings(es_major): class KitsuneEnrich(Enrich): - mappping = Mapping + mapping = Mapping + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.studies = [] + self.studies.append(self.enrich_demography) def get_field_author(self): return "creator" diff --git a/releases/unreleased/kitsune-demography-study.yml b/releases/unreleased/kitsune-demography-study.yml new file mode 100644 index 000000000..c8c8b5a5c --- /dev/null +++ b/releases/unreleased/kitsune-demography-study.yml @@ -0,0 +1,7 @@ +--- +title: Kitsune demography study +category: added +author: Jose Javier Merchante +issue: null +notes: > + Include demography study in Kitsune (SUMO). 
diff --git a/tests/test_kitsune.py b/tests/test_kitsune.py index 822ad3da6..964497033 100644 --- a/tests/test_kitsune.py +++ b/tests/test_kitsune.py @@ -20,10 +20,13 @@ # Valerio Cosentino # import logging +import time import unittest from base import TestBaseBackend from grimoire_elk.enriched.utils import REPO_LABELS +from grimoire_elk.enriched.enrich import (logger, + anonymize_url) class TestKitsune(TestBaseBackend): @@ -111,6 +114,30 @@ def test_refresh_identities(self): result = self._test_refresh_identities() # ... ? + def test_demography_study(self): + """ Test that the demography study works correctly """ + + alias = 'demographics' + study, ocean_backend, enrich_backend = self._test_study('enrich_demography') + + with self.assertLogs(logger, level='INFO') as cm: + if study.__name__ == "enrich_demography": + study(ocean_backend, enrich_backend, alias) + + self.assertEqual(cm.output[0], 'INFO:grimoire_elk.enriched.enrich:[kitsune] Demography ' + 'starting study %s/test_kitsune_enrich' + % anonymize_url(self.es_con)) + self.assertEqual(cm.output[-1], 'INFO:grimoire_elk.enriched.enrich:[kitsune] Demography ' + 'end %s/test_kitsune_enrich' + % anonymize_url(self.es_con)) + + time.sleep(5) # HACK: Wait until kitsune enrich index has been written + items = [item for item in enrich_backend.fetch()] + self.assertEqual(len(items), 9) + for item in items: + self.assertTrue('demography_min_date' in item.keys()) + self.assertTrue('demography_max_date' in item.keys()) + if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') From dbc3c18b199725fd6c3d5143b98340e9e065cef7 Mon Sep 17 00:00:00 2001 From: Jose Javier Merchante Date: Mon, 11 Mar 2024 10:14:48 +0100 Subject: [PATCH 2/2] [kitsune] Include unique field in enriched index This commit updates the Kitsune enriched index. The enriched index did not include a unique identifier for questions and answers, which caused an error when trying to autorefresh. 
Also include the default raw fields that were missing from answer items: "metadata__updated_on", "metadata__timestamp", "offset", "origin", "tag", and "uuid". Signed-off-by: Jose Javier Merchante --- grimoire_elk/enriched/kitsune.py | 19 +++++++++++++++---- .../unreleased/kitsune-demography-study.yml | 5 ++++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/grimoire_elk/enriched/kitsune.py b/grimoire_elk/enriched/kitsune.py index c5d8a872b..8ca1eb1ec 100644 --- a/grimoire_elk/enriched/kitsune.py +++ b/grimoire_elk/enriched/kitsune.py @@ -51,6 +51,9 @@ def get_elastic_mappings(es_major): "tags_analyzed": { "type": "text", "index": true + }, + "id": { + "type": "keyword" } } } """ @@ -71,6 +74,9 @@ def __init__(self, *args, **kwargs): def get_field_author(self): return "creator" + def get_field_unique_id(self): + return "id" + def get_sh_identity(self, item, identity_field=None): identity = {} @@ -152,6 +158,8 @@ def get_rich_item(self, item, kind='question'): eitem['lifetime_days'] = \ get_time_diff_days(question['created'], question['updated']) + # Add id info to allow coexistence of items of different types in the same index + eitem['id'] = 'question_{}'.format(question['id']) eitem.update(self.get_grimoire_fields(question['created'], "question")) eitem['author'] = question['creator']['username'] @@ -200,6 +208,8 @@ def get_rich_item(self, item, kind='question'): eitem['lifetime_days'] = \ get_time_diff_days(answer['created'], answer['updated']) + # Add id info to allow coexistence of items of different types in the same index + eitem['id'] = 'question_{}_answer_{}'.format(answer['question'], answer['id']) eitem.update(self.get_grimoire_fields(answer['created'], "answer")) eitem['author'] = answer['creator']['username'] @@ -237,7 +247,7 @@ def enrich_items(self, ocean_backend): rich_item = self.get_rich_item(item) data_json = json.dumps(rich_item) bulk_json += '{"index" : {"_id" : "%s" } }\n' % \ - (item[self.get_field_unique_id()]) + 
(rich_item[self.get_field_unique_id()]) bulk_json += data_json + "\n" # Bulk document current += 1 # Time to enrich also de answers @@ -249,10 +259,11 @@ def enrich_items(self, ocean_backend): if answer['id'] == item['data']['solution']: answer['solution'] = 1 rich_answer = self.get_rich_item(answer, kind='answer') + self.copy_raw_fields(self.RAW_FIELDS_COPY, item, rich_answer) + data_json = json.dumps(rich_answer) - bulk_json += '{"index" : {"_id" : "%s_%i" } }\n' % \ - (item[self.get_field_unique_id()], - rich_answer['answer_id']) + bulk_json += '{"index" : {"_id" : "%s" } }\n' % \ + (rich_answer[self.get_field_unique_id()]) bulk_json += data_json + "\n" # Bulk document current += 1 diff --git a/releases/unreleased/kitsune-demography-study.yml b/releases/unreleased/kitsune-demography-study.yml index c8c8b5a5c..d21645fd2 100644 --- a/releases/unreleased/kitsune-demography-study.yml +++ b/releases/unreleased/kitsune-demography-study.yml @@ -4,4 +4,7 @@ category: added author: Jose Javier Merchante issue: null notes: > - Include demography study in Kitsune (SUMO). + Include demography study in Kitsune (SUMO). And update + the index to include standard fields such as a unique + identifier (`id`) and some missing fields like `origin` + or `uuid`.