Skip to content

Commit

Permalink
Merge branch 'kitsune-demography' of 'https://github.com/jjmerchante/…
Browse files Browse the repository at this point in the history
  • Loading branch information
sduenas authored Mar 11, 2024
2 parents c2d0296 + dbc3c18 commit 724efeb
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 5 deletions.
27 changes: 22 additions & 5 deletions grimoire_elk/enriched/kitsune.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ def get_elastic_mappings(es_major):
"tags_analyzed": {
"type": "text",
"index": true
},
"id": {
"type": "keyword"
}
}
} """
Expand All @@ -60,11 +63,20 @@ def get_elastic_mappings(es_major):

class KitsuneEnrich(Enrich):

mappping = Mapping
mapping = Mapping

def __init__(self, *args, **kwargs):
    """Initialise the enricher and register the studies it exposes.

    Every positional and keyword argument is forwarded unchanged to the
    parent class initialiser.
    """
    super().__init__(*args, **kwargs)

    # The demography study is the only one registered by this enricher.
    self.studies = [self.enrich_demography]

def get_field_author(self):
    """Return the name of the field that identifies an item's author.

    NOTE(review): presumably this is the key of the raw item holding the
    author data (the enrichment code reads ``question['creator']``) --
    confirm against the base class contract.
    """
    return "creator"

def get_field_unique_id(self):
    """Return the name of the field holding an item's unique identifier.

    The value stored under this field of an enriched item is what
    ``enrich_items`` uses as the document ``_id`` in the bulk request.
    """
    return "id"

def get_sh_identity(self, item, identity_field=None):
identity = {}

Expand Down Expand Up @@ -146,6 +158,8 @@ def get_rich_item(self, item, kind='question'):
eitem['lifetime_days'] = \
get_time_diff_days(question['created'], question['updated'])

# Add an id so that items of different types can coexist in the same index
eitem['id'] = 'question_{}'.format(question['id'])
eitem.update(self.get_grimoire_fields(question['created'], "question"))

eitem['author'] = question['creator']['username']
Expand Down Expand Up @@ -194,6 +208,8 @@ def get_rich_item(self, item, kind='question'):
eitem['lifetime_days'] = \
get_time_diff_days(answer['created'], answer['updated'])

# Add an id so that items of different types can coexist in the same index
eitem['id'] = 'question_{}_answer_{}'.format(answer['question'], answer['id'])
eitem.update(self.get_grimoire_fields(answer['created'], "answer"))

eitem['author'] = answer['creator']['username']
Expand Down Expand Up @@ -231,7 +247,7 @@ def enrich_items(self, ocean_backend):
rich_item = self.get_rich_item(item)
data_json = json.dumps(rich_item)
bulk_json += '{"index" : {"_id" : "%s" } }\n' % \
(item[self.get_field_unique_id()])
(rich_item[self.get_field_unique_id()])
bulk_json += data_json + "\n" # Bulk document
current += 1
# Time to also enrich the answers
Expand All @@ -243,10 +259,11 @@ def enrich_items(self, ocean_backend):
if answer['id'] == item['data']['solution']:
answer['solution'] = 1
rich_answer = self.get_rich_item(answer, kind='answer')
self.copy_raw_fields(self.RAW_FIELDS_COPY, item, rich_answer)

data_json = json.dumps(rich_answer)
bulk_json += '{"index" : {"_id" : "%s_%i" } }\n' % \
(item[self.get_field_unique_id()],
rich_answer['answer_id'])
bulk_json += '{"index" : {"_id" : "%s" } }\n' % \
(rich_answer[self.get_field_unique_id()])
bulk_json += data_json + "\n" # Bulk document
current += 1

Expand Down
10 changes: 10 additions & 0 deletions releases/unreleased/kitsune-demography-study.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
title: Kitsune demography study
category: added
author: Jose Javier Merchante <jjmerchante@bitergia.com>
issue: null
notes: >
Include the demography study in Kitsune (SUMO). Also update
the index to include standard fields such as a unique
identifier (`id`) and some previously missing fields like
`origin` or `uuid`.
27 changes: 27 additions & 0 deletions tests/test_kitsune.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,13 @@
# Valerio Cosentino <valcos@bitergia.com>
#
import logging
import time
import unittest

from base import TestBaseBackend
from grimoire_elk.enriched.utils import REPO_LABELS
from grimoire_elk.enriched.enrich import (logger,
anonymize_url)


class TestKitsune(TestBaseBackend):
Expand Down Expand Up @@ -111,6 +114,30 @@ def test_refresh_identities(self):
result = self._test_refresh_identities()
# ... ?

def test_demography_study(self):
    """Test that the demography study works correctly.

    Runs the ``enrich_demography`` study against the test indexes, checks
    the first and last INFO log records it emits, and then verifies that
    every enriched item carries the two demography date fields.
    """

    alias = 'demographics'
    study, ocean_backend, enrich_backend = self._test_study('enrich_demography')

    with self.assertLogs(logger, level='INFO') as cm:
        if study.__name__ == "enrich_demography":
            study(ocean_backend, enrich_backend, alias)

    # Both expected messages share the same logger prefix and index URL.
    log_prefix = 'INFO:grimoire_elk.enriched.enrich:[kitsune] Demography '
    index_url = '%s/test_kitsune_enrich' % anonymize_url(self.es_con)
    self.assertEqual(cm.output[0], log_prefix + 'starting study ' + index_url)
    self.assertEqual(cm.output[-1], log_prefix + 'end ' + index_url)

    time.sleep(5)  # HACK: Wait until kitsune enrich index has been written
    items = list(enrich_backend.fetch())
    self.assertEqual(len(items), 9)
    for item in items:
        # assertIn reports the missing key and the item on failure,
        # unlike assertTrue which only reports "False is not true".
        self.assertIn('demography_min_date', item)
        self.assertIn('demography_max_date', item)


if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
Expand Down

0 comments on commit 724efeb

Please sign in to comment.