Skip to content

Commit

Permalink
[git] Improve branches study for consistency
Browse files Browse the repository at this point in the history
Previously, while the branches study was running,
the `branches` field remained empty or partially filled
until the study completed, leading to incorrect data
being displayed on the dashboard.
With this change, the study is first created in an
auxiliary field `branches_aux`, and once complete,
the results are moved to the `branches` field.
This approach ensures consistent and accurate data
is displayed.

Signed-off-by: Jose Javier Merchante <jjmerchante@bitergia.com>
  • Loading branch information
jjmerchante committed Aug 30, 2024
1 parent 86a4d8f commit 3b53d0b
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 4 deletions.
56 changes: 52 additions & 4 deletions grimoire_elk/enriched/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -970,8 +970,14 @@ def enrich_git_branches(self, ocean_backend, enrich_backend, run_month_days=[7,
logger.error("[git] study git-branches failed on repo {}, due to {}".format(git_repo.uri, e))
continue

logger.debug("[git] study git-branches repo {} in index {} processed".format(
git_repo.uri, anonymize_url(enrich_backend.elastic.index_url)))
try:
self.update_branches_field(git_repo, enrich_backend)
except Exception as e:
logger.error("[git] study git-branches failed on repo {}, due to {}".format(git_repo.uri, e))
continue

logger.info("[git] study git-branches repo {} in index {} processed".format(
git_repo.uri, anonymize_url(enrich_backend.elastic.index_url)))

logger.info("[git] study git-branches end")

Expand All @@ -996,7 +1002,7 @@ def delete_commit_branches(self, git_repo, enrich_backend):
es_query = """
{
"script": {
"source": "ctx._source.branches = new HashSet();",
"source": "ctx._source.branches_aux = new HashSet();",
"lang": "painless"
},
"query": {
Expand Down Expand Up @@ -1058,6 +1064,48 @@ def add_commit_branches(self, git_repo, enrich_backend):
logger.error("[git] Skip adding branch info for repo {} due to {}".format(git_repo.uri, e))
return

def update_branches_field(self, git_repo, enrich_backend):
    """Move the contents of `branches_aux` into `branches` for a repository.

    Posts an `_update_by_query` request to the enriched index. The
    painless script assigns `branches_aux` to `branches` and then removes
    `branches_aux` from every document whose `origin` term equals the
    value returned by `anonymize_url(git_repo.uri)`.

    An HTTP error reported by the response is logged (together with the
    response body) and is not re-raised; on success the response body is
    logged at debug level.

    :param git_repo: GitRepository object
    :param enrich_backend: the enrich backend
    """
    # Restrict the update to the documents that belong to this repository.
    origin_filter = """
"filter": [
{
"term": {
"origin": "%s"
}
}
]
""" % anonymize_url(git_repo.uri)

    update_query = """
{
"script": {
"source": "ctx._source.branches = ctx._source.branches_aux; ctx._source.remove('branches_aux');",
"lang": "painless"
},
"query": {
"bool": {
%s
}
}
}
""" % origin_filter

    index_url = enrich_backend.elastic.index_url
    response = self.requests.post(
        index_url + "/_update_by_query?refresh",
        data=update_query,
        headers=HEADER_JSON,
        verify=False,
    )

    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        # Best effort: report the failure and let the caller carry on.
        logger.error("[git] Error updating branches field for {}".format(anonymize_url(index_url)))
        logger.error(response.text)
    else:
        logger.debug("[git] Update branches field {}, index {}".format(response.text, anonymize_url(index_url)))

def __process_commits_in_branch(self, enrich_backend, repo_origin, branch_name, commits):
commits_str = ",".join(['"%s"' % c for c in commits])

Expand All @@ -1076,7 +1124,7 @@ def __process_commits_in_branch(self, enrich_backend, repo_origin, branch_name,
es_query = """
{
"script": {
"source": "if(!ctx._source.branches.contains(params.branch)){ctx._source.branches.add(params.branch);}",
"source": "if(!ctx._source.branches_aux.contains(params.branch)){ctx._source.branches_aux.add(params.branch);}",
"lang": "painless",
"params": {
"branch": "'%s'"
Expand Down
12 changes: 12 additions & 0 deletions releases/unreleased/git-branches-study-improved.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
title: Git branches study improved
category: fixed
author: null
issue: null
notes: >
Previously, while the branches study was running,
the `branches` field remained empty or partially filled
until the study was completed, leading to incorrect data
being displayed on the dashboard.
With this change, the branches field is updated only
after the study has finished.

0 comments on commit 3b53d0b

Please sign in to comment.