-
Notifications
You must be signed in to change notification settings - Fork 217
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* [WIP] Add PR review reminder DAG * Use requests directly instead of HttpHook * Add unit tests and fix weekday calculation * Use crontab config to only run on weekdays * Move repos into a constant * Reduce calls to base_repo_name * Remove unnecessary template configuration * Refine argument types * Use session and rename var for masking * Move files and reduce necessary calls * Add unit tests and fix bugs * Move files and clean up default dry_run values * Correct Urgency types * Remove unused function * Add docs to test factory
- Loading branch information
1 parent
825c5aa
commit f5de3e2
Showing
11 changed files
with
1,055 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import requests | ||
|
||
|
||
class GitHubAPI: | ||
def __init__(self, pat: str): | ||
""" | ||
:param pat: GitHub Personal Access Token to use to authenticate requests | ||
""" | ||
self.session = requests.Session() | ||
self.session.headers["Authorization"] = f"token {pat}" | ||
|
||
def _make_request(self, method: str, resource: str, **kwargs) -> requests.Response: | ||
response = getattr(self.session, method.lower())( | ||
f"https://api.github.com/{resource}", **kwargs | ||
) | ||
response.raise_for_status() | ||
return response.json() | ||
|
||
def get_open_prs(self, repo: str, owner: str = "WordPress"): | ||
return self._make_request( | ||
"GET", | ||
f"repos/{owner}/{repo}/pulls", | ||
data={ | ||
"state": "open", | ||
"base": "main", | ||
"sort": "updated", | ||
# this is the default when ``sort`` is ``updated`` but | ||
# it's helpful to specify for readers | ||
"direction": "asc", | ||
# we don't bother paginating because if we ever | ||
# have more than 100 open PRs in a single repo | ||
# then something is seriously wrong | ||
"per_page": 100, | ||
}, | ||
) | ||
|
||
def post_issue_comment( | ||
self, repo: str, issue_number: int, comment_body: str, owner: str = "WordPress" | ||
): | ||
return self._make_request( | ||
"POST", | ||
f"repos/{owner}/{repo}/issues/{issue_number}/comments", | ||
data={"body": comment_body}, | ||
) | ||
|
||
def get_issue_comments( | ||
self, repo: str, issue_number: int, owner: str = "WordPress" | ||
): | ||
return self._make_request( | ||
"GET", | ||
f"repos/{owner}/{repo}/issues/{issue_number}/comments", | ||
) |
172 changes: 172 additions & 0 deletions
172
openverse_catalog/dags/maintenance/pr_review_reminders/pr_review_reminders.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
import datetime | ||
import logging | ||
from dataclasses import dataclass | ||
from typing import Optional | ||
|
||
from common.github import GitHubAPI | ||
|
||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
REPOSITORIES = [ | ||
"openverse", | ||
"openverse-catalog", | ||
"openverse-api", | ||
"openverse-frontend", | ||
"openverse-infrastructure", | ||
] | ||
|
||
|
||
class Urgency: | ||
@dataclass | ||
class Urgency: | ||
label: str | ||
days: int | ||
|
||
CRITICAL = Urgency("critical", 1) | ||
HIGH = Urgency("high", 2) | ||
MEDIUM = Urgency("medium", 4) | ||
LOW = Urgency("low", 5) | ||
|
||
|
||
@dataclass | ||
class ReviewDelta: | ||
urgency: Urgency.Urgency | ||
days: int | ||
|
||
|
||
def pr_urgency(pr: dict) -> Urgency.Urgency: | ||
priority_labels = [ | ||
label["name"] for label in pr["labels"] if "priority" in label["name"].lower() | ||
] | ||
if not priority_labels: | ||
logger.error(f"Found unabled PR ({pr['html_url']}). Skipping!") | ||
return None | ||
|
||
priority_label = priority_labels[0] | ||
|
||
if "critical" in priority_label: | ||
return Urgency.CRITICAL | ||
elif "high" in priority_label: | ||
return Urgency.HIGH | ||
elif "medium" in priority_label: | ||
return Urgency.MEDIUM | ||
elif "low" in priority_label: | ||
return Urgency.LOW | ||
|
||
|
||
def days_without_weekends(today: datetime, updated_at: datetime) -> int: | ||
""" | ||
Adapted from: | ||
https://stackoverflow.com/a/3615984 CC BY-SA 2.5 | ||
""" | ||
if today.weekday() == 0 and (today - updated_at).days < 3: | ||
# shortcut mondays to 0 if last updated on the weekend | ||
return 0 | ||
|
||
daygenerator = ( | ||
updated_at + datetime.timedelta(x + 1) for x in range((today - updated_at).days) | ||
) | ||
return sum(1 for day in daygenerator if day.weekday() < 5) | ||
|
||
|
||
def get_urgency_if_urgent(pr: dict) -> Optional[ReviewDelta]: | ||
updated_at = datetime.datetime.fromisoformat(pr["updated_at"].rstrip("Z")) | ||
today = datetime.datetime.now() | ||
urgency = pr_urgency(pr) | ||
if urgency is None: | ||
return None | ||
|
||
days = days_without_weekends(today, updated_at) | ||
|
||
return ReviewDelta(urgency, days) if days >= urgency.days else None | ||
|
||
|
||
COMMENT_MARKER = ( | ||
"This reminder is being automatically generated due to the urgency configuration." | ||
) | ||
|
||
|
||
COMMENT_TEMPLATE = ( | ||
""" | ||
Based on the {urgency_label} urgency of this PR, the following reviewers are being | ||
gently reminded to review this PR: | ||
{user_logins} | ||
""" | ||
f"{COMMENT_MARKER}" | ||
""" | ||
Ignoring weekend days, this PR was updated {days_since_update} day(s) ago. PRs | ||
labelled with {urgency_label} urgency are expected to be reviewed within {urgency_days}. | ||
@{pr_author}, if this PR is not ready for a review, please draft it to prevent reviewers | ||
from getting further unnecessary pings. | ||
""" | ||
) | ||
|
||
|
||
def build_comment(review_delta: ReviewDelta, pr: dict): | ||
user_handles = [f"@{req['login']}" for req in pr["requested_reviewers"]] | ||
return user_handles, COMMENT_TEMPLATE.format( | ||
urgency_label=review_delta.urgency.label, | ||
urgency_days=review_delta.urgency.days, | ||
user_logins="\n".join(user_handles), | ||
days_since_update=review_delta.days, | ||
pr_author=pr["user"]["login"], | ||
) | ||
|
||
|
||
def base_repo_name(pr: dict): | ||
return pr["base"]["repo"]["name"] | ||
|
||
|
||
def post_reminders(github_pat: str, dry_run: bool): | ||
gh = GitHubAPI(github_pat) | ||
|
||
open_prs = [] | ||
for repo in REPOSITORIES: | ||
open_prs += [pr for pr in gh.get_open_prs(repo) if not pr["draft"]] | ||
|
||
urgent_prs = [] | ||
for pr in open_prs: | ||
review_delta = get_urgency_if_urgent(pr) | ||
if review_delta: | ||
urgent_prs.append((pr, review_delta)) | ||
|
||
to_ping = [] | ||
for pr, review_delta in urgent_prs: | ||
repo = base_repo_name(pr) | ||
comments = gh.get_issue_comments(repo, pr["number"]) | ||
|
||
reminder_comments = [ | ||
comment | ||
for comment in comments | ||
if ( | ||
comment["user"]["login"] == "openverse-bot" | ||
and COMMENT_MARKER in comment["body"] | ||
) | ||
] | ||
if reminder_comments: | ||
# maybe in the future we re-ping in some cases? | ||
continue | ||
|
||
if pr["requested_reviewers"] == []: | ||
# no requested reviewers to ping, maybe in the future we ping | ||
# the PR author or the whole openverse maintainers team? | ||
continue | ||
|
||
to_ping.append((pr, review_delta)) | ||
|
||
for pr, review_delta in to_ping: | ||
user_handles, comment_body = build_comment(review_delta, pr) | ||
|
||
logger.info(f"Pinging {', '.join(user_handles)} to review {pr['title']}") | ||
if not dry_run: | ||
gh.post_issue_comment(base_repo_name(pr), pr["number"], comment_body) | ||
|
||
if dry_run: | ||
logger.info( | ||
"This was a dry run. None of the pings listed above were actually sent." | ||
) |
60 changes: 60 additions & 0 deletions
60
openverse_catalog/dags/maintenance/pr_review_reminders/pr_review_reminders_dag.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
""" | ||
Iterates through open PRs in our repositories and pings assigned reviewers | ||
who have not yet approved the PR or explicitly requested changes. | ||
This DAG runs daily and pings on the following schedule based on priority label: | ||
| priority | days | | ||
| --- | --- | | ||
| critical | 1 day | | ||
| high | >2 days | | ||
| medium | >4 days | | ||
| low | >7 days | | ||
The DAG does not ping on Saturday and Sunday and accounts for weekend days | ||
when determining how much time has passed since the review. | ||
Unfortunately the DAG does not know when someone is on vacation. It is up to the | ||
author of the PR to re-assign review if one of the randomly selected reviewers | ||
is unavailable for the time period during which the PR should be reviewed. | ||
""" | ||
|
||
from datetime import datetime, timedelta | ||
|
||
from airflow.models import DAG, Variable | ||
from airflow.operators.python import PythonOperator | ||
from common import pr_review_reminders | ||
from common.constants import DAG_DEFAULT_ARGS | ||
|
||
|
||
DAG_ID = "pr_review_reminders" | ||
MAX_ACTIVE_TASKS = 1 | ||
ENVIRONMENT = Variable.get("ENVIRONMENT", default_var="dev") | ||
DRY_RUN = Variable.get("PR_REVIEW_REMINDER_DRY_RUN", default_var=(ENVIRONMENT == "dev")) | ||
GITHUB_PAT = Variable.get("GITHUB_API_KEY", default_var="not_set") | ||
|
||
dag = DAG( | ||
dag_id=DAG_ID, | ||
default_args={ | ||
**DAG_DEFAULT_ARGS, | ||
"retry_delay": timedelta(minutes=1), | ||
}, | ||
start_date=datetime(2022, 6, 9), | ||
# Run every weekday | ||
schedule_interval="0 0 * * 1-5", | ||
max_active_tasks=MAX_ACTIVE_TASKS, | ||
max_active_runs=MAX_ACTIVE_TASKS, | ||
# If this was True, airflow would run this DAG in the beginning | ||
# for each day from the start day to now | ||
catchup=False, | ||
# Use the docstring at the top of the file as md docs in the UI | ||
doc_md=__doc__, | ||
tags=["maintenance"], | ||
) | ||
|
||
with dag: | ||
PythonOperator( | ||
task_id="pr_review_reminder_operator", | ||
python_callable=pr_review_reminders.post_reminders, | ||
op_kwargs={"github_pat": GITHUB_PAT, "dry_run": DRY_RUN}, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.