Skip to content

Commit

Permalink
Add PR review reminder DAG (#553)
Browse files Browse the repository at this point in the history
* [WIP] Add PR review reminder DAG

* Use requests directly instead of HttpHook

* Add unit tests and fix weekday calculation

* Use crontab config to only run on weekdays

* Move repos into a constant

* Reduce calls to base_repo_name

* Remove unnecessary template configuration

* Refine argument types

* Use session and rename var for masking

* Move files and reduce necessary calls

* Add unit tests and fix bugs

* Move files and clean up default dry_run values

* Correct Urgency types

* Remove unused function

* Add docs to test factory
  • Loading branch information
sarayourfriend authored Jun 23, 2022
1 parent 825c5aa commit f5de3e2
Show file tree
Hide file tree
Showing 11 changed files with 1,055 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docker-compose.override.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ services:
command: init
volumes:
- ./openverse_catalog:/usr/local/airflow/openverse_catalog
stdin_open: true
tty: true

volumes:
postgres:
Expand Down
4 changes: 4 additions & 0 deletions env.template
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ AIRFLOW_VAR_API_KEY_NYPL=not_set
AIRFLOW_VAR_API_KEY_THINGIVERSE=not_set
AIRFLOW_VAR_API_KEY_FREESOUND=not_set


########################################################################################
# Connection/Variable info
########################################################################################
Expand Down Expand Up @@ -76,6 +77,9 @@ AIRFLOW_CONN_DATA_REFRESH=http://172.17.0.1:8001
# Django Admin url. Change the following line to use the appropriate environment.
DJANGO_ADMIN_URL="https://localhost:8000/admin"

# GitHub - used for maintenance
AIRFLOW_VAR_GITHUB_API_KEY="not_set"


########################################################################################
# Other config
Expand Down
52 changes: 52 additions & 0 deletions openverse_catalog/dags/common/github.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import requests


class GitHubAPI:
def __init__(self, pat: str):
"""
:param pat: GitHub Personal Access Token to use to authenticate requests
"""
self.session = requests.Session()
self.session.headers["Authorization"] = f"token {pat}"

def _make_request(self, method: str, resource: str, **kwargs) -> requests.Response:
response = getattr(self.session, method.lower())(
f"https://api.github.com/{resource}", **kwargs
)
response.raise_for_status()
return response.json()

def get_open_prs(self, repo: str, owner: str = "WordPress"):
return self._make_request(
"GET",
f"repos/{owner}/{repo}/pulls",
data={
"state": "open",
"base": "main",
"sort": "updated",
# this is the default when ``sort`` is ``updated`` but
# it's helpful to specify for readers
"direction": "asc",
# we don't bother paginating because if we ever
# have more than 100 open PRs in a single repo
# then something is seriously wrong
"per_page": 100,
},
)

def post_issue_comment(
self, repo: str, issue_number: int, comment_body: str, owner: str = "WordPress"
):
return self._make_request(
"POST",
f"repos/{owner}/{repo}/issues/{issue_number}/comments",
data={"body": comment_body},
)

def get_issue_comments(
self, repo: str, issue_number: int, owner: str = "WordPress"
):
return self._make_request(
"GET",
f"repos/{owner}/{repo}/issues/{issue_number}/comments",
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
import datetime
import logging
from dataclasses import dataclass
from typing import Optional

from common.github import GitHubAPI


logger = logging.getLogger(__name__)


REPOSITORIES = [
"openverse",
"openverse-catalog",
"openverse-api",
"openverse-frontend",
"openverse-infrastructure",
]


class Urgency:
@dataclass
class Urgency:
label: str
days: int

CRITICAL = Urgency("critical", 1)
HIGH = Urgency("high", 2)
MEDIUM = Urgency("medium", 4)
LOW = Urgency("low", 5)


@dataclass
class ReviewDelta:
urgency: Urgency.Urgency
days: int


def pr_urgency(pr: dict) -> Urgency.Urgency:
priority_labels = [
label["name"] for label in pr["labels"] if "priority" in label["name"].lower()
]
if not priority_labels:
logger.error(f"Found unabled PR ({pr['html_url']}). Skipping!")
return None

priority_label = priority_labels[0]

if "critical" in priority_label:
return Urgency.CRITICAL
elif "high" in priority_label:
return Urgency.HIGH
elif "medium" in priority_label:
return Urgency.MEDIUM
elif "low" in priority_label:
return Urgency.LOW


def days_without_weekends(today: datetime, updated_at: datetime) -> int:
"""
Adapted from:
https://stackoverflow.com/a/3615984 CC BY-SA 2.5
"""
if today.weekday() == 0 and (today - updated_at).days < 3:
# shortcut mondays to 0 if last updated on the weekend
return 0

daygenerator = (
updated_at + datetime.timedelta(x + 1) for x in range((today - updated_at).days)
)
return sum(1 for day in daygenerator if day.weekday() < 5)


def get_urgency_if_urgent(pr: dict) -> Optional[ReviewDelta]:
updated_at = datetime.datetime.fromisoformat(pr["updated_at"].rstrip("Z"))
today = datetime.datetime.now()
urgency = pr_urgency(pr)
if urgency is None:
return None

days = days_without_weekends(today, updated_at)

return ReviewDelta(urgency, days) if days >= urgency.days else None


COMMENT_MARKER = (
"This reminder is being automatically generated due to the urgency configuration."
)


COMMENT_TEMPLATE = (
"""
Based on the {urgency_label} urgency of this PR, the following reviewers are being
gently reminded to review this PR:
{user_logins}
"""
f"{COMMENT_MARKER}"
"""
Ignoring weekend days, this PR was updated {days_since_update} day(s) ago. PRs
labelled with {urgency_label} urgency are expected to be reviewed within {urgency_days}.
@{pr_author}, if this PR is not ready for a review, please draft it to prevent reviewers
from getting further unnecessary pings.
"""
)


def build_comment(review_delta: ReviewDelta, pr: dict):
user_handles = [f"@{req['login']}" for req in pr["requested_reviewers"]]
return user_handles, COMMENT_TEMPLATE.format(
urgency_label=review_delta.urgency.label,
urgency_days=review_delta.urgency.days,
user_logins="\n".join(user_handles),
days_since_update=review_delta.days,
pr_author=pr["user"]["login"],
)


def base_repo_name(pr: dict):
return pr["base"]["repo"]["name"]


def post_reminders(github_pat: str, dry_run: bool):
gh = GitHubAPI(github_pat)

open_prs = []
for repo in REPOSITORIES:
open_prs += [pr for pr in gh.get_open_prs(repo) if not pr["draft"]]

urgent_prs = []
for pr in open_prs:
review_delta = get_urgency_if_urgent(pr)
if review_delta:
urgent_prs.append((pr, review_delta))

to_ping = []
for pr, review_delta in urgent_prs:
repo = base_repo_name(pr)
comments = gh.get_issue_comments(repo, pr["number"])

reminder_comments = [
comment
for comment in comments
if (
comment["user"]["login"] == "openverse-bot"
and COMMENT_MARKER in comment["body"]
)
]
if reminder_comments:
# maybe in the future we re-ping in some cases?
continue

if pr["requested_reviewers"] == []:
# no requested reviewers to ping, maybe in the future we ping
# the PR author or the whole openverse maintainers team?
continue

to_ping.append((pr, review_delta))

for pr, review_delta in to_ping:
user_handles, comment_body = build_comment(review_delta, pr)

logger.info(f"Pinging {', '.join(user_handles)} to review {pr['title']}")
if not dry_run:
gh.post_issue_comment(base_repo_name(pr), pr["number"], comment_body)

if dry_run:
logger.info(
"This was a dry run. None of the pings listed above were actually sent."
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""
Iterates through open PRs in our repositories and pings assigned reviewers
who have not yet approved the PR or explicitly requested changes.
This DAG runs daily and pings on the following schedule based on priority label:
| priority | days |
| --- | --- |
| critical | 1 day |
| high | >2 days |
| medium | >4 days |
| low | >7 days |
The DAG does not ping on Saturday and Sunday and accounts for weekend days
when determining how much time has passed since the review.
Unfortunately the DAG does not know when someone is on vacation. It is up to the
author of the PR to re-assign review if one of the randomly selected reviewers
is unavailable for the time period during which the PR should be reviewed.
"""

from datetime import datetime, timedelta

from airflow.models import DAG, Variable
from airflow.operators.python import PythonOperator
from common import pr_review_reminders
from common.constants import DAG_DEFAULT_ARGS


DAG_ID = "pr_review_reminders"
MAX_ACTIVE_TASKS = 1
ENVIRONMENT = Variable.get("ENVIRONMENT", default_var="dev")
DRY_RUN = Variable.get("PR_REVIEW_REMINDER_DRY_RUN", default_var=(ENVIRONMENT == "dev"))
GITHUB_PAT = Variable.get("GITHUB_API_KEY", default_var="not_set")

dag = DAG(
dag_id=DAG_ID,
default_args={
**DAG_DEFAULT_ARGS,
"retry_delay": timedelta(minutes=1),
},
start_date=datetime(2022, 6, 9),
# Run every weekday
schedule_interval="0 0 * * 1-5",
max_active_tasks=MAX_ACTIVE_TASKS,
max_active_runs=MAX_ACTIVE_TASKS,
# If this was True, airflow would run this DAG in the beginning
# for each day from the start day to now
catchup=False,
# Use the docstring at the top of the file as md docs in the UI
doc_md=__doc__,
tags=["maintenance"],
)

with dag:
PythonOperator(
task_id="pr_review_reminder_operator",
python_callable=pr_review_reminders.post_reminders,
op_kwargs={"github_pat": GITHUB_PAT, "dry_run": DRY_RUN},
)
42 changes: 42 additions & 0 deletions tests/dags/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,45 @@ def requests_get_mock():
with mock.patch("common.urls.requests_get", autospec=True) as mock_get:
mock_get.side_effect = _make_response
yield


@pytest.fixture
def freeze_time(monkeypatch):
"""
Now() manager patches datetime return a fixed, settable, value
(freezes time)
https://stackoverflow.com/a/28073449 CC BY-SA 3.0
"""
import datetime

original = datetime.datetime

class FreezeMeta(type):
def __instancecheck__(self, instance):
if type(instance) == original or type(instance) == Freeze:
return True

class Freeze(datetime.datetime):
__metaclass__ = FreezeMeta

@classmethod
def freeze(cls, val):
cls.frozen = val

@classmethod
def now(cls):
return cls.frozen

@classmethod
def delta(cls, timedelta=None, **kwargs):
"""Moves time fwd/bwd by the delta"""
from datetime import timedelta as td

if not timedelta:
timedelta = td(**kwargs)
cls.frozen += timedelta

monkeypatch.setattr(datetime, "datetime", Freeze)
Freeze.freeze(original.now())
return Freeze
Loading

0 comments on commit f5de3e2

Please sign in to comment.