diff --git a/.env.sample b/.env.sample index 0a74618e..1cc1c3cc 100644 --- a/.env.sample +++ b/.env.sample @@ -1,6 +1,14 @@ +# PAT token GitHub authentication, useful for local testing. # You can get this value from https://github.com/settings/tokens. # if `GITHUB_TOKEN` is not set here, the token can also be stored in `~/.gitconfig` GITHUB_TOKEN=MUST_BE_CONFIGURED +# GitHub app GitHub authentication, best for production. +# GITHUB_APP_ID=MUST_BE_CONFIGURED +# Private key in PEM format +# GITHUB_APP_PRIVATE_KEY=MUST_BE_CONFIGURED +# Installation ID of the GitHub App in a specific repository or organization +# GITHUB_APP_INSTALLATION_ID=MUST_BE_CONFIGURED + DATABASE_URL=MUST_BE_CONFIGURED # If this variable is uncommented set to 1, the DB migrations will be skipped. # SKIP_DB_MIGRATIONS=0 diff --git a/Cargo.lock b/Cargo.lock index 0cc47e7d..08cb992e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3457,6 +3457,7 @@ dependencies = [ "ignore", "imara-diff", "itertools", + "jsonwebtoken", "native-tls", "octocrab", "parser", diff --git a/Cargo.toml b/Cargo.toml index 20ef2317..067a63bb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,7 @@ pulldown-cmark-escape = "0.11.0" axum-extra = { version = "0.10.1", default-features = false } unicode-segmentation = "1.12.0" secrecy = { version = "0.10", features = ["serde"] } +jsonwebtoken = "9" globset = { version = "0.4.18", default-features = false } tower_governor = { version = "0.8.0", default-features = false, features = ["axum", "tracing"] } diff --git a/README.md b/README.md index cd4477fe..9d8fffc9 100644 --- a/README.md +++ b/README.md @@ -31,10 +31,10 @@ Run `cargo build` to compile the triagebot. For local development/debugging for the log pages, do the following steps: - 1. Run `cp .env.sample .env` - 2. Change value of `SKIP_DB_MIGRATIONS` to `1`. - 3. Run `cargo run --bin triagebot` - 4. Go to this URL: +1. Run `cp .env.sample .env` +2. Change value of `SKIP_DB_MIGRATIONS` to `1`. +3. 
Run `cargo run --bin triagebot` +4. Go to this URL: ## Running triagebot @@ -48,12 +48,13 @@ The general overview of what you will need to do: 3. [Configure webhook forwarding](#configure-webhook-forwarding) 4. Configure the `.env` file: - 1. Copy `.env.sample` to `.env` - 2. `GITHUB_TOKEN`: This is a token needed for Triagebot to send requests to GitHub. Go to GitHub Settings > Developer Settings > Personal Access Token, and create a new token. The `repo` permission should be sufficient. - If this is not set, Triagebot will also look in `~/.gitconfig` in the `github.oauth-token` setting. - 3. `DATABASE_URL`: This is the URL to the database. See [Configuring a database](#configuring-a-database). - 4. `GITHUB_WEBHOOK_SECRET`: Enter the secret you entered in the webhook above. - 5. `RUST_LOG`: Set this to `debug`. +1. Copy `.env.sample` to `.env` +2. `GITHUB_TOKEN`: This is a token needed for Triagebot to send requests to GitHub. Go to GitHub Settings > Developer Settings > Personal Access Token, and create a new token. The `repo` permission should be sufficient. + - If this is not set, Triagebot will also look in `~/.gitconfig` in the `github.oauth-token` setting. + - For production deployment, a GitHub App should be used instead. In that case, configure `GITHUB_APP_ID`, `GITHUB_APP_PRIVATE_KEY` and `GITHUB_APP_INSTALLATION_ID` instead. +3. `DATABASE_URL`: This is the URL to the database. See [Configuring a database](#configuring-a-database). +4. `GITHUB_WEBHOOK_SECRET`: Enter the secret you entered in the webhook above. +5. `RUST_LOG`: Set this to `debug`. 5. Run `cargo run --bin triagebot`. This starts the http server listening for webhooks on port 8000. 6. Add a `triagebot.toml` file to the main branch of your GitHub repo with whichever services you want to try out. @@ -118,15 +119,15 @@ You need to sign up for a free account, and also deal with configuring the GitHu 3. Configure GitHub webhooks in the test repo you created. In short: - 1. 
Go to the settings page for your GitHub repo. - 2. Go to the webhook section. - 3. Click "Add webhook" - 4. Include the settings: +1. Go to the settings page for your GitHub repo. +2. Go to the webhook section. +3. Click "Add webhook" +4. Include the settings: - * Payload URL: This is the URL to your Triagebot server, for example http://7e9ea9dc.ngrok.io/github-hook. This URL is displayed when you ran the `ngrok` command above. - * Content type: application/json - * Secret: Enter a shared secret (some longish random text) - * Events: "Send me everything" +* Payload URL: This is the URL to your Triagebot server, for example http://7e9ea9dc.ngrok.io/github-hook. This URL is displayed when you ran the `ngrok` command above. +* Content type: application/json +* Secret: Enter a shared secret (some longish random text) +* Events: "Send me everything" ### Zulip testing diff --git a/src/github.rs b/src/github.rs index 6d9bb5eb..d696fb08 100644 --- a/src/github.rs +++ b/src/github.rs @@ -6,13 +6,13 @@ use bytes::Bytes; use chrono::{DateTime, FixedOffset, Utc}; use futures::{FutureExt, future::BoxFuture}; use itertools::Itertools; -use octocrab::models::{Author, AuthorAssociation}; +use octocrab::models::{Author, AuthorAssociation, InstallationId}; use regex::Regex; use reqwest::header::{AUTHORIZATION, USER_AGENT}; use reqwest::{Client, Request, RequestBuilder, Response, StatusCode}; use secrecy::{ExposeSecret, SecretString}; use std::collections::{HashMap, HashSet}; -use std::sync::OnceLock; +use std::sync::{Arc, OnceLock}; use std::{ fmt, time::{Duration, SystemTime}, @@ -43,6 +43,13 @@ impl From<&Author> for User { impl GithubClient { async fn send_req(&self, req: RequestBuilder) -> anyhow::Result<(Bytes, String)> { + // We need to make sure that we have an up-to-date token, if we're using GH app auth + let token = self + .auth + .ensure_app_token(&self.client, &self.api_url) + .await?; + let req = req.configure(token); + const MAX_ATTEMPTS: u32 = 2; log::debug!("send_req 
with {:?}", req); let req_dbg = format!("{req:?}"); @@ -99,7 +106,7 @@ impl GithubClient { req: Request, sleep: Duration, remaining_attempts: u32, - ) -> BoxFuture<'_, Result> { + ) -> BoxFuture<'_, Result> { #[derive(Debug, serde::Deserialize)] struct RateLimit { #[allow(unused)] @@ -132,13 +139,17 @@ impl GithubClient { async move { tokio::time::sleep(sleep).await; + let token = self + .auth + .ensure_app_token(&self.client, &self.api_url) + .await?; // check rate limit let rate_resp = self .client .execute( self.client .get(format!("{}/rate_limit", self.api_url)) - .configure(self) + .configure(token) .build() .unwrap(), ) @@ -2486,31 +2497,56 @@ impl Event { } trait RequestSend: Sized { - fn configure(self, g: &GithubClient) -> Self; + fn configure(self, token: RequestToken) -> Self; } impl RequestSend for RequestBuilder { - fn configure(self, g: &GithubClient) -> RequestBuilder { - let mut auth = reqwest::header::HeaderValue::from_maybe_shared(format!( - "token {}", - g.token.expose_secret() - )) - .unwrap(); - auth.set_sensitive(true); + fn configure(self, token: RequestToken) -> RequestBuilder { self.header(USER_AGENT, "rust-lang-triagebot") - .header(AUTHORIZATION, &auth) + .header(AUTHORIZATION, &token.auth_header()) } } /// Finds the token in the user's environment, panicking if no suitable token /// can be found. 
-pub fn default_token_from_env() -> SecretString { - std::env::var("GITHUB_TOKEN") - // kept for retrocompatibility but usage is discouraged and will be deprecated - .or_else(|_| std::env::var("GITHUB_API_TOKEN")) - .or_else(|_| get_token_from_git_config()) - .expect("could not find token in GITHUB_TOKEN, GITHUB_API_TOKEN or .gitconfig/github.oath-token") - .into() +pub fn default_auth_from_env() -> GithubAuth { + match ( + std::env::var("GITHUB_APP_ID"), + std::env::var("GITHUB_APP_PRIVATE_KEY"), + std::env::var("GITHUB_APP_INSTALLATION_ID"), + ) { + (Ok(app_id), Ok(private_key), Ok(installation_id)) => { + let app_id: u64 = app_id.parse().expect("GITHUB_APP_ID must be a number"); + let installation_id: u64 = installation_id + .parse() + .expect("GITHUB_APP_INSTALLATION_ID must be a number"); + let key = jsonwebtoken::EncodingKey::from_rsa_pem(private_key.as_bytes()) + .expect("GITHUB_APP_PRIVATE_KEY must be a valid RSA PEM key"); + tracing::info!( + "Using GitHub App (app={app_id}, installation={installation_id}) for GitHub authentication" + ); + + GithubAuth::App { + app_auth: octocrab::auth::AppAuth { + app_id: octocrab::models::AppId(app_id), + key, + }, + installation_id: InstallationId::from(installation_id), + token_cache: Arc::new(std::sync::RwLock::new(None)), + } + } + _ => { + // GitHub App variables are not all set: fall back to a PAT. Never print the env values here, they may include the private key. + let pat_token = std::env::var("GITHUB_TOKEN") + // kept for retrocompatibility but usage is discouraged and will be deprecated + .or_else(|_| std::env::var("GITHUB_API_TOKEN")) + .or_else(|_| get_token_from_git_config()) + .expect("could not find token in GITHUB_TOKEN, GITHUB_API_TOKEN or .gitconfig/github.oauth-token") + .into(); + tracing::info!("Using PAT token for GitHub authentication"); + GithubAuth::Pat { token: pat_token } + } + } } fn get_token_from_git_config() -> anyhow::Result<String> { @@ -2526,9 +2562,112 @@ fn get_token_from_git_config() -> anyhow::Result<String> { Ok(git_token) } +/// Cached GitHub App installation access token with its expiry time.
#[derive(Clone)] -pub struct GithubClient { +pub struct CachedInstallationToken { token: SecretString, + expires_at: DateTime<Utc>, +} + +/// Authentication mechanism for the GitHub API. +#[derive(Clone)] +pub enum GithubAuth { + /// Classic Personal Access Token (PAT) authentication. + /// Useful for local testing. + Pat { token: SecretString }, + /// GitHub App authentication using app ID and private key. + /// Generates JWTs to obtain short-lived installation access tokens. + App { + app_auth: octocrab::auth::AppAuth, + installation_id: InstallationId, + token_cache: Arc<std::sync::RwLock<Option<CachedInstallationToken>>>, + }, +} + +struct RequestToken(SecretString); + +impl RequestToken { + fn auth_header(&self) -> reqwest::header::HeaderValue { + let mut header = + reqwest::header::HeaderValue::from_str(&format!("token {}", self.0.expose_secret())) + .unwrap(); + header.set_sensitive(true); + header + } +} + +impl GithubAuth { + /// Ensures that a valid installation token is present for App auth. + /// No-op for PAT auth. + /// + /// Fetches a new installation token if the cache is empty or the token + /// is about to expire (within 5 minutes).
+ async fn ensure_app_token( + &self, + client: &Client, + api_url: &str, + ) -> anyhow::Result<RequestToken> { + match self { + GithubAuth::Pat { token } => Ok(RequestToken(token.clone())), + GithubAuth::App { + app_auth, + token_cache, + installation_id, + } => { + // Cached token is still valid at least for 5+ minutes + if let Some(existing_token) = token_cache + .read() + .expect("token cache lock poisoned") + .as_ref() + .filter(|cached| cached.expires_at - Utc::now() >= chrono::Duration::minutes(5)) + { + return Ok(RequestToken(existing_token.token.clone())); + } + + // Cached token is not valid, we need to get a new one + let jwt = app_auth + .generate_bearer_token() + .context("failed to generate GitHub App JWT")?; + let url = format!("{api_url}/app/installations/{installation_id}/access_tokens"); + let resp = client + .post(&url) + .header(USER_AGENT, "rust-lang-triagebot") + .header(AUTHORIZATION, format!("Bearer {jwt}")) + .header("Accept", "application/vnd.github+json") + .send() + .await + .context("failed to request installation token")?
+ .error_for_status() + .context("GitHub rejected installation token request")?; + + #[derive(serde::Deserialize)] + struct InstallationTokenResponse { + token: String, + expires_at: DateTime<Utc>, + } + let body: InstallationTokenResponse = resp + .json() + .await + .context("failed to parse installation token response")?; + + let mut cache = token_cache.write().expect("token cache lock poisoned"); + *cache = Some(CachedInstallationToken { + token: SecretString::from(body.token.clone()), + expires_at: body.expires_at, + }); + log::info!( + "Refreshed GitHub App installation token, expires at {}", + body.expires_at + ); + Ok(RequestToken(SecretString::from(body.token))) + } + } + } +} + +#[derive(Clone)] +pub struct GithubClient { + auth: GithubAuth, client: Client, api_url: String, graphql_url: String, @@ -2538,10 +2677,10 @@ pub struct GithubClient { } impl GithubClient { - pub fn new(token: SecretString, api_url: String, graphql_url: String, raw_url: String) -> Self { + pub fn new(auth: GithubAuth, api_url: String, graphql_url: String, raw_url: String) -> Self { GithubClient { client: Client::new(), - token, + auth, api_url, graphql_url, raw_url, @@ -2550,8 +2689,9 @@ impl GithubClient { } pub fn new_from_env() -> Self { + let auth = default_auth_from_env(); Self::new( - default_token_from_env(), + auth, std::env::var("GITHUB_API_URL") .unwrap_or_else(|_| "https://api.github.com".to_string()), std::env::var("GITHUB_GRAPHQL_API_URL") @@ -2561,6 +2701,11 @@ impl GithubClient { ) } + /// Returns a reference to the authentication mechanism. + pub fn auth(&self) -> &GithubAuth { + &self.auth + } + /// Sets whether or not this client will retry when it hits GitHub's rate limit.
/// /// Just beware that the retry may take a long time (like 30 minutes, @@ -2582,9 +2727,16 @@ impl GithubClient { let url = format!("{}/{repo}/{branch}/{path}", self.raw_url); let req = self.get(&url); let req_dbg = format!("{req:?}"); + + let token = self + .auth + .ensure_app_token(&self.client, &self.api_url) + .await?; let req = req + .configure(token) .build() .with_context(|| format!("failed to build request {req_dbg:?}"))?; + let resp = self.client.execute(req).await.context(req_dbg.clone())?; let status = resp.status(); let body = resp @@ -2615,28 +2767,27 @@ impl GithubClient { fn get(&self, url: &str) -> RequestBuilder { log::trace!("get {:?}", url); - self.client.get(url).configure(self) + self.client.get(url) } fn patch(&self, url: &str) -> RequestBuilder { log::trace!("patch {:?}", url); - self.client.patch(url).configure(self) + self.client.patch(url) } fn delete(&self, url: &str) -> RequestBuilder { log::trace!("delete {:?}", url); - self.client.delete(url).configure(self) + self.client.delete(url) } fn post(&self, url: &str) -> RequestBuilder { log::trace!("post {:?}", url); - self.client.post(url).configure(self) + self.client.post(url) } - #[allow(unused)] fn put(&self, url: &str) -> RequestBuilder { log::trace!("put {:?}", url); - self.client.put(url).configure(self) + self.client.put(url) } pub async fn rust_commit(&self, sha: &str) -> Option { diff --git a/src/main.rs b/src/main.rs index abe06e42..c323dbd6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,6 +25,7 @@ use tower_http::trace::TraceLayer; use tracing::{self as log, info_span}; use triagebot::gh_comments::{GH_COMMENTS_CACHE_CAPACITY_BYTES, GitHubCommentsCache}; use triagebot::gha_logs::{GHA_LOGS_CACHE_CAPACITY_BYTES, GitHubActionLogsCache}; +use triagebot::github::GithubAuth; use triagebot::handlers::Context; use triagebot::handlers::pr_tracking::ReviewerWorkqueue; use triagebot::handlers::pr_tracking::load_workqueue; @@ -39,10 +40,26 @@ async fn run_server(addr: SocketAddr) -> 
anyhow::Result<()> { let gh = github::GithubClient::new_from_env(); let zulip = ZulipClient::new_from_env(); let team_api = TeamClient::new_from_env(); - let oc = octocrab::OctocrabBuilder::new() - .personal_token(github::default_token_from_env()) - .build() - .expect("Failed to build octocrab."); + + let oc = match gh.auth() { + GithubAuth::Pat { token } => octocrab::OctocrabBuilder::new() + .personal_token(token.clone()) + .build() + .expect("Failed to build octocrab."), + GithubAuth::App { + app_auth, + installation_id, + .. + } => { + // We want a client scoped to a specific app installation + octocrab::OctocrabBuilder::new() + .app(app_auth.app_id, app_auth.key.clone()) + .build() + .expect("Failed to build GitHub app octocrab client") + .installation(*installation_id) + .expect("Failed to build GitHub app installation octocrab client") + } + }; // Loading the workqueue takes ~10-15s, and it's annoying for local rebuilds. // Allow users to opt out of it. diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 906bb3de..76699283 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,7 +1,7 @@ use crate::db; use crate::db::users::record_username; use crate::db::{ClientPool, PooledClient, make_client}; -use crate::github::GithubClient; +use crate::github::{GithubAuth, GithubClient}; use crate::handlers::Context; use crate::team_data::TeamClient; use crate::zulip::client::ZulipClient; @@ -64,7 +64,7 @@ impl TestContext { let octocrab = Octocrab::builder().build().unwrap(); let github = GithubClient::new( - "gh-test-fake-token".into(), + GithubAuth::Pat { token: "gh-test-fake-token".into() }, "https://api.github.com".to_string(), "https://api.github.com/graphql".to_string(), "https://raw.githubusercontent.com".to_string(),