Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
346 changes: 346 additions & 0 deletions src/gh_comments.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,346 @@
use std::collections::VecDeque;
use std::fmt::Write;
use std::sync::Arc;
use std::time::Instant;

use anyhow::Context as _;
use axum::{
extract::{Path, State},
http::HeaderValue,
response::{IntoResponse, Response},
};
use chrono::Utc;
use hyper::{
HeaderMap, StatusCode,
header::{CACHE_CONTROL, CONTENT_SECURITY_POLICY, CONTENT_TYPE},
};

use crate::github::{GitHubGraphQlComment, GitHubIssueWithComments};
use crate::{
errors::AppError,
github::GitHubSimplifiedAuthor,
handlers::Context,
utils::{immutable_headers, is_repo_autorized},
};

/// URL path of the page stylesheet referenced by the generated HTML.
/// The version is part of the path (`style@0.0.1.css`).
pub const STYLE_URL: &str = "/gh-comments/style@0.0.1.css";
/// URL path of the GitHub markdown stylesheet referenced by the generated HTML.
/// The version is part of the path (`github-markdown@5.8.1.css`).
pub const MARKDOWN_URL: &str = "/gh-comments/github-markdown@5.8.1.css";

/// Upper bound, in bytes, on the summed estimated size of the cached entries.
const MAX_CACHE_CAPACITY_BYTES: u64 = 35 * 1024 * 1024; // 35 MiB

/// Cache key: `(owner, repo, issue_id)`, the same tuple extracted from the request path.
type CacheKey = (String, String, u64);

/// In-memory cache of fetched issues with their comments, bounded by an
/// estimated byte budget (`MAX_CACHE_CAPACITY_BYTES`).
#[derive(Default)]
pub struct GitHubCommentsCache {
// Bytes currently accounted for (running total of the entries' `estimated_size`
// as maintained by `put`); despite the name this is usage, not the limit.
capacity: u64,
// Entries ordered by recency: `get` and `put` move/insert at the front,
// eviction pops from the back.
entries: VecDeque<(CacheKey, Arc<CachedComments>)>,
}

/// One cached fetch result: the issue with its comments plus bookkeeping data.
pub struct CachedComments {
// Rough byte size of `issue_with_comments`, computed once when the entry is created
// and used for the cache's size accounting.
estimated_size: usize,
// Wall-clock time, in seconds, that the fetch from GitHub took; shown on the page.
duration_secs: f64,
// The fetched issue and its comments.
issue_with_comments: GitHubIssueWithComments,
}

impl GitHubCommentsCache {
    /// Looks up `key` and, on a hit, marks the entry as most recently used.
    ///
    /// Returns a shared handle to the cached value, or `None` on a miss.
    pub fn get(&mut self, key: &CacheKey) -> Option<Arc<CachedComments>> {
        let pos = self.entries.iter().position(|(k, _)| k == key)?;
        let entry = self.entries.remove(pos)?;
        // Only the `Arc` is cloned; the key (two `String`s) is moved back as is.
        let value = Arc::clone(&entry.1);
        self.entries.push_front(entry);
        Some(value)
    }

    /// Inserts `value` under `key` as the most recently used entry and returns it.
    ///
    /// A value whose estimated size alone exceeds `MAX_CACHE_CAPACITY_BYTES` is
    /// returned without being cached. Otherwise any previous entry for `key` is
    /// replaced, and least recently used entries are evicted until the new value
    /// fits within the byte budget.
    pub fn put(&mut self, key: CacheKey, value: Arc<CachedComments>) -> Arc<CachedComments> {
        let size = value.estimated_size as u64;
        if size > MAX_CACHE_CAPACITY_BYTES {
            // Entry is too large, don't cache, return as is
            return value;
        }

        // Drop a stale entry for the same key first so it is neither duplicated
        // nor counted twice.
        self.prune(&key);

        // Evict from the back (least recently used) until the new entry fits.
        // A single eviction is not enough when the incoming value is larger
        // than the evicted one.
        while self.capacity + size > MAX_CACHE_CAPACITY_BYTES {
            match self.entries.pop_back() {
                Some((_, evicted)) => {
                    self.capacity = self.capacity.saturating_sub(evicted.estimated_size as u64);
                }
                None => {
                    // Nothing left to evict: an empty cache holds zero bytes.
                    self.capacity = 0;
                    break;
                }
            }
        }

        // Add the entry to the front of the list and return it
        self.capacity += size;
        self.entries.push_front((key, Arc::clone(&value)));
        value
    }

    /// Removes the entry for `key`, releasing its share of the byte budget.
    ///
    /// Returns `true` if an entry was removed, `false` if `key` was not cached.
    pub fn prune(&mut self, key: &CacheKey) -> bool {
        let pos = match self.entries.iter().position(|(k, _)| k == key) {
            Some(pos) => pos,
            None => return false,
        };
        if let Some((_, removed)) = self.entries.remove(pos) {
            self.capacity = self.capacity.saturating_sub(removed.estimated_size as u64);
        }
        true
    }
}

pub async fn gh_comments(
Path(ref key @ (ref owner, ref repo, issue_id)): Path<(String, String, u64)>,
State(ctx): State<Arc<Context>>,
) -> axum::response::Result<Response, AppError> {
if !is_repo_autorized(&ctx, &owner, &repo).await? {
return Ok((
StatusCode::UNAUTHORIZED,
format!("repository `{owner}/{repo}` is not part of the Rust Project team repos"),
)
.into_response());
}

let CachedComments {
estimated_size: _,
duration_secs,
issue_with_comments,
} = &*'comments: {
if let Some(logs) = ctx.gh_comments.write().await.get(&key) {
tracing::info!("gh_comments: cache hit for issue #{issue_id}");
break 'comments logs;
}

tracing::info!("gh_comments: cache miss for issue #{issue_id}");

let start = Instant::now();

let issue_with_comments = ctx
.github
.issue_with_comments(&owner, &repo, issue_id)
.await
.context("unable to fetch the issue and it's comments")?;

let duration = start.elapsed();
let duration_secs = duration.as_secs_f64();

// Rough estimation of the byte size of the issue with comments
let estimated_size: usize = std::mem::size_of::<GitHubIssueWithComments>()
+ issue_with_comments.url.len()
+ issue_with_comments.title.len()
+ issue_with_comments.body_html.len()
+ issue_with_comments.title_html.len()
+ issue_with_comments
.comments
.nodes
.iter()
.map(|c| {
std::mem::size_of::<GitHubGraphQlComment>()
+ c.url.len()
+ c.body_html.len()
+ c.author.login.len()
+ c.author.avatar_url.len()
})
.sum::<usize>();

ctx.gh_comments.write().await.put(
key.clone(),
CachedComments {
estimated_size,
duration_secs,
issue_with_comments,
}
.into(),
)
};

let comment_count = issue_with_comments.comments.nodes.len();

let mut title = String::new();
pulldown_cmark_escape::escape_html(&mut title, &issue_with_comments.title)?;

let title_html = &issue_with_comments.title_html;

let mut html = String::new();

writeln!(
html,
r###"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>{title} - #{issue_id}</title>
<link rel="icon" sizes="32x32" type="image/png" href="https://rust-lang.org/static/images/favicon-32x32.png">
<link rel="stylesheet" href="{MARKDOWN_URL}" />
<link rel="stylesheet" href="{STYLE_URL}" />
<script nonce="triagebot-gh-comments">
document.addEventListener('DOMContentLoaded', function() {{
document.querySelectorAll('[data-utc-time]').forEach(element => {{
const utcString = element.getAttribute('data-utc-time');
const utcDate = new Date(utcString);
element.textContent = utcDate.toLocaleString();
}});
}});
</script>
</head>
<body>
<div class="comments-container">
<h1 class="markdown-body title">{title_html} #{issue_id}</h1>
<p>{comment_count} comments loaded in {duration_secs:.2}s</p>
"###,
)
.unwrap();

write_comment_as_html(
&mut html,
&issue_with_comments.body_html,
&issue_with_comments.url,
&issue_with_comments.author,
&issue_with_comments.created_at,
&issue_with_comments.updated_at,
false,
None,
)?;

for comment in &issue_with_comments.comments.nodes {
write_comment_as_html(
&mut html,
&comment.body_html,
&comment.url,
&comment.author,
&comment.created_at,
&comment.updated_at,
comment.is_minimized,
comment.minimized_reason.as_deref(),
)?;
}

writeln!(html, r###"</div></body>"###).unwrap();

let mut headers = HeaderMap::new();
headers.insert(
CONTENT_TYPE,
HeaderValue::from_static("text/html; charset=utf-8"),
);
headers.insert(
CACHE_CONTROL,
HeaderValue::from_static("public, max-age=30"),
);
headers.insert(
CONTENT_SECURITY_POLICY,
HeaderValue::from_static(
"default-src 'none'; script-src 'nonce-triagebot-gh-comments'; style-src 'self'; img-src *",
),
);

Ok((StatusCode::OK, headers, html).into_response())
}

/// Serves the page stylesheet that is embedded into the binary at compile time.
///
/// The response headers come from `immutable_headers` with a CSS content type.
pub async fn style_css() -> impl IntoResponse {
    let headers = immutable_headers("text/css; charset=utf-8");
    let stylesheet: &'static str = include_str!("gh_comments/style.css");

    (headers, stylesheet)
}

/// Serves the GitHub markdown stylesheet that is embedded into the binary at compile time.
///
/// The response headers come from `immutable_headers` with a CSS content type.
pub async fn markdown_css() -> impl IntoResponse {
    let headers = immutable_headers("text/css; charset=utf-8");
    let stylesheet: &'static str = include_str!("gh_comments/github-markdown@5.8.1.css");

    (headers, stylesheet)
}

/// Appends the HTML for one comment (or the issue body) to `buffer`.
///
/// * `body_html` - rendered HTML of the comment, inserted verbatim.
/// * `comment_url` - target of the "View on GitHub" link.
/// * `author` - supplies the login and avatar URL shown in the header.
/// * `created_at` / `updated_at` - when they differ, a non-minimized comment
///   is marked as "edited".
/// * `minimized` / `minimized_reason` - when the comment is minimized *and* a
///   reason is given, it is rendered as a collapsed `<details>` element showing
///   "hidden as {reason}"; in every other case it is rendered expanded.
///
/// NOTE(review): `body_html` is not escaped here. This relies on GitHub
/// returning already-rendered, pre-escaped HTML for comment bodies — the point
/// raised in review of this code. The login, avatar URL, comment URL and
/// minimized reason are likewise interpolated as received.
///
/// # Errors
///
/// Returns an error if formatting into `buffer` fails.
fn write_comment_as_html(
    buffer: &mut String,
    body_html: &str,
    comment_url: &str,
    author: &GitHubSimplifiedAuthor,
    created_at: &chrono::DateTime<Utc>,
    updated_at: &chrono::DateTime<Utc>,
    minimized: bool,
    minimized_reason: Option<&str>,
) -> anyhow::Result<()> {
    let author_login = &author.login;
    let author_avatar_url = &author.avatar_url;
    // Machine-readable timestamp; the page script rewrites the visible text of
    // elements carrying `data-utc-time` into the viewer's locale.
    let created_at_rfc3339 = created_at.to_rfc3339();

    if minimized && let Some(minimized_reason) = minimized_reason {
        writeln!(
            buffer,
            r###"
<div class="comment-wrapper">
<a href="https://github.com/{author_login}" target="_blank" class="desktop">
<img src="{author_avatar_url}" alt="{author_login} Avatar" class="avatar">
</a>

<details class="comment">
<summary class="comment-header">
<div class="author-info desktop">
<a href="https://github.com/{author_login}" target="_blank">{author_login}</a>
<span>on <span data-utc-time="{created_at_rfc3339}">{created_at}</span></span><span> · hidden as {minimized_reason}</span>
</div>

<div class="author-mobile">
<a href="https://github.com/{author_login}" target="_blank">
<img src="{author_avatar_url}" alt="{author_login} Avatar" class="avatar">
</a>
<div class="author-info">
<a href="https://github.com/{author_login}" target="_blank">{author_login}</a>
<span>on <span data-utc-time="{created_at_rfc3339}">{created_at}</span></span><span> · hidden as {minimized_reason}</span>
</div>
</div>

<a href="{comment_url}" target="_blank" class="github-link">View on GitHub</a>
</summary>

<div class="comment-body markdown-body">
{body_html}
</div>
</details>
</div>
"###
        )?;
    } else {
        let edited = if created_at != updated_at {
            "<span> · edited</span>"
        } else {
            ""
        };

        writeln!(
            buffer,
            r###"
<div class="comment-wrapper">
<a href="https://github.com/{author_login}" target="_blank" class="desktop">
<img src="{author_avatar_url}" alt="{author_login} Avatar" class="avatar">
</a>

<div class="comment">
<div class="comment-header">
<div class="author-info desktop">
<a href="https://github.com/{author_login}" target="_blank">{author_login}</a>
<span>on <span data-utc-time="{created_at_rfc3339}">{created_at}</span></span>{edited}
</div>

<div class="author-mobile">
<a href="https://github.com/{author_login}" target="_blank">
<img src="{author_avatar_url}" alt="{author_login} Avatar" class="avatar">
</a>
<div class="author-info">
<a href="https://github.com/{author_login}" target="_blank">{author_login}</a>
<span>on <span data-utc-time="{created_at_rfc3339}">{created_at}</span></span>{edited}
</div>
</div>

<a href="{comment_url}" target="_blank" class="github-link">View on GitHub</a>
</div>

<div class="comment-body markdown-body">
{body_html}
</div>
</div>
</div>
"###
        )?;
    }

    Ok(())
}
Loading