Skip to content

Commit 8af3da7

Browse files
committed
perf: parallelize Safe Browsing processing
1 parent 97ec501 commit 8af3da7

File tree

5 files changed

+45
-49
lines changed

5 files changed

+45
-49
lines changed

Cargo.lock

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ num-traits = "0.2.19"
3030
paste = "1.0.15"
3131
poise = { git = "https://github.com/serenity-rs/poise.git", branch = "serenity-next" }
3232
rand = "0.8.5"
33+
rayon = "1.10.0"
3334
redis = { version = "0.27.5", features = ["tokio-comp", "tokio-rustls", "rustls", "rustls-pemfile", "rustls-pki-types", "webpki-roots"] }
3435
redis-macros = "0.4.2"
3536
regex = "1.11.1"

flake.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

nix/package.nix

Lines changed: 2 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,9 @@
44

55
{
66
lib,
7-
stdenv,
87
rustPlatform,
9-
darwin,
10-
nix-filter,
118
pkg-config,
9+
nix-filter,
1210
self,
1311
enableLTO ? true,
1412
enableOptimizeSize ? false,
@@ -46,15 +44,7 @@ rustPlatform.buildRustPackage rec {
4644

4745
doCheck = false;
4846

49-
buildInputs = lib.optionals stdenv.isDarwin [
50-
darwin.apple_sdk.frameworks.CoreFoundation
51-
darwin.apple_sdk.frameworks.Security
52-
darwin.apple_sdk.frameworks.SystemConfiguration
53-
darwin.apple_sdk.frameworks.IOKit
54-
darwin.libiconv
55-
];
56-
57-
nativeBuildInputs = lib.optionals stdenv.isDarwin [ pkg-config ];
47+
nativeBuildInputs = [ pkg-config ];
5848

5949
env =
6050
{

src/safe_browsing/mod.rs

Lines changed: 36 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _};
66
use eyre::eyre;
77
use sha2::{Digest as _, Sha256};
88

9+
use rayon::prelude::*;
910
use std::{
1011
collections::{HashMap, HashSet},
1112
sync::Arc,
@@ -153,35 +154,39 @@ impl SafeBrowsing {
153154
let mut url_hashes: HashMap<String, HashSet<Vec<u8>>> = HashMap::new();
154155

155156
for url in urls {
156-
let url_prefixes = Self::generate_url_prefixes(url)?;
157+
url_hashes.insert((*url).to_string(), HashSet::new());
157158

158-
for url_prefix in url_prefixes {
159+
for url_prefix in Self::generate_url_prefixes(url)? {
159160
let url_hash = Sha256::digest(&url_prefix).to_vec();
160161

161-
if let Some(v) = url_hashes.get_mut(*url) {
162-
v.insert(url_hash);
163-
} else {
164-
let mut hs = HashSet::new();
165-
hs.insert(url_hash);
166-
url_hashes.insert((*url).to_string(), hs);
167-
}
162+
url_hashes
163+
.get_mut(*url)
164+
.ok_or_else(|| eyre!("could not obtain `url_hashes` {url}"))?
165+
.insert(url_hash);
168166
}
169167
}
170168

171-
let mut matched_hash_prefixes = HashSet::new();
172169
let states = self.states.read().await;
173170

174-
for hash in url_hashes.values().flatten() {
175-
for list_state in states.values() {
176-
matched_hash_prefixes.extend(
177-
list_state
178-
.prefixes
179-
.iter()
180-
.filter(|prefix| hash.starts_with(prefix))
181-
.map(|p| p.to_owned()),
182-
);
183-
}
184-
}
171+
let matched_hash_prefixes = states
172+
.values()
173+
.par_bridge()
174+
.map(|list_state| {
175+
url_hashes
176+
.values()
177+
.flatten()
178+
.par_bridge()
179+
.map(|hash| {
180+
list_state
181+
.prefixes
182+
.par_iter()
183+
.filter(|prefix| hash.starts_with(prefix))
184+
.map(|p| p.to_owned())
185+
})
186+
.flatten()
187+
})
188+
.flatten()
189+
.collect::<HashSet<_>>();
185190

186191
drop(states);
187192

@@ -202,7 +207,7 @@ impl SafeBrowsing {
202207
platform_types: vec!["ANY_PLATFORM".to_string()],
203208
threat_entry_types: vec!["URL".to_string()],
204209
threat_entries: matched_hash_prefixes
205-
.iter()
210+
.par_iter()
206211
.map(|hash| ThreatEntry {
207212
hash: BASE64.encode(hash),
208213
})
@@ -223,36 +228,35 @@ impl SafeBrowsing {
223228
let matches = response
224229
.matches
225230
.unwrap_or_default()
226-
.into_iter()
231+
.into_par_iter()
227232
.filter_map(|m| {
228-
for (url, hashes) in &url_hashes {
229-
if let Ok(raw_threat_hash) = BASE64.decode(&m.threat.hash) {
230-
if hashes.contains(&raw_threat_hash) {
231-
return Some((url.to_owned(), m));
232-
}
233+
if let Ok(raw_threat_hash) = BASE64.decode(&m.threat.hash) {
234+
if let Some((url, _)) = url_hashes
235+
.par_iter()
236+
.find_any(|(_, h)| h.contains(&raw_threat_hash))
237+
{
238+
return Some((url.to_owned(), m));
233239
}
234240
}
235241

236242
None
237243
})
238244
.collect::<Vec<_>>();
239245

240-
let bench_elapsed = bench_start.elapsed();
241246
tracing::trace!(
242247
"Scanned {} URLs in {:.2}ms (prefixes matched) => {} matches",
243248
urls.len(),
244-
bench_elapsed.as_millis(),
249+
bench_start.elapsed().as_millis(),
245250
matches.len()
246251
);
247252

248253
return Ok(matches);
249254
}
250255

251-
let bench_elapsed = bench_start.elapsed();
252256
tracing::trace!(
253257
"Scanned {} URLs in {:.2}ms (no prefixes matched) => no matches",
254258
urls.len(),
255-
bench_elapsed.as_millis(),
259+
bench_start.elapsed().as_millis(),
256260
);
257261

258262
Ok(Vec::new())

0 commit comments

Comments
 (0)