Skip to content

Commit

Permalink
add preliminary support for rspack
Browse files Browse the repository at this point in the history
Lacking appropriate watchdog changes for now, and probably other things.
Asset extraction from the chunkloader ("deep") also remains unfixed.
  • Loading branch information
slice committed Oct 31, 2023
1 parent e0fdec2 commit 77833b9
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 69 deletions.
34 changes: 26 additions & 8 deletions crates/havoc/src/discord/assets/ext.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,40 @@
use crate::discord::{FeAsset, FeAssetType, RootScript};

pub trait AssetsExt<'a, I> {
pub trait AssetsExt<'source, Source: ?Sized> {
// We can't use "return position impl trait in traits" here, so we're forced
// to either box or implement our own iterator type. I can't be bothered to
// write a new iterator type at the moment so we'll just box.
fn filter_by_type(self, typ: FeAssetType) -> Box<dyn Iterator<Item = &'a FeAsset> + 'a + Send>;
/// Returns a boxed iterator over the assets whose type equals `typ`.
fn filter_by_type(
self,
typ: FeAssetType,
) -> Box<dyn Iterator<Item = &'source FeAsset> + 'source + Send>;

fn find_root_script(self, root_script_type: RootScript) -> Option<&'a FeAsset>
/// Returns the JavaScript asset assumed to correspond to `root_script_type`,
/// or `None` when no index can be assumed for it or the assumed index does
/// not exist among the JavaScript assets.
fn find_root_script(self, root_script_type: RootScript) -> Option<&'source FeAsset>
where
Self: Sized,
{
self.filter_by_type(FeAssetType::Js)
.nth(root_script_type.assumed_index())
// The assumed index is computed from the number of scripts, so the
// filtered scripts have to be collected before the index can be resolved.
let scripts = self.filter_by_type(FeAssetType::Js).collect::<Vec<_>>();

// `nth` also yields `None` if the assumed index is past the end.
root_script_type
.assumed_index_within_scripts(scripts.len())
.and_then(|index| scripts.into_iter().nth(index))
}
}

impl<'a, I: Iterator<Item = &'a FeAsset> + 'a + Send> AssetsExt<'a, I> for I {
fn filter_by_type(self, typ: FeAssetType) -> Box<dyn Iterator<Item = &'a FeAsset> + 'a + Send> {
Box::new(self.filter(move |asset| asset.typ == typ))
// Implement the convenience extension on any reference to a source, where said
// reference can be turned into an iterator yielding references to assets within
// the source. The source doesn't have to be sized, so we can use slices with
// it, which are notably unsized when not behind some indirection (e.g. a
// reference).
impl<'source, Source: ?Sized> AssetsExt<'source, Source> for &'source Source
where
&'source Source: IntoIterator<Item = &'source FeAsset> + Send,
<&'source Source as IntoIterator>::IntoIter: Send + 'source,
{
/// Iterates the source by reference and keeps only the assets whose `typ`
/// field equals the requested type.
fn filter_by_type(
self,
typ: FeAssetType,
) -> Box<dyn Iterator<Item = &'source FeAsset> + 'source + Send> {
// `move` makes the closure own `typ`, so the boxed iterator doesn't
// borrow from this call's stack frame and can be returned.
Box::new(self.into_iter().filter(move |asset| asset.typ == typ))
}
}
39 changes: 19 additions & 20 deletions crates/havoc/src/discord/assets/root.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use std::fmt::Display;
/// change at any time.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum RootScript {
/// A script which handles loading other Webpack chunks that aren't root
/// script.
/// A script which handles the loading of other Webpack chunks that aren't
/// present at the root.
ChunkLoader,

/// The Webpack chunk containing CSS chunk class mappings.
Expand All @@ -33,25 +33,24 @@ impl Display for RootScript {
}

impl RootScript {
/// Returns the assumed ordering of the root scripts in the application HTML.
///
/// This is a fragile assumption that could change at any time.
pub fn assumed_ordering() -> [RootScript; 4] {
/// Given the number of script tags present in the HTML of a frontend,
/// returns the assumed index of the script corresponding to this
/// `RootScript`.
///
/// Returns `None` when no index can be assumed: for `Vendor`, or when there
/// are too few scripts for a position counted from the end to exist. Note
/// that `Classes` yields `Some(0)` regardless of `n_scripts`, so callers
/// still need to bounds-check the returned index.
pub fn assumed_index_within_scripts(&self, n_scripts: usize) -> Option<usize> {
use RootScript::*;

[ChunkLoader, Classes, Vendor, Entrypoint]
}

/// Using the assumed ordering of the root scripts in the application HTML,
/// returns the index into that ordering for this root script.
///
/// This is a fragile assumption that could change at any time.
pub fn assumed_index(&self) -> usize {
Self::assumed_ordering()
.iter()
.position(|kind| kind == self)
.expect(
"invariant violation: RootScript::assumed_ordering doesn't contain all variants",
)
match self {
// Seemingly always last. `checked_sub` yields `None` when there are no
// scripts at all.
ChunkLoader => n_scripts.checked_sub(1),
// Seemingly always first. Returned unconditionally, even when
// `n_scripts` is 0.
Classes => Some(0),
// Seemingly always penultimate; `None` when there are fewer than two
// scripts. Nota bene: it's no longer clear to me whether the concept
// of an "entrypoint" still applies with Rspack. Anyhow, it's a bit of
// a vague term, so this needs further design.
Entrypoint => n_scripts.checked_sub(2),
// In an Rspack world, it doesn't make sense to pinpoint a specific
// index for this.
Vendor => None,
}
}
}
1 change: 0 additions & 1 deletion crates/havoc/src/dump/classes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ impl Dump for CSSClasses {
) -> Result<DumpResult, DumpError> {
let classes_asset = artifact
.assets()
.iter()
.find_root_script(RootScript::Classes)
.ok_or(ScrapeError::MissingBranchPageAssets(
"failed to locate root classes script; discord has updated their /channels/@me",
Expand Down
9 changes: 4 additions & 5 deletions crates/havoc/src/dump/modules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,11 @@ async fn parse_webpack_chunk<'cache>(
assets: &'_ [FeAsset],
cache: &'cache mut AssetCache,
) -> Result<(swc_ecma_ast::Script, HashMap<ModuleId, &'cache str>), DumpError> {
let entrypoint_asset = assets
.iter()
.find_root_script(RootScript::Entrypoint)
.ok_or(ScrapeError::MissingBranchPageAssets(
let entrypoint_asset = assets.find_root_script(RootScript::Entrypoint).ok_or(
ScrapeError::MissingBranchPageAssets(
"failed to locate root entrypoint script; discord has updated their HTML",
))?;
),
)?;

let content = cache
.preprocessed_content(entrypoint_asset)
Expand Down
50 changes: 22 additions & 28 deletions crates/havoc/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,37 +179,31 @@ async fn print_build(
Ok(())
};

for (asset, root_script_type) in assets
.iter()
.filter_by_type(FeAssetType::Js)
.zip(RootScript::assumed_ordering().into_iter())
{
match root_script_type {
RootScript::ChunkLoader if matches.get_flag("deep") => {
if matches.get_flag("deep") {
let script_chunks = extract_assets_from_chunk_loader(&build.manifest, cache)
.await
.context("failed to extract assets from chunk loader")?;
write_asset_plain(
asset,
Some(format!(
"chunk loader, {} script chunks",
script_chunks.len()
)),
)?;

for (chunk_id, script_chunk) in script_chunks.iter().take(7) {
println!("\t\t{}: {}", chunk_id, script_chunk.filename());
}
println!("\t\t...");
}
}
_ => {
write_asset_plain(asset, Some(format!("{}", root_script_type)))?;
let scripts = assets.filter_by_type(FeAssetType::Js).collect::<Vec<_>>();
let chunkloader_index = RootScript::ChunkLoader.assumed_index_within_scripts(scripts.len());

for (index, asset) in scripts.iter().enumerate() {
if chunkloader_index == Some(index) && matches.get_flag("deep") {
let script_chunks = extract_assets_from_chunk_loader(&build.manifest, cache)
.await
.context("failed to extract assets from chunk loader")?;
write_asset_plain(
asset,
Some(format!(
"chunk loader, {} script chunks",
script_chunks.len()
)),
)?;

for (chunk_id, script_chunk) in script_chunks.iter().take(7) {
println!("\t\t{}: {}", chunk_id, script_chunk.filename());
}
println!("\t\t...");
} else {
write_asset_plain(asset, None)?;
}
}
for asset in assets.iter().filter_by_type(FeAssetType::Css) {
for asset in assets.filter_by_type(FeAssetType::Css) {
write_asset_plain(asset, None)?;
}

Expand Down
14 changes: 7 additions & 7 deletions crates/havoc/src/scrape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,7 @@ pub async fn extract_assets_from_chunk_loader(
manifest: &discord::FeManifest,
cache: &mut AssetCache,
) -> Result<Vec<(ChunkId, FeAsset)>, ScrapeError> {
let chunk_loader = manifest
.assets
.iter()
let chunk_loader = (&manifest.assets)
.find_root_script(RootScript::ChunkLoader)
.ok_or(ScrapeError::MissingBranchPageAssets("chunk loader"))?;
let data = cache.raw_content(chunk_loader).await?;
Expand Down Expand Up @@ -159,7 +157,7 @@ pub async fn scrape_fe_build(
// interested in.
let entrypoint_asset = fe_manifest
.assets
.iter()
.as_slice()
.find_root_script(RootScript::Entrypoint)
.expect(
"unable to locate entrypoint root script; discord has updated their /channels/@me html",
Expand Down Expand Up @@ -203,11 +201,13 @@ pub async fn request_branch_page(branch: discord::Branch) -> Result<IsahcRespons
/// page.
///
/// This function is designed to be used on the HTML content of `/channels/@me`
/// pages. Currently, crude regex matching is used instead of proper parsing.
/// pages. Currently, imprecise regex matching is used instead of parsing.
pub fn extract_assets_from_tags(page_content: &str) -> Vec<discord::FeAsset> {
lazy_static::lazy_static! {
static ref SCRIPT_TAG_RE: Regex = Regex::new(r#"<script src="/assets/(?P<name>[\.0-9a-f]+)\.js" integrity="[^"]+"></script>"#).unwrap();
static ref STYLE_TAG_RE: Regex = Regex::new(r#"<link rel="stylesheet" href="/assets/(?P<name>[\.0-9a-f]+)\.css" integrity="[^"]+">"#).unwrap();
// Crude matches, but I don't feel like bringing in a proper HTML parser
// unless I need to.
static ref SCRIPT_TAG_RE: Regex = Regex::new(r#"<script src="/assets/(?P<name>[\.0-9a-f]+)\.js""#).unwrap();
static ref STYLE_TAG_RE: Regex = Regex::new(r#"<link href="/assets/(?P<name>[\.0-9a-f]+)\.css""#).unwrap();
}

let collect_assets = |regex: &Regex, typ: discord::FeAssetType| {
Expand Down

0 comments on commit 77833b9

Please sign in to comment.