Skip to content

Commit

Permalink
fix typo in QA exclude check, which resulted in all URLs being excluded (#697)
Browse files Browse the repository at this point in the history

- ensure exclusions now work as expected in replay mode
- add test for using --exclude with replay
  • Loading branch information
ikreymer authored Oct 8, 2024
1 parent 282c47a commit 157ac34
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/replaycrawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ export class ReplayCrawler extends Crawler {
}

for (const s of this.excludeRx) {
- if (!s.test(url)) {
+ if (s.test(url)) {
logger.info("Skipping excluded page", { url }, "replay");
return;
}
Expand Down
6 changes: 4 additions & 2 deletions tests/qa_compare.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ test("run initial crawl with text and screenshots to prepare for QA", async () =
fs.rmSync("./test-crawls/qa-wr-net", { recursive: true, force: true });

child_process.execSync(
- "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/ --url https://webrecorder.net/about --url https://browsertrix.com/ --scopeType page --collection qa-wr-net --text to-warc --screenshot view --generateWACZ",
+ "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/ --url https://webrecorder.net/about --url https://browsertrix.com/ --url https://webrecorder.net/contact --scopeType page --collection qa-wr-net --text to-warc --screenshot view --generateWACZ",
);

expect(
Expand All @@ -20,7 +20,7 @@ test("run QA comparison, with write pages to redis", async () => {
fs.rmSync("./test-crawls/qa-wr-net-replay", { recursive: true, force: true });

const child = child_process.exec(
- "docker run -p 36380:6379 -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler qa --qaSource /crawls/collections/qa-wr-net/qa-wr-net.wacz --collection qa-wr-net-replay --crawlId test --qaDebugImageDiff --writePagesToRedis --debugAccessRedis",
+ "docker run -p 36380:6379 -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler qa --qaSource /crawls/collections/qa-wr-net/qa-wr-net.wacz --collection qa-wr-net-replay --crawlId test --qaDebugImageDiff --writePagesToRedis --debugAccessRedis --exclude contact",
);

// detect crawler exit
Expand Down Expand Up @@ -54,6 +54,8 @@ test("run QA comparison, with write pages to redis", async () => {
expect(json).toHaveProperty("loadState");
expect(json).toHaveProperty("comparison");

+ expect(json.title.indexOf("contact") < 0).toBe(true);

expect(json.comparison).toHaveProperty("screenshotMatch");
expect(json.comparison).toHaveProperty("textMatch");
expect(json.comparison).toHaveProperty("resourceCounts");
Expand Down

0 comments on commit 157ac34

Please sign in to comment.