From fdb6178381185c608556ce175299b7e9a268d9c3 Mon Sep 17 00:00:00 2001 From: zak <8143258+ZakisM@users.noreply.github.com> Date: Sat, 24 Feb 2024 21:38:07 +0000 Subject: [PATCH] feat: use playwright instead of splash --- Cargo.toml | 8 +- Dockerfile | 31 +++--- docker-compose.yml | 6 +- playwright_bridge/.gitignore | 175 ++++++++++++++++++++++++++++++++ playwright_bridge/Dockerfile | 26 +++++ playwright_bridge/README.md | 15 +++ playwright_bridge/bun.lockb | Bin 0 -> 4242 bytes playwright_bridge/index.ts | 66 ++++++++++++ playwright_bridge/package.json | 15 +++ playwright_bridge/tsconfig.json | 27 +++++ src/discord/mod.rs | 6 +- src/worker/alert.rs | 6 +- 12 files changed, 354 insertions(+), 27 deletions(-) create mode 100644 playwright_bridge/.gitignore create mode 100644 playwright_bridge/Dockerfile create mode 100644 playwright_bridge/README.md create mode 100755 playwright_bridge/bun.lockb create mode 100644 playwright_bridge/index.ts create mode 100644 playwright_bridge/package.json create mode 100644 playwright_bridge/tsconfig.json diff --git a/Cargo.toml b/Cargo.toml index 58661be..08f69a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ default-features = false features = ["cache", "client", "gateway", "http", "model", "standard_framework", "rustls_backend", "utils"] version = "0.12" -[profile.release] -lto = 'fat' -codegen-units = 1 -strip = "symbols" +# [profile.release] +# lto = 'fat' +# codegen-units = 1 +# strip = "symbols" diff --git a/Dockerfile b/Dockerfile index 160a623..94304b4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,18 +1,21 @@ -ARG BASE_IMAGE=rust:latest - -FROM $BASE_IMAGE as builder +FROM lukemathwalker/cargo-chef:latest-rust-latest AS chef WORKDIR /app -ENV SQLX_OFFLINE='true' -COPY ./Cargo.lock ./Cargo.lock -COPY ./Cargo.toml ./Cargo.toml -COPY ./src ./src -COPY ./.sqlx ./.sqlx + +FROM chef AS planner +COPY . . 
+RUN cargo chef prepare --recipe-path recipe.json + +FROM chef AS builder +COPY --from=planner /app/recipe.json recipe.json +RUN cargo chef cook --release --recipe-path recipe.json +COPY . . RUN mkdir -p data RUN cargo build --release -FROM gcr.io/distroless/cc-debian11 -COPY --from=builder /app/target/release/general_notifier / -COPY --from=builder /app/data / -COPY ./migrations ./migrations -COPY ./.env ./.env -CMD ["./general_notifier"] +FROM gcr.io/distroless/cc-debian12 +WORKDIR /app +COPY --from=builder /app/target/release/general_notifier . +COPY --from=builder /app/.env . +COPY --from=builder /app/data ./data +COPY --from=builder /app/migrations ./migrations +ENTRYPOINT ["/app/general_notifier"] diff --git a/docker-compose.yml b/docker-compose.yml index e1d291d..6d00853 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,14 +4,14 @@ services: backend: image: zakism/general-notifier:latest depends_on: - - splash + - playwright environment: - DISCORD_TOKEN=$DISCORD_TOKEN volumes: - ./general_notifier_data:/data - splash: - image: scrapinghub/splash:master + playwright: + image: zakism/general-notifier-playwright:latest ports: - "127.0.0.1:8050:8050" restart: always diff --git a/playwright_bridge/.gitignore b/playwright_bridge/.gitignore new file mode 100644 index 0000000..9b1ee42 --- /dev/null +++ b/playwright_bridge/.gitignore @@ -0,0 +1,175 @@ +# Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore + +# Logs + +logs +_.log +npm-debug.log_ +yarn-debug.log* +yarn-error.log* +lerna-debug.log* +.pnpm-debug.log* + +# Caches + +.cache + +# Diagnostic reports (https://nodejs.org/api/report.html) + +report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json + +# Runtime data + +pids +_.pid +_.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover + +lib-cov + +# Coverage directory used by tools like istanbul + +coverage +*.lcov + +# nyc test coverage + +.nyc_output + +# Grunt intermediate storage 
(https://gruntjs.com/creating-plugins#storing-task-files) + +.grunt + +# Bower dependency directory (https://bower.io/) + +bower_components + +# node-waf configuration + +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) + +build/Release + +# Dependency directories + +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) + +web_modules/ + +# TypeScript cache + +*.tsbuildinfo + +# Optional npm cache directory + +.npm + +# Optional eslint cache + +.eslintcache + +# Optional stylelint cache + +.stylelintcache + +# Microbundle cache + +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history + +.node_repl_history + +# Output of 'npm pack' + +*.tgz + +# Yarn Integrity file + +.yarn-integrity + +# dotenv environment variable files + +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# parcel-bundler cache (https://parceljs.org/) + +.parcel-cache + +# Next.js build output + +.next +out + +# Nuxt.js build / generate output + +.nuxt +dist + +# Gatsby files + +# Comment in the public line in if your project uses Gatsby and not Next.js + +# https://nextjs.org/blog/next-9-1#public-directory-support + +# public + +# vuepress build output + +.vuepress/dist + +# vuepress v2.x temp and cache directory + +.temp + +# Docusaurus cache and generated files + +.docusaurus + +# Serverless directories + +.serverless/ + +# FuseBox cache + +.fusebox/ + +# DynamoDB Local files + +.dynamodb/ + +# TernJS port file + +.tern-port + +# Stores VSCode versions used for testing VSCode extensions + +.vscode-test + +# yarn v2 + +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* + +# IntelliJ based IDEs +.idea + +# Finder (MacOS) folder config +.DS_Store diff --git a/playwright_bridge/Dockerfile b/playwright_bridge/Dockerfile new file mode 100644 index 0000000..5bf773f --- /dev/null +++ b/playwright_bridge/Dockerfile @@ -0,0 +1,26 @@ 
+FROM oven/bun:1 as base +WORKDIR /usr/src/app + +FROM base AS install +RUN mkdir -p /temp/dev +COPY package.json bun.lockb /temp/dev/ +RUN cd /temp/dev && bun install --frozen-lockfile + +RUN mkdir -p /temp/prod +COPY package.json bun.lockb /temp/prod/ +RUN cd /temp/prod && bun install --frozen-lockfile --production + +FROM base AS prerelease +COPY --from=install /temp/dev/node_modules node_modules +COPY . . + +ENV NODE_ENV=production + +FROM mcr.microsoft.com/playwright:v1.41.1-jammy +COPY --from=install /temp/prod/node_modules node_modules +COPY --from=prerelease /usr/src/app/index.ts . +COPY --from=prerelease /usr/src/app/package.json . +RUN npm install -g bun + +ENTRYPOINT [ "bun", "run", "index.ts" ] + diff --git a/playwright_bridge/README.md b/playwright_bridge/README.md new file mode 100644 index 0000000..109fe59 --- /dev/null +++ b/playwright_bridge/README.md @@ -0,0 +1,15 @@ +# playwright_bridge + +To install dependencies: + +```bash +bun install +``` + +To run: + +```bash +bun run index.ts +``` + +This project was created using `bun init` in bun v1.0.29. [Bun](https://bun.sh) is a fast all-in-one JavaScript runtime. diff --git a/playwright_bridge/bun.lockb b/playwright_bridge/bun.lockb new file mode 100755 index 0000000000000000000000000000000000000000..25d7891090202fed089911c0f222c593b941a66e GIT binary patch literal 4242 zcmeHKX;2hL6rRNu*9{S+uryvQoK#pM*y589@+L(JQKFwgem* zxb=7pol<|#?6ncak9W9a7hDhpeobd?F&hjfWm0`%>w)4!I~+T`a3XZvNXXw5mc zZT+x~jY-#2+&k)vT&kP)O-Pvh`bH}7yt#d`hkV_GxCZsbB&)k)GcW9n*|)NiPLVC! 
zJn^X8w1GO;vU;ywi2k_*MGc7znj83s`0SzKdFb{k*e(vNRDeh0e$eoIuV53qBhY{I zIo3A^$6W#6A9&FEsP#p#pYDKn=GRBK0eu034aTFuxtQQb{Y(5Mz&imRWhB}|w*>5k z3$Nb~I@1Af4YrVP1k!yGjDH(CvjLCR9sv#MzKReGQA1qjJm^UpwrGeevV${1pfTDf z(#U5lFP5*}vK=k(f<-_Wc}=N>C2#z&O5Z$uo8F?tUT~|rE#JN7Q8rOl=^hrJ?EmLG zr91SQZ{@d?&&*3q{X;lRlIFEOxy(sSJ7v_?$L$$Yavyv|yf~KFWnq~^^6W7RK~2>$ z8|^aPzM;cJ)6Lc?$E}-r=hKhJw{H1oAT6hoS_)`p!Zm@B_slpY^HO{pH_&JX9AS<9jD_plxE|+_Nk0HJ{mVt^DD+W#jF->ctJe*Cc&@BRrqErtI+WP8}z*Y%gf`%AB@Xzhu8{Qr=mg43X7_58~ZU z#0iJr%zI$Mi(-vM*wj+-%c9i3TaJ&jty*7s+afo*xuIcIPMi6>Tg1V4z0)(s&MCBP zs?)Qwi)-fW*;RWq!hdtIkG=W6z#VNzE_uK`(HUQK4zUO)E=YE{qvzbk2fS9NeP-jG zVC^01c(2r@d}htVNbca&>^$4r1)t2nI9^tqxbyv({>x=aYYwN{KB_747#dO^n}vyb zXms8uKtu1(APr5fWP2F!Lu7~FtJn_b71mEizWiC!d*J1J^S=juk3cs6sy)tYu`A02 zk+hcO0yG*D7nOy!NL9}-m1+9(FRUQA21^;pr zyGA%FX8`ai6ex$#{fzD=l<(;7L-z?f`-n=#^91*S595)>y!ic$_%JW>b0^Q24+Sa` zv8#`Aif@DpwmYTMS4H>j;<^{ttAho8&ZL}_6U0cw04E(+up%=}07tPLLG;9k5XO;o z?7AIrRct2kQ;<#yQrUSQ;QH81!2dzuN*q_vLV*&vddb!#^t0nN-N1fN?-U0>L#suEhc4Oe&|4P6Y9)(P9>_yXO>3g+)>f zIX9Z47;S)N5y#M4^$dxkE7+>eU~_Dcf$C(d^?qjiHf!`Dgyt--XSz0yUR}tsEX4qu zZ^3FII=SH;sFD-o^2oobtvZ_$k4%R zii-)S*dT@u=VlqYGWh)JG79CmaCW*(#!zaS<(L>!8y>cjC3TEi)@3Af(H@WU^<9?=0;4-fW~wPVT1$ZjL3}n`rrHg2XwGV)&Kwi literal 0 HcmV?d00001 diff --git a/playwright_bridge/index.ts b/playwright_bridge/index.ts new file mode 100644 index 0000000..636cf28 --- /dev/null +++ b/playwright_bridge/index.ts @@ -0,0 +1,66 @@ +import {webkit, type Browser} from 'playwright'; + +function invariant(condition: any, message: string): asserts condition { + if (condition) return; + + throw new Error(message); +} + +let browser: Browser; + +const getPageSource = async (url: string, timeout: number) => { + if (!browser) { + try { + browser = await webkit.launch(); + } catch (error) { + console.error(error); + process.exit(1); + } + } + + let res: string | undefined; + + try { + const page = await browser.newPage(); + await page.goto(url, {timeout: timeout * 1000}); + + res = await page.content(); + + 
await page.close(); + } catch (error) { /* best effort: log here; the invariant below turns a missing result into an error */ + console.error(`Failed to get ${url} due to: ${error}`); + } + + invariant( + res !== undefined, + `Response was undefined when trying to to read page source for: ${url}`, + ); + + return res; +}; + +Bun.serve({ + async fetch(req: Request) { + const {searchParams} = new URL(req.url); + + const url = searchParams.get('url'); + invariant(url !== null, 'url was null'); + + const timeout = Number(searchParams.get('timeout')); + invariant( + timeout !== null && !Number.isNaN(timeout), + 'timeout was null | NaN', + ); + + try { + const source = await getPageSource(url, Number(timeout)); + + return new Response(source); + } catch (error) { /* re-throw with the requested url attached for context */ + throw new Error( + `Failed to call 'getPageSource' for '${url}' due to: ${error}`, + ); + } + }, + port: 8050, +}); diff --git a/playwright_bridge/package.json b/playwright_bridge/package.json new file mode 100644 index 0000000..7ac5840 --- /dev/null +++ b/playwright_bridge/package.json @@ -0,0 +1,15 @@ +{ + "name": "playwright_bridge", + "module": "index.ts", + "type": "module", + "devDependencies": { + "@types/bun": "latest", + "typescript": "^5.3.3" + }, + "peerDependencies": { + "typescript": "^5.0.0" + }, + "dependencies": { + "playwright": "^1.41.2" + } +} \ No newline at end of file diff --git a/playwright_bridge/tsconfig.json b/playwright_bridge/tsconfig.json new file mode 100644 index 0000000..b6e46be --- /dev/null +++ b/playwright_bridge/tsconfig.json @@ -0,0 +1,27 @@ +{ + "compilerOptions": { + // Enable latest features + "lib": ["ESNext"], + "target": "ESNext", + "module": "ESNext", + "moduleDetection": "force", + "jsx": "react-jsx", + "allowJs": true, + + // Bundler mode + "moduleResolution": "node", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "noEmit": true, + + // Best practices + "strict": true, + "skipLibCheck": true, + "noFallthroughCasesInSwitch": true, + + // Some stricter flags (disabled by default) + "noUnusedLocals": false, + 
"noUnusedParameters": false, + "noPropertyAccessFromIndexSignature": false + } +} diff --git a/src/discord/mod.rs b/src/discord/mod.rs index ab3e4c8..3185fb3 100644 --- a/src/discord/mod.rs +++ b/src/discord/mod.rs @@ -107,9 +107,7 @@ pub async fn start( .await .context("Error creating client")?; - client.start().await?; - - let cache_http = (client.cache, client.http); + let cache_http = (client.cache.clone(), client.http.clone()); tokio::task::spawn(async move { while let Some(response_message) = responder_rx.recv().await { @@ -119,6 +117,8 @@ pub async fn start( } }); + client.start().await?; + Ok(()) } diff --git a/src/worker/alert.rs b/src/worker/alert.rs index 00b9997..038f2f1 100644 --- a/src/worker/alert.rs +++ b/src/worker/alert.rs @@ -49,7 +49,7 @@ pub async fn start(pool: Arc, responder_tx: Sender) Err(e) => error!("Failed to read all alerts: {}", e), } - tokio::time::sleep(Duration::from_secs(60 * 5)).await; + tokio::time::sleep(Duration::from_secs(5)).await; } } @@ -60,8 +60,8 @@ pub async fn check_alert( alerts: Vec<&Alert>, responder_tx: Sender, ) -> Result<()> { - let splash_url = format!("http://splash:8050/render.html?url={}&timeout=10", url); - let res = client.get(&splash_url).send().await?.text().await?; + let playwright_url = format!("http://playwright:8050/?url={}&timeout=10", url); + let res = client.get(&playwright_url).send().await?.text().await?; info!("Sent request to {}", &url);