Skip to content

Commit

Permalink
feat: use playwright instead of splash
Browse files Browse the repository at this point in the history
  • Loading branch information
ZakisM committed Feb 25, 2024
1 parent 8316c9a commit fdb6178
Show file tree
Hide file tree
Showing 12 changed files with 354 additions and 27 deletions.
8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ default-features = false
features = ["cache", "client", "gateway", "http", "model", "standard_framework", "rustls_backend", "utils"]
version = "0.12"

[profile.release]
lto = 'fat'
codegen-units = 1
strip = "symbols"
# [profile.release]
# lto = 'fat'
# codegen-units = 1
# strip = "symbols"
31 changes: 17 additions & 14 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
ARG BASE_IMAGE=rust:latest

FROM $BASE_IMAGE as builder
FROM lukemathwalker/cargo-chef:latest-rust-latest AS chef
WORKDIR /app
ENV SQLX_OFFLINE='true'
COPY ./Cargo.lock ./Cargo.lock
COPY ./Cargo.toml ./Cargo.toml
COPY ./src ./src
COPY ./.sqlx ./.sqlx

FROM chef AS planner
COPY . .
RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder
COPY --from=planner /app/recipe.json recipe.json
RUN cargo chef cook --release --recipe-path recipe.json
COPY . .
RUN mkdir -p data
RUN cargo build --release

FROM gcr.io/distroless/cc-debian11
COPY --from=builder /app/target/release/general_notifier /
COPY --from=builder /app/data /
COPY ./migrations ./migrations
COPY ./.env ./.env
CMD ["./general_notifier"]
FROM gcr.io/distroless/cc-debian12
WORKDIR /app
COPY --from=builder /app/target/release/general_notifier .
COPY --from=builder /app/.env .
COPY --from=builder /app/data ./data
COPY --from=builder /app/migrations ./migrations
ENTRYPOINT ["/app/general_notifier"]
6 changes: 3 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ services:
backend:
image: zakism/general-notifier:latest
depends_on:
- splash
- playwright
environment:
- DISCORD_TOKEN=$DISCORD_TOKEN
volumes:
- ./general_notifier_data:/data

splash:
image: scrapinghub/splash:master
playwright:
image: zakism/general-notifier-playwright:latest
ports:
- "127.0.0.1:8050:8050"
restart: always
175 changes: 175 additions & 0 deletions playwright_bridge/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore

# Logs

logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*

# Caches

.cache

# Diagnostic reports (https://nodejs.org/api/report.html)

report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json

# Runtime data

pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover

lib-cov

# Coverage directory used by tools like istanbul

coverage
*.lcov

# nyc test coverage

.nyc_output

# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)

.grunt

# Bower dependency directory (https://bower.io/)

bower_components

# node-waf configuration

.lock-wscript

# Compiled binary addons (https://nodejs.org/api/addons.html)

build/Release

# Dependency directories

node_modules/
jspm_packages/

# Snowpack dependency directory (https://snowpack.dev/)

web_modules/

# TypeScript cache

*.tsbuildinfo

# Optional npm cache directory

.npm

# Optional eslint cache

.eslintcache

# Optional stylelint cache

.stylelintcache

# Microbundle cache

.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/

# Optional REPL history

.node_repl_history

# Output of 'npm pack'

*.tgz

# Yarn Integrity file

.yarn-integrity

# dotenv environment variable files

.env
.env.development.local
.env.test.local
.env.production.local
.env.local

# parcel-bundler cache (https://parceljs.org/)

.parcel-cache

# Next.js build output

.next
out

# Nuxt.js build / generate output

.nuxt
dist

# Gatsby files

# Comment in the public line in if your project uses Gatsby and not Next.js

# https://nextjs.org/blog/next-9-1#public-directory-support

# public

# vuepress build output

.vuepress/dist

# vuepress v2.x temp and cache directory

.temp

# Docusaurus cache and generated files

.docusaurus

# Serverless directories

.serverless/

# FuseBox cache

.fusebox/

# DynamoDB Local files

.dynamodb/

# TernJS port file

.tern-port

# Stores VSCode versions used for testing VSCode extensions

.vscode-test

# yarn v2

.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*

# IntelliJ based IDEs
.idea

# Finder (MacOS) folder config
.DS_Store
26 changes: 26 additions & 0 deletions playwright_bridge/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Multi-stage build for the playwright_bridge service.

# base: Bun image with the application directory as working directory.
FROM oven/bun:1 as base
WORKDIR /usr/src/app

# install: resolve dependencies from the lockfile into two separate temp
# directories so later stages can copy whichever set they need.
FROM base AS install
# Full dependency set (no --production flag).
RUN mkdir -p /temp/dev
COPY package.json bun.lockb /temp/dev/
RUN cd /temp/dev && bun install --frozen-lockfile

# Production-only dependency set.
RUN mkdir -p /temp/prod
COPY package.json bun.lockb /temp/prod/
RUN cd /temp/prod && bun install --frozen-lockfile --production

# prerelease: full dependencies plus the project sources from the build context.
FROM base AS prerelease
COPY --from=install /temp/dev/node_modules node_modules
COPY . .

# NOTE(review): this ENV is set on the prerelease stage only; the final stage
# starts from a different image and copies files, so it does not inherit it.
ENV NODE_ENV=production

# Final image: Playwright base image plus production node_modules and the
# service entry point.
# NOTE(review): the image tag is v1.41.1 while package.json requests
# playwright ^1.41.2 - confirm the image's browsers match the installed package.
FROM mcr.microsoft.com/playwright:v1.41.1-jammy
# NOTE(review): no WORKDIR is set in this stage, so the relative destinations
# below resolve against the base image's working directory - confirm.
COPY --from=install /temp/prod/node_modules node_modules
COPY --from=prerelease /usr/src/app/index.ts .
COPY --from=prerelease /usr/src/app/package.json .
# Bun is installed through npm here; index.ts is run with it below.
RUN npm install -g bun

ENTRYPOINT [ "bun", "run", "index.ts" ]

15 changes: 15 additions & 0 deletions playwright_bridge/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# playwright_bridge

To install dependencies:

```bash
bun install
```

To run:

```bash
bun run index.ts
```

This project was created using `bun init` in bun v1.0.29. [Bun](https://bun.sh) is a fast all-in-one JavaScript runtime.
Binary file added playwright_bridge/bun.lockb
Binary file not shown.
66 changes: 66 additions & 0 deletions playwright_bridge/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import {webkit, type Browser} from 'playwright';

/**
 * Assertion helper: throws when `condition` is falsy and otherwise returns
 * normally, letting TypeScript narrow `condition` for the code that follows.
 *
 * @param condition - Value that must be truthy.
 * @param message - Text of the Error thrown when the check fails.
 * @throws Error carrying `message` when `condition` is falsy.
 */
function invariant(condition: any, message: string): asserts condition {
  if (!condition) {
    throw new Error(message);
  }
}

// Browser instance shared by all calls; launched on the first call to
// getPageSource and reused afterwards.
let browser: Browser;

/**
 * Opens `url` in a new WebKit page and returns the page's HTML content.
 *
 * @param url - Address to navigate to.
 * @param timeout - Navigation timeout in seconds (multiplied by 1000 for Playwright).
 * @returns The page content as a string.
 * @throws Error when no content could be read; the underlying failure is
 *   written to stderr first.
 */
const getPageSource = async (url: string, timeout: number) => {
  if (!browser) {
    try {
      browser = await webkit.launch();
    } catch (error) {
      // Nothing can be served without a browser, so stop the process.
      console.error(error);
      process.exit(1);
    }
  }

  let res: string | undefined;

  try {
    const page = await browser.newPage();

    try {
      await page.goto(url, {timeout: timeout * 1000});

      res = await page.content();
    } finally {
      // Close the page even when navigation fails, otherwise every failed
      // request leaves an open page behind in the shared browser.
      await page.close();
    }
  } catch (error) {
    // Interpolate the `url` argument, not the global `URL` constructor.
    console.error(`Failed to get ${url} due to: ${error}`);
  }

  invariant(
    res !== undefined,
    `Response was undefined when trying to to read page source for: ${url}`,
  );

  return res;
};

// HTTP server on port 8050. Each request must carry `url` and `timeout`
// (seconds) query parameters; the response body is the page source of `url`.
Bun.serve({
  async fetch(req: Request) {
    const {searchParams} = new URL(req.url);

    const url = searchParams.get('url');
    invariant(url !== null, 'url was null');

    // Validate the raw parameter before converting it: Number(null) and
    // Number('') are both 0, so a missing or blank timeout would pass a
    // NaN-only check and reach Playwright as 0 (timeout disabled).
    const rawTimeout = searchParams.get('timeout');
    invariant(
      rawTimeout !== null &&
        rawTimeout.trim() !== '' &&
        !Number.isNaN(Number(rawTimeout)),
      'timeout was null | NaN',
    );
    const timeout = Number(rawTimeout);

    try {
      const source = await getPageSource(url, timeout);

      return new Response(source);
    } catch (error) {
      // Interpolate the requested `url`, not the global `URL` constructor.
      throw new Error(
        `Failed to call 'getPageSource' for '${url}' due to: ${error}`,
      );
    }
  },
  port: 8050,
});
15 changes: 15 additions & 0 deletions playwright_bridge/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"name": "playwright_bridge",
"module": "index.ts",
"type": "module",
"devDependencies": {
"@types/bun": "latest",
"typescript": "^5.3.3"
},
"peerDependencies": {
"typescript": "^5.0.0"
},
"dependencies": {
"playwright": "^1.41.2"
}
}
27 changes: 27 additions & 0 deletions playwright_bridge/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"compilerOptions": {
// Enable latest features
"lib": ["ESNext"],
"target": "ESNext",
"module": "ESNext",
"moduleDetection": "force",
"jsx": "react-jsx",
"allowJs": true,

// Bundler mode
"moduleResolution": "node",
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"noEmit": true,

// Best practices
"strict": true,
"skipLibCheck": true,
"noFallthroughCasesInSwitch": true,

// Some stricter flags (disabled by default)
"noUnusedLocals": false,
"noUnusedParameters": false,
"noPropertyAccessFromIndexSignature": false
}
}
Loading

0 comments on commit fdb6178

Please sign in to comment.