Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions apps/server/src/api/routes/health.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,36 @@
* SPDX-License-Identifier: AGPL-3.0-or-later
*/

import { EXIT_CODES } from '@browseros/shared/constants/exit-codes'
import { Hono } from 'hono'
import type { Browser } from '../../browser/browser'
import { logger } from '../../lib/logger'

const HEALTH_CHECK_TIMEOUT = 5 * 60 * 1000 // 5 minutes

/** Optional collaborators that can be injected into the health route. */
interface HealthDeps {
// Browser whose CDP connection state the health handler reports; may be omitted.
browser?: Browser
}

/**
 * Builds the health-check route.
 *
 * Besides answering GET `/`, the route owns a watchdog timer: if no health
 * request arrives within HEALTH_CHECK_TIMEOUT the process logs an error and
 * exits with EXIT_CODES.GENERAL_ERROR.
 *
 * @param deps Optional dependencies; when `deps.browser` is absent the
 *   response reports `cdpConnected: true`.
 * @returns A Hono app whose GET `/` handler responds with
 *   `{ status: 'ok', cdpConnected }`.
 */
export function createHealthRoute(deps: HealthDeps = {}) {
// Handle of the currently armed watchdog timeout, or null before the first arm.
let watchdogTimer: ReturnType<typeof setTimeout> | null = null

// Cancels the armed watchdog (if any) and arms a fresh one.
function resetWatchdog() {
if (watchdogTimer) clearTimeout(watchdogTimer)
watchdogTimer = setTimeout(() => {
// NOTE(review): the message hard-codes "5 minutes" while the delay comes
// from HEALTH_CHECK_TIMEOUT; the two can drift if the constant changes.
logger.error(
'No health check received in 5 minutes, Chromium may be gone — exiting',
)
process.exit(EXIT_CODES.GENERAL_ERROR)
}, HEALTH_CHECK_TIMEOUT)
}

// Start the watchdog on creation
resetWatchdog()

// NOTE(review): the next line and the bare `return c.json({ status: 'ok' })`
// below look like the pre-change side of the diff rendered without +/-
// markers — confirm against the actual file.
export function createHealthRoute() {
return new Hono().get('/', (c) => {
return c.json({ status: 'ok' })
// Every health request counts as a sign of life and re-arms the watchdog.
resetWatchdog()
// Falls back to true when no browser was injected.
const cdpConnected = deps.browser?.isCdpConnected() ?? true
return c.json({ status: 'ok', cdpConnected })
})
}
2 changes: 1 addition & 1 deletion apps/server/src/api/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ export async function createHttpServer(config: HttpServerConfig) {

const app = new Hono<Env>()
.use('/*', cors(defaultCorsConfig))
.route('/health', createHealthRoute())
.route('/health', createHealthRoute({ browser }))
.route(
'/shutdown',
createShutdownRoute({ onShutdown: onShutdown ?? (() => {}) }),
Expand Down
113 changes: 105 additions & 8 deletions apps/server/src/browser/backends/cdp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import type { CdpTarget, CdpBackend as ICdpBackend } from './types'
/** Bookkeeping kept for a CDP request that has been sent but not yet settled. */
interface PendingRequest {
// Resolves the promise handed back to the caller of the request.
resolve: (value: unknown) => void
// Rejects the promise handed back to the caller of the request.
reject: (reason: Error) => void
// Timeout handle for this request; cleared whenever the request is settled.
timer: ReturnType<typeof setTimeout>
}

// biome-ignore lint/correctness/noUnusedVariables: declaration merging adds ProtocolApi properties to the class
Expand All @@ -28,6 +29,7 @@ class CdpBackend implements ICdpBackend {
private reconnecting = false
private eventHandlers = new Map<string, ((params: unknown) => void)[]>()
private sessionCache = new Map<string, ProtocolApi>()
private keepaliveTimer: ReturnType<typeof setInterval> | null = null

constructor(config: { port: number }) {
this.port = config.port
Expand All @@ -44,6 +46,7 @@ class CdpBackend implements ICdpBackend {
for (let attempt = 1; attempt <= maxRetries; attempt++) {
try {
await this.attemptConnect()
this.startKeepalive()
return
} catch (error) {
const msg = error instanceof Error ? error.message : String(error)
Expand Down Expand Up @@ -84,6 +87,8 @@ class CdpBackend implements ICdpBackend {
}

ws.onclose = () => {
// Guard against stale onclose from a replaced socket
if (this.ws !== ws) return
this.connected = false
this.ws = null
if (opened) this.handleUnexpectedClose()
Expand All @@ -97,37 +102,110 @@ class CdpBackend implements ICdpBackend {
})
}

private handleUnexpectedClose(): void {
/**
 * (Re)starts the periodic CDP liveness probe.
 *
 * Any previously scheduled probe is cancelled first. On each tick a
 * `Browser.getVersion` request is raced against a timeout; if the request
 * rejects or the timeout wins, the connection is passed to
 * `handleDeadConnection()`.
 */
private startKeepalive(): void {
  this.stopKeepalive()

  const pingEveryMs = TIMEOUTS.CDP_KEEPALIVE_INTERVAL
  const pingDeadlineMs = TIMEOUTS.CDP_KEEPALIVE_TIMEOUT

  const probe = async (): Promise<void> => {
    // Nothing to probe while the socket is absent, down, or being torn down.
    const live = this.ws && this.connected && !this.disconnecting
    if (!live) return

    let deadlineId: ReturnType<typeof setTimeout> | undefined
    let answered = false
    try {
      const reply = this.rawSend('Browser.getVersion')
      const deadline = new Promise((_, reject) => {
        deadlineId = setTimeout(
          () => reject(new Error('CDP keepalive timeout')),
          pingDeadlineMs,
        )
      })
      await Promise.race([reply, deadline])
      answered = true
    } catch {
      // Either the request failed or the deadline fired; handled below.
    }

    // The deadline timer is released on both the success and failure paths.
    clearTimeout(deadlineId)
    if (answered) return

    logger.warn('CDP keepalive failed, connection may be dead')
    this.handleDeadConnection()
  }

  this.keepaliveTimer = setInterval(probe, pingEveryMs)
}

/** Cancels the keepalive interval, if one is currently scheduled. */
private stopKeepalive(): void {
  const activeTimer = this.keepaliveTimer
  if (!activeTimer) return

  clearInterval(activeTimer)
  this.keepaliveTimer = null
}

/**
 * Tears down a WebSocket that has stopped responding without ever firing
 * `onclose`, then hands over to the regular unexpected-close handling so
 * that reconnection starts.
 *
 * Does nothing while an intentional disconnect or a reconnection is
 * already in progress.
 */
private handleDeadConnection(): void {
  const busy = this.disconnecting || this.reconnecting
  if (busy) return

  this.stopKeepalive()

  const socket = this.ws
  if (socket) {
    try {
      socket.close()
    } catch {
      // The socket is already unusable; a failing close() is expected here.
    }
    // Drop the reference so the socket's own onclose no longer matches it.
    this.ws = null
  }

  this.connected = false
  this.handleUnexpectedClose()
}

/**
 * Reacts to the CDP socket going away without `disconnect()` having been
 * called: stops the keepalive, rejects every in-flight request, and starts
 * a reconnection run. The `reconnecting` flag is set for the duration of
 * that run and cleared when it settles.
 */
private handleUnexpectedClose(): void {
// An intentional disconnect is in progress; nothing to recover.
if (this.disconnecting) return

// Allow re-entry if a previous reconnection already finished.
// The old guard `if (this.reconnecting) return` caused permanent
// death when a freshly reconnected socket closed again before
// the .finally() callback reset the flag.
// NOTE(review): this branch only logs and returns — nothing visible here
// schedules the retry the message promises. Confirm that the reconnection
// routine covers this case.
if (this.reconnecting) {
logger.warn(
'CDP closed again while reconnecting — will retry after current attempt',
)
return
}

this.stopKeepalive()
this.rejectPendingRequests()

logger.error(
'CDP WebSocket closed unexpectedly, attempting reconnection...',
)
this.reconnecting = true
// NOTE(review): both the old (`reconnectOrCrash`) and new
// (`reconnectWithRetries`) call lines appear here; presumably the first is
// the removed side of the diff — confirm against the actual file.
this.reconnectOrCrash().finally(() => {
this.reconnectWithRetries().finally(() => {
this.reconnecting = false
})
}

/**
 * Fails every in-flight CDP request with a shared "connection lost" error,
 * cancelling each request's timeout first, and then empties the pending map.
 */
private rejectPendingRequests(): void {
  const connectionLost = new Error('CDP connection lost')

  this.pending.forEach((entry) => {
    clearTimeout(entry.timer)
    entry.reject(connectionLost)
  })

  this.pending.clear()
}

private async reconnectOrCrash(): Promise<void> {
const maxRetries = CDP_LIMITS.CONNECT_MAX_RETRIES
const retryDelay = TIMEOUTS.CDP_CONNECT_RETRY_DELAY
private async reconnectWithRetries(): Promise<void> {
const maxRetries = CDP_LIMITS.RECONNECT_MAX_RETRIES
const delay = TIMEOUTS.CDP_RECONNECT_DELAY

for (let attempt = 1; attempt <= maxRetries; attempt++) {
if (this.disconnecting) return

try {
logger.info(`CDP reconnection attempt ${attempt}/${maxRetries}...`)
await Bun.sleep(retryDelay)
await Bun.sleep(delay)
await this.attemptConnect()
this.startKeepalive()
logger.info('CDP reconnected successfully')
return
} catch (error) {
Expand All @@ -146,11 +224,13 @@ class CdpBackend implements ICdpBackend {

/**
 * Intentionally shuts the CDP connection down.
 *
 * Marks the backend as disconnecting, stops the keepalive, closes and
 * forgets the socket when one exists, and rejects all requests that are
 * still waiting for a response.
 */
async disconnect(): Promise<void> {
  this.disconnecting = true
  this.stopKeepalive()

  const socket = this.ws
  if (socket) {
    socket.close()
    this.ws = null
    this.connected = false
  }

  this.rejectPendingRequests()
}

isConnected(): boolean {
Expand Down Expand Up @@ -203,8 +283,24 @@ class CdpBackend implements ICdpBackend {

const ws = this.ws
return new Promise<unknown>((resolve, reject) => {
this.pending.set(id, { resolve, reject })
ws.send(JSON.stringify(message))
const timer = setTimeout(() => {
this.pending.delete(id)
reject(new Error(`CDP request timeout: ${method} (id=${id})`))
}, TIMEOUTS.CDP_REQUEST_TIMEOUT)

this.pending.set(id, { resolve, reject, timer })

try {
ws.send(JSON.stringify(message))
} catch (err) {
clearTimeout(timer)
this.pending.delete(id)
const msg = err instanceof Error ? err.message : String(err)
reject(new Error(`CDP send failed: ${msg}`))

// send() failure likely means the socket is dead
this.handleDeadConnection()
}
})
}

Expand Down Expand Up @@ -237,6 +333,7 @@ class CdpBackend implements ICdpBackend {
if (message.id !== undefined) {
const pending = this.pending.get(message.id)
if (pending) {
clearTimeout(pending.timer)
this.pending.delete(message.id)
if (message.error) {
pending.reject(new Error(`CDP error: ${message.error.message}`))
Expand Down
4 changes: 4 additions & 0 deletions apps/server/src/browser/browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ export class Browser {
this.setupEventHandlers()
}

/**
 * Reports the connection state of the underlying CDP backend.
 *
 * @returns Whatever `this.cdp.isConnected()` currently returns.
 */
isCdpConnected(): boolean {
return this.cdp.isConnected()
}

private setupEventHandlers(): void {
this.cdp.Target.on('detachedFromTarget', (params) => {
if (params.sessionId) {
Expand Down
1 change: 1 addition & 0 deletions packages/shared/src/constants/limits.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export const PAGINATION = {

/** Retry budgets for the CDP connection. */
export const CDP_LIMITS = {
// Maximum number of connection attempts.
CONNECT_MAX_RETRIES: 3,
// Maximum number of reconnection attempts after the connection is lost.
RECONNECT_MAX_RETRIES: 3,
} as const

export const CONTENT_LIMITS = {
Expand Down
4 changes: 4 additions & 0 deletions packages/shared/src/constants/timeouts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ export const TIMEOUTS = {
// CDP connection
CDP_CONNECT: 10_000,
CDP_CONNECT_RETRY_DELAY: 1_000,
CDP_RECONNECT_DELAY: 5_000,
CDP_KEEPALIVE_INTERVAL: 30_000,
CDP_KEEPALIVE_TIMEOUT: 10_000,
CDP_REQUEST_TIMEOUT: 60_000,

// External API calls
KLAVIS_FETCH: 30_000,
Expand Down
Loading