From 12e1239fe05bd9b2fc29eb1296a24bc1a73ef291 Mon Sep 17 00:00:00 2001
From: Fabien Penso <gpg@pen.so>
Date: Wed, 11 Feb 2026 10:22:20 -0800
Subject: [PATCH] test(ui): add mock OAuth provider e2e flow

---
 crates/gateway/ui/e2e/mock-oauth-server.js   | 223 ++++++++++++++++
 crates/gateway/ui/e2e/specs/oauth.spec.js    | 247 +++++++++++++++++
 crates/gateway/ui/e2e/start-gateway-oauth.sh | 103 ++++++++
 crates/gateway/ui/playwright.config.js       |  33 ++-
 plans/e2e-tests-oauth-provider-connection.md | 263 +++++++++++++++++++
 5 files changed, 863 insertions(+), 6 deletions(-)
 create mode 100644 crates/gateway/ui/e2e/mock-oauth-server.js
 create mode 100644 crates/gateway/ui/e2e/specs/oauth.spec.js
 create mode 100755 crates/gateway/ui/e2e/start-gateway-oauth.sh
 create mode 100644 plans/e2e-tests-oauth-provider-connection.md
diff --git a/crates/gateway/ui/e2e/mock-oauth-server.js b/crates/gateway/ui/e2e/mock-oauth-server.js
new file mode 100644
index 00000000..7cc3ff70
--- /dev/null
+++ b/crates/gateway/ui/e2e/mock-oauth-server.js
@@ -0,0 +1,223 @@
+// Mock OAuth server for E2E testing.
+// Implements /authorize (PKCE + state), /token (code exchange + refresh), /calls (request log), /config (toggle /token failures) and /reset (clear state).
+// Usage: node mock-oauth-server.js
+// Prints JSON to stdout: { "port": <number> }
+
+const http = require("node:http");
+const crypto = require("node:crypto");
+
+var calls = [];
+// Map of state -> { challenge, redirectUri }
+var pendingFlows = new Map();
+// Tracks issued auth codes -> { state } so /token can verify
+var issuedCodes = new Map();
+// Whether /token should return errors (toggled via /config)
+var tokenShouldFail = false;
+
+function parseRequestUrl(req) {
+	return new URL(req.url, "http://127.0.0.1"); // base only makes req.url parseable; the request handler reads just pathname and searchParams
+}
+
+function queryObject(searchParams) {
+	var query = {};
+	for (const [key, value] of searchParams.entries()) {
+		query[key] = value;
+	}
+	return query;
+}
+
+function base64UrlEncode(buffer) {
+	return buffer.toString("base64").replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
+}
+
+function verifyPkce(verifier, challenge) {
+	var hash = crypto.createHash("sha256").update(verifier).digest();
+	var expected = base64UrlEncode(hash);
+	return expected === challenge;
+}
+
+function parseBody(req) {
+	return new Promise((resolve) => {
+		var chunks = [];
+		req.on("data", (c) => chunks.push(c));
+		req.on("end", () => {
+			var body = Buffer.concat(chunks).toString();
+			resolve(new URLSearchParams(body));
+		});
+	});
+}
+
+function respond(res, status, body) {
+	var json = JSON.stringify(body);
+	res.writeHead(status, { "Content-Type": "application/json" });
+	res.end(json);
+}
+
+function handleAuthorize(res, query) {
+	if (!(query.client_id && query.redirect_uri && query.code_challenge && query.state)) {
+		return respond(res, 400, {
+			error: "invalid_request",
+			error_description: "Missing required parameters: client_id, redirect_uri, code_challenge, state",
+		});
+	}
+	if (query.code_challenge_method !== "S256") {
+		return respond(res, 400, {
+			error: "invalid_request",
+			error_description: "Only S256 code_challenge_method is supported",
+		});
+	}
+
+	pendingFlows.set(query.state, {
+		challenge: query.code_challenge,
+		redirectUri: query.redirect_uri,
+	});
+
+	var authCode = `mock-auth-code-${crypto.randomBytes(8).toString("hex")}`;
+	issuedCodes.set(authCode, { state: query.state });
+
+	var redirectUrl = new URL(query.redirect_uri);
+	redirectUrl.searchParams.set("code", authCode);
+	redirectUrl.searchParams.set("state", query.state);
+
+	res.writeHead(302, { Location: redirectUrl.toString() });
+	res.end();
+}
+
+function handleTokenExchange(res, body) {
+	var authCode = body.get("code");
+	var verifier = body.get("code_verifier");
+	var clientId = body.get("client_id");
+
+	if (!(authCode && verifier && clientId)) {
+		return respond(res, 400, {
+			error: "invalid_request",
+			error_description: "Missing code, code_verifier, or client_id",
+		});
+	}
+
+	var codeEntry = issuedCodes.get(authCode);
+	if (!codeEntry) {
+		return respond(res, 400, {
+			error: "invalid_grant",
+			error_description: "Unknown or expired authorization code",
+		});
+	}
+
+	var flow = pendingFlows.get(codeEntry.state);
+	if (!flow) {
+		return respond(res, 400, {
+			error: "invalid_grant",
+			error_description: "No pending flow for this state",
+		});
+	}
+
+	if (!verifyPkce(verifier, flow.challenge)) {
+		return respond(res, 400, {
+			error: "invalid_grant",
+			error_description: "PKCE verification failed",
+		});
+	}
+
+	issuedCodes.delete(authCode);
+	pendingFlows.delete(codeEntry.state);
+
+	return respond(res, 200, {
+		access_token: `mock-access-token-${crypto.randomBytes(8).toString("hex")}`,
+		refresh_token: `mock-refresh-token-${crypto.randomBytes(8).toString("hex")}`,
+		token_type: "Bearer",
+		expires_in: 3600,
+	});
+}
+
+function handleTokenRefresh(res, body) {
+	var refreshToken = body.get("refresh_token");
+	if (!refreshToken) {
+		return respond(res, 400, {
+			error: "invalid_request",
+			error_description: "Missing refresh_token",
+		});
+	}
+
+	return respond(res, 200, {
+		access_token: `mock-refreshed-token-${crypto.randomBytes(8).toString("hex")}`,
+		refresh_token: `mock-refresh-token-${crypto.randomBytes(8).toString("hex")}`,
+		token_type: "Bearer",
+		expires_in: 3600,
+	});
+}
+
+async function handleToken(res, req) {
+	var body = await parseBody(req);
+	var grantType = body.get("grant_type");
+
+	if (tokenShouldFail) {
+		return respond(res, 400, {
+			error: "server_error",
+			error_description: "Mock server configured to return errors",
+		});
+	}
+
+	if (grantType === "authorization_code") {
+		return handleTokenExchange(res, body);
+	}
+	if (grantType === "refresh_token") {
+		return handleTokenRefresh(res, body);
+	}
+
+	return respond(res, 400, {
+		error: "unsupported_grant_type",
+		error_description: `Unsupported grant_type: ${grantType}`,
+	});
+}
+
+async function handleConfig(req) {
+	var configBody = await parseBody(req);
+	if (configBody.has("token_should_fail")) {
+		tokenShouldFail = configBody.get("token_should_fail") === "true";
+	}
+}
+
+function handleReset() {
+	calls = [];
+	pendingFlows.clear();
+	issuedCodes.clear();
+	tokenShouldFail = false;
+}
+
+var server = http.createServer(async (req, res) => {
+	var parsed = parseRequestUrl(req);
+	var pathname = parsed.pathname;
+	var query = queryObject(parsed.searchParams);
+
+	calls.push({
+		method: req.method,
+		path: pathname,
+		query,
+		timestamp: Date.now(),
+	});
+
+	if (req.method === "GET" && pathname === "/authorize") {
+		return handleAuthorize(res, query);
+	}
+	if (req.method === "POST" && pathname === "/token") {
+		return handleToken(res, req);
+	}
+	if (req.method === "GET" && pathname === "/calls") {
+		return respond(res, 200, calls);
+	}
+	if (req.method === "POST" && pathname === "/config") {
+		await handleConfig(req);
+		return respond(res, 200, { ok: true });
+	}
+	if (req.method === "POST" && pathname === "/reset") {
+		handleReset();
+		return respond(res, 200, { ok: true });
+	}
+
+	respond(res, 404, { error: "not_found" });
+});
+
+server.listen(0, "127.0.0.1", () => {
+	var port = server.address().port;
+	process.stdout.write(`${JSON.stringify({ port })}\n`);
+});
diff --git a/crates/gateway/ui/e2e/specs/oauth.spec.js b/crates/gateway/ui/e2e/specs/oauth.spec.js
new file mode 100644
index 00000000..c402d42a
--- /dev/null
+++ b/crates/gateway/ui/e2e/specs/oauth.spec.js
@@ -0,0 +1,247 @@
+const { expect, test } = require("@playwright/test");
+const fs = require("node:fs");
+const path = require("node:path");
+const { navigateAndWait, watchPageErrors, expectPageContentMounted, waitForWsConnected } = require("../helpers");
+
+// Resolve paths relative to the repo root
+var repoRoot = path.resolve(__dirname, "../../../../..");
+var runtimeRoot = process.env.MOLTIS_E2E_OAUTH_RUNTIME_DIR || path.join(repoRoot, "target/e2e-runtime-oauth");
+var runtimeConfigDir = path.join(runtimeRoot, "config");
+var runtimeHomeConfigDir = path.join(runtimeRoot, "home", ".config", "moltis");
+
+function getMockPort() {
+	var portFile = path.join(runtimeRoot, "mock-oauth-port");
+	try {
+		return parseInt(fs.readFileSync(portFile, "utf8").trim(), 10);
+	} catch {
+		return null;
+	}
+}
+
+function mockUrl(mockPort, pathname) {
+	return `http://127.0.0.1:${mockPort}${pathname}`; // pathname is appended verbatim, so it must start with "/"
+}
+
+async function resetMockServer(mockPort) {
+	var res = await fetch(mockUrl(mockPort, "/reset"), { method: "POST" });
+	return res.ok;
+}
+
+async function getMockCalls(mockPort) {
+	var res = await fetch(mockUrl(mockPort, "/calls"));
+	return res.json();
+}
+
+async function configureMock(mockPort, options) {
+	var body = new URLSearchParams(options);
+	var res = await fetch(mockUrl(mockPort, "/config"), { method: "POST", body });
+	return res.ok;
+}
+
+function removeIfExists(filePath) {
+	try {
+		fs.rmSync(filePath, { force: true }); // force: a missing path is not treated as an error
+	} catch {
+		// Best-effort cleanup only: any other failure is deliberately ignored.
+	}
+}
+
+function resetRuntimeAuthState() {
+	removeIfExists(path.join(runtimeConfigDir, "oauth_tokens.json"));
+	removeIfExists(path.join(runtimeConfigDir, "provider_keys.json"));
+	removeIfExists(path.join(runtimeHomeConfigDir, "oauth_tokens.json"));
+	removeIfExists(path.join(runtimeHomeConfigDir, "provider_keys.json"));
+}
+
+async function openProviderPicker(page) {
+	await waitForWsConnected(page);
+	await page.getByRole("button", { name: "Add LLM" }).click(); // opens the provider modal
+	var codexCard = page.locator("#providerModalBody .provider-item").filter({ hasText: "OpenAI Codex" }).first();
+	await expect(codexCard).toBeVisible();
+	return codexCard; // locator for the "OpenAI Codex" entry, already asserted visible
+}
+
+test.describe("OAuth provider connection", () => {
+	var mockPort;
+
+	test.beforeAll(() => {
+		mockPort = getMockPort();
+		if (!mockPort) { // null (port file unreadable), NaN (content not numeric) or 0
+			throw new Error(`Could not read mock OAuth server port from ${path.join(runtimeRoot, "mock-oauth-port")}`);
+		}
+	});
+
+	test.beforeEach(async () => {
+		resetRuntimeAuthState();
+		if (mockPort) await resetMockServer(mockPort);
+	});
+
+	test("provider list shows OAuth providers", async ({ page }) => {
+		var pageErrors = watchPageErrors(page);
+		await navigateAndWait(page, "/settings/providers");
+
+		await expect(page.getByRole("heading", { name: "LLMs" })).toBeVisible();
+
+		// Open the "Add LLM" picker and check that the OpenAI Codex entry carries the OAuth badge.
+		var codexCard = await openProviderPicker(page);
+		await expect(codexCard.locator(".provider-item-badge.oauth")).toHaveText("OAuth");
+
+		expect(pageErrors).toEqual([]);
+	});
+
+	test("OAuth PKCE flow completes successfully", async ({ page, context }) => {
+		var pageErrors = watchPageErrors(page);
+		await navigateAndWait(page, "/settings/providers");
+
+		// Click "Add LLM" to open provider modal.
+		var codexCard = await openProviderPicker(page);
+
+		// Click on OpenAI Codex to start the OAuth flow.
+		await codexCard.click();
+		await expect(page.getByRole("button", { name: "Connect" })).toBeVisible();
+
+		// Register the popup listener before the click that opens it, so the event cannot be missed.
+		var popupPromise = context.waitForEvent("page", { timeout: 10_000 });
+
+		// Click "Connect" to start the OAuth flow; this opens the popup awaited below.
+		await page.getByRole("button", { name: "Connect" }).click();
+
+		// The popup navigates to the mock server /authorize, which redirects
+		// back to the gateway's /auth/callback with code + state. The gateway
+		// exchanges the code and stores tokens.
+		var popup = await popupPromise;
+		// Callback success page may auto-close very quickly.
+		if (!popup.isClosed()) {
+			await popup.waitForEvent("close", { timeout: 10_000 }).catch(() => {
+				// Continue: main-page polling and mock call assertions below verify success.
+			});
+		}
+
+		// Back in the main page, wait for the polling to detect the authenticated state.
+		// The UI should either show "connected" or transition to a model selector.
+		await expect(page.getByText(/connected successfully|Select Model/i)).toBeVisible({ timeout: 15_000 });
+
+		// Verify the mock server received exactly one /authorize and one /token request.
+		var calls = await getMockCalls(mockPort);
+		var authorizeCalls = calls.filter((c) => c.path === "/authorize");
+		var tokenCalls = calls.filter((c) => c.path === "/token");
+
+		expect(authorizeCalls.length).toBe(1);
+		expect(tokenCalls.length).toBe(1);
+
+		// Verify the authorize call had PKCE params and the client id configured by the start script.
+		var authCall = authorizeCalls[0];
+		expect(authCall.query.code_challenge).toBeTruthy();
+		expect(authCall.query.code_challenge_method).toBe("S256");
+		expect(authCall.query.state).toBeTruthy();
+		expect(authCall.query.client_id).toBe("test-client-id");
+
+		expect(pageErrors).toEqual([]);
+	});
+
+	test("OAuth state mismatch is rejected", async ({ page }) => {
+		var _pageErrors = watchPageErrors(page);
+
+		// Navigate directly to the callback with a bogus state
+		var response = await page.goto("/auth/callback?code=fake-code&state=wrong-state");
+
+		// The gateway should return a 400 error
+		expect(response.status()).toBe(400);
+		await expect(page.getByText("Authentication failed")).toBeVisible();
+
+		// _pageErrors is intentionally not asserted: the error page is a simple HTML page, so the
+		// list may well be empty, but this test makes no claim about it either way.
+	});
+
+	test("disconnect removes provider tokens", async ({ page, context }) => {
+		var pageErrors = watchPageErrors(page);
+		await navigateAndWait(page, "/settings/providers");
+
+		// First, connect the provider through the same popup flow as the PKCE test.
+		var codexCard = await openProviderPicker(page);
+		await codexCard.click();
+		await expect(page.getByRole("button", { name: "Connect" })).toBeVisible();
+
+		var popupPromise = context.waitForEvent("page", { timeout: 10_000 }); // registered before the click that opens the popup
+		await page.getByRole("button", { name: "Connect" }).click();
+		var popup = await popupPromise;
+		if (!popup.isClosed()) {
+			await popup.waitForEvent("close", { timeout: 10_000 }).catch(() => {
+				// Continue: main-page polling and follow-up assertions verify success.
+			});
+		}
+
+		// Wait for connection to complete.
+		await expect(page.getByText(/connected successfully|Select Model/i)).toBeVisible({ timeout: 15_000 });
+
+		// Close the modal if a model picker is still open.
+		var modalClose = page.locator("#providerModalClose");
+		if (await modalClose.isVisible().catch(() => false)) {
+			await modalClose.click();
+		}
+		await expect(page.locator("#providerModal")).toHaveClass(/hidden/);
+
+		// Navigate to providers page to see the connected provider.
+		await navigateAndWait(page, "/settings/providers");
+		await expectPageContentMounted(page);
+
+		// The provider should appear in the list. Delete it and confirm; only the UI is checked, not the token files.
+		var configuredList = page.locator("#pageContent");
+		var codexHeading = configuredList.getByRole("heading", { name: "OpenAI Codex" });
+		await expect(codexHeading).toBeVisible();
+		var codexSection = codexHeading.locator("xpath=ancestor::div[contains(@class, 'max-w-form')]").first();
+		await codexSection.getByRole("button", { name: "Delete" }).click();
+		await page.getByRole("button", { name: "Confirm" }).click();
+		await expect(configuredList.getByRole("heading", { name: "OpenAI Codex" })).toHaveCount(0);
+
+		expect(pageErrors).toEqual([]);
+	});
+
+	test("token exchange failure shows error", async ({ page, context }) => {
+		var pageErrors = watchPageErrors(page);
+		await navigateAndWait(page, "/settings/providers");
+
+		// Configure mock to fail token exchange (beforeEach resets this flag for the next test).
+		await configureMock(mockPort, { token_should_fail: "true" });
+
+		// Start OAuth flow.
+		var codexCard = await openProviderPicker(page);
+		await codexCard.click();
+		await expect(page.getByRole("button", { name: "Connect" })).toBeVisible();
+
+		var popupPromise = context.waitForEvent("page", { timeout: 10_000 });
+		await page.getByRole("button", { name: "Connect" }).click();
+
+		var popup = await popupPromise;
+		await popup.waitForLoadState("domcontentloaded");
+
+		// The callback should fail because the mock /token returns 400
+		await expect(popup.getByText("Authentication failed")).toBeVisible({ timeout: 10_000 });
+
+		// Only the popup is asserted here; the main page is not checked (its polling would have
+		// to time out first, since tokens were never stored).
+		// Verify that the mock still received both the /authorize and the /token request.
+		var calls = await getMockCalls(mockPort);
+		var authorizeCalls = calls.filter((c) => c.path === "/authorize");
+		var tokenCalls = calls.filter((c) => c.path === "/token");
+
+		expect(authorizeCalls.length).toBe(1);
+		expect(tokenCalls.length).toBe(1);
+
+		expect(pageErrors).toEqual([]);
+	});
+
+	test("missing callback code returns 400", async ({ page }) => {
+		// The callback URL carries `state` but no `code` parameter.
+		var response = await page.goto("/auth/callback?state=some-state");
+		expect(response.status()).toBe(400);
+		await expect(page.getByText("Missing authorization code")).toBeVisible();
+	});
+
+	test("missing callback state returns 400", async ({ page }) => {
+		// The callback URL carries `code` but no `state` parameter.
+		var response = await page.goto("/auth/callback?code=some-code");
+		expect(response.status()).toBe(400);
+		await expect(page.getByText("Missing OAuth state")).toBeVisible();
+	});
+});
diff --git a/crates/gateway/ui/e2e/start-gateway-oauth.sh b/crates/gateway/ui/e2e/start-gateway-oauth.sh
new file mode 100755
index 00000000..1337005e
--- /dev/null
+++ b/crates/gateway/ui/e2e/start-gateway-oauth.sh
@@ -0,0 +1,103 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Start the mock OAuth server + gateway with OAuth config overrides.
+# The mock server's /authorize and /token endpoints replace the real
+# OpenAI Codex OAuth endpoints, letting us test the full browser-side
+# OAuth PKCE flow without any external dependencies.
+
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd -- "${SCRIPT_DIR}/../../../.." && pwd)"
+
+PORT="${MOLTIS_E2E_OAUTH_PORT:-0}"
+RUNTIME_ROOT="${MOLTIS_E2E_OAUTH_RUNTIME_DIR:-${REPO_ROOT}/target/e2e-runtime-oauth}"
+CONFIG_DIR="${RUNTIME_ROOT}/config"
+DATA_DIR="${RUNTIME_ROOT}/data"
+HOME_DIR="${RUNTIME_ROOT}/home"
+
+rm -rf "${RUNTIME_ROOT}"
+mkdir -p "${CONFIG_DIR}" "${DATA_DIR}" "${HOME_DIR}/.config" "${HOME_DIR}/.codex"
+
+# Seed identity files so the app skips onboarding
+cat > "${DATA_DIR}/IDENTITY.md" <<'EOF'
+---
+name: e2e-bot
+---
+
+# IDENTITY.md
+
+This file is managed by Moltis settings.
+EOF
+
+cat > "${DATA_DIR}/USER.md" <<'EOF'
+---
+name: e2e-user
+---
+
+# USER.md
+
+This file is managed by Moltis settings.
+EOF
+
+# Start mock OAuth server and capture its port. The EXIT trap is installed before the spawn so
+# any early exit below (startup timeout, `set -e` abort) stops the server and removes the temp
+# file. NOTE(review): the final `exec` replaces this shell, so the trap cannot fire after that.
+MOCK_PORT_FILE=$(mktemp)
+cleanup() {
+	if [ -n "${MOCK_PID:-}" ]; then kill "${MOCK_PID}" 2>/dev/null || true; fi
+	rm -f "${MOCK_PORT_FILE}"
+}
+trap cleanup EXIT
+node "${SCRIPT_DIR}/mock-oauth-server.js" > "${MOCK_PORT_FILE}" &
+MOCK_PID=$!
+
+# Wait for the mock server to print its port (up to 5 seconds)
+for i in $(seq 1 50); do
+	if [ -s "${MOCK_PORT_FILE}" ]; then
+		break
+	fi
+	sleep 0.1
+done
+
+if [ ! -s "${MOCK_PORT_FILE}" ]; then
+	echo "ERROR: mock OAuth server did not start" >&2
+	exit 1
+fi
+
+# The temp file path is passed as an argument instead of being spliced into the JS source.
+MOCK_PORT=$(node -e "process.stdout.write(String(JSON.parse(require('fs').readFileSync(process.argv[1],'utf8').trim()).port))" "${MOCK_PORT_FILE}")
+echo "Mock OAuth server running on port ${MOCK_PORT}" >&2
+
+# Write the mock port for the test spec to read
+echo "${MOCK_PORT}" > "${RUNTIME_ROOT}/mock-oauth-port"
+
+cd "${REPO_ROOT}"
+
+export MOLTIS_CONFIG_DIR="${CONFIG_DIR}"
+export MOLTIS_DATA_DIR="${DATA_DIR}"
+export MOLTIS_SERVER__PORT="${PORT}"
+export HOME="${HOME_DIR}"
+export XDG_CONFIG_HOME="${HOME_DIR}/.config"
+
+# Override OAuth config for openai-codex to point at the mock server
+export MOLTIS_OAUTH_OPENAI_CODEX_AUTH_URL="http://127.0.0.1:${MOCK_PORT}/authorize"
+export MOLTIS_OAUTH_OPENAI_CODEX_TOKEN_URL="http://127.0.0.1:${MOCK_PORT}/token"
+export MOLTIS_OAUTH_OPENAI_CODEX_CLIENT_ID="test-client-id"
+# Ensure the Add LLM picker shows the OpenAI Codex provider in this e2e project.
+export MOLTIS_PROVIDERS__OFFERED='["openai-codex","openai","github-copilot"]'
+
+# Prefer a pre-built binary to avoid recompiling every test run.
+BINARY="${MOLTIS_BINARY:-}"
+if [ -z "${BINARY}" ]; then
+	for candidate in target/debug/moltis target/release/moltis; do
+		if [ -x "${candidate}" ] && { [ -z "${BINARY}" ] || [ "${candidate}" -nt "${BINARY}" ]; }; then
+			BINARY="${candidate}"
+		fi
+	done
+fi
+
+if [ -n "${BINARY}" ]; then
+	exec "${BINARY}" --no-tls --bind 127.0.0.1 --port "${PORT}"
+else
+	exec cargo run --bin moltis -- --no-tls --bind 127.0.0.1 --port "${PORT}"
+fi
diff --git a/crates/gateway/ui/playwright.config.js b/crates/gateway/ui/playwright.config.js
index 2dc51740..a70048b9 100644
--- a/crates/gateway/ui/playwright.config.js
+++ b/crates/gateway/ui/playwright.config.js
@@ -1,5 +1,5 @@
 const { defineConfig } = require("@playwright/test");
-const { execFileSync } = require("child_process");
+const { execFileSync } = require("node:child_process");
 
 function pickFreePort() {
 	return execFileSync(
@@ -12,18 +12,18 @@ function pickFreePort() {
 	).trim();
 }
 
-function resolvePort(envVar, usedPorts) {
+function resolvePort(envVar, usedPortSet) {
 	var configured = process.env[envVar];
 	if (configured && configured !== "0") {
-		usedPorts.add(configured);
+		usedPortSet.add(configured);
 		return configured;
 	}
 	var picked = pickFreePort();
-	while (usedPorts.has(picked)) {
+	while (usedPortSet.has(picked)) {
 		picked = pickFreePort();
 	}
 	process.env[envVar] = picked;
-	usedPorts.add(picked);
+	usedPortSet.add(picked);
 	return picked;
 }
 
@@ -37,6 +37,9 @@ const onboardingBaseURL = process.env.MOLTIS_E2E_ONBOARDING_BASE_URL || `http://
 const onboardingAuthPort = resolvePort("MOLTIS_E2E_ONBOARDING_AUTH_PORT", usedPorts);
 const onboardingAuthBaseURL = `http://127.0.0.1:${onboardingAuthPort}`;
 
+const oauthPort = resolvePort("MOLTIS_E2E_OAUTH_PORT", usedPorts);
+const oauthBaseURL = `http://127.0.0.1:${oauthPort}`;
+
 module.exports = defineConfig({
 	testDir: "./e2e/specs",
 	timeout: 45_000,
@@ -57,7 +60,7 @@ module.exports = defineConfig({
 	projects: [
 		{
 			name: "default",
-			testIgnore: [/auth\.spec/, /onboarding\.spec/, /onboarding-auth\.spec/],
+			testIgnore: [/auth\.spec/, /onboarding\.spec/, /onboarding-auth\.spec/, /oauth\.spec/],
 		},
 		{
 			name: "auth",
@@ -78,6 +81,13 @@ module.exports = defineConfig({
 				baseURL: onboardingAuthBaseURL,
 			},
 		},
+		{
+			name: "oauth",
+			testMatch: /oauth\.spec/,
+			use: {
+				baseURL: oauthBaseURL,
+			},
+		},
 	],
 	webServer: [
 		{
@@ -113,5 +123,16 @@ module.exports = defineConfig({
 				MOLTIS_E2E_ONBOARDING_AUTH_PORT: onboardingAuthPort,
 			},
 		},
+		{
+			command: "./e2e/start-gateway-oauth.sh",
+			cwd: __dirname,
+			url: `${oauthBaseURL}/health`,
+			reuseExistingServer: !process.env.CI,
+			timeout: 300_000,
+			env: {
+				...process.env,
+				MOLTIS_E2E_OAUTH_PORT: oauthPort,
+			},
+		},
 	],
 });
diff --git a/plans/e2e-tests-oauth-provider-connection.md b/plans/e2e-tests-oauth-provider-connection.md
new file mode 100644
index 00000000..b158451d
--- /dev/null
+++ b/plans/e2e-tests-oauth-provider-connection.md
@@ -0,0 +1,263 @@
+# E2E Tests for OAuth Provider Connection
+
+## Goal
+
+Add end-to-end tests that exercise the OAuth provider connection flow in the
+gateway UI. Two complementary approaches: a mock OAuth server for CI (no secrets
+needed), and optional real-token tests for verifying refresh flows against live
+providers.
+
+---
+
+## Approach 1 — Mock OAuth Server (CI-safe, no secrets)
+
+### What it tests
+
+The full browser-side OAuth flow: user clicks "Connect" → `start_oauth` RPC →
+redirect to auth URL → callback with code → `complete_oauth` RPC → token stored
+→ provider shows as connected. This proves the gateway's own code paths work
+without depending on external providers.
+
+### Implementation steps
+
+#### 1. Create `e2e/mock-oauth-server.js`
+
+A minimal Node HTTP server that implements two endpoints:
+
+- **`GET /authorize`** — validates `client_id`, `redirect_uri`, `code_challenge`,
+  `state` query params. Immediately redirects back to `redirect_uri` with a
+  predictable `code=mock-auth-code&state=<state>`.
+- **`POST /token`** — validates `grant_type`, `code`, `code_verifier`. Returns
+  a JSON token response:
+  ```json
+  {
+    "access_token": "mock-access-token",
+    "refresh_token": "mock-refresh-token",
+    "token_type": "Bearer",
+    "expires_in": 3600
+  }
+  ```
+  For `grant_type=refresh_token`, return a new `access_token` with a different
+  value to prove refresh worked.
+
+The server should:
+- Listen on port 0 (OS-assigned) and write the port to a temp file or stdout
+- Validate PKCE: verify that `SHA256(code_verifier) == code_challenge` from the
+  authorize step (store challenge in memory keyed by state)
+- Return 400 with descriptive errors for invalid requests (helps debug test
+  failures)
+- Support a `GET /calls` endpoint that returns request history for assertions
+
+#### 2. Create `e2e/start-gateway-oauth.sh`
+
+New startup script (similar to existing `start-gateway.sh`) that:
+
+- Starts the mock OAuth server first, captures its port
+- Sets env vars to override the built-in OAuth config:
+  ```bash
+  export MOLTIS_OAUTH_OPENAI_CODEX_AUTH_URL="http://127.0.0.1:${MOCK_PORT}/authorize"
+  export MOLTIS_OAUTH_OPENAI_CODEX_TOKEN_URL="http://127.0.0.1:${MOCK_PORT}/token"
+  export MOLTIS_OAUTH_OPENAI_CODEX_CLIENT_ID="test-client-id"
+  export MOLTIS_OAUTH_OPENAI_CODEX_REDIRECT_URI="http://localhost:1455/auth/callback"
+  ```
+- Starts the moltis gateway as usual (isolated data dir, no TLS)
+- Seeds identity/user files like the default script
+
+#### 3. Add Playwright project in `playwright.config.js`
+
+```js
+{
+  name: "oauth",
+  testMatch: /oauth\.spec/,
+  use: { baseURL: `http://127.0.0.1:${process.env.MOLTIS_E2E_OAUTH_PORT || 4010}` },
+}
+```
+
+With a corresponding `webServer` entry that runs `start-gateway-oauth.sh`.
+
+#### 4. Create `e2e/specs/oauth.spec.js`
+
+Test cases:
+
+1. **Provider list shows OAuth providers as disconnected**
+   - Navigate to providers page
+   - Assert OpenAI Codex shows "Connect" button (not "Connected")
+
+2. **OAuth PKCE flow completes successfully**
+   - Click "Connect" on OpenAI Codex
+   - Playwright follows the redirect to mock server → callback → back to gateway
+   - Assert provider now shows as "Connected"
+   - Assert mock server received valid PKCE challenge/verifier pair
+
+3. **OAuth state mismatch is rejected**
+   - Manually navigate to `/auth/callback?code=x&state=wrong-state`
+   - Assert error is shown, provider stays disconnected
+
+4. **Token refresh works**
+   - Pre-seed `oauth_tokens.json` with an expired mock token
+   - Trigger an action that requires the token (e.g. list models)
+   - Assert mock server received a refresh request
+   - Assert new token is stored
+
+5. **Disconnect removes tokens**
+   - Start with a connected provider (pre-seed tokens)
+   - Click "Disconnect" / delete
+   - Assert `oauth_tokens.json` no longer has the provider entry
+   - Assert UI shows "Connect" again
+
+6. **Error handling — token exchange fails**
+   - Configure mock server to return 400 on `/token`
+   - Attempt OAuth flow
+   - Assert user sees an error message, provider stays disconnected
+
+#### 5. Update CI workflow
+
+The existing `e2e` job in `ci.yml` already runs every Playwright project, so the `oauth` project
+is picked up automatically. No secrets needed — everything runs against the mock server.
+
+```yaml
+- name: Run E2E tests (including OAuth)
+  run: npx playwright test
+  env:
+    CI: true
+```
+
+### Files to create/modify
+
+| File | Action |
+|------|--------|
+| `crates/gateway/ui/e2e/mock-oauth-server.js` | Create |
+| `crates/gateway/ui/e2e/start-gateway-oauth.sh` | Create |
+| `crates/gateway/ui/e2e/specs/oauth.spec.js` | Create |
+| `crates/gateway/ui/playwright.config.js` | Add `oauth` project + webServer |
+| `.github/workflows/ci.yml` | No change needed (runs all Playwright projects) |
+
+---
+
+## Approach 2 — Real Provider Tokens (optional, for integration confidence)
+
+### What it tests
+
+That token refresh actually works against the real provider APIs, and that
+stored OAuth tokens can be used to list models / make API calls. This is a
+smoke test, not a full flow (you can't automate the provider's login page).
+
+### GitHub Actions setup
+
+#### Secrets to add
+
+Create a GitHub Environment called `e2e-oauth` with **required reviewers**
+(prevents fork PRs from accessing secrets):
+
+| Secret name | Description |
+|-------------|-------------|
+| `MOLTIS_OAUTH_OPENAI_CODEX_ACCESS_TOKEN` | Valid access token from a test OpenAI account |
+| `MOLTIS_OAUTH_OPENAI_CODEX_REFRESH_TOKEN` | Refresh token for the same account |
+| `MOLTIS_OAUTH_GITHUB_COPILOT_ACCESS_TOKEN` | Valid Copilot token (optional) |
+| `MOLTIS_OAUTH_GITHUB_COPILOT_REFRESH_TOKEN` | Copilot refresh token (optional) |
+
+#### Obtaining test tokens
+
+1. Run moltis locally: `cargo run -- serve`
+2. Connect the provider via the UI (triggers real OAuth flow)
+3. Copy tokens from `~/.config/moltis/oauth_tokens.json`
+4. Store in GitHub Secrets
+
+Tokens will need periodic rotation when the refresh token expires (provider
+dependent — OpenAI refresh tokens are long-lived).
+
+#### CI workflow addition
+
+Add a **separate job** that only runs on `main` (never on PRs from forks):
+
+```yaml
+e2e-oauth-integration:
+  runs-on: ubuntu-latest
+  if: github.ref == 'refs/heads/main'
+  environment: e2e-oauth  # requires reviewer approval
+  needs: [e2e]
+  steps:
+    - uses: actions/checkout@v4
+    - name: Build
+      run: cargo build --bin moltis
+    - name: Setup Node
+      uses: actions/setup-node@v4
+      with: { node-version: 22 }
+    - name: Install deps
+      run: npm ci
+      working-directory: crates/gateway/ui
+    - name: Install Playwright
+      run: npx playwright install --with-deps chromium
+      working-directory: crates/gateway/ui
+    - name: Seed OAuth tokens
+      run: |
+        mkdir -p target/e2e-runtime-oauth/.config/moltis
+        cat > target/e2e-runtime-oauth/.config/moltis/oauth_tokens.json << 'SEED'
+        {
+          "openai-codex": {
+            "access_token": "${{ secrets.MOLTIS_OAUTH_OPENAI_CODEX_ACCESS_TOKEN }}",
+            "refresh_token": "${{ secrets.MOLTIS_OAUTH_OPENAI_CODEX_REFRESH_TOKEN }}",
+            "expires_at": 0
+          }
+        }
+        SEED
+    - name: Run OAuth integration tests
+      run: npx playwright test --project=oauth-integration
+      working-directory: crates/gateway/ui
+      env:
+        CI: true
+```
+
+### Test cases (separate spec file)
+
+`e2e/specs/oauth-integration.spec.js`:
+
+1. **Token refresh against real provider** — start with expired `access_token`
+   and a valid `refresh_token`, trigger a list-models call, assert it succeeds
+   (proves refresh flow works end-to-end).
+
+2. **Provider shows as connected** — navigate to providers page, assert the
+   pre-seeded provider shows "Connected" status.
+
+3. **List models returns results** — call `list_models` RPC for the connected
+   provider, assert non-empty model list returned.
+
+### Security guardrails
+
+- **GitHub Environment with reviewers**: secrets only available in approved runs
+- **`if: github.ref == 'refs/heads/main'`**: never runs on fork PRs
+- **Dedicated test account**: don't use personal credentials; create a test org
+  or separate account for CI
+- **Scrub Playwright reports**: ensure HTML report doesn't capture env vars in
+  screenshots or console output. Add `filterEnv` or avoid `console.log` of
+  tokens in test code
+- **Token rotation**: document how to refresh the stored tokens (manual process,
+  quarterly cadence suggested)
+
+### Files to create/modify
+
+| File | Action |
+|------|--------|
+| `crates/gateway/ui/e2e/specs/oauth-integration.spec.js` | Create |
+| `crates/gateway/ui/e2e/start-gateway-oauth-integration.sh` | Create |
+| `crates/gateway/ui/playwright.config.js` | Add `oauth-integration` project |
+| `.github/workflows/ci.yml` | Add `e2e-oauth-integration` job |
+
+---
+
+## Order of work
+
+1. **Approach 1 first** — mock server + PKCE flow tests. This is self-contained,
+   requires no secrets, and covers the majority of the value.
+2. **Approach 2 later** — only after approach 1 is stable and you want confidence
+   in real provider refresh flows. Can be skipped entirely if mock tests are
+   sufficient.
+
+## Open questions
+
+- Should the mock server also support device flow (for GitHub Copilot / Kimi)?
+  Device flow is harder to E2E test since it requires polling. Could add a
+  `POST /device/code` + `POST /token` (device grant) to the mock.
+- Should token refresh be tested in approach 1 by pre-seeding expired tokens
+  and having the mock server handle refresh, or is that better left to unit
+  tests in `crates/oauth/`?