From d9e3baaa3617268c43f210bbb2f171c455677c06 Mon Sep 17 00:00:00 2001 From: highperfocused Date: Thu, 12 Mar 2026 14:32:43 +0100 Subject: [PATCH] Add gateway/web UI docs and new session button --- README.md | 7 ++ docs/README.md | 4 + docs/gateway.md | 226 ++++++++++++++++++++++++++++++++++++++++++ docs/web-ui.md | 157 +++++++++++++++++++++++++++++ src/gateway/web-ui.ts | 24 ++++- 5 files changed, 417 insertions(+), 1 deletion(-) create mode 100644 docs/README.md create mode 100644 docs/gateway.md create mode 100644 docs/web-ui.md diff --git a/README.md b/README.md index ee216d2..8ec8602 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,13 @@ You can also keep one-shot mode (`RUN_MODE=single`) for script usage. --- +## Documentation + +- Gateway internals: `docs/gateway.md` +- Web UI internals: `docs/web-ui.md` + +--- + ## Run ```bash diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..868deb2 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,4 @@ +# Documentation + +- [Gateway: how it works](./gateway.md) +- [Web UI: how it works](./web-ui.md) diff --git a/docs/gateway.md b/docs/gateway.md new file mode 100644 index 0000000..863e274 --- /dev/null +++ b/docs/gateway.md @@ -0,0 +1,226 @@ +# Gateway: how it works + +This document explains how the HTTP gateway in this repository works. + +## Overview + +The gateway is a thin HTTP layer around `@mariozechner/pi-coding-agent` sessions. + +Main goals: + +- expose chat over HTTP (`/v1/chat`, `/v1/chat/stream`) +- keep long-lived conversation state per `conversationId` +- support adapter-friendly IDs (Slack/Matrix/etc.) +- optionally expose a built-in browser UI at `/` + +Key source files: + +- `src/index.ts` +- `src/gateway/server.ts` +- `src/conversation-manager.ts` +- `src/agent-session-factory.ts` +- `src/gateway/events.ts` + +--- + +## Startup flow + +1. `src/index.ts` loads env config via `loadConfig()`. +2. If `RUN_MODE=single`, one-shot mode is executed and exits. +3. 
Otherwise (`RUN_MODE=gateway`), it: + - creates `ConversationManager` + - initializes persisted conversation metadata (if enabled) + - starts `GatewayHttpServer` +4. On `SIGINT`/`SIGTERM`, it stops the HTTP server and disposes sessions. + +--- + +## Core components + +### 1) `GatewayHttpServer` (`src/gateway/server.ts`) + +Responsible for: + +- request routing +- auth and CORS handling +- request validation +- SSE streaming responses +- JSON/HTML responses + +### 2) `ConversationManager` (`src/conversation-manager.ts`) + +Responsible for: + +- creating and tracking conversation records +- loading/opening/creating agent sessions +- serializing prompts per conversation (queue) +- persisting conversation index + session metadata +- aborting/deleting sessions + +### 3) `AgentSessionFactory` (`src/agent-session-factory.ts`) + +Responsible for constructing agent sessions with: + +- model/provider selection (including Ollama support) +- tool selection (`all`, `readonly`, `none`, or subset) +- optional system prompt override/append +- auth storage and model registry wiring + +--- + +## Conversation model + +A conversation is identified by `conversationId`. + +- If client provides no ID, a UUID is generated. +- Each conversation maps to one `AgentSession`. +- Multiple requests for the same conversation are queued and processed in order. +- Metadata is exposed via `/v1/conversations` endpoints. + +Validation rules: + +- `conversationId` max length: 200 +- `conversationId` must not contain `\n`/`\r` +- `message` must be a non-empty string +- `images` must be an array when provided +- `streamingBehavior` must be `"steer"` or `"followUp"` when provided + +--- + +## Persistence behavior + +Controlled by `SESSION_PERSIST`. + +### `SESSION_PERSIST=true` + +Data is stored under: + +- `<CWD>/.gateway/conversations.json` (conversation index) +- `<CWD>/.gateway/sessions/...` (session files) + +At startup, the index is loaded and conversations are restored as unloaded records. 
+The actual `AgentSession` is lazily opened when that conversation is used. + +### `SESSION_PERSIST=false` + +Everything is in memory and lost on process exit. + +--- + +## API routes + +### Health/UI + +- `GET /health` → `{ "ok": true }` +- `GET /` → built-in Web UI HTML (if `GATEWAY_ENABLE_WEB_UI=true`) + +### Conversation management + +- `GET /v1/conversations` +- `POST /v1/conversations` +- `GET /v1/conversations/:id` +- `DELETE /v1/conversations/:id` +- `POST /v1/conversations/:id/abort` + +### Chat + +- `POST /v1/chat` (JSON response) +- `POST /v1/chat/stream` (SSE response) + +### Adapter endpoints + +- `POST /v1/adapters/chat` +- `POST /v1/adapters/chat/stream` + +Adapter request fields (`source`, `workspaceId`, `channelId`, `threadId`, `userId`) are normalized into: + +- `conversationId = source:workspaceId:channelId:threadId` +- `adapterKey = source:workspaceId:channelId:threadId:userId` + +`channelId` is required. `:` is not allowed inside segment values. + +--- + +## Streaming (SSE) behavior + +For `/v1/chat/stream` and `/v1/adapters/chat/stream`: + +1. Response starts with SSE headers. +2. A `ready` event is emitted. +3. Agent session events are mapped to gateway events (`src/gateway/events.ts`). +4. A final `done` event is emitted with summary payload. +5. On failure, an `error` event is emitted and stream ends. + +Common emitted event types: + +- `assistant_text_delta` +- `assistant_thinking_delta` +- `assistant_message_update` +- `tool_start`, `tool_update`, `tool_end` +- `agent_start`, `agent_end` +- `retry_start`, `retry_end` +- `compaction_start`, `compaction_end` +- `done` +- `error` + +`done` includes: + +- `conversationId` +- `sessionId` +- `sessionFile` +- `assistantText` +- plus `adapterKey` on adapter streaming routes + +Disconnect behavior: + +- if client disconnects mid-stream **and** the request had a `conversationId`, the server attempts to abort that conversation. 
+ +--- + +## Auth and CORS + +### Bearer auth + +If `GATEWAY_AUTH_TOKEN` is set, requests must include: + +`Authorization: Bearer <token>` + +Otherwise server returns `401`. + +Note: auth is checked before route handling, so this applies to all routes (including `GET /` and `GET /health`). + +### CORS + +If `GATEWAY_CORS_ORIGIN` is set, server adds: + +- `Access-Control-Allow-Origin` +- `Access-Control-Allow-Headers: Content-Type, Authorization` +- `Access-Control-Allow-Methods: GET, POST, DELETE, OPTIONS` + +`OPTIONS` preflight returns `204`. + +--- + +## Request limits and errors + +- JSON body max size: 1 MiB (413 if exceeded) +- invalid JSON: 400 +- invalid payload field types: 400 +- unknown route: 404 +- unexpected errors: 500 + +--- + +## Environment variables (gateway-relevant) + +- `RUN_MODE` (`gateway` | `single`) +- `GATEWAY_HOST` +- `GATEWAY_PORT` +- `GATEWAY_CORS_ORIGIN` +- `GATEWAY_AUTH_TOKEN` +- `GATEWAY_ENABLE_WEB_UI` +- `SESSION_PERSIST` +- `VERBOSE_TOOLS` +- `CWD` + +See `.env.example` for complete defaults and comments. diff --git a/docs/web-ui.md b/docs/web-ui.md new file mode 100644 index 0000000..1965c00 --- /dev/null +++ b/docs/web-ui.md @@ -0,0 +1,157 @@ +# Web UI: how it works + +This document explains the built-in browser UI served by the gateway. + +Source file: + +- `src/gateway/web-ui.ts` + +--- + +## Overview + +The Web UI is a single HTML page returned by `GET /` (when enabled). + +It is intentionally simple: + +- plain HTML/CSS/JS (no framework) +- sends requests to `/v1/chat/stream` +- renders streamed assistant text in real time +- stores and reuses `conversationId` in `localStorage` + +--- + +## Availability + +The UI route is controlled by `GATEWAY_ENABLE_WEB_UI`: + +- `true` (default): `GET /` returns UI +- `false`: `GET /` returns `404` with `{ "error": "Web UI disabled" }` + +If `GATEWAY_AUTH_TOKEN` is enabled, `GET /` also requires an `Authorization` header, because auth is global in the gateway. 
+ +--- + +## UI sections + +### 1) Session/header card + +- **Conversation ID input** (`#conversationId`) + - if empty, server auto-creates one during first message + - persisted locally under `pi_gateway_conversation_id` +- **Auth token input** (`#token`) + - optional bearer token included in API requests from the page + - this affects `fetch` calls only; it does not add auth headers to the initial page load + +### 2) Messages card + +- container `#messages` +- each message is appended as a `.msg.user` or `.msg.assistant` block +- text is rendered as plain text (`textContent`), not Markdown/HTML + +### 3) Composer card + +- textarea `#message` +- status text `#status` +- buttons: + - `Send` + - `New session` + +--- + +## Local state + +The page keeps only minimal browser-side state: + +- `conversationId` in input + local storage +- rendered message list in DOM +- current request state via button disabled/enabled + +Storage key: + +- `pi_gateway_conversation_id` + +On load, if this key exists, it pre-fills the conversation input. + +--- + +## Send flow + +When user presses **Send** (or Cmd/Ctrl + Enter): + +1. Trim textarea value; ignore empty input. +2. Disable `Send` and `New session` buttons. +3. Append user message bubble. +4. Append empty assistant bubble. +5. Build payload: + - required: `message` + - optional: `conversationId` (if input non-empty) +6. POST to `/v1/chat/stream` with JSON body. +7. Parse SSE stream incrementally. +8. Update assistant bubble and status based on events. +9. Re-enable buttons when request finishes/fails. 
+ +--- + +## SSE event handling in UI + +Handled events: + +- `assistant_text_delta` + - appends `data.delta` to assistant message bubble +- `done` + - reads `data.conversationId` + - updates conversation input + - writes `pi_gateway_conversation_id` + - status becomes `Done • conversation <conversationId>` +- `error` + - status becomes error text + - writes fallback error into assistant bubble if empty + +Other event types are currently ignored by the UI. + +--- + +## New session button behavior + +Clicking **New session**: + +- does nothing if a request is currently streaming (`Send` disabled) +- clears conversation ID input +- removes `pi_gateway_conversation_id` from local storage +- clears rendered message list +- sets status to `New session ready` +- focuses the message textarea + +This starts a fresh client-side chat thread. The next send will create a new conversation on the server. + +--- + +## Keyboard shortcut + +In the message textarea: + +- `Cmd + Enter` (macOS) or `Ctrl + Enter` (Windows/Linux) +- triggers the same send flow as the Send button + +--- + +## Limitations + +Current UI is intentionally minimal: + +- no server-side message history loading +- no cancel/abort button for in-flight response +- no rendering for tool events/thinking events +- no Markdown formatting +- no multi-conversation sidebar + +It is best used as a lightweight test/debug interface. 
+ +--- + +## Related API docs + +For full gateway/API details, see: + +- `docs/gateway.md` diff --git a/src/gateway/web-ui.ts b/src/gateway/web-ui.ts index 99120d5..a54ad06 100644 --- a/src/gateway/web-ui.ts +++ b/src/gateway/web-ui.ts @@ -69,8 +69,9 @@ export function getWebUiHtml(): string { .actions { display: grid; - grid-template-columns: 1fr auto; + grid-template-columns: 1fr auto auto; gap: 10px; + align-items: center; } button { @@ -80,6 +81,10 @@ export function getWebUiHtml(): string { padding: 10px 18px; } + button.secondary { + background: #1f2734; + } + button:disabled { opacity: 0.6; cursor: default; @@ -136,6 +141,7 @@ export function getWebUiHtml(): string {
Idle
+
@@ -146,6 +152,7 @@ export function getWebUiHtml(): string { const tokenInput = document.getElementById("token"); const messageInput = document.getElementById("message"); const sendButton = document.getElementById("send"); + const newSessionButton = document.getElementById("newSession"); const messagesEl = document.getElementById("messages"); const statusEl = document.getElementById("status"); @@ -167,6 +174,18 @@ export function getWebUiHtml(): string { return el; } + function startNewSession() { + if (sendButton.disabled) { + return; + } + + conversationInput.value = ""; + localStorage.removeItem("pi_gateway_conversation_id"); + messagesEl.textContent = ""; + setStatus("New session ready"); + messageInput.focus(); + } + async function consumeSse(body, onEvent) { const reader = body.getReader(); const decoder = new TextDecoder(); @@ -214,6 +233,7 @@ export function getWebUiHtml(): string { } sendButton.disabled = true; + newSessionButton.disabled = true; setStatus("Streaming response..."); addMessage("user", message); @@ -280,9 +300,11 @@ export function getWebUiHtml(): string { setStatus("Request failed"); } finally { sendButton.disabled = false; + newSessionButton.disabled = false; } } + newSessionButton.addEventListener("click", startNewSession); sendButton.addEventListener("click", sendMessage); messageInput.addEventListener("keydown", (event) => { if ((event.metaKey || event.ctrlKey) && event.key === "Enter") {