coder
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎bun.lock‎
Lines changed: 109 additions & 135 deletions b/‎bun.lock‎
Lines changed: 109 additions & 135 deletions
diff --git a/‎bunfig.toml‎
Lines changed: 8 additions & 0 deletions b/‎bunfig.toml‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎docs/e2e/mock-transcript.md‎
Lines changed: 80 additions & 0 deletions b/‎docs/e2e/mock-transcript.md‎
Lines changed: 80 additions & 0 deletions
diff --git a/‎package.json‎
Lines changed: 3 additions & 0 deletions b/‎package.json‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎playwright.config.ts‎
Lines changed: 34 additions & 0 deletions b/‎playwright.config.ts‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎scripts/check_codex_comments.sh‎
Lines changed: 1 addition & 2 deletions b/‎scripts/check_codex_comments.sh‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎src/components/AIView.tsx‎
Lines changed: 7 additions & 2 deletions b/‎src/components/AIView.tsx‎
Lines changed: 7 additions & 2 deletions
diff --git a/‎src/components/ChatInput.tsx‎
Lines changed: 10 additions & 1 deletion b/‎src/components/ChatInput.tsx‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎src/components/ChatInputToast.tsx‎
Lines changed: 10 additions & 3 deletions b/‎src/components/ChatInputToast.tsx‎
Lines changed: 10 additions & 3 deletions
@@ -77,3 +77,6 @@ docs/mermaid.min.js
 **.cpuprofile
 profile.txt
 src/version.ts
+
+artifacts/
+tests/e2e/tmp/
@@ -0,0 +1,8 @@
+[test]
+root = "src"
+match = [
+  "**/*.test.ts",
+  "**/*.test.tsx",
+  "**/*.spec.ts",
+  "**/*.spec.tsx"
+]
@@ -0,0 +1,80 @@
+# Mock Transcript: Cmux End-to-End Demo
+
+This scripted flow is tailored for automated Playwright captures. It walks through the highest-impact UI affordances so reviewers can skim a single recording and understand the app’s behavior.
+
+## Goals
+
+- Exercise key surfaces: project sidebar, workspace modal, chat surface (streaming + reasoning + tool call), chat meta sidebar, plan/exec toggle, thinking slider, error banner, edit flow, and history truncation.
+- Keep every interaction deterministic so the mock AI backend can replay the same transcript on every run.
+- Ensure videos remain under ~90 seconds by limiting idle time while still letting animations complete.
+
+## Environment Prep
+
+1. Launch Cmux with `CMUX_MOCK_AI=1` so the main process swaps in the scripted responder.
+2. Pre-seed `~/.cmux/config.json` with a single project `demo-repo` and worktree `feature/login`. (The Playwright harness will handle this setup.)
+3. Start the app with the project pre-selected so the chat surface is immediately visible.
+4. Install Playwright’s bundled ffmpeg runtime via `bunx playwright install ffmpeg` to ensure Electron video capture works reliably.
+
+## High-Level Timeline
+
+| Step | UI Action                                                                                            | Transcript Snippet         | Feature Coverage                                                |
+| ---- | ---------------------------------------------------------------------------------------------------- | -------------------------- | --------------------------------------------------------------- |
+| 0    | Hover "Tips" carousel briefly before interacting                                                     | —                          | Carousel animation baseline                                     |
+| 1    | Open project sidebar menu → click `+` to add workspace                                               | —                          | Project sidebar controls, modal launch                          |
+| 2    | Use `NewWorkspaceModal` to create branch `demo-review`                                               | —                          | Modal form + validation                                         |
+| 3    | Select `demo-review` workspace                                                                       | —                          | Workspace selection, metadata refresh                           |
+| 4    | Adjust plan/exec toggle to `plan` and drag thinking slider to `3`                                    | —                          | Input controls, tooltips                                        |
+| 5    | Send message `Let's summarize the current branches.`                                                 | `User#1`                   | Chat input send, persisted state                                |
+| 6    | Mock assistant streams plan-style response with reasoning preamble and tool call to `git.branchList` | `Assistant#1`              | Streaming text, reasoning block, tool message, message metadata |
+| 7    | Switch toggle to `exec`, thinking level `1`, send follow-up `Open the onboarding doc.`               | `User#2`                   | Mode swap effect, second send                                   |
+| 8    | Mock assistant attempts `filesystem.open` tool, emits `StreamError` (simulated ENOENT)               | `Assistant#2` (error)      | Error rendering, cancel streaming state                         |
+| 9    | Click edit on `User#2`, modify text to `Show the onboarding doc contents instead.` and submit        | `User#2-edit`              | Edit barrier, resend flow                                       |
+| 10   | Assistant retries, succeeds with streamed content and closes tool call                               | `Assistant#3`              | Stream restart after edit, reasoning end, tool output           |
+| 11   | Invoke `/truncate 50` command from command palette to trim history                                   | `System` message (backend) | Slash command handling, delete message event                    |
+| 12   | Chat auto-scroll hint appears and is dismissed via tooltip/button                                    | —                          | Jump-to-bottom affordance                                       |
+| 13   | Use chat meta sidebar to collapse/expand (`ChatMetaSidebar`), ensure recording captures state change | —                          | Sidebar interactions                                            |
+
+## Detailed Transcript
+
+The mock backend will replay the following payloads (history sequences are strictly increasing):
+
+1. **User#1** (`historySequence: 1`)
+   - Text: "Let's summarize the current branches."
+   - Metadata: plan mode, thinking level 3.
+2. **Assistant#1** (streamed)
+   - `stream-start` for `msg-plan-1` (`historySequence: 2`).
+   - `reasoning-delta`: "Looking at demo-repo/workspaces…" → "Found three branches." (two chunks).
+   - `tool-call-start`: id `tool-branches`, name `git.branchList`, args `{ project: "demo-repo" }`.
+   - `tool-call-end`: same id, result `[{ name: "main" }, { name: "feature/login" }, { name: "demo-review" }]`.
+   - `stream-delta` chunks forming assistant text:
+     1. "Here’s the current branch roster:"
+     2. "• `main` – release baseline"
+     3. "• `feature/login` – authentication refresh"
+     4. "• `demo-review` – sandbox you just created"
+   - `stream-end` with metadata `{ model: "mock:planner", usage: { inputTokens: 128, outputTokens: 85 } }`.
+3. **User#2** (`historySequence: 3`)
+   - Text: "Open the onboarding doc."
+   - Metadata: exec mode, thinking level 1.
+4. **Assistant#2 error run**
+   - `stream-start` for `msg-exec-1` (`historySequence: 4`).
+   - `tool-call-start`: id `tool-open`, name `filesystem.open`, args `{ path: "docs/onboarding.md" }`.
+   - `stream-error`: `{ messageId: "msg-exec-1", error: "ENOENT: docs/onboarding.md not found", errorType: "tool_failed" }`.
+5. **User#2 edit** (`historySequence: 3` replacement)
+   - Edited text: "Show the onboarding doc contents instead." (same history slot replaces prior message; backend replays truncated history before new message).
+6. **Assistant#3 success run**
+   - `stream-start` for `msg-exec-2` (`historySequence: 5`).
+   - `tool-call-start`: id `tool-open`, name `filesystem.open`, args `{ path: "docs/onboarding.md" }`.
+   - `tool-call-end`: result `{ excerpt: "1. Clone the repo…" }`.
+   - `stream-delta` chunks narrating successful retrieval.
+   - `stream-end` metadata `{ model: "mock:executor", usage: { inputTokens: 96, outputTokens: 142 } }`.
+7. **System truncate acknowledgement**
+   - After `/truncate 50`, backend emits `DeleteMessage` for sequences `[1, 2]` followed by informational assistant message `historySequence: 6` summarizing remaining context.
+
+## Notes for Automation
+
+- Every event is timestamped deterministically (e.g., add 1s per history sequence) so recordings align across runs.
+- Tool outputs should stay compact to avoid long scrolls; prefer bullet lists under 5 items.
+- When the error fires, keep stream duration short (<2s) so reviewers see the red banner without waiting.
+- Scroll up temporarily before the delete event fires so that the jump-to-bottom hint is visible once truncation completes.
+
+This transcript can be encoded as a JSON/TypeScript fixture and consumed by the mock AI service during tests.
@@ -25,6 +25,7 @@
     "test:watch": "./scripts/test.sh --watch",
     "test:coverage": "./scripts/test.sh --coverage",
     "test:integration": "bun test src && TEST_INTEGRATION=1 jest tests",
+    "test:e2e": "PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 bunx playwright test --project=electron",
     "dist": "bun run build && electron-builder --publish never",
     "dist:mac": "bun run build && electron-builder --mac --publish never",
     "dist:win": "bun run build && electron-builder --win --publish never",
@@ -63,6 +64,7 @@
   },
   "devDependencies": {
     "@eslint/js": "^9.36.0",
+    "@playwright/test": "^1.56.0",
     "@testing-library/react": "^16.3.0",
     "@types/bun": "^1.2.23",
     "@types/diff": "^8.0.0",
@@ -84,6 +86,7 @@
     "eslint-plugin-react": "^7.37.5",
     "eslint-plugin-react-hooks": "^5.2.0",
     "jest": "^30.1.3",
+    "playwright": "^1.56.0",
     "prettier": "^3.6.2",
     "ts-jest": "^29.4.4",
     "tsc-alias": "^1.8.16",
 
@@ -0,0 +1,34 @@
+import { defineConfig } from "@playwright/test";
+
+const isCI = process.env.CI === "true"; // strict string match: CI=1 or CI=TRUE leaves this false
+
+export default defineConfig({
+  testDir: "./tests/e2e",
+  timeout: 120_000, // per-test timeout in milliseconds (2 minutes)
+  expect: {
+    timeout: 5_000, // milliseconds allowed for each expect() assertion
+  },
+  fullyParallel: false, // do not parallelize tests within a file
+  forbidOnly: isCI, // on CI, fail the run if a test.only was left in
+  retries: isCI ? 1 : 0, // one retry on CI, none locally
+  reporter: [
+    ["list"],
+    ["html", { outputFolder: "artifacts/playwright-report", open: "never" }], // HTML report is written but never auto-opened
+  ],
+  workers: 1, // single worker process
+  use: {
+    trace: isCI ? "on-first-retry" : "retain-on-failure", // CI: trace the first retry; local: keep traces for failures only
+    screenshot: "only-on-failure",
+    video: {
+      mode: "on", // record every test; NOTE(review): confirm this option takes effect for Electron launches
+      size: { width: 1280, height: 720 },
+    },
+  },
+  outputDir: "artifacts/playwright-output", // files created during test execution land here
+  projects: [
+    {
+      name: "electron", // the project name passed as --project=electron by the test:e2e script
+      testDir: "./tests/e2e", // same directory as the top-level testDir
+    },
+  ],
+});
@@ -14,9 +14,8 @@ BOT_LOGIN_GRAPHQL="chatgpt-codex-connector"    # GraphQL does not
 echo "Checking for unresolved Codex comments in PR #${PR_NUMBER}..."
 
 # Get all regular issue comments from the Codex bot (these can't be resolved)
-# Filter out "all clear" comments that indicate no issues found
 REGULAR_COMMENTS=$(gh api "/repos/{owner}/{repo}/issues/${PR_NUMBER}/comments" \
-    --jq "[.[] | select(.user.login == \"${BOT_LOGIN_REST}\") | select(.body | test(\"Didn't find any major issues\") | not)]")
+    --jq "[.[] | select(.user.login == \"${BOT_LOGIN_REST}\")]")
 
 REGULAR_COUNT=$(echo "$REGULAR_COMMENTS" | jq 'length')
 
 
@@ -138,7 +138,7 @@ const EditBarrier = styled.div`
   text-align: center;
 `;
 
-const JumpToBottomIndicator = styled.div`
+const JumpToBottomIndicator = styled.button`
   position: absolute;
   bottom: 8px;
   left: 50%;
@@ -355,6 +355,11 @@ const AIViewInner: React.FC<AIViewProps> = ({
               onWheel={markUserInteraction}
               onTouchMove={markUserInteraction}
               onScroll={handleScroll}
+              role="log"
+              aria-live={canInterrupt ? "polite" : "off"}
+              aria-busy={canInterrupt}
+              aria-label="Conversation transcript"
+              tabIndex={0}
             >
               {messages.length === 0 ? (
                 <EmptyState>
@@ -394,7 +399,7 @@ const AIViewInner: React.FC<AIViewProps> = ({
               )}
             </OutputContent>
             {!autoScroll && (
-              <JumpToBottomIndicator onClick={jumpToBottom}>
+              <JumpToBottomIndicator onClick={jumpToBottom} type="button">
                 Press {formatKeybind(KEYBINDS.JUMP_TO_BOTTOM)} to jump to bottom
               </JumpToBottomIndicator>
             )}
 
@@ -1,4 +1,4 @@
-import React, { useState, useRef, useCallback, useEffect } from "react";
+import React, { useState, useRef, useCallback, useEffect, useId } from "react";
 import styled from "@emotion/styled";
 import { CommandSuggestions, COMMAND_SUGGESTION_KEYS } from "./CommandSuggestions";
 import type { Toast } from "./ChatInputToast";
@@ -334,6 +334,7 @@ export const ChatInput: React.FC<ChatInputProps> = ({
   const [thinkingLevel] = useThinkingLevel();
   const [mode, setMode] = useMode();
   const { recentModels } = useModelLRU();
+  const commandListId = useId();
 
   const focusMessageInput = useCallback(() => {
     const element = inputRef.current;
@@ -730,6 +731,8 @@ export const ChatInput: React.FC<ChatInputProps> = ({
         onSelectSuggestion={handleCommandSelect}
         onDismiss={() => setShowCommandSuggestions(false)}
         isVisible={showCommandSuggestions}
+        ariaLabel="Slash command suggestions"
+        listId={commandListId}
       />
       <InputControls>
         <InputField
@@ -750,6 +753,12 @@ export const ChatInput: React.FC<ChatInputProps> = ({
           placeholder={placeholder}
           disabled={disabled || isSending || isCompacting}
           canInterrupt={canInterrupt}
+          aria-label={editingMessage ? "Edit your last message" : "Message Claude"}
+          aria-autocomplete="list"
+          aria-controls={
+            showCommandSuggestions && commandSuggestions.length > 0 ? commandListId : undefined
+          }
+          aria-expanded={showCommandSuggestions && commandSuggestions.length > 0}
         />
       </InputControls>
       <ModeToggles>
 
@@ -202,7 +202,7 @@ export const ChatInputToast: React.FC<ChatInputToastProps> = ({ toast, onDismiss
   if (isRichError) {
     return (
       <ToastWrapper>
-        <ErrorContainer>
+        <ErrorContainer role="alert" aria-live="assertive">
           <div style={{ display: "flex", alignItems: "flex-start", gap: "6px" }}>
             <ToastIcon>⚠</ToastIcon>
             <div style={{ flex: 1 }}>
@@ -212,7 +212,9 @@ export const ChatInputToast: React.FC<ChatInputToastProps> = ({ toast, onDismiss
               <ErrorDetails>{toast.message}</ErrorDetails>
               {toast.solution && <ErrorSolution>{toast.solution}</ErrorSolution>}
             </div>
-            <CloseButton onClick={handleDismiss}>×</CloseButton>
+            <CloseButton onClick={handleDismiss} aria-label="Dismiss">
+              ×
+            </CloseButton>
           </div>
         </ErrorContainer>
       </ToastWrapper>
@@ -222,7 +224,12 @@ export const ChatInputToast: React.FC<ChatInputToastProps> = ({ toast, onDismiss
   // Regular toast for simple messages and success
   return (
     <ToastWrapper>
-      <ToastContainer type={toast.type} isLeaving={isLeaving}>
+      <ToastContainer
+        type={toast.type}
+        isLeaving={isLeaving}
+        role={toast.type === "error" ? "alert" : "status"}
+        aria-live={toast.type === "error" ? "assertive" : "polite"}
+      >
         <ToastIcon>{toast.type === "success" ? "✓" : "⚠"}</ToastIcon>
         <ToastContent>
           {toast.title && <ToastTitle>{toast.title}</ToastTitle>}