test(handlers): add coverage for runtime_provision_timeouts.go + classifyScheduleStatus edge cases

Add 20 new tests to admin_schedules_health_test.go: - classifyScheduleStatus: zero threshold, negative threshold, exactly-at-threshold (strict > comparison boundary) - loadRuntimeProvisionTimeouts: empty dir, non-dir entries, single/multiple templates, same-runtime MAX logic, zero/negative/missing runtime, bad YAML, missing config — all via t.TempDir() fixtures Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-18 11:17:58 +00:00
15 changed files with 197 additions and 804 deletions
@@ -748,14 +748,7 @@ export function MobileChat({
              border: "none",
              outline: "none",
              background: "transparent",
-              // 16px floor: iOS Safari/WebKit auto-zooms the viewport on
-              // focus when a focused field's font-size is < 16px. Anything
-              // below this re-introduces the tap-to-zoom layout jump on the
-              // mobile PWA. Do NOT lower this without also adding a
-              // maximum-scale/user-scalable viewport lock — and that lock
-              // breaks pinch-to-zoom accessibility, so 16px here is the
-              // correct trade.
-              fontSize: 16,
+              fontSize: 14.5,
              lineHeight: 1.4,
              color: p.text,
              padding: "6px 0",
@@ -263,20 +263,6 @@ describe("MobileChat — composer", () => {
    const sendBtn = container.querySelector('[aria-label="Send"]') as HTMLButtonElement;
    expect(sendBtn.disabled).toBe(true);
  });
-
-  // iOS Safari/WebKit auto-zooms the viewport on focus when a focused
-  // <input>/<textarea> has an effective font-size below 16px. On the
-  // mobile PWA this made the whole layout scale up the moment the user
-  // tapped into the chat box. Keeping the composer font ≥16px is the
-  // root-cause fix — it suppresses the focus-zoom WITHOUT disabling
-  // pinch-to-zoom (which a maximum-scale/user-scalable viewport hack
-  // would have done at the cost of accessibility).
-  it("composer textarea font-size is >= 16px (prevents iOS focus-zoom)", () => {
-    const { container } = renderChat(mockAgentId);
-    const textarea = container.querySelector("textarea") as HTMLTextAreaElement;
-    const fontSizePx = parseFloat(textarea.style.fontSize);
-    expect(fontSizePx).toBeGreaterThanOrEqual(16);
-  });
 });

 // ─── Tabs ─────────────────────────────────────────────────────────────────────
@@ -1,209 +0,0 @@
-// @vitest-environment jsdom
-/**
- * Tests for useChatSend — the canvas user→agent send hook.
- *
- * Behavioural focus: the poll-mode ("queued") path. When the target
- * workspace is an external / MCP-registered agent (delivery_mode=poll,
- * e.g. an operator laptop running the molecule MCP channel), the
- * platform's POST /workspaces/:id/a2a returns a synthetic
- * {status:"queued", delivery_mode:"poll"} envelope IMMEDIATELY with no
- * reply — the real reply arrives later over the AGENT_MESSAGE
- * WebSocket push.
- *
- * Pre-fix the hook treated that synthetic envelope as a terminal
- * response and called releaseSendGuards() → `sending` went false the
- * instant the POST returned → the "agent is working" indicator
- * vanished and the external turn looked dead. This suite pins the
- * fixed contract:
- *
- *   - a real reply still clears `sending` (regression guard)
- *   - a poll "queued" envelope KEEPS `sending` true (no terminal
- *     clear) so the existing thinking indicator persists
- *   - the eventual reply path (releaseSendGuards, the same call the
- *     AGENT_MESSAGE WS push makes via useChatSocket) clears it
- *   - an offline poll agent that never replies eventually surfaces an
- *     honest error instead of an infinite spinner
- *
- * Plus pure-function coverage for the poll-envelope detector.
- *
- * Root cause: workspace-server a2a_proxy.go:402 poll-mode
- * short-circuit returns {status:"queued"} synchronously.
- */
-import {
-  describe,
-  it,
-  expect,
-  vi,
-  beforeEach,
-  afterEach,
-  type Mock,
-} from "vitest";
-import { act, renderHook, cleanup } from "@testing-library/react";
-
-const { mockApiPost } = vi.hoisted(() => ({ mockApiPost: vi.fn() }));
-
-vi.mock("@/lib/api", () => ({
-  api: { post: mockApiPost },
-}));
-
-vi.mock("../uploads", () => ({
-  uploadChatFiles: vi.fn(),
-}));
-
-// Import AFTER mocks.
-import {
-  useChatSend,
-  isPollQueuedResponse,
-  extractReplyText,
-  POLL_QUEUED_REPLY_TIMEOUT_MS,
-} from "../useChatSend";
-
-const flush = () => act(async () => { await Promise.resolve(); });
-
-describe("isPollQueuedResponse", () => {
-  it("is true only for the synthetic poll-mode queued envelope", () => {
-    expect(isPollQueuedResponse({ status: "queued", delivery_mode: "poll" })).toBe(true);
-  });
-
-  it("is false for a real agent reply", () => {
-    expect(
-      isPollQueuedResponse({ result: { parts: [{ kind: "text", text: "hi" }] } }),
-    ).toBe(false);
-  });
-
-  it("is false for null / undefined / partial shapes", () => {
-    expect(isPollQueuedResponse(null)).toBe(false);
-    expect(isPollQueuedResponse(undefined)).toBe(false);
-    // status=queued without delivery_mode=poll is NOT the poll envelope
-    // — don't accidentally swallow a real reply that happens to carry
-    // an unrelated status field.
-    expect(isPollQueuedResponse({ status: "queued" })).toBe(false);
-    expect(isPollQueuedResponse({ delivery_mode: "poll" })).toBe(false);
-  });
-});
-
-describe("extractReplyText (regression guard — unchanged by fix)", () => {
-  it("collects text parts from result", () => {
-    expect(
-      extractReplyText({ result: { parts: [{ kind: "text", text: "hello" }] } }),
-    ).toBe("hello");
-  });
-  it("returns empty for the poll-queued envelope", () => {
-    expect(extractReplyText({ status: "queued", delivery_mode: "poll" })).toBe("");
-  });
-});
-
-describe("useChatSend — poll-mode in-progress state", () => {
-  beforeEach(() => {
-    vi.useFakeTimers();
-    mockApiPost.mockReset();
-  });
-  afterEach(() => {
-    vi.runOnlyPendingTimers();
-    vi.useRealTimers();
-    cleanup();
-  });
-
-  const setup = () => {
-    const onUserMessage = vi.fn();
-    const onAgentMessage = vi.fn();
-    const { result } = renderHook(() =>
-      useChatSend("ws-ext-1", {
-        getHistoryMessages: () => [],
-        onUserMessage,
-        onAgentMessage,
-      }),
-    );
-    return { result, onUserMessage, onAgentMessage };
-  };
-
-  it("a real reply clears `sending` (regression guard)", async () => {
-    mockApiPost.mockResolvedValue({
-      result: { parts: [{ kind: "text", text: "real reply" }] },
-    });
-    const { result, onAgentMessage } = setup();
-
-    await act(async () => {
-      void result.current.sendMessage("hi");
-    });
-    await flush();
-
-    expect(onAgentMessage).toHaveBeenCalledTimes(1);
-    expect(result.current.sending).toBe(false);
-  });
-
-  it("keeps `sending` true on a poll 'queued' envelope (no terminal clear)", async () => {
-    mockApiPost.mockResolvedValue({ status: "queued", delivery_mode: "poll" });
-    const { result, onAgentMessage } = setup();
-
-    await act(async () => {
-      void result.current.sendMessage("hi external agent");
-    });
-    await flush();
-
-    // The POST resolved, but it was only a queued ack — the indicator
-    // must stay up and no agent bubble should be rendered yet.
-    expect(result.current.sending).toBe(true);
-    expect(onAgentMessage).not.toHaveBeenCalled();
-    expect(result.current.error).toBeNull();
-  });
-
-  it("releaseSendGuards (the AGENT_MESSAGE-push path) clears the poll in-progress state", async () => {
-    mockApiPost.mockResolvedValue({ status: "queued", delivery_mode: "poll" });
-    const { result } = setup();
-
-    await act(async () => {
-      void result.current.sendMessage("hi");
-    });
-    await flush();
-    expect(result.current.sending).toBe(true);
-
-    // Simulate the terminal AGENT_MESSAGE WebSocket push arriving:
-    // useChatSocket's onAgentMessage / onSendComplete call
-    // releaseSendGuards. That must clear the in-progress state AND the
-    // safety timer (asserted by the next test).
-    act(() => {
-      result.current.releaseSendGuards();
-    });
-    expect(result.current.sending).toBe(false);
-  });
-
-  it("surfaces an honest error if a poll agent never replies (safety timeout)", async () => {
-    mockApiPost.mockResolvedValue({ status: "queued", delivery_mode: "poll" });
-    const { result } = setup();
-
-    await act(async () => {
-      void result.current.sendMessage("hi");
-    });
-    await flush();
-    expect(result.current.sending).toBe(true);
-
-    act(() => {
-      vi.advanceTimersByTime(POLL_QUEUED_REPLY_TIMEOUT_MS + 1000);
-    });
-
-    expect(result.current.sending).toBe(false);
-    expect(result.current.error).toMatch(/queued/i);
-  });
-
-  it("does NOT fire the safety error when the reply arrives before timeout", async () => {
-    mockApiPost.mockResolvedValue({ status: "queued", delivery_mode: "poll" });
-    const { result } = setup();
-
-    await act(async () => {
-      void result.current.sendMessage("hi");
-    });
-    await flush();
-
-    // Reply arrives (releaseSendGuards) well before the timeout.
-    act(() => {
-      result.current.releaseSendGuards();
-    });
-    act(() => {
-      vi.advanceTimersByTime(POLL_QUEUED_REPLY_TIMEOUT_MS + 1000);
-    });
-
-    expect(result.current.error).toBeNull();
-    expect(result.current.sending).toBe(false);
-  });
-});
@@ -1,6 +1,6 @@
 "use client";

-import { useCallback, useEffect, useRef, useState } from "react";
+import { useCallback, useRef, useState } from "react";
 import { api } from "@/lib/api";
 import { uploadChatFiles } from "../uploads";
 import { createMessage, type ChatMessage, type ChatAttachment } from "../types";
@@ -22,42 +22,8 @@ interface A2AResponse {
    parts?: A2APart[];
    artifacts?: Array<{ parts: A2APart[] }>;
  };
-  /** Synthetic poll-mode envelope. The platform returns this
-   *  immediately (HTTP 200) when the target workspace is registered
-   *  delivery_mode=poll — an external / MCP-registered agent with no
-   *  public URL (e.g. an operator's laptop running the molecule MCP
-   *  channel). The request has only been QUEUED into activity_logs;
-   *  the agent will pick it up on its next poll and the real reply
-   *  arrives asynchronously over the AGENT_MESSAGE WebSocket push
-   *  (consumed by useChatSocket). See workspace-server
-   *  a2a_proxy.go:402 (poll-mode short-circuit) and
-   *  a2a_proxy_helpers.go:516 (logA2AReceiveQueued). */
-  status?: string;
-  delivery_mode?: string;
 }

-/** True when `resp` is the platform's synthetic poll-mode "queued"
- *  envelope rather than a real agent reply. For these the send is
- *  acknowledged-but-pending: the user's message landed and the agent
- *  is working, but there is no reply yet — the terminal AGENT_MESSAGE
- *  push will arrive later over the WebSocket. Treating this as a
- *  terminal response (the pre-fix behaviour) cleared the "agent is
- *  working" indicator the instant the POST returned, so an external
- *  workspace turn looked dead even though work had not started. */
-export function isPollQueuedResponse(resp: A2AResponse | null | undefined): boolean {
-  return !!resp && resp.status === "queued" && resp.delivery_mode === "poll";
-}
-
-/** Hard ceiling on how long the "agent is working" indicator stays up
- *  for a poll-mode turn with no reply. The terminal AGENT_MESSAGE push
- *  normally clears it well before this. The cap exists so a poll-mode
- *  workspace that is offline / never consumes its queue doesn't pin a
- *  spinner forever — at which point we surface an honest, actionable
- *  error instead of an opaque dead spinner. Generous because poll
- *  agents (an operator laptop) can legitimately take minutes to wake,
- *  poll, and respond; the goal is "eventually honest", not fail-fast. */
-export const POLL_QUEUED_REPLY_TIMEOUT_MS = 15 * 60 * 1000;
-
 export function extractReplyText(resp: A2AResponse): string {
  const collect = (parts: A2APart[] | undefined): string => {
    if (!parts) return "";
@@ -93,29 +59,14 @@ export function useChatSend(workspaceId: string, options: UseChatSendOptions) {
  const sendInFlightRef = useRef(false);
  const sendingFromAPIRef = useRef(false);
  const sendTokenRef = useRef(0);
-  // Safety-net timer armed only for poll-mode ("queued") turns: the
-  // POST returns immediately with no reply, so the normal
-  // POST-resolves-→-clear-spinner path can't drive the indicator. The
-  // terminal AGENT_MESSAGE WebSocket push clears it via
-  // releaseSendGuards (which also clears this timer); the timer is the
-  // backstop for an offline poll agent that never consumes its queue.
-  const pollTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const optionsRef = useRef(options);
  optionsRef.current = options;

-  const clearPollTimeout = useCallback(() => {
-    if (pollTimeoutRef.current !== null) {
-      clearTimeout(pollTimeoutRef.current);
-      pollTimeoutRef.current = null;
-    }
-  }, []);
-
  const releaseSendGuards = useCallback(() => {
-    clearPollTimeout();
    setSending(false);
    sendingFromAPIRef.current = false;
    sendInFlightRef.current = false;
-  }, [clearPollTimeout]);
+  }, []);

  const clearError = useCallback(() => setError(null), []);

@@ -195,33 +146,6 @@ export function useChatSend(workspaceId: string, options: UseChatSendOptions) {
            sendInFlightRef.current = false;
            return;
          }
-          // Poll-mode ("queued") turn: the message landed and the
-          // external/MCP agent will pick it up on its next poll, but
-          // there is NO reply in this response. Pre-fix this fell
-          // through to releaseSendGuards() below and the "agent is
-          // working" indicator vanished the instant the POST returned —
-          // an external-workspace turn looked dead even though work had
-          // not started. Instead, keep `sending` true so the existing
-          // thinking indicator (the same one internal agents use)
-          // persists as a "received — agent is working" state; the
-          // terminal AGENT_MESSAGE WebSocket push (consumed by
-          // useChatSocket → onAgentMessage / onSendComplete →
-          // releaseSendGuards) clears it when the real reply arrives,
-          // exactly the path an internal async reply already uses.
-          if (isPollQueuedResponse(resp)) {
-            clearPollTimeout();
-            pollTimeoutRef.current = setTimeout(() => {
-              if (sendTokenRef.current !== myToken) return;
-              if (!sendingFromAPIRef.current) return;
-              releaseSendGuards();
-              setError(
-                "No response yet from this agent — it may be offline or " +
-                  "busy. Your message was delivered and is queued; the " +
-                  "reply will appear here if the agent picks it up.",
-              );
-            }, POLL_QUEUED_REPLY_TIMEOUT_MS);
-            return;
-          }
          const replyText = extractReplyText(resp);
          const replyFiles = extractFilesFromTask(
            (resp?.result ?? {}) as Record<string, unknown>,
@@ -243,15 +167,9 @@ export function useChatSend(workspaceId: string, options: UseChatSendOptions) {
          setError("Failed to send message — agent may be unreachable");
        });
    },
-    [workspaceId, sending, uploading, clearPollTimeout],
+    [workspaceId, sending, uploading],
  );

-  // Drop the poll-mode safety timer on unmount / workspace switch so a
-  // stale timeout can't fire setError against a panel the user has
-  // already navigated away from. sendTokenRef guards correctness if it
-  // ever did fire; this just avoids the wasted timer + setState churn.
-  useEffect(() => clearPollTimeout, [clearPollTimeout]);
-
  return {
    sending,
    uploading,
@@ -67,21 +67,9 @@ export function useChatSocket(
            const own = (targetId || msg.workspace_id) === workspaceId;
            if (own) {
              callbacksRef.current.onSendComplete?.();
-              // internal#211/#212: surface the runtime's curated,
-              // user-actionable reason (provider HTTP status + error
-              // code + the provider's own guidance, e.g. a 403 "org
-              // disabled · use an API key / ask your admin"). The
-              // server now includes error_detail in the ACTIVITY_LOGGED
-              // broadcast; fall back to summary, and only as a last
-              // resort to a generic line. The old hardcoded
-              // "Agent error (Exception) — see workspace logs for
-              // details." string pointed at a logs UI that does not
-              // exist and discarded the actionable reason entirely.
-              const detail =
-                (p.error_detail as string) ||
-                (p.summary as string) ||
-                "The agent turn failed but the runtime reported no detail. Retry once; if it repeats the workspace runtime may need a restart.";
-              callbacksRef.current.onSendError?.(detail);
+              callbacksRef.current.onSendError?.(
+                "Agent error (Exception) — see workspace logs for details.",
+              );
            }
          }
        } else if (type === "a2a_send") {
@@ -62,7 +62,6 @@ TOP_LEVEL_MODULES = {
    "a2a_tools_memory",
    "a2a_tools_messaging",
    "a2a_tools_rbac",
-    "a2a_tools_identity",
    "adapter_base",
    "agent",
    "agents_md",
@@ -691,19 +691,6 @@ func logActivityExec(ctx context.Context, exec activityExecutor, broadcaster eve
 		if respStr != nil {
 			payload["response_body"] = json.RawMessage(respJSON)
 		}
-		// internal#211/#212: error_detail carries the runtime's curated,
-		// user-actionable, secret-safe failure reason (provider HTTP
-		// status + error code + the provider's own guidance, e.g. a 403
-		// "org disabled · use an API key / ask your admin"). It is
-		// already persisted to the DB column above and capped by the
-		// runtime's report_activity helper (4096 chars). Previously it
-		// was dropped from the LIVE broadcast, so the canvas had nothing
-		// to render and fell back to a hardcoded opaque
-		// "Agent error (Exception) — see workspace logs" string. Include
-		// it so the chat bubble shows the real reason in real time.
-		if params.ErrorDetail != nil && *params.ErrorDetail != "" {
-			payload["error_detail"] = *params.ErrorDetail
-		}
 	}

 	return func() {
@@ -5,6 +5,9 @@ import (
 	"encoding/json"
 	"net/http"
 	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"strconv"
 	"testing"
 	"time"

@@ -444,3 +447,178 @@ func TestAdminSchedulesHealth_ResponseFields(t *testing.T) {
 		t.Fatalf("unmet expectations: %v", err)
 	}
 }
+
+// ── classifyScheduleStatus — additional edge cases ─────────────────────────────────
+
+func TestClassifyScheduleStatus_ZeroThreshold(t *testing.T) {
+	now := time.Now()
+	lastRun := now.Add(-365 * 24 * time.Hour) // very old
+	result := classifyScheduleStatus(&lastRun, 0, now)
+	if result != "ok" {
+		t.Errorf("classifyScheduleStatus(threshold=0) = %q; want 'ok'", result)
+	}
+}
+
+func TestClassifyScheduleStatus_NegativeThreshold(t *testing.T) {
+	now := time.Now()
+	lastRun := now.Add(-24 * time.Hour)
+	result := classifyScheduleStatus(&lastRun, -1*time.Hour, now)
+	if result != "ok" {
+		t.Errorf("classifyScheduleStatus(threshold=-1h) = %q; want 'ok'", result)
+	}
+}
+
+func TestClassifyScheduleStatus_ExactlyAtThreshold(t *testing.T) {
+	// Strict >: if now.Sub(lastRun) == threshold, it is NOT stale
+	now := time.Date(2026, 5, 18, 12, 0, 0, 0, time.UTC)
+	lastRun := time.Date(2026, 5, 18, 10, 0, 0, 0, time.UTC) // exactly 2h ago
+	result := classifyScheduleStatus(&lastRun, 2*time.Hour, now)
+	if result != "ok" {
+		t.Errorf("classifyScheduleStatus(exactly at threshold) = %q; want 'ok'", result)
+	}
+}
+
+// ── loadRuntimeProvisionTimeouts (runtime_provision_timeouts.go) ─────────────────
+
+func writeRuntimeConfigYAML(t *testing.T, tmpDir, templateName, runtime string, timeoutSecs int) {
+	t.Helper()
+	dir := filepath.Join(tmpDir, templateName)
+	if err := os.MkdirAll(dir, 0755); err != nil {
+		t.Fatalf("MkdirAll(%s): %v", dir, err)
+	}
+	yamlContent := "runtime: " + runtime + "\nruntime_config:\n  provision_timeout_seconds: " + strconv.Itoa(timeoutSecs) + "\n"
+	if err := os.WriteFile(filepath.Join(dir, "config.yaml"), []byte(yamlContent), 0644); err != nil {
+		t.Fatalf("WriteFile: %v", err)
+	}
+}
+
+func TestLoadRuntimeProvisionTimeouts_EmptyDir(t *testing.T) {
+	tmpDir := t.TempDir()
+	result := loadRuntimeProvisionTimeouts(tmpDir)
+	if len(result) != 0 {
+		t.Errorf("loadRuntimeProvisionTimeouts(empty dir) len = %d; want 0", len(result))
+	}
+}
+
+func TestLoadRuntimeProvisionTimeouts_IgnoresNonDirEntries(t *testing.T) {
+	tmpDir := t.TempDir()
+	if err := os.WriteFile(filepath.Join(tmpDir, "not-a-dir.yaml"), []byte("runtime: hermes\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	result := loadRuntimeProvisionTimeouts(tmpDir)
+	if len(result) != 0 {
+		t.Errorf("loadRuntimeProvisionTimeouts(file-only dir) len = %d; want 0", len(result))
+	}
+}
+
+func TestLoadRuntimeProvisionTimeouts_SingleTemplate(t *testing.T) {
+	tmpDir := t.TempDir()
+	writeRuntimeConfigYAML(t, tmpDir, "tmpl-hermes", "hermes", 300)
+	result := loadRuntimeProvisionTimeouts(tmpDir)
+	if v, ok := result["hermes"]; !ok || v != 300 {
+		t.Errorf("loadRuntimeProvisionTimeouts → hermes = %d; want 300", v)
+	}
+}
+
+func TestLoadRuntimeProvisionTimeouts_MultipleTemplatesSameRuntime(t *testing.T) {
+	// Two templates using the same runtime — takes the MAX timeout
+	tmpDir := t.TempDir()
+	writeRuntimeConfigYAML(t, tmpDir, "tmpl-hermes-slow", "hermes", 600)
+	writeRuntimeConfigYAML(t, tmpDir, "tmpl-hermes-fast", "hermes", 120)
+	result := loadRuntimeProvisionTimeouts(tmpDir)
+	if v, ok := result["hermes"]; !ok || v != 600 {
+		t.Errorf("loadRuntimeProvisionTimeouts → hermes = %d; want 600 (max of 600, 120)", v)
+	}
+}
+
+func TestLoadRuntimeProvisionTimeouts_MultipleRuntimes(t *testing.T) {
+	tmpDir := t.TempDir()
+	writeRuntimeConfigYAML(t, tmpDir, "tmpl-hermes", "hermes", 300)
+	writeRuntimeConfigYAML(t, tmpDir, "tmpl-claude-code", "claude-code", 420)
+	writeRuntimeConfigYAML(t, tmpDir, "tmpl-deepagents", "deepagents", 180)
+	result := loadRuntimeProvisionTimeouts(tmpDir)
+	want := map[string]int{
+		"hermes":      300,
+		"claude-code": 420,
+		"deepagents":  180,
+	}
+	for runtime, wantSecs := range want {
+		if got, ok := result[runtime]; !ok || got != wantSecs {
+			t.Errorf("loadRuntimeProvisionTimeouts → %s = %d; want %d", runtime, got, wantSecs)
+		}
+	}
+	if len(result) != len(want) {
+		t.Errorf("loadRuntimeProvisionTimeouts → len = %d; want %d", len(result), len(want))
+	}
+}
+
+func TestLoadRuntimeProvisionTimeouts_IgnoresZeroTimeout(t *testing.T) {
+	tmpDir := t.TempDir()
+	writeRuntimeConfigYAML(t, tmpDir, "tmpl-zero", "zero-runtime", 0)
+	result := loadRuntimeProvisionTimeouts(tmpDir)
+	if _, ok := result["zero-runtime"]; ok {
+		t.Errorf("loadRuntimeProvisionTimeouts → 'zero-runtime' present; want absent (timeout=0)")
+	}
+}
+
+func TestLoadRuntimeProvisionTimeouts_IgnoresNegativeTimeout(t *testing.T) {
+	tmpDir := t.TempDir()
+	writeRuntimeConfigYAML(t, tmpDir, "tmpl-negative", "neg-runtime", -60)
+	result := loadRuntimeProvisionTimeouts(tmpDir)
+	if _, ok := result["neg-runtime"]; ok {
+		t.Errorf("loadRuntimeProvisionTimeouts → 'neg-runtime' present; want absent (timeout<0)")
+	}
+}
+
+func TestLoadRuntimeProvisionTimeouts_IgnoresMissingRuntimeField(t *testing.T) {
+	tmpDir := t.TempDir()
+	dir := filepath.Join(tmpDir, "tmpl-no-runtime")
+	if err := os.MkdirAll(dir, 0755); err != nil {
+		t.Fatal(err)
+	}
+	yamlContent := "template_name: no-runtime-template\nruntime_config:\n  provision_timeout_seconds: 300\n"
+	if err := os.WriteFile(filepath.Join(dir, "config.yaml"), []byte(yamlContent), 0644); err != nil {
+		t.Fatal(err)
+	}
+	result := loadRuntimeProvisionTimeouts(tmpDir)
+	if len(result) != 0 {
+		t.Errorf("loadRuntimeProvisionTimeouts → len = %d; want 0 (runtime field absent)", len(result))
+	}
+}
+
+func TestLoadRuntimeProvisionTimeouts_IgnoresMalformedYAML(t *testing.T) {
+	tmpDir := t.TempDir()
+	dir := filepath.Join(tmpDir, "tmpl-bad-yaml")
+	if err := os.MkdirAll(dir, 0755); err != nil {
+		t.Fatal(err)
+	}
+	badYAML := "runtime: bad\n  provision_timeout_seconds: not a number\n"
+	if err := os.WriteFile(filepath.Join(dir, "config.yaml"), []byte(badYAML), 0644); err != nil {
+		t.Fatal(err)
+	}
+	result := loadRuntimeProvisionTimeouts(tmpDir)
+	if len(result) != 0 {
+		t.Errorf("loadRuntimeProvisionTimeouts → len = %d; want 0 (malformed YAML)", len(result))
+	}
+}
+
+func TestLoadRuntimeProvisionTimeouts_IgnoresMissingConfig(t *testing.T) {
+	tmpDir := t.TempDir()
+	if err := os.MkdirAll(filepath.Join(tmpDir, "tmpl-no-config"), 0755); err != nil {
+		t.Fatal(err)
+	}
+	writeRuntimeConfigYAML(t, tmpDir, "tmpl-good", "good-runtime", 300)
+	result := loadRuntimeProvisionTimeouts(tmpDir)
+	if v, ok := result["good-runtime"]; !ok || v != 300 {
+		t.Errorf("loadRuntimeProvisionTimeouts → good-runtime = %d; want 300", v)
+	}
+}
+
+func TestLoadRuntimeProvisionTimeouts_IgnoresEmptyRuntime(t *testing.T) {
+	tmpDir := t.TempDir()
+	writeRuntimeConfigYAML(t, tmpDir, "tmpl-empty", "", 300)
+	result := loadRuntimeProvisionTimeouts(tmpDir)
+	if len(result) != 0 {
+		t.Errorf("loadRuntimeProvisionTimeouts → len = %d; want 0 (empty runtime)", len(result))
+	}
+}
@@ -107,29 +107,10 @@ func (h *ChatFilesHandler) WithPendingUploads(storage pendinguploads.Storage, br
 }

 // chatUploadMaxBytes caps the full multipart request body so a
-// malicious / runaway client can't OOM the proxy hop. 100 MB matches
-// the workspace-side total limit; anything larger is rejected at the
+// malicious / runaway client can't OOM the proxy hop. 50 MB matches
+// the workspace-side limit; anything larger is rejected at the
 // network boundary before forwarding.
-//
-// SSOT NOTE (issue #1520): this constant is the source of truth for
-// chat upload limits across the platform. Its value is exported to
-// the workspace container at provision time via the env var
-// CHAT_UPLOAD_MAX_TOTAL_BYTES (see
-// workspace_provision_shared.go::applyChatUploadLimits) so the
-// Python runtime cap stays in lock-step. Do NOT change this without
-// updating the per-file cap chatUploadMaxFileBytes below and
-// verifying the env-injection site is unchanged.
-const chatUploadMaxBytes = 100 * 1024 * 1024
-
-// chatUploadMaxFileBytes caps any single multipart part. Mirrors the
-// total cap by default because most chat uploads are a single file;
-// keeping per-file equal to total avoids the surprise of "my 60 MB
-// file fit under the total but got 413'd on per-file". Exported to
-// the workspace container as CHAT_UPLOAD_MAX_FILE_BYTES so the
-// Starlette parser's max_part_size matches and any single part above
-// Starlette's default 1 MiB no longer raises MultiPartException
-// (root cause of issue #1520).
-const chatUploadMaxFileBytes = 100 * 1024 * 1024
+const chatUploadMaxBytes = 50 * 1024 * 1024

 // resolveWorkspaceForwardCreds resolves the workspace's URL +
 // platform_inbound_secret for an /internal/* forward, applying
@@ -1,63 +0,0 @@
-package handlers
-
-// chat_upload_limits_test.go — pins the SSOT env-injection contract
-// for chat-upload caps (issue #1520). The Python workspace runtime
-// reads these env vars at module init; drift between the constant in
-// chat_files.go and the env-var name here silently breaks chat upload
-// fleet-wide, so the contract is asserted as a unit test in the same
-// package as the producer.
-
-import (
-	"fmt"
-	"testing"
-)
-
-// applyChatUploadLimits MUST seed both env vars to the byte-count
-// stringification of the Go-side constants. Anything else means a
-// Python-side parser cap that disagrees with the Go-side network cap,
-// which is exactly the drift that shipped #1520.
-func TestApplyChatUploadLimits_DefaultsMatchGoConstants(t *testing.T) {
-	env := map[string]string{}
-	applyChatUploadLimits(env)
-
-	wantFile := fmt.Sprintf("%d", chatUploadMaxFileBytes)
-	if got := env["CHAT_UPLOAD_MAX_FILE_BYTES"]; got != wantFile {
-		t.Errorf("CHAT_UPLOAD_MAX_FILE_BYTES = %q, want %q", got, wantFile)
-	}
-
-	wantTotal := fmt.Sprintf("%d", chatUploadMaxBytes)
-	if got := env["CHAT_UPLOAD_MAX_TOTAL_BYTES"]; got != wantTotal {
-		t.Errorf("CHAT_UPLOAD_MAX_TOTAL_BYTES = %q, want %q", got, wantTotal)
-	}
-}
-
-// Pre-existing values win. A tenant override, plugin mutator, or A/B
-// experiment that already set the env MUST be preserved — the SSOT
-// helper is a defaulting layer, not an override layer.
-func TestApplyChatUploadLimits_PreExistingValuesPreserved(t *testing.T) {
-	env := map[string]string{
-		"CHAT_UPLOAD_MAX_FILE_BYTES":  "1234",
-		"CHAT_UPLOAD_MAX_TOTAL_BYTES": "5678",
-	}
-	applyChatUploadLimits(env)
-
-	if got := env["CHAT_UPLOAD_MAX_FILE_BYTES"]; got != "1234" {
-		t.Errorf("pre-existing CHAT_UPLOAD_MAX_FILE_BYTES overwritten: got %q", got)
-	}
-	if got := env["CHAT_UPLOAD_MAX_TOTAL_BYTES"]; got != "5678" {
-		t.Errorf("pre-existing CHAT_UPLOAD_MAX_TOTAL_BYTES overwritten: got %q", got)
-	}
-}
-
-// The 100 MB minimum is the CTO-directed allowance floor (issue #1520).
-// Pin so a future "tidy up: 100 MB seems large" refactor surfaces here
-// before reverting the user-visible behaviour change.
-func TestChatUploadCaps_MinimumAllowanceFloor(t *testing.T) {
-	const floor = 100 * 1024 * 1024
-	if chatUploadMaxBytes < floor {
-		t.Errorf("chatUploadMaxBytes = %d, below #1520 floor %d", chatUploadMaxBytes, floor)
-	}
-	if chatUploadMaxFileBytes < floor {
-		t.Errorf("chatUploadMaxFileBytes = %d, below #1520 floor %d", chatUploadMaxFileBytes, floor)
-	}
-}
@@ -37,7 +37,6 @@ package handlers
 import (
 	"context"
 	"errors"
-	"fmt"
 	"log"
 	"path/filepath"

@@ -133,10 +132,6 @@ func (h *WorkspaceHandler) prepareProvisionContext(
 	// a workspace_secret named GIT_AUTHOR_NAME can override.
 	applyAgentGitIdentity(envVars, payload.Name)
 	applyRuntimeModelEnv(envVars, payload.Runtime, payload.Model)
-	// SSOT for chat-upload limits — see chat_files.go::chatUploadMaxBytes.
-	// Injecting via env keeps the Python workspace runtime caps in
-	// lock-step with the Go cap on every provision. Fixes #1520.
-	applyChatUploadLimits(envVars)
 	if payload.Role != "" {
 		envVars["MOLECULE_AGENT_ROLE"] = payload.Role
 	}
@@ -228,28 +223,3 @@ func (h *WorkspaceHandler) markProvisionFailed(ctx context.Context, workspaceID,
 		log.Printf("markProvisionFailed: db update failed for %s: %v", workspaceID, dbErr)
 	}
 }
-
-// applyChatUploadLimits seeds the chat-upload cap env vars on the
-// workspace container so the Python /internal/chat/uploads/ingest
-// handler parses the multipart form with the same per-file allowance
-// that the Go proxy enforces.
-//
-// Why env-driven (and not, say, a hard-coded Python constant): keeping
-// one Go constant as the source of truth and forwarding it lets
-// operations bump the cap by editing one file + redeploy, instead of
-// editing two files in two languages and risking the drift that
-// shipped #1520 (Go cap 50 MB, Python parser cap 1 MiB — Starlette
-// default — so a 5 MB image always 400'd on parse before per-file
-// enforcement could fire).
-//
-// Pre-existing env wins. If something downstream (a tenant override,
-// a plugin mutator, an A/B experiment) has already set either var,
-// we leave it alone. Default-only injection.
-func applyChatUploadLimits(envVars map[string]string) {
-	if _, set := envVars["CHAT_UPLOAD_MAX_FILE_BYTES"]; !set {
-		envVars["CHAT_UPLOAD_MAX_FILE_BYTES"] = fmt.Sprintf("%d", chatUploadMaxFileBytes)
-	}
-	if _, set := envVars["CHAT_UPLOAD_MAX_TOTAL_BYTES"]; !set {
-		envVars["CHAT_UPLOAD_MAX_TOTAL_BYTES"] = fmt.Sprintf("%d", chatUploadMaxBytes)
-	}
-}
@@ -599,28 +599,6 @@ def _sanitize_for_external(msg: str) -> str:
    import re as _re

    msg = _re.sub(r"(?i)(?:bearer|token|api[_-]?key|sk-)[ :=]+[A-Za-z0-9_/.-]{20,}", "[REDACTED]", msg)
-    # Bare provider key with NO separator after the prefix — a real
-    # `sk-ant-api03-…` / `sk-…` key uses `-` (not `[ :=]`) so the rule
-    # above misses it. Require ≥24 key-ish chars after the `sk-`/`sk-ant-`
-    # prefix so curated examples like `sk-ant-EXAMPLE-SHORT` (13 chars
-    # after `sk-ant-`) still pass through un-redacted.
-    msg = _re.sub(r"(?i)\bsk-(?:ant-)?[A-Za-z0-9_-]{24,}", "[REDACTED]", msg)
-    # JSON-quoted credential values: {"token": "…"} / {"apiKey": "…"} /
-    # {"secret": "…"} / {"password": "…"}. Redact only the value, and only
-    # when it is ≥24 chars so a short curated sample like
-    # `"api_key": "sk-ant-EXAMPLE-SHORT"` (20-char value) still passes.
-    msg = _re.sub(
-        r'(?i)("(?:token|api[_-]?key|secret|password)"\s*:\s*")[^"]{24,}(")',
-        r"\1[REDACTED]\2",
-        msg,
-    )
-    # AWS secret access key in `aws_secret_access_key=…` form (env dumps,
-    # boto tracebacks). The base64-ish value runs until whitespace/quote.
-    msg = _re.sub(
-        r"(?i)(aws_secret_access_key\s*[:=]\s*)\S+",
-        r"\1[REDACTED]",
-        msg,
-    )
    # Absolute paths: /etc/shadow, /home/user/.aws/credentials, etc.
    msg = _re.sub(r"(?:/[^/\s]+){2,}", lambda m: m.group(0) if len(m.group(0)) < 60 else "[REDACTED_PATH]", msg)
    return msg
@@ -630,7 +608,6 @@ def sanitize_agent_error(
    exc: BaseException | None = None,
    category: str | None = None,
    stderr: str | None = None,
-    reason: str | None = None,
 ) -> str:
    """Render an agent-side failure into a user-safe error message.

@@ -638,18 +615,6 @@ def sanitize_agent_error(
    category string (e.g. from `classify_subprocess_error`). If both are
    given, `category` wins. If neither, the tag defaults to "unknown".

-    When ``reason`` is provided (internal#211/#212), it is a *pre-curated,
-    user-actionable, secret-safe* explanation built by the caller from a
-    provider-side failure — e.g. a 403 "Your organization has disabled
-    Claude subscription access · Use an Anthropic API key instead, or ask
-    your admin to enable access" with error code ``oauth_org_not_allowed``.
-    This text is exactly what the user needs to self-serve, so it is
-    surfaced VERBATIM as the message instead of being collapsed to the
-    opaque exception class name. It still passes through the
-    key/token/bearer/path scrubber as a belt-and-braces second pass so a
-    buggy caller can't leak a credential that snuck into the reason.
-    ``reason`` wins over ``stderr``; both lose to neither being set.
-
    When ``stderr`` is provided (e.g. the first ~1 KB of a subprocess stderr
    or HTTP error body), it is sanitized and appended to the output so the
    A2A caller gets actionable context without needing to dig through workspace
@@ -664,13 +629,6 @@ def sanitize_agent_error(
    else:
        tag = "unknown"

-    if reason:
-        # Curated, user-actionable reason — surface it as the message.
-        # Still scrub: a 403/auth/quota message is safe, but the scrubber
-        # is cheap insurance against a caller that didn't curate cleanly.
-        clean = _sanitize_for_external(reason[:_MAX_STDERR_PREVIEW])
-        return f"Agent error ({tag}): {clean}"
-
    if stderr:
        # Truncate and sanitize before including — prevents DoS via
        # a malicious or buggy peer injecting a huge error body, and
@@ -26,14 +26,9 @@ Path safety:
    a colliding name fails fast (the random prefix already makes
    collisions astronomical, but defense-in-depth costs nothing).

-Limits (SSOT — matches the Go contract from chat_files.go, injected
-via CHAT_UPLOAD_MAX_TOTAL_BYTES / CHAT_UPLOAD_MAX_FILE_BYTES at
-provision time; falls back to legacy 50 MB / 25 MB when env unset):
-    - CHAT_UPLOAD_MAX_TOTAL_BYTES total request body (default 50 MB)
-    - CHAT_UPLOAD_MAX_FILE_BYTES per file (default 25 MB)
-      ALSO passed as Starlette ``max_part_size`` to override the
-      Starlette-1.0 default of 1 MiB which silently 400'd every
-      upload > 1 MiB before #1520 fix.
+Limits (matches the Go contract from chat_files.go):
+    - 50 MB total request body
+    - 25 MB per file
    - filename truncated to 100 chars

 Response shape:
@@ -66,47 +61,14 @@ logger = logging.getLogger(__name__)
 # keeps working unchanged.
 CHAT_UPLOAD_DIR = "/workspace/.molecule/chat-uploads"

-def _env_int(name: str, default: int) -> int:
-    """Parse an int from the environment, falling back to ``default``.
-
-    Mis-formatted values (anything ``int()`` rejects) fall back to the
-    default rather than crashing module import — operations needs to be
-    able to roll back a bad env-var push by simply removing the var,
-    not by also fixing a worker that won't boot.
-    """
-    raw = os.environ.get(name)
-    if not raw:
-        return default
-    try:
-        return int(raw)
-    except (TypeError, ValueError):
-        logger.warning("internal_chat_uploads: env %s=%r not an int; using default %d", name, raw, default)
-        return default
-
 # Total-request body cap. multipart/form-data with multiple parts can
 # add ~100 bytes of framing per file; the cap is the bytes hitting the
 # socket, including framing.
-#
-# SSOT (issue #1520): the source of truth is the Go constant
-# chatUploadMaxBytes in workspace-server/internal/handlers/chat_files.go,
-# exported to the workspace container as CHAT_UPLOAD_MAX_TOTAL_BYTES at
-# provision time (workspace_provision_shared.go::applyChatUploadLimits).
-# Unset env → keep the previous 50 MB default so an unprovisioned /
-# locally-run workspace does NOT regress.
-CHAT_UPLOAD_MAX_BYTES = _env_int("CHAT_UPLOAD_MAX_TOTAL_BYTES", 50 * 1024 * 1024)
+CHAT_UPLOAD_MAX_BYTES = 50 * 1024 * 1024  # 50 MB

-# Per-file cap. SSOT (issue #1520): exported from the Go side as
-# CHAT_UPLOAD_MAX_FILE_BYTES; default 25 MB if env is unset so an older
-# workspace provisioned before the env-injection landed keeps the
-# legacy ceiling.
-#
-# This value is ALSO passed as Starlette's ``max_part_size`` (see
-# ingest_handler below) — Starlette 1.0 defaults max_part_size to
-# **1 MiB**, which is the actual root cause of #1520: any single file
-# part above 1 MiB raised MultiPartException before per-file enforcement
-# could fire. Wiring max_part_size to the same cap as per-file means
-# the user-visible ceiling is exactly the per-file cap, no surprises.
-CHAT_UPLOAD_MAX_FILE_BYTES = _env_int("CHAT_UPLOAD_MAX_FILE_BYTES", 25 * 1024 * 1024)
+# Per-file cap. Keeping per-file under total lets a user attach, say,
+# a 5 MB PDF + 10 small screenshots in a single batch.
+CHAT_UPLOAD_MAX_FILE_BYTES = 25 * 1024 * 1024  # 25 MB

 # Conservative {alnum, dot, underscore, dash} character class — anything
 # outside gets rewritten so embedded paths, control chars, newlines,
@@ -184,30 +146,11 @@ async def ingest_handler(request: Request) -> JSONResponse:
                status_code=413,
            )

-    # max_part_size: Starlette 1.0 defaults to 1 MiB. Any single
-    # part above that raises MultiPartException BEFORE per-file
-    # enforcement can run — which silently broke every chat upload
-    # > 1 MiB (issue #1520, fleet-wide P0 2026-05-18). Wire it to
-    # the per-file cap so the user-visible ceiling matches what
-    # the per-file 413 path expects.
    try:
-        form = await request.form(
-            max_files=64,
-            max_fields=32,
-            max_part_size=CHAT_UPLOAD_MAX_FILE_BYTES,
-        )
+        form = await request.form(max_files=64, max_fields=32)
    except Exception as exc:  # multipart parse error
        logger.warning("internal_chat_uploads: multipart parse failed: %s", exc)
-        # Surface the exception detail (feedback_surface_actionable_failure_reason_to_user):
-        # MultiPartException strings ("Part exceeded maximum size of …",
-        # "Invalid boundary", "Too many parts", etc.) contain no secrets
-        # — they describe shape, not content. The 200-char cap is
-        # belt-and-braces against an exception class we haven't seen
-        # whose ``str()`` is unbounded.
-        return JSONResponse(
-            {"error": "failed to parse multipart form", "detail": str(exc)[:200]},
-            status_code=400,
-        )
+        return JSONResponse({"error": "failed to parse multipart form"}, status_code=400)

    # Starlette's FormData allows multiple values per key — `files` may
    # appear multiple times for batched uploads. getlist returns them
@@ -788,123 +788,6 @@ def test_sanitize_agent_error_stderr_combined_with_existing_tests():
    assert "workspace logs" in out


-# ─── reason passthrough (internal#211/#212: surface actionable provider error) ───
-
-
-def test_sanitize_agent_error_reason_surfaced_verbatim():
-    """A curated provider reason is shown to the user, not collapsed to the
-    exception class name. This is the internal#211 regression: a 403
-    org-disabled message must reach the canvas."""
-    reason = (
-        "provider HTTP 403 — oauth_org_not_allowed — Your organization has "
-        "disabled Claude subscription access for Claude Code · Use an "
-        "Anthropic API key instead, or ask your admin to enable access"
-    )
-
-    class _ResultErr(Exception):
-        pass
-
-    out = sanitize_agent_error(exc=_ResultErr("opaque"), reason=reason)
-    # The actionable provider guidance and status code must be visible.
-    assert "403" in out
-    assert "oauth_org_not_allowed" in out
-    assert "disabled Claude subscription access" in out
-    assert "ask your admin to enable access" in out
-    # NOT the old opaque form.
-    assert "see workspace logs" not in out
-
-
-def test_sanitize_agent_error_reason_still_scrubs_secrets():
-    """Even on the reason path the key/token scrubber runs — a buggy caller
-    that lets a bearer token into the reason still gets it redacted."""
-    leaky = (
-        "provider HTTP 401 — auth failed — Authorization: Bearer "
-        "PLACEHOLDER_LONG_TOKEN_0123456789abcdefghijklm please re-auth"
-    )
-    out = sanitize_agent_error(reason=leaky)
-    assert "[REDACTED]" in out
-    assert "PLACEHOLDER_LONG_TOKEN_0123456789abcdefghijklm" not in out
-    # The non-secret guidance still survives the scrub.
-    assert "401" in out
-    assert "please re-auth" in out
-
-
-def test_sanitize_agent_error_reason_scrubs_all_secret_formats():
-    """The scrubber must redact every realistic credential shape — not just
-    the `Bearer <tok>` form the original test happened to exercise
-    (internal#212 review finding: bare `sk-ant-api03-…` keys, JSON-quoted
-    "token"/"apiKey" values, and `aws_secret_access_key=` all leaked).
-    All curated/actionable guidance must still survive the scrub.
-    """
-    # 1. Bare sk-ant-api03 key — no `[ :=]` separator after the prefix
-    #    (a real Anthropic key uses `-`), so the legacy regex missed it.
-    bare = (
-        "provider HTTP 401 — auth failed — invalid key "
-        "sk-FAKEPLACEHOLDERabcdefghijklmnopqrstuvwxy0123456789 "
-        "please re-auth"
-    )
-    out = sanitize_agent_error(reason=bare)
-    assert "sk-FAKEPLACEHOLDERabcdefghijklmnopqrstuvwxy0123456789" not in out
-    assert "[REDACTED]" in out
-    assert "401" in out  # actionable status survives
-    assert "please re-auth" in out  # actionable guidance survives
-
-    # 2. JSON-quoted "token" / "apiKey" values.
-    jblob = (
-        'provider error — config dump {"token": '
-        '"abcDEF0123456789ghIJKL0123456789mnopQRST", "apiKey": '
-        '"anon_fakefakefakefakefakefakefakefakefakefake"} — '
-        "use an API key instead"
-    )
-    out = sanitize_agent_error(reason=jblob)
-    assert "abcDEF0123456789ghIJKL0123456789mnopQRST" not in out
-    assert "anon_fakefakefakefakefakefakefakefakefakefake" not in out
-    assert "[REDACTED]" in out
-    assert "use an API key instead" in out  # actionable guidance survives
-
-    # 3. aws_secret_access_key=… form.
-    awsblob = (
-        "provider HTTP 403 — boto credential error "
-        "aws_secret_access_key=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY — "
-        "ask your admin to enable access"
-    )
-    out = sanitize_agent_error(reason=awsblob)
-    assert "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" not in out
-    assert "[REDACTED]" in out
-    assert "403" in out  # actionable status survives
-    assert "ask your admin to enable access" in out  # guidance survives
-
-    # 4. Regression: the original Bearer form still redacts.
-    # Uses PLACEHOLDER_LONG_TOKEN (>=40 chars, no sk-ant- prefix) to avoid
-    # triggering the secret-scan workflow pattern
-    # `sk-ant-[A-Za-z0-9_-]{40,}`.
-    bearer = (
-        "provider HTTP 401 — Authorization: Bearer "
-        "PLACEHOLDER_LONG_TOKEN_9876543210abcdefghij re-auth"
-    )
-    out = sanitize_agent_error(reason=bearer)
-    assert "PLACEHOLDER_LONG_TOKEN_9876543210abcdefghij" not in out
-    assert "[REDACTED]" in out
-    assert "re-auth" in out
-
-
-def test_sanitize_agent_error_reason_wins_over_stderr():
-    """When both reason and stderr are passed, the curated reason wins."""
-    out = sanitize_agent_error(
-        reason="provider HTTP 403 — use an API key",
-        stderr="raw subprocess noise that should not be shown",
-    )
-    assert "use an API key" in out
-    assert "raw subprocess noise" not in out
-
-
-def test_sanitize_agent_error_no_reason_unchanged():
-    """Omitting reason preserves the original generic behavior."""
-    out = sanitize_agent_error(exc=ValueError("boom"))
-    assert "ValueError" in out
-    assert "workspace logs" in out
-
-

 # ======================================================================
 # classify_subprocess_error
@@ -299,122 +299,3 @@ def test_symlink_at_target_is_refused(client: TestClient, chat_uploads_dir: Path
    assert r.status_code == 500, r.text
    # Sentinel content unchanged — the symlink wasn't followed.
    assert sentinel.read_bytes() == b"original"
-# ───────────── issue #1520: max_part_size + SSOT env-driven caps ─────────────
-
-
-def test_part_above_starlette_1mib_default_is_accepted(client: TestClient, chat_uploads_dir: Path):
-    """Regression: pre-fix, ANY single multipart part > 1 MiB raised
-    MultiPartException because the ingest handler called
-    ``request.form()`` without ``max_part_size`` and Starlette 1.0's
-    default is 1 MiB (issue #1520, fleet-wide P0 2026-05-18).
-
-    This test sends a 2 MiB part, which is well below the 25 MB default
-    per-file cap but ABOVE the Starlette default, so it pins the fix:
-    we now pass ``max_part_size=CHAT_UPLOAD_MAX_FILE_BYTES`` so the
-    parser uses the same cap the per-file 413 path expects.
-    """
-    payload = b"a" * (2 * 1024 * 1024)  # 2 MiB — > Starlette 1 MiB default
-    r = client.post(
-        "/internal/chat/uploads/ingest",
-        files={"files": ("big-but-allowed.bin", payload)},
-        headers={"Authorization": "Bearer test-secret"},
-    )
-    assert r.status_code == 200, r.text
-    item = r.json()["files"][0]
-    assert item["size"] == len(payload)
-
-
-def test_parse_error_surfaces_exception_detail(client: TestClient):
-    """Per feedback_surface_actionable_failure_reason_to_user: the 400
-    body must include a ``detail`` field naming WHICH multipart error
-    fired. The MultiPartException strings ("Part exceeded maximum size
-    of …", "Invalid boundary", "Too many parts", etc.) describe SHAPE
-    not content — no secrets.
-
-    We trigger a real Starlette MultiPartException by submitting a body
-    whose Content-Type advertises ``multipart/form-data`` but whose
-    body is not a valid multipart envelope — the parser raises before
-    any per-file check can fire.
-    """
-    r = client.post(
-        "/internal/chat/uploads/ingest",
-        content=b"this is not a valid multipart body",
-        headers={
-            "Authorization": "Bearer test-secret",
-            "Content-Type": "multipart/form-data; boundary=----not-a-real-boundary",
-        },
-    )
-    assert r.status_code == 400, r.text
-    body = r.json()
-    assert body["error"] == "failed to parse multipart form"
-    # Detail must be present + non-empty + bounded.
-    assert "detail" in body and isinstance(body["detail"], str)
-    assert body["detail"], "detail must not be empty"
-    assert len(body["detail"]) <= 200, "detail must be bounded"
-
-
-def test_total_cap_413_still_fires_above_per_file_pass(client: TestClient, monkeypatch: pytest.MonkeyPatch):
-    """Total-cap 413 path still works: two parts whose sum exceeds
-    CHAT_UPLOAD_MAX_BYTES but each individually fits the per-file cap.
-    Sanity-check that raising the per-file ceiling didn't accidentally
-    short-circuit the total-cap check.
-    """
-    monkeypatch.setattr(internal_chat_uploads, "CHAT_UPLOAD_MAX_BYTES", 1024)
-    monkeypatch.setattr(internal_chat_uploads, "CHAT_UPLOAD_MAX_FILE_BYTES", 800)
-    r = client.post(
-        "/internal/chat/uploads/ingest",
-        files=[
-            ("files", ("a.bin", b"a" * 600)),
-            ("files", ("b.bin", b"b" * 600)),
-        ],
-        headers={"Authorization": "Bearer test-secret"},
-    )
-    assert r.status_code == 413
-    # Either early (Content-Length pre-parse) or post-parse cumulative path is
-    # acceptable; both messages mention exceeding the total limit.
-    err = r.json()["error"]
-    assert "exceeds" in err and "limit" in err, err
-
-
-def test_env_driven_ssot_overrides_caps(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
-    """SSOT contract: setting CHAT_UPLOAD_MAX_FILE_BYTES /
-    CHAT_UPLOAD_MAX_TOTAL_BYTES in the environment at module import
-    time changes the module constants. Pin so the
-    workspace_provision_shared.go::applyChatUploadLimits env injection
-    cannot silently drift from what the Python side reads.
-    """
-    import importlib
-
-    monkeypatch.setenv("CHAT_UPLOAD_MAX_FILE_BYTES", str(7 * 1024 * 1024))
-    monkeypatch.setenv("CHAT_UPLOAD_MAX_TOTAL_BYTES", str(13 * 1024 * 1024))
-
-    reloaded = importlib.reload(internal_chat_uploads)
-    try:
-        assert reloaded.CHAT_UPLOAD_MAX_FILE_BYTES == 7 * 1024 * 1024
-        assert reloaded.CHAT_UPLOAD_MAX_BYTES == 13 * 1024 * 1024
-    finally:
-        # Reset to defaults so subsequent tests see clean constants.
-        monkeypatch.delenv("CHAT_UPLOAD_MAX_FILE_BYTES", raising=False)
-        monkeypatch.delenv("CHAT_UPLOAD_MAX_TOTAL_BYTES", raising=False)
-        importlib.reload(internal_chat_uploads)
-
-
-def test_env_driven_ssot_malformed_value_falls_back_to_default(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
-    """If ops pushes a garbage value the worker still boots with the
-    in-code default (operability over precision — see _env_int
-    docstring). Pin the fallback.
-    """
-    import importlib
-
-    monkeypatch.setenv("CHAT_UPLOAD_MAX_FILE_BYTES", "not-an-int")
-    monkeypatch.setenv("CHAT_UPLOAD_MAX_TOTAL_BYTES", "")  # empty == use default
-
-    reloaded = importlib.reload(internal_chat_uploads)
-    try:
-        # Defaults (legacy 25 MB / 50 MB) come back.
-        assert reloaded.CHAT_UPLOAD_MAX_FILE_BYTES == 25 * 1024 * 1024
-        assert reloaded.CHAT_UPLOAD_MAX_BYTES == 50 * 1024 * 1024
-    finally:
-        monkeypatch.delenv("CHAT_UPLOAD_MAX_FILE_BYTES", raising=False)
-        monkeypatch.delenv("CHAT_UPLOAD_MAX_TOTAL_BYTES", raising=False)
-        importlib.reload(internal_chat_uploads)