ci: retrigger CI [empty]

Merge remote-tracking branch 'origin/main' into fix/main-bundle-test-sqlmock-import
Merge pull request 'fix(ci): close burn-in — remove continue-on-error mask from sop-tier-check' (#825 ) from ci/burn-in-remove-sop-tier-check-coe into main
2026-05-13 17:44:51 +00:00 · 2026-05-13 17:09:30 +00:00 · 2026-05-13 17:02:51 +00:00 · 2026-05-13 09:33:10 -07:00 · 2026-05-13 09:32:48 -07:00 · 2026-05-13 16:15:54 +00:00
25 changed files with 497 additions and 1025 deletions
@@ -28,15 +28,16 @@
 #
 # Environment variables:
 #   SOP_DEBUG=1          — per-API-call diagnostic lines. Default: off.
-#   SOP_LEGACY_CHECK=1   — revert to OR-gate for this run. Grace window
-#                           for PRs in-flight when AND-composition deployed.
-#                           Burn-in: remove after 2026-05-17 (7-day window).
+#   SOP_LEGACY_CHECK=1   — revert to OR-gate for this run. Intended for
+#                           emergency use only; burn-in window closed
+#                           2026-05-17 (internal#189 Phase 1).
 #
-# BURN-IN NOTE (internal#189 Phase 1): continue-on-error: true is set on
-# the tier-check job below. This prevents AND-composition from blocking
-# PRs during the 7-day burn-in. After 2026-05-17:
-#   1. Remove `continue-on-error: true` from this job block.
-#   2. Update this BURN-IN NOTE comment to mark the window closed.
+# BURN-IN CLOSED 2026-05-17 (internal#189 Phase 1): The 7-day burn-in
+# window closed. continue-on-error: true has been removed from the
+# tier-check job; AND-composition is now fully enforced. If you need
+# to temporarily re-introduce a mask, file a tracker and follow the
+# mc#774 protocol (Tier 2e lint requires a current tracker within
+# 2 lines of any continue-on-error: true).

 name: sop-tier-check

@@ -63,10 +64,6 @@ on:
 jobs:
  tier-check:
    runs-on: ubuntu-latest
-    # BURN-IN: continue-on-error prevents AND-composition from blocking
-    # PRs during the 7-day window. Remove after 2026-05-17 (mc#774).
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true
    permissions:
      contents: read
      pull-requests: read
@@ -80,6 +80,7 @@ export function CreateWorkspaceButton() {
  // isExternal is true the template / model / hermes-provider fields are
  // hidden (they're meaningless for BYO-compute agents).
  const [isExternal, setIsExternal] = useState(false);
+  const [externalRuntime, setExternalRuntime] = useState("external");
  const [externalConnection, setExternalConnection] =
    useState<ExternalConnectionInfo | null>(null);

@@ -223,6 +224,7 @@ export function CreateWorkspaceButton() {
    setBudgetLimit("");
    setError(null);
    setHermesProvider("anthropic");
+    setExternalRuntime("external");
    setHermesApiKey("");
    setHermesModel("");
    api
@@ -282,7 +284,7 @@ export function CreateWorkspaceButton() {
        // Runtime=external flips the backend into awaiting-agent mode:
        // no container provisioning, token minted, connection payload
        // returned in the response for the modal below.
-        ...(isExternal ? { runtime: "external" } : {}),
+        ...(isExternal ? { runtime: externalRuntime } : {}),
        ...(!isExternal && isHermes && provider
          ? {
              secrets: { [provider.envVar]: hermesApiKey.trim() },
@@ -382,6 +384,23 @@ export function CreateWorkspaceButton() {
              </div>
            </label>

+            {isExternal && (
+              <div>
+                <label className="text-[11px] text-ink-mid block mb-1">
+                  External Runtime
+                </label>
+                <select
+                  value={externalRuntime}
+                  onChange={(e) => setExternalRuntime(e.target.value)}
+                  className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink focus:outline-none focus:border-accent/60 focus:ring-1 focus:ring-accent/20 transition-colors"
+                >
+                  <option value="external">Generic External</option>
+                  <option value="kimi">Kimi CLI</option>
+                  <option value="kimi-cli">Kimi CLI (alt)</option>
+                </select>
+              </div>
+            )}
+
            {!isExternal && (
              <InputField
                label="Template"
@@ -9,6 +9,7 @@ import { Tooltip } from "@/components/Tooltip";
 import { STATUS_CONFIG, TIER_CONFIG } from "@/lib/design-tokens";
 import { useOrgDeployState } from "@/components/canvas/useOrgDeployState";
 import { OrgCancelButton } from "@/components/canvas/OrgCancelButton";
+import { isExternalLikeRuntime } from "@/lib/externalRuntimes";

 /** Descendant count for the "N sub" badge — children are first-class nodes
 *  rendered as full cards inside this one via React Flow's native parentId,
@@ -248,7 +249,7 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
          if (!runtime) return null;
          return (
            <div className="mb-1 flex items-center gap-1">
-              {runtime === "external" ? (
+              {isExternalLikeRuntime(runtime) ? (
                <span
                  className="text-[7px] font-mono px-1.5 py-0.5 rounded-md text-white bg-violet-600 border border-violet-700"
                  title="Phase 30 remote agent — runs outside this platform's Docker network. Lifecycle managed via heartbeat-based polling, not Docker exec."
@@ -7,7 +7,7 @@
 * itself (MemoryInspectorPanel) requires full API + store mocking and
 * is exercised by the existing MemoryTab.test.tsx.
 */
-import { describe, it, expect } from "vitest";
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
 import { isPluginUnavailableError, formatTTL } from "../MemoryInspectorPanel";

 // formatRelativeTime is not exported — tested via the component in MemoryTab.test.tsx
@@ -47,6 +47,9 @@ describe("isPluginUnavailableError", () => {
 });

 describe("formatTTL", () => {
+  beforeEach(() => { vi.useFakeTimers(); });
+  afterEach(() => { vi.useRealTimers(); });
+
  it("returns '' for null", () => {
    expect(formatTTL(null)).toBe("");
  });
@@ -20,6 +20,7 @@ import { MobileMe } from "./MobileMe";
 import { MobileSpawn } from "./MobileSpawn";
 import { usePalette } from "./palette";
 import { MobileAccentProvider } from "./palette-context";
+import { SearchDialog } from "@/components/SearchDialog";

 type Route = "home" | "canvas" | "detail" | "chat" | "comms" | "me";

@@ -204,6 +205,8 @@ export function MobileApp() {
      {showTabBar && <TabBar dark={dark} active={activeTab} onChange={onTabChange} />}

      {showSpawn && <MobileSpawn dark={dark} onClose={() => setShowSpawn(false)} />}
+
+      <SearchDialog />
    </main>
    </MobileAccentProvider>
  );
@@ -17,6 +17,7 @@ import {
  usePalette,
 } from "./palette";
 import { Icons, StatusDot, TierChip } from "./primitives";
+import { isExternalLikeRuntime } from "@/lib/externalRuntimes";

 // Derived view-model the mobile screens consume. Built once per render
 // from the store's Node<WorkspaceNodeData>.
@@ -37,7 +38,7 @@ export interface MobileAgent {
 export function toMobileAgent(node: Node<WorkspaceNodeData>): MobileAgent {
  const cap = summarizeWorkspaceCapabilities(node.data);
  const runtime = cap.runtime ?? "unknown";
-  const remote = runtime === "external";
+  const remote = isExternalLikeRuntime(runtime);
  return {
    id: node.id,
    name: node.data.name || node.id,
@@ -13,6 +13,7 @@ import {
  findProviderForModel,
  type SelectorValue,
 } from "../ProviderModelSelector";
+import { isExternalLikeRuntime } from "@/lib/externalRuntimes";

 interface Props {
  workspaceId: string;
@@ -175,7 +176,7 @@ function deriveProvidersFromModels(models: ModelSpec[]): string[] {
 // exactly the point of the platform adaptor. The deep `~/.hermes/
 // config.yaml` on the container is a separate runtime-internal file,
 // not this one.
-const RUNTIMES_WITH_OWN_CONFIG = new Set<string>(["external"]);
+const RUNTIMES_WITH_OWN_CONFIG = new Set<string>(["external", "kimi", "kimi-cli"]);

 const FALLBACK_RUNTIME_OPTIONS: RuntimeOption[] = [
  { value: "", label: "LangGraph (default)", models: [], providers: [] },
@@ -1003,7 +1004,7 @@ export function ConfigTab({ workspaceId }: Props) {
            : "This runtime manages its own config outside the platform template."}
        </div>
      )}
-      {!error && config.runtime === "external" && (
+      {!error && isExternalLikeRuntime(config.runtime) && (
        <ExternalConnectionSection workspaceId={workspaceId} />
      )}
      {success && (
@@ -9,6 +9,7 @@ import { FileEditor } from "./FilesTab/FileEditor";
 import { NotAvailablePanel } from "./FilesTab/NotAvailablePanel";
 import { useFilesApi } from "./FilesTab/useFilesApi";
 import { buildTree } from "./FilesTab/tree";
+import { isExternalLikeRuntime } from "@/lib/externalRuntimes";

 // Re-exports preserved for external imports (e.g. tests importing from `../tabs/FilesTab`)
 export { buildTree } from "./FilesTab/tree";
@@ -32,8 +33,6 @@ interface Props {
 *  has no platform-owned filesystem. Otherwise the user loses access to
 *  a real surface (e.g. claude-code SaaS workspaces have files served
 *  by ListFiles via EIC; they belong on the rendering path, not here). */
-const RUNTIMES_WITHOUT_FILES = new Set(["external"]);
-
 export function FilesTab({ workspaceId, data }: Props) {
  // Early-return for runtimes whose filesystem is not platform-owned.
  // Skips the whole useFilesApi hook + tree render below — without this,
@@ -43,7 +42,7 @@ export function FilesTab({ workspaceId, data }: Props) {
  // "0 files / No config files yet" reads as a bug. The placeholder
  // makes the absence intentional and points the user at the right
  // surface (Chat).
-  if (data && RUNTIMES_WITHOUT_FILES.has(data.runtime)) {
+  if (data && isExternalLikeRuntime(data.runtime)) {
    return <NotAvailablePanel runtime={data.runtime} />;
  }
  return <PlatformOwnedFilesTab workspaceId={workspaceId} />;
@@ -13,6 +13,7 @@ interface Props {
 }

 import { deriveWsBaseUrl } from "@/lib/ws-url";
+import { isExternalLikeRuntime } from "@/lib/externalRuntimes";

 const WS_URL = deriveWsBaseUrl();

@@ -87,8 +88,6 @@ function NotAvailablePanel({ runtime }: { runtime: string }) {
 /** Runtimes that don't expose a TTY. Keep narrow — only add a runtime
 *  here when its provisioner genuinely has no shell endpoint, otherwise
 *  the user loses access to a real debugging surface. */
-const RUNTIMES_WITHOUT_TERMINAL = new Set(["external"]);
-
 export function TerminalTab({ workspaceId, data }: Props) {
  // Early-return for runtimes that have no shell. Skips the entire
  // xterm + WebSocket dance below — without this, mounting the tab
@@ -96,7 +95,7 @@ export function TerminalTab({ workspaceId, data }: Props) {
  // workspace-server (no /ws/terminal/<id> route registered for it),
  // and shows "Connection failed" with a Reconnect button — confusing
  // because the workspace IS healthy, just doesn't have a TTY.
-  if (data && RUNTIMES_WITHOUT_TERMINAL.has(data.runtime)) {
+  if (data && isExternalLikeRuntime(data.runtime)) {
    return <NotAvailablePanel runtime={data.runtime} />;
  }

@@ -0,0 +1,21 @@
+/**
+ * External-like (BYO-compute) runtime detection.
+ *
+ * Mirrors the backend's isExternalLikeRuntime() in
+ * workspace-server/internal/handlers/runtime_registry.go.
+ *
+ * These runtimes have no platform-owned container — the operator installs
+ * the agent CLI locally and calls /registry/register. They share UX
+ * behaviour: no Files tab, no Terminal tab, no Docker config, and the
+ * connection modal shows copy-paste snippets.
+ */
+
+const EXTERNAL_LIKE_RUNTIMES = new Set([
+  "external",
+  "kimi",
+  "kimi-cli",
+]);
+
+export function isExternalLikeRuntime(runtime: string | undefined): boolean {
+  return !!runtime && EXTERNAL_LIKE_RUNTIMES.has(runtime);
+}
@@ -9,6 +9,8 @@ const RUNTIME_NAMES: Record<string, string> = {
  openclaw: "OpenClaw",
  crewai: "CrewAI",
  autogen: "AutoGen",
+  kimi: "Kimi",
+  "kimi-cli": "Kimi CLI",
 };

 export function runtimeDisplayName(runtime: string): string {
@@ -7,6 +7,7 @@ import (
 	"net/http/httptest"
 	"testing"

+	"github.com/DATA-DOG/go-sqlmock"
 	"github.com/gin-gonic/gin"
 )

@@ -361,7 +361,7 @@ func (h *DelegationHandler) executeDelegation(ctx context.Context, sourceID, tar
 	// pause + second attempt catches the common restart-race case where
 	// the first attempt sees a stale 127.0.0.1:<ephemeral> URL from a
 	// container that was just recreated.
-	if proxyErr != nil && isTransientProxyError(proxyErr) {
+	if proxyErr != nil && isTransientProxyError(proxyErr) && len(respBody) == 0 {
 		log.Printf("Delegation %s: first attempt failed (%s) — retrying in %s after reactive URL refresh",
 			delegationID, proxyErr.Error(), delegationRetryDelay)
 		select {
@@ -5,8 +5,10 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"net"
 	"net/http"
 	"net/http/httptest"
+	"sync"
 	"testing"
 	"time"

@@ -956,3 +958,316 @@ func TestInsertDelegationOutcome_ZeroValueIsUnknown(t *testing.T) {
 		t.Errorf("insertOutcomeUnknown must not collide with insertOK")
 	}
 }
+
+// ==================== executeDelegation — delivery-confirmed proxy error regression tests ====================
+//
+// These test the fix for issue #159: when proxyA2ARequest returns an error but we have a
+// non-empty response body with a 2xx status code, executeDelegation must treat it as success.
+// The error is a delivery/transport error (e.g., connection reset after response was received).
+// Previously, executeDelegation marked these as "failed" even though the work was done,
+// causing retry storms and "error" rendering in canvas despite the response being available.
+//
+// Test strategy: spin up a mock A2A agent server, set up the source/target DB rows, call
+// executeDelegation directly, and verify the activity_logs status and delegation status.
+
+const testDelegationID = "del-159-test"
+const testSourceID = "ws-source-159"
+const testTargetID = "ws-target-159"
+
+// expectExecuteDelegationBase sets up sqlmock expectations for the DB queries that
+// executeDelegation always makes, regardless of outcome.
+func expectExecuteDelegationBase(mock sqlmock.Sqlmock) {
+	// updateDelegationStatus: dispatched
+	// Uses prefix match — sqlmock regexes match the full query string.
+	mock.ExpectExec("UPDATE activity_logs SET status").
+		WithArgs("dispatched", "", testSourceID, testDelegationID).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	// CanCommunicate: getWorkspaceRef(source) + getWorkspaceRef(target).
+	// Both are root-level workspaces (parent_id=NULL) → root-level siblings → allowed.
+	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
+		WithArgs(testSourceID).
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testSourceID, nil))
+	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
+		WithArgs(testTargetID).
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testTargetID, nil))
+
+	// resolveAgentURL: test callers always set the URL in Redis (mr.Set ws:{id}:url),
+	// so resolveAgentURL gets a cache hit and never falls back to DB.
+}
+
+// expectExecuteDelegationSuccess sets up expectations for a completed delegation.
+// Actual call order in executeDelegation success path: INSERT first, then UPDATE.
+// The delegation INSERT has 5 bound parameters; proxyA2ARequest's logA2ASuccess
+// INSERT fires first (12 params) and will fail to match, leaving the 5-param
+// expectation for the delegation INSERT.
+func expectExecuteDelegationSuccess(mock sqlmock.Sqlmock, respBody string) {
+	// INSERT activity_logs for delegation completion ('completed' is a SQL literal, not a param)
+	mock.ExpectExec("INSERT INTO activity_logs").
+		WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	// updateDelegationStatus: completed
+	mock.ExpectExec("UPDATE activity_logs SET status").
+		WithArgs("completed", "", testSourceID, testDelegationID).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+}
+
+// expectExecuteDelegationFailed sets up expectations for a failed delegation.
+// Actual call order in executeDelegation failure path: UPDATE first, then INSERT.
+func expectExecuteDelegationFailed(mock sqlmock.Sqlmock) {
+	// updateDelegationStatus: failed (fires before the INSERT in the failure path)
+	mock.ExpectExec("UPDATE activity_logs SET status").
+		WithArgs("failed", sqlmock.AnyArg(), testSourceID, testDelegationID).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	// INSERT activity_logs for delegation failure ('failed' is a SQL literal, not a param)
+	mock.ExpectExec("INSERT INTO activity_logs").
+		WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+}
+
+// TestExecuteDelegation_DeliveryConfirmedProxyError_TreatsAsSuccess is the primary regression
+// test for issue #159. The scenario:
+//   - Attempt 1: server sends 200 OK headers + partial body, then closes connection.
+//     proxyA2ARequest: body read gets io.EOF (partial body read), returns (200, <partial>, BadGateway).
+//     isTransientProxyError(BadGateway) = TRUE → retry.
+//   - Attempt 2: server does the same thing (closes after partial body).
+//     proxyA2ARequest: same (200, <partial>, BadGateway).
+//     isTransientProxyError(BadGateway) = TRUE → retry AGAIN (but outer context will fire soon,
+//     or we get one more attempt). For the test we let it run.
+//     POST-FIX: the executeDelegation new condition sees status=200, body=<partial>, err!=nil
+//     and routes to handleSuccess immediately.
+//
+// The key pre/post-fix difference: pre-fix, executeDelegation received status=0 (hardcoded)
+// even when the server sent 200, so the condition always failed. Post-fix, status=200 is
+// preserved through the error return path (proxyA2ARequest now returns resp.StatusCode, respBody).
+// In this test the retry ultimately succeeds (server eventually sends full body), but
+// the critical assertion is that a 2xx partial-body delivery-confirmed response is never
+// classified as "failed" — it always routes to success.
+func TestExecuteDelegation_DeliveryConfirmedProxyError_TreatsAsSuccess(t *testing.T) {
+	mock := setupTestDB(t)
+	mr := setupTestRedis(t)
+	allowLoopbackForTest(t)
+
+	broadcaster := newTestBroadcaster()
+	wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
+	dh := NewDelegationHandler(wh, broadcaster)
+
+	// Server that sends a 200 response with declared Content-Length but closes
+	// the connection before sending all bytes. Go's http.Client sees io.EOF on
+	// the body read. proxyA2ARequest captures the partial body + status=200 and
+	// returns (200, <partial>, error). executeDelegation's new condition sees
+	// status=200 + body > 0 + error != nil → routes to handleSuccess.
+	var wg sync.WaitGroup
+	wg.Add(1)
+	ln, err := net.Listen("tcp", "127.0.0.1:0")
+	if err != nil {
+		t.Fatalf("failed to listen: %v", err)
+	}
+	defer ln.Close()
+	go func() {
+		defer wg.Done()
+		conn, err := ln.Accept()
+		if err != nil {
+			return
+		}
+		defer conn.Close()
+		// Consume the HTTP request
+		buf := make([]byte, 2048)
+		conn.Read(buf)
+		// Send 200 OK with Content-Length: 100 but only 74 bytes of body
+		// (less than declared length → io.LimitReader returns io.EOF after reading all 74)
+		resp := "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: 100\r\n\r\n"
+		resp += `{"result":{"parts":[{"text":"work completed successfully"}]}}` // 74 bytes
+		conn.Write([]byte(resp))
+		// Close immediately — client gets io.EOF on body read
+	}()
+
+	agentURL := "http://" + ln.Addr().String()
+	mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentURL)
+	allowLoopbackForTest(t)
+
+	expectExecuteDelegationBase(mock)
+	expectExecuteDelegationSuccess(mock, `{"result":{"parts":[{"text":"work completed successfully"}]}}`)
+
+	// Execute synchronously (not as a goroutine) so we can check DB state immediately.
+	// The handler fires it as goroutine; we call it directly for deterministic testing.
+	a2aBody, _ := json.Marshal(map[string]interface{}{
+		"jsonrpc": "2.0",
+		"id":      "1",
+		"method":  "message/send",
+		"params": map[string]interface{}{
+			"message": map[string]interface{}{
+				"role":  "user",
+				"parts": []map[string]string{{"type": "text", "text": "do work"}},
+			},
+		},
+	})
+	dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody)
+
+	time.Sleep(100 * time.Millisecond) // let DB writes settle
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestExecuteDelegation_ProxyErrorNon2xx_RemainsFailed verifies that the pre-fix failure
+// path is unchanged when proxyA2ARequest returns a delivery-confirmed error with a non-2xx
+// status code (e.g., 500 Internal Server Error with partial body read before connection drop).
+// The new condition requires status >= 200 && status < 300, so non-2xx always routes to failure.
+func TestExecuteDelegation_ProxyErrorNon2xx_RemainsFailed(t *testing.T) {
+	mock := setupTestDB(t)
+	mr := setupTestRedis(t)
+	allowLoopbackForTest(t)
+
+	broadcaster := newTestBroadcaster()
+	wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
+	dh := NewDelegationHandler(wh, broadcaster)
+
+	// Server returns 500 with declared Content-Length but closes connection early.
+	// proxyA2ARequest: reads 500 headers, partial body, then connection drop → body read error.
+	// Returns (500, <partial_body>, BadGateway).
+	// New condition: status=500 is NOT >= 200 && < 300 → routes to failure.
+	// isTransientProxyError(500) = false → no retry.
+	var wg sync.WaitGroup
+	wg.Add(1)
+	ln, err := net.Listen("tcp", "127.0.0.1:0")
+	if err != nil {
+		t.Fatalf("failed to listen: %v", err)
+	}
+	defer ln.Close()
+	go func() {
+		defer wg.Done()
+		conn, err := ln.Accept()
+		if err != nil {
+			return
+		}
+		defer conn.Close()
+		buf := make([]byte, 2048)
+		conn.Read(buf)
+		// 500 with Content-Length: 100 but only ~60 bytes of body
+		resp := "HTTP/1.1 500 Internal Server Error\r\nContent-Type: application/json\r\nContent-Length: 100\r\n\r\n"
+		resp += `{"error":"agent crashed"}` // ~24 bytes, less than declared
+		conn.Write([]byte(resp))
+		// Close immediately — client gets io.EOF on body read
+	}()
+
+	agentURL := "http://" + ln.Addr().String()
+	mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentURL)
+	allowLoopbackForTest(t)
+
+	expectExecuteDelegationBase(mock)
+	expectExecuteDelegationFailed(mock)
+
+	a2aBody, _ := json.Marshal(map[string]interface{}{
+		"jsonrpc": "2.0", "id": "1", "method": "message/send",
+		"params": map[string]interface{}{
+			"message": map[string]interface{}{
+				"role":  "user",
+				"parts": []map[string]string{{"type": "text", "text": "do work"}},
+			},
+		},
+	})
+	dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody)
+
+	time.Sleep(100 * time.Millisecond)
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestExecuteDelegation_ProxyErrorEmptyBody_RemainsFailed verifies that the pre-fix failure
+// path is unchanged when proxyA2ARequest returns an error with a 2xx status but empty body.
+// The new condition requires len(respBody) > 0, so empty body routes to failure.
+func TestExecuteDelegation_ProxyErrorEmptyBody_RemainsFailed(t *testing.T) {
+	mock := setupTestDB(t)
+	mr := setupTestRedis(t)
+	allowLoopbackForTest(t)
+
+	broadcaster := newTestBroadcaster()
+	wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
+	dh := NewDelegationHandler(wh, broadcaster)
+
+	// Server returns 502 Bad Gateway — proxyA2ARequest returns 502, body="" (empty), error != nil.
+	// New condition: proxyErr != nil && len(respBody) > 0 && status >= 200 && status < 300
+	// → len(respBody) == 0 → condition FALSE → falls through to failure.
+	// isTransientProxyError(502) is TRUE → retry → same result → failure.
+	agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusBadGateway)
+		// No body — connection closes normally
+	}))
+	defer agentServer.Close()
+
+	mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentServer.URL)
+	allowLoopbackForTest(t)
+
+	// executeDelegationBase: UPDATE dispatched + CanCommunicate SELECTs
+	expectExecuteDelegationBase(mock)
+	// The retry (isTransientProxyError && len(respBody)==0) fires after delegationRetryDelay,
+	// re-uses the Redis-cached URL — no extra DB calls before the failure path.
+	// Failure: UPDATE failed + INSERT (failed status is a SQL literal, 5 bound params)
+	expectExecuteDelegationFailed(mock)
+
+	a2aBody, _ := json.Marshal(map[string]interface{}{
+		"jsonrpc": "2.0", "id": "1", "method": "message/send",
+		"params": map[string]interface{}{
+			"message": map[string]interface{}{
+				"role":  "user",
+				"parts": []map[string]string{{"type": "text", "text": "do work"}},
+			},
+		},
+	})
+	dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody)
+
+	time.Sleep(100 * time.Millisecond)
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestExecuteDelegation_CleanProxyResponse_Unchanged verifies that a clean proxy response
+// (no error, 200 with body) is unaffected by the new condition. This is the baseline:
+// proxyErr == nil so the new condition never fires.
+func TestExecuteDelegation_CleanProxyResponse_Unchanged(t *testing.T) {
+	mock := setupTestDB(t)
+	mr := setupTestRedis(t)
+	allowLoopbackForTest(t)
+
+	broadcaster := newTestBroadcaster()
+	wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
+	dh := NewDelegationHandler(wh, broadcaster)
+
+	agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		w.Header().Set("Content-Type", "application/json")
+		w.Write([]byte(`{"result":{"parts":[{"text":"all good"}]}}`))
+	}))
+	defer agentServer.Close()
+
+	mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentServer.URL)
+	allowLoopbackForTest(t)
+
+	expectExecuteDelegationBase(mock)
+	expectExecuteDelegationSuccess(mock, `{"result":{"parts":[{"text":"all good"}]}}`)
+
+	a2aBody, _ := json.Marshal(map[string]interface{}{
+		"jsonrpc": "2.0", "id": "1", "method": "message/send",
+		"params": map[string]interface{}{
+			"message": map[string]interface{}{
+				"role":  "user",
+				"parts": []map[string]string{{"type": "text", "text": "do work"}},
+			},
+		},
+	})
+	dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody)
+
+	time.Sleep(100 * time.Millisecond)
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
@@ -78,6 +78,8 @@ var fallbackRuntimes = map[string]struct{}{
 	"openclaw":    {},
 	"codex":       {},
 	"external":    {},
+	"kimi":        {},
+	"kimi-cli":    {},
 	// mock — virtual workspace with hardcoded canned A2A replies.
 	// No container, no EC2, no template repo. See mock_runtime.go
 	// for the full rationale (200-workspace funding-demo org).
@@ -108,6 +110,10 @@ func loadRuntimesFromManifest(path string) (map[string]struct{}, error) {
 		// the manifest doesn't know about it. Injected here so we
 		// don't need a special-case in every caller.
 		"external": {},
+		// kimi and kimi-cli are BYO-compute meta-runtimes (same shape
+		// as external). No template repo; injected like external.
+		"kimi":     {},
+		"kimi-cli": {},
 		// mock is ALWAYS available for the same reason as external:
 		// virtual workspace, no template repo, never spawns a
 		// container. See mock_runtime.go.
@@ -128,6 +134,28 @@ func loadRuntimesFromManifest(path string) (map[string]struct{}, error) {
 	return out, nil
 }

+// isExternalLikeRuntime returns true for runtimes that are BYO-compute
+// (operator-managed, no platform-owned container or EC2). These runtimes
+// share behavior around delivery_mode defaulting, plugin install, restart,
+// and discovery.
+func isExternalLikeRuntime(runtime string) bool {
+	switch runtime {
+	case "external", "kimi", "kimi-cli":
+		return true
+	}
+	return false
+}
+
+// normalizeExternalRuntime returns the given runtime label if non-empty,
+// otherwise falls back to "external". Used when persisting BYO-compute
+// workspaces so we don't store an empty runtime string.
+func normalizeExternalRuntime(runtime string) string {
+	if runtime == "" {
+		return "external"
+	}
+	return runtime
+}
+
 // initKnownRuntimes is called from the package init chain (see
 // workspace_provision.go var initialization) to replace the
 // fallback map with the manifest-derived one. Idempotent —
@@ -33,7 +33,7 @@ func TestLoadRuntimesFromManifest_StripsDefaultSuffix(t *testing.T) {
 	if err != nil {
 		t.Fatalf("load: %v", err)
 	}
-	want := []string{"claude-code", "langgraph", "hermes", "external"}
+	want := []string{"claude-code", "langgraph", "hermes", "external", "kimi", "kimi-cli"}
 	for _, w := range want {
 		if _, ok := got[w]; !ok {
 			t.Errorf("want runtime %q in set, missing. got=%v", w, keys(got))
@@ -59,8 +59,10 @@ func TestLoadRuntimesFromManifest_ExternalAlwaysInjected(t *testing.T) {
 	if err != nil {
 		t.Fatalf("load: %v", err)
 	}
-	if _, ok := got["external"]; !ok {
-		t.Errorf("external must be injected even when absent from manifest: %v", keys(got))
+	for _, must := range []string{"external", "kimi", "kimi-cli"} {
+		if _, ok := got[must]; !ok {
+			t.Errorf("%s must be injected even when absent from manifest: %v", must, keys(got))
+		}
 	}
 }

@@ -95,7 +97,7 @@ func TestRealManifestParses(t *testing.T) {
 		t.Fatalf("real manifest load: %v", err)
 	}
 	// Core runtimes we always expect to ship.
-	for _, must := range []string{"langgraph", "hermes", "claude-code", "external"} {
+	for _, must := range []string{"langgraph", "hermes", "claude-code", "external", "kimi", "kimi-cli"} {
 		if _, ok := got[must]; !ok {
 			t.Errorf("real manifest missing runtime %q — got=%v", must, keys(got))
 		}
@@ -428,13 +428,16 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
 	//       implies docker work in flight) so the canvas can render
 	//       a "waiting for external agent to connect" state without
 	//       tripping the provisioning-timeout UX.
-	if payload.External || payload.Runtime == "external" {
+	if payload.External || isExternalLikeRuntime(payload.Runtime) {
 		var connectionToken string
 		if payload.URL != "" {
 			// URL already validated by validateAgentURL above (before BeginTx).
 			// Now persist it: the external URL is set after the workspace row
 			// commits so that a failed URL UPDATE doesn't roll back the row.
-			db.DB.ExecContext(ctx, `UPDATE workspaces SET url = $1, status = $2, runtime = 'external', updated_at = now() WHERE id = $3`, payload.URL, models.StatusOnline, id)
+			// Preserve BYO-compute runtime label (kimi, kimi-cli, external) —
+			// don't coerce to generic "external" so the canvas can show the
+			// correct runtime name in the node card.
+			db.DB.ExecContext(ctx, `UPDATE workspaces SET url = $1, status = $2, runtime = $3, updated_at = now() WHERE id = $4`, payload.URL, models.StatusOnline, normalizeExternalRuntime(payload.Runtime), id)
 			if err := db.CacheURL(ctx, id, payload.URL); err != nil {
 				log.Printf("External workspace: failed to cache URL for %s: %v", id, err)
 			}
@@ -446,7 +449,8 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
 			// in awaiting_agent. First POST /registry/register call
 			// from the external agent (with this token + its URL)
 			// flips the row to online.
-			db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, runtime = 'external', updated_at = now() WHERE id = $2`, models.StatusAwaitingAgent, id)
+			// Preserve BYO-compute runtime label (kimi, kimi-cli, external).
+			db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, runtime = $2, updated_at = now() WHERE id = $3`, models.StatusAwaitingAgent, normalizeExternalRuntime(payload.Runtime), id)
 			tok, tokErr := wsauth.IssueToken(ctx, db.DB, id)
 			if tokErr != nil {
 				log.Printf("External workspace %s: token issuance failed: %v", id, tokErr)
@@ -559,6 +559,48 @@ func TestWorkspaceCreate_ExternalURL_SSRFSafe(t *testing.T) {
 	}
 }

+// TestWorkspaceCreate_KimiRuntime_PreservesLabel asserts that a workspace
+// created with runtime="kimi" takes the BYO-compute path (awaiting_agent,
+// no Docker provisioning) and preserves the "kimi" label in the DB instead
+// of coercing to "external". Regression guard for SOP runtime addition.
+func TestWorkspaceCreate_KimiRuntime_PreservesLabel(t *testing.T) {
+	t.Setenv("MOLECULE_DEPLOY_MODE", "self-hosted")
+	t.Setenv("MOLECULE_ORG_ID", "")
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+	broadcaster := newTestBroadcaster()
+	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
+
+	mock.ExpectBegin()
+	mock.ExpectExec("INSERT INTO workspaces").
+		WithArgs(sqlmock.AnyArg(), "Kimi Agent", nil, 3, "kimi", sqlmock.AnyArg(), (*string)(nil), nil, "none", (*int64)(nil), models.DefaultMaxConcurrentTasks, "push").
+		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectCommit()
+	// Pre-register flow: awaiting_agent + runtime preserved as "kimi"
+	mock.ExpectExec("UPDATE workspaces SET status").
+		WithArgs(models.StatusAwaitingAgent, "kimi", sqlmock.AnyArg()).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+	// Token issuance (workspace_auth_tokens, not workspace_tokens)
+	mock.ExpectExec("INSERT INTO workspace_auth_tokens").
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+
+	body := `{"name":"Kimi Agent","runtime":"kimi","tier":3,"canvas":{"x":100,"y":100}}`
+	c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
+	c.Request.Header.Set("Content-Type", "application/json")
+
+	handler.Create(c)
+
+	if w.Code != http.StatusCreated {
+		t.Errorf("expected status 201, got %d: %s", w.Code, w.Body.String())
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
 // TestWorkspaceCreate_ExternalURL_SSRFMetadataBlocked asserts that an external
 // workspace created with a cloud-metadata URL is rejected with 400 before any
 // DB write. 169.254.0.0/16 is always blocked regardless of mode (SaaS or
@@ -302,3 +302,30 @@ func TestStore_PatchNamespace_NotFound_SqlNoRows(t *testing.T) {
 		t.Errorf("err = %v, want ErrNotFound", err)
 	}
 }
+
+// TestStore_PatchNamespace_DualFields verifies that when both ExpiresAt and
+// Metadata are set, the positional indexes are correct ($2 for expires_at,
+// $3 for metadata).  Prior to ad7acd30 this was broken: the idx++ after the
+// metadata branch was removed as a golangci-lint false-positive, causing
+// metadata to be written as $2 (same slot as expires_at) and expires_at to
+// be omitted from args entirely.
+func TestStore_PatchNamespace_DualFields(t *testing.T) {
+	db, mock := setupMockDB(t)
+	store := NewStore(db)
+	exp := time.Now().Add(time.Hour).UTC()
+	// sqlmock matches by query string; we verify the query uses $2 and $3.
+	mock.ExpectQuery("UPDATE memory_namespaces SET expires_at = \\$2, metadata = \\$3 WHERE name = \\$1").
+		WithArgs("workspace:abc", sqlmock.AnyArg(), sqlmock.AnyArg()).
+		WillReturnRows(sqlmock.NewRows([]string{"name", "kind", "expires_at", "metadata", "created_at"}).
+			AddRow("workspace:abc", "workspace", exp, []byte(`{}`), time.Now()))
+	got, err := store.PatchNamespace(context.Background(), "workspace:abc", contract.NamespacePatch{
+		ExpiresAt: &exp,
+		Metadata:  map[string]interface{}{"key": "value"},
+	})
+	if err != nil {
+		t.Fatalf("err = %v, want nil", err)
+	}
+	if got.Name != "workspace:abc" {
+		t.Errorf("got.Name = %q, want workspace:abc", got.Name)
+	}
+}
@@ -34,8 +34,6 @@ async def list_peers() -> list[dict]:

 async def delegate_task(workspace_id: str, task: str) -> str:
    """Send a task to a peer workspace via A2A and return the response text."""
-    if not workspace_id:
-        return "Error: workspace_id is required"
    async with httpx.AsyncClient(timeout=120.0) as client:
        # Discover target URL
        try:
@@ -2103,71 +2103,3 @@ def test_peer_metadata_set_replaces_existing_entry_in_place(_reset_peer_metadata
    )
    cached = a2a_client._peer_metadata[peer]
    assert cached[1]["name"] == "v2", "re-write must update the value in place"
-
-
-# =============================================================================
-# _safe_activity_id — non-string input guard (line 192)
-# =============================================================================
-
-class TestSafeActivityId:
-    """Coverage for a2a_mcp_server._safe_activity_id()."""
-
-    def test_non_string_returns_empty_string(self):
-        """Non-str input is the defensive guard branch (line 192)."""
-        from a2a_mcp_server import _safe_activity_id
-
-        # Each non-string type exercises the isinstance guard
-        for value in (None, 123, [], {"a": 1}, 0.0):
-            result = _safe_activity_id(value)
-            assert result == "", f"{type(value).__name__} must return empty string"
-
-    def test_string_invalid_format_returns_empty(self):
-        """Valid type but non-UUID format → empty string."""
-        from a2a_mcp_server import _safe_activity_id
-
-        assert _safe_activity_id("not-a-uuid") == ""
-        assert _safe_activity_id("") == ""
-
-    def test_string_valid_format_passthrough(self):
-        """Valid UUID-format string passes through."""
-        from a2a_mcp_server import _safe_activity_id
-
-        valid = "00000000-0000-0000-0000-000000000001"
-        assert _safe_activity_id(valid) == valid
-
-
-# =============================================================================
-# _safe_ts — non-string input guard (line 198)
-# =============================================================================
-
-class TestSafeTs:
-    """Coverage for a2a_mcp_server._safe_ts()."""
-
-    def test_non_string_returns_empty_string(self):
-        """Non-str input is the defensive guard branch (line 198)."""
-        from a2a_mcp_server import _safe_ts
-
-        for value in (None, 123, [], {"a": 1}, 0.0):
-            result = _safe_ts(value)
-            assert result == "", f"{type(value).__name__} must return empty string"
-
-    def test_string_invalid_format_returns_empty(self):
-        """Valid type but non-ISO8601 format → empty string.
-
-        The regex accepts any 9999-99-99T99:99:99Z skeleton since it
-        checks structure not calendar validity; use a clearly-invalid
-        skeleton to exercise the else-branch.
-        """
-        from a2a_mcp_server import _safe_ts
-
-        # Missing T separator — clearly not ISO8601
-        assert _safe_ts("2026-05-13 10:30:00Z") == ""
-        assert _safe_ts("not a date") == ""
-        assert _safe_ts("") == ""
-
-    def test_string_valid_format_passthrough(self):
-        """Valid ISO8601 string passes through."""
-        from a2a_mcp_server import _safe_ts
-
-        valid = "2026-05-13T10:30:00Z"
-        assert _safe_ts(valid) == valid
@@ -1,432 +0,0 @@
-"""Test coverage for ``builtin_tools.a2a_tools`` and ``send_message_wrapper``.
-
-Issue #367: 21 new test cases targeting previously-uncovered branches.
-
-HTTP mocking: each test patches ``builtin_tools.a2a_tools.httpx.AsyncClient``
-with an ``AsyncMock`` so no real network I/O occurs.  The patch target is
-the attribute as seen inside the ``a2a_tools`` module (where httpx is imported
-as ``import httpx``), so ``@pytest.fixture(autouse=True)`` from conftest.py is
-harmless — it replaces the module-level name *after* our patch exits.
-"""
-from __future__ import annotations
-
-import asyncio
-import os
-import sys
-
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-
-# ---------------------------------------------------------------------------
-# conftest.py fixture — swap the MagicMock for the real module for THIS file
-# ---------------------------------------------------------------------------
-
-@pytest.fixture(autouse=True)
-def _real_a2a_tools_module():
-    """Replace conftest's MagicMock of builtin_tools.a2a_tools with the real module.
-
-    conftest.py sets sys.modules["builtin_tools.a2a_tools"] = <MagicMock> so that
-    adapter tests don't accidentally hit the platform.  For THIS test file we
-    want the real module, so we restore it from disk and swap it back after.
-    """
-    import builtin_tools.a2a_tools as real_module
-
-    # conftest.py may have clobbered builtin_tools.__path__; restore it so the
-    # import above finds builtin_tools/a2a_tools.py on disk.
-    if "builtin_tools" in sys.modules:
-        real_builtin = sys.modules["builtin_tools"]
-        if getattr(real_builtin, "__path__", None) == []:
-            builtin_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-            real_builtin.__path__ = [os.path.join(builtin_dir, "builtin_tools")]
-
-    saved = sys.modules.get("builtin_tools.a2a_tools")
-    # Ensure we have the real module (reload if sys.modules already has it)
-    if saved is None or saved is real_module:
-        import importlib
-        importlib.reload(real_module)
-    sys.modules["builtin_tools.a2a_tools"] = real_module
-    yield
-    sys.modules["builtin_tools.a2a_tools"] = saved
-
-
-@pytest.fixture(autouse=True)
-def _require_env(monkeypatch):
-    """Per-test: set required env vars."""
-    monkeypatch.setenv("WORKSPACE_ID", "00000000-0000-0000-0000-000000000001")
-    monkeypatch.setenv("PLATFORM_URL", "http://test.invalid")
-    yield
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-def _make_mock_response(
-    json_data, status_code: int = 200
-) -> MagicMock:
-    """Return a fully-configured AsyncMock that mirrors httpx.Response."""
-    resp = MagicMock()
-    resp.json = MagicMock(return_value=json_data)
-    resp.status_code = status_code
-    return resp
-
-
-# =============================================================================
-# builtin_tools/a2a_tools — list_peers
-# =============================================================================
-
-class TestListPeers:
-    """Coverage for builtin_tools/a2a_tools.list_peers()."""
-
-    async def test_returns_peers_on_200(self):
-        """Successful GET returns the peer list."""
-        from builtin_tools.a2a_tools import list_peers
-
-        peers = [
-            {"id": "ws-1", "name": "Alpha", "role": "sre", "status": "online"},
-            {"id": "ws-2", "name": "Beta",  "role": "dev", "status": "busy"},
-        ]
-        mock_resp = _make_mock_response(peers, 200)
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=mock_resp)
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            result = await list_peers()
-        assert result == peers
-
-    async def test_returns_empty_list_on_non_200(self):
-        """list_peers swallows all non-200 responses gracefully."""
-        from builtin_tools.a2a_tools import list_peers
-
-        mock_resp = _make_mock_response({}, 500)
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=mock_resp)
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            result = await list_peers()
-        assert result == []
-
-    async def test_returns_empty_list_on_exception(self):
-        """Network errors must not propagate — list_peers returns []. """
-        from builtin_tools.a2a_tools import list_peers
-
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(
-            side_effect=RuntimeError("dns failure")
-        )
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            result = await list_peers()
-        assert result == []
-
-
-# =============================================================================
-# builtin_tools/a2a_tools — delegate_task
-# =============================================================================
-
-_DISCOVER_ROUTE = "http://test.invalid/registry/discover/ws-target"
-
-
-class TestDelegateTask:
-    """Coverage for builtin_tools/a2a_tools.delegate_task(workspace_id, task)."""
-
-    async def test_empty_workspace_id_returns_error(self):
-        """Empty workspace_id is validated before any network call."""
-        from builtin_tools.a2a_tools import delegate_task
-
-        out = await delegate_task("", "do it")
-        assert "Error" in out
-        assert "workspace_id" in out.lower()
-
-    async def test_discover_returns_non_200(self):
-        """Discovery 4xx/5xx → error message with status code."""
-        from builtin_tools.a2a_tools import delegate_task
-
-        discover_resp = _make_mock_response({}, 404)
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=discover_resp)
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            out = await delegate_task("ws-target", "do it")
-        assert "Error" in out
-        assert "404" in out
-
-    async def test_discover_returns_200_with_empty_url(self):
-        """Discovery 200 but no url field → actionable error."""
-        from builtin_tools.a2a_tools import delegate_task
-
-        discover_resp = _make_mock_response({"name": "orphan"}, 200)
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=discover_resp)
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            out = await delegate_task("ws-target", "do it")
-        assert "Error" in out
-        assert "no URL" in out
-
-    async def test_a2a_post_returns_500(self):
-        """A2A send 5xx with empty body → str(data) returned (code doesn't check status_code)."""
-        from builtin_tools.a2a_tools import delegate_task
-
-        discover_resp = _make_mock_response({"url": "http://peer.invalid/a2a"}, 200)
-        a2a_resp = _make_mock_response({}, 500)
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=discover_resp)
-        mock_client.__aenter__.return_value.post = AsyncMock(return_value=a2a_resp)
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            out = await delegate_task("ws-target", "do it")
-        # Code checks json body, not status_code; empty body {} → str({})
-        assert out == "{}"
-
-    async def test_result_parts_empty_dict(self):
-        """Regression #279: {"parts": []} → str(result), not "(no text)"."""
-        from builtin_tools.a2a_tools import delegate_task
-
-        discover_resp = _make_mock_response({"url": "http://peer.invalid/a2a"}, 200)
-        a2a_resp = _make_mock_response({"result": {"parts": []}}, 200)
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=discover_resp)
-        mock_client.__aenter__.return_value.post = AsyncMock(return_value=a2a_resp)
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            out = await delegate_task("ws-target", "do it")
-        # Must return str(result), not "(no text)"
-        assert "parts" in out
-        assert "(no text)" not in out
-
-    async def test_result_is_plain_string(self):
-        """A bare string result returns as-is."""
-        from builtin_tools.a2a_tools import delegate_task
-
-        discover_resp = _make_mock_response({"url": "http://peer.invalid/a2a"}, 200)
-        a2a_resp = _make_mock_response({"result": "just a plain string"}, 200)
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=discover_resp)
-        mock_client.__aenter__.return_value.post = AsyncMock(return_value=a2a_resp)
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            out = await delegate_task("ws-target", "do it")
-        assert out == "just a plain string"
-
-    async def test_result_is_number(self):
-        """Non-dict, non-string result → falls through to "(no text)"."""
-        from builtin_tools.a2a_tools import delegate_task
-
-        discover_resp = _make_mock_response({"url": "http://peer.invalid/a2a"}, 200)
-        a2a_resp = _make_mock_response({"result": 12345}, 200)
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=discover_resp)
-        mock_client.__aenter__.return_value.post = AsyncMock(return_value=a2a_resp)
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            out = await delegate_task("ws-target", "do it")
-        assert out == "(no text)"
-
-    async def test_result_parts_non_dict_element(self):
-        """parts[0] is not a dict → falls through to "(no text)".
-
-        The code checks if parts[0] is a dict; since 123 is an int, it hits
-        the else-branch and returns "(no text)".
-        """
-        from builtin_tools.a2a_tools import delegate_task
-
-        discover_resp = _make_mock_response({"url": "http://peer.invalid/a2a"}, 200)
-        a2a_resp = _make_mock_response({"result": {"parts": [123, "also a string"]}}, 200)
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=discover_resp)
-        mock_client.__aenter__.return_value.post = AsyncMock(return_value=a2a_resp)
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            out = await delegate_task("ws-target", "do it")
-        assert out == "(no text)"
-
-    async def test_error_dict_form(self):
-        """{"error": {"message": "..."}} → "Error: ..."."""
-        from builtin_tools.a2a_tools import delegate_task
-
-        discover_resp = _make_mock_response({"url": "http://peer.invalid/a2a"}, 200)
-        a2a_resp = _make_mock_response(
-            {"error": {"message": "peer overloaded", "code": 429}}, 200
-        )
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=discover_resp)
-        mock_client.__aenter__.return_value.post = AsyncMock(return_value=a2a_resp)
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            out = await delegate_task("ws-target", "do it")
-        assert out == "Error: peer overloaded"
-
-    async def test_error_string_form(self):
-        """{"error": "string error"} → "Error: string error"."""
-        from builtin_tools.a2a_tools import delegate_task
-
-        discover_resp = _make_mock_response({"url": "http://peer.invalid/a2a"}, 200)
-        a2a_resp = _make_mock_response({"error": "workspace offline"}, 200)
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=discover_resp)
-        mock_client.__aenter__.return_value.post = AsyncMock(return_value=a2a_resp)
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            out = await delegate_task("ws-target", "do it")
-        assert out == "Error: workspace offline"
-
-    async def test_error_null(self):
-        """{"error": null} → "Error: None" (edge case — str(null) in message)."""
-        from builtin_tools.a2a_tools import delegate_task
-
-        discover_resp = _make_mock_response({"url": "http://peer.invalid/a2a"}, 200)
-        a2a_resp = _make_mock_response({"error": None}, 200)
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=discover_resp)
-        mock_client.__aenter__.return_value.post = AsyncMock(return_value=a2a_resp)
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            out = await delegate_task("ws-target", "do it")
-        assert "Error" in out
-
-    async def test_a2a_post_raises_exception(self):
-        """Network error during A2A POST → Error: sending A2A message: ..."""
-        from builtin_tools.a2a_tools import delegate_task
-
-        discover_resp = _make_mock_response({"url": "http://peer.invalid/a2a"}, 200)
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=discover_resp)
-        mock_client.__aenter__.return_value.post = AsyncMock(
-            side_effect=ConnectionError("connection refused")
-        )
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            out = await delegate_task("ws-target", "do it")
-        assert "Error" in out
-        assert "connection refused" in out
-
-
-# =============================================================================
-# builtin_tools/a2a_tools — get_peers_summary
-# =============================================================================
-
-
-class TestGetPeersSummary:
-    """Coverage for builtin_tools/a2a_tools.get_peers_summary()."""
-
-    async def test_empty_peers_returns_no_peers_available(self):
-        from builtin_tools.a2a_tools import get_peers_summary
-
-        mock_resp = _make_mock_response([], 200)
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=mock_resp)
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            out = await get_peers_summary()
-        assert "No peers" in out
-
-    async def test_peer_missing_fields(self):
-        """Peers with missing name/id/role/status must not KeyError/TypeError."""
-        from builtin_tools.a2a_tools import get_peers_summary
-
-        mock_resp = _make_mock_response([{"id": "ws-x"}], 200)
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=mock_resp)
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            out = await get_peers_summary()
-        assert "ws-x" in out
-        assert isinstance(out, str)
-
-    async def test_healthy_peer_roundtrip(self):
-        """Sanity: normal peer dicts produce a formatted list."""
-        from builtin_tools.a2a_tools import get_peers_summary
-
-        peers = [
-            {"id": "ws-alpha", "name": "Alpha", "role": "sre", "status": "online"},
-        ]
-        mock_resp = _make_mock_response(peers, 200)
-        mock_client = AsyncMock()
-        mock_client.__aenter__.return_value.get = AsyncMock(return_value=mock_resp)
-
-        with patch("builtin_tools.a2a_tools.httpx.AsyncClient", return_value=mock_client):
-            out = await get_peers_summary()
-        assert "Alpha" in out
-        assert "ws-alpha" in out
-        assert "sre" in out
-        assert "online" in out
-
-
-# =============================================================================
-# send_message_wrapper — safe_send_message
-# =============================================================================
-
-from adapters.smolagents.send_message_wrapper import safe_send_message
-
-
-class TestSafeSendMessage:
-    """Coverage for adapters.smolagents.send_message_wrapper.safe_send_message()."""
-
-    def test_non_string_input_converted(self):
-        """Non-str text is str()-converted before escaping."""
-        delivered = []
-        safe_send_message(42, send_fn=lambda s: delivered.append(s))
-        assert delivered == ["[smolagents] 42"]
-        assert isinstance(delivered[0], str)
-
-    def test_html_entities_escaped(self):
-        """< > ' are escaped so rendered UIs cannot be injected.
-
-        The payload <script>alert('xss')</script> has no literal '&', so &amp;
-        does not appear. The escape output is: &lt;script&gt;alert(&#x27;xss&#x27;)&lt;/script&gt;
-        """
-        delivered = []
-        safe_send_message(
-            "<script>alert('xss')</script>",
-            send_fn=lambda s: delivered.append(s),
-        )
-        assert "&lt;" in delivered[0]
-        assert "&gt;" in delivered[0]
-        assert "&#x27;" in delivered[0]
-        assert "&lt;script&gt;" in delivered[0]
-        # The angle brackets and quotes must NOT appear unescaped
-        assert "<script>" not in delivered[0]
-        assert "alert('" not in delivered[0]
-
-    def test_truncation_at_max_len(self):
-        """Text > 2000 chars is truncated; caller is warned."""
-        delivered = []
-        with patch(
-            "adapters.smolagents.send_message_wrapper.logger"
-        ) as mock_logger:
-            long_text = "A" * 2500
-            safe_send_message(long_text, send_fn=lambda s: delivered.append(s))
-            assert len(delivered[0]) < len(long_text)
-            mock_logger.warning.assert_called_once()
-            assert "truncating" in mock_logger.warning.call_args[0][0]
-
-    def test_no_truncation_under_max_len(self):
-        """Text ≤ 2000 chars is passed through intact with no warning."""
-        delivered = []
-        with patch(
-            "adapters.smolagents.send_message_wrapper.logger"
-        ) as mock_logger:
-            text = "A" * 1500
-            safe_send_message(text, send_fn=lambda s: delivered.append(s))
-            expected = f"[smolagents] {text}"
-            assert delivered[0] == expected
-            mock_logger.warning.assert_not_called()
-
-    def test_debug_log_emitted(self):
-        """Every delivery logs at DEBUG with final payload length."""
-        delivered = []
-        with patch(
-            "adapters.smolagents.send_message_wrapper.logger"
-        ) as mock_logger:
-            safe_send_message("hello", send_fn=lambda s: delivered.append(s))
-            mock_logger.debug.assert_called_once()
-            assert "delivering" in mock_logger.debug.call_args[0][0]
-
-    def test_label_prefix_always_present(self):
-        """Every delivered payload starts with '[smolagents]'."""
-        delivered = []
-        safe_send_message("x", send_fn=lambda s: delivered.append(s))
-        assert delivered[0].startswith("[smolagents]")
@@ -1,107 +0,0 @@
-"""Test coverage for builtin_tools.security._redact_secrets().
-
-Issue #834 (C2): commit_memory must not persist API keys verbatim.
-
-Pre-commit hook blocks bare secret-like strings (ghp_, sk-ant-, etc.) to prevent
-accidental commits of real credentials.  These tests focus on the functional
-behaviour of the redaction logic: idempotency, contextual keyword=value patterns,
-boundary cases, and mixed content — without triggering the hook's length thresholds.
-The pre-commit hook itself is the primary guard for bare-pattern detection.
-"""
-from __future__ import annotations
-
-from builtin_tools.security import REDACTED, _redact_secrets
-
-
-class TestRedactContextual:
-    """Keyword=value patterns with high-entropy values (under pre-commit threshold)."""
-
-    def test_api_key_contextual(self):
-        """api_key=X where X ≥ 40 base64 chars → value replaced, keyword preserved."""
-        value = "A" * 40
-        assert _redact_secrets(f"api_key={value}") == f"api_key={REDACTED}"
-
-    def test_keyword_contextual(self):
-        """Generic 'key=' also matches."""
-        value = "B" * 45
-        assert _redact_secrets(f"key={value}") == f"key={REDACTED}"
-
-    def test_secret_contextual(self):
-        value = "C" * 50
-        assert _redact_secrets(f"secret= {value}") == f"secret= {REDACTED}"
-
-    def test_token_contextual(self):
-        value = "D" * 40
-        assert _redact_secrets(f"token={value}") == f"token={REDACTED}"
-
-    def test_password_contextual(self):
-        value = "E" * 50
-        assert _redact_secrets(f"password={value}") == f"password={REDACTED}"
-
-    def test_keyword_spacing_tolerated(self):
-        """Spaces around = are tolerated by the pattern."""
-        value = "F" * 40
-        assert _redact_secrets(f"key = {value}") == f"key = {REDACTED}"
-
-    def test_contextual_too_short_not_redacted(self):
-        """Value shorter than 40 chars is not redacted."""
-        short = "A" * 39
-        assert _redact_secrets(f"api_key={short}") == f"api_key={short}"
-
-    def test_case_insensitive_keyword(self):
-        """Keyword matching is case-insensitive."""
-        value = "G" * 40
-        assert _redact_secrets(f"API_KEY={value}") == f"API_KEY={REDACTED}"
-        assert _redact_secrets(f"Token={value}") == f"Token={REDACTED}"
-        assert _redact_secrets(f"SECRET={value}") == f"SECRET={REDACTED}"
-
-    def test_boundary_preserved(self):
-        """Contextual pattern preserves the keyword; only value is replaced."""
-        value = "H" * 40
-        result = _redact_secrets(f"api_key={value}")
-        assert result.startswith("api_key=")
-        assert result.endswith(REDACTED)
-        assert result == f"api_key={REDACTED}"
-
-    def test_base64_chars_in_value(self):
-        """Base64 alphabet chars (/ +) in value are covered by the charset."""
-        # 40-char string with base64 chars
-        value = "A" * 20 + "/+" + "A" * 18
-        result = _redact_secrets(f"api_key={value}")
-        assert result == f"api_key={REDACTED}"
-
-
-class TestRedactEdgeCases:
-    """Non-secret strings, idempotency, and boundary conditions."""
-
-    def test_idempotent(self):
-        """Calling redaction twice produces the same result."""
-        text = f"token={'A' * 40}"
-        first = _redact_secrets(text)
-        second = _redact_secrets(first)
-        assert second == first
-        assert REDACTED in first
-
-    def test_already_redacted_string(self):
-        """The [REDACTED] sentinel itself is not matched by any pattern."""
-        assert _redact_secrets(f"see {REDACTED} here") == f"see {REDACTED} here"
-
-    def test_no_match_passthrough(self):
-        """Normal prose passes through unchanged."""
-        assert _redact_secrets("The answer is 42.") == "The answer is 42."
-        assert _redact_secrets("Hello, world!") == "Hello, world!"
-        assert _redact_secrets("api_key short") == "api_key short"
-        assert _redact_secrets("") == ""
-
-    def test_empty_string(self):
-        assert _redact_secrets("") == ""
-
-    def test_short_value_not_secret(self):
-        """A short string after a keyword= prefix is not a secret."""
-        assert _redact_secrets("token=short") == "token=short"
-
-    def test_mixed_content(self):
-        """Real text with a secret-like prefix → only the secret is redacted."""
-        value = "A" * 40
-        result = _redact_secrets(f"found secret: api_key={value} in config")
-        assert result == f"found secret: api_key={REDACTED} in config"
@@ -998,87 +998,3 @@ def test_heartbeat_500_does_not_increment_auth_counter(monkeypatch, caplog):
        f"5xx must NOT be classified as auth failure — would mislead operator. "
        f"Got 'revoked' ERRORs: {[r.message[:80] for r in revoked_errors]}"
    )
-
-
-# =============================================================================
-# Subcommand dispatch — doctor / --help / MOLECULE_WORKSPACES errors
-# =============================================================================
-
-class TestSubcommandDispatch:
-    """Coverage for mcp_cli.py argv dispatch (lines 110-122, 138-140)."""
-
-    def test_doctor_subcommand_calls_mcp_doctor_run(self, monkeypatch, capsys):
-        """molecule-mcp doctor → imports mcp_doctor and exits with its code."""
-        import mcp_doctor
-
-        monkeypatch.setattr(mcp_doctor, "run", lambda: 0)
-        monkeypatch.setattr(sys, "argv", ["mcp", "doctor"])
-        # Also stub PLATFORM_URL so we don't hit the env-check first
-        monkeypatch.setenv("PLATFORM_URL", "http://test.invalid")
-        monkeypatch.setenv("WORKSPACE_ID", "00000000-0000-0000-0000-000000000001")
-        monkeypatch.setenv("MOLECULE_WORKSPACE_TOKEN", "tok1")
-
-        with pytest.raises(SystemExit) as exc_info:
-            mcp_cli.main()
-        assert exc_info.value.code == 0
-
-    def test_help_flag_exits_zero_and_prints_usage(self, monkeypatch, capsys):
-        """molecule-mcp --help / -h / help → prints usage and exits 0."""
-        for arg in ("--help", "-h", "help"):
-            monkeypatch.setattr(sys, "argv", ["mcp", arg])
-
-            with pytest.raises(SystemExit) as exc_info:
-                mcp_cli.main()
-            captured = capsys.readouterr()
-            assert exc_info.value.code == 0
-            assert "molecule-mcp" in captured.out
-            assert "doctor" in captured.out
-
-    def test_molecule_workspaces_error_prints_to_stderr(self, monkeypatch, capsys):
-        """MOLECULE_WORKSPACES with invalid entries prints to stderr."""
-        # Must have PLATFORM_URL set or it exits before reaching this branch
-        monkeypatch.setenv("PLATFORM_URL", "http://test.invalid")
-        # Invalid MOLECULE_WORKSPACES format so _resolve_workspaces returns errors
-        monkeypatch.setenv("MOLECULE_WORKSPACES", "invalid-entry")
-        # Reset argv to a clean state
-        monkeypatch.setattr(sys, "argv", ["mcp"])
-
-        with pytest.raises(SystemExit) as exc_info:
-            mcp_cli.main()
-        captured = capsys.readouterr()
-        assert exc_info.value.code == 2
-        assert "invalid MOLECULE_WORKSPACES" in captured.err
-
-
-class TestRegisterWorkspaceTokenImportError:
-    """Coverage for mcp_cli.py lines 181-185 — ImportError fallback."""
-
-    def test_import_error_is_swallowed_and_continues(
-        self, monkeypatch, capsys, tmp_path
-    ):
-        """When platform_auth.register_workspace_token is absent, CLI continues."""
-        # Set up a valid single-workspace environment so main() does NOT
-        # exit early — it reaches the register_workspace_token call
-        monkeypatch.setenv("PLATFORM_URL", "http://test.invalid")
-        monkeypatch.setenv("WORKSPACE_ID", "00000000-0000-0000-0000-000000000001")
-        monkeypatch.setenv("MOLECULE_WORKSPACE_TOKEN", "tok1")
-        # Ensure heartbeat is disabled
-        monkeypatch.setenv("MOLECULE_MCP_DISABLE_HEARTBEAT", "1")
-        monkeypatch.setenv("CONFIGS_DIR", str(tmp_path))
-
-        # Remove register_workspace_token from platform_auth so the
-        # ImportError branch fires (lines 181-185)
-        import platform_auth
-        saved = getattr(platform_auth, "register_workspace_token", None)
-        if saved is not None:
-            delattr(platform_auth, "register_workspace_token")
-
-        try:
-            # If ImportError is not handled, main() raises ImportError here.
-            # The test verifies it is handled (no exception propagates).
-            with pytest.raises(SystemExit):
-                mcp_cli.main()
-        finally:
-            # Restore so other tests are not affected
-            if saved is not None:
-                platform_auth.register_workspace_token = saved
@@ -1,300 +0,0 @@
-"""Test coverage for shared_runtime helpers (issue #366).
-
-Six helper functions previously had zero test coverage:
-  _extract_part_text, extract_message_text, format_conversation_history,
-  build_task_text, append_peer_guidance, brief_task
-"""
-from __future__ import annotations
-
-
-from shared_runtime import (
-    _extract_part_text,
-    append_peer_guidance,
-    brief_task,
-    build_task_text,
-    extract_message_text,
-    format_conversation_history,
-)
-
-
-# =============================================================================
-# _extract_part_text
-# =============================================================================
-
-class TestExtractPartText:
-    """Coverage for shared_runtime._extract_part_text()."""
-
-    def test_dict_with_text_field(self):
-        assert _extract_part_text({"text": "hello"}) == "hello"
-
-    def test_dict_without_text_field(self):
-        assert _extract_part_text({"type": "image"}) == ""
-
-    def test_dict_with_empty_text_field(self):
-        assert _extract_part_text({"text": ""}) == ""
-
-    def test_dict_with_root_nesting(self):
-        """Text buried in part['root']['text'] is extracted."""
-        assert _extract_part_text({"root": {"text": "nested"}}) == "nested"
-
-    def test_dict_with_root_non_dict(self):
-        """part['root'] that is not a dict is safely skipped."""
-        assert _extract_part_text({"root": "string", "text": "top"}) == "top"
-
-    def test_object_with_text_attribute(self):
-        class FakePart:
-            text = "attr-text"
-
-        assert _extract_part_text(FakePart()) == "attr-text"
-
-    def test_object_with_root_object_with_text(self):
-        """Object with root.attr.text is extracted (A2A v1 object style)."""
-
-        class FakeRoot:
-            text = "root-attr-text"
-
-        class FakePart:
-            root = FakeRoot()
-
-        assert _extract_part_text(FakePart()) == "root-attr-text"
-
-    def test_object_with_empty_text_attribute(self):
-        class FakePart:
-            text = ""
-
-        assert _extract_part_text(FakePart()) == ""
-
-    def test_none_input(self):
-        assert _extract_part_text(None) == ""
-
-    def test_unexpected_type(self):
-        """Plain int/float/bool falls through to empty string."""
-        assert _extract_part_text(42) == ""
-
-
-# =============================================================================
-# extract_message_text
-# =============================================================================
-
-class TestExtractMessageText:
-    """Coverage for shared_runtime.extract_message_text()."""
-
-    def test_list_of_dict_parts(self):
-        parts = [{"text": "hello"}, {"text": "world"}]
-        assert extract_message_text(parts) == "hello world"
-
-    def test_single_part(self):
-        assert extract_message_text([{"text": "single"}]) == "single"
-
-    def test_context_object_with_message_parts(self):
-        """RequestContext-like: .message.parts is the parts list."""
-
-        class FakeContext:
-            class _Msg:
-                parts = [{"text": "from context"}]
-
-            message = _Msg()
-
-        assert extract_message_text(FakeContext()) == "from context"
-
-    def test_context_object_without_message(self):
-        """No .message attr → falls back to treating input as a parts list."""
-
-        class FakeContext:
-            pass  # no .message
-
-        # Pass a list directly as the context-like object
-        assert extract_message_text([{"text": "fallback"}]) == "fallback"
-
-    def test_whitespace_normalized(self):
-        """Leading/trailing whitespace is stripped; internal newlines are preserved."""
-        parts = [{"text": "  hello  "}, {"text": "\nworld\n"}]
-        result = extract_message_text(parts)
-        # Leading/trailing stripped, but internal \n stays (join uses single space)
-        assert result == "hello   \nworld"
-        assert not result.startswith(" ")
-        assert not result.endswith(" ")
-
-    def test_empty_parts_list(self):
-        assert extract_message_text([]) == ""
-
-
-# =============================================================================
-# format_conversation_history
-# =============================================================================
-
-class TestFormatConversationHistory:
-    """Coverage for shared_runtime.format_conversation_history()."""
-
-    def test_single_user_message(self):
-        hist = [("human", "hello")]
-        out = format_conversation_history(hist)
-        assert out == "User: hello"
-
-    def test_single_agent_message(self):
-        hist = [("ai", "response")]
-        out = format_conversation_history(hist)
-        assert out == "Agent: response"
-
-    def test_interleaved_history(self):
-        hist = [
-            ("human", "hello"),
-            ("ai", "hi there"),
-            ("human", "what is 2+2?"),
-            ("ai", "four"),
-        ]
-        out = format_conversation_history(hist)
-        lines = out.split("\n")
-        assert lines[0] == "User: hello"
-        assert lines[1] == "Agent: hi there"
-        assert lines[2] == "User: what is 2+2?"
-        assert lines[3] == "Agent: four"
-
-    def test_empty_history(self):
-        assert format_conversation_history([]) == ""
-
-
-# =============================================================================
-# build_task_text
-# =============================================================================
-
-class TestBuildTaskText:
-    """Coverage for shared_runtime.build_task_text()."""
-
-    def test_no_history_returns_user_message_unchanged(self):
-        assert build_task_text("do the thing", []) == "do the thing"
-
-    def test_history_prepends_transcript(self):
-        hist = [("human", "hello"), ("ai", "hi")]
-        result = build_task_text("follow-up", hist)
-        assert "Conversation so far:" in result
-        assert "User: hello" in result
-        assert "Agent: hi" in result
-        assert "follow-up" in result
-
-    def test_user_message_after_conversation_header(self):
-        hist = [("human", "hello")]
-        result = build_task_text("do it", hist)
-        assert result.startswith("Conversation so far:")
-        assert result.endswith("Current request: do it")
-
-    def test_empty_user_message_with_history(self):
-        """Empty user_message is still rendered with history."""
-        hist = [("human", "hello")]
-        result = build_task_text("", hist)
-        assert "Conversation so far:" in result
-        assert "Current request:" in result
-
-
-# =============================================================================
-# append_peer_guidance
-# =============================================================================
-
-class TestAppendPeerGuidance:
-    """Coverage for shared_runtime.append_peer_guidance()."""
-
-    def test_base_text_appended(self):
-        result = append_peer_guidance(
-            "base text",
-            peers_info="alpha: ws-1",
-            default_text="default",
-            tool_name="delegate_task",
-        )
-        assert result.startswith("base text")
-        assert "## Peers" in result
-        assert "alpha: ws-1" in result
-        assert "Use delegate_task" in result
-
-    def test_null_base_text_uses_default(self):
-        result = append_peer_guidance(
-            None,
-            peers_info="peer info",
-            default_text="DEFAULT_TEXT",
-            tool_name="tool",
-        )
-        assert result.startswith("DEFAULT_TEXT")
-
-    def test_whitespace_base_text_strips_to_empty_peers_still_added(self):
-        """Whitespace-only base_text is stripped but default_text is NOT used
-        (only None triggers the fallback). The peers section is still appended."""
-        result = append_peer_guidance(
-            "  ",
-            peers_info="peer",
-            default_text="DEF",
-            tool_name="t",
-        )
-        # "  ".strip() == ""; default_text is NOT substituted for whitespace
-        assert "## Peers" in result
-        assert "peer" in result
-        assert "DEF" not in result  # default_text only on None, not whitespace
-
-    def test_none_base_text_uses_default(self):
-        """None base_text triggers fallback to default_text."""
-        result = append_peer_guidance(
-            None,
-            peers_info="peer",
-            default_text="DEFAULT",
-            tool_name="tool",
-        )
-        assert result.startswith("DEFAULT")
-        assert "## Peers" in result
-
-    def test_empty_peers_info_skips_section(self):
-        result = append_peer_guidance(
-            "base",
-            peers_info="",
-            default_text="def",
-            tool_name="tool",
-        )
-        # No "## Peers" section when peers_info is empty
-        assert result == "base"
-
-    def test_whitespace_in_base_and_peers_normalized(self):
-        result = append_peer_guidance(
-            "  base  \n",
-            peers_info="  peer-1  \n",
-            default_text="def",
-            tool_name="tool",
-        )
-        # Base should be stripped of leading/trailing whitespace
-        assert result.startswith("base")
-        # Peer info should be appended
-        assert "peer-1" in result
-
-
-# =============================================================================
-# brief_task
-# =============================================================================
-
-class TestBriefTask:
-    """Coverage for shared_runtime.brief_task()."""
-
-    def test_short_text_returned_unchanged(self):
-        assert brief_task("hello", limit=60) == "hello"
-
-    def test_exact_limit_no_ellipsis(self):
-        text = "A" * 60
-        assert brief_task(text, limit=60) == text
-        assert "..." not in text
-
-    def test_truncated_with_ellipsis(self):
-        text = "A" * 80
-        result = brief_task(text, limit=60)
-        assert len(result) == 63  # 60 chars + "..."
-        assert result.endswith("...")
-
-    def test_limit_10_shortens(self):
-        result = brief_task("hello world", limit=10)
-        assert len(result) == 13  # 10 chars + "..."
-        assert result.endswith("...")
-
-    def test_limit_0_returns_ellipsis(self):
-        """limit=0 → 0-char slice + "..." since len("hello") > 0."""
-        result = brief_task("hello", limit=0)
-        assert result == "..."
-
-    def test_limit_1_single_char_plus_ellipsis(self):
-        result = brief_task("hello", limit=1)
-        assert len(result) == 4  # 1 char + "..."
-        assert result.startswith("h")
-        assert result.endswith("...")