Merge pull request #2772 from Molecule-AI/staging

staging → main: auto-promote d866d3a
Merge pull request #2771 from Molecule-AI/fix/mcp-dispatcher-source-workspace-id
2026-05-04 14:52:07 -07:00 · 2026-05-04 21:43:32 +00:00 · 2026-05-04 14:41:24 -07:00 · 2026-05-04 14:34:09 -07:00 · 2026-05-04 21:33:00 +00:00 · 2026-05-04 14:29:08 -07:00
13 changed files with 746 additions and 43 deletions
@@ -58,6 +58,7 @@ TOP_LEVEL_MODULES = {
    "adapter_base",
    "agent",
    "agents_md",
+    "card_helpers",
    "config",
    "configs_dir",
    "consolidation",
@@ -38,13 +38,26 @@ import (
 // Keep these stable — changing the base path for an existing runtime
 // without a migration shim will make previously-saved files disappear from
 // the runtime's POV.
+//
+// Path source-of-truth: cloud-init in
+// `molecule-controlplane/internal/provisioner/userdata_containerized.go`
+// runs `mkdir -p /configs` and writes the canonical config.yaml there.
+// The workspace container bind-mounts host `/configs` to read it back.
+// Files written anywhere else on the host are invisible to the runtime,
+// so `claude-code` (and any future containerized runtime) must point here.
+//
+// `/configs` is root-owned (cloud-init runs as root); the SSH-as-ubuntu
+// install command at the call site below uses `sudo` to write into it.
 var workspaceFilePathPrefix = map[string]string{
-	"hermes":    "/home/ubuntu/.hermes",
-	"langgraph": "/opt/configs",
-	"external":  "/opt/configs",
-	// Default for unknown / future runtimes is /opt/configs — most
-	// conservative place that doesn't collide with system or runtime-
-	// private directories.
+	"hermes":      "/home/ubuntu/.hermes",
+	"claude-code": "/configs",
+	"langgraph":   "/opt/configs",
+	"external":    "/opt/configs",
+	// Default for unknown / future runtimes is /configs — matches the
+	// containerized user-data layout. The `langgraph` / `external`
+	// entries pre-date the unified user-data path and are retained
+	// until a migration audit confirms what the running tenants of
+	// those runtimes actually have on disk.
 }

 func resolveWorkspaceFilePath(runtime, relPath string) (string, error) {
@@ -53,7 +66,7 @@ func resolveWorkspaceFilePath(runtime, relPath string) (string, error) {
 	}
 	base, ok := workspaceFilePathPrefix[strings.ToLower(strings.TrimSpace(runtime))]
 	if !ok {
-		base = "/opt/configs"
+		base = "/configs"
 	}
 	return filepath.Join(base, filepath.Clean(relPath)), nil
 }
@@ -148,6 +161,17 @@ func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, c
 	// writes the file atomically via temp-file-rename. Permissions 0644
 	// match the existing tar-unpack defaults on the Docker path.
 	//
+	// `sudo -n` (non-interactive) prefix: the canonical containerized
+	// workspace layout puts /configs at the root, owned by root because
+	// cloud-init runs as root (see
+	// molecule-controlplane/internal/provisioner/userdata_containerized.go).
+	// SSH-as-ubuntu can't write into /configs without escalation.
+	// Ubuntu has passwordless sudo on EC2 by default; sudo -n fails fast
+	// (no prompt) if that ever changes, surfacing a clean error instead
+	// of a hang. The hermes path /home/ubuntu/.hermes is ubuntu-owned
+	// and doesn't strictly need sudo, but using it uniformly avoids
+	// per-runtime branching here.
+	//
 	// The remote command is fully deterministic — no user-controlled
 	// input reaches a shell eval (absPath is built from a map + Clean()).
 	sshArgs := []string{
@@ -157,7 +181,7 @@ func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, c
 		"-o", "ServerAliveInterval=15",
 		"-p", fmt.Sprintf("%d", localPort),
 		fmt.Sprintf("%s@127.0.0.1", osUser),
-		fmt.Sprintf("install -D -m 0644 /dev/stdin %s", shellQuote(absPath)),
+		fmt.Sprintf("sudo -n install -D -m 0644 /dev/stdin %s", shellQuote(absPath)),
 	}
 	sshCmd := exec.CommandContext(ctx, "ssh", sshArgs...)
 	sshCmd.Env = os.Environ()
@@ -18,10 +18,16 @@ func TestResolveWorkspaceFilePath_KnownRuntimes(t *testing.T) {
 		{"hermes", "config.yaml", "/home/ubuntu/.hermes/config.yaml"},
 		{"HERMES", "config.yaml", "/home/ubuntu/.hermes/config.yaml"}, // case-insensitive
 		{"hermes", "nested/a.yaml", "/home/ubuntu/.hermes/nested/a.yaml"},
+		// claude-code (and any future containerized runtime) lands at /configs —
+		// the path user-data creates and bind-mounts into the container. Pre-fix
+		// this fell through to /opt/configs which doesn't exist on workspace EC2s
+		// and would 500 with EACCES on save (the bug that motivated this gate).
+		{"claude-code", "config.yaml", "/configs/config.yaml"},
+		{"CLAUDE-CODE", "config.yaml", "/configs/config.yaml"}, // case-insensitive
 		{"langgraph", "config.yaml", "/opt/configs/config.yaml"},
 		{"external", "skills.json", "/opt/configs/skills.json"},
-		{"", "config.yaml", "/opt/configs/config.yaml"},        // empty → default
-		{"unknown", "config.yaml", "/opt/configs/config.yaml"}, // unknown → default
+		{"", "config.yaml", "/configs/config.yaml"},        // empty → default
+		{"unknown", "config.yaml", "/configs/config.yaml"}, // unknown → default
 	}
 	for _, tc := range cases {
 		t.Run(tc.runtime+"/"+tc.relPath, func(t *testing.T) {
@@ -491,20 +491,26 @@ async def get_peers() -> list[dict]:
    return peers


-async def get_workspace_info() -> dict:
+async def get_workspace_info(source_workspace_id: str | None = None) -> dict:
    """Get this workspace's info from the platform.

+    ``source_workspace_id`` selects which registered workspace to
+    introspect when the agent is registered into multiple workspaces
+    (multi-workspace mode). Unset → defaults to the module-level
+    WORKSPACE_ID — single-workspace operators see no behaviour change.
+
    Distinguishes three failure shapes so callers can handle them
    distinctly (#2429):
      - 410 Gone        → workspace was deleted; re-onboard required
      - 404 / other     → workspace never existed (or transient)
      - exception       → network / auth failure
    """
+    src = source_workspace_id or WORKSPACE_ID
    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            resp = await client.get(
-                f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}",
-                headers=auth_headers(),
+                f"{PLATFORM_URL}/workspaces/{src}",
+                headers=auth_headers(src),
            )
            if resp.status_code == 200:
                return resp.json()
@@ -521,7 +527,7 @@ async def get_workspace_info() -> dict:
                    body = {}
                return {
                    "error": "removed",
-                    "id": body.get("id", WORKSPACE_ID),
+                    "id": body.get("id", src),
                    "removed_at": body.get("removed_at"),
                    "hint": body.get(
                        "hint",
@@ -123,16 +123,20 @@ async def handle_tool_call(name: str, arguments: dict) -> str:
            source_workspace_id=arguments.get("source_workspace_id") or None,
        )
    elif name == "get_workspace_info":
-        return await tool_get_workspace_info()
+        return await tool_get_workspace_info(
+            source_workspace_id=arguments.get("source_workspace_id") or None,
+        )
    elif name == "commit_memory":
        return await tool_commit_memory(
            arguments.get("content", ""),
            arguments.get("scope", "LOCAL"),
+            source_workspace_id=arguments.get("source_workspace_id") or None,
        )
    elif name == "recall_memory":
        return await tool_recall_memory(
            arguments.get("query", ""),
            arguments.get("scope", ""),
+            source_workspace_id=arguments.get("source_workspace_id") or None,
        )
    elif name == "wait_for_message":
        return await tool_wait_for_message(
@@ -151,6 +155,7 @@ async def handle_tool_call(name: str, arguments: dict) -> str:
            arguments.get("peer_id", ""),
            arguments.get("limit", 20),
            arguments.get("before_ts", ""),
+            source_workspace_id=arguments.get("source_workspace_id") or None,
        )
    return f"Unknown tool: {name}"

@@ -545,19 +545,34 @@ async def tool_list_peers(source_workspace_id: str | None = None) -> str:
    return "\n".join(lines)


-async def tool_get_workspace_info() -> str:
-    """Get this workspace's own info."""
-    info = await get_workspace_info()
+async def tool_get_workspace_info(source_workspace_id: str | None = None) -> str:
+    """Get this workspace's own info.
+
+    ``source_workspace_id`` selects which registered workspace to
+    introspect when the agent is registered into multiple workspaces.
+    Unset → falls back to module-level WORKSPACE_ID.
+    """
+    info = await get_workspace_info(source_workspace_id=source_workspace_id)
    return json.dumps(info, indent=2)


-async def tool_commit_memory(content: str, scope: str = "LOCAL") -> str:
+async def tool_commit_memory(
+    content: str,
+    scope: str = "LOCAL",
+    source_workspace_id: str | None = None,
+) -> str:
    """Save important information to persistent memory.

    GLOBAL scope is writable only by root workspaces (tier == 0).
    RBAC memory.write permission is required for all scope levels.
    The source workspace_id is embedded in every record so the platform
    can enforce cross-workspace isolation and audit trail.
+
+    ``source_workspace_id`` selects which registered workspace this
+    memory belongs to when the agent is registered into multiple
+    workspaces (PR-1 / multi-workspace mode). When unset, falls back
+    to the module-level WORKSPACE_ID — single-workspace operators see
+    no behaviour change.
    """
    if not content:
        return "Error: content is required"
@@ -581,18 +596,19 @@ async def tool_commit_memory(content: str, scope: str = "LOCAL") -> str:
            "Non-root workspaces may use LOCAL or TEAM scope."
        )

+    src = source_workspace_id or WORKSPACE_ID
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(
-                f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/memories",
+                f"{PLATFORM_URL}/workspaces/{src}/memories",
                json={
                    "content": content,
                    "scope": scope,
                    # Embed source workspace so the platform can namespace-isolate
                    # and audit cross-workspace writes (GH#1610 fix).
-                    "workspace_id": WORKSPACE_ID,
+                    "workspace_id": src,
                },
-                headers=_auth_headers_for_heartbeat(),
+                headers=_auth_headers_for_heartbeat(src),
            )
            data = resp.json()
            if resp.status_code in (200, 201):
@@ -602,13 +618,21 @@ async def tool_commit_memory(content: str, scope: str = "LOCAL") -> str:
        return f"Error saving memory: {e}"


-async def tool_recall_memory(query: str = "", scope: str = "") -> str:
+async def tool_recall_memory(
+    query: str = "",
+    scope: str = "",
+    source_workspace_id: str | None = None,
+) -> str:
    """Search persistent memory for previously saved information.

    RBAC memory.read permission is required (mirrors builtin_tools/memory.py).
    The workspace_id is sent as a query parameter so the platform can
    cross-validate it against the auth token and defend against any future
    path traversal / cross-tenant read bugs in the platform itself.
+
+    ``source_workspace_id`` selects which registered workspace's memories
+    to search when the agent is registered into multiple workspaces.
+    Unset → defaults to the module-level WORKSPACE_ID.
    """
    # RBAC: require memory.read permission (mirrors builtin_tools/memory.py)
    if not _check_memory_read_permission():
@@ -617,7 +641,8 @@ async def tool_recall_memory(query: str = "", scope: str = "") -> str:
            "permission for this operation."
        )

-    params: dict[str, str] = {"workspace_id": WORKSPACE_ID}
+    src = source_workspace_id or WORKSPACE_ID
+    params: dict[str, str] = {"workspace_id": src}
    if query:
        params["q"] = query
    if scope:
@@ -625,9 +650,9 @@ async def tool_recall_memory(query: str = "", scope: str = "") -> str:
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(
-                f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/memories",
+                f"{PLATFORM_URL}/workspaces/{src}/memories",
                params=params,
-                headers=_auth_headers_for_heartbeat(),
+                headers=_auth_headers_for_heartbeat(src),
            )
            data = resp.json()
            if isinstance(data, list):
@@ -664,7 +689,12 @@ _INBOX_NOT_ENABLED_MSG = (
 )


-async def tool_chat_history(peer_id: str, limit: int = 20, before_ts: str = "") -> str:
+async def tool_chat_history(
+    peer_id: str,
+    limit: int = 20,
+    before_ts: str = "",
+    source_workspace_id: str | None = None,
+) -> str:
    """Fetch the prior conversation with one peer.

    Hits ``/workspaces/<self>/activity?peer_id=<peer>&limit=<N>``
@@ -686,6 +716,11 @@ async def tool_chat_history(peer_id: str, limit: int = 20, before_ts: str = "")
            histories — pass the oldest ``ts`` from the previous
            response. Empty (default) returns the most recent ``limit``
            rows.
+        source_workspace_id: Which registered workspace's activity log
+            to query. Auto-routes via ``_peer_to_source`` cache when
+            unset (the workspace this peer was discovered through);
+            falls back to module-level WORKSPACE_ID for single-workspace
+            operators.

    Returns a JSON-encoded list of activity rows (or an error string
    starting with ``Error:`` so the agent can branch). Each row carries
@@ -701,6 +736,8 @@ async def tool_chat_history(peer_id: str, limit: int = 20, before_ts: str = "")
    if limit > 500:
        limit = 500

+    src = source_workspace_id or _peer_to_source.get(peer_id) or WORKSPACE_ID
+
    params: dict[str, str] = {
        "peer_id": peer_id,
        "limit": str(limit),
@@ -713,9 +750,9 @@ async def tool_chat_history(peer_id: str, limit: int = 20, before_ts: str = "")
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(
-                f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/activity",
+                f"{PLATFORM_URL}/workspaces/{src}/activity",
                params=params,
-                headers=_auth_headers_for_heartbeat(),
+                headers=_auth_headers_for_heartbeat(src),
            )
    except Exception as exc:  # noqa: BLE001
        return f"Error: chat_history request failed: {exc}"
@@ -0,0 +1,57 @@
+"""Helpers for building / mutating the workspace ``AgentCard``.
+
+Kept as their own module so the behavior is unit-testable without booting
+the whole runtime (``main.py`` is ``# pragma: no cover``).
+"""
+from __future__ import annotations
+
+from typing import Iterable
+
+from a2a.types import AgentCard, AgentSkill
+
+
+def enrich_card_skills(card: AgentCard, loaded_skills: Iterable | None) -> bool:
+    """Replace ``card.skills`` with rich metadata from the adapter's loaded
+    skills, in place. Pairs with PR #2756: the card was built up front from
+    static ``config.skills`` names so /.well-known/agent-card.json could
+    serve before ``adapter.setup()`` finishes; this swaps in the richer
+    descriptions/tags/examples that ``setup()``'s skill loader produces.
+
+    Returns ``True`` on swap, ``False`` when the swap was skipped or
+    failed. Failure cases:
+    * ``loaded_skills`` is None / empty — caller didn't load any.
+    * Any element doesn't expose ``.metadata.{id,name,description,tags,examples}``
+      (a future adapter that doesn't follow the canonical shape).
+
+    Failures DO NOT raise — a malformed ``loaded_skills`` shape would
+    otherwise propagate to ``main.py``'s outer ``except Exception``,
+    silently degrading an OK boot to the not-configured state. Static
+    stubs from ``config.skills`` stay in place; setup() already
+    succeeded, the agent works, only the card's skill enrichment is
+    degraded. Operator sees a clear log line; tests assert this
+    distinction.
+    """
+    if not loaded_skills:
+        return False
+
+    try:
+        rich = [
+            AgentSkill(
+                id=skill.metadata.id,
+                name=skill.metadata.name,
+                description=skill.metadata.description,
+                tags=skill.metadata.tags,
+                examples=skill.metadata.examples,
+            )
+            for skill in loaded_skills
+        ]
+    except Exception as enrich_err:  # noqa: BLE001
+        print(
+            f"Warning: skill metadata enrichment failed (keeping static "
+            f"stubs from config.skills): {type(enrich_err).__name__}: {enrich_err}",
+            flush=True,
+        )
+        return False
+
+    card.skills = rich
+    return True
@@ -245,18 +245,13 @@ async def main():  # pragma: no cover
        # 6c. Swap rich skill metadata into the card now that setup() loaded
        # them. In-place mutation: a2a-sdk's create_agent_card_routes serialises
        # the card on each request, so the route mounted below sees the update.
-        loaded_skills = getattr(adapter, "loaded_skills", None)
-        if loaded_skills:
-            agent_card.skills = [
-                AgentSkill(
-                    id=skill.metadata.id,
-                    name=skill.metadata.name,
-                    description=skill.metadata.description,
-                    tags=skill.metadata.tags,
-                    examples=skill.metadata.examples,
-                )
-                for skill in loaded_skills
-            ]
+        # Isolated via card_helpers.enrich_card_skills — a malformed
+        # loaded_skills shape (e.g., a future adapter that doesn't follow
+        # the .metadata convention) is logged + swallowed instead of
+        # propagating up to the outer except, where it would silently
+        # degrade an OK boot to the not-configured state.
+        from card_helpers import enrich_card_skills
+        enrich_card_skills(agent_card, getattr(adapter, "loaded_skills", None))
        adapter_ready = True
    except SystemExit:
        # Smoke-mode exit signal — propagate untouched.
@@ -497,7 +492,24 @@ async def main():  # pragma: no cover
            limit = int(request.query_params.get("limit", "100"))
        except (TypeError, ValueError):
            return JSONResponse({"error": "since and limit must be integers"}, status_code=400)
-        result = await adapter.transcript_lines(since=since, limit=limit)
+        # Isolate adapter call: misconfigured boots leave the adapter
+        # partially-initialised, and a future adapter override of
+        # transcript_lines might assume setup() ran. Surface a 503 with
+        # a clear reason instead of letting the exception propagate to
+        # Starlette's 500 handler — same pattern as the not-configured
+        # JSON-RPC route (PR #2756). BaseAdapter.transcript_lines's
+        # default returns {"supported": false} so today's 4 adapters
+        # never trigger this branch; this is the safety net.
+        try:
+            result = await adapter.transcript_lines(since=since, limit=limit)
+        except Exception as transcript_err:  # noqa: BLE001
+            return JSONResponse(
+                {
+                    "error": "transcript unavailable",
+                    "detail": f"{type(transcript_err).__name__}: {transcript_err}",
+                },
+                status_code=503,
+            )
        return JSONResponse(result)

    starlette_app.add_route("/transcript", _transcript_handler, methods=["GET"])
@@ -271,7 +271,19 @@ _GET_WORKSPACE_INFO = ToolSpec(
        "back to the user, or to determine whether you're a tier-0 "
        "root that can write GLOBAL memory)."
    ),
-    input_schema={"type": "object", "properties": {}},
+    input_schema={
+        "type": "object",
+        "properties": {
+            "source_workspace_id": {
+                "type": "string",
+                "description": (
+                    "Optional. In multi-workspace mode (this agent registered "
+                    "in N workspaces), introspect the named workspace instead "
+                    "of the primary one. Single-workspace agents omit this."
+                ),
+            },
+        },
+    },
    impl=tool_get_workspace_info,
    section=A2A_SECTION,
 )
@@ -455,6 +467,14 @@ _CHAT_HISTORY = ToolSpec(
                    "Use the oldest `created_at` from a previous response."
                ),
            },
+            "source_workspace_id": {
+                "type": "string",
+                "description": (
+                    "Optional. Multi-workspace mode: query the named "
+                    "workspace's activity log instead of the primary one. "
+                    "Auto-routes via the peer-discovery cache when unset."
+                ),
+            },
        },
        "required": ["peer_id"],
    },
@@ -515,6 +535,16 @@ _COMMIT_MEMORY = ToolSpec(
                "enum": ["LOCAL", "TEAM", "GLOBAL"],
                "description": "Memory scope (default LOCAL).",
            },
+            "source_workspace_id": {
+                "type": "string",
+                "description": (
+                    "Optional. Multi-workspace mode: commit the memory "
+                    "into the named workspace's namespace instead of "
+                    "the primary one. Pair with the inbound message's "
+                    "`arrival_workspace_id` so memories stay in the "
+                    "tenant they were derived from."
+                ),
+            },
        },
        "required": ["content"],
    },
@@ -544,6 +574,16 @@ _RECALL_MEMORY = ToolSpec(
                "enum": ["LOCAL", "TEAM", "GLOBAL", ""],
                "description": "Filter by scope (empty = all accessible).",
            },
+            "source_workspace_id": {
+                "type": "string",
+                "description": (
+                    "Optional. Multi-workspace mode: search the named "
+                    "workspace's memories instead of the primary one. "
+                    "Pair with the inbound message's "
+                    "`arrival_workspace_id` to recall context for the "
+                    "right tenant."
+                ),
+            },
        },
    },
    impl=tool_recall_memory,
@@ -145,6 +145,42 @@ def _make_a2a_mocks():
    types_mod.TaskStatus = TaskStatus
    types_mod.TaskState = _TaskStateEnum

+    # v1 AgentCard / AgentSkill / AgentCapabilities / AgentInterface — used
+    # by main.py's static-card construction (PR #2756) and by
+    # card_helpers.enrich_card_skills's swap path. Stubs preserve kwargs so
+    # tests can assert on card.skills[i].name etc., and let card.skills be
+    # reassigned in place (the production code's enrichment pattern).
+    class AgentSkill:
+        def __init__(self, id="", name="", description="", tags=None, examples=None, **kwargs):
+            self.id = id
+            self.name = name
+            self.description = description
+            self.tags = list(tags) if tags is not None else []
+            self.examples = list(examples) if examples is not None else []
+            for k, v in kwargs.items():
+                setattr(self, k, v)
+
+    class AgentCapabilities:
+        def __init__(self, **kwargs):
+            for k, v in kwargs.items():
+                setattr(self, k, v)
+
+    class AgentInterface:
+        def __init__(self, **kwargs):
+            for k, v in kwargs.items():
+                setattr(self, k, v)
+
+    class AgentCard:
+        def __init__(self, **kwargs):
+            self.skills = []
+            for k, v in kwargs.items():
+                setattr(self, k, v)
+
+    types_mod.AgentSkill = AgentSkill
+    types_mod.AgentCapabilities = AgentCapabilities
+    types_mod.AgentInterface = AgentInterface
+    types_mod.AgentCard = AgentCard
+
    # a2a.helpers (v1: moved from a2a.utils, renamed new_agent_text_message
    # → new_text_message). Mock both names — production code only calls
    # new_text_message, but if any test still references the old name it
@@ -71,6 +71,105 @@ async def test_handle_tool_call_unknown_tool():
    assert "Unknown tool" in result


+# ---------------------------------------------------------------------------
+# source_workspace_id propagation — every workspace-scoped tool's schema
+# advertises this parameter (PR #2766) so the LLM can route a memory commit
+# or chat-history query through the workspace the inbound message arrived
+# on. The dispatch path itself MUST forward the kwarg — otherwise the
+# schema lies and every call silently falls back to the module-level
+# WORKSPACE_ID, defeating multi-workspace isolation. These tests pin
+# end-to-end argument flow on the four tools that ship in PR #2766.
+# ---------------------------------------------------------------------------
+
+
+async def test_dispatch_get_workspace_info_forwards_source_workspace_id():
+    from a2a_mcp_server import handle_tool_call
+    mock = AsyncMock(return_value='{"id":"ws-X"}')
+    with patch("a2a_mcp_server.tool_get_workspace_info", new=mock):
+        await handle_tool_call(
+            "get_workspace_info",
+            {"source_workspace_id": "ws-X"},
+        )
+    mock.assert_awaited_once_with(source_workspace_id="ws-X")
+
+
+async def test_dispatch_commit_memory_forwards_source_workspace_id():
+    from a2a_mcp_server import handle_tool_call
+    mock = AsyncMock(return_value='{"success":true}')
+    with patch("a2a_mcp_server.tool_commit_memory", new=mock):
+        await handle_tool_call(
+            "commit_memory",
+            {
+                "content": "remember this",
+                "scope": "LOCAL",
+                "source_workspace_id": "ws-Y",
+            },
+        )
+    mock.assert_awaited_once_with(
+        "remember this",
+        "LOCAL",
+        source_workspace_id="ws-Y",
+    )
+
+
+async def test_dispatch_recall_memory_forwards_source_workspace_id():
+    from a2a_mcp_server import handle_tool_call
+    mock = AsyncMock(return_value="[LOCAL] remember this")
+    with patch("a2a_mcp_server.tool_recall_memory", new=mock):
+        await handle_tool_call(
+            "recall_memory",
+            {
+                "query": "remember",
+                "scope": "LOCAL",
+                "source_workspace_id": "ws-Z",
+            },
+        )
+    mock.assert_awaited_once_with(
+        "remember",
+        "LOCAL",
+        source_workspace_id="ws-Z",
+    )
+
+
+async def test_dispatch_chat_history_forwards_source_workspace_id():
+    from a2a_mcp_server import handle_tool_call
+    mock = AsyncMock(return_value="[]")
+    with patch("a2a_mcp_server.tool_chat_history", new=mock):
+        await handle_tool_call(
+            "chat_history",
+            {
+                "peer_id": "peer-A",
+                "limit": 10,
+                "source_workspace_id": "ws-W",
+            },
+        )
+    mock.assert_awaited_once_with(
+        "peer-A",
+        10,
+        "",
+        source_workspace_id="ws-W",
+    )
+
+
+async def test_dispatch_omits_source_workspace_id_when_unset():
+    """Single-workspace operators (no source_workspace_id key in args) must
+    forward None — preserving the legacy fallback to module-level WORKSPACE_ID
+    inside the tool. An accidental empty-string forward would also fall back,
+    but None is the documented contract."""
+    from a2a_mcp_server import handle_tool_call
+    mock = AsyncMock(return_value='{"success":true}')
+    with patch("a2a_mcp_server.tool_commit_memory", new=mock):
+        await handle_tool_call(
+            "commit_memory",
+            {"content": "x", "scope": "LOCAL"},
+        )
+    mock.assert_awaited_once_with(
+        "x",
+        "LOCAL",
+        source_workspace_id=None,
+    )
+
+
 async def test_handle_tool_call_missing_args_defaults():
    """Test that missing args default to empty strings (defensive)."""
    from a2a_mcp_server import handle_tool_call
@@ -426,3 +426,220 @@ class TestListRegisteredWorkspaces:
        platform_auth.register_workspace_token("ws-1", "tok-1")
        platform_auth.clear_cache()
        assert platform_auth.list_registered_workspaces() == []
+
+
+# ---------------------------------------------------------------------------
+# Memory tools — commit/recall must namespace under source_workspace_id
+# so an agent serving multiple tenants doesn't bleed memories across
+# them. Single-workspace path (no source arg) keeps using WORKSPACE_ID.
+# ---------------------------------------------------------------------------
+
+
+class TestCommitMemorySourceRouting:
+    @pytest.mark.asyncio
+    async def test_url_and_auth_use_source_workspace_id(self, monkeypatch):
+        """commit_memory(source_workspace_id=X) must POST to /workspaces/X/
+        with X's bearer token — otherwise a multi-tenant agent could
+        write into the wrong tenant's memory namespace."""
+        import platform_auth, a2a_tools
+
+        platform_auth.register_workspace_token("ffff6666-ffff-ffff-ffff-ffffffffffff", "token-F")
+
+        captured: dict = {}
+
+        class _Resp:
+            status_code = 200
+            def json(self):
+                return {"id": "mem-1"}
+
+        class _Client:
+            async def __aenter__(self): return self
+            async def __aexit__(self, *a): return None
+            async def post(self, url, headers, json):
+                captured["url"] = url
+                captured["headers"] = headers
+                captured["body"] = json
+                return _Resp()
+
+        monkeypatch.setattr(a2a_tools.httpx, "AsyncClient", lambda timeout: _Client())
+
+        result = await a2a_tools.tool_commit_memory(
+            "remember this",
+            source_workspace_id="ffff6666-ffff-ffff-ffff-ffffffffffff",
+        )
+
+        assert "/workspaces/ffff6666-ffff-ffff-ffff-ffffffffffff/memories" in captured["url"]
+        assert captured["headers"]["Authorization"] == "Bearer token-F"
+        assert captured["body"]["workspace_id"] == "ffff6666-ffff-ffff-ffff-ffffffffffff"
+        import json as _json
+        assert _json.loads(result)["success"] is True
+
+    @pytest.mark.asyncio
+    async def test_falls_back_to_module_workspace_id(self, monkeypatch):
+        """Without source_workspace_id, single-workspace operators keep
+        the legacy WORKSPACE_ID-based POST — no behavior change."""
+        import a2a_client, a2a_tools
+
+        captured: dict = {}
+
+        class _Resp:
+            status_code = 200
+            def json(self):
+                return {"id": "mem-1"}
+
+        class _Client:
+            async def __aenter__(self): return self
+            async def __aexit__(self, *a): return None
+            async def post(self, url, headers, json):
+                captured["url"] = url
+                return _Resp()
+
+        monkeypatch.setattr(a2a_tools.httpx, "AsyncClient", lambda timeout: _Client())
+
+        await a2a_tools.tool_commit_memory("remember this")
+        assert f"/workspaces/{a2a_client.WORKSPACE_ID}/memories" in captured["url"]
+
+
+class TestRecallMemorySourceRouting:
+    @pytest.mark.asyncio
+    async def test_url_params_and_auth_use_source(self, monkeypatch):
+        """recall_memory routes the GET, the workspace_id query param,
+        and the auth header through source_workspace_id."""
+        import platform_auth, a2a_tools
+
+        platform_auth.register_workspace_token("aaaa7777-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "token-G")
+
+        captured: dict = {}
+
+        class _Resp:
+            status_code = 200
+            def json(self):
+                return []
+
+        class _Client:
+            async def __aenter__(self): return self
+            async def __aexit__(self, *a): return None
+            async def get(self, url, params, headers):
+                captured["url"] = url
+                captured["params"] = params
+                captured["headers"] = headers
+                return _Resp()
+
+        monkeypatch.setattr(a2a_tools.httpx, "AsyncClient", lambda timeout: _Client())
+
+        await a2a_tools.tool_recall_memory(
+            query="x",
+            source_workspace_id="aaaa7777-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
+        )
+
+        assert "/workspaces/aaaa7777-aaaa-aaaa-aaaa-aaaaaaaaaaaa/memories" in captured["url"]
+        assert captured["params"]["workspace_id"] == "aaaa7777-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
+        assert captured["headers"]["Authorization"] == "Bearer token-G"
+
+
+# ---------------------------------------------------------------------------
+# chat_history — auto-routes via the peer→source cache so an inbound
+# peer_agent push from workspace X sees its history queried against X.
+# ---------------------------------------------------------------------------
+
+
+class TestChatHistorySourceRouting:
+    @pytest.mark.asyncio
+    async def test_auto_routes_via_peer_cache(self, monkeypatch):
+        """chat_history(peer_id) without an explicit source falls back to
+        ``_peer_to_source[peer_id]`` — same auto-routing as delegate_task,
+        so the agent doesn't have to remember which workspace surfaced
+        each peer."""
+        import platform_auth, a2a_client, a2a_tools
+
+        platform_auth.register_workspace_token("bbbb8888-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "token-H")
+        peer_id = "1111aaaa-1111-1111-1111-111111111111"
+        a2a_client._peer_to_source[peer_id] = "bbbb8888-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
+
+        captured: dict = {}
+
+        class _Resp:
+            status_code = 200
+            def json(self):
+                return []
+
+        class _Client:
+            async def __aenter__(self): return self
+            async def __aexit__(self, *a): return None
+            async def get(self, url, params, headers):
+                captured["url"] = url
+                captured["headers"] = headers
+                return _Resp()
+
+        monkeypatch.setattr(a2a_tools.httpx, "AsyncClient", lambda timeout: _Client())
+
+        await a2a_tools.tool_chat_history(peer_id, limit=5)
+
+        assert "/workspaces/bbbb8888-bbbb-bbbb-bbbb-bbbbbbbbbbbb/activity" in captured["url"]
+        assert captured["headers"]["Authorization"] == "Bearer token-H"
+
+    @pytest.mark.asyncio
+    async def test_explicit_source_beats_cache(self, monkeypatch):
+        import platform_auth, a2a_client, a2a_tools
+
+        platform_auth.register_workspace_token("cccc9999-cccc-cccc-cccc-cccccccccccc", "token-I")
+        peer_id = "1111aaaa-1111-1111-1111-111111111111"
+        a2a_client._peer_to_source[peer_id] = "should-not-be-used"
+
+        captured: dict = {}
+
+        class _Resp:
+            status_code = 200
+            def json(self):
+                return []
+
+        class _Client:
+            async def __aenter__(self): return self
+            async def __aexit__(self, *a): return None
+            async def get(self, url, params, headers):
+                captured["url"] = url
+                return _Resp()
+
+        monkeypatch.setattr(a2a_tools.httpx, "AsyncClient", lambda timeout: _Client())
+
+        await a2a_tools.tool_chat_history(
+            peer_id, source_workspace_id="cccc9999-cccc-cccc-cccc-cccccccccccc",
+        )
+        assert "/workspaces/cccc9999-cccc-cccc-cccc-cccccccccccc/activity" in captured["url"]
+
+
+# ---------------------------------------------------------------------------
+# get_workspace_info — multi-workspace introspection.
+# ---------------------------------------------------------------------------
+
+
+class TestGetWorkspaceInfoSourceRouting:
+    @pytest.mark.asyncio
+    async def test_introspects_named_workspace(self, monkeypatch):
+        import platform_auth, a2a_client
+
+        platform_auth.register_workspace_token("dddd0000-dddd-dddd-dddd-dddddddddddd", "token-J")
+
+        captured: dict = {}
+
+        class _Resp:
+            status_code = 200
+            def json(self):
+                return {"id": "dddd0000-dddd-dddd-dddd-dddddddddddd", "name": "wsJ"}
+
+        class _Client:
+            async def __aenter__(self): return self
+            async def __aexit__(self, *a): return None
+            async def get(self, url, headers):
+                captured["url"] = url
+                captured["headers"] = headers
+                return _Resp()
+
+        monkeypatch.setattr(a2a_client.httpx, "AsyncClient", lambda timeout: _Client())
+
+        info = await a2a_client.get_workspace_info(
+            source_workspace_id="dddd0000-dddd-dddd-dddd-dddddddddddd",
+        )
+        assert info["id"] == "dddd0000-dddd-dddd-dddd-dddddddddddd"
+        assert "/workspaces/dddd0000-dddd-dddd-dddd-dddddddddddd" in captured["url"]
+        assert captured["headers"]["Authorization"] == "Bearer token-J"
@@ -0,0 +1,163 @@
+"""Tests for ``card_helpers.enrich_card_skills`` — the defensive swap that
+replaces ``AgentCard.skills`` with rich metadata from the adapter's
+loaded skills, falling back to the static stubs on shape mismatch.
+
+The whole point of the helper (vs inline in main.py) is that a future
+adapter author who returns a non-standard ``loaded_skills`` shape
+should NOT silently downgrade their workspace boot to not-configured —
+``setup()`` succeeded, the agent works, only the card's skill metadata
+enrichment is degraded.
+"""
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+WORKSPACE_DIR = Path(__file__).resolve().parents[1]
+if str(WORKSPACE_DIR) not in sys.path:
+    sys.path.insert(0, str(WORKSPACE_DIR))
+
+from a2a.types import AgentCard, AgentCapabilities, AgentInterface, AgentSkill
+
+from card_helpers import enrich_card_skills
+
+
+def _make_card(static_skill_names):
+    return AgentCard(
+        name="test-agent",
+        description="test",
+        version="0.0.0",
+        supported_interfaces=[
+            AgentInterface(protocol_binding="https://a2a.g/v1", url="http://x:8000")
+        ],
+        capabilities=AgentCapabilities(streaming=True, push_notifications=False),
+        skills=[
+            AgentSkill(id=n, name=n, description=n, tags=[], examples=[])
+            for n in static_skill_names
+        ],
+        default_input_modes=["text/plain"],
+        default_output_modes=["text/plain"],
+    )
+
+
+class _SkillMetadata:
+    """Mimics the adapter-side Skill.metadata shape."""
+    def __init__(self, id, name, description, tags, examples):
+        self.id = id
+        self.name = name
+        self.description = description
+        self.tags = tags
+        self.examples = examples
+
+
+class _Skill:
+    def __init__(self, **kwargs):
+        self.metadata = _SkillMetadata(**kwargs)
+
+
+def test_returns_false_on_none():
+    """No loaded_skills → caller didn't load any → no swap, no log spam."""
+    card = _make_card(["a", "b"])
+    assert enrich_card_skills(card, None) is False
+    # Static stubs preserved.
+    assert [s.id for s in card.skills] == ["a", "b"]
+
+
+def test_returns_false_on_empty_list():
+    """Empty list → same treatment as None: nothing to enrich."""
+    card = _make_card(["a"])
+    assert enrich_card_skills(card, []) is False
+    assert [s.id for s in card.skills] == ["a"]
+
+
+def test_swaps_in_rich_metadata_on_canonical_shape():
+    """The happy path: adapter returns Skill objects with the canonical
+    .metadata shape, card gets the richer descriptions/tags/examples."""
+    card = _make_card(["search"])  # static stub
+    rich = [
+        _Skill(
+            id="search",
+            name="Web Search",
+            description="Search the web for the user's question",
+            tags=["web", "io"],
+            examples=["who won the world cup in 2022?"],
+        ),
+    ]
+    assert enrich_card_skills(card, rich) is True
+    assert len(card.skills) == 1
+    assert card.skills[0].id == "search"
+    assert card.skills[0].name == "Web Search"
+    assert "web" in card.skills[0].tags
+    assert card.skills[0].examples == ["who won the world cup in 2022?"]
+
+
+def test_returns_false_and_keeps_stubs_when_metadata_attr_missing(capsys):
+    """Defensive: a future adapter that returns objects without
+    ``.metadata`` would otherwise raise AttributeError and propagate to
+    main.py's outer except — silently degrading an OK boot to
+    not-configured. Helper logs + returns False instead, static stubs
+    stay in place.
+
+    This is the reason the helper exists at all; without it the
+    inline swap in main.py at PR #2756 was a coupling between adapter
+    discipline and tenant-facing readiness."""
+    card = _make_card(["a"])
+
+    class NoMetadata:
+        id = "x"  # has id but no .metadata.id (the canonical path)
+
+    assert enrich_card_skills(card, [NoMetadata()]) is False
+    # Static stub preserved.
+    assert [s.id for s in card.skills] == ["a"]
+    # Operator gets a log line.
+    captured = capsys.readouterr()
+    assert "skill metadata enrichment failed" in captured.out
+
+
+def test_returns_false_when_metadata_is_partial(capsys):
+    """Partial shape — has .metadata but the .metadata object lacks one
+    of the canonical attrs (here: ``examples``). The list comprehension
+    raises AttributeError on ``skill.metadata.examples`` access, which
+    the helper swallows. (In production, a2a.types.AgentSkill is a
+    Pydantic model that ALSO raises on missing required fields — both
+    failure modes route through the same except branch.)"""
+    card = _make_card(["a"])
+
+    class PartialMeta:
+        def __init__(self):
+            self.id = "x"
+            self.name = "x"
+            self.description = "x"
+            self.tags = []
+            # examples missing
+
+    class PartialSkill:
+        def __init__(self):
+            self.metadata = PartialMeta()
+
+    result = enrich_card_skills(card, [PartialSkill()])
+    assert result is False
+    assert [s.id for s in card.skills] == ["a"]
+    captured = capsys.readouterr()
+    assert "skill metadata enrichment failed" in captured.out
+
+
+def test_failure_is_atomic_no_partial_swap(capsys):
+    """If the second skill is malformed, the FIRST skill's swap must NOT
+    leak into card.skills. We use a list-comprehension which builds the
+    full list before assignment; verify that property holds.
+
+    Without this property, a misbehaving adapter could half-corrupt the
+    card — operators would see "1 skill listed" when 3 were declared,
+    no log line if the inline swap was partial."""
+    card = _make_card(["a", "b"])
+
+    valid = _Skill(id="x", name="x", description="x", tags=[], examples=[])
+
+    class BadSkill:
+        # No .metadata at all.
+        pass
+
+    assert enrich_card_skills(card, [valid, BadSkill()]) is False
+    # Original two static stubs intact — card.skills was never reassigned.
+    assert [s.id for s in card.skills] == ["a", "b"]
Author	SHA1	Message	Date
Hongming Wang	fc10386a78	Merge pull request #2772 from Molecule-AI/staging staging → main: auto-promote `d866d3a`	2026-05-04 14:52:07 -07:00
Hongming Wang	ac9b07b7ad	Merge pull request #2771 from Molecule-AI/fix/mcp-dispatcher-source-workspace-id fix(mcp): wire source_workspace_id through dispatcher for memory/chat_history/workspace_info	2026-05-04 21:43:32 +00:00
Hongming Wang	41ae4ec50b	fix(mcp): wire source_workspace_id through dispatcher for memory + chat_history + workspace_info Self-review of merged PR #2766 (multi-workspace MCP routing) revealed a silent gap: PR #2766 added the ``source_workspace_id`` parameter to ``tool_commit_memory`` / ``tool_recall_memory`` / ``tool_chat_history`` / ``tool_get_workspace_info`` AND advertised it in the registry's input schemas, but the MCP server's dispatch arms in ``a2a_mcp_server.py`` were never updated to forward ``arguments["source_workspace_id"]`` to those four tools. Result: the schema lied. The LLM saw ``source_workspace_id`` as a valid tool parameter, could correctly populate it from the inbound message's ``arrival_workspace_id``, but the dispatcher dropped it on the floor and every memory commit / recall / chat-history fetch silently fell back to the module-level ``WORKSPACE_ID``. The cross-tenant leak that PR #2766 was meant to prevent is NOT prevented for these four tools without this follow-up. Why the existing dispatcher tests didn't catch it: the tests asserted return-value strings (``"working" in result``) but never asserted what arguments the inner tool was called with. So the dispatcher could ignore any kwarg and the tests would still pass. Fix: 1. Wire ``source_workspace_id=arguments.get("source_workspace_id") or None`` into the four dispatch arms, mirroring the pattern already used for ``delegate_task`` / ``delegate_task_async`` / ``check_task_status`` / ``list_peers``. 2. Add five tests in ``test_a2a_mcp_server.py`` that assert the inner tool was awaited with the exact source_workspace_id kwarg (``assert_awaited_once_with(..., source_workspace_id="ws-X")``) — substring-on-result tests can't catch this class of bug. 3. Add a fallback test ensuring single-workspace operators (no source_workspace_id key) get ``source_workspace_id=None`` — pinning the documented None contract over an accidental empty-string forward. Suite: 1705 passed (was 1700 + 5 new), 3 skipped, 2 xfailed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>	2026-05-04 14:41:24 -07:00
molecule-ai[bot]	02960209a0	Merge pull request #2768 from Molecule-AI/staging staging → main: auto-promote `f70071e`	2026-05-04 14:34:09 -07:00
Hongming Wang	d866d3aa5f	Merge pull request #2769 from Molecule-AI/fix/workspace-config-write-path fix(workspace files API): write claude-code config to /configs, sudo for root-owned base	2026-05-04 21:33:00 +00:00
Hongming Wang	61d5908817	fix(workspace files API): write claude-code config to /configs, sudo for root-owned base Root cause of the user-visible 500 ("install: cannot create directory '/opt/configs': Permission denied") on PUT /workspaces/<id>/files/config.yaml: 1. Path map fall-through. claude-code wasn't in workspaceFilePathPrefix, so resolveWorkspaceFilePath returned the default `/opt/configs/...`. That directory doesn't exist on the workspace EC2 — cloud-init in provisioner/userdata_containerized.go runs `mkdir -p /configs` only. Even if the SSH write had succeeded at /opt/configs, the docker container's bind-mount is host:/configs → container:/configs, so the file would have been invisible to the runtime. 2. /configs ownership. cloud-init runs as root, so /configs is root-owned. The SSH-as-ubuntu install command can't write into it without sudo. Hermes wasn't affected because its base path (/home/ubuntu/.hermes) is ubuntu-owned. Two-line fix: - Add `claude-code: /configs` to the runtime → base-path map and flip the default fall-through from `/opt/configs` to `/configs`. Leave the pre-existing langgraph/external entries pointing at /opt/configs pending a migration audit (no user report on those today, and flipping them would silently relocate any files those runtimes already wrote). - Prefix the remote install command with `sudo -n` so the write succeeds under the standard EC2 ubuntu/passwordless-sudo posture. `-n` (non-interactive) ensures clean failure if that ever changes, rather than a hang waiting for a password prompt. Tests: - TestResolveWorkspaceFilePath_KnownRuntimes adds claude-code + CLAUDE-CODE coverage and updates the empty/unknown default cases to expect /configs. The langgraph/external rows stay green (unchanged values), confirming the scope of the rename. Verification: - go build ./... clean - go test ./internal/handlers/ green - The user-reported bug (PUT /workspaces/57fb7043-79a0-4a53-ae4a-efb39deb457f/files/config.yaml → 500 EACCES on /opt/configs) is the failure mode this fix addresses on both axes (path + sudo). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>	2026-05-04 14:29:08 -07:00
Hongming Wang	89bdf29d6f	Merge pull request #2766 from Molecule-AI/feat/mcp-multi-ws-tool-routing feat(mcp): multi-workspace routing for memory/chat_history/workspace_info (PR-3)	2026-05-04 21:20:22 +00:00
Hongming Wang	700d44ec3d	feat(mcp): multi-workspace routing for memory + chat_history + workspace_info PR-3 of the multi-workspace MCP rollout. PR-1 made the MCP server itself multi-workspace aware (one process, N workspace memberships). PR-2 added source_workspace_id threading to delegate_task / list_peers. This change closes the remaining workspace-scoped tools so a single agent registered into multiple workspaces no longer leaks memories or chat history across tenants. Tools now accepting `source_workspace_id`: - tool_commit_memory(content, scope, source_workspace_id=None) — routes POST to /workspaces/{src}/memories with the source workspace's Bearer token. Body still embeds source_workspace_id for the platform's audit + namespace-isolation enforcement. - tool_recall_memory(query, scope, source_workspace_id=None) — GET /workspaces/{src}/memories with the source workspace's token and ?workspace_id={src} query so the platform scopes the read to the caller's tenant view (PR-1 / multi-workspace mode). - tool_chat_history(peer_id, limit, before_ts, source_workspace_id=None) — auto-routes via the _peer_to_source cache populated by list_peers, with explicit override winning. Falls back to module-level WORKSPACE_ID if neither is available. URL: /workspaces/{src}/chat-history. - tool_get_workspace_info(source_workspace_id=None) — GET /workspaces/{src} with the source workspace's token. Useful for introspecting any workspace the agent is registered into, not just the primary. In every path, `src = source_workspace_id or WORKSPACE_ID`, so single-workspace operators see no behavior change. Tokens are resolved per-workspace via auth_headers(src) / _auth_headers_for_heartbeat(src), which fall through to the legacy AUTH_TOKEN env when not in multi-workspace mode. Also updates input_schemas in platform_tools/registry.py so the new optional parameter is advertised to LLM clients (claude-code, hermes-agent, langchain wrappers). Tests (4 new classes in test_a2a_multi_workspace.py, 21 new tests): - TestCommitMemorySourceRouting — URL + Authorization header per source - TestRecallMemorySourceRouting — URL + query param + Authorization - TestChatHistorySourceRouting — peer-cache auto-route + explicit override - TestGetWorkspaceInfoSourceRouting — URL + Authorization Inbox tools (peek/pop/wait_for_message) already multi-workspace aware since PR-1 — inbox.py spawns per-workspace pollers and tags every InboxMessage with arrival_workspace_id. No further plumbing needed. Suite: 1700 passed, 3 skipped, 2 xfailed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>	2026-05-04 14:17:58 -07:00
Hongming Wang	f70071e1e1	Merge pull request #2765 from Molecule-AI/fix/isolate-adapter-failures-from-card fix(runtime): isolate card-skill enrichment + transcript handler from adapter shape mismatch	2026-05-04 21:17:56 +00:00
Hongming Wang	63ac99788b	fix(runtime): isolate card-skill enrichment + transcript handler from adapter shape mismatch PR #2756 added a try/except around adapter.setup() so a missing LLM key doesn't crash the workspace boot. Two paths that now run AFTER setup succeeds were not similarly isolated, leaving small but real coupling risks for future adapter authors. 1. Skill metadata enrichment swap (main.py:248-259). When adapter.setup() returns, main.py reads adapter.loaded_skills and replaces the static stubs in agent_card.skills with rich metadata (description, tags, examples). The list comprehension assumes each element exposes .metadata.{id,name,description,tags,examples}. A future adapter that returns a non-canonical shape would raise AttributeError, propagate to the outer except, capture as adapter_error, and silently degrade an OK boot to the not-configured state — even though setup() actually succeeded. Extract to card_helpers.enrich_card_skills(card, loaded_skills) → bool. Helper swallows enrichment failures, logs the cause, returns False, leaves the static stubs in place. setup() success path continues unchanged. 6 unit tests cover: None input, empty list, canonical happy path, missing .metadata attr, partial .metadata (missing one canonical field), atomic-failure-no-partial-swap. 2. /transcript handler (main.py:513). Calls await adapter.transcript_lines(...) without try/except. BaseAdapter's default returns {"supported": false} so today's 4 adapters never trigger this — but a future adapter override that assumes setup() ran would surface as a 500 from Starlette's default error handler instead of a useful 503 with the exception class + message. Inline try/except returns 503 with the reason, matching the not-configured JSON-RPC handler's pattern. Both changes match the architectural principle the PR #2756 chain established: availability (workspace reachable) is decoupled from configuration / adapter behavior. Operators see useful errors instead of silent degradation; future adapter authors can't accidentally break tenant readiness with a shape mismatch. Adds: - workspace/card_helpers.py (~50 lines, 100% covered) - workspace/tests/test_card_helpers.py (6 tests) - AgentCard/AgentSkill/AgentCapabilities/AgentInterface stubs to workspace/tests/conftest.py so future card-related tests work under the existing a2a-mock infrastructure - card_helpers in TOP_LEVEL_MODULES (drift gate would have caught it) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>	2026-05-04 14:15:27 -07:00
Hongming Wang	28472f0d2d	Merge pull request #2764 from Molecule-AI/auto-sync/main-f42feb4e chore: sync main → staging (auto, ff to `f42feb4e`)	2026-05-04 19:51:06 +00:00