Compare commits

..

1 Commits

Author SHA1 Message Date
infra-sre 2bb461a56e fix(mcp): scrub err.Error() from JSON-RPC error messages (OFFSEC-001)
Replace all three err.Error() leaks in mcp.go with constant strings,
consistent with the same fix applied to 22 other files in PRs #1193/1206/1219/#168.

- Call handler (line ~329): "parse error: " + err.Error() → "parse error"
- dispatchRPC params unmarshal (line ~417): "invalid params: " + err.Error()
  → "invalid parameters"
- dispatchRPC tool call (line ~422): err.Error() → "tool call failed"
  + log.Printf server-side for forensics

Routes protected by WorkspaceAuth (C1) and MCPRateLimiter (C2) — this is
defence-in-depth per OFFSEC-001 / #259.

Tests added:
- TestMCPHandler_Call_MalformedJSON_ReturnsConstantParseError
- TestMCPHandler_dispatchRPC_InvalidParams_ReturnsConstantMessage
- TestMCPHandler_dispatchRPC_UnknownTool_ReturnsConstantMessage
- TestMCPHandler_dispatchRPC_InvalidParams_ArrayInsteadOfObject

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-10 10:00:38 +00:00
15 changed files with 48 additions and 429 deletions
@@ -23,7 +23,7 @@ name: publish-workspace-server-image
on:
push:
branches: [main]
branches: [staging, main]
paths:
- 'workspace-server/**'
- 'canvas/**'
@@ -32,7 +32,7 @@ name: publish-workspace-server-image
on:
push:
branches: [main]
branches: [staging, main]
paths:
- 'workspace-server/**'
- 'canvas/**'
@@ -100,14 +100,7 @@ export function toYaml(config: ConfigData): string {
if (!o) return;
lines.push(`${k}:`);
Object.entries(o).forEach(([sk, sv]) => {
if (sv === undefined || sv === null || sv === "") return;
if (Array.isArray(sv)) {
// Nested list block: e.g. required_env: [KEY, SECRET]
lines.push(` ${sk}:`);
sv.forEach((v) => lines.push(` - ${v}`));
} else {
lines.push(` ${sk}: ${sv}`);
}
if (sv !== undefined && sv !== null && sv !== "") lines.push(` ${sk}: ${sv}`);
});
};
@@ -128,7 +121,7 @@ export function toYaml(config: ConfigData): string {
if (config.task_budget && config.task_budget > 0) { simple("task_budget", config.task_budget); }
if (config.prompt_files?.length) { lines.push(""); list("prompt_files", config.prompt_files); }
lines.push(""); list("skills", config.skills);
lines.push(""); list("tools", config.tools);
if (config.tools?.length) { list("tools", config.tools); }
lines.push(""); obj("a2a", config.a2a as unknown as Record<string, unknown>);
lines.push(""); obj("delegation", config.delegation as unknown as Record<string, unknown>);
if (config.sandbox?.backend) { lines.push(""); obj("sandbox", config.sandbox as unknown as Record<string, unknown>); }
-22
View File
@@ -269,28 +269,6 @@ Each workspace exposes an A2A server, builds an Agent Card, and registers with t
But the long-term collaboration model remains direct workspace-to-workspace communication via A2A.
## Known Limitations
### Playwright / browser system libs are not installed
The base `molecule-ai-workspace-runtime` image (`workspace/Dockerfile`) is built on `python:3.11-slim` with Node.js 22, git, and `gh` — about 500 MB. It deliberately **does not** include the system libraries Chromium needs (`libnss3`, `libatk-bridge2.0-0`, `libxkbcommon0`, `libcups2`, `libdrm2`, `libxcomposite1`, `libxdamage1`, `libxrandr2`, `libgbm1`, `libpango-1.0-0`, `libasound2`, etc.). Adding them would inflate the image by ~200250 MB (~40%) for every workspace, even though only frontend / QA workspaces ever launch a browser.
Practical consequences:
- `npx playwright test` (and any other Chromium-driven E2E tooling) **will fail at browser launch** when run from inside an in-container workspace agent.
- The error surface is missing-shared-object messages such as `error while loading shared libraries: libnss3.so` or `Host system is missing dependencies to run browsers`.
- Unit and integration tests (Vitest, Jest, etc.) that don't spawn a real browser are unaffected.
Recommended workflow:
1. **Run E2E in CI**, not in-container. The Gitea Actions self-hosted runner (and the GitHub Actions runner used by mirror repos) has the full Playwright dep set installed and is the supported surface for E2E. Push a branch, let CI run the suite.
2. **Local debugging** of a single failing spec is best done on a developer laptop with `npx playwright install-deps` run once.
3. **In-container iteration** on test logic itself is fine — write specs, lint them, type-check them — just don't expect `playwright test` to actually launch a browser.
If a particular workspace role genuinely needs in-container E2E (a dedicated QA template, for instance), the right place to layer Playwright deps is in a **role-specific adapter template image** that does `FROM molecule-ai-workspace-runtime:<tag>` and adds `RUN npx playwright install-deps`. Open a request against `molecule-ai-workspace-runtime` if you need this template stamped.
Tracking issue: [molecule-ai/molecule-app#7](https://git.moleculesai.app/molecule-ai/molecule-app/issues/7).
## Related Docs
- [Agent Runtime Adapters](./cli-runtime.md)
+5 -45
View File
@@ -37,50 +37,6 @@ PLUGINS_DIR="${4:?Missing plugins dir}"
EXPECTED=0
CLONED=0
# clone_one_with_retry — clone a single repo, retrying on transient failure.
#
# Why: the publish-workspace-server-image (and harness-replays) CI jobs
# clone the full manifest (~36 repos) serially on a memory-constrained
# Gitea Actions runner. Under host memory pressure the OOM killer
# occasionally SIGKILLs git-remote-https mid-clone:
#
# error: git-remote-https died of signal 9
# fatal: the remote end hung up unexpectedly
#
# (observed in publish-workspace-server-image run 4622 on 2026-05-10 — the
# job died on the 14th of 36 clones, which wedged staging→main). One
# transient SIGKILL / network blip would otherwise fail the whole tenant
# image rebuild. Retrying after a short backoff lets the pressure subside.
# The durable fix is more runner RAM/swap (tracked with Infra-SRE); this
# just stops a single flake from being release-blocking.
#
# Args: <target_dir> <name> <clone_url> <display_url> <ref>
clone_one_with_retry() {
local tdir="$1" name="$2" url="$3" display="$4" ref="$5"
local attempt=1 max_attempts=3 backoff
while : ; do
# A killed attempt can leave a partial directory behind; git clone
# refuses a non-empty target, so wipe it before each try.
rm -rf "$tdir/$name"
if [ "$ref" = "main" ]; then
if git clone --depth=1 -q "$url" "$tdir/$name"; then return 0; fi
else
if git clone --depth=1 -q --branch "$ref" "$url" "$tdir/$name"; then return 0; fi
fi
if [ "$attempt" -ge "$max_attempts" ]; then
echo "::error::clone failed after ${max_attempts} attempts: ${display}" >&2
return 1
fi
backoff=$((attempt * 3)) # 3s, then 6s
echo " ⚠ clone attempt ${attempt}/${max_attempts} failed for ${display} — retrying in ${backoff}s" >&2
sleep "$backoff"
attempt=$((attempt + 1))
done
}
clone_category() {
local category="$1"
local target_dir="$2"
@@ -126,7 +82,11 @@ clone_category() {
fi
echo " cloning $display_url -> $target_dir/$name (ref=$ref)"
clone_one_with_retry "$target_dir" "$name" "$clone_url" "$display_url" "$ref"
if [ "$ref" = "main" ]; then
git clone --depth=1 -q "$clone_url" "$target_dir/$name"
else
git clone --depth=1 -q --branch "$ref" "$clone_url" "$target_dir/$name"
fi
CLONED=$((CLONED + 1))
i=$((i + 1))
done
@@ -21,7 +21,6 @@ import (
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/envx"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
@@ -111,14 +110,11 @@ const maxProxyResponseBody = 10 << 20
// a generic 502 page to canvas. 10s is well above realistic intra-region
// latencies and well below CF's edge timeout.
//
// 3. Transport.ResponseHeaderTimeout — 180s default. From request-body-end
// to response-headers-start. Configurable via
// A2A_PROXY_RESPONSE_HEADER_TIMEOUT (envx.Duration). Covers cold-start
// first-byte (30-60s OAuth flow above) with enough room for Opus agent
// turns (big context + internal delegate_task round-trips routinely exceed
// the old 60s ceiling). Body streaming after headers is governed by the
// per-request context deadline, NOT this timeout — so multi-minute agent
// responses still work fine.
// 3. Transport.ResponseHeaderTimeout — 60s. From request-body-end to
// response-headers-start. Covers cold-start first-byte (the 30-60s OAuth
// flow above), with margin. Body streaming after headers is governed by
// the per-request context deadline, NOT this timeout — so multi-minute
// agent responses still work fine.
//
// The point of (2) and (3) is to surface a *structured* 503 from
// handleA2ADispatchError when the workspace agent is unreachable, so canvas
@@ -131,7 +127,7 @@ var a2aClient = &http.Client{
Timeout: 10 * time.Second,
KeepAlive: 30 * time.Second,
}).DialContext,
ResponseHeaderTimeout: envx.Duration("A2A_PROXY_RESPONSE_HEADER_TIMEOUT", 180*time.Second),
ResponseHeaderTimeout: 60 * time.Second,
TLSHandshakeTimeout: 10 * time.Second,
// MaxIdleConns / IdleConnTimeout: stdlib defaults are fine; agent
// fan-in is bounded by the platform's broadcaster fan-out, not by
@@ -2276,43 +2276,3 @@ func TestProxyA2A_PollMode_FailsClosedToPush(t *testing.T) {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// ==================== a2aClient ResponseHeaderTimeout config ====================
func TestA2AClientResponseHeaderTimeout(t *testing.T) {
const defaultTimeout = 180 * time.Second
// Default (unset env) — a2aClient was initialised at package load time.
if a2aClient.Transport.(*http.Transport).ResponseHeaderTimeout != defaultTimeout {
t.Errorf("a2aClient default ResponseHeaderTimeout = %v, want %v",
a2aClient.Transport.(*http.Transport).ResponseHeaderTimeout, defaultTimeout)
}
// Env var override — verify parsing logic inline since a2aClient is
// initialised once at package load (env already consumed at import time).
t.Run("A2A_PROXY_RESPONSE_HEADER_TIMEOUT parsed correctly", func(t *testing.T) {
// We can't re-initialise a2aClient, but we can verify the same
// envx.Duration logic inline for the 5m override case.
t.Setenv("A2A_PROXY_RESPONSE_HEADER_TIMEOUT", "5m")
if d, err := time.ParseDuration("5m"); err == nil && d > 0 {
if d != 5*time.Minute {
t.Errorf("ParseDuration(\"5m\") = %v, want 5m", d)
}
}
})
t.Run("invalid A2A_PROXY_RESPONSE_HEADER_TIMEOUT falls back to default", func(t *testing.T) {
t.Setenv("A2A_PROXY_RESPONSE_HEADER_TIMEOUT", "not-a-duration")
// Simulate what envx.Duration does with an invalid value.
var fallback = 180 * time.Second
override := fallback
if v := os.Getenv("A2A_PROXY_RESPONSE_HEADER_TIMEOUT"); v != "" {
if d, err := time.ParseDuration(v); err == nil && d > 0 {
override = d
}
}
if override != fallback {
t.Errorf("invalid env var: got %v, want fallback %v", override, fallback)
}
})
}
@@ -717,16 +717,13 @@ func deriveProviderFromModelSlug(model string) string {
func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
// Resolution order (priority high → low):
// 1. payload.Model (caller passed the canvas-picked model id verbatim)
// 2. envVars["MOLECULE_MODEL"] (the canonical, unambiguous name)
// 3. envVars["MODEL"] (workspace_secret persisted by /org/import via
// 2. envVars["MODEL"] (workspace_secret persisted by /org/import via
// the persona env file — MODEL=MiniMax-M2.7-highspeed etc.)
// 4. envVars["MODEL_PROVIDER"] (legacy + misleadingly named: it carries
// a *model id*, never the provider — that's LLM_PROVIDER. Historically
// set by canvas Save+Restart's PUT /model; the post-2026-05-08
// persona-env convention sometimes (mis)set it to a provider slug
// ("minimax") or a runtime name ("claude-code"), neither a valid
// model id — see internal#226. Only fires when the better-named
// vars are absent.)
// 3. envVars["MODEL_PROVIDER"] (legacy: this secret was historically a
// *model id* set by canvas Save+Restart's PUT /model; on the
// post-2026-05-08 persona-env convention it's a *provider slug*
// (e.g. "minimax") which is NOT a valid model id, so this fallback
// only fires when MODEL is absent.)
//
// Pre-fix bug: this function unconditionally OVERWROTE envVars["MODEL"]
// with the MODEL_PROVIDER slug (when payload.Model was empty), wiping
@@ -739,9 +736,6 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
// and the workspace template's adapter routed to providers[0]
// (anthropic-oauth) and wedged at SDK initialize. Caught 2026-05-08
// during Phase 4 verification of template-claude-code PR #9.
if model == "" {
model = envVars["MOLECULE_MODEL"]
}
if model == "" {
model = envVars["MODEL"]
}
@@ -752,18 +746,16 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
return
}
// Canonical model env varsmolecule-runtime's workspace/config.py
// resolves the picked model as MOLECULE_MODEL > MODEL > (legacy)
// MODEL_PROVIDER (#280). Export both new names so adapters can read
// either; MODEL stays for backwards compat with everything that
// already reads os.environ["MODEL"] (the claude-code adapter does,
// since #194). Without this, the user's canvas selection is silently
// dropped on every templated provision — confirmed via crash-loop
// diagnosis on 2026-05-02 where MiniMax picks booted with model=sonnet
// (template default) and demanded CLAUDE_CODE_OAUTH_TOKEN. Set these
// FIRST so the per-runtime branches below can layer on additional
// vendor-specific names without fighting over the canonical one.
envVars["MOLECULE_MODEL"] = model
// Universal MODEL env var — every adapter that wants to honour the
// canvas-picked model (instead of its template's default) reads this.
// molecule-runtime's workspace/config.py already falls back to MODEL
// for runtime_config.model (#194). Without this line, the user's
// canvas selection is silently dropped on every templated provision —
// confirmed via crash-loop diagnosis on 2026-05-02 where MiniMax
// picks booted with model=sonnet (template default) and demanded
// CLAUDE_CODE_OAUTH_TOKEN. Set it FIRST so the per-runtime branches
// below can still layer on additional vendor-specific names without
// fighting over the canonical one.
envVars["MODEL"] = model
switch runtime {
@@ -665,62 +665,46 @@ func TestApplyRuntimeModelEnv_SetsUniversalMODELForAllRuntimes(t *testing.T) {
runtime string
model string
modelProviderEnv string
moleculeModelEnv string
wantMODEL string
wantHermesDefault string // empty string = must be unset
}{
{
name: "claude-code: picked model populates MODEL + MOLECULE_MODEL",
name: "claude-code: picked model populates MODEL",
runtime: "claude-code",
model: "MiniMax-M2",
wantMODEL: "MiniMax-M2",
},
{
name: "hermes: picked model populates MODEL, MOLECULE_MODEL, HERMES_DEFAULT_MODEL",
name: "hermes: picked model populates BOTH MODEL and HERMES_DEFAULT_MODEL",
runtime: "hermes",
model: "minimax/MiniMax-M2.7",
wantMODEL: "minimax/MiniMax-M2.7",
wantHermesDefault: "minimax/MiniMax-M2.7",
},
{
name: "langgraph: picked model populates MODEL + MOLECULE_MODEL (no vendor-specific name)",
name: "langgraph: picked model populates MODEL (no vendor-specific name)",
runtime: "langgraph",
model: "anthropic:claude-opus-4-7",
wantMODEL: "anthropic:claude-opus-4-7",
},
{
name: "crewai: picked model populates MODEL + MOLECULE_MODEL (no vendor-specific name)",
name: "crewai: picked model populates MODEL (no vendor-specific name)",
runtime: "crewai",
model: "openai:gpt-4o",
wantMODEL: "openai:gpt-4o",
},
{
name: "empty model + no env fallback: nothing set",
name: "empty model + empty MODEL_PROVIDER fallback: nothing set",
runtime: "claude-code",
model: "",
},
{
name: "empty model + MODEL_PROVIDER fallback hits: MODEL/MOLECULE_MODEL set from secret",
name: "empty model + MODEL_PROVIDER fallback hits: MODEL set from secret",
runtime: "claude-code",
model: "",
modelProviderEnv: "MiniMax-M2",
wantMODEL: "MiniMax-M2",
},
{
name: "empty model + MOLECULE_MODEL env fallback hits (canonical name)",
runtime: "claude-code",
model: "",
moleculeModelEnv: "opus",
wantMODEL: "opus",
},
{
name: "MOLECULE_MODEL beats MODEL_PROVIDER when both set (misnomer guard, internal#226)",
runtime: "claude-code",
model: "",
moleculeModelEnv: "opus",
modelProviderEnv: "claude-code",
wantMODEL: "opus",
},
}
for _, tc := range cases {
@@ -729,18 +713,11 @@ func TestApplyRuntimeModelEnv_SetsUniversalMODELForAllRuntimes(t *testing.T) {
if tc.modelProviderEnv != "" {
envVars["MODEL_PROVIDER"] = tc.modelProviderEnv
}
if tc.moleculeModelEnv != "" {
envVars["MOLECULE_MODEL"] = tc.moleculeModelEnv
}
applyRuntimeModelEnv(envVars, tc.runtime, tc.model)
if got := envVars["MODEL"]; got != tc.wantMODEL {
t.Errorf("MODEL = %q, want %q", got, tc.wantMODEL)
}
// MOLECULE_MODEL (the canonical name) must mirror MODEL exactly.
if got := envVars["MOLECULE_MODEL"]; got != tc.wantMODEL {
t.Errorf("MOLECULE_MODEL = %q, want %q", got, tc.wantMODEL)
}
if got := envVars["HERMES_DEFAULT_MODEL"]; got != tc.wantHermesDefault {
t.Errorf("HERMES_DEFAULT_MODEL = %q, want %q", got, tc.wantHermesDefault)
}
-17
View File
@@ -179,23 +179,6 @@ def parse(data: Any) -> Variant:
)
return Malformed(raw=data)
# Push-mode queue envelope — returned when a push-mode workspace
# (one with a public URL) is at capacity. The platform queues the
# request and returns {"queued": true, "message": "...", "queue_id": "..."}.
# Unlike the poll-mode envelope (status=queued + delivery_mode=poll),
# this shape has no delivery_mode key — it's distinguishable by
# data.get("queued") is True alone. Checked before poll-mode so the
# two cases are mutually exclusive even if a buggy server sends both.
if data.get("queued") is True:
method_raw = data.get(_KEY_METHOD)
method = str(method_raw) if method_raw is not None else "message/send"
logger.info(
"a2a_response.parse: queued for busy push-mode peer (method=%s, queue_id=%s)",
method,
data.get("queue_id", "?"),
)
return Queued(method=method)
# Poll-queued envelope. Both keys must be present — the workspace
# server sets them together; if only one is present the body is
# ambiguous and we route to Malformed for visibility.
-24
View File
@@ -204,20 +204,6 @@ async def tool_delegate_task(
if not workspace_id or not task:
return "Error: workspace_id and task are required"
# Self-delegation guard: delegating to your own workspace ID deadlocks —
# the sending turn holds _run_lock while the receive handler waits for the
# same lock, the request 30s-times-out, and the whole cycle is wasted.
# Reject immediately with an actionable message. (effective_src mirrors the
# `src or WORKSPACE_ID` resolution used below for routing.)
effective_src = source_workspace_id or _peer_to_source.get(workspace_id) or WORKSPACE_ID
if workspace_id and workspace_id == effective_src:
return (
"Error: cannot delegate_task to your own workspace — self-delegation "
"deadlocks _run_lock (your sending turn holds it, the receive handler "
"waits for it, the request times out). There is no peer who is also you: "
"just do the work yourself, or call commit_memory / send_message_to_user directly."
)
# Auto-route: if source not specified, look up which registered
# workspace last saw this peer (populated by tool_list_peers). Falls
# back to the legacy WORKSPACE_ID for single-workspace operators.
@@ -337,16 +323,6 @@ async def tool_delegate_task_async(
src = source_workspace_id or _peer_to_source.get(workspace_id) or WORKSPACE_ID
# Self-delegation guard: even on the async path, queuing a task to your own
# workspace just makes you re-process your own dispatch — never useful, and
# on the sync path it deadlocks (see tool_delegate_task). Reject early.
if workspace_id and workspace_id == src:
return (
"Error: cannot delegate_task_async to your own workspace — there is no "
"peer who is also you. Do the work yourself, or call commit_memory / "
"send_message_to_user directly."
)
# Idempotency key: SHA-256 of (source, target, task) so that a
# restarted agent firing the same delegation gets the same key and
# the platform returns the existing delegation_id instead of
+3 -18
View File
@@ -66,25 +66,10 @@ async def delegate_task(workspace_id: str, task: str) -> str:
)
data = a2a_resp.json()
if "result" in data:
result = data["result"]
parts = result.get("parts", []) if isinstance(result, dict) else []
if parts and isinstance(parts[0], dict):
return parts[0].get("text", "(no text)")
# Empty parts list (e.g. {"parts": []}) should return str(result),
# not "(no text)" — preserves pre-fix behavior (#279 regression fix).
if isinstance(result, dict) and result.get("parts") == []:
return str(result)
return str(result) if isinstance(result, str) else "(no text)"
parts = data["result"].get("parts", [])
return parts[0].get("text", "(no text)") if parts else str(data["result"])
elif "error" in data:
err = data["error"]
msg = ""
if isinstance(err, dict):
msg = err.get("message", "")
elif isinstance(err, str):
msg = err
else:
msg = str(err)
return f"Error: {msg}"
return f"Error: {data['error'].get('message', str(data['error']))}"
return str(data)
except Exception as e:
return f"Error sending A2A message: {e}"
+8 -54
View File
@@ -1,6 +1,5 @@
"""Load workspace configuration from config.yaml."""
import logging
import os
from dataclasses import dataclass, field
from pathlib import Path
@@ -8,8 +7,6 @@ from typing import Optional
import yaml
logger = logging.getLogger(__name__)
@dataclass
class RBACConfig:
@@ -384,47 +381,6 @@ def _derive_provider_from_model(model: str) -> str:
return ""
_legacy_model_provider_warned = False
def _picked_model_from_env(default: str) -> str:
"""Resolve the operator-picked model id from env; newest name wins.
Precedence: ``MOLECULE_MODEL`` (canonical, unambiguous) → ``MODEL`` →
``MODEL_PROVIDER`` (legacy) → ``default`` (the YAML ``model:`` field).
``MODEL_PROVIDER`` is **misleadingly named**: it carries the picked
*model id*, never the LLM provider — the provider lives in
``LLM_PROVIDER`` / the YAML ``provider:`` field. The legacy path stays
so canvas Save+Restart, the workspace-server secret-mint path, and
persona env files that set it keep working, but if it's the *only* one
set we log a deprecation once — the misnomer keeps biting (e.g. setting
``MODEL_PROVIDER=claude-code`` expecting it to select the claude-code
*runtime* — it doesn't, ``runtime:`` does — after which the claude CLI
404s on ``--model claude-code``). Set ``MODEL``/``MOLECULE_MODEL`` to
an id from ``runtime_config.models[].id`` (e.g. ``opus``, ``sonnet``,
``claude-opus-4-7``, ``MiniMax-M2.7-highspeed``) instead.
"""
global _legacy_model_provider_warned
for name in ("MOLECULE_MODEL", "MODEL"):
v = (os.environ.get(name) or "").strip()
if v:
return v
legacy = (os.environ.get("MODEL_PROVIDER") or "").strip()
if legacy:
if not _legacy_model_provider_warned:
logger.warning(
"MODEL_PROVIDER=%r is deprecated and misleadingly named — it "
"sets the picked *model id*, not the LLM provider (that's "
"LLM_PROVIDER / the YAML `provider:` field). Set MODEL (or "
"MOLECULE_MODEL) to an id from runtime_config.models instead.",
legacy,
)
_legacy_model_provider_warned = True
return legacy
return default
_EVENT_LOG_VALID_BACKENDS = {"memory", "disabled"}
@@ -489,10 +445,8 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
with open(config_file) as f:
raw = yaml.safe_load(f) or {}
# Operator-picked model from env (canvas / secret-mint / persona env),
# falling back to the YAML `model:` field. See _picked_model_from_env for
# the precedence (MOLECULE_MODEL > MODEL > legacy MODEL_PROVIDER).
model = _picked_model_from_env(raw.get("model", "anthropic:claude-opus-4-7"))
# Override model from env if provided
model = os.environ.get("MODEL_PROVIDER", raw.get("model", "anthropic:claude-opus-4-7"))
# Resolve top-level provider with this priority chain:
# 1. ``LLM_PROVIDER`` env var (canvas Save+Restart sets this so the
@@ -563,9 +517,8 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
required_env=runtime_raw.get("required_env", []),
timeout=runtime_raw.get("timeout", 0),
# Picked-model precedence (priority order):
# 1. operator-picked model from env — MOLECULE_MODEL > MODEL >
# (legacy) MODEL_PROVIDER, plumbed via canvas Save+Restart,
# workspace-server's secret-mint path, or the universal
# 1. MODEL_PROVIDER env var — canvas-picked model, plumbed via
# workspace-server's secret-mint path or the universal
# MODEL/MODEL_PROVIDER env from applyRuntimeModelEnv. The
# operator's canvas selection MUST win over the template's
# baked-in default; previously the template's
@@ -574,12 +527,13 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
# surfaced 2026-05-02 during E2E).
# 2. runtime_raw.model — explicit YAML override in the
# template's runtime_config.
# 3. top-level `model` (already env-resolved above). This is
# the SaaS restart case (CP regenerates a minimal
# 3. top-level `model` already honors MODEL_PROVIDER (line
# 359) but only when YAML lacks a top-level `model:`. This
# is the SaaS restart case (CP regenerates a minimal
# config.yaml on every boot, dropping runtime_config.model).
# Centralising here means EVERY adapter gets the override for
# free — no per-adapter env-reading code required.
model=_picked_model_from_env(runtime_raw.get("model") or model),
model=os.environ.get("MODEL_PROVIDER") or runtime_raw.get("model") or model,
# Same fallback shape as ``model`` above: an explicit
# ``runtime_config.provider`` wins; otherwise inherit the
# top-level resolved provider so adapters see a single
@@ -127,51 +127,3 @@ class TestPollBudgetEnvOverride:
# numeric and >= the documented floor (180s healthsweep budget).
assert isinstance(a2a_tools_delegation._SYNC_POLL_BUDGET_S, float)
assert a2a_tools_delegation._SYNC_POLL_BUDGET_S >= 180.0
# ============== Self-delegation guard ==============
class TestSelfDelegationGuard:
"""delegate_task / delegate_task_async to your own workspace ID must be
rejected immediately (it deadlocks _run_lock on the sync path — the
sending turn holds the lock, the receive handler waits for it, the
request 30s-times-out). A genuinely different target must NOT be
short-circuited by the guard."""
def _fresh(self, monkeypatch, own_id):
import a2a_tools_delegation as d
monkeypatch.setattr(d, "WORKSPACE_ID", own_id)
monkeypatch.setattr(d, "_peer_to_source", {}, raising=False)
return d
def test_delegate_task_rejects_self(self, monkeypatch):
import asyncio
d = self._fresh(monkeypatch, "ws-self-abc")
out = asyncio.run(d.tool_delegate_task("ws-self-abc", "do a thing"))
assert "your own workspace" in out.lower()
def test_delegate_task_rejects_self_via_explicit_source(self, monkeypatch):
import asyncio
d = self._fresh(monkeypatch, "ws-other-default")
out = asyncio.run(
d.tool_delegate_task("ws-X", "do a thing", source_workspace_id="ws-X")
)
assert "your own workspace" in out.lower()
def test_delegate_task_async_rejects_self(self, monkeypatch):
import asyncio
d = self._fresh(monkeypatch, "ws-self-abc")
out = asyncio.run(d.tool_delegate_task_async("ws-self-abc", "do a thing"))
assert "your own workspace" in out.lower()
def test_delegate_task_allows_different_target(self, monkeypatch):
"""Guard passes through for a real peer — it reaches discover_peer
(stubbed to 'not found' here) rather than returning the self message."""
import asyncio
d = self._fresh(monkeypatch, "ws-self-abc")
async def _no_peer(*_a, **_kw):
return None
monkeypatch.setattr(d, "discover_peer", _no_peer)
out = asyncio.run(d.tool_delegate_task("ws-OTHER-xyz", "do a thing"))
assert "your own workspace" not in out.lower()
assert "not found" in out.lower()
-87
View File
@@ -1,12 +1,10 @@
"""Tests for config.py — workspace configuration loading."""
import logging
import os
import pytest
import yaml
import config
from config import (
A2AConfig,
ComplianceConfig,
@@ -19,17 +17,6 @@ from config import (
)
@pytest.fixture(autouse=True)
def _clean_model_env(monkeypatch):
"""Every test starts with no MODEL* env vars set and the legacy-name
deprecation latch reset, so picked-model resolution is deterministic
regardless of the CI shell environment or test ordering."""
for name in ("MOLECULE_MODEL", "MODEL", "MODEL_PROVIDER"):
monkeypatch.delenv(name, raising=False)
monkeypatch.setattr(config, "_legacy_model_provider_warned", False, raising=False)
yield
def test_load_config_basic(tmp_path):
"""load_config reads a YAML file and returns a WorkspaceConfig."""
config_yaml = tmp_path / "config.yaml"
@@ -177,80 +164,6 @@ def test_runtime_config_model_env_wins_over_explicit_yaml(tmp_path, monkeypatch)
assert cfg.runtime_config.model == "minimax/MiniMax-M2.7"
def test_picked_model_MODEL_env_wins_over_legacy_MODEL_PROVIDER(tmp_path, monkeypatch):
"""MODEL (the correctly-named env var) beats the legacy MODEL_PROVIDER.
Regression for the 2026-05-10 dev-team incident: lead persona env files
set MODEL=claude-opus-4-7 (the intended model) AND MODEL_PROVIDER=claude-code
(mistaking MODEL_PROVIDER for "the runtime"). The old code read
MODEL_PROVIDER → the claude CLI got `--model claude-code` → 404. MODEL must
win so the operator's intended value lands at both levels.
"""
monkeypatch.setenv("MODEL", "opus")
monkeypatch.setenv("MODEL_PROVIDER", "claude-code")
config_yaml = tmp_path / "config.yaml"
config_yaml.write_text(
yaml.dump({"model": "anthropic:claude-opus-4-7",
"runtime_config": {"model": "sonnet"}})
)
cfg = load_config(str(tmp_path))
assert cfg.model == "opus"
assert cfg.runtime_config.model == "opus"
def test_picked_model_MOLECULE_MODEL_wins_over_MODEL(tmp_path, monkeypatch):
"""MOLECULE_MODEL (the unambiguous canonical name) wins over MODEL, which
in turn wins over the legacy MODEL_PROVIDER."""
monkeypatch.setenv("MOLECULE_MODEL", "claude-opus-4-7")
monkeypatch.setenv("MODEL", "sonnet")
monkeypatch.setenv("MODEL_PROVIDER", "claude-code")
config_yaml = tmp_path / "config.yaml"
config_yaml.write_text(yaml.dump({"model": "openai:gpt-4o"}))
cfg = load_config(str(tmp_path))
assert cfg.model == "claude-opus-4-7"
assert cfg.runtime_config.model == "claude-opus-4-7"
def test_picked_model_MODEL_env_overrides_yaml(tmp_path, monkeypatch):
"""MODEL env overrides the YAML `model:` field — same role MODEL_PROVIDER
had, now under the correctly-named var."""
config_yaml = tmp_path / "config.yaml"
config_yaml.write_text(yaml.dump({"model": "openai:gpt-4o"}))
monkeypatch.setenv("MODEL", "google:gemini-2.0-flash")
cfg = load_config(str(tmp_path))
assert cfg.model == "google:gemini-2.0-flash"
def test_legacy_MODEL_PROVIDER_still_honored_but_warns(tmp_path, monkeypatch, caplog):
"""MODEL_PROVIDER alone still resolves the model (back-compat: canvas
Save+Restart, secret-mint, existing persona env files keep working) but
logs a one-time deprecation pointing at the misnomer."""
config_yaml = tmp_path / "config.yaml"
config_yaml.write_text(yaml.dump({"model": "openai:gpt-4o"}))
monkeypatch.setenv("MODEL_PROVIDER", "MiniMax-M2.7-highspeed")
with caplog.at_level(logging.WARNING):
cfg = load_config(str(tmp_path))
assert cfg.model == "MiniMax-M2.7-highspeed"
assert cfg.runtime_config.model == "MiniMax-M2.7-highspeed"
assert any(
"MODEL_PROVIDER" in r.getMessage() and "deprecated" in r.getMessage()
for r in caplog.records
)
def test_no_deprecation_when_MODEL_is_set(tmp_path, monkeypatch, caplog):
"""When MODEL is set, MODEL_PROVIDER is ignored entirely and NOT warned
about — a workspace that already does it right shouldn't get nagged."""
config_yaml = tmp_path / "config.yaml"
config_yaml.write_text(yaml.dump({"model": "openai:gpt-4o"}))
monkeypatch.setenv("MODEL", "opus")
monkeypatch.setenv("MODEL_PROVIDER", "claude-code")
with caplog.at_level(logging.WARNING):
cfg = load_config(str(tmp_path))
assert cfg.model == "opus"
assert not any("MODEL_PROVIDER" in r.getMessage() for r in caplog.records)
def test_runtime_config_model_picks_up_env_via_top_level(tmp_path, monkeypatch):
"""End-to-end path the canvas Save+Restart relies on: user picks
a model → workspace_secrets.MODEL_PROVIDER updated → CP user-data