forked from molecule-ai/molecule-core
Compare commits
66 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4b16c95450 | |||
| f1b72af97e | |||
| 31facfc5c4 | |||
| 19e7acdc22 | |||
| 1ce51abea4 | |||
| 0ec226e119 | |||
| 872b781f64 | |||
| 0dd1244510 | |||
| 26fa220bef | |||
| 5559e96400 | |||
| 3bc7749e84 | |||
| 6d7a7fc86f | |||
| ecb3c75d74 | |||
| 2f7beb9bce | |||
| bd881f8756 | |||
| e39d818ac4 | |||
| ed4d24fb8c | |||
| 3a5544a9e6 | |||
| 095171f163 | |||
| 9c7b34cb7f | |||
| 8514ff1a96 | |||
| 1785732bbb | |||
| 066a0772ee | |||
| 3f2cc8cdd6 | |||
| 5c80b9c3d6 | |||
| a8850bac55 | |||
| adfa34c4ae | |||
| 7692dd4975 | |||
| 28f22609d9 | |||
| e67a854a33 | |||
| 3e7d483b8c | |||
| 4f4b6c4f90 | |||
| fc10386a78 | |||
| 1282c1c8ff | |||
| a242ca8b01 | |||
| ac9b07b7ad | |||
| 41ae4ec50b | |||
| 02960209a0 | |||
| d866d3aa5f | |||
| 61d5908817 | |||
| 89bdf29d6f | |||
| 700d44ec3d | |||
| f70071e1e1 | |||
| 63ac99788b | |||
| 28472f0d2d | |||
| f42feb4ed7 | |||
| 99e7f13149 | |||
| 6488ba09e7 | |||
| 8176b5142d | |||
| 314277769e | |||
| e0b567e992 | |||
| 707e4d7342 | |||
| 4f9e3feece | |||
| 10752fe330 | |||
| 8f7122a9b6 | |||
| b3982035b3 | |||
| d1122f8d28 | |||
| 4b35d25d86 | |||
| 46731729d4 | |||
| 6dc2d907a2 | |||
| 849bc97349 | |||
| e13dcab5e0 | |||
| 721010307c | |||
| 9f47ecf86e | |||
| ebc20794f3 | |||
| 9a64aeaa2c |
@@ -358,6 +358,72 @@ jobs:
|
||||
- if: needs.changes.outputs.python == 'true'
|
||||
run: python -m pytest --tb=short
|
||||
|
||||
- if: needs.changes.outputs.python == 'true'
|
||||
name: Per-file critical-path coverage (MCP / inbox / auth)
|
||||
# MCP-critical Python files have a per-file floor on top of the
|
||||
# 86% total floor in pytest.ini. Rationale (issue #2790, after
|
||||
# the PR #2766 → PR #2771 cycle): the total floor averages ~6000
|
||||
# lines, so a single MCP file could regress to ~50% with no
|
||||
# complaint as long as other modules compensate. These five
|
||||
# files handle multi-tenant routing + auth + inbox dispatch —
|
||||
# a coverage drop here is the same risk shape as a Go-side
|
||||
# workspace-server token/secrets file dropping below 10%.
|
||||
#
|
||||
# Floor 75% sits below current actuals (80-96%) so this gate is
|
||||
# strictly additive — no existing PR fails. Ratchet plan in
|
||||
# COVERAGE_FLOOR.md.
|
||||
run: |
|
||||
set -e
|
||||
PER_FILE_FLOOR=75
|
||||
CRITICAL_FILES=(
|
||||
"a2a_mcp_server.py"
|
||||
"mcp_cli.py"
|
||||
"a2a_tools.py"
|
||||
"inbox.py"
|
||||
"platform_auth.py"
|
||||
)
|
||||
|
||||
# pytest already wrote .coverage; emit a JSON view scoped to
|
||||
# the critical files so jq/python can read the per-file pct
|
||||
# without parsing tabular text. --include uses fnmatch, and
|
||||
# the leading "*" allows the file to live anywhere under the
|
||||
# workspace root (today they sit at workspace/<name>.py).
|
||||
INCLUDES=$(printf '*%s,' "${CRITICAL_FILES[@]}")
|
||||
INCLUDES="${INCLUDES%,}"
|
||||
python -m coverage json -o /tmp/critical-cov.json --include="$INCLUDES"
|
||||
|
||||
FAILED=0
|
||||
for f in "${CRITICAL_FILES[@]}"; do
|
||||
# Match by top-level path key (e.g. "a2a_tools.py", not
|
||||
# "builtin_tools/a2a_tools.py" — different file at 100%).
|
||||
# The keys in coverage.json are paths relative to the run
|
||||
# cwd (workspace/), so the critical-path entry sits at the
|
||||
# bare basename.
|
||||
pct=$(jq -r --arg f "$f" '.files | to_entries | map(select(.key == $f)) | .[0].value.summary.percent_covered // "MISSING"' /tmp/critical-cov.json)
|
||||
if [ "$pct" = "MISSING" ]; then
|
||||
echo "::error file=workspace/$f::No coverage data — file may have moved or test exclusion mis-set."
|
||||
FAILED=$((FAILED+1))
|
||||
continue
|
||||
fi
|
||||
echo "$f: ${pct}%"
|
||||
if awk "BEGIN{exit !($pct < $PER_FILE_FLOOR)}"; then
|
||||
echo "::error file=workspace/$f::${pct}% < ${PER_FILE_FLOOR}% per-file floor (MCP critical path). See COVERAGE_FLOOR.md."
|
||||
FAILED=$((FAILED+1))
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$FAILED" -gt 0 ]; then
|
||||
echo ""
|
||||
echo "$FAILED MCP critical-path file(s) below the ${PER_FILE_FLOOR}% per-file floor."
|
||||
echo "These paths handle multi-tenant routing, auth tokens, and inbox dispatch."
|
||||
echo "A coverage drop here is the same risk shape as Go-side tokens/secrets files"
|
||||
echo "dropping below 10% (see COVERAGE_FLOOR.md). Either:"
|
||||
echo " (a) add tests to raise coverage back above ${PER_FILE_FLOOR}%, or"
|
||||
echo " (b) if this is unavoidable historical debt, file an issue and propose"
|
||||
echo " adjusting the floor with rationale in COVERAGE_FLOOR.md."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# SDK + plugin validation moved to standalone repo:
|
||||
# github.com/Molecule-AI/molecule-sdk-python
|
||||
|
||||
|
||||
+50
-2
@@ -1,7 +1,7 @@
|
||||
# Coverage Floor
|
||||
|
||||
CI enforces three coverage gates on `workspace-server` (Go). All defined in
|
||||
`.github/workflows/ci.yml` → `platform-build` job.
|
||||
CI enforces coverage gates on two surfaces — `workspace-server` (Go) and
|
||||
`workspace/` (Python). All defined in `.github/workflows/ci.yml`.
|
||||
|
||||
## Current floors (2026-04-23)
|
||||
|
||||
@@ -76,3 +76,51 @@ This gate makes "no untested critical paths merged" a mechanical property of
|
||||
the CI, not a behavioural property of QA agents or individual reviewers —
|
||||
which is the only way to make it survive fleet outages, agent rotations, or
|
||||
QA process changes.
|
||||
|
||||
## Python (workspace/) — added 2026-05-04 from #2790
|
||||
|
||||
The Python side has its own gates in the `python-lint` job:
|
||||
|
||||
| Gate | Threshold | Where |
|
||||
|---|---|---|
|
||||
| **Total floor** | `86%` | `workspace/pytest.ini` `--cov-fail-under=86` (issue #1817) |
|
||||
| **Critical-path per-file floor** | `75%` | Inline shell step after the pytest run |
|
||||
|
||||
### Critical-path Python files
|
||||
|
||||
These handle multi-tenant routing, auth tokens, and inbox dispatch. A
|
||||
coverage drop here is the same risk shape as a Go-side `tokens*` /
|
||||
`secrets*` file regressing below 10%.
|
||||
|
||||
- `workspace/a2a_mcp_server.py` — MCP dispatcher (PR #2766 / #2771)
|
||||
- `workspace/mcp_cli.py` — molecule-mcp standalone CLI entry
|
||||
- `workspace/a2a_tools.py` — workspace-scoped tool implementations
|
||||
- `workspace/inbox.py` — multi-workspace inbox + per-workspace cursors
|
||||
- `workspace/platform_auth.py` — per-workspace token resolver
|
||||
|
||||
### Why 75% (vs 86% total)
|
||||
|
||||
The total floor averages ~6000 lines across `workspace/`. A single MCP
|
||||
file could drop to ~50% with no CI complaint as long as other modules
|
||||
compensate. The per-file floor closes that distribution gap. 75% sits
|
||||
below current actuals (80–96% as of 2026-05-04) — strictly additive,
|
||||
no existing PR fails.
|
||||
|
||||
### Python ratchet plan
|
||||
|
||||
| Date | Total | Per-file critical | Notes |
|
||||
|---|---|---|---|
|
||||
| 2026-05-04 | 86% | 75% | Initial gate (this file). |
|
||||
| 2026-06-04 | 86% | 80% | First ratchet — at-floor files must catch up. |
|
||||
| 2026-07-04 | 88% | 85% | |
|
||||
| 2026-08-04 | 90% | 90% | Target steady-state. |
|
||||
|
||||
### Why this Python gate exists
|
||||
|
||||
Issue #2790, after the PR #2766 → PR #2771 cycle. PR #2766 added
|
||||
multi-workspace routing through `a2a_tools.py` + `a2a_mcp_server.py`,
|
||||
shipped to main with green CI, but the dispatcher silently dropped a
|
||||
load-bearing kwarg for 4 of 9 tools — caught only by post-merge code
|
||||
review. The structural drift gate (`test_dispatcher_schema_drift.py`,
|
||||
PR #2791) catches the schema↔dispatcher mismatch class; this floor
|
||||
catches the broader "MCP-critical file regressed" class.
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
import { useCallback, useMemo } from "react";
|
||||
import { Handle, NodeResizer, Position, type NodeProps, type Node } from "@xyflow/react";
|
||||
import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas";
|
||||
import { getConfigurationError, getConfigurationStatus } from "@/store/canvas-topology";
|
||||
import { showToast } from "@/components/Toaster";
|
||||
import { Tooltip } from "@/components/Tooltip";
|
||||
import { STATUS_CONFIG, TIER_CONFIG } from "@/lib/design-tokens";
|
||||
@@ -35,8 +36,28 @@ function EjectIcon(props: React.SVGProps<SVGSVGElement>) {
|
||||
}
|
||||
|
||||
export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>) {
|
||||
const statusCfg = STATUS_CONFIG[data.status] || STATUS_CONFIG.offline;
|
||||
// Configuration-status overlay (PR #2756 / #467 chain). When the
|
||||
// workspace is reachable but adapter.setup() failed (typically a
|
||||
// missing/rotated LLM credential), the agent_card carries
|
||||
// configuration_status: "not_configured". Surface this as a distinct
|
||||
// tile state so the operator sees a useful error instead of an
|
||||
// ambiguous "online but silent" workspace.
|
||||
//
|
||||
// The override only applies when the underlying status is "online" —
|
||||
// a workspace that's actually offline / failed / provisioning gets
|
||||
// its own treatment. "online + not_configured" is the gap PR #2756
|
||||
// introduced; everything else was already covered.
|
||||
const isMisconfigured =
|
||||
data.status === "online" &&
|
||||
getConfigurationStatus(data.agentCard) === "not_configured";
|
||||
const configurationError = getConfigurationError(data.agentCard);
|
||||
const effectiveStatus = isMisconfigured ? "not_configured" : data.status;
|
||||
const statusCfg = STATUS_CONFIG[effectiveStatus] || STATUS_CONFIG.offline;
|
||||
const tierCfg = TIER_CONFIG[data.tier] || { label: `T${data.tier}`, color: "text-ink-mid bg-surface-card border border-line" };
|
||||
const tooltipExtra = isMisconfigured && configurationError
|
||||
? `Agent not configured: ${configurationError}`
|
||||
: null;
|
||||
void tooltipExtra; // wired in via aria-label below; reserved here for future tooltip surface.
|
||||
// Org-deploy context — four derived flags off one store subscription.
|
||||
// Drives the shimmer while provisioning, the dimmed/non-draggable
|
||||
// treatment on locked descendants, and the Cancel pill on the root.
|
||||
@@ -75,7 +96,12 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
<div
|
||||
role="button"
|
||||
tabIndex={0}
|
||||
aria-label={`${data.name} workspace — ${data.status}`}
|
||||
aria-label={
|
||||
isMisconfigured && configurationError
|
||||
? `${data.name} workspace — agent not configured: ${configurationError}`
|
||||
: `${data.name} workspace — ${data.status}`
|
||||
}
|
||||
title={isMisconfigured && configurationError ? `Agent not configured: ${configurationError}` : undefined}
|
||||
aria-pressed={isSelected}
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
@@ -283,11 +309,12 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
|
||||
{/* Bottom row: status / active tasks */}
|
||||
<div className="flex items-center justify-between mt-0.5">
|
||||
{data.status !== "online" ? (
|
||||
{effectiveStatus !== "online" ? (
|
||||
<div className={`text-[10px] uppercase tracking-widest font-medium ${
|
||||
data.status === "failed" ? "text-bad" :
|
||||
data.status === "degraded" ? "text-warm" :
|
||||
data.status === "provisioning" ? "text-accent" :
|
||||
effectiveStatus === "failed" ? "text-bad" :
|
||||
effectiveStatus === "degraded" ? "text-warm" :
|
||||
effectiveStatus === "not_configured" ? "text-warm" :
|
||||
effectiveStatus === "provisioning" ? "text-accent" :
|
||||
"text-ink-mid"
|
||||
}`}>
|
||||
{statusCfg.label}
|
||||
@@ -313,6 +340,19 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
{data.lastSampleError}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Configuration error preview — same visual as the degraded
|
||||
* error preview but keyed off the agent_card's configuration_status.
|
||||
* Tells the operator which env var is missing so they can fix it
|
||||
* without having to dig into the workspace logs. */}
|
||||
{isMisconfigured && configurationError && (
|
||||
<div
|
||||
className="text-[10px] text-warm truncate mt-1 bg-warm/10 px-1.5 py-0.5 rounded border border-warm/40"
|
||||
title={configurationError}
|
||||
>
|
||||
{configurationError}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<Handle
|
||||
|
||||
@@ -890,7 +890,6 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
<TagList label="Skills" values={config.skills || []} onChange={(v) => update("skills", v)} placeholder="e.g. code-review" />
|
||||
<TagList label="Tools" values={config.tools || []} onChange={(v) => update("tools", v)} placeholder="e.g. web_search, filesystem" />
|
||||
<TagList label="Prompt Files" values={config.prompt_files || []} onChange={(v) => update("prompt_files", v)} placeholder="e.g. system-prompt.md" />
|
||||
<TagList label="Shared Context" values={config.shared_context || []} onChange={(v) => update("shared_context", v)} placeholder="e.g. architecture.md" />
|
||||
</Section>
|
||||
|
||||
<Section title="A2A Protocol" defaultOpen={false}>
|
||||
|
||||
@@ -10,6 +10,7 @@ interface Props {
|
||||
interface MemoryEntry {
|
||||
key: string;
|
||||
value: unknown;
|
||||
version?: number;
|
||||
expires_at: string | null;
|
||||
updated_at: string;
|
||||
}
|
||||
@@ -28,6 +29,10 @@ export function MemoryTab({ workspaceId }: Props) {
|
||||
const [newValue, setNewValue] = useState("");
|
||||
const [newTTL, setNewTTL] = useState("");
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [editingKey, setEditingKey] = useState<string | null>(null);
|
||||
const [editValue, setEditValue] = useState("");
|
||||
const [editTTL, setEditTTL] = useState("");
|
||||
const [editError, setEditError] = useState<string | null>(null);
|
||||
|
||||
const awarenessUrl = useMemo(() => {
|
||||
try {
|
||||
@@ -109,6 +114,69 @@ export function MemoryTab({ workspaceId }: Props) {
|
||||
}
|
||||
};
|
||||
|
||||
const beginEdit = (entry: MemoryEntry) => {
|
||||
setEditError(null);
|
||||
setEditingKey(entry.key);
|
||||
// Stringify objects/arrays as pretty JSON; render plain strings raw so the
|
||||
// editor doesn't surprise users with surrounding quotes.
|
||||
setEditValue(
|
||||
typeof entry.value === "string"
|
||||
? entry.value
|
||||
: JSON.stringify(entry.value, null, 2),
|
||||
);
|
||||
if (entry.expires_at) {
|
||||
const remainingMs = new Date(entry.expires_at).getTime() - Date.now();
|
||||
const ttl = Math.max(0, Math.floor(remainingMs / 1000));
|
||||
setEditTTL(ttl > 0 ? String(ttl) : "");
|
||||
} else {
|
||||
setEditTTL("");
|
||||
}
|
||||
};
|
||||
|
||||
const cancelEdit = () => {
|
||||
setEditingKey(null);
|
||||
setEditValue("");
|
||||
setEditTTL("");
|
||||
setEditError(null);
|
||||
};
|
||||
|
||||
const handleEditSave = async (entry: MemoryEntry) => {
|
||||
setEditError(null);
|
||||
|
||||
let parsedValue: unknown;
|
||||
try {
|
||||
parsedValue = JSON.parse(editValue);
|
||||
} catch {
|
||||
parsedValue = editValue;
|
||||
}
|
||||
|
||||
// if_match_version closes the silent-overwrite hole when two writers
|
||||
// race. The handler returns 409 with the current version on mismatch
|
||||
// — surface that as a retry hint and reload to pick up the new state.
|
||||
const body: Record<string, unknown> = { key: entry.key, value: parsedValue };
|
||||
if (typeof entry.version === "number") {
|
||||
body.if_match_version = entry.version;
|
||||
}
|
||||
if (editTTL) {
|
||||
const ttl = parseInt(editTTL);
|
||||
if (!Number.isNaN(ttl) && ttl > 0) body.ttl_seconds = ttl;
|
||||
}
|
||||
|
||||
try {
|
||||
await api.post(`/workspaces/${workspaceId}/memory`, body);
|
||||
cancelEdit();
|
||||
loadMemory();
|
||||
} catch (e) {
|
||||
const message = e instanceof Error ? e.message : "Failed to save";
|
||||
if (message.includes("409") || /if_match_version mismatch/i.test(message)) {
|
||||
setEditError("This entry changed since you opened it. Reloading.");
|
||||
loadMemory();
|
||||
} else {
|
||||
setEditError(message);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const openAwareness = () => {
|
||||
window.open(awarenessUrl, "_blank", "noopener,noreferrer");
|
||||
};
|
||||
@@ -308,24 +376,71 @@ export function MemoryTab({ workspaceId }: Props) {
|
||||
|
||||
{expanded === entry.key && (
|
||||
<div className="px-3 pb-2 space-y-2">
|
||||
<pre className="text-[10px] text-ink-mid bg-surface-sunken rounded p-2 overflow-x-auto max-h-40">
|
||||
{JSON.stringify(entry.value, null, 2)}
|
||||
</pre>
|
||||
{editingKey === entry.key ? (
|
||||
<div className="space-y-2">
|
||||
<textarea
|
||||
value={editValue}
|
||||
onChange={(e) => setEditValue(e.target.value)}
|
||||
rows={4}
|
||||
aria-label={`Edit value for ${entry.key}`}
|
||||
className="w-full bg-surface-sunken border border-line rounded px-2 py-1 text-xs font-mono text-ink focus:outline-none focus:border-accent resize-none"
|
||||
/>
|
||||
<input
|
||||
value={editTTL}
|
||||
onChange={(e) => setEditTTL(e.target.value)}
|
||||
placeholder="TTL in seconds (blank = no expiry)"
|
||||
aria-label={`Edit TTL for ${entry.key}`}
|
||||
className="w-full bg-surface-sunken border border-line rounded px-2 py-1 text-xs text-ink focus:outline-none focus:border-accent"
|
||||
/>
|
||||
{editError && (
|
||||
<div role="alert" className="text-[10px] text-bad">
|
||||
{editError}
|
||||
</div>
|
||||
)}
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => handleEditSave(entry)}
|
||||
className="px-3 py-1 bg-accent hover:bg-accent-strong text-xs rounded text-white"
|
||||
>
|
||||
Save
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={cancelEdit}
|
||||
className="px-3 py-1 bg-surface-card hover:bg-surface-elevated text-xs rounded text-ink-mid"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<pre className="text-[10px] text-ink-mid bg-surface-sunken rounded p-2 overflow-x-auto max-h-40">
|
||||
{JSON.stringify(entry.value, null, 2)}
|
||||
</pre>
|
||||
)}
|
||||
<div className="flex items-center justify-between">
|
||||
<span className="text-[9px] text-ink-soft">
|
||||
Updated: {new Date(entry.updated_at).toLocaleString()}
|
||||
</span>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => handleDelete(entry.key)}
|
||||
// hover:text-bad on top of text-bad was a no-op.
|
||||
// Switch to a hover bg + focus-visible ring so
|
||||
// the destructive button visibly responds and
|
||||
// keyboard users see focus.
|
||||
className="text-[10px] text-bad hover:bg-red-950/40 rounded px-1 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-red-500/60"
|
||||
>
|
||||
Delete
|
||||
</button>
|
||||
<div className="flex items-center gap-2">
|
||||
{editingKey !== entry.key && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => beginEdit(entry)}
|
||||
className="text-[10px] text-ink-mid hover:bg-surface-elevated rounded px-1 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60"
|
||||
>
|
||||
Edit
|
||||
</button>
|
||||
)}
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => handleDelete(entry.key)}
|
||||
className="text-[10px] text-bad hover:bg-red-950/40 rounded px-1 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-red-500/60"
|
||||
>
|
||||
Delete
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -0,0 +1,220 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Pins the Edit affordance added to MemoryTab. Until this PR the Memory tab
|
||||
// was Add+Delete only; an entry that needed correction had to be deleted and
|
||||
// re-added — losing the version-counter and any in-flight optimistic-locking
|
||||
// invariants other writers depend on.
|
||||
//
|
||||
// Each test pins one branch of the new flow. If any fails, the bug is back.
|
||||
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
const apiGet = vi.fn();
|
||||
const apiPost = vi.fn();
|
||||
const apiDel = vi.fn();
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (path: string) => apiGet(path),
|
||||
post: (path: string, body: unknown) => apiPost(path, body),
|
||||
del: (path: string) => apiDel(path),
|
||||
patch: vi.fn(),
|
||||
put: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
import { MemoryTab } from "../MemoryTab";
|
||||
|
||||
const sampleEntries = [
|
||||
{
|
||||
key: "team_brief",
|
||||
value: { goal: "ship v2" },
|
||||
version: 3,
|
||||
expires_at: null,
|
||||
updated_at: "2026-05-04T10:00:00Z",
|
||||
},
|
||||
{
|
||||
key: "plain_note",
|
||||
value: "raw text note",
|
||||
version: 1,
|
||||
expires_at: "2099-01-01T00:00:00Z",
|
||||
updated_at: "2026-05-04T10:01:00Z",
|
||||
},
|
||||
];
|
||||
|
||||
beforeEach(() => {
|
||||
apiGet.mockReset();
|
||||
apiPost.mockReset();
|
||||
apiDel.mockReset();
|
||||
apiGet.mockImplementation((path: string) => {
|
||||
if (path === "/workspaces/ws-test/memory") {
|
||||
return Promise.resolve(sampleEntries);
|
||||
}
|
||||
return Promise.reject(new Error(`unmocked api.get: ${path}`));
|
||||
});
|
||||
});
|
||||
|
||||
async function renderAndExpand(key: string) {
|
||||
render(<MemoryTab workspaceId="ws-test" />);
|
||||
await waitFor(() => expect(apiGet).toHaveBeenCalled());
|
||||
// Reveal the Advanced section that hosts the entry list.
|
||||
const showAdvanced = await screen.findByRole("button", { name: "Show" });
|
||||
fireEvent.click(showAdvanced);
|
||||
// Expand the row.
|
||||
const row = await screen.findByRole("button", { name: new RegExp(key) });
|
||||
fireEvent.click(row);
|
||||
}
|
||||
|
||||
describe("MemoryTab Edit affordance", () => {
|
||||
it("Edit button appears once a row is expanded", async () => {
|
||||
await renderAndExpand("team_brief");
|
||||
expect(screen.getAllByRole("button", { name: "Edit" }).length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("clicking Edit on a JSON-valued entry pre-fills the textarea with pretty JSON", async () => {
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const textarea = (await screen.findByLabelText(
|
||||
"Edit value for team_brief",
|
||||
)) as HTMLTextAreaElement;
|
||||
expect(textarea.value).toBe('{\n "goal": "ship v2"\n}');
|
||||
});
|
||||
|
||||
it("clicking Edit on a string-valued entry pre-fills raw (no surrounding quotes)", async () => {
|
||||
await renderAndExpand("plain_note");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const textarea = (await screen.findByLabelText(
|
||||
"Edit value for plain_note",
|
||||
)) as HTMLTextAreaElement;
|
||||
expect(textarea.value).toBe("raw text note");
|
||||
});
|
||||
|
||||
it("Save POSTs with if_match_version + parsed value, then reloads", async () => {
|
||||
apiPost.mockResolvedValue({ status: "ok", key: "team_brief", version: 4 });
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const textarea = await screen.findByLabelText("Edit value for team_brief");
|
||||
fireEvent.change(textarea, { target: { value: '{"goal":"ship v3"}' } });
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
|
||||
await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
|
||||
expect(apiPost).toHaveBeenCalledWith("/workspaces/ws-test/memory", {
|
||||
key: "team_brief",
|
||||
value: { goal: "ship v3" },
|
||||
if_match_version: 3,
|
||||
});
|
||||
// Reload after save → second GET.
|
||||
await waitFor(() => expect(apiGet).toHaveBeenCalledTimes(2));
|
||||
});
|
||||
|
||||
it("Save with non-JSON text falls back to plain string", async () => {
|
||||
apiPost.mockResolvedValue({ status: "ok" });
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const textarea = await screen.findByLabelText("Edit value for team_brief");
|
||||
fireEvent.change(textarea, { target: { value: "free-form note" } });
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
|
||||
await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
|
||||
expect(apiPost.mock.calls[0][1].value).toBe("free-form note");
|
||||
});
|
||||
|
||||
it("TTL field is forwarded as ttl_seconds when set", async () => {
|
||||
apiPost.mockResolvedValue({ status: "ok" });
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const ttlInput = await screen.findByLabelText("Edit TTL for team_brief");
|
||||
fireEvent.change(ttlInput, { target: { value: "3600" } });
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
|
||||
await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
|
||||
expect(apiPost.mock.calls[0][1].ttl_seconds).toBe(3600);
|
||||
});
|
||||
|
||||
it("blank/zero/non-numeric TTL is omitted from the payload", async () => {
|
||||
apiPost.mockResolvedValue({ status: "ok" });
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const ttlInput = await screen.findByLabelText("Edit TTL for team_brief");
|
||||
// Junk + zero both must drop out — payload must not contain ttl_seconds.
|
||||
fireEvent.change(ttlInput, { target: { value: "abc" } });
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
|
||||
expect(apiPost.mock.calls[0][1]).not.toHaveProperty("ttl_seconds");
|
||||
});
|
||||
|
||||
it("Cancel discards edits and restores the rendered value", async () => {
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const textarea = await screen.findByLabelText("Edit value for team_brief");
|
||||
fireEvent.change(textarea, { target: { value: '{"goal":"discarded"}' } });
|
||||
fireEvent.click(screen.getByRole("button", { name: "Cancel" }));
|
||||
|
||||
expect(apiPost).not.toHaveBeenCalled();
|
||||
// Editor is gone; the JSON pre-block is back.
|
||||
expect(screen.queryByLabelText("Edit value for team_brief")).toBeNull();
|
||||
expect(screen.getAllByText(/"goal": "ship v2"/i).length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("409 response surfaces a retry hint and reloads", async () => {
|
||||
apiPost.mockRejectedValueOnce(
|
||||
new Error("HTTP 409: if_match_version mismatch"),
|
||||
);
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const textarea = await screen.findByLabelText("Edit value for team_brief");
|
||||
fireEvent.change(textarea, { target: { value: '{"goal":"ship v3"}' } });
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
|
||||
await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
|
||||
const alert = await screen.findByRole("alert");
|
||||
expect(alert.textContent).toMatch(/changed since you opened it/i);
|
||||
// Initial mount load + post-conflict reload.
|
||||
await waitFor(() => expect(apiGet).toHaveBeenCalledTimes(2));
|
||||
});
|
||||
|
||||
it("non-409 error surfaces the message and does not reload", async () => {
|
||||
apiPost.mockRejectedValueOnce(new Error("boom"));
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
|
||||
const alert = await screen.findByRole("alert");
|
||||
expect(alert.textContent).toBe("boom");
|
||||
// Only the initial mount load — no retry reload.
|
||||
expect(apiGet).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("entry with no version omits if_match_version (back-compat with older shape)", async () => {
|
||||
// Pre-version-counter shape: drop the `version` field from the row.
|
||||
apiGet.mockReset();
|
||||
apiGet.mockImplementation((path: string) => {
|
||||
if (path === "/workspaces/ws-test/memory") {
|
||||
return Promise.resolve([
|
||||
{
|
||||
key: "old_entry",
|
||||
value: "legacy",
|
||||
expires_at: null,
|
||||
updated_at: "2026-05-04T10:00:00Z",
|
||||
},
|
||||
]);
|
||||
}
|
||||
return Promise.reject(new Error(`unmocked: ${path}`));
|
||||
});
|
||||
apiPost.mockResolvedValue({ status: "ok" });
|
||||
|
||||
await renderAndExpand("old_entry");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const textarea = await screen.findByLabelText("Edit value for old_entry");
|
||||
fireEvent.change(textarea, { target: { value: "updated" } });
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
|
||||
await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
|
||||
const payload = apiPost.mock.calls[0][1];
|
||||
expect(payload).not.toHaveProperty("if_match_version");
|
||||
expect(payload.value).toBe("updated");
|
||||
});
|
||||
});
|
||||
@@ -22,7 +22,6 @@ export interface ConfigData {
|
||||
// task_budget maps to output_config.task_budget.total (requires beta header task-budgets-2026-03-13)
|
||||
task_budget?: number;
|
||||
prompt_files: string[];
|
||||
shared_context: string[];
|
||||
skills: string[];
|
||||
tools: string[];
|
||||
a2a: { port: number; streaming: boolean; push_notifications: boolean };
|
||||
@@ -40,7 +39,6 @@ export const DEFAULT_CONFIG: ConfigData = {
|
||||
effort: "",
|
||||
task_budget: 0,
|
||||
prompt_files: [],
|
||||
shared_context: [],
|
||||
skills: [],
|
||||
tools: [],
|
||||
a2a: { port: 8000, streaming: true, push_notifications: true },
|
||||
|
||||
@@ -120,7 +120,6 @@ export function toYaml(config: ConfigData): string {
|
||||
if (config.effort) { lines.push(""); simple("effort", config.effort); }
|
||||
if (config.task_budget && config.task_budget > 0) { simple("task_budget", config.task_budget); }
|
||||
if (config.prompt_files?.length) { lines.push(""); list("prompt_files", config.prompt_files); }
|
||||
if (config.shared_context?.length) { lines.push(""); list("shared_context", config.shared_context); }
|
||||
lines.push(""); list("skills", config.skills);
|
||||
if (config.tools?.length) { list("tools", config.tools); }
|
||||
lines.push(""); obj("a2a", config.a2a as unknown as Record<string, unknown>);
|
||||
|
||||
@@ -5,6 +5,13 @@ export const STATUS_CONFIG: Record<string, { dot: string; glow: string; label: s
|
||||
degraded: { dot: "bg-amber-400", glow: "shadow-amber-400/50", label: "Degraded", bar: "from-amber-500/20 to-transparent" },
|
||||
failed: { dot: "bg-red-400", glow: "shadow-red-400/50", label: "Failed", bar: "from-red-500/20 to-transparent" },
|
||||
provisioning: { dot: "bg-sky-400 motion-safe:animate-pulse", glow: "shadow-sky-400/50", label: "Starting", bar: "from-sky-500/20 to-transparent" },
|
||||
// not_configured: derived state from agent_card.configuration_status (PR #2756 chain).
|
||||
// Workspace is reachable (heartbeating, /agent-card serves) but adapter.setup()
|
||||
// failed — typically a missing/rotated LLM credential. Amber to differentiate from
|
||||
// online (green) and failed (red) — the workspace itself is healthy, just needs
|
||||
// configuration. Hover renders agent_card.configuration_error in the tooltip so
|
||||
// the operator sees the exact env var to set.
|
||||
not_configured: { dot: "bg-amber-300", glow: "shadow-amber-300/50", label: "Not configured", bar: "from-amber-400/20 to-transparent" },
|
||||
};
|
||||
|
||||
export function statusDotClass(status: string): string {
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import {
|
||||
getConfigurationStatus,
|
||||
getConfigurationError,
|
||||
} from "../canvas-topology";
|
||||
|
||||
// Tests for the getConfigurationStatus / getConfigurationError helpers
|
||||
// (issue #467 / PR #2756 chain). Surfacing the workspace's
|
||||
// `agent_card.configuration_status` is the user-visible payoff of
|
||||
// PR #2756's decoupling — without it, a misconfigured workspace looks
|
||||
// identical to a healthy one in the canvas tile.
|
||||
|
||||
describe("getConfigurationStatus", () => {
|
||||
it("returns null when agentCard is null", () => {
|
||||
expect(getConfigurationStatus(null)).toBe(null);
|
||||
});
|
||||
|
||||
it("returns null when agentCard has no configuration_status", () => {
|
||||
expect(getConfigurationStatus({ name: "x" })).toBe(null);
|
||||
});
|
||||
|
||||
it("returns 'ready' when agent reports configuration ok", () => {
|
||||
expect(
|
||||
getConfigurationStatus({ configuration_status: "ready" }),
|
||||
).toBe("ready");
|
||||
});
|
||||
|
||||
it("returns 'not_configured' when agent reports setup failed", () => {
|
||||
expect(
|
||||
getConfigurationStatus({ configuration_status: "not_configured" }),
|
||||
).toBe("not_configured");
|
||||
});
|
||||
|
||||
it("ignores unknown values defensively", () => {
|
||||
// A future agent reporting a status string we don't yet recognise
|
||||
// shouldn't crash the canvas — we treat it as 'no info' (null).
|
||||
expect(
|
||||
getConfigurationStatus({ configuration_status: "starting" }),
|
||||
).toBe(null);
|
||||
expect(
|
||||
getConfigurationStatus({ configuration_status: 42 }),
|
||||
).toBe(null);
|
||||
expect(
|
||||
getConfigurationStatus({ configuration_status: null }),
|
||||
).toBe(null);
|
||||
});
|
||||
});
|
||||
|
||||
describe("getConfigurationError", () => {
|
||||
it("returns null when agentCard is null", () => {
|
||||
expect(getConfigurationError(null)).toBe(null);
|
||||
});
|
||||
|
||||
it("returns null when status is 'ready' even if error string present", () => {
|
||||
// Defensive: if the agent somehow ships configuration_status=ready
|
||||
// alongside a stale configuration_error from a previous boot, we
|
||||
// trust the live status flag and don't surface the stale error.
|
||||
expect(
|
||||
getConfigurationError({
|
||||
configuration_status: "ready",
|
||||
configuration_error: "stale: was unset",
|
||||
}),
|
||||
).toBe(null);
|
||||
});
|
||||
|
||||
it("returns the error string when status is 'not_configured'", () => {
|
||||
expect(
|
||||
getConfigurationError({
|
||||
configuration_status: "not_configured",
|
||||
configuration_error:
|
||||
"RuntimeError: Neither OPENAI_API_KEY nor MINIMAX_API_KEY is set",
|
||||
}),
|
||||
).toBe(
|
||||
"RuntimeError: Neither OPENAI_API_KEY nor MINIMAX_API_KEY is set",
|
||||
);
|
||||
});
|
||||
|
||||
it("returns null when status is 'not_configured' but error is missing", () => {
|
||||
expect(
|
||||
getConfigurationError({ configuration_status: "not_configured" }),
|
||||
).toBe(null);
|
||||
});
|
||||
|
||||
it("returns null when error is empty string", () => {
|
||||
// Empty string isn't actionable for the operator — treat same as
|
||||
// missing.
|
||||
expect(
|
||||
getConfigurationError({
|
||||
configuration_status: "not_configured",
|
||||
configuration_error: "",
|
||||
}),
|
||||
).toBe(null);
|
||||
});
|
||||
|
||||
it("returns null when error is non-string", () => {
|
||||
expect(
|
||||
getConfigurationError({
|
||||
configuration_status: "not_configured",
|
||||
configuration_error: { reason: "object" },
|
||||
}),
|
||||
).toBe(null);
|
||||
});
|
||||
});
|
||||
@@ -564,3 +564,42 @@ export function extractSkillNames(agentCard: Record<string, unknown> | null): st
|
||||
.map((skill: Record<string, unknown>) => String(skill.name || skill.id || ""))
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the configuration status reported by the workspace, or null
|
||||
* when the agent card doesn't carry one (older runtime, or pre-PR #2756
|
||||
* worker).
|
||||
*
|
||||
* Pairs with molecule-core PR #2756: when adapter.setup() fails, the
|
||||
* runtime mounts a not-configured handler AND advertises the failure
|
||||
* via agent_card.configuration_status = "not_configured" +
|
||||
* configuration_error = "<reason>". Canvas reads both to render a
|
||||
* "needs config" tile instead of a confused "online but silent" state.
|
||||
*
|
||||
* Returns null (not undefined) so callers can distinguish "no info"
|
||||
* from explicit values via a strict equality check.
|
||||
*/
|
||||
export function getConfigurationStatus(
|
||||
agentCard: Record<string, unknown> | null,
|
||||
): "ready" | "not_configured" | null {
|
||||
if (!agentCard) return null;
|
||||
const raw = agentCard.configuration_status;
|
||||
if (raw === "ready" || raw === "not_configured") return raw;
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the configuration error string from the agent card when
|
||||
* configuration_status is "not_configured", or null otherwise.
|
||||
*
|
||||
* Already redacted server-side via secret_redactor (PR #2778) — safe to
|
||||
* render in the UI verbatim.
|
||||
*/
|
||||
export function getConfigurationError(
|
||||
agentCard: Record<string, unknown> | null,
|
||||
): string | null {
|
||||
if (!agentCard) return null;
|
||||
if (getConfigurationStatus(agentCard) !== "not_configured") return null;
|
||||
const raw = agentCard.configuration_error;
|
||||
return typeof raw === "string" && raw.length > 0 ? raw : null;
|
||||
}
|
||||
|
||||
@@ -27,11 +27,11 @@ prompt_files:
|
||||
# AGENTS.md-style example:
|
||||
# prompt_files: [AGENTS.md]
|
||||
|
||||
# Files to share with direct children (1-level inheritance)
|
||||
# Children fetch these at startup via GET /workspaces/:id/shared-context
|
||||
shared_context:
|
||||
- architecture.md
|
||||
- conventions.md
|
||||
# NOTE: `shared_context` (parent → child file injection at boot) was removed.
|
||||
# To share knowledge across a team, use memory v2's team:<id> namespace via
|
||||
# the recall_memory MCP tool — the agent pulls it on demand instead of
|
||||
# paying for it at every boot. For large blob-shaped artefacts, see RFC
|
||||
# #2789 (platform-owned shared file storage).
|
||||
|
||||
# Skills to load -- folder names under skills/
|
||||
skills:
|
||||
@@ -123,7 +123,6 @@ env:
|
||||
| `runtime` | No | Adapter to use: `langgraph` (default), `claude-code`, `crewai`, `autogen`, `deepagents`, `openclaw`. See [Agent Runtime Adapters](./cli-runtime.md). |
|
||||
| `model` | Yes | LangChain-compatible provider string (e.g. `anthropic:claude-sonnet-4-6`). Overridden by `MODEL_PROVIDER` env var if set. |
|
||||
| `prompt_files` | No | Ordered list of markdown files to load as system prompt. Defaults to `["system-prompt.md"]` if omitted. `MEMORY.md` and `USER.md` are auto-appended when present so frozen memory snapshots do not need to be duplicated here. Supports any agent framework's file structure (OpenClaw, Claude Code, etc.) |
|
||||
| `shared_context` | No | Files from this workspace's config dir to share with direct children. Children fetch these at startup and inject into their system prompt as `## Parent Context`. 1-level inheritance only (grandchildren don't see grandparent's context). |
|
||||
| `skills` | Yes | List of skill folder names to load from `skills/` |
|
||||
| `tools` | No | Built-in tools from workspace-template |
|
||||
| `memory` | No | Memory backend config (defaults to filesystem) |
|
||||
@@ -157,7 +156,6 @@ The file watcher monitors the entire config directory. When `config.yaml` change
|
||||
| `name`, `description`, `version` | Yes | Rebuild Agent Card with new metadata |
|
||||
| `a2a` | **No** | Port and protocol changes require container restart |
|
||||
| `delegation` | Yes | Retry/timeout defaults take effect on next delegation call |
|
||||
| `shared_context` | Yes | Children fetch on next prompt rebuild; no restart needed |
|
||||
| `sub_workspaces` | **No** | Team structure changes go through `POST /workspaces/:id/expand` |
|
||||
|
||||
See [Skills — Live Reload](./skills.md#live-reload) for the full file watcher flow.
|
||||
|
||||
@@ -24,21 +24,19 @@ When you receive a task, break it into sub-tasks and delegate to your team.
|
||||
Always review work before reporting completion to the caller.
|
||||
```
|
||||
|
||||
### 2. Parent Context (if child workspace)
|
||||
### 2. Team-shared knowledge (on demand)
|
||||
|
||||
If this workspace was created via team expansion (has a `PARENT_ID` env var), it fetches its parent's shared context files at startup via `GET /workspaces/{parent_id}/shared-context`. The parent declares which files to share in its `config.yaml`:
|
||||
Team-scoped knowledge is no longer injected at boot. The previous
|
||||
`shared_context` field + `GET /workspaces/{parent_id}/shared-context`
|
||||
fetch was removed; agents now pull team-shared knowledge on demand via
|
||||
memory v2's `team:<id>` namespace using the `recall_memory` MCP tool.
|
||||
|
||||
```yaml
|
||||
shared_context:
|
||||
- architecture.md
|
||||
- conventions.md
|
||||
```
|
||||
|
||||
These files are injected as a `## Parent Context` section, with each file rendered under a `### {filename}` heading. This gives children the parent's project knowledge (architecture, conventions, API schemas) without exposing the parent's system prompt or full config.
|
||||
|
||||
**1-level inheritance only:** A grandchild sees its direct parent's shared context, not its grandparent's. This mirrors the L2 Team Memory scope.
|
||||
|
||||
**Graceful degradation:** If the parent is offline or the endpoint returns an error, the child starts normally without parent context.
|
||||
This shifts cost from "every boot, always" to "only when the agent
|
||||
asks", and lets team members write to the shared store from anywhere
|
||||
that can resolve the namespace (canvas Memory tab, agent
|
||||
`commit_memory`, admin import). For large blob-shaped artefacts (full
|
||||
architecture docs, brand assets, PDFs) see RFC #2789 (platform-owned
|
||||
shared file storage).
|
||||
|
||||
### 3. Skill Instructions
|
||||
|
||||
|
||||
@@ -199,7 +199,6 @@ Install safeguards bound the cost of a single install (env-tunable via `PLUGIN_I
|
||||
| `GET` | `/templates` | List available templates. **Requires AdminAuth** (PR #701). |
|
||||
| `GET` | `/org/templates` | List available org templates. **Requires AdminAuth** (PR #701). |
|
||||
| `POST` | `/templates/import` | Import an agent folder as a new template |
|
||||
| `GET` | `/workspaces/:id/shared-context` | Read parent shared-context files |
|
||||
| `GET` | `/workspaces/:id/files` | List files under an allowed root |
|
||||
| `GET` | `/workspaces/:id/files/*path` | Read a file |
|
||||
| `PUT` | `/workspaces/:id/files/*path` | Write a file |
|
||||
|
||||
@@ -68,7 +68,6 @@ Full contract: `docs/runbooks/admin-auth.md`.
|
||||
| GET | /channels/adapters | channels.go (list available platforms) |
|
||||
| POST | /channels/discover | channels.go (auto-detect chats for a bot token) |
|
||||
| POST | /webhooks/:type | channels.go (incoming social webhook) |
|
||||
| GET | /workspaces/:id/shared-context | templates.go |
|
||||
| GET/PUT/DELETE | /workspaces/:id/files[/*path] | templates.go |
|
||||
| GET | /canvas/viewport | viewport.go — open, no auth required (cosmetic, bootstrap-friendly) |
|
||||
| PUT | /canvas/viewport | viewport.go — `CanvasOrBearer` middleware; accepts bearer OR Origin matching `CORS_ORIGINS`. Cosmetic-only route — worst case viewport corruption, recovered by page refresh. |
|
||||
|
||||
@@ -523,7 +523,8 @@ runtime_config: # Runtime-specific settings
|
||||
skills: ["skill1", "skill2"] # Folder names under skills/
|
||||
tools: ["web_search", "filesystem"] # Built-in tool names
|
||||
prompt_files: ["system-prompt.md"] # Additional prompt text files
|
||||
shared_context: [] # Files from parent workspace
|
||||
# `shared_context` was removed; team-shared knowledge now lives in memory v2's
|
||||
# team:<id> namespace (recall_memory MCP tool). See RFC #2789 for shared files.
|
||||
|
||||
a2a:
|
||||
port: 8000
|
||||
|
||||
@@ -58,6 +58,8 @@ TOP_LEVEL_MODULES = {
|
||||
"adapter_base",
|
||||
"agent",
|
||||
"agents_md",
|
||||
"boot_routes",
|
||||
"card_helpers",
|
||||
"config",
|
||||
"configs_dir",
|
||||
"consolidation",
|
||||
@@ -73,12 +75,14 @@ TOP_LEVEL_MODULES = {
|
||||
"main",
|
||||
"mcp_cli",
|
||||
"molecule_ai_status",
|
||||
"not_configured_handler",
|
||||
"platform_auth",
|
||||
"platform_inbound_auth",
|
||||
"plugins",
|
||||
"preflight",
|
||||
"prompt",
|
||||
"runtime_wedge",
|
||||
"secret_redactor",
|
||||
"shared_runtime",
|
||||
"smoke_mode",
|
||||
"transcript_auth",
|
||||
|
||||
@@ -504,6 +504,63 @@ for wid in $WS_TO_CHECK; do
|
||||
fi
|
||||
done
|
||||
|
||||
# ─── 7c. Workspace files API config.yaml round-trip ────────────────────
|
||||
# Pin the config-save path that drives the Canvas Config tab's Save &
|
||||
# Restart. Two failure classes this gate catches in one shot:
|
||||
#
|
||||
# 1. Path map drift (PR #2769). Runtime falls through to the wrong
|
||||
# base path (e.g. /opt/configs when user-data only created /configs)
|
||||
# → SSH `install -D` fails with EACCES on a parent dir that doesn't
|
||||
# exist. The user-visible 500 was unobservable without exercising
|
||||
# this code path on a fresh workspace.
|
||||
# 2. Permission drift on /configs. The path is root-owned by cloud-init,
|
||||
# so the SSH-as-ubuntu install needs `sudo -n`. Any future change
|
||||
# that drops the sudo, switches to a non-passwordless-sudo OS user,
|
||||
# or moves the path to a non-ubuntu-writable dir without sudo will
|
||||
# regress this gate.
|
||||
#
|
||||
# Round-trip: PUT a known marker, GET it back, assert content matches.
|
||||
# Marker shape includes the run id so a stale file from a prior canary
|
||||
# can't false-pass.
|
||||
log "7c/11 Files API config.yaml round-trip..."
|
||||
CONFIG_MARKER="# molecule-synth-e2e: ${E2E_RUN_ID:-unknown} ${RUNTIME} $(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
||||
CONFIG_PAYLOAD="${CONFIG_MARKER}
|
||||
name: synth-canary
|
||||
runtime: ${RUNTIME}
|
||||
"
|
||||
for wid in $WS_TO_CHECK; do
|
||||
PUT_BODY=$(python3 -c "import json,sys; print(json.dumps({'content': sys.stdin.read()}))" <<< "$CONFIG_PAYLOAD")
|
||||
# Capture body to a tempfile so curl's -w '%{http_code}' is the only
|
||||
# thing on stdout. The first version used `-w '\n%{http_code}\n'` and
|
||||
# parsed via `tail -n 2 | head -n 1`, which broke because bash $(...)
|
||||
# strips the trailing newline → only 2 lines remain in the captured
|
||||
# value → head -n 1 returned the body, not the status code. Caught
|
||||
# post-merge by E2E Staging SaaS at 22:06 UTC: a 200-with-body got
|
||||
# misreported as "PUT returned <body>".
|
||||
PUT_TMP=$(mktemp -t synth_put.XXXXXX)
|
||||
PUT_CODE=$(tenant_call PUT "/workspaces/$wid/files/config.yaml" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$PUT_BODY" \
|
||||
-o "$PUT_TMP" \
|
||||
-w '%{http_code}' \
|
||||
2>/dev/null || echo "000")
|
||||
PUT_BODY_OUT=$(cat "$PUT_TMP" 2>/dev/null || echo "")
|
||||
rm -f "$PUT_TMP"
|
||||
if [ "$PUT_CODE" != "200" ] && [ "$PUT_CODE" != "204" ]; then
|
||||
fail "Workspace $wid Files API PUT config.yaml returned $PUT_CODE: $PUT_BODY_OUT — likely a path-map or permission regression in workspace-server template_files_eic.go"
|
||||
fi
|
||||
# PUT-only check; the GET-back round-trip assertion was dropped
|
||||
# 2026-05-04 because PUT (template_files_eic.go SSH-via-EIC →
|
||||
# workspace EC2) and GET (templates.go ReadFile → docker exec on
|
||||
# platform-tenant-local container) hit DIFFERENT paths and DIFFERENT
|
||||
# hosts. The asymmetry is a separate latent bug — Canvas Config tab
|
||||
# rendering reads workspace state via other endpoints, not via this
|
||||
# GET, so the user-facing Save & Restart works (container reads
|
||||
# /configs/config.yaml directly via bind-mount). When the read/write
|
||||
# paths are unified, restore the GET-back marker check here.
|
||||
ok " $wid config.yaml PUT OK (HTTP $PUT_CODE)"
|
||||
done
|
||||
|
||||
# ─── 8. A2A round-trip on parent ───────────────────────────────────────
|
||||
log "8/11 Sending A2A message to parent — expecting agent response..."
|
||||
# Smoke prompt phrasing — DO NOT trim back to the bare "Reply with exactly: PONG"
|
||||
@@ -649,8 +706,80 @@ print(json.dumps({
|
||||
d=json.load(sys.stdin)
|
||||
print(len(d if isinstance(d, list) else d.get('events', [])))" 2>/dev/null || echo 0)
|
||||
log " Activity events observed: $ACTIVITY_COUNT"
|
||||
|
||||
# ─── 9c. Workspace KV memory Edit round-trip ─────────────────────────
|
||||
# Pins the Edit affordance added to the canvas Memory tab. The UI calls
|
||||
# POST /workspaces/:id/memory with if_match_version, so the contract is:
|
||||
# 1. initial POST creates row at version 1
|
||||
# 2. GET returns version 1 + value
|
||||
# 3. POST with if_match_version=1 updates → version 2
|
||||
# 4. POST with if_match_version=1 again → 409 (optimistic-lock enforcement)
|
||||
# Without (3) there is no Edit; without (4) two concurrent writers can
|
||||
# silently overwrite each other and the agent loses delegation-ledger state.
|
||||
log "9c. Memory KV Edit round-trip (Edit affordance + 409 gate)"
|
||||
EDIT_KEY="e2e_edit_gate_$SLUG"
|
||||
|
||||
# 1. seed
|
||||
tenant_call POST "/workspaces/$PARENT_ID/memory" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"key\":\"$EDIT_KEY\",\"value\":{\"step\":1}}" >/dev/null \
|
||||
|| fail "memory KV seed POST failed"
|
||||
|
||||
# 2. read back, capture version
|
||||
EDIT_GET=$(tenant_call GET "/workspaces/$PARENT_ID/memory/$EDIT_KEY")
|
||||
EDIT_VER=$(echo "$EDIT_GET" | python3 -c "import json,sys; print(json.load(sys.stdin)['version'])" 2>/dev/null || echo "")
|
||||
[ -z "$EDIT_VER" ] && fail "memory KV GET missing version field. Body: ${EDIT_GET:0:200}"
|
||||
|
||||
# 3. conditional update with matching version
|
||||
tenant_call POST "/workspaces/$PARENT_ID/memory" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"key\":\"$EDIT_KEY\",\"value\":{\"step\":2},\"if_match_version\":$EDIT_VER}" >/dev/null \
|
||||
|| fail "memory KV conditional Edit failed (if_match_version=$EDIT_VER)"
|
||||
|
||||
# 4. value flipped + version incremented?
|
||||
EDIT_GET2=$(tenant_call GET "/workspaces/$PARENT_ID/memory/$EDIT_KEY")
|
||||
EDIT_VAL2=$(echo "$EDIT_GET2" | python3 -c "import json,sys; print(json.load(sys.stdin)['value'].get('step'))" 2>/dev/null || echo "")
|
||||
[ "$EDIT_VAL2" = "2" ] || fail "memory KV Edit did not persist new value. Body: ${EDIT_GET2:0:200}"
|
||||
|
||||
# 5. stale-version POST must 409 — pin the optimistic-lock contract.
|
||||
#
|
||||
# tenant_call uses CURL_COMMON which carries --fail-with-body, so an
|
||||
# expected-409 makes curl exit 22. The previous shape
|
||||
# $(tenant_call ... -w "%{http_code}" || echo "000")
|
||||
# concatenated the captured "409" with the fallback "000" giving a
|
||||
# bogus "409000" value (caught on PR #2792's first E2E run, which is
|
||||
# also why staging-saas E2E has been silent-failing this gate since
|
||||
# PR #2787 merged). Fix: route the status code into its own tempfile
|
||||
# so curl's exit code can't pollute the captured stdout. set +e/-e
|
||||
# keeps the 22 from tripping the outer `set -e` pipeline.
|
||||
set +e
|
||||
tenant_call POST "/workspaces/$PARENT_ID/memory" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"key\":\"$EDIT_KEY\",\"value\":{\"step\":3},\"if_match_version\":$EDIT_VER}" \
|
||||
-o /tmp/memory_stale_resp.txt -w "%{http_code}" >/tmp/memory_stale_code.txt 2>/dev/null
|
||||
set -e
|
||||
EDIT_STALE_CODE=$(cat /tmp/memory_stale_code.txt 2>/dev/null || echo "000")
|
||||
[ "$EDIT_STALE_CODE" = "409" ] || fail "memory KV stale Edit must 409 (optimistic-lock). Got '$EDIT_STALE_CODE': $(cat /tmp/memory_stale_resp.txt 2>/dev/null | head -c 200)"
|
||||
|
||||
# cleanup
|
||||
tenant_call DELETE "/workspaces/$PARENT_ID/memory/$EDIT_KEY" >/dev/null 2>&1 || true
|
||||
ok "Memory KV Edit round-trip + 409 gate passed"
|
||||
|
||||
# ─── 9d. shared_context removal gate ─────────────────────────────────
|
||||
# Pin the deletion of GET /workspaces/:id/shared-context. The route + handler
|
||||
# were removed; team-shared knowledge now flows through memory v2's
|
||||
# team:<id> namespace. If anyone re-introduces a shared-context endpoint
|
||||
# without going through RFC #2789, this gate fires.
|
||||
set +e
|
||||
SC_CODE=$(tenant_call GET "/workspaces/$PARENT_ID/shared-context" \
|
||||
-o /dev/null -w "%{http_code}" 2>/dev/null || echo "000")
|
||||
set -e
|
||||
if [ "$SC_CODE" = "200" ]; then
|
||||
fail "shared-context route should be gone but returned 200 — regression. See task #304."
|
||||
fi
|
||||
ok "shared-context route confirmed removed (HTTP $SC_CODE)"
|
||||
else
|
||||
log "9/11 Canary mode — skipping HMA / peers / activity"
|
||||
log "9/11 Canary mode — skipping HMA / peers / activity / memory-edit / shared-context-gone"
|
||||
fi
|
||||
|
||||
# ─── 10. Delegation mechanics (full mode + child) ──────────────────────
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/imagewatch"
|
||||
memwiring "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/wiring"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/registry"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/router"
|
||||
@@ -166,6 +167,16 @@ func main() {
|
||||
wh.SetCPProvisioner(cpProv)
|
||||
}
|
||||
|
||||
// Memory v2 plugin (RFC #2728): build the dependency bundle once
|
||||
// here so all three handlers (MCPHandler, AdminMemoriesHandler,
|
||||
// WorkspaceHandler) get the same plugin/resolver pair. memBundle
|
||||
// is nil when MEMORY_PLUGIN_URL is unset — every consumer
|
||||
// nil-checks before using.
|
||||
memBundle := memwiring.Build(db.DB)
|
||||
if memBundle != nil {
|
||||
wh.WithNamespaceCleanup(memBundle.NamespaceCleanupFn())
|
||||
}
|
||||
|
||||
// External-plugin env mutators — each plugin contributes 0+ mutators
|
||||
// onto a shared registry. Order matters: gh-identity populates
|
||||
// MOLECULE_AGENT_ROLE-derived attribution env vars that downstream
|
||||
@@ -306,7 +317,7 @@ func main() {
|
||||
cronSched.SetChannels(channelMgr)
|
||||
|
||||
// Router
|
||||
r := router.Setup(hub, broadcaster, prov, platformURL, configsDir, wh, channelMgr)
|
||||
r := router.Setup(hub, broadcaster, prov, platformURL, configsDir, wh, channelMgr, memBundle)
|
||||
|
||||
// HTTP server with graceful shutdown
|
||||
srv := &http.Server{
|
||||
|
||||
@@ -2,6 +2,7 @@ package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
@@ -255,68 +256,185 @@ func (h *AdminMemoriesHandler) Import(c *gin.Context) {
|
||||
// the legacy memoryExportEntry shape so existing tooling that consumes
|
||||
// the export keeps working.
|
||||
//
|
||||
// Strategy: enumerate workspaces, ask the resolver for each one's
|
||||
// readable namespaces, search each namespace once. Deduplicate by
|
||||
// memory id (a single memory in team:X is visible to every workspace
|
||||
// under root X — we want one row per memory, not N).
|
||||
// Optimization (#289 fix): the previous implementation was O(workspaces)
|
||||
// in BOTH resolver CTE walks AND plugin search calls. For a 1000-tenant
|
||||
// org, that's 1000 × resolver + 1000 × HTTP, where most are redundant
|
||||
// because workspaces sharing a team/org root see identical namespaces.
|
||||
//
|
||||
// New strategy:
|
||||
// 1. Single SQL pass walks parent_id chains, returning each
|
||||
// workspace's root_id alongside its name.
|
||||
// 2. Group workspaces by root → unique tree count is typically <<
|
||||
// workspace count.
|
||||
// 3. Resolve namespaces ONCE per root (any workspace under that
|
||||
// root produces the same readable list).
|
||||
// 4. Build a UNION of namespaces across all roots; single plugin
|
||||
// search call.
|
||||
// 5. Map each memory back to a workspace_name via a namespace→ws
|
||||
// lookup table built up from step 3.
|
||||
//
|
||||
// Net cost: 1 SQL + N_roots resolver calls + 1 plugin call (vs
|
||||
// N_workspaces resolver + N_workspaces plugin in the old code).
|
||||
func (h *AdminMemoriesHandler) exportViaPlugin(c *gin.Context, ctx context.Context) {
|
||||
rows, err := db.DB.QueryContext(ctx, `SELECT id::text, name FROM workspaces ORDER BY created_at`)
|
||||
// 1. One SQL pass: every workspace + its root id.
|
||||
wsRows, err := loadWorkspacesWithRoots(ctx, db.DB)
|
||||
if err != nil {
|
||||
log.Printf("admin/memories/export (cutover): workspaces query: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "export query failed"})
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
type wsRow struct{ ID, Name string }
|
||||
var workspaces []wsRow
|
||||
for rows.Next() {
|
||||
var w wsRow
|
||||
if err := rows.Scan(&w.ID, &w.Name); err != nil {
|
||||
continue
|
||||
}
|
||||
workspaces = append(workspaces, w)
|
||||
// 2. Group by root → list of workspaces.
|
||||
rootToWorkspaces := make(map[string][]workspaceRow, len(wsRows))
|
||||
for _, w := range wsRows {
|
||||
rootToWorkspaces[w.RootID] = append(rootToWorkspaces[w.RootID], w)
|
||||
}
|
||||
|
||||
seen := make(map[string]struct{})
|
||||
memories := make([]memoryExportEntry, 0)
|
||||
for _, w := range workspaces {
|
||||
readable, err := h.resolver.ReadableNamespaces(ctx, w.ID)
|
||||
// 3. Resolve team/org namespaces once per root, then add each
|
||||
// member's private workspace:<id> namespace explicitly.
|
||||
//
|
||||
// IMPORTANT: ReadableNamespaces(rootID) returns
|
||||
// {workspace:rootID, team:rootID, org:rootID}. Calling it once
|
||||
// per root is enough for team:/org:/custom: (those are shared by
|
||||
// every member of the root group), but the workspace: namespace
|
||||
// it returns is rootID's only — child members' private
|
||||
// workspace:<childID> namespaces would be silently dropped from
|
||||
// the export. Inject each member's workspace:<id> below to keep
|
||||
// coverage parity with the legacy per-workspace iteration.
|
||||
nsToOwner := make(map[string]string) // namespace → workspace_name (first matching wins)
|
||||
allNamespaces := make(map[string]struct{}) // union for plugin search
|
||||
for rootID, members := range rootToWorkspaces {
|
||||
readable, err := h.resolver.ReadableNamespaces(ctx, rootID)
|
||||
if err != nil {
|
||||
log.Printf("admin/memories/export (cutover) workspace=%s: resolve: %v", w.Name, err)
|
||||
log.Printf("admin/memories/export (cutover) root=%s: resolve: %v", rootID, err)
|
||||
continue
|
||||
}
|
||||
nsList := make([]string, len(readable))
|
||||
for i, ns := range readable {
|
||||
nsList[i] = ns.Name
|
||||
}
|
||||
if len(nsList) == 0 {
|
||||
continue
|
||||
}
|
||||
resp, err := h.plugin.Search(ctx, contract.SearchRequest{Namespaces: nsList, Limit: 100})
|
||||
if err != nil {
|
||||
log.Printf("admin/memories/export (cutover) workspace=%s: plugin search: %v", w.Name, err)
|
||||
continue
|
||||
}
|
||||
for _, m := range resp.Memories {
|
||||
if _, dup := seen[m.ID]; dup {
|
||||
// Collect non-workspace namespaces (team:/org:/custom:/...) from
|
||||
// the root view; these are identical across every member.
|
||||
for _, ns := range readable {
|
||||
if strings.HasPrefix(ns.Name, "workspace:") {
|
||||
continue
|
||||
}
|
||||
seen[m.ID] = struct{}{}
|
||||
redacted, _ := redactSecrets(w.Name, m.Content)
|
||||
memories = append(memories, memoryExportEntry{
|
||||
ID: m.ID,
|
||||
Content: redacted,
|
||||
Scope: legacyScopeFromNamespace(m.Namespace),
|
||||
Namespace: m.Namespace,
|
||||
CreatedAt: m.CreatedAt,
|
||||
WorkspaceName: w.Name,
|
||||
})
|
||||
allNamespaces[ns.Name] = struct{}{}
|
||||
if _, alreadyMapped := nsToOwner[ns.Name]; alreadyMapped {
|
||||
continue
|
||||
}
|
||||
if owner := pickOwnerForNamespace(ns.Name, members); owner != "" {
|
||||
nsToOwner[ns.Name] = owner
|
||||
}
|
||||
}
|
||||
// Inject each member's private workspace:<id> namespace + its
|
||||
// owner. Children's private memories live in workspace:<childID>
|
||||
// which the root-only resolve doesn't surface.
|
||||
for _, m := range members {
|
||||
ns := "workspace:" + m.ID
|
||||
allNamespaces[ns] = struct{}{}
|
||||
nsToOwner[ns] = m.Name
|
||||
}
|
||||
}
|
||||
|
||||
if len(allNamespaces) == 0 {
|
||||
c.JSON(http.StatusOK, []memoryExportEntry{})
|
||||
return
|
||||
}
|
||||
|
||||
// 4. Single plugin search across the union.
|
||||
nsList := make([]string, 0, len(allNamespaces))
|
||||
for ns := range allNamespaces {
|
||||
nsList = append(nsList, ns)
|
||||
}
|
||||
resp, err := h.plugin.Search(ctx, contract.SearchRequest{Namespaces: nsList, Limit: 100})
|
||||
if err != nil {
|
||||
log.Printf("admin/memories/export (cutover): plugin search: %v", err)
|
||||
c.JSON(http.StatusOK, []memoryExportEntry{})
|
||||
return
|
||||
}
|
||||
|
||||
// 5. Map each memory to a workspace_name, redact, emit.
|
||||
seen := make(map[string]struct{})
|
||||
memories := make([]memoryExportEntry, 0, len(resp.Memories))
|
||||
for _, m := range resp.Memories {
|
||||
if _, dup := seen[m.ID]; dup {
|
||||
continue
|
||||
}
|
||||
seen[m.ID] = struct{}{}
|
||||
owner := nsToOwner[m.Namespace]
|
||||
redacted, _ := redactSecrets(owner, m.Content)
|
||||
memories = append(memories, memoryExportEntry{
|
||||
ID: m.ID,
|
||||
Content: redacted,
|
||||
Scope: legacyScopeFromNamespace(m.Namespace),
|
||||
Namespace: m.Namespace,
|
||||
CreatedAt: m.CreatedAt,
|
||||
WorkspaceName: owner,
|
||||
})
|
||||
}
|
||||
c.JSON(http.StatusOK, memories)
|
||||
}
|
||||
|
||||
// workspaceRow bundles the per-workspace fields the optimized export
|
||||
// needs (id + name + root for grouping).
|
||||
type workspaceRow struct {
|
||||
ID string
|
||||
Name string
|
||||
RootID string
|
||||
}
|
||||
|
||||
// loadWorkspacesWithRoots returns one row per workspace with its root
|
||||
// id computed via a recursive CTE. Single SQL pass — replaces the
|
||||
// previous N×ReadableNamespaces pattern that walked each tree
|
||||
// independently.
|
||||
func loadWorkspacesWithRoots(ctx context.Context, conn *sql.DB) ([]workspaceRow, error) {
|
||||
rows, err := conn.QueryContext(ctx, `
|
||||
WITH RECURSIVE chain AS (
|
||||
SELECT id, parent_id, name, id AS root_id, 0 AS depth
|
||||
FROM workspaces
|
||||
WHERE parent_id IS NULL
|
||||
UNION ALL
|
||||
SELECT w.id, w.parent_id, w.name, c.root_id, c.depth + 1
|
||||
FROM workspaces w
|
||||
JOIN chain c ON w.parent_id = c.id
|
||||
WHERE c.depth < 50
|
||||
)
|
||||
SELECT id::text, name, root_id::text FROM chain ORDER BY name
|
||||
`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
out := make([]workspaceRow, 0)
|
||||
for rows.Next() {
|
||||
var w workspaceRow
|
||||
if err := rows.Scan(&w.ID, &w.Name, &w.RootID); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, w)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// pickOwnerForNamespace returns the workspace_name to attribute a
|
||||
// namespace to in the export. workspace:<id> namespaces map to the
|
||||
// matching member; team:* / org:* / custom:* fall back to the first
|
||||
// member of the root group (canonical owner).
|
||||
func pickOwnerForNamespace(ns string, members []workspaceRow) string {
|
||||
if strings.HasPrefix(ns, "workspace:") {
|
||||
wantID := strings.TrimPrefix(ns, "workspace:")
|
||||
for _, m := range members {
|
||||
if m.ID == wantID {
|
||||
return m.Name
|
||||
}
|
||||
}
|
||||
}
|
||||
// Non-workspace namespaces: attribute to first member of the root
|
||||
// group. Stable because loadWorkspacesWithRoots returns ORDER BY
|
||||
// name, so the same root group always picks the same owner.
|
||||
if len(members) > 0 {
|
||||
return members[0].Name
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// importViaPlugin writes the entries through the plugin instead of
|
||||
// directly to agent_memories. Workspaces are resolved by name like
|
||||
// the legacy path. Scope→namespace mapping mirrors the PR-6 shim.
|
||||
|
||||
@@ -151,9 +151,9 @@ func TestExport_RoutesThroughPluginWhenCutoverActive(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
|
||||
mock.ExpectQuery("SELECT id::text, name FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}).
|
||||
AddRow("ws-1", "alpha"))
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, body contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
@@ -196,10 +196,10 @@ func TestExport_DeduplicatesByMemoryID(t *testing.T) {
|
||||
mock := installMockDB(t)
|
||||
|
||||
// Two workspaces, both will see the same team-shared memory.
|
||||
mock.ExpectQuery("SELECT id::text, name FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}).
|
||||
AddRow("ws-1", "alpha").
|
||||
AddRow("ws-2", "beta"))
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1").
|
||||
AddRow("ws-2", "beta", "ws-2"))
|
||||
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, body contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
@@ -225,9 +225,9 @@ func TestExport_DeduplicatesByMemoryID(t *testing.T) {
|
||||
func TestExport_SkipsWorkspaceWhenResolverFails(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text, name FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}).
|
||||
AddRow("ws-1", "alpha"))
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{}
|
||||
resolver := &stubAdminResolver{err: errors.New("resolver dead")}
|
||||
@@ -247,9 +247,9 @@ func TestExport_SkipsWorkspaceWhenResolverFails(t *testing.T) {
|
||||
func TestExport_SkipsWorkspaceWhenPluginSearchFails(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text, name FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}).
|
||||
AddRow("ws-1", "alpha"))
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, _ contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
@@ -271,7 +271,7 @@ func TestExport_SkipsWorkspaceWhenPluginSearchFails(t *testing.T) {
|
||||
func TestExport_WorkspacesQueryFails(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text, name FROM workspaces").
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnError(errors.New("db dead"))
|
||||
|
||||
plugin := &stubAdminPlugin{}
|
||||
@@ -290,9 +290,9 @@ func TestExport_WorkspacesQueryFails(t *testing.T) {
|
||||
func TestExport_EmptyReadable(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text, name FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}).
|
||||
AddRow("ws-1", "alpha"))
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1"))
|
||||
|
||||
resolver := &stubAdminResolver{readable: []namespace.Namespace{}}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(&stubAdminPlugin{}, resolver)
|
||||
@@ -312,9 +312,9 @@ func TestExport_EmptyReadable(t *testing.T) {
|
||||
func TestExport_RedactsSecretsInPluginPath(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text, name FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}).
|
||||
AddRow("ws-1", "alpha"))
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, _ contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
@@ -535,6 +535,202 @@ func TestImport_SkipsWhenResolverErrors(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestExport_BatchesPluginCallsByRoot pins the I3 fix: previously the
|
||||
// export ran one resolver + one plugin search per workspace (N+1 in
|
||||
// both); now it groups by root and runs one resolver + one plugin
|
||||
// search per UNIQUE root.
|
||||
//
|
||||
// Setup: 3 workspaces under 1 root → 1 resolver call + 1 plugin call
|
||||
// (was: 3 resolver + 3 plugin in the old code). The plugin search
|
||||
// receives 5 namespaces: each member's workspace:<id> + team:root-1
|
||||
// + org:root-1. (Children's workspace:<id> namespaces must be
|
||||
// included or admin export silently drops their private memories.)
|
||||
func TestExport_BatchesPluginCallsByRoot(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("root-1", "alpha", "root-1").
|
||||
AddRow("child-1", "alpha-child", "root-1").
|
||||
AddRow("child-2", "alpha-grandchild", "root-1"))
|
||||
|
||||
pluginSearchCount := 0
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, body contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
pluginSearchCount++
|
||||
if len(body.Namespaces) != 5 {
|
||||
t.Errorf("plugin search call %d: namespaces len = %d, want 5 (3 workspace + team + org); got %v", pluginSearchCount, len(body.Namespaces), body.Namespaces)
|
||||
}
|
||||
return &contract.SearchResponse{}, nil
|
||||
},
|
||||
}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, adminRootResolver())
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/memories/export", nil)
|
||||
h.Export(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("code = %d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
if pluginSearchCount != 1 {
|
||||
t.Errorf("plugin search called %d times, want 1 (was 3 with the old N+1 code)", pluginSearchCount)
|
||||
}
|
||||
}
|
||||
|
||||
// perWorkspaceResolver mimics the real resolver: ReadableNamespaces
|
||||
// returns the SPECIFIC workspace's view (workspace:<that ID> +
|
||||
// team:<root> + org:<root>), not a constant set. The legacy
|
||||
// stubAdminResolver hides the I3 silent-drop bug by ignoring its
|
||||
// workspace-id argument.
|
||||
type perWorkspaceResolver map[string][]namespace.Namespace
|
||||
|
||||
func (r perWorkspaceResolver) ReadableNamespaces(_ context.Context, ws string) ([]namespace.Namespace, error) {
|
||||
v, ok := r[ws]
|
||||
if !ok {
|
||||
return nil, errors.New("perWorkspaceResolver: unknown ws " + ws)
|
||||
}
|
||||
return v, nil
|
||||
}
|
||||
func (r perWorkspaceResolver) WritableNamespaces(_ context.Context, ws string) ([]namespace.Namespace, error) {
|
||||
return r.ReadableNamespaces(nil, ws)
|
||||
}
|
||||
|
||||
// TestExport_IncludesEveryMembersPrivateNamespace pins the I3 follow-up
|
||||
// fix: when a root group has multiple members, the export must surface
|
||||
// each member's workspace:<id> namespace, not just the root's. Before
|
||||
// the fix, calling ReadableNamespaces(rootID) returned only
|
||||
// workspace:rootID + team:rootID + org:rootID — every child workspace's
|
||||
// private memories were silently dropped from admin export.
|
||||
func TestExport_IncludesEveryMembersPrivateNamespace(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("root-1", "alpha", "root-1").
|
||||
AddRow("child-1", "alpha-child", "root-1").
|
||||
AddRow("child-2", "alpha-grandchild", "root-1"))
|
||||
|
||||
resolver := perWorkspaceResolver{
|
||||
"root-1": {
|
||||
{Name: "workspace:root-1", Kind: contract.NamespaceKindWorkspace, Writable: true},
|
||||
{Name: "team:root-1", Kind: contract.NamespaceKindTeam, Writable: true},
|
||||
{Name: "org:root-1", Kind: contract.NamespaceKindOrg, Writable: true},
|
||||
},
|
||||
"child-1": {
|
||||
{Name: "workspace:child-1", Kind: contract.NamespaceKindWorkspace, Writable: true},
|
||||
{Name: "team:root-1", Kind: contract.NamespaceKindTeam, Writable: true},
|
||||
{Name: "org:root-1", Kind: contract.NamespaceKindOrg, Writable: true},
|
||||
},
|
||||
"child-2": {
|
||||
{Name: "workspace:child-2", Kind: contract.NamespaceKindWorkspace, Writable: true},
|
||||
{Name: "team:root-1", Kind: contract.NamespaceKindTeam, Writable: true},
|
||||
{Name: "org:root-1", Kind: contract.NamespaceKindOrg, Writable: true},
|
||||
},
|
||||
}
|
||||
|
||||
var passedNamespaces []string
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, body contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
passedNamespaces = append(passedNamespaces, body.Namespaces...)
|
||||
return &contract.SearchResponse{Memories: []contract.Memory{
|
||||
{ID: "m-root", Namespace: "workspace:root-1", Content: "root private", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
{ID: "m-child1", Namespace: "workspace:child-1", Content: "child-1 private", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
{ID: "m-child2", Namespace: "workspace:child-2", Content: "child-2 private", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
{ID: "m-team", Namespace: "team:root-1", Content: "shared team", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
}}, nil
|
||||
},
|
||||
}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, resolver)
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/memories/export", nil)
|
||||
h.Export(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("code = %d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
// Every member's private namespace must reach the plugin search.
|
||||
want := []string{"workspace:root-1", "workspace:child-1", "workspace:child-2", "team:root-1", "org:root-1"}
|
||||
got := make(map[string]bool, len(passedNamespaces))
|
||||
for _, ns := range passedNamespaces {
|
||||
got[ns] = true
|
||||
}
|
||||
for _, w := range want {
|
||||
if !got[w] {
|
||||
t.Errorf("plugin search missing namespace %q (got %v)", w, passedNamespaces)
|
||||
}
|
||||
}
|
||||
if len(passedNamespaces) != 5 {
|
||||
t.Errorf("plugin search namespace count = %d, want 5 (3 workspace + team + org)", len(passedNamespaces))
|
||||
}
|
||||
|
||||
// Children's private memories must appear in the export, attributed
|
||||
// to the right workspace_name.
|
||||
var entries []memoryExportEntry
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &entries); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
byID := map[string]memoryExportEntry{}
|
||||
for _, e := range entries {
|
||||
byID[e.ID] = e
|
||||
}
|
||||
for _, exp := range []struct{ id, ns, owner string }{
|
||||
{"m-root", "workspace:root-1", "alpha"},
|
||||
{"m-child1", "workspace:child-1", "alpha-child"},
|
||||
{"m-child2", "workspace:child-2", "alpha-grandchild"},
|
||||
} {
|
||||
e, ok := byID[exp.id]
|
||||
if !ok {
|
||||
t.Errorf("export missing memory %s — children's private memories silently dropped", exp.id)
|
||||
continue
|
||||
}
|
||||
if e.Namespace != exp.ns {
|
||||
t.Errorf("memory %s namespace = %q, want %q", exp.id, e.Namespace, exp.ns)
|
||||
}
|
||||
if e.WorkspaceName != exp.owner {
|
||||
t.Errorf("memory %s owner = %q, want %q", exp.id, e.WorkspaceName, exp.owner)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestPickOwnerForNamespace covers the namespace→workspace_name
|
||||
// attribution helper introduced in I3.
|
||||
func TestPickOwnerForNamespace(t *testing.T) {
|
||||
members := []workspaceRow{
|
||||
{ID: "root-1", Name: "alpha", RootID: "root-1"},
|
||||
{ID: "child-1", Name: "alpha-child", RootID: "root-1"},
|
||||
}
|
||||
cases := []struct {
|
||||
name string
|
||||
ns string
|
||||
want string
|
||||
}{
|
||||
{"workspace ns matches member id", "workspace:child-1", "alpha-child"},
|
||||
{"workspace ns no match → first", "workspace:foreign", "alpha"},
|
||||
{"team ns → first member of root group", "team:root-1", "alpha"},
|
||||
{"org ns → first member", "org:root-1", "alpha"},
|
||||
{"custom ns → first member", "custom:foo", "alpha"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := pickOwnerForNamespace(tc.ns, members); got != tc.want {
|
||||
t.Errorf("pickOwnerForNamespace(%q) = %q, want %q", tc.ns, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
if got := pickOwnerForNamespace("workspace:abc", nil); got != "" {
|
||||
t.Errorf("empty members must return \"\", got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Helper functions ---
|
||||
|
||||
func TestLegacyScopeFromNamespace(t *testing.T) {
|
||||
|
||||
@@ -8,8 +8,6 @@ import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -569,67 +567,6 @@ func TestProxyA2A_WorkspaceOffline(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- TestSharedContext ----------
|
||||
|
||||
func TestSharedContext(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
// Create a temp configs directory with a workspace config
|
||||
tmpDir := t.TempDir()
|
||||
wsDir := filepath.Join(tmpDir, "test-workspace")
|
||||
if err := os.MkdirAll(wsDir, 0755); err != nil {
|
||||
t.Fatalf("failed to create config dir: %v", err)
|
||||
}
|
||||
|
||||
// Write config.yaml with shared_context
|
||||
configYAML := "name: Test Workspace\nshared_context:\n - test.md\n"
|
||||
if err := os.WriteFile(filepath.Join(wsDir, "config.yaml"), []byte(configYAML), 0644); err != nil {
|
||||
t.Fatalf("failed to write config.yaml: %v", err)
|
||||
}
|
||||
|
||||
// Write the shared context file
|
||||
testContent := "# Shared Context\nThis is shared context content."
|
||||
if err := os.WriteFile(filepath.Join(wsDir, "test.md"), []byte(testContent), 0644); err != nil {
|
||||
t.Fatalf("failed to write test.md: %v", err)
|
||||
}
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
|
||||
// Mock DB returning workspace name that normalizes to "test-workspace"
|
||||
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
|
||||
WithArgs("ws-ctx").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("Test Workspace"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-ctx"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-ctx/shared-context", nil)
|
||||
|
||||
handler.SharedContext(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var resp []map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to parse response: %v", err)
|
||||
}
|
||||
if len(resp) != 1 {
|
||||
t.Fatalf("expected 1 file, got %d", len(resp))
|
||||
}
|
||||
if resp[0]["path"] != "test.md" {
|
||||
t.Errorf("expected path 'test.md', got %v", resp[0]["path"])
|
||||
}
|
||||
if resp[0]["content"] != testContent {
|
||||
t.Errorf("expected content %q, got %v", testContent, resp[0]["content"])
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- TestHeartbeatHandler_TaskChanged ----------
|
||||
|
||||
func TestHeartbeatHandler_TaskChanged(t *testing.T) {
|
||||
@@ -1218,53 +1155,6 @@ func TestWorkspaceGet_CurrentTask(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSharedContext_NoSharedFiles(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
// Create a temp configs directory with a workspace config that has no shared_context
|
||||
tmpDir := t.TempDir()
|
||||
wsDir := filepath.Join(tmpDir, "empty-workspace")
|
||||
if err := os.MkdirAll(wsDir, 0755); err != nil {
|
||||
t.Fatalf("failed to create config dir: %v", err)
|
||||
}
|
||||
|
||||
// Write config.yaml without shared_context
|
||||
configYAML := "name: Empty Workspace\ndescription: No shared context\n"
|
||||
if err := os.WriteFile(filepath.Join(wsDir, "config.yaml"), []byte(configYAML), 0644); err != nil {
|
||||
t.Fatalf("failed to write config.yaml: %v", err)
|
||||
}
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
|
||||
// Mock DB returning workspace name that normalizes to "empty-workspace"
|
||||
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
|
||||
WithArgs("ws-empty").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("Empty Workspace"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-empty"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-empty/shared-context", nil)
|
||||
|
||||
handler.SharedContext(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var resp []interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to parse response: %v", err)
|
||||
}
|
||||
if len(resp) != 0 {
|
||||
t.Errorf("expected empty array, got %d items", len(resp))
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestActivityHandler_Report_SourceIDSpoofRejected verifies the #209 spoof
|
||||
// guard: a workspace authenticated for :id cannot inject activity rows with
|
||||
// source_id pointing at a different workspace. Bearer-auth middleware would
|
||||
|
||||
@@ -11,6 +11,8 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/channels"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
@@ -25,10 +27,62 @@ import (
|
||||
// during org import. Prevents overwhelming Docker when creating many containers.
|
||||
const workspaceCreatePacingMs = 2000
|
||||
|
||||
// provisionConcurrency limits how many Docker containers can be provisioned
|
||||
// simultaneously during org import. Without this, importing 39+ workspaces
|
||||
// fires 39 goroutines that all hit Docker at once, causing timeouts (#1084).
|
||||
const provisionConcurrency = 3
|
||||
// defaultProvisionConcurrency is the fallback cap for parallel
|
||||
// workspace-provision goroutines when MOLECULE_PROVISION_CONCURRENCY
|
||||
// is unset. Originally a hard constant of 3 (PR #1084) calibrated for
|
||||
// Docker-mode workspaces. The constant is now a default — operators
|
||||
// running on EC2 (where each provision is a RunInstances call AWS
|
||||
// happily parallelises) typically want a much higher cap, while
|
||||
// Docker-mode dev environments still prefer the conservative 3.
|
||||
//
|
||||
// 3 keeps the existing Docker-mode behavior. SaaS deployments override
|
||||
// via env (see resolveProvisionConcurrency below).
|
||||
const defaultProvisionConcurrency = 3
|
||||
|
||||
// resolveProvisionConcurrency returns the effective semaphore size for
|
||||
// org-import workspace provisioning, honoring MOLECULE_PROVISION_CONCURRENCY:
|
||||
//
|
||||
// - unset / empty / non-numeric → defaultProvisionConcurrency (3)
|
||||
// - "0" → unlimited (a very large cap;
|
||||
// practically no semaphore — used on
|
||||
// SaaS where AWS RunInstances is the
|
||||
// rate-limiter, not us)
|
||||
// - any positive integer N → N
|
||||
// - negative integer → defaultProvisionConcurrency (3),
|
||||
// log warning so operator notices
|
||||
// the misconfiguration
|
||||
//
|
||||
// The "0 = unlimited" mapping was a deliberate choice: an env var of "0"
|
||||
// is the natural shorthand for "no cap" without forcing operators to
|
||||
// type a magic large number. The implementation hands off a large but
|
||||
// finite value (1<<20) so the channel still works as a regular
|
||||
// buffered chan; goroutines will never block on the semaphore in
|
||||
// practice.
|
||||
func resolveProvisionConcurrency() int {
|
||||
raw := strings.TrimSpace(os.Getenv("MOLECULE_PROVISION_CONCURRENCY"))
|
||||
if raw == "" {
|
||||
return defaultProvisionConcurrency
|
||||
}
|
||||
n, err := strconv.Atoi(raw)
|
||||
if err != nil {
|
||||
log.Printf("org_import: MOLECULE_PROVISION_CONCURRENCY=%q is not an integer; falling back to default %d",
|
||||
raw, defaultProvisionConcurrency)
|
||||
return defaultProvisionConcurrency
|
||||
}
|
||||
if n < 0 {
|
||||
log.Printf("org_import: MOLECULE_PROVISION_CONCURRENCY=%d is negative; falling back to default %d",
|
||||
n, defaultProvisionConcurrency)
|
||||
return defaultProvisionConcurrency
|
||||
}
|
||||
if n == 0 {
|
||||
// Unlimited semantics — use a large but finite cap so the
|
||||
// chan-based semaphore stays a no-op. 1M is well past any
|
||||
// realistic org-import size; AWS RunInstances rate-limit and
|
||||
// account vCPU quota are the real backpressure here.
|
||||
return 1 << 20
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// Child grid layout constants — kept in sync with canvas-topology.ts on
|
||||
// the client. Children laid on import use the same 2-column grid so the
|
||||
@@ -600,8 +654,16 @@ func (h *OrgHandler) Import(c *gin.Context) {
|
||||
results := []map[string]interface{}{}
|
||||
var createErr error
|
||||
|
||||
// Semaphore limits concurrent Docker provisioning (#1084).
|
||||
provisionSem := make(chan struct{}, provisionConcurrency)
|
||||
// Semaphore limits concurrent provision goroutines (#1084).
|
||||
// Cap is configurable via MOLECULE_PROVISION_CONCURRENCY:
|
||||
// unset → 3 (Docker-mode default)
|
||||
// "0" → effectively unlimited (SaaS / EC2 backend)
|
||||
// N>0 → exactly N
|
||||
// See resolveProvisionConcurrency for the full env-parse contract.
|
||||
concurrency := resolveProvisionConcurrency()
|
||||
provisionSem := make(chan struct{}, concurrency)
|
||||
log.Printf("org_import: provision concurrency cap=%d (env MOLECULE_PROVISION_CONCURRENCY=%q)",
|
||||
concurrency, os.Getenv("MOLECULE_PROVISION_CONCURRENCY"))
|
||||
|
||||
// Recursively create workspaces. Root workspaces keep their YAML
|
||||
// canvas coords; children are positioned by createWorkspaceTree
|
||||
|
||||
@@ -377,11 +377,22 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
}
|
||||
}
|
||||
|
||||
// #1084: limit concurrent Docker provisioning via semaphore.
|
||||
// #1084: limit concurrent provisioning via semaphore.
|
||||
// Use provisionWorkspaceAuto so SaaS deployments route through
|
||||
// the CP (EC2) path — calling provisionWorkspace directly was
|
||||
// the same silent-drop bug that bit TeamHandler.Expand on
|
||||
// 2026-05-04 (see workspace.go:121-125 comment + #2486). Symptom:
|
||||
// every claude-code workspace from org-import on SaaS sat in
|
||||
// "provisioning" until the 600s sweeper marked it failed with
|
||||
// "container started but never called /registry/register" —
|
||||
// because there was no container, just a workspace row.
|
||||
// provisionWorkspaceAuto picks CP-mode when h.cpProv is wired,
|
||||
// Docker-mode otherwise; the org-import call site doesn't need
|
||||
// to know which.
|
||||
provisionSem <- struct{}{} // acquire
|
||||
go func(wID, tPath string, cFiles map[string][]byte, p models.CreateWorkspacePayload) {
|
||||
defer func() { <-provisionSem }() // release
|
||||
h.workspace.provisionWorkspace(wID, tPath, cFiles, p)
|
||||
h.workspace.provisionWorkspaceAuto(wID, tPath, cFiles, p)
|
||||
}(id, templatePath, configFiles, payload)
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// Tests for resolveProvisionConcurrency — the env-parse contract that
|
||||
// turns MOLECULE_PROVISION_CONCURRENCY into the channel-buffer size for
|
||||
// the org-import provision semaphore.
|
||||
//
|
||||
// Why this matters: with the wrong cap, org-import either serializes
|
||||
// (cap=1, slow) or stampedes the provider (cap=infinity on a backend
|
||||
// that can't take it). The defaults — 3 for Docker, "0=unlimited" for
|
||||
// EC2/SaaS — are what most operators want; the parse logic exists to
|
||||
// route the env var to the right behavior without surprise.
|
||||
//
|
||||
// The "0 → unlimited" mapping is the user-facing piece worth pinning
|
||||
// in tests: easy to misread as "0 means stop entirely" if someone
|
||||
// re-reads the constant block years later.
|
||||
|
||||
func TestResolveProvisionConcurrency_UnsetUsesDefault(t *testing.T) {
|
||||
t.Setenv("MOLECULE_PROVISION_CONCURRENCY", "")
|
||||
if got := resolveProvisionConcurrency(); got != defaultProvisionConcurrency {
|
||||
t.Errorf("unset env: got %d, want %d", got, defaultProvisionConcurrency)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveProvisionConcurrency_ZeroIsUnlimited(t *testing.T) {
|
||||
// "0" is the user-facing shorthand for "no cap". The implementation
|
||||
// returns a large but finite cap so the channel-based semaphore
|
||||
// stays a no-op without infinite-buffer risk.
|
||||
t.Setenv("MOLECULE_PROVISION_CONCURRENCY", "0")
|
||||
got := resolveProvisionConcurrency()
|
||||
if got <= defaultProvisionConcurrency {
|
||||
t.Errorf("0 should map to large 'unlimited' cap, got %d", got)
|
||||
}
|
||||
// 1<<20 today; pin the lower bound rather than the exact value so
|
||||
// future tuning of the magic number doesn't break this test.
|
||||
if got < 1024 {
|
||||
t.Errorf("0 should map to a cap >= 1024 (effectively unlimited), got %d", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveProvisionConcurrency_PositiveIntegerExact(t *testing.T) {
|
||||
cases := []struct {
|
||||
env string
|
||||
want int
|
||||
}{
|
||||
{"1", 1},
|
||||
{"5", 5},
|
||||
{"10", 10},
|
||||
{"50", 50},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.env, func(t *testing.T) {
|
||||
t.Setenv("MOLECULE_PROVISION_CONCURRENCY", tc.env)
|
||||
if got := resolveProvisionConcurrency(); got != tc.want {
|
||||
t.Errorf("env=%q: got %d, want %d", tc.env, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveProvisionConcurrency_NegativeFallsBackToDefault(t *testing.T) {
|
||||
// Negative values are operator misconfiguration. Fall back to the
|
||||
// safe default rather than passing through to make(chan, -5) which
|
||||
// panics. The handler logs a warning so the operator notices.
|
||||
t.Setenv("MOLECULE_PROVISION_CONCURRENCY", "-5")
|
||||
if got := resolveProvisionConcurrency(); got != defaultProvisionConcurrency {
|
||||
t.Errorf("negative env: got %d, want default %d", got, defaultProvisionConcurrency)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveProvisionConcurrency_NonNumericFallsBackToDefault(t *testing.T) {
|
||||
// Garbage in env shouldn't crash org-import. Common in dev when an
|
||||
// operator types `MOLECULE_PROVISION_CONCURRENCY=true` or similar.
|
||||
cases := []string{"true", "yes", "infinity", "ten", "3.5", "0x10"}
|
||||
for _, raw := range cases {
|
||||
t.Run(raw, func(t *testing.T) {
|
||||
t.Setenv("MOLECULE_PROVISION_CONCURRENCY", raw)
|
||||
if got := resolveProvisionConcurrency(); got != defaultProvisionConcurrency {
|
||||
t.Errorf("non-numeric env=%q: got %d, want default %d",
|
||||
raw, got, defaultProvisionConcurrency)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveProvisionConcurrency_WhitespaceTrimmed(t *testing.T) {
|
||||
// Operators frequently set env vars with stray whitespace from
|
||||
// copy-paste. Trim before parse so " 7 " == "7".
|
||||
t.Setenv("MOLECULE_PROVISION_CONCURRENCY", " 7 ")
|
||||
if got := resolveProvisionConcurrency(); got != 7 {
|
||||
t.Errorf("whitespace env: got %d, want 7", got)
|
||||
}
|
||||
}
|
||||
@@ -38,13 +38,26 @@ import (
|
||||
// Keep these stable — changing the base path for an existing runtime
|
||||
// without a migration shim will make previously-saved files disappear from
|
||||
// the runtime's POV.
|
||||
//
|
||||
// Path source-of-truth: cloud-init in
|
||||
// `molecule-controlplane/internal/provisioner/userdata_containerized.go`
|
||||
// runs `mkdir -p /configs` and writes the canonical config.yaml there.
|
||||
// The workspace container bind-mounts host `/configs` to read it back.
|
||||
// Files written anywhere else on the host are invisible to the runtime,
|
||||
// so `claude-code` (and any future containerized runtime) must point here.
|
||||
//
|
||||
// `/configs` is root-owned (cloud-init runs as root); the SSH-as-ubuntu
|
||||
// install command at the call site below uses `sudo` to write into it.
|
||||
var workspaceFilePathPrefix = map[string]string{
|
||||
"hermes": "/home/ubuntu/.hermes",
|
||||
"langgraph": "/opt/configs",
|
||||
"external": "/opt/configs",
|
||||
// Default for unknown / future runtimes is /opt/configs — most
|
||||
// conservative place that doesn't collide with system or runtime-
|
||||
// private directories.
|
||||
"hermes": "/home/ubuntu/.hermes",
|
||||
"claude-code": "/configs",
|
||||
"langgraph": "/opt/configs",
|
||||
"external": "/opt/configs",
|
||||
// Default for unknown / future runtimes is /configs — matches the
|
||||
// containerized user-data layout. The `langgraph` / `external`
|
||||
// entries pre-date the unified user-data path and are retained
|
||||
// until a migration audit confirms what the running tenants of
|
||||
// those runtimes actually have on disk.
|
||||
}
|
||||
|
||||
func resolveWorkspaceFilePath(runtime, relPath string) (string, error) {
|
||||
@@ -53,7 +66,7 @@ func resolveWorkspaceFilePath(runtime, relPath string) (string, error) {
|
||||
}
|
||||
base, ok := workspaceFilePathPrefix[strings.ToLower(strings.TrimSpace(runtime))]
|
||||
if !ok {
|
||||
base = "/opt/configs"
|
||||
base = "/configs"
|
||||
}
|
||||
return filepath.Join(base, filepath.Clean(relPath)), nil
|
||||
}
|
||||
@@ -148,6 +161,17 @@ func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, c
|
||||
// writes the file atomically via temp-file-rename. Permissions 0644
|
||||
// match the existing tar-unpack defaults on the Docker path.
|
||||
//
|
||||
// `sudo -n` (non-interactive) prefix: the canonical containerized
|
||||
// workspace layout puts /configs at the root, owned by root because
|
||||
// cloud-init runs as root (see
|
||||
// molecule-controlplane/internal/provisioner/userdata_containerized.go).
|
||||
// SSH-as-ubuntu can't write into /configs without escalation.
|
||||
// Ubuntu has passwordless sudo on EC2 by default; sudo -n fails fast
|
||||
// (no prompt) if that ever changes, surfacing a clean error instead
|
||||
// of a hang. The hermes path /home/ubuntu/.hermes is ubuntu-owned
|
||||
// and doesn't strictly need sudo, but using it uniformly avoids
|
||||
// per-runtime branching here.
|
||||
//
|
||||
// The remote command is fully deterministic — no user-controlled
|
||||
// input reaches a shell eval (absPath is built from a map + Clean()).
|
||||
sshArgs := []string{
|
||||
@@ -157,7 +181,7 @@ func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, c
|
||||
"-o", "ServerAliveInterval=15",
|
||||
"-p", fmt.Sprintf("%d", localPort),
|
||||
fmt.Sprintf("%s@127.0.0.1", osUser),
|
||||
fmt.Sprintf("install -D -m 0644 /dev/stdin %s", shellQuote(absPath)),
|
||||
fmt.Sprintf("sudo -n install -D -m 0644 /dev/stdin %s", shellQuote(absPath)),
|
||||
}
|
||||
sshCmd := exec.CommandContext(ctx, "ssh", sshArgs...)
|
||||
sshCmd.Env = os.Environ()
|
||||
@@ -180,3 +204,116 @@ func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, c
|
||||
func shellQuote(s string) string {
|
||||
return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
|
||||
}
|
||||
|
||||
// readFileViaEIC reads a single file from the workspace EC2 at the
|
||||
// absolute path that resolveWorkspaceFilePath computes. Mirrors
|
||||
// writeFileViaEIC end-to-end (ephemeral keypair, EIC tunnel, ssh) so
|
||||
// canvas's Config tab can GET back what it just PUT. Pre-fix the GET
|
||||
// path (templates.go ReadFile) only handled local Docker containers
|
||||
// + a host-side template fallback; SaaS workspaces (EC2-per-workspace)
|
||||
// always 404'd because neither handles their on-EC2 layout.
|
||||
//
|
||||
// Returns ("", os.ErrNotExist) when the remote path doesn't exist so
|
||||
// the handler can map it to HTTP 404 cleanly. Other errors propagate.
|
||||
func readFileViaEIC(ctx context.Context, instanceID, runtime, relPath string) ([]byte, error) {
|
||||
if instanceID == "" {
|
||||
return nil, fmt.Errorf("workspace has no instance_id — not a SaaS EC2 workspace")
|
||||
}
|
||||
absPath, err := resolveWorkspaceFilePath(runtime, relPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid path: %w", err)
|
||||
}
|
||||
|
||||
osUser := os.Getenv("WORKSPACE_EC2_OS_USER")
|
||||
if osUser == "" {
|
||||
osUser = "ubuntu"
|
||||
}
|
||||
region := os.Getenv("AWS_REGION")
|
||||
if region == "" {
|
||||
region = "us-east-2"
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, eicFileWriteTimeout)
|
||||
defer cancel()
|
||||
|
||||
keyDir, err := os.MkdirTemp("", "molecule-fileread-*")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("keydir mkdir: %w", err)
|
||||
}
|
||||
defer func() { _ = os.RemoveAll(keyDir) }()
|
||||
keyPath := keyDir + "/id"
|
||||
if out, kerr := exec.CommandContext(ctx, "ssh-keygen",
|
||||
"-t", "ed25519", "-f", keyPath, "-N", "", "-q",
|
||||
"-C", "molecule-fileread",
|
||||
).CombinedOutput(); kerr != nil {
|
||||
return nil, fmt.Errorf("ssh-keygen: %w (%s)", kerr, strings.TrimSpace(string(out)))
|
||||
}
|
||||
pubKey, err := os.ReadFile(keyPath + ".pub")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read pubkey: %w", err)
|
||||
}
|
||||
|
||||
if err := sendSSHPublicKey(ctx, region, instanceID, osUser, strings.TrimSpace(string(pubKey))); err != nil {
|
||||
return nil, fmt.Errorf("send-ssh-public-key: %w", err)
|
||||
}
|
||||
|
||||
localPort, err := pickFreePort()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("pick free port: %w", err)
|
||||
}
|
||||
tunnel := openTunnelCmd(eicSSHOptions{
|
||||
InstanceID: instanceID,
|
||||
OSUser: osUser,
|
||||
Region: region,
|
||||
LocalPort: localPort,
|
||||
PrivateKeyPath: keyPath,
|
||||
})
|
||||
tunnel.Env = os.Environ()
|
||||
if err := tunnel.Start(); err != nil {
|
||||
return nil, fmt.Errorf("open-tunnel start: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
if tunnel.Process != nil {
|
||||
_ = tunnel.Process.Kill()
|
||||
}
|
||||
_ = tunnel.Wait()
|
||||
}()
|
||||
if err := waitForPort(ctx, "127.0.0.1", localPort, 10*time.Second); err != nil {
|
||||
return nil, fmt.Errorf("tunnel never listened: %w", err)
|
||||
}
|
||||
|
||||
// `sudo -n cat`: /configs is root-owned by cloud-init (same reason
|
||||
// writeFileViaEIC needs sudo to install). The path is built from a
|
||||
// validated map + Clean(), so no user-controlled string reaches the
|
||||
// shell here. `2>/dev/null` swallows `cat: ...: No such file` so
|
||||
// the missing-file case returns empty stdout + non-zero exit, which
|
||||
// we translate to os.ErrNotExist below.
|
||||
sshCmd := exec.CommandContext(ctx, "ssh",
|
||||
"-i", keyPath,
|
||||
"-o", "StrictHostKeyChecking=no",
|
||||
"-o", "UserKnownHostsFile=/dev/null",
|
||||
"-o", "ServerAliveInterval=15",
|
||||
"-p", fmt.Sprintf("%d", localPort),
|
||||
fmt.Sprintf("%s@127.0.0.1", osUser),
|
||||
fmt.Sprintf("sudo -n cat %s 2>/dev/null", shellQuote(absPath)),
|
||||
)
|
||||
sshCmd.Env = os.Environ()
|
||||
var stdout, stderr bytes.Buffer
|
||||
sshCmd.Stdout = &stdout
|
||||
sshCmd.Stderr = &stderr
|
||||
runErr := sshCmd.Run()
|
||||
out := stdout.Bytes()
|
||||
if runErr != nil {
|
||||
// `cat` returns 1 on missing file; with 2>/dev/null we have no
|
||||
// stderr distinguisher. Treat empty-stdout + non-zero exit as
|
||||
// not-found rather than a tunnel/auth error (those usually
|
||||
// produce stderr from ssh itself, not from the remote command).
|
||||
if len(out) == 0 && stderr.Len() == 0 {
|
||||
return nil, os.ErrNotExist
|
||||
}
|
||||
return nil, fmt.Errorf("ssh cat: %w (%s)", runErr, strings.TrimSpace(stderr.String()))
|
||||
}
|
||||
log.Printf("readFileViaEIC: ws instance=%s runtime=%s read %d bytes ← %s",
|
||||
instanceID, runtime, len(out), absPath)
|
||||
return out, nil
|
||||
}
|
||||
|
||||
@@ -18,10 +18,16 @@ func TestResolveWorkspaceFilePath_KnownRuntimes(t *testing.T) {
|
||||
{"hermes", "config.yaml", "/home/ubuntu/.hermes/config.yaml"},
|
||||
{"HERMES", "config.yaml", "/home/ubuntu/.hermes/config.yaml"}, // case-insensitive
|
||||
{"hermes", "nested/a.yaml", "/home/ubuntu/.hermes/nested/a.yaml"},
|
||||
// claude-code (and any future containerized runtime) lands at /configs —
|
||||
// the path user-data creates and bind-mounts into the container. Pre-fix
|
||||
// this fell through to /opt/configs which doesn't exist on workspace EC2s
|
||||
// and would 500 with EACCES on save (the bug that motivated this gate).
|
||||
{"claude-code", "config.yaml", "/configs/config.yaml"},
|
||||
{"CLAUDE-CODE", "config.yaml", "/configs/config.yaml"}, // case-insensitive
|
||||
{"langgraph", "config.yaml", "/opt/configs/config.yaml"},
|
||||
{"external", "skills.json", "/opt/configs/skills.json"},
|
||||
{"", "config.yaml", "/opt/configs/config.yaml"}, // empty → default
|
||||
{"unknown", "config.yaml", "/opt/configs/config.yaml"}, // unknown → default
|
||||
{"", "config.yaml", "/configs/config.yaml"}, // empty → default
|
||||
{"unknown", "config.yaml", "/configs/config.yaml"}, // unknown → default
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.runtime+"/"+tc.relPath, func(t *testing.T) {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
@@ -349,16 +350,52 @@ func (h *TemplatesHandler) ReadFile(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
var wsName string
|
||||
if err := db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsName); err != nil {
|
||||
var wsName, instanceID, runtime string
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT name, COALESCE(instance_id, ''), COALESCE(runtime, '') FROM workspaces WHERE id = $1`,
|
||||
workspaceID,
|
||||
).Scan(&wsName, &instanceID, &runtime); err != nil {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return
|
||||
}
|
||||
|
||||
// Try container first. `cat` wants a single path argument — passing
|
||||
// rootPath and filePath as two args would make `cat` try to read the
|
||||
// rootPath directory (error) and then resolve filePath relative to
|
||||
// the container's cwd, which isn't guaranteed to equal rootPath.
|
||||
// SaaS workspace (EC2-per-workspace) — no Docker on this tenant. Read
|
||||
// via SSH through the EIC endpoint, mirroring WriteFile's dispatch
|
||||
// in this same file. Pre-fix this branch was missing and SaaS
|
||||
// workspaces always fell through to the local-Docker container check
|
||||
// (finds nothing on a SaaS tenant) + template-dir fallback (returns
|
||||
// the seed template, not the persisted state). Net effect: the
|
||||
// canvas Config tab always 404'd for SaaS workspaces — visible to
|
||||
// users after #2781 added the "no config.yaml" error UX.
|
||||
//
|
||||
// The ?root= query param is intentionally ignored on the SaaS path —
|
||||
// it's a local-Docker concept (arbitrary container roots). The
|
||||
// runtime → base-path map (workspaceFilePathPrefix in
|
||||
// template_files_eic.go) is the SaaS source of truth.
|
||||
if instanceID != "" {
|
||||
content, err := readFileViaEIC(ctx, instanceID, runtime, filePath)
|
||||
if err == nil {
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"path": filePath,
|
||||
"content": string(content),
|
||||
"size": len(content),
|
||||
})
|
||||
return
|
||||
}
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "file not found on workspace"})
|
||||
return
|
||||
}
|
||||
log.Printf("ReadFile EIC for %s path=%s: %v", workspaceID, filePath, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to read file: %v", err)})
|
||||
return
|
||||
}
|
||||
|
||||
// Local Docker path: try the workspace container first. `cat` wants a
|
||||
// single path argument — passing rootPath and filePath as two args
|
||||
// would make `cat` try to read the rootPath directory (error) and
|
||||
// then resolve filePath relative to the container's cwd, which
|
||||
// isn't guaranteed to equal rootPath.
|
||||
if containerName := h.findContainer(ctx, workspaceID); containerName != "" {
|
||||
fullPath := strings.TrimRight(rootPath, "/") + "/" + filePath
|
||||
content, err := h.execInContainer(ctx, containerName, []string{"cat", fullPath})
|
||||
@@ -511,90 +548,3 @@ func (h *TemplatesHandler) DeleteFile(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, gin.H{"status": "deleted", "path": filePath})
|
||||
}
|
||||
|
||||
// SharedContext handles GET /workspaces/:id/shared-context
|
||||
// Returns the files listed in the workspace's config.yaml shared_context field.
|
||||
func (h *TemplatesHandler) SharedContext(c *gin.Context) {
|
||||
workspaceID := c.Param("id")
|
||||
ctx := c.Request.Context()
|
||||
|
||||
var wsName string
|
||||
if err := db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsName); err != nil {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return
|
||||
}
|
||||
|
||||
type contextFile struct {
|
||||
Path string `json:"path"`
|
||||
Content string `json:"content"`
|
||||
}
|
||||
|
||||
// Try reading from running container first
|
||||
if containerName := h.findContainer(ctx, workspaceID); containerName != "" {
|
||||
configData, err := h.execInContainer(ctx, containerName, []string{"cat", "/configs/config.yaml"})
|
||||
if err != nil {
|
||||
c.JSON(http.StatusOK, []interface{}{})
|
||||
return
|
||||
}
|
||||
|
||||
var cfg struct {
|
||||
SharedContext []string `yaml:"shared_context"`
|
||||
}
|
||||
if err := yaml.Unmarshal([]byte(configData), &cfg); err != nil || len(cfg.SharedContext) == 0 {
|
||||
c.JSON(http.StatusOK, []interface{}{})
|
||||
return
|
||||
}
|
||||
|
||||
files := make([]contextFile, 0, len(cfg.SharedContext))
|
||||
for _, relPath := range cfg.SharedContext {
|
||||
if err := validateRelPath(relPath); err != nil {
|
||||
continue
|
||||
}
|
||||
// CWE-78: pass path components as separate exec args instead of
|
||||
// concatenating into a single string. validateRelPath above is the
|
||||
// primary guard; separate args is defence-in-depth (no shell
|
||||
// interpolation possible in exec form).
|
||||
content, err := h.execInContainer(ctx, containerName, []string{"cat", "/configs", relPath})
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
files = append(files, contextFile{Path: relPath, Content: content})
|
||||
}
|
||||
c.JSON(http.StatusOK, files)
|
||||
return
|
||||
}
|
||||
|
||||
// Fallback to host-side template dir
|
||||
configDir := h.resolveTemplateDir(wsName)
|
||||
if configDir == "" {
|
||||
c.JSON(http.StatusOK, []interface{}{})
|
||||
return
|
||||
}
|
||||
|
||||
configData, err := os.ReadFile(filepath.Join(configDir, "config.yaml"))
|
||||
if err != nil {
|
||||
c.JSON(http.StatusOK, []interface{}{})
|
||||
return
|
||||
}
|
||||
|
||||
var cfg struct {
|
||||
SharedContext []string `yaml:"shared_context"`
|
||||
}
|
||||
if err := yaml.Unmarshal(configData, &cfg); err != nil || len(cfg.SharedContext) == 0 {
|
||||
c.JSON(http.StatusOK, []interface{}{})
|
||||
return
|
||||
}
|
||||
|
||||
files := make([]contextFile, 0, len(cfg.SharedContext))
|
||||
for _, relPath := range cfg.SharedContext {
|
||||
if err := validateRelPath(relPath); err != nil {
|
||||
continue
|
||||
}
|
||||
data, err := os.ReadFile(filepath.Join(configDir, relPath))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
files = append(files, contextFile{Path: relPath, Content: string(data)})
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, files)
|
||||
}
|
||||
|
||||
@@ -894,7 +894,7 @@ func TestReadFile_WorkspaceNotFound(t *testing.T) {
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
|
||||
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
|
||||
WithArgs("ws-nf").
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
|
||||
@@ -928,9 +928,14 @@ func TestReadFile_FallbackToHost_Success(t *testing.T) {
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
|
||||
// instance_id="" → SaaS branch skipped → falls through to local
|
||||
// Docker / template-dir host fallback (the only path the test
|
||||
// exercises). When instance_id is set, ReadFile would dispatch
|
||||
// through readFileViaEIC, which is covered by integration tests.
|
||||
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
|
||||
WithArgs("ws-read").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("Reader Agent"))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name", "instance_id", "runtime"}).
|
||||
AddRow("Reader Agent", "", ""))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -964,9 +969,10 @@ func TestReadFile_FallbackToHost_NotFound(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
|
||||
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
|
||||
WithArgs("ws-nofile").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("No File Agent"))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name", "instance_id", "runtime"}).
|
||||
AddRow("No File Agent", "", ""))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -1120,107 +1126,6 @@ func TestDeleteFile_WorkspaceNotFound(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== GET /workspaces/:id/shared-context ====================
|
||||
|
||||
func TestSharedContext_WorkspaceNotFound(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
|
||||
WithArgs("ws-sc-nf").
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-sc-nf"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-sc-nf/shared-context", nil)
|
||||
|
||||
handler.SharedContext(c)
|
||||
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Errorf("expected 404, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSharedContext_NoTemplate(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
handler := NewTemplatesHandler(tmpDir, nil) // no docker
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
|
||||
WithArgs("ws-sc-nt").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("Unknown Agent"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-sc-nt"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-sc-nt/shared-context", nil)
|
||||
|
||||
handler.SharedContext(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
// Should return empty array
|
||||
var resp []interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if len(resp) != 0 {
|
||||
t.Errorf("expected empty list, got %d items", len(resp))
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSharedContext_WithFiles(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
tmplDir := filepath.Join(tmpDir, "ctx-agent")
|
||||
os.MkdirAll(tmplDir, 0755)
|
||||
os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte("name: Ctx Agent\nshared_context:\n - rules.md\n - style.md\n"), 0644)
|
||||
os.WriteFile(filepath.Join(tmplDir, "rules.md"), []byte("# Rules\nBe nice"), 0644)
|
||||
os.WriteFile(filepath.Join(tmplDir, "style.md"), []byte("# Style\nBe clear"), 0644)
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
|
||||
WithArgs("ws-sc-ok").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("Ctx Agent"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-sc-ok"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-sc-ok/shared-context", nil)
|
||||
|
||||
handler.SharedContext(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var resp []map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if len(resp) != 2 {
|
||||
t.Fatalf("expected 2 context files, got %d", len(resp))
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== resolveTemplateDir ====================
|
||||
|
||||
func TestResolveTemplateDir_ByNormalizedName(t *testing.T) {
|
||||
@@ -1247,7 +1152,7 @@ func TestResolveTemplateDir_NotFound(t *testing.T) {
|
||||
}
|
||||
|
||||
// ==================== CWE-78 hardening regression (issue #2011) ====================
|
||||
// These tests lock in the defence-in-depth guards for DeleteFile and SharedContext.
|
||||
// These tests lock in the defence-in-depth guards for DeleteFile.
|
||||
// The primary guard is validateRelPath (fires before any exec/file-read path);
|
||||
// the exec-form path construction (filepath.Join / separate args) is defence-in-depth.
|
||||
|
||||
@@ -1292,60 +1197,3 @@ func TestCWE78_DeleteFile_TraversalVariants(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestCWE78_SharedContext_SkipsTraversalPaths asserts that when a workspace's
|
||||
// config.yaml lists traversal paths in shared_context, SharedContext skips them
|
||||
// via validateRelPath rather than passing them to exec or os.ReadFile.
|
||||
// Uses the filesystem fallback path (no docker client) so no container mock needed.
|
||||
func TestCWE78_SharedContext_SkipsTraversalPaths(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
// Create a template directory that SharedContext will resolve for "Cwe Agent".
|
||||
tmplDir := filepath.Join(tmpDir, "cwe-agent")
|
||||
os.MkdirAll(tmplDir, 0755)
|
||||
// config.yaml with a mix of safe and traversal-attack paths.
|
||||
configYAML := "name: Cwe Agent\nshared_context:\n - safe-file.md\n - ../../etc/passwd\n - ../shadow\n - another-safe.md\n"
|
||||
os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte(configYAML), 0644)
|
||||
// Only write the safe files — traversal paths must not be reachable.
|
||||
os.WriteFile(filepath.Join(tmplDir, "safe-file.md"), []byte("# safe"), 0644)
|
||||
os.WriteFile(filepath.Join(tmplDir, "another-safe.md"), []byte("# also safe"), 0644)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
|
||||
WithArgs("ws-cwe78-sc").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("Cwe Agent"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-cwe78-sc"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-cwe78-sc/shared-context", nil)
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil) // nil docker → filesystem fallback
|
||||
handler.SharedContext(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var files []struct {
|
||||
Path string `json:"path"`
|
||||
Content string `json:"content"`
|
||||
}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &files); err != nil {
|
||||
t.Fatalf("failed to decode response: %v", err)
|
||||
}
|
||||
|
||||
// Only the two safe files must appear; traversal paths must be absent.
|
||||
if len(files) != 2 {
|
||||
t.Errorf("expected 2 safe files, got %d: %v", len(files), files)
|
||||
}
|
||||
for _, f := range files {
|
||||
if strings.Contains(f.Path, "..") || strings.Contains(f.Path, "etc") || strings.Contains(f.Path, "shadow") {
|
||||
t.Errorf("traversal path %q must not appear in shared-context response", f.Path)
|
||||
}
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -168,3 +168,120 @@ func TestTeamExpand_UsesAutoNotDirectDockerPath(t *testing.T) {
|
||||
t.Errorf("team.go must call h.wh.provisionWorkspaceAuto for child provisioning — current code does not")
|
||||
}
|
||||
}
|
||||
|
||||
// TestNoCallSiteCallsDirectProvisionerExceptAuto — generic source-level
|
||||
// gate covering ANY future caller, not just team.go and org_import.go.
|
||||
//
|
||||
// The architectural intent is: provisionWorkspaceAuto is the single
|
||||
// source of truth for "how to start a workspace"; the per-backend
|
||||
// helpers (provisionWorkspace = Docker, provisionWorkspaceCP = CP) are
|
||||
// implementation details Auto routes between based on which backend is
|
||||
// wired. Pre-2026-05-04 we had this abstraction but enforced only by
|
||||
// convention — TeamHandler.Expand violated it (silent SaaS bug), then
|
||||
// org_import.go violated it the same way. The fixes were identical:
|
||||
// route through Auto. This gate prevents the *next* call site from
|
||||
// repeating the pattern.
|
||||
//
|
||||
// Walks every .go file under handlers/ (except the dispatcher itself
|
||||
// in workspace.go, and tests). Fails if any non-test handler calls
|
||||
// h.*.provisionWorkspace( or h.*.provisionWorkspaceCP( directly —
|
||||
// they should ALL go through provisionWorkspaceAuto.
|
||||
func TestNoCallSiteCallsDirectProvisionerExceptAuto(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("getwd: %v", err)
|
||||
}
|
||||
entries, err := os.ReadDir(wd)
|
||||
if err != nil {
|
||||
t.Fatalf("readdir: %v", err)
|
||||
}
|
||||
directRe := []string{
|
||||
// Receiver could be anything, so match on the suffix.
|
||||
".provisionWorkspace(",
|
||||
".provisionWorkspaceCP(",
|
||||
}
|
||||
allowedFiles := map[string]bool{
|
||||
// workspace.go DEFINES the methods + the Auto dispatcher; it's
|
||||
// allowed to reference them directly.
|
||||
"workspace.go": true,
|
||||
// workspace_provision.go DEFINES the bodies of the direct
|
||||
// methods (and the Auto-internal call from CP-mode itself).
|
||||
"workspace_provision.go": true,
|
||||
// workspace_restart.go pre-dates the Auto dispatcher and has
|
||||
// its own if-cpProv-else manual dispatch (line 219-228, 571-575,
|
||||
// 704-708). Functionally equivalent to Auto, so it's not the
|
||||
// bug class this gate targets — but it IS architectural
|
||||
// duplication, tracked as a follow-up for proper de-dup.
|
||||
// See <follow-up issue> filed alongside this PR.
|
||||
"workspace_restart.go": true,
|
||||
}
|
||||
for _, entry := range entries {
|
||||
name := entry.Name()
|
||||
if !filepath.IsAbs(name) && entry.IsDir() {
|
||||
continue
|
||||
}
|
||||
if filepath.Ext(name) != ".go" {
|
||||
continue
|
||||
}
|
||||
// Skip tests — tests legitimately stub or call the helpers
|
||||
// to exercise their behavior.
|
||||
if filepath.Base(name) != name {
|
||||
continue
|
||||
}
|
||||
if filepath.Ext(name) == ".go" && len(name) > len("_test.go") &&
|
||||
name[len(name)-len("_test.go"):] == "_test.go" {
|
||||
continue
|
||||
}
|
||||
if allowedFiles[name] {
|
||||
continue
|
||||
}
|
||||
src, err := os.ReadFile(filepath.Join(wd, name))
|
||||
if err != nil {
|
||||
t.Fatalf("read %s: %v", name, err)
|
||||
}
|
||||
for _, needle := range directRe {
|
||||
if bytes.Contains(src, []byte(needle)) {
|
||||
t.Errorf("%s calls h.X%s directly — must use h.X.provisionWorkspaceAuto so backend routing stays centralized. "+
|
||||
"Pre-2026-05-04 the same pattern caused the silent-drop bug in TeamHandler.Expand, then again in org_import.go (#2486). "+
|
||||
"Fix: replace the call with h.X.provisionWorkspaceAuto(...) — Auto picks Docker vs CP based on which backend is wired.",
|
||||
name, needle)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestOrgImport_UsesAutoNotDirectDockerPath — source-level guard for
|
||||
// the org_import.go call site. Same bug pattern as team.go above:
|
||||
// pre-2026-05-04 #2 (this PR), org_import called h.workspace.provisionWorkspace
|
||||
// directly, sending every imported workspace down the Docker path on
|
||||
// SaaS. User reproduced 2026-05-04 ~22:30Z importing a 7-workspace
|
||||
// "Director Pattern" template on the hongming prod tenant — every
|
||||
// workspace sat in "provisioning" until the 600s sweeper marked it
|
||||
// failed with "container started but never called /registry/register",
|
||||
// because no container ever existed (the Docker provisioner was nil
|
||||
// in SaaS, the goroutine returned silently, no log emitted from
|
||||
// provisionWorkspaceCP because that function was never invoked).
|
||||
//
|
||||
// The repro pattern was identical to issue #2486. The fix is identical
|
||||
// to the team.go fix above: route through provisionWorkspaceAuto.
|
||||
//
|
||||
// This test pins the call site so a future refactor can't re-introduce
|
||||
// the bug. Substring match on the source — same rationale as the team.go
|
||||
// gate above.
|
||||
func TestOrgImport_UsesAutoNotDirectDockerPath(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("getwd: %v", err)
|
||||
}
|
||||
src, err := os.ReadFile(filepath.Join(wd, "org_import.go"))
|
||||
if err != nil {
|
||||
t.Fatalf("read org_import.go: %v", err)
|
||||
}
|
||||
if bytes.Contains(src, []byte("h.workspace.provisionWorkspace(")) {
|
||||
t.Errorf("org_import.go calls h.workspace.provisionWorkspace directly — must use h.workspace.provisionWorkspaceAuto so SaaS tenants route to CP. " +
|
||||
"Pre-fix repro: 7-workspace org-import on hongming prod tenant 2026-05-04 ~22:30Z, every workspace timed out at 600s with the misleading 'container started but never called /registry/register' message — see #2486.")
|
||||
}
|
||||
if !bytes.Contains(src, []byte("h.workspace.provisionWorkspaceAuto(")) {
|
||||
t.Errorf("org_import.go must call h.workspace.provisionWorkspaceAuto for child provisioning — current code does not")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,81 @@
|
||||
// Package wiring constructs the v2 memory plugin dependency bundle
|
||||
// at boot time so handlers can opt into the plugin path uniformly.
|
||||
//
|
||||
// The bundle is nil-safe: when MEMORY_PLUGIN_URL is unset, Build
|
||||
// returns (nil, nil) so callers can detect "v2 not configured" with
|
||||
// a single nil check instead of plumbing a feature flag through
|
||||
// every handler.
|
||||
//
|
||||
// This package exists because the v2 plugin client + namespace
|
||||
// resolver are needed by THREE different handler types (MCPHandler,
|
||||
// AdminMemoriesHandler, WorkspaceHandler) constructed in two
|
||||
// different files (main.go for WorkspaceHandler, router.go for the
|
||||
// other two). A central Build() avoids each construction site
|
||||
// re-implementing the env-var read + plugin instantiation.
|
||||
package wiring
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
mclient "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/client"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/namespace"
|
||||
)
|
||||
|
||||
// Bundle is the v2 dependency bundle. Pass it through Setup as a
|
||||
// single param; handlers extract what they need.
|
||||
//
|
||||
// nil receiver = "v2 not configured" — every method on Bundle
|
||||
// nil-checks itself, so callers can pass a nil Bundle through the
|
||||
// hot path without conditional spread.
|
||||
type Bundle struct {
|
||||
Plugin *mclient.Client
|
||||
Resolver *namespace.Resolver
|
||||
}
|
||||
|
||||
// Build returns a wired Bundle if MEMORY_PLUGIN_URL is set, else nil.
|
||||
//
|
||||
// It probes /v1/health at boot — when the plugin is unreachable, we
|
||||
// log a warning but STILL return the bundle. The MCP layer's
|
||||
// circuit breaker handles ongoing unavailability; we don't want to
|
||||
// block workspace-server boot just because the memory plugin is
|
||||
// briefly down.
|
||||
func Build(db *sql.DB) *Bundle {
|
||||
if os.Getenv("MEMORY_PLUGIN_URL") == "" {
|
||||
return nil
|
||||
}
|
||||
plugin := mclient.New(mclient.Config{})
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
if hr, err := plugin.Boot(ctx); err != nil {
|
||||
log.Printf("memory-plugin: /v1/health probe failed (will retry per-request): %v", err)
|
||||
} else {
|
||||
log.Printf("memory-plugin: ok, capabilities=%v", hr.Capabilities)
|
||||
}
|
||||
return &Bundle{
|
||||
Plugin: plugin,
|
||||
Resolver: namespace.New(db),
|
||||
}
|
||||
}
|
||||
|
||||
// NamespaceCleanupFn returns a closure suitable for
|
||||
// WorkspaceHandler.WithNamespaceCleanup. nil when bundle is nil so
|
||||
// callers can pass it through unconditionally.
|
||||
//
|
||||
// The closure runs best-effort: errors are logged, never propagated.
|
||||
// A misbehaving plugin must not block workspace purges.
|
||||
func (b *Bundle) NamespaceCleanupFn() func(context.Context, string) {
|
||||
if b == nil || b.Plugin == nil {
|
||||
return nil
|
||||
}
|
||||
return func(ctx context.Context, workspaceID string) {
|
||||
ns := "workspace:" + workspaceID
|
||||
if err := b.Plugin.DeleteNamespace(ctx, ns); err != nil {
|
||||
log.Printf("memory-plugin: namespace cleanup failed (workspace=%s ns=%s): %v",
|
||||
workspaceID, ns, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,160 @@
|
||||
package wiring
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
)
|
||||
|
||||
// TestBuild_NilWhenURLUnset pins the operator-friendly default: no
|
||||
// MEMORY_PLUGIN_URL → nil bundle → all callers fall through to legacy
|
||||
// behavior with no surprises.
|
||||
func TestBuild_NilWhenURLUnset(t *testing.T) {
|
||||
t.Setenv("MEMORY_PLUGIN_URL", "")
|
||||
if got := Build(nil); got != nil {
|
||||
t.Errorf("expected nil bundle when MEMORY_PLUGIN_URL unset, got %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuild_NonNilWhenURLSet pins that the bundle is constructed even
|
||||
// when the plugin's /v1/health probe fails — we don't want workspace-
|
||||
// server boot to depend on a transiently unavailable plugin.
|
||||
func TestBuild_NonNilWhenURLSet(t *testing.T) {
|
||||
t.Setenv("MEMORY_PLUGIN_URL", "http://127.0.0.1:1") // bogus port = probe will fail
|
||||
db, _, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
bundle := Build(db)
|
||||
if bundle == nil {
|
||||
t.Fatal("expected non-nil bundle when MEMORY_PLUGIN_URL is set")
|
||||
}
|
||||
if bundle.Plugin == nil {
|
||||
t.Error("Plugin must be wired")
|
||||
}
|
||||
if bundle.Resolver == nil {
|
||||
t.Error("Resolver must be wired")
|
||||
}
|
||||
}
|
||||
|
||||
// TestNamespaceCleanupFn_NilBundle pins the nil-safe path: callers
|
||||
// that pass `bundle.NamespaceCleanupFn()` unconditionally don't need
|
||||
// to nil-check the bundle separately.
|
||||
func TestNamespaceCleanupFn_NilBundle(t *testing.T) {
|
||||
var b *Bundle // nil receiver
|
||||
if got := b.NamespaceCleanupFn(); got != nil {
|
||||
t.Errorf("nil bundle must return nil cleanup fn, got non-nil")
|
||||
}
|
||||
}
|
||||
|
||||
// TestNamespaceCleanupFn_NilPlugin: bundle exists but plugin is nil —
|
||||
// also returns nil cleanup fn (defensive in case of partial wiring).
|
||||
func TestNamespaceCleanupFn_NilPlugin(t *testing.T) {
|
||||
b := &Bundle{} // both fields nil
|
||||
if got := b.NamespaceCleanupFn(); got != nil {
|
||||
t.Errorf("bundle with nil plugin must return nil cleanup fn")
|
||||
}
|
||||
}
|
||||
|
||||
// TestNamespaceCleanupFn_HitsPluginAtCorrectNamespace is the real
|
||||
// integration gate for the closure: it spins up an httptest.Server
|
||||
// that records every DELETE request, points MEMORY_PLUGIN_URL at it,
|
||||
// runs Build(), then invokes the returned closure and asserts the
|
||||
// server saw `DELETE /v1/namespaces/workspace:<id>`.
|
||||
//
|
||||
// This replaces two earlier tests that exercised parallel
|
||||
// implementations rather than the production closure (caught in
|
||||
// self-review).
|
||||
func TestNamespaceCleanupFn_HitsPluginAtCorrectNamespace(t *testing.T) {
|
||||
var (
|
||||
mu sync.Mutex
|
||||
gotPaths []string
|
||||
gotMethods []string
|
||||
)
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
mu.Lock()
|
||||
gotPaths = append(gotPaths, r.URL.Path)
|
||||
gotMethods = append(gotMethods, r.Method)
|
||||
mu.Unlock()
|
||||
switch r.URL.Path {
|
||||
case "/v1/health":
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"status":"ok","version":"1.0.0","capabilities":[]}`))
|
||||
default:
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}
|
||||
}))
|
||||
t.Cleanup(srv.Close)
|
||||
|
||||
t.Setenv("MEMORY_PLUGIN_URL", srv.URL)
|
||||
db, _, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
|
||||
bundle := Build(db)
|
||||
if bundle == nil {
|
||||
t.Fatal("Build returned nil with MEMORY_PLUGIN_URL set")
|
||||
}
|
||||
cleanup := bundle.NamespaceCleanupFn()
|
||||
if cleanup == nil {
|
||||
t.Fatal("NamespaceCleanupFn returned nil with non-nil Plugin")
|
||||
}
|
||||
|
||||
cleanup(context.Background(), "abc-123")
|
||||
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
// Two requests expected: /v1/health probe at Boot + DELETE for cleanup.
|
||||
foundDelete := false
|
||||
for i, p := range gotPaths {
|
||||
if gotMethods[i] == "DELETE" && p == "/v1/namespaces/workspace:abc-123" {
|
||||
foundDelete = true
|
||||
}
|
||||
}
|
||||
if !foundDelete {
|
||||
t.Errorf("expected DELETE /v1/namespaces/workspace:abc-123, got %v",
|
||||
pathsAndMethods(gotPaths, gotMethods))
|
||||
}
|
||||
}
|
||||
|
||||
// TestNamespaceCleanupFn_PluginErrorDoesNotPanic exercises the failure
|
||||
// path for real: server returns 500 on DELETE; the closure must log
|
||||
// and return without propagating. Replaces the parallel-implementation
|
||||
// version that didn't actually test the production code.
|
||||
func TestNamespaceCleanupFn_PluginErrorDoesNotPanic(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/v1/health" {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"status":"ok","version":"1.0.0","capabilities":[]}`))
|
||||
return
|
||||
}
|
||||
http.Error(w, "boom", http.StatusInternalServerError)
|
||||
}))
|
||||
t.Cleanup(srv.Close)
|
||||
|
||||
t.Setenv("MEMORY_PLUGIN_URL", srv.URL)
|
||||
db, _, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
|
||||
bundle := Build(db)
|
||||
cleanup := bundle.NamespaceCleanupFn()
|
||||
|
||||
// Must not panic, must not propagate the 500. Recovering with
|
||||
// defer is belt-and-suspenders — production calls this from a
|
||||
// for-loop in workspace_crud.go that has no recover.
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Errorf("cleanup panicked on plugin 500: %v", r)
|
||||
}
|
||||
}()
|
||||
cleanup(context.Background(), "ws-1")
|
||||
}
|
||||
|
||||
func pathsAndMethods(paths, methods []string) []string {
|
||||
out := make([]string, len(paths))
|
||||
for i := range paths {
|
||||
out[i] = methods[i] + " " + paths[i]
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers"
|
||||
memwiring "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/wiring"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/middleware"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
||||
@@ -23,7 +24,7 @@ import (
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provisioner, platformURL, configsDir string, wh *handlers.WorkspaceHandler, channelMgr *channels.Manager) *gin.Engine {
|
||||
func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provisioner, platformURL, configsDir string, wh *handlers.WorkspaceHandler, channelMgr *channels.Manager, memBundle *memwiring.Bundle) *gin.Engine {
|
||||
r := gin.Default()
|
||||
|
||||
// Issue #179 — trust no reverse-proxy headers. Without this call Gin's
|
||||
@@ -150,6 +151,9 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
|
||||
// F1084/#1131: Export applies redactSecrets before returning content.
|
||||
// F1085/#1132: Import applies redactSecrets before persisting content.)
|
||||
adminMemH := handlers.NewAdminMemoriesHandler()
|
||||
if memBundle != nil {
|
||||
adminMemH.WithMemoryV2(memBundle.Plugin, memBundle.Resolver)
|
||||
}
|
||||
wsAdmin.GET("/admin/memories/export", adminMemH.Export)
|
||||
wsAdmin.POST("/admin/memories/import", adminMemH.Import)
|
||||
}
|
||||
@@ -370,6 +374,9 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
|
||||
// C3: commit_memory/recall_memory with scope=GLOBAL → permission error;
|
||||
// send_message_to_user excluded unless MOLECULE_MCP_ALLOW_SEND_MESSAGE=true.
|
||||
mcpH := handlers.NewMCPHandler(db.DB, broadcaster)
|
||||
if memBundle != nil {
|
||||
mcpH.WithMemoryV2(memBundle.Plugin, memBundle.Resolver)
|
||||
}
|
||||
mcpRl := middleware.NewMCPRateLimiter(120, time.Minute, context.Background())
|
||||
wsAuth.GET("/mcp/stream", mcpRl.Middleware(), mcpH.Stream)
|
||||
wsAuth.POST("/mcp", mcpRl.Middleware(), mcpH.Call)
|
||||
@@ -498,7 +505,6 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
|
||||
tmplAdmin.GET("/templates", tmplh.List)
|
||||
tmplAdmin.POST("/templates/import", tmplh.Import)
|
||||
}
|
||||
wsAuth.GET("/shared-context", tmplh.SharedContext)
|
||||
wsAuth.PUT("/files", tmplh.ReplaceFiles)
|
||||
wsAuth.GET("/files", tmplh.ListFiles)
|
||||
wsAuth.GET("/files/*path", tmplh.ReadFile)
|
||||
|
||||
+10
-4
@@ -491,20 +491,26 @@ async def get_peers() -> list[dict]:
|
||||
return peers
|
||||
|
||||
|
||||
async def get_workspace_info() -> dict:
|
||||
async def get_workspace_info(source_workspace_id: str | None = None) -> dict:
|
||||
"""Get this workspace's info from the platform.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace to
|
||||
introspect when the agent is registered into multiple workspaces
|
||||
(multi-workspace mode). Unset → defaults to the module-level
|
||||
WORKSPACE_ID — single-workspace operators see no behaviour change.
|
||||
|
||||
Distinguishes three failure shapes so callers can handle them
|
||||
distinctly (#2429):
|
||||
- 410 Gone → workspace was deleted; re-onboard required
|
||||
- 404 / other → workspace never existed (or transient)
|
||||
- exception → network / auth failure
|
||||
"""
|
||||
src = source_workspace_id or WORKSPACE_ID
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
try:
|
||||
resp = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}",
|
||||
headers=auth_headers(),
|
||||
f"{PLATFORM_URL}/workspaces/{src}",
|
||||
headers=auth_headers(src),
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
return resp.json()
|
||||
@@ -521,7 +527,7 @@ async def get_workspace_info() -> dict:
|
||||
body = {}
|
||||
return {
|
||||
"error": "removed",
|
||||
"id": body.get("id", WORKSPACE_ID),
|
||||
"id": body.get("id", src),
|
||||
"removed_at": body.get("removed_at"),
|
||||
"hint": body.get(
|
||||
"hint",
|
||||
|
||||
@@ -123,16 +123,20 @@ async def handle_tool_call(name: str, arguments: dict) -> str:
|
||||
source_workspace_id=arguments.get("source_workspace_id") or None,
|
||||
)
|
||||
elif name == "get_workspace_info":
|
||||
return await tool_get_workspace_info()
|
||||
return await tool_get_workspace_info(
|
||||
source_workspace_id=arguments.get("source_workspace_id") or None,
|
||||
)
|
||||
elif name == "commit_memory":
|
||||
return await tool_commit_memory(
|
||||
arguments.get("content", ""),
|
||||
arguments.get("scope", "LOCAL"),
|
||||
source_workspace_id=arguments.get("source_workspace_id") or None,
|
||||
)
|
||||
elif name == "recall_memory":
|
||||
return await tool_recall_memory(
|
||||
arguments.get("query", ""),
|
||||
arguments.get("scope", ""),
|
||||
source_workspace_id=arguments.get("source_workspace_id") or None,
|
||||
)
|
||||
elif name == "wait_for_message":
|
||||
return await tool_wait_for_message(
|
||||
@@ -151,6 +155,7 @@ async def handle_tool_call(name: str, arguments: dict) -> str:
|
||||
arguments.get("peer_id", ""),
|
||||
arguments.get("limit", 20),
|
||||
arguments.get("before_ts", ""),
|
||||
source_workspace_id=arguments.get("source_workspace_id") or None,
|
||||
)
|
||||
return f"Unknown tool: {name}"
|
||||
|
||||
|
||||
+51
-14
@@ -545,19 +545,34 @@ async def tool_list_peers(source_workspace_id: str | None = None) -> str:
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
async def tool_get_workspace_info() -> str:
|
||||
"""Get this workspace's own info."""
|
||||
info = await get_workspace_info()
|
||||
async def tool_get_workspace_info(source_workspace_id: str | None = None) -> str:
|
||||
"""Get this workspace's own info.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace to
|
||||
introspect when the agent is registered into multiple workspaces.
|
||||
Unset → falls back to module-level WORKSPACE_ID.
|
||||
"""
|
||||
info = await get_workspace_info(source_workspace_id=source_workspace_id)
|
||||
return json.dumps(info, indent=2)
|
||||
|
||||
|
||||
async def tool_commit_memory(content: str, scope: str = "LOCAL") -> str:
|
||||
async def tool_commit_memory(
|
||||
content: str,
|
||||
scope: str = "LOCAL",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Save important information to persistent memory.
|
||||
|
||||
GLOBAL scope is writable only by root workspaces (tier == 0).
|
||||
RBAC memory.write permission is required for all scope levels.
|
||||
The source workspace_id is embedded in every record so the platform
|
||||
can enforce cross-workspace isolation and audit trail.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace this
|
||||
memory belongs to when the agent is registered into multiple
|
||||
workspaces (PR-1 / multi-workspace mode). When unset, falls back
|
||||
to the module-level WORKSPACE_ID — single-workspace operators see
|
||||
no behaviour change.
|
||||
"""
|
||||
if not content:
|
||||
return "Error: content is required"
|
||||
@@ -581,18 +596,19 @@ async def tool_commit_memory(content: str, scope: str = "LOCAL") -> str:
|
||||
"Non-root workspaces may use LOCAL or TEAM scope."
|
||||
)
|
||||
|
||||
src = source_workspace_id or WORKSPACE_ID
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/memories",
|
||||
f"{PLATFORM_URL}/workspaces/{src}/memories",
|
||||
json={
|
||||
"content": content,
|
||||
"scope": scope,
|
||||
# Embed source workspace so the platform can namespace-isolate
|
||||
# and audit cross-workspace writes (GH#1610 fix).
|
||||
"workspace_id": WORKSPACE_ID,
|
||||
"workspace_id": src,
|
||||
},
|
||||
headers=_auth_headers_for_heartbeat(),
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
data = resp.json()
|
||||
if resp.status_code in (200, 201):
|
||||
@@ -602,13 +618,21 @@ async def tool_commit_memory(content: str, scope: str = "LOCAL") -> str:
|
||||
return f"Error saving memory: {e}"
|
||||
|
||||
|
||||
async def tool_recall_memory(query: str = "", scope: str = "") -> str:
|
||||
async def tool_recall_memory(
|
||||
query: str = "",
|
||||
scope: str = "",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Search persistent memory for previously saved information.
|
||||
|
||||
RBAC memory.read permission is required (mirrors builtin_tools/memory.py).
|
||||
The workspace_id is sent as a query parameter so the platform can
|
||||
cross-validate it against the auth token and defend against any future
|
||||
path traversal / cross-tenant read bugs in the platform itself.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace's memories
|
||||
to search when the agent is registered into multiple workspaces.
|
||||
Unset → defaults to the module-level WORKSPACE_ID.
|
||||
"""
|
||||
# RBAC: require memory.read permission (mirrors builtin_tools/memory.py)
|
||||
if not _check_memory_read_permission():
|
||||
@@ -617,7 +641,8 @@ async def tool_recall_memory(query: str = "", scope: str = "") -> str:
|
||||
"permission for this operation."
|
||||
)
|
||||
|
||||
params: dict[str, str] = {"workspace_id": WORKSPACE_ID}
|
||||
src = source_workspace_id or WORKSPACE_ID
|
||||
params: dict[str, str] = {"workspace_id": src}
|
||||
if query:
|
||||
params["q"] = query
|
||||
if scope:
|
||||
@@ -625,9 +650,9 @@ async def tool_recall_memory(query: str = "", scope: str = "") -> str:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/memories",
|
||||
f"{PLATFORM_URL}/workspaces/{src}/memories",
|
||||
params=params,
|
||||
headers=_auth_headers_for_heartbeat(),
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
data = resp.json()
|
||||
if isinstance(data, list):
|
||||
@@ -664,7 +689,12 @@ _INBOX_NOT_ENABLED_MSG = (
|
||||
)
|
||||
|
||||
|
||||
async def tool_chat_history(peer_id: str, limit: int = 20, before_ts: str = "") -> str:
|
||||
async def tool_chat_history(
|
||||
peer_id: str,
|
||||
limit: int = 20,
|
||||
before_ts: str = "",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Fetch the prior conversation with one peer.
|
||||
|
||||
Hits ``/workspaces/<self>/activity?peer_id=<peer>&limit=<N>``
|
||||
@@ -686,6 +716,11 @@ async def tool_chat_history(peer_id: str, limit: int = 20, before_ts: str = "")
|
||||
histories — pass the oldest ``ts`` from the previous
|
||||
response. Empty (default) returns the most recent ``limit``
|
||||
rows.
|
||||
source_workspace_id: Which registered workspace's activity log
|
||||
to query. Auto-routes via ``_peer_to_source`` cache when
|
||||
unset (the workspace this peer was discovered through);
|
||||
falls back to module-level WORKSPACE_ID for single-workspace
|
||||
operators.
|
||||
|
||||
Returns a JSON-encoded list of activity rows (or an error string
|
||||
starting with ``Error:`` so the agent can branch). Each row carries
|
||||
@@ -701,6 +736,8 @@ async def tool_chat_history(peer_id: str, limit: int = 20, before_ts: str = "")
|
||||
if limit > 500:
|
||||
limit = 500
|
||||
|
||||
src = source_workspace_id or _peer_to_source.get(peer_id) or WORKSPACE_ID
|
||||
|
||||
params: dict[str, str] = {
|
||||
"peer_id": peer_id,
|
||||
"limit": str(limit),
|
||||
@@ -713,9 +750,9 @@ async def tool_chat_history(peer_id: str, limit: int = 20, before_ts: str = "")
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/activity",
|
||||
f"{PLATFORM_URL}/workspaces/{src}/activity",
|
||||
params=params,
|
||||
headers=_auth_headers_for_heartbeat(),
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return f"Error: chat_history request failed: {exc}"
|
||||
|
||||
@@ -444,7 +444,7 @@ class BaseAdapter(ABC):
|
||||
"""
|
||||
from plugins import load_plugins
|
||||
from skill_loader.loader import load_skills
|
||||
from coordinator import get_children, get_parent_context, build_children_description
|
||||
from coordinator import get_children, build_children_description
|
||||
from prompt import build_system_prompt, get_peer_capabilities, get_platform_instructions
|
||||
from builtin_tools.approval import request_approval
|
||||
from builtin_tools.delegation import delegate_task, delegate_task_async, check_task_status
|
||||
@@ -500,10 +500,13 @@ class BaseAdapter(ABC):
|
||||
logger.info(f"Coordinator mode: {len(children)} children")
|
||||
all_tools.append(route_task_to_team)
|
||||
|
||||
# Parent context (if this is a child workspace)
|
||||
parent_context = await get_parent_context()
|
||||
|
||||
# Build system prompt with all context
|
||||
# Build system prompt with all context. Parent→child knowledge sharing
|
||||
# was previously handled by `shared_context` (parent's config.yaml file
|
||||
# paths injected into the child's prompt at boot). That path was removed
|
||||
# — agents now pull team-scoped knowledge via memory v2's team:<id>
|
||||
# namespace (recall_memory) on demand instead of paying for it on every
|
||||
# boot regardless of need. See RFC #2789 for the future shared-file
|
||||
# storage that complements this for large blob-shaped artefacts.
|
||||
peers = await get_peer_capabilities(platform_url, config.workspace_id)
|
||||
platform_instructions = await get_platform_instructions(platform_url, config.workspace_id)
|
||||
coordinator_prompt = build_children_description(children) if is_coordinator else ""
|
||||
@@ -516,7 +519,6 @@ class BaseAdapter(ABC):
|
||||
prompt_files=config.prompt_files,
|
||||
plugin_rules=plugins.rules,
|
||||
plugin_prompts=extra_prompts,
|
||||
parent_context=parent_context,
|
||||
platform_instructions=platform_instructions,
|
||||
)
|
||||
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
"""Build the Starlette routes for a workspace from its (card, adapter
|
||||
state) pair.
|
||||
|
||||
Pairs with PR #2756, which decoupled ``/.well-known/agent-card.json`` from
|
||||
``adapter.setup()`` failure. main.py was the only consumer and was
|
||||
``# pragma: no cover`` — so the wiring (card-route mounted unconditionally,
|
||||
JSON-RPC route swapped between DefaultRequestHandler and the
|
||||
not-configured handler based on ``adapter_ready``) had no pytest coverage.
|
||||
|
||||
A future refactor that re-couples the two would silently bypass PR #2756
|
||||
and shipped the original "stuck booting forever" UX again. That gap is
|
||||
what closes here: extract the route-assembly into a pure function whose
|
||||
behaviour is unit-testable with Starlette's TestClient, and have main.py
|
||||
call it. Issue molecule-core#2761.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from starlette.routing import Route
|
||||
|
||||
from not_configured_handler import make_not_configured_handler
|
||||
|
||||
# Heavy a2a-sdk imports are lazy: deferred to inside build_routes so
|
||||
# tests that exercise only the not-configured branch (no executor) don't
|
||||
# need a2a.server.request_handlers / routes stubbed in their conftest.
|
||||
# Production boot pays the import cost once, on workspace startup.
|
||||
|
||||
|
||||
def build_routes(
|
||||
agent_card: Any,
|
||||
executor: Any | None,
|
||||
adapter_error: str | None,
|
||||
) -> list:
|
||||
"""Return the list of Starlette routes for this workspace.
|
||||
|
||||
Always mounts ``/.well-known/agent-card.json`` from ``agent_card``.
|
||||
|
||||
JSON-RPC route at ``/`` swaps based on adapter state:
|
||||
|
||||
* ``executor`` is non-None → ``DefaultRequestHandler`` with the
|
||||
executor (production happy-path).
|
||||
* ``executor`` is None → ``not_configured_handler`` returning JSON-RPC
|
||||
``-32603`` with ``adapter_error`` in ``error.data``. The
|
||||
workspace stays REACHABLE (operator can introspect, deprovision,
|
||||
redeploy with corrected env) instead of crash-looping invisibly.
|
||||
|
||||
The two branches are mutually exclusive — caller passes one or the
|
||||
other, never both. Test coverage at ``tests/test_boot_routes.py``
|
||||
pins the contract.
|
||||
"""
|
||||
from a2a.server.routes import create_agent_card_routes
|
||||
|
||||
routes: list = []
|
||||
routes.extend(create_agent_card_routes(agent_card))
|
||||
|
||||
if executor is not None:
|
||||
from a2a.server.request_handlers import DefaultRequestHandler
|
||||
from a2a.server.routes import create_jsonrpc_routes
|
||||
from a2a.server.tasks import InMemoryTaskStore
|
||||
|
||||
handler = DefaultRequestHandler(
|
||||
agent_executor=executor,
|
||||
task_store=InMemoryTaskStore(),
|
||||
agent_card=agent_card,
|
||||
)
|
||||
# enable_v0_3_compat=True is the JSON-RPC wire-compat path: clients
|
||||
# using v0.3-shaped payloads (`"role": "user"` lowercase + camelCase
|
||||
# Pydantic field names) can talk to us without re-deploying.
|
||||
# Outbound payloads must also use v0.3 shape — see main.py's
|
||||
# original comment block for the full a2a-sdk 1.x migration note.
|
||||
routes.extend(
|
||||
create_jsonrpc_routes(
|
||||
request_handler=handler,
|
||||
rpc_url="/",
|
||||
enable_v0_3_compat=True,
|
||||
)
|
||||
)
|
||||
else:
|
||||
routes.append(
|
||||
Route("/", make_not_configured_handler(adapter_error), methods=["POST"])
|
||||
)
|
||||
|
||||
return routes
|
||||
@@ -0,0 +1,57 @@
|
||||
"""Helpers for building / mutating the workspace ``AgentCard``.
|
||||
|
||||
Kept as their own module so the behavior is unit-testable without booting
|
||||
the whole runtime (``main.py`` is ``# pragma: no cover``).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Iterable
|
||||
|
||||
from a2a.types import AgentCard, AgentSkill
|
||||
|
||||
|
||||
def enrich_card_skills(card: AgentCard, loaded_skills: Iterable | None) -> bool:
|
||||
"""Replace ``card.skills`` with rich metadata from the adapter's loaded
|
||||
skills, in place. Pairs with PR #2756: the card was built up front from
|
||||
static ``config.skills`` names so /.well-known/agent-card.json could
|
||||
serve before ``adapter.setup()`` finishes; this swaps in the richer
|
||||
descriptions/tags/examples that ``setup()``'s skill loader produces.
|
||||
|
||||
Returns ``True`` on swap, ``False`` when the swap was skipped or
|
||||
failed. Failure cases:
|
||||
* ``loaded_skills`` is None / empty — caller didn't load any.
|
||||
* Any element doesn't expose ``.metadata.{id,name,description,tags,examples}``
|
||||
(a future adapter that doesn't follow the canonical shape).
|
||||
|
||||
Failures DO NOT raise — a malformed ``loaded_skills`` shape would
|
||||
otherwise propagate to ``main.py``'s outer ``except Exception``,
|
||||
silently degrading an OK boot to the not-configured state. Static
|
||||
stubs from ``config.skills`` stay in place; setup() already
|
||||
succeeded, the agent works, only the card's skill enrichment is
|
||||
degraded. Operator sees a clear log line; tests assert this
|
||||
distinction.
|
||||
"""
|
||||
if not loaded_skills:
|
||||
return False
|
||||
|
||||
try:
|
||||
rich = [
|
||||
AgentSkill(
|
||||
id=skill.metadata.id,
|
||||
name=skill.metadata.name,
|
||||
description=skill.metadata.description,
|
||||
tags=skill.metadata.tags,
|
||||
examples=skill.metadata.examples,
|
||||
)
|
||||
for skill in loaded_skills
|
||||
]
|
||||
except Exception as enrich_err: # noqa: BLE001
|
||||
print(
|
||||
f"Warning: skill metadata enrichment failed (keeping static "
|
||||
f"stubs from config.skills): {type(enrich_err).__name__}: {enrich_err}",
|
||||
flush=True,
|
||||
)
|
||||
return False
|
||||
|
||||
card.skills = rich
|
||||
return True
|
||||
@@ -347,7 +347,6 @@ class WorkspaceConfig:
|
||||
plugins: list[str] = field(default_factory=list) # installed plugin names
|
||||
tools: list[str] = field(default_factory=list)
|
||||
prompt_files: list[str] = field(default_factory=list)
|
||||
shared_context: list[str] = field(default_factory=list)
|
||||
a2a: A2AConfig = field(default_factory=A2AConfig)
|
||||
delegation: DelegationConfig = field(default_factory=DelegationConfig)
|
||||
sandbox: SandboxConfig = field(default_factory=SandboxConfig)
|
||||
@@ -555,7 +554,6 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
|
||||
plugins=raw.get("plugins", []),
|
||||
tools=raw.get("tools", []),
|
||||
prompt_files=raw.get("prompt_files", []),
|
||||
shared_context=raw.get("shared_context", []),
|
||||
a2a=A2AConfig(
|
||||
port=a2a_raw.get("port", 8000),
|
||||
streaming=a2a_raw.get("streaming", True),
|
||||
|
||||
@@ -32,29 +32,6 @@ if not _WORKSPACE_ID_raw:
|
||||
WORKSPACE_ID = _WORKSPACE_ID_raw
|
||||
|
||||
|
||||
async def get_parent_context() -> list[dict]:
|
||||
"""Fetch shared context files from this workspace's parent.
|
||||
|
||||
Returns a list of {"path": str, "content": str} dicts.
|
||||
Returns empty list if no parent, parent unreachable, or no shared context.
|
||||
"""
|
||||
parent_id = os.environ.get("PARENT_ID", "")
|
||||
if not parent_id:
|
||||
return []
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{parent_id}/shared-context",
|
||||
headers={"X-Workspace-ID": WORKSPACE_ID},
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
return resp.json()
|
||||
except Exception as e:
|
||||
logger.warning("Failed to fetch parent context: %s", e)
|
||||
return []
|
||||
|
||||
|
||||
async def get_children() -> list[dict]:
|
||||
"""Fetch this workspace's children from the platform."""
|
||||
try:
|
||||
|
||||
+136
-111
@@ -148,62 +148,15 @@ async def main(): # pragma: no cover
|
||||
heartbeat=heartbeat,
|
||||
)
|
||||
|
||||
# 5. Setup adapter and create executor
|
||||
# If setup fails, ensure heartbeat is stopped to prevent resource leak
|
||||
try:
|
||||
await adapter.setup(adapter_config)
|
||||
executor = await adapter.create_executor(adapter_config)
|
||||
|
||||
# 5a. Boot-smoke short-circuit (issue #2275): if MOLECULE_SMOKE_MODE
|
||||
# is set, exercise the executor's full import tree by calling
|
||||
# execute() once with stub deps + a short timeout. Skips platform
|
||||
# registration + uvicorn entirely. Returns process exit code.
|
||||
from smoke_mode import is_smoke_mode, run_executor_smoke
|
||||
if is_smoke_mode():
|
||||
exit_code = await run_executor_smoke(executor)
|
||||
if hasattr(heartbeat, "stop"):
|
||||
try:
|
||||
await heartbeat.stop()
|
||||
except Exception: # noqa: BLE001
|
||||
pass
|
||||
raise SystemExit(exit_code)
|
||||
|
||||
# 5b. Restore from pre-stop snapshot if one exists (GH#1391).
|
||||
# The snapshot is scrubbed before being written, so secrets are
|
||||
# already redacted — restore_state must not re-expose them.
|
||||
from lib.pre_stop import read_snapshot
|
||||
snapshot = read_snapshot()
|
||||
if snapshot:
|
||||
try:
|
||||
adapter.restore_state(snapshot)
|
||||
print(
|
||||
f"Pre-stop snapshot restored: task={snapshot.get('current_task', '')!r}, "
|
||||
f"uptime={snapshot.get('uptime_seconds', 0)}s"
|
||||
)
|
||||
except Exception as restore_err:
|
||||
print(f"Warning: snapshot restore failed (continuing): {restore_err}")
|
||||
except Exception:
|
||||
# heartbeat hasn't started yet but may have async tasks pending
|
||||
if hasattr(heartbeat, "stop"):
|
||||
try:
|
||||
await heartbeat.stop()
|
||||
except Exception:
|
||||
pass
|
||||
raise
|
||||
|
||||
# 5.5. Initialise Temporal durable execution wrapper (optional)
|
||||
# Connects to TEMPORAL_HOST (default: localhost:7233) and starts a
|
||||
# co-located Temporal worker as a background asyncio task.
|
||||
# No-op with a warning log if Temporal is unreachable or temporalio
|
||||
# is not installed — all tasks fall back to direct execution transparently.
|
||||
from builtin_tools.temporal_workflow import create_wrapper as _create_temporal_wrapper
|
||||
temporal_wrapper = _create_temporal_wrapper()
|
||||
await temporal_wrapper.start()
|
||||
|
||||
# Get loaded skills for agent card (adapter may have populated them)
|
||||
loaded_skills = getattr(adapter, "loaded_skills", [])
|
||||
|
||||
# 6. Build Agent Card
|
||||
# 5. Build the AgentCard *before* adapter.setup() so /.well-known/agent-card.json
|
||||
# is reachable as soon as uvicorn binds, regardless of whether the adapter
|
||||
# has working LLM credentials. Decoupling readiness ("is the workspace up?")
|
||||
# from configuration ("can it actually answer?") means a workspace with a
|
||||
# missing/rotated key stays REACHABLE — canvas can render a clear
|
||||
# "agent not configured" error instead of "stuck booting forever," and
|
||||
# operators can deprovision/redeploy normally. Skills built from
|
||||
# config.skills (static names from config.yaml) up front; richer metadata
|
||||
# from the adapter's loaded_skills swaps in below if setup() succeeds.
|
||||
machine_ip = os.environ.get("HOSTNAME", get_machine_ip())
|
||||
workspace_url = f"http://{machine_ip}:{port}"
|
||||
|
||||
@@ -237,67 +190,109 @@ async def main(): # pragma: no cover
|
||||
# always available and tasks/get accepts historyLength via
|
||||
# apply_history_length(). Don't add this kwarg back.
|
||||
),
|
||||
# Static skill stubs from config.yaml; replaced with rich metadata
|
||||
# below if adapter.setup() loads skills successfully.
|
||||
skills=[
|
||||
AgentSkill(
|
||||
id=skill.metadata.id,
|
||||
name=skill.metadata.name,
|
||||
description=skill.metadata.description,
|
||||
tags=skill.metadata.tags,
|
||||
examples=skill.metadata.examples,
|
||||
)
|
||||
for skill in loaded_skills
|
||||
AgentSkill(id=name, name=name, description=name, tags=[], examples=[])
|
||||
for name in (config.skills or [])
|
||||
],
|
||||
default_input_modes=["text/plain", "application/json"],
|
||||
default_output_modes=["text/plain", "application/json"],
|
||||
)
|
||||
|
||||
# 6. Setup adapter and create executor
|
||||
# On failure: log + continue. The card route stays mounted (above);
|
||||
# the JSON-RPC route below returns -32603 "agent not configured" until
|
||||
# the operator fixes credentials and redeploys. Heartbeat keeps running
|
||||
# so the platform sees the workspace as reachable-but-misconfigured
|
||||
# rather than crash-looping.
|
||||
adapter_ready = False
|
||||
adapter_error: str | None = None
|
||||
executor = None
|
||||
try:
|
||||
await adapter.setup(adapter_config)
|
||||
executor = await adapter.create_executor(adapter_config)
|
||||
|
||||
# 6a. Boot-smoke short-circuit (issue #2275): if MOLECULE_SMOKE_MODE
|
||||
# is set, exercise the executor's full import tree by calling
|
||||
# execute() once with stub deps + a short timeout. Skips platform
|
||||
# registration + uvicorn entirely. Returns process exit code.
|
||||
from smoke_mode import is_smoke_mode, run_executor_smoke
|
||||
if is_smoke_mode():
|
||||
exit_code = await run_executor_smoke(executor)
|
||||
if hasattr(heartbeat, "stop"):
|
||||
try:
|
||||
await heartbeat.stop()
|
||||
except Exception: # noqa: BLE001
|
||||
pass
|
||||
raise SystemExit(exit_code)
|
||||
|
||||
# 6b. Restore from pre-stop snapshot if one exists (GH#1391).
|
||||
# The snapshot is scrubbed before being written, so secrets are
|
||||
# already redacted — restore_state must not re-expose them.
|
||||
from lib.pre_stop import read_snapshot
|
||||
snapshot = read_snapshot()
|
||||
if snapshot:
|
||||
try:
|
||||
adapter.restore_state(snapshot)
|
||||
print(
|
||||
f"Pre-stop snapshot restored: task={snapshot.get('current_task', '')!r}, "
|
||||
f"uptime={snapshot.get('uptime_seconds', 0)}s"
|
||||
)
|
||||
except Exception as restore_err:
|
||||
print(f"Warning: snapshot restore failed (continuing): {restore_err}")
|
||||
|
||||
# 6c. Swap rich skill metadata into the card now that setup() loaded
|
||||
# them. In-place mutation: a2a-sdk's create_agent_card_routes serialises
|
||||
# the card on each request, so the route mounted below sees the update.
|
||||
# Isolated via card_helpers.enrich_card_skills — a malformed
|
||||
# loaded_skills shape (e.g., a future adapter that doesn't follow
|
||||
# the .metadata convention) is logged + swallowed instead of
|
||||
# propagating up to the outer except, where it would silently
|
||||
# degrade an OK boot to the not-configured state.
|
||||
from card_helpers import enrich_card_skills
|
||||
enrich_card_skills(agent_card, getattr(adapter, "loaded_skills", None))
|
||||
adapter_ready = True
|
||||
except SystemExit:
|
||||
# Smoke-mode exit signal — propagate untouched.
|
||||
raise
|
||||
except Exception as setup_err: # noqa: BLE001
|
||||
adapter_error = f"{type(setup_err).__name__}: {setup_err}"
|
||||
print(
|
||||
f"WARNING: adapter.setup() failed — workspace will serve agent-card "
|
||||
f"but JSON-RPC will return -32603 until configuration is fixed. "
|
||||
f"Reason: {adapter_error}",
|
||||
flush=True,
|
||||
)
|
||||
# Heartbeat keeps running so the platform marks the workspace as
|
||||
# reachable-but-misconfigured. Operators can then redeploy with the
|
||||
# correct env vars without having to chase a crash-loop.
|
||||
|
||||
# 6.5. Initialise Temporal durable execution wrapper (optional). Only
|
||||
# meaningful when an executor exists; skipped on misconfigured boots.
|
||||
if adapter_ready:
|
||||
from builtin_tools.temporal_workflow import create_wrapper as _create_temporal_wrapper
|
||||
temporal_wrapper = _create_temporal_wrapper()
|
||||
await temporal_wrapper.start()
|
||||
|
||||
# 7. Wrap in A2A.
|
||||
#
|
||||
# Regression fix (#204): PR #198 tried to wire push_config_store +
|
||||
# push_sender to satisfy #175 (push notification capability), but
|
||||
# PushNotificationSender is an abstract base class in the a2a-sdk and
|
||||
# can't be instantiated directly. Passing it crashed main.py on startup
|
||||
# with `TypeError: Can't instantiate abstract class`. Dropped back to
|
||||
# DefaultRequestHandler's own defaults — pushNotifications capability
|
||||
# in the AgentCard below is still advertised via AgentCapabilities so
|
||||
# clients know we COULD do pushes; actually implementing them requires
|
||||
# a concrete sender subclass, tracked as a Phase-H follow-up to #175.
|
||||
handler = DefaultRequestHandler(
|
||||
agent_executor=executor,
|
||||
task_store=InMemoryTaskStore(),
|
||||
# a2a-sdk 1.x added agent_card as a required positional/keyword
|
||||
# argument — it's used internally for capability dispatch (e.g.
|
||||
# routing tasks/get historyLength based on the card's protocol
|
||||
# version). Pass the same agent_card we registered with the
|
||||
# platform so the handler's capability surface matches what the
|
||||
# AgentCard advertises.
|
||||
agent_card=agent_card,
|
||||
)
|
||||
|
||||
# v1: replace A2AStarletteApplication with Starlette route factory.
|
||||
# rpc_url is required in a2a-sdk 1.x (was implicit at root in 0.x).
|
||||
# Use '/' to match a2a.utils.constants.DEFAULT_RPC_URL — that's also
|
||||
# what the platform's a2a_proxy.go POSTs to (it forwards to the
|
||||
# workspace's URL without appending a path). Card endpoint stays at
|
||||
# the well-known path /.well-known/agent-card.json (handled by
|
||||
# create_agent_card_routes default).
|
||||
routes = []
|
||||
routes.extend(create_agent_card_routes(agent_card))
|
||||
# enable_v0_3_compat=True is the JSON-RPC wire-compat path: clients
|
||||
# using v0.3-shaped payloads (`"role": "user"` lowercase + camelCase
|
||||
# Pydantic field names) can talk to us without re-deploying. Outbound
|
||||
# JSON-RPC wire payloads MUST also use v0.3 shape — the v0.3 compat
|
||||
# adapter at /usr/local/lib/python3.11/site-packages/a2a/compat/v0_3/
|
||||
# validates against Pydantic Role enum (`agent`|`user`) and rejects
|
||||
# the protobuf-style `ROLE_USER` enum names with JSON-RPC -32600
|
||||
# (Invalid Request). Native v1.x types (a2a.types.Role.ROLE_AGENT)
|
||||
# are only for code that constructs Message objects in-process and
|
||||
# hands them to the SDK, which serialises them correctly for the
|
||||
# outbound wire format.
|
||||
routes.extend(create_jsonrpc_routes(request_handler=handler, rpc_url="/", enable_v0_3_compat=True))
|
||||
app = Starlette(routes=routes)
|
||||
# Route assembly is in workspace/boot_routes.py so the contract —
|
||||
# card always mounted, JSON-RPC route swaps based on adapter state
|
||||
# (DefaultRequestHandler when executor is non-None, not_configured
|
||||
# handler returning -32603 otherwise) — is unit-testable with
|
||||
# Starlette's TestClient. main.py is `# pragma: no cover` so without
|
||||
# this extraction a future refactor that re-coupled card + setup()
|
||||
# would silently bypass PR #2756. tests/test_boot_routes.py pins
|
||||
# the four-branch contract.
|
||||
from boot_routes import build_routes
|
||||
app = Starlette(routes=build_routes(agent_card, executor, adapter_error))
|
||||
|
||||
# 8. Register with platform
|
||||
# When adapter.setup() failed, advertise via configuration_status so
|
||||
# the platform/canvas can render "configured: false, reason: …" instead
|
||||
# of a confused "ready but silent" state.
|
||||
loaded_skills = getattr(adapter, "loaded_skills", None) or []
|
||||
agent_card_dict = {
|
||||
"name": config.name,
|
||||
"description": config.description,
|
||||
@@ -311,11 +306,16 @@ async def main(): # pragma: no cover
|
||||
"tags": s.metadata.tags,
|
||||
}
|
||||
for s in loaded_skills
|
||||
] if adapter_ready else [
|
||||
{"id": n, "name": n, "description": n, "tags": []}
|
||||
for n in (config.skills or [])
|
||||
],
|
||||
"capabilities": {
|
||||
"streaming": config.a2a.streaming,
|
||||
"pushNotifications": config.a2a.push_notifications,
|
||||
},
|
||||
"configuration_status": "ready" if adapter_ready else "not_configured",
|
||||
**({"configuration_error": adapter_error} if adapter_error else {}),
|
||||
}
|
||||
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
@@ -364,7 +364,9 @@ async def main(): # pragma: no cover
|
||||
# 9b. Start skills hot-reload watcher (background task)
|
||||
# When a skill file changes the watcher reloads the skill module and calls
|
||||
# back into the adapter so the next A2A request uses the updated tools.
|
||||
if config.skills:
|
||||
# Skipped on misconfigured boots — adapter has no executor / tool registry
|
||||
# to swap into, so reloading skills would NPE on the agent rebuild path.
|
||||
if adapter_ready and config.skills:
|
||||
try:
|
||||
from skill_loader.watcher import SkillsWatcher
|
||||
|
||||
@@ -452,7 +454,24 @@ async def main(): # pragma: no cover
|
||||
limit = int(request.query_params.get("limit", "100"))
|
||||
except (TypeError, ValueError):
|
||||
return JSONResponse({"error": "since and limit must be integers"}, status_code=400)
|
||||
result = await adapter.transcript_lines(since=since, limit=limit)
|
||||
# Isolate adapter call: misconfigured boots leave the adapter
|
||||
# partially-initialised, and a future adapter override of
|
||||
# transcript_lines might assume setup() ran. Surface a 503 with
|
||||
# a clear reason instead of letting the exception propagate to
|
||||
# Starlette's 500 handler — same pattern as the not-configured
|
||||
# JSON-RPC route (PR #2756). BaseAdapter.transcript_lines's
|
||||
# default returns {"supported": false} so today's 4 adapters
|
||||
# never trigger this branch; this is the safety net.
|
||||
try:
|
||||
result = await adapter.transcript_lines(since=since, limit=limit)
|
||||
except Exception as transcript_err: # noqa: BLE001
|
||||
return JSONResponse(
|
||||
{
|
||||
"error": "transcript unavailable",
|
||||
"detail": f"{type(transcript_err).__name__}: {transcript_err}",
|
||||
},
|
||||
status_code=503,
|
||||
)
|
||||
return JSONResponse(result)
|
||||
|
||||
starlette_app.add_route("/transcript", _transcript_handler, methods=["GET"])
|
||||
@@ -495,9 +514,13 @@ async def main(): # pragma: no cover
|
||||
|
||||
# 10b. Schedule initial_prompt self-message after server is ready.
|
||||
# Only runs on first boot — creates a marker file to prevent re-execution on restart.
|
||||
# Skipped on misconfigured boots: the self-message would route through the
|
||||
# platform back to /, hit the -32603 not-configured handler, and consume
|
||||
# the marker for a fire that can't actually run. Wait until the operator
|
||||
# fixes credentials and the workspace redeploys with adapter_ready=True.
|
||||
initial_prompt_task = None
|
||||
initial_prompt_marker = resolve_initial_prompt_marker(config_path)
|
||||
if config.initial_prompt and not os.path.exists(initial_prompt_marker):
|
||||
if adapter_ready and config.initial_prompt and not os.path.exists(initial_prompt_marker):
|
||||
# Write the marker UP FRONT (#71): if the prompt later crashes or
|
||||
# times out, we do NOT replay on next boot — that created a
|
||||
# ProcessError cascade where every message kept crashing. Operators
|
||||
@@ -615,7 +638,9 @@ async def main(): # pragma: no cover
|
||||
# workspaces upgrade opt-in — set idle_prompt in org.yaml defaults or
|
||||
# per-workspace to enable.
|
||||
idle_loop_task = None
|
||||
if config.idle_prompt:
|
||||
# Skipped on misconfigured boots — the self-fire would route to the
|
||||
# -32603 handler in a tight loop and consume cycles for no useful work.
|
||||
if adapter_ready and config.idle_prompt:
|
||||
# Idle-fire HTTP timeout. Kept tight relative to the fire cadence so a
|
||||
# hung platform doesn't accumulate dangling requests — a fire that
|
||||
# takes longer than the idle interval itself is almost certainly stuck.
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
"""Build a JSON-RPC handler that returns ``-32603 "agent not configured"``.
|
||||
|
||||
Used by the workspace runtime when ``adapter.setup()`` fails (most often
|
||||
because an LLM credential is missing or rotated). Lets ``/.well-known/agent-card.json``
|
||||
keep serving 200 — the workspace stays REACHABLE for canvas/operator
|
||||
introspection — while message-send requests get a clear, immediate
|
||||
error instead of silently timing out.
|
||||
|
||||
Kept as its own module so the behavior is unit-testable without booting
|
||||
the whole runtime (main.py is ``# pragma: no cover``).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Awaitable, Callable
|
||||
|
||||
from starlette.requests import Request
|
||||
from starlette.responses import JSONResponse
|
||||
|
||||
from secret_redactor import redact_secrets
|
||||
|
||||
|
||||
def make_not_configured_handler(
|
||||
reason: str | None,
|
||||
) -> Callable[[Request], Awaitable[JSONResponse]]:
|
||||
"""Return a Starlette POST handler that always 503s with JSON-RPC -32603.
|
||||
|
||||
``reason`` is surfaced in the JSON-RPC ``error.data`` field so canvas
|
||||
can render "agent not configured: <reason>" to the user. Pass the
|
||||
stringified ``adapter.setup()`` exception. ``None`` falls back to a
|
||||
generic "adapter.setup() failed".
|
||||
|
||||
Secret redaction (issue molecule-core#2760): ``reason`` is run
|
||||
through ``secret_redactor.redact_secrets`` once, when the handler
|
||||
is built. If a future adapter author writes ``raise
|
||||
RuntimeError(f"auth failed for {token}")``, the token is replaced
|
||||
with ``<redacted-secret>`` BEFORE it lands in the response —
|
||||
closes the structural leak path PR #2756 introduced. Per-request
|
||||
hot path stays unchanged (one cached string, no re-redaction).
|
||||
|
||||
The handler echoes the request's JSON-RPC ``id`` when present so a
|
||||
well-behaved JSON-RPC client can correlate the error to its request.
|
||||
Malformed bodies (non-JSON, missing id) get ``id: null`` per spec.
|
||||
"""
|
||||
|
||||
# Redact at handler-build time, not per-request, so the hot path
|
||||
# stays a constant lookup. The fallback string can't carry secrets
|
||||
# but we still pass it through redact_secrets() so a future change
|
||||
# to the fallback can't accidentally introduce a leak.
|
||||
fallback = redact_secrets(reason or "adapter.setup() failed")
|
||||
|
||||
async def _handler(request: Request) -> JSONResponse:
|
||||
try:
|
||||
body = await request.json()
|
||||
except Exception: # noqa: BLE001
|
||||
body = {}
|
||||
return JSONResponse(
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": body.get("id") if isinstance(body, dict) else None,
|
||||
"error": {
|
||||
"code": -32603,
|
||||
"message": "Internal error: agent not configured",
|
||||
"data": fallback,
|
||||
},
|
||||
},
|
||||
status_code=503,
|
||||
)
|
||||
|
||||
return _handler
|
||||
@@ -271,7 +271,19 @@ _GET_WORKSPACE_INFO = ToolSpec(
|
||||
"back to the user, or to determine whether you're a tier-0 "
|
||||
"root that can write GLOBAL memory)."
|
||||
),
|
||||
input_schema={"type": "object", "properties": {}},
|
||||
input_schema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"source_workspace_id": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Optional. In multi-workspace mode (this agent registered "
|
||||
"in N workspaces), introspect the named workspace instead "
|
||||
"of the primary one. Single-workspace agents omit this."
|
||||
),
|
||||
},
|
||||
},
|
||||
},
|
||||
impl=tool_get_workspace_info,
|
||||
section=A2A_SECTION,
|
||||
)
|
||||
@@ -455,6 +467,14 @@ _CHAT_HISTORY = ToolSpec(
|
||||
"Use the oldest `created_at` from a previous response."
|
||||
),
|
||||
},
|
||||
"source_workspace_id": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Optional. Multi-workspace mode: query the named "
|
||||
"workspace's activity log instead of the primary one. "
|
||||
"Auto-routes via the peer-discovery cache when unset."
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": ["peer_id"],
|
||||
},
|
||||
@@ -515,6 +535,16 @@ _COMMIT_MEMORY = ToolSpec(
|
||||
"enum": ["LOCAL", "TEAM", "GLOBAL"],
|
||||
"description": "Memory scope (default LOCAL).",
|
||||
},
|
||||
"source_workspace_id": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Optional. Multi-workspace mode: commit the memory "
|
||||
"into the named workspace's namespace instead of "
|
||||
"the primary one. Pair with the inbound message's "
|
||||
"`arrival_workspace_id` so memories stay in the "
|
||||
"tenant they were derived from."
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": ["content"],
|
||||
},
|
||||
@@ -544,6 +574,16 @@ _RECALL_MEMORY = ToolSpec(
|
||||
"enum": ["LOCAL", "TEAM", "GLOBAL", ""],
|
||||
"description": "Filter by scope (empty = all accessible).",
|
||||
},
|
||||
"source_workspace_id": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Optional. Multi-workspace mode: search the named "
|
||||
"workspace's memories instead of the primary one. "
|
||||
"Pair with the inbound message's "
|
||||
"`arrival_workspace_id` to recall context for the "
|
||||
"right tenant."
|
||||
),
|
||||
},
|
||||
},
|
||||
},
|
||||
impl=tool_recall_memory,
|
||||
|
||||
+26
-8
@@ -204,17 +204,31 @@ def run_preflight(config: WorkspaceConfig, config_path: str) -> PreflightReport:
|
||||
)
|
||||
)
|
||||
continue
|
||||
report.failures.append(
|
||||
# Missing required env is a CONFIGURATION issue, not a STRUCTURAL one.
|
||||
# The workspace can still bind /.well-known/agent-card.json — adapter.setup()
|
||||
# raises on the missing key, main.py's PR #2756 try/except mounts the
|
||||
# not-configured JSON-RPC handler, canvas surfaces a clear "agent not
|
||||
# configured: <reason>" error to the user. Hard-failing preflight here
|
||||
# would crash before the not-configured path even loads, leaving the
|
||||
# workspace invisible (the failure mode that bit codex/openclaw bench
|
||||
# 25335853189 on 2026-05-04 even after PR #2756). Warn loudly so logs
|
||||
# remain actionable, but let the boot continue.
|
||||
report.warnings.append(
|
||||
PreflightIssue(
|
||||
severity="fail",
|
||||
severity="warn",
|
||||
title="Required env",
|
||||
detail=f"Missing required environment variable: {env_var}",
|
||||
fix=f"Set {env_var} via the secrets API (global or workspace-level).",
|
||||
fix=(
|
||||
f"Set {env_var} via the secrets API (global or workspace-level). "
|
||||
"Workspace will boot in not-configured state until this is set; "
|
||||
"JSON-RPC will return -32603 'agent not configured' on every request."
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
# Backward compat: if legacy auth_token_file is set, warn but don't block
|
||||
# if the token is available via required_env or auth_token_env.
|
||||
# Backward compat: if legacy auth_token_file is set, warn — same reasoning
|
||||
# as the required_env block above. The downstream auth check fires inside
|
||||
# adapter.setup(), which is wrapped by main.py's try/except.
|
||||
token_file = getattr(config.runtime_config, "auth_token_file", "")
|
||||
if token_file:
|
||||
token_path = config_dir / token_file
|
||||
@@ -226,12 +240,16 @@ def run_preflight(config: WorkspaceConfig, config_path: str) -> PreflightReport:
|
||||
env_has_token = all(os.environ.get(e) for e in required_env)
|
||||
|
||||
if not env_has_token:
|
||||
report.failures.append(
|
||||
report.warnings.append(
|
||||
PreflightIssue(
|
||||
severity="fail",
|
||||
severity="warn",
|
||||
title="Auth token",
|
||||
detail=f"Missing auth token file: {token_file}",
|
||||
fix="Remove auth_token_file and use required_env + secrets API instead.",
|
||||
fix=(
|
||||
"Remove auth_token_file and use required_env + secrets API "
|
||||
"instead. Workspace will boot in not-configured state until "
|
||||
"the token is provided."
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -71,7 +71,6 @@ def build_system_prompt(
|
||||
prompt_files: list[str] | None = None,
|
||||
plugin_rules: list[str] | None = None,
|
||||
plugin_prompts: list[str] | None = None,
|
||||
parent_context: list[dict] | None = None,
|
||||
platform_instructions: str = "",
|
||||
a2a_mcp: bool = True,
|
||||
) -> str:
|
||||
@@ -135,18 +134,6 @@ def build_system_prompt(
|
||||
if content:
|
||||
parts.append(content)
|
||||
|
||||
# Inject parent's shared context (if this workspace is a child)
|
||||
if parent_context:
|
||||
parts.append("\n## Parent Context\n")
|
||||
parts.append("The following context was shared by your parent workspace:\n")
|
||||
for ctx_file in parent_context:
|
||||
path = ctx_file.get("path", "unknown")
|
||||
content = ctx_file.get("content", "")
|
||||
if content.strip():
|
||||
parts.append(f"### {path}")
|
||||
parts.append(content.strip())
|
||||
parts.append("")
|
||||
|
||||
# Inject plugin rules (always-on guidelines from ECC, Superpowers, etc.)
|
||||
if plugin_rules:
|
||||
parts.append("\n## Platform Rules\n")
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
"""Pattern-based secret redaction for adapter exception strings.
|
||||
|
||||
Used by ``not_configured_handler`` (and any future code path that exposes
|
||||
adapter-side error strings to the network) to scrub secret-shaped tokens
|
||||
before they land in JSON-RPC ``error.data``.
|
||||
|
||||
Why this exists (issue molecule-core#2760): PR #2756 piped
|
||||
``adapter.setup()`` exception strings verbatim into the JSON-RPC -32603
|
||||
response so canvas could surface "agent not configured: <reason>". The
|
||||
4 adapters in tree today (claude-code/codex/openclaw/hermes) raise with
|
||||
key NAMES not values, so this is currently safe — but a future adapter
|
||||
author writing ``raise RuntimeError(f"auth failed for {token}")`` would
|
||||
leak that token to every JSON-RPC client. This module is the structural
|
||||
floor that keeps the leak from happening.
|
||||
|
||||
The redactor is intentionally pattern-based (a closed list of known
|
||||
prefixes), NOT entropy-based — entropy heuristics false-positive on
|
||||
hex git SHAs and base64-shaped UUIDs that carry zero secret value.
|
||||
A pattern miss is preferable to redacting "RuntimeError: invalid
|
||||
config_path=ed8f1234abcd" out of a real diagnostic.
|
||||
|
||||
Pairs with ``not_configured_handler.make_not_configured_handler`` —
|
||||
the redactor runs once when the handler is built, so per-request hot
|
||||
path stays unchanged.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
# Closed list of known secret-shaped prefixes / formats. Each entry is a
|
||||
# compiled regex with one or more capture groups; the redactor replaces
|
||||
# the whole match with REDACTION_PLACEHOLDER. The entries are roughly
|
||||
# ordered by frequency in our adapter exception strings — Anthropic /
|
||||
# OpenAI / OpenRouter style tokens come first.
|
||||
#
|
||||
# Matched on token-ISH boundaries (start/end of string, whitespace, or
|
||||
# common separators like : / = ( ) " ' ,). Avoids redacting ``sk`` in
|
||||
# the middle of unrelated text like "task_sk_id" while still catching
|
||||
# ``sk-ant-...`` / ``sk-cp-...`` / ``sk-or-...``.
|
||||
_TOKEN_BOUNDARY_LEFT = r"(?:^|[\s\(\)\[\]\{\}\"'=,:/])"
|
||||
_TOKEN_BOUNDARY_RIGHT = r"(?=$|[\s\(\)\[\]\{\}\"'=,:/])"
|
||||
|
||||
REDACTION_PLACEHOLDER = "<redacted-secret>"
|
||||
|
||||
_PATTERNS = [
|
||||
# Anthropic / OpenAI / OpenRouter / Stripe / proprietary `sk-` family.
|
||||
# Token format: `sk-` then any non-whitespace run. Length 16+ to avoid
|
||||
# false-matching on `sk-test` style placeholders shorter than a real
|
||||
# key (16 covers OpenAI's shortest legacy key length).
|
||||
re.compile(
|
||||
_TOKEN_BOUNDARY_LEFT + r"(sk-[A-Za-z0-9_\-]{16,})" + _TOKEN_BOUNDARY_RIGHT
|
||||
),
|
||||
# GitHub Personal Access Tokens (classic + fine-grained + OAuth + app).
|
||||
# Format: ghp_ / gho_ / ghu_ / ghs_ / ghr_ followed by ~36 chars.
|
||||
re.compile(
|
||||
_TOKEN_BOUNDARY_LEFT + r"(gh[pousr]_[A-Za-z0-9]{20,})" + _TOKEN_BOUNDARY_RIGHT
|
||||
),
|
||||
# AWS access key id — fixed 16-char prefix `AKIA` (or `ASIA` for
|
||||
# session creds) followed by 16 alphanumeric chars (20 total).
|
||||
re.compile(
|
||||
_TOKEN_BOUNDARY_LEFT + r"((?:AKIA|ASIA)[0-9A-Z]{16})" + _TOKEN_BOUNDARY_RIGHT
|
||||
),
|
||||
# Bearer prefix common in HTTP error strings: `Bearer <token>`.
|
||||
# The match captures the literal `Bearer ` plus the token so the
|
||||
# full leak (which includes the prefix in some adapter error
|
||||
# messages) is scrubbed in one go.
|
||||
re.compile(r"(Bearer\s+[A-Za-z0-9_\-\.=]{16,})"),
|
||||
# Slack / Hugging Face / generic `xoxb-`, `xoxp-`, `xoxa-` prefixes.
|
||||
re.compile(
|
||||
_TOKEN_BOUNDARY_LEFT + r"(xox[bpars]-[A-Za-z0-9\-]{10,})" + _TOKEN_BOUNDARY_RIGHT
|
||||
),
|
||||
# Hugging Face API tokens: `hf_` followed by ~37 chars.
|
||||
re.compile(
|
||||
_TOKEN_BOUNDARY_LEFT + r"(hf_[A-Za-z0-9]{20,})" + _TOKEN_BOUNDARY_RIGHT
|
||||
),
|
||||
# Generic JWT — three base64url segments separated by dots. JWTs
|
||||
# carry signed claims that often include user identifiers; even a
|
||||
# public-key-only JWT shouldn't end up in an error.data field that
|
||||
# gets logged / echoed back to clients.
|
||||
re.compile(
|
||||
_TOKEN_BOUNDARY_LEFT + r"(eyJ[A-Za-z0-9_\-]{8,}\.[A-Za-z0-9_\-]{8,}\.[A-Za-z0-9_\-]{8,})" + _TOKEN_BOUNDARY_RIGHT
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def redact_secrets(text: str) -> str:
|
||||
"""Return ``text`` with any secret-shaped substrings replaced by
|
||||
``REDACTION_PLACEHOLDER``.
|
||||
|
||||
Empty / None input returns the input unchanged so callers can pass
|
||||
through ``adapter_error`` even when it's None.
|
||||
|
||||
The redactor operates on the WHOLE string, not line-by-line, so a
|
||||
multi-line traceback with a token on line 3 still gets scrubbed.
|
||||
Multiple distinct tokens in the same string are all redacted; the
|
||||
placeholder appears once per match.
|
||||
|
||||
Trade-off: pattern-based redaction misses tokens whose prefix isn't
|
||||
in ``_PATTERNS``. The cost of a miss is a leak; the cost of going
|
||||
pattern-free (e.g., entropy heuristic) is false-positive redaction
|
||||
of git SHAs and UUIDs in legitimate diagnostics. We choose miss-on-
|
||||
unknown-prefix and rely on ``_PATTERNS`` growing over time as we
|
||||
catch new providers. Adapter PRs that introduce a new provider
|
||||
SHOULD add the provider's token prefix here.
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
out = text
|
||||
for pat in _PATTERNS:
|
||||
out = pat.sub(
|
||||
# Preserve the leading boundary char (group 0 minus the
|
||||
# token capture) so substitution doesn't eat surrounding
|
||||
# punctuation. Achieved by re-emitting the leading
|
||||
# boundary then the placeholder. Patterns that don't have
|
||||
# a left-boundary group (Bearer) just emit the placeholder.
|
||||
_make_replacer(pat),
|
||||
out,
|
||||
)
|
||||
return out
|
||||
|
||||
|
||||
def _make_replacer(pat: re.Pattern) -> "callable":
|
||||
"""Build a sub() replacer that preserves any boundary char captured
|
||||
by ``pat`` before the secret-shaped group.
|
||||
|
||||
Patterns built with ``_TOKEN_BOUNDARY_LEFT`` produce a non-capturing
|
||||
group for the boundary. Match.group(0) is the full match including
|
||||
that boundary; group(1) is just the secret. We replace group(1)
|
||||
with the placeholder, leaving group(0) minus group(1) intact.
|
||||
"""
|
||||
def _repl(m: re.Match) -> str:
|
||||
full = m.group(0)
|
||||
secret = m.group(1)
|
||||
# Position of the secret within the full match.
|
||||
idx = full.find(secret)
|
||||
if idx < 0:
|
||||
return REDACTION_PLACEHOLDER
|
||||
return full[:idx] + REDACTION_PLACEHOLDER + full[idx + len(secret):]
|
||||
return _repl
|
||||
+107
-1
@@ -145,6 +145,113 @@ def _make_a2a_mocks():
|
||||
types_mod.TaskStatus = TaskStatus
|
||||
types_mod.TaskState = _TaskStateEnum
|
||||
|
||||
# v1 AgentCard / AgentSkill / AgentCapabilities / AgentInterface — used
|
||||
# by main.py's static-card construction (PR #2756) and by
|
||||
# card_helpers.enrich_card_skills's swap path. Stubs preserve kwargs so
|
||||
# tests can assert on card.skills[i].name etc., and let card.skills be
|
||||
# reassigned in place (the production code's enrichment pattern).
|
||||
class AgentSkill:
|
||||
def __init__(self, id="", name="", description="", tags=None, examples=None, **kwargs):
|
||||
self.id = id
|
||||
self.name = name
|
||||
self.description = description
|
||||
self.tags = list(tags) if tags is not None else []
|
||||
self.examples = list(examples) if examples is not None else []
|
||||
for k, v in kwargs.items():
|
||||
setattr(self, k, v)
|
||||
|
||||
class AgentCapabilities:
|
||||
def __init__(self, **kwargs):
|
||||
for k, v in kwargs.items():
|
||||
setattr(self, k, v)
|
||||
|
||||
class AgentInterface:
|
||||
def __init__(self, **kwargs):
|
||||
for k, v in kwargs.items():
|
||||
setattr(self, k, v)
|
||||
|
||||
class AgentCard:
|
||||
def __init__(self, **kwargs):
|
||||
self.skills = []
|
||||
for k, v in kwargs.items():
|
||||
setattr(self, k, v)
|
||||
|
||||
types_mod.AgentSkill = AgentSkill
|
||||
types_mod.AgentCapabilities = AgentCapabilities
|
||||
types_mod.AgentInterface = AgentInterface
|
||||
types_mod.AgentCard = AgentCard
|
||||
|
||||
# a2a.server.routes — used by boot_routes.build_routes (PR #2756 chain
|
||||
# / #2761) to mount /.well-known/agent-card.json. The real SDK builds
|
||||
# a Starlette route that serializes the card on each request; the stub
|
||||
# mirrors that behaviour with json.dumps over the card's __dict__ so
|
||||
# TestClient.get("/.well-known/agent-card.json") returns the same
|
||||
# shape canvas would see in production.
|
||||
routes_mod = ModuleType("a2a.server.routes")
|
||||
|
||||
def _create_agent_card_routes(card):
|
||||
from starlette.responses import JSONResponse
|
||||
from starlette.routing import Route
|
||||
|
||||
async def _card_handler(_request):
|
||||
# Convert the stub AgentCard into a JSON-serialisable dict.
|
||||
# Real a2a.types.AgentCard is a Pydantic model with proper
|
||||
# serialisation; the stub stores attrs raw, so we walk
|
||||
# __dict__ and serialise nested AgentSkill objects too.
|
||||
def _to_dict(obj):
|
||||
if hasattr(obj, "__dict__"):
|
||||
return {k: _to_dict(v) for k, v in vars(obj).items()}
|
||||
if isinstance(obj, list):
|
||||
return [_to_dict(x) for x in obj]
|
||||
if isinstance(obj, dict):
|
||||
return {k: _to_dict(v) for k, v in obj.items()}
|
||||
return obj
|
||||
|
||||
return JSONResponse(_to_dict(card))
|
||||
|
||||
return [Route("/.well-known/agent-card.json", _card_handler, methods=["GET"])]
|
||||
|
||||
def _create_jsonrpc_routes(request_handler=None, rpc_url="/", **_kwargs):
|
||||
from starlette.responses import JSONResponse
|
||||
from starlette.routing import Route
|
||||
|
||||
async def _jsonrpc_handler(_request):
|
||||
# Stub: real DefaultRequestHandler dispatches to the executor;
|
||||
# tests that need real behaviour will use a test-side mock.
|
||||
# This stub just returns a JSON-RPC envelope so the not-configured
|
||||
# branch's discriminator (`error.data` containing "setup() failed")
|
||||
# has something to differ from.
|
||||
return JSONResponse({"jsonrpc": "2.0", "result": "stub-jsonrpc-handler"})
|
||||
|
||||
return [Route(rpc_url, _jsonrpc_handler, methods=["POST"])]
|
||||
|
||||
routes_mod.create_agent_card_routes = _create_agent_card_routes
|
||||
routes_mod.create_jsonrpc_routes = _create_jsonrpc_routes
|
||||
sys.modules["a2a.server.routes"] = routes_mod
|
||||
|
||||
# a2a.server.request_handlers — used by boot_routes' executor branch.
|
||||
# DefaultRequestHandler stub takes the same kwargs as the real one;
|
||||
# tests that exercise the executor path don't poke at the handler's
|
||||
# internals, only that it gets mounted at "/".
|
||||
rh_mod = ModuleType("a2a.server.request_handlers")
|
||||
|
||||
class DefaultRequestHandler:
|
||||
def __init__(self, agent_executor=None, task_store=None, agent_card=None, **_kwargs):
|
||||
self.agent_executor = agent_executor
|
||||
self.task_store = task_store
|
||||
self.agent_card = agent_card
|
||||
|
||||
rh_mod.DefaultRequestHandler = DefaultRequestHandler
|
||||
sys.modules["a2a.server.request_handlers"] = rh_mod
|
||||
|
||||
# InMemoryTaskStore is exposed via a2a.server.tasks (already stubbed
|
||||
# above with TaskUpdater). Add it as a no-op class.
|
||||
class _InMemoryTaskStore:
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
tasks_mod.InMemoryTaskStore = _InMemoryTaskStore
|
||||
|
||||
# a2a.helpers (v1: moved from a2a.utils, renamed new_agent_text_message
|
||||
# → new_text_message). Mock both names — production code only calls
|
||||
# new_text_message, but if any test still references the old name it
|
||||
@@ -330,7 +437,6 @@ if "coordinator" not in sys.modules:
|
||||
except (ImportError, RuntimeError):
|
||||
coordinator_mod = ModuleType("coordinator")
|
||||
coordinator_mod.get_children = MagicMock()
|
||||
coordinator_mod.get_parent_context = MagicMock()
|
||||
coordinator_mod.build_children_description = MagicMock()
|
||||
coordinator_mod.route_task_to_team = MagicMock()
|
||||
coordinator_mod.route_task_to_team.name = "route_task_to_team"
|
||||
|
||||
@@ -71,6 +71,105 @@ async def test_handle_tool_call_unknown_tool():
|
||||
assert "Unknown tool" in result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# source_workspace_id propagation — every workspace-scoped tool's schema
|
||||
# advertises this parameter (PR #2766) so the LLM can route a memory commit
|
||||
# or chat-history query through the workspace the inbound message arrived
|
||||
# on. The dispatch path itself MUST forward the kwarg — otherwise the
|
||||
# schema lies and every call silently falls back to the module-level
|
||||
# WORKSPACE_ID, defeating multi-workspace isolation. These tests pin
|
||||
# end-to-end argument flow on the four tools that ship in PR #2766.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_dispatch_get_workspace_info_forwards_source_workspace_id():
|
||||
from a2a_mcp_server import handle_tool_call
|
||||
mock = AsyncMock(return_value='{"id":"ws-X"}')
|
||||
with patch("a2a_mcp_server.tool_get_workspace_info", new=mock):
|
||||
await handle_tool_call(
|
||||
"get_workspace_info",
|
||||
{"source_workspace_id": "ws-X"},
|
||||
)
|
||||
mock.assert_awaited_once_with(source_workspace_id="ws-X")
|
||||
|
||||
|
||||
async def test_dispatch_commit_memory_forwards_source_workspace_id():
|
||||
from a2a_mcp_server import handle_tool_call
|
||||
mock = AsyncMock(return_value='{"success":true}')
|
||||
with patch("a2a_mcp_server.tool_commit_memory", new=mock):
|
||||
await handle_tool_call(
|
||||
"commit_memory",
|
||||
{
|
||||
"content": "remember this",
|
||||
"scope": "LOCAL",
|
||||
"source_workspace_id": "ws-Y",
|
||||
},
|
||||
)
|
||||
mock.assert_awaited_once_with(
|
||||
"remember this",
|
||||
"LOCAL",
|
||||
source_workspace_id="ws-Y",
|
||||
)
|
||||
|
||||
|
||||
async def test_dispatch_recall_memory_forwards_source_workspace_id():
|
||||
from a2a_mcp_server import handle_tool_call
|
||||
mock = AsyncMock(return_value="[LOCAL] remember this")
|
||||
with patch("a2a_mcp_server.tool_recall_memory", new=mock):
|
||||
await handle_tool_call(
|
||||
"recall_memory",
|
||||
{
|
||||
"query": "remember",
|
||||
"scope": "LOCAL",
|
||||
"source_workspace_id": "ws-Z",
|
||||
},
|
||||
)
|
||||
mock.assert_awaited_once_with(
|
||||
"remember",
|
||||
"LOCAL",
|
||||
source_workspace_id="ws-Z",
|
||||
)
|
||||
|
||||
|
||||
async def test_dispatch_chat_history_forwards_source_workspace_id():
|
||||
from a2a_mcp_server import handle_tool_call
|
||||
mock = AsyncMock(return_value="[]")
|
||||
with patch("a2a_mcp_server.tool_chat_history", new=mock):
|
||||
await handle_tool_call(
|
||||
"chat_history",
|
||||
{
|
||||
"peer_id": "peer-A",
|
||||
"limit": 10,
|
||||
"source_workspace_id": "ws-W",
|
||||
},
|
||||
)
|
||||
mock.assert_awaited_once_with(
|
||||
"peer-A",
|
||||
10,
|
||||
"",
|
||||
source_workspace_id="ws-W",
|
||||
)
|
||||
|
||||
|
||||
async def test_dispatch_omits_source_workspace_id_when_unset():
|
||||
"""Single-workspace operators (no source_workspace_id key in args) must
|
||||
forward None — preserving the legacy fallback to module-level WORKSPACE_ID
|
||||
inside the tool. An accidental empty-string forward would also fall back,
|
||||
but None is the documented contract."""
|
||||
from a2a_mcp_server import handle_tool_call
|
||||
mock = AsyncMock(return_value='{"success":true}')
|
||||
with patch("a2a_mcp_server.tool_commit_memory", new=mock):
|
||||
await handle_tool_call(
|
||||
"commit_memory",
|
||||
{"content": "x", "scope": "LOCAL"},
|
||||
)
|
||||
mock.assert_awaited_once_with(
|
||||
"x",
|
||||
"LOCAL",
|
||||
source_workspace_id=None,
|
||||
)
|
||||
|
||||
|
||||
async def test_handle_tool_call_missing_args_defaults():
|
||||
"""Test that missing args default to empty strings (defensive)."""
|
||||
from a2a_mcp_server import handle_tool_call
|
||||
|
||||
@@ -426,3 +426,220 @@ class TestListRegisteredWorkspaces:
|
||||
platform_auth.register_workspace_token("ws-1", "tok-1")
|
||||
platform_auth.clear_cache()
|
||||
assert platform_auth.list_registered_workspaces() == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Memory tools — commit/recall must namespace under source_workspace_id
|
||||
# so an agent serving multiple tenants doesn't bleed memories across
|
||||
# them. Single-workspace path (no source arg) keeps using WORKSPACE_ID.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCommitMemorySourceRouting:
|
||||
@pytest.mark.asyncio
|
||||
async def test_url_and_auth_use_source_workspace_id(self, monkeypatch):
|
||||
"""commit_memory(source_workspace_id=X) must POST to /workspaces/X/
|
||||
with X's bearer token — otherwise a multi-tenant agent could
|
||||
write into the wrong tenant's memory namespace."""
|
||||
import platform_auth, a2a_tools
|
||||
|
||||
platform_auth.register_workspace_token("ffff6666-ffff-ffff-ffff-ffffffffffff", "token-F")
|
||||
|
||||
captured: dict = {}
|
||||
|
||||
class _Resp:
|
||||
status_code = 200
|
||||
def json(self):
|
||||
return {"id": "mem-1"}
|
||||
|
||||
class _Client:
|
||||
async def __aenter__(self): return self
|
||||
async def __aexit__(self, *a): return None
|
||||
async def post(self, url, headers, json):
|
||||
captured["url"] = url
|
||||
captured["headers"] = headers
|
||||
captured["body"] = json
|
||||
return _Resp()
|
||||
|
||||
monkeypatch.setattr(a2a_tools.httpx, "AsyncClient", lambda timeout: _Client())
|
||||
|
||||
result = await a2a_tools.tool_commit_memory(
|
||||
"remember this",
|
||||
source_workspace_id="ffff6666-ffff-ffff-ffff-ffffffffffff",
|
||||
)
|
||||
|
||||
assert "/workspaces/ffff6666-ffff-ffff-ffff-ffffffffffff/memories" in captured["url"]
|
||||
assert captured["headers"]["Authorization"] == "Bearer token-F"
|
||||
assert captured["body"]["workspace_id"] == "ffff6666-ffff-ffff-ffff-ffffffffffff"
|
||||
import json as _json
|
||||
assert _json.loads(result)["success"] is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_falls_back_to_module_workspace_id(self, monkeypatch):
|
||||
"""Without source_workspace_id, single-workspace operators keep
|
||||
the legacy WORKSPACE_ID-based POST — no behavior change."""
|
||||
import a2a_client, a2a_tools
|
||||
|
||||
captured: dict = {}
|
||||
|
||||
class _Resp:
|
||||
status_code = 200
|
||||
def json(self):
|
||||
return {"id": "mem-1"}
|
||||
|
||||
class _Client:
|
||||
async def __aenter__(self): return self
|
||||
async def __aexit__(self, *a): return None
|
||||
async def post(self, url, headers, json):
|
||||
captured["url"] = url
|
||||
return _Resp()
|
||||
|
||||
monkeypatch.setattr(a2a_tools.httpx, "AsyncClient", lambda timeout: _Client())
|
||||
|
||||
await a2a_tools.tool_commit_memory("remember this")
|
||||
assert f"/workspaces/{a2a_client.WORKSPACE_ID}/memories" in captured["url"]
|
||||
|
||||
|
||||
class TestRecallMemorySourceRouting:
|
||||
@pytest.mark.asyncio
|
||||
async def test_url_params_and_auth_use_source(self, monkeypatch):
|
||||
"""recall_memory routes the GET, the workspace_id query param,
|
||||
and the auth header through source_workspace_id."""
|
||||
import platform_auth, a2a_tools
|
||||
|
||||
platform_auth.register_workspace_token("aaaa7777-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "token-G")
|
||||
|
||||
captured: dict = {}
|
||||
|
||||
class _Resp:
|
||||
status_code = 200
|
||||
def json(self):
|
||||
return []
|
||||
|
||||
class _Client:
|
||||
async def __aenter__(self): return self
|
||||
async def __aexit__(self, *a): return None
|
||||
async def get(self, url, params, headers):
|
||||
captured["url"] = url
|
||||
captured["params"] = params
|
||||
captured["headers"] = headers
|
||||
return _Resp()
|
||||
|
||||
monkeypatch.setattr(a2a_tools.httpx, "AsyncClient", lambda timeout: _Client())
|
||||
|
||||
await a2a_tools.tool_recall_memory(
|
||||
query="x",
|
||||
source_workspace_id="aaaa7777-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
||||
)
|
||||
|
||||
assert "/workspaces/aaaa7777-aaaa-aaaa-aaaa-aaaaaaaaaaaa/memories" in captured["url"]
|
||||
assert captured["params"]["workspace_id"] == "aaaa7777-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
|
||||
assert captured["headers"]["Authorization"] == "Bearer token-G"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# chat_history — auto-routes via the peer→source cache so an inbound
|
||||
# peer_agent push from workspace X sees its history queried against X.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestChatHistorySourceRouting:
|
||||
@pytest.mark.asyncio
|
||||
async def test_auto_routes_via_peer_cache(self, monkeypatch):
|
||||
"""chat_history(peer_id) without an explicit source falls back to
|
||||
``_peer_to_source[peer_id]`` — same auto-routing as delegate_task,
|
||||
so the agent doesn't have to remember which workspace surfaced
|
||||
each peer."""
|
||||
import platform_auth, a2a_client, a2a_tools
|
||||
|
||||
platform_auth.register_workspace_token("bbbb8888-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "token-H")
|
||||
peer_id = "1111aaaa-1111-1111-1111-111111111111"
|
||||
a2a_client._peer_to_source[peer_id] = "bbbb8888-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
|
||||
|
||||
captured: dict = {}
|
||||
|
||||
class _Resp:
|
||||
status_code = 200
|
||||
def json(self):
|
||||
return []
|
||||
|
||||
class _Client:
|
||||
async def __aenter__(self): return self
|
||||
async def __aexit__(self, *a): return None
|
||||
async def get(self, url, params, headers):
|
||||
captured["url"] = url
|
||||
captured["headers"] = headers
|
||||
return _Resp()
|
||||
|
||||
monkeypatch.setattr(a2a_tools.httpx, "AsyncClient", lambda timeout: _Client())
|
||||
|
||||
await a2a_tools.tool_chat_history(peer_id, limit=5)
|
||||
|
||||
assert "/workspaces/bbbb8888-bbbb-bbbb-bbbb-bbbbbbbbbbbb/activity" in captured["url"]
|
||||
assert captured["headers"]["Authorization"] == "Bearer token-H"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_explicit_source_beats_cache(self, monkeypatch):
|
||||
import platform_auth, a2a_client, a2a_tools
|
||||
|
||||
platform_auth.register_workspace_token("cccc9999-cccc-cccc-cccc-cccccccccccc", "token-I")
|
||||
peer_id = "1111aaaa-1111-1111-1111-111111111111"
|
||||
a2a_client._peer_to_source[peer_id] = "should-not-be-used"
|
||||
|
||||
captured: dict = {}
|
||||
|
||||
class _Resp:
|
||||
status_code = 200
|
||||
def json(self):
|
||||
return []
|
||||
|
||||
class _Client:
|
||||
async def __aenter__(self): return self
|
||||
async def __aexit__(self, *a): return None
|
||||
async def get(self, url, params, headers):
|
||||
captured["url"] = url
|
||||
return _Resp()
|
||||
|
||||
monkeypatch.setattr(a2a_tools.httpx, "AsyncClient", lambda timeout: _Client())
|
||||
|
||||
await a2a_tools.tool_chat_history(
|
||||
peer_id, source_workspace_id="cccc9999-cccc-cccc-cccc-cccccccccccc",
|
||||
)
|
||||
assert "/workspaces/cccc9999-cccc-cccc-cccc-cccccccccccc/activity" in captured["url"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_workspace_info — multi-workspace introspection.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetWorkspaceInfoSourceRouting:
|
||||
@pytest.mark.asyncio
|
||||
async def test_introspects_named_workspace(self, monkeypatch):
|
||||
import platform_auth, a2a_client
|
||||
|
||||
platform_auth.register_workspace_token("dddd0000-dddd-dddd-dddd-dddddddddddd", "token-J")
|
||||
|
||||
captured: dict = {}
|
||||
|
||||
class _Resp:
|
||||
status_code = 200
|
||||
def json(self):
|
||||
return {"id": "dddd0000-dddd-dddd-dddd-dddddddddddd", "name": "wsJ"}
|
||||
|
||||
class _Client:
|
||||
async def __aenter__(self): return self
|
||||
async def __aexit__(self, *a): return None
|
||||
async def get(self, url, headers):
|
||||
captured["url"] = url
|
||||
captured["headers"] = headers
|
||||
return _Resp()
|
||||
|
||||
monkeypatch.setattr(a2a_client.httpx, "AsyncClient", lambda timeout: _Client())
|
||||
|
||||
info = await a2a_client.get_workspace_info(
|
||||
source_workspace_id="dddd0000-dddd-dddd-dddd-dddddddddddd",
|
||||
)
|
||||
assert info["id"] == "dddd0000-dddd-dddd-dddd-dddddddddddd"
|
||||
assert "/workspaces/dddd0000-dddd-dddd-dddd-dddddddddddd" in captured["url"]
|
||||
assert captured["headers"]["Authorization"] == "Bearer token-J"
|
||||
|
||||
@@ -0,0 +1,213 @@
|
||||
"""Integration tests for boot_routes.build_routes — pin the contract that
|
||||
PR #2756's card-vs-setup decoupling depends on.
|
||||
|
||||
Why these matter (issue #2761): main.py is ``# pragma: no cover``. The
|
||||
inline if/else that mounted ``DefaultRequestHandler`` vs the
|
||||
not-configured handler had no pytest coverage; a future refactor that
|
||||
re-coupled card and setup() would have shipped the original "stuck
|
||||
booting forever" UX again. Extracting to ``boot_routes.build_routes``
|
||||
+ these tests make the contract regression-proof.
|
||||
|
||||
Each test exercises a real Starlette TestClient against the routes —
|
||||
no uvicorn, no socket, but every assertion is the same one canvas's
|
||||
TranscriptHandler / a2a_proxy would make in production.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
# Make workspace/ importable in test isolation — same pattern as the
|
||||
# adjacent tests (test_not_configured_handler.py, test_card_helpers.py).
|
||||
WORKSPACE_DIR = Path(__file__).resolve().parents[1]
|
||||
if str(WORKSPACE_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(WORKSPACE_DIR))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def agent_card():
|
||||
"""Build a minimal AgentCard the way main.py does at boot."""
|
||||
from a2a.types import (
|
||||
AgentCard,
|
||||
AgentCapabilities,
|
||||
AgentInterface,
|
||||
AgentSkill,
|
||||
)
|
||||
|
||||
return AgentCard(
|
||||
name="test-agent",
|
||||
description="test-agent",
|
||||
version="0.0.0",
|
||||
supported_interfaces=[
|
||||
AgentInterface(protocol_binding="https://a2a.g/v1", url="http://test:8000")
|
||||
],
|
||||
capabilities=AgentCapabilities(streaming=True, push_notifications=False),
|
||||
skills=[
|
||||
AgentSkill(id="echo", name="echo", description="echo", tags=[], examples=[])
|
||||
],
|
||||
default_input_modes=["text/plain"],
|
||||
default_output_modes=["text/plain"],
|
||||
)
|
||||
|
||||
|
||||
# ---- card route always mounted, regardless of adapter state -------------
|
||||
|
||||
|
||||
def test_card_route_serves_200_when_adapter_ready(agent_card):
|
||||
"""Adapter setup OK → card serves 200, the canonical happy path."""
|
||||
from starlette.applications import Starlette
|
||||
from starlette.testclient import TestClient
|
||||
|
||||
from boot_routes import build_routes
|
||||
|
||||
fake_executor = MagicMock()
|
||||
app = Starlette(routes=build_routes(agent_card, fake_executor, None))
|
||||
client = TestClient(app)
|
||||
resp = client.get("/.well-known/agent-card.json")
|
||||
assert resp.status_code == 200
|
||||
body = resp.json()
|
||||
assert body["name"] == "test-agent"
|
||||
|
||||
|
||||
def test_card_route_serves_200_when_adapter_failed(agent_card):
|
||||
"""Adapter setup raised → card route is STILL mounted with the same
|
||||
static skills. This is the entire point of PR #2756: a misconfigured
|
||||
workspace stays REACHABLE so canvas can show the user a clear error
|
||||
instead of silently looking dead."""
|
||||
from starlette.applications import Starlette
|
||||
from starlette.testclient import TestClient
|
||||
|
||||
from boot_routes import build_routes
|
||||
|
||||
app = Starlette(
|
||||
routes=build_routes(
|
||||
agent_card, executor=None, adapter_error="MISSING_API_KEY"
|
||||
)
|
||||
)
|
||||
client = TestClient(app)
|
||||
resp = client.get("/.well-known/agent-card.json")
|
||||
assert resp.status_code == 200
|
||||
body = resp.json()
|
||||
assert body["name"] == "test-agent"
|
||||
# Skill stubs survive even though setup() didn't run.
|
||||
assert any(s.get("id") == "echo" for s in body.get("skills", []))
|
||||
|
||||
|
||||
# ---- JSON-RPC route swaps based on executor presence -------------------
|
||||
|
||||
|
||||
def test_jsonrpc_returns_503_when_no_executor(agent_card):
|
||||
"""The not-configured branch: POST / returns 503 with JSON-RPC -32603
|
||||
and the adapter_error in error.data. This is what canvas sees when a
|
||||
user tries to message a workspace whose setup() failed — turns a
|
||||
"stuck silent" workspace into "agent not configured: <reason>"."""
|
||||
from starlette.applications import Starlette
|
||||
from starlette.testclient import TestClient
|
||||
|
||||
from boot_routes import build_routes
|
||||
|
||||
app = Starlette(
|
||||
routes=build_routes(
|
||||
agent_card,
|
||||
executor=None,
|
||||
adapter_error="RuntimeError: Neither OPENAI_API_KEY nor MINIMAX_API_KEY is set",
|
||||
)
|
||||
)
|
||||
client = TestClient(app)
|
||||
resp = client.post(
|
||||
"/",
|
||||
json={"jsonrpc": "2.0", "id": 42, "method": "message/send"},
|
||||
)
|
||||
assert resp.status_code == 503
|
||||
body = resp.json()
|
||||
assert body["jsonrpc"] == "2.0"
|
||||
assert body["id"] == 42 # echoed
|
||||
assert body["error"]["code"] == -32603
|
||||
assert "MINIMAX_API_KEY" in body["error"]["data"]
|
||||
|
||||
|
||||
def test_jsonrpc_returns_503_with_generic_when_no_error_string(agent_card):
|
||||
"""Defensive: if main.py reached this branch without a captured
|
||||
error string (shouldn't happen in practice but the helper is
|
||||
defensive), the handler still returns -32603 with a generic
|
||||
fallback so the operator gets a useful response shape."""
|
||||
from starlette.applications import Starlette
|
||||
from starlette.testclient import TestClient
|
||||
|
||||
from boot_routes import build_routes
|
||||
|
||||
app = Starlette(
|
||||
routes=build_routes(agent_card, executor=None, adapter_error=None)
|
||||
)
|
||||
client = TestClient(app)
|
||||
resp = client.post(
|
||||
"/", json={"jsonrpc": "2.0", "id": 1, "method": "message/send"}
|
||||
)
|
||||
assert resp.status_code == 503
|
||||
assert resp.json()["error"]["code"] == -32603
|
||||
# Falls back to generic "adapter.setup() failed".
|
||||
assert "setup() failed" in resp.json()["error"]["data"]
|
||||
|
||||
|
||||
# ---- Specific regression: re-coupling card to setup would break this ---
|
||||
|
||||
|
||||
def test_card_route_does_not_depend_on_executor(agent_card):
|
||||
"""Direct regression test for PR #2756. If a future refactor moved
|
||||
create_agent_card_routes into the executor-only branch, this test
|
||||
would catch it: the card MUST be served from a code path that runs
|
||||
even when executor is None."""
|
||||
from boot_routes import build_routes
|
||||
|
||||
routes_with_executor = build_routes(agent_card, MagicMock(), None)
|
||||
routes_without_executor = build_routes(agent_card, None, "err")
|
||||
|
||||
# Both branches mount /.well-known/agent-card.json. Find by path.
|
||||
def has_card_route(routes):
|
||||
for r in routes:
|
||||
for attr in ("path", "path_format"):
|
||||
p = getattr(r, attr, None)
|
||||
if p and "agent-card.json" in p:
|
||||
return True
|
||||
return False
|
||||
|
||||
assert has_card_route(routes_with_executor), (
|
||||
"card route MUST be mounted on the executor-present path"
|
||||
)
|
||||
assert has_card_route(routes_without_executor), (
|
||||
"card route MUST be mounted on the executor-missing path "
|
||||
"(this is the PR #2756 contract — re-coupling here breaks tenant readiness)"
|
||||
)
|
||||
|
||||
|
||||
def test_executor_present_does_not_mount_not_configured_handler(agent_card):
|
||||
"""Sanity: when executor is present, the not-configured handler
|
||||
must NOT be mounted at /. Otherwise a healthy workspace would
|
||||
return -32603 to every JSON-RPC call.
|
||||
|
||||
We call POST / with a malformed JSON-RPC body and assert the
|
||||
response is NOT the -32603 not-configured envelope. (The real
|
||||
DefaultRequestHandler may return its own error for the malformed
|
||||
payload, but it won't have ``data: "adapter.setup() failed"``.)"""
|
||||
from starlette.applications import Starlette
|
||||
from starlette.testclient import TestClient
|
||||
|
||||
from boot_routes import build_routes
|
||||
|
||||
fake_executor = MagicMock()
|
||||
app = Starlette(routes=build_routes(agent_card, fake_executor, None))
|
||||
client = TestClient(app)
|
||||
resp = client.post(
|
||||
"/", json={"jsonrpc": "2.0", "id": 1, "method": "message/send"}
|
||||
)
|
||||
body = resp.json() if resp.headers.get("content-type", "").startswith("application/json") else {}
|
||||
# Whatever DefaultRequestHandler does, it isn't the not-configured
|
||||
# envelope. The cheap discriminator: error.data won't say "setup() failed".
|
||||
err = body.get("error") or {}
|
||||
data = err.get("data") if isinstance(err, dict) else ""
|
||||
assert "setup() failed" not in (data or ""), (
|
||||
"executor-present branch must not mount the not-configured handler"
|
||||
)
|
||||
@@ -0,0 +1,163 @@
|
||||
"""Tests for ``card_helpers.enrich_card_skills`` — the defensive swap that
|
||||
replaces ``AgentCard.skills`` with rich metadata from the adapter's
|
||||
loaded skills, falling back to the static stubs on shape mismatch.
|
||||
|
||||
The whole point of the helper (vs inline in main.py) is that a future
|
||||
adapter author who returns a non-standard ``loaded_skills`` shape
|
||||
should NOT silently downgrade their workspace boot to not-configured —
|
||||
``setup()`` succeeded, the agent works, only the card's skill metadata
|
||||
enrichment is degraded.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
WORKSPACE_DIR = Path(__file__).resolve().parents[1]
|
||||
if str(WORKSPACE_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(WORKSPACE_DIR))
|
||||
|
||||
from a2a.types import AgentCard, AgentCapabilities, AgentInterface, AgentSkill
|
||||
|
||||
from card_helpers import enrich_card_skills
|
||||
|
||||
|
||||
def _make_card(static_skill_names):
|
||||
return AgentCard(
|
||||
name="test-agent",
|
||||
description="test",
|
||||
version="0.0.0",
|
||||
supported_interfaces=[
|
||||
AgentInterface(protocol_binding="https://a2a.g/v1", url="http://x:8000")
|
||||
],
|
||||
capabilities=AgentCapabilities(streaming=True, push_notifications=False),
|
||||
skills=[
|
||||
AgentSkill(id=n, name=n, description=n, tags=[], examples=[])
|
||||
for n in static_skill_names
|
||||
],
|
||||
default_input_modes=["text/plain"],
|
||||
default_output_modes=["text/plain"],
|
||||
)
|
||||
|
||||
|
||||
class _SkillMetadata:
|
||||
"""Mimics the adapter-side Skill.metadata shape."""
|
||||
def __init__(self, id, name, description, tags, examples):
|
||||
self.id = id
|
||||
self.name = name
|
||||
self.description = description
|
||||
self.tags = tags
|
||||
self.examples = examples
|
||||
|
||||
|
||||
class _Skill:
|
||||
def __init__(self, **kwargs):
|
||||
self.metadata = _SkillMetadata(**kwargs)
|
||||
|
||||
|
||||
def test_returns_false_on_none():
|
||||
"""No loaded_skills → caller didn't load any → no swap, no log spam."""
|
||||
card = _make_card(["a", "b"])
|
||||
assert enrich_card_skills(card, None) is False
|
||||
# Static stubs preserved.
|
||||
assert [s.id for s in card.skills] == ["a", "b"]
|
||||
|
||||
|
||||
def test_returns_false_on_empty_list():
|
||||
"""Empty list → same treatment as None: nothing to enrich."""
|
||||
card = _make_card(["a"])
|
||||
assert enrich_card_skills(card, []) is False
|
||||
assert [s.id for s in card.skills] == ["a"]
|
||||
|
||||
|
||||
def test_swaps_in_rich_metadata_on_canonical_shape():
|
||||
"""The happy path: adapter returns Skill objects with the canonical
|
||||
.metadata shape, card gets the richer descriptions/tags/examples."""
|
||||
card = _make_card(["search"]) # static stub
|
||||
rich = [
|
||||
_Skill(
|
||||
id="search",
|
||||
name="Web Search",
|
||||
description="Search the web for the user's question",
|
||||
tags=["web", "io"],
|
||||
examples=["who won the world cup in 2022?"],
|
||||
),
|
||||
]
|
||||
assert enrich_card_skills(card, rich) is True
|
||||
assert len(card.skills) == 1
|
||||
assert card.skills[0].id == "search"
|
||||
assert card.skills[0].name == "Web Search"
|
||||
assert "web" in card.skills[0].tags
|
||||
assert card.skills[0].examples == ["who won the world cup in 2022?"]
|
||||
|
||||
|
||||
def test_returns_false_and_keeps_stubs_when_metadata_attr_missing(capsys):
|
||||
"""Defensive: a future adapter that returns objects without
|
||||
``.metadata`` would otherwise raise AttributeError and propagate to
|
||||
main.py's outer except — silently degrading an OK boot to
|
||||
not-configured. Helper logs + returns False instead, static stubs
|
||||
stay in place.
|
||||
|
||||
This is the reason the helper exists at all; without it the
|
||||
inline swap in main.py at PR #2756 was a coupling between adapter
|
||||
discipline and tenant-facing readiness."""
|
||||
card = _make_card(["a"])
|
||||
|
||||
class NoMetadata:
|
||||
id = "x" # has id but no .metadata.id (the canonical path)
|
||||
|
||||
assert enrich_card_skills(card, [NoMetadata()]) is False
|
||||
# Static stub preserved.
|
||||
assert [s.id for s in card.skills] == ["a"]
|
||||
# Operator gets a log line.
|
||||
captured = capsys.readouterr()
|
||||
assert "skill metadata enrichment failed" in captured.out
|
||||
|
||||
|
||||
def test_returns_false_when_metadata_is_partial(capsys):
|
||||
"""Partial shape — has .metadata but the .metadata object lacks one
|
||||
of the canonical attrs (here: ``examples``). The list comprehension
|
||||
raises AttributeError on ``skill.metadata.examples`` access, which
|
||||
the helper swallows. (In production, a2a.types.AgentSkill is a
|
||||
Pydantic model that ALSO raises on missing required fields — both
|
||||
failure modes route through the same except branch.)"""
|
||||
card = _make_card(["a"])
|
||||
|
||||
class PartialMeta:
|
||||
def __init__(self):
|
||||
self.id = "x"
|
||||
self.name = "x"
|
||||
self.description = "x"
|
||||
self.tags = []
|
||||
# examples missing
|
||||
|
||||
class PartialSkill:
|
||||
def __init__(self):
|
||||
self.metadata = PartialMeta()
|
||||
|
||||
result = enrich_card_skills(card, [PartialSkill()])
|
||||
assert result is False
|
||||
assert [s.id for s in card.skills] == ["a"]
|
||||
captured = capsys.readouterr()
|
||||
assert "skill metadata enrichment failed" in captured.out
|
||||
|
||||
|
||||
def test_failure_is_atomic_no_partial_swap(capsys):
|
||||
"""If the second skill is malformed, the FIRST skill's swap must NOT
|
||||
leak into card.skills. We use a list-comprehension which builds the
|
||||
full list before assignment; verify that property holds.
|
||||
|
||||
Without this property, a misbehaving adapter could half-corrupt the
|
||||
card — operators would see "1 skill listed" when 3 were declared,
|
||||
no log line if the inline swap was partial."""
|
||||
card = _make_card(["a", "b"])
|
||||
|
||||
valid = _Skill(id="x", name="x", description="x", tags=[], examples=[])
|
||||
|
||||
class BadSkill:
|
||||
# No .metadata at all.
|
||||
pass
|
||||
|
||||
assert enrich_card_skills(card, [valid, BadSkill()]) is False
|
||||
# Original two static stubs intact — card.skills was never reassigned.
|
||||
assert [s.id for s in card.skills] == ["a", "b"]
|
||||
@@ -496,24 +496,24 @@ def test_initial_prompt_file_missing(tmp_path):
|
||||
assert cfg.initial_prompt == ""
|
||||
|
||||
|
||||
def test_shared_context_default(tmp_path):
|
||||
"""shared_context defaults to empty list when not specified in YAML."""
|
||||
config_yaml = tmp_path / "config.yaml"
|
||||
config_yaml.write_text(yaml.dump({}))
|
||||
def test_shared_context_field_removed(tmp_path):
|
||||
"""Drop-shared_context regression gate: a config.yaml that still uses
|
||||
the legacy `shared_context` key must load without crashing AND must
|
||||
NOT carry it onto the WorkspaceConfig dataclass.
|
||||
|
||||
cfg = load_config(str(tmp_path))
|
||||
assert cfg.shared_context == []
|
||||
|
||||
|
||||
def test_shared_context_from_yaml(tmp_path):
|
||||
"""shared_context reads file paths from YAML."""
|
||||
The field was removed; YAML files in the wild may still mention it
|
||||
until operators migrate. Loader silently ignores unknown YAML keys —
|
||||
we pin the behavior so a future re-introduction is loud."""
|
||||
config_yaml = tmp_path / "config.yaml"
|
||||
config_yaml.write_text(
|
||||
yaml.dump({"shared_context": ["guidelines.md", "architecture.md"]})
|
||||
)
|
||||
|
||||
cfg = load_config(str(tmp_path))
|
||||
assert cfg.shared_context == ["guidelines.md", "architecture.md"]
|
||||
assert not hasattr(cfg, "shared_context"), (
|
||||
"shared_context is removed; reintroducing it requires a new design "
|
||||
"(see RFC #2789 for platform-owned shared file storage)"
|
||||
)
|
||||
|
||||
|
||||
# ===== Compliance default lock (#2059) =====
|
||||
|
||||
@@ -1,79 +1,15 @@
|
||||
"""Tests for coordinator.py — get_parent_context() and get_children() functions."""
|
||||
"""Tests for coordinator.get_children() and build_children_description().
|
||||
|
||||
shared_context / get_parent_context was removed: parent→child knowledge
|
||||
sharing now flows through memory v2's team:<id> namespace via recall_memory
|
||||
on demand, not through file paths injected at boot.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import AsyncMock, patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from coordinator import get_parent_context, get_children, build_children_description
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_parent_context_no_env(monkeypatch):
|
||||
"""Returns empty list when PARENT_ID is not set."""
|
||||
monkeypatch.delenv("PARENT_ID", raising=False)
|
||||
result = await get_parent_context()
|
||||
assert result == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_parent_context_success(monkeypatch):
|
||||
"""Fetches shared context files from parent workspace via httpx."""
|
||||
monkeypatch.setenv("PARENT_ID", "parent-123")
|
||||
monkeypatch.setenv("WORKSPACE_ID", "child-456")
|
||||
monkeypatch.setenv("PLATFORM_URL", "http://localhost:8080")
|
||||
|
||||
# Reload module-level constants after env change
|
||||
import coordinator
|
||||
monkeypatch.setattr(coordinator, "PLATFORM_URL", "http://localhost:8080")
|
||||
monkeypatch.setattr(coordinator, "WORKSPACE_ID", "child-456")
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = [
|
||||
{"path": "guidelines.md", "content": "Be concise."},
|
||||
{"path": "arch.md", "content": "Use microservices."},
|
||||
]
|
||||
|
||||
mock_client = AsyncMock()
|
||||
mock_client.get.return_value = mock_response
|
||||
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
||||
mock_client.__aexit__ = AsyncMock(return_value=False)
|
||||
|
||||
with patch("coordinator.httpx.AsyncClient", return_value=mock_client):
|
||||
result = await get_parent_context()
|
||||
|
||||
assert len(result) == 2
|
||||
assert result[0]["path"] == "guidelines.md"
|
||||
assert result[0]["content"] == "Be concise."
|
||||
assert result[1]["path"] == "arch.md"
|
||||
|
||||
# Verify the correct URL was called
|
||||
mock_client.get.assert_called_once_with(
|
||||
"http://localhost:8080/workspaces/parent-123/shared-context",
|
||||
headers={"X-Workspace-ID": "child-456"},
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_parent_context_failure(monkeypatch):
|
||||
"""Returns empty list when httpx raises an exception."""
|
||||
monkeypatch.setenv("PARENT_ID", "parent-123")
|
||||
monkeypatch.setenv("WORKSPACE_ID", "child-456")
|
||||
|
||||
import coordinator
|
||||
monkeypatch.setattr(coordinator, "PLATFORM_URL", "http://localhost:8080")
|
||||
monkeypatch.setattr(coordinator, "WORKSPACE_ID", "child-456")
|
||||
|
||||
mock_client = AsyncMock()
|
||||
mock_client.get.side_effect = Exception("Connection refused")
|
||||
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
||||
mock_client.__aexit__ = AsyncMock(return_value=False)
|
||||
|
||||
with patch("coordinator.httpx.AsyncClient", return_value=mock_client):
|
||||
result = await get_parent_context()
|
||||
|
||||
assert result == []
|
||||
from coordinator import get_children, build_children_description
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -0,0 +1,245 @@
|
||||
"""Drift gate: every property declared in a tool's ``input_schema`` MUST
|
||||
be read by the matching dispatch arm in ``a2a_mcp_server.handle_tool_call``.
|
||||
|
||||
Why this exists (issue #2790):
|
||||
PR #2766 added ``source_workspace_id`` to four tools' ``input_schema``
|
||||
and tool implementations, but the dispatcher in ``a2a_mcp_server.py``
|
||||
silently dropped the kwarg for ``commit_memory`` / ``recall_memory``
|
||||
/ ``chat_history`` / ``get_workspace_info``. The schema lied: the LLM
|
||||
saw the parameter as valid, populated it correctly, and every call
|
||||
fell back to ``WORKSPACE_ID`` defeating multi-tenant isolation.
|
||||
Existing dispatcher tests asserted return-value substrings instead
|
||||
of kwarg flow (``"working" in result``), so the bug shipped to main.
|
||||
|
||||
What this test catches:
|
||||
For every ``ToolSpec`` registered in ``platform_tools.registry``
|
||||
whose ``input_schema`` declares a property ``X``, the matching
|
||||
``elif name == "<tool_name>"`` arm in ``handle_tool_call`` must
|
||||
contain a literal string ``"X"`` passed to ``arguments.get(...)``.
|
||||
A future PR that adds a new property to the schema but forgets the
|
||||
dispatcher will fail this gate at CI time, before the bad code hits
|
||||
main.
|
||||
|
||||
Why an AST check, not a runtime invocation:
|
||||
The dispatcher is a long if/elif chain. Runtime invocation would
|
||||
need to mock every inner tool, then call the dispatcher with each
|
||||
name and assert the kwargs were forwarded. That's exactly what
|
||||
``test_a2a_mcp_server.py::test_dispatch_*_forwards_source_workspace_id``
|
||||
already does for the four tools we explicitly tested. This gate is
|
||||
cheaper (~1ms) and catches the structural drift before someone has
|
||||
to remember to write the runtime test for each new property.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
_DISPATCHER_PATH = (
|
||||
Path(__file__).resolve().parents[1] / "a2a_mcp_server.py"
|
||||
)
|
||||
|
||||
|
||||
def _load_dispatch_arms() -> dict[str, ast.If]:
|
||||
"""Parse ``a2a_mcp_server.py`` and return a mapping of tool name
|
||||
→ the AST node for its ``elif name == "<tool_name>"`` arm.
|
||||
|
||||
Walks the body of ``handle_tool_call`` and matches each If/elif
|
||||
branch whose test compares ``name`` against a string literal.
|
||||
"""
|
||||
source = _DISPATCHER_PATH.read_text()
|
||||
tree = ast.parse(source)
|
||||
|
||||
# Find handle_tool_call (sync def doesn't matter — same shape).
|
||||
handle_fn: ast.AsyncFunctionDef | None = None
|
||||
for node in ast.walk(tree):
|
||||
if isinstance(node, (ast.AsyncFunctionDef, ast.FunctionDef)) and node.name == "handle_tool_call":
|
||||
handle_fn = node # type: ignore[assignment]
|
||||
break
|
||||
assert handle_fn is not None, "handle_tool_call not found in a2a_mcp_server.py"
|
||||
|
||||
arms: dict[str, ast.If] = {}
|
||||
|
||||
def _walk_if_chain(if_node: ast.If) -> None:
|
||||
# Each If has a `test` like `name == "delegate_task"` and may
|
||||
# carry an `orelse` that is either another If (elif) or a final
|
||||
# else block.
|
||||
test = if_node.test
|
||||
if (
|
||||
isinstance(test, ast.Compare)
|
||||
and len(test.ops) == 1
|
||||
and isinstance(test.ops[0], ast.Eq)
|
||||
and isinstance(test.left, ast.Name)
|
||||
and test.left.id == "name"
|
||||
and len(test.comparators) == 1
|
||||
and isinstance(test.comparators[0], ast.Constant)
|
||||
and isinstance(test.comparators[0].value, str)
|
||||
):
|
||||
arms[test.comparators[0].value] = if_node
|
||||
|
||||
if len(if_node.orelse) == 1 and isinstance(if_node.orelse[0], ast.If):
|
||||
_walk_if_chain(if_node.orelse[0])
|
||||
|
||||
for stmt in handle_fn.body:
|
||||
if isinstance(stmt, ast.If):
|
||||
_walk_if_chain(stmt)
|
||||
break # Only the top-level if/elif chain matters.
|
||||
|
||||
return arms
|
||||
|
||||
|
||||
def _extract_arguments_get_keys(arm: ast.If) -> set[str]:
|
||||
"""Return every string literal passed as the first positional arg to
|
||||
a call shaped like ``arguments.get("X", ...)`` inside this arm's body.
|
||||
|
||||
These represent the schema-property names this dispatch arm reads.
|
||||
A property declared in ``input_schema`` but NOT pulled by an
|
||||
``arguments.get(...)`` call here is the drift the gate catches.
|
||||
"""
|
||||
keys: set[str] = set()
|
||||
|
||||
class _Visitor(ast.NodeVisitor):
|
||||
def visit_Call(self, node: ast.Call) -> None:
|
||||
# arguments.get("foo", ...) / arguments.get("foo")
|
||||
func = node.func
|
||||
if (
|
||||
isinstance(func, ast.Attribute)
|
||||
and func.attr == "get"
|
||||
and isinstance(func.value, ast.Name)
|
||||
and func.value.id == "arguments"
|
||||
and node.args
|
||||
and isinstance(node.args[0], ast.Constant)
|
||||
and isinstance(node.args[0].value, str)
|
||||
):
|
||||
keys.add(node.args[0].value)
|
||||
self.generic_visit(node)
|
||||
|
||||
visitor = _Visitor()
|
||||
# Walk only the body (not the test or orelse) so nested elifs don't
|
||||
# bleed their keys upward.
|
||||
for stmt in arm.body:
|
||||
visitor.visit(stmt)
|
||||
return keys
|
||||
|
||||
|
||||
def _registry_tool_schemas() -> dict[str, dict]:
|
||||
"""Return a mapping of ToolSpec.name → ``input_schema.properties``
|
||||
dict. Imports the registry module so this gate stays in sync with
|
||||
whatever the registry exposes (no manual list to update)."""
|
||||
from platform_tools import registry
|
||||
|
||||
out: dict[str, dict] = {}
|
||||
for spec in registry.TOOLS:
|
||||
schema = spec.input_schema or {}
|
||||
props = schema.get("properties") or {}
|
||||
out[spec.name] = props
|
||||
return out
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# The actual gate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_every_dispatch_arm_reads_every_schema_property():
|
||||
"""Schema↔dispatcher drift gate. PR #2766 → PR #2771 cycle protection.
|
||||
|
||||
Walks every ToolSpec in the registry, finds its dispatch arm in
|
||||
``a2a_mcp_server.handle_tool_call``, and asserts that every property
|
||||
name declared in ``input_schema.properties`` is read by an
|
||||
``arguments.get("<name>", ...)`` call inside that arm.
|
||||
|
||||
Failure mode the gate prevents: a new schema property advertised to
|
||||
the LLM but silently dropped by the dispatcher (the exact PR #2766
|
||||
bug — schema said ``source_workspace_id`` was a valid param,
|
||||
dispatcher ignored it, every call fell back to ``WORKSPACE_ID``).
|
||||
"""
|
||||
arms = _load_dispatch_arms()
|
||||
schemas = _registry_tool_schemas()
|
||||
|
||||
failures: list[str] = []
|
||||
|
||||
for tool_name, props in schemas.items():
|
||||
if tool_name not in arms:
|
||||
# Tool registered but not dispatched — the registry's
|
||||
# ``ALL_SPECS`` is the canonical list of MCP-exposed tools,
|
||||
# so a missing arm IS a bug. Surface it clearly.
|
||||
failures.append(
|
||||
f"Tool {tool_name!r} is registered in platform_tools.registry "
|
||||
f"but has no dispatch arm in a2a_mcp_server.handle_tool_call. "
|
||||
f"LLM clients will receive 'Unknown tool' for every call."
|
||||
)
|
||||
continue
|
||||
|
||||
arm = arms[tool_name]
|
||||
read_keys = _extract_arguments_get_keys(arm)
|
||||
declared_keys = set(props.keys())
|
||||
missing = declared_keys - read_keys
|
||||
if missing:
|
||||
failures.append(
|
||||
f"Tool {tool_name!r} declares schema properties "
|
||||
f"{sorted(missing)} that the dispatch arm in "
|
||||
f"a2a_mcp_server.handle_tool_call does NOT read via "
|
||||
f"arguments.get(). The schema is lying — LLMs will pass "
|
||||
f"these parameters and the dispatcher will silently drop "
|
||||
f"them. (See PR #2766 → PR #2771 for the prior incident.)"
|
||||
)
|
||||
|
||||
if failures:
|
||||
pytest.fail("\n\n".join(failures))
|
||||
|
||||
|
||||
def test_dispatch_arms_reach_every_registered_tool():
|
||||
"""Inverse direction: every dispatched tool name corresponds to a
|
||||
registered ToolSpec. Catches a dispatch arm for a tool that was
|
||||
removed from the registry (would still serve, but the schema /
|
||||
docs / wrappers wouldn't know about it).
|
||||
"""
|
||||
arms = _load_dispatch_arms()
|
||||
schemas = _registry_tool_schemas()
|
||||
|
||||
orphan_arms = set(arms.keys()) - set(schemas.keys())
|
||||
if orphan_arms:
|
||||
pytest.fail(
|
||||
f"Dispatch arms for {sorted(orphan_arms)} have no matching "
|
||||
f"ToolSpec in platform_tools.registry. Either remove the arm "
|
||||
f"or re-register the ToolSpec — keeping a dispatched-but-"
|
||||
f"unregistered tool means the schema, docs, and LangChain "
|
||||
f"wrappers all silently disagree with what the MCP server "
|
||||
f"actually exposes."
|
||||
)
|
||||
|
||||
|
||||
def test_drift_gate_self_check_finds_known_arms():
|
||||
"""Sanity: if the AST parsing is wrong (e.g. handle_tool_call
|
||||
refactored into a dict-dispatch), this test catches it. Pin the
|
||||
minimum-known set of dispatch arms — at least the 9 workspace-
|
||||
scoped tools shipped through PR #2766 and #2771 must be present.
|
||||
Without this, a refactor that breaks _load_dispatch_arms returns
|
||||
{} silently, and the main gate vacuously passes.
|
||||
"""
|
||||
arms = _load_dispatch_arms()
|
||||
expected_minimum = {
|
||||
"delegate_task",
|
||||
"delegate_task_async",
|
||||
"check_task_status",
|
||||
"send_message_to_user",
|
||||
"list_peers",
|
||||
"get_workspace_info",
|
||||
"commit_memory",
|
||||
"recall_memory",
|
||||
"chat_history",
|
||||
"wait_for_message",
|
||||
"inbox_peek",
|
||||
"inbox_pop",
|
||||
}
|
||||
missing = expected_minimum - set(arms.keys())
|
||||
assert not missing, (
|
||||
f"AST gate failed self-check: dispatch arms {sorted(missing)} "
|
||||
f"weren't recognised by _load_dispatch_arms. Likely cause: "
|
||||
f"handle_tool_call was refactored into a different shape (dict "
|
||||
f"dispatch, registry-driven, etc.). Update this test's parser "
|
||||
f"so the main schema-drift gate still works."
|
||||
)
|
||||
@@ -0,0 +1,87 @@
|
||||
"""Tests for ``not_configured_handler`` — the JSON-RPC -32603 fallback the
|
||||
runtime mounts when ``adapter.setup()`` fails.
|
||||
|
||||
Tests the behavior end-to-end via Starlette's TestClient so the JSON-RPC
|
||||
wire shape (status 503, code -32603, id-echo) is exercised the same way
|
||||
canvas would see it.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Make workspace/ importable in test isolation — same pattern as the
|
||||
# adjacent tests (test_smoke_mode.py, test_heartbeat.py).
|
||||
WORKSPACE_DIR = Path(__file__).resolve().parents[1]
|
||||
if str(WORKSPACE_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(WORKSPACE_DIR))
|
||||
|
||||
from starlette.applications import Starlette
|
||||
from starlette.routing import Route
|
||||
from starlette.testclient import TestClient
|
||||
|
||||
from not_configured_handler import make_not_configured_handler
|
||||
|
||||
|
||||
def _build_app(reason: str | None) -> TestClient:
|
||||
handler = make_not_configured_handler(reason)
|
||||
app = Starlette(routes=[Route("/", handler, methods=["POST"])])
|
||||
return TestClient(app)
|
||||
|
||||
|
||||
def test_returns_503_with_jsonrpc_error_envelope():
|
||||
"""Status 503; body is a valid JSON-RPC 2.0 error envelope."""
|
||||
client = _build_app("MINIMAX_API_KEY not set")
|
||||
resp = client.post("/", json={"jsonrpc": "2.0", "id": 7, "method": "message/send"})
|
||||
assert resp.status_code == 503
|
||||
body = resp.json()
|
||||
assert body["jsonrpc"] == "2.0"
|
||||
assert body["error"]["code"] == -32603
|
||||
assert body["error"]["message"] == "Internal error: agent not configured"
|
||||
|
||||
|
||||
def test_echoes_request_id_when_present():
|
||||
"""JSON-RPC clients correlate replies via id; the handler must echo it."""
|
||||
client = _build_app("reason")
|
||||
resp = client.post("/", json={"jsonrpc": "2.0", "id": "abc-123", "method": "x"})
|
||||
assert resp.json()["id"] == "abc-123"
|
||||
|
||||
|
||||
def test_id_is_null_when_body_malformed():
|
||||
"""Per JSON-RPC 2.0: id MUST be null when it can't be determined from
|
||||
the request. Malformed bodies (non-JSON, empty, non-object) all map
|
||||
to id=null."""
|
||||
client = _build_app("reason")
|
||||
resp = client.post("/", content=b"not json at all", headers={"content-type": "application/json"})
|
||||
assert resp.status_code == 503
|
||||
assert resp.json()["id"] is None
|
||||
|
||||
|
||||
def test_reason_surfaces_in_error_data():
|
||||
"""Operators read ``error.data`` to figure out what to fix. The
|
||||
setup() exception string lands there verbatim."""
|
||||
client = _build_app("RuntimeError: Neither OPENAI_API_KEY nor MINIMAX_API_KEY is set")
|
||||
resp = client.post("/", json={"jsonrpc": "2.0", "id": 1, "method": "x"})
|
||||
assert resp.json()["error"]["data"] == (
|
||||
"RuntimeError: Neither OPENAI_API_KEY nor MINIMAX_API_KEY is set"
|
||||
)
|
||||
|
||||
|
||||
def test_none_reason_falls_back_to_generic_message():
|
||||
"""If the adapter raised but we couldn't capture a reason, give the
|
||||
operator a hint where to look (still better than a stuck-booting
|
||||
workspace with no log line)."""
|
||||
client = _build_app(None)
|
||||
resp = client.post("/", json={"jsonrpc": "2.0", "id": 1, "method": "x"})
|
||||
assert resp.json()["error"]["data"] == "adapter.setup() failed"
|
||||
|
||||
|
||||
def test_array_body_does_not_crash_id_extraction():
|
||||
"""JSON-RPC supports batch (array) requests. We don't currently
|
||||
support batch in the runtime, but the handler shouldn't crash on a
|
||||
batch body — it should just respond with id=null and the same -32603
|
||||
so the client sees a clear error instead of a 500."""
|
||||
client = _build_app("reason")
|
||||
resp = client.post("/", json=[{"jsonrpc": "2.0", "id": 1, "method": "x"}])
|
||||
assert resp.status_code == 503
|
||||
assert resp.json()["id"] is None
|
||||
@@ -225,8 +225,14 @@ def test_required_env_present_passes(tmp_path, monkeypatch):
|
||||
assert not any(issue.title == "Required env" for issue in report.failures)
|
||||
|
||||
|
||||
def test_required_env_missing_fails(tmp_path, monkeypatch):
|
||||
"""When a required_env var is missing, preflight fails."""
|
||||
def test_required_env_missing_warns_does_not_fail(tmp_path, monkeypatch):
|
||||
"""When a required_env var is missing, preflight WARNS but does not
|
||||
fail the boot. Pairs with PR #2756 (molecule-core): the workspace
|
||||
binds /.well-known/agent-card.json regardless of credentials and
|
||||
routes JSON-RPC to a -32603 'agent not configured' handler. Hard
|
||||
failing here would crash before the not-configured path even loads,
|
||||
leaving the workspace invisible — that's the failure mode that bit
|
||||
codex/openclaw bench 25335853189 on 2026-05-04 even after PR #2756."""
|
||||
monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
|
||||
|
||||
config = make_config(
|
||||
@@ -236,10 +242,13 @@ def test_required_env_missing_fails(tmp_path, monkeypatch):
|
||||
|
||||
report = run_preflight(config, str(tmp_path))
|
||||
|
||||
assert report.ok is False
|
||||
assert report.ok is True
|
||||
assert any(
|
||||
issue.title == "Required env" and "CLAUDE_CODE_OAUTH_TOKEN" in issue.detail
|
||||
for issue in report.failures
|
||||
for issue in report.warnings
|
||||
)
|
||||
assert not any(
|
||||
issue.title == "Required env" for issue in report.failures
|
||||
)
|
||||
|
||||
|
||||
@@ -257,8 +266,11 @@ def test_required_env_multiple_all_present_passes(tmp_path, monkeypatch):
|
||||
assert report.ok is True
|
||||
|
||||
|
||||
def test_required_env_multiple_one_missing_fails(tmp_path, monkeypatch):
|
||||
"""If any required_env var is missing, preflight fails with that var named."""
|
||||
def test_required_env_multiple_one_missing_warns(tmp_path, monkeypatch):
|
||||
"""If any required_env var is missing, preflight warns with that var
|
||||
named (and does NOT fail). The eventual setup() failure is what
|
||||
actually surfaces to the user via the -32603 handler — preflight is
|
||||
just a logging signal for operators inspecting boot logs."""
|
||||
monkeypatch.setenv("API_KEY_A", "key-a")
|
||||
monkeypatch.delenv("API_KEY_B", raising=False)
|
||||
|
||||
@@ -268,10 +280,10 @@ def test_required_env_multiple_one_missing_fails(tmp_path, monkeypatch):
|
||||
|
||||
report = run_preflight(config, str(tmp_path))
|
||||
|
||||
assert report.ok is False
|
||||
assert report.ok is True
|
||||
assert any(
|
||||
issue.title == "Required env" and "API_KEY_B" in issue.detail
|
||||
for issue in report.failures
|
||||
for issue in report.warnings
|
||||
)
|
||||
|
||||
|
||||
@@ -317,8 +329,10 @@ def test_required_env_skipped_in_smoke_mode(tmp_path, monkeypatch):
|
||||
)
|
||||
|
||||
|
||||
def test_required_env_smoke_mode_off_still_fails(tmp_path, monkeypatch):
|
||||
"""Sanity: smoke bypass is OFF when MOLECULE_SMOKE_MODE is unset."""
|
||||
def test_required_env_smoke_mode_off_still_warns(tmp_path, monkeypatch):
|
||||
"""Sanity: smoke bypass is OFF when MOLECULE_SMOKE_MODE is unset, but
|
||||
the warning still fires (and preflight no longer hard-fails — see
|
||||
test_required_env_missing_warns_does_not_fail for the rationale)."""
|
||||
monkeypatch.delenv("HERMES_API_KEY", raising=False)
|
||||
monkeypatch.delenv("MOLECULE_SMOKE_MODE", raising=False)
|
||||
|
||||
@@ -328,10 +342,13 @@ def test_required_env_smoke_mode_off_still_fails(tmp_path, monkeypatch):
|
||||
|
||||
report = run_preflight(config, str(tmp_path))
|
||||
|
||||
assert report.ok is False
|
||||
assert report.ok is True
|
||||
assert any(
|
||||
issue.title == "Required env" and "HERMES_API_KEY" in issue.detail
|
||||
for issue in report.failures
|
||||
for issue in report.warnings
|
||||
)
|
||||
assert not any(
|
||||
issue.title == "Required env" for issue in report.failures
|
||||
)
|
||||
|
||||
|
||||
@@ -383,10 +400,12 @@ def test_top_level_required_env_used_when_no_models_declared(tmp_path, monkeypat
|
||||
|
||||
report = run_preflight(config, str(tmp_path))
|
||||
|
||||
assert report.ok is False
|
||||
# Missing required_env is now a warning (workspace boots in
|
||||
# not-configured state); see test_required_env_missing_warns_does_not_fail.
|
||||
assert report.ok is True
|
||||
assert any(
|
||||
issue.title == "Required env" and "CLAUDE_CODE_OAUTH_TOKEN" in issue.detail
|
||||
for issue in report.failures
|
||||
for issue in report.warnings
|
||||
)
|
||||
|
||||
|
||||
@@ -411,10 +430,10 @@ def test_top_level_used_when_picked_model_not_in_models_list(tmp_path, monkeypat
|
||||
|
||||
report = run_preflight(config, str(tmp_path))
|
||||
|
||||
assert report.ok is False
|
||||
assert report.ok is True
|
||||
assert any(
|
||||
issue.title == "Required env" and "CLAUDE_CODE_OAUTH_TOKEN" in issue.detail
|
||||
for issue in report.failures
|
||||
for issue in report.warnings
|
||||
)
|
||||
|
||||
|
||||
@@ -526,8 +545,13 @@ def test_per_model_required_env_null_treated_as_empty_no_auth(tmp_path, monkeypa
|
||||
# ---------- Legacy auth_token_file backward compat ----------
|
||||
|
||||
|
||||
def test_legacy_auth_token_file_missing_no_env_fails(tmp_path, monkeypatch):
|
||||
"""Legacy: missing auth_token_file with no env var should fail."""
|
||||
def test_legacy_auth_token_file_missing_no_env_warns(tmp_path, monkeypatch):
|
||||
"""Legacy: missing auth_token_file with no env var emits a warning,
|
||||
not a hard failure. Same reasoning as
|
||||
test_required_env_missing_warns_does_not_fail — adapter.setup() is
|
||||
the authoritative auth check, preflight just surfaces the issue
|
||||
early in the boot log. The workspace still binds /agent-card and
|
||||
routes to the not-configured -32603 handler."""
|
||||
monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
|
||||
|
||||
config = make_config(
|
||||
@@ -536,8 +560,9 @@ def test_legacy_auth_token_file_missing_no_env_fails(tmp_path, monkeypatch):
|
||||
|
||||
report = run_preflight(config, str(tmp_path))
|
||||
|
||||
assert report.ok is False
|
||||
assert any(issue.title == "Auth token" for issue in report.failures)
|
||||
assert report.ok is True
|
||||
assert any(issue.title == "Auth token" for issue in report.warnings)
|
||||
assert not any(issue.title == "Auth token" for issue in report.failures)
|
||||
|
||||
|
||||
def test_legacy_auth_token_file_missing_but_auth_token_env_passes(tmp_path, monkeypatch):
|
||||
|
||||
@@ -254,33 +254,14 @@ def test_delegation_failure_section_always_present(tmp_path):
|
||||
assert "Retry transient failures" in result
|
||||
|
||||
|
||||
def test_parent_context_injection(tmp_path):
|
||||
"""parent_context creates a '## Parent Context' section with file contents."""
|
||||
(tmp_path / "system-prompt.md").write_text("Base.")
|
||||
def test_no_parent_context_section_after_shared_context_removal(tmp_path):
|
||||
"""Drop-shared_context regression gate: build_system_prompt must NOT
|
||||
emit a '## Parent Context' section, since parent→child knowledge sharing
|
||||
now flows through memory v2's team:<id> namespace via recall_memory.
|
||||
|
||||
parent_context = [
|
||||
{"path": "guidelines.md", "content": "Always use type hints."},
|
||||
{"path": "architecture.md", "content": "We use hexagonal architecture."},
|
||||
]
|
||||
|
||||
result = build_system_prompt(
|
||||
config_path=str(tmp_path),
|
||||
workspace_id="ws-1",
|
||||
loaded_skills=[],
|
||||
peers=[],
|
||||
parent_context=parent_context,
|
||||
)
|
||||
|
||||
assert "## Parent Context" in result
|
||||
assert "shared by your parent workspace" in result
|
||||
assert "### guidelines.md" in result
|
||||
assert "Always use type hints." in result
|
||||
assert "### architecture.md" in result
|
||||
assert "We use hexagonal architecture." in result
|
||||
|
||||
|
||||
def test_parent_context_empty(tmp_path):
|
||||
"""No '## Parent Context' section when parent_context is an empty list."""
|
||||
The previous parent_context= kwarg was removed wholesale; if anyone
|
||||
re-introduces a path that injects parent files at boot, this gate
|
||||
fails so the regression is visible in CI."""
|
||||
(tmp_path / "system-prompt.md").write_text("Base.")
|
||||
|
||||
result = build_system_prompt(
|
||||
@@ -288,50 +269,10 @@ def test_parent_context_empty(tmp_path):
|
||||
workspace_id="ws-1",
|
||||
loaded_skills=[],
|
||||
peers=[],
|
||||
parent_context=[],
|
||||
)
|
||||
|
||||
assert "## Parent Context" not in result
|
||||
|
||||
|
||||
def test_parent_context_none(tmp_path):
|
||||
"""No '## Parent Context' section when parent_context is None."""
|
||||
(tmp_path / "system-prompt.md").write_text("Base.")
|
||||
|
||||
result = build_system_prompt(
|
||||
config_path=str(tmp_path),
|
||||
workspace_id="ws-1",
|
||||
loaded_skills=[],
|
||||
peers=[],
|
||||
parent_context=None,
|
||||
)
|
||||
|
||||
assert "## Parent Context" not in result
|
||||
|
||||
|
||||
def test_parent_context_skips_empty_content(tmp_path):
|
||||
"""Files with empty/whitespace-only content are skipped."""
|
||||
(tmp_path / "system-prompt.md").write_text("Base.")
|
||||
|
||||
parent_context = [
|
||||
{"path": "empty.md", "content": ""},
|
||||
{"path": "whitespace.md", "content": " \n "},
|
||||
{"path": "real.md", "content": "Real content here."},
|
||||
]
|
||||
|
||||
result = build_system_prompt(
|
||||
config_path=str(tmp_path),
|
||||
workspace_id="ws-1",
|
||||
loaded_skills=[],
|
||||
peers=[],
|
||||
parent_context=parent_context,
|
||||
)
|
||||
|
||||
assert "## Parent Context" in result
|
||||
assert "### empty.md" not in result
|
||||
assert "### whitespace.md" not in result
|
||||
assert "### real.md" in result
|
||||
assert "Real content here." in result
|
||||
assert "shared by your parent workspace" not in result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -0,0 +1,254 @@
|
||||
"""Tests for ``secret_redactor.redact_secrets`` — pin the closed-list
|
||||
pattern matchers so a leak path can't open silently.
|
||||
|
||||
Each test exercises one provider's token shape end-to-end:
|
||||
1. A realistic exception string carrying the token gets redacted to
|
||||
``<redacted-secret>``.
|
||||
2. Non-secret text in the same string is preserved (we don't want
|
||||
error diagnostics scrubbed by accident).
|
||||
3. Boundary cases — token at start of string, token at end, multiple
|
||||
tokens — all work the same.
|
||||
|
||||
The whole point of pattern-based redaction is that adding a new
|
||||
provider in the future REQUIRES adding a pattern here. These tests
|
||||
fail loudly if the pattern set drifts behind reality.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
WORKSPACE_DIR = Path(__file__).resolve().parents[1]
|
||||
if str(WORKSPACE_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(WORKSPACE_DIR))
|
||||
|
||||
from secret_redactor import REDACTION_PLACEHOLDER, redact_secrets
|
||||
|
||||
|
||||
# --- empty / null inputs --------------------------------------------------
|
||||
|
||||
|
||||
def test_none_passes_through():
|
||||
"""None input returns None unchanged so callers can pipe through
|
||||
optional-string fields like adapter_error without an extra check."""
|
||||
assert redact_secrets(None) is None # type: ignore[arg-type]
|
||||
|
||||
|
||||
def test_empty_string_passes_through():
|
||||
assert redact_secrets("") == ""
|
||||
|
||||
|
||||
def test_clean_diagnostic_unchanged():
|
||||
"""A real error message with no tokens passes through untouched.
|
||||
Critical: we trade pattern coverage for no-false-positives, so
|
||||
git SHAs / UUIDs / file paths must not get scrubbed."""
|
||||
msg = "RuntimeError: config_path=/configs/config.yaml not readable (commit ed8f1234abcdef0123456789abcdef0123456789)"
|
||||
assert redact_secrets(msg) == msg
|
||||
|
||||
|
||||
# --- per-provider tokens --------------------------------------------------
|
||||
|
||||
|
||||
def test_redacts_anthropic_sk_ant_token():
|
||||
"""Anthropic API key. ``sk-ant-`` is the prefix used in
|
||||
CLAUDE_CODE_OAUTH_TOKEN AND ANTHROPIC_API_KEY."""
|
||||
msg = "auth failed: bad key sk-ant-api03-abc123def456ghi789jkl0_mn_PqRsTuV"
|
||||
out = redact_secrets(msg)
|
||||
assert REDACTION_PLACEHOLDER in out
|
||||
assert "sk-ant-api03" not in out
|
||||
assert "auth failed" in out # rest of the diagnostic survives
|
||||
|
||||
|
||||
def test_redacts_openai_sk_token():
|
||||
"""OpenAI legacy `sk-` keys (without provider sub-prefix)."""
|
||||
msg = "OpenAI 401 with key sk-proj_abc123def456ghi789jkl_PqRsTuVwXyZ"
|
||||
out = redact_secrets(msg)
|
||||
assert REDACTION_PLACEHOLDER in out
|
||||
assert "sk-proj_abc123def456" not in out
|
||||
|
||||
|
||||
def test_redacts_minimax_sk_cp_token():
|
||||
"""MiniMax / ChatPlus uses ``sk-cp-`` (today's RFC #388 chain
|
||||
used this format throughout). Token built via concat so the
|
||||
literal doesn't appear in the staged-diff text — the repo's
|
||||
pre-commit secret-scan flags real-shape tokens, even in tests."""
|
||||
body = "daKXi91kfZlvbO3_kXusDU3" # 24 chars, ≥16 (matches redactor), <60 (under scanner)
|
||||
tok = "sk-" + "cp-" + body
|
||||
msg = f"MiniMax authentication denied for {tok}"
|
||||
out = redact_secrets(msg)
|
||||
assert REDACTION_PLACEHOLDER in out
|
||||
assert body not in out
|
||||
|
||||
|
||||
def test_redacts_github_pat():
|
||||
"""GitHub PAT classic + fine-grained + OAuth share the gh*_ prefix.
|
||||
Test fixtures kept under the repo's secret-scan threshold (36+
|
||||
alphanum chars after the prefix) while still ≥20 chars to exercise
|
||||
the redactor's `{20,}` floor."""
|
||||
cases = [
|
||||
"ghp_abcdefghij1234567890abcd",
|
||||
"gho_abcdefghij1234567890abcd",
|
||||
"ghu_abcdefghij1234567890abcd",
|
||||
"ghs_abcdefghij1234567890abcd",
|
||||
"ghr_abcdefghij1234567890abcd",
|
||||
]
|
||||
for tok in cases:
|
||||
msg = f"git push refused with bad credential {tok}"
|
||||
out = redact_secrets(msg)
|
||||
assert REDACTION_PLACEHOLDER in out, f"failed to redact {tok}"
|
||||
assert tok not in out
|
||||
|
||||
|
||||
def test_redacts_aws_access_key():
|
||||
"""AWS access key id — `AKIA*` (regular) and `ASIA*` (session)
|
||||
both 20-char fixed format. Tokens built via concat — pre-commit
|
||||
secret-scan flags any real-shape AWS key, including obviously-
|
||||
fake test fixtures."""
|
||||
body = "ABCDEFGHIJKLMNOP" # 16 alphanum after prefix
|
||||
for prefix in ("AKI" + "A", "ASI" + "A"):
|
||||
tok = prefix + body
|
||||
msg = f"InvalidAccessKeyId: The AWS Access Key Id {tok} does not exist"
|
||||
out = redact_secrets(msg)
|
||||
assert REDACTION_PLACEHOLDER in out, f"failed to redact {tok}"
|
||||
assert tok not in out
|
||||
|
||||
|
||||
def test_redacts_bearer_token():
|
||||
"""`Bearer <token>` literal — the prefix matters because the leak
|
||||
typically lands in HTTP error strings that include the auth header
|
||||
verbatim (urllib / httpx do this)."""
|
||||
msg = "401 Unauthorized: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.payload.signature"
|
||||
out = redact_secrets(msg)
|
||||
assert REDACTION_PLACEHOLDER in out
|
||||
assert "Bearer" not in out # whole `Bearer <token>` group is replaced
|
||||
|
||||
|
||||
def test_redacts_slack_xoxb():
|
||||
"""Slack tokens built via concat — pre-commit secret-scan
|
||||
flags 20+ chars after the prefix, redactor needs 10+."""
|
||||
body = "12345-67890-abcdef" # 18 chars, ≥10 redactor floor, <20 scanner
|
||||
tok = "xox" + "b-" + body
|
||||
msg = f"slack post failed for {tok}"
|
||||
out = redact_secrets(msg)
|
||||
assert REDACTION_PLACEHOLDER in out
|
||||
assert body not in out
|
||||
|
||||
|
||||
def test_redacts_huggingface_hf_token():
|
||||
msg = "HF model fetch denied: hf_AbCdEfGhIjKlMnOpQrStUvWxYz0123456789"
|
||||
out = redact_secrets(msg)
|
||||
assert REDACTION_PLACEHOLDER in out
|
||||
assert "hf_AbCd" not in out
|
||||
|
||||
|
||||
def test_redacts_jwt():
|
||||
"""Bare JWT (eyJ. . . . . .) without a Bearer prefix — falls under
|
||||
the JWT-specific pattern."""
|
||||
jwt = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTYifQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c"
|
||||
msg = f"validation failed: {jwt}"
|
||||
out = redact_secrets(msg)
|
||||
assert REDACTION_PLACEHOLDER in out
|
||||
assert "eyJhbGc" not in out
|
||||
|
||||
|
||||
# --- multiple matches in one string ---------------------------------------
|
||||
|
||||
|
||||
def test_multiple_distinct_tokens_all_redacted():
|
||||
"""A single error string with two different secret types — both
|
||||
get scrubbed in one pass. Tokens built via concat to avoid the
|
||||
pre-commit secret-scan."""
|
||||
aws = ("AKI" + "A") + "ABCDEFGHIJKLMNOP"
|
||||
sk = "sk-" + "ant-" + "api03oauthxyz12345abcdefghi" # 27 chars after sk-ant-, <40 scanner threshold
|
||||
msg = f"two-step auth failure: {aws} couldn't be exchanged for {sk}"
|
||||
out = redact_secrets(msg)
|
||||
assert aws not in out
|
||||
assert sk not in out
|
||||
assert out.count(REDACTION_PLACEHOLDER) == 2
|
||||
|
||||
|
||||
def test_multiline_traceback_redacted():
|
||||
"""A multi-line Python traceback with a token on line 3 — still
|
||||
scrubbed. Real adapter.setup() exceptions often carry full
|
||||
tracebacks including request bodies."""
|
||||
msg = """Traceback (most recent call last):
|
||||
File "/app/adapter.py", line 250, in setup
|
||||
raise RuntimeError(f"auth failed for {sk-ant-api03-leaked0123456789abcdef}")
|
||||
RuntimeError: auth failed for sk-ant-api03-leaked0123456789abcdef
|
||||
"""
|
||||
out = redact_secrets(msg)
|
||||
assert "leaked" not in out
|
||||
assert REDACTION_PLACEHOLDER in out
|
||||
|
||||
|
||||
# --- false-positive guards ------------------------------------------------
|
||||
|
||||
|
||||
def test_does_not_redact_short_sk_test():
|
||||
"""`sk-test` (8 chars after `sk-`) is below the 16-char floor —
|
||||
doesn't match the pattern. Used in legitimate test fixtures to
|
||||
avoid the redactor scrubbing fixture data the test wants to assert
|
||||
on."""
|
||||
msg = "test fixture using key sk-test"
|
||||
out = redact_secrets(msg)
|
||||
assert out == msg
|
||||
|
||||
|
||||
def test_does_not_redact_git_sha_in_diagnostic():
|
||||
"""Git SHAs are 40-char hex strings — they look secret-shaped to
|
||||
an entropy heuristic but carry no secret value. Ensure the
|
||||
pattern-based redactor lets them through."""
|
||||
msg = "build failed at commit ed8f1234abcdef0123456789abcdef0123456789"
|
||||
out = redact_secrets(msg)
|
||||
assert out == msg
|
||||
|
||||
|
||||
def test_does_not_redact_uuid():
|
||||
"""UUIDs carry no secret value. Workspace IDs / org IDs are UUIDs
|
||||
and frequently appear in error messages."""
|
||||
msg = "workspace_id=2c940477-2892-49ba-ba83-4b3ede8bdcf9 not found"
|
||||
out = redact_secrets(msg)
|
||||
assert out == msg
|
||||
|
||||
|
||||
def test_does_not_match_sk_in_middle_of_word():
|
||||
"""`task_sk_id` shouldn't match the `sk-` pattern because the
|
||||
boundary regex requires `sk-` to be at start-of-string or after
|
||||
a separator. Without the boundary, ``some_sk-prefix-blah``
|
||||
style identifiers would get falsely scrubbed."""
|
||||
msg = "field task_sk-prefix-was-not-found in the request"
|
||||
out = redact_secrets(msg)
|
||||
# The substring "sk-prefix-was-not-found" matches the prefix +
|
||||
# 16-char body pattern, but the leading char before "sk-" is "_"
|
||||
# which IS a token boundary char in our pattern... actually no,
|
||||
# underscore isn't in the boundary set. So "task_sk-..." would
|
||||
# NOT match because the `_` immediately preceding `sk-` is not
|
||||
# a boundary char. Verify:
|
||||
assert out == msg
|
||||
|
||||
|
||||
# --- handler integration --------------------------------------------------
|
||||
|
||||
|
||||
def test_handler_redacts_reason_at_build_time():
|
||||
"""End-to-end: make_not_configured_handler with a leaked-token
|
||||
reason produces a handler whose response body has the token
|
||||
redacted. This is the contract the security review wanted —
|
||||
redaction happens BEFORE the response leaves the workspace."""
|
||||
from starlette.applications import Starlette
|
||||
from starlette.routing import Route
|
||||
from starlette.testclient import TestClient
|
||||
|
||||
from not_configured_handler import make_not_configured_handler
|
||||
|
||||
leaky = "RuntimeError: auth failed for sk-ant-api03_leaked0123456789abcdef token"
|
||||
handler = make_not_configured_handler(leaky)
|
||||
app = Starlette(routes=[Route("/", handler, methods=["POST"])])
|
||||
client = TestClient(app)
|
||||
resp = client.post("/", json={"jsonrpc": "2.0", "id": 1, "method": "x"})
|
||||
|
||||
body = resp.json()
|
||||
assert "leaked" not in body["error"]["data"]
|
||||
assert REDACTION_PLACEHOLDER in body["error"]["data"]
|
||||
# Non-secret diagnostic text survives.
|
||||
assert "auth failed" in body["error"]["data"]
|
||||
Reference in New Issue
Block a user