forked from molecule-ai/molecule-core
Compare commits
46 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e0df90c294 | |||
| 1edee1131b | |||
| f5ea812e9d | |||
| 3b7ed9cf53 | |||
| da9061c131 | |||
| c4807a930d | |||
| d22fbb29b8 | |||
| 899c53550d | |||
| cdfc9f743f | |||
| 7a2664523c | |||
| 632e906640 | |||
| 475da5b64c | |||
| 1ad107cc15 | |||
| e4bd1e4293 | |||
| 01deeb36cf | |||
| b906e1da61 | |||
| 226e57a942 | |||
| abc3affcb6 | |||
| 3322524b0f | |||
| de01ff51b0 | |||
| f3782662bd | |||
| e9eb3868d5 | |||
| cb70d3d437 | |||
| a1d202723d | |||
| 0d0840d9d9 | |||
| fc30b5c9de | |||
| ef67dc513e | |||
| 23d3f057d3 | |||
| 8ca027ddf3 | |||
| 46a4ef83bb | |||
| a6afc18de5 | |||
| 423d58d42c | |||
| 9386f1d399 | |||
| a766e5ce48 | |||
| 5ad2669f88 | |||
| 0ca4e431c1 | |||
| 2bf6a7005f | |||
| 16ead69641 | |||
| 60afcd43c9 | |||
| 9a53529047 | |||
| 575f893f4e | |||
| 4cac4e7710 | |||
| 3d0a7c381b | |||
| 8e5d193761 | |||
| 3e0d2e650a | |||
| 210a26d31a |
@@ -387,6 +387,7 @@ jobs:
|
||||
"a2a_mcp_server.py"
|
||||
"mcp_cli.py"
|
||||
"a2a_tools.py"
|
||||
"a2a_tools_inbox.py"
|
||||
"inbox.py"
|
||||
"platform_auth.py"
|
||||
)
|
||||
|
||||
@@ -172,6 +172,9 @@ jobs:
|
||||
- name: Run poll-mode + since_id cursor E2E (#2339)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_poll_mode_e2e.sh
|
||||
- name: Run poll-mode chat upload E2E (RFC #2891)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_poll_mode_chat_upload_e2e.sh
|
||||
- name: Dump platform log on failure
|
||||
if: failure() && needs.detect-changes.outputs.api == 'true'
|
||||
run: cat workspace-server/platform.log || true
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
// quick bounce between signup and either Checkout or the tenant UI.
|
||||
|
||||
import { useEffect, useState } from "react";
|
||||
import { fetchSession, redirectToLogin, type Session } from "@/lib/auth";
|
||||
import { fetchSession, redirectToLogin, signOut, type Session } from "@/lib/auth";
|
||||
import { PLATFORM_URL } from "@/lib/api";
|
||||
import { formatCredits, pillTone, bannerKind } from "@/lib/credits";
|
||||
import { TermsGate } from "@/components/TermsGate";
|
||||
@@ -129,7 +129,7 @@ export default function OrgsPage() {
|
||||
return <EmptyState banner={justCheckedOut ? <CheckoutBanner /> : null} />;
|
||||
}
|
||||
return (
|
||||
<Shell>
|
||||
<Shell session={session}>
|
||||
{justCheckedOut && <CheckoutBanner />}
|
||||
<ul className="space-y-3">
|
||||
{orgs.map((o) => (
|
||||
@@ -160,11 +160,21 @@ function CheckoutBanner() {
|
||||
);
|
||||
}
|
||||
|
||||
function Shell({ children }: { children: React.ReactNode }) {
|
||||
function Shell({
|
||||
children,
|
||||
session,
|
||||
}: {
|
||||
children: React.ReactNode;
|
||||
// Optional: when present, the header renders the signed-in email +
|
||||
// a Sign-out button. The empty-state Shell call doesn't have a
|
||||
// session in scope, so accept null and skip the header chrome there.
|
||||
session?: Session | null;
|
||||
}) {
|
||||
return (
|
||||
<main className="min-h-screen bg-surface text-ink">
|
||||
<TermsGate>
|
||||
<div className="mx-auto max-w-2xl px-6 pt-20 pb-12">
|
||||
{session ? <AccountBar session={session} /> : null}
|
||||
<h1 className="text-3xl font-bold text-ink">Your organizations</h1>
|
||||
<p className="mt-2 text-ink-mid">
|
||||
Each org is an isolated Molecule workspace.
|
||||
@@ -177,6 +187,40 @@ function Shell({ children }: { children: React.ReactNode }) {
|
||||
);
|
||||
}
|
||||
|
||||
// AccountBar renders the signed-in email + a Sign-out button at the
|
||||
// top of the page. Without this the user has no way to log out — the
|
||||
// /cp/auth/signout endpoint exists on the control plane but no UI ever
|
||||
// called it. Reported externally on 2026-05-05; this is the fix.
|
||||
//
|
||||
// Click → calls signOut() which POSTs /cp/auth/signout (clears the
|
||||
// WorkOS session cookie + revokes at the provider) then bounces to
|
||||
// /cp/auth/login. The signOut helper is best-effort — even on a 5xx
|
||||
// or network failure the redirect fires so the user never gets stuck
|
||||
// on an authed-looking page after they clicked Sign out.
|
||||
function AccountBar({ session }: { session: Session }) {
|
||||
const [signingOut, setSigningOut] = useState(false);
|
||||
return (
|
||||
<div className="mb-6 flex items-center justify-between text-sm text-ink-mid">
|
||||
<span title="Signed-in user">{session.email}</span>
|
||||
<button
|
||||
type="button"
|
||||
disabled={signingOut}
|
||||
onClick={async () => {
|
||||
setSigningOut(true);
|
||||
await signOut();
|
||||
// Redirect happens inside signOut; this line is for tests +
|
||||
// edge cases (jsdom, blocked navigation) where it doesn't.
|
||||
setSigningOut(false);
|
||||
}}
|
||||
className="rounded border border-line bg-surface-card px-3 py-1 text-xs text-ink hover:bg-surface-card disabled:opacity-50"
|
||||
aria-label="Sign out"
|
||||
>
|
||||
{signingOut ? "Signing out…" : "Sign out"}
|
||||
</button>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// DataResidencyNotice surfaces where workspace data lives so EU-based
|
||||
// signups can make an informed choice (GDPR Art. 13 disclosure
|
||||
// requirement). Plain text, no icon — the goal is clarity, not
|
||||
|
||||
@@ -20,160 +20,6 @@ import * as Dialog from "@radix-ui/react-dialog";
|
||||
|
||||
type Tab = "python" | "curl" | "claude" | "mcp" | "hermes" | "codex" | "openclaw" | "fields";
|
||||
|
||||
// Per-tab help metadata: docs link, where-to-install link, common errors.
|
||||
// All URLs verified against repo content (docs/guides/* file paths map to
|
||||
// docs.molecule.ai/docs/guides/*; canonical hostname confirmed by existing
|
||||
// blog post canonical metadata) or against the snippet text the operator
|
||||
// just copied. Never linking to a URL that wasn't already in product —
|
||||
// dead links here defeat the purpose of "more comprehensive instructions."
|
||||
const TAB_HELP: Record<
|
||||
Tab,
|
||||
{
|
||||
docsUrl?: string;
|
||||
docsLabel?: string;
|
||||
downloadUrl?: string;
|
||||
downloadLabel?: string;
|
||||
commonIssues?: { symptom: string; check: string }[];
|
||||
}
|
||||
> = {
|
||||
mcp: {
|
||||
docsUrl: "https://docs.molecule.ai/docs/guides/mcp-server-setup",
|
||||
docsLabel: "MCP server setup guide",
|
||||
downloadUrl: "https://pypi.org/project/molecule-ai-workspace-runtime/",
|
||||
downloadLabel: "molecule-ai-workspace-runtime on PyPI",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "Tools not appearing in your agent",
|
||||
check:
|
||||
"Run `claude mcp list` (or your runtime's equivalent) — the molecule entry should be listed. If missing, re-run the `claude mcp add` line.",
|
||||
},
|
||||
{
|
||||
symptom: "ConnectionRefused / DNS error on first call",
|
||||
check:
|
||||
"PLATFORM_URL must include the scheme (https://) and have no trailing slash. Verify with `curl $PLATFORM_URL/healthz`.",
|
||||
},
|
||||
],
|
||||
},
|
||||
python: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
downloadUrl: "https://pypi.org/project/molecule-ai-workspace-runtime/",
|
||||
downloadLabel: "molecule-ai-workspace-runtime on PyPI",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "401 from /heartbeat",
|
||||
check:
|
||||
"AUTH_TOKEN expired or wrong workspace_id. Tokens are shown only once at create time — re-create the workspace to get a fresh token.",
|
||||
},
|
||||
{
|
||||
symptom: "AGENT_URL not reachable from platform",
|
||||
check:
|
||||
"Public HTTPS URL required for inbound A2A. Use ngrok or Cloudflare Tunnel if your agent is behind NAT.",
|
||||
},
|
||||
],
|
||||
},
|
||||
claude: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
downloadUrl: "https://claude.com/claude-code",
|
||||
downloadLabel: "Claude Code (claude.com)",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "plugin not installed",
|
||||
check:
|
||||
"Run `/plugin marketplace add Molecule-AI/molecule-mcp-claude-channel` then `/plugin install molecule@molecule-mcp-claude-channel` inside Claude Code, then `/reload-plugins`.",
|
||||
},
|
||||
{
|
||||
symptom: "not on the approved channels allowlist",
|
||||
check:
|
||||
"Custom channels need `--dangerously-load-development-channels` on the launch command. Team/Enterprise orgs need admin to set `channelsEnabled` + `allowedChannelPlugins` in claude.ai admin settings.",
|
||||
},
|
||||
{
|
||||
symptom: "Inbound messages not arriving",
|
||||
check:
|
||||
"Check stderr for `molecule channel: connected — watching N workspace(s)`. Verify ~/.claude/channels/molecule/.env has the right PLATFORM_URL + token.",
|
||||
},
|
||||
],
|
||||
},
|
||||
hermes: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
downloadUrl: "https://github.com/NousResearch/hermes-agent",
|
||||
downloadLabel: "hermes-agent (NousResearch)",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "Gateway start failure",
|
||||
check:
|
||||
"Tail ~/.hermes/gateway.log. YAML duplicate-key in config.yaml is the most common cause — `gateway:` block must appear exactly once.",
|
||||
},
|
||||
{
|
||||
symptom: "Plugin not discovered after install",
|
||||
check:
|
||||
"Run `pip show hermes-channel-molecule` to confirm install. Some hermes builds need `hermes plugin reload` before the new platform_plugins entry takes effect.",
|
||||
},
|
||||
],
|
||||
},
|
||||
codex: {
|
||||
docsUrl: "https://docs.molecule.ai/docs/guides/mcp-server-setup",
|
||||
docsLabel: "MCP server setup guide",
|
||||
downloadUrl: "https://github.com/openai/codex",
|
||||
downloadLabel: "openai/codex",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "[mcp_servers.molecule] not loaded",
|
||||
check:
|
||||
"Codex must be ≥ 0.57. Check with `codex --version`; upgrade via `npm install -g @openai/codex@latest`.",
|
||||
},
|
||||
{
|
||||
symptom: "TOML parse error after re-running setup",
|
||||
check:
|
||||
"TOML rejects duplicate `[mcp_servers.molecule]` tables. Open ~/.codex/config.toml and remove the old block before pasting the new one.",
|
||||
},
|
||||
{
|
||||
symptom: "Canvas messages don't wake codex",
|
||||
check:
|
||||
"Step 3 (codex-channel-molecule bridge daemon) is required for inbound push. Check `pgrep -f codex-channel-molecule` and `tail ~/.codex-channel-molecule/daemon.log`.",
|
||||
},
|
||||
],
|
||||
},
|
||||
openclaw: {
|
||||
docsUrl: "https://docs.molecule.ai/docs/guides/mcp-server-setup",
|
||||
docsLabel: "MCP server setup guide",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "Gateway not starting",
|
||||
check:
|
||||
"Tail ~/.openclaw/gateway.log. The loopback bind requires :18789 to be free — check with `lsof -iTCP:18789`.",
|
||||
},
|
||||
{
|
||||
symptom: "openclaw mcp set rejected",
|
||||
check:
|
||||
"The heredoc generates JSON; verify it parsed by running `jq < ~/.openclaw/mcp/molecule.json`. Re-run `openclaw mcp set` if the file is malformed.",
|
||||
},
|
||||
],
|
||||
},
|
||||
curl: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "401 / 403 on register",
|
||||
check:
|
||||
"WORKSPACE_AUTH_TOKEN must be the value shown at workspace create. Tokens are shown only once.",
|
||||
},
|
||||
],
|
||||
},
|
||||
fields: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
},
|
||||
};
|
||||
|
||||
export interface ExternalConnectionInfo {
|
||||
workspace_id: string;
|
||||
platform_url: string;
|
||||
@@ -457,7 +303,6 @@ export function ExternalConnectModal({ info, onClose }: Props) {
|
||||
<Field label="heartbeat_endpoint" value={info.heartbeat_endpoint} onCopy={() => copy(info.heartbeat_endpoint, "hb")} copied={copiedKey === "hb"} />
|
||||
</div>
|
||||
)}
|
||||
<HelpBlock help={TAB_HELP[tab]} />
|
||||
</div>
|
||||
|
||||
<div className="mt-5 flex justify-end gap-2">
|
||||
@@ -506,70 +351,6 @@ function SnippetBlock({
|
||||
);
|
||||
}
|
||||
|
||||
// HelpBlock — collapsible "Need help?" section under each tab's snippet.
|
||||
// Renders only the keys present in the per-tab help metadata (no empty
|
||||
// sections). Closed by default so the snippet stays the visual focus;
|
||||
// operators with a working setup never see this. Uses native <details>
|
||||
// for keyboard accessibility (Tab + Enter) without extra ARIA wiring.
|
||||
function HelpBlock({
|
||||
help,
|
||||
}: {
|
||||
help: (typeof TAB_HELP)[Tab] | undefined;
|
||||
}) {
|
||||
if (!help) return null;
|
||||
const { docsUrl, docsLabel, downloadUrl, downloadLabel, commonIssues } = help;
|
||||
if (!docsUrl && !downloadUrl && !commonIssues?.length) return null;
|
||||
|
||||
return (
|
||||
<details className="mt-3 border border-line rounded-lg bg-surface text-xs">
|
||||
<summary className="cursor-pointer select-none px-3 py-2 text-ink-mid hover:text-ink">
|
||||
Need help? — install link, docs, common errors
|
||||
</summary>
|
||||
<div className="px-3 pb-3 pt-1 space-y-2">
|
||||
{downloadUrl && (
|
||||
<div>
|
||||
<span className="text-ink-soft">Where to install: </span>
|
||||
<a
|
||||
href={downloadUrl}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-accent underline hover:text-accent-strong"
|
||||
>
|
||||
{downloadLabel || downloadUrl}
|
||||
</a>
|
||||
</div>
|
||||
)}
|
||||
{docsUrl && (
|
||||
<div>
|
||||
<span className="text-ink-soft">Documentation: </span>
|
||||
<a
|
||||
href={docsUrl}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-accent underline hover:text-accent-strong"
|
||||
>
|
||||
{docsLabel || docsUrl}
|
||||
</a>
|
||||
</div>
|
||||
)}
|
||||
{commonIssues && commonIssues.length > 0 && (
|
||||
<div>
|
||||
<div className="text-ink-soft mb-1">Common errors:</div>
|
||||
<ul className="space-y-1.5 pl-3">
|
||||
{commonIssues.map((issue, i) => (
|
||||
<li key={i}>
|
||||
<code className="text-warm font-mono">{issue.symptom}</code>
|
||||
<span className="text-ink-mid"> — {issue.check}</span>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</details>
|
||||
);
|
||||
}
|
||||
|
||||
function Field({
|
||||
label,
|
||||
value,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useEffect, useMemo, useRef } from "react";
|
||||
import { useState, useEffect, useLayoutEffect, useMemo, useRef, useCallback } from "react";
|
||||
import ReactMarkdown from "react-markdown";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import { api } from "@/lib/api";
|
||||
@@ -184,13 +184,23 @@ function unwrapErrorText(raw: string | null): string {
|
||||
export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
|
||||
const [messages, setMessages] = useState<CommMessage[]>([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [loadError, setLoadError] = useState<string | null>(null);
|
||||
// Dedup by timestamp+type+peer to handle API load + WebSocket race
|
||||
const seenKeys = useRef(new Set<string>());
|
||||
const bottomRef = useRef<HTMLDivElement>(null);
|
||||
// Mirrors the my-chat scroll behaviour from ChatTab (PR #2903) —
|
||||
// smooth-scroll on a long history gets interrupted by concurrent
|
||||
// renders and lands the panel mid-conversation. Switch the first
|
||||
// arrival to instant; subsequent appends animate.
|
||||
const hasInitialScrollRef = useRef(false);
|
||||
|
||||
// Load history
|
||||
useEffect(() => {
|
||||
// Load history. Extracted so the error-state retry button can
|
||||
// re-invoke without remount. ChatTab uses the same shape
|
||||
// (loadInitial → loadError state → retry button).
|
||||
const loadInitial = useCallback(() => {
|
||||
setLoading(true);
|
||||
setLoadError(null);
|
||||
seenKeys.current.clear();
|
||||
api.get<ActivityEntry[]>(`/workspaces/${workspaceId}/activity?source=agent&limit=50`)
|
||||
.then((entries) => {
|
||||
const filtered = (entries ?? [])
|
||||
@@ -234,10 +244,15 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
|
||||
// the .then body) — the panel just sat on the empty state
|
||||
// with zero signal.
|
||||
console.warn("AgentCommsPanel: load activity failed", err);
|
||||
setLoadError(err instanceof Error ? err.message : String(err));
|
||||
setLoading(false);
|
||||
});
|
||||
}, [workspaceId]);
|
||||
|
||||
useEffect(() => {
|
||||
loadInitial();
|
||||
}, [loadInitial]);
|
||||
|
||||
// Live updates routed through the global ReconnectingSocket. The
|
||||
// previous pattern of `new WebSocket(WS_URL)` per panel had no
|
||||
// onclose / no reconnect, so any drop (idle timeout, browser
|
||||
@@ -358,7 +373,18 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
|
||||
} catch { /* ignore */ }
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
// useLayoutEffect (not useEffect) so the scroll runs BEFORE paint —
|
||||
// otherwise the user sees the panel jump for one frame on every
|
||||
// append. Mirrors ChatTab's MyChatPanel scroll block.
|
||||
useLayoutEffect(() => {
|
||||
if (!hasInitialScrollRef.current && messages.length > 0) {
|
||||
// Instant on first arrival — smooth-scroll on a long history
|
||||
// gets interrupted by concurrent renders and lands the panel
|
||||
// mid-conversation (the chat-opens-in-middle bug class).
|
||||
hasInitialScrollRef.current = true;
|
||||
bottomRef.current?.scrollIntoView({ behavior: "instant" as ScrollBehavior });
|
||||
return;
|
||||
}
|
||||
bottomRef.current?.scrollIntoView({ behavior: "smooth" });
|
||||
}, [messages]);
|
||||
|
||||
@@ -366,6 +392,27 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
|
||||
return <div className="text-xs text-ink-soft text-center py-8">Loading agent communications...</div>;
|
||||
}
|
||||
|
||||
if (loadError !== null && messages.length === 0) {
|
||||
// Mirrors ChatTab my-chat error UI — surfaces the load failure
|
||||
// with a retry button instead of silently rendering empty state.
|
||||
return (
|
||||
<div
|
||||
role="alert"
|
||||
className="mx-2 mt-2 rounded-lg border border-red-800/50 bg-red-950/30 px-3 py-2.5"
|
||||
>
|
||||
<p className="text-[11px] text-bad mb-1.5">
|
||||
Failed to load agent communications: {loadError}
|
||||
</p>
|
||||
<button
|
||||
onClick={loadInitial}
|
||||
className="text-[10px] px-2 py-0.5 rounded bg-red-800/40 text-bad hover:bg-red-700/50 transition-colors"
|
||||
>
|
||||
Retry
|
||||
</button>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (messages.length === 0) {
|
||||
return (
|
||||
<div className="text-xs text-ink-soft text-center py-8">
|
||||
|
||||
@@ -0,0 +1,115 @@
|
||||
// @vitest-environment jsdom
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { render, screen, fireEvent, waitFor } from "@testing-library/react";
|
||||
|
||||
// API mock — tests can override per case via apiGetMock.mockImplementationOnce.
|
||||
const apiGetMock = vi.fn<(url: string) => Promise<unknown>>();
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (url: string) => apiGetMock(url),
|
||||
},
|
||||
}));
|
||||
|
||||
// useSocketEvent — no-op for these render tests; live updates aren't
|
||||
// what we're verifying here.
|
||||
vi.mock("@/hooks/useSocketEvent", () => ({
|
||||
useSocketEvent: () => {},
|
||||
}));
|
||||
|
||||
// Canvas store — peer name resolution.
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: {
|
||||
getState: () => ({
|
||||
nodes: [
|
||||
{ id: "ws-self", data: { name: "Self" } },
|
||||
{ id: "ws-peer", data: { name: "Peer Agent" } },
|
||||
],
|
||||
}),
|
||||
},
|
||||
}));
|
||||
|
||||
// Toaster shim — AgentCommsPanel imports showToast.
|
||||
vi.mock("../../Toaster", () => ({
|
||||
showToast: vi.fn(),
|
||||
}));
|
||||
|
||||
import { AgentCommsPanel } from "../AgentCommsPanel";
|
||||
|
||||
// jsdom doesn't implement scrollIntoView. Tests that observe the call
|
||||
// install a spy here; tests that don't care still need a no-op stub
|
||||
// so the component doesn't throw.
|
||||
const scrollSpy = vi.fn<(opts?: ScrollIntoViewOptions | boolean) => void>();
|
||||
beforeEach(() => {
|
||||
apiGetMock.mockReset();
|
||||
scrollSpy.mockReset();
|
||||
Element.prototype.scrollIntoView = scrollSpy as unknown as Element["scrollIntoView"];
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
describe("AgentCommsPanel — initial-state parity with ChatTab my-chat", () => {
|
||||
it("shows loading text while history fetch is in flight", () => {
|
||||
apiGetMock.mockReturnValueOnce(new Promise(() => { /* never resolves */ }));
|
||||
render(<AgentCommsPanel workspaceId="ws-self" />);
|
||||
expect(screen.getByText("Loading agent communications...")).toBeDefined();
|
||||
});
|
||||
|
||||
it("renders error UI with a Retry button when the history fetch rejects", async () => {
|
||||
apiGetMock.mockRejectedValueOnce(new Error("network down"));
|
||||
render(<AgentCommsPanel workspaceId="ws-self" />);
|
||||
|
||||
// Wait for the error state to render — loading→error transition is async.
|
||||
const alert = await waitFor(() => screen.getByRole("alert"));
|
||||
expect(alert.textContent).toMatch(/Failed to load agent communications/);
|
||||
expect(alert.textContent).toMatch(/network down/);
|
||||
|
||||
// Retry button must be present and trigger a refetch.
|
||||
const retry = screen.getByRole("button", { name: "Retry" });
|
||||
apiGetMock.mockResolvedValueOnce([]); // success on retry
|
||||
fireEvent.click(retry);
|
||||
|
||||
// Two calls total: initial load + retry. Pin via mock call count.
|
||||
await waitFor(() => expect(apiGetMock.mock.calls.length).toBe(2));
|
||||
});
|
||||
|
||||
it("falls back to empty-state copy when load succeeds with zero rows", async () => {
|
||||
apiGetMock.mockResolvedValueOnce([]);
|
||||
render(<AgentCommsPanel workspaceId="ws-self" />);
|
||||
await waitFor(() =>
|
||||
expect(screen.getByText("No agent-to-agent communications yet.")).toBeDefined(),
|
||||
);
|
||||
});
|
||||
|
||||
it("scrollIntoView is called with behavior=instant on the first message arrival", async () => {
|
||||
apiGetMock.mockResolvedValueOnce([
|
||||
{
|
||||
id: "act-1",
|
||||
activity_type: "a2a_send",
|
||||
source_id: "ws-self",
|
||||
target_id: "ws-peer",
|
||||
method: "message/send",
|
||||
summary: "Delegating",
|
||||
request_body: { message: { parts: [{ text: "hi" }] } },
|
||||
response_body: null,
|
||||
status: "ok",
|
||||
created_at: "2026-04-25T18:00:00Z",
|
||||
},
|
||||
]);
|
||||
render(<AgentCommsPanel workspaceId="ws-self" />);
|
||||
|
||||
// useLayoutEffect is what makes the first call instant — wait for
|
||||
// the panel to render at least one message.
|
||||
await waitFor(() => expect(scrollSpy.mock.calls.length).toBeGreaterThan(0));
|
||||
|
||||
// The pinned contract: SOME call uses behavior: "instant" — the
|
||||
// first-arrival case. Subsequent appends use "smooth", but those
|
||||
// can't fire here (no live update yet).
|
||||
const sawInstant = scrollSpy.mock.calls.some((args) => {
|
||||
const opts = args[0];
|
||||
return typeof opts === "object" && opts !== null && "behavior" in opts && opts.behavior === "instant";
|
||||
});
|
||||
expect(sawInstant).toBe(true);
|
||||
});
|
||||
});
|
||||
@@ -2,7 +2,7 @@
|
||||
* @vitest-environment jsdom
|
||||
*/
|
||||
import { describe, it, expect, vi, afterEach } from "vitest";
|
||||
import { fetchSession, redirectToLogin } from "../auth";
|
||||
import { fetchSession, redirectToLogin, signOut } from "../auth";
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllGlobals();
|
||||
@@ -110,3 +110,157 @@ describe("redirectToLogin", () => {
|
||||
expect((window.location as unknown as { href: string }).href).toBe(signupHref);
|
||||
});
|
||||
});
|
||||
|
||||
describe("signOut", () => {
|
||||
// Helper — most tests need the same window.location stub.
|
||||
function stubLocation(): void {
|
||||
Object.defineProperty(window, "location", {
|
||||
writable: true,
|
||||
value: {
|
||||
href: "https://acme.moleculesai.app/orgs",
|
||||
pathname: "/orgs",
|
||||
hostname: "acme.moleculesai.app",
|
||||
protocol: "https:",
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
it("POSTs to /cp/auth/signout with credentials:include", async () => {
|
||||
stubLocation();
|
||||
const fetchMock = vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: async () => ({ ok: true, logout_url: "" }),
|
||||
});
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
|
||||
await signOut();
|
||||
|
||||
expect(fetchMock).toHaveBeenCalledTimes(1);
|
||||
expect(fetchMock).toHaveBeenCalledWith(
|
||||
expect.stringContaining("/cp/auth/signout"),
|
||||
expect.objectContaining({ method: "POST", credentials: "include" }),
|
||||
);
|
||||
});
|
||||
|
||||
it("navigates to provider logout_url when the response includes one", async () => {
|
||||
// The hosted-logout path is what actually breaks the SSO re-auth
|
||||
// loop reported on PR #2913. Without this, AuthKit's browser
|
||||
// cookie keeps the user signed in via SSO and any subsequent
|
||||
// /cp/auth/login silently re-auths.
|
||||
stubLocation();
|
||||
const hostedLogout =
|
||||
"https://api.workos.com/user_management/sessions/logout?session_id=cookie&return_to=https%3A%2F%2Fapp.moleculesai.app%2Forgs";
|
||||
vi.stubGlobal(
|
||||
"fetch",
|
||||
vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: async () => ({ ok: true, logout_url: hostedLogout }),
|
||||
}),
|
||||
);
|
||||
|
||||
await signOut();
|
||||
|
||||
const after = (window.location as unknown as { href: string }).href;
|
||||
expect(after).toBe(hostedLogout);
|
||||
});
|
||||
|
||||
it("falls back to /cp/auth/login when logout_url is empty (DisabledProvider / dev)", async () => {
|
||||
// DisabledProvider returns "" — the local /cp/auth/login redirect
|
||||
// works in dev/test where there's no SSO session to escape.
|
||||
stubLocation();
|
||||
vi.stubGlobal(
|
||||
"fetch",
|
||||
vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: async () => ({ ok: true, logout_url: "" }),
|
||||
}),
|
||||
);
|
||||
|
||||
await signOut();
|
||||
|
||||
const after = (window.location as unknown as { href: string }).href;
|
||||
// Tenant subdomain (acme.moleculesai.app) → auth origin is app.moleculesai.app.
|
||||
expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
|
||||
});
|
||||
|
||||
it("redirects even when the POST fails so the user isn't stuck on an authed page", async () => {
|
||||
// Critical UX invariant: clicking 'Sign out' MUST navigate away from
|
||||
// the authenticated app, even if the network is down or the cookie
|
||||
// is already invalid. Anything else looks like the button is
|
||||
// broken — the precise complaint that triggered this fix.
|
||||
stubLocation();
|
||||
vi.stubGlobal("fetch", vi.fn().mockRejectedValue(new Error("network down")));
|
||||
|
||||
await signOut();
|
||||
|
||||
const after = (window.location as unknown as { href: string }).href;
|
||||
expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
|
||||
});
|
||||
|
||||
it("redirects on 401 (session already invalid) just like 200", async () => {
|
||||
// A user with an already-invalid cookie should still see the
|
||||
// logout flow complete — no error, no stuck-on-app dead end.
|
||||
// Note: 401 means res.ok=false → we don't read .json() at all,
|
||||
// so a missing body is fine.
|
||||
stubLocation();
|
||||
vi.stubGlobal(
|
||||
"fetch",
|
||||
vi.fn().mockResolvedValue({
|
||||
ok: false,
|
||||
status: 401,
|
||||
json: async () => ({}),
|
||||
}),
|
||||
);
|
||||
|
||||
await signOut();
|
||||
|
||||
const after = (window.location as unknown as { href: string }).href;
|
||||
expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
|
||||
});
|
||||
|
||||
it("falls back to /cp/auth/login when the response body is malformed", async () => {
|
||||
// Defensive parsing: a body that isn't valid JSON, or doesn't
|
||||
// have logout_url, or has logout_url as the wrong type — none of
|
||||
// these should strand the user on the authed page. Fallback path
|
||||
// takes over.
|
||||
stubLocation();
|
||||
vi.stubGlobal(
|
||||
"fetch",
|
||||
vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: async () => {
|
||||
throw new Error("not json");
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
await signOut();
|
||||
|
||||
const after = (window.location as unknown as { href: string }).href;
|
||||
expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
|
||||
});
|
||||
|
||||
it("falls back to /cp/auth/login when logout_url is the wrong type", async () => {
|
||||
// Even valid JSON should be type-checked: a non-string logout_url
|
||||
// (e.g. server-side bug, version drift) must not crash or open-
|
||||
// redirect the user.
|
||||
stubLocation();
|
||||
vi.stubGlobal(
|
||||
"fetch",
|
||||
vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: async () => ({ ok: true, logout_url: 42 }),
|
||||
}),
|
||||
);
|
||||
|
||||
await signOut();
|
||||
|
||||
const after = (window.location as unknown as { href: string }).href;
|
||||
expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -67,3 +67,80 @@ export function redirectToLogin(screenHint: "sign-up" | "sign-in" = "sign-in"):
|
||||
const dest = `${authOrigin}${AUTH_BASE}/${path}?return_to=${encodeURIComponent(returnTo)}`;
|
||||
window.location.href = dest;
|
||||
}
|
||||
|
||||
/**
|
||||
* signOut posts to /cp/auth/signout to clear the WorkOS session cookie
|
||||
* + revoke at the provider, then navigates the browser to the
|
||||
* provider-supplied hosted logout URL (so the provider's BROWSER-side
|
||||
* SSO cookie is cleared too — without this, AuthKit silently re-auths
|
||||
* via SSO on the next /cp/auth/login and the user is "still signed
|
||||
* in" after pressing Sign out).
|
||||
*
|
||||
* Two-layer flow:
|
||||
* 1. POST /cp/auth/signout → CP clears OUR session cookie + revokes
|
||||
* session_id at the provider API. Response includes
|
||||
* `logout_url` — the AuthKit hosted URL the BROWSER must navigate
|
||||
* to so the provider's own browser cookie is cleared.
|
||||
* 2. window.location.href = <logout_url> → AuthKit clears its
|
||||
* session, then redirects the browser to the configured
|
||||
* return_to (defaults to APP_URL/orgs).
|
||||
*
|
||||
* Best-effort by design: a 5xx, network failure, missing logout_url
|
||||
* (DisabledProvider, dev), or stale cookie still results in the
|
||||
* browser navigating away — leaving the user on a logged-in-looking
|
||||
* page after they clicked "Sign out" is the worst possible UX. The
|
||||
* fallback path navigates to /cp/auth/login on the auth origin, which
|
||||
* works correctly in environments without a hosted logout flow (dev,
|
||||
* tests, DisabledProvider).
|
||||
*
|
||||
* Throws nothing — callers can disable the button optimistically or
|
||||
* await this and trust it returns. On a redirect-blocked test
|
||||
* environment (jsdom under vitest) we still exit cleanly so unit tests
|
||||
* can spy on the fetch call.
|
||||
*/
|
||||
export async function signOut(): Promise<void> {
|
||||
let logoutURL: string | undefined;
|
||||
// Fire-and-tolerate the POST. credentials:include is mandatory cross-
|
||||
// origin so the SaaS canvas (acme.moleculesai.app) can hit
|
||||
// app.moleculesai.app/cp/auth/signout with the session cookie.
|
||||
try {
|
||||
const res = await fetch(`${getAuthOrigin()}${AUTH_BASE}/signout`, {
|
||||
method: "POST",
|
||||
credentials: "include",
|
||||
});
|
||||
if (res.ok) {
|
||||
// Body shape: {"ok": true, "logout_url": "..."}. logout_url is
|
||||
// empty for DisabledProvider (dev/local) — we fall back to
|
||||
// /cp/auth/login below. Defensive parsing: a malformed body
|
||||
// shouldn't strand the user on the authed page.
|
||||
const body: unknown = await res.json().catch(() => null);
|
||||
if (
|
||||
body &&
|
||||
typeof body === "object" &&
|
||||
"logout_url" in body &&
|
||||
typeof (body as { logout_url: unknown }).logout_url === "string" &&
|
||||
(body as { logout_url: string }).logout_url
|
||||
) {
|
||||
logoutURL = (body as { logout_url: string }).logout_url;
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Ignore — we still redirect below.
|
||||
}
|
||||
if (typeof window === "undefined") return;
|
||||
if (logoutURL) {
|
||||
// Hosted logout: AuthKit clears its SSO cookie + redirects to
|
||||
// return_to (configured server-side). This is the path that
|
||||
// actually breaks the SSO re-auth loop.
|
||||
window.location.href = logoutURL;
|
||||
return;
|
||||
}
|
||||
// Fallback: no hosted logout (dev, DisabledProvider, network
|
||||
// failure). Land on the login screen rather than the current URL:
|
||||
// returning to a tenant URL after signout would just re-redirect
|
||||
// through /cp/auth/login due to AuthGate. Send the user straight
|
||||
// there with no return_to so they don't loop back into the org they
|
||||
// just left.
|
||||
const authOrigin = getAuthOrigin();
|
||||
window.location.href = `${authOrigin}${AUTH_BASE}/login`;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
# Workspace Runtime PyPI Package
|
||||
|
||||
## Requires Python >= 3.11
|
||||
|
||||
The wheel pins `requires_python>=3.11`. On Python 3.10 or older, `pip install
|
||||
molecule-ai-workspace-runtime` fails with `Could not find a version that
|
||||
satisfies the requirement (from versions: none)` — the pin filters the only
|
||||
available artifact before pip even attempts install. Upgrade the interpreter
|
||||
(`brew install python@3.12` / `apt install python3.12` / etc.) or use a
|
||||
3.11+ venv.
|
||||
|
||||
## Overview
|
||||
|
||||
The shared workspace runtime infrastructure has **one editable source** and
|
||||
|
||||
@@ -56,6 +56,9 @@ TOP_LEVEL_MODULES = {
|
||||
"a2a_mcp_server",
|
||||
"a2a_tools",
|
||||
"a2a_tools_delegation",
|
||||
"a2a_tools_inbox",
|
||||
"a2a_tools_memory",
|
||||
"a2a_tools_messaging",
|
||||
"a2a_tools_rbac",
|
||||
"adapter_base",
|
||||
"agent",
|
||||
@@ -288,10 +291,37 @@ directory** by the `publish-runtime` GitHub Actions workflow on every
|
||||
Operators running an agent outside the platform's container fleet
|
||||
(any runtime that supports MCP stdio — Claude Code, hermes, codex,
|
||||
etc.) can install this wheel and run the universal MCP server
|
||||
locally:
|
||||
locally.
|
||||
|
||||
### Requirements
|
||||
|
||||
* **Python ≥3.11.** The wheel sets `requires-python = ">=3.11"`. On
|
||||
older interpreters `pip install` returns the cryptic
|
||||
`Could not find a version that satisfies the requirement` — that
|
||||
message is pip filtering this wheel out, NOT the package missing
|
||||
from PyPI. Upgrade with `brew install python@3.12` /
|
||||
`apt install python3.12` / `pyenv install 3.12` first.
|
||||
* **`pipx` recommended over `pip`.** `pipx install` puts
|
||||
`molecule-mcp` on PATH automatically and isolates the runtime's
|
||||
deps from your system Python. Plain `pip install --user` works
|
||||
but the binary lands in `~/.local/bin` (Linux) or
|
||||
`~/Library/Python/3.X/bin` (macOS) which is often not on PATH on
|
||||
a fresh shell — `claude mcp add molecule -- molecule-mcp` then
|
||||
fails with "command not found" at first use.
|
||||
|
||||
### Install
|
||||
|
||||
```sh
|
||||
# Recommended:
|
||||
pipx install molecule-ai-workspace-runtime
|
||||
|
||||
# Alternative (manage PATH yourself):
|
||||
pip install --user molecule-ai-workspace-runtime
|
||||
```
|
||||
|
||||
### Run
|
||||
|
||||
```sh
|
||||
pip install molecule-ai-workspace-runtime
|
||||
WORKSPACE_ID=<uuid> \\
|
||||
PLATFORM_URL=https://<tenant>.staging.moleculesai.app \\
|
||||
MOLECULE_WORKSPACE_TOKEN=<bearer> \\
|
||||
@@ -304,10 +334,66 @@ runtimes already get via the workspace's auto-spawned MCP. Register
|
||||
the binary in your agent's MCP config (e.g. Claude Code's
|
||||
`claude mcp add molecule -- molecule-mcp` with the env above).
|
||||
|
||||
### Keeping the token out of shell history
|
||||
|
||||
Inline `MOLECULE_WORKSPACE_TOKEN=<bearer>` ends up in `~/.zsh_history`
|
||||
and (when registered via `claude mcp add`) plaintext in
|
||||
`~/.claude.json`. To avoid that, write the token to a 0600 file and
|
||||
point `MOLECULE_WORKSPACE_TOKEN_FILE` at it:
|
||||
|
||||
```sh
|
||||
umask 077
|
||||
printf '%s' "<bearer>" > ~/.config/molecule/token
|
||||
WORKSPACE_ID=<uuid> \\
|
||||
PLATFORM_URL=https://<tenant>.staging.moleculesai.app \\
|
||||
MOLECULE_WORKSPACE_TOKEN_FILE=$HOME/.config/molecule/token \\
|
||||
molecule-mcp
|
||||
```
|
||||
|
||||
Token resolution order: `MOLECULE_WORKSPACE_TOKEN` (inline env) →
|
||||
`MOLECULE_WORKSPACE_TOKEN_FILE` (path) → `${CONFIGS_DIR}/.auth_token`
|
||||
(in-container default).
|
||||
|
||||
The token comes from the canvas → Tokens tab. Restarting an external
|
||||
workspace from the canvas no longer revokes the token (PR #2412), so
|
||||
operator tokens persist across status nudges.
|
||||
|
||||
### Push vs poll delivery (Claude Code specifics)
|
||||
|
||||
By default the inbox runs in **poll mode** — every turn the agent
|
||||
calls `wait_for_message`, which blocks up to ~60s on
|
||||
`/activity?since_id=…`. Real-time push delivery is also supported,
|
||||
but on Claude Code it requires THREE conditions, ALL of which must
|
||||
hold:
|
||||
|
||||
1. **The MCP server declares `experimental.claude/channel`** — this
|
||||
wheel does (see `_build_initialize_result`). Nothing for you to
|
||||
do.
|
||||
2. **Claude Code installs the server as a marketplace plugin** — a
|
||||
plain `claude mcp add molecule -- molecule-mcp` produces a
|
||||
non-plugin-sourced server, which Claude Code rejects with
|
||||
`channel_enable requires a marketplace plugin`. Until the
|
||||
official `moleculesai/claude-code-plugin` marketplace lands
|
||||
(tracking [#2936](https://github.com/Molecule-AI/molecule-core/issues/2936)),
|
||||
operators who want push must scaffold their own local marketplace
|
||||
under
|
||||
`~/.claude/marketplaces/molecule-local/` containing a
|
||||
`marketplace.json` + `plugin.json` that points at this wheel.
|
||||
3. **Claude Code is launched with the dev-channels flag** — pass
|
||||
`--dangerously-load-development-channels plugin:molecule@<marketplace>`
|
||||
on the `claude` invocation. Without this flag the channel
|
||||
capability is silently ignored.
|
||||
|
||||
Symptom of any condition failing: messages arrive but only via the
|
||||
poll path (every ~1–60s), not real-time. There's currently no
|
||||
diagnostic surfaced — `molecule-mcp doctor` (tracking
|
||||
[#2937](https://github.com/Molecule-AI/molecule-core/issues/2937)) is
|
||||
planned.
|
||||
|
||||
If you don't need real-time push, the default poll path works
|
||||
universally with no extra setup; both modes converge on the same
|
||||
`inbox_pop` ack so messages never duplicate.
|
||||
|
||||
See [`docs/workspace-runtime-package.md`](https://github.com/Molecule-AI/molecule-core/blob/main/docs/workspace-runtime-package.md)
|
||||
for the publish flow and architecture.
|
||||
"""
|
||||
|
||||
Executable
+295
@@ -0,0 +1,295 @@
|
||||
#!/usr/bin/env bash
|
||||
# E2E for poll-mode chat upload (RFC #2891 phases 1-5b).
|
||||
#
|
||||
# Round-trip: register a workspace as poll-mode (no callback URL) → POST a
|
||||
# multi-file chat upload → verify each file becomes (a) one
|
||||
# `chat_upload_receive` activity row and (b) one /pending-uploads row → fetch
|
||||
# the bytes back via the poll endpoint → ack → verify the row 404s on
|
||||
# subsequent fetch. Also pins cross-workspace bleed protection: workspace B
|
||||
# cannot read workspace A's pending uploads even with its own valid bearer.
|
||||
#
|
||||
# Why this exists separately from test_chat_upload_e2e.sh: that script
|
||||
# covers the PUSH path (the workspace's own /internal/chat/uploads/ingest).
|
||||
# This script covers the POLL path: the same canvas-side request lands on
|
||||
# the platform's pendinguploads.Storage instead, and the workspace fetches
|
||||
# it later. The two paths share zero handler code on the platform side, so
|
||||
# both need their own E2E.
|
||||
#
|
||||
# Requires: platform running on localhost:8080 with migrations applied.
|
||||
# bash workspace-server/scripts/dev-start.sh
|
||||
# bash workspace-server/scripts/run-migrations.sh
|
||||
#
|
||||
# Idempotent: each run uses fresh per-script workspace UUIDs so reruns
|
||||
# don't collide. Best-effort cleanup on EXIT — does NOT call
|
||||
# e2e_cleanup_all_workspaces (see
|
||||
# `feedback_never_run_cluster_cleanup_tests_on_live_platform.md`).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
source "$(dirname "$0")/_lib.sh"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
TIMEOUT="${A2A_TIMEOUT:-30}"
|
||||
|
||||
gen_uuid() {
|
||||
if command -v uuidgen >/dev/null 2>&1; then
|
||||
uuidgen | tr '[:upper:]' '[:lower:]'
|
||||
else
|
||||
python3 -c 'import uuid; print(uuid.uuid4())'
|
||||
fi
|
||||
}
|
||||
WS_A="$(gen_uuid)"
|
||||
WS_B="$(gen_uuid)"
|
||||
|
||||
# Per-run scratch dir collected under one trap so every assertion-failure
|
||||
# path drops the temp files it made (see test_chat_attachments_e2e.sh).
|
||||
TMPDIR_E2E=$(mktemp -d -t poll-chat-upload-e2e-XXXXXX)
|
||||
|
||||
cleanup() {
|
||||
local rc=$?
|
||||
curl -s -X DELETE "$BASE/workspaces/$WS_A?confirm=true" >/dev/null 2>&1 || true
|
||||
curl -s -X DELETE "$BASE/workspaces/$WS_B?confirm=true" >/dev/null 2>&1 || true
|
||||
rm -rf "$TMPDIR_E2E"
|
||||
exit $rc
|
||||
}
|
||||
trap cleanup EXIT INT TERM
|
||||
|
||||
check() {
|
||||
local desc="$1" expected="$2" actual="$3"
|
||||
if echo "$actual" | grep -qF -- "$expected"; then
|
||||
echo "PASS: $desc"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo "FAIL: $desc"
|
||||
echo " expected to contain: $expected"
|
||||
echo " got: $(echo "$actual" | head -10)"
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
check_eq() {
|
||||
local desc="$1" expected="$2" actual="$3"
|
||||
if [ "$actual" = "$expected" ]; then
|
||||
echo "PASS: $desc"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo "FAIL: $desc"
|
||||
echo " expected: $expected"
|
||||
echo " got: $actual"
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
echo "=== Poll-Mode Chat Upload E2E ==="
|
||||
echo " base: $BASE"
|
||||
echo " workspace A: $WS_A"
|
||||
echo " workspace B: $WS_B"
|
||||
echo ""
|
||||
|
||||
# ---------- Phase 1: register poll-mode workspace ----------
|
||||
echo "--- Phase 1: Register poll-mode workspace A ---"
|
||||
|
||||
REG_A=$(curl -s -X POST "$BASE/registry/register" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{
|
||||
\"id\": \"$WS_A\",
|
||||
\"delivery_mode\": \"poll\",
|
||||
\"agent_card\": {\"name\": \"poll-chat-upload-test-a\"}
|
||||
}")
|
||||
check "register accepts poll mode without URL" '"status":"registered"' "$REG_A"
|
||||
TOK_A=$(echo "$REG_A" | e2e_extract_token || true)
|
||||
[ -n "$TOK_A" ] || { echo "FAIL: no auth_token in register response (ws A)"; FAIL=$((FAIL + 1)); exit 1; }
|
||||
|
||||
# ---------- Phase 2: multi-file chat upload ----------
|
||||
echo ""
|
||||
echo "--- Phase 2: POST /chat/uploads with two files ---"
|
||||
|
||||
FILE1="$TMPDIR_E2E/alpha.txt"
|
||||
FILE2="$TMPDIR_E2E/beta.txt"
|
||||
EXPECTED1="alpha-secret-$(openssl rand -hex 4)"
|
||||
EXPECTED2="beta-secret-$(openssl rand -hex 4)"
|
||||
printf '%s' "$EXPECTED1" > "$FILE1"
|
||||
printf '%s' "$EXPECTED2" > "$FILE2"
|
||||
|
||||
UPLOAD=$(curl -s -X POST "$BASE/workspaces/$WS_A/chat/uploads" \
|
||||
-H "Authorization: Bearer $TOK_A" \
|
||||
-F "files=@$FILE1;filename=alpha.txt;type=text/plain" \
|
||||
-F "files=@$FILE2;filename=beta.txt;type=text/plain" \
|
||||
-w "\nHTTP_CODE=%{http_code}\n")
|
||||
UPLOAD_CODE=$(echo "$UPLOAD" | grep -oE 'HTTP_CODE=[0-9]+' | cut -d= -f2)
|
||||
UPLOAD_BODY=$(echo "$UPLOAD" | sed '/^HTTP_CODE=/,$d')
|
||||
|
||||
check_eq "upload returns 200" "200" "$UPLOAD_CODE"
|
||||
check "upload response has files array" '"files":' "$UPLOAD_BODY"
|
||||
|
||||
# Pull file_ids out of the URI in the response. URI shape is
|
||||
# `platform-pending:<wsid>/<file_id>` — proves the response came from the
|
||||
# poll-mode branch, not the push-mode internal-ingest branch.
|
||||
URI1=$(echo "$UPLOAD_BODY" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][0]["uri"])')
|
||||
URI2=$(echo "$UPLOAD_BODY" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][1]["uri"])')
|
||||
check "URI 1 has platform-pending: scheme" "platform-pending:$WS_A/" "$URI1"
|
||||
check "URI 2 has platform-pending: scheme" "platform-pending:$WS_A/" "$URI2"
|
||||
|
||||
FID1="${URI1##*/}"
|
||||
FID2="${URI2##*/}"
|
||||
[ -n "$FID1" ] && [ -n "$FID2" ] || { echo "FAIL: could not extract file IDs"; FAIL=$((FAIL + 1)); exit 1; }
|
||||
echo " file_id 1: $FID1"
|
||||
echo " file_id 2: $FID2"
|
||||
|
||||
# ---------- Phase 3: activity rows visible to the workspace ----------
|
||||
echo ""
|
||||
echo "--- Phase 3: /activity shows two chat_upload_receive rows ---"
|
||||
|
||||
# activity_logs INSERTs run in a goroutine — give them a moment.
|
||||
sleep 1
|
||||
ACT=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/activity?type=a2a_receive&limit=20")
|
||||
check "activity feed has the alpha file" "$FID1" "$ACT"
|
||||
check "activity feed has the beta file" "$FID2" "$ACT"
|
||||
check "activity rows tagged chat_upload_receive" '"method":"chat_upload_receive"' "$ACT"
|
||||
check "activity rows record alpha mimetype" '"mimeType":"text/plain"' "$ACT"
|
||||
|
||||
CHAT_UPLOAD_COUNT=$(echo "$ACT" | python3 -c '
|
||||
import json, sys
|
||||
rows = json.load(sys.stdin)
|
||||
n = sum(1 for r in rows if (r.get("method") or "") == "chat_upload_receive")
|
||||
print(n)
|
||||
')
|
||||
check_eq "exactly two chat_upload_receive rows" "2" "$CHAT_UPLOAD_COUNT"
|
||||
|
||||
# ---------- Phase 4: GET /pending-uploads/:file_id/content ----------
|
||||
echo ""
|
||||
echo "--- Phase 4: Fetch content for each pending upload ---"
|
||||
|
||||
GOT1=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
|
||||
check_eq "alpha bytes round-trip" "$EXPECTED1" "$GOT1"
|
||||
|
||||
GOT2=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID2/content")
|
||||
check_eq "beta bytes round-trip" "$EXPECTED2" "$GOT2"
|
||||
|
||||
# Mimetype + Content-Disposition headers should match what was uploaded.
|
||||
HEAD1=$(curl -s -D - -o /dev/null --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
|
||||
check "alpha response carries text/plain Content-Type" "Content-Type: text/plain" "$HEAD1"
|
||||
check "alpha response carries Content-Disposition with filename" 'filename="alpha.txt"' "$HEAD1"
|
||||
|
||||
# ---------- Phase 5: idempotent re-fetch (until ack) ----------
|
||||
echo ""
|
||||
echo "--- Phase 5: Re-fetch before ack returns the same bytes ---"
|
||||
|
||||
RE_GOT1=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
|
||||
check_eq "re-fetch returns same alpha bytes" "$EXPECTED1" "$RE_GOT1"
|
||||
|
||||
# ---------- Phase 6: ack each row ----------
|
||||
echo ""
|
||||
echo "--- Phase 6: Ack each pending upload ---"
|
||||
|
||||
ACK1=$(curl -s -X POST --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/ack")
|
||||
check "alpha ack returns acked:true" '"acked":true' "$ACK1"
|
||||
|
||||
ACK2=$(curl -s -X POST --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID2/ack")
|
||||
check "beta ack returns acked:true" '"acked":true' "$ACK2"
|
||||
|
||||
# Re-ack should still 200 (idempotent — the row's gone but the workspace's
|
||||
# at-least-once intent was already honored, and the second ack hits the
|
||||
# raced path which also returns 200).
|
||||
RE_ACK1=$(curl -s -w '\n%{http_code}' -X POST --max-time "$TIMEOUT" \
|
||||
-H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/ack")
|
||||
RE_ACK1_CODE=$(printf '%s' "$RE_ACK1" | tail -n1)
|
||||
# Acked rows return 404 on Get-before-Ack (the row's still in the table
|
||||
# but Get filters acked_at IS NULL); workspace would not normally re-ack
|
||||
# since it already saw the success. Accept both 200 and 404 here so the
|
||||
# test pins the contract without being brittle on the inner ordering.
|
||||
case "$RE_ACK1_CODE" in
|
||||
200|404)
|
||||
echo "PASS: re-ack returns 200 or 404 ($RE_ACK1_CODE)"
|
||||
PASS=$((PASS + 1))
|
||||
;;
|
||||
*)
|
||||
echo "FAIL: re-ack returned unexpected $RE_ACK1_CODE"
|
||||
FAIL=$((FAIL + 1))
|
||||
;;
|
||||
esac
|
||||
|
||||
# ---------- Phase 7: GET content after ack returns 404 ----------
|
||||
echo ""
|
||||
echo "--- Phase 7: Acked file 404s on subsequent fetch ---"
|
||||
|
||||
POST_ACK=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
|
||||
POST_ACK_CODE=$(printf '%s' "$POST_ACK" | tail -n1)
|
||||
check_eq "acked alpha returns HTTP 404" "404" "$POST_ACK_CODE"
|
||||
|
||||
# ---------- Phase 8: cross-workspace bleed protection ----------
|
||||
echo ""
|
||||
echo "--- Phase 8: Workspace B cannot read workspace A's pending uploads ---"
|
||||
|
||||
# Stage a fresh upload on workspace A so we have an UN-acked row to probe.
|
||||
PROBE_FILE="$TMPDIR_E2E/probe.txt"
|
||||
printf '%s' "probe-bytes-$(openssl rand -hex 4)" > "$PROBE_FILE"
|
||||
PROBE_UP=$(curl -s -X POST "$BASE/workspaces/$WS_A/chat/uploads" \
|
||||
-H "Authorization: Bearer $TOK_A" \
|
||||
-F "files=@$PROBE_FILE;filename=probe.txt;type=text/plain")
|
||||
PROBE_FID=$(echo "$PROBE_UP" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][0]["uri"].split("/")[-1])')
|
||||
[ -n "$PROBE_FID" ] || { echo "FAIL: probe upload returned no file_id"; FAIL=$((FAIL + 1)); exit 1; }
|
||||
|
||||
# Register a SECOND poll-mode workspace and capture its bearer.
|
||||
REG_B=$(curl -s -X POST "$BASE/registry/register" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{
|
||||
\"id\": \"$WS_B\",
|
||||
\"delivery_mode\": \"poll\",
|
||||
\"agent_card\": {\"name\": \"poll-chat-upload-test-b\"}
|
||||
}")
|
||||
check "second workspace registers" '"status":"registered"' "$REG_B"
|
||||
TOK_B=$(echo "$REG_B" | e2e_extract_token || true)
|
||||
[ -n "$TOK_B" ] || { echo "FAIL: no auth_token (ws B)"; FAIL=$((FAIL + 1)); exit 1; }
|
||||
|
||||
# B's bearer hitting B's URL with A's file_id → 404 (handler checks the row's
|
||||
# workspace_id matches the URL :id, not the bearer's workspace).
|
||||
CROSS_RESP=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \
|
||||
-H "Authorization: Bearer $TOK_B" \
|
||||
"$BASE/workspaces/$WS_B/pending-uploads/$PROBE_FID/content")
|
||||
CROSS_CODE=$(printf '%s' "$CROSS_RESP" | tail -n1)
|
||||
check_eq "B's URL with A's file_id returns 404" "404" "$CROSS_CODE"
|
||||
|
||||
# B's bearer hitting A's URL → 401 (wsAuth pins bearer to :id). This is the
|
||||
# strictest cross-workspace check: a presented-but-wrong bearer is rejected
|
||||
# in EVERY platform posture (dev-mode fail-open only triggers when no bearer
|
||||
# is presented at all — invalid tokens always 401).
|
||||
WRONG_BEARER=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \
|
||||
-H "Authorization: Bearer $TOK_B" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$PROBE_FID/content")
|
||||
WRONG_CODE=$(printf '%s' "$WRONG_BEARER" | tail -n1)
|
||||
check_eq "B's bearer on A's URL returns 401" "401" "$WRONG_CODE"
|
||||
|
||||
# NB: a fully bearerless request to /pending-uploads/:fid/content returns
|
||||
# 401 ONLY when the platform has MOLECULE_ENV != development (production /
|
||||
# staging). On local-dev with MOLECULE_ENV=development the wsauth middleware
|
||||
# fail-opens for bearerless requests so the canvas at :3000 can talk to the
|
||||
# platform at :8080 without per-call token plumbing — see middleware/
|
||||
# devmode.go. The strict bearerless-401 contract is covered by the wsauth
|
||||
# unit + middleware tests; we don't reassert it here because the result
|
||||
# depends on platform posture, not the poll-mode upload contract.
|
||||
|
||||
# ---------- Phase 9: invalid file_id rejected at the URL parser ----------
|
||||
echo ""
|
||||
echo "--- Phase 9: Invalid file_id returns 400 ---"
|
||||
|
||||
BAD_FID=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \
|
||||
-H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/not-a-uuid/content")
|
||||
BAD_FID_CODE=$(printf '%s' "$BAD_FID" | tail -n1)
|
||||
check_eq "invalid file_id UUID returns 400" "400" "$BAD_FID_CODE"
|
||||
|
||||
# ---------- Results ----------
|
||||
echo ""
|
||||
echo "=== Results: $PASS passed, $FAIL failed ==="
|
||||
[ "$FAIL" -eq 0 ]
|
||||
@@ -0,0 +1,177 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"go/ast"
|
||||
"go/parser"
|
||||
"go/token"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestAgentMessageBroadcastsArePersisted is a forward-looking AST
|
||||
// gate: every function in this package that broadcasts an
|
||||
// `AGENT_MESSAGE` WebSocket event MUST also call
|
||||
// `INSERT INTO activity_logs` somewhere in its body.
|
||||
//
|
||||
// The reno-stars production data-loss bug (CEO Ryan PC's long-form
|
||||
// onboarding-friction message visible live but missing on reload)
|
||||
// happened because mcp_tools.go:toolSendMessageToUser broadcast WS
|
||||
// without a paired INSERT — while the HTTP /notify sibling DID
|
||||
// persist. The fix added the INSERT; this gate prevents the regression
|
||||
// class from re-emerging in any future chat-bearing tool.
|
||||
//
|
||||
// Why an AST gate vs a code-review checklist (per memory
|
||||
// feedback_behavior_based_ast_gates.md): "pin invariants by what a
|
||||
// function calls, not what it's named". The shape that loses data is:
|
||||
//
|
||||
// BroadcastOnly(_, "AGENT_MESSAGE", _) without an INSERT companion
|
||||
//
|
||||
// Any new tool that emits AGENT_MESSAGE must persist or the next
|
||||
// canvas refresh drops the message — same shape as reno-stars. A
|
||||
// reviewer can miss this; the AST walk can't.
|
||||
//
|
||||
// Allowlist: empty by intent. If a future use case genuinely needs
|
||||
// fire-and-forget broadcast (e.g., transient typing indicators that
|
||||
// should NOT survive reload), add an entry here AND document why.
|
||||
// "Doesn't need to persist" is rarely the right answer for chat —
|
||||
// the canvas history is the source of truth.
|
||||
func TestAgentMessageBroadcastsArePersisted(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("getwd: %v", err)
|
||||
}
|
||||
entries, err := os.ReadDir(wd)
|
||||
if err != nil {
|
||||
t.Fatalf("readdir %s: %v", wd, err)
|
||||
}
|
||||
|
||||
type violation struct {
|
||||
file string
|
||||
fn string
|
||||
}
|
||||
var violations []violation
|
||||
|
||||
for _, ent := range entries {
|
||||
name := ent.Name()
|
||||
if ent.IsDir() || !strings.HasSuffix(name, ".go") || strings.HasSuffix(name, "_test.go") {
|
||||
continue
|
||||
}
|
||||
path := filepath.Join(wd, name)
|
||||
fset := token.NewFileSet()
|
||||
file, err := parser.ParseFile(fset, path, nil, parser.ParseComments)
|
||||
if err != nil {
|
||||
t.Fatalf("parse %s: %v", path, err)
|
||||
}
|
||||
|
||||
for _, decl := range file.Decls {
|
||||
fn, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fn.Body == nil {
|
||||
continue
|
||||
}
|
||||
if !funcEmitsAgentMessageBroadcast(fn) {
|
||||
continue
|
||||
}
|
||||
if !funcInsertsIntoActivityLogs(fn) {
|
||||
violations = append(violations, violation{file: name, fn: fn.Name.Name})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(violations) > 0 {
|
||||
sort.Slice(violations, func(i, j int) bool {
|
||||
if violations[i].file != violations[j].file {
|
||||
return violations[i].file < violations[j].file
|
||||
}
|
||||
return violations[i].fn < violations[j].fn
|
||||
})
|
||||
var buf strings.Builder
|
||||
for _, v := range violations {
|
||||
buf.WriteString(" - ")
|
||||
buf.WriteString(v.file)
|
||||
buf.WriteString(":")
|
||||
buf.WriteString(v.fn)
|
||||
buf.WriteString("\n")
|
||||
}
|
||||
t.Errorf(`function(s) broadcast `+"`AGENT_MESSAGE`"+` without persisting to activity_logs:
|
||||
|
||||
%s
|
||||
This is the reno-stars data-loss regression class: live message
|
||||
visible to the user, but missing on reload because activity_log was
|
||||
never written. Every chat-bearing broadcast MUST be paired with:
|
||||
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, method,
|
||||
summary, response_body, status)
|
||||
VALUES ($1, 'a2a_receive', 'notify', $2, $3::jsonb, 'ok')
|
||||
|
||||
See activity.go:Notify and mcp_tools.go:toolSendMessageToUser for
|
||||
the canonical shapes. Don't add an allowlist entry without a
|
||||
documented reason — the canvas chat history is the source of truth
|
||||
and silently dropping messages is a P0 user trust break.`,
|
||||
buf.String())
|
||||
}
|
||||
}
|
||||
|
||||
// funcEmitsAgentMessageBroadcast walks fn.Body for any CallExpr that
|
||||
// looks like `*.BroadcastOnly(_, "AGENT_MESSAGE", _)`.
|
||||
func funcEmitsAgentMessageBroadcast(fn *ast.FuncDecl) bool {
|
||||
var found bool
|
||||
ast.Inspect(fn.Body, func(n ast.Node) bool {
|
||||
call, ok := n.(*ast.CallExpr)
|
||||
if !ok {
|
||||
return true
|
||||
}
|
||||
sel, ok := call.Fun.(*ast.SelectorExpr)
|
||||
if !ok || sel.Sel.Name != "BroadcastOnly" {
|
||||
return true
|
||||
}
|
||||
// BroadcastOnly(workspaceID, eventType, payload) — the second
|
||||
// arg is the event name. Match by string-literal value.
|
||||
if len(call.Args) < 2 {
|
||||
return true
|
||||
}
|
||||
lit, ok := call.Args[1].(*ast.BasicLit)
|
||||
if !ok || lit.Kind != token.STRING {
|
||||
return true
|
||||
}
|
||||
raw := lit.Value
|
||||
if unq, err := strconv.Unquote(raw); err == nil {
|
||||
raw = unq
|
||||
}
|
||||
if raw == "AGENT_MESSAGE" {
|
||||
found = true
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
return found
|
||||
}
|
||||
|
||||
// funcInsertsIntoActivityLogs walks fn.Body for any STRING BasicLit
|
||||
// whose body contains `INSERT INTO activity_logs` (the SQL literal
|
||||
// passed to ExecContext). Matches the substring rather than a strict
|
||||
// regex because we don't care about the exact INSERT shape here —
|
||||
// only that the function persists. Specific shape pinning lives in
|
||||
// the per-handler test (see TestMCPHandler_SendMessageToUser_*).
|
||||
func funcInsertsIntoActivityLogs(fn *ast.FuncDecl) bool {
|
||||
var found bool
|
||||
ast.Inspect(fn.Body, func(n ast.Node) bool {
|
||||
lit, ok := n.(*ast.BasicLit)
|
||||
if !ok || lit.Kind != token.STRING {
|
||||
return true
|
||||
}
|
||||
raw := lit.Value
|
||||
if unq, err := strconv.Unquote(raw); err == nil {
|
||||
raw = unq
|
||||
}
|
||||
if strings.Contains(raw, "INSERT INTO activity_logs") {
|
||||
found = true
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
return found
|
||||
}
|
||||
@@ -0,0 +1,468 @@
|
||||
package handlers
|
||||
|
||||
// class1_ast_gate_test.go — generic Class 1 leak gate per #2867 PR-A.
|
||||
//
|
||||
// What this gate prevents:
|
||||
// The tenant-hongming leak class — a handler iterates a YAML-derived
|
||||
// slice (ws.Children, sub_workspaces, etc.) and calls
|
||||
// `INSERT INTO workspaces` inside the loop body without first
|
||||
// checking whether a workspace with the same (parent_id, name) is
|
||||
// already there. Each call to such a handler doubles the tree.
|
||||
//
|
||||
// Why this is broader than TestCreateWorkspaceTree_CallsLookupBeforeInsert:
|
||||
// The existing gate is hard-coded to org_import.go's createWorkspaceTree.
|
||||
// That catches the specific function that triggered the original
|
||||
// incident — but a future handler written from scratch in a different
|
||||
// file would not be covered. This gate walks every production handler
|
||||
// .go file and applies a structural rule that does not depend on
|
||||
// function or file names.
|
||||
//
|
||||
// The rule (verbatim from #2867 PR-A):
|
||||
//
|
||||
// "No handler in handlers/ may iterate a slice (any RangeStmt) AND
|
||||
// call INSERT INTO workspaces inside the loop body without a
|
||||
// preceding SELECT id FROM workspaces WHERE name=$1 AND parent_id IS
|
||||
// NOT DISTINCT FROM $2 in the same function (== a lookupExistingChild
|
||||
// call, OR an ON CONFLICT clause baked into the same INSERT, OR an
|
||||
// explicit allowlist annotation)."
|
||||
//
|
||||
// Allowlist mechanism: a function whose body contains the exact comment
|
||||
// string `// class1-gate: idempotent-by-design` is treated as safe.
|
||||
// Use this only after writing a unit test that pins WHY the function
|
||||
// is safe. The annotation is intentionally awkward to type — it should
|
||||
// be rare.
|
||||
|
||||
import (
|
||||
"go/ast"
|
||||
"go/parser"
|
||||
"go/token"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// reINSERTWorkspaces matches the exact statement shape we care about.
|
||||
// Tightened (vs bytes.Index "INSERT INTO workspaces") so the audit
|
||||
// table `workspaces_audit` literal — or any other lookalike — does not
|
||||
// false-positive trigger this gate. The same regex is used in the
|
||||
// existing createWorkspaceTree gate (workspaces_insert_allowlist_test.go)
|
||||
// — keep them in sync if either changes.
|
||||
var reINSERTWorkspaces = regexp.MustCompile(`(?m)^\s*INSERT INTO workspaces\s*\(`)
|
||||
|
||||
// reONCONFLICT matches ON CONFLICT clauses anywhere in the same SQL
|
||||
// literal. An UPSERT (INSERT ... ON CONFLICT ... DO UPDATE) is
|
||||
// idempotent by definition, so the gate exempts it.
|
||||
var reONCONFLICT = regexp.MustCompile(`(?i)\bON CONFLICT\b`)
|
||||
|
||||
// gateAllowlistComment is the magic comment a function author writes
|
||||
// to opt out of this gate. Forces an explicit decision.
|
||||
const gateAllowlistComment = "// class1-gate: idempotent-by-design"
|
||||
|
||||
// preflightCallNames are function names whose presence in a function
|
||||
// body counts as "did a SELECT-by-(parent_id, name) preflight". Add
|
||||
// new names here as new preflight helpers are introduced. Keep the
|
||||
// list TIGHT — any sloppy addition weakens the gate.
|
||||
var preflightCallNames = map[string]bool{
|
||||
"lookupExistingChild": true,
|
||||
}
|
||||
|
||||
// TestClass1_NoUnpreflightedInsertInsideRange walks every production
|
||||
// .go file in this package, parses the AST, and fails the test if any
|
||||
// FuncDecl violates the rule above.
|
||||
//
|
||||
// Failure message must include: file path, function name, line of
|
||||
// the offending INSERT, line of the enclosing range, and a hint at
|
||||
// the three escape hatches (preflight call, ON CONFLICT, allowlist
|
||||
// comment).
|
||||
func TestClass1_NoUnpreflightedInsertInsideRange(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("getwd: %v", err)
|
||||
}
|
||||
|
||||
entries, err := os.ReadDir(wd)
|
||||
if err != nil {
|
||||
t.Fatalf("readdir %s: %v", wd, err)
|
||||
}
|
||||
|
||||
type violation struct {
|
||||
file string
|
||||
fn string
|
||||
insertLine int
|
||||
rangeLine int
|
||||
}
|
||||
var violations []violation
|
||||
scanned := 0
|
||||
|
||||
for _, e := range entries {
|
||||
name := e.Name()
|
||||
if e.IsDir() || !strings.HasSuffix(name, ".go") {
|
||||
continue
|
||||
}
|
||||
if strings.HasSuffix(name, "_test.go") {
|
||||
continue
|
||||
}
|
||||
path := filepath.Join(wd, name)
|
||||
src, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatalf("read %s: %v", path, err)
|
||||
}
|
||||
fset := token.NewFileSet()
|
||||
file, err := parser.ParseFile(fset, name, src, parser.ParseComments)
|
||||
if err != nil {
|
||||
t.Fatalf("parse %s: %v", path, err)
|
||||
}
|
||||
scanned++
|
||||
|
||||
// Walk every function declaration and apply the rule.
|
||||
for _, decl := range file.Decls {
|
||||
fd, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fd.Body == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Allowlist: skip if the function body contains the magic
|
||||
// comment. We check via the source range of the function
|
||||
// — comments inside the body are in file.Comments and
|
||||
// must overlap the function's Pos/End range.
|
||||
if functionHasAllowlistComment(file, fd) {
|
||||
continue
|
||||
}
|
||||
|
||||
// First pass: locate every INSERT INTO workspaces literal
|
||||
// in this function. We treat each such literal as a
|
||||
// candidate violation and try to clear it via the rules.
|
||||
candidates := findInsertWorkspacesLiterals(fd, src, fset)
|
||||
if len(candidates) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// Has the function called a preflight helper? Single
|
||||
// pass — if any preflight name appears, every INSERT in
|
||||
// the function is considered preflighted. This is more
|
||||
// permissive than position-aware (preflight could be
|
||||
// AFTER the INSERT and still satisfy the gate), but the
|
||||
// existing org_import.go gate already pins the position
|
||||
// invariant for createWorkspaceTree, and a function that
|
||||
// preflights AFTER inserting would fail the position
|
||||
// gate in a separate test.
|
||||
hasPreflight := functionCallsAny(fd, preflightCallNames)
|
||||
|
||||
for _, c := range candidates {
|
||||
if c.hasONCONFLICT {
|
||||
continue
|
||||
}
|
||||
if hasPreflight {
|
||||
continue
|
||||
}
|
||||
if c.enclosingRangeLine == 0 {
|
||||
// INSERT not inside any RangeStmt — single-shot,
|
||||
// not the bug pattern.
|
||||
continue
|
||||
}
|
||||
violations = append(violations, violation{
|
||||
file: name,
|
||||
fn: fd.Name.Name,
|
||||
insertLine: c.insertLine,
|
||||
rangeLine: c.enclosingRangeLine,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if scanned == 0 {
|
||||
t.Fatal("scanned 0 .go files — wrong working directory? gate would always pass")
|
||||
}
|
||||
|
||||
if len(violations) > 0 {
|
||||
// Stable sort so the failure message is deterministic across
|
||||
// reruns.
|
||||
sort.Slice(violations, func(i, j int) bool {
|
||||
if violations[i].file != violations[j].file {
|
||||
return violations[i].file < violations[j].file
|
||||
}
|
||||
return violations[i].insertLine < violations[j].insertLine
|
||||
})
|
||||
var b strings.Builder
|
||||
b.WriteString("Class 1 leak gate (#2867 PR-A) — these handler functions iterate a slice and INSERT INTO workspaces inside the loop body without a (parent_id, name) preflight.\n\n")
|
||||
b.WriteString("This is the bug shape that triggered the tenant-hongming leak (TeamHandler.Expand re-inserting the entire sub_workspaces tree on every call). To fix any reported violation, choose ONE of:\n")
|
||||
b.WriteString(" 1. Call h.lookupExistingChild(ctx, name, parentID) before the INSERT and skip the INSERT when it returns existing=true. (preferred)\n")
|
||||
b.WriteString(" 2. Use INSERT ... ON CONFLICT ... DO ... (idempotent UPSERT, like registry.go).\n")
|
||||
b.WriteString(" 3. Annotate the function with a `// class1-gate: idempotent-by-design` comment AND a unit test that pins why the function is structurally idempotent. (rare; require code review)\n\n")
|
||||
b.WriteString("Violations:\n")
|
||||
for _, v := range violations {
|
||||
b.WriteString(" - ")
|
||||
b.WriteString(v.file)
|
||||
b.WriteString(":")
|
||||
b.WriteString(itoa(v.insertLine))
|
||||
b.WriteString(" — function ")
|
||||
b.WriteString(v.fn)
|
||||
b.WriteString("() INSERTs inside RangeStmt at line ")
|
||||
b.WriteString(itoa(v.rangeLine))
|
||||
b.WriteString("\n")
|
||||
}
|
||||
t.Fatal(b.String())
|
||||
}
|
||||
}
|
||||
|
||||
func itoa(n int) string {
|
||||
// Avoid strconv import for one call site — keeps the test focused.
|
||||
if n == 0 {
|
||||
return "0"
|
||||
}
|
||||
neg := n < 0
|
||||
if neg {
|
||||
n = -n
|
||||
}
|
||||
var buf [20]byte
|
||||
i := len(buf)
|
||||
for n > 0 {
|
||||
i--
|
||||
buf[i] = byte('0' + n%10)
|
||||
n /= 10
|
||||
}
|
||||
if neg {
|
||||
i--
|
||||
buf[i] = '-'
|
||||
}
|
||||
return string(buf[i:])
|
||||
}
|
||||
|
||||
// candidateInsert holds the per-INSERT facts needed to decide whether
|
||||
// the gate fires.
|
||||
type candidateInsert struct {
|
||||
insertLine int
|
||||
hasONCONFLICT bool
|
||||
enclosingRangeLine int // 0 means not inside any range
|
||||
}
|
||||
|
||||
// findInsertWorkspacesLiterals walks fd's body and returns one
|
||||
// candidateInsert per INSERT INTO workspaces string literal.
|
||||
//
|
||||
// Position-based detection: collect every RangeStmt's body span first,
|
||||
// then for each INSERT literal check if its position is inside any
|
||||
// span. ast.Inspect's nil-call ordering does NOT give per-node pop
|
||||
// semantics, so a stack-based approach against ast.Inspect would
|
||||
// silently miscount. Position spans are deterministic and easy to
|
||||
// reason about.
|
||||
func findInsertWorkspacesLiterals(fd *ast.FuncDecl, src []byte, fset *token.FileSet) []candidateInsert {
|
||||
var out []candidateInsert
|
||||
|
||||
type span struct{ start, end token.Pos }
|
||||
var ranges []span
|
||||
ast.Inspect(fd.Body, func(n ast.Node) bool {
|
||||
rs, ok := n.(*ast.RangeStmt)
|
||||
if !ok || rs.Body == nil {
|
||||
return true
|
||||
}
|
||||
ranges = append(ranges, span{rs.Body.Lbrace, rs.Body.Rbrace})
|
||||
return true
|
||||
})
|
||||
|
||||
enclosingRangeLineFor := func(p token.Pos) int {
|
||||
// Pick the innermost enclosing range — i.e., the one with the
|
||||
// largest start that still covers p. Innermost is the one
|
||||
// whose body actually contains the INSERT, which is the line
|
||||
// most useful in a violation message.
|
||||
bestStart := token.NoPos
|
||||
bestLine := 0
|
||||
for _, s := range ranges {
|
||||
if p > s.start && p < s.end && s.start > bestStart {
|
||||
bestStart = s.start
|
||||
bestLine = fset.Position(s.start).Line
|
||||
}
|
||||
}
|
||||
return bestLine
|
||||
}
|
||||
|
||||
ast.Inspect(fd.Body, func(n ast.Node) bool {
|
||||
bl, ok := n.(*ast.BasicLit)
|
||||
if !ok || bl.Kind != token.STRING {
|
||||
return true
|
||||
}
|
||||
// Strip surrounding backticks/quotes — value includes them.
|
||||
lit := bl.Value
|
||||
if len(lit) >= 2 {
|
||||
lit = lit[1 : len(lit)-1]
|
||||
}
|
||||
if !reINSERTWorkspaces.MatchString(lit) {
|
||||
return true
|
||||
}
|
||||
out = append(out, candidateInsert{
|
||||
insertLine: fset.Position(bl.Pos()).Line,
|
||||
hasONCONFLICT: reONCONFLICT.MatchString(lit),
|
||||
enclosingRangeLine: enclosingRangeLineFor(bl.Pos()),
|
||||
})
|
||||
return true
|
||||
})
|
||||
return out
|
||||
}
|
||||
|
||||
// functionCallsAny returns true if any CallExpr in fd's body has a
|
||||
// function name (either a SelectorExpr Sel.Name or an Ident name)
|
||||
// matching a key in names.
|
||||
func functionCallsAny(fd *ast.FuncDecl, names map[string]bool) bool {
|
||||
found := false
|
||||
ast.Inspect(fd.Body, func(n ast.Node) bool {
|
||||
if found {
|
||||
return false
|
||||
}
|
||||
ce, ok := n.(*ast.CallExpr)
|
||||
if !ok {
|
||||
return true
|
||||
}
|
||||
switch fun := ce.Fun.(type) {
|
||||
case *ast.Ident:
|
||||
if names[fun.Name] {
|
||||
found = true
|
||||
return false
|
||||
}
|
||||
case *ast.SelectorExpr:
|
||||
if names[fun.Sel.Name] {
|
||||
found = true
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
return found
|
||||
}
|
||||
|
||||
// functionHasAllowlistComment returns true if the function body
|
||||
// (between fd.Body.Lbrace and fd.Body.Rbrace) contains a comment
|
||||
// equal to gateAllowlistComment.
|
||||
func functionHasAllowlistComment(file *ast.File, fd *ast.FuncDecl) bool {
|
||||
if fd.Body == nil {
|
||||
return false
|
||||
}
|
||||
start := fd.Body.Lbrace
|
||||
end := fd.Body.Rbrace
|
||||
for _, cg := range file.Comments {
|
||||
for _, c := range cg.List {
|
||||
if c.Pos() < start || c.Pos() > end {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(c.Text) == gateAllowlistComment {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// TestClass1_GateFiresOnSyntheticBuggySource — proves the gate actually
|
||||
// catches the bug shape it's named after. Without this, a regression
|
||||
// to "always pass" would not be noticed until the leak shipped again.
|
||||
// Per memory feedback_assert_exact_not_substring.md: tighten the test
|
||||
// + verify it FAILS on old-shape source before merging.
|
||||
func TestClass1_GateFiresOnSyntheticBuggySource(t *testing.T) {
|
||||
const buggySrc = `package handlers
|
||||
|
||||
import "context"
|
||||
|
||||
type fakeDB struct{}
|
||||
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
|
||||
|
||||
func buggyExpand(db fakeDB, ctx context.Context, children []string) {
|
||||
for _, child := range children {
|
||||
// Bug shape: INSERT inside the range body, no preflight.
|
||||
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", child)
|
||||
}
|
||||
}
|
||||
`
|
||||
fset := token.NewFileSet()
|
||||
file, err := parser.ParseFile(fset, "buggy.go", buggySrc, parser.ParseComments)
|
||||
if err != nil {
|
||||
t.Fatalf("parse synthetic source: %v", err)
|
||||
}
|
||||
for _, decl := range file.Decls {
|
||||
fd, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fd.Name.Name != "buggyExpand" {
|
||||
continue
|
||||
}
|
||||
candidates := findInsertWorkspacesLiterals(fd, []byte(buggySrc), fset)
|
||||
if len(candidates) != 1 {
|
||||
t.Fatalf("expected 1 INSERT literal, got %d", len(candidates))
|
||||
}
|
||||
c := candidates[0]
|
||||
if c.enclosingRangeLine == 0 {
|
||||
t.Errorf("synthetic INSERT inside `for _, child := range` should be detected as enclosed by range, got enclosingRangeLine=0 — gate would miss the bug shape")
|
||||
}
|
||||
if c.hasONCONFLICT {
|
||||
t.Errorf("synthetic INSERT has no ON CONFLICT, gate falsely treated it as idempotent")
|
||||
}
|
||||
if functionCallsAny(fd, preflightCallNames) {
|
||||
t.Errorf("synthetic function does not call lookupExistingChild — gate falsely treated it as preflighted")
|
||||
}
|
||||
// All three guards say the gate WOULD fire. Pass.
|
||||
return
|
||||
}
|
||||
t.Fatal("buggyExpand FuncDecl not found in synthetic source")
|
||||
}
|
||||
|
||||
// TestClass1_GateAllowsONCONFLICT — pins that an INSERT with ON
|
||||
// CONFLICT inside a range body is NOT flagged. registry.go's
|
||||
// upsert pattern is the prod example.
|
||||
func TestClass1_GateAllowsONCONFLICT(t *testing.T) {
|
||||
const safeSrc = `package handlers
|
||||
|
||||
import "context"
|
||||
|
||||
type fakeDB struct{}
|
||||
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
|
||||
|
||||
func upsertLoop(db fakeDB, ctx context.Context, children []string) {
|
||||
for _, child := range children {
|
||||
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2) ON CONFLICT (id) DO UPDATE SET name = $2`" + `, "x", child)
|
||||
}
|
||||
}
|
||||
`
|
||||
fset := token.NewFileSet()
|
||||
file, _ := parser.ParseFile(fset, "safe.go", safeSrc, parser.ParseComments)
|
||||
for _, decl := range file.Decls {
|
||||
fd, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fd.Name.Name != "upsertLoop" {
|
||||
continue
|
||||
}
|
||||
candidates := findInsertWorkspacesLiterals(fd, []byte(safeSrc), fset)
|
||||
if len(candidates) != 1 {
|
||||
t.Fatalf("expected 1 candidate, got %d", len(candidates))
|
||||
}
|
||||
if !candidates[0].hasONCONFLICT {
|
||||
t.Errorf("ON CONFLICT clause should be detected, was missed — gate would falsely flag idempotent UPSERTs")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestClass1_GateAllowsAllowlistAnnotation — pins the escape hatch
|
||||
// works. Annotated functions are skipped at the FuncDecl level.
|
||||
func TestClass1_GateAllowsAllowlistAnnotation(t *testing.T) {
|
||||
const annotatedSrc = `package handlers
|
||||
|
||||
import "context"
|
||||
|
||||
type fakeDB struct{}
|
||||
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
|
||||
|
||||
func intentionallyUnpreflighted(db fakeDB, ctx context.Context, children []string) {
|
||||
// class1-gate: idempotent-by-design
|
||||
for _, child := range children {
|
||||
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", child)
|
||||
}
|
||||
}
|
||||
`
|
||||
fset := token.NewFileSet()
|
||||
file, _ := parser.ParseFile(fset, "annotated.go", annotatedSrc, parser.ParseComments)
|
||||
for _, decl := range file.Decls {
|
||||
fd, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fd.Name.Name != "intentionallyUnpreflighted" {
|
||||
continue
|
||||
}
|
||||
if !functionHasAllowlistComment(file, fd) {
|
||||
t.Error("allowlist comment should be detected for the intentionallyUnpreflighted function — escape hatch not working")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -109,6 +109,12 @@ curl -fsS -X POST "{{PLATFORM_URL}}/registry/register" \
|
||||
"version": "0.1.0"
|
||||
}
|
||||
}'
|
||||
|
||||
# Need help?
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/external-agent-registration
|
||||
# Common errors:
|
||||
# • 401 / 403 on register — WORKSPACE_AUTH_TOKEN must be the value
|
||||
# shown at workspace create. Tokens are shown only once.
|
||||
`
|
||||
|
||||
// externalChannelTemplate — Claude Code channel plugin install + .env. For
|
||||
@@ -172,6 +178,18 @@ claude --dangerously-load-development-channels \
|
||||
# Multi-workspace: comma-separate IDs and tokens (same order). See
|
||||
# https://github.com/Molecule-AI/molecule-mcp-claude-channel for
|
||||
# pairing flow, push-mode upgrade, and v0.2 roadmap.
|
||||
|
||||
# Need help?
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/claude-code-channel-plugin
|
||||
# Common errors:
|
||||
# • "plugin not installed" — run /plugin marketplace add then
|
||||
# /plugin install lines above; /reload-plugins or restart.
|
||||
# • "not on the approved channels allowlist" — custom channels need
|
||||
# --dangerously-load-development-channels; team/enterprise orgs
|
||||
# need admin to set channelsEnabled + allowedChannelPlugins.
|
||||
# • "Inbound messages not arriving" — stderr should show
|
||||
# "molecule channel: connected — watching N workspace(s)";
|
||||
# verify ~/.claude/channels/molecule/.env has PLATFORM_URL + token.
|
||||
`
|
||||
|
||||
// externalUniversalMcpTemplate — runtime-agnostic standalone path.
|
||||
@@ -198,6 +216,13 @@ const externalUniversalMcpTemplate = `# Universal MCP — standalone register +
|
||||
# Pair with the Claude Code or Python SDK tab if your runtime needs
|
||||
# inbound A2A delivery (canvas messages → agent conversation turns).
|
||||
|
||||
# Requires Python >= 3.11. On 3.10 or older pip says
|
||||
# "Could not find a version that satisfies the requirement
|
||||
# (from versions: none)" — the wheel's requires_python pin filters
|
||||
# the only available artifact before pip even attempts install.
|
||||
# Upgrade the interpreter (brew install python@3.12 / apt install
|
||||
# python3.12 / etc.) or use a 3.11+ venv.
|
||||
|
||||
# 1. Install the workspace runtime wheel:
|
||||
pip install molecule-ai-workspace-runtime
|
||||
|
||||
@@ -217,6 +242,17 @@ claude mcp add molecule -s user -- env \
|
||||
#
|
||||
# Origin/WAF handling is built into the wheel — no manual headers
|
||||
# needed when calling tools through the MCP server.
|
||||
|
||||
# Need help?
|
||||
# Where to install: https://pypi.org/project/molecule-ai-workspace-runtime/
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/mcp-server-setup
|
||||
# Common errors:
|
||||
# • "Tools not appearing in your agent" — run ` + "`claude mcp list`" + ` (or
|
||||
# your runtime's equivalent) and confirm the molecule entry. If
|
||||
# missing, re-run the ` + "`claude mcp add`" + ` line above.
|
||||
# • "ConnectionRefused / DNS error on first call" — PLATFORM_URL must
|
||||
# include the scheme (https://) and have NO trailing slash. Verify
|
||||
# with: curl ${PLATFORM_URL}/healthz
|
||||
`
|
||||
|
||||
// externalPythonTemplate uses molecule-sdk-python's RemoteAgentClient +
|
||||
@@ -255,6 +291,15 @@ async def main():
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
|
||||
# Need help?
|
||||
# Where to install: https://pypi.org/project/molecule-ai-workspace-runtime/
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/external-agent-registration
|
||||
# Common errors:
|
||||
# • 401 from /heartbeat — AUTH_TOKEN expired or wrong workspace_id.
|
||||
# Tokens shown only once at create time; re-create to get a fresh one.
|
||||
# • AGENT_URL not reachable from platform — public HTTPS URL required
|
||||
# for inbound A2A. Use ngrok or Cloudflare Tunnel if behind NAT.
|
||||
`
|
||||
|
||||
// externalHermesChannelTemplate — install snippet for operators whose
|
||||
@@ -322,6 +367,16 @@ hermes gateway --replace
|
||||
#
|
||||
# Source + issue tracker:
|
||||
# https://github.com/Molecule-AI/hermes-channel-molecule
|
||||
|
||||
# Need help?
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/external-agent-registration
|
||||
# Common errors:
|
||||
# • Gateway start failure — tail ~/.hermes/gateway.log. YAML
|
||||
# duplicate-key in config.yaml is the most common cause; the
|
||||
# gateway: block must appear exactly once.
|
||||
# • Plugin not discovered after install — pip show hermes-channel-molecule
|
||||
# to confirm install. Some hermes builds need ` + "`hermes plugin reload`" + `
|
||||
# before the new platform_plugins entry takes effect.
|
||||
`
|
||||
|
||||
// externalCodexTemplate — for operators whose external agent is a
|
||||
@@ -403,6 +458,18 @@ disown
|
||||
# available to the agent, and the bridge wakes a non-interactive
|
||||
# codex turn for any inbound canvas/peer message:
|
||||
codex
|
||||
|
||||
# Need help?
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/mcp-server-setup
|
||||
# Common errors:
|
||||
# • [mcp_servers.molecule] not loaded — codex must be ≥ 0.57.
|
||||
# Check with ` + "`codex --version`" + `; upgrade via npm install -g @openai/codex@latest.
|
||||
# • TOML parse error after re-running setup — TOML rejects duplicate
|
||||
# [mcp_servers.molecule] tables. Open ~/.codex/config.toml and
|
||||
# remove the old block before pasting the new one.
|
||||
# • Canvas messages don't wake codex — step 3 (codex-channel-molecule
|
||||
# bridge daemon) is required for inbound push. Check
|
||||
# pgrep -f codex-channel-molecule and tail ~/.codex-channel-molecule/daemon.log.
|
||||
`
|
||||
|
||||
// externalOpenClawTemplate — for operators whose external agent is an
|
||||
@@ -464,4 +531,13 @@ disown
|
||||
|
||||
# 5. Run an agent turn — molecule tools are now available:
|
||||
openclaw agent --message "list my peers"
|
||||
|
||||
# Need help?
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/mcp-server-setup
|
||||
# Common errors:
|
||||
# • Gateway not starting — tail ~/.openclaw/gateway.log. The loopback
|
||||
# bind requires :18789 to be free; check with ` + "`lsof -iTCP:18789`" + `.
|
||||
# • ` + "`openclaw mcp set`" + ` rejected — the heredoc generates JSON;
|
||||
# verify with ` + "`jq < ~/.openclaw/mcp/molecule.json`" + ` and re-run
|
||||
# ` + "`openclaw mcp set`" + ` if the file is malformed.
|
||||
`
|
||||
|
||||
@@ -11,18 +11,21 @@ import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"errors"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// newMCPHandler is a test helper that constructs an MCPHandler backed by the
|
||||
// sqlmock DB set up by setupTestDB.
|
||||
// sqlmock DB set up by setupTestDB. Uses newTestBroadcaster so handlers
|
||||
// that BroadcastOnly (send_message_to_user, etc.) don't nil-panic on the
|
||||
// hub — events.NewBroadcaster(nil) crashes inside hub.Broadcast.
|
||||
func newMCPHandler(t *testing.T) (*MCPHandler, sqlmock.Sqlmock) {
|
||||
t.Helper()
|
||||
mock := setupTestDB(t)
|
||||
h := NewMCPHandler(db.DB, events.NewBroadcaster(nil))
|
||||
h := NewMCPHandler(db.DB, newTestBroadcaster())
|
||||
return h, mock
|
||||
}
|
||||
|
||||
@@ -628,6 +631,170 @@ func TestMCPHandler_SendMessageToUser_Blocked_WhenEnvNotSet(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestMCPHandler_SendMessageToUser_DBErrorLogsAndStill200s pins the
|
||||
// "best-effort persistence" contract: when the activity_log INSERT
|
||||
// fails (DB hiccup, constraint violation, transient connection drop),
|
||||
// the tool MUST still return success to the agent because the WS
|
||||
// broadcast already succeeded — the user has seen the message.
|
||||
//
|
||||
// This matches /notify (activity.go) behavior. Returning an error
|
||||
// here would cause the agent to retry and re-broadcast, double-
|
||||
// rendering the message in the user's live chat panel for every
|
||||
// retry until the DB recovers.
|
||||
func TestMCPHandler_SendMessageToUser_DBErrorLogsAndStill200s(t *testing.T) {
|
||||
t.Setenv("MOLECULE_MCP_ALLOW_SEND_MESSAGE", "true")
|
||||
h, mock := newMCPHandler(t)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-err").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("CEO Ryan PC"))
|
||||
|
||||
// INSERT fails — must NOT abort the tool response.
|
||||
mock.ExpectExec(`INSERT INTO activity_logs.*'a2a_receive'.*'notify'`).
|
||||
WillReturnError(errors.New("transient db error"))
|
||||
|
||||
w := mcpPost(t, h, "ws-err", map[string]interface{}{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 100,
|
||||
"method": "tools/call",
|
||||
"params": map[string]interface{}{
|
||||
"name": "send_message_to_user",
|
||||
"arguments": map[string]interface{}{
|
||||
"message": "should not be lost from the live chat",
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
var resp mcpResponse
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("response was not valid JSON-RPC: %v", err)
|
||||
}
|
||||
// Tool response is success — INSERT failure logged, broadcast
|
||||
// already succeeded.
|
||||
if resp.Error != nil {
|
||||
t.Errorf("tool response should be success on DB error (broadcast won), got JSON-RPC error: %+v", resp.Error)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("expected DB calls in order: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMCPHandler_SendMessageToUser_ResponseBodyShape pins the
|
||||
// response_body JSON shape stored in activity_logs. This shape MUST
|
||||
// match what the canvas hydrater (extractResponseText in
|
||||
// historyHydration.ts) reads — specifically `{"result": "<text>"}`.
|
||||
// Any drift in the JSON shape silently breaks chat history without
|
||||
// failing the INSERT.
|
||||
//
|
||||
// Caught the same drift class flagged in
|
||||
// feedback_assert_exact_not_substring.md: a substring match on
|
||||
// "result" would pass even if the field were renamed; we assert the
|
||||
// exact JSON shape.
|
||||
func TestMCPHandler_SendMessageToUser_ResponseBodyShape(t *testing.T) {
|
||||
t.Setenv("MOLECULE_MCP_ALLOW_SEND_MESSAGE", "true")
|
||||
h, mock := newMCPHandler(t)
|
||||
|
||||
const userMessage = "Hi there from the agent"
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-shape").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("CEO Ryan PC"))
|
||||
|
||||
// Capture the response_body argument and assert its exact shape.
|
||||
mock.ExpectExec(`INSERT INTO activity_logs.*'a2a_receive'.*'notify'`).
|
||||
WithArgs(
|
||||
"ws-shape",
|
||||
sqlmock.AnyArg(), // summary
|
||||
// The response_body MUST be JSON `{"result": "<message>"}`.
|
||||
// Any other shape (e.g., wrapping in a Task object) breaks
|
||||
// the canvas hydrater's `body.result` extractor.
|
||||
`{"result":"`+userMessage+`"}`,
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
w := mcpPost(t, h, "ws-shape", map[string]interface{}{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 101,
|
||||
"method": "tools/call",
|
||||
"params": map[string]interface{}{
|
||||
"name": "send_message_to_user",
|
||||
"arguments": map[string]interface{}{
|
||||
"message": userMessage,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
if w.Code != 200 {
|
||||
t.Fatalf("expected 200, got %d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("response_body shape drift — would silently break canvas chat history: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMCPHandler_SendMessageToUser_PersistsToActivityLog pins the fix
|
||||
// for the reno-stars / CEO Ryan PC chat-history data-loss bug:
|
||||
// external claude-code agents using molecule-mcp's send_message_to_user
|
||||
// tool route through THIS handler (not the HTTP /notify endpoint),
|
||||
// and the handler used to broadcast WS only — visible live, gone on
|
||||
// reload because nothing wrote to activity_logs.
|
||||
//
|
||||
// Pins:
|
||||
// - INSERT happens on the success path (broadcast + DB write).
|
||||
// - INSERT shape mirrors the HTTP /notify handler exactly:
|
||||
// activity_type='a2a_receive', method='notify', request_body NULL,
|
||||
// response_body={"result": message}, status='ok'. The canvas
|
||||
// hydration query (`type=a2a_receive&source=canvas`) treats
|
||||
// both writers as the same shape — drift here means the bug
|
||||
// re-surfaces silently.
|
||||
func TestMCPHandler_SendMessageToUser_PersistsToActivityLog(t *testing.T) {
|
||||
t.Setenv("MOLECULE_MCP_ALLOW_SEND_MESSAGE", "true")
|
||||
h, mock := newMCPHandler(t)
|
||||
|
||||
// Workspace lookup — the handler verifies the workspace exists
|
||||
// before it does anything else. Returning a name lets the
|
||||
// broadcast payload populate; the test doesn't assert on the
|
||||
// broadcast (no observable WS in this fake), only on the DB.
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-msg").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("CEO Ryan PC"))
|
||||
|
||||
// The persistence INSERT — pin the exact shape so a future
|
||||
// refactor that switches columns or drops `method='notify'`
|
||||
// breaks the test loud, not silently. Match by regex on the
|
||||
// table + activity_type + method literals.
|
||||
mock.ExpectExec(`INSERT INTO activity_logs.*'a2a_receive'.*'notify'`).
|
||||
WithArgs(
|
||||
"ws-msg",
|
||||
sqlmock.AnyArg(), // summary "Agent message: ..."
|
||||
sqlmock.AnyArg(), // response_body JSON
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
w := mcpPost(t, h, "ws-msg", map[string]interface{}{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 99,
|
||||
"method": "tools/call",
|
||||
"params": map[string]interface{}{
|
||||
"name": "send_message_to_user",
|
||||
"arguments": map[string]interface{}{
|
||||
"message": "Hello, this should persist!",
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
var resp mcpResponse
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("response was not valid JSON-RPC: %v\nbody=%s", err, w.Body.String())
|
||||
}
|
||||
if resp.Error != nil {
|
||||
t.Errorf("unexpected JSON-RPC error: %+v", resp.Error)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("DB expectations not met (INSERT missing → reno-stars data-loss regression): %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Parse error
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -344,6 +344,43 @@ func (h *MCPHandler) toolSendMessageToUser(ctx context.Context, workspaceID stri
|
||||
"name": wsName,
|
||||
})
|
||||
|
||||
// Persist to activity_logs so chat history loaders surface this
|
||||
// message after a page reload. Pre-fix (reno-stars 2026-05-05),
|
||||
// the MCP-bridge variant of send_message_to_user broadcast WS
|
||||
// only — visible live, gone on reload — while the HTTP /notify
|
||||
// sibling already had this fix (activity.go:535). External
|
||||
// claude-code agents using molecule-mcp's send_message_to_user
|
||||
// tool route through THIS handler, not /notify, so they were
|
||||
// hitting the unfixed path.
|
||||
//
|
||||
// Shape mirrors activity.go's Notify handler exactly so the
|
||||
// canvas chat-history hydration treats both the same:
|
||||
// - activity_type='a2a_receive' joins the source=canvas filter
|
||||
// - source_id is omitted → defaults to NULL ("canvas-source")
|
||||
// - method='notify' tags it as a push (vs a real A2A receive)
|
||||
// - request_body=NULL so the loader doesn't draw a duplicate
|
||||
// "user" bubble
|
||||
// - response_body={"result": "<text>"} feeds extractResponseText
|
||||
// directly
|
||||
//
|
||||
// Errors are log-only — the broadcast already returned ok to the
|
||||
// caller, the user has seen the message, and the persistence
|
||||
// failure mode (DB hiccup) shouldn't block the tool call. The
|
||||
// downside is the same as pre-fix: message vanishes on reload
|
||||
// after a transient DB error. Log it so operators have a signal.
|
||||
respPayload := map[string]interface{}{"result": message}
|
||||
respJSON, _ := json.Marshal(respPayload)
|
||||
preview := message
|
||||
if len(preview) > 80 {
|
||||
preview = preview[:80] + "…"
|
||||
}
|
||||
if _, err := h.database.ExecContext(ctx, `
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, method, summary, response_body, status)
|
||||
VALUES ($1, 'a2a_receive', 'notify', $2, $3::jsonb, 'ok')
|
||||
`, workspaceID, "Agent message: "+preview, string(respJSON)); err != nil {
|
||||
log.Printf("MCP send_message_to_user: failed to persist for %s: %v", workspaceID, err)
|
||||
}
|
||||
|
||||
return "Message sent.", nil
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
@@ -79,7 +80,16 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
}
|
||||
}
|
||||
|
||||
ctxLookup := context.Background()
|
||||
// 5s timeout bounds the lookup independently of any HTTP request
|
||||
// context. createWorkspaceTree runs in goroutines spawned from the
|
||||
// /org/import handler, so plumbing the request context here would
|
||||
// cascade-cancel into provisionWorkspaceAuto and abort in-flight
|
||||
// EC2 provisioning if the client disconnected mid-import — that's
|
||||
// the wrong behaviour. A short bounded timeout protects the
|
||||
// per-row SELECT against a wedged DB without taking the
|
||||
// drop-everything-on-disconnect tradeoff.
|
||||
ctxLookup, cancelLookup := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancelLookup()
|
||||
// Idempotency: if a workspace with the same (parent_id, name) already
|
||||
// exists, skip the INSERT + canvas_layouts + broadcast + provisioning.
|
||||
// This is what makes /org/import safe to call multiple times — the
|
||||
@@ -91,6 +101,15 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
// (parent exists, some children missing) backfill the missing children
|
||||
// instead of either no-op'ing the whole subtree or duplicating the
|
||||
// existing children.
|
||||
//
|
||||
// /org/import is ADDITIVE-ONLY, never destructive. Children present
|
||||
// in the existing tree but absent from the new template are
|
||||
// preserved (no DELETE on diff). Skip-path also does NOT propagate
|
||||
// updates to existing nodes — a re-import that adds an
|
||||
// initial_memory or schedule to an existing workspace is silently
|
||||
// dropped (the function bypasses seedInitialMemories, schedule SQL,
|
||||
// channel config for skipped rows). To force-update an existing
|
||||
// tree, delete and re-import or use a future /org/sync route.
|
||||
existingID, existing, lookupErr := h.lookupExistingChild(ctxLookup, ws.Name, parentID)
|
||||
if lookupErr != nil {
|
||||
return fmt.Errorf("idempotency check for %s: %w", ws.Name, lookupErr)
|
||||
@@ -605,6 +624,12 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
//
|
||||
// On sql.ErrNoRows: returns ("", false, nil) — caller should INSERT.
|
||||
// On a real DB error: returns ("", false, err) — caller propagates.
|
||||
//
|
||||
// errors.Is is wrap-safe — a future caller wrapping the error
|
||||
// (database/sql can wrap driver errors with %w in some setups) would
|
||||
// silently break a `err == sql.ErrNoRows` equality check, causing the
|
||||
// no-rows path to fall through to the "real DB error" branch and
|
||||
// abort the import. errors.Is unwraps.
|
||||
func (h *OrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) {
|
||||
var existingID string
|
||||
err := db.DB.QueryRowContext(ctx, `
|
||||
@@ -614,7 +639,7 @@ func (h *OrgHandler) lookupExistingChild(ctx context.Context, name string, paren
|
||||
AND status != 'removed'
|
||||
LIMIT 1
|
||||
`, name, parentID).Scan(&existingID)
|
||||
if err == sql.ErrNoRows {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return "", false, nil
|
||||
}
|
||||
if err != nil {
|
||||
|
||||
@@ -2,7 +2,9 @@ package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"go/ast"
|
||||
"go/parser"
|
||||
"go/token"
|
||||
@@ -123,6 +125,36 @@ func TestLookupExistingChild_DBError_Propagates(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestLookupExistingChild_WrappedNoRows_TreatedAsNotFound — pins the
|
||||
// wrap-safety of the errors.Is(err, sql.ErrNoRows) check. The previous
|
||||
// `err == sql.ErrNoRows` equality would fall through to the
|
||||
// "real DB error" branch on a wrapped no-rows error, aborting the
|
||||
// import for what is in fact the no-rows happy path. driver/sql
|
||||
// wrapping is currently a non-issue but a future driver change or a
|
||||
// caller that wraps the result via fmt.Errorf("…: %w", err) would
|
||||
// silently break the equality check. errors.Is unwraps.
|
||||
func TestLookupExistingChild_WrappedNoRows_TreatedAsNotFound(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
parent := "parent-1"
|
||||
wrapped := fmt.Errorf("driver-wrapped: %w", sql.ErrNoRows)
|
||||
mock.ExpectQuery(`SELECT id FROM workspaces`).
|
||||
WithArgs("Alpha", &parent).
|
||||
WillReturnError(wrapped)
|
||||
|
||||
h := &OrgHandler{}
|
||||
id, found, err := h.lookupExistingChild(context.Background(), "Alpha", &parent)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("expected wrapped no-rows to be treated as not-found (err=nil), got: %v", err)
|
||||
}
|
||||
if found {
|
||||
t.Errorf("expected found=false on wrapped no-rows, got found=true")
|
||||
}
|
||||
if id != "" {
|
||||
t.Errorf("expected empty id on wrapped no-rows, got %q", id)
|
||||
}
|
||||
}
|
||||
|
||||
// workspacesInsertRE matches a SQL literal that begins (after optional
|
||||
// leading whitespace) with `INSERT INTO workspaces` followed by `(` —
|
||||
// requiring the open-paren rules out lookalikes like
|
||||
|
||||
@@ -425,7 +425,16 @@ def _build_initialize_result() -> dict:
|
||||
"tools": {"listChanged": False},
|
||||
"experimental": {"claude/channel": {}},
|
||||
},
|
||||
"serverInfo": {"name": "a2a-delegation", "version": "1.0.0"},
|
||||
# Identifier convention: this server is what users register with
|
||||
# `claude mcp add molecule -- molecule-mcp` (and similar across
|
||||
# other MCP hosts), so the canonical name is "molecule". Earlier
|
||||
# versions reported "a2a-delegation" — accurate to the original
|
||||
# purpose but a mismatch with how operators actually name it.
|
||||
# Mismatch is harmless on tool routing (all MCP hosts dispatch
|
||||
# by the user-supplied registration name, NOT serverInfo.name)
|
||||
# but matters for any future Claude Code allowlist that gates
|
||||
# channel push by hardcoded server name (issue #2934).
|
||||
"serverInfo": {"name": "molecule", "version": "1.0.0"},
|
||||
# Built per-call (not the module-level constant) so an operator
|
||||
# who sets MOLECULE_MCP_POLL_TIMEOUT_SECS after import — e.g.
|
||||
# via a wrapper script that exports then re-imports — sees
|
||||
|
||||
+32
-473
@@ -129,481 +129,40 @@ from a2a_tools_delegation import ( # noqa: E402 (import after the from-a2a_cli
|
||||
)
|
||||
|
||||
|
||||
async def _upload_chat_files(
|
||||
client: httpx.AsyncClient,
|
||||
paths: list[str],
|
||||
workspace_id: str | None = None,
|
||||
) -> tuple[list[dict], str | None]:
|
||||
"""Upload local file paths through /workspaces/<self>/chat/uploads.
|
||||
|
||||
The platform stages each upload under /workspace/.molecule/chat-uploads
|
||||
(an "allowed root" the canvas knows how to render via the Download
|
||||
endpoint) and returns metadata the broadcast payload references.
|
||||
|
||||
Why we route through upload instead of just passing the agent's path:
|
||||
the canvas's allowed-root list is /configs, /workspace, /home, /plugins
|
||||
— files at /tmp or /root would be unreachable. Uploading copies the
|
||||
bytes into an allowed root regardless of where the agent wrote them.
|
||||
|
||||
Returns (attachments, error). On any failure the caller should NOT
|
||||
fire the notify — partial-attach would surface a half-rendered chip.
|
||||
"""
|
||||
if not paths:
|
||||
return [], None
|
||||
files_payload: list[tuple[str, tuple[str, bytes, str]]] = []
|
||||
for p in paths:
|
||||
if not isinstance(p, str) or not p:
|
||||
return [], f"Error: invalid attachment path {p!r}"
|
||||
if not os.path.isfile(p):
|
||||
return [], f"Error: attachment not found: {p}"
|
||||
try:
|
||||
with open(p, "rb") as fh:
|
||||
data = fh.read()
|
||||
except OSError as e:
|
||||
return [], f"Error reading {p}: {e}"
|
||||
# Sniff mime from filename so the canvas can pick the right
|
||||
# icon / preview / inline-image renderer. Pre-fix this was
|
||||
# hardcoded application/octet-stream and chat_files.go's
|
||||
# Upload trusts whatever Content-Type the multipart part
|
||||
# carries — `mt := fh.Header.Get("Content-Type")` only falls
|
||||
# back to extension-sniffing when the header is empty. So a
|
||||
# hardcoded octet-stream meant every attachment lost its
|
||||
# real type forever, breaking the canvas chip's icon logic.
|
||||
mime_type, _ = mimetypes.guess_type(p)
|
||||
if not mime_type:
|
||||
mime_type = "application/octet-stream"
|
||||
files_payload.append(("files", (os.path.basename(p), data, mime_type)))
|
||||
target_workspace_id = (workspace_id or "").strip() or WORKSPACE_ID
|
||||
try:
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{target_workspace_id}/chat/uploads",
|
||||
files=files_payload,
|
||||
headers=_auth_headers_for_heartbeat(target_workspace_id),
|
||||
)
|
||||
except Exception as e:
|
||||
return [], f"Error uploading attachments: {e}"
|
||||
if resp.status_code != 200:
|
||||
return [], f"Error: chat/uploads returned {resp.status_code}: {resp.text[:200]}"
|
||||
try:
|
||||
body = resp.json()
|
||||
except Exception as e:
|
||||
return [], f"Error parsing upload response: {e}"
|
||||
uploaded = body.get("files") or []
|
||||
if not isinstance(uploaded, list) or len(uploaded) != len(paths):
|
||||
return [], f"Error: upload returned {len(uploaded) if isinstance(uploaded, list) else 'invalid'} entries for {len(paths)} files"
|
||||
return uploaded, None
|
||||
|
||||
|
||||
async def tool_send_message_to_user(
|
||||
message: str,
|
||||
attachments: list[str] | None = None,
|
||||
workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Send a message directly to the user's canvas chat via WebSocket.
|
||||
|
||||
Args:
|
||||
message: The text to display in the user's chat. Required even
|
||||
when sending attachments — set to a short caption like
|
||||
"Here's the build output:" or "Done — see attached."
|
||||
attachments: Optional list of absolute file paths inside this
|
||||
container. Each is uploaded to the platform and rendered
|
||||
in the canvas as a clickable download chip. Use this
|
||||
instead of pasting paths in the message text — paths
|
||||
render as plain text and the user can't click them.
|
||||
Examples:
|
||||
attachments=["/tmp/build-output.zip"]
|
||||
attachments=["/workspace/report.pdf", "/workspace/data.csv"]
|
||||
workspace_id: Optional. When the agent is registered in MULTIPLE
|
||||
workspaces (external multi-workspace MCP path), this
|
||||
selects which workspace's chat to deliver the message to —
|
||||
should match the ``arrival_workspace_id`` of the inbound
|
||||
message you're replying to so the user sees the reply in
|
||||
the same canvas they typed in. Single-workspace agents
|
||||
omit this; the message routes to the only registered
|
||||
workspace.
|
||||
"""
|
||||
if not message:
|
||||
return "Error: message is required"
|
||||
target_workspace_id = (workspace_id or "").strip() or WORKSPACE_ID
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
uploaded, upload_err = await _upload_chat_files(
|
||||
client, attachments or [], workspace_id=target_workspace_id,
|
||||
)
|
||||
if upload_err:
|
||||
return upload_err
|
||||
payload: dict = {"message": message}
|
||||
if uploaded:
|
||||
payload["attachments"] = uploaded
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{target_workspace_id}/notify",
|
||||
json=payload,
|
||||
headers=_auth_headers_for_heartbeat(target_workspace_id),
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
if uploaded:
|
||||
return f"Message sent to user with {len(uploaded)} attachment(s)"
|
||||
return "Message sent to user"
|
||||
return f"Error: platform returned {resp.status_code}"
|
||||
except Exception as e:
|
||||
return f"Error sending message: {e}"
|
||||
|
||||
|
||||
async def tool_list_peers(source_workspace_id: str | None = None) -> str:
|
||||
"""List all workspaces this agent can communicate with.
|
||||
|
||||
Behavior:
|
||||
- ``source_workspace_id`` set → list peers of that one workspace.
|
||||
- Unset, single-workspace mode → list peers of WORKSPACE_ID
|
||||
(the legacy path, unchanged).
|
||||
- Unset, multi-workspace mode (MOLECULE_WORKSPACES populated) →
|
||||
aggregate across every registered workspace, prefixing each
|
||||
peer with its source so the agent / user can see the full peer
|
||||
surface in one call.
|
||||
|
||||
Side-effect: populates ``_peer_to_source`` so subsequent
|
||||
``tool_delegate_task(target)`` auto-routes through the correct
|
||||
sending workspace without the agent needing ``source_workspace_id``.
|
||||
"""
|
||||
sources: list[str]
|
||||
aggregate = False
|
||||
if source_workspace_id:
|
||||
sources = [source_workspace_id]
|
||||
else:
|
||||
registered = list_registered_workspaces()
|
||||
if len(registered) > 1:
|
||||
sources = registered
|
||||
aggregate = True
|
||||
else:
|
||||
sources = [WORKSPACE_ID]
|
||||
|
||||
all_peers: list[tuple[str, dict]] = [] # (source, peer_record)
|
||||
diagnostics: list[tuple[str, str]] = [] # (source, diagnostic)
|
||||
for src in sources:
|
||||
peers, diagnostic = await get_peers_with_diagnostic(source_workspace_id=src)
|
||||
if peers:
|
||||
for p in peers:
|
||||
all_peers.append((src, p))
|
||||
elif diagnostic is not None:
|
||||
diagnostics.append((src, diagnostic))
|
||||
|
||||
if not all_peers:
|
||||
if diagnostics:
|
||||
joined = "; ".join(f"[{src[:8]}] {d}" for src, d in diagnostics)
|
||||
return f"No peers found. {joined}"
|
||||
return (
|
||||
"You have no peers in the platform registry. "
|
||||
"(No parent, no children, no siblings registered.)"
|
||||
)
|
||||
|
||||
lines = []
|
||||
for src, p in all_peers:
|
||||
status = p.get("status", "unknown")
|
||||
role = p.get("role", "")
|
||||
peer_id = p["id"]
|
||||
# Cache name for use in delegate_task
|
||||
_peer_names[peer_id] = p["name"]
|
||||
# Cache the source workspace so tool_delegate_task auto-routes
|
||||
_peer_to_source[peer_id] = src
|
||||
if aggregate:
|
||||
lines.append(
|
||||
f"- {p['name']} (ID: {peer_id}, status: {status}, role: {role}, via: {src[:8]})"
|
||||
)
|
||||
else:
|
||||
lines.append(f"- {p['name']} (ID: {peer_id}, status: {status}, role: {role})")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
async def tool_get_workspace_info(source_workspace_id: str | None = None) -> str:
|
||||
"""Get this workspace's own info.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace to
|
||||
introspect when the agent is registered into multiple workspaces.
|
||||
Unset → falls back to module-level WORKSPACE_ID.
|
||||
"""
|
||||
info = await get_workspace_info(source_workspace_id=source_workspace_id)
|
||||
return json.dumps(info, indent=2)
|
||||
|
||||
|
||||
async def tool_commit_memory(
|
||||
content: str,
|
||||
scope: str = "LOCAL",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Save important information to persistent memory.
|
||||
|
||||
GLOBAL scope is writable only by root workspaces (tier == 0).
|
||||
RBAC memory.write permission is required for all scope levels.
|
||||
The source workspace_id is embedded in every record so the platform
|
||||
can enforce cross-workspace isolation and audit trail.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace this
|
||||
memory belongs to when the agent is registered into multiple
|
||||
workspaces (PR-1 / multi-workspace mode). When unset, falls back
|
||||
to the module-level WORKSPACE_ID — single-workspace operators see
|
||||
no behaviour change.
|
||||
"""
|
||||
if not content:
|
||||
return "Error: content is required"
|
||||
content = _redact_secrets(content)
|
||||
scope = scope.upper()
|
||||
if scope not in ("LOCAL", "TEAM", "GLOBAL"):
|
||||
scope = "LOCAL"
|
||||
|
||||
# RBAC: require memory.write permission (mirrors builtin_tools/memory.py)
|
||||
if not _check_memory_write_permission():
|
||||
return (
|
||||
"Error: RBAC — this workspace does not have the 'memory.write' "
|
||||
"permission for this operation."
|
||||
)
|
||||
|
||||
# Scope enforcement: only root workspaces (tier 0) can write GLOBAL memory.
|
||||
# This prevents tenant workspaces from poisoning org-wide memory (GH#1610).
|
||||
if scope == "GLOBAL" and not _is_root_workspace():
|
||||
return (
|
||||
"Error: RBAC — only root workspaces (tier 0) can write to GLOBAL scope. "
|
||||
"Non-root workspaces may use LOCAL or TEAM scope."
|
||||
)
|
||||
|
||||
src = source_workspace_id or WORKSPACE_ID
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/memories",
|
||||
json={
|
||||
"content": content,
|
||||
"scope": scope,
|
||||
# Embed source workspace so the platform can namespace-isolate
|
||||
# and audit cross-workspace writes (GH#1610 fix).
|
||||
"workspace_id": src,
|
||||
},
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
data = resp.json()
|
||||
if resp.status_code in (200, 201):
|
||||
return json.dumps({"success": True, "id": data.get("id"), "scope": scope})
|
||||
return f"Error: {data.get('error', resp.text)}"
|
||||
except Exception as e:
|
||||
return f"Error saving memory: {e}"
|
||||
|
||||
|
||||
async def tool_recall_memory(
|
||||
query: str = "",
|
||||
scope: str = "",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Search persistent memory for previously saved information.
|
||||
|
||||
RBAC memory.read permission is required (mirrors builtin_tools/memory.py).
|
||||
The workspace_id is sent as a query parameter so the platform can
|
||||
cross-validate it against the auth token and defend against any future
|
||||
path traversal / cross-tenant read bugs in the platform itself.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace's memories
|
||||
to search when the agent is registered into multiple workspaces.
|
||||
Unset → defaults to the module-level WORKSPACE_ID.
|
||||
"""
|
||||
# RBAC: require memory.read permission (mirrors builtin_tools/memory.py)
|
||||
if not _check_memory_read_permission():
|
||||
return (
|
||||
"Error: RBAC — this workspace does not have the 'memory.read' "
|
||||
"permission for this operation."
|
||||
)
|
||||
|
||||
src = source_workspace_id or WORKSPACE_ID
|
||||
params: dict[str, str] = {"workspace_id": src}
|
||||
if query:
|
||||
params["q"] = query
|
||||
if scope:
|
||||
params["scope"] = scope.upper()
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/memories",
|
||||
params=params,
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
data = resp.json()
|
||||
if isinstance(data, list):
|
||||
if not data:
|
||||
return "No memories found."
|
||||
lines = []
|
||||
for m in data:
|
||||
lines.append(f"[{m.get('scope', '?')}] {m.get('content', '')}")
|
||||
return "\n".join(lines)
|
||||
return json.dumps(data)
|
||||
except Exception as e:
|
||||
return f"Error recalling memory: {e}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Inbox tools — inbound delivery for the standalone molecule-mcp path.
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# The InboxState singleton is set by mcp_cli before the MCP server starts
|
||||
# (see workspace/inbox.py for the rationale). In-container runtimes never
|
||||
# call ``inbox.activate(...)``, so ``inbox.get_state()`` returns None and
|
||||
# these tools surface an informational error rather than raising.
|
||||
#
|
||||
# When-to-use guidance (mirrored in platform_tools/registry.py): agents
|
||||
# in standalone-runtime mode should call ``wait_for_message`` to block
|
||||
# on the next inbound message after they've emitted a reply, forming
|
||||
# the loop ``wait → respond → wait``. ``inbox_peek`` is for inspecting
|
||||
# the queue without consuming; ``inbox_pop`` removes a handled message.
|
||||
|
||||
_INBOX_NOT_ENABLED_MSG = (
|
||||
"Error: inbox polling is not enabled in this runtime. The standalone "
|
||||
"molecule-mcp wrapper activates it; in-container runtimes receive "
|
||||
"messages via push delivery and do not need these tools."
|
||||
# Messaging tool handlers — extracted to a2a_tools_messaging
|
||||
# (RFC #2873 iter 4d). Re-imported here so call sites + tests that
|
||||
# reference ``a2a_tools.tool_send_message_to_user`` /
|
||||
# ``tool_list_peers`` / ``tool_get_workspace_info`` /
|
||||
# ``tool_chat_history`` / ``_upload_chat_files`` keep resolving
|
||||
# identically.
|
||||
from a2a_tools_messaging import ( # noqa: E402 (import after the top-of-module imports)
|
||||
_upload_chat_files,
|
||||
tool_chat_history,
|
||||
tool_get_workspace_info,
|
||||
tool_list_peers,
|
||||
tool_send_message_to_user,
|
||||
)
|
||||
|
||||
|
||||
async def tool_chat_history(
|
||||
peer_id: str,
|
||||
limit: int = 20,
|
||||
before_ts: str = "",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Fetch the prior conversation with one peer.
|
||||
|
||||
Hits ``/workspaces/<self>/activity?peer_id=<peer>&limit=<N>``
|
||||
against the workspace-server, which returns activity rows where
|
||||
the peer is either the sender (``source_id=peer`` — they sent us
|
||||
the message) or the recipient (``target_id=peer`` — we sent to
|
||||
them) of an A2A turn — both sides of the conversation in
|
||||
chronological order.
|
||||
|
||||
Args:
|
||||
peer_id: The other workspace's UUID. Same value the agent
|
||||
sees as ``peer_id`` on a peer_agent push or ``workspace_id``
|
||||
on a delegate_task call.
|
||||
limit: Maximum rows to return; capped server-side at 500. The
|
||||
default of 20 covers \"most recent context for this peer\"
|
||||
without flooding the agent's context window.
|
||||
before_ts: Optional RFC3339 timestamp; only rows strictly
|
||||
older are returned. Used to page backward through long
|
||||
histories — pass the oldest ``ts`` from the previous
|
||||
response. Empty (default) returns the most recent ``limit``
|
||||
rows.
|
||||
source_workspace_id: Which registered workspace's activity log
|
||||
to query. Auto-routes via ``_peer_to_source`` cache when
|
||||
unset (the workspace this peer was discovered through);
|
||||
falls back to module-level WORKSPACE_ID for single-workspace
|
||||
operators.
|
||||
|
||||
Returns a JSON-encoded list of activity rows (or an error string
|
||||
starting with ``Error:`` so the agent can branch). Each row carries
|
||||
``activity_type``, ``source_id``, ``target_id``, ``method``,
|
||||
``summary``, ``request_body``, ``response_body``, ``status``,
|
||||
``created_at`` — same shape ``inbox_peek`` and the canvas chat
|
||||
loader already see.
|
||||
"""
|
||||
if not peer_id or not isinstance(peer_id, str):
|
||||
return "Error: peer_id is required"
|
||||
if not isinstance(limit, int) or limit <= 0:
|
||||
limit = 20
|
||||
if limit > 500:
|
||||
limit = 500
|
||||
|
||||
src = source_workspace_id or _peer_to_source.get(peer_id) or WORKSPACE_ID
|
||||
|
||||
params: dict[str, str] = {
|
||||
"peer_id": peer_id,
|
||||
"limit": str(limit),
|
||||
}
|
||||
# Forward verbatim — the server route validates as RFC3339 at the
|
||||
# trust boundary and translates into a `created_at < $X` clause.
|
||||
if before_ts:
|
||||
params["before_ts"] = before_ts
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/activity",
|
||||
params=params,
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return f"Error: chat_history request failed: {exc}"
|
||||
|
||||
if resp.status_code == 400:
|
||||
# Trust-boundary rejection (malformed peer_id, etc.) — surface
|
||||
# the server's reason verbatim so the agent can correct itself.
|
||||
try:
|
||||
err = resp.json().get("error", "bad request")
|
||||
except Exception: # noqa: BLE001
|
||||
err = "bad request"
|
||||
return f"Error: {err}"
|
||||
if resp.status_code >= 400:
|
||||
return f"Error: chat_history returned HTTP {resp.status_code}"
|
||||
|
||||
try:
|
||||
rows = resp.json()
|
||||
except Exception: # noqa: BLE001
|
||||
return "Error: chat_history response was not JSON"
|
||||
if not isinstance(rows, list):
|
||||
return "Error: chat_history response was not a list"
|
||||
|
||||
# Server returns DESC (most recent first); reverse to chronological
|
||||
# so the agent reads the conversation top-down like a chat log.
|
||||
rows.reverse()
|
||||
return json.dumps(rows)
|
||||
# Memory tool handlers — extracted to a2a_tools_memory (RFC #2873 iter 4c).
|
||||
# Re-imported here so call sites + tests that reference
|
||||
# ``a2a_tools.tool_commit_memory`` / ``tool_recall_memory`` keep
|
||||
# resolving identically.
|
||||
from a2a_tools_memory import ( # noqa: E402 (import after the top-of-module imports)
|
||||
tool_commit_memory,
|
||||
tool_recall_memory,
|
||||
)
|
||||
|
||||
|
||||
async def tool_inbox_peek(limit: int = 10) -> str:
|
||||
"""Return up to ``limit`` pending inbound messages without removing them."""
|
||||
import inbox # local import — avoids a circular dep at module load
|
||||
|
||||
state = inbox.get_state()
|
||||
if state is None:
|
||||
return _INBOX_NOT_ENABLED_MSG
|
||||
messages = state.peek(limit=limit if isinstance(limit, int) else 10)
|
||||
return json.dumps([m.to_dict() for m in messages])
|
||||
|
||||
|
||||
async def tool_inbox_pop(activity_id: str) -> str:
|
||||
"""Remove a message from the inbox queue by activity_id."""
|
||||
import inbox
|
||||
|
||||
state = inbox.get_state()
|
||||
if state is None:
|
||||
return _INBOX_NOT_ENABLED_MSG
|
||||
if not isinstance(activity_id, str) or not activity_id:
|
||||
return "Error: activity_id is required."
|
||||
removed = state.pop(activity_id)
|
||||
if removed is None:
|
||||
return json.dumps({"removed": False, "activity_id": activity_id})
|
||||
return json.dumps({"removed": True, "activity_id": activity_id})
|
||||
|
||||
|
||||
async def tool_wait_for_message(timeout_secs: float = 60.0) -> str:
|
||||
"""Block until a new message arrives or ``timeout_secs`` elapses.
|
||||
|
||||
Returns the head message non-destructively; the agent decides
|
||||
whether to ``inbox_pop`` it after acting.
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
import inbox
|
||||
|
||||
state = inbox.get_state()
|
||||
if state is None:
|
||||
return _INBOX_NOT_ENABLED_MSG
|
||||
|
||||
try:
|
||||
timeout = float(timeout_secs)
|
||||
except (TypeError, ValueError):
|
||||
timeout = 60.0
|
||||
# Cap at 300s — Claude Code's default tool timeout is ~10min, and
|
||||
# blocking longer than 5min wastes the prompt cache window for
|
||||
# nothing useful. Operators who want longer can call repeatedly.
|
||||
timeout = max(0.0, min(timeout, 300.0))
|
||||
|
||||
# The threading.Event-based wait would block the asyncio loop.
|
||||
# Run it on the default executor so the MCP server can keep
|
||||
# processing other JSON-RPC requests while we sleep.
|
||||
loop = asyncio.get_running_loop()
|
||||
message = await loop.run_in_executor(None, state.wait, timeout)
|
||||
if message is None:
|
||||
return json.dumps({"timeout": True, "timeout_secs": timeout})
|
||||
return json.dumps(message.to_dict())
|
||||
# Inbox tool handlers — extracted to a2a_tools_inbox (RFC #2873 iter 4e).
|
||||
# Re-imported here so call sites + tests that reference
|
||||
# ``a2a_tools.tool_inbox_peek`` / ``tool_inbox_pop`` / ``tool_wait_for_message``
|
||||
# / ``_enrich_inbound_for_agent`` / ``_INBOX_NOT_ENABLED_MSG`` keep
|
||||
# resolving identically.
|
||||
from a2a_tools_inbox import ( # noqa: E402 (import after the top-of-module imports)
|
||||
_INBOX_NOT_ENABLED_MSG,
|
||||
_enrich_inbound_for_agent,
|
||||
tool_inbox_peek,
|
||||
tool_inbox_pop,
|
||||
tool_wait_for_message,
|
||||
)
|
||||
|
||||
@@ -0,0 +1,140 @@
|
||||
"""Inbox tool handlers — single-concern slice of the a2a_tools surface.
|
||||
|
||||
Standalone-runtime path for inbound-message delivery (push-mode runtimes
|
||||
get messages via the channel-tag synthesis in a2a_mcp_server). The
|
||||
``InboxState`` singleton is set by ``mcp_cli`` before the MCP server
|
||||
starts; in-container runtimes never call ``inbox.activate(...)`` so
|
||||
``inbox.get_state()`` returns None and these tools surface an
|
||||
informational error instead of raising.
|
||||
|
||||
When-to-use guidance for agents (mirrored in
|
||||
``platform_tools/registry.py``):
|
||||
- ``wait_for_message``: block until a new inbound message arrives, then
|
||||
decide what to do with it; forms the loop ``wait → respond → wait``.
|
||||
- ``inbox_peek``: inspect the queue non-destructively.
|
||||
- ``inbox_pop``: remove a handled message by activity_id.
|
||||
|
||||
Extracted from ``a2a_tools.py`` in RFC #2873 iter 4e so the kitchen-sink
|
||||
module shrinks to a back-compat shim. The extraction also makes the
|
||||
``_enrich_inbound_for_agent`` helper unit-testable in isolation —
|
||||
previously it was buried in ``a2a_tools`` and only exercised through
|
||||
the inbox wrappers, leaving its peer-id-empty / cache-miss / registry-
|
||||
unavailable branches under-covered.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
|
||||
# Surfaced when the inbox subsystem is not initialised. Returned by the
|
||||
# three inbox tool wrappers below so the agent gets a clear "this
|
||||
# runtime delivers via push" message instead of a NameError.
|
||||
_INBOX_NOT_ENABLED_MSG = (
|
||||
"Error: inbox polling is not enabled in this runtime. The standalone "
|
||||
"molecule-mcp wrapper activates it; in-container runtimes receive "
|
||||
"messages via push delivery and do not need these tools."
|
||||
)
|
||||
|
||||
|
||||
def _enrich_inbound_for_agent(d: dict) -> dict:
|
||||
"""Add peer_name / peer_role / agent_card_url to a poll-path message.
|
||||
|
||||
The PUSH path (a2a_mcp_server._build_channel_notification) already
|
||||
enriches the meta dict with these fields, so a Claude Code host
|
||||
with channel-push sees them. The POLL path goes through
|
||||
InboxMessage.to_dict, which is intentionally identity-free (the
|
||||
storage layer doesn't know about the registry cache). Without this
|
||||
helper, every non-Claude-Code MCP client that uses inbox_peek /
|
||||
wait_for_message gets a plain message and the receiving agent
|
||||
can't tell who's writing — breaking the contract documented in
|
||||
a2a_mcp_server.py:303-345 ("In both paths the same fields apply").
|
||||
|
||||
Cache-first non-blocking enrichment (same shape as push): on cache
|
||||
miss the helper returns the bare message; the next call within the
|
||||
5-min TTL hits the warm cache. Failure to enrich is non-fatal —
|
||||
the agent still gets text + peer_id + kind + activity_id, just
|
||||
without the friendly identity.
|
||||
"""
|
||||
peer_id = d.get("peer_id") or ""
|
||||
if not peer_id:
|
||||
# canvas_user — no peer to enrich; helper returns the plain
|
||||
# message unchanged so the canvas reply path still works.
|
||||
return d
|
||||
try:
|
||||
from a2a_client import ( # local import — avoid module-load cycle
|
||||
_agent_card_url_for,
|
||||
enrich_peer_metadata_nonblocking,
|
||||
)
|
||||
except Exception: # noqa: BLE001
|
||||
# If a2a_client is unavailable (test harness, partial install),
|
||||
# degrade gracefully — agent still gets the bare envelope.
|
||||
return d
|
||||
record = enrich_peer_metadata_nonblocking(peer_id)
|
||||
if record is not None:
|
||||
if name := record.get("name"):
|
||||
d["peer_name"] = name
|
||||
if role := record.get("role"):
|
||||
d["peer_role"] = role
|
||||
# agent_card_url is constructable from peer_id alone — surface it
|
||||
# even when registry enrichment misses, so the receiving agent has
|
||||
# a single endpoint to hit for the peer's full capability list.
|
||||
d["agent_card_url"] = _agent_card_url_for(peer_id)
|
||||
return d
|
||||
|
||||
|
||||
async def tool_inbox_peek(limit: int = 10) -> str:
|
||||
"""Return up to ``limit`` pending inbound messages without removing them."""
|
||||
import inbox # local import — avoids a circular dep at module load
|
||||
|
||||
state = inbox.get_state()
|
||||
if state is None:
|
||||
return _INBOX_NOT_ENABLED_MSG
|
||||
messages = state.peek(limit=limit if isinstance(limit, int) else 10)
|
||||
return json.dumps([_enrich_inbound_for_agent(m.to_dict()) for m in messages])
|
||||
|
||||
|
||||
async def tool_inbox_pop(activity_id: str) -> str:
|
||||
"""Remove a message from the inbox queue by activity_id."""
|
||||
import inbox
|
||||
|
||||
state = inbox.get_state()
|
||||
if state is None:
|
||||
return _INBOX_NOT_ENABLED_MSG
|
||||
if not isinstance(activity_id, str) or not activity_id:
|
||||
return "Error: activity_id is required."
|
||||
removed = state.pop(activity_id)
|
||||
if removed is None:
|
||||
return json.dumps({"removed": False, "activity_id": activity_id})
|
||||
return json.dumps({"removed": True, "activity_id": activity_id})
|
||||
|
||||
|
||||
async def tool_wait_for_message(timeout_secs: float = 60.0) -> str:
|
||||
"""Block until a new message arrives or ``timeout_secs`` elapses.
|
||||
|
||||
Returns the head message non-destructively; the agent decides
|
||||
whether to ``inbox_pop`` it after acting.
|
||||
"""
|
||||
import inbox
|
||||
|
||||
state = inbox.get_state()
|
||||
if state is None:
|
||||
return _INBOX_NOT_ENABLED_MSG
|
||||
|
||||
try:
|
||||
timeout = float(timeout_secs)
|
||||
except (TypeError, ValueError):
|
||||
timeout = 60.0
|
||||
# Cap at 300s — Claude Code's default tool timeout is ~10min, and
|
||||
# blocking longer than 5min wastes the prompt cache window for
|
||||
# nothing useful. Operators who want longer can call repeatedly.
|
||||
timeout = max(0.0, min(timeout, 300.0))
|
||||
|
||||
# The threading.Event-based wait would block the asyncio loop.
|
||||
# Run it on the default executor so the MCP server can keep
|
||||
# processing other JSON-RPC requests while we sleep.
|
||||
loop = asyncio.get_running_loop()
|
||||
message = await loop.run_in_executor(None, state.wait, timeout)
|
||||
if message is None:
|
||||
return json.dumps({"timeout": True, "timeout_secs": timeout})
|
||||
return json.dumps(_enrich_inbound_for_agent(message.to_dict()))
|
||||
@@ -0,0 +1,141 @@
|
||||
"""Memory tool handlers — single-concern slice of the a2a_tools surface.
|
||||
|
||||
Extracted from ``a2a_tools.py`` (RFC #2873 iter 4c). Owns the two
|
||||
agent-memory MCP tools:
|
||||
|
||||
* ``tool_commit_memory`` — write to the workspace's persistent memory.
|
||||
* ``tool_recall_memory`` — search the workspace's persistent memory.
|
||||
|
||||
Both go through the platform's ``/workspaces/:id/memories`` endpoint;
|
||||
the platform is the source of truth for namespace isolation + audit
|
||||
trail. Local responsibility here is RBAC enforcement BEFORE hitting
|
||||
the network so a denied operation surfaces a clear in-band error
|
||||
instead of an opaque platform 403.
|
||||
|
||||
Imports the RBAC primitives from ``a2a_tools_rbac`` (iter 4a).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import httpx
|
||||
|
||||
from a2a_client import PLATFORM_URL, WORKSPACE_ID
|
||||
from a2a_tools_rbac import (
|
||||
auth_headers_for_heartbeat as _auth_headers_for_heartbeat,
|
||||
check_memory_read_permission as _check_memory_read_permission,
|
||||
check_memory_write_permission as _check_memory_write_permission,
|
||||
is_root_workspace as _is_root_workspace,
|
||||
)
|
||||
from builtin_tools.security import _redact_secrets
|
||||
|
||||
|
||||
async def tool_commit_memory(
|
||||
content: str,
|
||||
scope: str = "LOCAL",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Save important information to persistent memory.
|
||||
|
||||
GLOBAL scope is writable only by root workspaces (tier == 0).
|
||||
RBAC memory.write permission is required for all scope levels.
|
||||
The source workspace_id is embedded in every record so the platform
|
||||
can enforce cross-workspace isolation and audit trail.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace this
|
||||
memory belongs to when the agent is registered into multiple
|
||||
workspaces (PR-1 / multi-workspace mode). When unset, falls back
|
||||
to the module-level WORKSPACE_ID — single-workspace operators see
|
||||
no behaviour change.
|
||||
"""
|
||||
if not content:
|
||||
return "Error: content is required"
|
||||
content = _redact_secrets(content)
|
||||
scope = scope.upper()
|
||||
if scope not in ("LOCAL", "TEAM", "GLOBAL"):
|
||||
scope = "LOCAL"
|
||||
|
||||
# RBAC: require memory.write permission (mirrors builtin_tools/memory.py)
|
||||
if not _check_memory_write_permission():
|
||||
return (
|
||||
"Error: RBAC — this workspace does not have the 'memory.write' "
|
||||
"permission for this operation."
|
||||
)
|
||||
|
||||
# Scope enforcement: only root workspaces (tier 0) can write GLOBAL memory.
|
||||
# This prevents tenant workspaces from poisoning org-wide memory (GH#1610).
|
||||
if scope == "GLOBAL" and not _is_root_workspace():
|
||||
return (
|
||||
"Error: RBAC — only root workspaces (tier 0) can write to GLOBAL scope. "
|
||||
"Non-root workspaces may use LOCAL or TEAM scope."
|
||||
)
|
||||
|
||||
src = source_workspace_id or WORKSPACE_ID
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/memories",
|
||||
json={
|
||||
"content": content,
|
||||
"scope": scope,
|
||||
# Embed source workspace so the platform can namespace-isolate
|
||||
# and audit cross-workspace writes (GH#1610 fix).
|
||||
"workspace_id": src,
|
||||
},
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
data = resp.json()
|
||||
if resp.status_code in (200, 201):
|
||||
return json.dumps({"success": True, "id": data.get("id"), "scope": scope})
|
||||
return f"Error: {data.get('error', resp.text)}"
|
||||
except Exception as e:
|
||||
return f"Error saving memory: {e}"
|
||||
|
||||
|
||||
async def tool_recall_memory(
|
||||
query: str = "",
|
||||
scope: str = "",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Search persistent memory for previously saved information.
|
||||
|
||||
RBAC memory.read permission is required (mirrors builtin_tools/memory.py).
|
||||
The workspace_id is sent as a query parameter so the platform can
|
||||
cross-validate it against the auth token and defend against any future
|
||||
path traversal / cross-tenant read bugs in the platform itself.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace's memories
|
||||
to search when the agent is registered into multiple workspaces.
|
||||
Unset → defaults to the module-level WORKSPACE_ID.
|
||||
"""
|
||||
# RBAC: require memory.read permission (mirrors builtin_tools/memory.py)
|
||||
if not _check_memory_read_permission():
|
||||
return (
|
||||
"Error: RBAC — this workspace does not have the 'memory.read' "
|
||||
"permission for this operation."
|
||||
)
|
||||
|
||||
src = source_workspace_id or WORKSPACE_ID
|
||||
params: dict[str, str] = {"workspace_id": src}
|
||||
if query:
|
||||
params["q"] = query
|
||||
if scope:
|
||||
params["scope"] = scope.upper()
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/memories",
|
||||
params=params,
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
data = resp.json()
|
||||
if isinstance(data, list):
|
||||
if not data:
|
||||
return "No memories found."
|
||||
lines = []
|
||||
for m in data:
|
||||
lines.append(f"[{m.get('scope', '?')}] {m.get('content', '')}")
|
||||
return "\n".join(lines)
|
||||
return json.dumps(data)
|
||||
except Exception as e:
|
||||
return f"Error recalling memory: {e}"
|
||||
@@ -0,0 +1,324 @@
|
||||
"""Messaging tool handlers — single-concern slice of the a2a_tools surface.
|
||||
|
||||
Extracted from ``a2a_tools.py`` (RFC #2873 iter 4d). Owns the four
|
||||
human-and-peer messaging MCP tools + the chat-upload helper they share:
|
||||
|
||||
* ``tool_send_message_to_user`` — push a canvas-chat message via the
|
||||
platform's ``/notify`` endpoint.
|
||||
* ``tool_list_peers`` — discover peers across one or many registered
|
||||
workspaces, with side-effect of populating ``_peer_to_source`` for
|
||||
delegate-task auto-routing.
|
||||
* ``tool_get_workspace_info`` — JSON-encode the workspace's own info.
|
||||
* ``tool_chat_history`` — fetch prior conversation rows with a peer.
|
||||
* ``_upload_chat_files`` — internal helper for the message-attachments
|
||||
code path; routes local file paths through the platform's
|
||||
``/chat/uploads`` so the canvas can render them as download chips.
|
||||
|
||||
Imports the auth-header primitive from ``a2a_tools_rbac`` (iter 4a).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import mimetypes
|
||||
import os
|
||||
|
||||
import httpx
|
||||
|
||||
from a2a_client import (
|
||||
PLATFORM_URL,
|
||||
WORKSPACE_ID,
|
||||
_peer_names,
|
||||
_peer_to_source,
|
||||
get_peers_with_diagnostic,
|
||||
get_workspace_info,
|
||||
)
|
||||
from a2a_tools_rbac import auth_headers_for_heartbeat as _auth_headers_for_heartbeat
|
||||
from platform_auth import list_registered_workspaces
|
||||
|
||||
|
||||
async def _upload_chat_files(
|
||||
client: httpx.AsyncClient,
|
||||
paths: list[str],
|
||||
workspace_id: str | None = None,
|
||||
) -> tuple[list[dict], str | None]:
|
||||
"""Upload local file paths through /workspaces/<self>/chat/uploads.
|
||||
|
||||
The platform stages each upload under /workspace/.molecule/chat-uploads
|
||||
(an "allowed root" the canvas knows how to render via the Download
|
||||
endpoint) and returns metadata the broadcast payload references.
|
||||
|
||||
Why we route through upload instead of just passing the agent's path:
|
||||
the canvas's allowed-root list is /configs, /workspace, /home, /plugins
|
||||
— files at /tmp or /root would be unreachable. Uploading copies the
|
||||
bytes into an allowed root regardless of where the agent wrote them.
|
||||
|
||||
Returns (attachments, error). On any failure the caller should NOT
|
||||
fire the notify — partial-attach would surface a half-rendered chip.
|
||||
"""
|
||||
if not paths:
|
||||
return [], None
|
||||
files_payload: list[tuple[str, tuple[str, bytes, str]]] = []
|
||||
for p in paths:
|
||||
if not isinstance(p, str) or not p:
|
||||
return [], f"Error: invalid attachment path {p!r}"
|
||||
if not os.path.isfile(p):
|
||||
return [], f"Error: attachment not found: {p}"
|
||||
try:
|
||||
with open(p, "rb") as fh:
|
||||
data = fh.read()
|
||||
except OSError as e:
|
||||
return [], f"Error reading {p}: {e}"
|
||||
# Sniff mime from filename so the canvas can pick the right
|
||||
# icon / preview / inline-image renderer. Pre-fix this was
|
||||
# hardcoded application/octet-stream and chat_files.go's
|
||||
# Upload trusts whatever Content-Type the multipart part
|
||||
# carries — `mt := fh.Header.Get("Content-Type")` only falls
|
||||
# back to extension-sniffing when the header is empty. So a
|
||||
# hardcoded octet-stream meant every attachment lost its
|
||||
# real type forever, breaking the canvas chip's icon logic.
|
||||
mime_type, _ = mimetypes.guess_type(p)
|
||||
if not mime_type:
|
||||
mime_type = "application/octet-stream"
|
||||
files_payload.append(("files", (os.path.basename(p), data, mime_type)))
|
||||
target_workspace_id = (workspace_id or "").strip() or WORKSPACE_ID
|
||||
try:
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{target_workspace_id}/chat/uploads",
|
||||
files=files_payload,
|
||||
headers=_auth_headers_for_heartbeat(target_workspace_id),
|
||||
)
|
||||
except Exception as e:
|
||||
return [], f"Error uploading attachments: {e}"
|
||||
if resp.status_code != 200:
|
||||
return [], f"Error: chat/uploads returned {resp.status_code}: {resp.text[:200]}"
|
||||
try:
|
||||
body = resp.json()
|
||||
except Exception as e:
|
||||
return [], f"Error parsing upload response: {e}"
|
||||
uploaded = body.get("files") or []
|
||||
if not isinstance(uploaded, list) or len(uploaded) != len(paths):
|
||||
return [], f"Error: upload returned {len(uploaded) if isinstance(uploaded, list) else 'invalid'} entries for {len(paths)} files"
|
||||
return uploaded, None
|
||||
|
||||
|
||||
async def tool_send_message_to_user(
|
||||
message: str,
|
||||
attachments: list[str] | None = None,
|
||||
workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Send a message directly to the user's canvas chat via WebSocket.
|
||||
|
||||
Args:
|
||||
message: The text to display in the user's chat. Required even
|
||||
when sending attachments — set to a short caption like
|
||||
"Here's the build output:" or "Done — see attached."
|
||||
attachments: Optional list of absolute file paths inside this
|
||||
container. Each is uploaded to the platform and rendered
|
||||
in the canvas as a clickable download chip. Use this
|
||||
instead of pasting paths in the message text — paths
|
||||
render as plain text and the user can't click them.
|
||||
Examples:
|
||||
attachments=["/tmp/build-output.zip"]
|
||||
attachments=["/workspace/report.pdf", "/workspace/data.csv"]
|
||||
workspace_id: Optional. When the agent is registered in MULTIPLE
|
||||
workspaces (external multi-workspace MCP path), this
|
||||
selects which workspace's chat to deliver the message to —
|
||||
should match the ``arrival_workspace_id`` of the inbound
|
||||
message you're replying to so the user sees the reply in
|
||||
the same canvas they typed in. Single-workspace agents
|
||||
omit this; the message routes to the only registered
|
||||
workspace.
|
||||
"""
|
||||
if not message:
|
||||
return "Error: message is required"
|
||||
target_workspace_id = (workspace_id or "").strip() or WORKSPACE_ID
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
uploaded, upload_err = await _upload_chat_files(
|
||||
client, attachments or [], workspace_id=target_workspace_id,
|
||||
)
|
||||
if upload_err:
|
||||
return upload_err
|
||||
payload: dict = {"message": message}
|
||||
if uploaded:
|
||||
payload["attachments"] = uploaded
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{target_workspace_id}/notify",
|
||||
json=payload,
|
||||
headers=_auth_headers_for_heartbeat(target_workspace_id),
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
if uploaded:
|
||||
return f"Message sent to user with {len(uploaded)} attachment(s)"
|
||||
return "Message sent to user"
|
||||
return f"Error: platform returned {resp.status_code}"
|
||||
except Exception as e:
|
||||
return f"Error sending message: {e}"
|
||||
|
||||
|
||||
async def tool_list_peers(source_workspace_id: str | None = None) -> str:
|
||||
"""List all workspaces this agent can communicate with.
|
||||
|
||||
Behavior:
|
||||
- ``source_workspace_id`` set → list peers of that one workspace.
|
||||
- Unset, single-workspace mode → list peers of WORKSPACE_ID
|
||||
(the legacy path, unchanged).
|
||||
- Unset, multi-workspace mode (MOLECULE_WORKSPACES populated) →
|
||||
aggregate across every registered workspace, prefixing each
|
||||
peer with its source so the agent / user can see the full peer
|
||||
surface in one call.
|
||||
|
||||
Side-effect: populates ``_peer_to_source`` so subsequent
|
||||
``tool_delegate_task(target)`` auto-routes through the correct
|
||||
sending workspace without the agent needing ``source_workspace_id``.
|
||||
"""
|
||||
sources: list[str]
|
||||
aggregate = False
|
||||
if source_workspace_id:
|
||||
sources = [source_workspace_id]
|
||||
else:
|
||||
registered = list_registered_workspaces()
|
||||
if len(registered) > 1:
|
||||
sources = registered
|
||||
aggregate = True
|
||||
else:
|
||||
sources = [WORKSPACE_ID]
|
||||
|
||||
all_peers: list[tuple[str, dict]] = [] # (source, peer_record)
|
||||
diagnostics: list[tuple[str, str]] = [] # (source, diagnostic)
|
||||
for src in sources:
|
||||
peers, diagnostic = await get_peers_with_diagnostic(source_workspace_id=src)
|
||||
if peers:
|
||||
for p in peers:
|
||||
all_peers.append((src, p))
|
||||
elif diagnostic is not None:
|
||||
diagnostics.append((src, diagnostic))
|
||||
|
||||
if not all_peers:
|
||||
if diagnostics:
|
||||
joined = "; ".join(f"[{src[:8]}] {d}" for src, d in diagnostics)
|
||||
return f"No peers found. {joined}"
|
||||
return (
|
||||
"You have no peers in the platform registry. "
|
||||
"(No parent, no children, no siblings registered.)"
|
||||
)
|
||||
|
||||
lines = []
|
||||
for src, p in all_peers:
|
||||
status = p.get("status", "unknown")
|
||||
role = p.get("role", "")
|
||||
peer_id = p["id"]
|
||||
# Cache name for use in delegate_task
|
||||
_peer_names[peer_id] = p["name"]
|
||||
# Cache the source workspace so tool_delegate_task auto-routes
|
||||
_peer_to_source[peer_id] = src
|
||||
if aggregate:
|
||||
lines.append(
|
||||
f"- {p['name']} (ID: {peer_id}, status: {status}, role: {role}, via: {src[:8]})"
|
||||
)
|
||||
else:
|
||||
lines.append(f"- {p['name']} (ID: {peer_id}, status: {status}, role: {role})")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
async def tool_get_workspace_info(source_workspace_id: str | None = None) -> str:
|
||||
"""Get this workspace's own info.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace to
|
||||
introspect when the agent is registered into multiple workspaces.
|
||||
Unset → falls back to module-level WORKSPACE_ID.
|
||||
"""
|
||||
info = await get_workspace_info(source_workspace_id=source_workspace_id)
|
||||
return json.dumps(info, indent=2)
|
||||
|
||||
|
||||
async def tool_chat_history(
|
||||
peer_id: str,
|
||||
limit: int = 20,
|
||||
before_ts: str = "",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Fetch the prior conversation with one peer.
|
||||
|
||||
Hits ``/workspaces/<self>/activity?peer_id=<peer>&limit=<N>``
|
||||
against the workspace-server, which returns activity rows where
|
||||
the peer is either the sender (``source_id=peer`` — they sent us
|
||||
the message) or the recipient (``target_id=peer`` — we sent to
|
||||
them) of an A2A turn — both sides of the conversation in
|
||||
chronological order.
|
||||
|
||||
Args:
|
||||
peer_id: The other workspace's UUID. Same value the agent
|
||||
sees as ``peer_id`` on a peer_agent push or ``workspace_id``
|
||||
on a delegate_task call.
|
||||
limit: Maximum rows to return; capped server-side at 500. The
|
||||
default of 20 covers "most recent context for this peer"
|
||||
without flooding the agent's context window.
|
||||
before_ts: Optional RFC3339 timestamp; only rows strictly
|
||||
older are returned. Used to page backward through long
|
||||
histories — pass the oldest ``ts`` from the previous
|
||||
response. Empty (default) returns the most recent ``limit``
|
||||
rows.
|
||||
source_workspace_id: Which registered workspace's activity log
|
||||
to query. Auto-routes via ``_peer_to_source`` cache when
|
||||
unset (the workspace this peer was discovered through);
|
||||
falls back to module-level WORKSPACE_ID for single-workspace
|
||||
operators.
|
||||
|
||||
Returns a JSON-encoded list of activity rows (or an error string
|
||||
starting with ``Error:`` so the agent can branch). Each row carries
|
||||
``activity_type``, ``source_id``, ``target_id``, ``method``,
|
||||
``summary``, ``request_body``, ``response_body``, ``status``,
|
||||
``created_at`` — same shape ``inbox_peek`` and the canvas chat
|
||||
loader already see.
|
||||
"""
|
||||
if not peer_id or not isinstance(peer_id, str):
|
||||
return "Error: peer_id is required"
|
||||
if not isinstance(limit, int) or limit <= 0:
|
||||
limit = 20
|
||||
if limit > 500:
|
||||
limit = 500
|
||||
|
||||
src = source_workspace_id or _peer_to_source.get(peer_id) or WORKSPACE_ID
|
||||
|
||||
params: dict[str, str] = {
|
||||
"peer_id": peer_id,
|
||||
"limit": str(limit),
|
||||
}
|
||||
# Forward verbatim — the server route validates as RFC3339 at the
|
||||
# trust boundary and translates into a `created_at < $X` clause.
|
||||
if before_ts:
|
||||
params["before_ts"] = before_ts
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/activity",
|
||||
params=params,
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return f"Error: chat_history request failed: {exc}"
|
||||
|
||||
if resp.status_code == 400:
|
||||
# Trust-boundary rejection (malformed peer_id, etc.) — surface
|
||||
# the server's reason verbatim so the agent can correct itself.
|
||||
try:
|
||||
err = resp.json().get("error", "bad request")
|
||||
except Exception: # noqa: BLE001
|
||||
err = "bad request"
|
||||
return f"Error: {err}"
|
||||
if resp.status_code >= 400:
|
||||
return f"Error: chat_history returned HTTP {resp.status_code}"
|
||||
|
||||
try:
|
||||
rows = resp.json()
|
||||
except Exception: # noqa: BLE001
|
||||
return "Error: chat_history response was not JSON"
|
||||
if not isinstance(rows, list):
|
||||
return "Error: chat_history response was not a list"
|
||||
|
||||
# Server returns DESC (most recent first); reverse to chronological
|
||||
# so the agent reads the conversation top-down like a chat log.
|
||||
rows.reverse()
|
||||
return json.dumps(rows)
|
||||
@@ -35,9 +35,15 @@ def resolve_workspaces() -> tuple[list[tuple[str, str]], list[str]]:
|
||||
N workspaces). When set, ``WORKSPACE_ID`` / ``MOLECULE_WORKSPACE_TOKEN``
|
||||
are IGNORED — the JSON is the source of truth.
|
||||
|
||||
2. Single-workspace fallback — ``WORKSPACE_ID`` env var + token from
|
||||
``MOLECULE_WORKSPACE_TOKEN`` or ``${CONFIGS_DIR}/.auth_token``.
|
||||
This is the pre-existing path; back-compat exact.
|
||||
2. Single-workspace fallback — ``WORKSPACE_ID`` env var + token
|
||||
resolved in this order:
|
||||
a. ``MOLECULE_WORKSPACE_TOKEN`` (inline env — convenient but
|
||||
leaks into shell history + plaintext MCP-host config).
|
||||
b. ``MOLECULE_WORKSPACE_TOKEN_FILE`` (path to a file holding
|
||||
the token — operator can keep it 0600 in their home dir;
|
||||
survives shell-history scrubs).
|
||||
c. ``${CONFIGS_DIR}/.auth_token`` (in-container runtimes —
|
||||
the platform writes this on provision).
|
||||
|
||||
Returns ``(workspaces, errors)``:
|
||||
* ``workspaces``: list of ``(workspace_id, token)`` — non-empty
|
||||
@@ -98,16 +104,94 @@ def resolve_workspaces() -> tuple[list[tuple[str, str]], list[str]]:
|
||||
wsid = os.environ.get("WORKSPACE_ID", "").strip()
|
||||
if not wsid:
|
||||
return [], ["WORKSPACE_ID (or MOLECULE_WORKSPACES) is required"]
|
||||
# Token resolution order (#2934): inline env → file path → CONFIGS_DIR
|
||||
# default. The file-path option exists so operators can keep the
|
||||
# bearer out of shell history and out of MCP-host config plaintext
|
||||
# (e.g. ~/.claude.json) — set MOLECULE_WORKSPACE_TOKEN_FILE to a
|
||||
# 0600 file containing the token. The CONFIGS_DIR/.auth_token
|
||||
# fallback predates this and stays for in-container runtimes.
|
||||
tok = os.environ.get("MOLECULE_WORKSPACE_TOKEN", "").strip()
|
||||
if not tok:
|
||||
tok, tf_err = _read_token_from_file_env()
|
||||
if tf_err:
|
||||
# Operator explicitly pointed TOKEN_FILE somewhere — surface
|
||||
# the SPECIFIC failure (path doesn't exist, isn't readable,
|
||||
# or holds a blank file) instead of falling through to the
|
||||
# generic "set one of these three vars" message. Otherwise
|
||||
# they get exactly the silent failure mode #2934 flagged
|
||||
# ("a new user has no chance"). Skip the CONFIGS_DIR
|
||||
# fallback in this case — the operator's intent is clearly
|
||||
# to use the file path; deferring to a different source
|
||||
# would mask their config error.
|
||||
return [], [tf_err]
|
||||
if not tok:
|
||||
tok = read_token_file()
|
||||
if not tok:
|
||||
return [], [
|
||||
"MOLECULE_WORKSPACE_TOKEN (or CONFIGS_DIR/.auth_token) is required"
|
||||
"MOLECULE_WORKSPACE_TOKEN, MOLECULE_WORKSPACE_TOKEN_FILE, or "
|
||||
"CONFIGS_DIR/.auth_token is required"
|
||||
]
|
||||
return [(wsid, tok)], []
|
||||
|
||||
|
||||
def _read_token_from_file_env() -> tuple[str, str]:
|
||||
"""Read the token from the file path in MOLECULE_WORKSPACE_TOKEN_FILE.
|
||||
|
||||
Returns ``(token, error)``:
|
||||
* env var unset/blank → ``("", "")`` — caller falls through silently
|
||||
to the next source; the operator didn't ask for this path.
|
||||
* file open/read fails (missing, permission denied, decode error)
|
||||
→ ``("", "<specific error>")`` — caller surfaces it directly.
|
||||
The operator EXPLICITLY pointed at this path, so a generic
|
||||
fallthrough error would mask their config bug (#2934).
|
||||
* file is blank → ``("", "<blank file error>")`` — same reasoning.
|
||||
* file read returns junk with internal whitespace/newlines (e.g.
|
||||
a CSV cell, accidental multi-token paste) → ``("", "<error>")``
|
||||
rather than concatenating into a malformed bearer that 401s
|
||||
against the platform with no context.
|
||||
* happy path → ``("<token>", "")``.
|
||||
"""
|
||||
path = os.environ.get("MOLECULE_WORKSPACE_TOKEN_FILE", "").strip()
|
||||
if not path:
|
||||
return "", ""
|
||||
try:
|
||||
with open(path, encoding="utf-8") as fh:
|
||||
raw = fh.read()
|
||||
except FileNotFoundError:
|
||||
return "", (
|
||||
f"MOLECULE_WORKSPACE_TOKEN_FILE points to {path!r} which "
|
||||
f"does not exist"
|
||||
)
|
||||
except PermissionError:
|
||||
return "", (
|
||||
f"MOLECULE_WORKSPACE_TOKEN_FILE={path!r} is not readable "
|
||||
f"(permission denied)"
|
||||
)
|
||||
except OSError as exc:
|
||||
return "", (
|
||||
f"MOLECULE_WORKSPACE_TOKEN_FILE={path!r} could not be read: "
|
||||
f"{exc}"
|
||||
)
|
||||
except UnicodeDecodeError:
|
||||
return "", (
|
||||
f"MOLECULE_WORKSPACE_TOKEN_FILE={path!r} is not valid UTF-8"
|
||||
)
|
||||
tok = raw.strip()
|
||||
if not tok:
|
||||
return "", (
|
||||
f"MOLECULE_WORKSPACE_TOKEN_FILE={path!r} is empty"
|
||||
)
|
||||
# Reject tokens with internal whitespace — a CSV cell or accidental
|
||||
# multi-token paste would otherwise become a malformed bearer that
|
||||
# 401s against the platform with no diagnostic.
|
||||
if any(ch.isspace() for ch in tok):
|
||||
return "", (
|
||||
f"MOLECULE_WORKSPACE_TOKEN_FILE={path!r} contains internal "
|
||||
f"whitespace — expected a single token"
|
||||
)
|
||||
return tok, ""
|
||||
|
||||
|
||||
def print_missing_env_help(missing: list[str], have_token_file: bool) -> None:
|
||||
print("molecule-mcp: missing required environment.\n", file=sys.stderr)
|
||||
print("Set the following before running molecule-mcp:", file=sys.stderr)
|
||||
@@ -123,6 +207,16 @@ def print_missing_env_help(missing: list[str], have_token_file: bool) -> None:
|
||||
"(canvas → Tokens tab)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(
|
||||
" OR set MOLECULE_WORKSPACE_TOKEN_FILE"
|
||||
" to a path that holds the token",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(
|
||||
" (keeps the secret out of shell"
|
||||
" history and MCP-host config plaintext)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print("", file=sys.stderr)
|
||||
print(f"Currently missing: {', '.join(missing)}", file=sys.stderr)
|
||||
|
||||
|
||||
@@ -241,7 +241,7 @@ class TestToolListPeersAggregation:
|
||||
return [{"id": "2222bbbb-2222-2222-2222-222222222222", "name": "bob", "status": "online", "role": "dev"}], None
|
||||
return [], None
|
||||
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
output = await a2a_tools.tool_list_peers()
|
||||
|
||||
assert "alice" in output
|
||||
@@ -263,7 +263,7 @@ class TestToolListPeersAggregation:
|
||||
assert source_workspace_id == a2a_client.WORKSPACE_ID
|
||||
return [{"id": "1111aaaa-1111-1111-1111-111111111111", "name": "alice", "status": "online", "role": "ops"}], None
|
||||
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
output = await a2a_tools.tool_list_peers()
|
||||
|
||||
assert "alice" in output
|
||||
@@ -286,7 +286,7 @@ class TestToolListPeersAggregation:
|
||||
seen.append(source_workspace_id)
|
||||
return [{"id": "1111aaaa-1111-1111-1111-111111111111", "name": "alice", "status": "online", "role": "ops"}], None
|
||||
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
output = await a2a_tools.tool_list_peers(source_workspace_id=ws_a)
|
||||
|
||||
assert seen == [ws_a]
|
||||
@@ -309,7 +309,7 @@ class TestToolListPeersAggregation:
|
||||
return [], "auth failed"
|
||||
return [], "platform 5xx"
|
||||
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
out = await a2a_tools.tool_list_peers()
|
||||
|
||||
assert "[aaaa1111] auth failed" in out
|
||||
|
||||
@@ -453,14 +453,14 @@ class TestToolSendMessageToUser:
|
||||
async def test_success_200_returns_sent_message(self):
|
||||
import a2a_tools
|
||||
mc = _make_http_mock(post_resp=_resp(200, {}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
result = await a2a_tools.tool_send_message_to_user("Hello user!")
|
||||
assert result == "Message sent to user"
|
||||
|
||||
async def test_non_200_returns_status_code_in_error(self):
|
||||
import a2a_tools
|
||||
mc = _make_http_mock(post_resp=_resp(503, {}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
result = await a2a_tools.tool_send_message_to_user("Hello user!")
|
||||
assert "503" in result
|
||||
assert "Error" in result
|
||||
@@ -468,7 +468,7 @@ class TestToolSendMessageToUser:
|
||||
async def test_exception_returns_error_message(self):
|
||||
import a2a_tools
|
||||
mc = _make_http_mock(post_exc=RuntimeError("platform unreachable"))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
result = await a2a_tools.tool_send_message_to_user("Hi!")
|
||||
assert "Error sending message" in result
|
||||
assert "platform unreachable" in result
|
||||
@@ -495,7 +495,7 @@ class TestToolSendMessageToUser:
|
||||
mc = _make_http_mock(post_resp=notify_resp)
|
||||
mc.post = AsyncMock(side_effect=[upload_resp, notify_resp])
|
||||
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
result = await a2a_tools.tool_send_message_to_user(
|
||||
"Done — see attached.",
|
||||
attachments=[str(f)],
|
||||
@@ -523,7 +523,7 @@ class TestToolSendMessageToUser:
|
||||
# with a half-rendered attachment chip.
|
||||
import a2a_tools
|
||||
mc = _make_http_mock()
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
result = await a2a_tools.tool_send_message_to_user(
|
||||
"Hi", attachments=["/no/such/file.zip"],
|
||||
)
|
||||
@@ -541,7 +541,7 @@ class TestToolSendMessageToUser:
|
||||
mc = _make_http_mock()
|
||||
mc.post = AsyncMock(return_value=upload_resp)
|
||||
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
result = await a2a_tools.tool_send_message_to_user(
|
||||
"Hi", attachments=[str(f)],
|
||||
)
|
||||
@@ -555,7 +555,7 @@ class TestToolSendMessageToUser:
|
||||
# an `attachments` field added to the notify body.
|
||||
import a2a_tools
|
||||
mc = _make_http_mock(post_resp=_resp(200, {}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
await a2a_tools.tool_send_message_to_user("plain text")
|
||||
body = mc.post.await_args.kwargs.get("json") or {}
|
||||
assert body == {"message": "plain text"}
|
||||
@@ -570,7 +570,7 @@ class TestToolListPeers:
|
||||
async def test_true_empty_returns_no_peers_message_without_diagnostic(self):
|
||||
"""200 + empty list → 'no peers in the platform registry' (no failure)."""
|
||||
import a2a_tools
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", return_value=([], None)):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", return_value=([], None)):
|
||||
result = await a2a_tools.tool_list_peers()
|
||||
# The new wording explicitly says no peers exist (no parent/sibling/child).
|
||||
# Avoids the misleading "may be isolated" hint when discovery succeeded.
|
||||
@@ -582,7 +582,7 @@ class TestToolListPeers:
|
||||
"""401/403 → tool_list_peers must surface the auth failure + restart hint, not 'isolated'."""
|
||||
import a2a_tools
|
||||
diag = "Authentication to platform failed (HTTP 401). Restart the workspace to re-mint."
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", return_value=([], diag)):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", return_value=([], diag)):
|
||||
result = await a2a_tools.tool_list_peers()
|
||||
assert "401" in result
|
||||
assert "Authentication" in result
|
||||
@@ -593,7 +593,7 @@ class TestToolListPeers:
|
||||
"""404 → tool_list_peers tells the user re-registration is needed."""
|
||||
import a2a_tools
|
||||
diag = "Workspace ID ws-test is not registered with the platform (HTTP 404). Re-register."
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", return_value=([], diag)):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", return_value=([], diag)):
|
||||
result = await a2a_tools.tool_list_peers()
|
||||
assert "404" in result
|
||||
assert "registered" in result.lower()
|
||||
@@ -602,7 +602,7 @@ class TestToolListPeers:
|
||||
"""5xx → 'Platform error' surfaced; agent / user can correctly route to oncall."""
|
||||
import a2a_tools
|
||||
diag = "Platform error: HTTP 503."
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", return_value=([], diag)):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", return_value=([], diag)):
|
||||
result = await a2a_tools.tool_list_peers()
|
||||
assert "503" in result
|
||||
assert "Platform error" in result
|
||||
@@ -611,7 +611,7 @@ class TestToolListPeers:
|
||||
"""Network error → operator can tell that the workspace can't reach the platform at all."""
|
||||
import a2a_tools
|
||||
diag = "Cannot reach platform at http://platform.example: timed out"
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", return_value=([], diag)):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", return_value=([], diag)):
|
||||
result = await a2a_tools.tool_list_peers()
|
||||
assert "Cannot reach platform" in result
|
||||
assert "timed out" in result
|
||||
@@ -624,7 +624,7 @@ class TestToolListPeers:
|
||||
{"id": "ws-1", "name": "Alpha", "status": "online", "role": "worker"},
|
||||
{"id": "ws-2", "name": "Beta", "status": "idle", "role": "analyst"},
|
||||
]
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", return_value=(peers, None)):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", return_value=(peers, None)):
|
||||
result = await a2a_tools.tool_list_peers()
|
||||
|
||||
assert "Alpha" in result
|
||||
@@ -641,7 +641,7 @@ class TestToolListPeers:
|
||||
# Clear any prior cache entries for these IDs
|
||||
a2a_tools._peer_names.pop("ws-cache-test", None)
|
||||
peers = [{"id": "ws-cache-test", "name": "CacheMe", "status": "online", "role": "w"}]
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", return_value=(peers, None)):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", return_value=(peers, None)):
|
||||
await a2a_tools.tool_list_peers()
|
||||
|
||||
assert a2a_tools._peer_names.get("ws-cache-test") == "CacheMe"
|
||||
@@ -651,7 +651,7 @@ class TestToolListPeers:
|
||||
import a2a_tools
|
||||
|
||||
peers = [{"id": "ws-3", "name": "Gamma"}] # no status, no role
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", return_value=(peers, None)):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", return_value=(peers, None)):
|
||||
result = await a2a_tools.tool_list_peers()
|
||||
|
||||
assert "Gamma" in result
|
||||
@@ -669,7 +669,7 @@ class TestToolGetWorkspaceInfo:
|
||||
import a2a_tools
|
||||
|
||||
info = {"id": "ws-test", "name": "My Workspace", "status": "online"}
|
||||
with patch("a2a_tools.get_workspace_info", return_value=info):
|
||||
with patch("a2a_tools_messaging.get_workspace_info", return_value=info):
|
||||
result = await a2a_tools.tool_get_workspace_info()
|
||||
|
||||
parsed = json.loads(result)
|
||||
@@ -678,7 +678,7 @@ class TestToolGetWorkspaceInfo:
|
||||
async def test_returns_error_dict_as_json(self):
|
||||
import a2a_tools
|
||||
|
||||
with patch("a2a_tools.get_workspace_info", return_value={"error": "not found"}):
|
||||
with patch("a2a_tools_messaging.get_workspace_info", return_value={"error": "not found"}):
|
||||
result = await a2a_tools.tool_get_workspace_info()
|
||||
|
||||
parsed = json.loads(result)
|
||||
@@ -702,9 +702,9 @@ class TestToolCommitMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(post_resp=_resp(201, {"id": "mem-1"}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools._is_root_workspace", return_value=False):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools_memory._is_root_workspace", return_value=False):
|
||||
result = await a2a_tools.tool_commit_memory("Remember this", scope="local")
|
||||
|
||||
data = json.loads(result)
|
||||
@@ -716,9 +716,9 @@ class TestToolCommitMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(post_resp=_resp(200, {"id": "mem-2"}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools._is_root_workspace", return_value=False):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools_memory._is_root_workspace", return_value=False):
|
||||
result = await a2a_tools.tool_commit_memory("Remember this", scope="INVALID")
|
||||
|
||||
data = json.loads(result)
|
||||
@@ -728,9 +728,9 @@ class TestToolCommitMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(post_resp=_resp(200, {"id": "mem-3"}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools._is_root_workspace", return_value=False):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools_memory._is_root_workspace", return_value=False):
|
||||
result = await a2a_tools.tool_commit_memory("Team info", scope="TEAM")
|
||||
|
||||
data = json.loads(result)
|
||||
@@ -741,9 +741,9 @@ class TestToolCommitMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(post_resp=_resp(201, {"id": "mem-4"}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools._is_root_workspace", return_value=True):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools_memory._is_root_workspace", return_value=True):
|
||||
result = await a2a_tools.tool_commit_memory("Global info", scope="GLOBAL")
|
||||
|
||||
data = json.loads(result)
|
||||
@@ -753,9 +753,9 @@ class TestToolCommitMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(post_resp=_resp(200, {"id": "mem-5"}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools._is_root_workspace", return_value=False):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools_memory._is_root_workspace", return_value=False):
|
||||
result = await a2a_tools.tool_commit_memory("info")
|
||||
|
||||
data = json.loads(result)
|
||||
@@ -766,9 +766,9 @@ class TestToolCommitMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(post_resp=_resp(201, {"id": "mem-6"}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools._is_root_workspace", return_value=False):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools_memory._is_root_workspace", return_value=False):
|
||||
result = await a2a_tools.tool_commit_memory("info")
|
||||
|
||||
data = json.loads(result)
|
||||
@@ -779,9 +779,9 @@ class TestToolCommitMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(post_resp=_resp(400, {"error": "bad request payload"}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools._is_root_workspace", return_value=False):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools_memory._is_root_workspace", return_value=False):
|
||||
result = await a2a_tools.tool_commit_memory("info")
|
||||
|
||||
assert "Error" in result
|
||||
@@ -791,9 +791,9 @@ class TestToolCommitMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(post_exc=RuntimeError("storage failure"))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools._is_root_workspace", return_value=False):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools_memory._is_root_workspace", return_value=False):
|
||||
result = await a2a_tools.tool_commit_memory("info")
|
||||
|
||||
assert "Error saving memory" in result
|
||||
@@ -808,9 +808,9 @@ class TestToolCommitMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(post_resp=_resp(201, {"id": "mem-poison"}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools._is_root_workspace", return_value=False):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools_memory._is_root_workspace", return_value=False):
|
||||
result = await a2a_tools.tool_commit_memory("poisoned GLOBAL memory", scope="GLOBAL")
|
||||
|
||||
# Must NOT have called the platform — early rejection
|
||||
@@ -824,9 +824,9 @@ class TestToolCommitMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(post_resp=_resp(201, {"id": "mem-7"}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_write_permission", return_value=False), \
|
||||
patch("a2a_tools._is_root_workspace", return_value=False):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_write_permission", return_value=False), \
|
||||
patch("a2a_tools_memory._is_root_workspace", return_value=False):
|
||||
result = await a2a_tools.tool_commit_memory("should be denied", scope="LOCAL")
|
||||
|
||||
mc.post.assert_not_called()
|
||||
@@ -838,9 +838,9 @@ class TestToolCommitMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(post_resp=_resp(201, {"id": "mem-8"}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools._is_root_workspace", return_value=False):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_write_permission", return_value=True), \
|
||||
patch("a2a_tools_memory._is_root_workspace", return_value=False):
|
||||
await a2a_tools.tool_commit_memory("test content", scope="LOCAL")
|
||||
|
||||
call_kwargs = mc.post.call_args.kwargs
|
||||
@@ -865,8 +865,8 @@ class TestToolRecallMemory:
|
||||
{"scope": "TEAM", "content": "We use Python 3.11"},
|
||||
]
|
||||
mc = _make_http_mock(get_resp=_resp(200, memories))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_read_permission", return_value=True):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_read_permission", return_value=True):
|
||||
result = await a2a_tools.tool_recall_memory(query="capital")
|
||||
|
||||
assert "[LOCAL]" in result
|
||||
@@ -878,8 +878,8 @@ class TestToolRecallMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(get_resp=_resp(200, []))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_read_permission", return_value=True):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_read_permission", return_value=True):
|
||||
result = await a2a_tools.tool_recall_memory(query="anything")
|
||||
|
||||
assert result == "No memories found."
|
||||
@@ -890,8 +890,8 @@ class TestToolRecallMemory:
|
||||
|
||||
payload = {"error": "search unavailable"}
|
||||
mc = _make_http_mock(get_resp=_resp(200, payload))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_read_permission", return_value=True):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_read_permission", return_value=True):
|
||||
result = await a2a_tools.tool_recall_memory()
|
||||
|
||||
parsed = json.loads(result)
|
||||
@@ -901,8 +901,8 @@ class TestToolRecallMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(get_exc=RuntimeError("search service down"))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_read_permission", return_value=True):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_read_permission", return_value=True):
|
||||
result = await a2a_tools.tool_recall_memory(query="test")
|
||||
|
||||
assert "Error recalling memory" in result
|
||||
@@ -913,8 +913,8 @@ class TestToolRecallMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(get_resp=_resp(200, []))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_read_permission", return_value=True):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_read_permission", return_value=True):
|
||||
await a2a_tools.tool_recall_memory(query="paris", scope="local")
|
||||
|
||||
call_kwargs = mc.get.call_args.kwargs
|
||||
@@ -928,8 +928,8 @@ class TestToolRecallMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(get_resp=_resp(200, []))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_read_permission", return_value=True):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_read_permission", return_value=True):
|
||||
await a2a_tools.tool_recall_memory()
|
||||
|
||||
call_kwargs = mc.get.call_args.kwargs
|
||||
@@ -942,8 +942,8 @@ class TestToolRecallMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(get_resp=_resp(200, []))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_read_permission", return_value=True):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_read_permission", return_value=True):
|
||||
await a2a_tools.tool_recall_memory(scope="team")
|
||||
|
||||
call_kwargs = mc.get.call_args.kwargs
|
||||
@@ -960,8 +960,8 @@ class TestToolRecallMemory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(get_resp=_resp(200, [{"scope": "GLOBAL", "content": "secret"}]))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools._check_memory_read_permission", return_value=False):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=mc), \
|
||||
patch("a2a_tools_memory._check_memory_read_permission", return_value=False):
|
||||
result = await a2a_tools.tool_recall_memory(query="secret")
|
||||
|
||||
mc.get.assert_not_called()
|
||||
@@ -994,7 +994,7 @@ class TestChatHistory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock()
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
result = await a2a_tools.tool_chat_history(peer_id="")
|
||||
|
||||
mc.get.assert_not_called()
|
||||
@@ -1006,7 +1006,7 @@ class TestChatHistory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(get_resp=_resp(200, []))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
await a2a_tools.tool_chat_history(peer_id=_PEER)
|
||||
|
||||
url, kwargs = mc.get.call_args.args[0], mc.get.call_args.kwargs
|
||||
@@ -1023,7 +1023,7 @@ class TestChatHistory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(get_resp=_resp(200, []))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
await a2a_tools.tool_chat_history(peer_id=_PEER, limit=10000)
|
||||
|
||||
params = mc.get.call_args.kwargs["params"]
|
||||
@@ -1035,7 +1035,7 @@ class TestChatHistory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(get_resp=_resp(200, []))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
await a2a_tools.tool_chat_history(peer_id=_PEER, limit=0)
|
||||
|
||||
assert mc.get.call_args.kwargs["params"]["limit"] == "20"
|
||||
@@ -1044,7 +1044,7 @@ class TestChatHistory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(get_resp=_resp(200, []))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
await a2a_tools.tool_chat_history(
|
||||
peer_id=_PEER, before_ts="2026-05-01T00:00:00Z",
|
||||
)
|
||||
@@ -1063,7 +1063,7 @@ class TestChatHistory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(get_resp=_resp(200, []))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
result = await a2a_tools.tool_chat_history(peer_id=_PEER)
|
||||
|
||||
# Exact-equality on the JSON literal (per assert-exact memory) —
|
||||
@@ -1084,7 +1084,7 @@ class TestChatHistory:
|
||||
{"id": "act-1", "created_at": "2026-05-01T00:01:00Z"},
|
||||
]
|
||||
mc = _make_http_mock(get_resp=_resp(200, rows))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
result = await a2a_tools.tool_chat_history(peer_id=_PEER)
|
||||
|
||||
out = json.loads(result)
|
||||
@@ -1097,7 +1097,7 @@ class TestChatHistory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(get_resp=_resp(400, {"error": "peer_id must be a UUID"}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
result = await a2a_tools.tool_chat_history(peer_id="bad")
|
||||
|
||||
assert "peer_id must be a UUID" in result
|
||||
@@ -1108,7 +1108,7 @@ class TestChatHistory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(get_resp=_resp(500, {"error": "internal"}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
result = await a2a_tools.tool_chat_history(peer_id=_PEER)
|
||||
|
||||
assert result.startswith("Error:")
|
||||
@@ -1121,7 +1121,7 @@ class TestChatHistory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(get_exc=httpx.ConnectError("network down"))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
result = await a2a_tools.tool_chat_history(peer_id=_PEER)
|
||||
|
||||
assert result.startswith("Error:")
|
||||
@@ -1135,7 +1135,7 @@ class TestChatHistory:
|
||||
import a2a_tools
|
||||
|
||||
mc = _make_http_mock(get_resp=_resp(200, {"unexpected": "shape"}))
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=mc):
|
||||
with patch("a2a_tools_messaging.httpx.AsyncClient", return_value=mc):
|
||||
result = await a2a_tools.tool_chat_history(peer_id=_PEER)
|
||||
|
||||
assert result.startswith("Error:")
|
||||
|
||||
@@ -0,0 +1,150 @@
|
||||
"""Tests for `_enrich_inbound_for_agent` — the poll-path companion to
|
||||
the push-path enrichment in `a2a_mcp_server._build_channel_notification`.
|
||||
|
||||
The MCP poll path (inbox_peek / wait_for_message) returns
|
||||
`InboxMessage.to_dict()`, which has `activity_id, text, peer_id, kind,
|
||||
method, created_at` but NOT the registry-resolved `peer_name`,
|
||||
`peer_role`, or `agent_card_url`. The receiving agent then sees a
|
||||
plain message and can't tell who's writing — breaking the universal
|
||||
contract documented in `a2a_mcp_server.py:303-345` ("In both paths
|
||||
the same fields apply").
|
||||
|
||||
The enrichment helper closes that gap. These tests pin:
|
||||
- canvas_user (peer_id="") passes through unchanged
|
||||
- peer_agent with cache hit gets peer_name + peer_role + agent_card_url
|
||||
- peer_agent with cache miss still gets agent_card_url (constructable
|
||||
from peer_id alone)
|
||||
- a2a_client unavailable (test harness without registry) degrades
|
||||
gracefully — agent still gets the bare envelope
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
# a2a_client.py reads WORKSPACE_ID at import time and raises if it's
|
||||
# unset. Stamp a stub before any test pulls in a2a_tools (which transitively
|
||||
# imports a2a_client). conftest.py mocks the SDK but not this env var.
|
||||
os.environ.setdefault("WORKSPACE_ID", "00000000-0000-0000-0000-000000000001")
|
||||
|
||||
import sys
|
||||
import types
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
PEER_UUID = "11111111-2222-3333-4444-555555555555"
|
||||
|
||||
|
||||
def test_canvas_user_passes_through_unchanged():
|
||||
from a2a_tools import _enrich_inbound_for_agent
|
||||
|
||||
base = {
|
||||
"activity_id": "act-1",
|
||||
"text": "hello from canvas",
|
||||
"peer_id": "",
|
||||
"kind": "canvas_user",
|
||||
"method": "message/send",
|
||||
"created_at": "2026-05-05T11:00:00Z",
|
||||
}
|
||||
|
||||
out = _enrich_inbound_for_agent(dict(base))
|
||||
|
||||
# Plain pass-through — no enrichment fields added for canvas_user.
|
||||
assert out == base
|
||||
assert "peer_name" not in out
|
||||
assert "peer_role" not in out
|
||||
assert "agent_card_url" not in out
|
||||
|
||||
|
||||
def test_peer_agent_cache_hit_adds_name_role_and_card_url():
|
||||
from a2a_tools import _enrich_inbound_for_agent
|
||||
|
||||
record = {"name": "ops-agent", "role": "sre"}
|
||||
card_url = f"https://platform.example/registry/{PEER_UUID}/agent-card"
|
||||
|
||||
with patch(
|
||||
"a2a_client.enrich_peer_metadata_nonblocking",
|
||||
return_value=record,
|
||||
), patch(
|
||||
"a2a_client._agent_card_url_for",
|
||||
return_value=card_url,
|
||||
):
|
||||
out = _enrich_inbound_for_agent({
|
||||
"activity_id": "act-2",
|
||||
"text": "ping",
|
||||
"peer_id": PEER_UUID,
|
||||
"kind": "peer_agent",
|
||||
"method": "message/send",
|
||||
"created_at": "2026-05-05T11:01:00Z",
|
||||
})
|
||||
|
||||
assert out["peer_name"] == "ops-agent"
|
||||
assert out["peer_role"] == "sre"
|
||||
assert out["agent_card_url"] == card_url
|
||||
|
||||
|
||||
def test_peer_agent_cache_miss_still_gets_agent_card_url():
|
||||
"""agent_card_url is constructable from peer_id alone — surface it
|
||||
even when registry enrichment misses, so the receiving agent has a
|
||||
single endpoint to hit for the peer's full capability list."""
|
||||
from a2a_tools import _enrich_inbound_for_agent
|
||||
|
||||
card_url = f"https://platform.example/registry/{PEER_UUID}/agent-card"
|
||||
|
||||
with patch(
|
||||
"a2a_client.enrich_peer_metadata_nonblocking",
|
||||
return_value=None, # cache miss
|
||||
), patch(
|
||||
"a2a_client._agent_card_url_for",
|
||||
return_value=card_url,
|
||||
):
|
||||
out = _enrich_inbound_for_agent({
|
||||
"activity_id": "act-3",
|
||||
"text": "ping",
|
||||
"peer_id": PEER_UUID,
|
||||
"kind": "peer_agent",
|
||||
"method": "message/send",
|
||||
"created_at": "2026-05-05T11:02:00Z",
|
||||
})
|
||||
|
||||
assert "peer_name" not in out
|
||||
assert "peer_role" not in out
|
||||
assert out["agent_card_url"] == card_url
|
||||
|
||||
|
||||
def test_peer_agent_a2a_client_unavailable_degrades_gracefully(monkeypatch):
|
||||
"""If a2a_client can't be imported (test harness, partial install),
|
||||
return the bare envelope — agent still gets text + peer_id + kind +
|
||||
activity_id, just without the friendly identity."""
|
||||
from a2a_tools import _enrich_inbound_for_agent
|
||||
|
||||
# Stub a2a_client import to fail.
|
||||
real_module = sys.modules.pop("a2a_client", None)
|
||||
fake = types.ModuleType("a2a_client")
|
||||
# Deliberately omit enrich_peer_metadata_nonblocking and
|
||||
# _agent_card_url_for so the helper's fallback path fires.
|
||||
sys.modules["a2a_client"] = fake
|
||||
|
||||
try:
|
||||
out = _enrich_inbound_for_agent({
|
||||
"activity_id": "act-4",
|
||||
"text": "ping",
|
||||
"peer_id": PEER_UUID,
|
||||
"kind": "peer_agent",
|
||||
"method": "message/send",
|
||||
"created_at": "2026-05-05T11:03:00Z",
|
||||
})
|
||||
finally:
|
||||
if real_module is not None:
|
||||
sys.modules["a2a_client"] = real_module
|
||||
else:
|
||||
sys.modules.pop("a2a_client", None)
|
||||
|
||||
# Bare envelope passes through — receiving agent still has enough
|
||||
# to act, even if the friendly identity is missing.
|
||||
assert out["peer_id"] == PEER_UUID
|
||||
assert out["text"] == "ping"
|
||||
assert out["kind"] == "peer_agent"
|
||||
assert "peer_name" not in out
|
||||
assert "peer_role" not in out
|
||||
assert "agent_card_url" not in out
|
||||
@@ -0,0 +1,181 @@
|
||||
"""Drift gate + import-contract tests for ``a2a_tools_inbox`` (RFC #2873 iter 4e).
|
||||
|
||||
The full behavior matrix for the three inbox tool wrappers lives in
|
||||
``test_a2a_tools_inbox_wrappers.py`` (kept on the public ``a2a_tools``
|
||||
module so the same tests pin both the alias and the underlying impl).
|
||||
|
||||
This file pins:
|
||||
|
||||
1. **Drift gate** — every previously-public symbol on ``a2a_tools``
|
||||
(``tool_inbox_peek``, ``tool_inbox_pop``, ``tool_wait_for_message``,
|
||||
``_enrich_inbound_for_agent``, ``_INBOX_NOT_ENABLED_MSG``) is the
|
||||
EXACT same object as ``a2a_tools_inbox.foo``. Refactor wrapping
|
||||
silently loses existing test coverage; this gate makes that drift
|
||||
fail fast.
|
||||
2. **Import contract** — ``a2a_tools_inbox`` does NOT pull in
|
||||
``a2a_tools`` at module-load time (the layered architecture: it
|
||||
depends only on stdlib + a lazy import of ``inbox`` + a lazy
|
||||
import of ``a2a_client``, never the kitchen-sink module that
|
||||
re-exports it).
|
||||
3. **_enrich_inbound_for_agent** branches that the wrapper tests
|
||||
can't easily reach: peer_id-empty (canvas_user) returns the
|
||||
dict unchanged; a2a_client unavailable degrades gracefully.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _require_workspace_id(monkeypatch):
|
||||
monkeypatch.setenv("WORKSPACE_ID", "00000000-0000-0000-0000-000000000000")
|
||||
monkeypatch.setenv("PLATFORM_URL", "http://test.invalid")
|
||||
yield
|
||||
|
||||
|
||||
# ============== Drift gate ==============
|
||||
|
||||
class TestBackCompatAliases:
|
||||
def test_tool_inbox_peek_alias(self):
|
||||
import a2a_tools
|
||||
import a2a_tools_inbox
|
||||
assert a2a_tools.tool_inbox_peek is a2a_tools_inbox.tool_inbox_peek
|
||||
|
||||
def test_tool_inbox_pop_alias(self):
|
||||
import a2a_tools
|
||||
import a2a_tools_inbox
|
||||
assert a2a_tools.tool_inbox_pop is a2a_tools_inbox.tool_inbox_pop
|
||||
|
||||
def test_tool_wait_for_message_alias(self):
|
||||
import a2a_tools
|
||||
import a2a_tools_inbox
|
||||
assert (
|
||||
a2a_tools.tool_wait_for_message is a2a_tools_inbox.tool_wait_for_message
|
||||
)
|
||||
|
||||
def test_enrich_helper_alias(self):
|
||||
import a2a_tools
|
||||
import a2a_tools_inbox
|
||||
assert (
|
||||
a2a_tools._enrich_inbound_for_agent
|
||||
is a2a_tools_inbox._enrich_inbound_for_agent
|
||||
)
|
||||
|
||||
def test_inbox_not_enabled_msg_alias(self):
|
||||
import a2a_tools
|
||||
import a2a_tools_inbox
|
||||
assert (
|
||||
a2a_tools._INBOX_NOT_ENABLED_MSG is a2a_tools_inbox._INBOX_NOT_ENABLED_MSG
|
||||
)
|
||||
|
||||
|
||||
# ============== Import contract ==============
|
||||
|
||||
class TestImportContract:
|
||||
def test_inbox_module_does_not_import_a2a_tools_eagerly(self):
|
||||
# Force a fresh load of a2a_tools_inbox without a2a_tools in sight.
|
||||
for k in [k for k in list(sys.modules) if k in (
|
||||
"a2a_tools_inbox", "a2a_tools",
|
||||
)]:
|
||||
sys.modules.pop(k, None)
|
||||
import a2a_tools_inbox # noqa: F401 — load only
|
||||
|
||||
# a2a_tools_inbox MUST NOT have caused a2a_tools to load. The
|
||||
# extracted module sits BELOW the kitchen-sink in the layering;
|
||||
# the dependency arrow points the other direction.
|
||||
assert "a2a_tools" not in sys.modules, (
|
||||
"a2a_tools_inbox eagerly imported a2a_tools — the kitchen-sink "
|
||||
"module must not be a load-time dependency of its slices."
|
||||
)
|
||||
|
||||
|
||||
# ============== _enrich_inbound_for_agent branches ==============
|
||||
|
||||
class TestEnrichInboundForAgent:
|
||||
def test_canvas_user_returns_dict_unchanged(self):
|
||||
# peer_id empty → canvas_user → no enrichment, no a2a_client touch.
|
||||
from a2a_tools_inbox import _enrich_inbound_for_agent
|
||||
|
||||
msg = {"activity_id": "a-1", "kind": "canvas_user", "peer_id": ""}
|
||||
result = _enrich_inbound_for_agent(msg)
|
||||
assert result is msg # same dict, mutated in place if at all
|
||||
assert "peer_name" not in result
|
||||
assert "peer_role" not in result
|
||||
assert "agent_card_url" not in result
|
||||
|
||||
def test_missing_peer_id_key_returns_unchanged(self):
|
||||
from a2a_tools_inbox import _enrich_inbound_for_agent
|
||||
|
||||
msg = {"activity_id": "a-2", "kind": "canvas_user"} # no peer_id key
|
||||
result = _enrich_inbound_for_agent(msg)
|
||||
assert result is msg
|
||||
assert "agent_card_url" not in result
|
||||
|
||||
def test_a2a_client_unavailable_degrades_gracefully(self, monkeypatch):
|
||||
# Simulate a2a_client import failing (test harness, partial
|
||||
# install). The helper must return the bare envelope, not raise.
|
||||
from a2a_tools_inbox import _enrich_inbound_for_agent
|
||||
|
||||
# Force an ImportError by poisoning sys.modules.
|
||||
import builtins
|
||||
real_import = builtins.__import__
|
||||
|
||||
def fake_import(name, *args, **kwargs):
|
||||
if name == "a2a_client":
|
||||
raise ImportError("simulated a2a_client unavailable")
|
||||
return real_import(name, *args, **kwargs)
|
||||
|
||||
monkeypatch.setattr(builtins, "__import__", fake_import)
|
||||
|
||||
msg = {"activity_id": "a-3", "kind": "peer_agent", "peer_id": "ws-x"}
|
||||
result = _enrich_inbound_for_agent(msg)
|
||||
# Bare envelope back — no peer_name, no agent_card_url. Crucially
|
||||
# the helper did NOT raise, so the inbox tool surfaces the message
|
||||
# to the agent even when the registry is unreachable.
|
||||
assert result is msg
|
||||
assert "peer_name" not in result
|
||||
assert "agent_card_url" not in result
|
||||
|
||||
def test_registry_record_populates_peer_name_and_role(self, monkeypatch):
|
||||
from a2a_tools_inbox import _enrich_inbound_for_agent
|
||||
|
||||
# Stub out the lazy-imported a2a_client functions.
|
||||
import sys
|
||||
import types
|
||||
fake_a2a_client = types.SimpleNamespace(
|
||||
_agent_card_url_for=lambda pid: f"http://test/agent/{pid}",
|
||||
enrich_peer_metadata_nonblocking=lambda pid: {
|
||||
"name": "PeerOne",
|
||||
"role": "worker",
|
||||
},
|
||||
)
|
||||
monkeypatch.setitem(sys.modules, "a2a_client", fake_a2a_client)
|
||||
|
||||
msg = {"activity_id": "a-4", "kind": "peer_agent", "peer_id": "ws-1"}
|
||||
result = _enrich_inbound_for_agent(msg)
|
||||
assert result["peer_name"] == "PeerOne"
|
||||
assert result["peer_role"] == "worker"
|
||||
assert result["agent_card_url"] == "http://test/agent/ws-1"
|
||||
|
||||
def test_registry_miss_keeps_agent_card_url(self, monkeypatch):
|
||||
# On registry cache miss the helper still surfaces agent_card_url
|
||||
# because it's constructable from peer_id alone — preserves the
|
||||
# contract that the receiving agent always has somewhere to
|
||||
# fetch the peer's full capability list.
|
||||
from a2a_tools_inbox import _enrich_inbound_for_agent
|
||||
|
||||
import sys
|
||||
import types
|
||||
fake_a2a_client = types.SimpleNamespace(
|
||||
_agent_card_url_for=lambda pid: f"http://test/agent/{pid}",
|
||||
enrich_peer_metadata_nonblocking=lambda pid: None, # cache miss
|
||||
)
|
||||
monkeypatch.setitem(sys.modules, "a2a_client", fake_a2a_client)
|
||||
|
||||
msg = {"activity_id": "a-5", "kind": "peer_agent", "peer_id": "ws-2"}
|
||||
result = _enrich_inbound_for_agent(msg)
|
||||
assert "peer_name" not in result
|
||||
assert "peer_role" not in result
|
||||
assert result["agent_card_url"] == "http://test/agent/ws-2"
|
||||
@@ -0,0 +1,196 @@
|
||||
"""Direct unit tests for the three inbox tool wrappers in ``a2a_tools``.
|
||||
|
||||
After RFC #2873 iter 4d (messaging extraction), ``a2a_tools.py`` is
|
||||
mostly back-compat re-exports — the only behavior still defined here
|
||||
is ``report_activity`` plus three thin wrappers around the inbox state
|
||||
machine: ``tool_inbox_peek`` / ``tool_inbox_pop`` / ``tool_wait_for_message``.
|
||||
|
||||
These wrappers were never exercised at the module level, so the
|
||||
critical-path coverage gate (75% per-file floor for MCP/inbox/auth)
|
||||
dropped to 54% on iter 4d. This file pins each wrapper's behavior
|
||||
directly so the floor is met without changing the gate.
|
||||
|
||||
The wrappers are ~40 LOC of glue. The full delivery behavior
|
||||
(persistence, 410 recovery, etc.) is exercised in test_inbox.py.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _require_workspace_id(monkeypatch):
|
||||
monkeypatch.setenv("WORKSPACE_ID", "00000000-0000-0000-0000-000000000000")
|
||||
monkeypatch.setenv("PLATFORM_URL", "http://test.invalid")
|
||||
yield
|
||||
|
||||
|
||||
def _run(coro):
|
||||
return asyncio.get_event_loop().run_until_complete(coro)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# tool_inbox_peek
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestToolInboxPeek:
|
||||
def test_returns_not_enabled_when_state_none(self):
|
||||
import a2a_tools
|
||||
|
||||
with patch("inbox.get_state", return_value=None):
|
||||
out = _run(a2a_tools.tool_inbox_peek())
|
||||
assert "not enabled" in out
|
||||
|
||||
def test_returns_json_array_of_messages(self):
|
||||
import a2a_tools
|
||||
|
||||
msg1 = MagicMock()
|
||||
msg1.to_dict.return_value = {"activity_id": "a1", "kind": "canvas_user"}
|
||||
msg2 = MagicMock()
|
||||
msg2.to_dict.return_value = {"activity_id": "a2", "kind": "peer_agent"}
|
||||
|
||||
fake_state = MagicMock()
|
||||
fake_state.peek.return_value = [msg1, msg2]
|
||||
|
||||
with patch("inbox.get_state", return_value=fake_state):
|
||||
out = _run(a2a_tools.tool_inbox_peek(limit=5))
|
||||
# peek limit is forwarded
|
||||
fake_state.peek.assert_called_once_with(limit=5)
|
||||
parsed = json.loads(out)
|
||||
assert len(parsed) == 2
|
||||
assert parsed[0]["activity_id"] == "a1"
|
||||
|
||||
def test_non_int_limit_falls_back_to_10(self):
|
||||
import a2a_tools
|
||||
|
||||
fake_state = MagicMock()
|
||||
fake_state.peek.return_value = []
|
||||
with patch("inbox.get_state", return_value=fake_state):
|
||||
_run(a2a_tools.tool_inbox_peek(limit="garbage")) # type: ignore[arg-type]
|
||||
fake_state.peek.assert_called_once_with(limit=10)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# tool_inbox_pop
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestToolInboxPop:
|
||||
def test_returns_not_enabled_when_state_none(self):
|
||||
import a2a_tools
|
||||
|
||||
with patch("inbox.get_state", return_value=None):
|
||||
out = _run(a2a_tools.tool_inbox_pop("act-1"))
|
||||
assert "not enabled" in out
|
||||
|
||||
def test_rejects_empty_activity_id(self):
|
||||
import a2a_tools
|
||||
|
||||
fake_state = MagicMock()
|
||||
with patch("inbox.get_state", return_value=fake_state):
|
||||
out = _run(a2a_tools.tool_inbox_pop(""))
|
||||
assert "activity_id is required" in out
|
||||
fake_state.pop.assert_not_called()
|
||||
|
||||
def test_rejects_non_str_activity_id(self):
|
||||
import a2a_tools
|
||||
|
||||
fake_state = MagicMock()
|
||||
with patch("inbox.get_state", return_value=fake_state):
|
||||
out = _run(a2a_tools.tool_inbox_pop(123)) # type: ignore[arg-type]
|
||||
assert "activity_id is required" in out
|
||||
fake_state.pop.assert_not_called()
|
||||
|
||||
def test_returns_removed_true_when_popped(self):
|
||||
import a2a_tools
|
||||
|
||||
fake_state = MagicMock()
|
||||
fake_state.pop.return_value = MagicMock() # truthy = something was removed
|
||||
with patch("inbox.get_state", return_value=fake_state):
|
||||
out = _run(a2a_tools.tool_inbox_pop("act-7"))
|
||||
parsed = json.loads(out)
|
||||
assert parsed == {"removed": True, "activity_id": "act-7"}
|
||||
fake_state.pop.assert_called_once_with("act-7")
|
||||
|
||||
def test_returns_removed_false_when_unknown(self):
|
||||
import a2a_tools
|
||||
|
||||
fake_state = MagicMock()
|
||||
fake_state.pop.return_value = None
|
||||
with patch("inbox.get_state", return_value=fake_state):
|
||||
out = _run(a2a_tools.tool_inbox_pop("act-missing"))
|
||||
parsed = json.loads(out)
|
||||
assert parsed == {"removed": False, "activity_id": "act-missing"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# tool_wait_for_message
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestToolWaitForMessage:
|
||||
def test_returns_not_enabled_when_state_none(self):
|
||||
import a2a_tools
|
||||
|
||||
with patch("inbox.get_state", return_value=None):
|
||||
out = _run(a2a_tools.tool_wait_for_message(timeout_secs=1.0))
|
||||
assert "not enabled" in out
|
||||
|
||||
def test_timeout_payload_when_no_message(self):
|
||||
import a2a_tools
|
||||
|
||||
fake_state = MagicMock()
|
||||
fake_state.wait.return_value = None
|
||||
with patch("inbox.get_state", return_value=fake_state):
|
||||
out = _run(a2a_tools.tool_wait_for_message(timeout_secs=0.1))
|
||||
parsed = json.loads(out)
|
||||
assert parsed["timeout"] is True
|
||||
assert parsed["timeout_secs"] == 0.1
|
||||
|
||||
def test_returns_message_when_delivered(self):
|
||||
import a2a_tools
|
||||
|
||||
msg = MagicMock()
|
||||
msg.to_dict.return_value = {"activity_id": "a-9", "kind": "peer_agent"}
|
||||
fake_state = MagicMock()
|
||||
fake_state.wait.return_value = msg
|
||||
with patch("inbox.get_state", return_value=fake_state):
|
||||
out = _run(a2a_tools.tool_wait_for_message(timeout_secs=2.0))
|
||||
parsed = json.loads(out)
|
||||
assert parsed["activity_id"] == "a-9"
|
||||
|
||||
def test_timeout_clamped_to_300(self):
|
||||
import a2a_tools
|
||||
|
||||
fake_state = MagicMock()
|
||||
fake_state.wait.return_value = None
|
||||
with patch("inbox.get_state", return_value=fake_state):
|
||||
_run(a2a_tools.tool_wait_for_message(timeout_secs=99999))
|
||||
# Whatever wait was called with, it must not exceed 300
|
||||
passed = fake_state.wait.call_args.args[0]
|
||||
assert passed == 300.0
|
||||
|
||||
def test_timeout_clamped_to_zero_floor(self):
|
||||
import a2a_tools
|
||||
|
||||
fake_state = MagicMock()
|
||||
fake_state.wait.return_value = None
|
||||
with patch("inbox.get_state", return_value=fake_state):
|
||||
_run(a2a_tools.tool_wait_for_message(timeout_secs=-5))
|
||||
passed = fake_state.wait.call_args.args[0]
|
||||
assert passed == 0.0
|
||||
|
||||
def test_non_numeric_timeout_falls_back_to_60(self):
|
||||
import a2a_tools
|
||||
|
||||
fake_state = MagicMock()
|
||||
fake_state.wait.return_value = None
|
||||
with patch("inbox.get_state", return_value=fake_state):
|
||||
_run(a2a_tools.tool_wait_for_message(timeout_secs="garbage")) # type: ignore[arg-type]
|
||||
passed = fake_state.wait.call_args.args[0]
|
||||
assert passed == 60.0
|
||||
@@ -0,0 +1,69 @@
|
||||
"""Drift gate + smoke tests for ``a2a_tools_memory`` (RFC #2873 iter 4c).
|
||||
|
||||
The full behavior matrix (RBAC denies, scope enforcement, platform
|
||||
HTTP error paths) lives in ``test_a2a_tools_impl.py`` (TestToolCommitMemory
|
||||
+ TestToolRecallMemory) which patches `a2a_tools_memory.foo` after the
|
||||
iter 4c retarget.
|
||||
|
||||
This file pins:
|
||||
|
||||
1. **Drift gate** — every previously-public symbol on ``a2a_tools``
|
||||
(``tool_commit_memory``, ``tool_recall_memory``) is the EXACT same
|
||||
callable as ``a2a_tools_memory.foo``. Refactor wrapping silently
|
||||
loses the existing test coverage; this gate makes that drift fail
|
||||
fast.
|
||||
2. **Import contract** — ``a2a_tools_memory`` does NOT pull in
|
||||
``a2a_tools`` at module-load time. The handlers depend on
|
||||
``a2a_tools_rbac`` (the layered architecture) and ``a2a_client``,
|
||||
not on the kitchen-sink module that re-exports them.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _require_workspace_id(monkeypatch):
|
||||
monkeypatch.setenv("WORKSPACE_ID", "00000000-0000-0000-0000-000000000000")
|
||||
monkeypatch.setenv("PLATFORM_URL", "http://test.invalid")
|
||||
yield
|
||||
|
||||
|
||||
# ============== Drift gate ==============
|
||||
|
||||
class TestBackCompatAliases:
|
||||
def test_tool_commit_memory_alias(self):
|
||||
import a2a_tools
|
||||
import a2a_tools_memory
|
||||
assert a2a_tools.tool_commit_memory is a2a_tools_memory.tool_commit_memory
|
||||
|
||||
def test_tool_recall_memory_alias(self):
|
||||
import a2a_tools
|
||||
import a2a_tools_memory
|
||||
assert a2a_tools.tool_recall_memory is a2a_tools_memory.tool_recall_memory
|
||||
|
||||
|
||||
# ============== Import contract ==============
|
||||
|
||||
class TestImportContract:
|
||||
def test_memory_module_does_not_load_a2a_tools(self, monkeypatch):
|
||||
"""`a2a_tools_memory` must depend on `a2a_tools_rbac` (the layered
|
||||
architecture) and `a2a_client`, NEVER on the kitchen-sink
|
||||
`a2a_tools`. Top-level `from a2a_tools import …` would defeat
|
||||
the modularization goal and risk a circular-import."""
|
||||
# Drop both modules to control import order
|
||||
for m in ("a2a_tools", "a2a_tools_memory"):
|
||||
sys.modules.pop(m, None)
|
||||
|
||||
# Import memory module. Should succeed without a2a_tools loaded.
|
||||
import a2a_tools_memory # noqa: F401
|
||||
assert "a2a_tools_memory" in sys.modules
|
||||
|
||||
def test_a2a_tools_re_exports_memory_handlers(self):
|
||||
"""The opposite direction: a2a_tools must surface every memory
|
||||
symbol so existing call sites + tests work unchanged."""
|
||||
import a2a_tools
|
||||
assert hasattr(a2a_tools, "tool_commit_memory")
|
||||
assert hasattr(a2a_tools, "tool_recall_memory")
|
||||
@@ -0,0 +1,92 @@
|
||||
"""Drift gate + smoke tests for ``a2a_tools_messaging`` (RFC #2873 iter 4d).
|
||||
|
||||
The full behavior matrix lives in ``test_a2a_tools_impl.py`` —
|
||||
TestToolSendMessageToUser + TestToolListPeers + TestToolGetWorkspaceInfo
|
||||
+ TestChatHistory all patch ``a2a_tools_messaging.foo`` after the iter
|
||||
4d retarget.
|
||||
|
||||
This file pins:
|
||||
|
||||
1. **Drift gate** — every previously-public symbol on ``a2a_tools``
|
||||
is the EXACT same callable / value as ``a2a_tools_messaging.foo``.
|
||||
Wraps would silently lose existing test coverage; this gate
|
||||
fails fast on that drift.
|
||||
2. **Import contract** — ``a2a_tools_messaging`` does NOT pull in
|
||||
``a2a_tools`` at module-load time (the layered architecture: it
|
||||
depends on ``a2a_tools_rbac`` + ``a2a_client`` + ``platform_auth``,
|
||||
never the kitchen-sink module).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _require_workspace_id(monkeypatch):
|
||||
monkeypatch.setenv("WORKSPACE_ID", "00000000-0000-0000-0000-000000000000")
|
||||
monkeypatch.setenv("PLATFORM_URL", "http://test.invalid")
|
||||
yield
|
||||
|
||||
|
||||
# ============== Drift gate ==============
|
||||
|
||||
class TestBackCompatAliases:
|
||||
def test_tool_send_message_to_user_alias(self):
|
||||
import a2a_tools
|
||||
import a2a_tools_messaging
|
||||
assert (
|
||||
a2a_tools.tool_send_message_to_user
|
||||
is a2a_tools_messaging.tool_send_message_to_user
|
||||
)
|
||||
|
||||
def test_tool_list_peers_alias(self):
|
||||
import a2a_tools
|
||||
import a2a_tools_messaging
|
||||
assert a2a_tools.tool_list_peers is a2a_tools_messaging.tool_list_peers
|
||||
|
||||
def test_tool_get_workspace_info_alias(self):
|
||||
import a2a_tools
|
||||
import a2a_tools_messaging
|
||||
assert (
|
||||
a2a_tools.tool_get_workspace_info
|
||||
is a2a_tools_messaging.tool_get_workspace_info
|
||||
)
|
||||
|
||||
def test_tool_chat_history_alias(self):
|
||||
import a2a_tools
|
||||
import a2a_tools_messaging
|
||||
assert a2a_tools.tool_chat_history is a2a_tools_messaging.tool_chat_history
|
||||
|
||||
def test_upload_chat_files_alias(self):
|
||||
import a2a_tools
|
||||
import a2a_tools_messaging
|
||||
assert a2a_tools._upload_chat_files is a2a_tools_messaging._upload_chat_files
|
||||
|
||||
|
||||
# ============== Import contract ==============
|
||||
|
||||
class TestImportContract:
|
||||
def test_messaging_module_does_not_load_a2a_tools(self, monkeypatch):
|
||||
"""`a2a_tools_messaging` must depend on `a2a_tools_rbac` (the
|
||||
layered architecture), `a2a_client`, and `platform_auth` — but
|
||||
NEVER on the kitchen-sink `a2a_tools`. Top-level
|
||||
`from a2a_tools import …` would re-introduce the circular
|
||||
dependency that motivated the lazy-import contract for the
|
||||
delegation module."""
|
||||
for m in ("a2a_tools", "a2a_tools_messaging"):
|
||||
sys.modules.pop(m, None)
|
||||
|
||||
import a2a_tools_messaging # noqa: F401
|
||||
assert "a2a_tools_messaging" in sys.modules
|
||||
|
||||
def test_a2a_tools_re_exports_messaging_handlers(self):
|
||||
"""Opposite direction: a2a_tools surfaces every messaging
|
||||
symbol so existing call sites + tests work unchanged."""
|
||||
import a2a_tools
|
||||
assert hasattr(a2a_tools, "tool_send_message_to_user")
|
||||
assert hasattr(a2a_tools, "tool_list_peers")
|
||||
assert hasattr(a2a_tools, "tool_get_workspace_info")
|
||||
assert hasattr(a2a_tools, "tool_chat_history")
|
||||
assert hasattr(a2a_tools, "_upload_chat_files")
|
||||
@@ -229,3 +229,129 @@ class TestResolveWorkspacesDirect:
|
||||
out, errors = mcp_workspace_resolver.resolve_workspaces()
|
||||
assert out == [("ws-a", "a"), ("ws-b", "b")]
|
||||
assert errors == []
|
||||
|
||||
|
||||
# ============== Token-from-file env var (issue #2934) ==============
|
||||
|
||||
class TestTokenFileEnv:
|
||||
"""``MOLECULE_WORKSPACE_TOKEN_FILE`` lets operators keep the bearer
|
||||
out of shell history and out of MCP-host config plaintext (e.g.
|
||||
~/.claude.json). Resolution order: inline TOKEN env > TOKEN_FILE
|
||||
env > ${CONFIGS_DIR}/.auth_token.
|
||||
"""
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _isolate(self, monkeypatch, tmp_path):
|
||||
for v in (
|
||||
"WORKSPACE_ID",
|
||||
"MOLECULE_WORKSPACE_TOKEN",
|
||||
"MOLECULE_WORKSPACE_TOKEN_FILE",
|
||||
"MOLECULE_WORKSPACES",
|
||||
):
|
||||
monkeypatch.delenv(v, raising=False)
|
||||
# Point CONFIGS_DIR at an empty tmp_path so the .auth_token
|
||||
# fallback returns "" — keeps the test cases unambiguous.
|
||||
monkeypatch.setenv("CONFIGS_DIR", str(tmp_path))
|
||||
yield tmp_path
|
||||
|
||||
def test_token_file_env_resolves(self, monkeypatch, tmp_path):
|
||||
token_path = tmp_path / "token.txt"
|
||||
token_path.write_text("file-tok-123\n") # trailing newline must strip
|
||||
monkeypatch.setenv("WORKSPACE_ID", "ws-1")
|
||||
monkeypatch.setenv("MOLECULE_WORKSPACE_TOKEN_FILE", str(token_path))
|
||||
out, errors = mcp_workspace_resolver.resolve_workspaces()
|
||||
assert out == [("ws-1", "file-tok-123")]
|
||||
assert errors == []
|
||||
|
||||
def test_inline_token_takes_precedence_over_file(self, monkeypatch, tmp_path):
|
||||
# If both env vars are set, inline wins — matches the docstring's
|
||||
# documented order. (Operators sometimes set both during a
|
||||
# rotation; we want predictable behavior.)
|
||||
token_path = tmp_path / "token.txt"
|
||||
token_path.write_text("file-tok")
|
||||
monkeypatch.setenv("WORKSPACE_ID", "ws-1")
|
||||
monkeypatch.setenv("MOLECULE_WORKSPACE_TOKEN", "inline-tok")
|
||||
monkeypatch.setenv("MOLECULE_WORKSPACE_TOKEN_FILE", str(token_path))
|
||||
out, _ = mcp_workspace_resolver.resolve_workspaces()
|
||||
assert out == [("ws-1", "inline-tok")]
|
||||
|
||||
def test_missing_file_returns_specific_error(self, monkeypatch, tmp_path):
|
||||
# Operator EXPLICITLY pointed TOKEN_FILE at a non-existent path —
|
||||
# surface the SPECIFIC failure (not the generic "set one of these
|
||||
# three vars" message). Otherwise they hit the silent failure mode
|
||||
# #2934 flagged ("a new user has no chance").
|
||||
bad_path = tmp_path / "does-not-exist"
|
||||
monkeypatch.setenv("WORKSPACE_ID", "ws-1")
|
||||
monkeypatch.setenv("MOLECULE_WORKSPACE_TOKEN_FILE", str(bad_path))
|
||||
out, errors = mcp_workspace_resolver.resolve_workspaces()
|
||||
assert out == []
|
||||
assert len(errors) == 1
|
||||
assert "MOLECULE_WORKSPACE_TOKEN_FILE" in errors[0]
|
||||
assert "does not exist" in errors[0]
|
||||
assert str(bad_path) in errors[0]
|
||||
|
||||
def test_empty_file_returns_specific_error(self, monkeypatch, tmp_path):
|
||||
# Blank file — operator's intent was clearly the file path, so a
|
||||
# generic "no token" error would mask their config bug.
|
||||
token_path = tmp_path / "empty.txt"
|
||||
token_path.write_text("")
|
||||
monkeypatch.setenv("WORKSPACE_ID", "ws-1")
|
||||
monkeypatch.setenv("MOLECULE_WORKSPACE_TOKEN_FILE", str(token_path))
|
||||
out, errors = mcp_workspace_resolver.resolve_workspaces()
|
||||
assert out == []
|
||||
assert len(errors) == 1
|
||||
assert "MOLECULE_WORKSPACE_TOKEN_FILE" in errors[0]
|
||||
assert "is empty" in errors[0]
|
||||
|
||||
def test_multi_line_file_rejected(self, monkeypatch, tmp_path):
|
||||
# CSV cell or accidental multi-token paste — would otherwise become
|
||||
# a malformed bearer that 401s against the platform with no
|
||||
# diagnostic. Reject upfront with a specific error.
|
||||
token_path = tmp_path / "junk.txt"
|
||||
token_path.write_text("tok-a tok-b\n")
|
||||
monkeypatch.setenv("WORKSPACE_ID", "ws-1")
|
||||
monkeypatch.setenv("MOLECULE_WORKSPACE_TOKEN_FILE", str(token_path))
|
||||
out, errors = mcp_workspace_resolver.resolve_workspaces()
|
||||
assert out == []
|
||||
assert len(errors) == 1
|
||||
assert "internal whitespace" in errors[0]
|
||||
|
||||
def test_token_file_error_skips_configs_dir_fallback(
|
||||
self, monkeypatch, tmp_path
|
||||
):
|
||||
# When TOKEN_FILE is explicitly set but broken, do NOT fall through
|
||||
# to a valid CONFIGS_DIR/.auth_token — the operator's intent is
|
||||
# clearly to use the file path; deferring to a different source
|
||||
# would mask their config error.
|
||||
configs_dir = tmp_path / "configs"
|
||||
configs_dir.mkdir()
|
||||
(configs_dir / ".auth_token").write_text("configs-tok")
|
||||
monkeypatch.setenv("CONFIGS_DIR", str(configs_dir))
|
||||
monkeypatch.setenv("WORKSPACE_ID", "ws-1")
|
||||
monkeypatch.setenv(
|
||||
"MOLECULE_WORKSPACE_TOKEN_FILE", str(tmp_path / "missing")
|
||||
)
|
||||
out, errors = mcp_workspace_resolver.resolve_workspaces()
|
||||
assert out == []
|
||||
# Specific TOKEN_FILE error — not the generic "no token" fallback
|
||||
# and crucially not the silent success of using configs-tok.
|
||||
assert len(errors) == 1
|
||||
assert "does not exist" in errors[0]
|
||||
|
||||
def test_blank_env_var_treated_as_unset(self, monkeypatch):
|
||||
# Empty string is treated as "not set" — common pitfall when
|
||||
# users export an unset shell var.
|
||||
monkeypatch.setenv("WORKSPACE_ID", "ws-1")
|
||||
monkeypatch.setenv("MOLECULE_WORKSPACE_TOKEN_FILE", "")
|
||||
out, errors = mcp_workspace_resolver.resolve_workspaces()
|
||||
assert out == []
|
||||
assert errors
|
||||
|
||||
def test_help_message_advertises_token_file(self, capsys):
|
||||
# Help text must mention TOKEN_FILE so a first-run operator
|
||||
# learns about the safer option without grepping the source.
|
||||
mcp_workspace_resolver.print_missing_env_help(
|
||||
["WORKSPACE_ID", "MOLECULE_WORKSPACE_TOKEN"], have_token_file=False
|
||||
)
|
||||
err = capsys.readouterr().err
|
||||
assert "MOLECULE_WORKSPACE_TOKEN_FILE" in err
|
||||
|
||||
@@ -63,7 +63,7 @@ async def test_commit_memory_success(monkeypatch):
|
||||
mcp = _load_mcp()
|
||||
|
||||
client = FakeClient()
|
||||
monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client)
|
||||
monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: client)
|
||||
|
||||
result = await mcp.handle_tool_call("commit_memory", {
|
||||
"content": "Architecture decision: use Go for backend",
|
||||
@@ -92,7 +92,7 @@ async def test_commit_memory_default_scope(monkeypatch):
|
||||
mcp = _load_mcp()
|
||||
|
||||
client = FakeClient()
|
||||
monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client)
|
||||
monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: client)
|
||||
|
||||
result = await mcp.handle_tool_call("commit_memory", {
|
||||
"content": "Some note",
|
||||
@@ -108,7 +108,7 @@ async def test_recall_memory_success(monkeypatch):
|
||||
mcp = _load_mcp()
|
||||
|
||||
client = FakeClient()
|
||||
monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client)
|
||||
monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: client)
|
||||
|
||||
result = await mcp.handle_tool_call("recall_memory", {"query": "architecture"})
|
||||
|
||||
@@ -127,7 +127,7 @@ async def test_recall_memory_empty(monkeypatch):
|
||||
async def get(self, url, params=None, headers=None, **kwargs):
|
||||
return FakeResponse(200, [])
|
||||
|
||||
monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: EmptyClient())
|
||||
monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: EmptyClient())
|
||||
|
||||
result = await mcp.handle_tool_call("recall_memory", {})
|
||||
assert "No memories found" in result
|
||||
@@ -139,7 +139,7 @@ async def test_recall_memory_with_scope_filter(monkeypatch):
|
||||
mcp = _load_mcp()
|
||||
|
||||
client = FakeClient()
|
||||
monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client)
|
||||
monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: client)
|
||||
|
||||
await mcp.handle_tool_call("recall_memory", {"scope": "TEAM"})
|
||||
|
||||
|
||||
@@ -357,7 +357,7 @@ class TestA2AToolCommitMemoryRedactsSecrets:
|
||||
|
||||
fake_client.post = _capture
|
||||
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=fake_client):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=fake_client):
|
||||
await a2a_tools.tool_commit_memory(content_with_secret)
|
||||
|
||||
stored = captured.get("content", "")
|
||||
@@ -385,7 +385,7 @@ class TestA2AToolCommitMemoryRedactsSecrets:
|
||||
|
||||
fake_client.post = _capture
|
||||
|
||||
with patch("a2a_tools.httpx.AsyncClient", return_value=fake_client):
|
||||
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=fake_client):
|
||||
await a2a_tools.tool_commit_memory(f"key={key}")
|
||||
|
||||
stored = captured.get("content", "")
|
||||
|
||||
Reference in New Issue
Block a user