Compare commits
128 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| a869bc1536 | |||
| d3e115cb06 | |||
| b372c265ab | |||
| 146c0e7c60 | |||
| 5d8b5e96e3 | |||
| dc6e1ac2bf | |||
| c2e12f3fb6 | |||
| dd5df70e59 | |||
| f1dc721eeb | |||
| 5b78bea10d | |||
| a5903af459 | |||
| 07d09f3696 | |||
| f7c270bf24 | |||
| 0301f90183 | |||
| feef80423b | |||
| 469b24ff8f | |||
| c4d3c9a451 | |||
| 2652ea8342 | |||
| 1e01083e55 | |||
| eab36e217e | |||
| 7ee696ec9a | |||
| decec9b9a1 | |||
| ada27fdb5d | |||
| f0f4d0e761 | |||
| e0df90c294 | |||
| f01f374072 | |||
| 1edee1131b | |||
| d99b3f2aec | |||
| f5ea812e9d | |||
| 3b7ed9cf53 | |||
| da9061c131 | |||
| c4807a930d | |||
| d22fbb29b8 | |||
| 899c53550d | |||
| cdfc9f743f | |||
| 7a2664523c | |||
| 632e906640 | |||
| 475da5b64c | |||
| 1ad107cc15 | |||
| e4bd1e4293 | |||
| 01deeb36cf | |||
| b906e1da61 | |||
| 226e57a942 | |||
| abc3affcb6 | |||
| 3322524b0f | |||
| de01ff51b0 | |||
| f3782662bd | |||
| e9eb3868d5 | |||
| cb70d3d437 | |||
| a1d202723d | |||
| 0d0840d9d9 | |||
| fc30b5c9de | |||
| ef67dc513e | |||
| 23d3f057d3 | |||
| 8ca027ddf3 | |||
| 46a4ef83bb | |||
| a6afc18de5 | |||
| 423d58d42c | |||
| 9386f1d399 | |||
| a766e5ce48 | |||
| 5ad2669f88 | |||
| 0ca4e431c1 | |||
| 184ce7ae4e | |||
| 2bf6a7005f | |||
| 16ead69641 | |||
| 60afcd43c9 | |||
| ff75aeb43e | |||
| 81cf0cbf98 | |||
| 412dec0d87 | |||
| 9a53529047 | |||
| 39931acd9c | |||
| 6f19b88fa7 | |||
| 83454e5efd | |||
| 575f893f4e | |||
| 4cac4e7710 | |||
| 8254bedf30 | |||
| ec72f199e6 | |||
| ae22a55675 | |||
| 08648bf4b1 | |||
| eec4ea2e7d | |||
| 6201d12533 | |||
| 81e83c05b7 | |||
| 5b5eacbb29 | |||
| c8fca1467e | |||
| 7c8b81c6eb | |||
| fc1c45789e | |||
| e3a18ed8e8 | |||
| 9f551319d2 | |||
| 1052f8bdb0 | |||
| 30fb507165 | |||
| 77e9a965ac | |||
| 5334d60de4 | |||
| d6c0227e3f | |||
| 27db090d3d | |||
| 0f25f6de97 | |||
| 9991057ad1 | |||
| b89a49ec93 | |||
| 3d0a7c381b | |||
| f5613bf099 | |||
| 9bd2a2c45f | |||
| a489ee1a7c | |||
| c79ba05ed5 | |||
| 6470e5f41b | |||
| aa560c0314 | |||
| 7644e82f2f | |||
| 33fabdf483 | |||
| abba16beb4 | |||
| 9c752e0673 | |||
| 8e5d193761 | |||
| 3e0d2e650a | |||
| 210a26d31a | |||
| be18b9c8f9 | |||
| 2cb1b26512 | |||
| 48d1945269 | |||
| a04a49f7aa | |||
| bbec4cfcfb | |||
| 19c25a9278 | |||
| e50799bc29 | |||
| 07839580a0 | |||
| 2227a14b1e | |||
| e72f9ad107 | |||
| 17aec22f9b | |||
| 8388144098 | |||
| a327d207da | |||
| 529c3f3922 | |||
| c778b62202 | |||
| 0c461eb9f1 | |||
| 28ef75d25e |
@@ -387,6 +387,7 @@ jobs:
|
||||
"a2a_mcp_server.py"
|
||||
"mcp_cli.py"
|
||||
"a2a_tools.py"
|
||||
"a2a_tools_inbox.py"
|
||||
"inbox.py"
|
||||
"platform_auth.py"
|
||||
)
|
||||
|
||||
@@ -172,6 +172,9 @@ jobs:
|
||||
- name: Run poll-mode + since_id cursor E2E (#2339)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_poll_mode_e2e.sh
|
||||
- name: Run poll-mode chat upload E2E (RFC #2891)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_poll_mode_chat_upload_e2e.sh
|
||||
- name: Dump platform log on failure
|
||||
if: failure() && needs.detect-changes.outputs.api == 'true'
|
||||
run: cat workspace-server/platform.log || true
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
// quick bounce between signup and either Checkout or the tenant UI.
|
||||
|
||||
import { useEffect, useState } from "react";
|
||||
import { fetchSession, redirectToLogin, type Session } from "@/lib/auth";
|
||||
import { fetchSession, redirectToLogin, signOut, type Session } from "@/lib/auth";
|
||||
import { PLATFORM_URL } from "@/lib/api";
|
||||
import { formatCredits, pillTone, bannerKind } from "@/lib/credits";
|
||||
import { TermsGate } from "@/components/TermsGate";
|
||||
@@ -129,7 +129,7 @@ export default function OrgsPage() {
|
||||
return <EmptyState banner={justCheckedOut ? <CheckoutBanner /> : null} />;
|
||||
}
|
||||
return (
|
||||
<Shell>
|
||||
<Shell session={session}>
|
||||
{justCheckedOut && <CheckoutBanner />}
|
||||
<ul className="space-y-3">
|
||||
{orgs.map((o) => (
|
||||
@@ -160,11 +160,21 @@ function CheckoutBanner() {
|
||||
);
|
||||
}
|
||||
|
||||
function Shell({ children }: { children: React.ReactNode }) {
|
||||
function Shell({
|
||||
children,
|
||||
session,
|
||||
}: {
|
||||
children: React.ReactNode;
|
||||
// Optional: when present, the header renders the signed-in email +
|
||||
// a Sign-out button. The empty-state Shell call doesn't have a
|
||||
// session in scope, so accept null and skip the header chrome there.
|
||||
session?: Session | null;
|
||||
}) {
|
||||
return (
|
||||
<main className="min-h-screen bg-surface text-ink">
|
||||
<TermsGate>
|
||||
<div className="mx-auto max-w-2xl px-6 pt-20 pb-12">
|
||||
{session ? <AccountBar session={session} /> : null}
|
||||
<h1 className="text-3xl font-bold text-ink">Your organizations</h1>
|
||||
<p className="mt-2 text-ink-mid">
|
||||
Each org is an isolated Molecule workspace.
|
||||
@@ -177,6 +187,40 @@ function Shell({ children }: { children: React.ReactNode }) {
|
||||
);
|
||||
}
|
||||
|
||||
// AccountBar renders the signed-in email + a Sign-out button at the
|
||||
// top of the page. Without this the user has no way to log out — the
|
||||
// /cp/auth/signout endpoint exists on the control plane but no UI ever
|
||||
// called it. Reported externally on 2026-05-05; this is the fix.
|
||||
//
|
||||
// Click → calls signOut() which POSTs /cp/auth/signout (clears the
|
||||
// WorkOS session cookie + revokes at the provider) then bounces to
|
||||
// /cp/auth/login. The signOut helper is best-effort — even on a 5xx
|
||||
// or network failure the redirect fires so the user never gets stuck
|
||||
// on an authed-looking page after they clicked Sign out.
|
||||
function AccountBar({ session }: { session: Session }) {
|
||||
const [signingOut, setSigningOut] = useState(false);
|
||||
return (
|
||||
<div className="mb-6 flex items-center justify-between text-sm text-ink-mid">
|
||||
<span title="Signed-in user">{session.email}</span>
|
||||
<button
|
||||
type="button"
|
||||
disabled={signingOut}
|
||||
onClick={async () => {
|
||||
setSigningOut(true);
|
||||
await signOut();
|
||||
// Redirect happens inside signOut; this line is for tests +
|
||||
// edge cases (jsdom, blocked navigation) where it doesn't.
|
||||
setSigningOut(false);
|
||||
}}
|
||||
className="rounded border border-line bg-surface-card px-3 py-1 text-xs text-ink hover:bg-surface-card disabled:opacity-50"
|
||||
aria-label="Sign out"
|
||||
>
|
||||
{signingOut ? "Signing out…" : "Sign out"}
|
||||
</button>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// DataResidencyNotice surfaces where workspace data lives so EU-based
|
||||
// signups can make an informed choice (GDPR Art. 13 disclosure
|
||||
// requirement). Plain text, no icon — the goal is clarity, not
|
||||
|
||||
@@ -48,16 +48,21 @@ export function EmptyState() {
|
||||
});
|
||||
|
||||
// "Create blank" bypasses templates entirely — no preflight, no
|
||||
// modal, just POST /workspaces with a default name and tier.
|
||||
// Deliberately NOT routed through useTemplateDeploy because it
|
||||
// has no `template.id` to deploy against.
|
||||
// modal, just POST /workspaces with a default name. Deliberately
|
||||
// NOT routed through useTemplateDeploy because it has no
|
||||
// `template.id` to deploy against.
|
||||
//
|
||||
// tier is omitted so the backend picks a SaaS-aware default
|
||||
// (T4 on SaaS, T3 on self-hosted — see WorkspaceHandler.DefaultTier).
|
||||
// The previous hardcoded `tier: 2` shipped every fresh-tenant agent
|
||||
// at Standard regardless of host, which surprised SaaS users whose
|
||||
// CreateWorkspaceDialog already defaults to T4.
|
||||
const createBlank = async () => {
|
||||
setBlankCreating(true);
|
||||
setBlankError(null);
|
||||
try {
|
||||
const ws = await api.post<{ id: string }>("/workspaces", {
|
||||
name: "My First Agent",
|
||||
tier: 2,
|
||||
canvas: firstDeployCoords(),
|
||||
});
|
||||
handleDeployed(ws.id);
|
||||
|
||||
@@ -20,160 +20,6 @@ import * as Dialog from "@radix-ui/react-dialog";
|
||||
|
||||
type Tab = "python" | "curl" | "claude" | "mcp" | "hermes" | "codex" | "openclaw" | "fields";
|
||||
|
||||
// Per-tab help metadata: docs link, where-to-install link, common errors.
|
||||
// All URLs verified against repo content (docs/guides/* file paths map to
|
||||
// docs.molecule.ai/docs/guides/*; canonical hostname confirmed by existing
|
||||
// blog post canonical metadata) or against the snippet text the operator
|
||||
// just copied. Never linking to a URL that wasn't already in product —
|
||||
// dead links here defeat the purpose of "more comprehensive instructions."
|
||||
const TAB_HELP: Record<
|
||||
Tab,
|
||||
{
|
||||
docsUrl?: string;
|
||||
docsLabel?: string;
|
||||
downloadUrl?: string;
|
||||
downloadLabel?: string;
|
||||
commonIssues?: { symptom: string; check: string }[];
|
||||
}
|
||||
> = {
|
||||
mcp: {
|
||||
docsUrl: "https://docs.molecule.ai/docs/guides/mcp-server-setup",
|
||||
docsLabel: "MCP server setup guide",
|
||||
downloadUrl: "https://pypi.org/project/molecule-ai-workspace-runtime/",
|
||||
downloadLabel: "molecule-ai-workspace-runtime on PyPI",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "Tools not appearing in your agent",
|
||||
check:
|
||||
"Run `claude mcp list` (or your runtime's equivalent) — the molecule entry should be listed. If missing, re-run the `claude mcp add` line.",
|
||||
},
|
||||
{
|
||||
symptom: "ConnectionRefused / DNS error on first call",
|
||||
check:
|
||||
"PLATFORM_URL must include the scheme (https://) and have no trailing slash. Verify with `curl $PLATFORM_URL/healthz`.",
|
||||
},
|
||||
],
|
||||
},
|
||||
python: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
downloadUrl: "https://pypi.org/project/molecule-ai-workspace-runtime/",
|
||||
downloadLabel: "molecule-ai-workspace-runtime on PyPI",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "401 from /heartbeat",
|
||||
check:
|
||||
"AUTH_TOKEN expired or wrong workspace_id. Tokens are shown only once at create time — re-create the workspace to get a fresh token.",
|
||||
},
|
||||
{
|
||||
symptom: "AGENT_URL not reachable from platform",
|
||||
check:
|
||||
"Public HTTPS URL required for inbound A2A. Use ngrok or Cloudflare Tunnel if your agent is behind NAT.",
|
||||
},
|
||||
],
|
||||
},
|
||||
claude: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
downloadUrl: "https://claude.com/claude-code",
|
||||
downloadLabel: "Claude Code (claude.com)",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "plugin not installed",
|
||||
check:
|
||||
"Run `/plugin marketplace add Molecule-AI/molecule-mcp-claude-channel` then `/plugin install molecule@molecule-mcp-claude-channel` inside Claude Code, then `/reload-plugins`.",
|
||||
},
|
||||
{
|
||||
symptom: "not on the approved channels allowlist",
|
||||
check:
|
||||
"Custom channels need `--dangerously-load-development-channels` on the launch command. Team/Enterprise orgs need admin to set `channelsEnabled` + `allowedChannelPlugins` in claude.ai admin settings.",
|
||||
},
|
||||
{
|
||||
symptom: "Inbound messages not arriving",
|
||||
check:
|
||||
"Check stderr for `molecule channel: connected — watching N workspace(s)`. Verify ~/.claude/channels/molecule/.env has the right PLATFORM_URL + token.",
|
||||
},
|
||||
],
|
||||
},
|
||||
hermes: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
downloadUrl: "https://github.com/NousResearch/hermes-agent",
|
||||
downloadLabel: "hermes-agent (NousResearch)",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "Gateway start failure",
|
||||
check:
|
||||
"Tail ~/.hermes/gateway.log. YAML duplicate-key in config.yaml is the most common cause — `gateway:` block must appear exactly once.",
|
||||
},
|
||||
{
|
||||
symptom: "Plugin not discovered after install",
|
||||
check:
|
||||
"Run `pip show hermes-channel-molecule` to confirm install. Some hermes builds need `hermes plugin reload` before the new platform_plugins entry takes effect.",
|
||||
},
|
||||
],
|
||||
},
|
||||
codex: {
|
||||
docsUrl: "https://docs.molecule.ai/docs/guides/mcp-server-setup",
|
||||
docsLabel: "MCP server setup guide",
|
||||
downloadUrl: "https://github.com/openai/codex",
|
||||
downloadLabel: "openai/codex",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "[mcp_servers.molecule] not loaded",
|
||||
check:
|
||||
"Codex must be ≥ 0.57. Check with `codex --version`; upgrade via `npm install -g @openai/codex@latest`.",
|
||||
},
|
||||
{
|
||||
symptom: "TOML parse error after re-running setup",
|
||||
check:
|
||||
"TOML rejects duplicate `[mcp_servers.molecule]` tables. Open ~/.codex/config.toml and remove the old block before pasting the new one.",
|
||||
},
|
||||
{
|
||||
symptom: "Canvas messages don't wake codex",
|
||||
check:
|
||||
"Step 3 (codex-channel-molecule bridge daemon) is required for inbound push. Check `pgrep -f codex-channel-molecule` and `tail ~/.codex-channel-molecule/daemon.log`.",
|
||||
},
|
||||
],
|
||||
},
|
||||
openclaw: {
|
||||
docsUrl: "https://docs.molecule.ai/docs/guides/mcp-server-setup",
|
||||
docsLabel: "MCP server setup guide",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "Gateway not starting",
|
||||
check:
|
||||
"Tail ~/.openclaw/gateway.log. The loopback bind requires :18789 to be free — check with `lsof -iTCP:18789`.",
|
||||
},
|
||||
{
|
||||
symptom: "openclaw mcp set rejected",
|
||||
check:
|
||||
"The heredoc generates JSON; verify it parsed by running `jq < ~/.openclaw/mcp/molecule.json`. Re-run `openclaw mcp set` if the file is malformed.",
|
||||
},
|
||||
],
|
||||
},
|
||||
curl: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "401 / 403 on register",
|
||||
check:
|
||||
"WORKSPACE_AUTH_TOKEN must be the value shown at workspace create. Tokens are shown only once.",
|
||||
},
|
||||
],
|
||||
},
|
||||
fields: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
},
|
||||
};
|
||||
|
||||
export interface ExternalConnectionInfo {
|
||||
workspace_id: string;
|
||||
platform_url: string;
|
||||
@@ -457,7 +303,6 @@ export function ExternalConnectModal({ info, onClose }: Props) {
|
||||
<Field label="heartbeat_endpoint" value={info.heartbeat_endpoint} onCopy={() => copy(info.heartbeat_endpoint, "hb")} copied={copiedKey === "hb"} />
|
||||
</div>
|
||||
)}
|
||||
<HelpBlock help={TAB_HELP[tab]} />
|
||||
</div>
|
||||
|
||||
<div className="mt-5 flex justify-end gap-2">
|
||||
@@ -506,70 +351,6 @@ function SnippetBlock({
|
||||
);
|
||||
}
|
||||
|
||||
// HelpBlock — collapsible "Need help?" section under each tab's snippet.
|
||||
// Renders only the keys present in the per-tab help metadata (no empty
|
||||
// sections). Closed by default so the snippet stays the visual focus;
|
||||
// operators with a working setup never see this. Uses native <details>
|
||||
// for keyboard accessibility (Tab + Enter) without extra ARIA wiring.
|
||||
function HelpBlock({
|
||||
help,
|
||||
}: {
|
||||
help: (typeof TAB_HELP)[Tab] | undefined;
|
||||
}) {
|
||||
if (!help) return null;
|
||||
const { docsUrl, docsLabel, downloadUrl, downloadLabel, commonIssues } = help;
|
||||
if (!docsUrl && !downloadUrl && !commonIssues?.length) return null;
|
||||
|
||||
return (
|
||||
<details className="mt-3 border border-line rounded-lg bg-surface text-xs">
|
||||
<summary className="cursor-pointer select-none px-3 py-2 text-ink-mid hover:text-ink">
|
||||
Need help? — install link, docs, common errors
|
||||
</summary>
|
||||
<div className="px-3 pb-3 pt-1 space-y-2">
|
||||
{downloadUrl && (
|
||||
<div>
|
||||
<span className="text-ink-soft">Where to install: </span>
|
||||
<a
|
||||
href={downloadUrl}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-accent underline hover:text-accent-strong"
|
||||
>
|
||||
{downloadLabel || downloadUrl}
|
||||
</a>
|
||||
</div>
|
||||
)}
|
||||
{docsUrl && (
|
||||
<div>
|
||||
<span className="text-ink-soft">Documentation: </span>
|
||||
<a
|
||||
href={docsUrl}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-accent underline hover:text-accent-strong"
|
||||
>
|
||||
{docsLabel || docsUrl}
|
||||
</a>
|
||||
</div>
|
||||
)}
|
||||
{commonIssues && commonIssues.length > 0 && (
|
||||
<div>
|
||||
<div className="text-ink-soft mb-1">Common errors:</div>
|
||||
<ul className="space-y-1.5 pl-3">
|
||||
{commonIssues.map((issue, i) => (
|
||||
<li key={i}>
|
||||
<code className="text-warm font-mono">{issue.symptom}</code>
|
||||
<span className="text-ink-mid"> — {issue.check}</span>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</details>
|
||||
);
|
||||
}
|
||||
|
||||
function Field({
|
||||
label,
|
||||
value,
|
||||
|
||||
@@ -1,261 +0,0 @@
|
||||
'use client';
|
||||
|
||||
import { useEffect, useRef, useState } from "react";
|
||||
import { createPortal } from "react-dom";
|
||||
import { api } from "@/lib/api";
|
||||
import type { MemoryEntry } from "@/components/MemoryInspectorPanel";
|
||||
|
||||
type Scope = "LOCAL" | "TEAM" | "GLOBAL";
|
||||
const SCOPES: Scope[] = ["LOCAL", "TEAM", "GLOBAL"];
|
||||
|
||||
interface AddProps {
|
||||
open: boolean;
|
||||
mode: "add";
|
||||
workspaceId: string;
|
||||
defaultScope: Scope;
|
||||
defaultNamespace?: string;
|
||||
entry?: undefined;
|
||||
onClose: () => void;
|
||||
onSaved: () => void;
|
||||
}
|
||||
|
||||
interface EditProps {
|
||||
open: boolean;
|
||||
mode: "edit";
|
||||
workspaceId: string;
|
||||
entry: MemoryEntry;
|
||||
defaultScope?: undefined;
|
||||
defaultNamespace?: undefined;
|
||||
onClose: () => void;
|
||||
onSaved: () => void;
|
||||
}
|
||||
|
||||
type Props = AddProps | EditProps;
|
||||
|
||||
export function MemoryEditorDialog(props: Props) {
|
||||
const { open, mode, workspaceId, onClose, onSaved } = props;
|
||||
const dialogRef = useRef<HTMLDivElement>(null);
|
||||
const [mounted, setMounted] = useState(false);
|
||||
const [scope, setScope] = useState<Scope>("LOCAL");
|
||||
const [namespace, setNamespace] = useState("general");
|
||||
const [content, setContent] = useState("");
|
||||
const [saving, setSaving] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
setMounted(true);
|
||||
}, []);
|
||||
|
||||
// Reset form whenever the dialog opens.
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
setError(null);
|
||||
setSaving(false);
|
||||
if (mode === "edit" && props.entry) {
|
||||
setScope(props.entry.scope);
|
||||
setNamespace(props.entry.namespace || "general");
|
||||
setContent(props.entry.content);
|
||||
} else if (mode === "add") {
|
||||
setScope(props.defaultScope);
|
||||
setNamespace(props.defaultNamespace || "general");
|
||||
setContent("");
|
||||
}
|
||||
// mode/props are stable per-open; intentional shallow deps.
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [open]);
|
||||
|
||||
// Move focus into the dialog when it opens (WCAG SC 2.4.3).
|
||||
useEffect(() => {
|
||||
if (!open || !mounted) return;
|
||||
const raf = requestAnimationFrame(() => {
|
||||
dialogRef.current?.querySelector<HTMLElement>("textarea, input, select")?.focus();
|
||||
});
|
||||
return () => cancelAnimationFrame(raf);
|
||||
}, [open, mounted]);
|
||||
|
||||
// Escape closes; Cmd/Ctrl-Enter saves.
|
||||
const onCloseRef = useRef(onClose);
|
||||
onCloseRef.current = onClose;
|
||||
const handleSaveRef = useRef<() => void>(() => {});
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
const handler = (e: KeyboardEvent) => {
|
||||
if (e.key === "Escape") {
|
||||
e.preventDefault();
|
||||
onCloseRef.current();
|
||||
} else if (e.key === "Enter" && (e.metaKey || e.ctrlKey)) {
|
||||
e.preventDefault();
|
||||
handleSaveRef.current();
|
||||
}
|
||||
};
|
||||
window.addEventListener("keydown", handler);
|
||||
return () => window.removeEventListener("keydown", handler);
|
||||
}, [open]);
|
||||
|
||||
const handleSave = async () => {
|
||||
if (saving) return;
|
||||
const trimmed = content.trim();
|
||||
if (!trimmed) {
|
||||
setError("Content cannot be empty");
|
||||
return;
|
||||
}
|
||||
setError(null);
|
||||
setSaving(true);
|
||||
try {
|
||||
if (mode === "add") {
|
||||
await api.post(`/workspaces/${workspaceId}/memories`, {
|
||||
content: trimmed,
|
||||
scope,
|
||||
namespace: namespace.trim() || "general",
|
||||
});
|
||||
} else {
|
||||
// PATCH only sends fields that changed. Content always changeable;
|
||||
// namespace only sent if it differs from the original (saves a
|
||||
// no-op write through redactSecrets + re-embed).
|
||||
const original = props.entry;
|
||||
const body: Record<string, string> = {};
|
||||
if (trimmed !== original.content) body.content = trimmed;
|
||||
const ns = namespace.trim() || "general";
|
||||
if (ns !== original.namespace) body.namespace = ns;
|
||||
if (Object.keys(body).length === 0) {
|
||||
// No-op edit — close without an HTTP round-trip.
|
||||
onSaved();
|
||||
onClose();
|
||||
return;
|
||||
}
|
||||
await api.patch(
|
||||
`/workspaces/${workspaceId}/memories/${encodeURIComponent(original.id)}`,
|
||||
body,
|
||||
);
|
||||
}
|
||||
onSaved();
|
||||
onClose();
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : "Save failed");
|
||||
} finally {
|
||||
setSaving(false);
|
||||
}
|
||||
};
|
||||
handleSaveRef.current = handleSave;
|
||||
|
||||
if (!open || !mounted) return null;
|
||||
|
||||
const titleId = "memory-editor-title";
|
||||
const isEdit = mode === "edit";
|
||||
|
||||
return createPortal(
|
||||
<div className="fixed inset-0 z-[9999] flex items-center justify-center">
|
||||
<div className="absolute inset-0 bg-black/60 backdrop-blur-sm" onClick={onClose} />
|
||||
|
||||
<div
|
||||
ref={dialogRef}
|
||||
role="dialog"
|
||||
aria-modal="true"
|
||||
aria-labelledby={titleId}
|
||||
className="relative bg-surface-sunken border border-line rounded-xl shadow-2xl shadow-black/50 max-w-[480px] w-full mx-4 overflow-hidden"
|
||||
>
|
||||
<div className="px-5 py-4 space-y-3">
|
||||
<h3 id={titleId} className="text-sm font-semibold text-ink">
|
||||
{isEdit ? "Edit memory" : "Add memory"}
|
||||
</h3>
|
||||
|
||||
{/* Scope */}
|
||||
<div className="space-y-1">
|
||||
<label className="text-[10px] text-ink-soft block" htmlFor="memory-editor-scope">
|
||||
Scope
|
||||
</label>
|
||||
{isEdit ? (
|
||||
<div
|
||||
id="memory-editor-scope"
|
||||
className="text-[12px] font-mono text-ink-mid bg-surface rounded px-2 py-1.5 border border-line/50"
|
||||
title="Scope is fixed on edit. To move a memory across scopes, delete and re-create it."
|
||||
>
|
||||
{scope}
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex items-center gap-1" id="memory-editor-scope" role="radiogroup" aria-label="Scope">
|
||||
{SCOPES.map((s) => (
|
||||
<button
|
||||
key={s}
|
||||
type="button"
|
||||
role="radio"
|
||||
aria-checked={scope === s}
|
||||
onClick={() => setScope(s)}
|
||||
className={[
|
||||
"px-3 py-1 text-[11px] rounded transition-colors",
|
||||
scope === s
|
||||
? "bg-accent-strong text-white"
|
||||
: "bg-surface-card text-ink-mid hover:text-ink",
|
||||
].join(" ")}
|
||||
>
|
||||
{s}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Namespace */}
|
||||
<div className="space-y-1">
|
||||
<label htmlFor="memory-editor-namespace" className="text-[10px] text-ink-soft block">
|
||||
Namespace
|
||||
</label>
|
||||
<input
|
||||
id="memory-editor-namespace"
|
||||
type="text"
|
||||
value={namespace}
|
||||
onChange={(e) => setNamespace(e.target.value)}
|
||||
placeholder="general"
|
||||
className="w-full bg-surface border border-line/60 focus:border-accent/60 rounded px-2 py-1.5 text-[12px] text-ink placeholder-zinc-600 focus:outline-none transition-colors"
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Content */}
|
||||
<div className="space-y-1">
|
||||
<label htmlFor="memory-editor-content" className="text-[10px] text-ink-soft block">
|
||||
Content
|
||||
</label>
|
||||
<textarea
|
||||
id="memory-editor-content"
|
||||
value={content}
|
||||
onChange={(e) => setContent(e.target.value)}
|
||||
rows={6}
|
||||
placeholder="What should the agent remember?"
|
||||
className="w-full bg-surface border border-line/60 focus:border-accent/60 rounded px-2 py-1.5 text-[12px] font-mono text-ink placeholder-zinc-600 focus:outline-none transition-colors resize-y min-h-[100px] max-h-[300px]"
|
||||
/>
|
||||
</div>
|
||||
|
||||
{error && (
|
||||
<div
|
||||
role="alert"
|
||||
aria-live="assertive"
|
||||
className="px-2 py-1.5 bg-red-950/30 border border-red-800/40 rounded text-[11px] text-bad"
|
||||
>
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className="flex items-center justify-end gap-2 px-5 py-3 border-t border-line bg-surface/50">
|
||||
<button
|
||||
type="button"
|
||||
onClick={onClose}
|
||||
disabled={saving}
|
||||
className="px-3.5 py-1.5 text-[13px] text-ink-mid hover:text-ink bg-surface-card hover:bg-surface-elevated border border-line hover:border-line-soft rounded-lg transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40 disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleSave}
|
||||
disabled={saving}
|
||||
className="px-3.5 py-1.5 text-[13px] rounded-lg transition-colors bg-accent hover:bg-accent-strong text-white focus:outline-none focus-visible:ring-2 focus-visible:ring-offset-2 focus-visible:ring-offset-surface-sunken focus-visible:ring-accent/60 disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
{saving ? "Saving…" : isEdit ? "Save changes" : "Add memory"}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>,
|
||||
document.body,
|
||||
);
|
||||
}
|
||||
@@ -1,30 +1,81 @@
|
||||
'use client';
|
||||
|
||||
import { useState, useEffect, useCallback } from "react";
|
||||
import { api } from "@/lib/api";
|
||||
import { ConfirmDialog } from "@/components/ConfirmDialog";
|
||||
import { MemoryEditorDialog } from "@/components/MemoryEditorDialog";
|
||||
/**
|
||||
* MemoryInspectorPanel — Memory v2 redesign.
|
||||
*
|
||||
* Reads the canvas Memory tab from the v2 plugin via the
|
||||
* workspace-server proxy at /v2/{namespaces,memories}, replacing the
|
||||
* v1 LOCAL/TEAM/GLOBAL trio that mapped to the deprecated
|
||||
* shared_context model.
|
||||
*
|
||||
* Surface differences from v1:
|
||||
* - Namespace dropdown driven by GET /v2/namespaces (workspace /
|
||||
* team / org / custom — labels rendered server-side).
|
||||
* - Per-row badges for kind (fact|summary|checkpoint), source
|
||||
* (agent|runtime|user), pin (📌), TTL countdown, and propagation
|
||||
* source-workspace if the memory came from a peer.
|
||||
* - No Edit affordance — v2's plugin contract has no PATCH; the
|
||||
* model is forget + recommit. Delete (Forget) stays.
|
||||
*
|
||||
* Shipping note: when the plugin isn't wired (MEMORY_PLUGIN_URL
|
||||
* unset), every endpoint returns 503 with a clear hint. The panel
|
||||
* surfaces that as a banner so operators know to set the env var,
|
||||
* rather than rendering a perpetual empty state that looks like
|
||||
* "no memories yet".
|
||||
*/
|
||||
|
||||
import { useCallback, useEffect, useMemo, useState } from 'react';
|
||||
import { api } from '@/lib/api';
|
||||
import { ConfirmDialog } from '@/components/ConfirmDialog';
|
||||
|
||||
// ── Types ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Memory entry returned by GET /workspaces/:id/memories */
|
||||
export interface MemoryEntry {
|
||||
id: string;
|
||||
workspace_id: string;
|
||||
content: string;
|
||||
scope: "LOCAL" | "TEAM" | "GLOBAL";
|
||||
namespace: string;
|
||||
created_at: string;
|
||||
/**
|
||||
* Semantic similarity score (0–1). Only present when the API is queried
|
||||
* with ?q=<query> and the pgvector backend has been deployed.
|
||||
* Absent on plain list fetches — renders gracefully without a badge.
|
||||
*/
|
||||
similarity_score?: number;
|
||||
export type NamespaceKind = 'workspace' | 'team' | 'org' | 'custom';
|
||||
|
||||
export interface NamespaceView {
|
||||
name: string;
|
||||
kind: NamespaceKind;
|
||||
label: string;
|
||||
}
|
||||
|
||||
type Scope = "LOCAL" | "TEAM" | "GLOBAL";
|
||||
const SCOPES: Scope[] = ["LOCAL", "TEAM", "GLOBAL"];
|
||||
export interface NamespacesResponse {
|
||||
readable: NamespaceView[];
|
||||
writable: NamespaceView[];
|
||||
}
|
||||
|
||||
export type MemoryKind = 'fact' | 'summary' | 'checkpoint';
|
||||
export type MemorySource = 'agent' | 'runtime' | 'user';
|
||||
|
||||
export interface MemoryV2 {
|
||||
id: string;
|
||||
namespace: string;
|
||||
content: string;
|
||||
kind: MemoryKind;
|
||||
source: MemorySource;
|
||||
pin: boolean;
|
||||
expires_at?: string | null;
|
||||
created_at: string;
|
||||
/** 0..1 plugin similarity score; only present when ?q= is set. */
|
||||
score?: number | null;
|
||||
// Note: an earlier iteration of this type carried a `source_workspace_id`
|
||||
// field rendered as a "from peer" badge. The propagation contract that
|
||||
// would have populated it ("Reserved for future cross-namespace
|
||||
// propagation semantics" in memory-plugin-v1.yaml) is unimplemented —
|
||||
// nothing in the codebase writes that key. Removed in self-review.
|
||||
// Re-add when propagation gains a concrete shape.
|
||||
}
|
||||
|
||||
interface MemoriesResponse {
|
||||
memories: MemoryV2[];
|
||||
}
|
||||
|
||||
// MemoryEntry kept as a back-compat type alias so any other component
|
||||
// still importing it doesn't break the build. New consumers should
|
||||
// prefer MemoryV2 — the v1 shape (LOCAL/TEAM/GLOBAL scope) is gone.
|
||||
//
|
||||
// `unknown` is used over `any` so TS still flags accidental field
|
||||
// access on the legacy shape.
|
||||
export type MemoryEntry = MemoryV2;
|
||||
|
||||
interface Props {
|
||||
workspaceId: string;
|
||||
@@ -32,11 +83,26 @@ interface Props {
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Sanitise a memory id for use in an HTML id attribute.
|
||||
*/
|
||||
function sanitizeId(id: string): string {
|
||||
return id.replace(/[^a-zA-Z0-9]/g, "-");
|
||||
return id.replace(/[^a-zA-Z0-9]/g, '-');
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect a memory-plugin-503 error from the api wrapper's stringified
|
||||
* Error message. Matches on the literal env-var name rather than the
|
||||
* status code, because the api shim renders status codes inside a
|
||||
* larger formatted message and a future status-code reformat would
|
||||
* silently break the detection.
|
||||
*
|
||||
* The substring `MEMORY_PLUGIN_URL` is hard-coded in the handler at
|
||||
* `workspace-server/internal/handlers/memories_v2.go:available()`,
|
||||
* so this is a pinned cross-layer contract — drift is caught by both
|
||||
* the Go test (TestMemoriesV2_PluginUnwired_All503) and the canvas
|
||||
* test (TestMemoryInspectorPanel — plugin unavailable).
|
||||
*/
|
||||
export function isPluginUnavailableError(err: unknown): boolean {
|
||||
const msg = err instanceof Error ? err.message : '';
|
||||
return msg.includes('MEMORY_PLUGIN_URL');
|
||||
}
|
||||
|
||||
function formatRelativeTime(iso: string): string {
|
||||
@@ -47,6 +113,24 @@ function formatRelativeTime(iso: string): string {
|
||||
return new Date(iso).toLocaleDateString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Render a TTL countdown like "12h", "3d", or "expired" (when the
|
||||
* stored expires_at is in the past). Non-fatal if expires_at is null
|
||||
* or invalid — falls through to empty string so the badge doesn't
|
||||
* render.
|
||||
*/
|
||||
export function formatTTL(expiresAt: string | null | undefined): string {
|
||||
if (!expiresAt) return '';
|
||||
const ts = new Date(expiresAt).getTime();
|
||||
if (Number.isNaN(ts)) return '';
|
||||
const diff = ts - Date.now();
|
||||
if (diff <= 0) return 'expired';
|
||||
if (diff < 60_000) return `${Math.floor(diff / 1000)}s`;
|
||||
if (diff < 3_600_000) return `${Math.floor(diff / 60_000)}m`;
|
||||
if (diff < 86_400_000) return `${Math.floor(diff / 3_600_000)}h`;
|
||||
return `${Math.floor(diff / 86_400_000)}d`;
|
||||
}
|
||||
|
||||
// ── Skeleton rows ──────────────────────────────────────────────────────────────
|
||||
|
||||
function MemorySkeletonRows() {
|
||||
@@ -71,63 +155,92 @@ function MemorySkeletonRows() {
|
||||
|
||||
// ── Component ─────────────────────────────────────────────────────────────────
|
||||
|
||||
const ALL_NAMESPACES = '__all__';
|
||||
|
||||
export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
const [activeScope, setActiveScope] = useState<Scope>("LOCAL");
|
||||
const [activeNamespace, setActiveNamespace] = useState("");
|
||||
const [entries, setEntries] = useState<MemoryEntry[]>([]);
|
||||
const [namespaces, setNamespaces] = useState<NamespacesResponse | null>(null);
|
||||
const [activeNamespace, setActiveNamespace] = useState<string>(ALL_NAMESPACES);
|
||||
const [entries, setEntries] = useState<MemoryV2[]>([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
// ── Search state (debounced) ────────────────────────────────────────────────
|
||||
const [searchQuery, setSearchQuery] = useState("");
|
||||
const [debouncedQuery, setDebouncedQuery] = useState("");
|
||||
// Plugin-disabled banner (503 from server). Stored separately so we
|
||||
// can keep showing the namespace dropdown empty rather than
|
||||
// hiding the whole panel.
|
||||
const [pluginUnavailable, setPluginUnavailable] = useState(false);
|
||||
|
||||
// Search state (debounced)
|
||||
const [searchQuery, setSearchQuery] = useState('');
|
||||
const [debouncedQuery, setDebouncedQuery] = useState('');
|
||||
|
||||
useEffect(() => {
|
||||
const timer = setTimeout(
|
||||
() => setDebouncedQuery(searchQuery.trim()),
|
||||
300
|
||||
);
|
||||
const timer = setTimeout(() => setDebouncedQuery(searchQuery.trim()), 300);
|
||||
return () => clearTimeout(timer);
|
||||
}, [searchQuery]);
|
||||
|
||||
// ── Delete state ─────────────────────────────────────────────────────────────
|
||||
// Delete state
|
||||
const [pendingDeleteId, setPendingDeleteId] = useState<string | null>(null);
|
||||
|
||||
// ── Editor state (Add + Edit share one modal) ───────────────────────────────
|
||||
type EditorState =
|
||||
| { mode: "add" }
|
||||
| { mode: "edit"; entry: MemoryEntry }
|
||||
| null;
|
||||
const [editorState, setEditorState] = useState<EditorState>(null);
|
||||
// ── Namespace loading ──────────────────────────────────────────────────────
|
||||
|
||||
// ── Data loading ────────────────────────────────────────────────────────────
|
||||
const loadNamespaces = useCallback(async () => {
|
||||
try {
|
||||
const data = await api.get<NamespacesResponse>(
|
||||
`/workspaces/${workspaceId}/v2/namespaces`,
|
||||
);
|
||||
setNamespaces(data);
|
||||
setPluginUnavailable(false);
|
||||
} catch (e) {
|
||||
// Plugin-unavailable (503) indicates MEMORY_PLUGIN_URL isn't set.
|
||||
// Anything else stays as a generic load failure that the
|
||||
// entries-load path will also flag.
|
||||
if (isPluginUnavailableError(e)) {
|
||||
setPluginUnavailable(true);
|
||||
}
|
||||
setNamespaces({ readable: [], writable: [] });
|
||||
}
|
||||
}, [workspaceId]);
|
||||
|
||||
// ── Entries loading ────────────────────────────────────────────────────────
|
||||
|
||||
const loadEntries = useCallback(async () => {
|
||||
setLoading(true);
|
||||
setError(null);
|
||||
try {
|
||||
const params = new URLSearchParams();
|
||||
params.set("scope", activeScope);
|
||||
if (debouncedQuery) params.set("q", debouncedQuery);
|
||||
if (activeNamespace) params.set("namespace", activeNamespace);
|
||||
if (activeNamespace !== ALL_NAMESPACES) {
|
||||
params.set('namespace', activeNamespace);
|
||||
}
|
||||
if (debouncedQuery) params.set('q', debouncedQuery);
|
||||
|
||||
const url = `/workspaces/${workspaceId}/memories?${params.toString()}`;
|
||||
const data = await api.get<MemoryEntry[]>(url);
|
||||
const url = `/workspaces/${workspaceId}/v2/memories?${params.toString()}`;
|
||||
const data = await api.get<MemoriesResponse>(url);
|
||||
|
||||
// When a semantic query is active, sort by similarity_score descending.
|
||||
// When a semantic query is active and the plugin returns
|
||||
// scores, sort by score descending so the most-relevant hit
|
||||
// sits at the top. Empty score → push to bottom.
|
||||
const sorted = debouncedQuery
|
||||
? [...data].sort(
|
||||
(a, b) => (b.similarity_score ?? 0) - (a.similarity_score ?? 0)
|
||||
? [...data.memories].sort(
|
||||
(a, b) => (b.score ?? 0) - (a.score ?? 0),
|
||||
)
|
||||
: data;
|
||||
: data.memories;
|
||||
setEntries(sorted);
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : "Failed to load memories");
|
||||
if (isPluginUnavailableError(e)) {
|
||||
setPluginUnavailable(true);
|
||||
setError(null); // surfaced via banner, not row error
|
||||
} else {
|
||||
setError(e instanceof Error ? e.message : 'Failed to load memories');
|
||||
}
|
||||
setEntries([]);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}, [workspaceId, activeScope, debouncedQuery, activeNamespace]);
|
||||
}, [workspaceId, activeNamespace, debouncedQuery]);
|
||||
|
||||
useEffect(() => {
|
||||
loadNamespaces();
|
||||
}, [loadNamespaces]);
|
||||
|
||||
useEffect(() => {
|
||||
loadEntries();
|
||||
@@ -144,16 +257,35 @@ export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
setEntries((prev) => prev.filter((e) => e.id !== id));
|
||||
|
||||
try {
|
||||
await api.del(`/workspaces/${workspaceId}/memories/${encodeURIComponent(id)}`);
|
||||
await api.del(`/workspaces/${workspaceId}/v2/memories/${encodeURIComponent(id)}`);
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : "Delete failed — reloading...");
|
||||
// Reload first (which clears any stale error), THEN set the
|
||||
// delete-failure message — otherwise loadEntries' own
|
||||
// `setError(null)` wipes our error before the user sees it.
|
||||
// Caught by the rollback test in MemoryInspectorPanel.test.tsx.
|
||||
const msg = e instanceof Error ? e.message : 'Delete failed — reloading…';
|
||||
await loadEntries();
|
||||
setError(msg);
|
||||
}
|
||||
}, [pendingDeleteId, workspaceId, loadEntries]);
|
||||
|
||||
// ── Namespace dropdown options ─────────────────────────────────────────────
|
||||
|
||||
const dropdownOptions = useMemo(() => {
|
||||
const opts: Array<{ value: string; label: string; kind?: NamespaceKind }> = [
|
||||
{ value: ALL_NAMESPACES, label: 'All namespaces' },
|
||||
];
|
||||
if (namespaces) {
|
||||
for (const ns of namespaces.readable) {
|
||||
opts.push({ value: ns.name, label: ns.label, kind: ns.kind });
|
||||
}
|
||||
}
|
||||
return opts;
|
||||
}, [namespaces]);
|
||||
|
||||
// ── Render ──────────────────────────────────────────────────────────────────
|
||||
|
||||
if (loading && entries.length === 0 && !error) {
|
||||
if (loading && entries.length === 0 && !error && !pluginUnavailable) {
|
||||
return (
|
||||
<div className="flex items-center justify-center h-32">
|
||||
<span className="text-xs text-ink-soft">Loading memories…</span>
|
||||
@@ -163,32 +295,44 @@ export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
|
||||
return (
|
||||
<div className="flex flex-col h-full">
|
||||
{/* Scope tabs */}
|
||||
<div className="px-4 pt-3 pb-2 border-b border-line/40 shrink-0">
|
||||
<div className="flex items-center gap-1">
|
||||
{SCOPES.map((scope) => (
|
||||
<button
|
||||
type="button"
|
||||
key={scope}
|
||||
onClick={() => setActiveScope(scope)}
|
||||
aria-pressed={activeScope === scope}
|
||||
className={[
|
||||
"px-3 py-1 text-[11px] rounded transition-colors",
|
||||
activeScope === scope
|
||||
? "bg-accent-strong text-white"
|
||||
: "bg-surface-card text-ink-mid hover:bg-surface-card hover:text-ink",
|
||||
].join(" ")}
|
||||
>
|
||||
{scope}
|
||||
</button>
|
||||
))}
|
||||
{/* Plugin-unavailable banner */}
|
||||
{pluginUnavailable && (
|
||||
<div
|
||||
role="alert"
|
||||
aria-live="polite"
|
||||
className="mx-4 mt-3 px-3 py-2 bg-amber-950/30 border border-amber-800/40 rounded text-xs text-amber-300 shrink-0"
|
||||
data-testid="plugin-unavailable-banner"
|
||||
>
|
||||
Memory plugin not configured. Set <code>MEMORY_PLUGIN_URL</code> on the
|
||||
workspace-server to enable v2 memory.
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Search bar + namespace filter */}
|
||||
{/* Namespace dropdown */}
|
||||
<div className="px-4 pt-3 pb-2 border-b border-line/40 shrink-0 space-y-2">
|
||||
<div className="flex items-center gap-2">
|
||||
<label htmlFor="namespace-dropdown" className="text-[10px] text-ink-soft shrink-0">
|
||||
Namespace:
|
||||
</label>
|
||||
<select
|
||||
id="namespace-dropdown"
|
||||
value={activeNamespace}
|
||||
onChange={(e) => setActiveNamespace(e.target.value)}
|
||||
aria-label="Filter by namespace"
|
||||
disabled={pluginUnavailable}
|
||||
className="flex-1 bg-surface-sunken border border-line/60 focus:border-accent/60 rounded px-2 py-1 text-[11px] text-ink focus:outline-none transition-colors min-w-0 disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
{dropdownOptions.map((opt) => (
|
||||
<option key={opt.value} value={opt.value}>
|
||||
{opt.label}
|
||||
{opt.kind ? ` (${opt.kind})` : ''}
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
{/* Search bar */}
|
||||
<div className="relative flex items-center">
|
||||
{/* Magnifying glass icon */}
|
||||
<svg
|
||||
width="12"
|
||||
height="12"
|
||||
@@ -206,14 +350,15 @@ export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
onChange={(e) => setSearchQuery(e.target.value)}
|
||||
placeholder="Semantic search…"
|
||||
aria-label="Search memories"
|
||||
className="w-full bg-surface-sunken border border-line/60 focus:border-accent/60 rounded-lg pl-8 pr-7 py-1.5 text-[11px] text-ink placeholder-zinc-600 focus:outline-none transition-colors"
|
||||
disabled={pluginUnavailable}
|
||||
className="w-full bg-surface-sunken border border-line/60 focus:border-accent/60 rounded-lg pl-8 pr-7 py-1.5 text-[11px] text-ink placeholder-zinc-600 focus:outline-none transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
/>
|
||||
{searchQuery && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => {
|
||||
setSearchQuery("");
|
||||
setDebouncedQuery("");
|
||||
setSearchQuery('');
|
||||
setDebouncedQuery('');
|
||||
}}
|
||||
aria-label="Clear search"
|
||||
className="absolute right-2 text-ink-soft hover:text-ink transition-colors text-sm leading-none"
|
||||
@@ -222,51 +367,26 @@ export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Namespace filter */}
|
||||
<div className="flex items-center gap-2">
|
||||
<label htmlFor="namespace-filter" className="text-[10px] text-ink-soft shrink-0">
|
||||
Namespace:
|
||||
</label>
|
||||
<input
|
||||
id="namespace-filter"
|
||||
type="text"
|
||||
value={activeNamespace}
|
||||
onChange={(e) => setActiveNamespace(e.target.value)}
|
||||
placeholder="all namespaces"
|
||||
aria-label="Filter by namespace"
|
||||
className="flex-1 bg-surface-sunken border border-line/60 focus:border-accent/60 rounded px-2 py-1 text-[11px] text-ink placeholder-zinc-600 focus:outline-none transition-colors min-w-0"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Toolbar */}
|
||||
<div className="px-4 py-2.5 border-b border-line/40 flex items-center justify-between shrink-0">
|
||||
<span className="text-[11px] text-ink-soft">
|
||||
{debouncedQuery
|
||||
? `${entries.length} result${entries.length !== 1 ? "s" : ""}`
|
||||
? `${entries.length} result${entries.length !== 1 ? 's' : ''}`
|
||||
: entries.length === 1
|
||||
? "1 memory"
|
||||
: `${entries.length} memories`}
|
||||
? '1 memory'
|
||||
: `${entries.length} memories`}
|
||||
</span>
|
||||
<div className="flex items-center gap-1.5">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setEditorState({ mode: "add" })}
|
||||
className="px-2 py-1 text-[11px] bg-accent hover:bg-accent-strong text-white rounded transition-colors"
|
||||
aria-label="Add memory"
|
||||
>
|
||||
+ Add
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={loadEntries}
|
||||
className="px-2 py-1 text-[11px] bg-surface-card hover:bg-surface-card text-ink-mid rounded transition-colors"
|
||||
aria-label="Refresh memories"
|
||||
>
|
||||
↻ Refresh
|
||||
</button>
|
||||
</div>
|
||||
<button
|
||||
type="button"
|
||||
onClick={loadEntries}
|
||||
disabled={pluginUnavailable}
|
||||
className="px-2 py-1 text-[11px] bg-surface-card hover:bg-surface-card text-ink-mid rounded transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
aria-label="Refresh memories"
|
||||
>
|
||||
↻ Refresh
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Error banner */}
|
||||
@@ -285,47 +405,13 @@ export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
{loading ? (
|
||||
<MemorySkeletonRows />
|
||||
) : entries.length === 0 ? (
|
||||
debouncedQuery ? (
|
||||
<div className="flex flex-col items-center justify-center py-16 gap-3 text-center">
|
||||
<span className="text-4xl text-ink-soft" aria-hidden="true">◇</span>
|
||||
<p className="text-sm font-medium text-ink-mid">
|
||||
No memories match your search
|
||||
</p>
|
||||
<p className="text-[11px] text-ink-soft max-w-[200px] leading-relaxed">
|
||||
Try a different query or{" "}
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => {
|
||||
setSearchQuery("");
|
||||
setDebouncedQuery("");
|
||||
}}
|
||||
className="text-accent hover:text-accent underline transition-colors"
|
||||
>
|
||||
clear the search
|
||||
</button>
|
||||
.
|
||||
</p>
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex flex-col items-center justify-center py-16 gap-3 text-center">
|
||||
<span className="text-4xl text-ink-soft" aria-hidden="true">◇</span>
|
||||
<p className="text-sm font-medium text-ink-mid">No {activeScope} memories</p>
|
||||
<p className="text-[11px] text-ink-soft max-w-[200px] leading-relaxed">
|
||||
{activeScope === "LOCAL"
|
||||
? "This workspace has not written any local memories yet."
|
||||
: activeScope === "TEAM"
|
||||
? "No team memories shared with this workspace yet."
|
||||
: "No global memories exist yet."}
|
||||
</p>
|
||||
</div>
|
||||
)
|
||||
<EmptyState query={debouncedQuery} pluginUnavailable={pluginUnavailable} />
|
||||
) : (
|
||||
<div className="space-y-1.5">
|
||||
{entries.map((entry) => (
|
||||
<MemoryEntryRow
|
||||
key={entry.id}
|
||||
entry={entry}
|
||||
onEdit={() => setEditorState({ mode: "edit", entry })}
|
||||
onDelete={() => setPendingDeleteId(entry.id)}
|
||||
/>
|
||||
))}
|
||||
@@ -336,36 +422,64 @@ export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
{/* Delete confirmation dialog */}
|
||||
<ConfirmDialog
|
||||
open={pendingDeleteId !== null}
|
||||
title="Delete memory"
|
||||
message={`Delete this ${activeScope} memory? This cannot be undone.`}
|
||||
confirmLabel="Delete"
|
||||
title="Forget memory"
|
||||
message="Forget this memory? This cannot be undone."
|
||||
confirmLabel="Forget"
|
||||
confirmVariant="danger"
|
||||
onConfirm={confirmDelete}
|
||||
onCancel={() => setPendingDeleteId(null)}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
{/* Add / Edit dialog */}
|
||||
{editorState?.mode === "add" && (
|
||||
<MemoryEditorDialog
|
||||
open={true}
|
||||
mode="add"
|
||||
workspaceId={workspaceId}
|
||||
defaultScope={activeScope}
|
||||
defaultNamespace={activeNamespace || "general"}
|
||||
onClose={() => setEditorState(null)}
|
||||
onSaved={loadEntries}
|
||||
/>
|
||||
)}
|
||||
{editorState?.mode === "edit" && (
|
||||
<MemoryEditorDialog
|
||||
open={true}
|
||||
mode="edit"
|
||||
workspaceId={workspaceId}
|
||||
entry={editorState.entry}
|
||||
onClose={() => setEditorState(null)}
|
||||
onSaved={loadEntries}
|
||||
/>
|
||||
)}
|
||||
// ── Empty state ─────────────────────────────────────────────────────────────
|
||||
|
||||
function EmptyState({
|
||||
query,
|
||||
pluginUnavailable,
|
||||
}: {
|
||||
query: string;
|
||||
pluginUnavailable: boolean;
|
||||
}) {
|
||||
if (pluginUnavailable) {
|
||||
// The banner already explains the problem; the empty rows just
|
||||
// mirror it so the operator sees both signals.
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-16 gap-3 text-center">
|
||||
<span className="text-4xl text-ink-soft" aria-hidden="true">
|
||||
◇
|
||||
</span>
|
||||
<p className="text-sm font-medium text-ink-mid">Memory plugin disabled</p>
|
||||
<p className="text-[11px] text-ink-soft max-w-[220px] leading-relaxed">
|
||||
See banner above for the operator-side fix.
|
||||
</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
if (query) {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-16 gap-3 text-center">
|
||||
<span className="text-4xl text-ink-soft" aria-hidden="true">
|
||||
◇
|
||||
</span>
|
||||
<p className="text-sm font-medium text-ink-mid">No memories match your search</p>
|
||||
<p className="text-[11px] text-ink-soft max-w-[200px] leading-relaxed">
|
||||
Try a different query or clear the search.
|
||||
</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-16 gap-3 text-center">
|
||||
<span className="text-4xl text-ink-soft" aria-hidden="true">
|
||||
◇
|
||||
</span>
|
||||
<p className="text-sm font-medium text-ink-mid">No memories yet</p>
|
||||
<p className="text-[11px] text-ink-soft max-w-[220px] leading-relaxed">
|
||||
Agents commit memories via MCP tools (commit_memory, commit_summary). They
|
||||
appear here once written.
|
||||
</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -373,17 +487,32 @@ export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
// ── MemoryEntryRow sub-component ──────────────────────────────────────────────
|
||||
|
||||
interface MemoryEntryRowProps {
|
||||
entry: MemoryEntry;
|
||||
onEdit: () => void;
|
||||
entry: MemoryV2;
|
||||
onDelete: () => void;
|
||||
}
|
||||
|
||||
function MemoryEntryRow({ entry, onEdit, onDelete }: MemoryEntryRowProps) {
|
||||
const KIND_BADGE_CLASS: Record<MemoryKind, string> = {
|
||||
fact: 'bg-surface-card text-ink-mid',
|
||||
summary: 'bg-blue-950 text-accent',
|
||||
checkpoint: 'bg-violet-950 text-violet-400',
|
||||
};
|
||||
|
||||
const SOURCE_BADGE_CLASS: Record<MemorySource, string> = {
|
||||
agent: 'bg-surface-card text-ink-mid',
|
||||
runtime: 'bg-amber-950 text-amber-300',
|
||||
user: 'bg-emerald-950 text-emerald-400',
|
||||
};
|
||||
|
||||
function MemoryEntryRow({ entry, onDelete }: MemoryEntryRowProps) {
|
||||
const [expanded, setExpanded] = useState(false);
|
||||
const bodyId = `mem-body-${sanitizeId(entry.id)}`;
|
||||
const ttl = formatTTL(entry.expires_at);
|
||||
|
||||
return (
|
||||
<div className="rounded-lg border border-line/60 bg-surface-sunken/50 overflow-hidden">
|
||||
<div
|
||||
className="rounded-lg border border-line/60 bg-surface-sunken/50 overflow-hidden"
|
||||
data-testid={`memory-row-${entry.id}`}
|
||||
>
|
||||
{/* Header row */}
|
||||
<button
|
||||
type="button"
|
||||
@@ -392,52 +521,89 @@ function MemoryEntryRow({ entry, onEdit, onDelete }: MemoryEntryRowProps) {
|
||||
aria-expanded={expanded}
|
||||
aria-controls={bodyId}
|
||||
>
|
||||
{/* Scope badge */}
|
||||
{/* Kind badge */}
|
||||
<span
|
||||
className={[
|
||||
"text-[9px] shrink-0 font-mono px-1 py-0.5 rounded",
|
||||
entry.scope === "LOCAL"
|
||||
? "bg-surface-card text-ink-mid"
|
||||
: entry.scope === "TEAM"
|
||||
? "bg-blue-950 text-accent"
|
||||
: "bg-violet-950 text-violet-400",
|
||||
].join(" ")}
|
||||
title={`Scope: ${entry.scope}`}
|
||||
'text-[9px] shrink-0 font-mono px-1 py-0.5 rounded',
|
||||
KIND_BADGE_CLASS[entry.kind] ?? 'bg-surface-card text-ink-mid',
|
||||
].join(' ')}
|
||||
title={`Kind: ${entry.kind}`}
|
||||
data-testid="kind-badge"
|
||||
>
|
||||
{entry.scope[0]}
|
||||
{entry.kind[0].toUpperCase()}
|
||||
</span>
|
||||
|
||||
{/* Source badge */}
|
||||
<span
|
||||
className={[
|
||||
'text-[9px] shrink-0 font-mono px-1 py-0.5 rounded',
|
||||
SOURCE_BADGE_CLASS[entry.source] ?? 'bg-surface-card text-ink-mid',
|
||||
].join(' ')}
|
||||
title={`Source: ${entry.source}`}
|
||||
data-testid="source-badge"
|
||||
>
|
||||
{entry.source}
|
||||
</span>
|
||||
|
||||
{/* Pin indicator */}
|
||||
{entry.pin && (
|
||||
<span
|
||||
className="text-[9px] shrink-0"
|
||||
title="Pinned"
|
||||
data-testid="pin-badge"
|
||||
aria-label="Pinned"
|
||||
>
|
||||
📌
|
||||
</span>
|
||||
)}
|
||||
|
||||
{/* Namespace tag */}
|
||||
<span className="text-[9px] shrink-0 font-mono text-ink-soft truncate max-w-[80px]" title={entry.namespace}>
|
||||
<span
|
||||
className="text-[9px] shrink-0 font-mono text-ink-soft truncate max-w-[100px]"
|
||||
title={entry.namespace}
|
||||
>
|
||||
{entry.namespace}
|
||||
</span>
|
||||
|
||||
{/* Content preview */}
|
||||
<span className="flex-1 min-w-0 text-[10px] font-mono text-ink-mid truncate text-left">
|
||||
{entry.content.length > 60 ? entry.content.slice(0, 60) + "…" : entry.content}
|
||||
{entry.content.length > 60 ? entry.content.slice(0, 60) + '…' : entry.content}
|
||||
</span>
|
||||
|
||||
{/* Similarity badge */}
|
||||
{entry.similarity_score != null && (
|
||||
{/* Score badge (semantic search only) */}
|
||||
{entry.score != null && (
|
||||
<span
|
||||
className={[
|
||||
"text-[9px] shrink-0 font-mono tabular-nums",
|
||||
entry.similarity_score >= 0.8
|
||||
? "text-accent"
|
||||
: "text-ink-mid",
|
||||
].join(" ")}
|
||||
title={`Similarity: ${(entry.similarity_score * 100).toFixed(1)}%`}
|
||||
data-testid="similarity-badge"
|
||||
'text-[9px] shrink-0 font-mono tabular-nums',
|
||||
entry.score >= 0.8 ? 'text-accent' : 'text-ink-mid',
|
||||
].join(' ')}
|
||||
title={`Similarity: ${(entry.score * 100).toFixed(1)}%`}
|
||||
data-testid="score-badge"
|
||||
>
|
||||
{Math.round(entry.similarity_score * 100)}%
|
||||
{Math.round(entry.score * 100)}%
|
||||
</span>
|
||||
)}
|
||||
|
||||
{/* TTL countdown */}
|
||||
{ttl && (
|
||||
<span
|
||||
className={[
|
||||
'text-[9px] shrink-0 font-mono',
|
||||
ttl === 'expired' ? 'text-bad' : 'text-amber-400',
|
||||
].join(' ')}
|
||||
title={`Expires: ${entry.expires_at}`}
|
||||
data-testid="ttl-badge"
|
||||
>
|
||||
⌛{ttl}
|
||||
</span>
|
||||
)}
|
||||
|
||||
|
||||
<span className="text-[9px] text-ink-soft shrink-0">
|
||||
{formatRelativeTime(entry.created_at)}
|
||||
</span>
|
||||
<span className="text-[9px] text-ink-soft shrink-0" aria-hidden="true">
|
||||
{expanded ? "▼" : "▶"}
|
||||
{expanded ? '▼' : '▶'}
|
||||
</span>
|
||||
</button>
|
||||
|
||||
@@ -455,31 +621,19 @@ function MemoryEntryRow({ entry, onEdit, onDelete }: MemoryEntryRowProps) {
|
||||
<div className="flex items-center justify-between gap-2">
|
||||
<span className="text-[9px] text-ink-soft">
|
||||
Created: {new Date(entry.created_at).toLocaleString()}
|
||||
{entry.expires_at && ` · Expires: ${new Date(entry.expires_at).toLocaleString()}`}
|
||||
</span>
|
||||
<div className="flex items-center gap-1.5 shrink-0">
|
||||
<button
|
||||
type="button"
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
onEdit();
|
||||
}}
|
||||
aria-label="Edit memory"
|
||||
className="text-[10px] px-2 py-0.5 bg-surface-card hover:bg-surface-elevated border border-line/40 rounded text-ink-mid hover:text-ink transition-colors"
|
||||
>
|
||||
Edit
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
onDelete();
|
||||
}}
|
||||
aria-label="Delete memory"
|
||||
className="text-[10px] px-2 py-0.5 bg-red-950/40 hover:bg-red-900/50 border border-red-900/30 rounded text-bad transition-colors"
|
||||
>
|
||||
Delete
|
||||
</button>
|
||||
</div>
|
||||
<button
|
||||
type="button"
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
onDelete();
|
||||
}}
|
||||
aria-label="Forget memory"
|
||||
className="text-[10px] px-2 py-0.5 bg-red-950/40 hover:bg-red-900/50 border border-red-900/30 rounded text-bad transition-colors shrink-0"
|
||||
>
|
||||
Forget
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -1,202 +0,0 @@
|
||||
// @vitest-environment jsdom
|
||||
/**
|
||||
* MemoryEditorDialog tests — covers Add (POST /memories) and Edit
|
||||
* (PATCH /memories/:id) flows. Pins:
|
||||
* - Add posts {content, scope, namespace} with the trimmed defaults
|
||||
* - Edit only sends fields that changed (no-op edit short-circuits, no PATCH fires)
|
||||
* - Empty content blocks save
|
||||
* - Save error surfaces in the dialog and keeps the modal open
|
||||
*/
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { render, screen, fireEvent, waitFor, cleanup } from "@testing-library/react";
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: vi.fn(),
|
||||
post: vi.fn(),
|
||||
patch: vi.fn(),
|
||||
del: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
import { api } from "@/lib/api";
|
||||
import { MemoryEditorDialog } from "../MemoryEditorDialog";
|
||||
import type { MemoryEntry } from "../MemoryInspectorPanel";
|
||||
|
||||
const mockPost = vi.mocked(api.post);
|
||||
const mockPatch = vi.mocked(api.patch);
|
||||
|
||||
const SAMPLE: MemoryEntry = {
|
||||
id: "mem-x",
|
||||
workspace_id: "ws-1",
|
||||
content: "original content",
|
||||
scope: "TEAM",
|
||||
namespace: "procedures",
|
||||
created_at: "2026-04-17T12:00:00.000Z",
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
mockPost.mockResolvedValue({} as never);
|
||||
mockPatch.mockResolvedValue({} as never);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
cleanup();
|
||||
});
|
||||
|
||||
describe("Add mode", () => {
|
||||
it("POSTs scope+namespace+trimmed-content and calls onSaved+onClose", async () => {
|
||||
const onClose = vi.fn();
|
||||
const onSaved = vi.fn();
|
||||
render(
|
||||
<MemoryEditorDialog
|
||||
open
|
||||
mode="add"
|
||||
workspaceId="ws-1"
|
||||
defaultScope="GLOBAL"
|
||||
defaultNamespace="facts"
|
||||
onClose={onClose}
|
||||
onSaved={onSaved}
|
||||
/>,
|
||||
);
|
||||
|
||||
const textarea = screen.getByLabelText(/Content/i) as HTMLTextAreaElement;
|
||||
fireEvent.change(textarea, { target: { value: " new fact " } });
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /Add memory$/i }));
|
||||
|
||||
await waitFor(() => expect(mockPost).toHaveBeenCalledTimes(1));
|
||||
expect(mockPost).toHaveBeenCalledWith("/workspaces/ws-1/memories", {
|
||||
content: "new fact",
|
||||
scope: "GLOBAL",
|
||||
namespace: "facts",
|
||||
});
|
||||
expect(onSaved).toHaveBeenCalledTimes(1);
|
||||
expect(onClose).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("blocks save when content is empty (whitespace-only)", () => {
|
||||
const onClose = vi.fn();
|
||||
const onSaved = vi.fn();
|
||||
render(
|
||||
<MemoryEditorDialog
|
||||
open
|
||||
mode="add"
|
||||
workspaceId="ws-1"
|
||||
defaultScope="LOCAL"
|
||||
onClose={onClose}
|
||||
onSaved={onSaved}
|
||||
/>,
|
||||
);
|
||||
const textarea = screen.getByLabelText(/Content/i) as HTMLTextAreaElement;
|
||||
fireEvent.change(textarea, { target: { value: " " } });
|
||||
fireEvent.click(screen.getByRole("button", { name: /Add memory$/i }));
|
||||
expect(mockPost).not.toHaveBeenCalled();
|
||||
expect(screen.getByRole("alert").textContent).toMatch(/empty/i);
|
||||
expect(onSaved).not.toHaveBeenCalled();
|
||||
expect(onClose).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Edit mode", () => {
|
||||
it("PATCHes only changed fields", async () => {
|
||||
const onClose = vi.fn();
|
||||
const onSaved = vi.fn();
|
||||
render(
|
||||
<MemoryEditorDialog
|
||||
open
|
||||
mode="edit"
|
||||
workspaceId="ws-1"
|
||||
entry={SAMPLE}
|
||||
onClose={onClose}
|
||||
onSaved={onSaved}
|
||||
/>,
|
||||
);
|
||||
|
||||
const textarea = screen.getByLabelText(/Content/i) as HTMLTextAreaElement;
|
||||
fireEvent.change(textarea, { target: { value: "rewritten content" } });
|
||||
// namespace untouched
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /Save changes/i }));
|
||||
|
||||
await waitFor(() => expect(mockPatch).toHaveBeenCalledTimes(1));
|
||||
expect(mockPatch).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-1/memories/mem-x",
|
||||
{ content: "rewritten content" },
|
||||
);
|
||||
expect(onSaved).toHaveBeenCalledTimes(1);
|
||||
expect(onClose).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("no-op edit short-circuits (no PATCH fires) and still closes", async () => {
|
||||
const onClose = vi.fn();
|
||||
const onSaved = vi.fn();
|
||||
render(
|
||||
<MemoryEditorDialog
|
||||
open
|
||||
mode="edit"
|
||||
workspaceId="ws-1"
|
||||
entry={SAMPLE}
|
||||
onClose={onClose}
|
||||
onSaved={onSaved}
|
||||
/>,
|
||||
);
|
||||
fireEvent.click(screen.getByRole("button", { name: /Save changes/i }));
|
||||
await waitFor(() => expect(onClose).toHaveBeenCalled());
|
||||
expect(mockPatch).not.toHaveBeenCalled();
|
||||
expect(onSaved).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("sends namespace too when both content and namespace changed", async () => {
|
||||
const onClose = vi.fn();
|
||||
const onSaved = vi.fn();
|
||||
render(
|
||||
<MemoryEditorDialog
|
||||
open
|
||||
mode="edit"
|
||||
workspaceId="ws-1"
|
||||
entry={SAMPLE}
|
||||
onClose={onClose}
|
||||
onSaved={onSaved}
|
||||
/>,
|
||||
);
|
||||
fireEvent.change(screen.getByLabelText(/Content/i), {
|
||||
target: { value: "newer content" },
|
||||
});
|
||||
fireEvent.change(screen.getByLabelText(/Namespace/i), {
|
||||
target: { value: "blockers" },
|
||||
});
|
||||
fireEvent.click(screen.getByRole("button", { name: /Save changes/i }));
|
||||
await waitFor(() => expect(mockPatch).toHaveBeenCalledTimes(1));
|
||||
expect(mockPatch).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-1/memories/mem-x",
|
||||
{ content: "newer content", namespace: "blockers" },
|
||||
);
|
||||
});
|
||||
|
||||
it("surfaces save error and keeps the modal open", async () => {
|
||||
const onClose = vi.fn();
|
||||
const onSaved = vi.fn();
|
||||
mockPatch.mockRejectedValueOnce(new Error("boom"));
|
||||
render(
|
||||
<MemoryEditorDialog
|
||||
open
|
||||
mode="edit"
|
||||
workspaceId="ws-1"
|
||||
entry={SAMPLE}
|
||||
onClose={onClose}
|
||||
onSaved={onSaved}
|
||||
/>,
|
||||
);
|
||||
fireEvent.change(screen.getByLabelText(/Content/i), {
|
||||
target: { value: "rewritten content" },
|
||||
});
|
||||
fireEvent.click(screen.getByRole("button", { name: /Save changes/i }));
|
||||
await waitFor(() =>
|
||||
expect(screen.getByRole("alert").textContent).toMatch(/boom/),
|
||||
);
|
||||
expect(onClose).not.toHaveBeenCalled();
|
||||
expect(onSaved).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
@@ -1,16 +1,29 @@
|
||||
// @vitest-environment jsdom
|
||||
/**
|
||||
* MemoryInspectorPanel tests — issue #909
|
||||
* MemoryInspectorPanel — v2 redesign tests.
|
||||
*
|
||||
* Covers: loading, empty state, scope tabs, namespace filter,
|
||||
* entry list, expand, delete flow, optimistic updates, Refresh, semantic search.
|
||||
* Coverage targets every behavior the panel surfaces:
|
||||
* - Initial load wires GET /v2/namespaces + GET /v2/memories
|
||||
* - Plugin-unavailable banner (503) renders + disables interactions
|
||||
* - Generic error renders in the error banner
|
||||
* - Namespace dropdown populates from /v2/namespaces.readable; "All
|
||||
* namespaces" is the default
|
||||
* - Selecting a namespace re-fetches with ?namespace=...
|
||||
* - Search input debounces + scopes the request to ?q=
|
||||
* - Search results sort by score descending
|
||||
* - Empty-state copy differs by query / plugin-state / no-data
|
||||
* - Per-row badges render (kind / source / pin / TTL / score /
|
||||
* score) and TTL countdown handles past/future/null
|
||||
* - Delete (Forget) flow: optimistic removal, confirmation dialog,
|
||||
* server failure rolls back via reload
|
||||
* - formatTTL helper covers s/m/h/d/expired/null/invalid branches
|
||||
*/
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { render, screen, fireEvent, waitFor, cleanup, act } from "@testing-library/react";
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { render, screen, fireEvent, waitFor, cleanup } from '@testing-library/react';
|
||||
|
||||
// ── Mocks ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
vi.mock('@/lib/api', () => ({
|
||||
api: {
|
||||
get: vi.fn(),
|
||||
post: vi.fn(),
|
||||
@@ -18,7 +31,7 @@ vi.mock("@/lib/api", () => ({
|
||||
},
|
||||
}));
|
||||
|
||||
vi.mock("@/components/ConfirmDialog", () => ({
|
||||
vi.mock('@/components/ConfirmDialog', () => ({
|
||||
ConfirmDialog: ({
|
||||
open,
|
||||
title,
|
||||
@@ -33,435 +46,473 @@ vi.mock("@/components/ConfirmDialog", () => ({
|
||||
confirmVariant?: string;
|
||||
onConfirm: () => void;
|
||||
onCancel: () => void;
|
||||
singleButton?: boolean;
|
||||
}) =>
|
||||
open ? (
|
||||
<div data-testid="confirm-dialog">
|
||||
<p data-testid="dialog-title">{title}</p>
|
||||
<p data-testid="dialog-message">{message}</p>
|
||||
<button onClick={onConfirm}>Confirm Delete</button>
|
||||
<button onClick={onCancel}>Cancel Delete</button>
|
||||
<button onClick={onConfirm}>Confirm</button>
|
||||
<button onClick={onCancel}>Cancel</button>
|
||||
</div>
|
||||
) : null,
|
||||
}));
|
||||
|
||||
import { api } from "@/lib/api";
|
||||
import { MemoryInspectorPanel } from "../MemoryInspectorPanel";
|
||||
|
||||
// ── Typed mock helpers ────────────────────────────────────────────────────────
|
||||
import { api } from '@/lib/api';
|
||||
import {
|
||||
MemoryInspectorPanel,
|
||||
formatTTL,
|
||||
isPluginUnavailableError,
|
||||
type MemoryV2,
|
||||
type NamespacesResponse,
|
||||
} from '../MemoryInspectorPanel';
|
||||
|
||||
const mockGet = vi.mocked(api.get);
|
||||
const mockDel = vi.mocked(api.del);
|
||||
|
||||
// ── Sample fixtures ───────────────────────────────────────────────────────────
|
||||
// ── Fixtures ──────────────────────────────────────────────────────────────────
|
||||
|
||||
const NOW = "2026-04-17T12:00:00.000Z";
|
||||
|
||||
const MEMORY_A: import("../MemoryInspectorPanel").MemoryEntry = {
|
||||
id: "mem-a",
|
||||
workspace_id: "ws-1",
|
||||
content: "Remember to review PRs before merging",
|
||||
scope: "LOCAL",
|
||||
namespace: "general",
|
||||
created_at: NOW,
|
||||
const NS_RESPONSE: NamespacesResponse = {
|
||||
readable: [
|
||||
{ name: 'workspace:ws-1', kind: 'workspace', label: 'Workspace (ws-1)' },
|
||||
{ name: 'team:t-1', kind: 'team', label: 'Team (t-1)' },
|
||||
],
|
||||
writable: [{ name: 'workspace:ws-1', kind: 'workspace', label: 'Workspace (ws-1)' }],
|
||||
};
|
||||
|
||||
const MEMORY_B: import("../MemoryInspectorPanel").MemoryEntry = {
|
||||
id: "mem-b",
|
||||
workspace_id: "ws-1",
|
||||
content: "Team knowledge: deploy happens on Fridays",
|
||||
scope: "TEAM",
|
||||
namespace: "procedures",
|
||||
created_at: NOW,
|
||||
const MEM_BASIC: MemoryV2 = {
|
||||
id: 'mem-a',
|
||||
namespace: 'workspace:ws-1',
|
||||
content: 'Remember the standup is at 10am',
|
||||
kind: 'fact',
|
||||
source: 'agent',
|
||||
pin: false,
|
||||
created_at: '2026-04-17T12:00:00.000Z',
|
||||
};
|
||||
|
||||
const TWO_MEMORIES = [MEMORY_A, MEMORY_B];
|
||||
const MEM_PINNED: MemoryV2 = {
|
||||
id: 'mem-pinned',
|
||||
namespace: 'team:t-1',
|
||||
content: 'Team retro every Friday',
|
||||
kind: 'summary',
|
||||
source: 'user',
|
||||
pin: true,
|
||||
expires_at: new Date(Date.now() + 86_400_000).toISOString(),
|
||||
created_at: '2026-04-17T12:00:00.000Z',
|
||||
};
|
||||
|
||||
const MEM_RUNTIME_CHECKPOINT: MemoryV2 = {
|
||||
id: 'mem-checkpoint',
|
||||
namespace: 'team:t-1',
|
||||
content: 'Runtime checkpoint',
|
||||
kind: 'checkpoint',
|
||||
source: 'runtime',
|
||||
pin: false,
|
||||
created_at: '2026-04-17T12:00:00.000Z',
|
||||
};
|
||||
|
||||
const MEM_EXPIRED: MemoryV2 = {
|
||||
id: 'mem-expired',
|
||||
namespace: 'workspace:ws-1',
|
||||
content: 'Stale memory',
|
||||
kind: 'fact',
|
||||
source: 'agent',
|
||||
pin: false,
|
||||
expires_at: new Date(Date.now() - 1000).toISOString(),
|
||||
created_at: '2026-04-17T12:00:00.000Z',
|
||||
};
|
||||
|
||||
// ── Setup / teardown ──────────────────────────────────────────────────────────
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
mockGet.mockReset();
|
||||
mockDel.mockReset();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
cleanup();
|
||||
});
|
||||
|
||||
// ── Helper: flush microtasks + React state updates ─────────────────────────────
|
||||
async function flushUpdates(): Promise<void> {
|
||||
await act(async () => {});
|
||||
// Helper: stub a basic two-call flow (namespaces + memories).
|
||||
function stubFetch(memories: MemoryV2[], namespaces: NamespacesResponse = NS_RESPONSE) {
|
||||
mockGet.mockImplementation(((url: string) => {
|
||||
if (url.includes('/v2/namespaces')) {
|
||||
return Promise.resolve(namespaces);
|
||||
}
|
||||
return Promise.resolve({ memories });
|
||||
}) as typeof api.get);
|
||||
}
|
||||
|
||||
// ── Loading & empty state ─────────────────────────────────────────────────────
|
||||
// ── isPluginUnavailableError helper ─────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — loading and empty state", () => {
|
||||
it("shows loading indicator before data arrives", () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockReturnValue(new Promise(() => {}) as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
expect(screen.getByText(/loading memories/i)).toBeTruthy();
|
||||
});
|
||||
|
||||
it("renders empty state when API returns []", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByText("No LOCAL memories")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("fetches from the correct workspace memories endpoint with scope=LOCAL", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-abc-123" />);
|
||||
await flushUpdates();
|
||||
expect(mockGet).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-abc-123/memories?scope=LOCAL"
|
||||
);
|
||||
});
|
||||
|
||||
it("shows error banner when fetch throws", async () => {
|
||||
mockGet.mockRejectedValue(new Error("Network error"));
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByText("Network error")).toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Scope tabs ────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — scope tabs", () => {
|
||||
it("renders LOCAL, TEAM, GLOBAL tabs", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByRole("button", { name: "LOCAL" })).toBeTruthy();
|
||||
expect(screen.getByRole("button", { name: "TEAM" })).toBeTruthy();
|
||||
expect(screen.getByRole("button", { name: "GLOBAL" })).toBeTruthy();
|
||||
});
|
||||
|
||||
it("LOCAL is active by default", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByRole("button", { name: "LOCAL" }).getAttribute("aria-pressed")).toBe("true");
|
||||
});
|
||||
|
||||
it("clicking TEAM tab re-fetches with scope=TEAM", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
|
||||
mockGet.mockClear();
|
||||
fireEvent.click(screen.getByRole("button", { name: "TEAM" }));
|
||||
await flushUpdates();
|
||||
expect(mockGet).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-1/memories?scope=TEAM"
|
||||
);
|
||||
});
|
||||
|
||||
it("clicking GLOBAL tab re-fetches with scope=GLOBAL", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
|
||||
mockGet.mockClear();
|
||||
fireEvent.click(screen.getByRole("button", { name: "GLOBAL" }));
|
||||
await flushUpdates();
|
||||
expect(mockGet).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-1/memories?scope=GLOBAL"
|
||||
);
|
||||
});
|
||||
|
||||
it("shows scope-specific empty state when switching tabs", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: "TEAM" }));
|
||||
await flushUpdates();
|
||||
expect(screen.getByText("No TEAM memories")).toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Namespace filter ──────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — namespace filter", () => {
|
||||
it("renders namespace filter input", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByLabelText("Filter by namespace")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("includes namespace param in API call when set", async () => {
|
||||
vi.useFakeTimers();
|
||||
try {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
|
||||
mockGet.mockClear();
|
||||
fireEvent.change(screen.getByLabelText("Filter by namespace"), {
|
||||
target: { value: "facts" },
|
||||
});
|
||||
// Advance past the 300ms debounce
|
||||
act(() => { vi.advanceTimersByTime(350); });
|
||||
await flushUpdates();
|
||||
|
||||
expect(mockGet).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-1/memories?scope=LOCAL&namespace=facts"
|
||||
);
|
||||
} finally {
|
||||
vi.useRealTimers();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ── Entry list ───────────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — entry list", () => {
|
||||
beforeEach(() => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue(TWO_MEMORIES as any);
|
||||
});
|
||||
|
||||
it("renders a row for every memory", async () => {
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByText(/Remember to review PRs before merging/)).toBeTruthy();
|
||||
expect(screen.getByText(/Team knowledge: deploy happens on Fridays/)).toBeTruthy();
|
||||
});
|
||||
|
||||
it("displays memory count in toolbar", async () => {
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByText("2 memories")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("displays scope badge for each entry", async () => {
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByTitle("Scope: LOCAL")).toBeTruthy();
|
||||
expect(screen.getByTitle("Scope: TEAM")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("entries are collapsed by default (pre region not visible)", async () => {
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
// Expanded region (pre tag) should not exist in DOM yet
|
||||
expect(screen.queryByRole("region")).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Expand / collapse ─────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — expand/collapse", () => {
|
||||
beforeEach(() => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue(TWO_MEMORIES as any);
|
||||
});
|
||||
|
||||
it("clicking a row header expands it and shows the full content in a pre tag", async () => {
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
|
||||
fireEvent.click(
|
||||
screen.getByText(/Remember to review PRs before merging/).closest("button")!
|
||||
);
|
||||
await flushUpdates();
|
||||
// After expand, a region with the full content <pre> should appear
|
||||
expect(screen.getByRole("region")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("clicking the header again collapses the row (pre region removed)", async () => {
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
|
||||
const headerBtn = screen
|
||||
.getByText(/Remember to review PRs before merging/)
|
||||
.closest("button")!;
|
||||
fireEvent.click(headerBtn); // expand
|
||||
await flushUpdates();
|
||||
expect(screen.getByRole("region")).toBeTruthy();
|
||||
|
||||
fireEvent.click(headerBtn); // collapse
|
||||
await flushUpdates();
|
||||
// After collapse, the region (pre) is removed from the DOM
|
||||
expect(screen.queryByRole("region")).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Delete flow ───────────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — delete flow", () => {
|
||||
beforeEach(() => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue(TWO_MEMORIES as any);
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockDel.mockResolvedValue({ status: "deleted" } as any);
|
||||
});
|
||||
|
||||
/** Helper: expand memory-A and click its Delete button */
|
||||
async function openDeleteForMemoryA() {
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
fireEvent.click(
|
||||
screen.getByText(/Remember to review PRs before merging/).closest("button")!
|
||||
);
|
||||
await flushUpdates();
|
||||
fireEvent.click(screen.getByRole("button", { name: "Delete memory" }));
|
||||
await flushUpdates();
|
||||
}
|
||||
|
||||
it("opens ConfirmDialog when Delete is clicked", async () => {
|
||||
await openDeleteForMemoryA();
|
||||
expect(screen.getByTestId("confirm-dialog")).toBeTruthy();
|
||||
expect(screen.getByTestId("dialog-title").textContent).toBe("Delete memory");
|
||||
});
|
||||
|
||||
it("calls api.del with the correct URL-encoded path on confirm", async () => {
|
||||
await openDeleteForMemoryA();
|
||||
fireEvent.click(screen.getByText("Confirm Delete"));
|
||||
await flushUpdates();
|
||||
expect(mockDel).toHaveBeenCalledWith("/workspaces/ws-1/memories/mem-a");
|
||||
});
|
||||
|
||||
it("removes the entry optimistically after confirm", async () => {
|
||||
await openDeleteForMemoryA();
|
||||
fireEvent.click(screen.getByText("Confirm Delete"));
|
||||
await flushUpdates();
|
||||
expect(screen.queryByText(/Remember to review PRs before merging/)).toBeNull();
|
||||
// Sibling entry unaffected
|
||||
expect(screen.getByText(/Team knowledge: deploy happens on Fridays/)).toBeTruthy();
|
||||
});
|
||||
|
||||
it("closes ConfirmDialog without deleting when Cancel is clicked", async () => {
|
||||
await openDeleteForMemoryA();
|
||||
fireEvent.click(screen.getByText("Cancel Delete"));
|
||||
await flushUpdates();
|
||||
expect(screen.queryByTestId("confirm-dialog")).toBeNull();
|
||||
expect(mockDel).not.toHaveBeenCalled();
|
||||
// Sibling memory entry (MEMORY_B) is still in the list
|
||||
expect(screen.getByText(/Team knowledge: deploy happens on Fridays/)).toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Refresh ───────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — Refresh button", () => {
|
||||
it("re-fetches entries when Refresh is clicked", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByText("No LOCAL memories")).toBeTruthy();
|
||||
|
||||
expect(mockGet).toHaveBeenCalledTimes(1);
|
||||
fireEvent.click(screen.getByRole("button", { name: "Refresh memories" }));
|
||||
await flushUpdates();
|
||||
expect(mockGet).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
|
||||
// ── role=alert a11y ──────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — error elements have role=alert", () => {
|
||||
it("fetch error banner has role='alert'", async () => {
|
||||
mockGet.mockRejectedValue(new Error("Network error"));
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
const alert = screen.getByRole("alert");
|
||||
expect(alert).toBeTruthy();
|
||||
expect(alert.textContent).toContain("Network error");
|
||||
});
|
||||
});
|
||||
|
||||
// ── Semantic search ──────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — semantic search", () => {
|
||||
afterEach(() => {
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it("debounces search input by 300ms before calling API", async () => {
|
||||
vi.useFakeTimers();
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
|
||||
mockGet.mockClear();
|
||||
|
||||
fireEvent.change(screen.getByLabelText("Search memories"), {
|
||||
target: { value: "deploy" },
|
||||
});
|
||||
|
||||
// 200ms — debounce has NOT fired yet
|
||||
act(() => { vi.advanceTimersByTime(200); });
|
||||
await flushUpdates();
|
||||
expect(mockGet).not.toHaveBeenCalled();
|
||||
|
||||
// 350ms total — debounce fires
|
||||
act(() => { vi.advanceTimersByTime(150); });
|
||||
await flushUpdates();
|
||||
|
||||
expect(mockGet).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-1/memories?scope=LOCAL&q=deploy"
|
||||
);
|
||||
});
|
||||
|
||||
it("renders similarity-badge when entry has similarity_score", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([{ ...MEMORY_A, similarity_score: 0.87 }] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
const badge = document.querySelector('[data-testid="similarity-badge"]');
|
||||
expect(badge).toBeTruthy();
|
||||
expect(badge?.textContent).toBe("87%");
|
||||
});
|
||||
|
||||
it("does not render similarity-badge when entry has no similarity_score", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([MEMORY_A] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
describe('isPluginUnavailableError', () => {
|
||||
it('matches the literal env var contract from the server handler', () => {
|
||||
expect(
|
||||
document.querySelector('[data-testid="similarity-badge"]')
|
||||
).toBeNull();
|
||||
isPluginUnavailableError(
|
||||
new Error('API GET /workspaces/x/v2/memories: 503 {"error":"memory plugin is not configured (set MEMORY_PLUGIN_URL)"}'),
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("clear button resets query immediately and re-fetches without ?q=", async () => {
|
||||
vi.useFakeTimers();
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
it('does not false-match on generic 503 errors that don\'t mention the env var', () => {
|
||||
expect(isPluginUnavailableError(new Error('API GET /foo: 503 something else'))).toBe(false);
|
||||
});
|
||||
|
||||
fireEvent.change(screen.getByLabelText("Search memories"), {
|
||||
target: { value: "deploy" },
|
||||
it('does not false-match on plain 4xx errors', () => {
|
||||
expect(isPluginUnavailableError(new Error('API GET /foo: 401 unauthorized'))).toBe(false);
|
||||
});
|
||||
|
||||
it('returns false for non-Error inputs', () => {
|
||||
expect(isPluginUnavailableError(null)).toBe(false);
|
||||
expect(isPluginUnavailableError(undefined)).toBe(false);
|
||||
expect(isPluginUnavailableError('a string')).toBe(false);
|
||||
expect(isPluginUnavailableError({ message: 'MEMORY_PLUGIN_URL' })).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
// ── formatTTL helper ─────────────────────────────────────────────────────────
|
||||
|
||||
describe('formatTTL', () => {
|
||||
it('returns empty string for null/undefined/empty', () => {
|
||||
expect(formatTTL(null)).toBe('');
|
||||
expect(formatTTL(undefined)).toBe('');
|
||||
expect(formatTTL('')).toBe('');
|
||||
});
|
||||
|
||||
it('returns empty for invalid date strings', () => {
|
||||
expect(formatTTL('not-a-date')).toBe('');
|
||||
});
|
||||
|
||||
it('returns "expired" for past timestamps', () => {
|
||||
const past = new Date(Date.now() - 5000).toISOString();
|
||||
expect(formatTTL(past)).toBe('expired');
|
||||
});
|
||||
|
||||
it('formats <60s as seconds', () => {
|
||||
const future = new Date(Date.now() + 30_000).toISOString();
|
||||
expect(formatTTL(future)).toMatch(/^\d{1,2}s$/);
|
||||
});
|
||||
|
||||
it('formats <60m as minutes', () => {
|
||||
const future = new Date(Date.now() + 30 * 60_000).toISOString();
|
||||
expect(formatTTL(future)).toMatch(/^\d{1,2}m$/);
|
||||
});
|
||||
|
||||
it('formats <24h as hours', () => {
|
||||
const future = new Date(Date.now() + 5 * 3_600_000).toISOString();
|
||||
expect(formatTTL(future)).toMatch(/^\d{1,2}h$/);
|
||||
});
|
||||
|
||||
it('formats >24h as days', () => {
|
||||
const future = new Date(Date.now() + 3 * 86_400_000).toISOString();
|
||||
expect(formatTTL(future)).toMatch(/^\d{1,2}d$/);
|
||||
});
|
||||
});
|
||||
|
||||
// ── Initial load + dropdown ─────────────────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — initial load', () => {
|
||||
it('fetches namespaces and memories on mount', async () => {
|
||||
stubFetch([MEM_BASIC]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
|
||||
await waitFor(() => {
|
||||
const calls = mockGet.mock.calls.map((c) => c[0]);
|
||||
expect(calls.some((u) => u.includes('/v2/namespaces'))).toBe(true);
|
||||
expect(calls.some((u) => u.includes('/v2/memories'))).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
it('renders the row contents from the memories response', async () => {
|
||||
stubFetch([MEM_BASIC]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText(/Remember the standup is at 10am/)).toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
it('populates the namespace dropdown with readable entries + "All namespaces"', async () => {
|
||||
stubFetch([]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByLabelText('Filter by namespace'));
|
||||
const select = screen.getByLabelText('Filter by namespace') as HTMLSelectElement;
|
||||
const optionLabels = Array.from(select.options).map((o) => o.textContent ?? '');
|
||||
expect(optionLabels[0]).toContain('All namespaces');
|
||||
expect(optionLabels.join('|')).toContain('Workspace (ws-1)');
|
||||
expect(optionLabels.join('|')).toContain('Team (t-1)');
|
||||
});
|
||||
|
||||
it('selecting a namespace re-fetches with ?namespace=', async () => {
|
||||
stubFetch([MEM_BASIC]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByLabelText('Filter by namespace'));
|
||||
|
||||
const select = screen.getByLabelText('Filter by namespace') as HTMLSelectElement;
|
||||
fireEvent.change(select, { target: { value: 'team:t-1' } });
|
||||
|
||||
await waitFor(() => {
|
||||
const calls = mockGet.mock.calls.map((c) => c[0] as string);
|
||||
expect(calls.some((u) => u.includes('namespace=team%3At-1'))).toBe(true);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// ── Plugin unavailable (503) ────────────────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — plugin unavailable', () => {
|
||||
it('renders the operator-hint banner and disables search input', async () => {
|
||||
mockGet.mockRejectedValue(new Error('HTTP 503: memory plugin is not configured (set MEMORY_PLUGIN_URL)'));
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByTestId('plugin-unavailable-banner'));
|
||||
|
||||
const searchInput = screen.getByLabelText('Search memories') as HTMLInputElement;
|
||||
expect(searchInput.disabled).toBe(true);
|
||||
});
|
||||
|
||||
it('shows the empty-state explaining plugin disabled', async () => {
|
||||
mockGet.mockRejectedValue(new Error('API GET /workspaces/x/v2/memories: 503 {"error":"memory plugin is not configured (set MEMORY_PLUGIN_URL)"}'));
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByText(/Memory plugin disabled/i));
|
||||
});
|
||||
});
|
||||
|
||||
// ── Generic error (non-503) ─────────────────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — generic errors', () => {
|
||||
it('surfaces a non-503 error in the error banner', async () => {
|
||||
mockGet.mockImplementation(((url: string) => {
|
||||
if (url.includes('/v2/namespaces')) {
|
||||
return Promise.resolve(NS_RESPONSE);
|
||||
}
|
||||
return Promise.reject(new Error('upstream timeout'));
|
||||
}) as typeof api.get);
|
||||
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => {
|
||||
// Error banner has role=alert
|
||||
const alerts = screen.getAllByRole('alert');
|
||||
const found = alerts.some((a) => a.textContent?.includes('upstream timeout'));
|
||||
expect(found).toBe(true);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// ── Search ──────────────────────────────────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — search', () => {
|
||||
it('eventually fires query with ?q= after debounce', async () => {
|
||||
stubFetch([MEM_BASIC]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByLabelText('Search memories'));
|
||||
|
||||
fireEvent.change(screen.getByLabelText('Search memories'), {
|
||||
target: { value: 'standup' },
|
||||
});
|
||||
|
||||
act(() => { vi.advanceTimersByTime(350); });
|
||||
await flushUpdates();
|
||||
|
||||
expect(mockGet).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-1/memories?scope=LOCAL&q=deploy"
|
||||
await waitFor(
|
||||
() => {
|
||||
const calls = mockGet.mock.calls.map((c) => c[0] as string);
|
||||
expect(calls.some((u) => u.includes('q=standup'))).toBe(true);
|
||||
},
|
||||
{ timeout: 1500 },
|
||||
);
|
||||
mockGet.mockClear();
|
||||
});
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: "Clear search" }));
|
||||
await flushUpdates();
|
||||
it('sorts results by score descending when query active', async () => {
|
||||
const lowScore: MemoryV2 = { ...MEM_BASIC, id: 'low', score: 0.2, content: 'low' };
|
||||
const highScore: MemoryV2 = { ...MEM_BASIC, id: 'high', score: 0.95, content: 'high' };
|
||||
// Plugin returns in arbitrary order; component sorts.
|
||||
mockGet.mockImplementation(((url: string) => {
|
||||
if (url.includes('/v2/namespaces')) return Promise.resolve(NS_RESPONSE);
|
||||
return Promise.resolve({ memories: [lowScore, highScore] });
|
||||
}) as typeof api.get);
|
||||
|
||||
expect(mockGet).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-1/memories?scope=LOCAL"
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByLabelText('Search memories'));
|
||||
fireEvent.change(screen.getByLabelText('Search memories'), {
|
||||
target: { value: 'something' },
|
||||
});
|
||||
|
||||
await waitFor(
|
||||
() => {
|
||||
const rows = screen.getAllByTestId(/^memory-row-/);
|
||||
// First row should be the high-score one
|
||||
expect(rows[0].getAttribute('data-testid')).toBe('memory-row-high');
|
||||
},
|
||||
{ timeout: 1500 },
|
||||
);
|
||||
});
|
||||
|
||||
it('clear-button resets the query', async () => {
|
||||
stubFetch([MEM_BASIC]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByLabelText('Search memories'));
|
||||
|
||||
fireEvent.change(screen.getByLabelText('Search memories'), {
|
||||
target: { value: 'foo' },
|
||||
});
|
||||
fireEvent.click(screen.getByLabelText('Clear search'));
|
||||
expect((screen.getByLabelText('Search memories') as HTMLInputElement).value).toBe('');
|
||||
});
|
||||
|
||||
it('renders no-results empty-state when search has no matches', async () => {
|
||||
stubFetch([]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByLabelText('Search memories'));
|
||||
fireEvent.change(screen.getByLabelText('Search memories'), {
|
||||
target: { value: 'nothing' },
|
||||
});
|
||||
await waitFor(
|
||||
() => {
|
||||
expect(screen.getByText(/No memories match your search/i)).toBeTruthy();
|
||||
},
|
||||
{ timeout: 1500 },
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
// ── Per-row badges ───────────────────────────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — row badges', () => {
|
||||
it('renders kind, source, pin, TTL badges per shape', async () => {
|
||||
stubFetch([MEM_PINNED, MEM_RUNTIME_CHECKPOINT]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
|
||||
await waitFor(() => {
|
||||
// Pinned memory: kind=summary, source=user, pin=true, TTL>0
|
||||
const pinnedRow = screen.getByTestId('memory-row-mem-pinned');
|
||||
expect(pinnedRow.querySelector('[data-testid="kind-badge"]')?.textContent).toBe('S');
|
||||
expect(pinnedRow.querySelector('[data-testid="source-badge"]')?.textContent).toBe('user');
|
||||
expect(pinnedRow.querySelector('[data-testid="pin-badge"]')).toBeTruthy();
|
||||
expect(pinnedRow.querySelector('[data-testid="ttl-badge"]')?.textContent).toMatch(/^⌛\d+[hd]$/);
|
||||
|
||||
// Checkpoint memory: kind=checkpoint, source=runtime, no pin, no TTL
|
||||
const propRow = screen.getByTestId('memory-row-mem-checkpoint');
|
||||
expect(propRow.querySelector('[data-testid="kind-badge"]')?.textContent).toBe('C');
|
||||
expect(propRow.querySelector('[data-testid="source-badge"]')?.textContent).toBe('runtime');
|
||||
expect(propRow.querySelector('[data-testid="pin-badge"]')).toBeNull();
|
||||
expect(propRow.querySelector('[data-testid="ttl-badge"]')).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
it('TTL badge shows "expired" for past expires_at', async () => {
|
||||
stubFetch([MEM_EXPIRED]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => {
|
||||
const row = screen.getByTestId('memory-row-mem-expired');
|
||||
expect(row.querySelector('[data-testid="ttl-badge"]')?.textContent).toBe('⌛expired');
|
||||
});
|
||||
});
|
||||
|
||||
it('expanding a row shows full content + Forget button', async () => {
|
||||
stubFetch([MEM_BASIC]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByTestId('memory-row-mem-a'));
|
||||
|
||||
const row = screen.getByTestId('memory-row-mem-a');
|
||||
const headerButton = row.querySelector('button');
|
||||
expect(headerButton).toBeTruthy();
|
||||
fireEvent.click(headerButton!);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByLabelText('Forget memory')).toBeTruthy();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// ── Delete (Forget) flow ──────────────────────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — forget flow', () => {
|
||||
it('opens the confirm dialog on Forget click and removes optimistically on confirm', async () => {
|
||||
stubFetch([MEM_BASIC]);
|
||||
mockDel.mockResolvedValue({ status: 'deleted' });
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
|
||||
// Expand row, click Forget
|
||||
await waitFor(() => screen.getByTestId('memory-row-mem-a'));
|
||||
const row = screen.getByTestId('memory-row-mem-a');
|
||||
fireEvent.click(row.querySelector('button')!);
|
||||
await waitFor(() => screen.getByLabelText('Forget memory'));
|
||||
fireEvent.click(screen.getByLabelText('Forget memory'));
|
||||
|
||||
// Dialog appears with v2-shaped copy (Forget, not Delete)
|
||||
expect(screen.getByTestId('dialog-title').textContent).toBe('Forget memory');
|
||||
fireEvent.click(screen.getByText('Confirm'));
|
||||
|
||||
// Optimistic removal happens immediately
|
||||
await waitFor(() => {
|
||||
expect(screen.queryByTestId('memory-row-mem-a')).toBeNull();
|
||||
});
|
||||
// DELETE called with the right path
|
||||
await waitFor(() => {
|
||||
const delPaths = mockDel.mock.calls.map((c) => c[0] as string);
|
||||
expect(delPaths.some((p) => p.includes('/v2/memories/mem-a'))).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
it('cancelling the dialog leaves the row in place', async () => {
|
||||
stubFetch([MEM_BASIC]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByTestId('memory-row-mem-a'));
|
||||
|
||||
fireEvent.click(screen.getByTestId('memory-row-mem-a').querySelector('button')!);
|
||||
await waitFor(() => screen.getByLabelText('Forget memory'));
|
||||
fireEvent.click(screen.getByLabelText('Forget memory'));
|
||||
fireEvent.click(screen.getByText('Cancel'));
|
||||
|
||||
expect(screen.queryByTestId('memory-row-mem-a')).toBeTruthy();
|
||||
expect(mockDel).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('rolls back on server failure by reloading entries', async () => {
|
||||
stubFetch([MEM_BASIC]);
|
||||
mockDel.mockRejectedValue(new Error('upstream 502'));
|
||||
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByTestId('memory-row-mem-a'));
|
||||
fireEvent.click(screen.getByTestId('memory-row-mem-a').querySelector('button')!);
|
||||
await waitFor(() => screen.getByLabelText('Forget memory'));
|
||||
fireEvent.click(screen.getByLabelText('Forget memory'));
|
||||
fireEvent.click(screen.getByText('Confirm'));
|
||||
|
||||
// After failure, error banner surfaces + reload re-fetches memories
|
||||
await waitFor(() => {
|
||||
const alerts = screen.getAllByRole('alert');
|
||||
const found = alerts.some((a) => a.textContent?.includes('upstream 502'));
|
||||
expect(found).toBe(true);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// ── Empty state when no memories at all ────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — empty state', () => {
|
||||
it('renders the "no memories yet" empty state when not searching', async () => {
|
||||
stubFetch([]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText('No memories yet')).toBeTruthy();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// ── Refresh ─────────────────────────────────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — refresh', () => {
|
||||
it('Refresh button refetches memories', async () => {
|
||||
stubFetch([MEM_BASIC]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByLabelText('Refresh memories'));
|
||||
|
||||
const before = mockGet.mock.calls.filter((c) =>
|
||||
(c[0] as string).includes('/v2/memories'),
|
||||
).length;
|
||||
fireEvent.click(screen.getByLabelText('Refresh memories'));
|
||||
|
||||
await waitFor(() => {
|
||||
const after = mockGet.mock.calls.filter((c) =>
|
||||
(c[0] as string).includes('/v2/memories'),
|
||||
).length;
|
||||
expect(after).toBe(before + 1);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -7,7 +7,7 @@ import { api } from "@/lib/api";
|
||||
import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas";
|
||||
import { useSocketEvent } from "@/hooks/useSocketEvent";
|
||||
import { type ChatMessage, type ChatAttachment, createMessage, appendMessageDeduped } from "./chat/types";
|
||||
import { uploadChatFiles, downloadChatFile } from "./chat/uploads";
|
||||
import { uploadChatFiles, downloadChatFile, isPlatformAttachment } from "./chat/uploads";
|
||||
import { AttachmentChip, PendingAttachmentPill } from "./chat/AttachmentViews";
|
||||
import { extractFilesFromTask } from "./chat/message-parser";
|
||||
import { AgentCommsPanel } from "./chat/AgentCommsPanel";
|
||||
@@ -286,6 +286,14 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [confirmRestart, setConfirmRestart] = useState(false);
|
||||
const bottomRef = useRef<HTMLDivElement>(null);
|
||||
// First-mount scroll-to-bottom needs `behavior: "instant"` — long
|
||||
// conversations smooth-animate for ~300ms which any concurrent
|
||||
// re-render can interrupt, leaving the user stuck mid-conversation
|
||||
// when the chat tab opens. Subsequent appends (new agent messages)
|
||||
// keep `smooth` for the visual "landing" feel. Flipped the first
|
||||
// time messages.length goes positive, so a workspace switch (which
|
||||
// remounts ChatTab) gets a fresh instant jump too.
|
||||
const hasInitialScrollRef = useRef(false);
|
||||
// Lazy-load older history on scroll-up.
|
||||
// - containerRef = the scrollable messages viewport
|
||||
// - topRef = sentinel above the messages list; IO observes it
|
||||
@@ -545,6 +553,15 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
scrollAnchorRef.current = null;
|
||||
return;
|
||||
}
|
||||
// Instant on first arrival of messages — smooth-scroll on a long
|
||||
// conversation gets interrupted by concurrent renders and leaves
|
||||
// the user stuck in the middle. After the first jump, subsequent
|
||||
// appends animate as before.
|
||||
if (!hasInitialScrollRef.current && messages.length > 0) {
|
||||
hasInitialScrollRef.current = true;
|
||||
bottomRef.current?.scrollIntoView({ behavior: "instant" as ScrollBehavior });
|
||||
return;
|
||||
}
|
||||
bottomRef.current?.scrollIntoView({ behavior: "smooth" });
|
||||
}, [messages]);
|
||||
|
||||
@@ -1044,7 +1061,77 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
: "dark:prose-invert dark:[--tw-prose-invert-body:theme(colors.zinc.100)] dark:[--tw-prose-invert-headings:theme(colors.white)] dark:[--tw-prose-invert-bold:theme(colors.white)] dark:[--tw-prose-invert-code:theme(colors.zinc.100)]"
|
||||
}`}
|
||||
>
|
||||
<ReactMarkdown remarkPlugins={[remarkGfm]}>{msg.content}</ReactMarkdown>
|
||||
<ReactMarkdown
|
||||
remarkPlugins={[remarkGfm]}
|
||||
components={{
|
||||
// Default ReactMarkdown renders `<a href="...">`
|
||||
// with no target and no scheme handling, so:
|
||||
//
|
||||
// 1. http/https links navigate the canvas tab
|
||||
// itself away — user loses canvas state.
|
||||
// 2. workspace://, file://, and bare /workspace/
|
||||
// paths from agent-authored markdown produce
|
||||
// an unhandled-protocol click → browser ends
|
||||
// up at about:blank with no download (the
|
||||
// reported bug from 2026-05-05).
|
||||
//
|
||||
// Override: external URLs open in a new tab with
|
||||
// rel="noopener noreferrer"; in-container paths
|
||||
// route through downloadChatFile so the browser
|
||||
// gets a real Blob with proper auth headers.
|
||||
a: ({ href, children, ...rest }) => {
|
||||
const url = String(href ?? "");
|
||||
// Use the SSOT helper isPlatformAttachment so
|
||||
// the markdown link override and the chip
|
||||
// download path agree on which schemes need
|
||||
// auth-routed download. Pre-fix this list was
|
||||
// duplicated and missed `platform-pending:`,
|
||||
// producing about:blank for poll-mode uploads.
|
||||
if (isPlatformAttachment(url)) {
|
||||
return (
|
||||
<a
|
||||
href={url}
|
||||
{...rest}
|
||||
onClick={(e) => {
|
||||
e.preventDefault();
|
||||
// Construct a synthetic ChatAttachment
|
||||
// and route through the same
|
||||
// authenticated download path the
|
||||
// download chips use. Filename is the
|
||||
// last path segment so Save-As prefills
|
||||
// sensibly.
|
||||
const name = url.split(/[\\/]/).pop() || "download";
|
||||
downloadChatFile(workspaceId, {
|
||||
uri: url,
|
||||
name,
|
||||
}).catch((err) => {
|
||||
setError(
|
||||
err instanceof Error
|
||||
? `Download failed: ${err.message}`
|
||||
: "Download failed",
|
||||
);
|
||||
});
|
||||
}}
|
||||
>
|
||||
{children}
|
||||
</a>
|
||||
);
|
||||
}
|
||||
// External (http(s) / mailto / unknown scheme):
|
||||
// open in new tab so canvas state survives.
|
||||
return (
|
||||
<a
|
||||
href={url}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
{...rest}
|
||||
>
|
||||
{children}
|
||||
</a>
|
||||
);
|
||||
},
|
||||
}}
|
||||
>{msg.content}</ReactMarkdown>
|
||||
</div>
|
||||
)}
|
||||
{msg.attachments && msg.attachments.length > 0 && (
|
||||
@@ -1150,7 +1237,22 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
value={input}
|
||||
onChange={(e) => setInput(e.target.value)}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === "Enter" && !e.shiftKey) {
|
||||
// IME-safe send: while a CJK / Japanese / Korean IME is
|
||||
// composing, Enter accepts the candidate selection — not a
|
||||
// newline, not a send. `e.nativeEvent.isComposing` is the
|
||||
// standard signal (modern WebKit/Blink/Gecko); the keyCode
|
||||
// 229 fallback covers older Safari / WebKit-based mobile
|
||||
// browsers that delay setting isComposing on the
|
||||
// composition-end Enter. Reported 2026-05-05: typing
|
||||
// Chinese with the system IME, pressing Enter to commit
|
||||
// a candidate would inadvertently send the half-typed
|
||||
// message.
|
||||
if (
|
||||
e.key === "Enter" &&
|
||||
!e.shiftKey &&
|
||||
!e.nativeEvent.isComposing &&
|
||||
e.keyCode !== 229
|
||||
) {
|
||||
e.preventDefault();
|
||||
sendMessage();
|
||||
}
|
||||
|
||||
@@ -0,0 +1,141 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Pins two regressions reported on production 2026-05-05:
|
||||
//
|
||||
// 1. IME composition + Enter key: typing Chinese (or any CJK / IME-
|
||||
// composed text) and pressing Enter to commit the candidate
|
||||
// selection used to send the half-typed message. The fix checks
|
||||
// `event.nativeEvent.isComposing` (and a `keyCode === 229`
|
||||
// fallback for older WebKit) before treating Enter as send.
|
||||
//
|
||||
// 2. Markdown link clicks: the agent's ReactMarkdown-rendered links
|
||||
// used to:
|
||||
// - http/https → navigate canvas tab away (user lost canvas state)
|
||||
// - workspace://path / file:///workspace/... / /workspace/... →
|
||||
// browser hit about:blank (unhandled protocol).
|
||||
// Fix: external links get target="_blank" + noopener; in-container
|
||||
// paths route through downloadChatFile (same auth path as chips).
|
||||
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, fireEvent, waitFor } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
// Mock the api module so render doesn't try to talk to a real CP.
|
||||
const apiGet = vi.fn((_path: string): Promise<unknown> => Promise.resolve([]));
|
||||
const apiPost = vi.fn((_path: string, _body: unknown): Promise<unknown> => Promise.resolve({}));
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (path: string) => apiGet(path),
|
||||
post: (path: string, body: unknown) => apiPost(path, body),
|
||||
del: vi.fn(),
|
||||
patch: vi.fn(),
|
||||
put: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: vi.fn((selector?: (s: unknown) => unknown) =>
|
||||
selector ? selector({ agentMessages: {}, consumeAgentMessages: () => [] }) : {},
|
||||
),
|
||||
}));
|
||||
|
||||
// Capture the downloadChatFile call so the markdown-link test can
|
||||
// assert in-container paths route through the authenticated download
|
||||
// path rather than the browser's bare anchor click.
|
||||
const downloadChatFileMock = vi.fn((_workspaceId: string, _att: { uri: string; name: string }) => Promise.resolve());
|
||||
vi.mock("../chat/uploads", async () => {
|
||||
const actual = await vi.importActual<typeof import("../chat/uploads")>("../chat/uploads");
|
||||
return {
|
||||
...actual,
|
||||
downloadChatFile: (workspaceId: string, att: { uri: string; name: string }) =>
|
||||
downloadChatFileMock(workspaceId, att),
|
||||
};
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
apiGet.mockClear();
|
||||
apiPost.mockClear();
|
||||
downloadChatFileMock.mockClear();
|
||||
// jsdom doesn't implement scrollIntoView; ChatTab calls it after
|
||||
// every render with a new message.
|
||||
Element.prototype.scrollIntoView = vi.fn();
|
||||
// Stub IntersectionObserver — the lazy-history sentinel uses it.
|
||||
class FakeIO {
|
||||
observe() {}
|
||||
unobserve() {}
|
||||
disconnect() {}
|
||||
}
|
||||
(window as unknown as { IntersectionObserver: unknown }).IntersectionObserver = FakeIO;
|
||||
(globalThis as unknown as { IntersectionObserver: unknown }).IntersectionObserver = FakeIO;
|
||||
});
|
||||
|
||||
import { ChatTab } from "../ChatTab";
|
||||
|
||||
const minimalData = {
|
||||
status: "online" as const,
|
||||
runtime: "claude-code",
|
||||
currentTask: null,
|
||||
} as unknown as Parameters<typeof ChatTab>[0]["data"];
|
||||
|
||||
describe("ChatTab — IME-safe Enter key", () => {
|
||||
it("does NOT send the message when Enter fires during IME composition (isComposing)", async () => {
|
||||
render(<ChatTab workspaceId="ws-ime" data={minimalData} />);
|
||||
|
||||
// Find the textarea by its aria-label.
|
||||
const textarea = await screen.findByLabelText(/Message to agent/i);
|
||||
fireEvent.change(textarea, { target: { value: "你好" } });
|
||||
|
||||
// Simulate the Enter that commits an IME selection: isComposing=true.
|
||||
fireEvent.keyDown(textarea, { key: "Enter", isComposing: true });
|
||||
|
||||
// sendMessage POSTs via api.post; assert it was NOT called.
|
||||
await waitFor(() => {
|
||||
expect(apiPost).not.toHaveBeenCalled();
|
||||
});
|
||||
// And the input is preserved — ChatTab clears it only on actual send.
|
||||
expect((textarea as HTMLTextAreaElement).value).toBe("你好");
|
||||
});
|
||||
|
||||
it("does NOT send when keyCode is 229 (older Safari IME fallback)", async () => {
|
||||
render(<ChatTab workspaceId="ws-ime2" data={minimalData} />);
|
||||
const textarea = await screen.findByLabelText(/Message to agent/i);
|
||||
fireEvent.change(textarea, { target: { value: "한국어" } });
|
||||
|
||||
// keyCode 229 is the older-Safari signal that an IME is composing.
|
||||
// Some mobile WebKit-based browsers delay setting isComposing on
|
||||
// the composition-end Enter; the keyCode fallback covers that.
|
||||
fireEvent.keyDown(textarea, { key: "Enter", keyCode: 229 });
|
||||
|
||||
await waitFor(() => {
|
||||
expect(apiPost).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
it("DOES send on a non-composing Enter (the happy path stays intact)", async () => {
|
||||
render(<ChatTab workspaceId="ws-ok" data={minimalData} />);
|
||||
const textarea = await screen.findByLabelText(/Message to agent/i);
|
||||
fireEvent.change(textarea, { target: { value: "hello world" } });
|
||||
|
||||
fireEvent.keyDown(textarea, { key: "Enter" /* no isComposing, no 229 */ });
|
||||
|
||||
// The api.post for /a2a fires inside sendMessage. waitFor since
|
||||
// the call goes through several effects.
|
||||
await waitFor(() => {
|
||||
expect(apiPost).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
it("Shift+Enter inserts newline regardless (no send)", async () => {
|
||||
render(<ChatTab workspaceId="ws-shift" data={minimalData} />);
|
||||
const textarea = await screen.findByLabelText(/Message to agent/i);
|
||||
fireEvent.change(textarea, { target: { value: "line 1" } });
|
||||
|
||||
fireEvent.keyDown(textarea, { key: "Enter", shiftKey: true });
|
||||
|
||||
await waitFor(() => {
|
||||
expect(apiPost).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -1,220 +0,0 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Pins the Edit affordance added to MemoryTab. Until this PR the Memory tab
|
||||
// was Add+Delete only; an entry that needed correction had to be deleted and
|
||||
// re-added — losing the version-counter and any in-flight optimistic-locking
|
||||
// invariants other writers depend on.
|
||||
//
|
||||
// Each test pins one branch of the new flow. If any fails, the bug is back.
|
||||
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
const apiGet = vi.fn();
|
||||
const apiPost = vi.fn();
|
||||
const apiDel = vi.fn();
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (path: string) => apiGet(path),
|
||||
post: (path: string, body: unknown) => apiPost(path, body),
|
||||
del: (path: string) => apiDel(path),
|
||||
patch: vi.fn(),
|
||||
put: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
import { MemoryTab } from "../MemoryTab";
|
||||
|
||||
const sampleEntries = [
|
||||
{
|
||||
key: "team_brief",
|
||||
value: { goal: "ship v2" },
|
||||
version: 3,
|
||||
expires_at: null,
|
||||
updated_at: "2026-05-04T10:00:00Z",
|
||||
},
|
||||
{
|
||||
key: "plain_note",
|
||||
value: "raw text note",
|
||||
version: 1,
|
||||
expires_at: "2099-01-01T00:00:00Z",
|
||||
updated_at: "2026-05-04T10:01:00Z",
|
||||
},
|
||||
];
|
||||
|
||||
beforeEach(() => {
|
||||
apiGet.mockReset();
|
||||
apiPost.mockReset();
|
||||
apiDel.mockReset();
|
||||
apiGet.mockImplementation((path: string) => {
|
||||
if (path === "/workspaces/ws-test/memory") {
|
||||
return Promise.resolve(sampleEntries);
|
||||
}
|
||||
return Promise.reject(new Error(`unmocked api.get: ${path}`));
|
||||
});
|
||||
});
|
||||
|
||||
async function renderAndExpand(key: string) {
|
||||
render(<MemoryTab workspaceId="ws-test" />);
|
||||
await waitFor(() => expect(apiGet).toHaveBeenCalled());
|
||||
// Reveal the Advanced section that hosts the entry list.
|
||||
const showAdvanced = await screen.findByRole("button", { name: "Show" });
|
||||
fireEvent.click(showAdvanced);
|
||||
// Expand the row.
|
||||
const row = await screen.findByRole("button", { name: new RegExp(key) });
|
||||
fireEvent.click(row);
|
||||
}
|
||||
|
||||
describe("MemoryTab Edit affordance", () => {
|
||||
it("Edit button appears once a row is expanded", async () => {
|
||||
await renderAndExpand("team_brief");
|
||||
expect(screen.getAllByRole("button", { name: "Edit" }).length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("clicking Edit on a JSON-valued entry pre-fills the textarea with pretty JSON", async () => {
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const textarea = (await screen.findByLabelText(
|
||||
"Edit value for team_brief",
|
||||
)) as HTMLTextAreaElement;
|
||||
expect(textarea.value).toBe('{\n "goal": "ship v2"\n}');
|
||||
});
|
||||
|
||||
it("clicking Edit on a string-valued entry pre-fills raw (no surrounding quotes)", async () => {
|
||||
await renderAndExpand("plain_note");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const textarea = (await screen.findByLabelText(
|
||||
"Edit value for plain_note",
|
||||
)) as HTMLTextAreaElement;
|
||||
expect(textarea.value).toBe("raw text note");
|
||||
});
|
||||
|
||||
it("Save POSTs with if_match_version + parsed value, then reloads", async () => {
|
||||
apiPost.mockResolvedValue({ status: "ok", key: "team_brief", version: 4 });
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const textarea = await screen.findByLabelText("Edit value for team_brief");
|
||||
fireEvent.change(textarea, { target: { value: '{"goal":"ship v3"}' } });
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
|
||||
await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
|
||||
expect(apiPost).toHaveBeenCalledWith("/workspaces/ws-test/memory", {
|
||||
key: "team_brief",
|
||||
value: { goal: "ship v3" },
|
||||
if_match_version: 3,
|
||||
});
|
||||
// Reload after save → second GET.
|
||||
await waitFor(() => expect(apiGet).toHaveBeenCalledTimes(2));
|
||||
});
|
||||
|
||||
it("Save with non-JSON text falls back to plain string", async () => {
|
||||
apiPost.mockResolvedValue({ status: "ok" });
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const textarea = await screen.findByLabelText("Edit value for team_brief");
|
||||
fireEvent.change(textarea, { target: { value: "free-form note" } });
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
|
||||
await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
|
||||
expect(apiPost.mock.calls[0][1].value).toBe("free-form note");
|
||||
});
|
||||
|
||||
it("TTL field is forwarded as ttl_seconds when set", async () => {
|
||||
apiPost.mockResolvedValue({ status: "ok" });
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const ttlInput = await screen.findByLabelText("Edit TTL for team_brief");
|
||||
fireEvent.change(ttlInput, { target: { value: "3600" } });
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
|
||||
await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
|
||||
expect(apiPost.mock.calls[0][1].ttl_seconds).toBe(3600);
|
||||
});
|
||||
|
||||
it("blank/zero/non-numeric TTL is omitted from the payload", async () => {
|
||||
apiPost.mockResolvedValue({ status: "ok" });
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const ttlInput = await screen.findByLabelText("Edit TTL for team_brief");
|
||||
// Junk + zero both must drop out — payload must not contain ttl_seconds.
|
||||
fireEvent.change(ttlInput, { target: { value: "abc" } });
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
|
||||
expect(apiPost.mock.calls[0][1]).not.toHaveProperty("ttl_seconds");
|
||||
});
|
||||
|
||||
it("Cancel discards edits and restores the rendered value", async () => {
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const textarea = await screen.findByLabelText("Edit value for team_brief");
|
||||
fireEvent.change(textarea, { target: { value: '{"goal":"discarded"}' } });
|
||||
fireEvent.click(screen.getByRole("button", { name: "Cancel" }));
|
||||
|
||||
expect(apiPost).not.toHaveBeenCalled();
|
||||
// Editor is gone; the JSON pre-block is back.
|
||||
expect(screen.queryByLabelText("Edit value for team_brief")).toBeNull();
|
||||
expect(screen.getAllByText(/"goal": "ship v2"/i).length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("409 response surfaces a retry hint and reloads", async () => {
|
||||
apiPost.mockRejectedValueOnce(
|
||||
new Error("HTTP 409: if_match_version mismatch"),
|
||||
);
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const textarea = await screen.findByLabelText("Edit value for team_brief");
|
||||
fireEvent.change(textarea, { target: { value: '{"goal":"ship v3"}' } });
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
|
||||
await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
|
||||
const alert = await screen.findByRole("alert");
|
||||
expect(alert.textContent).toMatch(/changed since you opened it/i);
|
||||
// Initial mount load + post-conflict reload.
|
||||
await waitFor(() => expect(apiGet).toHaveBeenCalledTimes(2));
|
||||
});
|
||||
|
||||
it("non-409 error surfaces the message and does not reload", async () => {
|
||||
apiPost.mockRejectedValueOnce(new Error("boom"));
|
||||
await renderAndExpand("team_brief");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
|
||||
const alert = await screen.findByRole("alert");
|
||||
expect(alert.textContent).toBe("boom");
|
||||
// Only the initial mount load — no retry reload.
|
||||
expect(apiGet).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("entry with no version omits if_match_version (back-compat with older shape)", async () => {
|
||||
// Pre-version-counter shape: drop the `version` field from the row.
|
||||
apiGet.mockReset();
|
||||
apiGet.mockImplementation((path: string) => {
|
||||
if (path === "/workspaces/ws-test/memory") {
|
||||
return Promise.resolve([
|
||||
{
|
||||
key: "old_entry",
|
||||
value: "legacy",
|
||||
expires_at: null,
|
||||
updated_at: "2026-05-04T10:00:00Z",
|
||||
},
|
||||
]);
|
||||
}
|
||||
return Promise.reject(new Error(`unmocked: ${path}`));
|
||||
});
|
||||
apiPost.mockResolvedValue({ status: "ok" });
|
||||
|
||||
await renderAndExpand("old_entry");
|
||||
fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
|
||||
const textarea = await screen.findByLabelText("Edit value for old_entry");
|
||||
fireEvent.change(textarea, { target: { value: "updated" } });
|
||||
fireEvent.click(screen.getByRole("button", { name: "Save" }));
|
||||
|
||||
await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
|
||||
const payload = apiPost.mock.calls[0][1];
|
||||
expect(payload).not.toHaveProperty("if_match_version");
|
||||
expect(payload.value).toBe("updated");
|
||||
});
|
||||
});
|
||||
@@ -1,6 +1,6 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useEffect, useMemo, useRef } from "react";
|
||||
import { useState, useEffect, useLayoutEffect, useMemo, useRef, useCallback } from "react";
|
||||
import ReactMarkdown from "react-markdown";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import { api } from "@/lib/api";
|
||||
@@ -184,13 +184,23 @@ function unwrapErrorText(raw: string | null): string {
|
||||
export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
|
||||
const [messages, setMessages] = useState<CommMessage[]>([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [loadError, setLoadError] = useState<string | null>(null);
|
||||
// Dedup by timestamp+type+peer to handle API load + WebSocket race
|
||||
const seenKeys = useRef(new Set<string>());
|
||||
const bottomRef = useRef<HTMLDivElement>(null);
|
||||
// Mirrors the my-chat scroll behaviour from ChatTab (PR #2903) —
|
||||
// smooth-scroll on a long history gets interrupted by concurrent
|
||||
// renders and lands the panel mid-conversation. Switch the first
|
||||
// arrival to instant; subsequent appends animate.
|
||||
const hasInitialScrollRef = useRef(false);
|
||||
|
||||
// Load history
|
||||
useEffect(() => {
|
||||
// Load history. Extracted so the error-state retry button can
|
||||
// re-invoke without remount. ChatTab uses the same shape
|
||||
// (loadInitial → loadError state → retry button).
|
||||
const loadInitial = useCallback(() => {
|
||||
setLoading(true);
|
||||
setLoadError(null);
|
||||
seenKeys.current.clear();
|
||||
api.get<ActivityEntry[]>(`/workspaces/${workspaceId}/activity?source=agent&limit=50`)
|
||||
.then((entries) => {
|
||||
const filtered = (entries ?? [])
|
||||
@@ -234,10 +244,15 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
|
||||
// the .then body) — the panel just sat on the empty state
|
||||
// with zero signal.
|
||||
console.warn("AgentCommsPanel: load activity failed", err);
|
||||
setLoadError(err instanceof Error ? err.message : String(err));
|
||||
setLoading(false);
|
||||
});
|
||||
}, [workspaceId]);
|
||||
|
||||
useEffect(() => {
|
||||
loadInitial();
|
||||
}, [loadInitial]);
|
||||
|
||||
// Live updates routed through the global ReconnectingSocket. The
|
||||
// previous pattern of `new WebSocket(WS_URL)` per panel had no
|
||||
// onclose / no reconnect, so any drop (idle timeout, browser
|
||||
@@ -358,7 +373,18 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
|
||||
} catch { /* ignore */ }
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
// useLayoutEffect (not useEffect) so the scroll runs BEFORE paint —
|
||||
// otherwise the user sees the panel jump for one frame on every
|
||||
// append. Mirrors ChatTab's MyChatPanel scroll block.
|
||||
useLayoutEffect(() => {
|
||||
if (!hasInitialScrollRef.current && messages.length > 0) {
|
||||
// Instant on first arrival — smooth-scroll on a long history
|
||||
// gets interrupted by concurrent renders and lands the panel
|
||||
// mid-conversation (the chat-opens-in-middle bug class).
|
||||
hasInitialScrollRef.current = true;
|
||||
bottomRef.current?.scrollIntoView({ behavior: "instant" as ScrollBehavior });
|
||||
return;
|
||||
}
|
||||
bottomRef.current?.scrollIntoView({ behavior: "smooth" });
|
||||
}, [messages]);
|
||||
|
||||
@@ -366,6 +392,27 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
|
||||
return <div className="text-xs text-ink-soft text-center py-8">Loading agent communications...</div>;
|
||||
}
|
||||
|
||||
if (loadError !== null && messages.length === 0) {
|
||||
// Mirrors ChatTab my-chat error UI — surfaces the load failure
|
||||
// with a retry button instead of silently rendering empty state.
|
||||
return (
|
||||
<div
|
||||
role="alert"
|
||||
className="mx-2 mt-2 rounded-lg border border-red-800/50 bg-red-950/30 px-3 py-2.5"
|
||||
>
|
||||
<p className="text-[11px] text-bad mb-1.5">
|
||||
Failed to load agent communications: {loadError}
|
||||
</p>
|
||||
<button
|
||||
onClick={loadInitial}
|
||||
className="text-[10px] px-2 py-0.5 rounded bg-red-800/40 text-bad hover:bg-red-700/50 transition-colors"
|
||||
>
|
||||
Retry
|
||||
</button>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (messages.length === 0) {
|
||||
return (
|
||||
<div className="text-xs text-ink-soft text-center py-8">
|
||||
|
||||
@@ -0,0 +1,115 @@
|
||||
// @vitest-environment jsdom
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { render, screen, fireEvent, waitFor } from "@testing-library/react";
|
||||
|
||||
// API mock — tests can override per case via apiGetMock.mockImplementationOnce.
|
||||
const apiGetMock = vi.fn<(url: string) => Promise<unknown>>();
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (url: string) => apiGetMock(url),
|
||||
},
|
||||
}));
|
||||
|
||||
// useSocketEvent — no-op for these render tests; live updates aren't
|
||||
// what we're verifying here.
|
||||
vi.mock("@/hooks/useSocketEvent", () => ({
|
||||
useSocketEvent: () => {},
|
||||
}));
|
||||
|
||||
// Canvas store — peer name resolution.
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: {
|
||||
getState: () => ({
|
||||
nodes: [
|
||||
{ id: "ws-self", data: { name: "Self" } },
|
||||
{ id: "ws-peer", data: { name: "Peer Agent" } },
|
||||
],
|
||||
}),
|
||||
},
|
||||
}));
|
||||
|
||||
// Toaster shim — AgentCommsPanel imports showToast.
|
||||
vi.mock("../../Toaster", () => ({
|
||||
showToast: vi.fn(),
|
||||
}));
|
||||
|
||||
import { AgentCommsPanel } from "../AgentCommsPanel";
|
||||
|
||||
// jsdom doesn't implement scrollIntoView. Tests that observe the call
|
||||
// install a spy here; tests that don't care still need a no-op stub
|
||||
// so the component doesn't throw.
|
||||
const scrollSpy = vi.fn<(opts?: ScrollIntoViewOptions | boolean) => void>();
|
||||
beforeEach(() => {
|
||||
apiGetMock.mockReset();
|
||||
scrollSpy.mockReset();
|
||||
Element.prototype.scrollIntoView = scrollSpy as unknown as Element["scrollIntoView"];
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
describe("AgentCommsPanel — initial-state parity with ChatTab my-chat", () => {
|
||||
it("shows loading text while history fetch is in flight", () => {
|
||||
apiGetMock.mockReturnValueOnce(new Promise(() => { /* never resolves */ }));
|
||||
render(<AgentCommsPanel workspaceId="ws-self" />);
|
||||
expect(screen.getByText("Loading agent communications...")).toBeDefined();
|
||||
});
|
||||
|
||||
it("renders error UI with a Retry button when the history fetch rejects", async () => {
|
||||
apiGetMock.mockRejectedValueOnce(new Error("network down"));
|
||||
render(<AgentCommsPanel workspaceId="ws-self" />);
|
||||
|
||||
// Wait for the error state to render — loading→error transition is async.
|
||||
const alert = await waitFor(() => screen.getByRole("alert"));
|
||||
expect(alert.textContent).toMatch(/Failed to load agent communications/);
|
||||
expect(alert.textContent).toMatch(/network down/);
|
||||
|
||||
// Retry button must be present and trigger a refetch.
|
||||
const retry = screen.getByRole("button", { name: "Retry" });
|
||||
apiGetMock.mockResolvedValueOnce([]); // success on retry
|
||||
fireEvent.click(retry);
|
||||
|
||||
// Two calls total: initial load + retry. Pin via mock call count.
|
||||
await waitFor(() => expect(apiGetMock.mock.calls.length).toBe(2));
|
||||
});
|
||||
|
||||
it("falls back to empty-state copy when load succeeds with zero rows", async () => {
|
||||
apiGetMock.mockResolvedValueOnce([]);
|
||||
render(<AgentCommsPanel workspaceId="ws-self" />);
|
||||
await waitFor(() =>
|
||||
expect(screen.getByText("No agent-to-agent communications yet.")).toBeDefined(),
|
||||
);
|
||||
});
|
||||
|
||||
it("scrollIntoView is called with behavior=instant on the first message arrival", async () => {
|
||||
apiGetMock.mockResolvedValueOnce([
|
||||
{
|
||||
id: "act-1",
|
||||
activity_type: "a2a_send",
|
||||
source_id: "ws-self",
|
||||
target_id: "ws-peer",
|
||||
method: "message/send",
|
||||
summary: "Delegating",
|
||||
request_body: { message: { parts: [{ text: "hi" }] } },
|
||||
response_body: null,
|
||||
status: "ok",
|
||||
created_at: "2026-04-25T18:00:00Z",
|
||||
},
|
||||
]);
|
||||
render(<AgentCommsPanel workspaceId="ws-self" />);
|
||||
|
||||
// useLayoutEffect is what makes the first call instant — wait for
|
||||
// the panel to render at least one message.
|
||||
await waitFor(() => expect(scrollSpy.mock.calls.length).toBeGreaterThan(0));
|
||||
|
||||
// The pinned contract: SOME call uses behavior: "instant" — the
|
||||
// first-arrival case. Subsequent appends use "smooth", but those
|
||||
// can't fire here (no live update yet).
|
||||
const sawInstant = scrollSpy.mock.calls.some((args) => {
|
||||
const opts = args[0];
|
||||
return typeof opts === "object" && opts !== null && "behavior" in opts && opts.behavior === "instant";
|
||||
});
|
||||
expect(sawInstant).toBe(true);
|
||||
});
|
||||
});
|
||||
@@ -44,6 +44,8 @@ export async function uploadChatFiles(
|
||||
* - `workspace:<abs-path>` (our canonical form)
|
||||
* - `file:///workspace/...` (some agents emit this)
|
||||
* - `/workspace/...` (bare absolute path inside the container)
|
||||
* - `platform-pending:<wsid>/<file_id>` (poll-mode upload, staged
|
||||
* on platform side; resolves to /pending-uploads/<file_id>/content)
|
||||
* Everything that looks like an allowed-root container path is
|
||||
* rewritten to the authenticated /chat/download endpoint. HTTP(S)
|
||||
* URIs pass through unchanged so we can also render links to
|
||||
@@ -53,6 +55,35 @@ export function resolveAttachmentHref(
|
||||
workspaceId: string,
|
||||
uri: string,
|
||||
): string {
|
||||
// platform-pending: agents-emitted URI that lives in the platform-side
|
||||
// staging layer (poll-mode chat uploads, see workspace-server's
|
||||
// chat_files.go ~line 690 + pendinguploads.Storage). The wire shape
|
||||
// is `platform-pending:<workspace_id>/<file_id>`. Resolving it
|
||||
// requires hitting GET /workspaces/<wsid>/pending-uploads/<file_id>/content
|
||||
// which streams the bytes with full workspace auth. Without this
|
||||
// case the browser sees an unhandled-protocol click → about:blank,
|
||||
// which was the user-visible bug from 2026-05-05 (reno-stars).
|
||||
if (uri.startsWith("platform-pending:")) {
|
||||
const rest = uri.slice("platform-pending:".length);
|
||||
const slash = rest.indexOf("/");
|
||||
// Defensive: if the URI doesn't have the expected wsid/fileid
|
||||
// shape, fall through to raw-URI handling so the consumer can
|
||||
// still try to render it (rather than producing a broken /pending-
|
||||
// uploads/// path).
|
||||
if (slash > 0) {
|
||||
const wsid = rest.slice(0, slash);
|
||||
const fileID = rest.slice(slash + 1);
|
||||
if (wsid && fileID) {
|
||||
// Use the URI's own workspace_id (the bytes live in THAT
|
||||
// workspace's pending-uploads store), not the chat's
|
||||
// workspace_id — these CAN differ when a user drags a file
|
||||
// into one workspace's chat that gets forwarded to another
|
||||
// (cross-workspace delegation, agent forwarding).
|
||||
return `${PLATFORM_URL}/workspaces/${wsid}/pending-uploads/${fileID}/content`;
|
||||
}
|
||||
}
|
||||
return uri;
|
||||
}
|
||||
const containerPath = normalizeWorkspaceUri(uri);
|
||||
if (containerPath) {
|
||||
return `${PLATFORM_URL}/workspaces/${workspaceId}/chat/download?path=${encodeURIComponent(containerPath)}`;
|
||||
@@ -60,6 +91,14 @@ export function resolveAttachmentHref(
|
||||
return uri;
|
||||
}
|
||||
|
||||
/** Returns true when the URI points at a platform-side resource that
|
||||
* requires our auth headers — caller should route through
|
||||
* downloadChatFile rather than letting the browser navigate. */
|
||||
export function isPlatformAttachment(uri: string): boolean {
|
||||
if (uri.startsWith("platform-pending:")) return true;
|
||||
return normalizeWorkspaceUri(uri) !== null;
|
||||
}
|
||||
|
||||
/** Extracts the absolute container path from a workspace-scoped URI,
|
||||
* or null if the URI isn't a container path. The matching roots
|
||||
* mirror the server's `allowedRoots` allowlist. */
|
||||
@@ -96,8 +135,7 @@ export async function downloadChatFile(
|
||||
attachment: ChatAttachment,
|
||||
): Promise<void> {
|
||||
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
||||
const isContainerPath = normalizeWorkspaceUri(attachment.uri) !== null;
|
||||
if (!isContainerPath) {
|
||||
if (!isPlatformAttachment(attachment.uri)) {
|
||||
// External URL — let the browser navigate. Opens in new tab so
|
||||
// the canvas context survives a navigation. `href` here is the
|
||||
// raw URI (http(s), or anything else the agent sent back).
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* @vitest-environment jsdom
|
||||
*/
|
||||
import { describe, it, expect, vi, afterEach } from "vitest";
|
||||
import { fetchSession, redirectToLogin } from "../auth";
|
||||
import { fetchSession, redirectToLogin, signOut } from "../auth";
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllGlobals();
|
||||
@@ -110,3 +110,157 @@ describe("redirectToLogin", () => {
|
||||
expect((window.location as unknown as { href: string }).href).toBe(signupHref);
|
||||
});
|
||||
});
|
||||
|
||||
describe("signOut", () => {
|
||||
// Helper — most tests need the same window.location stub.
|
||||
function stubLocation(): void {
|
||||
Object.defineProperty(window, "location", {
|
||||
writable: true,
|
||||
value: {
|
||||
href: "https://acme.moleculesai.app/orgs",
|
||||
pathname: "/orgs",
|
||||
hostname: "acme.moleculesai.app",
|
||||
protocol: "https:",
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
it("POSTs to /cp/auth/signout with credentials:include", async () => {
|
||||
stubLocation();
|
||||
const fetchMock = vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: async () => ({ ok: true, logout_url: "" }),
|
||||
});
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
|
||||
await signOut();
|
||||
|
||||
expect(fetchMock).toHaveBeenCalledTimes(1);
|
||||
expect(fetchMock).toHaveBeenCalledWith(
|
||||
expect.stringContaining("/cp/auth/signout"),
|
||||
expect.objectContaining({ method: "POST", credentials: "include" }),
|
||||
);
|
||||
});
|
||||
|
||||
it("navigates to provider logout_url when the response includes one", async () => {
|
||||
// The hosted-logout path is what actually breaks the SSO re-auth
|
||||
// loop reported on PR #2913. Without this, AuthKit's browser
|
||||
// cookie keeps the user signed in via SSO and any subsequent
|
||||
// /cp/auth/login silently re-auths.
|
||||
stubLocation();
|
||||
const hostedLogout =
|
||||
"https://api.workos.com/user_management/sessions/logout?session_id=cookie&return_to=https%3A%2F%2Fapp.moleculesai.app%2Forgs";
|
||||
vi.stubGlobal(
|
||||
"fetch",
|
||||
vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: async () => ({ ok: true, logout_url: hostedLogout }),
|
||||
}),
|
||||
);
|
||||
|
||||
await signOut();
|
||||
|
||||
const after = (window.location as unknown as { href: string }).href;
|
||||
expect(after).toBe(hostedLogout);
|
||||
});
|
||||
|
||||
it("falls back to /cp/auth/login when logout_url is empty (DisabledProvider / dev)", async () => {
|
||||
// DisabledProvider returns "" — the local /cp/auth/login redirect
|
||||
// works in dev/test where there's no SSO session to escape.
|
||||
stubLocation();
|
||||
vi.stubGlobal(
|
||||
"fetch",
|
||||
vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: async () => ({ ok: true, logout_url: "" }),
|
||||
}),
|
||||
);
|
||||
|
||||
await signOut();
|
||||
|
||||
const after = (window.location as unknown as { href: string }).href;
|
||||
// Tenant subdomain (acme.moleculesai.app) → auth origin is app.moleculesai.app.
|
||||
expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
|
||||
});
|
||||
|
||||
it("redirects even when the POST fails so the user isn't stuck on an authed page", async () => {
|
||||
// Critical UX invariant: clicking 'Sign out' MUST navigate away from
|
||||
// the authenticated app, even if the network is down or the cookie
|
||||
// is already invalid. Anything else looks like the button is
|
||||
// broken — the precise complaint that triggered this fix.
|
||||
stubLocation();
|
||||
vi.stubGlobal("fetch", vi.fn().mockRejectedValue(new Error("network down")));
|
||||
|
||||
await signOut();
|
||||
|
||||
const after = (window.location as unknown as { href: string }).href;
|
||||
expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
|
||||
});
|
||||
|
||||
it("redirects on 401 (session already invalid) just like 200", async () => {
|
||||
// A user with an already-invalid cookie should still see the
|
||||
// logout flow complete — no error, no stuck-on-app dead end.
|
||||
// Note: 401 means res.ok=false → we don't read .json() at all,
|
||||
// so a missing body is fine.
|
||||
stubLocation();
|
||||
vi.stubGlobal(
|
||||
"fetch",
|
||||
vi.fn().mockResolvedValue({
|
||||
ok: false,
|
||||
status: 401,
|
||||
json: async () => ({}),
|
||||
}),
|
||||
);
|
||||
|
||||
await signOut();
|
||||
|
||||
const after = (window.location as unknown as { href: string }).href;
|
||||
expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
|
||||
});
|
||||
|
||||
it("falls back to /cp/auth/login when the response body is malformed", async () => {
|
||||
// Defensive parsing: a body that isn't valid JSON, or doesn't
|
||||
// have logout_url, or has logout_url as the wrong type — none of
|
||||
// these should strand the user on the authed page. Fallback path
|
||||
// takes over.
|
||||
stubLocation();
|
||||
vi.stubGlobal(
|
||||
"fetch",
|
||||
vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: async () => {
|
||||
throw new Error("not json");
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
await signOut();
|
||||
|
||||
const after = (window.location as unknown as { href: string }).href;
|
||||
expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
|
||||
});
|
||||
|
||||
it("falls back to /cp/auth/login when logout_url is the wrong type", async () => {
|
||||
// Even valid JSON should be type-checked: a non-string logout_url
|
||||
// (e.g. server-side bug, version drift) must not crash or open-
|
||||
// redirect the user.
|
||||
stubLocation();
|
||||
vi.stubGlobal(
|
||||
"fetch",
|
||||
vi.fn().mockResolvedValue({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: async () => ({ ok: true, logout_url: 42 }),
|
||||
}),
|
||||
);
|
||||
|
||||
await signOut();
|
||||
|
||||
const after = (window.location as unknown as { href: string }).href;
|
||||
expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -67,3 +67,80 @@ export function redirectToLogin(screenHint: "sign-up" | "sign-in" = "sign-in"):
|
||||
const dest = `${authOrigin}${AUTH_BASE}/${path}?return_to=${encodeURIComponent(returnTo)}`;
|
||||
window.location.href = dest;
|
||||
}
|
||||
|
||||
/**
|
||||
* signOut posts to /cp/auth/signout to clear the WorkOS session cookie
|
||||
* + revoke at the provider, then navigates the browser to the
|
||||
* provider-supplied hosted logout URL (so the provider's BROWSER-side
|
||||
* SSO cookie is cleared too — without this, AuthKit silently re-auths
|
||||
* via SSO on the next /cp/auth/login and the user is "still signed
|
||||
* in" after pressing Sign out).
|
||||
*
|
||||
* Two-layer flow:
|
||||
* 1. POST /cp/auth/signout → CP clears OUR session cookie + revokes
|
||||
* session_id at the provider API. Response includes
|
||||
* `logout_url` — the AuthKit hosted URL the BROWSER must navigate
|
||||
* to so the provider's own browser cookie is cleared.
|
||||
* 2. window.location.href = <logout_url> → AuthKit clears its
|
||||
* session, then redirects the browser to the configured
|
||||
* return_to (defaults to APP_URL/orgs).
|
||||
*
|
||||
* Best-effort by design: a 5xx, network failure, missing logout_url
|
||||
* (DisabledProvider, dev), or stale cookie still results in the
|
||||
* browser navigating away — leaving the user on a logged-in-looking
|
||||
* page after they clicked "Sign out" is the worst possible UX. The
|
||||
* fallback path navigates to /cp/auth/login on the auth origin, which
|
||||
* works correctly in environments without a hosted logout flow (dev,
|
||||
* tests, DisabledProvider).
|
||||
*
|
||||
* Throws nothing — callers can disable the button optimistically or
|
||||
* await this and trust it returns. On a redirect-blocked test
|
||||
* environment (jsdom under vitest) we still exit cleanly so unit tests
|
||||
* can spy on the fetch call.
|
||||
*/
|
||||
export async function signOut(): Promise<void> {
|
||||
let logoutURL: string | undefined;
|
||||
// Fire-and-tolerate the POST. credentials:include is mandatory cross-
|
||||
// origin so the SaaS canvas (acme.moleculesai.app) can hit
|
||||
// app.moleculesai.app/cp/auth/signout with the session cookie.
|
||||
try {
|
||||
const res = await fetch(`${getAuthOrigin()}${AUTH_BASE}/signout`, {
|
||||
method: "POST",
|
||||
credentials: "include",
|
||||
});
|
||||
if (res.ok) {
|
||||
// Body shape: {"ok": true, "logout_url": "..."}. logout_url is
|
||||
// empty for DisabledProvider (dev/local) — we fall back to
|
||||
// /cp/auth/login below. Defensive parsing: a malformed body
|
||||
// shouldn't strand the user on the authed page.
|
||||
const body: unknown = await res.json().catch(() => null);
|
||||
if (
|
||||
body &&
|
||||
typeof body === "object" &&
|
||||
"logout_url" in body &&
|
||||
typeof (body as { logout_url: unknown }).logout_url === "string" &&
|
||||
(body as { logout_url: string }).logout_url
|
||||
) {
|
||||
logoutURL = (body as { logout_url: string }).logout_url;
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Ignore — we still redirect below.
|
||||
}
|
||||
if (typeof window === "undefined") return;
|
||||
if (logoutURL) {
|
||||
// Hosted logout: AuthKit clears its SSO cookie + redirects to
|
||||
// return_to (configured server-side). This is the path that
|
||||
// actually breaks the SSO re-auth loop.
|
||||
window.location.href = logoutURL;
|
||||
return;
|
||||
}
|
||||
// Fallback: no hosted logout (dev, DisabledProvider, network
|
||||
// failure). Land on the login screen rather than the current URL:
|
||||
// returning to a tenant URL after signout would just re-redirect
|
||||
// through /cp/auth/login due to AuthGate. Send the user straight
|
||||
// there with no return_to so they don't loop back into the org they
|
||||
// just left.
|
||||
const authOrigin = getAuthOrigin();
|
||||
window.location.href = `${authOrigin}${AUTH_BASE}/login`;
|
||||
}
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
# Team Expansion (Recursive Workspaces)
|
||||
|
||||
When a workspace is expanded into a team, it gains sub-workspaces while its own agent remains as the **team lead** (coordinator). This is recursive — sub-workspaces can themselves be expanded into teams, infinitely deep.
|
||||
|
||||
## How It Works
|
||||
|
||||
When Developer PM is expanded into a team:
|
||||
|
||||
```
|
||||
Business Core
|
||||
|
|
||||
+-- Developer PM (agent stays, becomes coordinator)
|
||||
|
|
||||
+-- Frontend Agent (sub-workspace, private scope)
|
||||
+-- Backend Agent (sub-workspace, private scope)
|
||||
+-- QA Agent (sub-workspace, private scope)
|
||||
```
|
||||
|
||||
- Developer PM's agent **still exists** and acts as coordinator
|
||||
- Developer PM receives incoming A2A messages from Business Core
|
||||
- Developer PM's agent decides how to delegate to sub-workspaces
|
||||
- Sub-workspaces talk to Developer PM and to each other (same level)
|
||||
- Sub-workspaces **cannot** talk to Business Core or any workspace outside the team
|
||||
|
||||
## Communication Rules
|
||||
|
||||
| Direction | Allowed? | Example |
|
||||
|-----------|----------|---------|
|
||||
| Parent level -> team lead | Yes | Business Core -> Developer PM |
|
||||
| Team lead -> sub-workspaces | Yes | Developer PM -> Frontend Agent |
|
||||
| Sub-workspace -> team lead | Yes | Frontend Agent -> Developer PM |
|
||||
| Sub-workspace <-> sibling | Yes | Frontend Agent <-> Backend Agent |
|
||||
| Outside -> sub-workspace directly | No (403) | Business Core -> Frontend Agent |
|
||||
| Sub-workspace -> outside directly | No | Frontend Agent -> Business Core |
|
||||
|
||||
The team lead (Developer PM) is the **only** bridge between the team's internal world and the outside.
|
||||
|
||||
## Scoped Registry
|
||||
|
||||
Sub-workspaces register in the platform registry but with a **private scope**. The registry knows about them but enforces access control.
|
||||
|
||||
```
|
||||
Registry:
|
||||
Business Core :8001 scope: public
|
||||
Developer PM :8002 scope: public
|
||||
Frontend Agent :8010 scope: private, parent=Developer PM
|
||||
Backend Agent :8011 scope: private, parent=Developer PM
|
||||
QA Agent :8012 scope: private, parent=Developer PM
|
||||
```
|
||||
|
||||
- The platform can always discover any workspace (for provisioning, monitoring)
|
||||
- The parent workspace can discover its sub-workspaces
|
||||
- Sub-workspaces can discover their siblings (same parent)
|
||||
- Outside workspaces get a **403 Forbidden** if they try to discover a private sub-workspace
|
||||
|
||||
## How to Expand
|
||||
|
||||
Expansion is triggered via `POST /workspaces/:id/expand`. The platform reads the `sub_workspaces` list from the workspace's config and provisions each one. On the canvas, users right-click a workspace node and select "Expand into team."
|
||||
|
||||
Collapsing is the inverse: `POST /workspaces/:id/collapse`. Sub-workspaces are stopped and removed.
|
||||
|
||||
## What Happens on Expansion
|
||||
|
||||
When Developer PM is expanded into a team, the hierarchy changes but the outside view doesn't. Business Core's parent/child relationship to Developer PM is unaffected — Developer PM still responds to the same A2A endpoint.
|
||||
|
||||
The events fired:
|
||||
- `WORKSPACE_EXPANDED` with the new `sub_workspace_ids` in the payload
|
||||
- `WORKSPACE_PROVISIONING` for each new sub-workspace
|
||||
- `WORKSPACE_ONLINE` for each sub-workspace as they come up
|
||||
|
||||
Communication rules are automatically derived from the new hierarchy — no manual wiring needed.
|
||||
|
||||
## Canvas Behavior
|
||||
|
||||
- Children render as embedded mini-cards (`TeamMemberChip`) inside the parent node, not as separate canvas nodes
|
||||
- Each mini-card shows full status: gradient bar, name, tier badge, skills pills, active tasks, descendant count
|
||||
- **Recursive rendering** up to 3 levels deep (`MAX_NESTING_DEPTH = 3`) — sub-cards can contain their own "Team" sections
|
||||
- Parent node dynamically resizes: 210-280px (no children), 320-450px (children), 400-560px (grandchildren)
|
||||
- Eject button (sky-blue arrow icon) on hover extracts a child from the team
|
||||
- "Extract from Team" also available in the right-click context menu
|
||||
- Double-click a team node to zoom/fit to the parent area
|
||||
- The parent workspace node shows a badge with total descendant count
|
||||
|
||||
## Collapsing a Team
|
||||
|
||||
The inverse of expansion, triggered via `POST /workspaces/:id/collapse`:
|
||||
|
||||
1. Each sub-workspace agent wraps up current work and writes a handoff document to memory
|
||||
2. Sub-workspaces are stopped and removed
|
||||
3. The team lead's agent goes back to handling everything directly
|
||||
4. A `WORKSPACE_COLLAPSED` event fires
|
||||
|
||||
Sub-workspace memory is cleaned up based on backend (see [Memory — Cleanup](../architecture/memory.md#cleanup-on-workspace-deletion)).
|
||||
|
||||
## Deleting a Team Workspace
|
||||
|
||||
When a team workspace is deleted:
|
||||
1. Platform shows a warning listing all sub-workspaces that will be deleted
|
||||
2. User can **drag sub-workspaces out** of the team before confirming (promotes them to the parent level)
|
||||
3. On confirmation, cascade delete removes the parent and all remaining sub-workspaces
|
||||
4. `WORKSPACE_REMOVED` events fire for each deleted workspace
|
||||
|
||||
## Related Docs
|
||||
|
||||
- [Communication Rules](../api-protocol/communication-rules.md) — Full access control model
|
||||
- [Core Concepts](../product/core-concepts.md) — Workspace fundamentals
|
||||
- [System Prompt Structure](./system-prompt-structure.md) — How peer capabilities are injected
|
||||
- [Provisioner](../architecture/provisioner.md) — How sub-workspaces are deployed
|
||||
- [Registry & Heartbeat](../api-protocol/registry-and-heartbeat.md) — How registration works
|
||||
- [Event Log](../architecture/event-log.md) — Events fired during expansion
|
||||
- [Canvas UI](../frontend/canvas.md) — Visual behavior of teams
|
||||
@@ -41,8 +41,6 @@ Full contract: `docs/runbooks/admin-auth.md`.
|
||||
| GET | /admin/workspaces/:id/test-token | admin_test_token.go — mint a fresh bearer token for E2E scripts; returns 404 unless `MOLECULE_ENV != production` or `MOLECULE_ENABLE_TEST_TOKENS=1` |
|
||||
| GET/POST/DELETE | /admin/secrets[/:key] | secrets.go — legacy aliases for /settings/secrets |
|
||||
| WS | /workspaces/:id/terminal | terminal.go |
|
||||
| POST | /workspaces/:id/expand | team.go |
|
||||
| POST | /workspaces/:id/collapse | team.go |
|
||||
| POST/GET | /workspaces/:id/approvals | approvals.go |
|
||||
| POST | /workspaces/:id/approvals/:id/decide | approvals.go |
|
||||
| GET | /approvals/pending | approvals.go |
|
||||
|
||||
@@ -336,8 +336,6 @@ This same logic governs: A2A delegation, memory scope enforcement, activity visi
|
||||
|
||||
| Method | Endpoint | Purpose |
|
||||
|--------|----------|---------|
|
||||
| `POST` | `/workspaces/:id/expand` | Expand workspace into team (become coordinator) |
|
||||
| `POST` | `/workspaces/:id/collapse` | Collapse team back to single workspace |
|
||||
|
||||
### Files, Terminal, Templates, Bundles (8 endpoints)
|
||||
|
||||
|
||||
@@ -186,4 +186,3 @@ So the UI now exposes more operational failure state directly instead of silentl
|
||||
- [Quickstart](../quickstart.md)
|
||||
- [Platform API](../api-protocol/platform-api.md)
|
||||
- [Workspace Runtime](../agent-runtime/workspace-runtime.md)
|
||||
- [Team Expansion](../agent-runtime/team-expansion.md)
|
||||
|
||||
+1
-1
@@ -18,7 +18,7 @@ lands in the watch list with a colliding term, add a row here.
|
||||
| **plugin** | A directory under `plugins/` packaging one or more skills or an MCP server wrapper, installable per-workspace via `POST /workspaces/:id/plugins`. Governed by `plugin.yaml`. | **Langflow**: a visual UI node / component in a flowchart. **CrewAI**: a Python-importable callable registered as a capability. |
|
||||
| **agent** | A persistent containerized workspace running continuously — an identity with memory, a role, and a schedule. Not a one-shot invocation. | Most frameworks (AutoGPT, LangChain agents, OpenAI Assistants): a stateless function-call loop. No persistence between invocations unless explicitly checkpointed. |
|
||||
| **flow** | A task execution within a workspace — a request enters, the agent runs tools, emits a response, logs activity. No explicit graph abstraction. | **Langflow**: a directed graph of nodes you author visually. **LangGraph**: a stateful graph of callable nodes. Our "flow" is an imperative timeline, not a graph. |
|
||||
| **team** | A named cluster of workspaces under a PM (org template `expand_team`). Used for role grouping in Canvas. | **CrewAI**: a "crew" is a sequence of agents that pass a task through a declared order. Our "team" is an org-chart abstraction, not an execution order. |
|
||||
| **team** | A named cluster of workspaces under a PM . Used for role grouping in Canvas. | **CrewAI**: a "crew" is a sequence of agents that pass a task through a declared order. Our "team" is an org-chart abstraction, not an execution order. |
|
||||
| **skill** | A directory with `SKILL.md` that an agent invokes via the `Skill` tool. Skills are documentation + optional scripts that teach an agent a recipe. | **Anthropic Skills API**: nearly identical. **CrewAI tool**: closer to our plugin's MCP tool, not our skill. |
|
||||
| **channel** | An outbound/inbound social integration (Telegram, Slack, …) per-workspace, wired in `workspace_channels`. | Slack's "channel": the container for messages. We use "channel" for the adapter + credentials, not the conversation itself. |
|
||||
| **runtime** | The execution engine image tag for a workspace: one of `langgraph`, `claude-code`, `openclaw`, `crewai`, `autogen`, `deepagents`, `hermes`. | **LangGraph runtime**: the Python process running the graph. We use "runtime" for the Docker image + adapter pairing, not the inner process. |
|
||||
|
||||
@@ -166,8 +166,6 @@ list_workspaces
|
||||
|
||||
| MCP Tool | API Route | Method | Description |
|
||||
|----------|-----------|--------|-------------|
|
||||
| `expand_team` | `/workspaces/:id/expand` | POST | Expand team node |
|
||||
| `collapse_team` | `/workspaces/:id/collapse` | POST | Collapse team node |
|
||||
|
||||
### Templates & Bundles
|
||||
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
# Workspace Runtime PyPI Package
|
||||
|
||||
## Requires Python >= 3.11
|
||||
|
||||
The wheel pins `requires_python>=3.11`. On Python 3.10 or older, `pip install
|
||||
molecule-ai-workspace-runtime` fails with `Could not find a version that
|
||||
satisfies the requirement (from versions: none)` — the pin filters the only
|
||||
available artifact before pip even attempts install. Upgrade the interpreter
|
||||
(`brew install python@3.12` / `apt install python3.12` / etc.) or use a
|
||||
3.11+ venv.
|
||||
|
||||
## Overview
|
||||
|
||||
The shared workspace runtime infrastructure has **one editable source** and
|
||||
|
||||
@@ -55,6 +55,11 @@ TOP_LEVEL_MODULES = {
|
||||
"a2a_executor",
|
||||
"a2a_mcp_server",
|
||||
"a2a_tools",
|
||||
"a2a_tools_delegation",
|
||||
"a2a_tools_inbox",
|
||||
"a2a_tools_memory",
|
||||
"a2a_tools_messaging",
|
||||
"a2a_tools_rbac",
|
||||
"adapter_base",
|
||||
"agent",
|
||||
"agents_md",
|
||||
@@ -75,6 +80,10 @@ TOP_LEVEL_MODULES = {
|
||||
"internal_file_read",
|
||||
"main",
|
||||
"mcp_cli",
|
||||
"mcp_doctor",
|
||||
"mcp_heartbeat",
|
||||
"mcp_inbox_pollers",
|
||||
"mcp_workspace_resolver",
|
||||
"molecule_ai_status",
|
||||
"not_configured_handler",
|
||||
"platform_auth",
|
||||
@@ -283,10 +292,37 @@ directory** by the `publish-runtime` GitHub Actions workflow on every
|
||||
Operators running an agent outside the platform's container fleet
|
||||
(any runtime that supports MCP stdio — Claude Code, hermes, codex,
|
||||
etc.) can install this wheel and run the universal MCP server
|
||||
locally:
|
||||
locally.
|
||||
|
||||
### Requirements
|
||||
|
||||
* **Python ≥3.11.** The wheel sets `requires-python = ">=3.11"`. On
|
||||
older interpreters `pip install` returns the cryptic
|
||||
`Could not find a version that satisfies the requirement` — that
|
||||
message is pip filtering this wheel out, NOT the package missing
|
||||
from PyPI. Upgrade with `brew install python@3.12` /
|
||||
`apt install python3.12` / `pyenv install 3.12` first.
|
||||
* **`pipx` recommended over `pip`.** `pipx install` puts
|
||||
`molecule-mcp` on PATH automatically and isolates the runtime's
|
||||
deps from your system Python. Plain `pip install --user` works
|
||||
but the binary lands in `~/.local/bin` (Linux) or
|
||||
`~/Library/Python/3.X/bin` (macOS) which is often not on PATH on
|
||||
a fresh shell — `claude mcp add molecule -- molecule-mcp` then
|
||||
fails with "command not found" at first use.
|
||||
|
||||
### Install
|
||||
|
||||
```sh
|
||||
# Recommended:
|
||||
pipx install molecule-ai-workspace-runtime
|
||||
|
||||
# Alternative (manage PATH yourself):
|
||||
pip install --user molecule-ai-workspace-runtime
|
||||
```
|
||||
|
||||
### Run
|
||||
|
||||
```sh
|
||||
pip install molecule-ai-workspace-runtime
|
||||
WORKSPACE_ID=<uuid> \\
|
||||
PLATFORM_URL=https://<tenant>.staging.moleculesai.app \\
|
||||
MOLECULE_WORKSPACE_TOKEN=<bearer> \\
|
||||
@@ -299,10 +335,66 @@ runtimes already get via the workspace's auto-spawned MCP. Register
|
||||
the binary in your agent's MCP config (e.g. Claude Code's
|
||||
`claude mcp add molecule -- molecule-mcp` with the env above).
|
||||
|
||||
### Keeping the token out of shell history
|
||||
|
||||
Inline `MOLECULE_WORKSPACE_TOKEN=<bearer>` ends up in `~/.zsh_history`
|
||||
and (when registered via `claude mcp add`) plaintext in
|
||||
`~/.claude.json`. To avoid that, write the token to a 0600 file and
|
||||
point `MOLECULE_WORKSPACE_TOKEN_FILE` at it:
|
||||
|
||||
```sh
|
||||
umask 077
|
||||
printf '%s' "<bearer>" > ~/.config/molecule/token
|
||||
WORKSPACE_ID=<uuid> \\
|
||||
PLATFORM_URL=https://<tenant>.staging.moleculesai.app \\
|
||||
MOLECULE_WORKSPACE_TOKEN_FILE=$HOME/.config/molecule/token \\
|
||||
molecule-mcp
|
||||
```
|
||||
|
||||
Token resolution order: `MOLECULE_WORKSPACE_TOKEN` (inline env) →
|
||||
`MOLECULE_WORKSPACE_TOKEN_FILE` (path) → `${CONFIGS_DIR}/.auth_token`
|
||||
(in-container default).
|
||||
|
||||
The token comes from the canvas → Tokens tab. Restarting an external
|
||||
workspace from the canvas no longer revokes the token (PR #2412), so
|
||||
operator tokens persist across status nudges.
|
||||
|
||||
### Push vs poll delivery (Claude Code specifics)
|
||||
|
||||
By default the inbox runs in **poll mode** — every turn the agent
|
||||
calls `wait_for_message`, which blocks up to ~60s on
|
||||
`/activity?since_id=…`. Real-time push delivery is also supported,
|
||||
but on Claude Code it requires THREE conditions, ALL of which must
|
||||
hold:
|
||||
|
||||
1. **The MCP server declares `experimental.claude/channel`** — this
|
||||
wheel does (see `_build_initialize_result`). Nothing for you to
|
||||
do.
|
||||
2. **Claude Code installs the server as a marketplace plugin** — a
|
||||
plain `claude mcp add molecule -- molecule-mcp` produces a
|
||||
non-plugin-sourced server, which Claude Code rejects with
|
||||
`channel_enable requires a marketplace plugin`. Until the
|
||||
official `moleculesai/claude-code-plugin` marketplace lands
|
||||
(tracking [#2936](https://github.com/Molecule-AI/molecule-core/issues/2936)),
|
||||
operators who want push must scaffold their own local marketplace
|
||||
under
|
||||
`~/.claude/marketplaces/molecule-local/` containing a
|
||||
`marketplace.json` + `plugin.json` that points at this wheel.
|
||||
3. **Claude Code is launched with the dev-channels flag** — pass
|
||||
`--dangerously-load-development-channels plugin:molecule@<marketplace>`
|
||||
on the `claude` invocation. Without this flag the channel
|
||||
capability is silently ignored.
|
||||
|
||||
Symptom of any condition failing: messages arrive but only via the
|
||||
poll path (every ~1–60s), not real-time. There's currently no
|
||||
diagnostic surfaced — `molecule-mcp doctor` (tracking
|
||||
[#2937](https://github.com/Molecule-AI/molecule-core/issues/2937)) is
|
||||
planned.
|
||||
|
||||
If you don't need real-time push, the default poll path works
|
||||
universally with no extra setup; both modes converge on the same
|
||||
`inbox_pop` ack so messages never duplicate.
|
||||
|
||||
See [`docs/workspace-runtime-package.md`](https://github.com/Molecule-AI/molecule-core/blob/main/docs/workspace-runtime-package.md)
|
||||
for the publish flow and architecture.
|
||||
"""
|
||||
|
||||
Executable
+295
@@ -0,0 +1,295 @@
|
||||
#!/usr/bin/env bash
|
||||
# E2E for poll-mode chat upload (RFC #2891 phases 1-5b).
|
||||
#
|
||||
# Round-trip: register a workspace as poll-mode (no callback URL) → POST a
|
||||
# multi-file chat upload → verify each file becomes (a) one
|
||||
# `chat_upload_receive` activity row and (b) one /pending-uploads row → fetch
|
||||
# the bytes back via the poll endpoint → ack → verify the row 404s on
|
||||
# subsequent fetch. Also pins cross-workspace bleed protection: workspace B
|
||||
# cannot read workspace A's pending uploads even with its own valid bearer.
|
||||
#
|
||||
# Why this exists separately from test_chat_upload_e2e.sh: that script
|
||||
# covers the PUSH path (the workspace's own /internal/chat/uploads/ingest).
|
||||
# This script covers the POLL path: the same canvas-side request lands on
|
||||
# the platform's pendinguploads.Storage instead, and the workspace fetches
|
||||
# it later. The two paths share zero handler code on the platform side, so
|
||||
# both need their own E2E.
|
||||
#
|
||||
# Requires: platform running on localhost:8080 with migrations applied.
|
||||
# bash workspace-server/scripts/dev-start.sh
|
||||
# bash workspace-server/scripts/run-migrations.sh
|
||||
#
|
||||
# Idempotent: each run uses fresh per-script workspace UUIDs so reruns
|
||||
# don't collide. Best-effort cleanup on EXIT — does NOT call
|
||||
# e2e_cleanup_all_workspaces (see
|
||||
# `feedback_never_run_cluster_cleanup_tests_on_live_platform.md`).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
source "$(dirname "$0")/_lib.sh"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
TIMEOUT="${A2A_TIMEOUT:-30}"
|
||||
|
||||
gen_uuid() {
|
||||
if command -v uuidgen >/dev/null 2>&1; then
|
||||
uuidgen | tr '[:upper:]' '[:lower:]'
|
||||
else
|
||||
python3 -c 'import uuid; print(uuid.uuid4())'
|
||||
fi
|
||||
}
|
||||
WS_A="$(gen_uuid)"
|
||||
WS_B="$(gen_uuid)"
|
||||
|
||||
# Per-run scratch dir collected under one trap so every assertion-failure
|
||||
# path drops the temp files it made (see test_chat_attachments_e2e.sh).
|
||||
TMPDIR_E2E=$(mktemp -d -t poll-chat-upload-e2e-XXXXXX)
|
||||
|
||||
cleanup() {
|
||||
local rc=$?
|
||||
curl -s -X DELETE "$BASE/workspaces/$WS_A?confirm=true" >/dev/null 2>&1 || true
|
||||
curl -s -X DELETE "$BASE/workspaces/$WS_B?confirm=true" >/dev/null 2>&1 || true
|
||||
rm -rf "$TMPDIR_E2E"
|
||||
exit $rc
|
||||
}
|
||||
trap cleanup EXIT INT TERM
|
||||
|
||||
check() {
|
||||
local desc="$1" expected="$2" actual="$3"
|
||||
if echo "$actual" | grep -qF -- "$expected"; then
|
||||
echo "PASS: $desc"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo "FAIL: $desc"
|
||||
echo " expected to contain: $expected"
|
||||
echo " got: $(echo "$actual" | head -10)"
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
check_eq() {
|
||||
local desc="$1" expected="$2" actual="$3"
|
||||
if [ "$actual" = "$expected" ]; then
|
||||
echo "PASS: $desc"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo "FAIL: $desc"
|
||||
echo " expected: $expected"
|
||||
echo " got: $actual"
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
echo "=== Poll-Mode Chat Upload E2E ==="
|
||||
echo " base: $BASE"
|
||||
echo " workspace A: $WS_A"
|
||||
echo " workspace B: $WS_B"
|
||||
echo ""
|
||||
|
||||
# ---------- Phase 1: register poll-mode workspace ----------
|
||||
echo "--- Phase 1: Register poll-mode workspace A ---"
|
||||
|
||||
REG_A=$(curl -s -X POST "$BASE/registry/register" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{
|
||||
\"id\": \"$WS_A\",
|
||||
\"delivery_mode\": \"poll\",
|
||||
\"agent_card\": {\"name\": \"poll-chat-upload-test-a\"}
|
||||
}")
|
||||
check "register accepts poll mode without URL" '"status":"registered"' "$REG_A"
|
||||
TOK_A=$(echo "$REG_A" | e2e_extract_token || true)
|
||||
[ -n "$TOK_A" ] || { echo "FAIL: no auth_token in register response (ws A)"; FAIL=$((FAIL + 1)); exit 1; }
|
||||
|
||||
# ---------- Phase 2: multi-file chat upload ----------
|
||||
echo ""
|
||||
echo "--- Phase 2: POST /chat/uploads with two files ---"
|
||||
|
||||
FILE1="$TMPDIR_E2E/alpha.txt"
|
||||
FILE2="$TMPDIR_E2E/beta.txt"
|
||||
EXPECTED1="alpha-secret-$(openssl rand -hex 4)"
|
||||
EXPECTED2="beta-secret-$(openssl rand -hex 4)"
|
||||
printf '%s' "$EXPECTED1" > "$FILE1"
|
||||
printf '%s' "$EXPECTED2" > "$FILE2"
|
||||
|
||||
UPLOAD=$(curl -s -X POST "$BASE/workspaces/$WS_A/chat/uploads" \
|
||||
-H "Authorization: Bearer $TOK_A" \
|
||||
-F "files=@$FILE1;filename=alpha.txt;type=text/plain" \
|
||||
-F "files=@$FILE2;filename=beta.txt;type=text/plain" \
|
||||
-w "\nHTTP_CODE=%{http_code}\n")
|
||||
UPLOAD_CODE=$(echo "$UPLOAD" | grep -oE 'HTTP_CODE=[0-9]+' | cut -d= -f2)
|
||||
UPLOAD_BODY=$(echo "$UPLOAD" | sed '/^HTTP_CODE=/,$d')
|
||||
|
||||
check_eq "upload returns 200" "200" "$UPLOAD_CODE"
|
||||
check "upload response has files array" '"files":' "$UPLOAD_BODY"
|
||||
|
||||
# Pull file_ids out of the URI in the response. URI shape is
|
||||
# `platform-pending:<wsid>/<file_id>` — proves the response came from the
|
||||
# poll-mode branch, not the push-mode internal-ingest branch.
|
||||
URI1=$(echo "$UPLOAD_BODY" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][0]["uri"])')
|
||||
URI2=$(echo "$UPLOAD_BODY" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][1]["uri"])')
|
||||
check "URI 1 has platform-pending: scheme" "platform-pending:$WS_A/" "$URI1"
|
||||
check "URI 2 has platform-pending: scheme" "platform-pending:$WS_A/" "$URI2"
|
||||
|
||||
FID1="${URI1##*/}"
|
||||
FID2="${URI2##*/}"
|
||||
[ -n "$FID1" ] && [ -n "$FID2" ] || { echo "FAIL: could not extract file IDs"; FAIL=$((FAIL + 1)); exit 1; }
|
||||
echo " file_id 1: $FID1"
|
||||
echo " file_id 2: $FID2"
|
||||
|
||||
# ---------- Phase 3: activity rows visible to the workspace ----------
|
||||
echo ""
|
||||
echo "--- Phase 3: /activity shows two chat_upload_receive rows ---"
|
||||
|
||||
# activity_logs INSERTs run in a goroutine — give them a moment.
|
||||
sleep 1
|
||||
ACT=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/activity?type=a2a_receive&limit=20")
|
||||
check "activity feed has the alpha file" "$FID1" "$ACT"
|
||||
check "activity feed has the beta file" "$FID2" "$ACT"
|
||||
check "activity rows tagged chat_upload_receive" '"method":"chat_upload_receive"' "$ACT"
|
||||
check "activity rows record alpha mimetype" '"mimeType":"text/plain"' "$ACT"
|
||||
|
||||
CHAT_UPLOAD_COUNT=$(echo "$ACT" | python3 -c '
|
||||
import json, sys
|
||||
rows = json.load(sys.stdin)
|
||||
n = sum(1 for r in rows if (r.get("method") or "") == "chat_upload_receive")
|
||||
print(n)
|
||||
')
|
||||
check_eq "exactly two chat_upload_receive rows" "2" "$CHAT_UPLOAD_COUNT"
|
||||
|
||||
# ---------- Phase 4: GET /pending-uploads/:file_id/content ----------
|
||||
echo ""
|
||||
echo "--- Phase 4: Fetch content for each pending upload ---"
|
||||
|
||||
GOT1=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
|
||||
check_eq "alpha bytes round-trip" "$EXPECTED1" "$GOT1"
|
||||
|
||||
GOT2=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID2/content")
|
||||
check_eq "beta bytes round-trip" "$EXPECTED2" "$GOT2"
|
||||
|
||||
# Mimetype + Content-Disposition headers should match what was uploaded.
|
||||
HEAD1=$(curl -s -D - -o /dev/null --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
|
||||
check "alpha response carries text/plain Content-Type" "Content-Type: text/plain" "$HEAD1"
|
||||
check "alpha response carries Content-Disposition with filename" 'filename="alpha.txt"' "$HEAD1"
|
||||
|
||||
# ---------- Phase 5: idempotent re-fetch (until ack) ----------
|
||||
echo ""
|
||||
echo "--- Phase 5: Re-fetch before ack returns the same bytes ---"
|
||||
|
||||
RE_GOT1=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
|
||||
check_eq "re-fetch returns same alpha bytes" "$EXPECTED1" "$RE_GOT1"
|
||||
|
||||
# ---------- Phase 6: ack each row ----------
|
||||
echo ""
|
||||
echo "--- Phase 6: Ack each pending upload ---"
|
||||
|
||||
ACK1=$(curl -s -X POST --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/ack")
|
||||
check "alpha ack returns acked:true" '"acked":true' "$ACK1"
|
||||
|
||||
ACK2=$(curl -s -X POST --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID2/ack")
|
||||
check "beta ack returns acked:true" '"acked":true' "$ACK2"
|
||||
|
||||
# Re-ack should still 200 (idempotent — the row's gone but the workspace's
|
||||
# at-least-once intent was already honored, and the second ack hits the
|
||||
# raced path which also returns 200).
|
||||
RE_ACK1=$(curl -s -w '\n%{http_code}' -X POST --max-time "$TIMEOUT" \
|
||||
-H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/ack")
|
||||
RE_ACK1_CODE=$(printf '%s' "$RE_ACK1" | tail -n1)
|
||||
# Acked rows return 404 on Get-before-Ack (the row's still in the table
|
||||
# but Get filters acked_at IS NULL); workspace would not normally re-ack
|
||||
# since it already saw the success. Accept both 200 and 404 here so the
|
||||
# test pins the contract without being brittle on the inner ordering.
|
||||
case "$RE_ACK1_CODE" in
|
||||
200|404)
|
||||
echo "PASS: re-ack returns 200 or 404 ($RE_ACK1_CODE)"
|
||||
PASS=$((PASS + 1))
|
||||
;;
|
||||
*)
|
||||
echo "FAIL: re-ack returned unexpected $RE_ACK1_CODE"
|
||||
FAIL=$((FAIL + 1))
|
||||
;;
|
||||
esac
|
||||
|
||||
# ---------- Phase 7: GET content after ack returns 404 ----------
|
||||
echo ""
|
||||
echo "--- Phase 7: Acked file 404s on subsequent fetch ---"
|
||||
|
||||
POST_ACK=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
|
||||
POST_ACK_CODE=$(printf '%s' "$POST_ACK" | tail -n1)
|
||||
check_eq "acked alpha returns HTTP 404" "404" "$POST_ACK_CODE"
|
||||
|
||||
# ---------- Phase 8: cross-workspace bleed protection ----------
|
||||
echo ""
|
||||
echo "--- Phase 8: Workspace B cannot read workspace A's pending uploads ---"
|
||||
|
||||
# Stage a fresh upload on workspace A so we have an UN-acked row to probe.
|
||||
PROBE_FILE="$TMPDIR_E2E/probe.txt"
|
||||
printf '%s' "probe-bytes-$(openssl rand -hex 4)" > "$PROBE_FILE"
|
||||
PROBE_UP=$(curl -s -X POST "$BASE/workspaces/$WS_A/chat/uploads" \
|
||||
-H "Authorization: Bearer $TOK_A" \
|
||||
-F "files=@$PROBE_FILE;filename=probe.txt;type=text/plain")
|
||||
PROBE_FID=$(echo "$PROBE_UP" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][0]["uri"].split("/")[-1])')
|
||||
[ -n "$PROBE_FID" ] || { echo "FAIL: probe upload returned no file_id"; FAIL=$((FAIL + 1)); exit 1; }
|
||||
|
||||
# Register a SECOND poll-mode workspace and capture its bearer.
|
||||
REG_B=$(curl -s -X POST "$BASE/registry/register" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{
|
||||
\"id\": \"$WS_B\",
|
||||
\"delivery_mode\": \"poll\",
|
||||
\"agent_card\": {\"name\": \"poll-chat-upload-test-b\"}
|
||||
}")
|
||||
check "second workspace registers" '"status":"registered"' "$REG_B"
|
||||
TOK_B=$(echo "$REG_B" | e2e_extract_token || true)
|
||||
[ -n "$TOK_B" ] || { echo "FAIL: no auth_token (ws B)"; FAIL=$((FAIL + 1)); exit 1; }
|
||||
|
||||
# B's bearer hitting B's URL with A's file_id → 404 (handler checks the row's
|
||||
# workspace_id matches the URL :id, not the bearer's workspace).
|
||||
CROSS_RESP=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \
|
||||
-H "Authorization: Bearer $TOK_B" \
|
||||
"$BASE/workspaces/$WS_B/pending-uploads/$PROBE_FID/content")
|
||||
CROSS_CODE=$(printf '%s' "$CROSS_RESP" | tail -n1)
|
||||
check_eq "B's URL with A's file_id returns 404" "404" "$CROSS_CODE"
|
||||
|
||||
# B's bearer hitting A's URL → 401 (wsAuth pins bearer to :id). This is the
|
||||
# strictest cross-workspace check: a presented-but-wrong bearer is rejected
|
||||
# in EVERY platform posture (dev-mode fail-open only triggers when no bearer
|
||||
# is presented at all — invalid tokens always 401).
|
||||
WRONG_BEARER=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \
|
||||
-H "Authorization: Bearer $TOK_B" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$PROBE_FID/content")
|
||||
WRONG_CODE=$(printf '%s' "$WRONG_BEARER" | tail -n1)
|
||||
check_eq "B's bearer on A's URL returns 401" "401" "$WRONG_CODE"
|
||||
|
||||
# NB: a fully bearerless request to /pending-uploads/:fid/content returns
|
||||
# 401 ONLY when the platform has MOLECULE_ENV != development (production /
|
||||
# staging). On local-dev with MOLECULE_ENV=development the wsauth middleware
|
||||
# fail-opens for bearerless requests so the canvas at :3000 can talk to the
|
||||
# platform at :8080 without per-call token plumbing — see middleware/
|
||||
# devmode.go. The strict bearerless-401 contract is covered by the wsauth
|
||||
# unit + middleware tests; we don't reassert it here because the result
|
||||
# depends on platform posture, not the poll-mode upload contract.
|
||||
|
||||
# ---------- Phase 9: invalid file_id rejected at the URL parser ----------
|
||||
echo ""
|
||||
echo "--- Phase 9: Invalid file_id returns 400 ---"
|
||||
|
||||
BAD_FID=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \
|
||||
-H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/not-a-uuid/content")
|
||||
BAD_FID_CODE=$(printf '%s' "$BAD_FID" | tail -n1)
|
||||
check_eq "invalid file_id UUID returns 400" "400" "$BAD_FID_CODE"
|
||||
|
||||
# ---------- Results ----------
|
||||
echo ""
|
||||
echo "=== Results: $PASS passed, $FAIL failed ==="
|
||||
[ "$FAIL" -eq 0 ]
|
||||
@@ -94,6 +94,13 @@ services:
|
||||
CP_UPSTREAM_URL: "http://cp-stub:9090"
|
||||
RATE_LIMIT: "1000"
|
||||
CANVAS_PROXY_URL: "http://localhost:3000"
|
||||
# Memory v2 sidecar (PR #2906) bundles the plugin into the
|
||||
# tenant image and starts it before the main server. The plugin
|
||||
# runs `CREATE EXTENSION vector` on first boot, which fails on
|
||||
# the harness's plain postgres:15-alpine (no pgvector). The
|
||||
# harness doesn't exercise memory features, so disable the
|
||||
# sidecar via the entrypoint's documented escape hatch.
|
||||
MEMORY_PLUGIN_DISABLE: "1"
|
||||
networks: [harness-net]
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/health || exit 1"]
|
||||
@@ -142,6 +149,13 @@ services:
|
||||
CP_UPSTREAM_URL: "http://cp-stub:9090"
|
||||
RATE_LIMIT: "1000"
|
||||
CANVAS_PROXY_URL: "http://localhost:3000"
|
||||
# Memory v2 sidecar (PR #2906) bundles the plugin into the
|
||||
# tenant image and starts it before the main server. The plugin
|
||||
# runs `CREATE EXTENSION vector` on first boot, which fails on
|
||||
# the harness's plain postgres:15-alpine (no pgvector). The
|
||||
# harness doesn't exercise memory features, so disable the
|
||||
# sidecar via the entrypoint's documented escape hatch.
|
||||
MEMORY_PLUGIN_DISABLE: "1"
|
||||
networks: [harness-net]
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/health || exit 1"]
|
||||
|
||||
@@ -21,6 +21,14 @@ ARG GIT_SHA=dev
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build \
|
||||
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
|
||||
-o /platform ./cmd/server
|
||||
# Bundle the built-in memory-plugin-postgres binary so an operator can
|
||||
# activate Memory v2 by setting MEMORY_V2_CUTOVER=true + (default)
|
||||
# MEMORY_PLUGIN_URL=http://localhost:9100. The entrypoint starts this
|
||||
# binary in the background; main /platform talks to it over loopback.
|
||||
# Stays inert until the operator flips the cutover env var.
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build \
|
||||
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
|
||||
-o /memory-plugin ./cmd/memory-plugin-postgres
|
||||
|
||||
# Clone templates + plugins at build time from manifest.json
|
||||
FROM alpine:3.20 AS templates
|
||||
@@ -30,8 +38,9 @@ COPY scripts/clone-manifest.sh /scripts/clone-manifest.sh
|
||||
RUN chmod +x /scripts/clone-manifest.sh && /scripts/clone-manifest.sh /manifest.json /workspace-configs-templates /org-templates /plugins
|
||||
|
||||
FROM alpine:3.20
|
||||
RUN apk add --no-cache ca-certificates git tzdata
|
||||
RUN apk add --no-cache ca-certificates git tzdata wget
|
||||
COPY --from=builder /platform /platform
|
||||
COPY --from=builder /memory-plugin /memory-plugin
|
||||
COPY workspace-server/migrations /migrations
|
||||
COPY --from=templates /workspace-configs-templates /workspace-configs-templates
|
||||
COPY --from=templates /org-templates /org-templates
|
||||
@@ -41,6 +50,7 @@ RUN addgroup -g 1000 platform && adduser -u 1000 -G platform -s /bin/sh -D platf
|
||||
EXPOSE 8080
|
||||
COPY <<'ENTRY' /entrypoint.sh
|
||||
#!/bin/sh
|
||||
# Set up docker-socket group (unchanged from pre-sidecar entrypoint).
|
||||
if [ -S /var/run/docker.sock ]; then
|
||||
SOCK_GID=$(stat -c '%g' /var/run/docker.sock 2>/dev/null || stat -f '%g' /var/run/docker.sock 2>/dev/null)
|
||||
if [ -n "$SOCK_GID" ] && [ "$SOCK_GID" != "0" ]; then
|
||||
@@ -50,6 +60,61 @@ if [ -S /var/run/docker.sock ]; then
|
||||
addgroup platform root 2>/dev/null || true
|
||||
fi
|
||||
fi
|
||||
|
||||
# Memory v2 sidecar (built-in postgres plugin). Co-located with the
|
||||
# main server so operators flipping MEMORY_V2_CUTOVER=true don't need
|
||||
# to provision a separate service.
|
||||
#
|
||||
# Spawn-gating: only start the sidecar when the operator has indicated
|
||||
# they want it — either MEMORY_V2_CUTOVER=true OR MEMORY_PLUGIN_URL set.
|
||||
# Without that signal, the sidecar adds zero value (the platform's
|
||||
# wiring.go skips building the client too) but pays a real cost: the
|
||||
# plugin's first migration runs `CREATE EXTENSION vector`, which fails
|
||||
# on tenant Postgres without pgvector preinstalled and aborts container
|
||||
# boot via the 30s health gate. Caught on staging redeploy 2026-05-05.
|
||||
#
|
||||
# Env defaults (when sidecar IS spawned):
|
||||
# MEMORY_PLUGIN_DATABASE_URL = $DATABASE_URL (share existing Postgres;
|
||||
# plugin's `memory_namespaces` / `memory_records` tables coexist
|
||||
# with `agent_memories` and the rest of the platform schema —
|
||||
# no conflicts. Operator can override with a separate URL.)
|
||||
# MEMORY_PLUGIN_LISTEN_ADDR = 127.0.0.1:9100
|
||||
#
|
||||
# Set MEMORY_PLUGIN_DISABLE=1 to force-skip the sidecar even with
|
||||
# cutover env set (e.g. running the plugin externally on a separate host).
|
||||
memory_plugin_wanted=""
|
||||
if [ "$MEMORY_V2_CUTOVER" = "true" ] || [ -n "$MEMORY_PLUGIN_URL" ]; then
|
||||
memory_plugin_wanted=1
|
||||
fi
|
||||
if [ -z "$MEMORY_PLUGIN_DISABLE" ] && [ -n "$memory_plugin_wanted" ] && [ -n "$DATABASE_URL" ]; then
|
||||
: "${MEMORY_PLUGIN_DATABASE_URL:=$DATABASE_URL}"
|
||||
: "${MEMORY_PLUGIN_LISTEN_ADDR:=:9100}"
|
||||
export MEMORY_PLUGIN_DATABASE_URL MEMORY_PLUGIN_LISTEN_ADDR
|
||||
echo "memory-plugin: starting sidecar on $MEMORY_PLUGIN_LISTEN_ADDR" >&2
|
||||
# Drop privs to the platform user — the plugin doesn't need root and
|
||||
# runs unprivileged elsewhere (tenant image already starts as canvas).
|
||||
su-exec platform /memory-plugin &
|
||||
MEMORY_PLUGIN_PID=$!
|
||||
# Wait up to 30s for the plugin's /v1/health to return 200. Boot
|
||||
# failure here is fatal — better to crash-loop than to silently
|
||||
# serve cutover traffic against a dead plugin.
|
||||
health_port=${MEMORY_PLUGIN_LISTEN_ADDR#:}
|
||||
ready=0
|
||||
for _ in $(seq 1 30); do
|
||||
if wget -qO- --timeout=2 "http://localhost:${health_port}/v1/health" >/dev/null 2>&1; then
|
||||
ready=1
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
if [ "$ready" != "1" ]; then
|
||||
echo "memory-plugin: ❌ /v1/health never returned 200 after 30s — aborting boot. Check that DATABASE_URL is reachable, has the pgvector extension, and the plugin's migrations applied." >&2
|
||||
kill "$MEMORY_PLUGIN_PID" 2>/dev/null || true
|
||||
exit 1
|
||||
fi
|
||||
echo "memory-plugin: ✅ sidecar healthy on :$health_port" >&2
|
||||
fi
|
||||
|
||||
exec su-exec platform /platform "$@"
|
||||
ENTRY
|
||||
RUN chmod +x /entrypoint.sh && apk add --no-cache su-exec
|
||||
|
||||
@@ -34,6 +34,13 @@ ARG GIT_SHA=dev
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build \
|
||||
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
|
||||
-o /platform ./cmd/server
|
||||
# Memory v2 sidecar binary (Memory v2 #2728). Bundled so an operator
|
||||
# can activate cutover by flipping MEMORY_V2_CUTOVER=true without
|
||||
# provisioning a separate service. See entrypoint-tenant.sh for the
|
||||
# launch logic.
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build \
|
||||
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
|
||||
-o /memory-plugin ./cmd/memory-plugin-postgres
|
||||
|
||||
# ── Stage 2: Canvas Next.js standalone ────────────────────────────────
|
||||
FROM node:20-alpine AS canvas-builder
|
||||
@@ -74,8 +81,9 @@ RUN deluser --remove-home node 2>/dev/null || true; \
|
||||
delgroup node 2>/dev/null || true; \
|
||||
addgroup -g 1000 canvas && adduser -u 1000 -G canvas -s /bin/sh -D canvas
|
||||
|
||||
# Go platform binary
|
||||
# Go platform binary + Memory v2 sidecar
|
||||
COPY --from=go-builder /platform /platform
|
||||
COPY --from=go-builder /memory-plugin /memory-plugin
|
||||
COPY workspace-server/migrations /migrations
|
||||
|
||||
# Templates + plugins (cloned from GitHub in stage 3)
|
||||
@@ -91,7 +99,7 @@ COPY --from=canvas-builder /canvas/public ./public
|
||||
|
||||
COPY workspace-server/entrypoint-tenant.sh /entrypoint.sh
|
||||
RUN chmod +x /entrypoint.sh && \
|
||||
chown -R canvas:canvas /canvas /platform /migrations
|
||||
chown -R canvas:canvas /canvas /platform /memory-plugin /migrations
|
||||
|
||||
EXPOSE 8080
|
||||
# entrypoint.sh starts as root to fix volume perms, then drops to
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestLoadConfig_DefaultListenAddrIsLoopback pins the default-bind contract.
|
||||
//
|
||||
// Why this matters: with the prior `:9100` default, the plugin listened on
|
||||
// every interface. Inside the container it didn't matter (no host port
|
||||
// mapping today), but a future change that publishes 9100 OR a cross-host
|
||||
// sidecar deploy would have exposed an unauth'd memory store. Loopback by
|
||||
// default is the least-privilege baseline; operators with a multi-host
|
||||
// topology override via MEMORY_PLUGIN_LISTEN_ADDR.
|
||||
func TestLoadConfig_DefaultListenAddrIsLoopback(t *testing.T) {
|
||||
t.Setenv("MEMORY_PLUGIN_DATABASE_URL", "postgres://stub")
|
||||
t.Setenv("MEMORY_PLUGIN_LISTEN_ADDR", "")
|
||||
|
||||
cfg, err := loadConfig()
|
||||
if err != nil {
|
||||
t.Fatalf("loadConfig: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(cfg.ListenAddr, "127.0.0.1:") {
|
||||
t.Errorf("default ListenAddr must bind loopback-only, got %q "+
|
||||
"(security regression — would expose plugin on every interface)",
|
||||
cfg.ListenAddr)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfig_ListenAddrEnvOverride(t *testing.T) {
|
||||
t.Setenv("MEMORY_PLUGIN_DATABASE_URL", "postgres://stub")
|
||||
t.Setenv("MEMORY_PLUGIN_LISTEN_ADDR", ":9100")
|
||||
|
||||
cfg, err := loadConfig()
|
||||
if err != nil {
|
||||
t.Fatalf("loadConfig: %v", err)
|
||||
}
|
||||
if cfg.ListenAddr != ":9100" {
|
||||
t.Errorf("env override ignored: want :9100, got %q", cfg.ListenAddr)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfig_MissingDatabaseURL(t *testing.T) {
|
||||
t.Setenv("MEMORY_PLUGIN_DATABASE_URL", "")
|
||||
|
||||
if _, err := loadConfig(); err == nil {
|
||||
t.Fatal("loadConfig must error when MEMORY_PLUGIN_DATABASE_URL is empty")
|
||||
}
|
||||
}
|
||||
@@ -10,6 +10,7 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"embed"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
@@ -17,6 +18,7 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"sort"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
@@ -26,12 +28,28 @@ import (
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/pgplugin"
|
||||
)
|
||||
|
||||
// migrationsFS bundles the .up.sql files into the binary at build time
|
||||
// so the prebuilt image doesn't need the source tree at runtime. The
|
||||
// prior `os.ReadDir("cmd/memory-plugin-postgres/migrations")` path
|
||||
// only resolved during `go test` from the repo root — in the published
|
||||
// image the path didn't exist and boot failed after the 30s health gate
|
||||
// (caught on staging redeploy 2026-05-05 after PR #2906).
|
||||
//
|
||||
//go:embed migrations/*.up.sql
|
||||
var migrationsFS embed.FS
|
||||
|
||||
const (
|
||||
envDatabaseURL = "MEMORY_PLUGIN_DATABASE_URL"
|
||||
envListenAddr = "MEMORY_PLUGIN_LISTEN_ADDR"
|
||||
envSkipMigrate = "MEMORY_PLUGIN_SKIP_MIGRATE"
|
||||
|
||||
defaultListenAddr = ":9100"
|
||||
// Loopback-only by default (defense in depth). The platform talks to
|
||||
// the plugin over `http://localhost:9100` from the same container, so
|
||||
// binding to all interfaces would only widen the reachable surface
|
||||
// without enabling any in-design caller. Operators running the plugin
|
||||
// on a separate host override via MEMORY_PLUGIN_LISTEN_ADDR=:9100 (or
|
||||
// some other interface).
|
||||
defaultListenAddr = "127.0.0.1:9100"
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -143,32 +161,71 @@ func openDB(databaseURL string) (*sql.DB, error) {
|
||||
return db, nil
|
||||
}
|
||||
|
||||
// runMigrations applies the schema migrations bundled at
|
||||
// cmd/memory-plugin-postgres/migrations/. Idempotent on repeat boot.
|
||||
// runMigrations applies the schema migrations bundled into the binary
|
||||
// via go:embed (see migrationsFS at the top of this file). Idempotent
|
||||
// on repeat boot — every migration file uses CREATE … IF NOT EXISTS.
|
||||
//
|
||||
// Implementation note: rather than embedding the full migrate engine,
|
||||
// we read the migration files at boot from a known relative path. The
|
||||
// down migrations are deliberately NOT applied here — that's a manual
|
||||
// operator action. This keeps the binary tiny and avoids dragging in
|
||||
// golang-migrate's drivers.
|
||||
// The down migrations are deliberately NOT applied here — that's a
|
||||
// manual operator action. This keeps the binary tiny and avoids
|
||||
// dragging in golang-migrate's drivers.
|
||||
//
|
||||
// MEMORY_PLUGIN_MIGRATIONS_DIR (filesystem path) is honored as an
|
||||
// override for operators who need to ship custom migrations alongside
|
||||
// the binary without rebuilding. When unset (the common case) we read
|
||||
// from the embedded FS.
|
||||
func runMigrations(db *sql.DB) error {
|
||||
// Find the migrations directory. In `go run` mode it's relative
|
||||
// to the cmd dir; in the prebuilt binary case it's expected next
|
||||
// to the binary OR via env var override.
|
||||
dir := os.Getenv("MEMORY_PLUGIN_MIGRATIONS_DIR")
|
||||
if dir == "" {
|
||||
// Best-effort: try the cwd-relative path that works for `go test`.
|
||||
dir = "cmd/memory-plugin-postgres/migrations"
|
||||
if dir := strings.TrimSpace(os.Getenv("MEMORY_PLUGIN_MIGRATIONS_DIR")); dir != "" {
|
||||
return runMigrationsFromDisk(db, dir)
|
||||
}
|
||||
entries, err := os.ReadDir(dir)
|
||||
return runMigrationsFromEmbed(db)
|
||||
}
|
||||
|
||||
// runMigrationsFromEmbed applies the *.up.sql files bundled into the
|
||||
// binary at build time. Order is alphabetical (matches the on-disk
|
||||
// behavior of os.ReadDir on Linux for the same set of names).
|
||||
func runMigrationsFromEmbed(db *sql.DB) error {
|
||||
entries, err := migrationsFS.ReadDir("migrations")
|
||||
if err != nil {
|
||||
return fmt.Errorf("read migrations dir %q: %w", dir, err)
|
||||
return fmt.Errorf("read embedded migrations: %w", err)
|
||||
}
|
||||
names := make([]string, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
|
||||
continue
|
||||
}
|
||||
path := dir + "/" + e.Name()
|
||||
names = append(names, e.Name())
|
||||
}
|
||||
sort.Strings(names)
|
||||
for _, name := range names {
|
||||
data, err := migrationsFS.ReadFile("migrations/" + name)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read embedded %q: %w", name, err)
|
||||
}
|
||||
if _, err := db.Exec(string(data)); err != nil {
|
||||
return fmt.Errorf("apply %q: %w", name, err)
|
||||
}
|
||||
log.Printf("applied embedded migration %s", name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// runMigrationsFromDisk preserves the legacy filesystem-path mode for
|
||||
// operator-supplied custom migrations.
|
||||
func runMigrationsFromDisk(db *sql.DB, dir string) error {
|
||||
entries, err := os.ReadDir(dir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read migrations dir %q: %w", dir, err)
|
||||
}
|
||||
names := make([]string, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
|
||||
continue
|
||||
}
|
||||
names = append(names, e.Name())
|
||||
}
|
||||
sort.Strings(names)
|
||||
for _, name := range names {
|
||||
path := dir + "/" + name
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read %q: %w", path, err)
|
||||
@@ -176,7 +233,7 @@ func runMigrations(db *sql.DB) error {
|
||||
if _, err := db.Exec(string(data)); err != nil {
|
||||
return fmt.Errorf("apply %q: %w", path, err)
|
||||
}
|
||||
log.Printf("applied migration %s", e.Name())
|
||||
log.Printf("applied disk migration %s (from %s)", name, dir)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -0,0 +1,72 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestMigrationsEmbedded_ContainsCreateTable pins that the migrations
|
||||
// are bundled into the binary at build time, NOT loaded from a
|
||||
// filesystem path that doesn't exist at runtime in the published image.
|
||||
//
|
||||
// Pre-fix: PR #2906 shipped the binary without the migrations dir;
|
||||
// `os.ReadDir("cmd/memory-plugin-postgres/migrations")` errored on every
|
||||
// tenant boot, the 30s health gate aborted the container, and the
|
||||
// staging redeploy fleet job marked all tenants as failed. Embedding
|
||||
// the migrations into the binary removes the runtime path entirely.
|
||||
func TestMigrationsEmbedded_ContainsCreateTable(t *testing.T) {
|
||||
entries, err := migrationsFS.ReadDir("migrations")
|
||||
if err != nil {
|
||||
t.Fatalf("embedded migrations dir unreadable: %v", err)
|
||||
}
|
||||
if len(entries) == 0 {
|
||||
t.Fatal("embedded migrations dir is empty — go:embed pattern matched no files")
|
||||
}
|
||||
|
||||
var seenUp bool
|
||||
for _, e := range entries {
|
||||
if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
|
||||
continue
|
||||
}
|
||||
seenUp = true
|
||||
data, err := migrationsFS.ReadFile("migrations/" + e.Name())
|
||||
if err != nil {
|
||||
t.Errorf("read embedded %q: %v", e.Name(), err)
|
||||
continue
|
||||
}
|
||||
if !strings.Contains(string(data), "CREATE TABLE") {
|
||||
t.Errorf("embedded %q has no CREATE TABLE — wrong file embedded?", e.Name())
|
||||
}
|
||||
}
|
||||
if !seenUp {
|
||||
t.Fatal("no *.up.sql in embedded migrations — runtime would have no schema to apply")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunMigrationsFromEmbed_OrderingIsAlphabetic pins that we apply
|
||||
// migrations in deterministic alphabetical order, not in whatever
|
||||
// arbitrary order migrationsFS.ReadDir happens to return. With one
|
||||
// migration today this is moot, but a future second migration ('002_…')
|
||||
// MUST run after '001_…' or the schema is broken.
|
||||
//
|
||||
// We can't easily exercise db.Exec here (no test DB); instead pin the
|
||||
// sort step on the directory listing itself.
|
||||
func TestRunMigrationsFromEmbed_OrderingIsAlphabetic(t *testing.T) {
|
||||
entries, err := migrationsFS.ReadDir("migrations")
|
||||
if err != nil {
|
||||
t.Fatalf("embedded migrations dir unreadable: %v", err)
|
||||
}
|
||||
var names []string
|
||||
for _, e := range entries {
|
||||
if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
|
||||
continue
|
||||
}
|
||||
names = append(names, e.Name())
|
||||
}
|
||||
for i := 1; i < len(names); i++ {
|
||||
if names[i-1] > names[i] {
|
||||
t.Errorf("ReadDir returned non-sorted names; runMigrationsFromEmbed must sort. "+
|
||||
"Got %q before %q", names[i-1], names[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/imagewatch"
|
||||
memwiring "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/wiring"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/registry"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/router"
|
||||
@@ -265,6 +266,14 @@ func main() {
|
||||
})
|
||||
}
|
||||
|
||||
// Pending-uploads GC sweep — deletes acked rows past their retention
|
||||
// window plus unacked rows past expires_at. Without this the
|
||||
// pending_uploads table grows unbounded; even with the 24h hard TTL,
|
||||
// nothing actually deletes a row, just makes it un-fetchable.
|
||||
go supervised.RunWithRecover(ctx, "pending-uploads-sweeper", func(c context.Context) {
|
||||
pendinguploads.StartSweeper(c, pendinguploads.NewPostgres(db.DB), 0)
|
||||
})
|
||||
|
||||
// Provision-timeout sweep — flips workspaces that have been stuck in
|
||||
// status='provisioning' past the timeout window to 'failed' and emits
|
||||
// WORKSPACE_PROVISION_TIMEOUT. Without this the UI banner is cosmetic
|
||||
|
||||
@@ -20,6 +20,51 @@ cd /canvas
|
||||
PORT=3000 HOSTNAME=0.0.0.0 node server.js &
|
||||
CANVAS_PID=$!
|
||||
|
||||
# Memory v2 sidecar (built-in postgres plugin). See Dockerfile entrypoint
|
||||
# comment for rationale.
|
||||
#
|
||||
# Spawn-gating: only start the sidecar when the operator has indicated
|
||||
# they want it (MEMORY_V2_CUTOVER=true OR MEMORY_PLUGIN_URL set).
|
||||
# Without that signal, the sidecar adds zero value and risks aborting
|
||||
# tenant boot via the 30s health gate when the tenant Postgres lacks
|
||||
# pgvector. Caught on staging redeploy 2026-05-05:
|
||||
# pq: extension "vector" is not available
|
||||
#
|
||||
# Defaults (when sidecar IS spawned): MEMORY_PLUGIN_DATABASE_URL
|
||||
# falls back to the tenant's DATABASE_URL.
|
||||
MEMORY_PLUGIN_PID=""
|
||||
memory_plugin_wanted=""
|
||||
if [ "$MEMORY_V2_CUTOVER" = "true" ] || [ -n "$MEMORY_PLUGIN_URL" ]; then
|
||||
memory_plugin_wanted=1
|
||||
fi
|
||||
if [ -z "$MEMORY_PLUGIN_DISABLE" ] && [ -n "$memory_plugin_wanted" ] && [ -n "$DATABASE_URL" ]; then
|
||||
: "${MEMORY_PLUGIN_DATABASE_URL:=$DATABASE_URL}"
|
||||
: "${MEMORY_PLUGIN_LISTEN_ADDR:=:9100}"
|
||||
export MEMORY_PLUGIN_DATABASE_URL MEMORY_PLUGIN_LISTEN_ADDR
|
||||
echo "memory-plugin: starting sidecar on $MEMORY_PLUGIN_LISTEN_ADDR" >&2
|
||||
/memory-plugin &
|
||||
MEMORY_PLUGIN_PID=$!
|
||||
# Wait up to 30s for /v1/health. Boot failure is fatal so a misconfigured
|
||||
# tenant crash-loops instead of silently serving cutover traffic against
|
||||
# a dead plugin.
|
||||
health_port=${MEMORY_PLUGIN_LISTEN_ADDR#:}
|
||||
ready=0
|
||||
for _ in $(seq 1 30); do
|
||||
if wget -qO- --timeout=2 "http://localhost:${health_port}/v1/health" >/dev/null 2>&1; then
|
||||
ready=1
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
if [ "$ready" != "1" ]; then
|
||||
echo "memory-plugin: ❌ /v1/health never returned 200 after 30s — aborting boot. Check DATABASE_URL reachability + pgvector extension + migrations." >&2
|
||||
kill "$MEMORY_PLUGIN_PID" 2>/dev/null || true
|
||||
kill "$CANVAS_PID" 2>/dev/null || true
|
||||
exit 1
|
||||
fi
|
||||
echo "memory-plugin: ✅ sidecar healthy on :$health_port" >&2
|
||||
fi
|
||||
|
||||
# Start Go platform in foreground-ish (we trap signals)
|
||||
# CANVAS_PROXY_URL tells the platform to proxy unmatched routes to Canvas.
|
||||
# CONTAINER_BACKEND: empty = Docker (default for self-hosted/local).
|
||||
@@ -29,15 +74,20 @@ cd /
|
||||
/platform &
|
||||
PLATFORM_PID=$!
|
||||
|
||||
# If either process exits, kill the other
|
||||
# If any process exits, kill the others
|
||||
cleanup() {
|
||||
kill $CANVAS_PID 2>/dev/null || true
|
||||
kill $PLATFORM_PID 2>/dev/null || true
|
||||
[ -n "$MEMORY_PLUGIN_PID" ] && kill $MEMORY_PLUGIN_PID 2>/dev/null || true
|
||||
}
|
||||
trap cleanup EXIT SIGTERM SIGINT
|
||||
|
||||
# Wait for either to exit — whichever exits first triggers cleanup
|
||||
wait -n $CANVAS_PID $PLATFORM_PID
|
||||
# Wait for any to exit — whichever exits first triggers cleanup
|
||||
if [ -n "$MEMORY_PLUGIN_PID" ]; then
|
||||
wait -n $CANVAS_PID $PLATFORM_PID $MEMORY_PLUGIN_PID
|
||||
else
|
||||
wait -n $CANVAS_PID $PLATFORM_PID
|
||||
fi
|
||||
EXIT_CODE=$?
|
||||
cleanup
|
||||
exit $EXIT_CODE
|
||||
|
||||
@@ -0,0 +1,125 @@
|
||||
package events
|
||||
|
||||
// types.go — typed taxonomy of WebSocket event names emitted by the
|
||||
// workspace-server.
|
||||
//
|
||||
// RFC #2945 PR-B. Pre-consolidation, every BroadcastOnly /
|
||||
// RecordAndBroadcast call site passed a bare string literal:
|
||||
//
|
||||
// h.broadcaster.BroadcastOnly(workspaceID, "AGENT_MESSAGE", payload)
|
||||
//
|
||||
// Producers (Go workspace-server, ~30 call sites across handlers/,
|
||||
// scheduler/, registry/, bundle/) and consumers (canvas TS store +
|
||||
// component listeners) duplicated the same string with no shared
|
||||
// definition. A producer renaming an event silently broke every
|
||||
// consumer — same drift class that produced the reno-stars data-loss
|
||||
// regression on the persistence side. The fix on that side was the
|
||||
// AgentMessageWriter SSOT (PR-A); the fix on this side is named
|
||||
// constants.
|
||||
//
|
||||
// Why a typed string (not a plain enum / iota): the event name
|
||||
// crosses the wire to TypeScript consumers as the literal string in
|
||||
// `WSMessage.Event`. Iota integers would break the canvas store's
|
||||
// switch (`case "AGENT_MESSAGE":`); a typed string preserves the
|
||||
// wire contract while giving Go callers compile-time discipline.
|
||||
//
|
||||
// Mirror in canvas: a parity gate (PR-B-2 follow-up) will assert this
|
||||
// constant set ≡ the TypeScript union members in
|
||||
// `canvas/src/lib/ws-events.ts`. Today the canvas consumes the names
|
||||
// via bare-string comparisons; the mirror lands separately to keep
|
||||
// PR-B narrow.
|
||||
|
||||
// EventType is the wire-typed name of a WebSocket event the platform
|
||||
// broadcasts. Always emit constants from this file rather than bare
|
||||
// strings — the AST gate in events_types_drift_test.go guards
|
||||
// against bare-string usage in the broadcaster surfaces.
|
||||
type EventType string
|
||||
|
||||
// Event constants — the canonical taxonomy. New events MUST be added
|
||||
// here AND mirrored in canvas/src/lib/ws-events.ts (parity gate
|
||||
// pending in PR-B-2). Group by semantic family so the list stays
|
||||
// scan-friendly as it grows.
|
||||
const (
|
||||
// Chat / agent messaging — surfaces in canvas chat panels.
|
||||
EventAgentMessage EventType = "AGENT_MESSAGE"
|
||||
EventA2AResponse EventType = "A2A_RESPONSE"
|
||||
EventActivityLogged EventType = "ACTIVITY_LOGGED"
|
||||
EventChannelMessage EventType = "CHANNEL_MESSAGE"
|
||||
|
||||
// Workspace lifecycle.
|
||||
EventWorkspaceProvisioning EventType = "WORKSPACE_PROVISIONING"
|
||||
EventWorkspaceProvisionFailed EventType = "WORKSPACE_PROVISION_FAILED"
|
||||
EventWorkspaceOnline EventType = "WORKSPACE_ONLINE"
|
||||
EventWorkspaceOffline EventType = "WORKSPACE_OFFLINE"
|
||||
EventWorkspaceDegraded EventType = "WORKSPACE_DEGRADED"
|
||||
EventWorkspaceHibernated EventType = "WORKSPACE_HIBERNATED"
|
||||
EventWorkspacePaused EventType = "WORKSPACE_PAUSED"
|
||||
EventWorkspaceRemoved EventType = "WORKSPACE_REMOVED"
|
||||
EventWorkspaceAwaitingAgent EventType = "WORKSPACE_AWAITING_AGENT"
|
||||
EventWorkspaceHeartbeat EventType = "WORKSPACE_HEARTBEAT"
|
||||
|
||||
// Agent assignment + identity.
|
||||
EventAgentAssigned EventType = "AGENT_ASSIGNED"
|
||||
EventAgentReplaced EventType = "AGENT_REPLACED"
|
||||
EventAgentRemoved EventType = "AGENT_REMOVED"
|
||||
EventAgentMoved EventType = "AGENT_MOVED"
|
||||
EventAgentCardUpdated EventType = "AGENT_CARD_UPDATED"
|
||||
|
||||
// Delegation lifecycle.
|
||||
EventDelegationSent EventType = "DELEGATION_SENT"
|
||||
EventDelegationStatus EventType = "DELEGATION_STATUS"
|
||||
EventDelegationComplete EventType = "DELEGATION_COMPLETE"
|
||||
EventDelegationFailed EventType = "DELEGATION_FAILED"
|
||||
|
||||
// Task progression + scheduler.
|
||||
EventTaskUpdated EventType = "TASK_UPDATED"
|
||||
EventCronExecuted EventType = "CRON_EXECUTED"
|
||||
EventCronSkipped EventType = "CRON_SKIPPED"
|
||||
|
||||
// Approvals.
|
||||
EventApprovalRequested EventType = "APPROVAL_REQUESTED"
|
||||
EventApprovalEscalated EventType = "APPROVAL_ESCALATED"
|
||||
|
||||
// Auth / credentials.
|
||||
EventExternalCredentialsRotated EventType = "EXTERNAL_CREDENTIALS_ROTATED"
|
||||
)
|
||||
|
||||
// AllEventTypes lists every constant in this file. Used by the
|
||||
// snapshot test (events_types_drift_test.go) to detect when a new
|
||||
// constant is added without updating the snapshot — the catch-up
|
||||
// step is mirroring the addition into canvas/src/lib/ws-events.ts so
|
||||
// canvas consumers can switch on it.
|
||||
//
|
||||
// Keep in lexicographic order so the snapshot diff is stable on
|
||||
// renames and the parity-with-TS comparison is order-independent.
|
||||
var AllEventTypes = []EventType{
|
||||
EventA2AResponse,
|
||||
EventActivityLogged,
|
||||
EventAgentAssigned,
|
||||
EventAgentCardUpdated,
|
||||
EventAgentMessage,
|
||||
EventAgentMoved,
|
||||
EventAgentRemoved,
|
||||
EventAgentReplaced,
|
||||
EventApprovalEscalated,
|
||||
EventApprovalRequested,
|
||||
EventChannelMessage,
|
||||
EventCronExecuted,
|
||||
EventCronSkipped,
|
||||
EventDelegationComplete,
|
||||
EventDelegationFailed,
|
||||
EventDelegationSent,
|
||||
EventDelegationStatus,
|
||||
EventExternalCredentialsRotated,
|
||||
EventTaskUpdated,
|
||||
EventWorkspaceAwaitingAgent,
|
||||
EventWorkspaceDegraded,
|
||||
EventWorkspaceHeartbeat,
|
||||
EventWorkspaceHibernated,
|
||||
EventWorkspaceOffline,
|
||||
EventWorkspaceOnline,
|
||||
EventWorkspacePaused,
|
||||
EventWorkspaceProvisionFailed,
|
||||
EventWorkspaceProvisioning,
|
||||
EventWorkspaceRemoved,
|
||||
}
|
||||
@@ -0,0 +1,117 @@
|
||||
package events
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestAllEventTypes_IsSnapshot pins the canonical event taxonomy.
|
||||
// Adding a new constant in types.go without updating AllEventTypes
|
||||
// (or vice versa) fails this test.
|
||||
//
|
||||
// The snapshot is also the authoritative input to the canvas-side
|
||||
// parity gate (PR-B-2 follow-up): the TypeScript union members in
|
||||
// canvas/src/lib/ws-events.ts MUST match this list exactly. A drift
|
||||
// gate at CI time will assert set equality once the TS file lands.
|
||||
func TestAllEventTypes_IsSnapshot(t *testing.T) {
|
||||
// Every named constant must appear in AllEventTypes. Walk via
|
||||
// reflection over the package-level vars would over-include test
|
||||
// fixtures, so list the canonical names here. When a constant
|
||||
// is added in types.go, append the EventType's literal value
|
||||
// to the expected list below — the failure message names
|
||||
// exactly what's missing so the diff is one-line obvious.
|
||||
expected := []string{
|
||||
"A2A_RESPONSE",
|
||||
"ACTIVITY_LOGGED",
|
||||
"AGENT_ASSIGNED",
|
||||
"AGENT_CARD_UPDATED",
|
||||
"AGENT_MESSAGE",
|
||||
"AGENT_MOVED",
|
||||
"AGENT_REMOVED",
|
||||
"AGENT_REPLACED",
|
||||
"APPROVAL_ESCALATED",
|
||||
"APPROVAL_REQUESTED",
|
||||
"CHANNEL_MESSAGE",
|
||||
"CRON_EXECUTED",
|
||||
"CRON_SKIPPED",
|
||||
"DELEGATION_COMPLETE",
|
||||
"DELEGATION_FAILED",
|
||||
"DELEGATION_SENT",
|
||||
"DELEGATION_STATUS",
|
||||
"EXTERNAL_CREDENTIALS_ROTATED",
|
||||
"TASK_UPDATED",
|
||||
"WORKSPACE_AWAITING_AGENT",
|
||||
"WORKSPACE_DEGRADED",
|
||||
"WORKSPACE_HEARTBEAT",
|
||||
"WORKSPACE_HIBERNATED",
|
||||
"WORKSPACE_OFFLINE",
|
||||
"WORKSPACE_ONLINE",
|
||||
"WORKSPACE_PAUSED",
|
||||
"WORKSPACE_PROVISIONING",
|
||||
"WORKSPACE_PROVISION_FAILED",
|
||||
"WORKSPACE_REMOVED",
|
||||
}
|
||||
sort.Strings(expected)
|
||||
|
||||
actual := make([]string, 0, len(AllEventTypes))
|
||||
for _, e := range AllEventTypes {
|
||||
actual = append(actual, string(e))
|
||||
}
|
||||
sort.Strings(actual)
|
||||
|
||||
if len(actual) != len(expected) {
|
||||
t.Errorf("AllEventTypes count = %d, want %d\nactual: %s\nexpected: %s",
|
||||
len(actual), len(expected),
|
||||
strings.Join(actual, ", "),
|
||||
strings.Join(expected, ", "))
|
||||
return
|
||||
}
|
||||
for i, want := range expected {
|
||||
if actual[i] != want {
|
||||
t.Errorf("AllEventTypes[%d] = %q, want %q (full diff:\n actual: %v\n expected: %v\n)",
|
||||
i, actual[i], want, actual, expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestEventType_NoEmptyConstants pins that no constant declared in
|
||||
// types.go has an accidentally-empty value. The catch is the
|
||||
// "WORKSPACE_X" → forgot-to-fill pattern: a typo in the literal
|
||||
// would surface as the empty string, and broadcast pipelines would
|
||||
// silently filter empty-name events without any error signal.
|
||||
func TestEventType_NoEmptyConstants(t *testing.T) {
|
||||
for _, e := range AllEventTypes {
|
||||
if string(e) == "" {
|
||||
t.Errorf("found empty EventType in AllEventTypes — typo in types.go?")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestEventType_AllUppercaseSnakeCase pins the wire format. Mixed
|
||||
// case or kebab-case would break the canvas TypeScript switch
|
||||
// statements (every consumer's `case "AGENT_MESSAGE":` is upper-
|
||||
// snake). The check is the catch for an accidental
|
||||
// `"agent_message"` typo that wouldn't fail the snapshot gate.
|
||||
func TestEventType_AllUppercaseSnakeCase(t *testing.T) {
|
||||
for _, e := range AllEventTypes {
|
||||
s := string(e)
|
||||
// Allowed chars: A-Z, 0-9, _ — nothing else, no leading/
|
||||
// trailing underscores, no consecutive underscores.
|
||||
if s != strings.ToUpper(s) {
|
||||
t.Errorf("EventType %q is not all-uppercase — wire format requires upper-snake", s)
|
||||
}
|
||||
if strings.HasPrefix(s, "_") || strings.HasSuffix(s, "_") {
|
||||
t.Errorf("EventType %q has leading/trailing underscore — disallowed", s)
|
||||
}
|
||||
if strings.Contains(s, "__") {
|
||||
t.Errorf("EventType %q has consecutive underscores — disallowed", s)
|
||||
}
|
||||
for _, r := range s {
|
||||
if !((r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '_') {
|
||||
t.Errorf("EventType %q contains disallowed char %q", s, r)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -465,78 +465,30 @@ func (h *ActivityHandler) Notify(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// Verify workspace exists
|
||||
var wsName string
|
||||
err := db.DB.QueryRowContext(c.Request.Context(),
|
||||
`SELECT name FROM workspaces WHERE id = $1 AND status != 'removed'`, workspaceID,
|
||||
).Scan(&wsName)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return
|
||||
// Single source of truth for chat-bearing agent → user messages —
|
||||
// see agent_message_writer.go for the contract. Pre-RFC-#2945, the
|
||||
// broadcast + INSERT pair was inlined here and again in
|
||||
// mcp_tools.go's send_message_to_user, and the duplication is what
|
||||
// produced the reno-stars data-loss regression. Both paths now
|
||||
// route through the same writer; future channels (Slack, Discord,
|
||||
// Lark) hook in here too.
|
||||
attachments := make([]AgentMessageAttachment, 0, len(body.Attachments))
|
||||
for _, a := range body.Attachments {
|
||||
attachments = append(attachments, AgentMessageAttachment{
|
||||
URI: a.URI,
|
||||
Name: a.Name,
|
||||
MimeType: a.MimeType,
|
||||
Size: a.Size,
|
||||
})
|
||||
}
|
||||
|
||||
broadcastPayload := map[string]interface{}{
|
||||
"message": body.Message,
|
||||
"workspace_id": workspaceID,
|
||||
"name": wsName,
|
||||
}
|
||||
if len(body.Attachments) > 0 {
|
||||
broadcastPayload["attachments"] = body.Attachments
|
||||
}
|
||||
h.broadcaster.BroadcastOnly(workspaceID, "AGENT_MESSAGE", broadcastPayload)
|
||||
|
||||
// Persist to activity_logs so the chat history loader restores this
|
||||
// message after a page reload. Pre-fix, send_message_to_user pushes
|
||||
// were broadcast-only — survived the WebSocket session but vanished
|
||||
// when the user refreshed because nothing wrote them to the DB.
|
||||
//
|
||||
// Shape chosen to match the existing loader query
|
||||
// (`type=a2a_receive&source=canvas`):
|
||||
// - activity_type='a2a_receive' so it joins the same query path
|
||||
// - source_id=NULL so the canvas-source filter accepts it
|
||||
// - method='notify' to distinguish from real A2A receives in audits
|
||||
// - request_body=NULL so the loader doesn't append a duplicate
|
||||
// "user message" bubble for it
|
||||
// - response_body={"result": "<text>"} matches extractResponseText's
|
||||
// simplest branch ({result: string} → take verbatim)
|
||||
//
|
||||
// Errors are logged-only — broadcast already succeeded, the user
|
||||
// sees the message; persistence failure just means the message
|
||||
// won't survive reload (pre-fix behavior). Don't fail the whole
|
||||
// notify on a DB hiccup.
|
||||
// response_body shape — chosen to feed BOTH:
|
||||
// - extractResponseText: looks at body.result (string) and returns it
|
||||
// - extractFilesFromTask: looks at body.parts[] for kind=file
|
||||
// so a chat reload after a notify-with-attachments restores both
|
||||
// the text bubble AND the download chips.
|
||||
respPayload := map[string]interface{}{"result": body.Message}
|
||||
if len(body.Attachments) > 0 {
|
||||
fileParts := make([]map[string]interface{}, 0, len(body.Attachments))
|
||||
for _, a := range body.Attachments {
|
||||
fileMeta := map[string]interface{}{"uri": a.URI, "name": a.Name}
|
||||
if a.MimeType != "" {
|
||||
fileMeta["mimeType"] = a.MimeType
|
||||
}
|
||||
if a.Size > 0 {
|
||||
fileMeta["size"] = a.Size
|
||||
}
|
||||
fileParts = append(fileParts, map[string]interface{}{
|
||||
"kind": "file",
|
||||
"file": fileMeta,
|
||||
})
|
||||
writer := NewAgentMessageWriter(db.DB, h.broadcaster)
|
||||
if err := writer.Send(c.Request.Context(), workspaceID, body.Message, attachments); err != nil {
|
||||
if errors.Is(err, ErrWorkspaceNotFound) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return
|
||||
}
|
||||
respPayload["parts"] = fileParts
|
||||
}
|
||||
respJSON, _ := json.Marshal(respPayload)
|
||||
preview := body.Message
|
||||
if len(preview) > 80 {
|
||||
preview = preview[:80] + "…"
|
||||
}
|
||||
if _, err := db.DB.ExecContext(c.Request.Context(), `
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, method, summary, response_body, status)
|
||||
VALUES ($1, 'a2a_receive', 'notify', $2, $3::jsonb, 'ok')
|
||||
`, workspaceID, "Agent message: "+preview, string(respJSON)); err != nil {
|
||||
log.Printf("Notify: failed to persist message for %s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "internal error"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"status": "sent"})
|
||||
|
||||
@@ -0,0 +1,177 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"go/ast"
|
||||
"go/parser"
|
||||
"go/token"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestAgentMessageBroadcastsArePersisted is a forward-looking AST
|
||||
// gate: every function in this package that broadcasts an
|
||||
// `AGENT_MESSAGE` WebSocket event MUST also call
|
||||
// `INSERT INTO activity_logs` somewhere in its body.
|
||||
//
|
||||
// The reno-stars production data-loss bug (CEO Ryan PC's long-form
|
||||
// onboarding-friction message visible live but missing on reload)
|
||||
// happened because mcp_tools.go:toolSendMessageToUser broadcast WS
|
||||
// without a paired INSERT — while the HTTP /notify sibling DID
|
||||
// persist. The fix added the INSERT; this gate prevents the regression
|
||||
// class from re-emerging in any future chat-bearing tool.
|
||||
//
|
||||
// Why an AST gate vs a code-review checklist (per memory
|
||||
// feedback_behavior_based_ast_gates.md): "pin invariants by what a
|
||||
// function calls, not what it's named". The shape that loses data is:
|
||||
//
|
||||
// BroadcastOnly(_, "AGENT_MESSAGE", _) without an INSERT companion
|
||||
//
|
||||
// Any new tool that emits AGENT_MESSAGE must persist or the next
|
||||
// canvas refresh drops the message — same shape as reno-stars. A
|
||||
// reviewer can miss this; the AST walk can't.
|
||||
//
|
||||
// Allowlist: empty by intent. If a future use case genuinely needs
|
||||
// fire-and-forget broadcast (e.g., transient typing indicators that
|
||||
// should NOT survive reload), add an entry here AND document why.
|
||||
// "Doesn't need to persist" is rarely the right answer for chat —
|
||||
// the canvas history is the source of truth.
|
||||
func TestAgentMessageBroadcastsArePersisted(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("getwd: %v", err)
|
||||
}
|
||||
entries, err := os.ReadDir(wd)
|
||||
if err != nil {
|
||||
t.Fatalf("readdir %s: %v", wd, err)
|
||||
}
|
||||
|
||||
type violation struct {
|
||||
file string
|
||||
fn string
|
||||
}
|
||||
var violations []violation
|
||||
|
||||
for _, ent := range entries {
|
||||
name := ent.Name()
|
||||
if ent.IsDir() || !strings.HasSuffix(name, ".go") || strings.HasSuffix(name, "_test.go") {
|
||||
continue
|
||||
}
|
||||
path := filepath.Join(wd, name)
|
||||
fset := token.NewFileSet()
|
||||
file, err := parser.ParseFile(fset, path, nil, parser.ParseComments)
|
||||
if err != nil {
|
||||
t.Fatalf("parse %s: %v", path, err)
|
||||
}
|
||||
|
||||
for _, decl := range file.Decls {
|
||||
fn, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fn.Body == nil {
|
||||
continue
|
||||
}
|
||||
if !funcEmitsAgentMessageBroadcast(fn) {
|
||||
continue
|
||||
}
|
||||
if !funcInsertsIntoActivityLogs(fn) {
|
||||
violations = append(violations, violation{file: name, fn: fn.Name.Name})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(violations) > 0 {
|
||||
sort.Slice(violations, func(i, j int) bool {
|
||||
if violations[i].file != violations[j].file {
|
||||
return violations[i].file < violations[j].file
|
||||
}
|
||||
return violations[i].fn < violations[j].fn
|
||||
})
|
||||
var buf strings.Builder
|
||||
for _, v := range violations {
|
||||
buf.WriteString(" - ")
|
||||
buf.WriteString(v.file)
|
||||
buf.WriteString(":")
|
||||
buf.WriteString(v.fn)
|
||||
buf.WriteString("\n")
|
||||
}
|
||||
t.Errorf(`function(s) broadcast `+"`AGENT_MESSAGE`"+` without persisting to activity_logs:
|
||||
|
||||
%s
|
||||
This is the reno-stars data-loss regression class: live message
|
||||
visible to the user, but missing on reload because activity_log was
|
||||
never written. Every chat-bearing broadcast MUST be paired with:
|
||||
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, method,
|
||||
summary, response_body, status)
|
||||
VALUES ($1, 'a2a_receive', 'notify', $2, $3::jsonb, 'ok')
|
||||
|
||||
See activity.go:Notify and mcp_tools.go:toolSendMessageToUser for
|
||||
the canonical shapes. Don't add an allowlist entry without a
|
||||
documented reason — the canvas chat history is the source of truth
|
||||
and silently dropping messages is a P0 user trust break.`,
|
||||
buf.String())
|
||||
}
|
||||
}
|
||||
|
||||
// funcEmitsAgentMessageBroadcast walks fn.Body for any CallExpr that
|
||||
// looks like `*.BroadcastOnly(_, "AGENT_MESSAGE", _)`.
|
||||
func funcEmitsAgentMessageBroadcast(fn *ast.FuncDecl) bool {
|
||||
var found bool
|
||||
ast.Inspect(fn.Body, func(n ast.Node) bool {
|
||||
call, ok := n.(*ast.CallExpr)
|
||||
if !ok {
|
||||
return true
|
||||
}
|
||||
sel, ok := call.Fun.(*ast.SelectorExpr)
|
||||
if !ok || sel.Sel.Name != "BroadcastOnly" {
|
||||
return true
|
||||
}
|
||||
// BroadcastOnly(workspaceID, eventType, payload) — the second
|
||||
// arg is the event name. Match by string-literal value.
|
||||
if len(call.Args) < 2 {
|
||||
return true
|
||||
}
|
||||
lit, ok := call.Args[1].(*ast.BasicLit)
|
||||
if !ok || lit.Kind != token.STRING {
|
||||
return true
|
||||
}
|
||||
raw := lit.Value
|
||||
if unq, err := strconv.Unquote(raw); err == nil {
|
||||
raw = unq
|
||||
}
|
||||
if raw == "AGENT_MESSAGE" {
|
||||
found = true
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
return found
|
||||
}
|
||||
|
||||
// funcInsertsIntoActivityLogs walks fn.Body for any STRING BasicLit
|
||||
// whose body contains `INSERT INTO activity_logs` (the SQL literal
|
||||
// passed to ExecContext). Matches the substring rather than a strict
|
||||
// regex because we don't care about the exact INSERT shape here —
|
||||
// only that the function persists. Specific shape pinning lives in
|
||||
// the per-handler test (see TestMCPHandler_SendMessageToUser_*).
|
||||
func funcInsertsIntoActivityLogs(fn *ast.FuncDecl) bool {
|
||||
var found bool
|
||||
ast.Inspect(fn.Body, func(n ast.Node) bool {
|
||||
lit, ok := n.(*ast.BasicLit)
|
||||
if !ok || lit.Kind != token.STRING {
|
||||
return true
|
||||
}
|
||||
raw := lit.Value
|
||||
if unq, err := strconv.Unquote(raw); err == nil {
|
||||
raw = unq
|
||||
}
|
||||
if strings.Contains(raw, "INSERT INTO activity_logs") {
|
||||
found = true
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
return found
|
||||
}
|
||||
@@ -0,0 +1,203 @@
|
||||
package handlers
|
||||
|
||||
// AgentMessageWriter is the SSOT for "agent → user" message delivery in the
|
||||
// workspace-server. Every chat-bearing path that surfaces a message to the
|
||||
// canvas — HTTP /notify (Notify handler), MCP tools/call
|
||||
// send_message_to_user (toolSendMessageToUser), any future channel — MUST
|
||||
// route through this writer rather than re-implement the broadcast +
|
||||
// persist contract inline.
|
||||
//
|
||||
// Why: pre-consolidation, two handlers duplicated the same "broadcast then
|
||||
// INSERT activity_logs" sequence. The reno-stars production data-loss
|
||||
// incident (2026-05-05, RFC #2945, PR #2944) was the symptom — the
|
||||
// persistence half landed for /notify but lagged for the MCP bridge by
|
||||
// months, silently dropping every long-form external-agent message until
|
||||
// reload. The AST gate from #2944 catches drift; this writer eliminates
|
||||
// the *possibility* of drift by giving both call sites a single
|
||||
// well-tested function to call.
|
||||
//
|
||||
// Contract:
|
||||
// 1. Look up the workspace by id; ErrWorkspaceNotFound on miss so the
|
||||
// caller can return 404 with a clean message.
|
||||
// 2. Broadcast a WS AGENT_MESSAGE event with {message, workspace_id,
|
||||
// name, attachments?}.
|
||||
// 3. INSERT a row into activity_logs:
|
||||
// type='a2a_receive', method='notify', source_id NULL,
|
||||
// response_body={"result": message[, "parts": [file kind...]]},
|
||||
// status='ok'
|
||||
// Best-effort — INSERT failure logs only, returns nil so the broadcast
|
||||
// success isn't undone on the caller side.
|
||||
// 4. Returns nil on success.
|
||||
//
|
||||
// The shape (especially the JSON response_body) is the wire contract the
|
||||
// canvas's chat-history hydrator (canvas/src/.../historyHydration.ts)
|
||||
// reads. Drift here silently breaks chat replay across all consumers, so
|
||||
// changes to the JSON shape MUST be cross-verified against the hydrator
|
||||
// in the same PR.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
)
|
||||
|
||||
// ErrWorkspaceNotFound is returned by AgentMessageWriter.Send when the
|
||||
// workspace lookup turns up nothing (or the workspace is in
|
||||
// status='removed'). Callers translate to HTTP 404 / JSON-RPC error /
|
||||
// whatever surface they expose. Real DB errors (connection drop, query
|
||||
// timeout) surface as wrapped errors and should be treated as 503.
|
||||
var ErrWorkspaceNotFound = errors.New("agent_message: workspace not found")
|
||||
|
||||
// truncatePreviewRunes returns at most maxRunes runes of s, plus an ellipsis
|
||||
// when truncated. Operates on the rune (codepoint) boundary instead of
|
||||
// byte indices — the previous byte-slice version produced invalid UTF-8
|
||||
// when maxRunes landed mid-codepoint (CJK, emoji, accented characters
|
||||
// in agent-authored chat messages), and Postgres JSONB rejects invalid
|
||||
// UTF-8, dropping the activity_log INSERT silently. The persistence
|
||||
// failure log fires but the message vanishes from chat history — the
|
||||
// exact regression class the SSOT consolidation was built to prevent.
|
||||
//
|
||||
// maxRunes is in runes, not bytes — `truncatePreviewRunes("你好", 1)` returns
|
||||
// `"你…"`, not `"\xe4…"`. Set the cap on a UI-friendly basis (visible
|
||||
// character count, not stored byte count); 80 runes covers the
|
||||
// activity_logs.summary column comfortably.
|
||||
func truncatePreviewRunes(s string, maxRunes int) string {
|
||||
if utf8.RuneCountInString(s) <= maxRunes {
|
||||
return s
|
||||
}
|
||||
// Walk runes until we've consumed maxRunes; cut at that byte index.
|
||||
count := 0
|
||||
cut := len(s)
|
||||
for i := range s {
|
||||
if count == maxRunes {
|
||||
cut = i
|
||||
break
|
||||
}
|
||||
count++
|
||||
}
|
||||
return s[:cut] + "…"
|
||||
}
|
||||
|
||||
// AgentMessageAttachment is one file attached to an agent → user
|
||||
// message. Identical to handlers.NotifyAttachment in field set; kept
|
||||
// distinct so the writer's API doesn't import a handler type with HTTP
|
||||
// binding tags.
|
||||
type AgentMessageAttachment struct {
|
||||
URI string
|
||||
Name string
|
||||
MimeType string
|
||||
Size int64
|
||||
}
|
||||
|
||||
// AgentMessageWriter persists + broadcasts agent → user messages. Construct
|
||||
// once per process via NewAgentMessageWriter; pass the same instance to
|
||||
// every handler that delivers chat (Notify, toolSendMessageToUser, etc.).
|
||||
//
|
||||
// Takes events.EventEmitter (not the *Broadcaster concrete type) so tests
|
||||
// can substitute a fake emitter and producers in other packages can wrap
|
||||
// the real broadcaster behind their own metrics / retries without leaking
|
||||
// the concrete dependency.
|
||||
type AgentMessageWriter struct {
|
||||
db *sql.DB
|
||||
broadcaster events.EventEmitter
|
||||
}
|
||||
|
||||
// NewAgentMessageWriter binds the writer to the platform's DB pool +
|
||||
// WebSocket broadcaster.
|
||||
func NewAgentMessageWriter(db *sql.DB, broadcaster events.EventEmitter) *AgentMessageWriter {
|
||||
return &AgentMessageWriter{db: db, broadcaster: broadcaster}
|
||||
}
|
||||
|
||||
// Send delivers a single agent → user message. Look up + broadcast +
|
||||
// persist in that order; ErrWorkspaceNotFound short-circuits before any
|
||||
// broadcast or DB write so callers can 404 cleanly.
|
||||
//
|
||||
// Returns nil on success — including on DB-INSERT failure (the broadcast
|
||||
// already returned successfully and the user has seen the message; the
|
||||
// persistence-failure mode is logged at WARN but the caller's response
|
||||
// stays 200 so the agent doesn't retry and double-broadcast).
|
||||
func (w *AgentMessageWriter) Send(
|
||||
ctx context.Context,
|
||||
workspaceID, message string,
|
||||
attachments []AgentMessageAttachment,
|
||||
) error {
|
||||
// 1. Workspace lookup. status='removed' filter is the same shape /notify
|
||||
// used pre-consolidation; deleted workspaces don't get notifications.
|
||||
//
|
||||
// Distinguish sql.ErrNoRows ("workspace genuinely not present" — caller
|
||||
// should 404) from real DB errors (connection drop, statement timeout,
|
||||
// pool exhaustion — caller should 503). Pre-fix this branch returned
|
||||
// ErrWorkspaceNotFound for any error, so during a DB outage every
|
||||
// notify call surfaced as "workspace not found" and masked real
|
||||
// incidents in the alert path.
|
||||
var wsName string
|
||||
err := w.db.QueryRowContext(ctx,
|
||||
`SELECT name FROM workspaces WHERE id = $1 AND status != 'removed'`,
|
||||
workspaceID,
|
||||
).Scan(&wsName)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return ErrWorkspaceNotFound
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("agent_message: workspace lookup: %w", err)
|
||||
}
|
||||
|
||||
// 2. Build broadcast payload + WS-emit. Same shape that ChatTab's
|
||||
// AGENT_MESSAGE handler in canvas/src/store/canvas-events.ts has
|
||||
// consumed since the canvas chat shipped — drift here would orphan
|
||||
// every live chat panel.
|
||||
broadcastPayload := map[string]interface{}{
|
||||
"message": message,
|
||||
"workspace_id": workspaceID,
|
||||
"name": wsName,
|
||||
}
|
||||
if len(attachments) > 0 {
|
||||
broadcastPayload["attachments"] = attachments
|
||||
}
|
||||
w.broadcaster.BroadcastOnly(workspaceID, string(events.EventAgentMessage), broadcastPayload)
|
||||
|
||||
// 3. Persist for chat-history hydration. response_body shape MUST stay
|
||||
// in sync with extractResponseText + extractFilesFromTask in
|
||||
// canvas/src/components/tabs/chat/historyHydration.ts:
|
||||
// - extractResponseText reads body.result (string) → renders text
|
||||
// - extractFilesFromTask reads body.parts[] (kind=file) → renders chips
|
||||
respPayload := map[string]interface{}{"result": message}
|
||||
if len(attachments) > 0 {
|
||||
fileParts := make([]map[string]interface{}, 0, len(attachments))
|
||||
for _, a := range attachments {
|
||||
fileMeta := map[string]interface{}{"uri": a.URI, "name": a.Name}
|
||||
if a.MimeType != "" {
|
||||
fileMeta["mimeType"] = a.MimeType
|
||||
}
|
||||
if a.Size > 0 {
|
||||
fileMeta["size"] = a.Size
|
||||
}
|
||||
fileParts = append(fileParts, map[string]interface{}{
|
||||
"kind": "file",
|
||||
"file": fileMeta,
|
||||
})
|
||||
}
|
||||
respPayload["parts"] = fileParts
|
||||
}
|
||||
respJSON, _ := json.Marshal(respPayload)
|
||||
preview := truncatePreviewRunes(message, 80)
|
||||
if _, err := w.db.ExecContext(ctx, `
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, method, summary, response_body, status)
|
||||
VALUES ($1, 'a2a_receive', 'notify', $2, $3::jsonb, 'ok')
|
||||
`, workspaceID, "Agent message: "+preview, string(respJSON)); err != nil {
|
||||
// Best-effort: the broadcast already returned ok and the user
|
||||
// has seen the message. Logging a structured line lets operators
|
||||
// notice persistence-failure rates spike if the DB is unhealthy,
|
||||
// without breaking the tool response or causing the agent to
|
||||
// retry-and-double-broadcast.
|
||||
log.Printf("agent_message: failed to persist for %s: %v", workspaceID, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,448 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql/driver"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
)
|
||||
|
||||
// AgentMessageWriter is the SSOT for agent → user chat delivery
|
||||
// (RFC #2945 PR-A). These tests pin the contract the writer
|
||||
// guarantees: workspace lookup, broadcast, INSERT, error semantics —
|
||||
// every shape that producers (Notify, toolSendMessageToUser, future
|
||||
// channels) rely on.
|
||||
//
|
||||
// Pre-consolidation, the broadcast-then-INSERT logic was duplicated
|
||||
// across two handlers and they drifted (reno-stars, 2026-05-05). With
|
||||
// the writer being the only place this logic lives, these tests are
|
||||
// the regression line for every chat-bearing path simultaneously.
|
||||
|
||||
// jsonMatcher is a sqlmock Argument matcher that decodes the actual
|
||||
// SQL arg as JSON and runs a caller-supplied predicate over the
|
||||
// resulting structure. Tighter than substring matching (which can
|
||||
// false-pass on a renamed key) and tolerant of map-key ordering
|
||||
// (which exact-string matching is not).
|
||||
type jsonMatcher struct {
|
||||
predicate func(parsed map[string]any) bool
|
||||
desc string
|
||||
}
|
||||
|
||||
func (m jsonMatcher) Match(v driver.Value) bool {
|
||||
s, ok := v.(string)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
var parsed map[string]any
|
||||
if err := json.Unmarshal([]byte(s), &parsed); err != nil {
|
||||
return false
|
||||
}
|
||||
return m.predicate(parsed)
|
||||
}
|
||||
|
||||
// stringMatcher pins exact prefix/suffix/equality checks against a
|
||||
// driver.Value that's actually a string.
|
||||
type stringMatcher func(string) bool
|
||||
|
||||
func (f stringMatcher) Match(v driver.Value) bool {
|
||||
s, ok := v.(string)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
return f(s)
|
||||
}
|
||||
|
||||
// capturingEmitter records every BroadcastOnly call so tests can pin
|
||||
// the WS event shape without a real ws.Hub. RecordAndBroadcast is
|
||||
// also captured for completeness — the writer doesn't call it today,
|
||||
// but a future producer might, and a captured-but-unasserted record
|
||||
// is easier to diagnose than a nil panic.
|
||||
type capturingEmitter struct {
|
||||
events []capturedEvent
|
||||
}
|
||||
|
||||
type capturedEvent struct {
|
||||
workspaceID string
|
||||
eventType string
|
||||
payload interface{}
|
||||
}
|
||||
|
||||
func (c *capturingEmitter) BroadcastOnly(workspaceID string, eventType string, payload interface{}) {
|
||||
c.events = append(c.events, capturedEvent{workspaceID, eventType, payload})
|
||||
}
|
||||
|
||||
func (c *capturingEmitter) RecordAndBroadcast(_ context.Context, eventType string, workspaceID string, payload interface{}) error {
|
||||
c.events = append(c.events, capturedEvent{workspaceID, eventType, payload})
|
||||
return nil
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_Success_NoAttachments pins the happy
|
||||
// path: workspace lookup, broadcast, INSERT, return nil.
|
||||
func TestAgentMessageWriter_Send_Success_NoAttachments(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
w := NewAgentMessageWriter(db.DB, newTestBroadcaster())
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-1").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("CEO Ryan PC"))
|
||||
|
||||
mock.ExpectExec(`INSERT INTO activity_logs.*'a2a_receive'.*'notify'`).
|
||||
WithArgs(
|
||||
"ws-1",
|
||||
sqlmock.AnyArg(), // summary
|
||||
`{"result":"hi"}`,
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
if err := w.Send(context.Background(), "ws-1", "hi", nil); err != nil {
|
||||
t.Fatalf("Send returned %v, want nil", err)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("DB expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_Success_WithAttachments pins the file
|
||||
// attachment shape — response_body MUST contain a parts[] array with
|
||||
// kind=file entries so the canvas hydrater renders download chips.
|
||||
// Drift here = chips disappear on chat reload.
|
||||
func TestAgentMessageWriter_Send_Success_WithAttachments(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
w := NewAgentMessageWriter(db.DB, newTestBroadcaster())
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-att").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("Ryan"))
|
||||
|
||||
mock.ExpectExec(`INSERT INTO activity_logs.*'a2a_receive'.*'notify'`).
|
||||
WithArgs(
|
||||
"ws-att",
|
||||
sqlmock.AnyArg(),
|
||||
jsonMatcher{
|
||||
desc: "response_body has result + parts with kind=file metadata",
|
||||
predicate: func(p map[string]any) bool {
|
||||
if p["result"] != "see attached" {
|
||||
return false
|
||||
}
|
||||
parts, ok := p["parts"].([]any)
|
||||
if !ok || len(parts) != 1 {
|
||||
return false
|
||||
}
|
||||
part, ok := parts[0].(map[string]any)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
if part["kind"] != "file" {
|
||||
return false
|
||||
}
|
||||
file, ok := part["file"].(map[string]any)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
return file["uri"] == "workspace://x.zip" &&
|
||||
file["name"] == "x.zip" &&
|
||||
file["mimeType"] == "application/zip" &&
|
||||
file["size"].(float64) == 1234
|
||||
},
|
||||
},
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
atts := []AgentMessageAttachment{
|
||||
{URI: "workspace://x.zip", Name: "x.zip", MimeType: "application/zip", Size: 1234},
|
||||
}
|
||||
if err := w.Send(context.Background(), "ws-att", "see attached", atts); err != nil {
|
||||
t.Fatalf("Send returned %v, want nil", err)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("DB expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_WorkspaceNotFound pins ErrWorkspaceNotFound
|
||||
// short-circuit. Must NOT broadcast, MUST NOT INSERT — caller will 404
|
||||
// or surface a JSON-RPC error.
|
||||
func TestAgentMessageWriter_Send_WorkspaceNotFound(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
emitter := &capturingEmitter{}
|
||||
w := NewAgentMessageWriter(db.DB, emitter)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-missing").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}))
|
||||
|
||||
err := w.Send(context.Background(), "ws-missing", "lost in the void", nil)
|
||||
if !errors.Is(err, ErrWorkspaceNotFound) {
|
||||
t.Errorf("Send returned %v, want ErrWorkspaceNotFound", err)
|
||||
}
|
||||
if len(emitter.events) != 0 {
|
||||
t.Errorf("workspace-not-found path MUST NOT broadcast, got %d events", len(emitter.events))
|
||||
}
|
||||
// Implicit: no INSERT expectation registered, so a stray INSERT
|
||||
// would fail ExpectationsWereMet.
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("DB expectations (INSERT must NOT fire on workspace-not-found): %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_DBInsertFailureStillReturnsNil pins the
|
||||
// "best-effort persistence" contract: when the activity_log INSERT
|
||||
// fails (DB hiccup, transient connection, constraint), the writer
|
||||
// MUST still return nil. The broadcast already succeeded; the user
|
||||
// has seen the message; returning an error here would cause the
|
||||
// caller (and the agent calling the tool) to retry and double-
|
||||
// broadcast.
|
||||
func TestAgentMessageWriter_Send_DBInsertFailureStillReturnsNil(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
w := NewAgentMessageWriter(db.DB, newTestBroadcaster())
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-dbfail").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("CEO Ryan PC"))
|
||||
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WillReturnError(errors.New("transient db error"))
|
||||
|
||||
err := w.Send(context.Background(), "ws-dbfail", "should not be lost from live chat", nil)
|
||||
if err != nil {
|
||||
t.Errorf("DB INSERT failure must return nil (broadcast already succeeded), got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_PreviewTruncation pins the summary
|
||||
// preview cap. Long messages (Ryan's onboarding-friction report was
|
||||
// ~2k chars) must summarise to ≤80 chars + ellipsis so the activity
|
||||
// table doesn't carry multi-KB summaries that bloat list queries.
|
||||
func TestAgentMessageWriter_Send_PreviewTruncation(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
w := NewAgentMessageWriter(db.DB, newTestBroadcaster())
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-trunc").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("Ryan"))
|
||||
|
||||
longMsg := strings.Repeat("x", 200)
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WithArgs(
|
||||
"ws-trunc",
|
||||
stringMatcher(func(s string) bool {
|
||||
if !strings.HasPrefix(s, "Agent message: ") {
|
||||
return false
|
||||
}
|
||||
preview := strings.TrimPrefix(s, "Agent message: ")
|
||||
if !strings.HasSuffix(preview, "…") {
|
||||
return false
|
||||
}
|
||||
body := strings.TrimSuffix(preview, "…")
|
||||
return len(body) == 80
|
||||
}),
|
||||
sqlmock.AnyArg(),
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
if err := w.Send(context.Background(), "ws-trunc", longMsg, nil); err != nil {
|
||||
t.Fatalf("Send: %v", err)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("preview truncation drift: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_BroadcastsAgentMessageEvent pins the
|
||||
// WS event name + payload shape. The canvas's
|
||||
// canvas-events.ts:AGENT_MESSAGE handler reads {message, workspace_id,
|
||||
// name, attachments?} — drift here orphans every live chat panel.
|
||||
func TestAgentMessageWriter_Send_BroadcastsAgentMessageEvent(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
emitter := &capturingEmitter{}
|
||||
w := NewAgentMessageWriter(db.DB, emitter)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-bc").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("Workspace Name"))
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
atts := []AgentMessageAttachment{
|
||||
{URI: "workspace://a.txt", Name: "a.txt"},
|
||||
}
|
||||
if err := w.Send(context.Background(), "ws-bc", "hi", atts); err != nil {
|
||||
t.Fatalf("Send: %v", err)
|
||||
}
|
||||
|
||||
if len(emitter.events) != 1 {
|
||||
t.Fatalf("expected exactly 1 broadcast, got %d", len(emitter.events))
|
||||
}
|
||||
ev := emitter.events[0]
|
||||
if ev.eventType != "AGENT_MESSAGE" {
|
||||
t.Errorf("event type = %q, want AGENT_MESSAGE", ev.eventType)
|
||||
}
|
||||
if ev.workspaceID != "ws-bc" {
|
||||
t.Errorf("workspace_id = %q, want ws-bc", ev.workspaceID)
|
||||
}
|
||||
pl, ok := ev.payload.(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("payload not a map: %T", ev.payload)
|
||||
}
|
||||
if pl["message"] != "hi" {
|
||||
t.Errorf("payload.message = %v, want hi", pl["message"])
|
||||
}
|
||||
if pl["workspace_id"] != "ws-bc" {
|
||||
t.Errorf("payload.workspace_id = %v, want ws-bc", pl["workspace_id"])
|
||||
}
|
||||
if pl["name"] != "Workspace Name" {
|
||||
t.Errorf("payload.name = %v, want Workspace Name", pl["name"])
|
||||
}
|
||||
if pl["attachments"] == nil {
|
||||
t.Error("payload.attachments missing on attachment-bearing send")
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_DBErrorOnLookupReturnsWrapped pins the
|
||||
// distinction between sql.ErrNoRows (legit not-found → 404) and real
|
||||
// DB errors (connection drop → 503). Pre-followup the lookup branch
|
||||
// returned ErrWorkspaceNotFound for ANY error, so during a DB outage
|
||||
// every notify call surfaced as "workspace not found" and masked
|
||||
// real incidents in alerting.
|
||||
func TestAgentMessageWriter_Send_DBErrorOnLookupReturnsWrapped(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
w := NewAgentMessageWriter(db.DB, newTestBroadcaster())
|
||||
|
||||
transientErr := errors.New("connection refused")
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-dbdown").
|
||||
WillReturnError(transientErr)
|
||||
|
||||
err := w.Send(context.Background(), "ws-dbdown", "hi", nil)
|
||||
if err == nil {
|
||||
t.Fatal("expected wrapped DB error, got nil")
|
||||
}
|
||||
if errors.Is(err, ErrWorkspaceNotFound) {
|
||||
t.Errorf("DB outage MUST NOT surface as ErrWorkspaceNotFound (masks incidents in alerting); got %v", err)
|
||||
}
|
||||
if !errors.Is(err, transientErr) {
|
||||
t.Errorf("expected wrapped %v, got %v", transientErr, err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTruncatePreviewRunes_RuneBoundary pins the multi-byte-safe
|
||||
// truncation. The previous byte-slice version produced invalid UTF-8
|
||||
// when the cut landed mid-codepoint (CJK, emoji, accented), and
|
||||
// Postgres JSONB rejects invalid UTF-8 — INSERT fails, log.Printf
|
||||
// fires, message vanishes from chat history. Per memory
|
||||
// feedback_assert_exact_not_substring.md, pin the boundary cases
|
||||
// directly.
|
||||
func TestTruncatePreviewRunes_RuneBoundary(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
in string
|
||||
max int
|
||||
want string
|
||||
}{
|
||||
{"under-max ASCII", "hi", 80, "hi"},
|
||||
{"under-max CJK", "你好", 80, "你好"},
|
||||
{"exactly-at-max", "abcde", 5, "abcde"},
|
||||
{"truncate ASCII", "abcdefghij", 5, "abcde…"},
|
||||
{"truncate CJK at rune boundary", "你好世界你好世界", 4, "你好世界…"},
|
||||
{"truncate emoji at rune boundary", "😀😀😀😀😀😀", 3, "😀😀😀…"},
|
||||
// The pre-fix bug shape: byte-slice on non-ASCII would have
|
||||
// mangled the codepoint here. With rune-boundary truncation
|
||||
// the result is well-formed UTF-8.
|
||||
{"non-zero with emoji prefix", "🚀abcdefghijk", 5, "🚀abcd…"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
got := truncatePreviewRunes(c.in, c.max)
|
||||
if got != c.want {
|
||||
t.Errorf("truncatePreviewRunes(%q, %d) = %q, want %q", c.in, c.max, got, c.want)
|
||||
}
|
||||
// Always-valid UTF-8 invariant. A byte-slice truncation
|
||||
// could leave partial codepoints; this version must not.
|
||||
if !utf8.ValidString(got) {
|
||||
t.Errorf("truncatePreviewRunes(%q, %d) returned invalid UTF-8: %q", c.in, c.max, got)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_NonASCIIMessagePersists pins the end-to-end
|
||||
// path for non-ASCII messages — the original reno-stars regression
|
||||
// surfaced via byte-slice truncation breaking JSONB INSERT. Every
|
||||
// handler-level test had ASCII content, so this branch had no
|
||||
// coverage. Now it does.
|
||||
func TestAgentMessageWriter_Send_NonASCIIMessagePersists(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
w := NewAgentMessageWriter(db.DB, newTestBroadcaster())
|
||||
|
||||
// 200-rune CJK message — exceeds the 80-rune cap, would have hit
|
||||
// the byte-slice bug.
|
||||
msg := strings.Repeat("你", 200)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-cjk").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("CEO Ryan PC"))
|
||||
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WithArgs(
|
||||
"ws-cjk",
|
||||
stringMatcher(func(s string) bool {
|
||||
if !strings.HasPrefix(s, "Agent message: ") {
|
||||
return false
|
||||
}
|
||||
preview := strings.TrimPrefix(s, "Agent message: ")
|
||||
if !strings.HasSuffix(preview, "…") {
|
||||
return false
|
||||
}
|
||||
body := strings.TrimSuffix(preview, "…")
|
||||
// 80 runes of 你 = 80 codepoints. Each is 3 bytes UTF-8.
|
||||
if utf8.RuneCountInString(body) != 80 {
|
||||
return false
|
||||
}
|
||||
// MUST be valid UTF-8 — pre-fix byte-slice would have
|
||||
// returned half a codepoint here.
|
||||
return utf8.ValidString(body)
|
||||
}),
|
||||
sqlmock.AnyArg(),
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
if err := w.Send(context.Background(), "ws-cjk", msg, nil); err != nil {
|
||||
t.Fatalf("Send: %v", err)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("non-ASCII path drift: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_OmitsAttachmentsKeyWhenEmpty pins the
|
||||
// "no key when nil" wire contract — extra empty fields would force
|
||||
// canvas consumers to defensively check for [] vs undefined; the
|
||||
// existing AGENT_MESSAGE handler treats absence as "no attachments".
|
||||
func TestAgentMessageWriter_Send_OmitsAttachmentsKeyWhenEmpty(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
emitter := &capturingEmitter{}
|
||||
w := NewAgentMessageWriter(db.DB, emitter)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-noatt").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("X"))
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
if err := w.Send(context.Background(), "ws-noatt", "plain text", nil); err != nil {
|
||||
t.Fatalf("Send: %v", err)
|
||||
}
|
||||
if len(emitter.events) != 1 {
|
||||
t.Fatalf("expected 1 event, got %d", len(emitter.events))
|
||||
}
|
||||
pl := emitter.events[0].payload.(map[string]interface{})
|
||||
if _, present := pl["attachments"]; present {
|
||||
t.Errorf("attachments key MUST NOT be present when empty (canvas treats absence as 'none'); payload=%v", pl)
|
||||
}
|
||||
}
|
||||
@@ -600,14 +600,21 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w
|
||||
return
|
||||
}
|
||||
|
||||
out := make([]uploadedFile, 0, len(headers))
|
||||
// Phase 1: pre-validate + read every part BEFORE any DB write.
|
||||
// A multi-file upload must commit all-or-nothing; a per-file
|
||||
// failure halfway through used to leave rows 1..K-1 in the table
|
||||
// while the client got a 500 and retried the whole batch — duplicate
|
||||
// rows, orphan activity rows. Validating up-front + atomic PutBatch
|
||||
// closes that gap.
|
||||
type prepped struct {
|
||||
Sanitized string
|
||||
Mimetype string
|
||||
Content []byte
|
||||
Original string // original (unsanitized) filename for error messages
|
||||
}
|
||||
prepReady := make([]prepped, 0, len(headers))
|
||||
items := make([]pendinguploads.PutItem, 0, len(headers))
|
||||
for _, fh := range headers {
|
||||
// Read full content. Per-file cap enforced post-read so an
|
||||
// oversized file fails with a clean 413 rather than a torn
|
||||
// stream. The +1 byte ReadAll trick that the Python side
|
||||
// uses isn't easy through multipart.FileHeader; instead we
|
||||
// rely on the multipart layer's ContentLength header and
|
||||
// short-circuit before opening the part.
|
||||
if fh.Size > pendinguploads.MaxFileBytes {
|
||||
log.Printf("chat_files uploadPollMode: per-file cap exceeded for %s: %s (%d bytes)",
|
||||
workspaceID, fh.Filename, fh.Size)
|
||||
@@ -621,45 +628,67 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w
|
||||
}
|
||||
content, err := readMultipartFile(fh)
|
||||
if err != nil {
|
||||
log.Printf("chat_files uploadPollMode: read part failed for %s/%s: %v", workspaceID, fh.Filename, err)
|
||||
log.Printf("chat_files uploadPollMode: read part failed for %s/%s: %v",
|
||||
workspaceID, fh.Filename, err)
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "could not read file part"})
|
||||
return
|
||||
}
|
||||
|
||||
sanitized := SanitizeFilename(fh.Filename)
|
||||
mimetype := fh.Header.Get("Content-Type")
|
||||
|
||||
fileID, err := h.pendingUploads.Put(ctx, wsUUID, content, sanitized, mimetype)
|
||||
if err != nil {
|
||||
if errors.Is(err, pendinguploads.ErrTooLarge) {
|
||||
// Belt + suspenders: the size check above already
|
||||
// caught this, but Storage.Put re-validates so a
|
||||
// malformed FileHeader can't slip through. 413 with
|
||||
// the same shape so the client sees one error class.
|
||||
c.JSON(http.StatusRequestEntityTooLarge, gin.H{
|
||||
"error": "file exceeds per-file cap",
|
||||
"filename": fh.Filename,
|
||||
"size": len(content),
|
||||
"max": pendinguploads.MaxFileBytes,
|
||||
})
|
||||
return
|
||||
}
|
||||
log.Printf("chat_files uploadPollMode: storage.Put failed for %s/%s: %v",
|
||||
workspaceID, sanitized, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "could not stage file"})
|
||||
// Belt-and-braces post-read cap (multipart.FileHeader.Size can lie
|
||||
// on some clients that don't set Content-Length per part).
|
||||
if len(content) > pendinguploads.MaxFileBytes {
|
||||
log.Printf("chat_files uploadPollMode: per-file cap exceeded post-read for %s: %s (%d bytes)",
|
||||
workspaceID, fh.Filename, len(content))
|
||||
c.JSON(http.StatusRequestEntityTooLarge, gin.H{
|
||||
"error": "file exceeds per-file cap",
|
||||
"filename": fh.Filename,
|
||||
"size": len(content),
|
||||
"max": pendinguploads.MaxFileBytes,
|
||||
})
|
||||
return
|
||||
}
|
||||
sanitized := SanitizeFilename(fh.Filename)
|
||||
mimetype := safeMimetype(fh.Header.Get("Content-Type"))
|
||||
prepReady = append(prepReady, prepped{
|
||||
Sanitized: sanitized, Mimetype: mimetype, Content: content, Original: fh.Filename,
|
||||
})
|
||||
items = append(items, pendinguploads.PutItem{
|
||||
Content: content, Filename: sanitized, Mimetype: mimetype,
|
||||
})
|
||||
}
|
||||
|
||||
// Activity row so the workspace's inbox poller picks this up
|
||||
// on its next cycle. activity_type=a2a_receive (NOT a new
|
||||
// type) so the existing poll filter
|
||||
// `?type=a2a_receive` catches it without poll-side changes;
|
||||
// method=chat_upload_receive is the discriminator the
|
||||
// workspace's adapter (Phase 2) uses to route to the upload
|
||||
// fetcher instead of the agent's message handler. Same
|
||||
// shape as A2A's tasks/send vs message/send method split.
|
||||
// Phase 2: atomic batch insert. On failure no rows commit.
|
||||
fileIDs, err := h.pendingUploads.PutBatch(ctx, wsUUID, items)
|
||||
if err != nil {
|
||||
if errors.Is(err, pendinguploads.ErrTooLarge) {
|
||||
// Belt + suspenders: pre-validation above already caught
|
||||
// this; surface a clean 413 if a malformed FileHeader
|
||||
// somehow slipped through.
|
||||
c.JSON(http.StatusRequestEntityTooLarge, gin.H{
|
||||
"error": "one or more files exceed per-file cap",
|
||||
"max": pendinguploads.MaxFileBytes,
|
||||
})
|
||||
return
|
||||
}
|
||||
log.Printf("chat_files uploadPollMode: storage.PutBatch failed for %s: %v",
|
||||
workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "could not stage files"})
|
||||
return
|
||||
}
|
||||
|
||||
// Phase 3: write per-file activity rows and build the response. Activity
|
||||
// rows are written individually (not part of the same Tx as PutBatch)
|
||||
// because LogActivity is shared across many handlers and threading the
|
||||
// Tx through would be a bigger refactor. The trade-off: if an activity
|
||||
// write fails after the PutBatch commits, the pending_uploads rows
|
||||
// orphan until the 24h TTL — significantly better than the previous
|
||||
// "every multi-file upload could orphan" behavior, and the workspace's
|
||||
// fetcher handles soft-404 cleanly when activity rows reference a row
|
||||
// the platform later expired.
|
||||
out := make([]uploadedFile, 0, len(prepReady))
|
||||
for i, p := range prepReady {
|
||||
fileID := fileIDs[i]
|
||||
uri := fmt.Sprintf("platform-pending:%s/%s", workspaceID, fileID)
|
||||
summary := "chat_upload_receive: " + sanitized
|
||||
summary := "chat_upload_receive: " + p.Sanitized
|
||||
method := "chat_upload_receive"
|
||||
LogActivity(ctx, h.broadcaster, ActivityParams{
|
||||
WorkspaceID: workspaceID,
|
||||
@@ -669,28 +698,65 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w
|
||||
Summary: &summary,
|
||||
RequestBody: map[string]interface{}{
|
||||
"file_id": fileID.String(),
|
||||
"name": sanitized,
|
||||
"mimeType": mimetype,
|
||||
"size": len(content),
|
||||
"name": p.Sanitized,
|
||||
"mimeType": p.Mimetype,
|
||||
"size": len(p.Content),
|
||||
"uri": uri,
|
||||
},
|
||||
Status: "ok",
|
||||
})
|
||||
|
||||
log.Printf("chat_files uploadPollMode: staged %s/%s (file_id=%s size=%d mimetype=%q)",
|
||||
workspaceID, sanitized, fileID, len(content), mimetype)
|
||||
workspaceID, p.Sanitized, fileID, len(p.Content), p.Mimetype)
|
||||
|
||||
out = append(out, uploadedFile{
|
||||
URI: uri,
|
||||
Name: sanitized,
|
||||
Mimetype: mimetype,
|
||||
Size: int64(len(content)),
|
||||
Name: p.Sanitized,
|
||||
Mimetype: p.Mimetype,
|
||||
Size: int64(len(p.Content)),
|
||||
})
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"files": out})
|
||||
}
|
||||
|
||||
// safeMimetype validates a multipart-supplied Content-Type header and
|
||||
// returns a sanitized value safe to store + serve back unmodified.
|
||||
//
|
||||
// The platform's GET /content handler reflects the stored mimetype as
|
||||
// the response Content-Type. An attacker-controlled header that
|
||||
// embedded CR/LF could split the response (header injection); a value
|
||||
// containing semicolons could carry an unexpected charset parameter
|
||||
// that confuses a downstream renderer. Strip CR/LF/control chars +
|
||||
// keep only the type/subtype prefix; reject anything that doesn't
|
||||
// match a basic `type/subtype` regex by falling back to the safe
|
||||
// default (application/octet-stream — the workspace-side handler does
|
||||
// the same fallback).
|
||||
func safeMimetype(raw string) string {
|
||||
const fallback = "application/octet-stream"
|
||||
// Trim parameters (`text/html; charset=utf-8` → `text/html`).
|
||||
if i := strings.IndexByte(raw, ';'); i >= 0 {
|
||||
raw = raw[:i]
|
||||
}
|
||||
raw = strings.TrimSpace(raw)
|
||||
if raw == "" {
|
||||
return ""
|
||||
}
|
||||
// Reject if any control char or whitespace is present (header
|
||||
// injection defense). RFC 7231 mimetype grammar forbids whitespace.
|
||||
for _, r := range raw {
|
||||
if r < 0x21 || r > 0x7e {
|
||||
return fallback
|
||||
}
|
||||
}
|
||||
// Require exactly one slash separating type and subtype.
|
||||
parts := strings.Split(raw, "/")
|
||||
if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
|
||||
return fallback
|
||||
}
|
||||
return raw
|
||||
}
|
||||
|
||||
// readMultipartFile reads a multipart part fully into memory. Wraps
|
||||
// the open + io.ReadAll + close idiom so the call site stays clean,
|
||||
// and so a future change (chunked reads / hashing) has one place to
|
||||
|
||||
@@ -67,12 +67,59 @@ func (s *inMemStorage) Put(_ context.Context, ws uuid.UUID, content []byte, file
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// PutBatch mirrors the production atomic-batch contract: any per-item
|
||||
// failure leaves the in-memory state unchanged, simulating Tx rollback.
|
||||
// Pre-validation matches PostgresStorage.PutBatch; oversized items
|
||||
// return ErrTooLarge before any row is added.
|
||||
func (s *inMemStorage) PutBatch(_ context.Context, ws uuid.UUID, items []pendinguploads.PutItem) ([]uuid.UUID, error) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.putErr != nil {
|
||||
return nil, s.putErr
|
||||
}
|
||||
// Pre-validate so an oversized item rejects the whole batch before
|
||||
// any state mutation — matches the Tx-rollback semantics.
|
||||
for _, it := range items {
|
||||
if len(it.Content) > pendinguploads.MaxFileBytes {
|
||||
return nil, pendinguploads.ErrTooLarge
|
||||
}
|
||||
}
|
||||
ids := make([]uuid.UUID, 0, len(items))
|
||||
stagedRows := make(map[uuid.UUID]pendinguploads.Record, len(items))
|
||||
stagedPuts := make([]putCall, 0, len(items))
|
||||
for _, it := range items {
|
||||
id := uuid.New()
|
||||
stagedRows[id] = pendinguploads.Record{
|
||||
FileID: id, WorkspaceID: ws, Content: it.Content,
|
||||
Filename: it.Filename, Mimetype: it.Mimetype,
|
||||
SizeBytes: int64(len(it.Content)), CreatedAt: time.Now(),
|
||||
ExpiresAt: time.Now().Add(24 * time.Hour),
|
||||
}
|
||||
stagedPuts = append(stagedPuts, putCall{
|
||||
WorkspaceID: ws, Filename: it.Filename, Mimetype: it.Mimetype, Size: len(it.Content),
|
||||
})
|
||||
ids = append(ids, id)
|
||||
}
|
||||
for id, r := range stagedRows {
|
||||
s.rows[id] = r
|
||||
}
|
||||
s.puts = append(s.puts, stagedPuts...)
|
||||
return ids, nil
|
||||
}
|
||||
|
||||
func (s *inMemStorage) Get(context.Context, uuid.UUID) (pendinguploads.Record, error) {
|
||||
return pendinguploads.Record{}, pendinguploads.ErrNotFound
|
||||
}
|
||||
func (s *inMemStorage) MarkFetched(context.Context, uuid.UUID) error { return nil }
|
||||
func (s *inMemStorage) Ack(context.Context, uuid.UUID) error { return nil }
|
||||
|
||||
// Sweep is required by the Storage interface (Phase 3 GC). Not
|
||||
// exercised by upload-branch tests — the dedicated sweeper_test.go +
|
||||
// storage_sweep_test.go cover it.
|
||||
func (s *inMemStorage) Sweep(context.Context, time.Duration) (pendinguploads.SweepResult, error) {
|
||||
return pendinguploads.SweepResult{}, nil
|
||||
}
|
||||
|
||||
// expectPollDeliveryMode stubs the SELECT delivery_mode lookup that
|
||||
// uploadPollMode does (separate from the one resolveWorkspaceForwardCreds
|
||||
// does — this is the new helper introduced for the poll branch).
|
||||
@@ -154,7 +201,7 @@ func TestPollUpload_HappyPath_OneFile_StagesAndLogs(t *testing.T) {
|
||||
expectActivityInsert(mock)
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"report.pdf": []byte("PDF-bytes")})
|
||||
@@ -212,7 +259,7 @@ func TestPollUpload_MultipleFiles_AllStagedAndLogged(t *testing.T) {
|
||||
expectActivityInsert(mock)
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{
|
||||
@@ -250,7 +297,7 @@ func TestPollUpload_PushModeFallsThroughToForward(t *testing.T) {
|
||||
// URL empty + mode=push → 503 (no inbound secret check needed).
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("data")})
|
||||
@@ -274,7 +321,7 @@ func TestPollUpload_NotConfigured_FallsThrough(t *testing.T) {
|
||||
wsID := "33333333-2222-3333-4444-555555555555"
|
||||
expectURLAndMode(mock, wsID, "", "poll") // resolveWorkspaceForwardCreds emits 422
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
// No WithPendingUploads — pendingUploads is nil.
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("data")})
|
||||
@@ -295,7 +342,7 @@ func TestPollUpload_WorkspaceMissing_404(t *testing.T) {
|
||||
wsID := "44444444-2222-3333-4444-555555555555"
|
||||
expectPollDeliveryModeMissing(mock, wsID)
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(newInMemStorage(), nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("d")})
|
||||
@@ -315,7 +362,7 @@ func TestPollUpload_DeliveryModeLookupDBError_500(t *testing.T) {
|
||||
mock.ExpectQuery(`SELECT delivery_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).WillReturnError(errors.New("connection lost"))
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(newInMemStorage(), nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("d")})
|
||||
@@ -335,7 +382,7 @@ func TestPollUpload_NoFilesField_400(t *testing.T) {
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
// Multipart with a non-files field — no actual files.
|
||||
@@ -360,7 +407,7 @@ func TestPollUpload_MalformedMultipart_400(t *testing.T) {
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
// Body that doesn't match the boundary in Content-Type.
|
||||
@@ -381,7 +428,7 @@ func TestPollUpload_StorageError_500(t *testing.T) {
|
||||
|
||||
store := newInMemStorage()
|
||||
store.putErr = errors.New("disk full")
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x.bin": []byte("data")})
|
||||
@@ -402,7 +449,7 @@ func TestPollUpload_StorageTooLarge_413(t *testing.T) {
|
||||
|
||||
store := newInMemStorage()
|
||||
store.putErr = pendinguploads.ErrTooLarge
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x.bin": []byte("data")})
|
||||
@@ -422,7 +469,7 @@ func TestPollUpload_TooManyFiles_400(t *testing.T) {
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
// 65 files — over the per-batch cap.
|
||||
@@ -457,7 +504,7 @@ func TestPollUpload_NullDeliveryMode_TreatedAsPush(t *testing.T) {
|
||||
expectURLAndMode(mock, wsID, "", "")
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x.bin": []byte("data")})
|
||||
@@ -490,7 +537,7 @@ func TestPollUpload_PerFileCapPreStorage_413(t *testing.T) {
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
// 25 MB + 1 byte. Single file, large enough to trip the early
|
||||
@@ -525,7 +572,7 @@ func TestPollUpload_SanitizesFilenameInResponse(t *testing.T) {
|
||||
expectActivityInsert(mock)
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"hello world!.pdf": []byte("data")})
|
||||
@@ -550,6 +597,120 @@ func TestPollUpload_SanitizesFilenameInResponse(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestPollUpload_AtomicRollbackOnSecondFileTooLarge pins the
|
||||
// transactional contract introduced in phase 5: when one file in a
|
||||
// multi-file batch fails pre-validation (oversize), NONE of the files
|
||||
// in the batch land in storage. Previously a per-file Put loop would
|
||||
// stage rows 1..K-1 before failing on row K, leaving orphan
|
||||
// pending_uploads + activity rows the client would re-create on retry.
|
||||
//
|
||||
// Pinned via inMemStorage's PutBatch (which mirrors PostgresStorage's
|
||||
// Tx-rollback behavior on a per-item validation failure) — but the
|
||||
// real atomicity guarantee is the integration test in
|
||||
// pending_uploads_integration_test.go.
|
||||
func TestPollUpload_AtomicRollbackOnSecondFileTooLarge(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "aaaaaaaa-3333-3333-4444-555555555555"
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
// Two files: first OK, second over the per-file cap. Pre-validation
|
||||
// in uploadPollMode catches it BEFORE any Put — store.puts must
|
||||
// stay empty. (If the test ever sees len=1, the regression is
|
||||
// "first file slipped through into storage on a partial-failure
|
||||
// batch.")
|
||||
tooBig := bytes.Repeat([]byte{0x42}, pendinguploads.MaxFileBytes+1)
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{
|
||||
"ok.txt": []byte("small"),
|
||||
"huge.bin": tooBig,
|
||||
})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusRequestEntityTooLarge {
|
||||
t.Errorf("status=%d body=%s, want 413", w.Code, w.Body.String())
|
||||
}
|
||||
if len(store.puts) != 0 {
|
||||
t.Errorf("expected zero Puts on rollback, got %d: %+v", len(store.puts), store.puts)
|
||||
}
|
||||
}
|
||||
|
||||
// TestPollUpload_AtomicRollbackOnPutBatchError validates that an in-
|
||||
// flight PutBatch failure (e.g. simulated DB error) leaves zero rows
|
||||
// — same guarantee as the pre-validation path, but exercises the
|
||||
// "Tx-Rollback after BEGIN" branch via the fake.
|
||||
func TestPollUpload_AtomicRollbackOnPutBatchError(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "bbbbbbbb-3333-3333-4444-555555555555"
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
|
||||
store := newInMemStorage()
|
||||
store.putErr = errors.New("db down mid-batch")
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{
|
||||
"a.txt": []byte("aaa"),
|
||||
"b.txt": []byte("bbb"),
|
||||
"c.txt": []byte("ccc"),
|
||||
})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusInternalServerError {
|
||||
t.Errorf("status=%d, want 500", w.Code)
|
||||
}
|
||||
if len(store.puts) != 0 {
|
||||
t.Errorf("expected zero Puts after PutBatch error, got %d", len(store.puts))
|
||||
}
|
||||
}
|
||||
|
||||
// TestPollUpload_MimetypeWithCRLFInjectionStripped pins the safeMimetype
|
||||
// hardening: a multipart-supplied Content-Type header with CR/LF is
|
||||
// rewritten to application/octet-stream so the eventual /content
|
||||
// response can't be header-split on the wire.
|
||||
func TestPollUpload_MimetypeWithCRLFInjectionStripped(t *testing.T) {
|
||||
got := safeMimetype("text/html\r\nX-Injected: pwn")
|
||||
if got != "application/octet-stream" {
|
||||
t.Errorf("CRLF mimetype not stripped, got %q", got)
|
||||
}
|
||||
got = safeMimetype("image/png\x00")
|
||||
if got != "application/octet-stream" {
|
||||
t.Errorf("NUL byte mimetype not stripped, got %q", got)
|
||||
}
|
||||
got = safeMimetype("text/plain; charset=utf-8")
|
||||
if got != "text/plain" {
|
||||
t.Errorf("parameter not stripped, got %q", got)
|
||||
}
|
||||
got = safeMimetype("application/pdf")
|
||||
if got != "application/pdf" {
|
||||
t.Errorf("clean mime modified, got %q", got)
|
||||
}
|
||||
got = safeMimetype("")
|
||||
if got != "" {
|
||||
t.Errorf("empty input should pass through, got %q", got)
|
||||
}
|
||||
got = safeMimetype("notamime")
|
||||
if got != "application/octet-stream" {
|
||||
t.Errorf("non-type/subtype not coerced, got %q", got)
|
||||
}
|
||||
got = safeMimetype("/empty-type")
|
||||
if got != "application/octet-stream" {
|
||||
t.Errorf("missing type half not coerced, got %q", got)
|
||||
}
|
||||
got = safeMimetype("type/")
|
||||
if got != "application/octet-stream" {
|
||||
t.Errorf("missing subtype half not coerced, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestPollUpload_ActivityRowDiscriminator pins the
|
||||
// activity_type / method shape that the workspace inbox poller depends
|
||||
// on. The poller filters `GET /workspaces/:id/activity?type=a2a_receive`
|
||||
@@ -573,7 +734,7 @@ func TestPollUpload_ActivityRowDiscriminator(t *testing.T) {
|
||||
expectActivityInsertWithTypeAndMethod(mock, wsID, "a2a_receive", "chat_upload_receive")
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x.pdf": []byte("xx")})
|
||||
|
||||
@@ -105,7 +105,7 @@ func TestChatUpload_InvalidWorkspaceID(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
|
||||
c, w := makeUploadRequest(t, "not-a-uuid", &bytes.Buffer{}, "")
|
||||
h.Upload(c)
|
||||
@@ -122,7 +122,7 @@ func TestChatUpload_WorkspaceNotInDB(t *testing.T) {
|
||||
wsID := "00000000-0000-0000-0000-000000000099"
|
||||
expectURLMissing(mock, wsID)
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -166,7 +166,7 @@ func TestChatUpload_NoInboundSecret_LazyHeal(t *testing.T) {
|
||||
WithArgs(sqlmock.AnyArg(), wsID).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -203,7 +203,7 @@ func TestChatUpload_NoInboundSecret_LazyHealFailure(t *testing.T) {
|
||||
WithArgs(sqlmock.AnyArg(), wsID).
|
||||
WillReturnError(sql.ErrConnDone) // mint fails
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -231,7 +231,7 @@ func TestChatUpload_NoURL(t *testing.T) {
|
||||
wsID := "00000000-0000-0000-0000-000000000042"
|
||||
expectURLAndMode(mock, wsID, "", "push")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -256,7 +256,7 @@ func TestChatUpload_PollModeEmptyURL(t *testing.T) {
|
||||
wsID := "00000000-0000-0000-0000-000000000099"
|
||||
expectURLAndMode(mock, wsID, "", "poll")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -286,7 +286,7 @@ func TestChatUpload_NullModeEmptyURL(t *testing.T) {
|
||||
wsID := "30ba7f0b-b303-4a20-aefe-3a4a675b8aa4" // user's "mac laptop"
|
||||
expectURLNullMode(mock, wsID, "")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -338,7 +338,7 @@ func TestChatUpload_ForwardsToWorkspace_HappyPath(t *testing.T) {
|
||||
expectURL(mock, wsID, srv.URL)
|
||||
expectInboundSecret(mock, wsID, "super-secret-123")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -380,7 +380,7 @@ func TestChatUpload_ForwardsErrorStatusUnchanged(t *testing.T) {
|
||||
expectURL(mock, wsID, srv.URL)
|
||||
expectInboundSecret(mock, wsID, "tok")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -402,7 +402,7 @@ func TestChatUpload_WorkspaceUnreachable(t *testing.T) {
|
||||
expectURL(mock, wsID, "http://127.0.0.1:1")
|
||||
expectInboundSecret(mock, wsID, "tok")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -418,7 +418,7 @@ func TestChatDownload_InvalidPath(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
|
||||
cases := []struct {
|
||||
name, path, wantSubstr string
|
||||
@@ -507,7 +507,7 @@ func TestChatDownload_WorkspaceNotInDB(t *testing.T) {
|
||||
WithArgs(wsID).
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt")
|
||||
h.Download(c)
|
||||
|
||||
@@ -533,7 +533,7 @@ func TestChatDownload_NoInboundSecret_LazyHeal(t *testing.T) {
|
||||
WithArgs(sqlmock.AnyArg(), wsID).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt")
|
||||
h.Download(c)
|
||||
|
||||
@@ -559,7 +559,7 @@ func TestChatDownload_NoInboundSecret_LazyHealFailure(t *testing.T) {
|
||||
WithArgs(sqlmock.AnyArg(), wsID).
|
||||
WillReturnError(sql.ErrConnDone)
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt")
|
||||
h.Download(c)
|
||||
|
||||
@@ -592,7 +592,7 @@ func TestChatDownload_ForwardsToWorkspace_HappyPath(t *testing.T) {
|
||||
expectURL(mock, wsID, srv.URL)
|
||||
expectInboundSecret(mock, wsID, "the-secret")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
c, w := makeDownloadRequest(t, wsID, "/workspace/report.txt")
|
||||
h.Download(c)
|
||||
|
||||
@@ -634,7 +634,7 @@ func TestChatDownload_404FromWorkspacePropagated(t *testing.T) {
|
||||
expectURL(mock, wsID, srv.URL)
|
||||
expectInboundSecret(mock, wsID, "tok")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
c, w := makeDownloadRequest(t, wsID, "/workspace/missing.txt")
|
||||
h.Download(c)
|
||||
|
||||
|
||||
@@ -0,0 +1,468 @@
|
||||
package handlers
|
||||
|
||||
// class1_ast_gate_test.go — generic Class 1 leak gate per #2867 PR-A.
|
||||
//
|
||||
// What this gate prevents:
|
||||
// The tenant-hongming leak class — a handler iterates a YAML-derived
|
||||
// slice (ws.Children, sub_workspaces, etc.) and calls
|
||||
// `INSERT INTO workspaces` inside the loop body without first
|
||||
// checking whether a workspace with the same (parent_id, name) is
|
||||
// already there. Each call to such a handler doubles the tree.
|
||||
//
|
||||
// Why this is broader than TestCreateWorkspaceTree_CallsLookupBeforeInsert:
|
||||
// The existing gate is hard-coded to org_import.go's createWorkspaceTree.
|
||||
// That catches the specific function that triggered the original
|
||||
// incident — but a future handler written from scratch in a different
|
||||
// file would not be covered. This gate walks every production handler
|
||||
// .go file and applies a structural rule that does not depend on
|
||||
// function or file names.
|
||||
//
|
||||
// The rule (verbatim from #2867 PR-A):
|
||||
//
|
||||
// "No handler in handlers/ may iterate a slice (any RangeStmt) AND
|
||||
// call INSERT INTO workspaces inside the loop body without a
|
||||
// preceding SELECT id FROM workspaces WHERE name=$1 AND parent_id IS
|
||||
// NOT DISTINCT FROM $2 in the same function (== a lookupExistingChild
|
||||
// call, OR an ON CONFLICT clause baked into the same INSERT, OR an
|
||||
// explicit allowlist annotation)."
|
||||
//
|
||||
// Allowlist mechanism: a function whose body contains the exact comment
|
||||
// string `// class1-gate: idempotent-by-design` is treated as safe.
|
||||
// Use this only after writing a unit test that pins WHY the function
|
||||
// is safe. The annotation is intentionally awkward to type — it should
|
||||
// be rare.
|
||||
|
||||
import (
|
||||
"go/ast"
|
||||
"go/parser"
|
||||
"go/token"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// reINSERTWorkspaces matches the exact statement shape we care about.
|
||||
// Tightened (vs bytes.Index "INSERT INTO workspaces") so the audit
|
||||
// table `workspaces_audit` literal — or any other lookalike — does not
|
||||
// false-positive trigger this gate. The same regex is used in the
|
||||
// existing createWorkspaceTree gate (workspaces_insert_allowlist_test.go)
|
||||
// — keep them in sync if either changes.
|
||||
var reINSERTWorkspaces = regexp.MustCompile(`(?m)^\s*INSERT INTO workspaces\s*\(`)
|
||||
|
||||
// reONCONFLICT matches ON CONFLICT clauses anywhere in the same SQL
|
||||
// literal. An UPSERT (INSERT ... ON CONFLICT ... DO UPDATE) is
|
||||
// idempotent by definition, so the gate exempts it.
|
||||
var reONCONFLICT = regexp.MustCompile(`(?i)\bON CONFLICT\b`)
|
||||
|
||||
// gateAllowlistComment is the magic comment a function author writes
|
||||
// to opt out of this gate. Forces an explicit decision.
|
||||
const gateAllowlistComment = "// class1-gate: idempotent-by-design"
|
||||
|
||||
// preflightCallNames are function names whose presence in a function
|
||||
// body counts as "did a SELECT-by-(parent_id, name) preflight". Add
|
||||
// new names here as new preflight helpers are introduced. Keep the
|
||||
// list TIGHT — any sloppy addition weakens the gate.
|
||||
var preflightCallNames = map[string]bool{
|
||||
"lookupExistingChild": true,
|
||||
}
|
||||
|
||||
// TestClass1_NoUnpreflightedInsertInsideRange walks every production
|
||||
// .go file in this package, parses the AST, and fails the test if any
|
||||
// FuncDecl violates the rule above.
|
||||
//
|
||||
// Failure message must include: file path, function name, line of
|
||||
// the offending INSERT, line of the enclosing range, and a hint at
|
||||
// the three escape hatches (preflight call, ON CONFLICT, allowlist
|
||||
// comment).
|
||||
func TestClass1_NoUnpreflightedInsertInsideRange(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("getwd: %v", err)
|
||||
}
|
||||
|
||||
entries, err := os.ReadDir(wd)
|
||||
if err != nil {
|
||||
t.Fatalf("readdir %s: %v", wd, err)
|
||||
}
|
||||
|
||||
type violation struct {
|
||||
file string
|
||||
fn string
|
||||
insertLine int
|
||||
rangeLine int
|
||||
}
|
||||
var violations []violation
|
||||
scanned := 0
|
||||
|
||||
for _, e := range entries {
|
||||
name := e.Name()
|
||||
if e.IsDir() || !strings.HasSuffix(name, ".go") {
|
||||
continue
|
||||
}
|
||||
if strings.HasSuffix(name, "_test.go") {
|
||||
continue
|
||||
}
|
||||
path := filepath.Join(wd, name)
|
||||
src, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatalf("read %s: %v", path, err)
|
||||
}
|
||||
fset := token.NewFileSet()
|
||||
file, err := parser.ParseFile(fset, name, src, parser.ParseComments)
|
||||
if err != nil {
|
||||
t.Fatalf("parse %s: %v", path, err)
|
||||
}
|
||||
scanned++
|
||||
|
||||
// Walk every function declaration and apply the rule.
|
||||
for _, decl := range file.Decls {
|
||||
fd, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fd.Body == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Allowlist: skip if the function body contains the magic
|
||||
// comment. We check via the source range of the function
|
||||
// — comments inside the body are in file.Comments and
|
||||
// must overlap the function's Pos/End range.
|
||||
if functionHasAllowlistComment(file, fd) {
|
||||
continue
|
||||
}
|
||||
|
||||
// First pass: locate every INSERT INTO workspaces literal
|
||||
// in this function. We treat each such literal as a
|
||||
// candidate violation and try to clear it via the rules.
|
||||
candidates := findInsertWorkspacesLiterals(fd, src, fset)
|
||||
if len(candidates) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// Has the function called a preflight helper? Single
|
||||
// pass — if any preflight name appears, every INSERT in
|
||||
// the function is considered preflighted. This is more
|
||||
// permissive than position-aware (preflight could be
|
||||
// AFTER the INSERT and still satisfy the gate), but the
|
||||
// existing org_import.go gate already pins the position
|
||||
// invariant for createWorkspaceTree, and a function that
|
||||
// preflights AFTER inserting would fail the position
|
||||
// gate in a separate test.
|
||||
hasPreflight := functionCallsAny(fd, preflightCallNames)
|
||||
|
||||
for _, c := range candidates {
|
||||
if c.hasONCONFLICT {
|
||||
continue
|
||||
}
|
||||
if hasPreflight {
|
||||
continue
|
||||
}
|
||||
if c.enclosingRangeLine == 0 {
|
||||
// INSERT not inside any RangeStmt — single-shot,
|
||||
// not the bug pattern.
|
||||
continue
|
||||
}
|
||||
violations = append(violations, violation{
|
||||
file: name,
|
||||
fn: fd.Name.Name,
|
||||
insertLine: c.insertLine,
|
||||
rangeLine: c.enclosingRangeLine,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if scanned == 0 {
|
||||
t.Fatal("scanned 0 .go files — wrong working directory? gate would always pass")
|
||||
}
|
||||
|
||||
if len(violations) > 0 {
|
||||
// Stable sort so the failure message is deterministic across
|
||||
// reruns.
|
||||
sort.Slice(violations, func(i, j int) bool {
|
||||
if violations[i].file != violations[j].file {
|
||||
return violations[i].file < violations[j].file
|
||||
}
|
||||
return violations[i].insertLine < violations[j].insertLine
|
||||
})
|
||||
var b strings.Builder
|
||||
b.WriteString("Class 1 leak gate (#2867 PR-A) — these handler functions iterate a slice and INSERT INTO workspaces inside the loop body without a (parent_id, name) preflight.\n\n")
|
||||
b.WriteString("This is the bug shape that triggered the tenant-hongming leak (TeamHandler.Expand re-inserting the entire sub_workspaces tree on every call). To fix any reported violation, choose ONE of:\n")
|
||||
b.WriteString(" 1. Call h.lookupExistingChild(ctx, name, parentID) before the INSERT and skip the INSERT when it returns existing=true. (preferred)\n")
|
||||
b.WriteString(" 2. Use INSERT ... ON CONFLICT ... DO ... (idempotent UPSERT, like registry.go).\n")
|
||||
b.WriteString(" 3. Annotate the function with a `// class1-gate: idempotent-by-design` comment AND a unit test that pins why the function is structurally idempotent. (rare; require code review)\n\n")
|
||||
b.WriteString("Violations:\n")
|
||||
for _, v := range violations {
|
||||
b.WriteString(" - ")
|
||||
b.WriteString(v.file)
|
||||
b.WriteString(":")
|
||||
b.WriteString(itoa(v.insertLine))
|
||||
b.WriteString(" — function ")
|
||||
b.WriteString(v.fn)
|
||||
b.WriteString("() INSERTs inside RangeStmt at line ")
|
||||
b.WriteString(itoa(v.rangeLine))
|
||||
b.WriteString("\n")
|
||||
}
|
||||
t.Fatal(b.String())
|
||||
}
|
||||
}
|
||||
|
||||
func itoa(n int) string {
|
||||
// Avoid strconv import for one call site — keeps the test focused.
|
||||
if n == 0 {
|
||||
return "0"
|
||||
}
|
||||
neg := n < 0
|
||||
if neg {
|
||||
n = -n
|
||||
}
|
||||
var buf [20]byte
|
||||
i := len(buf)
|
||||
for n > 0 {
|
||||
i--
|
||||
buf[i] = byte('0' + n%10)
|
||||
n /= 10
|
||||
}
|
||||
if neg {
|
||||
i--
|
||||
buf[i] = '-'
|
||||
}
|
||||
return string(buf[i:])
|
||||
}
|
||||
|
||||
// candidateInsert holds the per-INSERT facts needed to decide whether
|
||||
// the gate fires.
|
||||
type candidateInsert struct {
|
||||
insertLine int
|
||||
hasONCONFLICT bool
|
||||
enclosingRangeLine int // 0 means not inside any range
|
||||
}
|
||||
|
||||
// findInsertWorkspacesLiterals walks fd's body and returns one
|
||||
// candidateInsert per INSERT INTO workspaces string literal.
|
||||
//
|
||||
// Position-based detection: collect every RangeStmt's body span first,
|
||||
// then for each INSERT literal check if its position is inside any
|
||||
// span. ast.Inspect's nil-call ordering does NOT give per-node pop
|
||||
// semantics, so a stack-based approach against ast.Inspect would
|
||||
// silently miscount. Position spans are deterministic and easy to
|
||||
// reason about.
|
||||
func findInsertWorkspacesLiterals(fd *ast.FuncDecl, src []byte, fset *token.FileSet) []candidateInsert {
|
||||
var out []candidateInsert
|
||||
|
||||
type span struct{ start, end token.Pos }
|
||||
var ranges []span
|
||||
ast.Inspect(fd.Body, func(n ast.Node) bool {
|
||||
rs, ok := n.(*ast.RangeStmt)
|
||||
if !ok || rs.Body == nil {
|
||||
return true
|
||||
}
|
||||
ranges = append(ranges, span{rs.Body.Lbrace, rs.Body.Rbrace})
|
||||
return true
|
||||
})
|
||||
|
||||
enclosingRangeLineFor := func(p token.Pos) int {
|
||||
// Pick the innermost enclosing range — i.e., the one with the
|
||||
// largest start that still covers p. Innermost is the one
|
||||
// whose body actually contains the INSERT, which is the line
|
||||
// most useful in a violation message.
|
||||
bestStart := token.NoPos
|
||||
bestLine := 0
|
||||
for _, s := range ranges {
|
||||
if p > s.start && p < s.end && s.start > bestStart {
|
||||
bestStart = s.start
|
||||
bestLine = fset.Position(s.start).Line
|
||||
}
|
||||
}
|
||||
return bestLine
|
||||
}
|
||||
|
||||
ast.Inspect(fd.Body, func(n ast.Node) bool {
|
||||
bl, ok := n.(*ast.BasicLit)
|
||||
if !ok || bl.Kind != token.STRING {
|
||||
return true
|
||||
}
|
||||
// Strip surrounding backticks/quotes — value includes them.
|
||||
lit := bl.Value
|
||||
if len(lit) >= 2 {
|
||||
lit = lit[1 : len(lit)-1]
|
||||
}
|
||||
if !reINSERTWorkspaces.MatchString(lit) {
|
||||
return true
|
||||
}
|
||||
out = append(out, candidateInsert{
|
||||
insertLine: fset.Position(bl.Pos()).Line,
|
||||
hasONCONFLICT: reONCONFLICT.MatchString(lit),
|
||||
enclosingRangeLine: enclosingRangeLineFor(bl.Pos()),
|
||||
})
|
||||
return true
|
||||
})
|
||||
return out
|
||||
}
|
||||
|
||||
// functionCallsAny returns true if any CallExpr in fd's body has a
|
||||
// function name (either a SelectorExpr Sel.Name or an Ident name)
|
||||
// matching a key in names.
|
||||
func functionCallsAny(fd *ast.FuncDecl, names map[string]bool) bool {
|
||||
found := false
|
||||
ast.Inspect(fd.Body, func(n ast.Node) bool {
|
||||
if found {
|
||||
return false
|
||||
}
|
||||
ce, ok := n.(*ast.CallExpr)
|
||||
if !ok {
|
||||
return true
|
||||
}
|
||||
switch fun := ce.Fun.(type) {
|
||||
case *ast.Ident:
|
||||
if names[fun.Name] {
|
||||
found = true
|
||||
return false
|
||||
}
|
||||
case *ast.SelectorExpr:
|
||||
if names[fun.Sel.Name] {
|
||||
found = true
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
return found
|
||||
}
|
||||
|
||||
// functionHasAllowlistComment returns true if the function body
|
||||
// (between fd.Body.Lbrace and fd.Body.Rbrace) contains a comment
|
||||
// equal to gateAllowlistComment.
|
||||
func functionHasAllowlistComment(file *ast.File, fd *ast.FuncDecl) bool {
|
||||
if fd.Body == nil {
|
||||
return false
|
||||
}
|
||||
start := fd.Body.Lbrace
|
||||
end := fd.Body.Rbrace
|
||||
for _, cg := range file.Comments {
|
||||
for _, c := range cg.List {
|
||||
if c.Pos() < start || c.Pos() > end {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(c.Text) == gateAllowlistComment {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// TestClass1_GateFiresOnSyntheticBuggySource — proves the gate actually
|
||||
// catches the bug shape it's named after. Without this, a regression
|
||||
// to "always pass" would not be noticed until the leak shipped again.
|
||||
// Per memory feedback_assert_exact_not_substring.md: tighten the test
|
||||
// + verify it FAILS on old-shape source before merging.
|
||||
func TestClass1_GateFiresOnSyntheticBuggySource(t *testing.T) {
|
||||
const buggySrc = `package handlers
|
||||
|
||||
import "context"
|
||||
|
||||
type fakeDB struct{}
|
||||
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
|
||||
|
||||
func buggyExpand(db fakeDB, ctx context.Context, children []string) {
|
||||
for _, child := range children {
|
||||
// Bug shape: INSERT inside the range body, no preflight.
|
||||
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", child)
|
||||
}
|
||||
}
|
||||
`
|
||||
fset := token.NewFileSet()
|
||||
file, err := parser.ParseFile(fset, "buggy.go", buggySrc, parser.ParseComments)
|
||||
if err != nil {
|
||||
t.Fatalf("parse synthetic source: %v", err)
|
||||
}
|
||||
for _, decl := range file.Decls {
|
||||
fd, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fd.Name.Name != "buggyExpand" {
|
||||
continue
|
||||
}
|
||||
candidates := findInsertWorkspacesLiterals(fd, []byte(buggySrc), fset)
|
||||
if len(candidates) != 1 {
|
||||
t.Fatalf("expected 1 INSERT literal, got %d", len(candidates))
|
||||
}
|
||||
c := candidates[0]
|
||||
if c.enclosingRangeLine == 0 {
|
||||
t.Errorf("synthetic INSERT inside `for _, child := range` should be detected as enclosed by range, got enclosingRangeLine=0 — gate would miss the bug shape")
|
||||
}
|
||||
if c.hasONCONFLICT {
|
||||
t.Errorf("synthetic INSERT has no ON CONFLICT, gate falsely treated it as idempotent")
|
||||
}
|
||||
if functionCallsAny(fd, preflightCallNames) {
|
||||
t.Errorf("synthetic function does not call lookupExistingChild — gate falsely treated it as preflighted")
|
||||
}
|
||||
// All three guards say the gate WOULD fire. Pass.
|
||||
return
|
||||
}
|
||||
t.Fatal("buggyExpand FuncDecl not found in synthetic source")
|
||||
}
|
||||
|
||||
// TestClass1_GateAllowsONCONFLICT — pins that an INSERT with ON
|
||||
// CONFLICT inside a range body is NOT flagged. registry.go's
|
||||
// upsert pattern is the prod example.
|
||||
func TestClass1_GateAllowsONCONFLICT(t *testing.T) {
|
||||
const safeSrc = `package handlers
|
||||
|
||||
import "context"
|
||||
|
||||
type fakeDB struct{}
|
||||
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
|
||||
|
||||
func upsertLoop(db fakeDB, ctx context.Context, children []string) {
|
||||
for _, child := range children {
|
||||
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2) ON CONFLICT (id) DO UPDATE SET name = $2`" + `, "x", child)
|
||||
}
|
||||
}
|
||||
`
|
||||
fset := token.NewFileSet()
|
||||
file, _ := parser.ParseFile(fset, "safe.go", safeSrc, parser.ParseComments)
|
||||
for _, decl := range file.Decls {
|
||||
fd, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fd.Name.Name != "upsertLoop" {
|
||||
continue
|
||||
}
|
||||
candidates := findInsertWorkspacesLiterals(fd, []byte(safeSrc), fset)
|
||||
if len(candidates) != 1 {
|
||||
t.Fatalf("expected 1 candidate, got %d", len(candidates))
|
||||
}
|
||||
if !candidates[0].hasONCONFLICT {
|
||||
t.Errorf("ON CONFLICT clause should be detected, was missed — gate would falsely flag idempotent UPSERTs")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestClass1_GateAllowsAllowlistAnnotation — pins the escape hatch
|
||||
// works. Annotated functions are skipped at the FuncDecl level.
|
||||
func TestClass1_GateAllowsAllowlistAnnotation(t *testing.T) {
|
||||
const annotatedSrc = `package handlers
|
||||
|
||||
import "context"
|
||||
|
||||
type fakeDB struct{}
|
||||
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
|
||||
|
||||
func intentionallyUnpreflighted(db fakeDB, ctx context.Context, children []string) {
|
||||
// class1-gate: idempotent-by-design
|
||||
for _, child := range children {
|
||||
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", child)
|
||||
}
|
||||
}
|
||||
`
|
||||
fset := token.NewFileSet()
|
||||
file, _ := parser.ParseFile(fset, "annotated.go", annotatedSrc, parser.ParseComments)
|
||||
for _, decl := range file.Decls {
|
||||
fd, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fd.Name.Name != "intentionallyUnpreflighted" {
|
||||
continue
|
||||
}
|
||||
if !functionHasAllowlistComment(file, fd) {
|
||||
t.Error("allowlist comment should be detected for the intentionallyUnpreflighted function — escape hatch not working")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -53,13 +53,35 @@ func NewDelegationLedger(handle *sql.DB) *DelegationLedger {
|
||||
// truncatePreview caps stored preview at 4KB. The full prompt/response is
|
||||
// already in activity_logs.{request,response}_body — this is the at-a-glance
|
||||
// view for the dashboard, not a forensic record.
|
||||
//
|
||||
// Rune-safe: previous byte-slice form (s[:previewCap]) split on a byte
|
||||
// boundary, which on a multi-byte codepoint at byte 4096 produced
|
||||
// invalid UTF-8 — Postgres JSONB rejects → ledger row not inserted →
|
||||
// audit gap. Issue #2962. Walks the string by rune, stops at the last
|
||||
// rune-boundary index that fits inside the cap. ASCII-only strings hit
|
||||
// the cap exactly; CJK/emoji strings stop slightly under the cap,
|
||||
// never over.
|
||||
//
|
||||
// Mirrors the truncatePreviewRunes fix from agent_message_writer.go
|
||||
// (#2959). Both call sites should consume a shared helper after both
|
||||
// fixes have landed — followup deduplication tracked in #2962's body.
|
||||
const previewCap = 4096
|
||||
|
||||
func truncatePreview(s string) string {
|
||||
if len(s) <= previewCap {
|
||||
return s
|
||||
}
|
||||
return s[:previewCap]
|
||||
// Range over a string yields rune-boundary byte indices. Walk
|
||||
// until the next index would exceed previewCap; the previous
|
||||
// index is the safe truncation point.
|
||||
end := 0
|
||||
for i := range s {
|
||||
if i > previewCap {
|
||||
break
|
||||
}
|
||||
end = i
|
||||
}
|
||||
return s[:end]
|
||||
}
|
||||
|
||||
// InsertOpts is the agent's record-of-intent. Caller, callee, task preview,
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
)
|
||||
@@ -121,6 +122,63 @@ func TestTruncatePreview_ExactlyAtCap(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestTruncatePreview_NeverProducesInvalidUTF8 — pins #2962. The old
|
||||
// byte-slice implementation (s[:previewCap]) split on a byte boundary,
|
||||
// so a multi-byte codepoint straddling byte 4096 produced invalid
|
||||
// UTF-8 → Postgres JSONB rejects → ledger row not inserted → audit
|
||||
// gap. Test feeds a CJK / emoji-padded string longer than previewCap
|
||||
// and asserts utf8.ValidString on the result.
|
||||
func TestTruncatePreview_NeverProducesInvalidUTF8(t *testing.T) {
|
||||
// Build a string of '世' (3 bytes per rune in UTF-8) that's just
|
||||
// past the cap. With the old implementation, the slice at byte
|
||||
// previewCap would land mid-rune and ValidString would fail.
|
||||
// With the rune-aware implementation, the result is always valid
|
||||
// UTF-8 even if the byte length is < previewCap.
|
||||
rune3 := "世" // U+4E16, 3 bytes
|
||||
// Need at least previewCap/3 + 1 runes so we cross the cap with
|
||||
// margin to spare.
|
||||
in := strings.Repeat(rune3, (previewCap/3)+10)
|
||||
if len(in) <= previewCap {
|
||||
t.Fatalf("test setup: input too short (%d bytes) — must exceed previewCap=%d", len(in), previewCap)
|
||||
}
|
||||
got := truncatePreview(in)
|
||||
if !utf8.ValidString(got) {
|
||||
t.Errorf("truncatePreview produced invalid UTF-8 — JSONB will reject this row. len(got)=%d", len(got))
|
||||
}
|
||||
if len(got) > previewCap {
|
||||
t.Errorf("truncatePreview exceeded cap: len(got)=%d > previewCap=%d", len(got), previewCap)
|
||||
}
|
||||
// Defense-in-depth: the result should also be a clean rune
|
||||
// prefix of the input — not some garbled sequence.
|
||||
if !strings.HasPrefix(in, got) {
|
||||
t.Errorf("truncatePreview should return a prefix of the input")
|
||||
}
|
||||
}
|
||||
|
||||
// TestTruncatePreview_MultiByteAtBoundary — most-targeted regression.
|
||||
// Feeds an input where the cap byte falls EXACTLY in the middle of a
|
||||
// 3-byte codepoint. Pre-fix, this is the case that produces invalid
|
||||
// UTF-8; post-fix, the truncate stops at the previous rune boundary.
|
||||
func TestTruncatePreview_MultiByteAtBoundary(t *testing.T) {
|
||||
// Build a string that's `previewCap-1` ASCII bytes followed by
|
||||
// '世' (3 bytes). Total = previewCap + 2. The old impl would
|
||||
// slice at byte previewCap, landing inside the '世' codepoint.
|
||||
prefix := strings.Repeat("a", previewCap-1)
|
||||
in := prefix + "世"
|
||||
if len(in) != previewCap+2 {
|
||||
t.Fatalf("test setup: expected len %d, got %d", previewCap+2, len(in))
|
||||
}
|
||||
got := truncatePreview(in)
|
||||
if !utf8.ValidString(got) {
|
||||
t.Errorf("truncatePreview produced invalid UTF-8 at the multi-byte boundary case")
|
||||
}
|
||||
// Result should be exactly the ASCII prefix — '世' was past
|
||||
// the cap so it must be dropped entirely.
|
||||
if got != prefix {
|
||||
t.Errorf("expected exact ASCII prefix, got %q (len=%d)", got[len(got)-10:], len(got))
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- SetStatus lifecycle ----------
|
||||
|
||||
func TestLedgerSetStatus_QueuedToDispatched(t *testing.T) {
|
||||
|
||||
@@ -109,6 +109,12 @@ curl -fsS -X POST "{{PLATFORM_URL}}/registry/register" \
|
||||
"version": "0.1.0"
|
||||
}
|
||||
}'
|
||||
|
||||
# Need help?
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/external-agent-registration
|
||||
# Common errors:
|
||||
# • 401 / 403 on register — WORKSPACE_AUTH_TOKEN must be the value
|
||||
# shown at workspace create. Tokens are shown only once.
|
||||
`
|
||||
|
||||
// externalChannelTemplate — Claude Code channel plugin install + .env. For
|
||||
@@ -172,6 +178,18 @@ claude --dangerously-load-development-channels \
|
||||
# Multi-workspace: comma-separate IDs and tokens (same order). See
|
||||
# https://github.com/Molecule-AI/molecule-mcp-claude-channel for
|
||||
# pairing flow, push-mode upgrade, and v0.2 roadmap.
|
||||
|
||||
# Need help?
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/claude-code-channel-plugin
|
||||
# Common errors:
|
||||
# • "plugin not installed" — run /plugin marketplace add then
|
||||
# /plugin install lines above; /reload-plugins or restart.
|
||||
# • "not on the approved channels allowlist" — custom channels need
|
||||
# --dangerously-load-development-channels; team/enterprise orgs
|
||||
# need admin to set channelsEnabled + allowedChannelPlugins.
|
||||
# • "Inbound messages not arriving" — stderr should show
|
||||
# "molecule channel: connected — watching N workspace(s)";
|
||||
# verify ~/.claude/channels/molecule/.env has PLATFORM_URL + token.
|
||||
`
|
||||
|
||||
// externalUniversalMcpTemplate — runtime-agnostic standalone path.
|
||||
@@ -198,6 +216,13 @@ const externalUniversalMcpTemplate = `# Universal MCP — standalone register +
|
||||
# Pair with the Claude Code or Python SDK tab if your runtime needs
|
||||
# inbound A2A delivery (canvas messages → agent conversation turns).
|
||||
|
||||
# Requires Python >= 3.11. On 3.10 or older pip says
|
||||
# "Could not find a version that satisfies the requirement
|
||||
# (from versions: none)" — the wheel's requires_python pin filters
|
||||
# the only available artifact before pip even attempts install.
|
||||
# Upgrade the interpreter (brew install python@3.12 / apt install
|
||||
# python3.12 / etc.) or use a 3.11+ venv.
|
||||
|
||||
# 1. Install the workspace runtime wheel:
|
||||
pip install molecule-ai-workspace-runtime
|
||||
|
||||
@@ -217,6 +242,17 @@ claude mcp add molecule -s user -- env \
|
||||
#
|
||||
# Origin/WAF handling is built into the wheel — no manual headers
|
||||
# needed when calling tools through the MCP server.
|
||||
|
||||
# Need help?
|
||||
# Where to install: https://pypi.org/project/molecule-ai-workspace-runtime/
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/mcp-server-setup
|
||||
# Common errors:
|
||||
# • "Tools not appearing in your agent" — run ` + "`claude mcp list`" + ` (or
|
||||
# your runtime's equivalent) and confirm the molecule entry. If
|
||||
# missing, re-run the ` + "`claude mcp add`" + ` line above.
|
||||
# • "ConnectionRefused / DNS error on first call" — PLATFORM_URL must
|
||||
# include the scheme (https://) and have NO trailing slash. Verify
|
||||
# with: curl ${PLATFORM_URL}/healthz
|
||||
`
|
||||
|
||||
// externalPythonTemplate uses molecule-sdk-python's RemoteAgentClient +
|
||||
@@ -255,6 +291,15 @@ async def main():
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
|
||||
# Need help?
|
||||
# Where to install: https://pypi.org/project/molecule-ai-workspace-runtime/
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/external-agent-registration
|
||||
# Common errors:
|
||||
# • 401 from /heartbeat — AUTH_TOKEN expired or wrong workspace_id.
|
||||
# Tokens shown only once at create time; re-create to get a fresh one.
|
||||
# • AGENT_URL not reachable from platform — public HTTPS URL required
|
||||
# for inbound A2A. Use ngrok or Cloudflare Tunnel if behind NAT.
|
||||
`
|
||||
|
||||
// externalHermesChannelTemplate — install snippet for operators whose
|
||||
@@ -322,6 +367,16 @@ hermes gateway --replace
|
||||
#
|
||||
# Source + issue tracker:
|
||||
# https://github.com/Molecule-AI/hermes-channel-molecule
|
||||
|
||||
# Need help?
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/external-agent-registration
|
||||
# Common errors:
|
||||
# • Gateway start failure — tail ~/.hermes/gateway.log. YAML
|
||||
# duplicate-key in config.yaml is the most common cause; the
|
||||
# gateway: block must appear exactly once.
|
||||
# • Plugin not discovered after install — pip show hermes-channel-molecule
|
||||
# to confirm install. Some hermes builds need ` + "`hermes plugin reload`" + `
|
||||
# before the new platform_plugins entry takes effect.
|
||||
`
|
||||
|
||||
// externalCodexTemplate — for operators whose external agent is a
|
||||
@@ -368,14 +423,23 @@ mkdir -p ~/.codex
|
||||
# (then open ~/.codex/config.toml in your editor and paste:)
|
||||
#
|
||||
# [mcp_servers.molecule]
|
||||
# command = "python3"
|
||||
# args = ["-m", "molecule_runtime.a2a_mcp_server"]
|
||||
# command = "molecule-mcp"
|
||||
# args = []
|
||||
# startup_timeout_sec = 30
|
||||
#
|
||||
# [mcp_servers.molecule.env]
|
||||
# WORKSPACE_ID = "{{WORKSPACE_ID}}"
|
||||
# PLATFORM_URL = "{{PLATFORM_URL}}"
|
||||
# MOLECULE_WORKSPACE_TOKEN = "<paste from create response>"
|
||||
#
|
||||
# Use the "molecule-mcp" console-script wrapper (NOT
|
||||
# "python3 -m molecule_runtime.a2a_mcp_server"). The wrapper is what
|
||||
# keeps the workspace ALIVE on the canvas: it POSTs /registry/register
|
||||
# at startup and runs a 20s heartbeat thread alongside the MCP stdio
|
||||
# loop. The bare a2a_mcp_server module exposes tools but does NOT
|
||||
# heartbeat — pointing codex at it leaves the canvas showing this
|
||||
# workspace as awaiting_agent (OFFLINE) within 60-90s even while
|
||||
# tools work.
|
||||
|
||||
# 3. Run the bridge daemon as a durable background process — this
|
||||
# is the INBOUND path. Long-polls the platform inbox and runs
|
||||
@@ -403,6 +467,18 @@ disown
|
||||
# available to the agent, and the bridge wakes a non-interactive
|
||||
# codex turn for any inbound canvas/peer message:
|
||||
codex
|
||||
|
||||
# Need help?
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/mcp-server-setup
|
||||
# Common errors:
|
||||
# • [mcp_servers.molecule] not loaded — codex must be ≥ 0.57.
|
||||
# Check with ` + "`codex --version`" + `; upgrade via npm install -g @openai/codex@latest.
|
||||
# • TOML parse error after re-running setup — TOML rejects duplicate
|
||||
# [mcp_servers.molecule] tables. Open ~/.codex/config.toml and
|
||||
# remove the old block before pasting the new one.
|
||||
# • Canvas messages don't wake codex — step 3 (codex-channel-molecule
|
||||
# bridge daemon) is required for inbound push. Check
|
||||
# pgrep -f codex-channel-molecule and tail ~/.codex-channel-molecule/daemon.log.
|
||||
`
|
||||
|
||||
// externalOpenClawTemplate — for operators whose external agent is an
|
||||
@@ -440,11 +516,20 @@ pip install molecule-ai-workspace-runtime
|
||||
|
||||
# 3. Wire the molecule MCP server. {{WORKSPACE_ID}} + {{PLATFORM_URL}}
|
||||
# are stamped server-side; paste the auth token before running.
|
||||
#
|
||||
# Use the "molecule-mcp" console-script wrapper (NOT
|
||||
# "python3 -m molecule_runtime.a2a_mcp_server"). The wrapper is what
|
||||
# keeps the workspace ALIVE on the canvas: it POSTs /registry/register
|
||||
# at startup and runs a 20s heartbeat thread alongside the MCP stdio
|
||||
# loop. The bare a2a_mcp_server module exposes tools but does NOT
|
||||
# heartbeat — pointing openclaw at it leaves the canvas showing this
|
||||
# workspace as awaiting_agent (OFFLINE) within 60-90s even while
|
||||
# tools work.
|
||||
WORKSPACE_TOKEN="<paste from create response>"
|
||||
openclaw mcp set molecule "$(cat <<EOF
|
||||
{
|
||||
"command": "python3",
|
||||
"args": ["-m", "molecule_runtime.a2a_mcp_server"],
|
||||
"command": "molecule-mcp",
|
||||
"args": [],
|
||||
"env": {
|
||||
"WORKSPACE_ID": "{{WORKSPACE_ID}}",
|
||||
"PLATFORM_URL": "{{PLATFORM_URL}}",
|
||||
@@ -464,4 +549,13 @@ disown
|
||||
|
||||
# 5. Run an agent turn — molecule tools are now available:
|
||||
openclaw agent --message "list my peers"
|
||||
|
||||
# Need help?
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/mcp-server-setup
|
||||
# Common errors:
|
||||
# • Gateway not starting — tail ~/.openclaw/gateway.log. The loopback
|
||||
# bind requires :18789 to be free; check with ` + "`lsof -iTCP:18789`" + `.
|
||||
# • ` + "`openclaw mcp set`" + ` rejected — the heredoc generates JSON;
|
||||
# verify with ` + "`jq < ~/.openclaw/mcp/molecule.json`" + ` and re-run
|
||||
# ` + "`openclaw mcp set`" + ` if the file is malformed.
|
||||
`
|
||||
|
||||
@@ -38,3 +38,40 @@ func TestExternalTemplates_NoMoleculeOrgIDPlaceholder(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestExternalMcpTemplates_UseMoleculeMcpWrapper pins the invariant
|
||||
// that operator-facing snippets configuring an MCP server entry point
|
||||
// use the ``molecule-mcp`` console-script wrapper (mcp_cli.main),
|
||||
// NOT the bare ``a2a_mcp_server`` module.
|
||||
//
|
||||
// Why: a2a_mcp_server exposes the MCP tools but does NOT call
|
||||
// /registry/register or run the 20s heartbeat thread. mcp_cli wraps
|
||||
// it with both, which is what flips the canvas presence indicator
|
||||
// from awaiting_agent (OFFLINE) to online and keeps it that way.
|
||||
// Originally tracked by molecule-core#2957 — operator hit the
|
||||
// silent-OFFLINE failure mode when the Codex tab pointed at the bare
|
||||
// module.
|
||||
//
|
||||
// The hermes-channel template intentionally uses the bare module: it
|
||||
// owns the platform plugin path and runs its own
|
||||
// register_platform/heartbeat code in-process, so wrapping with
|
||||
// mcp_cli would double-heartbeat. universalMcp / codex / openclaw
|
||||
// must all use the wrapper.
|
||||
func TestExternalMcpTemplates_UseMoleculeMcpWrapper(t *testing.T) {
|
||||
mustUseWrapper := map[string]string{
|
||||
"externalUniversalMcpTemplate": externalUniversalMcpTemplate,
|
||||
"externalCodexTemplate": externalCodexTemplate,
|
||||
"externalOpenClawTemplate": externalOpenClawTemplate,
|
||||
}
|
||||
for name, body := range mustUseWrapper {
|
||||
if !strings.Contains(body, "molecule-mcp") {
|
||||
t.Errorf("%s does not reference 'molecule-mcp' — operator-facing MCP snippets must point at the heartbeat-wrapping console script, not the bare a2a_mcp_server module (#2957)", name)
|
||||
}
|
||||
if strings.Contains(body, `"-m", "molecule_runtime.a2a_mcp_server"`) {
|
||||
t.Errorf("%s spawns 'python3 -m molecule_runtime.a2a_mcp_server' — that bypasses the standalone register/heartbeat wrapper, leaving the canvas showing the workspace OFFLINE (#2957). Use 'molecule-mcp' instead.", name)
|
||||
}
|
||||
if strings.Contains(body, `["-m", "molecule_runtime.a2a_mcp_server"]`) {
|
||||
t.Errorf("%s spawns 'python3 -m molecule_runtime.a2a_mcp_server' — that bypasses the standalone register/heartbeat wrapper, leaving the canvas showing the workspace OFFLINE (#2957). Use 'molecule-mcp' instead.", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,18 +11,21 @@ import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"errors"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// newMCPHandler is a test helper that constructs an MCPHandler backed by the
|
||||
// sqlmock DB set up by setupTestDB.
|
||||
// sqlmock DB set up by setupTestDB. Uses newTestBroadcaster so handlers
|
||||
// that BroadcastOnly (send_message_to_user, etc.) don't nil-panic on the
|
||||
// hub — events.NewBroadcaster(nil) crashes inside hub.Broadcast.
|
||||
func newMCPHandler(t *testing.T) (*MCPHandler, sqlmock.Sqlmock) {
|
||||
t.Helper()
|
||||
mock := setupTestDB(t)
|
||||
h := NewMCPHandler(db.DB, events.NewBroadcaster(nil))
|
||||
h := NewMCPHandler(db.DB, newTestBroadcaster())
|
||||
return h, mock
|
||||
}
|
||||
|
||||
@@ -628,6 +631,170 @@ func TestMCPHandler_SendMessageToUser_Blocked_WhenEnvNotSet(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestMCPHandler_SendMessageToUser_DBErrorLogsAndStill200s pins the
|
||||
// "best-effort persistence" contract: when the activity_log INSERT
|
||||
// fails (DB hiccup, constraint violation, transient connection drop),
|
||||
// the tool MUST still return success to the agent because the WS
|
||||
// broadcast already succeeded — the user has seen the message.
|
||||
//
|
||||
// This matches /notify (activity.go) behavior. Returning an error
|
||||
// here would cause the agent to retry and re-broadcast, double-
|
||||
// rendering the message in the user's live chat panel for every
|
||||
// retry until the DB recovers.
|
||||
func TestMCPHandler_SendMessageToUser_DBErrorLogsAndStill200s(t *testing.T) {
|
||||
t.Setenv("MOLECULE_MCP_ALLOW_SEND_MESSAGE", "true")
|
||||
h, mock := newMCPHandler(t)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-err").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("CEO Ryan PC"))
|
||||
|
||||
// INSERT fails — must NOT abort the tool response.
|
||||
mock.ExpectExec(`INSERT INTO activity_logs.*'a2a_receive'.*'notify'`).
|
||||
WillReturnError(errors.New("transient db error"))
|
||||
|
||||
w := mcpPost(t, h, "ws-err", map[string]interface{}{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 100,
|
||||
"method": "tools/call",
|
||||
"params": map[string]interface{}{
|
||||
"name": "send_message_to_user",
|
||||
"arguments": map[string]interface{}{
|
||||
"message": "should not be lost from the live chat",
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
var resp mcpResponse
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("response was not valid JSON-RPC: %v", err)
|
||||
}
|
||||
// Tool response is success — INSERT failure logged, broadcast
|
||||
// already succeeded.
|
||||
if resp.Error != nil {
|
||||
t.Errorf("tool response should be success on DB error (broadcast won), got JSON-RPC error: %+v", resp.Error)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("expected DB calls in order: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMCPHandler_SendMessageToUser_ResponseBodyShape pins the
|
||||
// response_body JSON shape stored in activity_logs. This shape MUST
|
||||
// match what the canvas hydrater (extractResponseText in
|
||||
// historyHydration.ts) reads — specifically `{"result": "<text>"}`.
|
||||
// Any drift in the JSON shape silently breaks chat history without
|
||||
// failing the INSERT.
|
||||
//
|
||||
// Caught the same drift class flagged in
|
||||
// feedback_assert_exact_not_substring.md: a substring match on
|
||||
// "result" would pass even if the field were renamed; we assert the
|
||||
// exact JSON shape.
|
||||
func TestMCPHandler_SendMessageToUser_ResponseBodyShape(t *testing.T) {
|
||||
t.Setenv("MOLECULE_MCP_ALLOW_SEND_MESSAGE", "true")
|
||||
h, mock := newMCPHandler(t)
|
||||
|
||||
const userMessage = "Hi there from the agent"
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-shape").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("CEO Ryan PC"))
|
||||
|
||||
// Capture the response_body argument and assert its exact shape.
|
||||
mock.ExpectExec(`INSERT INTO activity_logs.*'a2a_receive'.*'notify'`).
|
||||
WithArgs(
|
||||
"ws-shape",
|
||||
sqlmock.AnyArg(), // summary
|
||||
// The response_body MUST be JSON `{"result": "<message>"}`.
|
||||
// Any other shape (e.g., wrapping in a Task object) breaks
|
||||
// the canvas hydrater's `body.result` extractor.
|
||||
`{"result":"`+userMessage+`"}`,
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
w := mcpPost(t, h, "ws-shape", map[string]interface{}{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 101,
|
||||
"method": "tools/call",
|
||||
"params": map[string]interface{}{
|
||||
"name": "send_message_to_user",
|
||||
"arguments": map[string]interface{}{
|
||||
"message": userMessage,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
if w.Code != 200 {
|
||||
t.Fatalf("expected 200, got %d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("response_body shape drift — would silently break canvas chat history: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMCPHandler_SendMessageToUser_PersistsToActivityLog pins the fix
|
||||
// for the reno-stars / CEO Ryan PC chat-history data-loss bug:
|
||||
// external claude-code agents using molecule-mcp's send_message_to_user
|
||||
// tool route through THIS handler (not the HTTP /notify endpoint),
|
||||
// and the handler used to broadcast WS only — visible live, gone on
|
||||
// reload because nothing wrote to activity_logs.
|
||||
//
|
||||
// Pins:
|
||||
// - INSERT happens on the success path (broadcast + DB write).
|
||||
// - INSERT shape mirrors the HTTP /notify handler exactly:
|
||||
// activity_type='a2a_receive', method='notify', request_body NULL,
|
||||
// response_body={"result": message}, status='ok'. The canvas
|
||||
// hydration query (`type=a2a_receive&source=canvas`) treats
|
||||
// both writers as the same shape — drift here means the bug
|
||||
// re-surfaces silently.
|
||||
func TestMCPHandler_SendMessageToUser_PersistsToActivityLog(t *testing.T) {
|
||||
t.Setenv("MOLECULE_MCP_ALLOW_SEND_MESSAGE", "true")
|
||||
h, mock := newMCPHandler(t)
|
||||
|
||||
// Workspace lookup — the handler verifies the workspace exists
|
||||
// before it does anything else. Returning a name lets the
|
||||
// broadcast payload populate; the test doesn't assert on the
|
||||
// broadcast (no observable WS in this fake), only on the DB.
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-msg").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("CEO Ryan PC"))
|
||||
|
||||
// The persistence INSERT — pin the exact shape so a future
|
||||
// refactor that switches columns or drops `method='notify'`
|
||||
// breaks the test loud, not silently. Match by regex on the
|
||||
// table + activity_type + method literals.
|
||||
mock.ExpectExec(`INSERT INTO activity_logs.*'a2a_receive'.*'notify'`).
|
||||
WithArgs(
|
||||
"ws-msg",
|
||||
sqlmock.AnyArg(), // summary "Agent message: ..."
|
||||
sqlmock.AnyArg(), // response_body JSON
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
w := mcpPost(t, h, "ws-msg", map[string]interface{}{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 99,
|
||||
"method": "tools/call",
|
||||
"params": map[string]interface{}{
|
||||
"name": "send_message_to_user",
|
||||
"arguments": map[string]interface{}{
|
||||
"message": "Hello, this should persist!",
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
var resp mcpResponse
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("response was not valid JSON-RPC: %v\nbody=%s", err, w.Body.String())
|
||||
}
|
||||
if resp.Error != nil {
|
||||
t.Errorf("unexpected JSON-RPC error: %+v", resp.Error)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("DB expectations not met (INSERT missing → reno-stars data-loss regression): %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Parse error
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
@@ -330,20 +331,23 @@ func (h *MCPHandler) toolSendMessageToUser(ctx context.Context, workspaceID stri
|
||||
return "", fmt.Errorf("send_message_to_user is not enabled on this MCP bridge (set MOLECULE_MCP_ALLOW_SEND_MESSAGE=true)")
|
||||
}
|
||||
|
||||
var wsName string
|
||||
err := h.database.QueryRowContext(ctx,
|
||||
`SELECT name FROM workspaces WHERE id = $1 AND status != 'removed'`, workspaceID,
|
||||
).Scan(&wsName)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("workspace not found")
|
||||
// Single source of truth for chat-bearing agent → user messages —
|
||||
// see agent_message_writer.go for the contract. The pre-RFC-#2945
|
||||
// duplication of broadcast + INSERT logic between this handler and
|
||||
// activity.go:Notify is what produced the reno-stars data-loss
|
||||
// regression; both paths now route through the same writer.
|
||||
//
|
||||
// MCP send_message_to_user does not currently surface attachments
|
||||
// (the tool args don't accept them); pass nil. If a future tool
|
||||
// schema adds an attachments arg, build []AgentMessageAttachment
|
||||
// and pass through.
|
||||
writer := NewAgentMessageWriter(h.database, h.broadcaster)
|
||||
if err := writer.Send(ctx, workspaceID, message, nil); err != nil {
|
||||
if errors.Is(err, ErrWorkspaceNotFound) {
|
||||
return "", fmt.Errorf("workspace not found")
|
||||
}
|
||||
return "", err
|
||||
}
|
||||
|
||||
h.broadcaster.BroadcastOnly(workspaceID, "AGENT_MESSAGE", map[string]interface{}{
|
||||
"message": message,
|
||||
"workspace_id": workspaceID,
|
||||
"name": wsName,
|
||||
})
|
||||
|
||||
return "Message sent.", nil
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,416 @@
|
||||
package handlers
|
||||
|
||||
// memories_v2.go — HTTP endpoints that expose Memory v2 plugin state to
|
||||
// the canvas Memory tab. Reads-only; writes still go through the MCP
|
||||
// path (see mcp_tools_memory_v2.go) where SAFE-T1201 redaction +
|
||||
// org-write audit happen at a single funnel.
|
||||
//
|
||||
// Why a separate v2 endpoint set rather than retrofitting memories.go:
|
||||
//
|
||||
// - memories.go reads `agent_memories` (legacy v1 table). After the
|
||||
// 2026-05-05 cutover, agent commits go to the plugin's
|
||||
// memory_records — agent_memories is frozen. The canvas Memory
|
||||
// tab reading memories.go shows STALE data.
|
||||
// - The plugin is loopback-only on each tenant (127.0.0.1:9100), so
|
||||
// the canvas (browser) cannot call it directly. workspace-server
|
||||
// proxies through these endpoints.
|
||||
// - v2 has different shape (namespace tree, kind/source/pin/TTL,
|
||||
// score) — overloading memories.go would break v1 consumers
|
||||
// (admin export, the back-compat MCP shim).
|
||||
//
|
||||
// All endpoints sit under the same wsAuth group memories.go uses,
|
||||
// so the existing per-tenant token gates them automatically.
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/client"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/namespace"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// MemoriesV2Handler bundles the plugin client + namespace resolver
|
||||
// behind a slim HTTP surface. Construction matches the rest of the
|
||||
// handlers package: NewMemoriesV2Handler followed by WithMemoryV2 (or
|
||||
// the test-only withMemoryV2APIs) at boot.
|
||||
type MemoriesV2Handler struct {
|
||||
plugin memoryPluginAPI
|
||||
resolver namespaceResolverAPI
|
||||
}
|
||||
|
||||
// NewMemoriesV2Handler constructs an unwired handler. Every method
|
||||
// returns 503 until WithMemoryV2 is called — keeps a partial deploy
|
||||
// (MEMORY_PLUGIN_URL absent) from crashing the canvas with 500s.
|
||||
func NewMemoriesV2Handler() *MemoriesV2Handler {
|
||||
return &MemoriesV2Handler{}
|
||||
}
|
||||
|
||||
// WithMemoryV2 attaches the live plugin client + resolver. Returns
|
||||
// the receiver for fluent boot-time wiring, mirroring MCPHandler.
|
||||
func (h *MemoriesV2Handler) WithMemoryV2(plugin *client.Client, resolver *namespace.Resolver) *MemoriesV2Handler {
|
||||
h.plugin = plugin
|
||||
h.resolver = resolver
|
||||
return h
|
||||
}
|
||||
|
||||
// withMemoryV2APIs is the test-only injection path: takes the
|
||||
// interfaces directly so unit tests don't have to construct a real
|
||||
// *client.Client / namespace.Resolver. Keep symmetric with
|
||||
// MCPHandler.withMemoryV2APIs so handler tests can re-use the same
|
||||
// stubs.
|
||||
func (h *MemoriesV2Handler) withMemoryV2APIs(plugin memoryPluginAPI, resolver namespaceResolverAPI) *MemoriesV2Handler {
|
||||
h.plugin = plugin
|
||||
h.resolver = resolver
|
||||
return h
|
||||
}
|
||||
|
||||
// available reports whether the v2 deps are wired. Each route checks
|
||||
// this and returns 503 + a clear hint when the plugin isn't
|
||||
// configured, matching the MCP-side error.
|
||||
func (h *MemoriesV2Handler) available() error {
|
||||
if h == nil || h.plugin == nil || h.resolver == nil {
|
||||
return errors.New("memory plugin is not configured (set MEMORY_PLUGIN_URL)")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// GET /workspaces/:id/v2/namespaces
|
||||
//
|
||||
// Returns the namespace tree the canvas uses to drive the Memory tab's
|
||||
// namespace dropdown. Two arrays:
|
||||
//
|
||||
// - readable[]: every namespace this workspace can READ from. Drives
|
||||
// the "show me memories from X" filter dropdown.
|
||||
// - writable[]: subset of readable that this workspace can WRITE to.
|
||||
// Used for future canvas-side commit (not in this PR but the
|
||||
// contract is symmetric so the dropdown can disable read-only
|
||||
// entries when wiring up commit).
|
||||
//
|
||||
// Each entry carries name + kind + a friendly label so the canvas
|
||||
// doesn't have to parse `workspace:abc-123` itself. Kind ranks the
|
||||
// dropdown grouping (workspace → team → org → custom).
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
// NamespaceView is the UI-friendly DTO returned by GET v2/namespaces.
|
||||
// Internal namespace.Namespace has fields the canvas doesn't need
|
||||
// (resolver-internal flags, raw metadata blobs); this strips it down.
|
||||
type NamespaceView struct {
|
||||
Name string `json:"name"`
|
||||
Kind contract.NamespaceKind `json:"kind"`
|
||||
// Label is a stable display string the canvas can render directly.
|
||||
// For workspace:<id> it's "Workspace (<short-id>)"; for team:<id>
|
||||
// it's "Team (<short-id>)"; org/custom carry the raw suffix.
|
||||
Label string `json:"label"`
|
||||
}
|
||||
|
||||
// NamespacesResponse is the body of GET v2/namespaces.
|
||||
type NamespacesResponse struct {
|
||||
Readable []NamespaceView `json:"readable"`
|
||||
Writable []NamespaceView `json:"writable"`
|
||||
}
|
||||
|
||||
// Namespaces handles GET /workspaces/:id/v2/namespaces.
|
||||
func (h *MemoriesV2Handler) Namespaces(c *gin.Context) {
|
||||
if err := h.available(); err != nil {
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
workspaceID := c.Param("id")
|
||||
ctx := c.Request.Context()
|
||||
|
||||
readable, err := h.resolver.ReadableNamespaces(ctx, workspaceID)
|
||||
if err != nil {
|
||||
log.Printf("v2/namespaces readable error workspace=%s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to resolve readable namespaces"})
|
||||
return
|
||||
}
|
||||
writable, err := h.resolver.WritableNamespaces(ctx, workspaceID)
|
||||
if err != nil {
|
||||
log.Printf("v2/namespaces writable error workspace=%s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to resolve writable namespaces"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, NamespacesResponse{
|
||||
Readable: namespacesToViews(readable),
|
||||
Writable: namespacesToViews(writable),
|
||||
})
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// GET /workspaces/:id/v2/memories
|
||||
//
|
||||
// Search the plugin for memories visible to this workspace.
|
||||
//
|
||||
// Query params (all optional):
|
||||
// - namespace: a single readable namespace to scope to. Omitted ⇒ all
|
||||
// readable namespaces (dropdown's "All" mode).
|
||||
// - q: full-text query string. Empty ⇒ recency-ordered listing.
|
||||
// - kind: one of fact|summary|checkpoint. Empty ⇒ all kinds.
|
||||
// - limit: max rows. Defaults to 50, clamped to 100. Matches the
|
||||
// v1 endpoint's clamp shape (memories.go:memoryRecallMaxLimit).
|
||||
//
|
||||
// Server-side ACL invariant: the request is ALWAYS intersected with
|
||||
// the resolver's readable set on the server. A canvas-supplied
|
||||
// `namespace=foo:bar` that this workspace can't read returns an empty
|
||||
// list, NOT 403 — the canvas dropdown is built from /v2/namespaces
|
||||
// so a forbidden value is a stale-cache bug, not malice. Existence
|
||||
// non-inference: empty result is indistinguishable from "you can't
|
||||
// read this namespace" — same as the wsAuth-protected v1 endpoints.
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
const memoriesV2DefaultLimit = 50
|
||||
const memoriesV2MaxLimit = 100
|
||||
|
||||
// Search handles GET /workspaces/:id/v2/memories.
|
||||
func (h *MemoriesV2Handler) Search(c *gin.Context) {
|
||||
if err := h.available(); err != nil {
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
workspaceID := c.Param("id")
|
||||
ctx := c.Request.Context()
|
||||
|
||||
requestedNS := c.Query("namespace")
|
||||
query := c.Query("q")
|
||||
kindStr := c.Query("kind")
|
||||
limit := parseLimit(c.Query("limit"))
|
||||
|
||||
// Resolve the readable set, then intersect the request.
|
||||
// IntersectReadable handles both the empty-request case (return
|
||||
// all readable) and the explicit-namespace case (return [ns] iff
|
||||
// readable, else []).
|
||||
var requested []string
|
||||
if requestedNS != "" {
|
||||
requested = []string{requestedNS}
|
||||
}
|
||||
scopedNamespaces, err := h.resolver.IntersectReadable(ctx, workspaceID, requested)
|
||||
if err != nil {
|
||||
log.Printf("v2/memories intersect error workspace=%s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to resolve namespaces"})
|
||||
return
|
||||
}
|
||||
// Empty after intersection — caller asked for a namespace they
|
||||
// can't read, OR they have no readable namespaces at all. Return
|
||||
// [] (not 404) so the canvas can render its empty-state without
|
||||
// special-casing.
|
||||
if len(scopedNamespaces) == 0 {
|
||||
c.JSON(http.StatusOK, MemoriesResponse{Memories: []MemoryView{}})
|
||||
return
|
||||
}
|
||||
|
||||
req := contract.SearchRequest{
|
||||
Namespaces: scopedNamespaces,
|
||||
Query: query,
|
||||
Limit: limit,
|
||||
}
|
||||
if kindStr != "" {
|
||||
req.Kinds = []contract.MemoryKind{contract.MemoryKind(kindStr)}
|
||||
}
|
||||
|
||||
resp, err := h.plugin.Search(ctx, req)
|
||||
if err != nil {
|
||||
log.Printf("v2/memories plugin error workspace=%s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusBadGateway, gin.H{"error": "memory plugin search failed"})
|
||||
return
|
||||
}
|
||||
|
||||
out := MemoriesResponse{Memories: make([]MemoryView, 0, len(resp.Memories))}
|
||||
for _, m := range resp.Memories {
|
||||
out.Memories = append(out.Memories, memoryToView(m))
|
||||
}
|
||||
c.JSON(http.StatusOK, out)
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// DELETE /workspaces/:id/v2/memories/:memoryId
|
||||
//
|
||||
// Forget a memory. The plugin enforces its own ownership model — we
|
||||
// pass `requested_by_namespace = workspace:<id>` so the audit trail
|
||||
// records who initiated the forget; the plugin's ACL gate decides
|
||||
// whether the deletion is allowed.
|
||||
//
|
||||
// 404 (not 403) on a missing or non-owned memory: existence-non-
|
||||
// inferring response, matches the v1 DELETE in memories.go.
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
// Forget handles DELETE /workspaces/:id/v2/memories/:memoryId.
|
||||
func (h *MemoriesV2Handler) Forget(c *gin.Context) {
|
||||
if err := h.available(); err != nil {
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
workspaceID := c.Param("id")
|
||||
memoryID := c.Param("memoryId")
|
||||
ctx := c.Request.Context()
|
||||
|
||||
if memoryID == "" {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "memoryId is required"})
|
||||
return
|
||||
}
|
||||
|
||||
body := contract.ForgetRequest{
|
||||
RequestedByNamespace: "workspace:" + workspaceID,
|
||||
}
|
||||
if err := h.plugin.ForgetMemory(ctx, memoryID, body); err != nil {
|
||||
// Map plugin not_found → 404. Anything else is upstream error.
|
||||
var ce *contract.Error
|
||||
if errors.As(err, &ce) && ce.Code == contract.ErrorCodeNotFound {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "memory not found"})
|
||||
return
|
||||
}
|
||||
log.Printf("v2/memories forget error workspace=%s memory=%s: %v", workspaceID, memoryID, err)
|
||||
c.JSON(http.StatusBadGateway, gin.H{"error": "memory plugin delete failed"})
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"status": "deleted"})
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// View shaping helpers
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
// MemoryView is the canvas-facing shape of a v2 memory record. The raw
|
||||
// contract.Memory carries internal fields we don't expose (raw
|
||||
// `propagation` blob); MemoryView strips it to what the Memory tab
|
||||
// renders.
|
||||
type MemoryView struct {
|
||||
ID string `json:"id"`
|
||||
Namespace string `json:"namespace"`
|
||||
Content string `json:"content"`
|
||||
Kind contract.MemoryKind `json:"kind"`
|
||||
Source contract.MemorySource `json:"source"`
|
||||
Pin bool `json:"pin"`
|
||||
ExpiresAt *time.Time `json:"expires_at,omitempty"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
// Score is the plugin's similarity score (1.0 = exact); only
|
||||
// populated when ?q= is set and the plugin supports embedding.
|
||||
Score *float64 `json:"score,omitempty"`
|
||||
// SourceWorkspaceID is parsed out of `propagation.source_workspace_id`
|
||||
// when present (cross-workspace propagation) — lets the canvas
|
||||
// render a "from <peer>" badge so users can tell their own writes
|
||||
// apart from team-shared memory.
|
||||
SourceWorkspaceID string `json:"source_workspace_id,omitempty"`
|
||||
}
|
||||
|
||||
// MemoriesResponse is the body of GET v2/memories.
|
||||
type MemoriesResponse struct {
|
||||
Memories []MemoryView `json:"memories"`
|
||||
}
|
||||
|
||||
func memoryToView(m contract.Memory) MemoryView {
|
||||
v := MemoryView{
|
||||
ID: m.ID,
|
||||
Namespace: m.Namespace,
|
||||
Content: m.Content,
|
||||
Kind: m.Kind,
|
||||
Source: m.Source,
|
||||
Pin: m.Pin,
|
||||
ExpiresAt: m.ExpiresAt,
|
||||
CreatedAt: m.CreatedAt,
|
||||
Score: m.Score,
|
||||
}
|
||||
if m.Propagation != nil {
|
||||
// `source_workspace_id` is a propagation contract field
|
||||
// (RFC #2728 §5). Plugin emits it on writes that originated
|
||||
// from a different workspace. Best-effort string extraction —
|
||||
// don't fail rendering if shape drifts.
|
||||
if raw, ok := m.Propagation["source_workspace_id"]; ok {
|
||||
if s, ok := raw.(string); ok && s != "" {
|
||||
v.SourceWorkspaceID = s
|
||||
}
|
||||
}
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// namespacesToViews converts resolver namespaces into UI-friendly
|
||||
// views. Stable sort: workspace → team → org → custom, then by name.
|
||||
func namespacesToViews(in []namespace.Namespace) []NamespaceView {
|
||||
views := make([]NamespaceView, 0, len(in))
|
||||
for _, n := range in {
|
||||
views = append(views, NamespaceView{
|
||||
Name: n.Name,
|
||||
Kind: n.Kind,
|
||||
Label: namespaceLabel(n.Name, n.Kind),
|
||||
})
|
||||
}
|
||||
return views
|
||||
}
|
||||
|
||||
// namespaceLabel renders a human-friendly label for a namespace. The
|
||||
// canvas displays this directly; we keep the formatting server-side
|
||||
// so the shape stays consistent across UIs (canvas, future TUI, etc.).
|
||||
//
|
||||
// Format:
|
||||
// workspace:abc-123 → "Workspace (abc-123)" (UUID short-prefixed)
|
||||
// team:t-1 → "Team (t-1)"
|
||||
// org:acme → "Org (acme)"
|
||||
// custom:foo → "foo" (operator-defined; raw)
|
||||
func namespaceLabel(name string, kind contract.NamespaceKind) string {
|
||||
suffix := ""
|
||||
if i := indexOfColon(name); i >= 0 && i+1 < len(name) {
|
||||
suffix = name[i+1:]
|
||||
}
|
||||
switch kind {
|
||||
case contract.NamespaceKindWorkspace:
|
||||
return "Workspace (" + shortID(suffix) + ")"
|
||||
case contract.NamespaceKindTeam:
|
||||
return "Team (" + shortID(suffix) + ")"
|
||||
case contract.NamespaceKindOrg:
|
||||
return "Org (" + suffix + ")"
|
||||
case contract.NamespaceKindCustom:
|
||||
// Custom namespaces are operator-defined; surface the raw
|
||||
// suffix so they can label them however they want.
|
||||
if suffix == "" {
|
||||
return name
|
||||
}
|
||||
return suffix
|
||||
default:
|
||||
return name
|
||||
}
|
||||
}
|
||||
|
||||
// shortID truncates a UUID-like string to the first 8 chars so the
|
||||
// dropdown stays readable. Keeps the full id available via the
|
||||
// `name` field for click-to-copy / debugging.
|
||||
func shortID(s string) string {
|
||||
if len(s) <= 8 {
|
||||
return s
|
||||
}
|
||||
return s[:8]
|
||||
}
|
||||
|
||||
// indexOfColon is strings.IndexByte without the import, kept inline so
|
||||
// the helper stays trivially auditable next to namespaceLabel.
|
||||
func indexOfColon(s string) int {
|
||||
for i := 0; i < len(s); i++ {
|
||||
if s[i] == ':' {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// parseLimit validates the ?limit= query value. Defaults +
|
||||
// clamps mirror memoriesV2DefaultLimit / memoriesV2MaxLimit.
|
||||
func parseLimit(raw string) int {
|
||||
if raw == "" {
|
||||
return memoriesV2DefaultLimit
|
||||
}
|
||||
n, err := strconv.Atoi(raw)
|
||||
if err != nil || n <= 0 {
|
||||
return memoriesV2DefaultLimit
|
||||
}
|
||||
if n > memoriesV2MaxLimit {
|
||||
return memoriesV2MaxLimit
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
@@ -0,0 +1,669 @@
|
||||
package handlers
|
||||
|
||||
// memories_v2_test.go — comprehensive coverage for the Memory v2
|
||||
// canvas-facing HTTP surface. Pinned shape:
|
||||
//
|
||||
// - 503 path when plugin unwired (every route)
|
||||
// - GET /v2/namespaces success + readable/writable propagation
|
||||
// - GET /v2/namespaces error path (resolver failure on either call)
|
||||
// - GET /v2/memories: empty intersection, namespace passthrough,
|
||||
// query+kind+limit propagation, plugin error mapping
|
||||
// - DELETE /v2/memories/:id: success, plugin not_found→404, other
|
||||
// plugin errors→502, missing memoryId→400
|
||||
// - View shaping: namespaceLabel for all four kinds + truncation,
|
||||
// memoryToView with/without propagation source, parseLimit edge
|
||||
// cases (default, negative, zero, over-cap, non-numeric)
|
||||
//
|
||||
// Tests use the same `memoryPluginAPI` / `namespaceResolverAPI` fakes
|
||||
// the MCP v2 tests use so we don't spin up a real plugin server.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/namespace"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Fakes
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
type fakePlugin struct {
|
||||
searchResp *contract.SearchResponse
|
||||
searchErr error
|
||||
searchReq contract.SearchRequest // captured for assertion
|
||||
forgetErr error
|
||||
forgetID string
|
||||
forgetReq contract.ForgetRequest
|
||||
}
|
||||
|
||||
func (f *fakePlugin) CommitMemory(ctx context.Context, ns string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error) {
|
||||
return nil, errors.New("not implemented in fake")
|
||||
}
|
||||
func (f *fakePlugin) Search(ctx context.Context, body contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
f.searchReq = body
|
||||
if f.searchErr != nil {
|
||||
return nil, f.searchErr
|
||||
}
|
||||
return f.searchResp, nil
|
||||
}
|
||||
func (f *fakePlugin) ForgetMemory(ctx context.Context, id string, body contract.ForgetRequest) error {
|
||||
f.forgetID = id
|
||||
f.forgetReq = body
|
||||
return f.forgetErr
|
||||
}
|
||||
|
||||
type fakeNSResolver struct {
|
||||
readable []namespace.Namespace
|
||||
readableErr error
|
||||
writable []namespace.Namespace
|
||||
writableErr error
|
||||
intersect []string
|
||||
intersectErr error
|
||||
intersectIn []string // captured
|
||||
}
|
||||
|
||||
func (f *fakeNSResolver) ReadableNamespaces(ctx context.Context, ws string) ([]namespace.Namespace, error) {
|
||||
return f.readable, f.readableErr
|
||||
}
|
||||
func (f *fakeNSResolver) WritableNamespaces(ctx context.Context, ws string) ([]namespace.Namespace, error) {
|
||||
return f.writable, f.writableErr
|
||||
}
|
||||
func (f *fakeNSResolver) CanWrite(ctx context.Context, ws, ns string) (bool, error) {
|
||||
return true, nil
|
||||
}
|
||||
func (f *fakeNSResolver) IntersectReadable(ctx context.Context, ws string, requested []string) ([]string, error) {
|
||||
f.intersectIn = requested
|
||||
return f.intersect, f.intersectErr
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Test helpers
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
func init() {
|
||||
gin.SetMode(gin.TestMode)
|
||||
}
|
||||
|
||||
// newWiredHandler returns a handler with both the fake plugin + fake
|
||||
// resolver attached. Tests that need the unwired (503) path use
|
||||
// NewMemoriesV2Handler() directly.
|
||||
func newWiredHandler(p *fakePlugin, r *fakeNSResolver) *MemoriesV2Handler {
|
||||
return NewMemoriesV2Handler().withMemoryV2APIs(p, r)
|
||||
}
|
||||
|
||||
func doRequest(t *testing.T, h *MemoriesV2Handler, method, path string, params gin.Params) *httptest.ResponseRecorder {
|
||||
t.Helper()
|
||||
rec := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(rec)
|
||||
c.Params = params
|
||||
req := httptest.NewRequest(method, path, nil)
|
||||
c.Request = req
|
||||
switch {
|
||||
case method == http.MethodGet && strings.HasSuffix(path, "/v2/namespaces"):
|
||||
h.Namespaces(c)
|
||||
case method == http.MethodGet && strings.Contains(path, "/v2/memories"):
|
||||
h.Search(c)
|
||||
case method == http.MethodDelete:
|
||||
h.Forget(c)
|
||||
default:
|
||||
t.Fatalf("doRequest: don't know how to dispatch %s %s", method, path)
|
||||
}
|
||||
return rec
|
||||
}
|
||||
|
||||
func mustJSON(t *testing.T, body []byte, out interface{}) {
|
||||
t.Helper()
|
||||
if err := json.Unmarshal(body, out); err != nil {
|
||||
t.Fatalf("json decode: %v\nbody=%s", err, string(body))
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// 503 — plugin unwired
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
func TestMemoriesV2_PluginUnwired_All503(t *testing.T) {
|
||||
h := NewMemoriesV2Handler() // no WithMemoryV2 / withMemoryV2APIs
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
method string
|
||||
path string
|
||||
params gin.Params
|
||||
}{
|
||||
{"namespaces", http.MethodGet, "/workspaces/ws-a/v2/namespaces", gin.Params{{Key: "id", Value: "ws-a"}}},
|
||||
{"search", http.MethodGet, "/workspaces/ws-a/v2/memories", gin.Params{{Key: "id", Value: "ws-a"}}},
|
||||
{"forget", http.MethodDelete, "/workspaces/ws-a/v2/memories/m-1", gin.Params{{Key: "id", Value: "ws-a"}, {Key: "memoryId", Value: "m-1"}}},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
rec := doRequest(t, h, tc.method, tc.path, tc.params)
|
||||
if rec.Code != http.StatusServiceUnavailable {
|
||||
t.Errorf("expected 503, got %d", rec.Code)
|
||||
}
|
||||
var body map[string]string
|
||||
mustJSON(t, rec.Body.Bytes(), &body)
|
||||
if !strings.Contains(body["error"], "MEMORY_PLUGIN_URL") {
|
||||
t.Errorf("503 body missing operator hint, got: %q", body["error"])
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// GET /v2/namespaces
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
func TestMemoriesV2_Namespaces_Success(t *testing.T) {
|
||||
resolver := &fakeNSResolver{
|
||||
readable: []namespace.Namespace{
|
||||
{Name: "workspace:abc-1234-5678", Kind: contract.NamespaceKindWorkspace},
|
||||
{Name: "team:t-99", Kind: contract.NamespaceKindTeam},
|
||||
{Name: "org:acme", Kind: contract.NamespaceKindOrg},
|
||||
{Name: "custom:special", Kind: contract.NamespaceKindCustom},
|
||||
},
|
||||
writable: []namespace.Namespace{
|
||||
{Name: "workspace:abc-1234-5678", Kind: contract.NamespaceKindWorkspace},
|
||||
},
|
||||
}
|
||||
h := newWiredHandler(&fakePlugin{}, resolver)
|
||||
|
||||
rec := doRequest(t, h, http.MethodGet, "/workspaces/ws-a/v2/namespaces",
|
||||
gin.Params{{Key: "id", Value: "ws-a"}})
|
||||
if rec.Code != 200 {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
var body NamespacesResponse
|
||||
mustJSON(t, rec.Body.Bytes(), &body)
|
||||
|
||||
if len(body.Readable) != 4 {
|
||||
t.Errorf("expected 4 readable, got %d", len(body.Readable))
|
||||
}
|
||||
if len(body.Writable) != 1 {
|
||||
t.Errorf("expected 1 writable, got %d", len(body.Writable))
|
||||
}
|
||||
|
||||
// Label shaping pinned exactly — drift would silently break the
|
||||
// dropdown rendering.
|
||||
wantLabels := map[string]string{
|
||||
"workspace:abc-1234-5678": "Workspace (abc-1234)",
|
||||
"team:t-99": "Team (t-99)",
|
||||
"org:acme": "Org (acme)",
|
||||
"custom:special": "special",
|
||||
}
|
||||
for _, v := range body.Readable {
|
||||
want, ok := wantLabels[v.Name]
|
||||
if !ok {
|
||||
t.Errorf("unexpected namespace name %q", v.Name)
|
||||
continue
|
||||
}
|
||||
if v.Label != want {
|
||||
t.Errorf("namespace %q: want label %q, got %q", v.Name, want, v.Label)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoriesV2_Namespaces_ReadableError(t *testing.T) {
|
||||
resolver := &fakeNSResolver{readableErr: errors.New("boom")}
|
||||
h := newWiredHandler(&fakePlugin{}, resolver)
|
||||
|
||||
rec := doRequest(t, h, http.MethodGet, "/workspaces/ws-a/v2/namespaces",
|
||||
gin.Params{{Key: "id", Value: "ws-a"}})
|
||||
if rec.Code != http.StatusInternalServerError {
|
||||
t.Errorf("expected 500, got %d", rec.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoriesV2_Namespaces_WritableError(t *testing.T) {
|
||||
resolver := &fakeNSResolver{
|
||||
readable: []namespace.Namespace{},
|
||||
writableErr: errors.New("boom"),
|
||||
}
|
||||
h := newWiredHandler(&fakePlugin{}, resolver)
|
||||
|
||||
rec := doRequest(t, h, http.MethodGet, "/workspaces/ws-a/v2/namespaces",
|
||||
gin.Params{{Key: "id", Value: "ws-a"}})
|
||||
if rec.Code != http.StatusInternalServerError {
|
||||
t.Errorf("expected 500, got %d", rec.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// GET /v2/memories — search path
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
func TestMemoriesV2_Search_NoReadableNamespaces_EmptyResult(t *testing.T) {
|
||||
// Empty intersection (e.g. workspace just provisioned, plugin
|
||||
// hasn't created namespaces yet, OR caller asked for ns they
|
||||
// can't read). Expected: 200 with empty memories array, NOT 404.
|
||||
resolver := &fakeNSResolver{intersect: []string{}}
|
||||
plugin := &fakePlugin{searchResp: &contract.SearchResponse{Memories: []contract.Memory{}}}
|
||||
h := newWiredHandler(plugin, resolver)
|
||||
|
||||
rec := doRequest(t, h, http.MethodGet, "/workspaces/ws-a/v2/memories",
|
||||
gin.Params{{Key: "id", Value: "ws-a"}})
|
||||
if rec.Code != 200 {
|
||||
t.Errorf("expected 200, got %d", rec.Code)
|
||||
}
|
||||
var body MemoriesResponse
|
||||
mustJSON(t, rec.Body.Bytes(), &body)
|
||||
if body.Memories == nil {
|
||||
t.Error("Memories should be empty array, not nil — JSON would render null")
|
||||
}
|
||||
if len(body.Memories) != 0 {
|
||||
t.Errorf("expected empty memories, got %d", len(body.Memories))
|
||||
}
|
||||
// Plugin must NOT be called when intersection is empty.
|
||||
if plugin.searchReq.Namespaces != nil {
|
||||
t.Error("plugin Search should not be called when intersection is empty")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoriesV2_Search_FullPath_NamespaceQueryKindLimit(t *testing.T) {
|
||||
expiresAt := time.Now().Add(24 * time.Hour)
|
||||
resolver := &fakeNSResolver{intersect: []string{"workspace:ws-a"}}
|
||||
score := 0.87
|
||||
plugin := &fakePlugin{
|
||||
searchResp: &contract.SearchResponse{
|
||||
Memories: []contract.Memory{
|
||||
{
|
||||
ID: "m-1",
|
||||
Namespace: "workspace:ws-a",
|
||||
Content: "fact one",
|
||||
Kind: contract.MemoryKindFact,
|
||||
Source: contract.MemorySourceAgent,
|
||||
Pin: true,
|
||||
ExpiresAt: &expiresAt,
|
||||
CreatedAt: time.Now(),
|
||||
Score: &score,
|
||||
Propagation: map[string]interface{}{
|
||||
"source_workspace_id": "ws-peer-42",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
h := newWiredHandler(plugin, resolver)
|
||||
|
||||
rec := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(rec)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-a"}}
|
||||
c.Request = httptest.NewRequest(http.MethodGet,
|
||||
"/workspaces/ws-a/v2/memories?namespace=workspace:ws-a&q=hello&kind=fact&limit=10", nil)
|
||||
h.Search(c)
|
||||
|
||||
if rec.Code != 200 {
|
||||
t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
// Resolver received the requested namespace as a single-element list
|
||||
if len(resolver.intersectIn) != 1 || resolver.intersectIn[0] != "workspace:ws-a" {
|
||||
t.Errorf("resolver.IntersectReadable received %v, want [workspace:ws-a]", resolver.intersectIn)
|
||||
}
|
||||
// Plugin received query + kind + limit propagated through
|
||||
if plugin.searchReq.Query != "hello" {
|
||||
t.Errorf("plugin.Query=%q, want hello", plugin.searchReq.Query)
|
||||
}
|
||||
if len(plugin.searchReq.Kinds) != 1 || plugin.searchReq.Kinds[0] != contract.MemoryKindFact {
|
||||
t.Errorf("plugin.Kinds=%v, want [fact]", plugin.searchReq.Kinds)
|
||||
}
|
||||
if plugin.searchReq.Limit != 10 {
|
||||
t.Errorf("plugin.Limit=%d, want 10", plugin.searchReq.Limit)
|
||||
}
|
||||
// Response shape — pin/expires_at/score/source_workspace_id all
|
||||
// surfaced into MemoryView so the canvas doesn't have to dig
|
||||
// through propagation map.
|
||||
var body MemoriesResponse
|
||||
mustJSON(t, rec.Body.Bytes(), &body)
|
||||
if len(body.Memories) != 1 {
|
||||
t.Fatalf("expected 1 memory, got %d", len(body.Memories))
|
||||
}
|
||||
m := body.Memories[0]
|
||||
if !m.Pin {
|
||||
t.Error("Pin not propagated")
|
||||
}
|
||||
if m.ExpiresAt == nil {
|
||||
t.Error("ExpiresAt not propagated")
|
||||
}
|
||||
if m.Score == nil || *m.Score != 0.87 {
|
||||
t.Errorf("Score=%v, want 0.87", m.Score)
|
||||
}
|
||||
if m.SourceWorkspaceID != "ws-peer-42" {
|
||||
t.Errorf("SourceWorkspaceID=%q, want ws-peer-42", m.SourceWorkspaceID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoriesV2_Search_NoNamespaceQuery_AllReadable(t *testing.T) {
|
||||
// No ?namespace= → resolver.IntersectReadable receives nil (empty
|
||||
// requested) and returns ALL readable. Plugin gets full set.
|
||||
resolver := &fakeNSResolver{intersect: []string{"workspace:ws-a", "team:t-1"}}
|
||||
plugin := &fakePlugin{searchResp: &contract.SearchResponse{}}
|
||||
h := newWiredHandler(plugin, resolver)
|
||||
|
||||
rec := doRequest(t, h, http.MethodGet, "/workspaces/ws-a/v2/memories",
|
||||
gin.Params{{Key: "id", Value: "ws-a"}})
|
||||
if rec.Code != 200 {
|
||||
t.Errorf("expected 200, got %d", rec.Code)
|
||||
}
|
||||
if resolver.intersectIn != nil {
|
||||
t.Errorf("requested should be nil for unscoped query, got %v", resolver.intersectIn)
|
||||
}
|
||||
if len(plugin.searchReq.Namespaces) != 2 {
|
||||
t.Errorf("plugin.Namespaces=%v, want both readable", plugin.searchReq.Namespaces)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoriesV2_Search_IntersectError(t *testing.T) {
|
||||
resolver := &fakeNSResolver{intersectErr: errors.New("db down")}
|
||||
h := newWiredHandler(&fakePlugin{}, resolver)
|
||||
|
||||
rec := doRequest(t, h, http.MethodGet, "/workspaces/ws-a/v2/memories",
|
||||
gin.Params{{Key: "id", Value: "ws-a"}})
|
||||
if rec.Code != http.StatusInternalServerError {
|
||||
t.Errorf("expected 500, got %d", rec.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoriesV2_Search_PluginError(t *testing.T) {
|
||||
resolver := &fakeNSResolver{intersect: []string{"workspace:ws-a"}}
|
||||
plugin := &fakePlugin{searchErr: errors.New("plugin down")}
|
||||
h := newWiredHandler(plugin, resolver)
|
||||
|
||||
rec := doRequest(t, h, http.MethodGet, "/workspaces/ws-a/v2/memories",
|
||||
gin.Params{{Key: "id", Value: "ws-a"}})
|
||||
if rec.Code != http.StatusBadGateway {
|
||||
t.Errorf("expected 502 (plugin error), got %d", rec.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoriesV2_Search_PropagationMissing_NoSourceWorkspaceID(t *testing.T) {
|
||||
resolver := &fakeNSResolver{intersect: []string{"workspace:ws-a"}}
|
||||
plugin := &fakePlugin{
|
||||
searchResp: &contract.SearchResponse{
|
||||
Memories: []contract.Memory{
|
||||
{ID: "m-1", Namespace: "workspace:ws-a", Content: "no propagation"},
|
||||
},
|
||||
},
|
||||
}
|
||||
h := newWiredHandler(plugin, resolver)
|
||||
|
||||
rec := doRequest(t, h, http.MethodGet, "/workspaces/ws-a/v2/memories",
|
||||
gin.Params{{Key: "id", Value: "ws-a"}})
|
||||
var body MemoriesResponse
|
||||
mustJSON(t, rec.Body.Bytes(), &body)
|
||||
if len(body.Memories) != 1 || body.Memories[0].SourceWorkspaceID != "" {
|
||||
t.Errorf("SourceWorkspaceID should be empty when propagation is nil, got %q", body.Memories[0].SourceWorkspaceID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoriesV2_Search_PropagationWrongType_DoesNotPanic(t *testing.T) {
|
||||
resolver := &fakeNSResolver{intersect: []string{"workspace:ws-a"}}
|
||||
plugin := &fakePlugin{
|
||||
searchResp: &contract.SearchResponse{
|
||||
Memories: []contract.Memory{
|
||||
{
|
||||
ID: "m-1",
|
||||
Content: "wrong-type propagation",
|
||||
Propagation: map[string]interface{}{
|
||||
"source_workspace_id": 12345, // int, not string
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
h := newWiredHandler(plugin, resolver)
|
||||
|
||||
rec := doRequest(t, h, http.MethodGet, "/workspaces/ws-a/v2/memories",
|
||||
gin.Params{{Key: "id", Value: "ws-a"}})
|
||||
if rec.Code != 200 {
|
||||
t.Fatalf("expected 200 (graceful), got %d", rec.Code)
|
||||
}
|
||||
var body MemoriesResponse
|
||||
mustJSON(t, rec.Body.Bytes(), &body)
|
||||
// Wrong-typed prop entry → empty SourceWorkspaceID, no panic.
|
||||
if body.Memories[0].SourceWorkspaceID != "" {
|
||||
t.Errorf("expected empty SourceWorkspaceID for non-string propagation, got %q", body.Memories[0].SourceWorkspaceID)
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// DELETE /v2/memories/:memoryId
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
func TestMemoriesV2_Forget_Success(t *testing.T) {
|
||||
plugin := &fakePlugin{} // forgetErr nil
|
||||
h := newWiredHandler(plugin, &fakeNSResolver{})
|
||||
|
||||
rec := doRequest(t, h, http.MethodDelete, "/workspaces/ws-a/v2/memories/mem-42",
|
||||
gin.Params{{Key: "id", Value: "ws-a"}, {Key: "memoryId", Value: "mem-42"}})
|
||||
if rec.Code != 200 {
|
||||
t.Errorf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
if plugin.forgetID != "mem-42" {
|
||||
t.Errorf("plugin received memoryID=%q, want mem-42", plugin.forgetID)
|
||||
}
|
||||
if plugin.forgetReq.RequestedByNamespace != "workspace:ws-a" {
|
||||
t.Errorf("requested_by_namespace=%q, want workspace:ws-a", plugin.forgetReq.RequestedByNamespace)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoriesV2_Forget_PluginNotFound_Maps404(t *testing.T) {
|
||||
plugin := &fakePlugin{
|
||||
forgetErr: &contract.Error{Code: contract.ErrorCodeNotFound, Message: "no such memory"},
|
||||
}
|
||||
h := newWiredHandler(plugin, &fakeNSResolver{})
|
||||
|
||||
rec := doRequest(t, h, http.MethodDelete, "/workspaces/ws-a/v2/memories/m-1",
|
||||
gin.Params{{Key: "id", Value: "ws-a"}, {Key: "memoryId", Value: "m-1"}})
|
||||
if rec.Code != http.StatusNotFound {
|
||||
t.Errorf("expected 404, got %d", rec.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoriesV2_Forget_PluginOtherError_Maps502(t *testing.T) {
|
||||
plugin := &fakePlugin{
|
||||
forgetErr: &contract.Error{Code: contract.ErrorCodeInternal, Message: "db dead"},
|
||||
}
|
||||
h := newWiredHandler(plugin, &fakeNSResolver{})
|
||||
|
||||
rec := doRequest(t, h, http.MethodDelete, "/workspaces/ws-a/v2/memories/m-1",
|
||||
gin.Params{{Key: "id", Value: "ws-a"}, {Key: "memoryId", Value: "m-1"}})
|
||||
if rec.Code != http.StatusBadGateway {
|
||||
t.Errorf("expected 502, got %d", rec.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoriesV2_Forget_NonContractError_Maps502(t *testing.T) {
|
||||
// A raw error (e.g. transport failure) — not a contract.Error —
|
||||
// also bubbles up as 502.
|
||||
plugin := &fakePlugin{forgetErr: errors.New("connection reset")}
|
||||
h := newWiredHandler(plugin, &fakeNSResolver{})
|
||||
|
||||
rec := doRequest(t, h, http.MethodDelete, "/workspaces/ws-a/v2/memories/m-1",
|
||||
gin.Params{{Key: "id", Value: "ws-a"}, {Key: "memoryId", Value: "m-1"}})
|
||||
if rec.Code != http.StatusBadGateway {
|
||||
t.Errorf("expected 502, got %d", rec.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoriesV2_Forget_MissingMemoryID_400(t *testing.T) {
|
||||
h := newWiredHandler(&fakePlugin{}, &fakeNSResolver{})
|
||||
rec := doRequest(t, h, http.MethodDelete, "/workspaces/ws-a/v2/memories/",
|
||||
gin.Params{{Key: "id", Value: "ws-a"}, {Key: "memoryId", Value: ""}})
|
||||
if rec.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400, got %d", rec.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// View-shaping unit tests — pin individual helpers
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
func TestNamespaceLabel_AllKinds(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
kind contract.NamespaceKind
|
||||
want string
|
||||
}{
|
||||
{"workspace:abcdefghij", contract.NamespaceKindWorkspace, "Workspace (abcdefgh)"}, // truncated to 8
|
||||
{"workspace:abc", contract.NamespaceKindWorkspace, "Workspace (abc)"}, // shorter than 8, kept as-is
|
||||
{"team:t-99", contract.NamespaceKindTeam, "Team (t-99)"},
|
||||
{"org:acme", contract.NamespaceKindOrg, "Org (acme)"},
|
||||
{"custom:my-ns", contract.NamespaceKindCustom, "my-ns"},
|
||||
{"custom:", contract.NamespaceKindCustom, "custom:"}, // empty suffix → fallback to raw name
|
||||
{"weird-no-colon", contract.NamespaceKindWorkspace, "Workspace ()"},
|
||||
{"unknown:x", contract.NamespaceKind("future"), "unknown:x"}, // unknown kind → fallback to raw name
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := namespaceLabel(tc.name, tc.kind)
|
||||
if got != tc.want {
|
||||
t.Errorf("namespaceLabel(%q, %q) = %q, want %q", tc.name, tc.kind, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseLimit(t *testing.T) {
|
||||
cases := []struct {
|
||||
raw string
|
||||
want int
|
||||
}{
|
||||
{"", memoriesV2DefaultLimit},
|
||||
{"10", 10},
|
||||
{"0", memoriesV2DefaultLimit}, // ≤0 → default, not error
|
||||
{"-5", memoriesV2DefaultLimit}, // negative → default
|
||||
{"abc", memoriesV2DefaultLimit}, // non-numeric → default
|
||||
{"99999", memoriesV2MaxLimit}, // over cap → clamped
|
||||
{"100", memoriesV2MaxLimit}, // exactly cap → kept
|
||||
{"99", 99}, // just under cap → kept
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run("raw="+tc.raw, func(t *testing.T) {
|
||||
if got := parseLimit(tc.raw); got != tc.want {
|
||||
t.Errorf("parseLimit(%q) = %d, want %d", tc.raw, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryToView_AllFieldsPropagated(t *testing.T) {
|
||||
now := time.Now()
|
||||
exp := now.Add(time.Hour)
|
||||
score := 0.95
|
||||
m := contract.Memory{
|
||||
ID: "m-1",
|
||||
Namespace: "team:t-1",
|
||||
Content: "hello",
|
||||
Kind: contract.MemoryKindSummary,
|
||||
Source: contract.MemorySourceUser,
|
||||
Pin: true,
|
||||
ExpiresAt: &exp,
|
||||
CreatedAt: now,
|
||||
Score: &score,
|
||||
Propagation: map[string]interface{}{
|
||||
"source_workspace_id": "ws-other",
|
||||
},
|
||||
}
|
||||
v := memoryToView(m)
|
||||
if v.ID != m.ID || v.Namespace != m.Namespace || v.Content != m.Content {
|
||||
t.Errorf("basic fields: %+v", v)
|
||||
}
|
||||
if v.Kind != contract.MemoryKindSummary || v.Source != contract.MemorySourceUser {
|
||||
t.Errorf("kind/source: %+v", v)
|
||||
}
|
||||
if !v.Pin || v.ExpiresAt == nil || v.Score == nil || *v.Score != 0.95 {
|
||||
t.Errorf("pin/expires/score: %+v", v)
|
||||
}
|
||||
if v.SourceWorkspaceID != "ws-other" {
|
||||
t.Errorf("SourceWorkspaceID=%q, want ws-other", v.SourceWorkspaceID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNamespacesToViews_PreservesOrder(t *testing.T) {
|
||||
in := []namespace.Namespace{
|
||||
{Name: "team:t1", Kind: contract.NamespaceKindTeam},
|
||||
{Name: "workspace:w1", Kind: contract.NamespaceKindWorkspace},
|
||||
}
|
||||
out := namespacesToViews(in)
|
||||
if len(out) != 2 {
|
||||
t.Fatalf("len=%d", len(out))
|
||||
}
|
||||
// Resolver determines order; we just preserve it. (Sorting can be
|
||||
// added at the resolver layer if the canvas needs it.)
|
||||
if out[0].Name != "team:t1" || out[1].Name != "workspace:w1" {
|
||||
t.Errorf("order not preserved: %+v", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNamespacesToViews_EmptyInput_EmptySlice(t *testing.T) {
|
||||
out := namespacesToViews(nil)
|
||||
if out == nil {
|
||||
t.Error("expected empty slice, not nil — JSON-marshals as null otherwise")
|
||||
}
|
||||
if len(out) != 0 {
|
||||
t.Errorf("expected len 0, got %d", len(out))
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndexOfColon(t *testing.T) {
|
||||
cases := []struct {
|
||||
s string
|
||||
want int
|
||||
}{
|
||||
{"abc:def", 3},
|
||||
{":foo", 0},
|
||||
{"nocolon", -1},
|
||||
{"", -1},
|
||||
{"a:b:c", 1}, // first colon only
|
||||
}
|
||||
for _, tc := range cases {
|
||||
if got := indexOfColon(tc.s); got != tc.want {
|
||||
t.Errorf("indexOfColon(%q) = %d, want %d", tc.s, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestWithMemoryV2_FluentReturnsReceiver(t *testing.T) {
|
||||
// WithMemoryV2 is the production wiring path (takes *client.Client +
|
||||
// *namespace.Resolver). withMemoryV2APIs is the test path. The
|
||||
// production call is structural — assigns the two fields and
|
||||
// returns the receiver — but we still want a 100% coverage gate
|
||||
// to catch a future refactor that accidentally drops the fluent
|
||||
// return (breaking the boot-time chain in router.go).
|
||||
//
|
||||
// We can't pass nil for the typed pointers and call available()
|
||||
// here because Go interface-with-nil-pointer is non-nil at the
|
||||
// interface level — `available()` would not detect that as
|
||||
// "unwired". The unwired-plugin behaviour is exhaustively
|
||||
// covered by TestMemoriesV2_PluginUnwired_All503; this test just
|
||||
// pins the fluent contract.
|
||||
h := NewMemoriesV2Handler()
|
||||
got := h.WithMemoryV2(nil, nil)
|
||||
if got != h {
|
||||
t.Error("WithMemoryV2 must return receiver for fluent chaining")
|
||||
}
|
||||
}
|
||||
|
||||
func TestShortID(t *testing.T) {
|
||||
cases := map[string]string{
|
||||
"": "",
|
||||
"short": "short",
|
||||
"exactly8": "exactly8",
|
||||
"longer-than-eight": "longer-t",
|
||||
"abc-1234-5678-90ab": "abc-1234",
|
||||
}
|
||||
for in, want := range cases {
|
||||
if got := shortID(in); got != want {
|
||||
t.Errorf("shortID(%q) = %q, want %q", in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
@@ -21,6 +22,7 @@ import (
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/scheduler"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
@@ -61,10 +63,33 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
tier = defaults.Tier
|
||||
}
|
||||
if tier == 0 {
|
||||
tier = 2
|
||||
// Resolved via the same DefaultTier helper Create + Templates
|
||||
// use (#2910 PR-E). SaaS → T4 (one container per sibling EC2,
|
||||
// no neighbour to protect from), self-hosted → T3. Pre-#2910
|
||||
// this path returned T2 on self-hosted, asymmetric with
|
||||
// workspace.go's T3 — undocumented drift. Lifting to
|
||||
// DefaultTier collapses both call sites onto one source of
|
||||
// truth so a future tier-default change sweeps every entry
|
||||
// point at once. Templates that want a different floor still
|
||||
// declare `tier:` in config.yaml or `defaults.tier` in
|
||||
// org.yaml.
|
||||
if h.workspace != nil {
|
||||
tier = h.workspace.DefaultTier()
|
||||
} else {
|
||||
tier = 3
|
||||
}
|
||||
}
|
||||
|
||||
ctxLookup := context.Background()
|
||||
// 5s timeout bounds the lookup independently of any HTTP request
|
||||
// context. createWorkspaceTree runs in goroutines spawned from the
|
||||
// /org/import handler, so plumbing the request context here would
|
||||
// cascade-cancel into provisionWorkspaceAuto and abort in-flight
|
||||
// EC2 provisioning if the client disconnected mid-import — that's
|
||||
// the wrong behaviour. A short bounded timeout protects the
|
||||
// per-row SELECT against a wedged DB without taking the
|
||||
// drop-everything-on-disconnect tradeoff.
|
||||
ctxLookup, cancelLookup := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancelLookup()
|
||||
// Idempotency: if a workspace with the same (parent_id, name) already
|
||||
// exists, skip the INSERT + canvas_layouts + broadcast + provisioning.
|
||||
// This is what makes /org/import safe to call multiple times — the
|
||||
@@ -76,12 +101,31 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
// (parent exists, some children missing) backfill the missing children
|
||||
// instead of either no-op'ing the whole subtree or duplicating the
|
||||
// existing children.
|
||||
//
|
||||
// /org/import is ADDITIVE-ONLY, never destructive. Children present
|
||||
// in the existing tree but absent from the new template are
|
||||
// preserved (no DELETE on diff). Skip-path also does NOT propagate
|
||||
// updates to existing nodes — a re-import that adds an
|
||||
// initial_memory or schedule to an existing workspace is silently
|
||||
// dropped (the function bypasses seedInitialMemories, schedule SQL,
|
||||
// channel config for skipped rows). To force-update an existing
|
||||
// tree, delete and re-import or use a future /org/sync route.
|
||||
existingID, existing, lookupErr := h.lookupExistingChild(ctxLookup, ws.Name, parentID)
|
||||
if lookupErr != nil {
|
||||
return fmt.Errorf("idempotency check for %s: %w", ws.Name, lookupErr)
|
||||
}
|
||||
if existing {
|
||||
log.Printf("Org import: %q already exists (id=%s) — skipping create+provision, recursing into children for partial-match", ws.Name, existingID)
|
||||
parentRef := ""
|
||||
if parentID != nil {
|
||||
parentRef = *parentID
|
||||
}
|
||||
provlog.Event("provision.skip_existing", map[string]any{
|
||||
"name": ws.Name,
|
||||
"existing_id": existingID,
|
||||
"parent_id": parentRef,
|
||||
"tier": tier,
|
||||
})
|
||||
*results = append(*results, map[string]interface{}{
|
||||
"id": existingID,
|
||||
"name": ws.Name,
|
||||
@@ -580,6 +624,12 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
//
|
||||
// On sql.ErrNoRows: returns ("", false, nil) — caller should INSERT.
|
||||
// On a real DB error: returns ("", false, err) — caller propagates.
|
||||
//
|
||||
// errors.Is is wrap-safe — a future caller wrapping the error
|
||||
// (database/sql can wrap driver errors with %w in some setups) would
|
||||
// silently break a `err == sql.ErrNoRows` equality check, causing the
|
||||
// no-rows path to fall through to the "real DB error" branch and
|
||||
// abort the import. errors.Is unwraps.
|
||||
func (h *OrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) {
|
||||
var existingID string
|
||||
err := db.DB.QueryRowContext(ctx, `
|
||||
@@ -589,7 +639,7 @@ func (h *OrgHandler) lookupExistingChild(ctx context.Context, name string, paren
|
||||
AND status != 'removed'
|
||||
LIMIT 1
|
||||
`, name, parentID).Scan(&existingID)
|
||||
if err == sql.ErrNoRows {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return "", false, nil
|
||||
}
|
||||
if err != nil {
|
||||
|
||||
@@ -1,11 +1,17 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"go/ast"
|
||||
"go/parser"
|
||||
"go/token"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
@@ -119,6 +125,90 @@ func TestLookupExistingChild_DBError_Propagates(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestLookupExistingChild_WrappedNoRows_TreatedAsNotFound — pins the
|
||||
// wrap-safety of the errors.Is(err, sql.ErrNoRows) check. The previous
|
||||
// `err == sql.ErrNoRows` equality would fall through to the
|
||||
// "real DB error" branch on a wrapped no-rows error, aborting the
|
||||
// import for what is in fact the no-rows happy path. driver/sql
|
||||
// wrapping is currently a non-issue but a future driver change or a
|
||||
// caller that wraps the result via fmt.Errorf("…: %w", err) would
|
||||
// silently break the equality check. errors.Is unwraps.
|
||||
func TestLookupExistingChild_WrappedNoRows_TreatedAsNotFound(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
parent := "parent-1"
|
||||
wrapped := fmt.Errorf("driver-wrapped: %w", sql.ErrNoRows)
|
||||
mock.ExpectQuery(`SELECT id FROM workspaces`).
|
||||
WithArgs("Alpha", &parent).
|
||||
WillReturnError(wrapped)
|
||||
|
||||
h := &OrgHandler{}
|
||||
id, found, err := h.lookupExistingChild(context.Background(), "Alpha", &parent)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("expected wrapped no-rows to be treated as not-found (err=nil), got: %v", err)
|
||||
}
|
||||
if found {
|
||||
t.Errorf("expected found=false on wrapped no-rows, got found=true")
|
||||
}
|
||||
if id != "" {
|
||||
t.Errorf("expected empty id on wrapped no-rows, got %q", id)
|
||||
}
|
||||
}
|
||||
|
||||
// workspacesInsertRE matches a SQL literal that begins (after optional
|
||||
// leading whitespace) with `INSERT INTO workspaces` followed by `(` —
|
||||
// requiring the open-paren rules out lookalikes like
|
||||
// `INSERT INTO workspaces_audit`, `INSERT INTO workspace_secrets`,
|
||||
// `INSERT INTO workspace_channels`, `INSERT INTO canvas_layouts`. The
|
||||
// previous bytes.Index gate accepted `workspaces_audit` as a prefix
|
||||
// match — see RFC #2872 Important-1 for the silent-false-pass shape.
|
||||
var workspacesInsertRE = regexp.MustCompile(`(?s)^\s*INSERT\s+INTO\s+workspaces\s*\(`)
|
||||
|
||||
// findLookupAndWorkspacesInsertPos walks the AST of `src` and returns
|
||||
// the source positions of (a) the first call to `lookupExistingChild`
|
||||
// and (b) the first CallExpr whose argument list contains a STRING
|
||||
// BasicLit matching workspacesInsertRE. Either may be token.NoPos if
|
||||
// not found.
|
||||
//
|
||||
// Extracted as a helper so the gate logic can be exercised against
|
||||
// synthetic source — TestGate_FailsWhenLookupAfterInsert below proves
|
||||
// the gate actually catches the bug shape, not just the happy path.
|
||||
func findLookupAndWorkspacesInsertPos(t *testing.T, fname string, src []byte) (lookupPos, insertPos token.Pos, fset *token.FileSet) {
|
||||
t.Helper()
|
||||
fset = token.NewFileSet()
|
||||
file, err := parser.ParseFile(fset, fname, src, parser.ParseComments)
|
||||
if err != nil {
|
||||
t.Fatalf("parse %s: %v", fname, err)
|
||||
}
|
||||
lookupPos, insertPos = token.NoPos, token.NoPos
|
||||
ast.Inspect(file, func(n ast.Node) bool {
|
||||
call, ok := n.(*ast.CallExpr)
|
||||
if !ok {
|
||||
return true
|
||||
}
|
||||
if sel, ok := call.Fun.(*ast.SelectorExpr); ok {
|
||||
if sel.Sel.Name == "lookupExistingChild" && lookupPos == token.NoPos {
|
||||
lookupPos = call.Pos()
|
||||
}
|
||||
}
|
||||
for _, arg := range call.Args {
|
||||
lit, ok := arg.(*ast.BasicLit)
|
||||
if !ok || lit.Kind != token.STRING {
|
||||
continue
|
||||
}
|
||||
raw := lit.Value
|
||||
if unq, err := strconv.Unquote(raw); err == nil {
|
||||
raw = unq
|
||||
}
|
||||
if workspacesInsertRE.MatchString(raw) && insertPos == token.NoPos {
|
||||
insertPos = call.Pos()
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// Source-level guard — pins that org_import.go calls
|
||||
// h.lookupExistingChild BEFORE its INSERT INTO workspaces.
|
||||
//
|
||||
@@ -126,6 +216,11 @@ func TestLookupExistingChild_DBError_Propagates(t *testing.T) {
|
||||
// (idempotency check before INSERT), not just function names. If a
|
||||
// future refactor reintroduces the un-checked INSERT (the original
|
||||
// bug shape that leaked 72 workspaces in 4 days), this test fails.
|
||||
//
|
||||
// AST-walk implementation closes the silent-false-pass mode that the
|
||||
// previous bytes.Index gate had — see workspacesInsertRE comment for
|
||||
// the failure mode (workspaces_audit / workspace_secrets / etc.
|
||||
// shadowing the real target via prefix match).
|
||||
func TestCreateWorkspaceTree_CallsLookupBeforeInsert(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
@@ -135,17 +230,189 @@ func TestCreateWorkspaceTree_CallsLookupBeforeInsert(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("read org_import.go: %v", err)
|
||||
}
|
||||
lookupPos, insertPos, fset := findLookupAndWorkspacesInsertPos(t, "org_import.go", src)
|
||||
|
||||
lookupAt := bytes.Index(src, []byte("h.lookupExistingChild("))
|
||||
insertAt := bytes.Index(src, []byte("INSERT INTO workspaces"))
|
||||
|
||||
if lookupAt < 0 {
|
||||
t.Fatalf("org_import.go missing call to h.lookupExistingChild — idempotency check removed?")
|
||||
if lookupPos == token.NoPos {
|
||||
t.Fatalf("AST: no call to lookupExistingChild in org_import.go — idempotency check removed?")
|
||||
}
|
||||
if insertAt < 0 {
|
||||
t.Fatalf("org_import.go missing INSERT INTO workspaces — schema change?")
|
||||
if insertPos == token.NoPos {
|
||||
t.Fatalf("AST: no SQL literal matching `^\\s*INSERT INTO workspaces\\s*\\(` in any CallExpr in org_import.go — schema change or rename?")
|
||||
}
|
||||
if lookupAt > insertAt {
|
||||
t.Errorf("h.lookupExistingChild must come BEFORE INSERT INTO workspaces in org_import.go (lookup@%d, insert@%d) — non-idempotent ordering would re-leak under repeat /org/import calls", lookupAt, insertAt)
|
||||
if lookupPos > insertPos {
|
||||
t.Errorf("lookupExistingChild call at %s must come BEFORE INSERT INTO workspaces at %s — non-idempotent ordering would re-leak under repeat /org/import calls",
|
||||
fset.Position(lookupPos), fset.Position(insertPos))
|
||||
}
|
||||
}
|
||||
|
||||
// TestGate_FailsWhenLookupAfterInsert proves the gate actually catches
|
||||
// the bug it's named after — running it against synthetic Go source
|
||||
// where the lookup call is positioned AFTER the workspaces INSERT must
|
||||
// produce lookupPos > insertPos, which the production gate flags as
|
||||
// an ERROR. Without this test the gate could regress to "always pass"
|
||||
// and we wouldn't notice until the bug shipped again.
|
||||
//
|
||||
// Per memory feedback_assert_exact_not_substring.md: verify a
|
||||
// tightened test FAILS on old code before merging.
|
||||
func TestGate_FailsWhenLookupAfterInsert(t *testing.T) {
|
||||
const buggySrc = `package handlers
|
||||
|
||||
import "context"
|
||||
|
||||
type fakeDB struct{}
|
||||
|
||||
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
|
||||
|
||||
type fakeOrgHandler struct{}
|
||||
|
||||
func (h *fakeOrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) {
|
||||
return "", false, nil
|
||||
}
|
||||
|
||||
func buggyCreate(h *fakeOrgHandler, db fakeDB, ctx context.Context, name string, parentID *string) {
|
||||
// Bug shape: INSERT runs FIRST, lookup runs AFTER. This is the
|
||||
// non-idempotent ordering the gate exists to forbid.
|
||||
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", name)
|
||||
h.lookupExistingChild(ctx, name, parentID)
|
||||
}
|
||||
`
|
||||
lookupPos, insertPos, _ := findLookupAndWorkspacesInsertPos(t, "buggy.go", []byte(buggySrc))
|
||||
if lookupPos == token.NoPos || insertPos == token.NoPos {
|
||||
t.Fatalf("synthetic buggy source missing expected nodes (lookupPos=%v insertPos=%v) — helper logic regression", lookupPos, insertPos)
|
||||
}
|
||||
if lookupPos < insertPos {
|
||||
t.Fatalf("synthetic bug shape (lookup AFTER insert) returned lookupPos=%d < insertPos=%d — gate would NOT fire on actual bug, regression!", lookupPos, insertPos)
|
||||
}
|
||||
// Implicit: lookupPos > insertPos here, which the production gate
|
||||
// flags via t.Errorf. This proves the gate is live, not vestigial.
|
||||
}
|
||||
|
||||
// TestGate_IgnoresAuditTableShadow proves the regex tightening
|
||||
// actually ignores `INSERT INTO workspaces_audit` literals — the
|
||||
// specific shape #2872 cited as the silent-false-pass failure mode
|
||||
// for the previous bytes.Index gate.
|
||||
func TestGate_IgnoresAuditTableShadow(t *testing.T) {
|
||||
// Synthetic source with audit-table INSERT at line 1 (would be
|
||||
// position 0 under prefix-match) and lookup + real INSERT at later
|
||||
// positions. With the tightened regex, the audit literal is
|
||||
// ignored: insertPos points at the REAL INSERT, lookup precedes it,
|
||||
// gate passes correctly.
|
||||
const src = `package handlers
|
||||
|
||||
import "context"
|
||||
|
||||
type fakeDB struct{}
|
||||
|
||||
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
|
||||
|
||||
type fakeOrgHandler struct{}
|
||||
|
||||
func (h *fakeOrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) {
|
||||
return "", false, nil
|
||||
}
|
||||
|
||||
func okCreateWithAudit(h *fakeOrgHandler, db fakeDB, ctx context.Context, name string, parentID *string) {
|
||||
// Audit-table INSERT — should be IGNORED by the tightened regex.
|
||||
db.ExecContext(ctx, ` + "`INSERT INTO workspaces_audit (id, action) VALUES ($1, $2)`" + `, "x", "create_attempt")
|
||||
// Lookup BEFORE real INSERT — correct order.
|
||||
h.lookupExistingChild(ctx, name, parentID)
|
||||
// Real INSERT.
|
||||
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", name)
|
||||
}
|
||||
`
|
||||
lookupPos, insertPos, fset := findLookupAndWorkspacesInsertPos(t, "shadow.go", []byte(src))
|
||||
if lookupPos == token.NoPos || insertPos == token.NoPos {
|
||||
t.Fatalf("expected to find lookup + real INSERT, got lookupPos=%v insertPos=%v", lookupPos, insertPos)
|
||||
}
|
||||
// The audit-table INSERT is at line ~16 (column ~20-ish), the
|
||||
// lookup is at line 19, the real INSERT is at line 21. If the
|
||||
// regex regressed to prefix-match, insertPos would point at the
|
||||
// audit literal at line 16, and the gate would falsely fail
|
||||
// (lookup at 19 > "insert" at 16). With the tightened regex,
|
||||
// insertPos correctly points at line 21, and the gate passes.
|
||||
insertLine := fset.Position(insertPos).Line
|
||||
lookupLine := fset.Position(lookupPos).Line
|
||||
if insertLine < lookupLine {
|
||||
t.Errorf("regex regressed: audit shadow at line %d swallowed real INSERT (lookup at line %d). insertPos should point at the real INSERT (line ~21), not the audit literal.",
|
||||
insertLine, lookupLine)
|
||||
}
|
||||
if lookupPos > insertPos {
|
||||
t.Errorf("synthetic source has lookup at line %d before real INSERT at line %d, gate should pass (lookupPos < insertPos), got lookupPos=%d > insertPos=%d",
|
||||
lookupLine, insertLine, lookupPos, insertPos)
|
||||
}
|
||||
}
|
||||
|
||||
// TestWorkspacesInsertRE_RejectsLookalikes pins the regex that
|
||||
// discriminates the real workspaces INSERT from prefix-matching
|
||||
// lookalikes. If this regex regresses to a substring match, the
|
||||
// AST gate above silently false-passes when a future refactor
|
||||
// shadows the real INSERT with a workspaces_audit / workspace_secrets
|
||||
// / canvas_layouts literal placed earlier in source.
|
||||
func TestWorkspacesInsertRE_RejectsLookalikes(t *testing.T) {
|
||||
cases := []struct {
|
||||
sql string
|
||||
want bool
|
||||
comment string
|
||||
}{
|
||||
{"INSERT INTO workspaces (id, name) VALUES ($1, $2)", true, "real target"},
|
||||
{"\n\t\tINSERT INTO workspaces (id, name)\n\t\tVALUES ($1, $2)", true, "real target with leading whitespace + newlines (raw string literal shape)"},
|
||||
{"INSERT INTO workspaces_audit (id) VALUES ($1)", false, "underscore-suffix lookalike (the #2872 specific failure mode)"},
|
||||
{"INSERT INTO workspace_secrets (key, value) VALUES ($1, $2)", false, "prefix without trailing 's' (workspace_*)"},
|
||||
{"INSERT INTO workspace_channels (id) VALUES ($1)", false, "another workspace_* prefix"},
|
||||
{"INSERT INTO canvas_layouts (workspace_id, x, y) VALUES ($1, $2, $3)", false, "unrelated table that contains 'workspace' in a column ref"},
|
||||
{"UPDATE workspaces SET status='running' WHERE id=$1", false, "UPDATE shouldn't match"},
|
||||
{"SELECT * FROM workspaces WHERE id=$1", false, "SELECT shouldn't match"},
|
||||
{"-- comment about INSERT INTO workspaces (\nSELECT 1", false, "comment shouldn't match"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := workspacesInsertRE.MatchString(c.sql)
|
||||
if got != c.want {
|
||||
t.Errorf("workspacesInsertRE.MatchString(%q) = %v, want %v (%s)", c.sql, got, c.want, c.comment)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Confirm the regex actually matches the literal currently in
|
||||
// org_import.go. Pins the shape so `gofmt` reflows or trivial edits
|
||||
// to the SQL string don't silently disable the gate above.
|
||||
func TestWorkspacesInsertRE_MatchesActualSourceLiteral(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("getwd: %v", err)
|
||||
}
|
||||
src, err := os.ReadFile(filepath.Join(wd, "org_import.go"))
|
||||
if err != nil {
|
||||
t.Fatalf("read org_import.go: %v", err)
|
||||
}
|
||||
// Strip backtick strings, find any whose content matches.
|
||||
// Walk the source via parser.ParseFile to avoid string-search
|
||||
// drift if the literal is reflowed.
|
||||
fset := token.NewFileSet()
|
||||
file, err := parser.ParseFile(fset, filepath.Join(wd, "org_import.go"), src, parser.ParseComments)
|
||||
if err != nil {
|
||||
t.Fatalf("parse org_import.go: %v", err)
|
||||
}
|
||||
var matched bool
|
||||
ast.Inspect(file, func(n ast.Node) bool {
|
||||
lit, ok := n.(*ast.BasicLit)
|
||||
if !ok || lit.Kind != token.STRING {
|
||||
return true
|
||||
}
|
||||
raw := lit.Value
|
||||
if unq, err := strconv.Unquote(raw); err == nil {
|
||||
raw = unq
|
||||
}
|
||||
if workspacesInsertRE.MatchString(raw) {
|
||||
matched = true
|
||||
}
|
||||
return true
|
||||
})
|
||||
if !matched {
|
||||
t.Fatalf("no SQL literal in org_import.go matches workspacesInsertRE — gate is dead. Either the INSERT was renamed (update the regex) or the file was restructured (review the gate logic).")
|
||||
}
|
||||
// strings.Contains keeps the test informative: if the regex
|
||||
// stopped matching but the literal source still contains the
|
||||
// magic phrase, that's a regex-side failure (test the fix above).
|
||||
if !strings.Contains(string(src), "INSERT INTO workspaces") {
|
||||
t.Fatalf("org_import.go has no `INSERT INTO workspaces` substring at all — schema change?")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,476 @@
|
||||
//go:build integration
|
||||
// +build integration
|
||||
|
||||
// pending_uploads_integration_test.go — REAL Postgres integration
|
||||
// tests for the poll-mode chat upload flow (RFC: phases 1–3).
|
||||
//
|
||||
// Run with:
|
||||
//
|
||||
// docker run --rm -d --name pg-integration \
|
||||
// -e POSTGRES_PASSWORD=test -e POSTGRES_DB=molecule \
|
||||
// -p 55432:5432 postgres:15-alpine
|
||||
// sleep 4
|
||||
// psql ... < workspace-server/migrations/20260505100000_pending_uploads.up.sql
|
||||
// cd workspace-server
|
||||
// INTEGRATION_DB_URL="postgres://postgres:test@localhost:55432/molecule?sslmode=disable" \
|
||||
// go test -tags=integration ./internal/handlers/ -run Integration_PendingUploads
|
||||
//
|
||||
// CI (.github/workflows/handlers-postgres-integration.yml) runs this on
|
||||
// every PR that touches workspace-server/internal/handlers/** OR
|
||||
// workspace-server/migrations/**.
|
||||
//
|
||||
// Why these are NOT plain unit tests
|
||||
// ----------------------------------
|
||||
// The strict-sqlmock unit tests in storage_test.go pin which SQL
|
||||
// statements fire — they are fast and let us iterate without a DB. But
|
||||
// sqlmock CANNOT detect bugs that depend on the actual row state after
|
||||
// the SQL runs. In particular:
|
||||
//
|
||||
// - the WITH … DELETE … RETURNING CTE used by Sweep depends on
|
||||
// Postgres' `make_interval` function and the table's CHECK
|
||||
// constraints. sqlmock would happily accept a hand-written SQL
|
||||
// literal that Postgres rejects at runtime.
|
||||
// - the partial index `idx_pending_uploads_unacked` (created by the
|
||||
// Phase 1 migration) only catches a wrong WHERE predicate at real-
|
||||
// query-plan time.
|
||||
//
|
||||
// These tests close those gaps by booting a real Postgres, running the
|
||||
// production helpers, and SELECTing the row to verify the observable
|
||||
// state matches the expected outcome.
|
||||
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
_ "github.com/lib/pq"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
|
||||
)
|
||||
|
||||
// integrationDB_PendingUploads opens a connection from $INTEGRATION_DB_URL
|
||||
// (skipping the test if unset), wipes the pending_uploads table for
|
||||
// isolation, and registers a Cleanup that closes the connection.
|
||||
//
|
||||
// NOT SAFE FOR `t.Parallel()` — each test gets the table to itself.
|
||||
// Mirrors the integrationDB helper in delegation_ledger_integration_test.go
|
||||
// but kept separate so each table's wipe step is local to its tests.
|
||||
func integrationDB_PendingUploads(t *testing.T) *sql.DB {
|
||||
t.Helper()
|
||||
url := os.Getenv("INTEGRATION_DB_URL")
|
||||
if url == "" {
|
||||
t.Skip("INTEGRATION_DB_URL not set; skipping (local devs: see file header)")
|
||||
}
|
||||
conn, err := sql.Open("postgres", url)
|
||||
if err != nil {
|
||||
t.Fatalf("open: %v", err)
|
||||
}
|
||||
if err := conn.Ping(); err != nil {
|
||||
t.Fatalf("ping: %v", err)
|
||||
}
|
||||
if _, err := conn.ExecContext(context.Background(), `DELETE FROM pending_uploads`); err != nil {
|
||||
t.Fatalf("cleanup: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { conn.Close() })
|
||||
return conn
|
||||
}
|
||||
|
||||
func TestIntegration_PendingUploads_PutGetAckRoundTrip(t *testing.T) {
|
||||
conn := integrationDB_PendingUploads(t)
|
||||
store := pendinguploads.NewPostgres(conn)
|
||||
ctx := context.Background()
|
||||
|
||||
wsID := uuid.New()
|
||||
fileID, err := store.Put(ctx, wsID, []byte("hello PDF"), "report.pdf", "application/pdf")
|
||||
if err != nil {
|
||||
t.Fatalf("Put: %v", err)
|
||||
}
|
||||
|
||||
// Get reads back the row.
|
||||
rec, err := store.Get(ctx, fileID)
|
||||
if err != nil {
|
||||
t.Fatalf("Get: %v", err)
|
||||
}
|
||||
if rec.WorkspaceID != wsID {
|
||||
t.Errorf("workspace_id = %s, want %s", rec.WorkspaceID, wsID)
|
||||
}
|
||||
if string(rec.Content) != "hello PDF" {
|
||||
t.Errorf("content = %q, want %q", rec.Content, "hello PDF")
|
||||
}
|
||||
if rec.Filename != "report.pdf" {
|
||||
t.Errorf("filename = %q, want %q", rec.Filename, "report.pdf")
|
||||
}
|
||||
if rec.AckedAt != nil {
|
||||
t.Errorf("AckedAt should be nil before Ack, got %v", rec.AckedAt)
|
||||
}
|
||||
|
||||
// MarkFetched stamps fetched_at.
|
||||
if err := store.MarkFetched(ctx, fileID); err != nil {
|
||||
t.Fatalf("MarkFetched: %v", err)
|
||||
}
|
||||
|
||||
// Re-read to confirm.
|
||||
rec2, err := store.Get(ctx, fileID)
|
||||
if err != nil {
|
||||
t.Fatalf("Get after MarkFetched: %v", err)
|
||||
}
|
||||
if rec2.FetchedAt == nil {
|
||||
t.Errorf("FetchedAt should be set after MarkFetched")
|
||||
}
|
||||
|
||||
// Ack flips acked_at; subsequent Gets return ErrNotFound (acked rows
|
||||
// are filtered out at the SELECT predicate).
|
||||
if err := store.Ack(ctx, fileID); err != nil {
|
||||
t.Fatalf("Ack: %v", err)
|
||||
}
|
||||
if _, err := store.Get(ctx, fileID); err != pendinguploads.ErrNotFound {
|
||||
t.Errorf("Get after Ack: got %v, want ErrNotFound", err)
|
||||
}
|
||||
|
||||
// Idempotent re-ack succeeds.
|
||||
if err := store.Ack(ctx, fileID); err != nil {
|
||||
t.Errorf("re-Ack should be idempotent, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegration_PendingUploads_Sweep_DeletesAckedAfterRetention(t *testing.T) {
|
||||
conn := integrationDB_PendingUploads(t)
|
||||
store := pendinguploads.NewPostgres(conn)
|
||||
ctx := context.Background()
|
||||
|
||||
wsID := uuid.New()
|
||||
fid, err := store.Put(ctx, wsID, []byte("data"), "x.txt", "text/plain")
|
||||
if err != nil {
|
||||
t.Fatalf("Put: %v", err)
|
||||
}
|
||||
if err := store.Ack(ctx, fid); err != nil {
|
||||
t.Fatalf("Ack: %v", err)
|
||||
}
|
||||
|
||||
// retention=1h, row was acked just now → not yet eligible.
|
||||
res, err := store.Sweep(ctx, time.Hour)
|
||||
if err != nil {
|
||||
t.Fatalf("Sweep(1h): %v", err)
|
||||
}
|
||||
if res.Total() != 0 {
|
||||
t.Errorf("expected 0 deletions yet, got %+v", res)
|
||||
}
|
||||
|
||||
// retention=0 → row IS eligible immediately.
|
||||
res, err = store.Sweep(ctx, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("Sweep(0): %v", err)
|
||||
}
|
||||
if res.Acked != 1 || res.Expired != 0 {
|
||||
t.Errorf("expected acked=1 expired=0, got %+v", res)
|
||||
}
|
||||
|
||||
// Verify row is actually gone — not just un-fetchable.
|
||||
var n int
|
||||
if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE file_id = $1`, fid).Scan(&n); err != nil {
|
||||
t.Fatalf("count: %v", err)
|
||||
}
|
||||
if n != 0 {
|
||||
t.Errorf("row should be DELETEd, found %d rows", n)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegration_PendingUploads_Sweep_DeletesExpiredUnacked(t *testing.T) {
|
||||
conn := integrationDB_PendingUploads(t)
|
||||
store := pendinguploads.NewPostgres(conn)
|
||||
ctx := context.Background()
|
||||
|
||||
wsID := uuid.New()
|
||||
fid, err := store.Put(ctx, wsID, []byte("data"), "x.txt", "text/plain")
|
||||
if err != nil {
|
||||
t.Fatalf("Put: %v", err)
|
||||
}
|
||||
|
||||
// Manually backdate expires_at so the row IS expired. We don't ack,
|
||||
// so this exercises the unacked-and-expired branch of the WHERE
|
||||
// clause specifically.
|
||||
if _, err := conn.ExecContext(ctx,
|
||||
`UPDATE pending_uploads SET expires_at = now() - interval '1 minute' WHERE file_id = $1`,
|
||||
fid,
|
||||
); err != nil {
|
||||
t.Fatalf("backdate: %v", err)
|
||||
}
|
||||
|
||||
res, err := store.Sweep(ctx, time.Hour)
|
||||
if err != nil {
|
||||
t.Fatalf("Sweep: %v", err)
|
||||
}
|
||||
if res.Acked != 0 || res.Expired != 1 {
|
||||
t.Errorf("expected acked=0 expired=1, got %+v", res)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegration_PendingUploads_Sweep_DeletesBothCategoriesInOneCycle(t *testing.T) {
|
||||
conn := integrationDB_PendingUploads(t)
|
||||
store := pendinguploads.NewPostgres(conn)
|
||||
ctx := context.Background()
|
||||
|
||||
wsID := uuid.New()
|
||||
|
||||
// Three rows: one acked (eligible at retention=0), one expired
|
||||
// unacked, one fresh unacked (must NOT be deleted).
|
||||
ackedFID, err := store.Put(ctx, wsID, []byte("acked"), "a.txt", "text/plain")
|
||||
if err != nil {
|
||||
t.Fatalf("Put acked: %v", err)
|
||||
}
|
||||
if err := store.Ack(ctx, ackedFID); err != nil {
|
||||
t.Fatalf("Ack: %v", err)
|
||||
}
|
||||
|
||||
expiredFID, err := store.Put(ctx, wsID, []byte("expired"), "e.txt", "text/plain")
|
||||
if err != nil {
|
||||
t.Fatalf("Put expired: %v", err)
|
||||
}
|
||||
if _, err := conn.ExecContext(ctx,
|
||||
`UPDATE pending_uploads SET expires_at = now() - interval '1 minute' WHERE file_id = $1`,
|
||||
expiredFID,
|
||||
); err != nil {
|
||||
t.Fatalf("backdate: %v", err)
|
||||
}
|
||||
|
||||
freshFID, err := store.Put(ctx, wsID, []byte("fresh"), "f.txt", "text/plain")
|
||||
if err != nil {
|
||||
t.Fatalf("Put fresh: %v", err)
|
||||
}
|
||||
|
||||
res, err := store.Sweep(ctx, 0) // retention=0 makes the acked row eligible
|
||||
if err != nil {
|
||||
t.Fatalf("Sweep: %v", err)
|
||||
}
|
||||
if res.Acked != 1 || res.Expired != 1 {
|
||||
t.Errorf("expected acked=1 expired=1, got %+v", res)
|
||||
}
|
||||
|
||||
// Fresh row survives.
|
||||
rec, err := store.Get(ctx, freshFID)
|
||||
if err != nil {
|
||||
t.Errorf("fresh row should still be Get-able, got err=%v", err)
|
||||
}
|
||||
if rec.FileID != freshFID {
|
||||
t.Errorf("fresh row file_id = %s, want %s", rec.FileID, freshFID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegration_PendingUploads_PutEnforcesSizeCap(t *testing.T) {
|
||||
conn := integrationDB_PendingUploads(t)
|
||||
store := pendinguploads.NewPostgres(conn)
|
||||
ctx := context.Background()
|
||||
|
||||
wsID := uuid.New()
|
||||
tooBig := make([]byte, pendinguploads.MaxFileBytes+1)
|
||||
if _, err := store.Put(ctx, wsID, tooBig, "big.bin", "application/octet-stream"); err != pendinguploads.ErrTooLarge {
|
||||
t.Errorf("expected ErrTooLarge, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestIntegration_PendingUploads_PutBatch_HappyPath_AllRowsCommit pins the
|
||||
// "all rows commit" leg of the PutBatch atomicity contract against a real
|
||||
// Postgres. sqlmock can't catch a regression where the Go-side Tx machinery
|
||||
// silently no-ops the inserts (e.g., wrong driver options on BeginTx); only
|
||||
// COUNT(*) on the real table can.
|
||||
func TestIntegration_PendingUploads_PutBatch_HappyPath_AllRowsCommit(t *testing.T) {
|
||||
conn := integrationDB_PendingUploads(t)
|
||||
store := pendinguploads.NewPostgres(conn)
|
||||
ctx := context.Background()
|
||||
|
||||
wsID := uuid.New()
|
||||
|
||||
// Pre-existing row so the COUNT(*) baseline is non-zero — proves
|
||||
// PutBatch adds rows incrementally rather than overwriting.
|
||||
if _, err := store.Put(ctx, wsID, []byte("seed"), "seed.txt", "text/plain"); err != nil {
|
||||
t.Fatalf("seed Put: %v", err)
|
||||
}
|
||||
|
||||
items := []pendinguploads.PutItem{
|
||||
{Content: []byte("alpha"), Filename: "alpha.txt", Mimetype: "text/plain"},
|
||||
{Content: []byte("beta"), Filename: "beta.bin", Mimetype: "application/octet-stream"},
|
||||
{Content: []byte("gamma"), Filename: "gamma.pdf", Mimetype: "application/pdf"},
|
||||
}
|
||||
ids, err := store.PutBatch(ctx, wsID, items)
|
||||
if err != nil {
|
||||
t.Fatalf("PutBatch: %v", err)
|
||||
}
|
||||
if len(ids) != len(items) {
|
||||
t.Fatalf("ids length %d, want %d", len(ids), len(items))
|
||||
}
|
||||
|
||||
// Each returned id round-trips through Get with the right content.
|
||||
for i, id := range ids {
|
||||
rec, err := store.Get(ctx, id)
|
||||
if err != nil {
|
||||
t.Fatalf("Get item %d (%s): %v", i, id, err)
|
||||
}
|
||||
if string(rec.Content) != string(items[i].Content) {
|
||||
t.Errorf("item %d content = %q, want %q", i, rec.Content, items[i].Content)
|
||||
}
|
||||
if rec.Filename != items[i].Filename {
|
||||
t.Errorf("item %d filename = %q, want %q", i, rec.Filename, items[i].Filename)
|
||||
}
|
||||
}
|
||||
|
||||
var n int
|
||||
if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&n); err != nil {
|
||||
t.Fatalf("count: %v", err)
|
||||
}
|
||||
if n != 4 {
|
||||
t.Errorf("workspace row count = %d, want 4 (1 seed + 3 batch)", n)
|
||||
}
|
||||
}
|
||||
|
||||
// TestIntegration_PendingUploads_PutBatch_AtomicRollback_NoLeakOnFailure
|
||||
// proves the all-or-nothing contract end-to-end against real Postgres MVCC.
|
||||
//
|
||||
// Strategy: build a 3-item batch where item index 1 carries a filename with
|
||||
// an embedded NUL byte. lib/pq rejects NULs in TEXT columns at the protocol
|
||||
// layer (`pq: invalid byte sequence for encoding "UTF8": 0x00`), which
|
||||
// triggers the per-row INSERT error path in PutBatch. The first item's
|
||||
// INSERT…RETURNING already wrote a row to the Tx's snapshot, so a buggy
|
||||
// rollback would leave that row visible after PutBatch returns.
|
||||
//
|
||||
// Postgrest semantics: ROLLBACK is the only way a real DB can guarantee the
|
||||
// "no leak" contract; a unit test with sqlmock can prove the Go function
|
||||
// CALLED Rollback, but only this integration test proves Postgres actually
|
||||
// HONORED it.
|
||||
func TestIntegration_PendingUploads_PutBatch_AtomicRollback_NoLeakOnFailure(t *testing.T) {
|
||||
conn := integrationDB_PendingUploads(t)
|
||||
store := pendinguploads.NewPostgres(conn)
|
||||
ctx := context.Background()
|
||||
|
||||
wsID := uuid.New()
|
||||
|
||||
// Baseline COUNT(*) for this workspace — must remain 0 after a failed batch.
|
||||
var before int
|
||||
if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&before); err != nil {
|
||||
t.Fatalf("baseline count: %v", err)
|
||||
}
|
||||
if before != 0 {
|
||||
t.Fatalf("workspace not isolated: baseline = %d, want 0", before)
|
||||
}
|
||||
|
||||
// Item 1 has a NUL byte in the filename — Go-side pre-validation
|
||||
// (which only checks empty/length) lets it through, so the INSERT
|
||||
// reaches lib/pq, which rejects it at the protocol level. That's the
|
||||
// canonical "DB-side error mid-batch" we want to exercise.
|
||||
items := []pendinguploads.PutItem{
|
||||
{Content: []byte("ok"), Filename: "ok.txt", Mimetype: "text/plain"},
|
||||
{Content: []byte("bad"), Filename: "bad\x00name.txt", Mimetype: "text/plain"},
|
||||
{Content: []byte("never"), Filename: "never.txt", Mimetype: "text/plain"},
|
||||
}
|
||||
_, err := store.PutBatch(ctx, wsID, items)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error from NUL-byte filename, got nil")
|
||||
}
|
||||
|
||||
// THE assertion this whole test exists for: even though item 0's
|
||||
// INSERT…RETURNING succeeded inside the Tx, the rollback unwound
|
||||
// it — zero rows for this workspace, not one (let alone three).
|
||||
var after int
|
||||
if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&after); err != nil {
|
||||
t.Fatalf("post-failure count: %v", err)
|
||||
}
|
||||
if after != 0 {
|
||||
t.Errorf("Tx rollback leaked rows: workspace count = %d, want 0", after)
|
||||
}
|
||||
}
|
||||
|
||||
// TestIntegration_PendingUploads_PutBatch_Oversize_NoTxOpened verifies the
|
||||
// pre-validation short-circuit: an oversized item rejects with ErrTooLarge
|
||||
// BEFORE any Tx opens, so the table is untouched. The unit test (sqlmock
|
||||
// with zero expectations) catches the Go-side path; this test sanity-checks
|
||||
// no real DB I/O happens by confirming COUNT(*) doesn't move.
|
||||
func TestIntegration_PendingUploads_PutBatch_Oversize_NoTxOpened(t *testing.T) {
|
||||
conn := integrationDB_PendingUploads(t)
|
||||
store := pendinguploads.NewPostgres(conn)
|
||||
ctx := context.Background()
|
||||
|
||||
wsID := uuid.New()
|
||||
tooBig := make([]byte, pendinguploads.MaxFileBytes+1)
|
||||
_, err := store.PutBatch(ctx, wsID, []pendinguploads.PutItem{
|
||||
{Content: []byte("ok"), Filename: "ok.txt"},
|
||||
{Content: tooBig, Filename: "too-big.bin"},
|
||||
})
|
||||
if err != pendinguploads.ErrTooLarge {
|
||||
t.Fatalf("expected ErrTooLarge, got %v", err)
|
||||
}
|
||||
var n int
|
||||
if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&n); err != nil {
|
||||
t.Fatalf("count: %v", err)
|
||||
}
|
||||
if n != 0 {
|
||||
t.Errorf("pre-validation did NOT short-circuit: count = %d, want 0", n)
|
||||
}
|
||||
}
|
||||
|
||||
// TestIntegration_PendingUploads_AckedIndexExists verifies the Phase 5a
|
||||
// migration (20260505200000_pending_uploads_acked_index.up.sql) actually
|
||||
// created idx_pending_uploads_acked with the right partial-index predicate.
|
||||
//
|
||||
// Why pg_indexes and not EXPLAIN: the planner prefers Seq Scan on tiny
|
||||
// tables regardless of available indexes — a plan-shape check would be
|
||||
// flaky under real test loads. The contract we care about is "the index
|
||||
// exists with the predicate we wrote in the migration"; pg_indexes is
|
||||
// the canonical source for that, robust to row count and planner version.
|
||||
func TestIntegration_PendingUploads_AckedIndexExists(t *testing.T) {
|
||||
conn := integrationDB_PendingUploads(t)
|
||||
ctx := context.Background()
|
||||
|
||||
var indexdef string
|
||||
err := conn.QueryRowContext(ctx, `
|
||||
SELECT indexdef FROM pg_indexes
|
||||
WHERE schemaname = 'public'
|
||||
AND tablename = 'pending_uploads'
|
||||
AND indexname = 'idx_pending_uploads_acked'
|
||||
`).Scan(&indexdef)
|
||||
if err == sql.ErrNoRows {
|
||||
t.Fatal("idx_pending_uploads_acked is missing — migration 20260505200000 not applied")
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("pg_indexes query: %v", err)
|
||||
}
|
||||
|
||||
// Pin the partial-index predicate. Without "WHERE acked_at IS NOT NULL"
|
||||
// we'd be indexing the entire table (defeats the point — most rows are
|
||||
// unacked), and the existing idx_pending_uploads_unacked already covers
|
||||
// the inverse predicate.
|
||||
if !strings.Contains(indexdef, "(acked_at)") {
|
||||
t.Errorf("index missing acked_at column: %s", indexdef)
|
||||
}
|
||||
if !strings.Contains(indexdef, "WHERE (acked_at IS NOT NULL)") {
|
||||
t.Errorf("index missing partial predicate: %s", indexdef)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegration_PendingUploads_GetIgnoresExpiredAndAcked(t *testing.T) {
|
||||
conn := integrationDB_PendingUploads(t)
|
||||
store := pendinguploads.NewPostgres(conn)
|
||||
ctx := context.Background()
|
||||
|
||||
wsID := uuid.New()
|
||||
fid, err := store.Put(ctx, wsID, []byte("data"), "x.txt", "text/plain")
|
||||
if err != nil {
|
||||
t.Fatalf("Put: %v", err)
|
||||
}
|
||||
|
||||
// Backdate expires_at — Get must return ErrNotFound, even though the
|
||||
// row physically exists in the table (Sweep hasn't run).
|
||||
if _, err := conn.ExecContext(ctx,
|
||||
`UPDATE pending_uploads SET expires_at = now() - interval '1 minute' WHERE file_id = $1`,
|
||||
fid,
|
||||
); err != nil {
|
||||
t.Fatalf("backdate: %v", err)
|
||||
}
|
||||
if _, err := store.Get(ctx, fid); err != pendinguploads.ErrNotFound {
|
||||
t.Errorf("Get after expiry: got %v, want ErrNotFound", err)
|
||||
}
|
||||
}
|
||||
@@ -71,6 +71,20 @@ func (f *fakeStorage) Ack(_ context.Context, fileID uuid.UUID) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Sweep is required by the Storage interface (Phase 3 GC). Not exercised
|
||||
// by these handler tests — the dedicated sweeper_test.go covers it.
|
||||
func (f *fakeStorage) Sweep(_ context.Context, _ time.Duration) (pendinguploads.SweepResult, error) {
|
||||
return pendinguploads.SweepResult{}, nil
|
||||
}
|
||||
|
||||
// PutBatch is required by the Storage interface; the upload handler
|
||||
// tests live in chat_files_poll_test.go and use a separate fake
|
||||
// (inMemStorage). Stubbed here because the Get/Ack tests don't drive
|
||||
// PutBatch, but the interface must be satisfied.
|
||||
func (f *fakeStorage) PutBatch(_ context.Context, _ uuid.UUID, _ []pendinguploads.PutItem) ([]uuid.UUID, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func newRouter(handler *handlers.PendingUploadsHandler) *gin.Engine {
|
||||
gin.SetMode(gin.TestMode)
|
||||
r := gin.New()
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
package handlers
|
||||
|
||||
// provlog_emit_test.go — pins that the structured-logging emit sites
|
||||
// added for #2867 PR-D actually fire when their boundary is crossed.
|
||||
//
|
||||
// These are call-site contract tests, not provlog package tests (those
|
||||
// live next to the helper). The assertion is "this dispatcher path
|
||||
// emits this event name" — if a refactor moves the call out of the
|
||||
// boundary helper, the gate fails. Fields are NOT pinned here on
|
||||
// purpose; the field set is convenience for ops, not contract for the
|
||||
// emit point. Pinning fields would block additive evolution of the
|
||||
// payload (see also feedback_behavior_based_ast_gates.md).
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"log"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
|
||||
)
|
||||
|
||||
// captureProvLog redirects the global logger to a buffer for the test
|
||||
// duration. provlog.Event uses log.Printf, so this is the only seam.
|
||||
// Returned mutex protects against concurrent reads from the goroutine
|
||||
// fired by provisionWorkspaceAuto (the goroutine never returns in
|
||||
// these tests because Start() is stubbed, but the buffer can still be
|
||||
// touched by it racing the assertion).
|
||||
func captureProvLog(t *testing.T) (read func() string) {
|
||||
t.Helper()
|
||||
var buf bytes.Buffer
|
||||
var mu sync.Mutex
|
||||
prevWriter := log.Writer()
|
||||
prevFlags := log.Flags()
|
||||
log.SetFlags(0)
|
||||
log.SetOutput(&safeWriter{buf: &buf, mu: &mu})
|
||||
t.Cleanup(func() {
|
||||
log.SetOutput(prevWriter)
|
||||
log.SetFlags(prevFlags)
|
||||
})
|
||||
return func() string {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
return buf.String()
|
||||
}
|
||||
}
|
||||
|
||||
// TestProvisionWorkspaceAutoSync_EmitsProvisionStart — sync variant is
|
||||
// chosen for the assertion path because it returns once the (stubbed)
|
||||
// Start() has been called, so we know the emit has flushed. The async
|
||||
// variant would race a goroutine.
|
||||
func TestProvisionWorkspaceAutoSync_EmitsProvisionStart(t *testing.T) {
|
||||
read := captureProvLog(t)
|
||||
h := &WorkspaceHandler{cpProv: &trackingCPProv{}}
|
||||
// Best-effort: the body will hit DB code under provisionWorkspaceCP
|
||||
// — we only need the emit at the entry, which fires unconditionally
|
||||
// before the dispatch. Recovering from any later panic keeps the
|
||||
// test focused.
|
||||
defer func() { _ = recover() }()
|
||||
h.provisionWorkspaceAutoSync("ws-test-1", "tmpl", nil, models.CreateWorkspacePayload{
|
||||
Name: "n", Tier: 4, Runtime: "claude-code",
|
||||
})
|
||||
got := read()
|
||||
if !strings.Contains(got, "evt: provision.start ") {
|
||||
t.Fatalf("expected provision.start emit, got log:\n%s", got)
|
||||
}
|
||||
if !strings.Contains(got, `"workspace_id":"ws-test-1"`) {
|
||||
t.Errorf("workspace_id not in payload: %s", got)
|
||||
}
|
||||
if !strings.Contains(got, `"sync":true`) {
|
||||
t.Errorf("sync flag not pinned for sync dispatcher: %s", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStopForRestart_EmitsRestartPreStop — emit fires before the actual
|
||||
// Stop call, so the trackingCPProv stub doesn't need to be wired for
|
||||
// real Stop semantics. Backend label "cp" pinned because that's the
|
||||
// SaaS path; we don't pin "docker" or "none" branches here (separate
|
||||
// tests would only re-test the trivial branch label switch).
|
||||
func TestStopForRestart_EmitsRestartPreStop(t *testing.T) {
|
||||
read := captureProvLog(t)
|
||||
h := &WorkspaceHandler{cpProv: &trackingCPProv{}}
|
||||
defer func() { _ = recover() }()
|
||||
h.stopForRestart(context.Background(), "ws-restart-1")
|
||||
got := read()
|
||||
if !strings.Contains(got, "evt: restart.pre_stop ") {
|
||||
t.Fatalf("expected restart.pre_stop emit, got log:\n%s", got)
|
||||
}
|
||||
if !strings.Contains(got, `"workspace_id":"ws-restart-1"`) {
|
||||
t.Errorf("workspace_id not in payload: %s", got)
|
||||
}
|
||||
if !strings.Contains(got, `"backend":"cp"`) {
|
||||
t.Errorf("backend label missing or wrong: %s", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStopForRestart_EmitsBackendNoneWhenUnwired — pin the no-backend
|
||||
// branch so a future refactor that drops the label switch is caught.
|
||||
// This is the silent-Stop case (workspace_dispatchers.go:StopWorkspaceAuto
|
||||
// returns nil for unwired backends); the emit ensures the operator can
|
||||
// still see the boundary in the log.
|
||||
func TestStopForRestart_EmitsBackendNoneWhenUnwired(t *testing.T) {
|
||||
read := captureProvLog(t)
|
||||
h := &WorkspaceHandler{} // both nil
|
||||
h.stopForRestart(context.Background(), "ws-restart-2")
|
||||
got := read()
|
||||
if !strings.Contains(got, `"backend":"none"`) {
|
||||
t.Fatalf("expected backend=none for unwired handler: %s", got)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
||||
)
|
||||
|
||||
// Tests for the SaaS-aware default-tier resolution introduced in #2901
|
||||
// and hardened in #2910 (multi-model review of #2901 found the original
|
||||
// claim of "all green" was passing because no SaaS-mode test existed).
|
||||
//
|
||||
// These tests pin three invariants:
|
||||
//
|
||||
// 1. WorkspaceHandler.IsSaaS() returns true when cpProv is wired,
|
||||
// false otherwise.
|
||||
// 2. WorkspaceHandler.DefaultTier() returns 4 on SaaS, 3 self-hosted.
|
||||
// 3. generateDefaultConfig (TemplatesHandler.Import path) writes the
|
||||
// passed-in tier into the generated config.yaml — pre-#2910 it
|
||||
// was hardcoded to 3 and silently disagreed with the create-
|
||||
// handler default on SaaS.
|
||||
|
||||
// stubCPProv is a minimal stand-in for the CP provisioner — only
|
||||
// exercises the IsSaaS / HasProvisioner contract, never invoked in
|
||||
// these tests.
|
||||
type stubCPProv struct{}
|
||||
|
||||
func (stubCPProv) Start(_ interface{}, _ provisioner.WorkspaceConfig) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
func (stubCPProv) Stop(_ interface{}, _ string) error { return nil }
|
||||
func (stubCPProv) Restart(_ interface{}, _ provisioner.WorkspaceConfig) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
func TestIsSaaS_TrueWhenCPProvWired(t *testing.T) {
|
||||
h := &WorkspaceHandler{cpProv: &trackingCPProv{}}
|
||||
if !h.IsSaaS() {
|
||||
t.Errorf("IsSaaS()=false with cpProv wired; expected true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsSaaS_FalseWhenOnlyDocker(t *testing.T) {
|
||||
// provisioner field set, cpProv nil — the self-hosted path.
|
||||
// Use a non-nil sentinel so the check actually has something to
|
||||
// disagree with. trackingCPProv lives in workspace_provision_auto_test.go
|
||||
// and is the established stub for these handler-level tests.
|
||||
h := &WorkspaceHandler{provisioner: nil, cpProv: nil}
|
||||
if h.IsSaaS() {
|
||||
t.Errorf("IsSaaS()=true with both backends nil; expected false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultTier_SaaS_IsT4(t *testing.T) {
|
||||
h := &WorkspaceHandler{cpProv: &trackingCPProv{}}
|
||||
if got := h.DefaultTier(); got != 4 {
|
||||
t.Errorf("SaaS DefaultTier()=%d; expected 4", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultTier_SelfHosted_IsT3(t *testing.T) {
|
||||
h := &WorkspaceHandler{}
|
||||
if got := h.DefaultTier(); got != 3 {
|
||||
t.Errorf("self-hosted DefaultTier()=%d; expected 3", got)
|
||||
}
|
||||
}
|
||||
|
||||
// generateDefaultConfig — pin that the tier param flows into the
|
||||
// emitted config.yaml verbatim. Pre-#2910 this was hardcoded "tier: 3"
|
||||
// regardless of caller intent.
|
||||
func TestGenerateDefaultConfig_RespectsTierParam(t *testing.T) {
|
||||
cfg := generateDefaultConfig("Test Agent", map[string]string{"system-prompt.md": ""}, 4)
|
||||
if !strings.Contains(cfg, "tier: 4\n") {
|
||||
t.Errorf("expected `tier: 4` in generated config, got:\n%s", cfg)
|
||||
}
|
||||
// The pre-#2910 hardcoded `tier: 3` line must NOT appear.
|
||||
if strings.Contains(cfg, "tier: 3\n") {
|
||||
t.Errorf("config should not contain `tier: 3` when caller passed 4, got:\n%s", cfg)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateDefaultConfig_SelfHostedTierT3(t *testing.T) {
|
||||
cfg := generateDefaultConfig("Test Agent", map[string]string{"system-prompt.md": ""}, 3)
|
||||
if !strings.Contains(cfg, "tier: 3\n") {
|
||||
t.Errorf("expected `tier: 3` in generated config, got:\n%s", cfg)
|
||||
}
|
||||
}
|
||||
|
||||
// Bounds check — caller passes 0 or out-of-range, helper falls back
|
||||
// to T3 (the safer-of-the-two when deployment mode can't be resolved).
|
||||
func TestGenerateDefaultConfig_OutOfRangeFallsBackToT3(t *testing.T) {
|
||||
for _, tier := range []int{0, -1, 99} {
|
||||
cfg := generateDefaultConfig("X", map[string]string{}, tier)
|
||||
if !strings.Contains(cfg, "tier: 3\n") {
|
||||
t.Errorf("invalid tier %d should fall back to T3, got:\n%s", tier, cfg)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -71,7 +71,7 @@ func TestSecurity_GetTemplates_NoAuth_Returns401(t *testing.T) {
|
||||
authDB, authMock := newEnrolledAuthDB(t)
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
tmplh := NewTemplatesHandler(tmpDir, nil)
|
||||
tmplh := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
|
||||
r := gin.New()
|
||||
r.GET("/templates", middleware.AdminAuth(authDB), tmplh.List)
|
||||
@@ -98,7 +98,7 @@ func TestSecurity_GetTemplates_FreshInstall_FailsOpen(t *testing.T) {
|
||||
authDB, authMock := newFreshInstallAuthDB(t)
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
tmplh := NewTemplatesHandler(tmpDir, nil)
|
||||
tmplh := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
|
||||
r := gin.New()
|
||||
r.GET("/templates", middleware.AdminAuth(authDB), tmplh.List)
|
||||
|
||||
@@ -1,132 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
|
||||
"github.com/gin-gonic/gin"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// TeamHandler now hosts only Collapse — the visual "expand" action is
|
||||
// canvas-side and creating children goes through the regular
|
||||
// WorkspaceHandler.Create path with parent_id set, like any other
|
||||
// workspace. Every workspace can have children; "team" is just the
|
||||
// state of having children. The old Expand handler bulk-created
|
||||
// children by reading sub_workspaces from a parent's config and was
|
||||
// non-idempotent — calling it N times leaked N×children EC2s, which
|
||||
// is how tenant-hongming accumulated 72 stale workspaces.
|
||||
type TeamHandler struct {
|
||||
wh *WorkspaceHandler
|
||||
b *events.Broadcaster
|
||||
}
|
||||
|
||||
// NewTeamHandler constructs a TeamHandler. wh is used by Collapse to
|
||||
// route StopWorkspaceAuto through the backend dispatcher.
|
||||
func NewTeamHandler(b *events.Broadcaster, wh *WorkspaceHandler, platformURL, configsDir string) *TeamHandler {
|
||||
return &TeamHandler{wh: wh, b: b}
|
||||
}
|
||||
|
||||
// Collapse handles POST /workspaces/:id/collapse
|
||||
// Stops and removes all child workspaces.
|
||||
func (h *TeamHandler) Collapse(c *gin.Context) {
|
||||
parentID := c.Param("id")
|
||||
ctx := c.Request.Context()
|
||||
|
||||
// Find children
|
||||
rows, err := db.DB.QueryContext(ctx,
|
||||
`SELECT id, name FROM workspaces WHERE parent_id = $1 AND status != 'removed'`, parentID)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to query children"})
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
removed := make([]string, 0)
|
||||
for rows.Next() {
|
||||
var childID, childName string
|
||||
if rows.Scan(&childID, &childName) != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Stop the workload via the backend dispatcher (CP for SaaS,
|
||||
// Docker for self-hosted). Pre-2026-05-05 this was
|
||||
// `if h.provisioner != nil { h.provisioner.Stop(...) }`, which
|
||||
// silently skipped on every SaaS tenant — child EC2s kept running
|
||||
// after team-collapse until the orphan sweeper caught them
|
||||
// (issue #2813).
|
||||
if err := h.wh.StopWorkspaceAuto(ctx, childID); err != nil {
|
||||
log.Printf("Team collapse: stop %s failed: %v — orphan sweeper will reconcile", childID, err)
|
||||
}
|
||||
|
||||
// Mark as removed
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2`, models.StatusRemoved, childID); err != nil {
|
||||
log.Printf("Team collapse: failed to remove workspace %s: %v", childID, err)
|
||||
}
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`DELETE FROM canvas_layouts WHERE workspace_id = $1`, childID); err != nil {
|
||||
log.Printf("Team collapse: failed to delete layout for %s: %v", childID, err)
|
||||
}
|
||||
|
||||
h.b.RecordAndBroadcast(ctx, "WORKSPACE_REMOVED", childID, map[string]interface{}{})
|
||||
|
||||
removed = append(removed, childName)
|
||||
}
|
||||
|
||||
h.b.RecordAndBroadcast(ctx, "WORKSPACE_COLLAPSED", parentID, map[string]interface{}{
|
||||
"removed_children": removed,
|
||||
})
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"status": "collapsed",
|
||||
"removed": removed,
|
||||
})
|
||||
}
|
||||
|
||||
// findTemplateDirByName resolves a workspace name to its template
|
||||
// directory. Kept here because callers outside this package may use
|
||||
// it, even though the in-package consumer (Expand) is gone.
|
||||
//
|
||||
// TODO: relocate alongside the templates handler if no other callers
|
||||
// surface, or delete entirely after a deprecation cycle.
|
||||
func findTemplateDirByName(configsDir, name string) string {
|
||||
normalized := normalizeName(name)
|
||||
|
||||
candidate := filepath.Join(configsDir, normalized)
|
||||
if _, err := os.Stat(filepath.Join(candidate, "config.yaml")); err == nil {
|
||||
return candidate
|
||||
}
|
||||
|
||||
// Fall back to scanning all dirs
|
||||
entries, err := os.ReadDir(configsDir)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
for _, e := range entries {
|
||||
if !e.IsDir() {
|
||||
continue
|
||||
}
|
||||
cfgPath := filepath.Join(configsDir, e.Name(), "config.yaml")
|
||||
data, err := os.ReadFile(cfgPath)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
var cfg struct {
|
||||
Name string `yaml:"name"`
|
||||
}
|
||||
if json.Unmarshal(data, &cfg) == nil && cfg.Name == name {
|
||||
return filepath.Join(configsDir, e.Name())
|
||||
}
|
||||
if yaml.Unmarshal(data, &cfg) == nil && cfg.Name == name {
|
||||
return filepath.Join(configsDir, e.Name())
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
@@ -1,130 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// ---------- TeamHandler: Collapse ----------
|
||||
|
||||
func TestTeamCollapse_NoChildren(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewTeamHandler(broadcaster, NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()), "http://localhost:8080", "/tmp/configs")
|
||||
|
||||
// No children
|
||||
mock.ExpectQuery("SELECT id, name FROM workspaces WHERE parent_id").
|
||||
WithArgs("ws-parent").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}))
|
||||
|
||||
// WORKSPACE_COLLAPSED broadcast
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-parent"}}
|
||||
c.Request = httptest.NewRequest("POST", "/", nil)
|
||||
|
||||
handler.Collapse(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if resp["status"] != "collapsed" {
|
||||
t.Errorf("expected status 'collapsed', got %v", resp["status"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestTeamCollapse_WithChildren(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewTeamHandler(broadcaster, NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()), "http://localhost:8080", "/tmp/configs")
|
||||
|
||||
// Two children
|
||||
mock.ExpectQuery("SELECT id, name FROM workspaces WHERE parent_id").
|
||||
WithArgs("ws-parent").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}).
|
||||
AddRow("child-1", "Worker A").
|
||||
AddRow("child-2", "Worker B"))
|
||||
|
||||
// UPDATE + DELETE + broadcast for child-1
|
||||
mock.ExpectExec("UPDATE workspaces SET status =").
|
||||
WithArgs("child-1").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec("DELETE FROM canvas_layouts").
|
||||
WithArgs("child-1").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// UPDATE + DELETE + broadcast for child-2
|
||||
mock.ExpectExec("UPDATE workspaces SET status =").
|
||||
WithArgs("child-2").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec("DELETE FROM canvas_layouts").
|
||||
WithArgs("child-2").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// WORKSPACE_COLLAPSED broadcast for parent
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-parent"}}
|
||||
c.Request = httptest.NewRequest("POST", "/", nil)
|
||||
|
||||
handler.Collapse(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
removed, ok := resp["removed"].([]interface{})
|
||||
if !ok || len(removed) != 2 {
|
||||
t.Errorf("expected 2 removed children, got %v", resp["removed"])
|
||||
}
|
||||
}
|
||||
// ---------- findTemplateDirByName helper ----------
|
||||
|
||||
func TestFindTemplateDirByName_DirectMatch(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
subDir := filepath.Join(dir, "mybot")
|
||||
os.MkdirAll(subDir, 0755)
|
||||
os.WriteFile(filepath.Join(subDir, "config.yaml"), []byte("name: MyBot"), 0644)
|
||||
|
||||
result := findTemplateDirByName(dir, "mybot")
|
||||
if result != subDir {
|
||||
t.Errorf("expected %s, got %s", subDir, result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindTemplateDirByName_NotFound(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
result := findTemplateDirByName(dir, "nonexistent")
|
||||
if result != "" {
|
||||
t.Errorf("expected empty string, got %s", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindTemplateDirByName_InvalidConfigsDir(t *testing.T) {
|
||||
result := findTemplateDirByName("/nonexistent/path", "anything")
|
||||
if result != "" {
|
||||
t.Errorf("expected empty string for invalid dir, got %s", result)
|
||||
}
|
||||
}
|
||||
@@ -36,8 +36,14 @@ func normalizeName(name string) string {
|
||||
return result
|
||||
}
|
||||
|
||||
// generateDefaultConfig creates a config.yaml from detected prompt files and skills.
|
||||
func generateDefaultConfig(name string, files map[string]string) string {
|
||||
// generateDefaultConfig creates a config.yaml from detected prompt files
|
||||
// and skills. tier is the deployment-aware default (caller passes
|
||||
// h.wh.DefaultTier() — T4 on SaaS, T3 on self-hosted) so the generated
|
||||
// file matches what POST /workspaces would default to. Pre-#2910 this
|
||||
// was hardcoded to 3, which split-brained with the create-handler
|
||||
// default on SaaS (T4) and pinned newly-imported templates at T3 even
|
||||
// when downstream Create paths picked T4.
|
||||
func generateDefaultConfig(name string, files map[string]string, tier int) string {
|
||||
promptFiles := []string{}
|
||||
skillSet := map[string]bool{}
|
||||
|
||||
@@ -74,9 +80,15 @@ func generateDefaultConfig(name string, files map[string]string) string {
|
||||
var cfg strings.Builder
|
||||
cfg.WriteString(`name: "` + escaped + `"` + "\n")
|
||||
cfg.WriteString("description: Imported agent\n")
|
||||
// Default to tier 3 ("Privileged") — matches the workspace.go
|
||||
// create handler default. See its comment for rationale.
|
||||
cfg.WriteString("version: 1.0.0\ntier: 3\n")
|
||||
// Tier is SaaS-aware via the caller's DefaultTier (#2910 PR-B).
|
||||
// Bounds-checked: invalid input falls back to T3 (the historical
|
||||
// default + the safer-of-the-two when the deployment mode can't
|
||||
// be resolved).
|
||||
if tier < 1 || tier > 4 {
|
||||
tier = 3
|
||||
}
|
||||
cfg.WriteString("version: 1.0.0\n")
|
||||
cfg.WriteString(fmt.Sprintf("tier: %d\n", tier))
|
||||
cfg.WriteString("model: anthropic:claude-haiku-4-5-20251001\n")
|
||||
cfg.WriteString("\nprompt_files:\n")
|
||||
if len(promptFiles) > 0 {
|
||||
@@ -148,7 +160,11 @@ func (h *TemplatesHandler) Import(c *gin.Context) {
|
||||
|
||||
// Auto-generate config.yaml if not provided
|
||||
if _, exists := body.Files["config.yaml"]; !exists {
|
||||
cfg := generateDefaultConfig(body.Name, body.Files)
|
||||
tier := 3
|
||||
if h.wh != nil {
|
||||
tier = h.wh.DefaultTier()
|
||||
}
|
||||
cfg := generateDefaultConfig(body.Name, body.Files, tier)
|
||||
if err := os.WriteFile(filepath.Join(destDir, "config.yaml"), []byte(cfg), 0600); err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to write config.yaml"})
|
||||
return
|
||||
@@ -227,7 +243,11 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {
|
||||
if _, exists := body.Files["config.yaml"]; !exists {
|
||||
// Check if config.yaml exists in container
|
||||
if _, err := h.execInContainer(ctx, containerName, []string{"test", "-f", "/configs/config.yaml"}); err != nil {
|
||||
cfg := generateDefaultConfig(wsName, body.Files)
|
||||
tier := 3
|
||||
if h.wh != nil {
|
||||
tier = h.wh.DefaultTier()
|
||||
}
|
||||
cfg := generateDefaultConfig(wsName, body.Files, tier)
|
||||
singleFile := map[string]string{"config.yaml": cfg}
|
||||
h.copyFilesToContainer(ctx, containerName, "/configs", singleFile)
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@ func TestGenerateDefaultConfig_WithFiles(t *testing.T) {
|
||||
"skills/review/templates.md": "Templates",
|
||||
}
|
||||
|
||||
cfg := generateDefaultConfig("Test Agent", files)
|
||||
cfg := generateDefaultConfig("Test Agent", files, 3)
|
||||
|
||||
// Name is emitted as a double-quoted scalar (#221 sanitizer).
|
||||
if !strings.Contains(cfg, `name: "Test Agent"`) {
|
||||
@@ -85,7 +85,7 @@ func TestGenerateDefaultConfig_Empty(t *testing.T) {
|
||||
"data/something.json": `{"key": "value"}`,
|
||||
}
|
||||
|
||||
cfg := generateDefaultConfig("Empty Agent", files)
|
||||
cfg := generateDefaultConfig("Empty Agent", files, 3)
|
||||
|
||||
if !strings.Contains(cfg, `name: "Empty Agent"`) {
|
||||
t.Errorf("config should contain quoted agent name, got:\n%s", cfg)
|
||||
@@ -134,7 +134,7 @@ func TestGenerateDefaultConfig_YAMLInjection(t *testing.T) {
|
||||
|
||||
for _, tc := range adversarialCases {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
cfg := generateDefaultConfig(tc.name, map[string]string{})
|
||||
cfg := generateDefaultConfig(tc.name, map[string]string{}, 3)
|
||||
var parsed map[string]interface{}
|
||||
if err := yaml.Unmarshal([]byte(cfg), &parsed); err != nil {
|
||||
t.Fatalf("sanitized config does not parse as YAML: %v\n--- config ---\n%s", err, cfg)
|
||||
@@ -205,7 +205,7 @@ func TestImport_Success(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
|
||||
body := `{
|
||||
"name": "New Agent",
|
||||
@@ -245,7 +245,7 @@ func TestImport_MissingName(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
body := `{"files": {"test.md": "content"}}`
|
||||
|
||||
@@ -265,7 +265,7 @@ func TestImport_TooManyFiles(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
files := make(map[string]string)
|
||||
for i := 0; i <= maxUploadFiles; i++ {
|
||||
@@ -296,7 +296,7 @@ func TestImport_AlreadyExists(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
os.MkdirAll(filepath.Join(tmpDir, "existing-agent"), 0755)
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
|
||||
body := `{"name": "Existing Agent", "files": {"test.md": "content"}}`
|
||||
|
||||
@@ -317,7 +317,7 @@ func TestImport_WithConfigYaml(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
|
||||
body := `{
|
||||
"name": "Custom Agent",
|
||||
@@ -354,7 +354,7 @@ func TestReplaceFiles_MissingBody(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -373,7 +373,7 @@ func TestReplaceFiles_TooManyFiles(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
files := make(map[string]string)
|
||||
for i := 0; i <= maxUploadFiles; i++ {
|
||||
@@ -398,7 +398,7 @@ func TestReplaceFiles_WorkspaceNotFound(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
// ReplaceFiles now selects (name, instance_id, runtime) for the
|
||||
// restart-cascade. Match the full column list rather than just the
|
||||
@@ -429,7 +429,7 @@ func TestReplaceFiles_PathTraversal(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
|
||||
WithArgs("ws-rf-pt").
|
||||
|
||||
@@ -31,10 +31,20 @@ const maxUploadFiles = 200
|
||||
type TemplatesHandler struct {
|
||||
configsDir string
|
||||
docker *client.Client
|
||||
// wh is used by Import and ReplaceFiles to call DefaultTier() so a
|
||||
// generated config.yaml's tier matches the SaaS-vs-self-hosted
|
||||
// boundary (#2910 PR-B). nil-tolerant — the field is unused when
|
||||
// the caller doesn't import templates that need a fresh config
|
||||
// generated.
|
||||
wh *WorkspaceHandler
|
||||
}
|
||||
|
||||
func NewTemplatesHandler(configsDir string, dockerCli *client.Client) *TemplatesHandler {
|
||||
return &TemplatesHandler{configsDir: configsDir, docker: dockerCli}
|
||||
// NewTemplatesHandler constructs a TemplatesHandler. wh may be nil for
|
||||
// callers that only use the read-only template surfaces (List,
|
||||
// ReadFile, ListFiles). Import + ReplaceFiles need wh non-nil so the
|
||||
// generated config.yaml picks the SaaS-aware default tier.
|
||||
func NewTemplatesHandler(configsDir string, dockerCli *client.Client, wh *WorkspaceHandler) *TemplatesHandler {
|
||||
return &TemplatesHandler{configsDir: configsDir, docker: dockerCli, wh: wh}
|
||||
}
|
||||
|
||||
// modelSpec describes a single supported model on a template: its id (sent
|
||||
|
||||
@@ -53,7 +53,7 @@ func TestTemplatesList_EmptyDir(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -99,7 +99,7 @@ skills:
|
||||
// Create a directory without config.yaml (should be skipped)
|
||||
os.MkdirAll(filepath.Join(tmpDir, "no-config"), 0755)
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -160,7 +160,7 @@ skills: []
|
||||
t.Fatalf("write: %v", err)
|
||||
}
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/templates", nil)
|
||||
@@ -237,7 +237,7 @@ skills: []
|
||||
t.Fatalf("write: %v", err)
|
||||
}
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/templates", nil)
|
||||
@@ -315,7 +315,7 @@ skills: []
|
||||
t.Fatalf("write: %v", err)
|
||||
}
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/templates", nil)
|
||||
@@ -434,7 +434,7 @@ skills: []
|
||||
t.Fatalf("write: %v", err)
|
||||
}
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/templates", nil)
|
||||
@@ -512,7 +512,7 @@ skills: []
|
||||
t.Fatalf("write: %v", err)
|
||||
}
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/templates", nil)
|
||||
@@ -555,7 +555,7 @@ skills: []
|
||||
t.Fatalf("write: %v", err)
|
||||
}
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/templates", nil)
|
||||
@@ -589,7 +589,7 @@ skills: []
|
||||
t.Fatalf("write: %v", err)
|
||||
}
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/templates", nil)
|
||||
@@ -661,7 +661,7 @@ skills: []
|
||||
log.SetOutput(&logBuf)
|
||||
defer log.SetOutput(prevOutput)
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/templates", nil)
|
||||
@@ -698,7 +698,7 @@ func TestTemplatesList_NonexistentDir(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler("/nonexistent/path/to/templates", nil)
|
||||
handler := NewTemplatesHandler("/nonexistent/path/to/templates", nil, nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -723,7 +723,7 @@ func TestListFiles_InvalidRoot(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -748,7 +748,7 @@ func TestListFiles_WorkspaceNotFound(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
|
||||
WithArgs("ws-nonexist").
|
||||
@@ -775,7 +775,7 @@ func TestListFiles_FallbackToHost_NoTemplate(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
handler := NewTemplatesHandler(tmpDir, nil) // nil docker = no container
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil) // nil docker = no container
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
|
||||
WithArgs("ws-fallback").
|
||||
@@ -815,7 +815,7 @@ func TestListFiles_FallbackToHost_WithTemplate(t *testing.T) {
|
||||
os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte("name: Test Agent\n"), 0644)
|
||||
os.WriteFile(filepath.Join(tmplDir, "system-prompt.md"), []byte("# prompt"), 0644)
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
|
||||
WithArgs("ws-tmpl").
|
||||
@@ -849,7 +849,7 @@ func TestReadFile_PathTraversal(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -870,7 +870,7 @@ func TestReadFile_InvalidRoot(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -892,7 +892,7 @@ func TestReadFile_WorkspaceNotFound(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
|
||||
WithArgs("ws-nf").
|
||||
@@ -926,7 +926,7 @@ func TestReadFile_FallbackToHost_Success(t *testing.T) {
|
||||
os.MkdirAll(tmplDir, 0755)
|
||||
os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte("name: Reader Agent\ntier: 1\n"), 0644)
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
|
||||
// instance_id="" → SaaS branch skipped → falls through to local
|
||||
// Docker / template-dir host fallback (the only path the test
|
||||
@@ -967,7 +967,7 @@ func TestReadFile_FallbackToHost_NotFound(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
|
||||
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
|
||||
WithArgs("ws-nofile").
|
||||
@@ -999,7 +999,7 @@ func TestWriteFile_PathTraversal(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -1023,7 +1023,7 @@ func TestWriteFile_InvalidBody(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -1046,7 +1046,7 @@ func TestWriteFile_WorkspaceNotFound(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
|
||||
WithArgs("ws-wf-nf").
|
||||
@@ -1080,7 +1080,7 @@ func TestDeleteFile_PathTraversal(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -1101,7 +1101,7 @@ func TestDeleteFile_WorkspaceNotFound(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
|
||||
WithArgs("ws-del-nf").
|
||||
@@ -1133,7 +1133,7 @@ func TestResolveTemplateDir_ByNormalizedName(t *testing.T) {
|
||||
tmplDir := filepath.Join(tmpDir, "my-agent")
|
||||
os.MkdirAll(tmplDir, 0755)
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
result := handler.resolveTemplateDir("My Agent")
|
||||
|
||||
if result != tmplDir {
|
||||
@@ -1143,7 +1143,7 @@ func TestResolveTemplateDir_ByNormalizedName(t *testing.T) {
|
||||
|
||||
func TestResolveTemplateDir_NotFound(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
result := handler.resolveTemplateDir("Nonexistent Agent")
|
||||
|
||||
if result != "" {
|
||||
@@ -1177,7 +1177,7 @@ func TestCWE78_DeleteFile_TraversalVariants(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil)
|
||||
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
|
||||
@@ -148,15 +148,15 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
|
||||
id := uuid.New().String()
|
||||
awarenessNamespace := workspaceAwarenessNamespace(id)
|
||||
if payload.Tier == 0 {
|
||||
// Default to T3 ("Privileged"). T3 gives agents a read_write
|
||||
// workspace mount + Docker daemon access — the level most
|
||||
// templates need to do real work. Lower tiers (T1 sandboxed,
|
||||
// T2 standard) stay available as explicit opt-ins for
|
||||
// low-trust agents. Matches the Canvas CreateWorkspaceDialog
|
||||
// default for self-hosted hosts (SaaS defaults to T4 via
|
||||
// CreateWorkspaceDialog because each SaaS workspace runs on
|
||||
// its own sibling EC2).
|
||||
payload.Tier = 3
|
||||
// SaaS-aware default. SaaS → T4 (full host access; each
|
||||
// workspace runs on its own sibling EC2 so the tier boundary
|
||||
// is a Docker resource limit on the only container present —
|
||||
// no neighbour to protect from). Self-hosted → T3 (read-write
|
||||
// workspace mount + Docker daemon access, most templates'
|
||||
// baseline). Lower tiers (T1 sandboxed, T2 standard) remain
|
||||
// explicit opt-ins for low-trust agents. Matches the canvas
|
||||
// CreateWorkspaceDialog defaults so the API and the UI agree.
|
||||
payload.Tier = h.DefaultTier()
|
||||
}
|
||||
|
||||
// Detect runtime + default model from template config.yaml when the
|
||||
|
||||
@@ -35,6 +35,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
|
||||
)
|
||||
|
||||
// HasProvisioner reports whether either backend (CP or local Docker) is
|
||||
@@ -49,6 +50,32 @@ func (h *WorkspaceHandler) HasProvisioner() bool {
|
||||
return h.cpProv != nil || h.provisioner != nil
|
||||
}
|
||||
|
||||
// IsSaaS reports whether the CP (EC2) provisioner is wired. Each SaaS
|
||||
// workspace runs on its own sibling EC2, so the per-workspace tier
|
||||
// boundary is a Docker resource limit applied to the only container
|
||||
// on that EC2 — there's no neighbour to protect from. Self-hosted
|
||||
// runs many workspaces in one Docker daemon on a single host, so
|
||||
// the tier-2-by-default safe-neighbour-share posture stays.
|
||||
//
|
||||
// Tier defaults across Create / OrgImport / canvas EmptyState branch
|
||||
// on IsSaaS so SaaS users get T4 (full host access) by default and
|
||||
// self-hosted users keep the lower-trust caps.
|
||||
func (h *WorkspaceHandler) IsSaaS() bool {
|
||||
return h.cpProv != nil
|
||||
}
|
||||
|
||||
// DefaultTier is the SaaS-aware default tier. T4 on SaaS (single
|
||||
// container per EC2 — full host access matches the boundary), T3 on
|
||||
// self-hosted (read-write workspace mount + Docker daemon access,
|
||||
// most templates' baseline). Callers default to this when the user
|
||||
// hasn't explicitly picked a tier.
|
||||
func (h *WorkspaceHandler) DefaultTier() int {
|
||||
if h.IsSaaS() {
|
||||
return 4
|
||||
}
|
||||
return 3
|
||||
}
|
||||
|
||||
// provisionWorkspaceAuto picks the backend (CP for SaaS, local Docker
|
||||
// for self-hosted) and starts provisioning in a goroutine. Returns true
|
||||
// when a backend was kicked off, false when neither is wired.
|
||||
@@ -75,6 +102,14 @@ func (h *WorkspaceHandler) HasProvisioner() bool {
|
||||
// lives in prepareProvisionContext (shared by both per-backend
|
||||
// goroutines).
|
||||
func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
|
||||
provlog.Event("provision.start", map[string]any{
|
||||
"workspace_id": workspaceID,
|
||||
"name": payload.Name,
|
||||
"tier": payload.Tier,
|
||||
"runtime": payload.Runtime,
|
||||
"template": payload.Template,
|
||||
"sync": false,
|
||||
})
|
||||
if h.cpProv != nil {
|
||||
go h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
|
||||
return true
|
||||
@@ -110,6 +145,14 @@ func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath stri
|
||||
// Keep these two helpers in sync — when one grows a new arm (third
|
||||
// backend, retry semantics), the other should too.
|
||||
func (h *WorkspaceHandler) provisionWorkspaceAutoSync(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
|
||||
provlog.Event("provision.start", map[string]any{
|
||||
"workspace_id": workspaceID,
|
||||
"name": payload.Name,
|
||||
"tier": payload.Tier,
|
||||
"runtime": payload.Runtime,
|
||||
"template": payload.Template,
|
||||
"sync": true,
|
||||
})
|
||||
if h.cpProv != nil {
|
||||
h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
|
||||
return true
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
@@ -431,6 +432,16 @@ func coalesceRestart(workspaceID string, cycle func()) {
|
||||
// NPE'd before reaching the reprovision step — which is why every SaaS dead-
|
||||
// agent incident pre-this-fix required manual restart from canvas.
|
||||
func (h *WorkspaceHandler) stopForRestart(ctx context.Context, workspaceID string) {
|
||||
backend := "none"
|
||||
if h.provisioner != nil {
|
||||
backend = "docker"
|
||||
} else if h.cpProv != nil {
|
||||
backend = "cp"
|
||||
}
|
||||
provlog.Event("restart.pre_stop", map[string]any{
|
||||
"workspace_id": workspaceID,
|
||||
"backend": backend,
|
||||
})
|
||||
if h.provisioner != nil {
|
||||
h.provisioner.Stop(ctx, workspaceID)
|
||||
return
|
||||
|
||||
@@ -0,0 +1,159 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"go/ast"
|
||||
"go/parser"
|
||||
"go/token"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestINSERTworkspacesAllowlist enumerates every function in this
|
||||
// package that emits an `INSERT INTO workspaces (` SQL literal, and
|
||||
// pins the result against an explicit allowlist. New entries fail the
|
||||
// build until a reviewer adds them — forcing the question "what
|
||||
// makes this INSERT idempotent?" at PR-review time, not after the
|
||||
// next bulk-create leak.
|
||||
//
|
||||
// Pairs with TestCreateWorkspaceTree_CallsLookupBeforeInsert (the
|
||||
// behavior pin for the one bulk path). Together they close the
|
||||
// regression class: this test catches "did a new function start
|
||||
// inserting workspaces?", that test catches "did the existing bulk
|
||||
// path drop its idempotency check?". Either fires immediately when
|
||||
// drift happens.
|
||||
//
|
||||
// Why allowlist rather than pure behavior gate (per memory
|
||||
// feedback_behavior_based_ast_gates.md): the bulk-create leak class
|
||||
// is small + stable (1 path today), and a behavior gate would have
|
||||
// to disambiguate "iterating a YAML array of workspaces" from the
|
||||
// many other `for ... range` patterns in a Create handler (config
|
||||
// lines, secrets map, channels). Type-info-aware AST analysis would
|
||||
// catch the YAML-iteration shape but is heavy. Allowlisting is the
|
||||
// minimum-viable pin: any PR that adds a new INSERT site is forced
|
||||
// to pause, add an entry here, and document the safety mechanism in
|
||||
// the comment alongside.
|
||||
//
|
||||
// RFC #2867 class 1.
|
||||
func TestINSERTworkspacesAllowlist(t *testing.T) {
|
||||
// expected[key] = safety mechanism. Keep the comment pinned to
|
||||
// what makes that function safe — if the safety changes, the
|
||||
// allowlist must be re-reviewed.
|
||||
expected := map[string]string{
|
||||
// org_import.createWorkspaceTree: lookupExistingChild
|
||||
// before INSERT (#2868 phase 3). Also pinned by
|
||||
// TestCreateWorkspaceTree_CallsLookupBeforeInsert.
|
||||
"org_import.go:createWorkspaceTree": "lookup-then-insert via lookupExistingChild",
|
||||
// registry.Register: external workspace registers itself with
|
||||
// its known UUID; INSERT is idempotent via ON CONFLICT (id)
|
||||
// DO UPDATE — re-registration upserts, never duplicates.
|
||||
"registry.go:Register": "ON CONFLICT (id) DO UPDATE",
|
||||
// workspace.Create: single-workspace POST /workspaces from a
|
||||
// human or automation. No iteration; payload describes one
|
||||
// workspace; UUID is server-generated. Caller intent IS to
|
||||
// create, so no idempotency check is needed.
|
||||
"workspace.go:Create": "single-workspace POST, server-generated UUID",
|
||||
}
|
||||
|
||||
actual := map[string]string{}
|
||||
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("getwd: %v", err)
|
||||
}
|
||||
|
||||
entries, err := os.ReadDir(wd)
|
||||
if err != nil {
|
||||
t.Fatalf("readdir %s: %v", wd, err)
|
||||
}
|
||||
for _, ent := range entries {
|
||||
name := ent.Name()
|
||||
if ent.IsDir() {
|
||||
continue
|
||||
}
|
||||
if !strings.HasSuffix(name, ".go") {
|
||||
continue
|
||||
}
|
||||
if strings.HasSuffix(name, "_test.go") {
|
||||
continue
|
||||
}
|
||||
path := filepath.Join(wd, name)
|
||||
fset := token.NewFileSet()
|
||||
file, err := parser.ParseFile(fset, path, nil, parser.ParseComments)
|
||||
if err != nil {
|
||||
t.Fatalf("parse %s: %v", path, err)
|
||||
}
|
||||
// For each top-level FuncDecl, walk its body and check for an
|
||||
// `INSERT INTO workspaces (` SQL literal in any CallExpr arg.
|
||||
for _, decl := range file.Decls {
|
||||
fn, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fn.Body == nil {
|
||||
continue
|
||||
}
|
||||
var foundInsert bool
|
||||
ast.Inspect(fn.Body, func(n ast.Node) bool {
|
||||
lit, ok := n.(*ast.BasicLit)
|
||||
if !ok || lit.Kind != token.STRING {
|
||||
return true
|
||||
}
|
||||
raw := lit.Value
|
||||
if unq, err := strconv.Unquote(raw); err == nil {
|
||||
raw = unq
|
||||
}
|
||||
if workspacesInsertRE.MatchString(raw) {
|
||||
foundInsert = true
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
if foundInsert {
|
||||
key := name + ":" + fn.Name.Name
|
||||
actual[key] = "(observed via AST walk)"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute set diffs so failures point at the specific drift.
|
||||
missing := []string{}
|
||||
unexpected := []string{}
|
||||
for k := range expected {
|
||||
if _, ok := actual[k]; !ok {
|
||||
missing = append(missing, k)
|
||||
}
|
||||
}
|
||||
for k := range actual {
|
||||
if _, ok := expected[k]; !ok {
|
||||
unexpected = append(unexpected, k)
|
||||
}
|
||||
}
|
||||
sort.Strings(missing)
|
||||
sort.Strings(unexpected)
|
||||
|
||||
if len(unexpected) > 0 {
|
||||
t.Errorf(`new function(s) emit `+"`INSERT INTO workspaces (`"+` and aren't in the allowlist:
|
||||
%s
|
||||
|
||||
If this is a legitimate addition, add an entry to expected[] in this test
|
||||
with the safety mechanism pinned in the comment alongside (lookup-then-
|
||||
insert / ON CONFLICT / single-workspace path / etc.). The bulk-create
|
||||
regression class needs explicit per-handler review, not silent drift.
|
||||
|
||||
Reference: RFC #2867 class 1, sibling test
|
||||
TestCreateWorkspaceTree_CallsLookupBeforeInsert.`,
|
||||
strings.Join(unexpected, "\n "))
|
||||
}
|
||||
if len(missing) > 0 {
|
||||
t.Errorf(`expected function(s) no longer emit `+"`INSERT INTO workspaces (`"+`:
|
||||
%s
|
||||
|
||||
Either the function was renamed/deleted (update the allowlist) or the
|
||||
INSERT was moved out (verify the new home is also covered). Don't just
|
||||
delete the entry — confirm the safety mechanism is still in place
|
||||
elsewhere or that the workspace-create path was intentionally
|
||||
restructured.`,
|
||||
strings.Join(missing, "\n "))
|
||||
}
|
||||
}
|
||||
@@ -5,14 +5,15 @@
|
||||
//
|
||||
// Exposed metrics:
|
||||
//
|
||||
// molecule_http_requests_total{method,path,status} - counter
|
||||
// molecule_http_request_duration_seconds{method,path} - counter (sum, for avg rate)
|
||||
// molecule_websocket_connections_active - gauge
|
||||
// go_goroutines - gauge
|
||||
// go_memstats_alloc_bytes - gauge
|
||||
// go_memstats_sys_bytes - gauge
|
||||
// go_memstats_heap_inuse_bytes - gauge
|
||||
// go_gc_duration_seconds_total - counter
|
||||
// molecule_http_requests_total{method,path,status} - counter
|
||||
// molecule_http_request_duration_seconds{method,path} - counter (sum, for avg rate)
|
||||
// molecule_websocket_connections_active - gauge
|
||||
// molecule_pending_uploads_swept_total{outcome} - counter (acked|expired|error)
|
||||
// go_goroutines - gauge
|
||||
// go_memstats_alloc_bytes - gauge
|
||||
// go_memstats_sys_bytes - gauge
|
||||
// go_memstats_heap_inuse_bytes - gauge
|
||||
// go_gc_duration_seconds_total - counter
|
||||
package metrics
|
||||
|
||||
import (
|
||||
@@ -38,6 +39,12 @@ var (
|
||||
reqCounts = map[reqKey]int64{} // molecule_http_requests_total
|
||||
reqDurSums = map[reqKey]float64{} // sum of durations (seconds)
|
||||
activeWSConns int64 // molecule_websocket_connections_active
|
||||
|
||||
// pendinguploads sweeper counters — atomic so the sweeper goroutine
|
||||
// doesn't contend with the /metrics handler.
|
||||
pendingUploadsSweptAcked int64 // molecule_pending_uploads_swept_total{outcome="acked"}
|
||||
pendingUploadsSweptExpired int64 // molecule_pending_uploads_swept_total{outcome="expired"}
|
||||
pendingUploadsSweepErrors int64 // molecule_pending_uploads_swept_total{outcome="error"}
|
||||
)
|
||||
|
||||
// Middleware records per-request counts and latency.
|
||||
@@ -76,6 +83,50 @@ func TrackWSConnect() { atomic.AddInt64(&activeWSConns, 1) }
|
||||
// Call from the WebSocket disconnect / cleanup path.
|
||||
func TrackWSDisconnect() { atomic.AddInt64(&activeWSConns, -1) }
|
||||
|
||||
// phantomBusyResets is the cumulative count of workspace rows the
|
||||
// phantom-busy sweep reset (active_tasks=0 → active_tasks=0+counter
|
||||
// cleared). Surfaced as molecule_phantom_busy_resets_total — a high
|
||||
// reset rate signals a regression in task-lifecycle accounting (most
|
||||
// often: missing env vars cause claude --print to time out, the
|
||||
// agent loop never decrements active_tasks, and the sweep cleans up
|
||||
// the counter ~10 min later). Issue #2865.
|
||||
var phantomBusyResets int64
|
||||
|
||||
// TrackPhantomBusyReset increments the phantom-busy reset counter.
|
||||
// Called from sweepPhantomBusy in workspace-server/internal/scheduler/
|
||||
// after each row whose active_tasks was reset to 0. Idempotent +
|
||||
// goroutine-safe; called once per row per sweep tick.
|
||||
func TrackPhantomBusyReset() { atomic.AddInt64(&phantomBusyResets, 1) }
|
||||
|
||||
// PendingUploadsSwept records a successful sweep cycle. acked/expired
|
||||
// are added to the per-outcome counters so dashboards can spot the
|
||||
// stuck-fetch pattern (high expired, low acked) vs healthy churn.
|
||||
func PendingUploadsSwept(acked, expired int) {
|
||||
if acked > 0 {
|
||||
atomic.AddInt64(&pendingUploadsSweptAcked, int64(acked))
|
||||
}
|
||||
if expired > 0 {
|
||||
atomic.AddInt64(&pendingUploadsSweptExpired, int64(expired))
|
||||
}
|
||||
}
|
||||
|
||||
// PendingUploadsSweepError records a sweeper-cycle failure (transient
|
||||
// DB error etc). Counted separately so the rate of errored sweeps is
|
||||
// observable independent of how many rows the successful sweeps deleted.
|
||||
func PendingUploadsSweepError() {
|
||||
atomic.AddInt64(&pendingUploadsSweepErrors, 1)
|
||||
}
|
||||
|
||||
// PendingUploadsSweepCounts returns the current (acked, expired, error)
|
||||
// totals. Exposed for tests that need a deterministic delta probe of
|
||||
// the sweeper's metric writes — the /metrics endpoint is the production
|
||||
// observability surface; this is a unit-test escape hatch.
|
||||
func PendingUploadsSweepCounts() (acked, expired, errored int64) {
|
||||
return atomic.LoadInt64(&pendingUploadsSweptAcked),
|
||||
atomic.LoadInt64(&pendingUploadsSweptExpired),
|
||||
atomic.LoadInt64(&pendingUploadsSweepErrors)
|
||||
}
|
||||
|
||||
// Handler returns a Gin handler that serialises all collected metrics in
|
||||
// Prometheus text exposition format (v0.0.4). Mount this at GET /metrics.
|
||||
func Handler() gin.HandlerFunc {
|
||||
@@ -144,6 +195,21 @@ func Handler() gin.HandlerFunc {
|
||||
writeln(w, "# HELP molecule_websocket_connections_active Number of active WebSocket connections.")
|
||||
writeln(w, "# TYPE molecule_websocket_connections_active gauge")
|
||||
fmt.Fprintf(w, "molecule_websocket_connections_active %d\n", atomic.LoadInt64(&activeWSConns))
|
||||
|
||||
// ── Molecule AI scheduler ──────────────────────────────────────────────
|
||||
writeln(w, "# HELP molecule_phantom_busy_resets_total Cumulative count of workspace rows reset by the phantom-busy sweep (active_tasks cleared after >10 min of activity_log silence). High reset rate signals task-lifecycle accounting regressions — see issue #2865.")
|
||||
writeln(w, "# TYPE molecule_phantom_busy_resets_total counter")
|
||||
fmt.Fprintf(w, "molecule_phantom_busy_resets_total %d\n", atomic.LoadInt64(&phantomBusyResets))
|
||||
|
||||
// ── Pending-uploads sweeper ────────────────────────────────────────────
|
||||
writeln(w, "# HELP molecule_pending_uploads_swept_total Pending-uploads rows deleted by the GC sweeper, by outcome.")
|
||||
writeln(w, "# TYPE molecule_pending_uploads_swept_total counter")
|
||||
fmt.Fprintf(w, "molecule_pending_uploads_swept_total{outcome=\"acked\"} %d\n",
|
||||
atomic.LoadInt64(&pendingUploadsSweptAcked))
|
||||
fmt.Fprintf(w, "molecule_pending_uploads_swept_total{outcome=\"expired\"} %d\n",
|
||||
atomic.LoadInt64(&pendingUploadsSweptExpired))
|
||||
fmt.Fprintf(w, "molecule_pending_uploads_swept_total{outcome=\"error\"} %d\n",
|
||||
atomic.LoadInt64(&pendingUploadsSweepErrors))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
package metrics
|
||||
|
||||
// Tests for the phantom-busy reset counter wired up by issue #2865.
|
||||
// The counter is exposed at /metrics as
|
||||
// molecule_phantom_busy_resets_total. A high steady-state value
|
||||
// signals task-lifecycle accounting regressions in the agent loop —
|
||||
// see scheduler.sweepPhantomBusy for the writer.
|
||||
|
||||
import (
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// resetForTest zeroes the counter so a single test's TrackPhantomBusyReset
|
||||
// calls don't compound onto a previous test's run. metrics.go's package-
|
||||
// level state means every test that touches the counter must reset.
|
||||
func resetForTest() {
|
||||
atomic.StoreInt64(&phantomBusyResets, 0)
|
||||
}
|
||||
|
||||
func TestTrackPhantomBusyReset_IncrementsCounter(t *testing.T) {
|
||||
resetForTest()
|
||||
for i := 0; i < 7; i++ {
|
||||
TrackPhantomBusyReset()
|
||||
}
|
||||
got := atomic.LoadInt64(&phantomBusyResets)
|
||||
if got != 7 {
|
||||
t.Errorf("counter after 7 calls = %d, want 7", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTrackPhantomBusyReset_RaceFreeUnderConcurrentWrites(t *testing.T) {
|
||||
resetForTest()
|
||||
var wg sync.WaitGroup
|
||||
const goroutines = 50
|
||||
const callsPerGoroutine = 200
|
||||
wg.Add(goroutines)
|
||||
for i := 0; i < goroutines; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for j := 0; j < callsPerGoroutine; j++ {
|
||||
TrackPhantomBusyReset()
|
||||
}
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
want := int64(goroutines * callsPerGoroutine)
|
||||
got := atomic.LoadInt64(&phantomBusyResets)
|
||||
if got != want {
|
||||
t.Errorf("counter under concurrent writes = %d, want %d (lost increments → atomic broken)",
|
||||
got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandler_ExposesPhantomBusyResetsCounter(t *testing.T) {
|
||||
resetForTest()
|
||||
for i := 0; i < 3; i++ {
|
||||
TrackPhantomBusyReset()
|
||||
}
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
r := gin.New()
|
||||
r.GET("/metrics", Handler())
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
req := httptest.NewRequest("GET", "/metrics", nil)
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
body := w.Body.String()
|
||||
// HELP + TYPE lines must precede the metric (Prometheus text exposition format).
|
||||
if !strings.Contains(body, "# HELP molecule_phantom_busy_resets_total") {
|
||||
t.Errorf("metrics output missing HELP line for molecule_phantom_busy_resets_total:\n%s", body)
|
||||
}
|
||||
if !strings.Contains(body, "# TYPE molecule_phantom_busy_resets_total counter") {
|
||||
t.Errorf("metrics output missing TYPE line for molecule_phantom_busy_resets_total:\n%s", body)
|
||||
}
|
||||
if !strings.Contains(body, "molecule_phantom_busy_resets_total 3\n") {
|
||||
t.Errorf("metrics output missing counter value 3:\n%s", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandler_PhantomBusyResetsZeroByDefault(t *testing.T) {
|
||||
// Fresh process should report 0 — pin the contract so a future
|
||||
// refactor that lazy-inits the counter to nil doesn't silently
|
||||
// drop the metric from /metrics.
|
||||
resetForTest()
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
r := gin.New()
|
||||
r.GET("/metrics", Handler())
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
req := httptest.NewRequest("GET", "/metrics", nil)
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if !strings.Contains(w.Body.String(), "molecule_phantom_busy_resets_total 0\n") {
|
||||
t.Errorf("metric must report 0 by default:\n%s", w.Body.String())
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
package pendinguploads
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
)
|
||||
|
||||
// StartSweeperWithIntervalForTest exposes startSweeperWithInterval to
|
||||
// the external test package. The production code uses StartSweeper
|
||||
// (which pins the canonical SweepInterval); tests pin a short interval
|
||||
// to exercise the ticker-driven cycle without burning real wall-clock
|
||||
// time. The Go convention `export_test.go` keeps this seam OUT of the
|
||||
// production binary — files ending in _test.go are stripped at build
|
||||
// time, so this re-export only exists during `go test`.
|
||||
func StartSweeperWithIntervalForTest(ctx context.Context, storage Storage, ackRetention, interval time.Duration) {
|
||||
startSweeperWithInterval(ctx, storage, ackRetention, interval)
|
||||
}
|
||||
@@ -72,6 +72,28 @@ type Record struct {
|
||||
ExpiresAt time.Time
|
||||
}
|
||||
|
||||
// SweepResult is the per-cycle accounting from Sweep. Both counts are
|
||||
// non-negative; Total is just Acked + Expired for log/metrics
|
||||
// convenience. Phase 3 metrics expose these as separate counters so
|
||||
// dashboards can spot a stuck-ack pattern (high Expired, low Acked) vs.
|
||||
// healthy churn (Acked dominates).
|
||||
type SweepResult struct {
|
||||
Acked int // rows deleted because acked_at + retention elapsed
|
||||
Expired int // rows deleted because expires_at < now AND never acked
|
||||
}
|
||||
|
||||
// Total returns the sum of Acked + Expired — convenient for log lines.
|
||||
func (r SweepResult) Total() int { return r.Acked + r.Expired }
|
||||
|
||||
// PutItem is one file in a PutBatch call. Same per-field rules as Put —
|
||||
// empty content, missing filename, or content > MaxFileBytes is rejected
|
||||
// up-front so a bad item in the batch doesn't poison the transaction.
|
||||
type PutItem struct {
|
||||
Content []byte
|
||||
Filename string
|
||||
Mimetype string
|
||||
}
|
||||
|
||||
// Storage is the platform-side persistence boundary for poll-mode chat
|
||||
// uploads. The Postgres implementation backs all callers today; an S3-
|
||||
// backed implementation can drop in once RFC #2789 lands by making
|
||||
@@ -86,6 +108,17 @@ type Storage interface {
|
||||
// content > MaxFileBytes return errors before any DB write.
|
||||
Put(ctx context.Context, workspaceID uuid.UUID, content []byte, filename, mimetype string) (uuid.UUID, error)
|
||||
|
||||
// PutBatch inserts N uploads atomically — either all rows commit or
|
||||
// none do. Returns assigned file_ids in input order on success;
|
||||
// returns an error and does NOT insert any row on failure.
|
||||
//
|
||||
// Use this from multi-file upload handlers so a per-row failure on
|
||||
// row K doesn't leave rows 1..K-1 orphaned in the table (a client
|
||||
// retry would then double-insert them on success). All-or-nothing
|
||||
// semantics match the multipart request the canvas sends — either
|
||||
// the whole batch succeeds or the user re-uploads.
|
||||
PutBatch(ctx context.Context, workspaceID uuid.UUID, items []PutItem) ([]uuid.UUID, error)
|
||||
|
||||
// Get returns the full row including content. Returns ErrNotFound
|
||||
// when the row is absent, acked, or past expires_at. Caller should
|
||||
// not differentiate the three cases in the response — from the
|
||||
@@ -103,6 +136,18 @@ type Storage interface {
|
||||
// absent or already expired; on already-acked, returns nil so
|
||||
// the workspace's at-least-once retry succeeds without an error.
|
||||
Ack(ctx context.Context, fileID uuid.UUID) error
|
||||
|
||||
// Sweep deletes rows past their retention window:
|
||||
// - acked rows older than ackRetention (give the workspace a
|
||||
// window to re-fetch in case it processed but failed to write
|
||||
// the file before crashing — at-least-once behavior).
|
||||
// - unacked rows past expires_at (the platform's hard TTL — 24h
|
||||
// by default; a workspace that hasn't fetched by then is
|
||||
// considered dead from the upload's perspective).
|
||||
// Returns the per-category deletion counts for observability.
|
||||
// Errors are surfaced to the caller; a transient DB error must NOT
|
||||
// crash the sweeper loop (it just retries on the next tick).
|
||||
Sweep(ctx context.Context, ackRetention time.Duration) (SweepResult, error)
|
||||
}
|
||||
|
||||
// PostgresStorage is the production Storage implementation backed by
|
||||
@@ -149,6 +194,64 @@ func (p *PostgresStorage) Put(ctx context.Context, workspaceID uuid.UUID, conten
|
||||
return fileID, nil
|
||||
}
|
||||
|
||||
// PutBatch inserts every item atomically inside a single Tx. On any
|
||||
// per-item validation or per-row INSERT error the Tx is rolled back and
|
||||
// the caller sees the error without any rows committed — no partial
|
||||
// orphans for a multi-file upload that fails mid-batch.
|
||||
//
|
||||
// Validation runs BEFORE BEGIN so a bad input shape (empty content,
|
||||
// over-cap size) doesn't even open a Tx. Once we're in the Tx, the only
|
||||
// failures expected are DB-side (broken connection, statement timeout)
|
||||
// — those abort cleanly via Rollback.
|
||||
func (p *PostgresStorage) PutBatch(ctx context.Context, workspaceID uuid.UUID, items []PutItem) ([]uuid.UUID, error) {
|
||||
if len(items) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
for i, it := range items {
|
||||
if len(it.Content) == 0 {
|
||||
return nil, fmt.Errorf("pendinguploads: item %d: empty content", i)
|
||||
}
|
||||
if len(it.Content) > MaxFileBytes {
|
||||
return nil, ErrTooLarge
|
||||
}
|
||||
if it.Filename == "" {
|
||||
return nil, fmt.Errorf("pendinguploads: item %d: empty filename", i)
|
||||
}
|
||||
if len(it.Filename) > 100 {
|
||||
return nil, fmt.Errorf("pendinguploads: item %d: filename exceeds 100 chars", i)
|
||||
}
|
||||
}
|
||||
|
||||
tx, err := p.db.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("pendinguploads: begin tx: %w", err)
|
||||
}
|
||||
// Defer-rollback is safe even after a successful Commit — the second
|
||||
// Rollback is a no-op (database/sql tracks tx state).
|
||||
defer func() {
|
||||
_ = tx.Rollback()
|
||||
}()
|
||||
|
||||
out := make([]uuid.UUID, 0, len(items))
|
||||
for i, it := range items {
|
||||
var fid uuid.UUID
|
||||
err := tx.QueryRowContext(ctx, `
|
||||
INSERT INTO pending_uploads (workspace_id, content, size_bytes, filename, mimetype)
|
||||
VALUES ($1, $2, $3, $4, $5)
|
||||
RETURNING file_id
|
||||
`, workspaceID, it.Content, int64(len(it.Content)), it.Filename, it.Mimetype).Scan(&fid)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("pendinguploads: batch insert item %d: %w", i, err)
|
||||
}
|
||||
out = append(out, fid)
|
||||
}
|
||||
|
||||
if err := tx.Commit(); err != nil {
|
||||
return nil, fmt.Errorf("pendinguploads: commit batch: %w", err)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (p *PostgresStorage) Get(ctx context.Context, fileID uuid.UUID) (Record, error) {
|
||||
// The expires_at + acked_at filter in the WHERE clause means a
|
||||
// caller sees ErrNotFound for absent / acked / expired without
|
||||
@@ -251,3 +354,41 @@ func (p *PostgresStorage) Ack(ctx context.Context, fileID uuid.UUID) error {
|
||||
// the workspace's intent ("I'm done with this file") was honored.
|
||||
return nil
|
||||
}
|
||||
|
||||
// Sweep deletes acked rows past their retention window plus any
|
||||
// unacked rows whose hard TTL has elapsed. Single round-trip: a CTE
|
||||
// captures the deletion in one DELETE … RETURNING and the outer
|
||||
// SELECT sums by category. Cheaper and tighter than two round trips,
|
||||
// and atomic w.r.t. concurrent writes (the WHERE predicate sees a
|
||||
// consistent snapshot via Postgres MVCC).
|
||||
//
|
||||
// ackRetention=0 deletes all acked rows immediately; values <0 are
|
||||
// clamped to 0 for safety. Caller defaults are documented at
|
||||
// StartSweeper's DefaultAckRetention.
|
||||
func (p *PostgresStorage) Sweep(ctx context.Context, ackRetention time.Duration) (SweepResult, error) {
|
||||
if ackRetention < 0 {
|
||||
ackRetention = 0
|
||||
}
|
||||
// make_interval expects integer seconds — Postgres accepts a
|
||||
// floating point but we deliberately round to the nearest second
|
||||
// so test fixtures pin a deterministic value across PG versions.
|
||||
retentionSecs := int64(ackRetention.Seconds())
|
||||
|
||||
var acked, expired int
|
||||
err := p.db.QueryRowContext(ctx, `
|
||||
WITH deleted AS (
|
||||
DELETE FROM pending_uploads
|
||||
WHERE (acked_at IS NOT NULL AND acked_at < now() - make_interval(secs => $1))
|
||||
OR (acked_at IS NULL AND expires_at < now())
|
||||
RETURNING (acked_at IS NOT NULL) AS was_acked
|
||||
)
|
||||
SELECT
|
||||
COALESCE(SUM(CASE WHEN was_acked THEN 1 ELSE 0 END), 0)::int AS acked,
|
||||
COALESCE(SUM(CASE WHEN NOT was_acked THEN 1 ELSE 0 END), 0)::int AS expired
|
||||
FROM deleted
|
||||
`, retentionSecs).Scan(&acked, &expired)
|
||||
if err != nil {
|
||||
return SweepResult{}, fmt.Errorf("pendinguploads: sweep: %w", err)
|
||||
}
|
||||
return SweepResult{Acked: acked, Expired: expired}, nil
|
||||
}
|
||||
|
||||
@@ -71,6 +71,18 @@ const (
|
||||
SELECT acked_at FROM pending_uploads
|
||||
WHERE file_id = $1 AND expires_at > now()
|
||||
`
|
||||
sweepSQL = `
|
||||
WITH deleted AS (
|
||||
DELETE FROM pending_uploads
|
||||
WHERE (acked_at IS NOT NULL AND acked_at < now() - make_interval(secs => $1))
|
||||
OR (acked_at IS NULL AND expires_at < now())
|
||||
RETURNING (acked_at IS NOT NULL) AS was_acked
|
||||
)
|
||||
SELECT
|
||||
COALESCE(SUM(CASE WHEN was_acked THEN 1 ELSE 0 END), 0)::int AS acked,
|
||||
COALESCE(SUM(CASE WHEN NOT was_acked THEN 1 ELSE 0 END), 0)::int AS expired
|
||||
FROM deleted
|
||||
`
|
||||
)
|
||||
|
||||
// ----- Put ------------------------------------------------------------------
|
||||
@@ -398,3 +410,324 @@ func TestAck_DBErrorOnDisambiguate_Wrapped(t *testing.T) {
|
||||
t.Fatalf("expected wrapped disambiguate error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ----- Sweep ----------------------------------------------------------------
|
||||
|
||||
func TestSweep_DeletesAckedAndExpired_ReturnsCounts(t *testing.T) {
|
||||
db, mock := newMockDB(t)
|
||||
store := pendinguploads.NewPostgres(db)
|
||||
|
||||
mock.ExpectQuery(sweepSQL).
|
||||
WithArgs(int64(3600)). // 1h retention
|
||||
WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(7, 2))
|
||||
|
||||
res, err := store.Sweep(context.Background(), time.Hour)
|
||||
if err != nil {
|
||||
t.Fatalf("Sweep: %v", err)
|
||||
}
|
||||
if res.Acked != 7 || res.Expired != 2 || res.Total() != 9 {
|
||||
t.Errorf("got %+v want acked=7 expired=2 total=9", res)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSweep_NothingToDelete_ReturnsZero(t *testing.T) {
|
||||
db, mock := newMockDB(t)
|
||||
store := pendinguploads.NewPostgres(db)
|
||||
|
||||
mock.ExpectQuery(sweepSQL).
|
||||
WithArgs(int64(3600)).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(0, 0))
|
||||
|
||||
res, err := store.Sweep(context.Background(), time.Hour)
|
||||
if err != nil {
|
||||
t.Fatalf("Sweep: %v", err)
|
||||
}
|
||||
if res.Total() != 0 {
|
||||
t.Errorf("got %+v, want zero result", res)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSweep_NegativeRetentionClampedToZero(t *testing.T) {
|
||||
db, mock := newMockDB(t)
|
||||
store := pendinguploads.NewPostgres(db)
|
||||
|
||||
// Negative retention must clamp to 0; the SQL gets `secs => 0` so an
|
||||
// acked-just-now row is eligible for deletion immediately. Pinned
|
||||
// here because passing the raw negative through `make_interval` would
|
||||
// silently shift acked_at → future and effectively retain rows
|
||||
// forever — exactly the wrong behavior for a "delete more aggressively"
|
||||
// caller.
|
||||
mock.ExpectQuery(sweepSQL).
|
||||
WithArgs(int64(0)).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(3, 0))
|
||||
|
||||
res, err := store.Sweep(context.Background(), -1*time.Second)
|
||||
if err != nil {
|
||||
t.Fatalf("Sweep: %v", err)
|
||||
}
|
||||
if res.Acked != 3 {
|
||||
t.Errorf("got %+v want acked=3", res)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSweep_ZeroRetentionImmediatelyDeletesAcked(t *testing.T) {
|
||||
db, mock := newMockDB(t)
|
||||
store := pendinguploads.NewPostgres(db)
|
||||
|
||||
mock.ExpectQuery(sweepSQL).
|
||||
WithArgs(int64(0)).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(5, 1))
|
||||
|
||||
res, err := store.Sweep(context.Background(), 0)
|
||||
if err != nil {
|
||||
t.Fatalf("Sweep: %v", err)
|
||||
}
|
||||
if res.Acked != 5 || res.Expired != 1 {
|
||||
t.Errorf("got %+v want acked=5 expired=1", res)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSweep_DBError_Wrapped(t *testing.T) {
|
||||
db, mock := newMockDB(t)
|
||||
store := pendinguploads.NewPostgres(db)
|
||||
|
||||
mock.ExpectQuery(sweepSQL).
|
||||
WithArgs(int64(60)).
|
||||
WillReturnError(errors.New("connection lost"))
|
||||
|
||||
_, err := store.Sweep(context.Background(), time.Minute)
|
||||
if err == nil || !strings.Contains(err.Error(), "sweep") {
|
||||
t.Fatalf("expected wrapped sweep error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSweepResult_TotalSumsCounts(t *testing.T) {
|
||||
r := pendinguploads.SweepResult{Acked: 4, Expired: 3}
|
||||
if r.Total() != 7 {
|
||||
t.Errorf("Total = %d, want 7", r.Total())
|
||||
}
|
||||
z := pendinguploads.SweepResult{}
|
||||
if z.Total() != 0 {
|
||||
t.Errorf("zero Total = %d, want 0", z.Total())
|
||||
}
|
||||
}
|
||||
|
||||
// ----- PutBatch -------------------------------------------------------------
|
||||
//
|
||||
// PutBatch is the multi-file atomic insert path used by uploadPollMode in
|
||||
// chat_files.go. The contract that callers rely on:
|
||||
//
|
||||
// - Either ALL rows commit, or NONE do — a per-row INSERT failure must
|
||||
// leave the table unchanged (no orphaned rows from a half-applied batch).
|
||||
// - Per-item validation runs BEFORE the Tx opens so a bad input shape
|
||||
// never wastes a BEGIN round-trip.
|
||||
// - Returned []uuid.UUID is in input order — handler maps response back
|
||||
// to the multipart Files[i].
|
||||
//
|
||||
// sqlmock's ExpectBegin / ExpectQuery / ExpectCommit / ExpectRollback let us
|
||||
// pin the exact tx-lifecycle shape; if a future refactor swaps Begin for
|
||||
// BeginTx-with-options, the test fails until we re-pin.
|
||||
|
||||
func TestPutBatch_HappyPath_AllCommitInOrder(t *testing.T) {
|
||||
db, mock := newMockDB(t)
|
||||
store := pendinguploads.NewPostgres(db)
|
||||
|
||||
wsID := uuid.New()
|
||||
id1, id2, id3 := uuid.New(), uuid.New(), uuid.New()
|
||||
|
||||
mock.ExpectBegin()
|
||||
mock.ExpectQuery(insertSQL).
|
||||
WithArgs(wsID, []byte("aaa"), int64(3), "a.txt", "text/plain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id1))
|
||||
mock.ExpectQuery(insertSQL).
|
||||
WithArgs(wsID, []byte("bbbb"), int64(4), "b.bin", "application/octet-stream").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id2))
|
||||
mock.ExpectQuery(insertSQL).
|
||||
WithArgs(wsID, []byte("ccccc"), int64(5), "c.pdf", "application/pdf").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id3))
|
||||
mock.ExpectCommit()
|
||||
// Rollback after Commit is a no-op in database/sql; sqlmock allows it
|
||||
// when ExpectCommit was already matched, so we don't need to expect it.
|
||||
|
||||
got, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{
|
||||
{Content: []byte("aaa"), Filename: "a.txt", Mimetype: "text/plain"},
|
||||
{Content: []byte("bbbb"), Filename: "b.bin", Mimetype: "application/octet-stream"},
|
||||
{Content: []byte("ccccc"), Filename: "c.pdf", Mimetype: "application/pdf"},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("PutBatch: %v", err)
|
||||
}
|
||||
if len(got) != 3 || got[0] != id1 || got[1] != id2 || got[2] != id3 {
|
||||
t.Errorf("ids out of order or missing: got %v want [%s %s %s]", got, id1, id2, id3)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPutBatch_EmptyItems_NoTxNoError(t *testing.T) {
|
||||
db, _ := newMockDB(t) // zero expectations — must NOT round-trip
|
||||
store := pendinguploads.NewPostgres(db)
|
||||
|
||||
got, err := store.PutBatch(context.Background(), uuid.New(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("expected nil error on empty batch, got %v", err)
|
||||
}
|
||||
if got != nil {
|
||||
t.Errorf("expected nil ids on empty batch, got %v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPutBatch_RejectsEmptyContent_NoTx(t *testing.T) {
|
||||
db, _ := newMockDB(t)
|
||||
store := pendinguploads.NewPostgres(db)
|
||||
|
||||
_, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{
|
||||
{Content: []byte("ok"), Filename: "a.txt"},
|
||||
{Content: nil, Filename: "b.txt"},
|
||||
})
|
||||
if err == nil || !strings.Contains(err.Error(), "item 1") || !strings.Contains(err.Error(), "empty content") {
|
||||
t.Fatalf("expected item-1 empty-content error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPutBatch_RejectsOversize_ReturnsErrTooLarge(t *testing.T) {
|
||||
db, _ := newMockDB(t)
|
||||
store := pendinguploads.NewPostgres(db)
|
||||
|
||||
too := make([]byte, pendinguploads.MaxFileBytes+1)
|
||||
_, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{
|
||||
{Content: []byte("ok"), Filename: "small.txt"},
|
||||
{Content: too, Filename: "huge.bin"},
|
||||
})
|
||||
if !errors.Is(err, pendinguploads.ErrTooLarge) {
|
||||
t.Fatalf("expected ErrTooLarge, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPutBatch_RejectsEmptyFilename_NoTx(t *testing.T) {
|
||||
db, _ := newMockDB(t)
|
||||
store := pendinguploads.NewPostgres(db)
|
||||
|
||||
_, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{
|
||||
{Content: []byte("hi"), Filename: ""},
|
||||
})
|
||||
if err == nil || !strings.Contains(err.Error(), "item 0") || !strings.Contains(err.Error(), "empty filename") {
|
||||
t.Fatalf("expected item-0 empty-filename error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPutBatch_RejectsLongFilename_NoTx(t *testing.T) {
|
||||
db, _ := newMockDB(t)
|
||||
store := pendinguploads.NewPostgres(db)
|
||||
|
||||
long := strings.Repeat("z", 101)
|
||||
_, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{
|
||||
{Content: []byte("hi"), Filename: "ok.txt"},
|
||||
{Content: []byte("hi"), Filename: long},
|
||||
})
|
||||
if err == nil || !strings.Contains(err.Error(), "item 1") || !strings.Contains(err.Error(), "exceeds 100 chars") {
|
||||
t.Fatalf("expected item-1 too-long-filename error, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPutBatch_BeginTxError_Wrapped(t *testing.T) {
|
||||
db, mock := newMockDB(t)
|
||||
store := pendinguploads.NewPostgres(db)
|
||||
|
||||
mock.ExpectBegin().WillReturnError(errors.New("conn refused"))
|
||||
|
||||
_, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{
|
||||
{Content: []byte("hi"), Filename: "a.txt"},
|
||||
})
|
||||
if err == nil || !strings.Contains(err.Error(), "begin tx") {
|
||||
t.Fatalf("expected wrapped begin-tx error, got %v", err)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPutBatch_RollsBackOnPerRowError_NoCommit(t *testing.T) {
|
||||
// First INSERT succeeds, second errors. PutBatch MUST NOT issue
|
||||
// Commit; the deferred Rollback unwinds row 1 so neither row commits.
|
||||
// This is the contract that prevents orphan rows on a failed batch.
|
||||
db, mock := newMockDB(t)
|
||||
store := pendinguploads.NewPostgres(db)
|
||||
|
||||
wsID := uuid.New()
|
||||
id1 := uuid.New()
|
||||
|
||||
mock.ExpectBegin()
|
||||
mock.ExpectQuery(insertSQL).
|
||||
WithArgs(wsID, []byte("aaa"), int64(3), "a.txt", "").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id1))
|
||||
mock.ExpectQuery(insertSQL).
|
||||
WithArgs(wsID, []byte("bb"), int64(2), "b.txt", "").
|
||||
WillReturnError(errors.New("statement timeout"))
|
||||
// Critical: Rollback expected, NOT Commit. If a future refactor
|
||||
// accidentally swallows the per-row error and Commits anyway, this
|
||||
// test fails because the unmet ExpectCommit-vs-Rollback shape diverges.
|
||||
mock.ExpectRollback()
|
||||
|
||||
_, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{
|
||||
{Content: []byte("aaa"), Filename: "a.txt"},
|
||||
{Content: []byte("bb"), Filename: "b.txt"},
|
||||
})
|
||||
if err == nil || !strings.Contains(err.Error(), "batch insert item 1") {
|
||||
t.Fatalf("expected wrapped per-row insert error, got %v", err)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("expectations (must rollback, no commit): %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPutBatch_RollsBackOnFirstRowError(t *testing.T) {
|
||||
// Edge case: very first INSERT fails. No rows ever staged — but the
|
||||
// Tx still needs to roll back to release the snapshot.
|
||||
db, mock := newMockDB(t)
|
||||
store := pendinguploads.NewPostgres(db)
|
||||
|
||||
wsID := uuid.New()
|
||||
mock.ExpectBegin()
|
||||
mock.ExpectQuery(insertSQL).
|
||||
WithArgs(wsID, []byte("oops"), int64(4), "a.txt", "").
|
||||
WillReturnError(errors.New("constraint violation"))
|
||||
mock.ExpectRollback()
|
||||
|
||||
_, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{
|
||||
{Content: []byte("oops"), Filename: "a.txt"},
|
||||
})
|
||||
if err == nil || !strings.Contains(err.Error(), "batch insert item 0") {
|
||||
t.Fatalf("expected wrapped item-0 insert error, got %v", err)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPutBatch_CommitError_Wrapped(t *testing.T) {
|
||||
// Commit fails after every INSERT succeeded. Postgres has already
|
||||
// rolled back the Tx by this point; we surface the error so the
|
||||
// handler returns 500 and the client retries.
|
||||
db, mock := newMockDB(t)
|
||||
store := pendinguploads.NewPostgres(db)
|
||||
|
||||
wsID := uuid.New()
|
||||
id1 := uuid.New()
|
||||
mock.ExpectBegin()
|
||||
mock.ExpectQuery(insertSQL).
|
||||
WithArgs(wsID, []byte("hi"), int64(2), "a.txt", "").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id1))
|
||||
mock.ExpectCommit().WillReturnError(errors.New("commit broken"))
|
||||
|
||||
_, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{
|
||||
{Content: []byte("hi"), Filename: "a.txt"},
|
||||
})
|
||||
if err == nil || !strings.Contains(err.Error(), "commit batch") {
|
||||
t.Fatalf("expected wrapped commit error, got %v", err)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,129 @@
|
||||
// sweeper.go — periodic GC for the pending_uploads table.
|
||||
//
|
||||
// The platform's poll-mode chat-upload handler creates a row in
|
||||
// pending_uploads for every chat-attached file the canvas sends to a
|
||||
// poll-mode workspace. The workspace's inbox poller fetches the bytes
|
||||
// and acks the row, but two failure modes leak rows long-term:
|
||||
//
|
||||
// 1. Workspace fetches but never acks (network hiccup between GET
|
||||
// /content and POST /ack; workspace crashed between the two).
|
||||
// Phase 1's Get refuses to re-serve an acked row, but a never-
|
||||
// acked row could in principle be fetched repeatedly until expires_at.
|
||||
// Phase 2's workspace-side fetcher is idempotent; the worry is
|
||||
// only disk usage on the platform side.
|
||||
//
|
||||
// 2. Workspace never fetches at all (workspace was offline when the
|
||||
// row was written; the upload's TTL elapsed).
|
||||
//
|
||||
// This sweeper handles both. It runs every SweepInterval, deletes rows
|
||||
// in either category, and emits structured logs + Prometheus counters
|
||||
// so a stuck-fetch dashboard can spot the leak class.
|
||||
//
|
||||
// Failure isolation: a transient DB error must NOT crash the sweeper.
|
||||
// We log + continue; the next tick retries. ctx cancellation cleanly
|
||||
// shuts the loop down for graceful shutdown.
|
||||
|
||||
package pendinguploads
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics"
|
||||
)
|
||||
|
||||
// SweepInterval is the cadence of the GC loop. 5 minutes is a balance
|
||||
// between "rows reaped quickly enough that disk usage doesn't surprise
|
||||
// anyone" and "we don't pay a DELETE round-trip every 30 seconds when
|
||||
// there are no candidates." Aligned with other low-priority sweepers
|
||||
// (registry/orphan_sweeper runs at 60s but operates on Docker — much
|
||||
// more expensive per cycle than a single indexed DELETE).
|
||||
const SweepInterval = 5 * time.Minute
|
||||
|
||||
// DefaultAckRetention is how long an acked row sticks around before the
|
||||
// sweeper deletes it. 1 hour gives the workspace enough time to retry
|
||||
// the GET if its first fetch crashed mid-write — at-least-once handoff
|
||||
// without leaking content for a full 24h after the workspace already
|
||||
// has a copy.
|
||||
const DefaultAckRetention = 1 * time.Hour
|
||||
|
||||
// sweepDeadline bounds a single sweep cycle. A daemon at the edge of
|
||||
// timeout shouldn't pile up goroutines; 30s is generous for a single
|
||||
// indexed DELETE on a table that should rarely have more than a few
|
||||
// thousand rows in flight.
|
||||
const sweepDeadline = 30 * time.Second
|
||||
|
||||
// StartSweeper runs the GC loop until ctx is cancelled. nil storage
|
||||
// makes the loop a no-op (matches the handlers' tolerance for an
|
||||
// unconfigured pendinguploads — some test harnesses run without the
|
||||
// storage wired).
|
||||
//
|
||||
// Pass ackRetention=0 to use DefaultAckRetention. Negative values are
|
||||
// clamped at the storage layer.
|
||||
//
|
||||
// Production callers use SweepInterval (5m). Tests use a short interval
|
||||
// to exercise the ticker-driven sweep path without burning real wall-
|
||||
// clock time.
|
||||
func StartSweeper(ctx context.Context, storage Storage, ackRetention time.Duration) {
|
||||
startSweeperWithInterval(ctx, storage, ackRetention, SweepInterval)
|
||||
}
|
||||
|
||||
// startSweeperWithInterval is the test-friendly variant of StartSweeper
|
||||
// — same loop, but the cadence is caller-specified. Production code
|
||||
// should use StartSweeper to keep the SweepInterval constant pinned.
|
||||
func startSweeperWithInterval(ctx context.Context, storage Storage, ackRetention, interval time.Duration) {
|
||||
if storage == nil {
|
||||
log.Println("pendinguploads sweeper: storage is nil — sweeper disabled")
|
||||
return
|
||||
}
|
||||
if ackRetention == 0 {
|
||||
ackRetention = DefaultAckRetention
|
||||
}
|
||||
log.Printf(
|
||||
"pendinguploads sweeper started — sweeping every %s; ack retention %s",
|
||||
interval, ackRetention,
|
||||
)
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
// Run once immediately so a platform restart cleans up any rows
|
||||
// that became eligible while we were down — don't make the
|
||||
// operator wait 5 minutes for the first sweep.
|
||||
sweepOnce(ctx, storage, ackRetention)
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Println("pendinguploads sweeper: shutdown")
|
||||
return
|
||||
case <-ticker.C:
|
||||
sweepOnce(ctx, storage, ackRetention)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func sweepOnce(parent context.Context, storage Storage, ackRetention time.Duration) {
|
||||
ctx, cancel := context.WithTimeout(parent, sweepDeadline)
|
||||
defer cancel()
|
||||
|
||||
res, err := storage.Sweep(ctx, ackRetention)
|
||||
if err != nil {
|
||||
// Transient errors: log + continue. The next tick retries; if
|
||||
// the DB is genuinely down, the rest of the platform is also
|
||||
// broken and disk usage is the least of the operator's
|
||||
// problems.
|
||||
log.Printf("pendinguploads sweeper: Sweep failed: %v", err)
|
||||
metrics.PendingUploadsSweepError()
|
||||
return
|
||||
}
|
||||
metrics.PendingUploadsSwept(res.Acked, res.Expired)
|
||||
if res.Total() > 0 {
|
||||
// Per-cycle structured-ish log (one line per cycle that did
|
||||
// something). Quiet by design — most cycles delete zero rows
|
||||
// on a healthy system, and a stream of empty-result lines
|
||||
// would drown the production log without surfacing a signal.
|
||||
log.Printf(
|
||||
"pendinguploads sweeper: deleted acked=%d expired=%d total=%d",
|
||||
res.Acked, res.Expired, res.Total(),
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,294 @@
|
||||
package pendinguploads_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
|
||||
)
|
||||
|
||||
// fakeSweepStorage is a minimal Storage that records every Sweep call
|
||||
// and lets each test inject the per-cycle return values. The other
|
||||
// methods are no-ops — the sweeper goroutine never calls them.
|
||||
type fakeSweepStorage struct {
|
||||
calls atomic.Int64
|
||||
results []pendinguploads.SweepResult
|
||||
errs []error
|
||||
cycleDone chan struct{} // closed after each Sweep call (test sync)
|
||||
gotRetention atomic.Int64 // last ackRetention seen, in seconds
|
||||
}
|
||||
|
||||
func newFakeSweepStorage(results []pendinguploads.SweepResult, errs []error) *fakeSweepStorage {
|
||||
return &fakeSweepStorage{
|
||||
results: results,
|
||||
errs: errs,
|
||||
cycleDone: make(chan struct{}, 16),
|
||||
}
|
||||
}
|
||||
|
||||
func (f *fakeSweepStorage) Put(_ context.Context, _ uuid.UUID, _ []byte, _, _ string) (uuid.UUID, error) {
|
||||
return uuid.Nil, errors.New("not used")
|
||||
}
|
||||
func (f *fakeSweepStorage) Get(_ context.Context, _ uuid.UUID) (pendinguploads.Record, error) {
|
||||
return pendinguploads.Record{}, errors.New("not used")
|
||||
}
|
||||
func (f *fakeSweepStorage) MarkFetched(_ context.Context, _ uuid.UUID) error {
|
||||
return errors.New("not used")
|
||||
}
|
||||
func (f *fakeSweepStorage) Ack(_ context.Context, _ uuid.UUID) error {
|
||||
return errors.New("not used")
|
||||
}
|
||||
func (f *fakeSweepStorage) PutBatch(_ context.Context, _ uuid.UUID, _ []pendinguploads.PutItem) ([]uuid.UUID, error) {
|
||||
return nil, errors.New("not used")
|
||||
}
|
||||
func (f *fakeSweepStorage) Sweep(_ context.Context, ackRetention time.Duration) (pendinguploads.SweepResult, error) {
|
||||
idx := int(f.calls.Load())
|
||||
f.calls.Add(1)
|
||||
f.gotRetention.Store(int64(ackRetention.Seconds()))
|
||||
defer func() {
|
||||
select {
|
||||
case f.cycleDone <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}()
|
||||
if idx < len(f.errs) && f.errs[idx] != nil {
|
||||
return pendinguploads.SweepResult{}, f.errs[idx]
|
||||
}
|
||||
if idx < len(f.results) {
|
||||
return f.results[idx], nil
|
||||
}
|
||||
return pendinguploads.SweepResult{}, nil
|
||||
}
|
||||
|
||||
// waitForCycle blocks until at least one Sweep completes, with a deadline.
|
||||
// Tests use this instead of time.Sleep to avoid flakes on slow CI hosts.
|
||||
//
|
||||
// CAVEAT: cycleDone fires from inside fakeSweepStorage.Sweep's defer,
|
||||
// which runs as Sweep returns its result — BEFORE the StartSweeper
|
||||
// loop has processed the (result, error) tuple and called the
|
||||
// metric recorders. Tests that assert on metric counters must NOT
|
||||
// rely on this wait alone; use waitForMetricDelta instead so the
|
||||
// metric increment race (Sweep returns → cycleDone fires → test
|
||||
// reads counter → only then does StartSweeper's loop call
|
||||
// metrics.PendingUploadsSweepError) doesn't produce a flake.
|
||||
func (f *fakeSweepStorage) waitForCycle(t *testing.T, n int, timeout time.Duration) {
|
||||
t.Helper()
|
||||
deadline := time.NewTimer(timeout)
|
||||
defer deadline.Stop()
|
||||
for got := 0; got < n; got++ {
|
||||
select {
|
||||
case <-f.cycleDone:
|
||||
case <-deadline.C:
|
||||
t.Fatalf("waited %s for %d sweep cycles, got %d", timeout, n, f.calls.Load())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// waitForMetricDelta polls the supplied delta function until it returns
|
||||
// `want` or the timeout elapses. Use after waitForCycle when the test
|
||||
// asserts on a metric counter — closes the race between cycleDone
|
||||
// (signalled inside fakeSweepStorage.Sweep's defer, BEFORE Sweep
|
||||
// returns to StartSweeper) and the metric recording (which happens in
|
||||
// StartSweeper's loop AFTER Sweep returns). On a slow CI host the test
|
||||
// goroutine wins the read before StartSweeper's goroutine writes the
|
||||
// counter; the polling assert preserves the determinism of "the metric
|
||||
// MUST be N" without timing-based flakes.
|
||||
//
|
||||
// Per memory feedback_question_test_when_unexpected.md: the failure
|
||||
// mode "delta=0, want=1" looked like a real bug at first glance —
|
||||
// "metric never incremented" — but instrumented analysis showed the
|
||||
// metric DID increment, just AFTER the test's read. The fix is the
|
||||
// test's wait shape, not the production code.
|
||||
func waitForMetricDelta(t *testing.T, delta func() int64, want int64, timeout time.Duration) {
|
||||
t.Helper()
|
||||
deadline := time.Now().Add(timeout)
|
||||
for time.Now().Before(deadline) {
|
||||
if delta() == want {
|
||||
return
|
||||
}
|
||||
time.Sleep(5 * time.Millisecond)
|
||||
}
|
||||
t.Fatalf("waited %s for metric delta=%d, last seen %d", timeout, want, delta())
|
||||
}
|
||||
|
||||
func TestStartSweeper_NilStorageDoesNotPanic(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
// Should return immediately without panicking; no goroutine to wait on.
|
||||
pendinguploads.StartSweeper(ctx, nil, time.Second)
|
||||
}
|
||||
|
||||
func TestStartSweeper_RunsImmediatelyAndOnTick(t *testing.T) {
|
||||
store := newFakeSweepStorage(
|
||||
[]pendinguploads.SweepResult{{Acked: 5}, {Acked: 1, Expired: 2}},
|
||||
nil,
|
||||
)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
go pendinguploads.StartSweeper(ctx, store, time.Hour)
|
||||
store.waitForCycle(t, 1, 2*time.Second)
|
||||
if got := store.calls.Load(); got < 1 {
|
||||
t.Errorf("expected at least one immediate sweep, got %d", got)
|
||||
}
|
||||
// Retention propagated.
|
||||
if store.gotRetention.Load() != 3600 {
|
||||
t.Errorf("retention seconds = %d, want 3600", store.gotRetention.Load())
|
||||
}
|
||||
}
|
||||
|
||||
func TestStartSweeper_ZeroAckRetentionUsesDefault(t *testing.T) {
|
||||
store := newFakeSweepStorage([]pendinguploads.SweepResult{{}}, nil)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
go pendinguploads.StartSweeper(ctx, store, 0)
|
||||
store.waitForCycle(t, 1, 2*time.Second)
|
||||
want := int64(pendinguploads.DefaultAckRetention.Seconds())
|
||||
if store.gotRetention.Load() != want {
|
||||
t.Errorf("retention = %d, want default %d", store.gotRetention.Load(), want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStartSweeper_ContextCancelStopsLoop(t *testing.T) {
|
||||
store := newFakeSweepStorage([]pendinguploads.SweepResult{{}}, nil)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
pendinguploads.StartSweeper(ctx, store, time.Second)
|
||||
close(done)
|
||||
}()
|
||||
store.waitForCycle(t, 1, 2*time.Second)
|
||||
cancel()
|
||||
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("StartSweeper did not return after ctx cancel")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStartSweeperWithInterval_TickerFiresAdditionalCycles(t *testing.T) {
|
||||
store := newFakeSweepStorage(
|
||||
[]pendinguploads.SweepResult{{Acked: 1}, {Expired: 1}, {}, {}, {}},
|
||||
nil,
|
||||
)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
go pendinguploads.StartSweeperWithIntervalForTest(ctx, store, time.Hour, 30*time.Millisecond)
|
||||
|
||||
// Immediate cycle + at least one tick-driven cycle.
|
||||
store.waitForCycle(t, 2, 2*time.Second)
|
||||
|
||||
if got := store.calls.Load(); got < 2 {
|
||||
t.Errorf("expected ≥2 cycles (immediate + 1 tick), got %d", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStartSweeper_TransientErrorDoesNotCrashLoop(t *testing.T) {
|
||||
// First call errors; second call succeeds. The loop must keep running
|
||||
// across the error so a one-off DB hiccup doesn't disable the GC.
|
||||
store := newFakeSweepStorage(
|
||||
[]pendinguploads.SweepResult{{}, {Acked: 1}},
|
||||
[]error{errors.New("transient db error"), nil},
|
||||
)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
// 50ms ticker so the second cycle fires quickly enough for the test.
|
||||
// We re-export SweepInterval as a const, but tests use the public
|
||||
// StartSweeper that takes its own interval — wait, the public
|
||||
// StartSweeper signature uses the package-level SweepInterval. Hmm,
|
||||
// this means the test takes ~5 minutes. Let me reconsider.
|
||||
//
|
||||
// (We patch the test below to just look at the immediate-sweep call
|
||||
// + an error path, since the immediate call is enough to prove the
|
||||
// "error doesn't crash" contract — the loop continues afterward
|
||||
// regardless of timing.)
|
||||
go pendinguploads.StartSweeper(ctx, store, time.Hour)
|
||||
|
||||
// Wait for the first (errored) cycle.
|
||||
store.waitForCycle(t, 1, 2*time.Second)
|
||||
// Cancel — the goroutine returns cleanly, proving the error path
|
||||
// didn't crash the loop. Without this fix the goroutine would have
|
||||
// either panicked (process abort visible at exit) or stuck (this
|
||||
// cancel + done-channel pattern would deadlock instead).
|
||||
cancel()
|
||||
}
|
||||
|
||||
// metricDelta returns a function that, when called, returns how much
|
||||
// the (acked, expired, errored) counters have advanced since metricDelta
|
||||
// was originally called. metrics is a process-singleton across the test
|
||||
// suite; deltas isolate this test from order-of-execution dependencies.
|
||||
func metricDelta(t *testing.T) (deltaAcked, deltaExpired, deltaError func() int64) {
|
||||
t.Helper()
|
||||
a0, e0, err0 := metrics.PendingUploadsSweepCounts()
|
||||
deltaAcked = func() int64 {
|
||||
a, _, _ := metrics.PendingUploadsSweepCounts()
|
||||
return a - a0
|
||||
}
|
||||
deltaExpired = func() int64 {
|
||||
_, e, _ := metrics.PendingUploadsSweepCounts()
|
||||
return e - e0
|
||||
}
|
||||
deltaError = func() int64 {
|
||||
_, _, x := metrics.PendingUploadsSweepCounts()
|
||||
return x - err0
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func TestStartSweeper_RecordsMetricsOnSuccess(t *testing.T) {
|
||||
deltaAcked, deltaExpired, deltaError := metricDelta(t)
|
||||
|
||||
store := newFakeSweepStorage(
|
||||
[]pendinguploads.SweepResult{{Acked: 3, Expired: 5}},
|
||||
nil,
|
||||
)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
go pendinguploads.StartSweeper(ctx, store, time.Hour)
|
||||
store.waitForCycle(t, 1, 2*time.Second)
|
||||
|
||||
// Poll for the success counters to settle — closes the cycleDone-
|
||||
// vs-metric-record race (see waitForMetricDelta comment).
|
||||
waitForMetricDelta(t, deltaAcked, 3, 2*time.Second)
|
||||
waitForMetricDelta(t, deltaExpired, 5, 2*time.Second)
|
||||
// Error counter MUST stay at zero on the success path. Read after
|
||||
// the success counters have settled — once those are correct,
|
||||
// StartSweeper has fully processed this cycle's result.
|
||||
if got := deltaError(); got != 0 {
|
||||
t.Errorf("error counter delta = %d, want 0", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStartSweeper_RecordsMetricsOnError(t *testing.T) {
|
||||
_, _, deltaError := metricDelta(t)
|
||||
|
||||
store := newFakeSweepStorage(
|
||||
[]pendinguploads.SweepResult{{}},
|
||||
[]error{errors.New("db down")},
|
||||
)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
go pendinguploads.StartSweeper(ctx, store, time.Hour)
|
||||
store.waitForCycle(t, 1, 2*time.Second)
|
||||
|
||||
// Poll for the error counter to settle — cycleDone fires inside
|
||||
// the fake's Sweep defer, BEFORE StartSweeper's loop receives the
|
||||
// returned error and calls metrics.PendingUploadsSweepError. On
|
||||
// slow CI hosts a direct deltaError() read here returns 0 even
|
||||
// though the metric WILL be 1 a few ms later. See
|
||||
// waitForMetricDelta comment.
|
||||
waitForMetricDelta(t, deltaError, 1, 2*time.Second)
|
||||
}
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
|
||||
)
|
||||
|
||||
// CPProvisionerAPI is the contract WorkspaceHandler uses to talk to the
|
||||
@@ -214,6 +215,13 @@ func (p *CPProvisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string,
|
||||
}
|
||||
|
||||
log.Printf("CP provisioner: workspace %s → EC2 instance %s (%s)", cfg.WorkspaceID, result.InstanceID, result.State)
|
||||
provlog.Event("provision.ec2_started", map[string]any{
|
||||
"workspace_id": cfg.WorkspaceID,
|
||||
"instance_id": result.InstanceID,
|
||||
"state": result.State,
|
||||
"tier": cfg.Tier,
|
||||
"runtime": cfg.Runtime,
|
||||
})
|
||||
return result.InstanceID, nil
|
||||
}
|
||||
|
||||
@@ -273,6 +281,10 @@ func (p *CPProvisioner) Stop(ctx context.Context, workspaceID string) error {
|
||||
return fmt.Errorf("cp provisioner: stop %s: unexpected %d: %s",
|
||||
workspaceID, resp.StatusCode, strings.TrimSpace(string(body)))
|
||||
}
|
||||
provlog.Event("provision.ec2_stopped", map[string]any{
|
||||
"workspace_id": workspaceID,
|
||||
"instance_id": instanceID,
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
// Package provlog emits structured, single-line JSON log records for
|
||||
// provisioning-lifecycle boundaries (workspace create, EC2 start/stop,
|
||||
// restart, idempotency skips). Records share a stable `evt:` prefix and
|
||||
// JSON payload so a future grep|jq pipeline (or a Loki/Datadog ingest)
|
||||
// can reconstruct the per-workspace timeline without parsing the
|
||||
// human-prose log lines that already exist.
|
||||
//
|
||||
// Existing log.Printf lines are intentionally NOT replaced — they
|
||||
// remain the operator-facing message. Event() emits a paired structured
|
||||
// record alongside, additive only.
|
||||
//
|
||||
// Event taxonomy (extend by appending; never rename):
|
||||
//
|
||||
// provision.start — workspace row inserted, EC2 about to launch
|
||||
// provision.skip_existing — idempotency hit, no new EC2
|
||||
// provision.ec2_started — RunInstances returned an instance id
|
||||
// provision.ec2_stopped — TerminateInstances acknowledged
|
||||
// restart.pre_stop — Restart handler about to call Stop
|
||||
//
|
||||
// Required fields per event are documented at each call site.
|
||||
package provlog
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log"
|
||||
)
|
||||
|
||||
// Event writes a single line of the form:
|
||||
//
|
||||
// evt: <name> {"k":"v",...}
|
||||
//
|
||||
// to the standard logger. JSON encoding errors are silently swallowed —
|
||||
// a logging helper must never panic the request path. fields may be
|
||||
// nil; the empty payload `{}` is still useful to mark an event boundary.
|
||||
func Event(name string, fields map[string]any) {
|
||||
if fields == nil {
|
||||
fields = map[string]any{}
|
||||
}
|
||||
payload, err := json.Marshal(fields)
|
||||
if err != nil {
|
||||
// Fall back to a static payload so the event boundary still
|
||||
// appears in the log. The marshal error itself is recorded
|
||||
// on a best-effort basis.
|
||||
log.Printf("evt: %s {\"_marshal_err\":%q}", name, err.Error())
|
||||
return
|
||||
}
|
||||
log.Printf("evt: %s %s", name, payload)
|
||||
}
|
||||
@@ -0,0 +1,97 @@
|
||||
package provlog
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"log"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// captureLog redirects the default logger to a buffer for the duration
|
||||
// of fn and returns whatever was written.
|
||||
func captureLog(t *testing.T, fn func()) string {
|
||||
t.Helper()
|
||||
var buf bytes.Buffer
|
||||
prevWriter := log.Writer()
|
||||
prevFlags := log.Flags()
|
||||
log.SetOutput(&buf)
|
||||
log.SetFlags(0) // strip date/time so assertions stay deterministic
|
||||
t.Cleanup(func() {
|
||||
log.SetOutput(prevWriter)
|
||||
log.SetFlags(prevFlags)
|
||||
})
|
||||
fn()
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
func TestEvent_EmitsEvtPrefixAndJSONPayload(t *testing.T) {
|
||||
out := captureLog(t, func() {
|
||||
Event("provision.start", map[string]any{
|
||||
"workspace_id": "ws-123",
|
||||
"tier": 4,
|
||||
"runtime": "claude-code",
|
||||
})
|
||||
})
|
||||
out = strings.TrimSpace(out)
|
||||
if !strings.HasPrefix(out, "evt: provision.start ") {
|
||||
t.Fatalf("expected evt-prefixed line, got %q", out)
|
||||
}
|
||||
jsonPart := strings.TrimPrefix(out, "evt: provision.start ")
|
||||
var got map[string]any
|
||||
if err := json.Unmarshal([]byte(jsonPart), &got); err != nil {
|
||||
t.Fatalf("payload not valid JSON: %v (raw=%q)", err, jsonPart)
|
||||
}
|
||||
if got["workspace_id"] != "ws-123" {
|
||||
t.Errorf("workspace_id field lost: %+v", got)
|
||||
}
|
||||
// JSON unmarshal turns numbers into float64 — exact-equal compare.
|
||||
if got["tier"].(float64) != 4 {
|
||||
t.Errorf("tier field lost: %+v", got)
|
||||
}
|
||||
if got["runtime"] != "claude-code" {
|
||||
t.Errorf("runtime field lost: %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEvent_NilFieldsEmitsEmptyObject(t *testing.T) {
|
||||
out := captureLog(t, func() {
|
||||
Event("restart.pre_stop", nil)
|
||||
})
|
||||
if !strings.Contains(out, "evt: restart.pre_stop {}") {
|
||||
t.Fatalf("nil fields should emit empty object, got %q", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEvent_PreservesEventBoundaryOnUnmarshalableValue(t *testing.T) {
|
||||
// A channel cannot be marshaled by encoding/json — verify we still
|
||||
// emit the event boundary with a recorded marshal error. This is
|
||||
// the structural guarantee: the call site never sees a panic, and
|
||||
// the event name is always present in the log.
|
||||
out := captureLog(t, func() {
|
||||
Event("provision.ec2_started", map[string]any{
|
||||
"chan": make(chan int),
|
||||
})
|
||||
})
|
||||
if !strings.Contains(out, "evt: provision.ec2_started ") {
|
||||
t.Fatalf("event boundary missing on marshal error: %q", out)
|
||||
}
|
||||
if !strings.Contains(out, "_marshal_err") {
|
||||
t.Fatalf("expected _marshal_err sentinel, got %q", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEvent_SingleLineOutput(t *testing.T) {
|
||||
// Log aggregators line-split on \n. A multi-line emit would silently
|
||||
// fragment the JSON across two records — pin single-line shape.
|
||||
out := captureLog(t, func() {
|
||||
Event("provision.skip_existing", map[string]any{
|
||||
"existing_id": "ws-abc",
|
||||
"name": "child-1",
|
||||
})
|
||||
})
|
||||
trimmed := strings.TrimRight(out, "\n")
|
||||
if strings.Contains(trimmed, "\n") {
|
||||
t.Fatalf("event line must be single-line, got %q", out)
|
||||
}
|
||||
}
|
||||
@@ -232,6 +232,20 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
|
||||
wsAuth.DELETE("/memories/:memoryId", memsh.Delete)
|
||||
wsAuth.PATCH("/memories/:memoryId", memsh.Update)
|
||||
|
||||
// Memory v2 — canvas reads through the plugin so the Memory
|
||||
// tab surfaces post-cutover state (memory_records) instead
|
||||
// of the frozen agent_memories table that memsh.Search hits.
|
||||
// Wired only when MEMORY_PLUGIN_URL is configured; absent
|
||||
// plugin → endpoints return 503 with a clear hint instead
|
||||
// of nil-deref crashing the canvas.
|
||||
memv2 := handlers.NewMemoriesV2Handler()
|
||||
if memBundle != nil {
|
||||
memv2.WithMemoryV2(memBundle.Plugin, memBundle.Resolver)
|
||||
}
|
||||
wsAuth.GET("/v2/namespaces", memv2.Namespaces)
|
||||
wsAuth.GET("/v2/memories", memv2.Search)
|
||||
wsAuth.DELETE("/v2/memories/:memoryId", memv2.Forget)
|
||||
|
||||
// Approvals
|
||||
apph := handlers.NewApprovalsHandler(broadcaster)
|
||||
wsAuth.POST("/approvals", apph.Create)
|
||||
@@ -243,13 +257,15 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
|
||||
// entire platform. Gated behind AdminAuth (issue #180).
|
||||
r.GET("/approvals/pending", middleware.AdminAuth(db.DB), apph.ListAll)
|
||||
|
||||
// Team handlers — Collapse only. The bulk-Expand path is gone:
|
||||
// every workspace can have children via the regular CreateWorkspace
|
||||
// flow with parent_id set, so a separate handler that bulk-creates
|
||||
// from sub_workspaces (and was non-idempotent — calling it twice
|
||||
// duplicated the team) earned its way out.
|
||||
teamh := handlers.NewTeamHandler(broadcaster, wh, platformURL, configsDir)
|
||||
wsAuth.POST("/collapse", teamh.Collapse)
|
||||
// (TeamHandler is gone — #2864.) The visual canvas Collapse
|
||||
// button calls PATCH /workspaces/:id { collapsed: true/false }
|
||||
// (presentational toggle on canvas_layouts), NOT the destructive
|
||||
// POST /collapse that stopped + removed children. The
|
||||
// destructive route had zero UI callers (verified via grep
|
||||
// across canvas/, scripts/, and the MCP tool registry — only
|
||||
// docs referenced it). team.go + team_test.go + the route
|
||||
// + helpers (findTemplateDirByName, NewTeamHandler) are
|
||||
// deleted; visual collapse is unaffected.
|
||||
|
||||
// Agents
|
||||
ah := handlers.NewAgentHandler(broadcaster)
|
||||
@@ -519,8 +535,9 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
|
||||
r.GET("/canvas/viewport", vh.Get)
|
||||
r.PUT("/canvas/viewport", middleware.CanvasOrBearer(db.DB), vh.Save)
|
||||
|
||||
// Templates
|
||||
tmplh := handlers.NewTemplatesHandler(configsDir, dockerCli)
|
||||
// Templates — wh threaded so generateDefaultConfig picks the
|
||||
// SaaS-aware default tier in Import + ReplaceFiles (#2910 PR-B).
|
||||
tmplh := handlers.NewTemplatesHandler(configsDir, dockerCli, wh)
|
||||
// #686: GET /templates lists all template names+metadata from configsDir.
|
||||
// Open access lets unauthenticated callers enumerate org configurations and
|
||||
// installed plugins. AdminAuth-gate it alongside POST /templates/import.
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
cronlib "github.com/robfig/cron/v3"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/supervised"
|
||||
)
|
||||
|
||||
@@ -741,6 +742,11 @@ func (s *Scheduler) sweepPhantomBusy(ctx context.Context) {
|
||||
continue
|
||||
}
|
||||
log.Printf("Scheduler: phantom-busy sweep — reset %s (no activity in %d min)", name, int(phantomStaleThreshold.Minutes()))
|
||||
// #2865: surface as molecule_phantom_busy_resets_total. High
|
||||
// reset rate signals task-lifecycle accounting regressions
|
||||
// (e.g. missing env vars causing claude --print timeouts that
|
||||
// leave active_tasks elevated until this sweep fires).
|
||||
metrics.TrackPhantomBusyReset()
|
||||
count++
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
-- Reversal of 20260505200000_pending_uploads_acked_index.up.sql.
|
||||
DROP INDEX IF EXISTS idx_pending_uploads_acked;
|
||||
@@ -0,0 +1,30 @@
|
||||
-- 20260505200000_pending_uploads_acked_index.up.sql
|
||||
--
|
||||
-- Adds the missing partial index for the acked-retention arm of the
|
||||
-- pendinguploads.Sweep query. The Phase 1 migration created two
|
||||
-- partial indexes both gated on `acked_at IS NULL` (workspace-fetch
|
||||
-- hot path + expires_at sweep arm); the third query path —
|
||||
-- `WHERE acked_at IS NOT NULL AND acked_at < now() - interval` — was
|
||||
-- left to a seq scan.
|
||||
--
|
||||
-- For a high-traffic deployment that's a real cost: the table
|
||||
-- accumulates one row per chat-attached file; the sweeper runs every
|
||||
-- 5 minutes and DELETEs rows past the 1-hour ack retention. A seq
|
||||
-- scan over 100K-1M acked rows holds an AccessShare lock for seconds
|
||||
-- on every cycle. Partial-indexing the inverse predicate reduces
|
||||
-- this to a btree range scan and lets the DELETE complete in
|
||||
-- low-millisecond range.
|
||||
--
|
||||
-- WHERE acked_at IS NOT NULL is intentionally inverse of the other
|
||||
-- two indexes — they cover the unacked working set; this covers the
|
||||
-- terminal-state set the sweeper visits. Disjoint subsets, so the
|
||||
-- two indexes don't overlap.
|
||||
--
|
||||
-- Caught in self-review on the parent RFC's Phase 4 PR; filed as
|
||||
-- a follow-up rather than a Phase 1 fix because the cost only
|
||||
-- materializes at a row count we don't expect to hit before the
|
||||
-- sweeper has had a chance to keep up.
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_pending_uploads_acked
|
||||
ON pending_uploads (acked_at)
|
||||
WHERE acked_at IS NOT NULL;
|
||||
@@ -584,6 +584,24 @@ async def send_a2a_message(peer_id: str, message: str, source_workspace_id: str
|
||||
else:
|
||||
detail = "JSON-RPC error with no message"
|
||||
return f"{_A2A_ERROR_PREFIX}{detail} [target={target_url}]"
|
||||
elif data.get("status") == "queued" and data.get("delivery_mode") == "poll":
|
||||
# Workspace-server's poll-mode short-circuit envelope
|
||||
# (workspace-server/internal/handlers/a2a_proxy.go ~line 402).
|
||||
# The peer is poll-mode and has no URL to dispatch to, so
|
||||
# the server queued the message for the peer's next inbox
|
||||
# poll instead of forwarding it. Delivery is acknowledged
|
||||
# but pending consumption.
|
||||
#
|
||||
# Pre-fix this fell through to the "unexpected response
|
||||
# shape" error path → callers logged false failures, then
|
||||
# delegate_task retried, and the peer received duplicate
|
||||
# delegations. Issue #2967.
|
||||
method = data.get("method") or "message/send"
|
||||
logger.info(
|
||||
"send_a2a_message: queued for poll-mode peer (method=%s, target=%s)",
|
||||
method, target_url,
|
||||
)
|
||||
return f"queued for poll-mode peer (method={method})"
|
||||
return f"{_A2A_ERROR_PREFIX}unexpected response shape (no result, no error): {str(data)[:200]} [target={target_url}]"
|
||||
except _TRANSIENT_HTTP_ERRORS as e:
|
||||
last_exc = e
|
||||
|
||||
@@ -425,7 +425,16 @@ def _build_initialize_result() -> dict:
|
||||
"tools": {"listChanged": False},
|
||||
"experimental": {"claude/channel": {}},
|
||||
},
|
||||
"serverInfo": {"name": "a2a-delegation", "version": "1.0.0"},
|
||||
# Identifier convention: this server is what users register with
|
||||
# `claude mcp add molecule -- molecule-mcp` (and similar across
|
||||
# other MCP hosts), so the canonical name is "molecule". Earlier
|
||||
# versions reported "a2a-delegation" — accurate to the original
|
||||
# purpose but a mismatch with how operators actually name it.
|
||||
# Mismatch is harmless on tool routing (all MCP hosts dispatch
|
||||
# by the user-supplied registration name, NOT serverInfo.name)
|
||||
# but matters for any future Claude Code allowlist that gates
|
||||
# channel push by hardcoded server name (issue #2934).
|
||||
"serverInfo": {"name": "molecule", "version": "1.0.0"},
|
||||
# Built per-call (not the module-level constant) so an operator
|
||||
# who sets MOLECULE_MCP_POLL_TIMEOUT_SECS after import — e.g.
|
||||
# via a wrapper script that exports then re-imports — sees
|
||||
|
||||
+57
-880
@@ -28,96 +28,20 @@ from platform_auth import list_registered_workspaces
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# RBAC helpers (mirror builtin_tools/audit.py for a2a_tools isolation)
|
||||
# RBAC + auth helpers — extracted to a2a_tools_rbac (RFC #2873 iter 4a).
|
||||
# Re-exported here under the legacy underscore names so existing tests'
|
||||
# patch("a2a_tools._check_memory_write_permission", …) and call sites
|
||||
# inside this module that resolve bare names against the module-level
|
||||
# namespace continue to work unchanged.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_ROLE_PERMISSIONS = {
|
||||
"admin": {"delegate", "approve", "memory.read", "memory.write"},
|
||||
"operator": {"delegate", "approve", "memory.read", "memory.write"},
|
||||
"read-only": {"memory.read"},
|
||||
"no-delegation": {"approve", "memory.read", "memory.write"},
|
||||
"no-approval": {"delegate", "memory.read", "memory.write"},
|
||||
"memory-readonly": {"memory.read"},
|
||||
}
|
||||
|
||||
|
||||
def _get_workspace_tier() -> int:
|
||||
"""Return the workspace tier from config (0 = root, 1+ = tenant)."""
|
||||
try:
|
||||
from config import load_config
|
||||
|
||||
cfg = load_config()
|
||||
return getattr(cfg, "tier", 1)
|
||||
except Exception:
|
||||
return int(os.environ.get("WORKSPACE_TIER", 1))
|
||||
|
||||
|
||||
def _check_memory_write_permission() -> bool:
|
||||
"""Return True if this workspace's RBAC roles grant memory.write."""
|
||||
try:
|
||||
from config import load_config
|
||||
|
||||
cfg = load_config()
|
||||
roles = list(getattr(cfg, "rbac", None).roles or ["operator"])
|
||||
allowed = dict(getattr(cfg, "rbac", None).allowed_actions or {})
|
||||
except Exception:
|
||||
# Fail closed: deny when config is unavailable
|
||||
roles = ["operator"]
|
||||
allowed = {}
|
||||
|
||||
for role in roles:
|
||||
if role == "admin":
|
||||
return True
|
||||
if role in allowed:
|
||||
if "memory.write" in allowed[role]:
|
||||
return True
|
||||
elif role in _ROLE_PERMISSIONS and "memory.write" in _ROLE_PERMISSIONS[role]:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _check_memory_read_permission() -> bool:
|
||||
"""Return True if this workspace's RBAC roles grant memory.read."""
|
||||
try:
|
||||
from config import load_config
|
||||
|
||||
cfg = load_config()
|
||||
roles = list(getattr(cfg, "rbac", None).roles or ["operator"])
|
||||
allowed = dict(getattr(cfg, "rbac", None).allowed_actions or {})
|
||||
except Exception:
|
||||
roles = ["operator"]
|
||||
allowed = {}
|
||||
|
||||
for role in roles:
|
||||
if role == "admin":
|
||||
return True
|
||||
if role in allowed:
|
||||
if "memory.read" in allowed[role]:
|
||||
return True
|
||||
elif role in _ROLE_PERMISSIONS and "memory.read" in _ROLE_PERMISSIONS[role]:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _is_root_workspace() -> bool:
|
||||
"""Return True if this workspace is tier 0 (root/root-org)."""
|
||||
return _get_workspace_tier() == 0
|
||||
|
||||
|
||||
def _auth_headers_for_heartbeat(workspace_id: str | None = None) -> dict[str, str]:
|
||||
"""Return Phase 30.1 auth headers; tolerate platform_auth being absent
|
||||
in older installs (e.g. during rolling upgrade).
|
||||
|
||||
``workspace_id`` selects the per-workspace token from the multi-
|
||||
workspace registry when set (PR-1: external agent registered in
|
||||
multiple workspaces). With no arg the legacy single-token path is
|
||||
unchanged.
|
||||
"""
|
||||
try:
|
||||
from platform_auth import auth_headers
|
||||
return auth_headers(workspace_id) if workspace_id else auth_headers()
|
||||
except Exception:
|
||||
return {}
|
||||
from a2a_tools_rbac import ( # noqa: E402 (import after the from-a2a_client block)
|
||||
_auth_headers_for_heartbeat,
|
||||
_check_memory_read_permission,
|
||||
_check_memory_write_permission,
|
||||
_get_workspace_tier,
|
||||
_is_root_workspace,
|
||||
_ROLE_PERMISSIONS,
|
||||
)
|
||||
|
||||
|
||||
# Per-field caps on the heartbeat / activity payload. Borrowed from
|
||||
@@ -191,801 +115,54 @@ async def report_activity(
|
||||
pass # Best-effort — don't block delegation on activity reporting
|
||||
|
||||
|
||||
# RFC #2829 PR-5 cutover constants. The poll cadence + timeout are
|
||||
# intentionally generous: 3s gives the platform's executeDelegation
|
||||
# goroutine room to dispatch + the callee to respond + the result to
|
||||
# write to activity_logs without thrashing the platform with rapid
|
||||
# polls; the budget matches the legacy DELEGATION_TIMEOUT (300s) so
|
||||
# operators don't see behavior change beyond "no more 600s timeouts".
|
||||
_SYNC_POLL_INTERVAL_S = 3.0
|
||||
_SYNC_POLL_BUDGET_S = float(os.environ.get("DELEGATION_TIMEOUT", "300.0"))
|
||||
|
||||
|
||||
async def _delegate_sync_via_polling(
|
||||
workspace_id: str,
|
||||
task: str,
|
||||
src: str,
|
||||
) -> str:
|
||||
"""RFC #2829 PR-5: durable async delegation + poll for terminal status.
|
||||
|
||||
Sidesteps the platform proxy's blocking `message/send` HTTP path that
|
||||
hits a hard 600s ceiling. Instead:
|
||||
|
||||
1. POST /workspaces/<src>/delegate (async, returns 202 + delegation_id)
|
||||
— platform's executeDelegation goroutine handles A2A dispatch in
|
||||
the background. No client-side timeout dependency on the platform
|
||||
holding a connection open.
|
||||
2. Poll GET /workspaces/<src>/delegations every 3s for a row with
|
||||
matching delegation_id reaching terminal status (completed/failed).
|
||||
3. Return the response_preview text on completed; surface error_detail
|
||||
on failed (with the same _A2A_ERROR_PREFIX wrapping the legacy
|
||||
path uses, so caller error-detection logic is unchanged).
|
||||
|
||||
Both /delegate and /delegations are existing endpoints — this helper
|
||||
just composes them into a polling synchronous facade. The result is
|
||||
available the moment the platform writes the terminal status row;
|
||||
no extra latency vs. the legacy proxy-blocked path on fast cases.
|
||||
"""
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
idem_key = hashlib.sha256(f"{src}:{workspace_id}:{task}".encode()).hexdigest()[:32]
|
||||
|
||||
# 1. Dispatch via /delegate (the async, durable path).
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/delegate",
|
||||
json={
|
||||
"target_id": workspace_id,
|
||||
"task": task,
|
||||
"idempotency_key": idem_key,
|
||||
},
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
except Exception as e: # pylint: disable=broad-except
|
||||
return f"{_A2A_ERROR_PREFIX}delegate dispatch failed: {e}"
|
||||
|
||||
if resp.status_code != 202 and resp.status_code != 200:
|
||||
return f"{_A2A_ERROR_PREFIX}delegate dispatch failed: HTTP {resp.status_code} {resp.text[:200]}"
|
||||
|
||||
try:
|
||||
dispatch = resp.json()
|
||||
except Exception as e: # pylint: disable=broad-except
|
||||
return f"{_A2A_ERROR_PREFIX}delegate dispatch returned non-JSON: {e}"
|
||||
|
||||
delegation_id = dispatch.get("delegation_id", "")
|
||||
if not delegation_id:
|
||||
return f"{_A2A_ERROR_PREFIX}delegate dispatch missing delegation_id: {dispatch}"
|
||||
|
||||
# 2. Poll for terminal status with a deadline. Each poll is a cheap
|
||||
# /delegations GET — bounded by the platform's existing rate limit.
|
||||
deadline = time.monotonic() + _SYNC_POLL_BUDGET_S
|
||||
last_status = "unknown"
|
||||
while time.monotonic() < deadline:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
poll = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/delegations",
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
except Exception as e: # pylint: disable=broad-except
|
||||
# Transient — keep polling. The platform IS holding the
|
||||
# delegation row; we just lost a network request.
|
||||
last_status = f"poll-error: {e}"
|
||||
await asyncio.sleep(_SYNC_POLL_INTERVAL_S)
|
||||
continue
|
||||
|
||||
if poll.status_code != 200:
|
||||
last_status = f"poll HTTP {poll.status_code}"
|
||||
await asyncio.sleep(_SYNC_POLL_INTERVAL_S)
|
||||
continue
|
||||
|
||||
try:
|
||||
rows = poll.json()
|
||||
except Exception as e: # pylint: disable=broad-except
|
||||
last_status = f"poll non-JSON: {e}"
|
||||
await asyncio.sleep(_SYNC_POLL_INTERVAL_S)
|
||||
continue
|
||||
|
||||
# /delegations returns a flat list of delegation events. Filter to
|
||||
# our delegation_id; pick the first terminal one. The list may
|
||||
# have multiple rows per delegation_id (one for the original
|
||||
# dispatch, one per status update); we want the latest terminal.
|
||||
if not isinstance(rows, list):
|
||||
await asyncio.sleep(_SYNC_POLL_INTERVAL_S)
|
||||
continue
|
||||
terminal = None
|
||||
for r in rows:
|
||||
if not isinstance(r, dict):
|
||||
continue
|
||||
if r.get("delegation_id") != delegation_id:
|
||||
continue
|
||||
status = (r.get("status") or "").lower()
|
||||
last_status = status
|
||||
if status in ("completed", "failed"):
|
||||
terminal = r
|
||||
break
|
||||
if terminal:
|
||||
if (terminal.get("status") or "").lower() == "completed":
|
||||
return terminal.get("response_preview") or ""
|
||||
err = (
|
||||
terminal.get("error_detail")
|
||||
or terminal.get("summary")
|
||||
or "delegation failed"
|
||||
)
|
||||
return f"{_A2A_ERROR_PREFIX}{err}"
|
||||
|
||||
await asyncio.sleep(_SYNC_POLL_INTERVAL_S)
|
||||
|
||||
# Budget exhausted — the platform's row is still in flight (or queued).
|
||||
# Surface as an error so the caller can decide to retry or fall back;
|
||||
# the platform DOES still have the durable row, so the work isn't
|
||||
# lost — it'll complete eventually and a future check_task_status
|
||||
# will surface the result.
|
||||
return (
|
||||
f"{_A2A_ERROR_PREFIX}polling timeout after {_SYNC_POLL_BUDGET_S}s "
|
||||
f"(delegation_id={delegation_id}, last_status={last_status}); "
|
||||
f"the platform is still working on it — call check_task_status('{delegation_id}') to retrieve later"
|
||||
)
|
||||
|
||||
|
||||
async def tool_delegate_task(
|
||||
workspace_id: str,
|
||||
task: str,
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Delegate a task to another workspace via A2A (synchronous — waits for response).
|
||||
|
||||
``source_workspace_id`` selects which registered workspace this
|
||||
delegation originates from — drives auth + the X-Workspace-ID source
|
||||
header so the platform's a2a_proxy logs the correct sender. Single-
|
||||
workspace operators leave it None and routing falls back to the
|
||||
module-level WORKSPACE_ID.
|
||||
"""
|
||||
if not workspace_id or not task:
|
||||
return "Error: workspace_id and task are required"
|
||||
|
||||
# Auto-route: if source not specified, look up which registered
|
||||
# workspace last saw this peer (populated by tool_list_peers). Falls
|
||||
# back to the legacy WORKSPACE_ID for single-workspace operators.
|
||||
src = source_workspace_id or _peer_to_source.get(workspace_id) or None
|
||||
|
||||
# Discover the target. discover_peer is the access-control gate +
|
||||
# name/status lookup. The peer's reported ``url`` field is NOT used
|
||||
# for routing — see send_a2a_message, which constructs the URL via
|
||||
# the platform's A2A proxy.
|
||||
peer = await discover_peer(workspace_id, source_workspace_id=src)
|
||||
if not peer:
|
||||
return f"Error: workspace {workspace_id} not found or not accessible (check access control)"
|
||||
|
||||
if (peer.get("status") or "").lower() == "offline":
|
||||
return f"Error: workspace {workspace_id} is offline"
|
||||
|
||||
# Report delegation start — include the task text for traceability
|
||||
peer_name = peer.get("name") or _peer_names.get(workspace_id) or workspace_id[:8]
|
||||
_peer_names[workspace_id] = peer_name # cache for future use
|
||||
# Brief summary for canvas display — just the delegation target
|
||||
await report_activity("a2a_send", workspace_id, f"Delegating to {peer_name}", task_text=task)
|
||||
|
||||
# RFC #2829 PR-5: agent-side cutover. When DELEGATION_SYNC_VIA_INBOX=1,
|
||||
# use the platform's durable async delegation API (POST /delegate +
|
||||
# poll /delegations) instead of the proxy-blocked message/send path.
|
||||
# This sidesteps the 600s message/send timeout class that broke
|
||||
# iteration-14/90-style long-running delegations on 2026-05-05.
|
||||
#
|
||||
# Default off — staging-canary first, flip default after PR-2's
|
||||
# result-push flag (DELEGATION_RESULT_INBOX_PUSH) has been on for
|
||||
# ≥1 week without incident.
|
||||
if os.environ.get("DELEGATION_SYNC_VIA_INBOX") == "1":
|
||||
result = await _delegate_sync_via_polling(workspace_id, task, src or WORKSPACE_ID)
|
||||
else:
|
||||
# send_a2a_message routes through ${PLATFORM_URL}/workspaces/{id}/a2a
|
||||
# (the platform proxy) so the same code works for in-container and
|
||||
# external (standalone molecule-mcp) callers.
|
||||
result = await send_a2a_message(workspace_id, task, source_workspace_id=src)
|
||||
|
||||
# Detect delegation failures — wrap them clearly so the calling agent
|
||||
# can decide to retry, use another peer, or handle the task itself.
|
||||
is_error = result.startswith(_A2A_ERROR_PREFIX)
|
||||
# Strip the sentinel prefix so error_detail is the human-readable
|
||||
# cause directly. The Activity tab's red error chip surfaces this
|
||||
# without the user having to scroll into the raw response JSON.
|
||||
#
|
||||
# Cap at 4096 chars before sending — the platform's
|
||||
# activity_logs.error_detail column is unbounded TEXT and a
|
||||
# malicious or buggy peer could otherwise stream an arbitrarily
|
||||
# large error message into the caller's activity log. 4096 is
|
||||
# comfortably above any real exception traceback we've seen and
|
||||
# well below an obvious-DoS threshold.
|
||||
error_detail = result[len(_A2A_ERROR_PREFIX):].strip()[:4096] if is_error else ""
|
||||
await report_activity(
|
||||
"a2a_receive", workspace_id,
|
||||
f"{peer_name} responded ({len(result)} chars)" if not is_error else f"{peer_name} failed: {error_detail[:120]}",
|
||||
task_text=task, response_text=result,
|
||||
status="error" if is_error else "ok",
|
||||
error_detail=error_detail,
|
||||
)
|
||||
if is_error:
|
||||
return (
|
||||
f"DELEGATION FAILED to {peer_name}: {result}\n"
|
||||
f"You should either: (1) try a different peer, (2) handle this task yourself, "
|
||||
f"or (3) inform the user that {peer_name} is unavailable and provide your best answer."
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
async def tool_delegate_task_async(
|
||||
workspace_id: str,
|
||||
task: str,
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Delegate a task via the platform's async delegation API (fire-and-forget).
|
||||
|
||||
Uses POST /workspaces/:id/delegate which runs the A2A request in the background.
|
||||
Results are tracked in the platform DB and broadcast via WebSocket.
|
||||
Use check_task_status to poll for results.
|
||||
|
||||
``source_workspace_id`` selects the sending workspace (which one of
|
||||
this agent's registered workspaces gets logged as the originator);
|
||||
auto-routes via the peer→source cache when omitted.
|
||||
"""
|
||||
if not workspace_id or not task:
|
||||
return "Error: workspace_id and task are required"
|
||||
|
||||
src = source_workspace_id or _peer_to_source.get(workspace_id) or WORKSPACE_ID
|
||||
|
||||
# Idempotency key: SHA-256 of (source, target, task) so that a
|
||||
# restarted agent firing the same delegation gets the same key and
|
||||
# the platform returns the existing delegation_id instead of
|
||||
# creating a duplicate. Fixes #1456. Source is in the key so the
|
||||
# SAME task delegated from two different registered workspaces
|
||||
# produces two distinct delegations (the right behavior — one per
|
||||
# tenant audit trail).
|
||||
idem_key = hashlib.sha256(f"{src}:{workspace_id}:{task}".encode()).hexdigest()[:32]
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/delegate",
|
||||
json={"target_id": workspace_id, "task": task, "idempotency_key": idem_key},
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
if resp.status_code == 202:
|
||||
data = resp.json()
|
||||
return json.dumps({
|
||||
"delegation_id": data.get("delegation_id", ""),
|
||||
"workspace_id": workspace_id,
|
||||
"status": "delegated",
|
||||
"note": "Task delegated. The platform runs it in the background. Use check_task_status to poll for results.",
|
||||
})
|
||||
else:
|
||||
return f"Error: delegation failed with status {resp.status_code}: {resp.text[:200]}"
|
||||
except Exception as e:
|
||||
return f"Error: delegation failed — {e}"
|
||||
|
||||
|
||||
async def tool_check_task_status(
|
||||
workspace_id: str,
|
||||
task_id: str,
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Check delegations for this workspace via the platform API.
|
||||
|
||||
Args:
|
||||
workspace_id: Ignored (kept for backward compat). Checks
|
||||
``source_workspace_id``'s delegations (the workspace that
|
||||
FIRED the delegations), not the target's.
|
||||
task_id: Optional delegation_id to filter. If empty, returns all recent delegations.
|
||||
source_workspace_id: Which registered workspace's delegation log
|
||||
to query. Defaults to the module-level WORKSPACE_ID.
|
||||
"""
|
||||
src = source_workspace_id or WORKSPACE_ID
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/delegations",
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
return f"Error: failed to check delegations ({resp.status_code})"
|
||||
delegations = resp.json()
|
||||
if task_id:
|
||||
# Filter by delegation_id
|
||||
matching = [d for d in delegations if d.get("delegation_id") == task_id]
|
||||
if matching:
|
||||
return json.dumps(matching[0])
|
||||
return json.dumps({"status": "not_found", "delegation_id": task_id})
|
||||
# Return all recent delegations
|
||||
summary = []
|
||||
for d in delegations[:10]:
|
||||
summary.append({
|
||||
"delegation_id": d.get("delegation_id", ""),
|
||||
"target_id": d.get("target_id", ""),
|
||||
"status": d.get("status", ""),
|
||||
"summary": d.get("summary", ""),
|
||||
"response_preview": d.get("response_preview", ""),
|
||||
})
|
||||
return json.dumps({"delegations": summary, "count": len(delegations)})
|
||||
except Exception as e:
|
||||
return f"Error checking delegations: {e}"
|
||||
|
||||
|
||||
async def _upload_chat_files(
|
||||
client: httpx.AsyncClient,
|
||||
paths: list[str],
|
||||
workspace_id: str | None = None,
|
||||
) -> tuple[list[dict], str | None]:
|
||||
"""Upload local file paths through /workspaces/<self>/chat/uploads.
|
||||
|
||||
The platform stages each upload under /workspace/.molecule/chat-uploads
|
||||
(an "allowed root" the canvas knows how to render via the Download
|
||||
endpoint) and returns metadata the broadcast payload references.
|
||||
|
||||
Why we route through upload instead of just passing the agent's path:
|
||||
the canvas's allowed-root list is /configs, /workspace, /home, /plugins
|
||||
— files at /tmp or /root would be unreachable. Uploading copies the
|
||||
bytes into an allowed root regardless of where the agent wrote them.
|
||||
|
||||
Returns (attachments, error). On any failure the caller should NOT
|
||||
fire the notify — partial-attach would surface a half-rendered chip.
|
||||
"""
|
||||
if not paths:
|
||||
return [], None
|
||||
files_payload: list[tuple[str, tuple[str, bytes, str]]] = []
|
||||
for p in paths:
|
||||
if not isinstance(p, str) or not p:
|
||||
return [], f"Error: invalid attachment path {p!r}"
|
||||
if not os.path.isfile(p):
|
||||
return [], f"Error: attachment not found: {p}"
|
||||
try:
|
||||
with open(p, "rb") as fh:
|
||||
data = fh.read()
|
||||
except OSError as e:
|
||||
return [], f"Error reading {p}: {e}"
|
||||
# Sniff mime from filename so the canvas can pick the right
|
||||
# icon / preview / inline-image renderer. Pre-fix this was
|
||||
# hardcoded application/octet-stream and chat_files.go's
|
||||
# Upload trusts whatever Content-Type the multipart part
|
||||
# carries — `mt := fh.Header.Get("Content-Type")` only falls
|
||||
# back to extension-sniffing when the header is empty. So a
|
||||
# hardcoded octet-stream meant every attachment lost its
|
||||
# real type forever, breaking the canvas chip's icon logic.
|
||||
mime_type, _ = mimetypes.guess_type(p)
|
||||
if not mime_type:
|
||||
mime_type = "application/octet-stream"
|
||||
files_payload.append(("files", (os.path.basename(p), data, mime_type)))
|
||||
target_workspace_id = (workspace_id or "").strip() or WORKSPACE_ID
|
||||
try:
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{target_workspace_id}/chat/uploads",
|
||||
files=files_payload,
|
||||
headers=_auth_headers_for_heartbeat(target_workspace_id),
|
||||
)
|
||||
except Exception as e:
|
||||
return [], f"Error uploading attachments: {e}"
|
||||
if resp.status_code != 200:
|
||||
return [], f"Error: chat/uploads returned {resp.status_code}: {resp.text[:200]}"
|
||||
try:
|
||||
body = resp.json()
|
||||
except Exception as e:
|
||||
return [], f"Error parsing upload response: {e}"
|
||||
uploaded = body.get("files") or []
|
||||
if not isinstance(uploaded, list) or len(uploaded) != len(paths):
|
||||
return [], f"Error: upload returned {len(uploaded) if isinstance(uploaded, list) else 'invalid'} entries for {len(paths)} files"
|
||||
return uploaded, None
|
||||
|
||||
|
||||
async def tool_send_message_to_user(
|
||||
message: str,
|
||||
attachments: list[str] | None = None,
|
||||
workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Send a message directly to the user's canvas chat via WebSocket.
|
||||
|
||||
Args:
|
||||
message: The text to display in the user's chat. Required even
|
||||
when sending attachments — set to a short caption like
|
||||
"Here's the build output:" or "Done — see attached."
|
||||
attachments: Optional list of absolute file paths inside this
|
||||
container. Each is uploaded to the platform and rendered
|
||||
in the canvas as a clickable download chip. Use this
|
||||
instead of pasting paths in the message text — paths
|
||||
render as plain text and the user can't click them.
|
||||
Examples:
|
||||
attachments=["/tmp/build-output.zip"]
|
||||
attachments=["/workspace/report.pdf", "/workspace/data.csv"]
|
||||
workspace_id: Optional. When the agent is registered in MULTIPLE
|
||||
workspaces (external multi-workspace MCP path), this
|
||||
selects which workspace's chat to deliver the message to —
|
||||
should match the ``arrival_workspace_id`` of the inbound
|
||||
message you're replying to so the user sees the reply in
|
||||
the same canvas they typed in. Single-workspace agents
|
||||
omit this; the message routes to the only registered
|
||||
workspace.
|
||||
"""
|
||||
if not message:
|
||||
return "Error: message is required"
|
||||
target_workspace_id = (workspace_id or "").strip() or WORKSPACE_ID
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
uploaded, upload_err = await _upload_chat_files(
|
||||
client, attachments or [], workspace_id=target_workspace_id,
|
||||
)
|
||||
if upload_err:
|
||||
return upload_err
|
||||
payload: dict = {"message": message}
|
||||
if uploaded:
|
||||
payload["attachments"] = uploaded
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{target_workspace_id}/notify",
|
||||
json=payload,
|
||||
headers=_auth_headers_for_heartbeat(target_workspace_id),
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
if uploaded:
|
||||
return f"Message sent to user with {len(uploaded)} attachment(s)"
|
||||
return "Message sent to user"
|
||||
return f"Error: platform returned {resp.status_code}"
|
||||
except Exception as e:
|
||||
return f"Error sending message: {e}"
|
||||
|
||||
|
||||
async def tool_list_peers(source_workspace_id: str | None = None) -> str:
|
||||
"""List all workspaces this agent can communicate with.
|
||||
|
||||
Behavior:
|
||||
- ``source_workspace_id`` set → list peers of that one workspace.
|
||||
- Unset, single-workspace mode → list peers of WORKSPACE_ID
|
||||
(the legacy path, unchanged).
|
||||
- Unset, multi-workspace mode (MOLECULE_WORKSPACES populated) →
|
||||
aggregate across every registered workspace, prefixing each
|
||||
peer with its source so the agent / user can see the full peer
|
||||
surface in one call.
|
||||
|
||||
Side-effect: populates ``_peer_to_source`` so subsequent
|
||||
``tool_delegate_task(target)`` auto-routes through the correct
|
||||
sending workspace without the agent needing ``source_workspace_id``.
|
||||
"""
|
||||
sources: list[str]
|
||||
aggregate = False
|
||||
if source_workspace_id:
|
||||
sources = [source_workspace_id]
|
||||
else:
|
||||
registered = list_registered_workspaces()
|
||||
if len(registered) > 1:
|
||||
sources = registered
|
||||
aggregate = True
|
||||
else:
|
||||
sources = [WORKSPACE_ID]
|
||||
|
||||
all_peers: list[tuple[str, dict]] = [] # (source, peer_record)
|
||||
diagnostics: list[tuple[str, str]] = [] # (source, diagnostic)
|
||||
for src in sources:
|
||||
peers, diagnostic = await get_peers_with_diagnostic(source_workspace_id=src)
|
||||
if peers:
|
||||
for p in peers:
|
||||
all_peers.append((src, p))
|
||||
elif diagnostic is not None:
|
||||
diagnostics.append((src, diagnostic))
|
||||
|
||||
if not all_peers:
|
||||
if diagnostics:
|
||||
joined = "; ".join(f"[{src[:8]}] {d}" for src, d in diagnostics)
|
||||
return f"No peers found. {joined}"
|
||||
return (
|
||||
"You have no peers in the platform registry. "
|
||||
"(No parent, no children, no siblings registered.)"
|
||||
)
|
||||
|
||||
lines = []
|
||||
for src, p in all_peers:
|
||||
status = p.get("status", "unknown")
|
||||
role = p.get("role", "")
|
||||
peer_id = p["id"]
|
||||
# Cache name for use in delegate_task
|
||||
_peer_names[peer_id] = p["name"]
|
||||
# Cache the source workspace so tool_delegate_task auto-routes
|
||||
_peer_to_source[peer_id] = src
|
||||
if aggregate:
|
||||
lines.append(
|
||||
f"- {p['name']} (ID: {peer_id}, status: {status}, role: {role}, via: {src[:8]})"
|
||||
)
|
||||
else:
|
||||
lines.append(f"- {p['name']} (ID: {peer_id}, status: {status}, role: {role})")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
async def tool_get_workspace_info(source_workspace_id: str | None = None) -> str:
|
||||
"""Get this workspace's own info.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace to
|
||||
introspect when the agent is registered into multiple workspaces.
|
||||
Unset → falls back to module-level WORKSPACE_ID.
|
||||
"""
|
||||
info = await get_workspace_info(source_workspace_id=source_workspace_id)
|
||||
return json.dumps(info, indent=2)
|
||||
|
||||
|
||||
async def tool_commit_memory(
|
||||
content: str,
|
||||
scope: str = "LOCAL",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Save important information to persistent memory.
|
||||
|
||||
GLOBAL scope is writable only by root workspaces (tier == 0).
|
||||
RBAC memory.write permission is required for all scope levels.
|
||||
The source workspace_id is embedded in every record so the platform
|
||||
can enforce cross-workspace isolation and audit trail.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace this
|
||||
memory belongs to when the agent is registered into multiple
|
||||
workspaces (PR-1 / multi-workspace mode). When unset, falls back
|
||||
to the module-level WORKSPACE_ID — single-workspace operators see
|
||||
no behaviour change.
|
||||
"""
|
||||
if not content:
|
||||
return "Error: content is required"
|
||||
content = _redact_secrets(content)
|
||||
scope = scope.upper()
|
||||
if scope not in ("LOCAL", "TEAM", "GLOBAL"):
|
||||
scope = "LOCAL"
|
||||
|
||||
# RBAC: require memory.write permission (mirrors builtin_tools/memory.py)
|
||||
if not _check_memory_write_permission():
|
||||
return (
|
||||
"Error: RBAC — this workspace does not have the 'memory.write' "
|
||||
"permission for this operation."
|
||||
)
|
||||
|
||||
# Scope enforcement: only root workspaces (tier 0) can write GLOBAL memory.
|
||||
# This prevents tenant workspaces from poisoning org-wide memory (GH#1610).
|
||||
if scope == "GLOBAL" and not _is_root_workspace():
|
||||
return (
|
||||
"Error: RBAC — only root workspaces (tier 0) can write to GLOBAL scope. "
|
||||
"Non-root workspaces may use LOCAL or TEAM scope."
|
||||
)
|
||||
|
||||
src = source_workspace_id or WORKSPACE_ID
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/memories",
|
||||
json={
|
||||
"content": content,
|
||||
"scope": scope,
|
||||
# Embed source workspace so the platform can namespace-isolate
|
||||
# and audit cross-workspace writes (GH#1610 fix).
|
||||
"workspace_id": src,
|
||||
},
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
data = resp.json()
|
||||
if resp.status_code in (200, 201):
|
||||
return json.dumps({"success": True, "id": data.get("id"), "scope": scope})
|
||||
return f"Error: {data.get('error', resp.text)}"
|
||||
except Exception as e:
|
||||
return f"Error saving memory: {e}"
|
||||
|
||||
|
||||
async def tool_recall_memory(
|
||||
query: str = "",
|
||||
scope: str = "",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Search persistent memory for previously saved information.
|
||||
|
||||
RBAC memory.read permission is required (mirrors builtin_tools/memory.py).
|
||||
The workspace_id is sent as a query parameter so the platform can
|
||||
cross-validate it against the auth token and defend against any future
|
||||
path traversal / cross-tenant read bugs in the platform itself.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace's memories
|
||||
to search when the agent is registered into multiple workspaces.
|
||||
Unset → defaults to the module-level WORKSPACE_ID.
|
||||
"""
|
||||
# RBAC: require memory.read permission (mirrors builtin_tools/memory.py)
|
||||
if not _check_memory_read_permission():
|
||||
return (
|
||||
"Error: RBAC — this workspace does not have the 'memory.read' "
|
||||
"permission for this operation."
|
||||
)
|
||||
|
||||
src = source_workspace_id or WORKSPACE_ID
|
||||
params: dict[str, str] = {"workspace_id": src}
|
||||
if query:
|
||||
params["q"] = query
|
||||
if scope:
|
||||
params["scope"] = scope.upper()
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/memories",
|
||||
params=params,
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
data = resp.json()
|
||||
if isinstance(data, list):
|
||||
if not data:
|
||||
return "No memories found."
|
||||
lines = []
|
||||
for m in data:
|
||||
lines.append(f"[{m.get('scope', '?')}] {m.get('content', '')}")
|
||||
return "\n".join(lines)
|
||||
return json.dumps(data)
|
||||
except Exception as e:
|
||||
return f"Error recalling memory: {e}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Inbox tools — inbound delivery for the standalone molecule-mcp path.
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# The InboxState singleton is set by mcp_cli before the MCP server starts
|
||||
# (see workspace/inbox.py for the rationale). In-container runtimes never
|
||||
# call ``inbox.activate(...)``, so ``inbox.get_state()`` returns None and
|
||||
# these tools surface an informational error rather than raising.
|
||||
#
|
||||
# When-to-use guidance (mirrored in platform_tools/registry.py): agents
|
||||
# in standalone-runtime mode should call ``wait_for_message`` to block
|
||||
# on the next inbound message after they've emitted a reply, forming
|
||||
# the loop ``wait → respond → wait``. ``inbox_peek`` is for inspecting
|
||||
# the queue without consuming; ``inbox_pop`` removes a handled message.
|
||||
|
||||
_INBOX_NOT_ENABLED_MSG = (
|
||||
"Error: inbox polling is not enabled in this runtime. The standalone "
|
||||
"molecule-mcp wrapper activates it; in-container runtimes receive "
|
||||
"messages via push delivery and do not need these tools."
|
||||
# Delegation tool handlers — extracted to a2a_tools_delegation
|
||||
# (RFC #2873 iter 4b). Re-imported here so call sites + tests that
|
||||
# reference ``a2a_tools.tool_delegate_task`` /
|
||||
# ``a2a_tools._delegate_sync_via_polling`` keep resolving identically.
|
||||
from a2a_tools_delegation import ( # noqa: E402 (import after the from-a2a_client block)
|
||||
_SYNC_POLL_BUDGET_S,
|
||||
_SYNC_POLL_INTERVAL_S,
|
||||
_delegate_sync_via_polling,
|
||||
tool_check_task_status,
|
||||
tool_delegate_task,
|
||||
tool_delegate_task_async,
|
||||
)
|
||||
|
||||
|
||||
async def tool_chat_history(
|
||||
peer_id: str,
|
||||
limit: int = 20,
|
||||
before_ts: str = "",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Fetch the prior conversation with one peer.
|
||||
|
||||
Hits ``/workspaces/<self>/activity?peer_id=<peer>&limit=<N>``
|
||||
against the workspace-server, which returns activity rows where
|
||||
the peer is either the sender (``source_id=peer`` — they sent us
|
||||
the message) or the recipient (``target_id=peer`` — we sent to
|
||||
them) of an A2A turn — both sides of the conversation in
|
||||
chronological order.
|
||||
|
||||
Args:
|
||||
peer_id: The other workspace's UUID. Same value the agent
|
||||
sees as ``peer_id`` on a peer_agent push or ``workspace_id``
|
||||
on a delegate_task call.
|
||||
limit: Maximum rows to return; capped server-side at 500. The
|
||||
default of 20 covers \"most recent context for this peer\"
|
||||
without flooding the agent's context window.
|
||||
before_ts: Optional RFC3339 timestamp; only rows strictly
|
||||
older are returned. Used to page backward through long
|
||||
histories — pass the oldest ``ts`` from the previous
|
||||
response. Empty (default) returns the most recent ``limit``
|
||||
rows.
|
||||
source_workspace_id: Which registered workspace's activity log
|
||||
to query. Auto-routes via ``_peer_to_source`` cache when
|
||||
unset (the workspace this peer was discovered through);
|
||||
falls back to module-level WORKSPACE_ID for single-workspace
|
||||
operators.
|
||||
|
||||
Returns a JSON-encoded list of activity rows (or an error string
|
||||
starting with ``Error:`` so the agent can branch). Each row carries
|
||||
``activity_type``, ``source_id``, ``target_id``, ``method``,
|
||||
``summary``, ``request_body``, ``response_body``, ``status``,
|
||||
``created_at`` — same shape ``inbox_peek`` and the canvas chat
|
||||
loader already see.
|
||||
"""
|
||||
if not peer_id or not isinstance(peer_id, str):
|
||||
return "Error: peer_id is required"
|
||||
if not isinstance(limit, int) or limit <= 0:
|
||||
limit = 20
|
||||
if limit > 500:
|
||||
limit = 500
|
||||
|
||||
src = source_workspace_id or _peer_to_source.get(peer_id) or WORKSPACE_ID
|
||||
|
||||
params: dict[str, str] = {
|
||||
"peer_id": peer_id,
|
||||
"limit": str(limit),
|
||||
}
|
||||
# Forward verbatim — the server route validates as RFC3339 at the
|
||||
# trust boundary and translates into a `created_at < $X` clause.
|
||||
if before_ts:
|
||||
params["before_ts"] = before_ts
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/activity",
|
||||
params=params,
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return f"Error: chat_history request failed: {exc}"
|
||||
|
||||
if resp.status_code == 400:
|
||||
# Trust-boundary rejection (malformed peer_id, etc.) — surface
|
||||
# the server's reason verbatim so the agent can correct itself.
|
||||
try:
|
||||
err = resp.json().get("error", "bad request")
|
||||
except Exception: # noqa: BLE001
|
||||
err = "bad request"
|
||||
return f"Error: {err}"
|
||||
if resp.status_code >= 400:
|
||||
return f"Error: chat_history returned HTTP {resp.status_code}"
|
||||
|
||||
try:
|
||||
rows = resp.json()
|
||||
except Exception: # noqa: BLE001
|
||||
return "Error: chat_history response was not JSON"
|
||||
if not isinstance(rows, list):
|
||||
return "Error: chat_history response was not a list"
|
||||
|
||||
# Server returns DESC (most recent first); reverse to chronological
|
||||
# so the agent reads the conversation top-down like a chat log.
|
||||
rows.reverse()
|
||||
return json.dumps(rows)
|
||||
# Messaging tool handlers — extracted to a2a_tools_messaging
|
||||
# (RFC #2873 iter 4d). Re-imported here so call sites + tests that
|
||||
# reference ``a2a_tools.tool_send_message_to_user`` /
|
||||
# ``tool_list_peers`` / ``tool_get_workspace_info`` /
|
||||
# ``tool_chat_history`` / ``_upload_chat_files`` keep resolving
|
||||
# identically.
|
||||
from a2a_tools_messaging import ( # noqa: E402 (import after the top-of-module imports)
|
||||
_upload_chat_files,
|
||||
tool_chat_history,
|
||||
tool_get_workspace_info,
|
||||
tool_list_peers,
|
||||
tool_send_message_to_user,
|
||||
)
|
||||
|
||||
|
||||
async def tool_inbox_peek(limit: int = 10) -> str:
|
||||
"""Return up to ``limit`` pending inbound messages without removing them."""
|
||||
import inbox # local import — avoids a circular dep at module load
|
||||
|
||||
state = inbox.get_state()
|
||||
if state is None:
|
||||
return _INBOX_NOT_ENABLED_MSG
|
||||
messages = state.peek(limit=limit if isinstance(limit, int) else 10)
|
||||
return json.dumps([m.to_dict() for m in messages])
|
||||
# Memory tool handlers — extracted to a2a_tools_memory (RFC #2873 iter 4c).
|
||||
# Re-imported here so call sites + tests that reference
|
||||
# ``a2a_tools.tool_commit_memory`` / ``tool_recall_memory`` keep
|
||||
# resolving identically.
|
||||
from a2a_tools_memory import ( # noqa: E402 (import after the top-of-module imports)
|
||||
tool_commit_memory,
|
||||
tool_recall_memory,
|
||||
)
|
||||
|
||||
|
||||
async def tool_inbox_pop(activity_id: str) -> str:
|
||||
"""Remove a message from the inbox queue by activity_id."""
|
||||
import inbox
|
||||
|
||||
state = inbox.get_state()
|
||||
if state is None:
|
||||
return _INBOX_NOT_ENABLED_MSG
|
||||
if not isinstance(activity_id, str) or not activity_id:
|
||||
return "Error: activity_id is required."
|
||||
removed = state.pop(activity_id)
|
||||
if removed is None:
|
||||
return json.dumps({"removed": False, "activity_id": activity_id})
|
||||
return json.dumps({"removed": True, "activity_id": activity_id})
|
||||
|
||||
|
||||
async def tool_wait_for_message(timeout_secs: float = 60.0) -> str:
|
||||
"""Block until a new message arrives or ``timeout_secs`` elapses.
|
||||
|
||||
Returns the head message non-destructively; the agent decides
|
||||
whether to ``inbox_pop`` it after acting.
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
import inbox
|
||||
|
||||
state = inbox.get_state()
|
||||
if state is None:
|
||||
return _INBOX_NOT_ENABLED_MSG
|
||||
|
||||
try:
|
||||
timeout = float(timeout_secs)
|
||||
except (TypeError, ValueError):
|
||||
timeout = 60.0
|
||||
# Cap at 300s — Claude Code's default tool timeout is ~10min, and
|
||||
# blocking longer than 5min wastes the prompt cache window for
|
||||
# nothing useful. Operators who want longer can call repeatedly.
|
||||
timeout = max(0.0, min(timeout, 300.0))
|
||||
|
||||
# The threading.Event-based wait would block the asyncio loop.
|
||||
# Run it on the default executor so the MCP server can keep
|
||||
# processing other JSON-RPC requests while we sleep.
|
||||
loop = asyncio.get_running_loop()
|
||||
message = await loop.run_in_executor(None, state.wait, timeout)
|
||||
if message is None:
|
||||
return json.dumps({"timeout": True, "timeout_secs": timeout})
|
||||
return json.dumps(message.to_dict())
|
||||
# Inbox tool handlers — extracted to a2a_tools_inbox (RFC #2873 iter 4e).
|
||||
# Re-imported here so call sites + tests that reference
|
||||
# ``a2a_tools.tool_inbox_peek`` / ``tool_inbox_pop`` / ``tool_wait_for_message``
|
||||
# / ``_enrich_inbound_for_agent`` / ``_INBOX_NOT_ENABLED_MSG`` keep
|
||||
# resolving identically.
|
||||
from a2a_tools_inbox import ( # noqa: E402 (import after the top-of-module imports)
|
||||
_INBOX_NOT_ENABLED_MSG,
|
||||
_enrich_inbound_for_agent,
|
||||
tool_inbox_peek,
|
||||
tool_inbox_pop,
|
||||
tool_wait_for_message,
|
||||
)
|
||||
|
||||
@@ -0,0 +1,372 @@
|
||||
"""Delegation tool handlers — single-concern slice of the a2a_tools surface.
|
||||
|
||||
Extracted from ``a2a_tools.py`` (RFC #2873 iter 4b). Owns the three
|
||||
delegation MCP tools + the RFC #2829 PR-5 sync-via-polling helper they
|
||||
share.
|
||||
|
||||
Public surface:
|
||||
|
||||
* ``tool_delegate_task`` — synchronous delegation, waits for response.
|
||||
* ``tool_delegate_task_async`` — fire-and-forget delegation; returns
|
||||
``{delegation_id, ...}``.
|
||||
* ``tool_check_task_status`` — poll the platform's ``/delegations`` log.
|
||||
|
||||
Internal:
|
||||
|
||||
* ``_delegate_sync_via_polling`` — durable async + poll for terminal
|
||||
status (RFC #2829 PR-5 cutover path; toggled by
|
||||
``DELEGATION_SYNC_VIA_INBOX=1``).
|
||||
* ``_SYNC_POLL_INTERVAL_S`` / ``_SYNC_POLL_BUDGET_S`` constants.
|
||||
|
||||
Circular-import note: this module calls ``report_activity`` from
|
||||
``a2a_tools`` to emit activity rows around the delegate dispatch.
|
||||
``a2a_tools`` imports the public symbols here at module-load time,
|
||||
so we use a LAZY import for ``report_activity`` inside the function
|
||||
that needs it. Without the lazy hop Python raises an ImportError
|
||||
on first ``a2a_tools`` import.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
|
||||
import httpx
|
||||
|
||||
from a2a_client import (
|
||||
PLATFORM_URL,
|
||||
WORKSPACE_ID,
|
||||
_A2A_ERROR_PREFIX,
|
||||
_peer_names,
|
||||
_peer_to_source,
|
||||
discover_peer,
|
||||
send_a2a_message,
|
||||
)
|
||||
from a2a_tools_rbac import auth_headers_for_heartbeat as _auth_headers_for_heartbeat
|
||||
|
||||
|
||||
# RFC #2829 PR-5 cutover constants. The poll cadence + timeout are
|
||||
# intentionally generous: 3s gives the platform's executeDelegation
|
||||
# goroutine room to dispatch + the callee to respond + the result to
|
||||
# write to activity_logs without thrashing the platform with rapid
|
||||
# polls; the budget matches the legacy DELEGATION_TIMEOUT (300s) so
|
||||
# operators don't see behavior change beyond "no more 600s timeouts".
|
||||
_SYNC_POLL_INTERVAL_S = 3.0
|
||||
_SYNC_POLL_BUDGET_S = float(os.environ.get("DELEGATION_TIMEOUT", "300.0"))
|
||||
|
||||
|
||||
async def _delegate_sync_via_polling(
|
||||
workspace_id: str,
|
||||
task: str,
|
||||
src: str,
|
||||
) -> str:
|
||||
"""RFC #2829 PR-5: durable async delegation + poll for terminal status.
|
||||
|
||||
Sidesteps the platform proxy's blocking `message/send` HTTP path that
|
||||
hits a hard 600s ceiling. Instead:
|
||||
|
||||
1. POST /workspaces/<src>/delegate (async, returns 202 + delegation_id)
|
||||
— platform's executeDelegation goroutine handles A2A dispatch in
|
||||
the background. No client-side timeout dependency on the platform
|
||||
holding a connection open.
|
||||
2. Poll GET /workspaces/<src>/delegations every 3s for a row with
|
||||
matching delegation_id reaching terminal status (completed/failed).
|
||||
3. Return the response_preview text on completed; surface error_detail
|
||||
on failed (with the same _A2A_ERROR_PREFIX wrapping the legacy
|
||||
path uses, so caller error-detection logic is unchanged).
|
||||
|
||||
Both /delegate and /delegations are existing endpoints — this helper
|
||||
just composes them into a polling synchronous facade. The result is
|
||||
available the moment the platform writes the terminal status row;
|
||||
no extra latency vs. the legacy proxy-blocked path on fast cases.
|
||||
"""
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
idem_key = hashlib.sha256(f"{src}:{workspace_id}:{task}".encode()).hexdigest()[:32]
|
||||
|
||||
# 1. Dispatch via /delegate (the async, durable path).
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/delegate",
|
||||
json={
|
||||
"target_id": workspace_id,
|
||||
"task": task,
|
||||
"idempotency_key": idem_key,
|
||||
},
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
except Exception as e: # pylint: disable=broad-except
|
||||
return f"{_A2A_ERROR_PREFIX}delegate dispatch failed: {e}"
|
||||
|
||||
if resp.status_code != 202 and resp.status_code != 200:
|
||||
return f"{_A2A_ERROR_PREFIX}delegate dispatch failed: HTTP {resp.status_code} {resp.text[:200]}"
|
||||
|
||||
try:
|
||||
dispatch = resp.json()
|
||||
except Exception as e: # pylint: disable=broad-except
|
||||
return f"{_A2A_ERROR_PREFIX}delegate dispatch returned non-JSON: {e}"
|
||||
|
||||
delegation_id = dispatch.get("delegation_id", "")
|
||||
if not delegation_id:
|
||||
return f"{_A2A_ERROR_PREFIX}delegate dispatch missing delegation_id: {dispatch}"
|
||||
|
||||
# 2. Poll for terminal status with a deadline. Each poll is a cheap
|
||||
# /delegations GET — bounded by the platform's existing rate limit.
|
||||
deadline = time.monotonic() + _SYNC_POLL_BUDGET_S
|
||||
last_status = "unknown"
|
||||
while time.monotonic() < deadline:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
poll = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/delegations",
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
except Exception as e: # pylint: disable=broad-except
|
||||
# Transient — keep polling. The platform IS holding the
|
||||
# delegation row; we just lost a network request.
|
||||
last_status = f"poll-error: {e}"
|
||||
await asyncio.sleep(_SYNC_POLL_INTERVAL_S)
|
||||
continue
|
||||
|
||||
if poll.status_code != 200:
|
||||
last_status = f"poll HTTP {poll.status_code}"
|
||||
await asyncio.sleep(_SYNC_POLL_INTERVAL_S)
|
||||
continue
|
||||
|
||||
try:
|
||||
rows = poll.json()
|
||||
except Exception as e: # pylint: disable=broad-except
|
||||
last_status = f"poll non-JSON: {e}"
|
||||
await asyncio.sleep(_SYNC_POLL_INTERVAL_S)
|
||||
continue
|
||||
|
||||
# /delegations returns a flat list of delegation events. Filter to
|
||||
# our delegation_id; pick the first terminal one. The list may
|
||||
# have multiple rows per delegation_id (one for the original
|
||||
# dispatch, one per status update); we want the latest terminal.
|
||||
if not isinstance(rows, list):
|
||||
await asyncio.sleep(_SYNC_POLL_INTERVAL_S)
|
||||
continue
|
||||
terminal = None
|
||||
for r in rows:
|
||||
if not isinstance(r, dict):
|
||||
continue
|
||||
if r.get("delegation_id") != delegation_id:
|
||||
continue
|
||||
status = (r.get("status") or "").lower()
|
||||
last_status = status
|
||||
if status in ("completed", "failed"):
|
||||
terminal = r
|
||||
break
|
||||
if terminal:
|
||||
if (terminal.get("status") or "").lower() == "completed":
|
||||
return terminal.get("response_preview") or ""
|
||||
err = (
|
||||
terminal.get("error_detail")
|
||||
or terminal.get("summary")
|
||||
or "delegation failed"
|
||||
)
|
||||
return f"{_A2A_ERROR_PREFIX}{err}"
|
||||
|
||||
await asyncio.sleep(_SYNC_POLL_INTERVAL_S)
|
||||
|
||||
# Budget exhausted — the platform's row is still in flight (or queued).
|
||||
# Surface as an error so the caller can decide to retry or fall back;
|
||||
# the platform DOES still have the durable row, so the work isn't
|
||||
# lost — it'll complete eventually and a future check_task_status
|
||||
# will surface the result.
|
||||
return (
|
||||
f"{_A2A_ERROR_PREFIX}polling timeout after {_SYNC_POLL_BUDGET_S}s "
|
||||
f"(delegation_id={delegation_id}, last_status={last_status}); "
|
||||
f"the platform is still working on it — call check_task_status('{delegation_id}') to retrieve later"
|
||||
)
|
||||
|
||||
|
||||
async def tool_delegate_task(
|
||||
workspace_id: str,
|
||||
task: str,
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Delegate a task to another workspace via A2A (synchronous — waits for response).
|
||||
|
||||
``source_workspace_id`` selects which registered workspace this
|
||||
delegation originates from — drives auth + the X-Workspace-ID source
|
||||
header so the platform's a2a_proxy logs the correct sender. Single-
|
||||
workspace operators leave it None and routing falls back to the
|
||||
module-level WORKSPACE_ID.
|
||||
"""
|
||||
if not workspace_id or not task:
|
||||
return "Error: workspace_id and task are required"
|
||||
|
||||
# Auto-route: if source not specified, look up which registered
|
||||
# workspace last saw this peer (populated by tool_list_peers). Falls
|
||||
# back to the legacy WORKSPACE_ID for single-workspace operators.
|
||||
src = source_workspace_id or _peer_to_source.get(workspace_id) or None
|
||||
|
||||
# Discover the target. discover_peer is the access-control gate +
|
||||
# name/status lookup. The peer's reported ``url`` field is NOT used
|
||||
# for routing — see send_a2a_message, which constructs the URL via
|
||||
# the platform's A2A proxy.
|
||||
peer = await discover_peer(workspace_id, source_workspace_id=src)
|
||||
if not peer:
|
||||
return f"Error: workspace {workspace_id} not found or not accessible (check access control)"
|
||||
|
||||
if (peer.get("status") or "").lower() == "offline":
|
||||
return f"Error: workspace {workspace_id} is offline"
|
||||
|
||||
# Lazy import: a2a_tools imports this module at top-level, so a
|
||||
# top-level import of report_activity from a2a_tools would create a
|
||||
# circular dependency at first-import time. Lazy resolution inside
|
||||
# the function body breaks the cycle without forcing a ground-up
|
||||
# restructure of the activity-reporting layer.
|
||||
from a2a_tools import report_activity
|
||||
|
||||
# Report delegation start — include the task text for traceability
|
||||
peer_name = peer.get("name") or _peer_names.get(workspace_id) or workspace_id[:8]
|
||||
_peer_names[workspace_id] = peer_name # cache for future use
|
||||
# Brief summary for canvas display — just the delegation target
|
||||
await report_activity("a2a_send", workspace_id, f"Delegating to {peer_name}", task_text=task)
|
||||
|
||||
# RFC #2829 PR-5: agent-side cutover. When DELEGATION_SYNC_VIA_INBOX=1,
|
||||
# use the platform's durable async delegation API (POST /delegate +
|
||||
# poll /delegations) instead of the proxy-blocked message/send path.
|
||||
# This sidesteps the 600s message/send timeout class that broke
|
||||
# iteration-14/90-style long-running delegations on 2026-05-05.
|
||||
#
|
||||
# Default off — staging-canary first, flip default after PR-2's
|
||||
# result-push flag (DELEGATION_RESULT_INBOX_PUSH) has been on for
|
||||
# ≥1 week without incident.
|
||||
if os.environ.get("DELEGATION_SYNC_VIA_INBOX") == "1":
|
||||
result = await _delegate_sync_via_polling(workspace_id, task, src or WORKSPACE_ID)
|
||||
else:
|
||||
# send_a2a_message routes through ${PLATFORM_URL}/workspaces/{id}/a2a
|
||||
# (the platform proxy) so the same code works for in-container and
|
||||
# external (standalone molecule-mcp) callers.
|
||||
result = await send_a2a_message(workspace_id, task, source_workspace_id=src)
|
||||
|
||||
# Detect delegation failures — wrap them clearly so the calling agent
|
||||
# can decide to retry, use another peer, or handle the task itself.
|
||||
is_error = result.startswith(_A2A_ERROR_PREFIX)
|
||||
# Strip the sentinel prefix so error_detail is the human-readable
|
||||
# cause directly. The Activity tab's red error chip surfaces this
|
||||
# without the user having to scroll into the raw response JSON.
|
||||
#
|
||||
# Cap at 4096 chars before sending — the platform's
|
||||
# activity_logs.error_detail column is unbounded TEXT and a
|
||||
# malicious or buggy peer could otherwise stream an arbitrarily
|
||||
# large error message into the caller's activity log. 4096 is
|
||||
# comfortably above any real exception traceback we've seen and
|
||||
# well below an obvious-DoS threshold.
|
||||
error_detail = result[len(_A2A_ERROR_PREFIX):].strip()[:4096] if is_error else ""
|
||||
await report_activity(
|
||||
"a2a_receive", workspace_id,
|
||||
f"{peer_name} responded ({len(result)} chars)" if not is_error else f"{peer_name} failed: {error_detail[:120]}",
|
||||
task_text=task, response_text=result,
|
||||
status="error" if is_error else "ok",
|
||||
error_detail=error_detail,
|
||||
)
|
||||
if is_error:
|
||||
return (
|
||||
f"DELEGATION FAILED to {peer_name}: {result}\n"
|
||||
f"You should either: (1) try a different peer, (2) handle this task yourself, "
|
||||
f"or (3) inform the user that {peer_name} is unavailable and provide your best answer."
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
async def tool_delegate_task_async(
|
||||
workspace_id: str,
|
||||
task: str,
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Delegate a task via the platform's async delegation API (fire-and-forget).
|
||||
|
||||
Uses POST /workspaces/:id/delegate which runs the A2A request in the background.
|
||||
Results are tracked in the platform DB and broadcast via WebSocket.
|
||||
Use check_task_status to poll for results.
|
||||
|
||||
``source_workspace_id`` selects the sending workspace (which one of
|
||||
this agent's registered workspaces gets logged as the originator);
|
||||
auto-routes via the peer→source cache when omitted.
|
||||
"""
|
||||
if not workspace_id or not task:
|
||||
return "Error: workspace_id and task are required"
|
||||
|
||||
src = source_workspace_id or _peer_to_source.get(workspace_id) or WORKSPACE_ID
|
||||
|
||||
# Idempotency key: SHA-256 of (source, target, task) so that a
|
||||
# restarted agent firing the same delegation gets the same key and
|
||||
# the platform returns the existing delegation_id instead of
|
||||
# creating a duplicate. Fixes #1456. Source is in the key so the
|
||||
# SAME task delegated from two different registered workspaces
|
||||
# produces two distinct delegations (the right behavior — one per
|
||||
# tenant audit trail).
|
||||
idem_key = hashlib.sha256(f"{src}:{workspace_id}:{task}".encode()).hexdigest()[:32]
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/delegate",
|
||||
json={"target_id": workspace_id, "task": task, "idempotency_key": idem_key},
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
if resp.status_code == 202:
|
||||
data = resp.json()
|
||||
return json.dumps({
|
||||
"delegation_id": data.get("delegation_id", ""),
|
||||
"workspace_id": workspace_id,
|
||||
"status": "delegated",
|
||||
"note": "Task delegated. The platform runs it in the background. Use check_task_status to poll for results.",
|
||||
})
|
||||
else:
|
||||
return f"Error: delegation failed with status {resp.status_code}: {resp.text[:200]}"
|
||||
except Exception as e:
|
||||
return f"Error: delegation failed — {e}"
|
||||
|
||||
|
||||
async def tool_check_task_status(
|
||||
workspace_id: str,
|
||||
task_id: str,
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Check delegations for this workspace via the platform API.
|
||||
|
||||
Args:
|
||||
workspace_id: Ignored (kept for backward compat). Checks
|
||||
``source_workspace_id``'s delegations (the workspace that
|
||||
FIRED the delegations), not the target's.
|
||||
task_id: Optional delegation_id to filter. If empty, returns all recent delegations.
|
||||
source_workspace_id: Which registered workspace's delegation log
|
||||
to query. Defaults to the module-level WORKSPACE_ID.
|
||||
"""
|
||||
src = source_workspace_id or WORKSPACE_ID
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/delegations",
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
return f"Error: failed to check delegations ({resp.status_code})"
|
||||
delegations = resp.json()
|
||||
if task_id:
|
||||
# Filter by delegation_id
|
||||
matching = [d for d in delegations if d.get("delegation_id") == task_id]
|
||||
if matching:
|
||||
return json.dumps(matching[0])
|
||||
return json.dumps({"status": "not_found", "delegation_id": task_id})
|
||||
# Return all recent delegations
|
||||
summary = []
|
||||
for d in delegations[:10]:
|
||||
summary.append({
|
||||
"delegation_id": d.get("delegation_id", ""),
|
||||
"target_id": d.get("target_id", ""),
|
||||
"status": d.get("status", ""),
|
||||
"summary": d.get("summary", ""),
|
||||
"response_preview": d.get("response_preview", ""),
|
||||
})
|
||||
return json.dumps({"delegations": summary, "count": len(delegations)})
|
||||
except Exception as e:
|
||||
return f"Error checking delegations: {e}"
|
||||
@@ -0,0 +1,140 @@
|
||||
"""Inbox tool handlers — single-concern slice of the a2a_tools surface.
|
||||
|
||||
Standalone-runtime path for inbound-message delivery (push-mode runtimes
|
||||
get messages via the channel-tag synthesis in a2a_mcp_server). The
|
||||
``InboxState`` singleton is set by ``mcp_cli`` before the MCP server
|
||||
starts; in-container runtimes never call ``inbox.activate(...)`` so
|
||||
``inbox.get_state()`` returns None and these tools surface an
|
||||
informational error instead of raising.
|
||||
|
||||
When-to-use guidance for agents (mirrored in
|
||||
``platform_tools/registry.py``):
|
||||
- ``wait_for_message``: block until a new inbound message arrives, then
|
||||
decide what to do with it; forms the loop ``wait → respond → wait``.
|
||||
- ``inbox_peek``: inspect the queue non-destructively.
|
||||
- ``inbox_pop``: remove a handled message by activity_id.
|
||||
|
||||
Extracted from ``a2a_tools.py`` in RFC #2873 iter 4e so the kitchen-sink
|
||||
module shrinks to a back-compat shim. The extraction also makes the
|
||||
``_enrich_inbound_for_agent`` helper unit-testable in isolation —
|
||||
previously it was buried in ``a2a_tools`` and only exercised through
|
||||
the inbox wrappers, leaving its peer-id-empty / cache-miss / registry-
|
||||
unavailable branches under-covered.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
|
||||
# Surfaced when the inbox subsystem is not initialised. Returned by the
|
||||
# three inbox tool wrappers below so the agent gets a clear "this
|
||||
# runtime delivers via push" message instead of a NameError.
|
||||
_INBOX_NOT_ENABLED_MSG = (
|
||||
"Error: inbox polling is not enabled in this runtime. The standalone "
|
||||
"molecule-mcp wrapper activates it; in-container runtimes receive "
|
||||
"messages via push delivery and do not need these tools."
|
||||
)
|
||||
|
||||
|
||||
def _enrich_inbound_for_agent(d: dict) -> dict:
|
||||
"""Add peer_name / peer_role / agent_card_url to a poll-path message.
|
||||
|
||||
The PUSH path (a2a_mcp_server._build_channel_notification) already
|
||||
enriches the meta dict with these fields, so a Claude Code host
|
||||
with channel-push sees them. The POLL path goes through
|
||||
InboxMessage.to_dict, which is intentionally identity-free (the
|
||||
storage layer doesn't know about the registry cache). Without this
|
||||
helper, every non-Claude-Code MCP client that uses inbox_peek /
|
||||
wait_for_message gets a plain message and the receiving agent
|
||||
can't tell who's writing — breaking the contract documented in
|
||||
a2a_mcp_server.py:303-345 ("In both paths the same fields apply").
|
||||
|
||||
Cache-first non-blocking enrichment (same shape as push): on cache
|
||||
miss the helper returns the bare message; the next call within the
|
||||
5-min TTL hits the warm cache. Failure to enrich is non-fatal —
|
||||
the agent still gets text + peer_id + kind + activity_id, just
|
||||
without the friendly identity.
|
||||
"""
|
||||
peer_id = d.get("peer_id") or ""
|
||||
if not peer_id:
|
||||
# canvas_user — no peer to enrich; helper returns the plain
|
||||
# message unchanged so the canvas reply path still works.
|
||||
return d
|
||||
try:
|
||||
from a2a_client import ( # local import — avoid module-load cycle
|
||||
_agent_card_url_for,
|
||||
enrich_peer_metadata_nonblocking,
|
||||
)
|
||||
except Exception: # noqa: BLE001
|
||||
# If a2a_client is unavailable (test harness, partial install),
|
||||
# degrade gracefully — agent still gets the bare envelope.
|
||||
return d
|
||||
record = enrich_peer_metadata_nonblocking(peer_id)
|
||||
if record is not None:
|
||||
if name := record.get("name"):
|
||||
d["peer_name"] = name
|
||||
if role := record.get("role"):
|
||||
d["peer_role"] = role
|
||||
# agent_card_url is constructable from peer_id alone — surface it
|
||||
# even when registry enrichment misses, so the receiving agent has
|
||||
# a single endpoint to hit for the peer's full capability list.
|
||||
d["agent_card_url"] = _agent_card_url_for(peer_id)
|
||||
return d
|
||||
|
||||
|
||||
async def tool_inbox_peek(limit: int = 10) -> str:
|
||||
"""Return up to ``limit`` pending inbound messages without removing them."""
|
||||
import inbox # local import — avoids a circular dep at module load
|
||||
|
||||
state = inbox.get_state()
|
||||
if state is None:
|
||||
return _INBOX_NOT_ENABLED_MSG
|
||||
messages = state.peek(limit=limit if isinstance(limit, int) else 10)
|
||||
return json.dumps([_enrich_inbound_for_agent(m.to_dict()) for m in messages])
|
||||
|
||||
|
||||
async def tool_inbox_pop(activity_id: str) -> str:
|
||||
"""Remove a message from the inbox queue by activity_id."""
|
||||
import inbox
|
||||
|
||||
state = inbox.get_state()
|
||||
if state is None:
|
||||
return _INBOX_NOT_ENABLED_MSG
|
||||
if not isinstance(activity_id, str) or not activity_id:
|
||||
return "Error: activity_id is required."
|
||||
removed = state.pop(activity_id)
|
||||
if removed is None:
|
||||
return json.dumps({"removed": False, "activity_id": activity_id})
|
||||
return json.dumps({"removed": True, "activity_id": activity_id})
|
||||
|
||||
|
||||
async def tool_wait_for_message(timeout_secs: float = 60.0) -> str:
|
||||
"""Block until a new message arrives or ``timeout_secs`` elapses.
|
||||
|
||||
Returns the head message non-destructively; the agent decides
|
||||
whether to ``inbox_pop`` it after acting.
|
||||
"""
|
||||
import inbox
|
||||
|
||||
state = inbox.get_state()
|
||||
if state is None:
|
||||
return _INBOX_NOT_ENABLED_MSG
|
||||
|
||||
try:
|
||||
timeout = float(timeout_secs)
|
||||
except (TypeError, ValueError):
|
||||
timeout = 60.0
|
||||
# Cap at 300s — Claude Code's default tool timeout is ~10min, and
|
||||
# blocking longer than 5min wastes the prompt cache window for
|
||||
# nothing useful. Operators who want longer can call repeatedly.
|
||||
timeout = max(0.0, min(timeout, 300.0))
|
||||
|
||||
# The threading.Event-based wait would block the asyncio loop.
|
||||
# Run it on the default executor so the MCP server can keep
|
||||
# processing other JSON-RPC requests while we sleep.
|
||||
loop = asyncio.get_running_loop()
|
||||
message = await loop.run_in_executor(None, state.wait, timeout)
|
||||
if message is None:
|
||||
return json.dumps({"timeout": True, "timeout_secs": timeout})
|
||||
return json.dumps(_enrich_inbound_for_agent(message.to_dict()))
|
||||
@@ -0,0 +1,141 @@
|
||||
"""Memory tool handlers — single-concern slice of the a2a_tools surface.
|
||||
|
||||
Extracted from ``a2a_tools.py`` (RFC #2873 iter 4c). Owns the two
|
||||
agent-memory MCP tools:
|
||||
|
||||
* ``tool_commit_memory`` — write to the workspace's persistent memory.
|
||||
* ``tool_recall_memory`` — search the workspace's persistent memory.
|
||||
|
||||
Both go through the platform's ``/workspaces/:id/memories`` endpoint;
|
||||
the platform is the source of truth for namespace isolation + audit
|
||||
trail. Local responsibility here is RBAC enforcement BEFORE hitting
|
||||
the network so a denied operation surfaces a clear in-band error
|
||||
instead of an opaque platform 403.
|
||||
|
||||
Imports the RBAC primitives from ``a2a_tools_rbac`` (iter 4a).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import httpx
|
||||
|
||||
from a2a_client import PLATFORM_URL, WORKSPACE_ID
|
||||
from a2a_tools_rbac import (
|
||||
auth_headers_for_heartbeat as _auth_headers_for_heartbeat,
|
||||
check_memory_read_permission as _check_memory_read_permission,
|
||||
check_memory_write_permission as _check_memory_write_permission,
|
||||
is_root_workspace as _is_root_workspace,
|
||||
)
|
||||
from builtin_tools.security import _redact_secrets
|
||||
|
||||
|
||||
async def tool_commit_memory(
|
||||
content: str,
|
||||
scope: str = "LOCAL",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Save important information to persistent memory.
|
||||
|
||||
GLOBAL scope is writable only by root workspaces (tier == 0).
|
||||
RBAC memory.write permission is required for all scope levels.
|
||||
The source workspace_id is embedded in every record so the platform
|
||||
can enforce cross-workspace isolation and audit trail.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace this
|
||||
memory belongs to when the agent is registered into multiple
|
||||
workspaces (PR-1 / multi-workspace mode). When unset, falls back
|
||||
to the module-level WORKSPACE_ID — single-workspace operators see
|
||||
no behaviour change.
|
||||
"""
|
||||
if not content:
|
||||
return "Error: content is required"
|
||||
content = _redact_secrets(content)
|
||||
scope = scope.upper()
|
||||
if scope not in ("LOCAL", "TEAM", "GLOBAL"):
|
||||
scope = "LOCAL"
|
||||
|
||||
# RBAC: require memory.write permission (mirrors builtin_tools/memory.py)
|
||||
if not _check_memory_write_permission():
|
||||
return (
|
||||
"Error: RBAC — this workspace does not have the 'memory.write' "
|
||||
"permission for this operation."
|
||||
)
|
||||
|
||||
# Scope enforcement: only root workspaces (tier 0) can write GLOBAL memory.
|
||||
# This prevents tenant workspaces from poisoning org-wide memory (GH#1610).
|
||||
if scope == "GLOBAL" and not _is_root_workspace():
|
||||
return (
|
||||
"Error: RBAC — only root workspaces (tier 0) can write to GLOBAL scope. "
|
||||
"Non-root workspaces may use LOCAL or TEAM scope."
|
||||
)
|
||||
|
||||
src = source_workspace_id or WORKSPACE_ID
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/memories",
|
||||
json={
|
||||
"content": content,
|
||||
"scope": scope,
|
||||
# Embed source workspace so the platform can namespace-isolate
|
||||
# and audit cross-workspace writes (GH#1610 fix).
|
||||
"workspace_id": src,
|
||||
},
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
data = resp.json()
|
||||
if resp.status_code in (200, 201):
|
||||
return json.dumps({"success": True, "id": data.get("id"), "scope": scope})
|
||||
return f"Error: {data.get('error', resp.text)}"
|
||||
except Exception as e:
|
||||
return f"Error saving memory: {e}"
|
||||
|
||||
|
||||
async def tool_recall_memory(
|
||||
query: str = "",
|
||||
scope: str = "",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Search persistent memory for previously saved information.
|
||||
|
||||
RBAC memory.read permission is required (mirrors builtin_tools/memory.py).
|
||||
The workspace_id is sent as a query parameter so the platform can
|
||||
cross-validate it against the auth token and defend against any future
|
||||
path traversal / cross-tenant read bugs in the platform itself.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace's memories
|
||||
to search when the agent is registered into multiple workspaces.
|
||||
Unset → defaults to the module-level WORKSPACE_ID.
|
||||
"""
|
||||
# RBAC: require memory.read permission (mirrors builtin_tools/memory.py)
|
||||
if not _check_memory_read_permission():
|
||||
return (
|
||||
"Error: RBAC — this workspace does not have the 'memory.read' "
|
||||
"permission for this operation."
|
||||
)
|
||||
|
||||
src = source_workspace_id or WORKSPACE_ID
|
||||
params: dict[str, str] = {"workspace_id": src}
|
||||
if query:
|
||||
params["q"] = query
|
||||
if scope:
|
||||
params["scope"] = scope.upper()
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/memories",
|
||||
params=params,
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
data = resp.json()
|
||||
if isinstance(data, list):
|
||||
if not data:
|
||||
return "No memories found."
|
||||
lines = []
|
||||
for m in data:
|
||||
lines.append(f"[{m.get('scope', '?')}] {m.get('content', '')}")
|
||||
return "\n".join(lines)
|
||||
return json.dumps(data)
|
||||
except Exception as e:
|
||||
return f"Error recalling memory: {e}"
|
||||
@@ -0,0 +1,324 @@
|
||||
"""Messaging tool handlers — single-concern slice of the a2a_tools surface.
|
||||
|
||||
Extracted from ``a2a_tools.py`` (RFC #2873 iter 4d). Owns the four
|
||||
human-and-peer messaging MCP tools + the chat-upload helper they share:
|
||||
|
||||
* ``tool_send_message_to_user`` — push a canvas-chat message via the
|
||||
platform's ``/notify`` endpoint.
|
||||
* ``tool_list_peers`` — discover peers across one or many registered
|
||||
workspaces, with side-effect of populating ``_peer_to_source`` for
|
||||
delegate-task auto-routing.
|
||||
* ``tool_get_workspace_info`` — JSON-encode the workspace's own info.
|
||||
* ``tool_chat_history`` — fetch prior conversation rows with a peer.
|
||||
* ``_upload_chat_files`` — internal helper for the message-attachments
|
||||
code path; routes local file paths through the platform's
|
||||
``/chat/uploads`` so the canvas can render them as download chips.
|
||||
|
||||
Imports the auth-header primitive from ``a2a_tools_rbac`` (iter 4a).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import mimetypes
|
||||
import os
|
||||
|
||||
import httpx
|
||||
|
||||
from a2a_client import (
|
||||
PLATFORM_URL,
|
||||
WORKSPACE_ID,
|
||||
_peer_names,
|
||||
_peer_to_source,
|
||||
get_peers_with_diagnostic,
|
||||
get_workspace_info,
|
||||
)
|
||||
from a2a_tools_rbac import auth_headers_for_heartbeat as _auth_headers_for_heartbeat
|
||||
from platform_auth import list_registered_workspaces
|
||||
|
||||
|
||||
async def _upload_chat_files(
|
||||
client: httpx.AsyncClient,
|
||||
paths: list[str],
|
||||
workspace_id: str | None = None,
|
||||
) -> tuple[list[dict], str | None]:
|
||||
"""Upload local file paths through /workspaces/<self>/chat/uploads.
|
||||
|
||||
The platform stages each upload under /workspace/.molecule/chat-uploads
|
||||
(an "allowed root" the canvas knows how to render via the Download
|
||||
endpoint) and returns metadata the broadcast payload references.
|
||||
|
||||
Why we route through upload instead of just passing the agent's path:
|
||||
the canvas's allowed-root list is /configs, /workspace, /home, /plugins
|
||||
— files at /tmp or /root would be unreachable. Uploading copies the
|
||||
bytes into an allowed root regardless of where the agent wrote them.
|
||||
|
||||
Returns (attachments, error). On any failure the caller should NOT
|
||||
fire the notify — partial-attach would surface a half-rendered chip.
|
||||
"""
|
||||
if not paths:
|
||||
return [], None
|
||||
files_payload: list[tuple[str, tuple[str, bytes, str]]] = []
|
||||
for p in paths:
|
||||
if not isinstance(p, str) or not p:
|
||||
return [], f"Error: invalid attachment path {p!r}"
|
||||
if not os.path.isfile(p):
|
||||
return [], f"Error: attachment not found: {p}"
|
||||
try:
|
||||
with open(p, "rb") as fh:
|
||||
data = fh.read()
|
||||
except OSError as e:
|
||||
return [], f"Error reading {p}: {e}"
|
||||
# Sniff mime from filename so the canvas can pick the right
|
||||
# icon / preview / inline-image renderer. Pre-fix this was
|
||||
# hardcoded application/octet-stream and chat_files.go's
|
||||
# Upload trusts whatever Content-Type the multipart part
|
||||
# carries — `mt := fh.Header.Get("Content-Type")` only falls
|
||||
# back to extension-sniffing when the header is empty. So a
|
||||
# hardcoded octet-stream meant every attachment lost its
|
||||
# real type forever, breaking the canvas chip's icon logic.
|
||||
mime_type, _ = mimetypes.guess_type(p)
|
||||
if not mime_type:
|
||||
mime_type = "application/octet-stream"
|
||||
files_payload.append(("files", (os.path.basename(p), data, mime_type)))
|
||||
target_workspace_id = (workspace_id or "").strip() or WORKSPACE_ID
|
||||
try:
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{target_workspace_id}/chat/uploads",
|
||||
files=files_payload,
|
||||
headers=_auth_headers_for_heartbeat(target_workspace_id),
|
||||
)
|
||||
except Exception as e:
|
||||
return [], f"Error uploading attachments: {e}"
|
||||
if resp.status_code != 200:
|
||||
return [], f"Error: chat/uploads returned {resp.status_code}: {resp.text[:200]}"
|
||||
try:
|
||||
body = resp.json()
|
||||
except Exception as e:
|
||||
return [], f"Error parsing upload response: {e}"
|
||||
uploaded = body.get("files") or []
|
||||
if not isinstance(uploaded, list) or len(uploaded) != len(paths):
|
||||
return [], f"Error: upload returned {len(uploaded) if isinstance(uploaded, list) else 'invalid'} entries for {len(paths)} files"
|
||||
return uploaded, None
|
||||
|
||||
|
||||
async def tool_send_message_to_user(
|
||||
message: str,
|
||||
attachments: list[str] | None = None,
|
||||
workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Send a message directly to the user's canvas chat via WebSocket.
|
||||
|
||||
Args:
|
||||
message: The text to display in the user's chat. Required even
|
||||
when sending attachments — set to a short caption like
|
||||
"Here's the build output:" or "Done — see attached."
|
||||
attachments: Optional list of absolute file paths inside this
|
||||
container. Each is uploaded to the platform and rendered
|
||||
in the canvas as a clickable download chip. Use this
|
||||
instead of pasting paths in the message text — paths
|
||||
render as plain text and the user can't click them.
|
||||
Examples:
|
||||
attachments=["/tmp/build-output.zip"]
|
||||
attachments=["/workspace/report.pdf", "/workspace/data.csv"]
|
||||
workspace_id: Optional. When the agent is registered in MULTIPLE
|
||||
workspaces (external multi-workspace MCP path), this
|
||||
selects which workspace's chat to deliver the message to —
|
||||
should match the ``arrival_workspace_id`` of the inbound
|
||||
message you're replying to so the user sees the reply in
|
||||
the same canvas they typed in. Single-workspace agents
|
||||
omit this; the message routes to the only registered
|
||||
workspace.
|
||||
"""
|
||||
if not message:
|
||||
return "Error: message is required"
|
||||
target_workspace_id = (workspace_id or "").strip() or WORKSPACE_ID
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
uploaded, upload_err = await _upload_chat_files(
|
||||
client, attachments or [], workspace_id=target_workspace_id,
|
||||
)
|
||||
if upload_err:
|
||||
return upload_err
|
||||
payload: dict = {"message": message}
|
||||
if uploaded:
|
||||
payload["attachments"] = uploaded
|
||||
resp = await client.post(
|
||||
f"{PLATFORM_URL}/workspaces/{target_workspace_id}/notify",
|
||||
json=payload,
|
||||
headers=_auth_headers_for_heartbeat(target_workspace_id),
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
if uploaded:
|
||||
return f"Message sent to user with {len(uploaded)} attachment(s)"
|
||||
return "Message sent to user"
|
||||
return f"Error: platform returned {resp.status_code}"
|
||||
except Exception as e:
|
||||
return f"Error sending message: {e}"
|
||||
|
||||
|
||||
async def tool_list_peers(source_workspace_id: str | None = None) -> str:
|
||||
"""List all workspaces this agent can communicate with.
|
||||
|
||||
Behavior:
|
||||
- ``source_workspace_id`` set → list peers of that one workspace.
|
||||
- Unset, single-workspace mode → list peers of WORKSPACE_ID
|
||||
(the legacy path, unchanged).
|
||||
- Unset, multi-workspace mode (MOLECULE_WORKSPACES populated) →
|
||||
aggregate across every registered workspace, prefixing each
|
||||
peer with its source so the agent / user can see the full peer
|
||||
surface in one call.
|
||||
|
||||
Side-effect: populates ``_peer_to_source`` so subsequent
|
||||
``tool_delegate_task(target)`` auto-routes through the correct
|
||||
sending workspace without the agent needing ``source_workspace_id``.
|
||||
"""
|
||||
sources: list[str]
|
||||
aggregate = False
|
||||
if source_workspace_id:
|
||||
sources = [source_workspace_id]
|
||||
else:
|
||||
registered = list_registered_workspaces()
|
||||
if len(registered) > 1:
|
||||
sources = registered
|
||||
aggregate = True
|
||||
else:
|
||||
sources = [WORKSPACE_ID]
|
||||
|
||||
all_peers: list[tuple[str, dict]] = [] # (source, peer_record)
|
||||
diagnostics: list[tuple[str, str]] = [] # (source, diagnostic)
|
||||
for src in sources:
|
||||
peers, diagnostic = await get_peers_with_diagnostic(source_workspace_id=src)
|
||||
if peers:
|
||||
for p in peers:
|
||||
all_peers.append((src, p))
|
||||
elif diagnostic is not None:
|
||||
diagnostics.append((src, diagnostic))
|
||||
|
||||
if not all_peers:
|
||||
if diagnostics:
|
||||
joined = "; ".join(f"[{src[:8]}] {d}" for src, d in diagnostics)
|
||||
return f"No peers found. {joined}"
|
||||
return (
|
||||
"You have no peers in the platform registry. "
|
||||
"(No parent, no children, no siblings registered.)"
|
||||
)
|
||||
|
||||
lines = []
|
||||
for src, p in all_peers:
|
||||
status = p.get("status", "unknown")
|
||||
role = p.get("role", "")
|
||||
peer_id = p["id"]
|
||||
# Cache name for use in delegate_task
|
||||
_peer_names[peer_id] = p["name"]
|
||||
# Cache the source workspace so tool_delegate_task auto-routes
|
||||
_peer_to_source[peer_id] = src
|
||||
if aggregate:
|
||||
lines.append(
|
||||
f"- {p['name']} (ID: {peer_id}, status: {status}, role: {role}, via: {src[:8]})"
|
||||
)
|
||||
else:
|
||||
lines.append(f"- {p['name']} (ID: {peer_id}, status: {status}, role: {role})")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
async def tool_get_workspace_info(source_workspace_id: str | None = None) -> str:
|
||||
"""Get this workspace's own info.
|
||||
|
||||
``source_workspace_id`` selects which registered workspace to
|
||||
introspect when the agent is registered into multiple workspaces.
|
||||
Unset → falls back to module-level WORKSPACE_ID.
|
||||
"""
|
||||
info = await get_workspace_info(source_workspace_id=source_workspace_id)
|
||||
return json.dumps(info, indent=2)
|
||||
|
||||
|
||||
async def tool_chat_history(
|
||||
peer_id: str,
|
||||
limit: int = 20,
|
||||
before_ts: str = "",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Fetch the prior conversation with one peer.
|
||||
|
||||
Hits ``/workspaces/<self>/activity?peer_id=<peer>&limit=<N>``
|
||||
against the workspace-server, which returns activity rows where
|
||||
the peer is either the sender (``source_id=peer`` — they sent us
|
||||
the message) or the recipient (``target_id=peer`` — we sent to
|
||||
them) of an A2A turn — both sides of the conversation in
|
||||
chronological order.
|
||||
|
||||
Args:
|
||||
peer_id: The other workspace's UUID. Same value the agent
|
||||
sees as ``peer_id`` on a peer_agent push or ``workspace_id``
|
||||
on a delegate_task call.
|
||||
limit: Maximum rows to return; capped server-side at 500. The
|
||||
default of 20 covers "most recent context for this peer"
|
||||
without flooding the agent's context window.
|
||||
before_ts: Optional RFC3339 timestamp; only rows strictly
|
||||
older are returned. Used to page backward through long
|
||||
histories — pass the oldest ``ts`` from the previous
|
||||
response. Empty (default) returns the most recent ``limit``
|
||||
rows.
|
||||
source_workspace_id: Which registered workspace's activity log
|
||||
to query. Auto-routes via ``_peer_to_source`` cache when
|
||||
unset (the workspace this peer was discovered through);
|
||||
falls back to module-level WORKSPACE_ID for single-workspace
|
||||
operators.
|
||||
|
||||
Returns a JSON-encoded list of activity rows (or an error string
|
||||
starting with ``Error:`` so the agent can branch). Each row carries
|
||||
``activity_type``, ``source_id``, ``target_id``, ``method``,
|
||||
``summary``, ``request_body``, ``response_body``, ``status``,
|
||||
``created_at`` — same shape ``inbox_peek`` and the canvas chat
|
||||
loader already see.
|
||||
"""
|
||||
if not peer_id or not isinstance(peer_id, str):
|
||||
return "Error: peer_id is required"
|
||||
if not isinstance(limit, int) or limit <= 0:
|
||||
limit = 20
|
||||
if limit > 500:
|
||||
limit = 500
|
||||
|
||||
src = source_workspace_id or _peer_to_source.get(peer_id) or WORKSPACE_ID
|
||||
|
||||
params: dict[str, str] = {
|
||||
"peer_id": peer_id,
|
||||
"limit": str(limit),
|
||||
}
|
||||
# Forward verbatim — the server route validates as RFC3339 at the
|
||||
# trust boundary and translates into a `created_at < $X` clause.
|
||||
if before_ts:
|
||||
params["before_ts"] = before_ts
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/activity",
|
||||
params=params,
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return f"Error: chat_history request failed: {exc}"
|
||||
|
||||
if resp.status_code == 400:
|
||||
# Trust-boundary rejection (malformed peer_id, etc.) — surface
|
||||
# the server's reason verbatim so the agent can correct itself.
|
||||
try:
|
||||
err = resp.json().get("error", "bad request")
|
||||
except Exception: # noqa: BLE001
|
||||
err = "bad request"
|
||||
return f"Error: {err}"
|
||||
if resp.status_code >= 400:
|
||||
return f"Error: chat_history returned HTTP {resp.status_code}"
|
||||
|
||||
try:
|
||||
rows = resp.json()
|
||||
except Exception: # noqa: BLE001
|
||||
return "Error: chat_history response was not JSON"
|
||||
if not isinstance(rows, list):
|
||||
return "Error: chat_history response was not a list"
|
||||
|
||||
# Server returns DESC (most recent first); reverse to chronological
|
||||
# so the agent reads the conversation top-down like a chat log.
|
||||
rows.reverse()
|
||||
return json.dumps(rows)
|
||||
@@ -0,0 +1,138 @@
|
||||
"""RBAC + auth-header helpers shared by all a2a_tools tool handlers.
|
||||
|
||||
Extracted from ``a2a_tools.py`` (RFC #2873 iter 4a). Centralises the
|
||||
"what can this workspace do" + "how do I prove it on a platform call"
|
||||
concerns into a single module so:
|
||||
|
||||
* Future tools added under ``a2a_tools/`` see one obvious helper to
|
||||
call instead of re-implementing the role/tier check.
|
||||
* The role-permission table is in ONE place — adding a new role
|
||||
or capability touches one file, not every tool that gates on it.
|
||||
* Tests targeting these helpers don't have to import the whole
|
||||
991-LOC ``a2a_tools`` surface.
|
||||
|
||||
Public surface:
|
||||
|
||||
* ``ROLE_PERMISSIONS`` — canonical role → action set table.
|
||||
* ``get_workspace_tier()`` — config-resolved tier (0 = root).
|
||||
* ``check_memory_write_permission()`` — boolean.
|
||||
* ``check_memory_read_permission()`` — boolean.
|
||||
* ``is_root_workspace()`` — boolean (tier == 0).
|
||||
* ``auth_headers_for_heartbeat(workspace_id=None)`` — auth-header dict
|
||||
with the multi-workspace registry lookup; tolerates ``platform_auth``
|
||||
missing on older installs (returns ``{}``).
|
||||
|
||||
Underscore-prefixed back-compat aliases (``_ROLE_PERMISSIONS``,
|
||||
``_check_memory_write_permission``, etc.) match the names previously
|
||||
exposed in ``a2a_tools`` so existing tests'
|
||||
``patch("a2a_tools._foo", ...)`` continue to work via the re-exports
|
||||
in ``a2a_tools.py``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
|
||||
# Mirror ``builtin_tools/audit.py`` for a2a_tools isolation. Listed as a
|
||||
# module-level constant rather than computed lazily so the table is
|
||||
# discoverable in static analysis + ``grep``.
|
||||
ROLE_PERMISSIONS: dict[str, set[str]] = {
|
||||
"admin": {"delegate", "approve", "memory.read", "memory.write"},
|
||||
"operator": {"delegate", "approve", "memory.read", "memory.write"},
|
||||
"read-only": {"memory.read"},
|
||||
"no-delegation": {"approve", "memory.read", "memory.write"},
|
||||
"no-approval": {"delegate", "memory.read", "memory.write"},
|
||||
"memory-readonly": {"memory.read"},
|
||||
}
|
||||
|
||||
|
||||
def get_workspace_tier() -> int:
|
||||
"""Return the workspace tier from config (0 = root, 1+ = tenant)."""
|
||||
try:
|
||||
from config import load_config
|
||||
|
||||
cfg = load_config()
|
||||
return getattr(cfg, "tier", 1)
|
||||
except Exception:
|
||||
return int(os.environ.get("WORKSPACE_TIER", 1))
|
||||
|
||||
|
||||
def _resolve_role_state() -> tuple[list[str], dict]:
|
||||
"""Return (roles, allowed_actions) from config.
|
||||
|
||||
Fail-closed: if config is unavailable, fall back to an "operator"
|
||||
default with no per-role overrides. Operator has memory.read +
|
||||
memory.write but not the elevated approve/delegate over GLOBAL
|
||||
scope, so a config outage doesn't grant unexpected privileges.
|
||||
"""
|
||||
try:
|
||||
from config import load_config
|
||||
|
||||
cfg = load_config()
|
||||
roles = list(getattr(cfg, "rbac", None).roles or ["operator"])
|
||||
allowed = dict(getattr(cfg, "rbac", None).allowed_actions or {})
|
||||
return roles, allowed
|
||||
except Exception:
|
||||
return ["operator"], {}
|
||||
|
||||
|
||||
def check_memory_write_permission() -> bool:
|
||||
"""Return True if this workspace's RBAC roles grant memory.write."""
|
||||
roles, allowed = _resolve_role_state()
|
||||
for role in roles:
|
||||
if role == "admin":
|
||||
return True
|
||||
if role in allowed:
|
||||
if "memory.write" in allowed[role]:
|
||||
return True
|
||||
elif role in ROLE_PERMISSIONS and "memory.write" in ROLE_PERMISSIONS[role]:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def check_memory_read_permission() -> bool:
|
||||
"""Return True if this workspace's RBAC roles grant memory.read."""
|
||||
roles, allowed = _resolve_role_state()
|
||||
for role in roles:
|
||||
if role == "admin":
|
||||
return True
|
||||
if role in allowed:
|
||||
if "memory.read" in allowed[role]:
|
||||
return True
|
||||
elif role in ROLE_PERMISSIONS and "memory.read" in ROLE_PERMISSIONS[role]:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def is_root_workspace() -> bool:
|
||||
"""Return True if this workspace is tier 0 (root/root-org)."""
|
||||
return get_workspace_tier() == 0
|
||||
|
||||
|
||||
def auth_headers_for_heartbeat(workspace_id: str | None = None) -> dict[str, str]:
|
||||
"""Return Phase 30.1 auth headers; tolerate platform_auth being absent
|
||||
in older installs (e.g. during rolling upgrade).
|
||||
|
||||
``workspace_id`` selects the per-workspace token from the multi-
|
||||
workspace registry when set (PR-1: external agent registered in
|
||||
multiple workspaces). With no arg the legacy single-token path is
|
||||
unchanged.
|
||||
"""
|
||||
try:
|
||||
from platform_auth import auth_headers
|
||||
return auth_headers(workspace_id) if workspace_id else auth_headers()
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
|
||||
# ============== Back-compat aliases for the previous a2a_tools names ==============
|
||||
# Tests + downstream call sites refer to the pre-extract names; aliasing
|
||||
# keeps both forms valid. The new public names (no underscore prefix)
|
||||
# are preferred for new code.
|
||||
|
||||
_ROLE_PERMISSIONS = ROLE_PERMISSIONS
|
||||
_get_workspace_tier = get_workspace_tier
|
||||
_check_memory_write_permission = check_memory_write_permission
|
||||
_check_memory_read_permission = check_memory_read_permission
|
||||
_is_root_workspace = is_root_workspace
|
||||
_auth_headers_for_heartbeat = auth_headers_for_heartbeat
|
||||
+44
-8
@@ -553,10 +553,26 @@ def _poll_once(
|
||||
# Imported lazily at use-site so a runtime that never sees an
|
||||
# upload-receive row never imports the module. Cheap on the hot
|
||||
# path because Python caches the import.
|
||||
from inbox_uploads import is_chat_upload_row, fetch_and_stage
|
||||
from inbox_uploads import is_chat_upload_row, BatchFetcher
|
||||
|
||||
new_count = 0
|
||||
last_id: str | None = None
|
||||
# ``batch_fetcher`` is lazy: a poll batch with no upload rows pays
|
||||
# zero overhead. Once the first upload row appears we open one
|
||||
# BatchFetcher and submit every subsequent upload row to its thread
|
||||
# pool; before processing the FIRST non-upload row we drain the
|
||||
# pool (wait_all) so the URI cache is hot when message rewriting
|
||||
# runs. Without the barrier, the chat message that references the
|
||||
# upload would arrive at the agent with the un-rewritten
|
||||
# platform-pending: URI.
|
||||
batch_fetcher: BatchFetcher | None = None
|
||||
|
||||
def _drain_uploads(bf: BatchFetcher | None) -> None:
|
||||
if bf is None:
|
||||
return
|
||||
bf.wait_all()
|
||||
bf.close()
|
||||
|
||||
for row in rows:
|
||||
if not isinstance(row, dict):
|
||||
continue
|
||||
@@ -570,14 +586,21 @@ def _poll_once(
|
||||
# message_from_activity. We DO advance the cursor past
|
||||
# this row so a permanent network outage on /content
|
||||
# doesn't stall the cursor and block real chat traffic.
|
||||
fetch_and_stage(
|
||||
row,
|
||||
platform_url=platform_url,
|
||||
workspace_id=workspace_id,
|
||||
headers=headers,
|
||||
)
|
||||
if batch_fetcher is None:
|
||||
batch_fetcher = BatchFetcher(
|
||||
platform_url=platform_url,
|
||||
workspace_id=workspace_id,
|
||||
headers=headers,
|
||||
)
|
||||
batch_fetcher.submit(row)
|
||||
last_id = str(row.get("id", "")) or last_id
|
||||
continue
|
||||
# Non-upload row: drain any pending uploads first so the URI
|
||||
# cache is populated before we run rewrite_request_body /
|
||||
# message_from_activity on a row that may reference one.
|
||||
if batch_fetcher is not None:
|
||||
_drain_uploads(batch_fetcher)
|
||||
batch_fetcher = None
|
||||
if _is_self_notify_row(row):
|
||||
# The workspace-server's `/notify` handler writes the agent's
|
||||
# own send_message_to_user POSTs to activity_logs with
|
||||
@@ -612,6 +635,13 @@ def _poll_once(
|
||||
last_id = message.activity_id
|
||||
new_count += 1
|
||||
|
||||
# Drain any uploads still in flight if the batch ended with upload
|
||||
# rows (no chat-message row to trigger the inline drain). Without
|
||||
# this, a future poll that picks up the chat-message row first
|
||||
# would race with the still-running fetches.
|
||||
if batch_fetcher is not None:
|
||||
_drain_uploads(batch_fetcher)
|
||||
|
||||
if last_id is not None:
|
||||
state.save_cursor(last_id, cursor_key)
|
||||
return new_count
|
||||
@@ -654,6 +684,7 @@ def start_poller_thread(
|
||||
platform_url: str,
|
||||
workspace_id: str,
|
||||
interval: float = POLL_INTERVAL_SECONDS,
|
||||
stop_event: threading.Event | None = None,
|
||||
) -> threading.Thread:
|
||||
"""Spawn the poller as a daemon thread. Returns the Thread handle.
|
||||
|
||||
@@ -665,13 +696,18 @@ def start_poller_thread(
|
||||
operator running ``ps -eL`` or eyeballing ``threading.enumerate()``
|
||||
can tell which thread is which without reverse-engineering it from
|
||||
crash tracebacks.
|
||||
|
||||
Pass ``stop_event`` to enable graceful shutdown — used by tests so
|
||||
the daemon thread doesn't outlive the test that started it and race
|
||||
with later tests' httpx patches. Production code passes None and
|
||||
relies on the daemon flag for process-exit cleanup.
|
||||
"""
|
||||
name = "molecule-mcp-inbox-poller"
|
||||
if workspace_id:
|
||||
name = f"{name}-{workspace_id[:8]}"
|
||||
t = threading.Thread(
|
||||
target=_poll_loop,
|
||||
args=(state, platform_url, workspace_id, interval),
|
||||
args=(state, platform_url, workspace_id, interval, stop_event),
|
||||
name=name,
|
||||
daemon=True,
|
||||
)
|
||||
|
||||
+264
-15
@@ -37,6 +37,7 @@ read another tenant's bytes even if a token is misrouted.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import concurrent.futures
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
@@ -68,6 +69,24 @@ MAX_FILE_BYTES = 25 * 1024 * 1024
|
||||
# 10s default for /activity calls — both are user-perceived latency.
|
||||
DEFAULT_FETCH_TIMEOUT = 60.0
|
||||
|
||||
# Concurrency cap for ``BatchFetcher``. Four workers is enough headroom
|
||||
# for the realistic "user dragged 3-4 files into chat at once" case
|
||||
# while bounding the platform's per-workspace fan-out. The cap matters
|
||||
# because the platform's /content endpoint reads bytea from Postgres in
|
||||
# a single round-trip per request — N workers = N concurrent DB reads
|
||||
# of up to 25 MB each, so a higher cap could pressure platform memory
|
||||
# without much UX win (network bandwidth is the bottleneck once the
|
||||
# bytes are buffered).
|
||||
DEFAULT_BATCH_FETCH_WORKERS = 4
|
||||
|
||||
# Upper bound on how long ``BatchFetcher.wait_all`` blocks the inbox
|
||||
# poll loop before giving up on still-in-flight fetches. Aligned with
|
||||
# DEFAULT_FETCH_TIMEOUT so a single hung fetch can't stall the loop
|
||||
# longer than its own deadline. A timeout fires only if a worker thread
|
||||
# is stuck past the underlying httpx timeout — pathological case;
|
||||
# normal completion is bounded by per-fetch timeout × ceil(N/W).
|
||||
DEFAULT_BATCH_WAIT_TIMEOUT = DEFAULT_FETCH_TIMEOUT + 5.0
|
||||
|
||||
# Cap on the URI cache. A long-lived workspace handling thousands of
|
||||
# uploads shouldn't grow without bound; an LRU cap of 1024 keeps the
|
||||
# entries-needed-for-a-typical-conversation well within memory.
|
||||
@@ -275,6 +294,7 @@ def fetch_and_stage(
|
||||
workspace_id: str,
|
||||
headers: dict[str, str],
|
||||
timeout_secs: float = DEFAULT_FETCH_TIMEOUT,
|
||||
client: Any = None,
|
||||
) -> str | None:
|
||||
"""Fetch the row's bytes, stage them under chat-uploads, and ack.
|
||||
|
||||
@@ -289,6 +309,11 @@ def fetch_and_stage(
|
||||
On success, the URI cache is updated so a subsequent chat message
|
||||
referencing the same ``platform-pending:`` URI is rewritten before
|
||||
the agent sees it.
|
||||
|
||||
Pass ``client`` to reuse a shared ``httpx.Client`` for both GET and
|
||||
POST ack (saves one TLS handshake per row vs. constructing one
|
||||
per-call). ``BatchFetcher`` does this across an entire poll batch so
|
||||
N concurrent fetches share one connection pool.
|
||||
"""
|
||||
body = _request_body_dict(row)
|
||||
if body is None:
|
||||
@@ -317,25 +342,58 @@ def fetch_and_stage(
|
||||
if not isinstance(filename, str):
|
||||
filename = "file"
|
||||
|
||||
# Lazy httpx import: the standalone MCP path uses httpx; an in-
|
||||
# container caller that imports this module by accident shouldn't
|
||||
# explode at import time.
|
||||
try:
|
||||
import httpx # noqa: WPS433
|
||||
except ImportError:
|
||||
logger.error("inbox_uploads: httpx not installed; cannot fetch %s", file_id)
|
||||
return None
|
||||
# Caller-supplied client: reuse for both GET + POST ack. Otherwise
|
||||
# build a one-shot client and close it on the way out. Lazy httpx
|
||||
# import keeps the standalone MCP path's optional dep optional.
|
||||
own_client = client is None
|
||||
if own_client:
|
||||
try:
|
||||
import httpx # noqa: WPS433
|
||||
except ImportError:
|
||||
logger.error("inbox_uploads: httpx not installed; cannot fetch %s", file_id)
|
||||
return None
|
||||
client = httpx.Client(timeout=timeout_secs)
|
||||
|
||||
try:
|
||||
return _fetch_and_stage_with_client(
|
||||
client,
|
||||
platform_url=platform_url,
|
||||
workspace_id=workspace_id,
|
||||
headers=headers,
|
||||
file_id=file_id,
|
||||
pending_uri=pending_uri,
|
||||
filename=filename,
|
||||
body=body,
|
||||
)
|
||||
finally:
|
||||
if own_client:
|
||||
try:
|
||||
client.close()
|
||||
except Exception: # noqa: BLE001 — close should never crash the caller
|
||||
pass
|
||||
|
||||
|
||||
def _fetch_and_stage_with_client(
|
||||
client: Any,
|
||||
*,
|
||||
platform_url: str,
|
||||
workspace_id: str,
|
||||
headers: dict[str, str],
|
||||
file_id: str,
|
||||
pending_uri: str,
|
||||
filename: str,
|
||||
body: dict[str, Any],
|
||||
) -> str | None:
|
||||
"""Inner body of fetch_and_stage. Always uses the supplied client for
|
||||
both GET and POST so the connection pool is shared across the call.
|
||||
"""
|
||||
content_url = f"{platform_url}/workspaces/{workspace_id}/pending-uploads/{file_id}/content"
|
||||
ack_url = f"{platform_url}/workspaces/{workspace_id}/pending-uploads/{file_id}/ack"
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=timeout_secs) as client:
|
||||
resp = client.get(content_url, headers=headers)
|
||||
resp = client.get(content_url, headers=headers)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning(
|
||||
"inbox_uploads: GET %s failed: %s", content_url, exc
|
||||
)
|
||||
logger.warning("inbox_uploads: GET %s failed: %s", content_url, exc)
|
||||
return None
|
||||
|
||||
if resp.status_code == 404:
|
||||
@@ -403,8 +461,7 @@ def fetch_and_stage(
|
||||
# back the on-disk file — the platform's sweep will clean up
|
||||
# eventually.
|
||||
try:
|
||||
with httpx.Client(timeout=timeout_secs) as client:
|
||||
ack_resp = client.post(ack_url, headers=headers)
|
||||
ack_resp = client.post(ack_url, headers=headers)
|
||||
if ack_resp.status_code >= 400:
|
||||
logger.warning(
|
||||
"inbox_uploads: ack %s returned %d: %s",
|
||||
@@ -418,6 +475,198 @@ def fetch_and_stage(
|
||||
return local_uri
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# BatchFetcher — concurrent fetch across a single poll batch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class BatchFetcher:
|
||||
"""Fetch + stage + ack a batch of upload-receive rows concurrently.
|
||||
|
||||
Why this exists: the inbox poll loop used to call ``fetch_and_stage``
|
||||
serially per row. With N upload rows in a batch (a user dragging
|
||||
multiple files into chat at once), the loop blocked for
|
||||
``N × per_fetch_latency`` before processing the chat message that
|
||||
referenced them — a 4-file upload at 5s each = 20s of stall
|
||||
before the agent saw the user's prompt. ``BatchFetcher`` runs the
|
||||
fetches on a small thread pool (default 4 workers) so the stall is
|
||||
bounded by ``ceil(N/W) × per_fetch_latency`` instead.
|
||||
|
||||
Connection reuse: one ``httpx.Client`` is shared across every fetch
|
||||
in the batch. httpx clients carry a connection pool, so a second
|
||||
fetch to the same platform host reuses the TCP+TLS handshake from
|
||||
the first — measurable win when fetches happen back-to-back.
|
||||
|
||||
Correctness invariant the caller MUST preserve: the inbox loop is
|
||||
expected to call ``wait_all()`` before processing the chat-message
|
||||
activity row that REFERENCES one of these uploads. Without the
|
||||
barrier, the URI cache is empty when ``rewrite_request_body`` runs
|
||||
and the agent sees the un-rewritten ``platform-pending:`` URI. The
|
||||
caller-side test ``test_poll_once_waits_for_uploads_before_messages``
|
||||
pins this end-to-end.
|
||||
|
||||
Use as a context manager so the executor + client are torn down
|
||||
even if the caller raises mid-batch.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
platform_url: str,
|
||||
workspace_id: str,
|
||||
headers: dict[str, str],
|
||||
timeout_secs: float = DEFAULT_FETCH_TIMEOUT,
|
||||
max_workers: int = DEFAULT_BATCH_FETCH_WORKERS,
|
||||
client: Any = None,
|
||||
):
|
||||
self._platform_url = platform_url
|
||||
self._workspace_id = workspace_id
|
||||
self._headers = dict(headers) # copy so caller mutations don't leak in
|
||||
self._timeout_secs = timeout_secs
|
||||
|
||||
# Caller can inject a client (tests do this); production callers
|
||||
# let us build one. Track ownership so we only close ours.
|
||||
self._own_client = client is None
|
||||
if self._own_client:
|
||||
try:
|
||||
import httpx # noqa: WPS433
|
||||
except ImportError:
|
||||
# Match fetch_and_stage's behavior: log + degrade rather
|
||||
# than raising at construction time. submit() will then
|
||||
# return None for every row.
|
||||
logger.error("inbox_uploads: httpx not installed; BatchFetcher inert")
|
||||
self._client: Any = None
|
||||
else:
|
||||
self._client = httpx.Client(timeout=timeout_secs)
|
||||
else:
|
||||
self._client = client
|
||||
|
||||
self._executor = concurrent.futures.ThreadPoolExecutor(
|
||||
max_workers=max_workers,
|
||||
thread_name_prefix="upload-fetch",
|
||||
)
|
||||
self._futures: list[concurrent.futures.Future[Any]] = []
|
||||
self._closed = False
|
||||
# Flipped to True by wait_all when the timeout fires; close()
|
||||
# reads this to decide between drain-and-wait vs cancel-queued.
|
||||
self._timed_out = False
|
||||
|
||||
def submit(self, row: dict[str, Any]) -> concurrent.futures.Future[Any] | None:
|
||||
"""Submit ``row`` for fetch + stage + ack. Non-blocking — the
|
||||
worker thread runs ``fetch_and_stage`` with the shared client.
|
||||
|
||||
Returns the Future so a caller that wants per-row outcome can
|
||||
await it; ``None`` if the BatchFetcher is in a degraded state
|
||||
(httpx missing).
|
||||
"""
|
||||
if self._closed:
|
||||
raise RuntimeError("BatchFetcher: submit after close")
|
||||
if self._client is None:
|
||||
return None
|
||||
fut = self._executor.submit(
|
||||
fetch_and_stage,
|
||||
row,
|
||||
platform_url=self._platform_url,
|
||||
workspace_id=self._workspace_id,
|
||||
headers=self._headers,
|
||||
timeout_secs=self._timeout_secs,
|
||||
client=self._client,
|
||||
)
|
||||
self._futures.append(fut)
|
||||
return fut
|
||||
|
||||
def wait_all(self, timeout: float | None = DEFAULT_BATCH_WAIT_TIMEOUT) -> None:
|
||||
"""Block until every submitted future completes (or times out).
|
||||
|
||||
Per-future exceptions are logged + swallowed — ``fetch_and_stage``
|
||||
already converts every error path to ``return None``, so a real
|
||||
exception propagating up to here is unexpected and we don't want
|
||||
one bad fetch to abort the whole batch.
|
||||
|
||||
Timeouts are also logged + swallowed AND record the timed-out
|
||||
futures on ``self._timed_out`` so ``close`` can cancel them
|
||||
without paying their full latency. Without this hand-off,
|
||||
``close()``'s ``shutdown(wait=True)`` would block on the leaked
|
||||
workers and undo the user-facing timeout — the inbox poll loop
|
||||
would stall indefinitely on a hung /content fetch.
|
||||
"""
|
||||
if not self._futures:
|
||||
return
|
||||
try:
|
||||
done, not_done = concurrent.futures.wait(
|
||||
self._futures,
|
||||
timeout=timeout,
|
||||
return_when=concurrent.futures.ALL_COMPLETED,
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001 — concurrent.futures shouldn't raise here
|
||||
logger.warning("inbox_uploads: BatchFetcher.wait_all crashed: %s", exc)
|
||||
return
|
||||
for fut in done:
|
||||
exc = fut.exception()
|
||||
if exc is not None:
|
||||
logger.warning(
|
||||
"inbox_uploads: BatchFetcher worker raised: %s", exc
|
||||
)
|
||||
if not_done:
|
||||
logger.warning(
|
||||
"inbox_uploads: BatchFetcher.wait_all left %d in-flight after %ss timeout",
|
||||
len(not_done),
|
||||
timeout,
|
||||
)
|
||||
# Mark these futures so close() knows to cancel-not-wait. We
|
||||
# cancel queued-but-not-started ones immediately; futures
|
||||
# already running can't be cancelled (Python's threading
|
||||
# model), but close() will pass cancel_futures=True so any
|
||||
# remaining queued items don't run.
|
||||
for fut in not_done:
|
||||
fut.cancel()
|
||||
self._timed_out = True
|
||||
|
||||
def close(self) -> None:
|
||||
"""Tear down the executor + (if owned) the httpx client.
|
||||
|
||||
Idempotent. After close, ``submit`` raises and the BatchFetcher
|
||||
cannot be reused — construct a fresh one for the next poll.
|
||||
|
||||
If ``wait_all`` reported a timeout, shutdown skips the
|
||||
``wait=True`` drain and instead asks the executor to drop queued
|
||||
futures (``cancel_futures=True``). Currently-running workers
|
||||
can't be interrupted by Python's threading model, but the poll
|
||||
loop returns immediately rather than blocking on a hung fetch.
|
||||
"""
|
||||
if self._closed:
|
||||
return
|
||||
self._closed = True
|
||||
timed_out = getattr(self, "_timed_out", False)
|
||||
try:
|
||||
if timed_out:
|
||||
# cancel_futures landed in Python 3.9 — guarded for older
|
||||
# interpreters via a TypeError fallback. Drop queued
|
||||
# tasks; running ones will exit when their httpx call
|
||||
# eventually returns or the daemon thread dies.
|
||||
try:
|
||||
self._executor.shutdown(wait=False, cancel_futures=True)
|
||||
except TypeError:
|
||||
self._executor.shutdown(wait=False)
|
||||
else:
|
||||
# Healthy path: wait for in-flight work so we don't
|
||||
# interrupt a fetch mid-write.
|
||||
self._executor.shutdown(wait=True)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning("inbox_uploads: executor shutdown error: %s", exc)
|
||||
if self._own_client and self._client is not None:
|
||||
try:
|
||||
self._client.close()
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning("inbox_uploads: client close error: %s", exc)
|
||||
|
||||
def __enter__(self) -> "BatchFetcher":
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb) -> None:
|
||||
self.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# URI rewrite for incoming chat messages
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
+60
-466
@@ -31,422 +31,53 @@ dependency via ``a2a-sdk``.
|
||||
In-container usage (``python -m molecule_runtime.a2a_mcp_server`` or
|
||||
direct import) bypasses this wrapper — the workspace runtime has its
|
||||
own heartbeat loop in ``heartbeat.py`` so we don't double-heartbeat.
|
||||
|
||||
Module layout (RFC #2873 iter 3 split):
|
||||
* ``mcp_heartbeat`` — register POST + heartbeat loop + auth-failure
|
||||
escalation + inbound-secret persistence.
|
||||
* ``mcp_workspace_resolver`` — env validation, single + multi-workspace
|
||||
resolution, operator-help printer, on-disk token-file read.
|
||||
* ``mcp_inbox_pollers`` — activate the inbox singleton + spawn one
|
||||
daemon poller per workspace.
|
||||
|
||||
This file keeps just ``main()`` plus thin re-exports of the private
|
||||
symbols so existing tests' imports (``mcp_cli._build_agent_card``,
|
||||
``mcp_cli._heartbeat_loop``, etc.) keep working without churn.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import configs_dir
|
||||
import mcp_heartbeat
|
||||
import mcp_inbox_pollers
|
||||
import mcp_workspace_resolver
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Heartbeat cadence. Must be tighter than healthsweep's stale window
|
||||
# (currently 60-90s — see registry/healthsweep.go) by a comfortable
|
||||
# margin so a single missed heartbeat doesn't flip awaiting_agent.
|
||||
# 20s gives the operator's network 3 attempts within the budget; long
|
||||
# enough that it doesn't spam, short enough to recover quickly after
|
||||
# laptop sleep.
|
||||
HEARTBEAT_INTERVAL_SECONDS = 20.0
|
||||
# Re-export public surface for back-compat with the pre-split callers
|
||||
# and tests. The underscore-prefixed names mirror the names that
|
||||
# existed in this module before the split — keeping them ensures
|
||||
# `mcp_cli._build_agent_card`, `mcp_cli._heartbeat_loop`, etc.
|
||||
# resolve identically to the new functions.
|
||||
HEARTBEAT_INTERVAL_SECONDS = mcp_heartbeat.HEARTBEAT_INTERVAL_SECONDS
|
||||
_HEARTBEAT_AUTH_LOUD_THRESHOLD = mcp_heartbeat.HEARTBEAT_AUTH_LOUD_THRESHOLD
|
||||
_HEARTBEAT_AUTH_RELOG_INTERVAL = mcp_heartbeat.HEARTBEAT_AUTH_RELOG_INTERVAL
|
||||
|
||||
# After this many consecutive 401/403 heartbeats, escalate from
|
||||
# WARNING to ERROR with re-onboard guidance. 3 ticks at 20s = ~1 minute
|
||||
# of sustained auth failure — enough to rule out a transient platform
|
||||
# blip but quick enough that an operator doesn't sit puzzled for 10
|
||||
# minutes wondering why their MCP tools 401. Same threshold used for
|
||||
# repeat-logging at 20-tick (~7 min) intervals so a long-running
|
||||
# session that missed the first ERROR still sees the message.
|
||||
_HEARTBEAT_AUTH_LOUD_THRESHOLD = 3
|
||||
_HEARTBEAT_AUTH_RELOG_INTERVAL = 20
|
||||
_build_agent_card = mcp_heartbeat.build_agent_card
|
||||
_platform_register = mcp_heartbeat.platform_register
|
||||
_heartbeat_loop = mcp_heartbeat.heartbeat_loop
|
||||
_log_heartbeat_auth_failure = mcp_heartbeat.log_heartbeat_auth_failure
|
||||
_persist_inbound_secret_from_heartbeat = mcp_heartbeat.persist_inbound_secret_from_heartbeat
|
||||
_start_heartbeat_thread = mcp_heartbeat.start_heartbeat_thread
|
||||
|
||||
_resolve_workspaces = mcp_workspace_resolver.resolve_workspaces
|
||||
_print_missing_env_help = mcp_workspace_resolver.print_missing_env_help
|
||||
_read_token_file = mcp_workspace_resolver.read_token_file
|
||||
|
||||
def _build_agent_card(workspace_id: str) -> dict:
|
||||
"""Build the ``agent_card`` payload sent to /registry/register.
|
||||
|
||||
Three optional env vars override the defaults so an operator can
|
||||
surface human-readable identity + capabilities to peers and the
|
||||
canvas Skills tab without code changes:
|
||||
|
||||
* ``MOLECULE_AGENT_NAME`` — display name (defaults to
|
||||
``molecule-mcp-{id[:8]}``). Surfaced in canvas workspace cards
|
||||
and ``list_peers`` output.
|
||||
* ``MOLECULE_AGENT_DESCRIPTION`` — one-liner about the agent's
|
||||
purpose. Rendered in canvas Details + Skills tabs.
|
||||
* ``MOLECULE_AGENT_SKILLS`` — comma-separated skill names
|
||||
(e.g. ``research,code-review,memory-curation``). Each name is
|
||||
expanded to a ``{"name": ...}`` skill object — the minimum
|
||||
shape that satisfies both ``shared_runtime.summarize_peers``
|
||||
(uses ``s["name"]``) and the canvas SkillsTab.tsx schema
|
||||
(id falls back to name when omitted). Empty / whitespace
|
||||
entries are dropped.
|
||||
|
||||
Defaults match the previous hardcoded behaviour exactly so this
|
||||
is a strict superset — an operator who sets none of the env vars
|
||||
sees no change.
|
||||
"""
|
||||
name = (os.environ.get("MOLECULE_AGENT_NAME") or "").strip()
|
||||
if not name:
|
||||
name = f"molecule-mcp-{workspace_id[:8]}"
|
||||
|
||||
description = (os.environ.get("MOLECULE_AGENT_DESCRIPTION") or "").strip()
|
||||
|
||||
skills_raw = (os.environ.get("MOLECULE_AGENT_SKILLS") or "").strip()
|
||||
skills: list[dict] = []
|
||||
if skills_raw:
|
||||
for s in skills_raw.split(","):
|
||||
label = s.strip()
|
||||
if label:
|
||||
skills.append({"name": label})
|
||||
|
||||
card: dict = {"name": name, "skills": skills}
|
||||
if description:
|
||||
card["description"] = description
|
||||
return card
|
||||
|
||||
|
||||
def _platform_register(platform_url: str, workspace_id: str, token: str) -> None:
|
||||
"""One-shot register at startup; fails fast on auth errors.
|
||||
|
||||
Lifts the workspace from ``awaiting_agent`` to ``online`` for
|
||||
operators who never ran the curl-register snippet. Safe to call
|
||||
repeatedly: the platform's register handler is an upsert that
|
||||
just refreshes ``url``, ``agent_card``, and ``status``.
|
||||
|
||||
Failure model (post-review):
|
||||
- 401 / 403 → ``sys.exit(3)`` immediately. The operator's
|
||||
token is wrong; silently looping in a broken state would
|
||||
make this hard to diagnose because the MCP tools would 401
|
||||
on every call too. Hard-fail is the kindest option.
|
||||
- Other 4xx/5xx → log a warning + continue. The heartbeat
|
||||
thread will surface persistent failures; transient platform
|
||||
blips shouldn't abort the MCP loop.
|
||||
- Network / transport errors → log + continue. Same reasoning.
|
||||
|
||||
Origin header is required by the SaaS edge WAF; without it
|
||||
/registry/register currently still works (it's on the WAF
|
||||
allowlist), but the heartbeat path needs Origin and we want one
|
||||
consistent header set across both calls.
|
||||
"""
|
||||
try:
|
||||
import httpx
|
||||
except ImportError:
|
||||
# httpx is a transitive dep via a2a-sdk; if missing, the MCP
|
||||
# server won't import either. Let the caller's later import
|
||||
# surface the real error.
|
||||
return
|
||||
|
||||
payload = {
|
||||
"id": workspace_id,
|
||||
"url": "",
|
||||
"agent_card": _build_agent_card(workspace_id),
|
||||
"delivery_mode": "poll",
|
||||
}
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Origin": platform_url,
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
try:
|
||||
with httpx.Client(timeout=10.0) as client:
|
||||
resp = client.post(
|
||||
f"{platform_url}/registry/register",
|
||||
json=payload,
|
||||
headers=headers,
|
||||
)
|
||||
if resp.status_code in (401, 403):
|
||||
print(
|
||||
f"molecule-mcp: register rejected with HTTP {resp.status_code} — "
|
||||
f"the token in MOLECULE_WORKSPACE_TOKEN is invalid for workspace "
|
||||
f"{workspace_id}. Regenerate from the canvas → Tokens tab.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(3)
|
||||
if resp.status_code >= 400:
|
||||
logger.warning(
|
||||
"molecule-mcp: register POST returned HTTP %d: %s",
|
||||
resp.status_code,
|
||||
(resp.text or "")[:200],
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"molecule-mcp: registered workspace %s with platform",
|
||||
workspace_id,
|
||||
)
|
||||
except SystemExit:
|
||||
raise
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning("molecule-mcp: register POST failed: %s", exc)
|
||||
|
||||
|
||||
def _heartbeat_loop(
|
||||
platform_url: str,
|
||||
workspace_id: str,
|
||||
token: str,
|
||||
interval: float = HEARTBEAT_INTERVAL_SECONDS,
|
||||
) -> None:
|
||||
"""Daemon thread body: POST /registry/heartbeat every ``interval``s.
|
||||
|
||||
Failures are logged at WARNING and the loop continues. The thread
|
||||
exits when the main process does (daemon=True). Each iteration
|
||||
rebuilds the payload + headers — cheap and ensures token rotation
|
||||
via env var (rare but possible) is picked up on the next tick.
|
||||
"""
|
||||
try:
|
||||
import httpx
|
||||
except ImportError:
|
||||
return
|
||||
|
||||
start_time = time.time()
|
||||
consecutive_auth_failures = 0
|
||||
while True:
|
||||
body = {
|
||||
"workspace_id": workspace_id,
|
||||
"error_rate": 0.0,
|
||||
"sample_error": "",
|
||||
"active_tasks": 0,
|
||||
"uptime_seconds": int(time.time() - start_time),
|
||||
}
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Origin": platform_url,
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
try:
|
||||
with httpx.Client(timeout=10.0) as client:
|
||||
resp = client.post(
|
||||
f"{platform_url}/registry/heartbeat",
|
||||
json=body,
|
||||
headers=headers,
|
||||
)
|
||||
if resp.status_code in (401, 403):
|
||||
consecutive_auth_failures += 1
|
||||
_log_heartbeat_auth_failure(
|
||||
consecutive_auth_failures, workspace_id, resp.status_code,
|
||||
)
|
||||
elif resp.status_code >= 400:
|
||||
# Non-auth HTTP error — log, but DO NOT touch the
|
||||
# auth-failure counter (5xx blips, 429, etc. are
|
||||
# transient and unrelated to token validity).
|
||||
logger.warning(
|
||||
"molecule-mcp: heartbeat HTTP %d: %s",
|
||||
resp.status_code,
|
||||
(resp.text or "")[:200],
|
||||
)
|
||||
else:
|
||||
consecutive_auth_failures = 0
|
||||
_persist_inbound_secret_from_heartbeat(resp)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning("molecule-mcp: heartbeat failed: %s", exc)
|
||||
time.sleep(interval)
|
||||
|
||||
|
||||
def _log_heartbeat_auth_failure(count: int, workspace_id: str, status_code: int) -> None:
|
||||
"""Escalate consecutive heartbeat 401/403s from quiet WARNING to
|
||||
actionable ERROR.
|
||||
|
||||
The operator's first sign of trouble shouldn't be "tools 401 with no
|
||||
explanation" — that was the failure mode that motivated this code,
|
||||
triggered by a workspace being deleted server-side and its tokens
|
||||
revoked while the runtime kept heartbeating in silence.
|
||||
|
||||
Cadence:
|
||||
* count < threshold: WARNING per tick (transient — could be a
|
||||
platform blip, don't shout yet)
|
||||
* count == threshold: ERROR with re-onboard instructions
|
||||
(the first signal the operator can't miss)
|
||||
* count > threshold and (count - threshold) % relog == 0: re-log
|
||||
ERROR (so a session that started after the first ERROR still
|
||||
sees the message scrolling past in their logs)
|
||||
"""
|
||||
if count < _HEARTBEAT_AUTH_LOUD_THRESHOLD:
|
||||
logger.warning(
|
||||
"molecule-mcp: heartbeat HTTP %d (auth failure %d/%d) — "
|
||||
"token may be revoked. Will retry; if persistent, regenerate "
|
||||
"from canvas → Tokens.",
|
||||
status_code, count, _HEARTBEAT_AUTH_LOUD_THRESHOLD,
|
||||
)
|
||||
return
|
||||
# At or past the threshold — this is the loud actionable error.
|
||||
if count == _HEARTBEAT_AUTH_LOUD_THRESHOLD or (
|
||||
count - _HEARTBEAT_AUTH_LOUD_THRESHOLD
|
||||
) % _HEARTBEAT_AUTH_RELOG_INTERVAL == 0:
|
||||
logger.error(
|
||||
"molecule-mcp: %d consecutive heartbeat auth failures (HTTP %d) — "
|
||||
"the token in MOLECULE_WORKSPACE_TOKEN has been REVOKED, likely "
|
||||
"because workspace %s was deleted server-side. The MCP server is "
|
||||
"still running but every platform call will fail. Regenerate the "
|
||||
"workspace + token from the canvas (Tokens tab), update your MCP "
|
||||
"config, and restart your runtime.",
|
||||
count, status_code, workspace_id,
|
||||
)
|
||||
|
||||
|
||||
def _persist_inbound_secret_from_heartbeat(resp: object) -> None:
|
||||
"""Persist ``platform_inbound_secret`` from a heartbeat response, if any.
|
||||
|
||||
The platform's heartbeat handler returns the secret on every beat
|
||||
(mirroring /registry/register) so a workspace that lazy-healed the
|
||||
secret on the platform side — typical recovery path for a workspace
|
||||
whose row had a NULL ``platform_inbound_secret`` after a partial
|
||||
bootstrap — picks it up within one heartbeat tick instead of
|
||||
requiring a runtime restart.
|
||||
|
||||
Without this delivery path the chat-upload code path's "secret was
|
||||
just minted, will pick up on next heartbeat" 503 message is a lie
|
||||
and the workspace stays 401-forever until the operator restarts
|
||||
the runtime. Caught 2026-04-30 on hongmingwang tenant.
|
||||
|
||||
Failure is non-fatal: if the body isn't JSON, doesn't carry the
|
||||
field, or the disk write fails, the next heartbeat retries. This
|
||||
matches the cold-start register flow in main.py:319-323.
|
||||
"""
|
||||
try:
|
||||
body = resp.json()
|
||||
except Exception: # noqa: BLE001
|
||||
return
|
||||
if not isinstance(body, dict):
|
||||
return
|
||||
secret = body.get("platform_inbound_secret")
|
||||
if not secret:
|
||||
return
|
||||
try:
|
||||
from platform_inbound_auth import save_inbound_secret
|
||||
|
||||
save_inbound_secret(secret)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning(
|
||||
"molecule-mcp: persist inbound secret from heartbeat failed: %s", exc
|
||||
)
|
||||
|
||||
|
||||
def _start_heartbeat_thread(
|
||||
platform_url: str,
|
||||
workspace_id: str,
|
||||
token: str,
|
||||
) -> threading.Thread:
|
||||
"""Start the heartbeat daemon thread. Returns the Thread handle.
|
||||
|
||||
The MCP stdio loop runs in the foreground (asyncio); this thread
|
||||
runs alongside it. ``daemon=True`` so when the operator hits
|
||||
Ctrl-C / closes the runtime, the heartbeat dies with it instead
|
||||
of leaking and writing to a stale workspace.
|
||||
"""
|
||||
t = threading.Thread(
|
||||
target=_heartbeat_loop,
|
||||
args=(platform_url, workspace_id, token),
|
||||
name="molecule-mcp-heartbeat",
|
||||
daemon=True,
|
||||
)
|
||||
t.start()
|
||||
return t
|
||||
|
||||
|
||||
def _resolve_workspaces() -> tuple[list[tuple[str, str]], list[str]]:
|
||||
"""Return the list of ``(workspace_id, token)`` pairs to register.
|
||||
|
||||
Resolution order:
|
||||
|
||||
1. ``MOLECULE_WORKSPACES`` env var — JSON array of
|
||||
``{"id": "...", "token": "..."}`` objects. Activates the
|
||||
multi-workspace external-agent path (one process registered into
|
||||
N workspaces). When set, ``WORKSPACE_ID`` / ``MOLECULE_WORKSPACE_TOKEN``
|
||||
are IGNORED — the JSON is the source of truth.
|
||||
|
||||
2. Single-workspace fallback — ``WORKSPACE_ID`` env var + token from
|
||||
``MOLECULE_WORKSPACE_TOKEN`` or ``${CONFIGS_DIR}/.auth_token``.
|
||||
This is the pre-existing path; back-compat exact.
|
||||
|
||||
Returns ``(workspaces, errors)``:
|
||||
* ``workspaces``: list of ``(workspace_id, token)`` — non-empty
|
||||
on the happy path.
|
||||
* ``errors``: human-readable strings describing what's missing /
|
||||
malformed. ``main()`` surfaces these with the same shape as
|
||||
``_print_missing_env_help`` so the operator's first run gives
|
||||
actionable output.
|
||||
|
||||
Why JSON env (not file): ergonomic for Claude Code MCP config (one
|
||||
string in ``mcpServers.molecule.env`` instead of a sidecar file)
|
||||
and for CI / launchers. A separate config-file path can be added
|
||||
later without breaking this.
|
||||
"""
|
||||
raw = os.environ.get("MOLECULE_WORKSPACES", "").strip()
|
||||
if raw:
|
||||
try:
|
||||
parsed = json.loads(raw)
|
||||
except json.JSONDecodeError as exc:
|
||||
return [], [
|
||||
f"MOLECULE_WORKSPACES is not valid JSON ({exc.msg} at pos "
|
||||
f"{exc.pos}). Expected: '[{{\"id\":\"<wsid>\",\"token\":"
|
||||
f"\"<tok>\"}},{{...}}]'"
|
||||
]
|
||||
if not isinstance(parsed, list) or not parsed:
|
||||
return [], [
|
||||
"MOLECULE_WORKSPACES must be a non-empty JSON array of "
|
||||
"{\"id\":\"...\",\"token\":\"...\"} objects"
|
||||
]
|
||||
out: list[tuple[str, str]] = []
|
||||
seen: set[str] = set()
|
||||
errors: list[str] = []
|
||||
for i, entry in enumerate(parsed):
|
||||
if not isinstance(entry, dict):
|
||||
errors.append(
|
||||
f"MOLECULE_WORKSPACES[{i}] is not an object — got {type(entry).__name__}"
|
||||
)
|
||||
continue
|
||||
wsid = str(entry.get("id", "")).strip()
|
||||
tok = str(entry.get("token", "")).strip()
|
||||
if not wsid or not tok:
|
||||
errors.append(
|
||||
f"MOLECULE_WORKSPACES[{i}] missing 'id' or 'token'"
|
||||
)
|
||||
continue
|
||||
if wsid in seen:
|
||||
errors.append(
|
||||
f"MOLECULE_WORKSPACES[{i}] duplicate workspace id {wsid!r}"
|
||||
)
|
||||
continue
|
||||
seen.add(wsid)
|
||||
out.append((wsid, tok))
|
||||
if errors:
|
||||
return [], errors
|
||||
return out, []
|
||||
|
||||
# Single-workspace back-compat path.
|
||||
wsid = os.environ.get("WORKSPACE_ID", "").strip()
|
||||
if not wsid:
|
||||
return [], ["WORKSPACE_ID (or MOLECULE_WORKSPACES) is required"]
|
||||
tok = os.environ.get("MOLECULE_WORKSPACE_TOKEN", "").strip()
|
||||
if not tok:
|
||||
tok = _read_token_file()
|
||||
if not tok:
|
||||
return [], [
|
||||
"MOLECULE_WORKSPACE_TOKEN (or CONFIGS_DIR/.auth_token) is required"
|
||||
]
|
||||
return [(wsid, tok)], []
|
||||
|
||||
|
||||
def _print_missing_env_help(missing: list[str], have_token_file: bool) -> None:
|
||||
print("molecule-mcp: missing required environment.\n", file=sys.stderr)
|
||||
print("Set the following before running molecule-mcp:", file=sys.stderr)
|
||||
print(" WORKSPACE_ID — your workspace UUID (from canvas)", file=sys.stderr)
|
||||
print(
|
||||
" PLATFORM_URL — base URL of your Molecule platform "
|
||||
"(e.g. https://your-tenant.staging.moleculesai.app)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
if not have_token_file:
|
||||
print(
|
||||
" MOLECULE_WORKSPACE_TOKEN — bearer token for this workspace "
|
||||
"(canvas → Tokens tab)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print("", file=sys.stderr)
|
||||
print(f"Currently missing: {', '.join(missing)}", file=sys.stderr)
|
||||
_start_inbox_pollers = mcp_inbox_pollers.start_inbox_pollers
|
||||
|
||||
|
||||
def main() -> None:
|
||||
@@ -462,7 +93,34 @@ def main() -> None:
|
||||
``{"id": ..., "token": ...}`` entries. One register + heartbeat
|
||||
+ inbox poller per entry; messages from any workspace land in
|
||||
the same agent inbox tagged with ``arrival_workspace_id``.
|
||||
|
||||
Subcommand:
|
||||
``molecule-mcp doctor`` runs an onboarding diagnostic against the
|
||||
current shell environment + platform reachability and exits.
|
||||
Closes Ryan's #2934 item 6.
|
||||
"""
|
||||
# Subcommand dispatch — must come BEFORE env-var validation so
|
||||
# `molecule-mcp doctor` can run on a partially-configured shell
|
||||
# and tell the operator what's missing. Argv shapes:
|
||||
# molecule-mcp → run server (this function's main path)
|
||||
# molecule-mcp doctor → run diagnostic, exit
|
||||
# molecule-mcp --help → defer to doctor for now (no other
|
||||
# flags are supported yet)
|
||||
if len(sys.argv) > 1:
|
||||
if sys.argv[1] in ("doctor", "--doctor"):
|
||||
import mcp_doctor
|
||||
sys.exit(mcp_doctor.run())
|
||||
if sys.argv[1] in ("--help", "-h", "help"):
|
||||
print(
|
||||
"molecule-mcp — Molecule AI universal MCP server\n\n"
|
||||
"Usage:\n"
|
||||
" molecule-mcp Run the MCP stdio server (registers + heartbeats)\n"
|
||||
" molecule-mcp doctor Run onboarding diagnostic + exit\n\n"
|
||||
"Required env: PLATFORM_URL, WORKSPACE_ID (or MOLECULE_WORKSPACES),\n"
|
||||
" MOLECULE_WORKSPACE_TOKEN (or MOLECULE_WORKSPACE_TOKEN_FILE)\n",
|
||||
)
|
||||
sys.exit(0)
|
||||
|
||||
if not os.environ.get("PLATFORM_URL", "").strip():
|
||||
_print_missing_env_help(
|
||||
["PLATFORM_URL"],
|
||||
@@ -558,69 +216,5 @@ def main() -> None:
|
||||
cli_main()
|
||||
|
||||
|
||||
def _start_inbox_pollers(platform_url: str, workspace_ids: list[str]) -> None:
|
||||
"""Activate the inbox singleton + spawn one poller daemon thread per workspace.
|
||||
|
||||
Done lazily here (not at module import) because importing inbox
|
||||
pulls in platform_auth, which only resolves cleanly AFTER env
|
||||
validation succeeds. Activation is idempotent within a process,
|
||||
so a stray double-call (e.g. test harness re-entering main) is
|
||||
harmless.
|
||||
|
||||
The poller threads are daemon=True — die with the main process.
|
||||
|
||||
Single-workspace path: one poller, single cursor file at the legacy
|
||||
location (``.mcp_inbox_cursor``). Cursor-key resolution falls back
|
||||
to the empty string for back-compat with operators whose existing
|
||||
on-disk cursor was written by the pre-multi-workspace code.
|
||||
|
||||
Multi-workspace path: N pollers, each with its own cursor file
|
||||
keyed by ``workspace_id[:8]``. Cursors live next to each other in
|
||||
configs_dir so an operator inspecting state sees all of them
|
||||
together.
|
||||
"""
|
||||
try:
|
||||
import inbox
|
||||
except ImportError as exc:
|
||||
logger.warning("molecule-mcp: inbox module unavailable: %s", exc)
|
||||
return
|
||||
|
||||
if len(workspace_ids) <= 1:
|
||||
# Back-compat exact: single-workspace mode reuses the legacy
|
||||
# cursor filename + cursor_path constructor arg, so an existing
|
||||
# operator's on-disk state isn't invalidated by upgrade.
|
||||
wsid = workspace_ids[0]
|
||||
state = inbox.InboxState(cursor_path=inbox.default_cursor_path())
|
||||
inbox.activate(state)
|
||||
inbox.start_poller_thread(state, platform_url, wsid)
|
||||
return
|
||||
|
||||
# Multi-workspace: per-workspace cursor file, one shared queue.
|
||||
cursor_paths = {wsid: inbox.default_cursor_path(wsid) for wsid in workspace_ids}
|
||||
state = inbox.InboxState(cursor_paths=cursor_paths)
|
||||
inbox.activate(state)
|
||||
for wsid in workspace_ids:
|
||||
inbox.start_poller_thread(state, platform_url, wsid)
|
||||
|
||||
|
||||
def _read_token_file() -> str:
|
||||
"""Read the token from the resolved configs dir's ``.auth_token`` if
|
||||
present.
|
||||
|
||||
Mirrors platform_auth._token_file's location resolution but without
|
||||
importing the heavy module here (that import triggers a2a_client's
|
||||
WORKSPACE_ID guard which is fine after env validation, but cheaper
|
||||
to inline a 4-line file read than pull in the whole stack just for
|
||||
the path).
|
||||
"""
|
||||
path = configs_dir.resolve() / ".auth_token"
|
||||
if not path.is_file():
|
||||
return ""
|
||||
try:
|
||||
return path.read_text().strip()
|
||||
except OSError:
|
||||
return ""
|
||||
|
||||
|
||||
if __name__ == "__main__": # pragma: no cover
|
||||
main()
|
||||
|
||||
@@ -0,0 +1,426 @@
|
||||
"""molecule-mcp doctor — diagnostic subcommand for first-run install.
|
||||
|
||||
Run via ``molecule-mcp doctor``. Prints a checklist of common
|
||||
onboarding failure modes and concrete next-step suggestions for each
|
||||
failed check.
|
||||
|
||||
Closes Ryan's #2934 item 6 ("Add a molecule-mcp doctor subcommand —
|
||||
this single command would have saved me 30 of the 45 minutes").
|
||||
Pairs with #2935 (Python>=3.11 callout, PATH guidance, TOKEN_FILE
|
||||
support) — those fixed the snippet, this gives the operator a way to
|
||||
self-diagnose when something still goes wrong.
|
||||
|
||||
Six checks, in operator-encounter order:
|
||||
|
||||
1. Python version — wheel requires >=3.11 (pip says
|
||||
"no versions found" on older).
|
||||
2. Wheel install — molecule_runtime importable + version reported.
|
||||
3. PATH for molecule-mcp — pip user-site installs land at
|
||||
~/Library/Python/3.X/bin which isn't on
|
||||
PATH on a fresh macOS shell. Most common
|
||||
"claude mcp add can't find molecule-mcp"
|
||||
cause.
|
||||
4. Env vars — PLATFORM_URL set + reachable;
|
||||
WORKSPACE_ID set; auth token resolvable
|
||||
(env or *_FILE or .auth_token).
|
||||
5. Platform health — GET ${PLATFORM_URL}/healthz returns 2xx.
|
||||
Catches DNS/firewall/wrong-scheme issues
|
||||
before the operator hits the real
|
||||
register call.
|
||||
6. Token auth — POST ${PLATFORM_URL}/registry/heartbeat
|
||||
with the resolved workspace_id+token
|
||||
returns 2xx. End-to-end auth verification.
|
||||
Uses heartbeat (idempotent timestamp
|
||||
update) instead of register (UPSERT —
|
||||
would clobber agent_card metadata) so
|
||||
the doctor is safe to run against a
|
||||
live workspace.
|
||||
|
||||
Each check prints one of:
|
||||
[OK] <one-line status>
|
||||
[WARN] <one-line status> next: <fix suggestion>
|
||||
[FAIL] <one-line status> next: <fix suggestion>
|
||||
|
||||
Exit 0 if all pass or only WARNs; exit 1 if any FAIL — so the
|
||||
subcommand is scriptable from CI / install-checks too.
|
||||
|
||||
Out of scope for now (deferred follow-ups):
|
||||
- Claude Code-specific checks (parse ~/.claude.json, verify each
|
||||
MCP entry is plugin-sourced + dev-channels flag is set). That's
|
||||
a separate Claude-Code-specific doctor and lives in the
|
||||
claude-code-channel plugin, not the universal-MCP doctor.
|
||||
- Automated remediation (running the suggested fix). Doctor is
|
||||
a diagnostic tool — it tells the operator what's wrong + how
|
||||
to fix it, doesn't apply changes.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import importlib.metadata
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
# urllib avoids a hard dep on `requests` for the doctor — the real
|
||||
# CLI already imports requests via mcp_heartbeat, but doctor should
|
||||
# keep working even on a partial install where requests is missing
|
||||
# (that itself is a finding worth surfacing).
|
||||
from urllib import request as urllib_request
|
||||
from urllib.error import URLError
|
||||
|
||||
|
||||
# ANSI colors are friendly on TTYs; auto-disable on pipe / NO_COLOR
|
||||
# for CI logs where the escape sequences clutter the diff.
|
||||
def _color(name: str) -> str:
|
||||
if not sys.stdout.isatty() or os.environ.get("NO_COLOR"):
|
||||
return ""
|
||||
return {
|
||||
"green": "\033[32m",
|
||||
"yellow": "\033[33m",
|
||||
"red": "\033[31m",
|
||||
"dim": "\033[2m",
|
||||
"reset": "\033[0m",
|
||||
}.get(name, "")
|
||||
|
||||
|
||||
def _ok(label: str, msg: str) -> None:
|
||||
print(f" {_color('green')}[OK]{_color('reset')} {label}: {msg}")
|
||||
|
||||
|
||||
def _warn(label: str, msg: str, fix: str) -> None:
|
||||
print(f" {_color('yellow')}[WARN]{_color('reset')} {label}: {msg}")
|
||||
print(f" {_color('dim')}next:{_color('reset')} {fix}")
|
||||
|
||||
|
||||
def _fail(label: str, msg: str, fix: str) -> None:
|
||||
print(f" {_color('red')}[FAIL]{_color('reset')} {label}: {msg}")
|
||||
print(f" {_color('dim')}next:{_color('reset')} {fix}")
|
||||
|
||||
|
||||
# Each check returns a "ok" | "warn" | "fail" verdict so the caller
|
||||
# can compute an exit code without re-walking the print stream.
|
||||
Verdict = str # "ok" | "warn" | "fail"
|
||||
|
||||
|
||||
def check_python_version() -> Verdict:
|
||||
label = "Python version"
|
||||
major, minor = sys.version_info[:2]
|
||||
if (major, minor) >= (3, 11):
|
||||
_ok(label, f"Python {major}.{minor} (wheel requires >=3.11)")
|
||||
return "ok"
|
||||
_fail(
|
||||
label,
|
||||
f"Python {major}.{minor} is below the wheel's >=3.11 floor",
|
||||
"upgrade Python (brew install python@3.12 / apt install python3.12) "
|
||||
"or run molecule-mcp via a 3.11+ venv.",
|
||||
)
|
||||
return "fail"
|
||||
|
||||
|
||||
def check_wheel_install() -> Verdict:
|
||||
label = "Wheel install"
|
||||
try:
|
||||
version = importlib.metadata.version("molecule-ai-workspace-runtime")
|
||||
except importlib.metadata.PackageNotFoundError:
|
||||
_fail(
|
||||
label,
|
||||
"molecule-ai-workspace-runtime not found in this interpreter's site-packages",
|
||||
"pip install molecule-ai-workspace-runtime "
|
||||
"(or pipx install molecule-ai-workspace-runtime to get the "
|
||||
"binary on PATH automatically).",
|
||||
)
|
||||
return "fail"
|
||||
try:
|
||||
importlib.import_module("molecule_runtime.mcp_cli")
|
||||
except ImportError as e:
|
||||
_fail(
|
||||
label,
|
||||
f"package found ({version}) but `molecule_runtime.mcp_cli` won't import: {e}",
|
||||
"reinstall the wheel (pip install --force-reinstall "
|
||||
"molecule-ai-workspace-runtime); if it still fails, file "
|
||||
"a bug with the traceback.",
|
||||
)
|
||||
return "fail"
|
||||
_ok(label, f"molecule-ai-workspace-runtime=={version}")
|
||||
return "ok"
|
||||
|
||||
|
||||
def check_path_for_binary() -> Verdict:
|
||||
label = "PATH for molecule-mcp"
|
||||
found = shutil.which("molecule-mcp")
|
||||
if found:
|
||||
_ok(label, f"resolves to {found}")
|
||||
return "ok"
|
||||
# Not on PATH — work out where pip put it so the suggestion is
|
||||
# actionable instead of generic.
|
||||
user_base = os.environ.get("PYTHONUSERBASE")
|
||||
if not user_base:
|
||||
try:
|
||||
import site
|
||||
user_base = site.getuserbase()
|
||||
except Exception:
|
||||
user_base = None
|
||||
hint = (
|
||||
f"add `{user_base}/bin` to PATH"
|
||||
if user_base
|
||||
else "switch to `pipx install molecule-ai-workspace-runtime` so the "
|
||||
"binary lands in pipx's managed bin/ on PATH"
|
||||
)
|
||||
_fail(
|
||||
label,
|
||||
"molecule-mcp not found on PATH",
|
||||
f"{hint}, or invoke via `python -m molecule_runtime.mcp_cli` directly.",
|
||||
)
|
||||
return "fail"
|
||||
|
||||
|
||||
def _resolve_token() -> tuple[Optional[str], Optional[str]]:
|
||||
"""Return ``(token_value, source_label)`` if the operator's
|
||||
environment exposes a token, else ``(None, None)``.
|
||||
|
||||
Single source of truth used by both ``check_env_vars()`` (which
|
||||
only needs the source label) and ``check_register()`` (which
|
||||
needs the actual value to send a Bearer header). Keeping these
|
||||
in one place means a future env-var addition only updates the
|
||||
resolver — not two parallel readers that can drift.
|
||||
"""
|
||||
val = os.environ.get("MOLECULE_WORKSPACE_TOKEN", "").strip()
|
||||
if val:
|
||||
return val, "env MOLECULE_WORKSPACE_TOKEN"
|
||||
file_var = os.environ.get("MOLECULE_WORKSPACE_TOKEN_FILE", "").strip()
|
||||
if file_var:
|
||||
if os.path.isfile(file_var):
|
||||
try:
|
||||
from pathlib import Path as _Path
|
||||
return (
|
||||
_Path(file_var).read_text().strip(),
|
||||
f"file {file_var} (via MOLECULE_WORKSPACE_TOKEN_FILE)",
|
||||
)
|
||||
except OSError:
|
||||
return None, None
|
||||
return None, None
|
||||
# Per-runtime container path used by the in-platform path; rarely
|
||||
# set on external setups but check anyway so the message is
|
||||
# accurate for both shapes.
|
||||
try:
|
||||
import configs_dir
|
||||
candidate = configs_dir.resolve() / ".auth_token"
|
||||
if candidate.is_file():
|
||||
try:
|
||||
return candidate.read_text().strip(), f"file {candidate}"
|
||||
except OSError:
|
||||
return None, None
|
||||
except Exception:
|
||||
pass
|
||||
return None, None
|
||||
|
||||
|
||||
def _resolve_token_summary() -> Optional[str]:
|
||||
"""Return just the source label (no secret value). Convenience
|
||||
wrapper around :func:`_resolve_token` for callers that don't
|
||||
need the value itself.
|
||||
"""
|
||||
_, label = _resolve_token()
|
||||
return label
|
||||
|
||||
|
||||
def check_env_vars() -> Verdict:
|
||||
label = "Env vars"
|
||||
missing: list[str] = []
|
||||
if not os.environ.get("PLATFORM_URL", "").strip():
|
||||
missing.append("PLATFORM_URL")
|
||||
if not os.environ.get("WORKSPACE_ID", "").strip() and not os.environ.get(
|
||||
"MOLECULE_WORKSPACES", "",
|
||||
).strip():
|
||||
missing.append("WORKSPACE_ID (or MOLECULE_WORKSPACES)")
|
||||
token_summary = _resolve_token_summary()
|
||||
if not token_summary and not os.environ.get("MOLECULE_WORKSPACES", "").strip():
|
||||
# MOLECULE_WORKSPACES is a JSON-array env that bundles its
|
||||
# own per-workspace tokens — if it's set we trust the
|
||||
# resolver to validate.
|
||||
missing.append(
|
||||
"MOLECULE_WORKSPACE_TOKEN (or MOLECULE_WORKSPACE_TOKEN_FILE, or "
|
||||
"/configs/.auth_token)",
|
||||
)
|
||||
if missing:
|
||||
_fail(
|
||||
label,
|
||||
f"unset: {', '.join(missing)}",
|
||||
"see the canvas Connect-External-Agent modal — the snippet "
|
||||
"exports all three. Use MOLECULE_WORKSPACE_TOKEN_FILE for the "
|
||||
"token to keep secrets out of shell history.",
|
||||
)
|
||||
return "fail"
|
||||
_ok(
|
||||
label,
|
||||
f"PLATFORM_URL + WORKSPACE_ID set; token from {token_summary or 'MOLECULE_WORKSPACES'}",
|
||||
)
|
||||
return "ok"
|
||||
|
||||
|
||||
def _http_get(url: str, timeout: float = 5.0) -> tuple[Optional[int], Optional[str]]:
|
||||
"""Best-effort GET that swallows transport errors and returns
|
||||
(status, error_message). Status is None when the request couldn't
|
||||
complete; error_message is None when the request returned 2xx.
|
||||
"""
|
||||
try:
|
||||
# Origin header — staging tenants enforce same-origin via WAF;
|
||||
# /healthz tolerates either way but matching production headers
|
||||
# surfaces auth-style 401s correctly during the doctor run.
|
||||
req = urllib_request.Request(
|
||||
url,
|
||||
headers={"Origin": os.environ.get("PLATFORM_URL", "").rstrip("/")},
|
||||
)
|
||||
with urllib_request.urlopen(req, timeout=timeout) as resp:
|
||||
return resp.status, None
|
||||
except URLError as e:
|
||||
return None, str(e.reason if hasattr(e, "reason") else e)
|
||||
except Exception as e:
|
||||
return None, str(e)
|
||||
|
||||
|
||||
def check_platform_health() -> Verdict:
|
||||
label = "Platform reachability"
|
||||
base = os.environ.get("PLATFORM_URL", "").strip().rstrip("/")
|
||||
if not base:
|
||||
_warn(label, "skipped (PLATFORM_URL unset — see Env vars)", "set PLATFORM_URL first")
|
||||
return "warn"
|
||||
if not base.startswith(("http://", "https://")):
|
||||
_fail(
|
||||
label,
|
||||
f"PLATFORM_URL missing scheme: {base!r}",
|
||||
"set PLATFORM_URL to include https:// — e.g. "
|
||||
"PLATFORM_URL=https://your-tenant.staging.moleculesai.app",
|
||||
)
|
||||
return "fail"
|
||||
if base.endswith("/"):
|
||||
_warn(
|
||||
label,
|
||||
"PLATFORM_URL has trailing slash (will be stripped automatically)",
|
||||
"remove the trailing slash to match the snippet shape",
|
||||
)
|
||||
status, err = _http_get(f"{base}/healthz")
|
||||
if status is None:
|
||||
_fail(label, f"GET {base}/healthz failed: {err}", "check DNS + firewall + scheme")
|
||||
return "fail"
|
||||
if not (200 <= status < 300):
|
||||
_fail(label, f"GET {base}/healthz returned HTTP {status}", "verify the tenant subdomain is correct + provisioned")
|
||||
return "fail"
|
||||
_ok(label, f"GET {base}/healthz → {status}")
|
||||
return "ok"
|
||||
|
||||
|
||||
def check_token_auth() -> Verdict:
|
||||
"""Light auth check via POST /registry/heartbeat.
|
||||
|
||||
Why heartbeat and not register: register is an UPSERT — sending
|
||||
it from doctor would clobber the workspace's actual agent_card
|
||||
(name, description, version) until the real agent next calls
|
||||
register. That's an invisible production-disruption: someone
|
||||
runs ``molecule-mcp doctor`` against a live workspace and the
|
||||
canvas briefly displays "doctor-probe" as the agent name.
|
||||
|
||||
Heartbeat only updates last_heartbeat_at (and clears
|
||||
awaiting_agent if needed) — that's exactly what a normal
|
||||
molecule-mcp boot does every 20s, so an extra heartbeat from
|
||||
the doctor is indistinguishable from background traffic.
|
||||
|
||||
Skipped when env vars failed earlier so the operator isn't shown
|
||||
a redundant 401.
|
||||
"""
|
||||
label = "Token auth"
|
||||
base = os.environ.get("PLATFORM_URL", "").strip().rstrip("/")
|
||||
workspace_id = os.environ.get("WORKSPACE_ID", "").strip()
|
||||
token, source_label = _resolve_token()
|
||||
if not (base and workspace_id and token):
|
||||
_warn(label, "skipped (Env vars must pass first)", "fix Env vars, re-run")
|
||||
return "warn"
|
||||
import json
|
||||
body = json.dumps({"id": workspace_id}).encode()
|
||||
req = urllib_request.Request(
|
||||
f"{base}/registry/heartbeat",
|
||||
data=body,
|
||||
method="POST",
|
||||
headers={
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Content-Type": "application/json",
|
||||
"Origin": base,
|
||||
},
|
||||
)
|
||||
try:
|
||||
with urllib_request.urlopen(req, timeout=8.0) as resp:
|
||||
status = resp.status
|
||||
except URLError as e:
|
||||
# Pull HTTP code from HTTPError; transport errors don't have one.
|
||||
status = getattr(e, "code", None)
|
||||
err = str(e.reason if hasattr(e, "reason") else e)
|
||||
if status is None:
|
||||
_fail(label, f"POST {base}/registry/heartbeat failed: {err}", "check network")
|
||||
return "fail"
|
||||
except Exception as e:
|
||||
_fail(label, f"POST heartbeat failed: {e}", "check network")
|
||||
return "fail"
|
||||
if status == 401:
|
||||
_fail(
|
||||
label,
|
||||
"401 Unauthorized — token rejected",
|
||||
"tokens are shown only once at workspace-create time; "
|
||||
"re-create the workspace OR rotate via canvas Tokens tab.",
|
||||
)
|
||||
return "fail"
|
||||
if status == 404:
|
||||
_fail(
|
||||
label,
|
||||
f"404 — workspace_id {workspace_id} not found on {base}",
|
||||
"verify WORKSPACE_ID matches a real workspace + the tenant "
|
||||
"subdomain in PLATFORM_URL.",
|
||||
)
|
||||
return "fail"
|
||||
if not (200 <= status < 300):
|
||||
_fail(label, f"POST heartbeat returned HTTP {status}", "see platform logs")
|
||||
return "fail"
|
||||
_ok(label, f"POST {base}/registry/heartbeat → {status} (token from {source_label})")
|
||||
return "ok"
|
||||
|
||||
|
||||
# Back-compat alias: the previous name was check_register, but the
|
||||
# implementation switched to a non-mutating heartbeat probe (see
|
||||
# check_token_auth's docstring). Kept so external test suites or
|
||||
# pinned-import scripts don't break on the rename.
|
||||
check_register = check_token_auth
|
||||
|
||||
|
||||
CHECKS = [
|
||||
check_python_version,
|
||||
check_wheel_install,
|
||||
check_path_for_binary,
|
||||
check_env_vars,
|
||||
check_platform_health,
|
||||
check_token_auth,
|
||||
]
|
||||
|
||||
|
||||
def run() -> int:
|
||||
"""Run all checks and return a process exit code (0 ok, 1 if any fail)."""
|
||||
print("molecule-mcp doctor — onboarding diagnostic")
|
||||
print()
|
||||
verdicts = []
|
||||
for chk in CHECKS:
|
||||
try:
|
||||
verdicts.append(chk())
|
||||
except Exception as e:
|
||||
# A buggy check shouldn't kill the rest of the doctor run.
|
||||
print(f" [BUG] {chk.__name__}: unexpected {type(e).__name__}: {e}")
|
||||
verdicts.append("fail")
|
||||
print()
|
||||
fails = sum(1 for v in verdicts if v == "fail")
|
||||
warns = sum(1 for v in verdicts if v == "warn")
|
||||
if fails:
|
||||
print(f"{fails} check(s) failed, {warns} warning(s). Fix the FAIL items above and re-run.")
|
||||
return 1
|
||||
if warns:
|
||||
print(f"All required checks passed; {warns} warning(s) — review the next-step hints.")
|
||||
return 0
|
||||
print("All checks passed.")
|
||||
return 0
|
||||
@@ -0,0 +1,325 @@
|
||||
"""Heartbeat + register thread for the standalone ``molecule-mcp`` wrapper.
|
||||
|
||||
Extracted from ``mcp_cli.py`` (RFC #2873 iter 3) so the heartbeat /
|
||||
register concern lives in its own module. The console-script entry
|
||||
``mcp_cli:main`` still drives the spawn, but the loop body, auth-failure
|
||||
escalation, and inbound-secret persistence now live here so they can be
|
||||
read, tested, and replaced independently of the orchestrator.
|
||||
|
||||
Public surface:
|
||||
|
||||
* ``HEARTBEAT_INTERVAL_SECONDS`` — cadence constant.
|
||||
* ``build_agent_card(workspace_id)`` — payload helper.
|
||||
* ``platform_register(platform_url, workspace_id, token)`` — one-shot
|
||||
POST /registry/register at startup.
|
||||
* ``start_heartbeat_thread(platform_url, workspace_id, token)`` — spawn
|
||||
the daemon thread.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Heartbeat cadence. Must be tighter than healthsweep's stale window
|
||||
# (currently 60-90s — see registry/healthsweep.go) by a comfortable
|
||||
# margin so a single missed heartbeat doesn't flip awaiting_agent.
|
||||
# 20s gives the operator's network 3 attempts within the budget; long
|
||||
# enough that it doesn't spam, short enough to recover quickly after
|
||||
# laptop sleep.
|
||||
HEARTBEAT_INTERVAL_SECONDS = 20.0
|
||||
|
||||
# After this many consecutive 401/403 heartbeats, escalate from
|
||||
# WARNING to ERROR with re-onboard guidance. 3 ticks at 20s = ~1 minute
|
||||
# of sustained auth failure — enough to rule out a transient platform
|
||||
# blip but quick enough that an operator doesn't sit puzzled for 10
|
||||
# minutes wondering why their MCP tools 401. Same threshold used for
|
||||
# repeat-logging at 20-tick (~7 min) intervals so a long-running
|
||||
# session that missed the first ERROR still sees the message.
|
||||
HEARTBEAT_AUTH_LOUD_THRESHOLD = 3
|
||||
HEARTBEAT_AUTH_RELOG_INTERVAL = 20
|
||||
|
||||
|
||||
def build_agent_card(workspace_id: str) -> dict:
|
||||
"""Build the ``agent_card`` payload sent to /registry/register.
|
||||
|
||||
Three optional env vars override the defaults so an operator can
|
||||
surface human-readable identity + capabilities to peers and the
|
||||
canvas Skills tab without code changes:
|
||||
|
||||
* ``MOLECULE_AGENT_NAME`` — display name (defaults to
|
||||
``molecule-mcp-{id[:8]}``). Surfaced in canvas workspace cards
|
||||
and ``list_peers`` output.
|
||||
* ``MOLECULE_AGENT_DESCRIPTION`` — one-liner about the agent's
|
||||
purpose. Rendered in canvas Details + Skills tabs.
|
||||
* ``MOLECULE_AGENT_SKILLS`` — comma-separated skill names
|
||||
(e.g. ``research,code-review,memory-curation``). Each name is
|
||||
expanded to a ``{"name": ...}`` skill object — the minimum
|
||||
shape that satisfies both ``shared_runtime.summarize_peers``
|
||||
(uses ``s["name"]``) and the canvas SkillsTab.tsx schema
|
||||
(id falls back to name when omitted). Empty / whitespace
|
||||
entries are dropped.
|
||||
|
||||
Defaults match the previous hardcoded behaviour exactly so this
|
||||
is a strict superset — an operator who sets none of the env vars
|
||||
sees no change.
|
||||
"""
|
||||
name = (os.environ.get("MOLECULE_AGENT_NAME") or "").strip()
|
||||
if not name:
|
||||
name = f"molecule-mcp-{workspace_id[:8]}"
|
||||
|
||||
description = (os.environ.get("MOLECULE_AGENT_DESCRIPTION") or "").strip()
|
||||
|
||||
skills_raw = (os.environ.get("MOLECULE_AGENT_SKILLS") or "").strip()
|
||||
skills: list[dict] = []
|
||||
if skills_raw:
|
||||
for s in skills_raw.split(","):
|
||||
label = s.strip()
|
||||
if label:
|
||||
skills.append({"name": label})
|
||||
|
||||
card: dict = {"name": name, "skills": skills}
|
||||
if description:
|
||||
card["description"] = description
|
||||
return card
|
||||
|
||||
|
||||
def platform_register(platform_url: str, workspace_id: str, token: str) -> None:
|
||||
"""One-shot register at startup; fails fast on auth errors.
|
||||
|
||||
Lifts the workspace from ``awaiting_agent`` to ``online`` for
|
||||
operators who never ran the curl-register snippet. Safe to call
|
||||
repeatedly: the platform's register handler is an upsert that
|
||||
just refreshes ``url``, ``agent_card``, and ``status``.
|
||||
|
||||
Failure model (post-review):
|
||||
- 401 / 403 → ``sys.exit(3)`` immediately. The operator's
|
||||
token is wrong; silently looping in a broken state would
|
||||
make this hard to diagnose because the MCP tools would 401
|
||||
on every call too. Hard-fail is the kindest option.
|
||||
- Other 4xx/5xx → log a warning + continue. The heartbeat
|
||||
thread will surface persistent failures; transient platform
|
||||
blips shouldn't abort the MCP loop.
|
||||
- Network / transport errors → log + continue. Same reasoning.
|
||||
|
||||
Origin header is required by the SaaS edge WAF; without it
|
||||
/registry/register currently still works (it's on the WAF
|
||||
allowlist), but the heartbeat path needs Origin and we want one
|
||||
consistent header set across both calls.
|
||||
"""
|
||||
try:
|
||||
import httpx
|
||||
except ImportError:
|
||||
# httpx is a transitive dep via a2a-sdk; if missing, the MCP
|
||||
# server won't import either. Let the caller's later import
|
||||
# surface the real error.
|
||||
return
|
||||
|
||||
payload = {
|
||||
"id": workspace_id,
|
||||
"url": "",
|
||||
"agent_card": build_agent_card(workspace_id),
|
||||
"delivery_mode": "poll",
|
||||
}
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Origin": platform_url,
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
try:
|
||||
with httpx.Client(timeout=10.0) as client:
|
||||
resp = client.post(
|
||||
f"{platform_url}/registry/register",
|
||||
json=payload,
|
||||
headers=headers,
|
||||
)
|
||||
if resp.status_code in (401, 403):
|
||||
print(
|
||||
f"molecule-mcp: register rejected with HTTP {resp.status_code} — "
|
||||
f"the token in MOLECULE_WORKSPACE_TOKEN is invalid for workspace "
|
||||
f"{workspace_id}. Regenerate from the canvas → Tokens tab.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(3)
|
||||
if resp.status_code >= 400:
|
||||
logger.warning(
|
||||
"molecule-mcp: register POST returned HTTP %d: %s",
|
||||
resp.status_code,
|
||||
(resp.text or "")[:200],
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"molecule-mcp: registered workspace %s with platform",
|
||||
workspace_id,
|
||||
)
|
||||
except SystemExit:
|
||||
raise
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning("molecule-mcp: register POST failed: %s", exc)
|
||||
|
||||
|
||||
def heartbeat_loop(
|
||||
platform_url: str,
|
||||
workspace_id: str,
|
||||
token: str,
|
||||
interval: float = HEARTBEAT_INTERVAL_SECONDS,
|
||||
) -> None:
|
||||
"""Daemon thread body: POST /registry/heartbeat every ``interval``s.
|
||||
|
||||
Failures are logged at WARNING and the loop continues. The thread
|
||||
exits when the main process does (daemon=True). Each iteration
|
||||
rebuilds the payload + headers — cheap and ensures token rotation
|
||||
via env var (rare but possible) is picked up on the next tick.
|
||||
"""
|
||||
try:
|
||||
import httpx
|
||||
except ImportError:
|
||||
return
|
||||
|
||||
start_time = time.time()
|
||||
consecutive_auth_failures = 0
|
||||
while True:
|
||||
body = {
|
||||
"workspace_id": workspace_id,
|
||||
"error_rate": 0.0,
|
||||
"sample_error": "",
|
||||
"active_tasks": 0,
|
||||
"uptime_seconds": int(time.time() - start_time),
|
||||
}
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Origin": platform_url,
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
try:
|
||||
with httpx.Client(timeout=10.0) as client:
|
||||
resp = client.post(
|
||||
f"{platform_url}/registry/heartbeat",
|
||||
json=body,
|
||||
headers=headers,
|
||||
)
|
||||
if resp.status_code in (401, 403):
|
||||
consecutive_auth_failures += 1
|
||||
log_heartbeat_auth_failure(
|
||||
consecutive_auth_failures, workspace_id, resp.status_code,
|
||||
)
|
||||
elif resp.status_code >= 400:
|
||||
# Non-auth HTTP error — log, but DO NOT touch the
|
||||
# auth-failure counter (5xx blips, 429, etc. are
|
||||
# transient and unrelated to token validity).
|
||||
logger.warning(
|
||||
"molecule-mcp: heartbeat HTTP %d: %s",
|
||||
resp.status_code,
|
||||
(resp.text or "")[:200],
|
||||
)
|
||||
else:
|
||||
consecutive_auth_failures = 0
|
||||
persist_inbound_secret_from_heartbeat(resp)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning("molecule-mcp: heartbeat failed: %s", exc)
|
||||
time.sleep(interval)
|
||||
|
||||
|
||||
def log_heartbeat_auth_failure(count: int, workspace_id: str, status_code: int) -> None:
|
||||
"""Escalate consecutive heartbeat 401/403s from quiet WARNING to
|
||||
actionable ERROR.
|
||||
|
||||
The operator's first sign of trouble shouldn't be "tools 401 with no
|
||||
explanation" — that was the failure mode that motivated this code,
|
||||
triggered by a workspace being deleted server-side and its tokens
|
||||
revoked while the runtime kept heartbeating in silence.
|
||||
|
||||
Cadence:
|
||||
* count < threshold: WARNING per tick (transient — could be a
|
||||
platform blip, don't shout yet)
|
||||
* count == threshold: ERROR with re-onboard instructions
|
||||
(the first signal the operator can't miss)
|
||||
* count > threshold and (count - threshold) % relog == 0: re-log
|
||||
ERROR (so a session that started after the first ERROR still
|
||||
sees the message scrolling past in their logs)
|
||||
"""
|
||||
if count < HEARTBEAT_AUTH_LOUD_THRESHOLD:
|
||||
logger.warning(
|
||||
"molecule-mcp: heartbeat HTTP %d (auth failure %d/%d) — "
|
||||
"token may be revoked. Will retry; if persistent, regenerate "
|
||||
"from canvas → Tokens.",
|
||||
status_code, count, HEARTBEAT_AUTH_LOUD_THRESHOLD,
|
||||
)
|
||||
return
|
||||
# At or past the threshold — this is the loud actionable error.
|
||||
if count == HEARTBEAT_AUTH_LOUD_THRESHOLD or (
|
||||
count - HEARTBEAT_AUTH_LOUD_THRESHOLD
|
||||
) % HEARTBEAT_AUTH_RELOG_INTERVAL == 0:
|
||||
logger.error(
|
||||
"molecule-mcp: %d consecutive heartbeat auth failures (HTTP %d) — "
|
||||
"the token in MOLECULE_WORKSPACE_TOKEN has been REVOKED, likely "
|
||||
"because workspace %s was deleted server-side. The MCP server is "
|
||||
"still running but every platform call will fail. Regenerate the "
|
||||
"workspace + token from the canvas (Tokens tab), update your MCP "
|
||||
"config, and restart your runtime.",
|
||||
count, status_code, workspace_id,
|
||||
)
|
||||
|
||||
|
||||
def persist_inbound_secret_from_heartbeat(resp: object) -> None:
|
||||
"""Persist ``platform_inbound_secret`` from a heartbeat response, if any.
|
||||
|
||||
The platform's heartbeat handler returns the secret on every beat
|
||||
(mirroring /registry/register) so a workspace that lazy-healed the
|
||||
secret on the platform side — typical recovery path for a workspace
|
||||
whose row had a NULL ``platform_inbound_secret`` after a partial
|
||||
bootstrap — picks it up within one heartbeat tick instead of
|
||||
requiring a runtime restart.
|
||||
|
||||
Without this delivery path the chat-upload code path's "secret was
|
||||
just minted, will pick up on next heartbeat" 503 message is a lie
|
||||
and the workspace stays 401-forever until the operator restarts
|
||||
the runtime. Caught 2026-04-30 on hongmingwang tenant.
|
||||
|
||||
Failure is non-fatal: if the body isn't JSON, doesn't carry the
|
||||
field, or the disk write fails, the next heartbeat retries. This
|
||||
matches the cold-start register flow in main.py:319-323.
|
||||
"""
|
||||
try:
|
||||
body = resp.json()
|
||||
except Exception: # noqa: BLE001
|
||||
return
|
||||
if not isinstance(body, dict):
|
||||
return
|
||||
secret = body.get("platform_inbound_secret")
|
||||
if not secret:
|
||||
return
|
||||
try:
|
||||
from platform_inbound_auth import save_inbound_secret
|
||||
|
||||
save_inbound_secret(secret)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning(
|
||||
"molecule-mcp: persist inbound secret from heartbeat failed: %s", exc
|
||||
)
|
||||
|
||||
|
||||
def start_heartbeat_thread(
|
||||
platform_url: str,
|
||||
workspace_id: str,
|
||||
token: str,
|
||||
) -> threading.Thread:
|
||||
"""Start the heartbeat daemon thread. Returns the Thread handle.
|
||||
|
||||
The MCP stdio loop runs in the foreground (asyncio); this thread
|
||||
runs alongside it. ``daemon=True`` so when the operator hits
|
||||
Ctrl-C / closes the runtime, the heartbeat dies with it instead
|
||||
of leaking and writing to a stale workspace.
|
||||
"""
|
||||
t = threading.Thread(
|
||||
target=heartbeat_loop,
|
||||
args=(platform_url, workspace_id, token),
|
||||
name="molecule-mcp-heartbeat",
|
||||
daemon=True,
|
||||
)
|
||||
t.start()
|
||||
return t
|
||||
@@ -0,0 +1,63 @@
|
||||
"""Inbox-poller spawn helpers for the standalone ``molecule-mcp`` wrapper.
|
||||
|
||||
Extracted from ``mcp_cli.py`` (RFC #2873 iter 3). The poller is the
|
||||
INBOUND side of the standalone path — without it, the universal MCP
|
||||
server is outbound-only (can call ``delegate_task`` /
|
||||
``send_message_to_user``, never observes canvas-user / peer-agent
|
||||
messages).
|
||||
|
||||
Public surface:
|
||||
|
||||
* ``start_inbox_pollers(platform_url, workspace_ids)`` — activate the
|
||||
inbox singleton and spawn one daemon poller per workspace.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def start_inbox_pollers(platform_url: str, workspace_ids: list[str]) -> None:
|
||||
"""Activate the inbox singleton + spawn one poller daemon thread per workspace.
|
||||
|
||||
Done lazily here (not at module import) because importing inbox
|
||||
pulls in platform_auth, which only resolves cleanly AFTER env
|
||||
validation succeeds. Activation is idempotent within a process,
|
||||
so a stray double-call (e.g. test harness re-entering main) is
|
||||
harmless.
|
||||
|
||||
The poller threads are daemon=True — die with the main process.
|
||||
|
||||
Single-workspace path: one poller, single cursor file at the legacy
|
||||
location (``.mcp_inbox_cursor``). Cursor-key resolution falls back
|
||||
to the empty string for back-compat with operators whose existing
|
||||
on-disk cursor was written by the pre-multi-workspace code.
|
||||
|
||||
Multi-workspace path: N pollers, each with its own cursor file
|
||||
keyed by ``workspace_id[:8]``. Cursors live next to each other in
|
||||
configs_dir so an operator inspecting state sees all of them
|
||||
together.
|
||||
"""
|
||||
try:
|
||||
import inbox
|
||||
except ImportError as exc:
|
||||
logger.warning("molecule-mcp: inbox module unavailable: %s", exc)
|
||||
return
|
||||
|
||||
if len(workspace_ids) <= 1:
|
||||
# Back-compat exact: single-workspace mode reuses the legacy
|
||||
# cursor filename + cursor_path constructor arg, so an existing
|
||||
# operator's on-disk state isn't invalidated by upgrade.
|
||||
wsid = workspace_ids[0]
|
||||
state = inbox.InboxState(cursor_path=inbox.default_cursor_path())
|
||||
inbox.activate(state)
|
||||
inbox.start_poller_thread(state, platform_url, wsid)
|
||||
return
|
||||
|
||||
# Multi-workspace: per-workspace cursor file, one shared queue.
|
||||
cursor_paths = {wsid: inbox.default_cursor_path(wsid) for wsid in workspace_ids}
|
||||
state = inbox.InboxState(cursor_paths=cursor_paths)
|
||||
inbox.activate(state)
|
||||
for wsid in workspace_ids:
|
||||
inbox.start_poller_thread(state, platform_url, wsid)
|
||||
@@ -0,0 +1,240 @@
|
||||
"""Env validation + workspace resolution for the standalone ``molecule-mcp``.
|
||||
|
||||
Extracted from ``mcp_cli.py`` (RFC #2873 iter 3). Deals with the two
|
||||
shapes ``molecule-mcp`` accepts:
|
||||
|
||||
* Single-workspace legacy shape: ``WORKSPACE_ID`` + token from
|
||||
``MOLECULE_WORKSPACE_TOKEN`` or ``${CONFIGS_DIR}/.auth_token``.
|
||||
* Multi-workspace JSON shape: ``MOLECULE_WORKSPACES`` env var carries a
|
||||
JSON array of ``{"id": ..., "token": ...}`` entries.
|
||||
|
||||
Public surface:
|
||||
|
||||
* ``resolve_workspaces()`` → ``(workspaces, errors)``.
|
||||
* ``read_token_file()`` → token text or ``""``.
|
||||
* ``print_missing_env_help(missing, have_token_file)`` — operator-help
|
||||
printer.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
import configs_dir
|
||||
|
||||
|
||||
def resolve_workspaces() -> tuple[list[tuple[str, str]], list[str]]:
|
||||
"""Return the list of ``(workspace_id, token)`` pairs to register.
|
||||
|
||||
Resolution order:
|
||||
|
||||
1. ``MOLECULE_WORKSPACES`` env var — JSON array of
|
||||
``{"id": "...", "token": "..."}`` objects. Activates the
|
||||
multi-workspace external-agent path (one process registered into
|
||||
N workspaces). When set, ``WORKSPACE_ID`` / ``MOLECULE_WORKSPACE_TOKEN``
|
||||
are IGNORED — the JSON is the source of truth.
|
||||
|
||||
2. Single-workspace fallback — ``WORKSPACE_ID`` env var + token
|
||||
resolved in this order:
|
||||
a. ``MOLECULE_WORKSPACE_TOKEN`` (inline env — convenient but
|
||||
leaks into shell history + plaintext MCP-host config).
|
||||
b. ``MOLECULE_WORKSPACE_TOKEN_FILE`` (path to a file holding
|
||||
the token — operator can keep it 0600 in their home dir;
|
||||
survives shell-history scrubs).
|
||||
c. ``${CONFIGS_DIR}/.auth_token`` (in-container runtimes —
|
||||
the platform writes this on provision).
|
||||
|
||||
Returns ``(workspaces, errors)``:
|
||||
* ``workspaces``: list of ``(workspace_id, token)`` — non-empty
|
||||
on the happy path.
|
||||
* ``errors``: human-readable strings describing what's missing /
|
||||
malformed. ``main()`` surfaces these with the same shape as
|
||||
``print_missing_env_help`` so the operator's first run gives
|
||||
actionable output.
|
||||
|
||||
Why JSON env (not file): ergonomic for Claude Code MCP config (one
|
||||
string in ``mcpServers.molecule.env`` instead of a sidecar file)
|
||||
and for CI / launchers. A separate config-file path can be added
|
||||
later without breaking this.
|
||||
"""
|
||||
raw = os.environ.get("MOLECULE_WORKSPACES", "").strip()
|
||||
if raw:
|
||||
try:
|
||||
parsed = json.loads(raw)
|
||||
except json.JSONDecodeError as exc:
|
||||
return [], [
|
||||
f"MOLECULE_WORKSPACES is not valid JSON ({exc.msg} at pos "
|
||||
f"{exc.pos}). Expected: '[{{\"id\":\"<wsid>\",\"token\":"
|
||||
f"\"<tok>\"}},{{...}}]'"
|
||||
]
|
||||
if not isinstance(parsed, list) or not parsed:
|
||||
return [], [
|
||||
"MOLECULE_WORKSPACES must be a non-empty JSON array of "
|
||||
"{\"id\":\"...\",\"token\":\"...\"} objects"
|
||||
]
|
||||
out: list[tuple[str, str]] = []
|
||||
seen: set[str] = set()
|
||||
errors: list[str] = []
|
||||
for i, entry in enumerate(parsed):
|
||||
if not isinstance(entry, dict):
|
||||
errors.append(
|
||||
f"MOLECULE_WORKSPACES[{i}] is not an object — got {type(entry).__name__}"
|
||||
)
|
||||
continue
|
||||
wsid = str(entry.get("id", "")).strip()
|
||||
tok = str(entry.get("token", "")).strip()
|
||||
if not wsid or not tok:
|
||||
errors.append(
|
||||
f"MOLECULE_WORKSPACES[{i}] missing 'id' or 'token'"
|
||||
)
|
||||
continue
|
||||
if wsid in seen:
|
||||
errors.append(
|
||||
f"MOLECULE_WORKSPACES[{i}] duplicate workspace id {wsid!r}"
|
||||
)
|
||||
continue
|
||||
seen.add(wsid)
|
||||
out.append((wsid, tok))
|
||||
if errors:
|
||||
return [], errors
|
||||
return out, []
|
||||
|
||||
# Single-workspace back-compat path.
|
||||
wsid = os.environ.get("WORKSPACE_ID", "").strip()
|
||||
if not wsid:
|
||||
return [], ["WORKSPACE_ID (or MOLECULE_WORKSPACES) is required"]
|
||||
# Token resolution order (#2934): inline env → file path → CONFIGS_DIR
|
||||
# default. The file-path option exists so operators can keep the
|
||||
# bearer out of shell history and out of MCP-host config plaintext
|
||||
# (e.g. ~/.claude.json) — set MOLECULE_WORKSPACE_TOKEN_FILE to a
|
||||
# 0600 file containing the token. The CONFIGS_DIR/.auth_token
|
||||
# fallback predates this and stays for in-container runtimes.
|
||||
tok = os.environ.get("MOLECULE_WORKSPACE_TOKEN", "").strip()
|
||||
if not tok:
|
||||
tok, tf_err = _read_token_from_file_env()
|
||||
if tf_err:
|
||||
# Operator explicitly pointed TOKEN_FILE somewhere — surface
|
||||
# the SPECIFIC failure (path doesn't exist, isn't readable,
|
||||
# or holds a blank file) instead of falling through to the
|
||||
# generic "set one of these three vars" message. Otherwise
|
||||
# they get exactly the silent failure mode #2934 flagged
|
||||
# ("a new user has no chance"). Skip the CONFIGS_DIR
|
||||
# fallback in this case — the operator's intent is clearly
|
||||
# to use the file path; deferring to a different source
|
||||
# would mask their config error.
|
||||
return [], [tf_err]
|
||||
if not tok:
|
||||
tok = read_token_file()
|
||||
if not tok:
|
||||
return [], [
|
||||
"MOLECULE_WORKSPACE_TOKEN, MOLECULE_WORKSPACE_TOKEN_FILE, or "
|
||||
"CONFIGS_DIR/.auth_token is required"
|
||||
]
|
||||
return [(wsid, tok)], []
|
||||
|
||||
|
||||
def _read_token_from_file_env() -> tuple[str, str]:
|
||||
"""Read the token from the file path in MOLECULE_WORKSPACE_TOKEN_FILE.
|
||||
|
||||
Returns ``(token, error)``:
|
||||
* env var unset/blank → ``("", "")`` — caller falls through silently
|
||||
to the next source; the operator didn't ask for this path.
|
||||
* file open/read fails (missing, permission denied, decode error)
|
||||
→ ``("", "<specific error>")`` — caller surfaces it directly.
|
||||
The operator EXPLICITLY pointed at this path, so a generic
|
||||
fallthrough error would mask their config bug (#2934).
|
||||
* file is blank → ``("", "<blank file error>")`` — same reasoning.
|
||||
* file read returns junk with internal whitespace/newlines (e.g.
|
||||
a CSV cell, accidental multi-token paste) → ``("", "<error>")``
|
||||
rather than concatenating into a malformed bearer that 401s
|
||||
against the platform with no context.
|
||||
* happy path → ``("<token>", "")``.
|
||||
"""
|
||||
path = os.environ.get("MOLECULE_WORKSPACE_TOKEN_FILE", "").strip()
|
||||
if not path:
|
||||
return "", ""
|
||||
try:
|
||||
with open(path, encoding="utf-8") as fh:
|
||||
raw = fh.read()
|
||||
except FileNotFoundError:
|
||||
return "", (
|
||||
f"MOLECULE_WORKSPACE_TOKEN_FILE points to {path!r} which "
|
||||
f"does not exist"
|
||||
)
|
||||
except PermissionError:
|
||||
return "", (
|
||||
f"MOLECULE_WORKSPACE_TOKEN_FILE={path!r} is not readable "
|
||||
f"(permission denied)"
|
||||
)
|
||||
except OSError as exc:
|
||||
return "", (
|
||||
f"MOLECULE_WORKSPACE_TOKEN_FILE={path!r} could not be read: "
|
||||
f"{exc}"
|
||||
)
|
||||
except UnicodeDecodeError:
|
||||
return "", (
|
||||
f"MOLECULE_WORKSPACE_TOKEN_FILE={path!r} is not valid UTF-8"
|
||||
)
|
||||
tok = raw.strip()
|
||||
if not tok:
|
||||
return "", (
|
||||
f"MOLECULE_WORKSPACE_TOKEN_FILE={path!r} is empty"
|
||||
)
|
||||
# Reject tokens with internal whitespace — a CSV cell or accidental
|
||||
# multi-token paste would otherwise become a malformed bearer that
|
||||
# 401s against the platform with no diagnostic.
|
||||
if any(ch.isspace() for ch in tok):
|
||||
return "", (
|
||||
f"MOLECULE_WORKSPACE_TOKEN_FILE={path!r} contains internal "
|
||||
f"whitespace — expected a single token"
|
||||
)
|
||||
return tok, ""
|
||||
|
||||
|
||||
def print_missing_env_help(missing: list[str], have_token_file: bool) -> None:
|
||||
print("molecule-mcp: missing required environment.\n", file=sys.stderr)
|
||||
print("Set the following before running molecule-mcp:", file=sys.stderr)
|
||||
print(" WORKSPACE_ID — your workspace UUID (from canvas)", file=sys.stderr)
|
||||
print(
|
||||
" PLATFORM_URL — base URL of your Molecule platform "
|
||||
"(e.g. https://your-tenant.staging.moleculesai.app)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
if not have_token_file:
|
||||
print(
|
||||
" MOLECULE_WORKSPACE_TOKEN — bearer token for this workspace "
|
||||
"(canvas → Tokens tab)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(
|
||||
" OR set MOLECULE_WORKSPACE_TOKEN_FILE"
|
||||
" to a path that holds the token",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(
|
||||
" (keeps the secret out of shell"
|
||||
" history and MCP-host config plaintext)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print("", file=sys.stderr)
|
||||
print(f"Currently missing: {', '.join(missing)}", file=sys.stderr)
|
||||
|
||||
|
||||
def read_token_file() -> str:
|
||||
"""Read the token from the resolved configs dir's ``.auth_token`` if
|
||||
present.
|
||||
|
||||
Mirrors platform_auth._token_file's location resolution but without
|
||||
importing the heavy module here (that import triggers a2a_client's
|
||||
WORKSPACE_ID guard which is fine after env validation, but cheaper
|
||||
to inline a 4-line file read than pull in the whole stack just for
|
||||
the path).
|
||||
"""
|
||||
path = configs_dir.resolve() / ".auth_token"
|
||||
if not path.is_file():
|
||||
return ""
|
||||
try:
|
||||
return path.read_text().strip()
|
||||
except OSError:
|
||||
return ""
|
||||
@@ -273,6 +273,87 @@ class TestSendA2AMessage:
|
||||
assert _TEST_PEER_ID in result
|
||||
assert "/workspaces/" in result and "/a2a" in result
|
||||
|
||||
async def test_poll_queued_envelope_returns_success_string(self):
|
||||
"""Issue #2967: workspace-server's poll-mode short-circuit returns
|
||||
{status:"queued", delivery_mode:"poll", method:...} when the peer
|
||||
has no URL to dispatch to. Pre-fix the bare send_a2a_message parser
|
||||
only knew about JSON-RPC {result, error} keys, so this fell through
|
||||
to the 'unexpected response shape' error path → callers retried,
|
||||
peer got duplicate delegations.
|
||||
|
||||
Pin: poll-queued envelope returns a clean success string that does
|
||||
NOT start with _A2A_ERROR_PREFIX, so callers route it through the
|
||||
normal-outcome path. Verified discriminating: assert_NOT_startswith
|
||||
the error prefix would FAIL on the old code (which returned an
|
||||
error-prefixed string) and PASSES on the new code.
|
||||
"""
|
||||
import a2a_client
|
||||
|
||||
resp = _make_response(200, {
|
||||
"status": "queued",
|
||||
"delivery_mode": "poll",
|
||||
"method": "message/send",
|
||||
})
|
||||
mock_client = _make_mock_client(post_resp=resp)
|
||||
|
||||
with patch("a2a_client.httpx.AsyncClient", return_value=mock_client):
|
||||
result = await a2a_client.send_a2a_message(_TEST_PEER_ID, "task")
|
||||
|
||||
# Discriminating: pre-fix returned a string that startswith
|
||||
# _A2A_ERROR_PREFIX, so this assertion would have FAILED on the
|
||||
# old code. New code returns a queued-success string.
|
||||
assert not result.startswith(a2a_client._A2A_ERROR_PREFIX), (
|
||||
f"poll-queued envelope must not be tagged as A2A error; got: {result!r}"
|
||||
)
|
||||
assert "queued" in result.lower()
|
||||
assert "poll" in result.lower()
|
||||
# The method is included so a structured-log scraper can route by
|
||||
# protocol verb if needed.
|
||||
assert "message/send" in result
|
||||
|
||||
async def test_poll_queued_envelope_with_other_method(self):
|
||||
"""Same envelope but a different a2a_method (the future could add
|
||||
message/sendStream or similar). Pin that the parser doesn't hardcode
|
||||
message/send — whatever method the server echoed is preserved.
|
||||
"""
|
||||
import a2a_client
|
||||
|
||||
resp = _make_response(200, {
|
||||
"status": "queued",
|
||||
"delivery_mode": "poll",
|
||||
"method": "message/sendStream",
|
||||
})
|
||||
mock_client = _make_mock_client(post_resp=resp)
|
||||
|
||||
with patch("a2a_client.httpx.AsyncClient", return_value=mock_client):
|
||||
result = await a2a_client.send_a2a_message(_TEST_PEER_ID, "task")
|
||||
|
||||
assert not result.startswith(a2a_client._A2A_ERROR_PREFIX)
|
||||
assert "message/sendStream" in result
|
||||
|
||||
async def test_status_queued_without_poll_mode_still_falls_through(self):
|
||||
"""Defensive: only the {status:"queued", delivery_mode:"poll"} pair
|
||||
triggers the queued-success branch. A response with status:"queued"
|
||||
but a different delivery_mode (or none) is still 'unexpected' —
|
||||
we don't want to silently swallow a future server bug that emits
|
||||
a partial envelope. Pin both keys are required.
|
||||
"""
|
||||
import a2a_client
|
||||
|
||||
resp = _make_response(200, {
|
||||
"status": "queued",
|
||||
# delivery_mode missing
|
||||
"method": "message/send",
|
||||
})
|
||||
mock_client = _make_mock_client(post_resp=resp)
|
||||
|
||||
with patch("a2a_client.httpx.AsyncClient", return_value=mock_client):
|
||||
result = await a2a_client.send_a2a_message(_TEST_PEER_ID, "task")
|
||||
|
||||
# Falls through — must STILL be tagged as error.
|
||||
assert result.startswith(a2a_client._A2A_ERROR_PREFIX)
|
||||
assert "unexpected response shape" in result
|
||||
|
||||
async def test_exception_returns_error_prefix_and_message(self):
|
||||
"""Network exception → returns _A2A_ERROR_PREFIX + exception text."""
|
||||
import a2a_client
|
||||
|
||||
@@ -241,7 +241,7 @@ class TestToolListPeersAggregation:
|
||||
return [{"id": "2222bbbb-2222-2222-2222-222222222222", "name": "bob", "status": "online", "role": "dev"}], None
|
||||
return [], None
|
||||
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
output = await a2a_tools.tool_list_peers()
|
||||
|
||||
assert "alice" in output
|
||||
@@ -263,7 +263,7 @@ class TestToolListPeersAggregation:
|
||||
assert source_workspace_id == a2a_client.WORKSPACE_ID
|
||||
return [{"id": "1111aaaa-1111-1111-1111-111111111111", "name": "alice", "status": "online", "role": "ops"}], None
|
||||
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
output = await a2a_tools.tool_list_peers()
|
||||
|
||||
assert "alice" in output
|
||||
@@ -286,7 +286,7 @@ class TestToolListPeersAggregation:
|
||||
seen.append(source_workspace_id)
|
||||
return [{"id": "1111aaaa-1111-1111-1111-111111111111", "name": "alice", "status": "online", "role": "ops"}], None
|
||||
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
output = await a2a_tools.tool_list_peers(source_workspace_id=ws_a)
|
||||
|
||||
assert seen == [ws_a]
|
||||
@@ -309,7 +309,7 @@ class TestToolListPeersAggregation:
|
||||
return [], "auth failed"
|
||||
return [], "platform 5xx"
|
||||
|
||||
with patch("a2a_tools.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
with patch("a2a_tools_messaging.get_peers_with_diagnostic", side_effect=fake_get_peers):
|
||||
out = await a2a_tools.tool_list_peers()
|
||||
|
||||
assert "[aaaa1111] auth failed" in out
|
||||
@@ -339,8 +339,8 @@ class TestToolDelegateTaskAutoRouting:
|
||||
seen_send_src["src"] = source_workspace_id
|
||||
return "ok"
|
||||
|
||||
with patch("a2a_tools.discover_peer", side_effect=fake_discover), \
|
||||
patch("a2a_tools.send_a2a_message", side_effect=fake_send), \
|
||||
with patch("a2a_tools_delegation.discover_peer", side_effect=fake_discover), \
|
||||
patch("a2a_tools_delegation.send_a2a_message", side_effect=fake_send), \
|
||||
patch("a2a_tools.report_activity", new=AsyncMock()):
|
||||
await a2a_tools.tool_delegate_task(peer_id, "do thing")
|
||||
|
||||
@@ -367,8 +367,8 @@ class TestToolDelegateTaskAutoRouting:
|
||||
seen["send"] = source_workspace_id
|
||||
return "ok"
|
||||
|
||||
with patch("a2a_tools.discover_peer", side_effect=fake_discover), \
|
||||
patch("a2a_tools.send_a2a_message", side_effect=fake_send), \
|
||||
with patch("a2a_tools_delegation.discover_peer", side_effect=fake_discover), \
|
||||
patch("a2a_tools_delegation.send_a2a_message", side_effect=fake_send), \
|
||||
patch("a2a_tools.report_activity", new=AsyncMock()):
|
||||
await a2a_tools.tool_delegate_task(
|
||||
peer_id, "do thing", source_workspace_id=ws_explicit,
|
||||
@@ -395,8 +395,8 @@ class TestToolDelegateTaskAutoRouting:
|
||||
seen["send"] = source_workspace_id
|
||||
return "ok"
|
||||
|
||||
with patch("a2a_tools.discover_peer", side_effect=fake_discover), \
|
||||
patch("a2a_tools.send_a2a_message", side_effect=fake_send), \
|
||||
with patch("a2a_tools_delegation.discover_peer", side_effect=fake_discover), \
|
||||
patch("a2a_tools_delegation.send_a2a_message", side_effect=fake_send), \
|
||||
patch("a2a_tools.report_activity", new=AsyncMock()):
|
||||
await a2a_tools.tool_delegate_task(peer_id, "do thing")
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user