forked from molecule-ai/molecule-core
Compare commits
340 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4b16c95450 | |||
| f1b72af97e | |||
| 31facfc5c4 | |||
| 19e7acdc22 | |||
| 1ce51abea4 | |||
| 0ec226e119 | |||
| 872b781f64 | |||
| 0dd1244510 | |||
| 26fa220bef | |||
| 5559e96400 | |||
| 3bc7749e84 | |||
| 6d7a7fc86f | |||
| ecb3c75d74 | |||
| 2f7beb9bce | |||
| bd881f8756 | |||
| e39d818ac4 | |||
| ed4d24fb8c | |||
| 3a5544a9e6 | |||
| 095171f163 | |||
| 9c7b34cb7f | |||
| 8514ff1a96 | |||
| 1785732bbb | |||
| 066a0772ee | |||
| 3f2cc8cdd6 | |||
| 5c80b9c3d6 | |||
| a8850bac55 | |||
| adfa34c4ae | |||
| 7692dd4975 | |||
| 28f22609d9 | |||
| e67a854a33 | |||
| 3e7d483b8c | |||
| 4f4b6c4f90 | |||
| fc10386a78 | |||
| 1282c1c8ff | |||
| a242ca8b01 | |||
| ac9b07b7ad | |||
| 41ae4ec50b | |||
| 02960209a0 | |||
| d866d3aa5f | |||
| 61d5908817 | |||
| 89bdf29d6f | |||
| 700d44ec3d | |||
| f70071e1e1 | |||
| 63ac99788b | |||
| 28472f0d2d | |||
| f42feb4ed7 | |||
| 99e7f13149 | |||
| 6488ba09e7 | |||
| 8176b5142d | |||
| 314277769e | |||
| e0b567e992 | |||
| 707e4d7342 | |||
| 4f9e3feece | |||
| 10752fe330 | |||
| 8f7122a9b6 | |||
| b3982035b3 | |||
| d1122f8d28 | |||
| 4b35d25d86 | |||
| 46731729d4 | |||
| 6dc2d907a2 | |||
| 849bc97349 | |||
| e13dcab5e0 | |||
| 721010307c | |||
| 9f47ecf86e | |||
| ebc20794f3 | |||
| 73a949bb5c | |||
| 281cb04163 | |||
| fe7ff5440d | |||
| 5b0a75ab73 | |||
| a6dadc7ee0 | |||
| 5e52a0fdad | |||
| 6b445aae2d | |||
| 4f3d51bd61 | |||
| 9a64aeaa2c | |||
| 2d783b5ca6 | |||
| 6fc328ef44 | |||
| bb3212ad37 | |||
| 1986260603 | |||
| d297e75fc9 | |||
| 3ae0513209 | |||
| 4b6373861c | |||
| 3886e8fb9f | |||
| d48693144b | |||
| 1b207b214d | |||
| 1e97fb9a16 | |||
| 7cffff844b | |||
| 4a0d7cd545 | |||
| 35b3ea598a | |||
| 1161b97faf | |||
| 059962a0a3 | |||
| b07575c710 | |||
| 586fa5f84e | |||
| b937415e1e | |||
| 0f46c7eefe | |||
| 8aea1f008c | |||
| 8417bce50d | |||
| 3195657837 | |||
| 7b0bd32957 | |||
| 6fb9bc9bcd | |||
| 9cd2c02f14 | |||
| 9929f73e80 | |||
| 829ab66462 | |||
| 3b3e821a60 | |||
| a08eaa6ca2 | |||
| c5322f318a | |||
| 290e6dfdc3 | |||
| f74fff6ae4 | |||
| 5bfa4b1d80 | |||
| 51e7d94605 | |||
| f2397bf138 | |||
| ff5f4cbf7c | |||
| c53b2b104f | |||
| 01b653d6b0 | |||
| f05633f5b0 | |||
| ff1003e5f6 | |||
| d9fb57092c | |||
| c1cff3169f | |||
| f52de74b7b | |||
| 53d823e719 | |||
| 4511659a9e | |||
| 032c011b37 | |||
| c0997a5703 | |||
| 1d3d18fd66 | |||
| be997883c9 | |||
| 3f4c5f8076 | |||
| e1c99cd24c | |||
| 26b5b21238 | |||
| 25cb17c906 | |||
| 238f4d45df | |||
| bcea8ac822 | |||
| 87ae691e67 | |||
| 99f6481acc | |||
| 2c4bfd83e4 | |||
| 9e8aa39692 | |||
| b7f0b279eb | |||
| fa3353a3ca | |||
| 1187a66d2e | |||
| d360c34a30 | |||
| 287961375f | |||
| 98f883cb99 | |||
| f1840d467c | |||
| 5596cb52ef | |||
| 563e58a835 | |||
| eaee113416 | |||
| 170e037ad1 | |||
| 6f8f978975 | |||
| 034350f823 | |||
| a6b4758f5d | |||
| b4a2c990fb | |||
| ffd90dcf1e | |||
| 44df1befef | |||
| 32fc77bad4 | |||
| ead920ac09 | |||
| 5978cb3c45 | |||
| 3934325e23 | |||
| 2e3e36b91f | |||
| 63d9158e12 | |||
| b7c962bf86 | |||
| 26789988df | |||
| b6ff280ca3 | |||
| acc10ca467 | |||
| f071cbb0a3 | |||
| 3c70ddea5c | |||
| 89e10962b9 | |||
| ff20fe4f61 | |||
| da59b8c5bc | |||
| e307334ca4 | |||
| 0945936eee | |||
| 16ad941a1e | |||
| 25979072fd | |||
| 99738975e2 | |||
| 66de1f1471 | |||
| 0e3e2559af | |||
| 4f6678ae52 | |||
| 5de0eee328 | |||
| 40e35e0b6d | |||
| 7a30af5af0 | |||
| 67e2c9c6b3 | |||
| 43e0f69dc8 | |||
| a6ef5f9583 | |||
| 38b1af3b84 | |||
| 5a50ba86e8 | |||
| 9fea10524e | |||
| 211e375ef1 | |||
| 38e0fc8ea0 | |||
| dd5832a8fc | |||
| 8622829848 | |||
| c5d8ce9ffe | |||
| 90b561add0 | |||
| 81c8a8b35d | |||
| 7ce0138150 | |||
| 408e308ce5 | |||
| 05596803f7 | |||
| 6cd650f48c | |||
| 754e5b2da1 | |||
| f23665d4d9 | |||
| 68c9bd8fe4 | |||
| 4d747de218 | |||
| 4a8a72f4ae | |||
| c4d476d0dc | |||
| 9689c6f6d5 | |||
| 3e4ff1ce9c | |||
| ad24703d74 | |||
| 3e6c7075d0 | |||
| 390425afbc | |||
| 663c5b7e70 | |||
| b70d857409 | |||
| 2f89a05f2f | |||
| d684e28228 | |||
| 71fb499dee | |||
| e5c9656016 | |||
| e5a8ace677 | |||
| d5eb58af56 | |||
| 166c677a09 | |||
| a7f1b378de | |||
| a306a97dd3 | |||
| ec54942628 | |||
| 065e39dda2 | |||
| 54d32d1ee2 | |||
| 4cd01a2df1 | |||
| ccb7ca5d8a | |||
| 10f2b9f01c | |||
| 8760ee1628 | |||
| 28f5108a7c | |||
| e9fdd992a9 | |||
| f6fa3669dc | |||
| b1a1c8e4a9 | |||
| aedbbc4a10 | |||
| 8b9e7e6d59 | |||
| 3c127ae3b9 | |||
| 98da627170 | |||
| 3cd8c53de0 | |||
| 69fcfe9b3a | |||
| 24d64677ab | |||
| 1141a42910 | |||
| 84448d452b | |||
| f689c81a70 | |||
| 2268027581 | |||
| 652124284b | |||
| 79a0203798 | |||
| bae2727074 | |||
| 2c4d92d9bc | |||
| 4c49ff75f6 | |||
| 2e9686036d | |||
| 2bc304bfd3 | |||
| 7ca764f917 | |||
| d2c0041b2b | |||
| 149d0bf3d7 | |||
| c6eec15292 | |||
| 68f8fa2621 | |||
| e4db4cfb11 | |||
| 65b42c33b9 | |||
| 9d45211fd3 | |||
| 14494fe67c | |||
| 3b244ca6c6 | |||
| 18e88e7039 | |||
| f7d663d19a | |||
| c8e422f6c6 | |||
| 1d303ee75e | |||
| 1ec7e4af6d | |||
| ae4739f35b | |||
| 6f203c5646 | |||
| ff0d4dae77 | |||
| 01bbf4c87b | |||
| e89dd892ac | |||
| eba0c5e3f1 | |||
| c3ba5df9ff | |||
| c37596fc26 | |||
| d2c202ddab | |||
| 79590eb861 | |||
| 2d1a86cac9 | |||
| 954d2172f0 | |||
| 9fd52e9cd4 | |||
| ffcffa1375 | |||
| b5dea3c5df | |||
| 54f3c4d34f | |||
| 8d5e78d629 | |||
| ac6f65ab5e | |||
| 5cd5a28bd1 | |||
| 026c81acf0 | |||
| a03045e5e4 | |||
| 61223de305 | |||
| 1355a1b539 | |||
| db132351a3 | |||
| 4e90d3a32d | |||
| e1d635a099 | |||
| 80c6f6e4b6 | |||
| a1e40fe0d9 | |||
| a8708caf73 | |||
| 02ae2fd6fb | |||
| f21d79c4ad | |||
| 120bb1f0a2 | |||
| cfd5ec8d82 | |||
| a4a32cded5 | |||
| 257079c7a2 | |||
| 0567502316 | |||
| 7cba0477cc | |||
| ff3dcd37f6 | |||
| 4e72f1d1db | |||
| e22f7969f8 | |||
| 3d145da99d | |||
| 46c8c1de23 | |||
| 6d38b96043 | |||
| 270a95aa67 | |||
| 6431bdc631 | |||
| 72b6be82b0 | |||
| b42599585e | |||
| 06bfed2e35 | |||
| 80b38900de | |||
| d1eab79d28 | |||
| 824a2a7657 | |||
| 876d6ec8c9 | |||
| 63e3d385d6 | |||
| 2e78812ff9 | |||
| 9664d66e4b | |||
| 19cc83313a | |||
| 097d513b65 | |||
| 2b3f44c3c8 | |||
| c45aa8d7ee | |||
| b4e45374bf | |||
| f2d69f0088 | |||
| bc11ed8a2b | |||
| e2328abedc | |||
| bdad75ae3e | |||
| 90ba2cd4df | |||
| b002247f12 | |||
| 03bcce3eb3 | |||
| c74e71d604 | |||
| d7f88674d8 | |||
| 7abb94dab8 | |||
| effbcd737b | |||
| 6eb79adfd5 | |||
| 8f48a38550 | |||
| 55d85147f7 | |||
| f7e8f98cf7 | |||
| dc6425fe39 | |||
| cbc69f5e7e | |||
| c71f641b12 | |||
| 173e22e091 | |||
| 60a516bc8d |
@@ -111,7 +111,60 @@ jobs:
|
||||
all_green: ${{ steps.gates.outputs.all_green }}
|
||||
head_sha: ${{ steps.gates.outputs.head_sha }}
|
||||
steps:
|
||||
# Skip empty-tree promotes (the perpetual auto-promote↔auto-sync cycle
|
||||
# observed 2026-05-03). Sequence: auto-promote merges via the staging
|
||||
# merge-queue's MERGE strategy, creating a merge commit on main that
|
||||
# staging doesn't have. auto-sync then merges main back into staging
|
||||
# via another merge commit (the queue's MERGE strategy applies on
|
||||
# the staging side too, even when the workflow's local FF would
|
||||
# have sufficed). Now staging has a new merge-commit SHA whose
|
||||
# tree == main's tree — but auto-promote sees "staging ahead of
|
||||
# main by 1" and opens YET another empty promote PR. Each round
|
||||
# costs ~30-40 min wallclock, ~2 manual approvals, and burns a
|
||||
# full CodeQL Go run (~15 min). Without this guard the cycle
|
||||
# repeats indefinitely.
|
||||
#
|
||||
# Long-term fix is to switch the merge_queue ruleset's
|
||||
# `merge_method` away from MERGE so FF-able PRs land cleanly,
|
||||
# but that's a broader change affecting every staging PR's
|
||||
# commit shape. This guard is the one-line surgical fix that
|
||||
# breaks the cycle without touching merge-queue config.
|
||||
#
|
||||
# Fail-open: if `git diff` errors for any reason, fall through
|
||||
# to the gate check (preserve existing behavior). Only skip
|
||||
# when the diff is DEFINITIVELY empty.
|
||||
- name: Checkout for tree-diff check
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: staging
|
||||
- name: Skip if staging tree == main tree (perpetual-cycle break)
|
||||
id: tree-diff
|
||||
env:
|
||||
HEAD_SHA: ${{ github.event.workflow_run.head_sha || github.sha }}
|
||||
run: |
|
||||
set -eu
|
||||
git fetch origin main --depth=50 || { echo "::warning::git fetch main failed — proceeding (fail-open)"; exit 0; }
|
||||
# Compare staging tip's tree against main's tree. `git diff
|
||||
# --quiet` exits 0 if no differences, 1 if there are.
|
||||
if git diff --quiet origin/main "$HEAD_SHA" -- 2>/dev/null; then
|
||||
{
|
||||
echo "## ⏭ Skipped — no code to promote"
|
||||
echo
|
||||
echo "staging tip (\`${HEAD_SHA:0:8}\`) and \`main\` have identical trees."
|
||||
echo "This is the auto-promote↔auto-sync merge-commit cycle: staging has a"
|
||||
echo "new SHA (a sync-back merge commit) but the underlying file tree is"
|
||||
echo "already on main, so there's no real code to ship."
|
||||
echo
|
||||
echo "Skipping to avoid opening an empty promote PR. Cycle terminates here."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "::notice::auto-promote: staging tree == main tree — no code to promote, skipping"
|
||||
echo "skip=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "skip=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
- name: Check all required gates on this SHA
|
||||
if: steps.tree-diff.outputs.skip != 'true'
|
||||
id: gates
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
@@ -209,10 +262,25 @@ jobs:
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Mint the App token BEFORE the promote-PR step so the auto-merge
|
||||
# call can use it. GITHUB_TOKEN-initiated merges suppress the
|
||||
# downstream `push` event on main, breaking the
|
||||
# publish-workspace-server-image → canary-verify → redeploy-tenants
|
||||
# chain (issue #2357). Using the App token here means the
|
||||
# merge-queue-landed merge IS able to fire the cascade naturally;
|
||||
# the polling tail below stays as defense-in-depth.
|
||||
- name: Mint App token for promote-PR + downstream dispatch
|
||||
if: ${{ vars.AUTO_PROMOTE_ENABLED == 'true' || github.event.inputs.force == 'true' }}
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
|
||||
with:
|
||||
app-id: ${{ secrets.MOLECULE_AI_APP_ID }}
|
||||
private-key: ${{ secrets.MOLECULE_AI_APP_PRIVATE_KEY }}
|
||||
|
||||
- name: Open (or reuse) staging → main promote PR + enable auto-merge
|
||||
if: ${{ vars.AUTO_PROMOTE_ENABLED == 'true' || github.event.inputs.force == 'true' }}
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GH_TOKEN: ${{ steps.app-token.outputs.token }}
|
||||
REPO: ${{ github.repository }}
|
||||
TARGET_SHA: ${{ needs.check-all-gates-green.outputs.head_sha }}
|
||||
run: |
|
||||
@@ -267,52 +335,34 @@ jobs:
|
||||
echo "promote_pr_num=${PR_NUM}" >> "$GITHUB_OUTPUT"
|
||||
id: promote_pr
|
||||
|
||||
# Mint a short-lived GitHub App installation token for the dispatch
|
||||
# step below. We CANNOT use `secrets.GITHUB_TOKEN` to dispatch the
|
||||
# downstream publish chain — workflow runs created by GITHUB_TOKEN
|
||||
# do not fire `workflow_run` triggers on completion (the
|
||||
# documented "no recursion" rule —
|
||||
# https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow).
|
||||
#
|
||||
# Symptom this caused (root-caused on 2026-04-30): publish-image
|
||||
# ran successfully twice (21313dc 14:41Z, 59dec57 15:21Z) but
|
||||
# canary-verify and redeploy-tenants-on-main never chained,
|
||||
# because the publish run's `triggering_actor` was
|
||||
# `github-actions[bot]` (i.e. GITHUB_TOKEN). A manual dispatch
|
||||
# earlier in the day with the operator's PAT (d850ec7 06:52Z) did
|
||||
# chain — same workflow file, only the actor differed.
|
||||
#
|
||||
# An App token's triggering_actor is the App user (e.g.
|
||||
# `molecule-ai[bot]`), which IS allowed to fire downstream
|
||||
# workflow_run cascades.
|
||||
- name: Mint App token for downstream dispatch
|
||||
if: steps.promote_pr.outputs.promote_pr_num != ''
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
|
||||
with:
|
||||
app-id: ${{ secrets.MOLECULE_AI_APP_ID }}
|
||||
private-key: ${{ secrets.MOLECULE_AI_APP_PRIVATE_KEY }}
|
||||
|
||||
# The App token minted above (before the promote-PR step) is
|
||||
# also used by the polling tail below. Defense-in-depth: with
|
||||
# the merge-queue-landed merge now using the App token, the
|
||||
# main-branch push event SHOULD fire the publish/canary/redeploy
|
||||
# cascade naturally — but if for any reason it doesn't (e.g. an
|
||||
# unrelated event-suppression edge case), the explicit dispatches
|
||||
# below still wake the chain.
|
||||
- name: Wait for promote merge, then dispatch publish + redeploy (#2357)
|
||||
# GITHUB_TOKEN-initiated merges suppress downstream `push` events
|
||||
# (https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow).
|
||||
# Result: when the merge queue lands the promote PR, the resulting
|
||||
# main-branch push DOES NOT fire publish-workspace-server-image,
|
||||
# so canary-verify and redeploy-tenants-on-main never run and
|
||||
# tenants stay on stale code (issue #2357).
|
||||
# Defense-in-depth dispatch. With the auto-merge call above
|
||||
# now using the App token (this commit), the merge-queue-landed
|
||||
# merge SHOULD fire publish-workspace-server-image naturally
|
||||
# via on:push:[main] — App-token-initiated pushes DO trigger
|
||||
# workflow_run cascades, unlike GITHUB_TOKEN-initiated ones
|
||||
# (the documented "no recursion" rule —
|
||||
# https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow).
|
||||
#
|
||||
# Workaround: poll for the merge to land, then explicitly
|
||||
# `gh workflow run` publish-workspace-server-image. The dispatch
|
||||
# MUST authenticate as the molecule-ai App (App token minted
|
||||
# above) — not GITHUB_TOKEN — so that the resulting publish
|
||||
# run's completion event can fire the workflow_run cascade
|
||||
# into canary-verify + redeploy-tenants-on-main. See the prior
|
||||
# step's comment for the GITHUB_TOKEN no-recursion details.
|
||||
# This explicit dispatch stays as belt-and-suspenders for any
|
||||
# edge case where the natural cascade misfires. If it never
|
||||
# observably fires after this token swap (i.e. the publish
|
||||
# workflow has already started by the time we get here), the
|
||||
# second dispatch is a harmless no-op (publish-workspace-server-image
|
||||
# has its own concurrency group that dedupes).
|
||||
#
|
||||
# Long-term fix: switch the auto-merge call above to use the
|
||||
# same App token, so the merge's push event fires
|
||||
# publish-workspace-server-image naturally and this polling tail
|
||||
# becomes unnecessary. Tracked in #2357.
|
||||
# See PR for #2357: pre-fix the merge action was via
|
||||
# GITHUB_TOKEN, suppressing the cascade and forcing this tail
|
||||
# to be the SOLE chain trigger. With the auto-merge token swap
|
||||
# the tail becomes redundant in the happy path; keep until
|
||||
# we've observed >=10 successful natural cascades, then drop.
|
||||
if: steps.promote_pr.outputs.promote_pr_num != ''
|
||||
env:
|
||||
GH_TOKEN: ${{ steps.app-token.outputs.token }}
|
||||
|
||||
@@ -50,19 +50,35 @@ jobs:
|
||||
env:
|
||||
MOLECULE_CP_URL: https://staging-api.moleculesai.app
|
||||
MOLECULE_ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
|
||||
# Without an LLM key the test_staging_full_saas.sh script provisions
|
||||
# the workspace with empty secrets, hermes derive-provider.sh resolves
|
||||
# `openai/gpt-4o` to PROVIDER=openrouter, no OPENROUTER_API_KEY is
|
||||
# found in env, and A2A returns "No LLM provider configured" at
|
||||
# request time (canary step 8/11). The full-lifecycle workflow
|
||||
# (e2e-staging-saas.yml) has carried this secret since launch — the
|
||||
# canary regressed when it was first split out and lost the env
|
||||
# block. Issue #1500 had ~30 consecutive failures before this was
|
||||
# spotted; do NOT remove without re-reading the script's secrets-
|
||||
# injection block.
|
||||
# MiniMax is the canary's PRIMARY LLM auth path post-2026-05-04.
|
||||
# Switched from hermes+OpenAI after #2578 (the staging OpenAI key
|
||||
# account went over quota and stayed dead for 36+ hours, taking
|
||||
# the canary red the entire time). claude-code template's
|
||||
# `minimax` provider routes ANTHROPIC_BASE_URL to
|
||||
# api.minimax.io/anthropic and reads MINIMAX_API_KEY at boot —
|
||||
# ~5-10x cheaper per token than gpt-4.1-mini AND on a separate
|
||||
# billing account, so OpenAI quota collapse no longer wedges the
|
||||
# canary. Mirrors the migration continuous-synth-e2e.yml made on
|
||||
# 2026-05-03 (#265) for the same reason. tests/e2e/test_staging_
|
||||
# full_saas.sh branches SECRETS_JSON on which key is present —
|
||||
# MiniMax wins when set.
|
||||
E2E_MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
|
||||
# Direct-Anthropic alternative for operators who don't want to
|
||||
# set up a MiniMax account (priority below MiniMax — first
|
||||
# non-empty wins in test_staging_full_saas.sh's secrets-injection
|
||||
# block). See #2578 PR comment for the rationale.
|
||||
E2E_ANTHROPIC_API_KEY: ${{ secrets.MOLECULE_STAGING_ANTHROPIC_API_KEY }}
|
||||
# OpenAI fallback — kept wired so an operator-dispatched run with
|
||||
# E2E_RUNTIME=hermes overridden via workflow_dispatch can still
|
||||
# exercise the OpenAI path without re-editing the workflow.
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_KEY }}
|
||||
E2E_MODE: canary
|
||||
E2E_RUNTIME: hermes
|
||||
E2E_RUNTIME: claude-code
|
||||
# Pin the canary to a specific MiniMax model rather than relying
|
||||
# on the per-runtime default (which could resolve to "sonnet" →
|
||||
# direct Anthropic and defeat the cost saving). M2.7-highspeed
|
||||
# is "Token Plan only" but cheap-per-token and fast.
|
||||
E2E_MODEL_SLUG: MiniMax-M2.7-highspeed
|
||||
E2E_RUN_ID: "canary-${{ github.run_id }}"
|
||||
|
||||
steps:
|
||||
@@ -75,13 +91,47 @@ jobs:
|
||||
exit 2
|
||||
fi
|
||||
|
||||
- name: Verify OpenAI key present
|
||||
- name: Verify LLM key present
|
||||
run: |
|
||||
if [ -z "$E2E_OPENAI_API_KEY" ]; then
|
||||
echo "::error::MOLECULE_STAGING_OPENAI_KEY secret not set — A2A will fail at request time with 'No LLM provider configured'"
|
||||
# Per-runtime key check — claude-code uses MiniMax; hermes /
|
||||
# langgraph (operator-dispatched only) use OpenAI. Hard-fail
|
||||
# rather than soft-skip per the lesson from synth E2E #2578:
|
||||
# an empty key silently falls through to the wrong
|
||||
# SECRETS_JSON branch and the canary fails 5 min later with
|
||||
# a confusing auth error instead of the clean "secret
|
||||
# missing" message at the top.
|
||||
case "${E2E_RUNTIME}" in
|
||||
claude-code)
|
||||
# Either MiniMax OR direct-Anthropic works — first
|
||||
# non-empty wins in the test script's secrets-injection
|
||||
# priority chain. Operators only need to set ONE of these
|
||||
# secrets; we don't force a choice between them.
|
||||
if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
|
||||
required_secret_name="MOLECULE_STAGING_MINIMAX_API_KEY"
|
||||
required_secret_value="${E2E_MINIMAX_API_KEY}"
|
||||
elif [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
|
||||
required_secret_name="MOLECULE_STAGING_ANTHROPIC_API_KEY"
|
||||
required_secret_value="${E2E_ANTHROPIC_API_KEY}"
|
||||
else
|
||||
required_secret_name="MOLECULE_STAGING_MINIMAX_API_KEY or MOLECULE_STAGING_ANTHROPIC_API_KEY"
|
||||
required_secret_value=""
|
||||
fi
|
||||
;;
|
||||
langgraph|hermes)
|
||||
required_secret_name="MOLECULE_STAGING_OPENAI_KEY"
|
||||
required_secret_value="${E2E_OPENAI_API_KEY:-}"
|
||||
;;
|
||||
*)
|
||||
echo "::warning::Unknown E2E_RUNTIME='${E2E_RUNTIME}' — skipping LLM-key check"
|
||||
required_secret_name=""
|
||||
required_secret_value="present"
|
||||
;;
|
||||
esac
|
||||
if [ -n "$required_secret_name" ] && [ -z "$required_secret_value" ]; then
|
||||
echo "::error::${required_secret_name} secret not set for runtime=${E2E_RUNTIME} — A2A will fail at request time with 'No LLM provider configured'"
|
||||
exit 2
|
||||
fi
|
||||
echo "OpenAI key present ✓ (len=${#E2E_OPENAI_API_KEY})"
|
||||
echo "LLM key present ✓ (runtime=${E2E_RUNTIME}, key=${required_secret_name}, len=${#required_secret_value})"
|
||||
|
||||
- name: Canary run
|
||||
id: canary
|
||||
@@ -231,10 +281,34 @@ jobs:
|
||||
and o.get('status') not in ('purged',)]
|
||||
print('\n'.join(candidates))
|
||||
" 2>/dev/null)
|
||||
# Per-slug DELETE with HTTP-code verification. The previous
|
||||
# `... >/dev/null || true` swallowed every failure, so a 5xx
|
||||
# or timeout from CP looked identical to "successfully cleaned
|
||||
# up" and the tenant kept eating ~2 vCPU until the hourly
|
||||
# stale sweep caught it (up to 2h later). Now we capture the
|
||||
# response code and surface non-2xx as a workflow warning, so
|
||||
# the run page shows which slug leaked. We still don't `exit 1`
|
||||
# on cleanup failure — a single-canary cleanup miss shouldn't
|
||||
# fail-flag the canary itself when the actual smoke check
|
||||
# passed. The sweep-stale-e2e-orgs cron (now every 15 min,
|
||||
# 30-min threshold) is the safety net for whatever slips past.
|
||||
# See molecule-controlplane#420.
|
||||
leaks=()
|
||||
for slug in $orgs; do
|
||||
curl -sS -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
code=$(curl -sS -o /tmp/canary-cleanup.out -w "%{http_code}" \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/dev/null || true
|
||||
-d "{\"confirm\":\"$slug\"}" \
|
||||
|| echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
echo "[teardown] deleted $slug (HTTP $code)"
|
||||
else
|
||||
echo "::warning::canary teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/canary-cleanup.out 2>/dev/null)"
|
||||
leaks+=("$slug")
|
||||
fi
|
||||
done
|
||||
if [ ${#leaks[@]} -gt 0 ]; then
|
||||
echo "::warning::canary teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
|
||||
fi
|
||||
exit 0
|
||||
|
||||
@@ -272,6 +272,18 @@ jobs:
|
||||
find tests/e2e infra/scripts -type f -name '*.sh' -print0 \
|
||||
| xargs -0 shellcheck --severity=warning
|
||||
|
||||
- if: needs.changes.outputs.scripts == 'true'
|
||||
name: Run E2E bash unit tests (no live infra)
|
||||
# Pure-bash unit tests for E2E helper libs (lib/*.sh). These pin
|
||||
# behavior of dispatch logic that — when broken — silently masks as
|
||||
# "Could not resolve authentication method" only after a successful
|
||||
# tenant + workspace provision (PR #2571 incident, 2026-05-03). Add
|
||||
# new self-contained unit tests here as the lib/ directory grows;
|
||||
# tests requiring live CP/tenant credentials belong in the dedicated
|
||||
# e2e-staging-* workflows, not this job.
|
||||
run: |
|
||||
bash tests/e2e/test_model_slug.sh
|
||||
|
||||
canvas-deploy-reminder:
|
||||
name: Canvas Deploy Reminder
|
||||
runs-on: ubuntu-latest
|
||||
@@ -346,6 +358,72 @@ jobs:
|
||||
- if: needs.changes.outputs.python == 'true'
|
||||
run: python -m pytest --tb=short
|
||||
|
||||
- if: needs.changes.outputs.python == 'true'
|
||||
name: Per-file critical-path coverage (MCP / inbox / auth)
|
||||
# MCP-critical Python files have a per-file floor on top of the
|
||||
# 86% total floor in pytest.ini. Rationale (issue #2790, after
|
||||
# the PR #2766 → PR #2771 cycle): the total floor averages over ~6000
|
||||
# lines, so a single MCP file could regress to ~50% with no
|
||||
# complaint as long as other modules compensate. These five
|
||||
# files handle multi-tenant routing + auth + inbox dispatch —
|
||||
# a coverage drop here is the same risk shape as a Go-side
|
||||
# workspace-server token/secrets file dropping below 10%.
|
||||
#
|
||||
# Floor 75% sits below current actuals (80-96%) so this gate is
|
||||
# strictly additive — no existing PR fails. Ratchet plan in
|
||||
# COVERAGE_FLOOR.md.
|
||||
run: |
|
||||
set -e
|
||||
PER_FILE_FLOOR=75
|
||||
CRITICAL_FILES=(
|
||||
"a2a_mcp_server.py"
|
||||
"mcp_cli.py"
|
||||
"a2a_tools.py"
|
||||
"inbox.py"
|
||||
"platform_auth.py"
|
||||
)
|
||||
|
||||
# pytest already wrote .coverage; emit a JSON view scoped to
|
||||
# the critical files so jq/python can read the per-file pct
|
||||
# without parsing tabular text. --include uses fnmatch, and
|
||||
# the leading "*" allows the file to live anywhere under the
|
||||
# workspace root (today they sit at workspace/<name>.py).
|
||||
INCLUDES=$(printf '*%s,' "${CRITICAL_FILES[@]}")
|
||||
INCLUDES="${INCLUDES%,}"
|
||||
python -m coverage json -o /tmp/critical-cov.json --include="$INCLUDES"
|
||||
|
||||
FAILED=0
|
||||
for f in "${CRITICAL_FILES[@]}"; do
|
||||
# Match by top-level path key (e.g. "a2a_tools.py", not
|
||||
# "builtin_tools/a2a_tools.py" — different file at 100%).
|
||||
# The keys in coverage.json are paths relative to the run
|
||||
# cwd (workspace/), so the critical-path entry sits at the
|
||||
# bare basename.
|
||||
pct=$(jq -r --arg f "$f" '.files | to_entries | map(select(.key == $f)) | .[0].value.summary.percent_covered // "MISSING"' /tmp/critical-cov.json)
|
||||
if [ "$pct" = "MISSING" ]; then
|
||||
echo "::error file=workspace/$f::No coverage data — file may have moved or test exclusion mis-set."
|
||||
FAILED=$((FAILED+1))
|
||||
continue
|
||||
fi
|
||||
echo "$f: ${pct}%"
|
||||
if awk "BEGIN{exit !($pct < $PER_FILE_FLOOR)}"; then
|
||||
echo "::error file=workspace/$f::${pct}% < ${PER_FILE_FLOOR}% per-file floor (MCP critical path). See COVERAGE_FLOOR.md."
|
||||
FAILED=$((FAILED+1))
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$FAILED" -gt 0 ]; then
|
||||
echo ""
|
||||
echo "$FAILED MCP critical-path file(s) below the ${PER_FILE_FLOOR}% per-file floor."
|
||||
echo "These paths handle multi-tenant routing, auth tokens, and inbox dispatch."
|
||||
echo "A coverage drop here is the same risk shape as Go-side tokens/secrets files"
|
||||
echo "dropping below 10% (see COVERAGE_FLOOR.md). Either:"
|
||||
echo " (a) add tests to raise coverage back above ${PER_FILE_FLOOR}%, or"
|
||||
echo " (b) if this is unavoidable historical debt, file an issue and propose"
|
||||
echo " adjusting the floor with rationale in COVERAGE_FLOOR.md."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# SDK + plugin validation moved to standalone repo:
|
||||
# github.com/Molecule-AI/molecule-sdk-python
|
||||
|
||||
|
||||
@@ -32,16 +32,41 @@ name: Continuous synthetic E2E (staging)
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Every 20 minutes, on the :00 :20 :40. Offsets the existing :15
|
||||
# sweep-cf-orphans and :45 sweep-cf-tunnels so the three
|
||||
# operations don't all hit Cloudflare/AWS at the same minute.
|
||||
- cron: '0,20,40 * * * *'
|
||||
# Every 10 minutes, on :02 :12 :22 :32 :42 :52. Three constraints:
|
||||
# 1. Stay off the top-of-hour. GitHub Actions scheduler drops
|
||||
# :00 firings under high load (own docs:
|
||||
# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#schedule).
|
||||
# Prior history: cron was '0,20,40' (2026-05-02) — only :00
|
||||
# ever survived. Bumped to '10,30,50' (2026-05-03) on the
|
||||
# theory that further-from-:00 wins. Empirically (2026-05-04)
|
||||
# that ALSO dropped to ~60 min effective cadence (only ~1
|
||||
# schedule fire per hour — see molecule-core#2726). Detection
|
||||
# latency was claimed 20 min, actual 60 min.
|
||||
# 2. Avoid colliding with the existing :15 sweep-cf-orphans
|
||||
# and :45 sweep-cf-tunnels — both hit the CF API and we
|
||||
# don't want to fight for rate-limit tokens.
|
||||
# 3. Avoid the :30 heavy slot (canary-staging /30, sweep-aws-
|
||||
# secrets, sweep-stale-e2e-orgs every :15) — multiple
|
||||
# overlapping cron registrations on the same minute is part
|
||||
# of what GH drops under load.
|
||||
# Solution: bump fires-per-hour 3 → 6 AND keep all slots in clean
|
||||
# lanes (≥2 min away from every cron listed above). Even with empirically-
|
||||
# observed ~67% GH drop ratio, 6 attempts/hour yields ~2 effective
|
||||
# fires = ~30 min cadence; closer to the 20-min target than the
|
||||
# current shape and provides a real degradation alarm if drops
|
||||
# get worse.
|
||||
- cron: '2,12,22,32,42,52 * * * *'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
runtime:
|
||||
description: "Runtime to provision (langgraph = fastest, default; hermes = slower but covers SDK-native path; claude-code = needs OAUTH token in tenant env)"
|
||||
description: "Runtime to provision (claude-code = default + cheapest via MiniMax; langgraph = OpenAI-only; hermes = SDK-native path, slower)"
|
||||
required: false
|
||||
default: "langgraph"
|
||||
default: "claude-code"
|
||||
type: string
|
||||
model_slug:
|
||||
description: "Model id to provision the workspace with (default MiniMax-M2.7-highspeed; e.g. 'sonnet' to test direct Anthropic, 'openai/gpt-4o' for hermes)"
|
||||
required: false
|
||||
default: "MiniMax-M2.7-highspeed"
|
||||
type: string
|
||||
keep_org:
|
||||
description: "Skip teardown for post-mortem debugging (only manual dispatch — never set this for cron runs)"
|
||||
@@ -68,15 +93,36 @@ jobs:
|
||||
synth:
|
||||
name: Synthetic E2E against staging
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 12
|
||||
# Bumped from 12 → 20 (2026-05-04). Tenant user-data install phase
|
||||
# (apt-get update + install docker.io/jq/awscli/caddy + snap install
|
||||
# ssm-agent) runs from raw Ubuntu on every boot — none of it is
|
||||
# pre-baked into the tenant AMI. Empirical fetch_secrets/ok timing
|
||||
# across today's canaries: 51s → 82s → 143s → 625s. apt-mirror tail
|
||||
# latency drives the boot-to-fetch_secrets phase from ~1min to >10min.
|
||||
# A 12min budget leaves only ~2min for the workspace (which needs
|
||||
# ~3.5min for claude-code cold boot) on slow-apt days, blowing the
|
||||
# budget. 20min absorbs the worst tenant tail so the workspace probe
|
||||
# gets the full ~7min it needs even on a slow apt day. Real fix:
|
||||
# pre-bake caddy + ssm-agent into the tenant AMI (controlplane#TBD).
|
||||
timeout-minutes: 20
|
||||
env:
|
||||
# langgraph default keeps cold-start under 5 min on staging EC2.
|
||||
# hermes is slower (~7-10 min) and isn't needed for the
|
||||
# regression class this gate exists to catch (deployment-pipeline
|
||||
# + schema-drift + integration). Operators can pick hermes via
|
||||
# workflow_dispatch when they need to exercise the SDK-native
|
||||
# session path.
|
||||
E2E_RUNTIME: ${{ github.event.inputs.runtime || 'langgraph' }}
|
||||
# claude-code default: cold-start ~5 min (comparable to langgraph),
|
||||
# but uses MiniMax-M2.7-highspeed via the template's third-party-
|
||||
# Anthropic-compat path (workspace-configs-templates/claude-code-
|
||||
# default/config.yaml:64-69). MiniMax is ~5-10x cheaper than
|
||||
# gpt-4.1-mini per token AND avoids the recurring OpenAI quota-
|
||||
# exhaustion class that took the canary down 2026-05-03 (#265).
|
||||
# Operators can pick langgraph / hermes via workflow_dispatch
|
||||
# when they specifically need to exercise the OpenAI or SDK-
|
||||
# native paths.
|
||||
E2E_RUNTIME: ${{ github.event.inputs.runtime || 'claude-code' }}
|
||||
# Pin the canary to a specific MiniMax model rather than relying
|
||||
# on the per-runtime default ("sonnet" → routes to direct
|
||||
# Anthropic, defeats the cost saving). Operators can override
|
||||
# via workflow_dispatch by setting a different `model_slug`
|
||||
# input if they need to exercise a specific model. M2.7-highspeed
|
||||
# is "Token Plan only" but cheap-per-token and fast.
|
||||
E2E_MODEL_SLUG: ${{ github.event.inputs.model_slug || 'MiniMax-M2.7-highspeed' }}
|
||||
# Bound to 10 min so a stuck provision fails the run instead of
|
||||
# holding up the next cron firing. 15-min default in the script
|
||||
# is for the on-PR full lifecycle where we have more headroom.
|
||||
@@ -88,37 +134,79 @@ jobs:
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org == 'true' && '1' || '' }}
|
||||
MOLECULE_CP_URL: ${{ vars.STAGING_CP_URL || 'https://staging-api.moleculesai.app' }}
|
||||
MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
# Provisioned tenant's default model (langgraph: openai:gpt-4.1-mini)
|
||||
# needs OPENAI_API_KEY at first call. Sibling workflows
|
||||
# e2e-staging-saas.yml + canary-staging.yml use the same secret;
|
||||
# without this wire-up the tenant boots, accepts a2a messages,
|
||||
# then returns "Could not resolve authentication method" — masked
|
||||
# earlier by the a2a-sdk task-mode contract bugs PR #2558+#2563
|
||||
# fixed. tests/e2e/test_staging_full_saas.sh:325 reads this and
|
||||
# persists it as a workspace_secret on tenant create.
|
||||
# MiniMax key is the canary's PRIMARY auth path. claude-code
|
||||
# template's `minimax` provider routes ANTHROPIC_BASE_URL to
|
||||
# api.minimax.io/anthropic and reads MINIMAX_API_KEY at boot.
|
||||
# tests/e2e/test_staging_full_saas.sh branches SECRETS_JSON on
|
||||
# which key is present — MiniMax wins when set.
|
||||
E2E_MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
|
||||
# Direct-Anthropic alternative for operators who don't want to
|
||||
# set up a MiniMax account (priority below MiniMax — first
|
||||
# non-empty wins in test_staging_full_saas.sh's secrets-injection
|
||||
# block). See #2578 PR comment for the rationale.
|
||||
E2E_ANTHROPIC_API_KEY: ${{ secrets.MOLECULE_STAGING_ANTHROPIC_API_KEY }}
|
||||
# OpenAI fallback — kept wired so operators can dispatch with
|
||||
# E2E_RUNTIME=langgraph or =hermes and still have a working
|
||||
# canary path. The script picks the right blob shape based on
|
||||
# which key is non-empty.
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_KEY }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Verify required secret present
|
||||
- name: Verify required secrets present
|
||||
run: |
|
||||
# Schedule-vs-dispatch hardening (mirrors the sweep-cf-* and
|
||||
# redeploy-tenants-on-* workflows): hard-fail on missing secret
|
||||
# for cron firing so a misconfigured-repo doesn't silently
|
||||
# report green while doing nothing. Soft-skip on operator
|
||||
# dispatch — operators can dispatch ad-hoc to verify a fix
|
||||
# without setting up the secret first.
|
||||
# Hard-fail on missing secret REGARDLESS of trigger. Previously
|
||||
# this step soft-skipped on workflow_dispatch via `exit 0`, but
|
||||
# `exit 0` only ends the STEP — subsequent steps still ran with
|
||||
# the empty secret, the synth script fell through to the wrong
|
||||
# SECRETS_JSON branch, and the canary failed 5 min later with a
|
||||
# confusing "Agent error (Exception)" instead of the clean
|
||||
# "secret missing" message at the top. Caught 2026-05-04 by
|
||||
# dispatched run 25296530706: claude-code + missing MINIMAX
|
||||
# silently used OpenAI keys but kept model=MiniMax-M2.7, then
|
||||
# the workspace 401'd against MiniMax once it tried to call.
|
||||
# Fix: exit 1 in both cron and dispatch paths. Operators who
|
||||
# want to verify a YAML change without setting up the secret
|
||||
# can read the verify-secrets step's stderr — the failure is
|
||||
# itself the verification signal.
|
||||
if [ -z "${MOLECULE_ADMIN_TOKEN:-}" ]; then
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
echo "::warning::CP_STAGING_ADMIN_API_TOKEN not set — synth E2E cannot run"
|
||||
echo "::warning::Set it at Settings → Secrets and Variables → Actions"
|
||||
exit 0
|
||||
fi
|
||||
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret missing — synth E2E cannot run"
|
||||
echo "::error::Set it at Settings → Secrets and Variables → Actions; pull from staging-CP's CP_ADMIN_API_TOKEN env in Railway."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# LLM-key requirement is per-runtime: claude-code accepts
|
||||
# EITHER MiniMax OR direct-Anthropic (MiniMax wins when both are set),
|
||||
# langgraph + hermes use OpenAI (MOLECULE_STAGING_OPENAI_KEY).
|
||||
case "${E2E_RUNTIME}" in
|
||||
claude-code)
|
||||
if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
|
||||
required_secret_name="MOLECULE_STAGING_MINIMAX_API_KEY"
|
||||
required_secret_value="${E2E_MINIMAX_API_KEY}"
|
||||
elif [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
|
||||
required_secret_name="MOLECULE_STAGING_ANTHROPIC_API_KEY"
|
||||
required_secret_value="${E2E_ANTHROPIC_API_KEY}"
|
||||
else
|
||||
required_secret_name="MOLECULE_STAGING_MINIMAX_API_KEY or MOLECULE_STAGING_ANTHROPIC_API_KEY"
|
||||
required_secret_value=""
|
||||
fi
|
||||
;;
|
||||
langgraph|hermes)
|
||||
required_secret_name="MOLECULE_STAGING_OPENAI_KEY"
|
||||
required_secret_value="${E2E_OPENAI_API_KEY:-}"
|
||||
;;
|
||||
*)
|
||||
echo "::warning::Unknown E2E_RUNTIME='${E2E_RUNTIME}' — skipping LLM-key check"
|
||||
required_secret_name=""
|
||||
required_secret_value="present"
|
||||
;;
|
||||
esac
|
||||
if [ -n "$required_secret_name" ] && [ -z "$required_secret_value" ]; then
|
||||
echo "::error::${required_secret_name} secret missing — runtime=${E2E_RUNTIME} cannot authenticate against its LLM provider"
|
||||
echo "::error::Set it at Settings → Secrets and Variables → Actions, OR dispatch with a different runtime"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install required tools
|
||||
run: |
|
||||
# The script depends on jq + curl (already on ubuntu-latest)
|
||||
|
||||
@@ -184,8 +184,23 @@ jobs:
|
||||
exit 0
|
||||
fi
|
||||
echo "Deleting orphan tenant: $slug"
|
||||
curl -sS -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
# Verify HTTP 2xx instead of `>/dev/null || true` swallowing
|
||||
# failures. A 5xx or timeout previously looked identical to
|
||||
# success, leaving the tenant alive for up to ~45 min until
|
||||
# sweep-stale-e2e-orgs caught it. Surface failures as
|
||||
# workflow warnings naming the slug. Don't `exit 1` — a single
|
||||
# cleanup miss shouldn't fail-flag the canvas test when the
|
||||
# actual smoke check passed; the sweeper is the safety net.
|
||||
# See molecule-controlplane#420.
|
||||
code=$(curl -sS -o /tmp/canvas-cleanup.out -w "%{http_code}" \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/dev/null || true
|
||||
-d "{\"confirm\":\"$slug\"}" \
|
||||
|| echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
echo "[teardown] deleted $slug (HTTP $code)"
|
||||
else
|
||||
echo "::warning::canvas teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/canvas-cleanup.out 2>/dev/null)"
|
||||
fi
|
||||
exit 0
|
||||
|
||||
@@ -153,12 +153,28 @@ jobs:
|
||||
if [ -n "$orgs" ]; then
|
||||
echo "Safety-net sweep: deleting leftover orgs:"
|
||||
echo "$orgs"
|
||||
# Per-slug verified DELETE — see molecule-controlplane#420.
|
||||
# `>/dev/null 2>&1` previously hid every failure; surface
|
||||
# non-2xx as workflow warnings so the run page names what
|
||||
# leaked. Sweeper catches the rest within ~45 min.
|
||||
leaks=()
|
||||
for slug in $orgs; do
|
||||
curl -sS -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
code=$(curl -sS -o /tmp/external-cleanup.out -w "%{http_code}" \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/dev/null 2>&1
|
||||
-d "{\"confirm\":\"$slug\"}" \
|
||||
|| echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
echo "[teardown] deleted $slug (HTTP $code)"
|
||||
else
|
||||
echo "::warning::external teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/external-cleanup.out 2>/dev/null)"
|
||||
leaks+=("$slug")
|
||||
fi
|
||||
done
|
||||
if [ ${#leaks[@]} -gt 0 ]; then
|
||||
echo "::warning::external teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
|
||||
fi
|
||||
else
|
||||
echo "Safety-net sweep: no leftover orgs to clean."
|
||||
fi
|
||||
|
||||
@@ -48,9 +48,9 @@ on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
runtime:
|
||||
description: "Runtime to test (hermes | claude-code | langgraph)"
|
||||
description: "Runtime to test (claude-code [default, MiniMax] | hermes [OpenAI] | langgraph [OpenAI])"
|
||||
required: false
|
||||
default: "hermes"
|
||||
default: "claude-code"
|
||||
keep_org:
|
||||
description: "Skip teardown for debugging (only use via manual dispatch!)"
|
||||
required: false
|
||||
@@ -83,11 +83,32 @@ jobs:
|
||||
# retrieval + teardown. Configure in
|
||||
# Settings → Secrets and variables → Actions → Repository secrets.
|
||||
MOLECULE_ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
|
||||
# OpenAI key for workspace LLM calls (section 8 A2A). Without it,
|
||||
# Hermes runtime crashes at boot with "No provider API key found".
|
||||
# Configure at Settings → Secrets → Actions → MOLECULE_STAGING_OPENAI_KEY.
|
||||
# MiniMax is the PRIMARY LLM auth path post-2026-05-04. Switched
|
||||
# from hermes+OpenAI default after #2578 (the staging OpenAI key
|
||||
# account went over quota and stayed dead for 36+ hours, taking
|
||||
# the full-lifecycle E2E red on every provisioning-critical push).
|
||||
# claude-code template's `minimax` provider routes
|
||||
# ANTHROPIC_BASE_URL to api.minimax.io/anthropic and reads
|
||||
# MINIMAX_API_KEY at boot — separate billing account so an
|
||||
# OpenAI quota collapse no longer wedges the gate. Mirrors the
|
||||
# canary-staging.yml + continuous-synth-e2e.yml migrations.
|
||||
E2E_MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
|
||||
# Direct-Anthropic alternative for operators who don't want to
|
||||
# set up a MiniMax account (priority below MiniMax — first
|
||||
# non-empty wins in test_staging_full_saas.sh's secrets-injection
|
||||
# block). See #2578 PR comment for the rationale.
|
||||
E2E_ANTHROPIC_API_KEY: ${{ secrets.MOLECULE_STAGING_ANTHROPIC_API_KEY }}
|
||||
# OpenAI fallback — kept wired so an operator-dispatched run with
|
||||
# E2E_RUNTIME=hermes or =langgraph via workflow_dispatch can still
|
||||
# exercise the OpenAI path.
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_KEY }}
|
||||
E2E_RUNTIME: ${{ github.event.inputs.runtime || 'hermes' }}
|
||||
E2E_RUNTIME: ${{ github.event.inputs.runtime || 'claude-code' }}
|
||||
# Pin the model when running on the default claude-code path —
|
||||
# the per-runtime default ("sonnet") routes to direct Anthropic
|
||||
# and defeats the cost saving. Operators can override via the
|
||||
# workflow_dispatch flow (no input wired here yet — runtime
|
||||
# override is enough for ad-hoc).
|
||||
E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'langgraph' && 'openai:gpt-4o' || 'MiniMax-M2.7-highspeed' }}
|
||||
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
|
||||
|
||||
@@ -102,13 +123,45 @@ jobs:
|
||||
fi
|
||||
echo "Admin token present ✓"
|
||||
|
||||
- name: Verify OpenAI key present
|
||||
- name: Verify LLM key present
|
||||
run: |
|
||||
if [ -z "$E2E_OPENAI_API_KEY" ]; then
|
||||
echo "::error::MOLECULE_STAGING_OPENAI_KEY secret not set — workspaces will fail at boot with 'No provider API key found'"
|
||||
# Per-runtime key check — claude-code uses MiniMax (or direct-Anthropic); hermes /
|
||||
# langgraph (operator-dispatched only) use OpenAI. Hard-fail
|
||||
# rather than soft-skip per #2578's lesson — empty key
|
||||
# silently falls through to the wrong SECRETS_JSON branch and
|
||||
# produces a confusing auth error 5 min later instead of the
|
||||
# clean "secret missing" message at the top.
|
||||
case "${E2E_RUNTIME}" in
|
||||
claude-code)
|
||||
# Either MiniMax OR direct-Anthropic works — first
|
||||
# non-empty wins in the test script's secrets-injection
|
||||
# priority chain.
|
||||
if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
|
||||
required_secret_name="MOLECULE_STAGING_MINIMAX_API_KEY"
|
||||
required_secret_value="${E2E_MINIMAX_API_KEY}"
|
||||
elif [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
|
||||
required_secret_name="MOLECULE_STAGING_ANTHROPIC_API_KEY"
|
||||
required_secret_value="${E2E_ANTHROPIC_API_KEY}"
|
||||
else
|
||||
required_secret_name="MOLECULE_STAGING_MINIMAX_API_KEY or MOLECULE_STAGING_ANTHROPIC_API_KEY"
|
||||
required_secret_value=""
|
||||
fi
|
||||
;;
|
||||
langgraph|hermes)
|
||||
required_secret_name="MOLECULE_STAGING_OPENAI_KEY"
|
||||
required_secret_value="${E2E_OPENAI_API_KEY:-}"
|
||||
;;
|
||||
*)
|
||||
echo "::warning::Unknown E2E_RUNTIME='${E2E_RUNTIME}' — skipping LLM-key check"
|
||||
required_secret_name=""
|
||||
required_secret_value="present"
|
||||
;;
|
||||
esac
|
||||
if [ -n "$required_secret_name" ] && [ -z "$required_secret_value" ]; then
|
||||
echo "::error::${required_secret_name} secret not set for runtime=${E2E_RUNTIME} — workspaces will fail at boot with 'No provider API key found'"
|
||||
exit 2
|
||||
fi
|
||||
echo "OpenAI key present ✓ (len=${#E2E_OPENAI_API_KEY})"
|
||||
echo "LLM key present ✓ (runtime=${E2E_RUNTIME}, key=${required_secret_name}, len=${#required_secret_value})"
|
||||
|
||||
- name: CP staging health preflight
|
||||
run: |
|
||||
@@ -164,11 +217,27 @@ jobs:
|
||||
and o.get('instance_status') not in ('purged',)]
|
||||
print('\n'.join(candidates))
|
||||
" 2>/dev/null)
|
||||
# Per-slug verified DELETE (was `>/dev/null || true` — see
|
||||
# molecule-controlplane#420). Surface non-2xx as a workflow
|
||||
# warning naming the leaked slug; don't exit 1 (sweeper is
|
||||
# the safety net within ~45 min).
|
||||
leaks=()
|
||||
for slug in $orgs; do
|
||||
echo "Safety-net teardown: $slug"
|
||||
curl -sS -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
code=$(curl -sS -o /tmp/saas-cleanup.out -w "%{http_code}" \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/dev/null || true
|
||||
-d "{\"confirm\":\"$slug\"}" \
|
||||
|| echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
echo "[teardown] deleted $slug (HTTP $code)"
|
||||
else
|
||||
echo "::warning::saas teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/saas-cleanup.out 2>/dev/null)"
|
||||
leaks+=("$slug")
|
||||
fi
|
||||
done
|
||||
if [ ${#leaks[@]} -gt 0 ]; then
|
||||
echo "::warning::saas teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
|
||||
fi
|
||||
exit 0
|
||||
|
||||
@@ -143,10 +143,25 @@ jobs:
|
||||
and o.get('status') not in ('purged',)]
|
||||
print('\n'.join(candidates))
|
||||
" 2>/dev/null)
|
||||
# Per-slug verified DELETE — see molecule-controlplane#420.
|
||||
# Failures surface as workflow warnings; the sweeper is the
|
||||
# safety net within ~45 min.
|
||||
leaks=()
|
||||
for slug in $orgs; do
|
||||
curl -sS -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
code=$(curl -sS -o /tmp/sanity-cleanup.out -w "%{http_code}" \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/dev/null || true
|
||||
-d "{\"confirm\":\"$slug\"}" \
|
||||
|| echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
echo "[teardown] deleted $slug (HTTP $code)"
|
||||
else
|
||||
echo "::warning::sanity teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/sanity-cleanup.out 2>/dev/null)"
|
||||
leaks+=("$slug")
|
||||
fi
|
||||
done
|
||||
if [ ${#leaks[@]} -gt 0 ]; then
|
||||
echo "::warning::sanity teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
|
||||
fi
|
||||
exit 0
|
||||
|
||||
@@ -327,13 +327,19 @@ jobs:
|
||||
echo "::error::publish job did not expose a version output — cascade cannot fan out"
|
||||
exit 1
|
||||
fi
|
||||
# Source of truth: manifest.json workspace_templates (PR #2536 pruned
|
||||
# to 4 actively-supported runtimes: claude-code, hermes, openclaw, codex).
|
||||
# Removed langgraph/crewai/autogen/deepagents/gemini-cli (deprecated, no
|
||||
# shipping images); added codex (had been missing since #2512).
|
||||
# Long-term: derive this list from manifest.json so the cascade can't
|
||||
# drift again — tracked in RFC #388 as a Phase-1 invariant.
|
||||
TEMPLATES="claude-code hermes openclaw codex"
|
||||
# All 9 active workspace template repos. The PR #2536 pruning
|
||||
# ("deprecated, no shipping images") was empirically wrong:
|
||||
# continuous-synth-e2e.yml defaults to langgraph as its primary
|
||||
# canary (line 44), and every excluded template had successful
|
||||
# publish-image runs as of 2026-05-03 — none were dormant.
|
||||
# Symptom of the prune: today's a2a-sdk strict-mode fix
|
||||
# (#2566 / commit e1628c4) cascaded to 4 templates but never
|
||||
# reached langgraph, so the synth-E2E correctly canary'd a fix
|
||||
# that had landed but not deployed. Re-added the 5 templates.
|
||||
# Long-term: derive this list from manifest.json so cascade
|
||||
# scope can't drift from E2E scope — tracked in RFC #388 as a
|
||||
# Phase-1 invariant.
|
||||
TEMPLATES="claude-code hermes openclaw codex langgraph crewai autogen deepagents gemini-cli"
|
||||
FAILED=""
|
||||
for tpl in $TEMPLATES; do
|
||||
REPO="Molecule-AI/molecule-ai-workspace-template-$tpl"
|
||||
|
||||
@@ -17,7 +17,7 @@ name: redeploy-tenants-on-main
|
||||
# 1. publish-workspace-server-image completes → new :latest in GHCR.
|
||||
# 2. This workflow fires via workflow_run, waits 30s for GHCR's
|
||||
# CDN to propagate the new tag to the region the tenants pull from.
|
||||
# 3. Calls redeploy-fleet with canary_slug=hongmingwang and a 60s
|
||||
# 3. Calls redeploy-fleet with canary_slug=hongming and a 60s
|
||||
# soak. Canary proves the image boots; batches follow.
|
||||
# 4. Any failure aborts the rollout and leaves older tenants on the
|
||||
# prior image — safer default than half-and-half state.
|
||||
@@ -56,7 +56,12 @@ on:
|
||||
description: 'Tenant slug to deploy first + soak (empty = skip canary, fan out immediately).'
|
||||
required: false
|
||||
type: string
|
||||
default: 'hongmingwang'
|
||||
# Must be an actual prod tenant slug (current: hongming,
|
||||
# chloe-dong, reno-stars). The previous default 'hongmingwang'
|
||||
# didn't match any tenant — CP soft-skipped the missing canary
|
||||
# and the fleet rolled out without the soak gate, defeating the
|
||||
# whole point of canary-first.
|
||||
default: 'hongming'
|
||||
soak_seconds:
|
||||
description: 'Seconds to wait after canary before fanning out.'
|
||||
required: false
|
||||
@@ -148,7 +153,7 @@ jobs:
|
||||
CP_URL: ${{ vars.CP_URL || 'https://api.moleculesai.app' }}
|
||||
CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
|
||||
TARGET_TAG: ${{ steps.tag.outputs.target_tag }}
|
||||
CANARY_SLUG: ${{ inputs.canary_slug || 'hongmingwang' }}
|
||||
CANARY_SLUG: ${{ inputs.canary_slug || 'hongming' }}
|
||||
SOAK_SECONDS: ${{ inputs.soak_seconds || '60' }}
|
||||
BATCH_SIZE: ${{ inputs.batch_size || '3' }}
|
||||
DRY_RUN: ${{ inputs.dry_run || false }}
|
||||
|
||||
@@ -26,11 +26,22 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
# Only fire for bot-authored PRs. Human CEO PRs (staging→main promotion)
|
||||
# are intentional and pass through.
|
||||
#
|
||||
# Head-ref guard: never retarget a PR whose head IS `staging` — those
|
||||
# are the auto-promote staging→main PRs (opened by molecule-ai[bot]
|
||||
# since #2586 switched to an App token, which now passes the bot
|
||||
# filter below). Retargeting head=staging onto base=staging fails
|
||||
# with HTTP 422 "no new commits between base 'staging' and head
|
||||
# 'staging'", which used to surface as a noisy red workflow run on
|
||||
# every auto-promote (caught 2026-05-03 on PR #2588).
|
||||
if: >-
|
||||
github.event.pull_request.user.type == 'Bot'
|
||||
|| endsWith(github.event.pull_request.user.login, '[bot]')
|
||||
|| github.event.pull_request.user.login == 'app/molecule-ai'
|
||||
|| github.event.pull_request.user.login == 'molecule-ai[bot]'
|
||||
github.event.pull_request.head.ref != 'staging'
|
||||
&& (
|
||||
github.event.pull_request.user.type == 'Bot'
|
||||
|| endsWith(github.event.pull_request.user.login, '[bot]')
|
||||
|| github.event.pull_request.user.login == 'app/molecule-ai'
|
||||
|| github.event.pull_request.user.login == 'molecule-ai[bot]'
|
||||
)
|
||||
steps:
|
||||
- name: Retarget PR base to staging
|
||||
id: retarget
|
||||
|
||||
@@ -25,16 +25,23 @@ name: Sweep stale e2e-* orgs (staging)
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Every hour on the hour. E2E orgs are short-lived (~10-25 min wall
|
||||
# clock from create to teardown). Anything older than the
|
||||
# MAX_AGE_MINUTES threshold below is presumed dead.
|
||||
- cron: '0 * * * *'
|
||||
# Every 15 min. E2E orgs are short-lived (~8-25 min wall clock from
|
||||
# create to teardown — canary is ~8 min, full SaaS ~25 min). The
|
||||
# previous hourly + 120-min stale threshold meant a leaked tenant
|
||||
# could keep an EC2 alive for up to 2 hours, eating ~2 vCPU per
|
||||
# leak. Tightening the cadence + threshold reduces the worst-case
|
||||
# leak window from 120 min to ~45 min (15-min sweep cadence + 30-min
|
||||
# threshold) without risk of catching in-progress runs (the longest
|
||||
# e2e run is the ~25-min full SaaS run, still under the 30-min threshold).
|
||||
# See molecule-controlplane#420 for the leak-class accounting that
|
||||
# motivated this tightening.
|
||||
- cron: '*/15 * * * *'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
max_age_minutes:
|
||||
description: "Delete e2e-* orgs older than N minutes (default 120)"
|
||||
description: "Delete e2e-* orgs older than N minutes (default 30)"
|
||||
required: false
|
||||
default: "120"
|
||||
default: "30"
|
||||
dry_run:
|
||||
description: "Dry run only — list what would be deleted"
|
||||
required: false
|
||||
@@ -58,7 +65,7 @@ jobs:
|
||||
env:
|
||||
MOLECULE_CP_URL: https://staging-api.moleculesai.app
|
||||
ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
|
||||
MAX_AGE_MINUTES: ${{ github.event.inputs.max_age_minutes || '120' }}
|
||||
MAX_AGE_MINUTES: ${{ github.event.inputs.max_age_minutes || '30' }}
|
||||
DRY_RUN: ${{ github.event.inputs.dry_run || 'false' }}
|
||||
# Refuse to delete more than this many orgs in one tick. If the
|
||||
# CP DB is briefly empty (or the admin endpoint goes weird and
|
||||
|
||||
+50
-2
@@ -1,7 +1,7 @@
|
||||
# Coverage Floor
|
||||
|
||||
CI enforces three coverage gates on `workspace-server` (Go). All defined in
|
||||
`.github/workflows/ci.yml` → `platform-build` job.
|
||||
CI enforces coverage gates on two surfaces — `workspace-server` (Go) and
|
||||
`workspace/` (Python). All defined in `.github/workflows/ci.yml`.
|
||||
|
||||
## Current floors (2026-04-23)
|
||||
|
||||
@@ -76,3 +76,51 @@ This gate makes "no untested critical paths merged" a mechanical property of
|
||||
the CI, not a behavioural property of QA agents or individual reviewers —
|
||||
which is the only way to make it survive fleet outages, agent rotations, or
|
||||
QA process changes.
|
||||
|
||||
## Python (workspace/) — added 2026-05-04 from #2790
|
||||
|
||||
The Python side has its own gates in the `python-lint` job:
|
||||
|
||||
| Gate | Threshold | Where |
|
||||
|---|---|---|
|
||||
| **Total floor** | `86%` | `workspace/pytest.ini` `--cov-fail-under=86` (issue #1817) |
|
||||
| **Critical-path per-file floor** | `75%` | Inline shell step after the pytest run |
|
||||
|
||||
### Critical-path Python files
|
||||
|
||||
These handle multi-tenant routing, auth tokens, and inbox dispatch. A
|
||||
coverage drop here is the same risk shape as a Go-side `tokens*` /
|
||||
`secrets*` file regressing below 10%.
|
||||
|
||||
- `workspace/a2a_mcp_server.py` — MCP dispatcher (PR #2766 / #2771)
|
||||
- `workspace/mcp_cli.py` — molecule-mcp standalone CLI entry
|
||||
- `workspace/a2a_tools.py` — workspace-scoped tool implementations
|
||||
- `workspace/inbox.py` — multi-workspace inbox + per-workspace cursors
|
||||
- `workspace/platform_auth.py` — per-workspace token resolver
|
||||
|
||||
### Why 75% (vs 86% total)
|
||||
|
||||
The total floor averages ~6000 lines across `workspace/`. A single MCP
|
||||
file could drop to ~50% with no CI complaint as long as other modules
|
||||
compensate. The per-file floor closes that distribution gap. 75% sits
|
||||
below current actuals (80–96% as of 2026-05-04) — strictly additive,
|
||||
no existing PR fails.
|
||||
|
||||
### Python ratchet plan
|
||||
|
||||
| Date | Total | Per-file critical | Notes |
|
||||
|---|---|---|---|
|
||||
| 2026-05-04 | 86% | 75% | Initial gate (this file). |
|
||||
| 2026-06-04 | 86% | 80% | First ratchet — at-floor files must catch up. |
|
||||
| 2026-07-04 | 88% | 85% | |
|
||||
| 2026-08-04 | 90% | 90% | Target steady-state. |
|
||||
|
||||
### Why this Python gate exists
|
||||
|
||||
Issue #2790, after the PR #2766 → PR #2771 cycle. PR #2766 added
|
||||
multi-workspace routing through `a2a_tools.py` + `a2a_mcp_server.py`,
|
||||
shipped to main with green CI, but the dispatcher silently dropped a
|
||||
load-bearing kwarg for 4 of 9 tools — caught only by post-merge code
|
||||
review. The structural drift gate (`test_dispatcher_schema_drift.py`,
|
||||
PR #2791) catches the schema↔dispatcher mismatch class; this floor
|
||||
catches the broader "MCP-critical file regressed" class.
|
||||
|
||||
@@ -169,7 +169,17 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
orgID = row.id;
|
||||
return true;
|
||||
}
|
||||
if (row.instance_status === "failed") throw new Error(`provision failed: ${slug}`);
|
||||
if (row.instance_status === "failed") {
|
||||
// Dump every diagnostic field the admin row carries — boot stage,
|
||||
// last error, terraform/SSM state, etc. The bare slug message used
|
||||
// to surface ZERO context, so triaging a failed provision meant
|
||||
// re-running locally to repro. Now the failure log carries enough
|
||||
// to point at the right subsystem (CP/AWS/SSM/runtime) without a
|
||||
// second round-trip.
|
||||
throw new Error(
|
||||
`provision failed: ${slug} — admin-orgs row: ${JSON.stringify(row)}`,
|
||||
);
|
||||
}
|
||||
return null;
|
||||
},
|
||||
PROVISION_TIMEOUT_MS,
|
||||
@@ -249,7 +259,17 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
if (r.status !== 200) return null;
|
||||
if (r.body?.status === "online") return true;
|
||||
if (r.body?.status === "failed") {
|
||||
throw new Error(`Workspace failed: ${r.body.last_sample_error || ""}`);
|
||||
// last_sample_error is often empty when the failure happens before
|
||||
// the agent emits a sample (e.g. boot crash, image pull error,
|
||||
// missing PYTHONPATH, OpenAI quota at startup). Dumping the full
|
||||
// body gives triage the boot_stage / last_error / image fields it
|
||||
// needs without a second probe. Otherwise this propagates as a
|
||||
// bare "Workspace failed: " — the exact useless message that
|
||||
// sent #2632 to the issue tracker.
|
||||
const detail = r.body.last_sample_error
|
||||
? r.body.last_sample_error
|
||||
: `(no last_sample_error) full body: ${JSON.stringify(r.body)}`;
|
||||
throw new Error(`Workspace failed: ${detail}`);
|
||||
}
|
||||
return null;
|
||||
},
|
||||
|
||||
@@ -1,6 +1,15 @@
|
||||
@import "tailwindcss";
|
||||
@plugin "@tailwindcss/typography";
|
||||
|
||||
/*
|
||||
* Tailwind v4 defaults the `dark:` variant to `prefers-color-scheme: dark`.
|
||||
* Our theme switcher writes `data-theme="dark"` on <html> instead (so user
|
||||
* choice via the toggle wins over OS preference). Re-bind `dark:` to that
|
||||
* attribute so component classes like `dark:bg-zinc-800` track the same
|
||||
* source of truth as the `[data-theme="dark"]` token overrides below.
|
||||
*/
|
||||
@custom-variant dark (&:where([data-theme="dark"], [data-theme="dark"] *));
|
||||
|
||||
/*
|
||||
* Load order:
|
||||
* 1. Tailwind core (v4) — provides preflight + utility generation.
|
||||
|
||||
@@ -54,7 +54,7 @@ export default function Home() {
|
||||
if (hydrating) {
|
||||
return (
|
||||
<div className="fixed inset-0 flex items-center justify-center bg-surface">
|
||||
<div className="flex flex-col items-center gap-3">
|
||||
<div role="status" aria-live="polite" className="flex flex-col items-center gap-3">
|
||||
<Spinner size="lg" />
|
||||
<span className="text-xs text-ink-soft">Loading canvas...</span>
|
||||
</div>
|
||||
|
||||
@@ -138,14 +138,37 @@ export function A2ATopologyOverlay() {
|
||||
// Stable Zustand action reference — safe to call inside effects
|
||||
const setA2AEdges = useCanvasStore((s) => s.setA2AEdges);
|
||||
|
||||
// Read the nodes array as a primitive ref; derive visible IDs outside the selector
|
||||
const nodes = useCanvasStore((s) => s.nodes);
|
||||
// Subscribe to a STABLE STRING KEY of visible workspace IDs, not the
|
||||
// nodes array itself. Zustand returns a new array reference on every
|
||||
// store update (status flips, position drags, peer-discovery writes,
|
||||
// workspace-tab opens, etc.) — even when the set of visible IDs is
|
||||
// unchanged. Selecting a sorted-CSV string makes Zustand's default
|
||||
// Object.is equality check short-circuit the re-render unless the actual ID set
|
||||
// changes.
|
||||
//
|
||||
// Why this matters: previously visibleIds was useMemo'd on `nodes`, so
|
||||
// the array reference recreated on every store mutation. fetchAndUpdate
|
||||
// (useCallback'd on visibleIds) then recreated, the useEffect re-fired,
|
||||
// it tore down the 60s setInterval and immediately re-ran the fan-out.
|
||||
// With ~5 store updates/second from heartbeats + polling, the canvas
|
||||
// hammered /workspaces/<id>/activity?type=delegation 5×N requests/sec
|
||||
// until edge rate-limit kicked in with HTTP 429. The recursive React
|
||||
// render trace in the original bug report (uE → ux → uE → ux ...) is
|
||||
// the symptom of this re-render storm.
|
||||
//
|
||||
// The fix is purely the dependency-stability change here; the fetch
|
||||
// logic is unchanged.
|
||||
const visibleIdsKey = useCanvasStore((s) =>
|
||||
s.nodes
|
||||
.filter((n) => !n.hidden)
|
||||
.map((n) => n.id)
|
||||
.sort()
|
||||
.join(",")
|
||||
);
|
||||
|
||||
// IDs of visible (non-nested, non-hidden) workspace nodes.
|
||||
// Recomputed only when visibleIdsKey (the sorted set of visible IDs) changes.
|
||||
const visibleIds = useMemo(
|
||||
() => nodes.filter((n) => !n.hidden).map((n) => n.id),
|
||||
[nodes]
|
||||
() => (visibleIdsKey ? visibleIdsKey.split(",") : []),
|
||||
[visibleIdsKey]
|
||||
);
|
||||
|
||||
// Fetch delegation activity for all visible workspaces and rebuild overlay edges.
|
||||
|
||||
@@ -73,14 +73,19 @@ export function ApprovalBanner() {
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => handleDecide(approval, "approved")}
|
||||
className="px-3 py-1.5 bg-emerald-600 hover:bg-emerald-500 text-xs rounded-lg text-white font-medium transition-colors"
|
||||
// Hover DARKER, not lighter — white text on emerald-500
|
||||
// drops contrast vs emerald-700.
|
||||
className="px-3 py-1.5 bg-emerald-600 hover:bg-emerald-700 text-xs rounded-lg text-white font-medium transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-offset-2 focus-visible:ring-offset-amber-950 focus-visible:ring-emerald-400/70"
|
||||
>
|
||||
Approve
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => handleDecide(approval, "denied")}
|
||||
className="px-3 py-1.5 bg-surface-card hover:bg-surface-card text-xs rounded-lg text-ink-mid transition-colors"
|
||||
// Was a no-op hover (`bg-surface-card hover:bg-surface-card`).
|
||||
// Lift to surface-elevated on hover so the button visibly
|
||||
// responds before a destructive deny.
|
||||
className="px-3 py-1.5 bg-surface-card hover:bg-surface-elevated hover:text-ink text-xs rounded-lg text-ink-mid transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-offset-2 focus-visible:ring-offset-amber-950 focus-visible:ring-amber-400/70"
|
||||
>
|
||||
Deny
|
||||
</button>
|
||||
|
||||
@@ -30,6 +30,24 @@ export function BatchActionBar() {
|
||||
if (count === 0 && hasFailedBatch) setHasFailedBatch(false);
|
||||
}, [count, hasFailedBatch]);
|
||||
|
||||
// Esc clears selection — the deselect button title has been promising
|
||||
// "(Escape)" since the bar shipped, but no handler was wired. Skip when
|
||||
// the confirm dialog is open (`pending !== null`) so the dialog's own
|
||||
// Esc-cancel takes precedence and we don't double-handle the keystroke.
|
||||
// Also skip during a busy in-flight action so the user can't accidentally
|
||||
// strand a partial-failure mid-flight.
|
||||
useEffect(() => {
|
||||
if (count === 0 || pending !== null || busy) return;
|
||||
const onKey = (e: KeyboardEvent) => {
|
||||
if (e.key === "Escape") {
|
||||
e.stopPropagation();
|
||||
clearSelection();
|
||||
}
|
||||
};
|
||||
window.addEventListener("keydown", onKey);
|
||||
return () => window.removeEventListener("keydown", onKey);
|
||||
}, [count, pending, busy, clearSelection]);
|
||||
|
||||
// Hide when nothing is selected. Hide for single-node selection UNLESS a
|
||||
// partial-failure left a survivor awaiting retry.
|
||||
if (count === 0) return null;
|
||||
@@ -129,7 +147,7 @@ export function BatchActionBar() {
|
||||
onClick={clearSelection}
|
||||
aria-label="Clear selection"
|
||||
title="Clear selection (Escape)"
|
||||
className="p-1.5 rounded-lg text-[12px] text-ink-mid hover:text-ink hover:bg-surface-card/50 transition-colors disabled:opacity-50 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-zinc-500/70"
|
||||
className="p-1.5 rounded-lg text-[12px] text-ink-mid hover:text-ink hover:bg-surface-card/50 transition-colors disabled:opacity-50 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent/50"
|
||||
>
|
||||
✕
|
||||
</button>
|
||||
|
||||
@@ -117,9 +117,11 @@ export function BundleDropZone() {
|
||||
📦 Import bundle
|
||||
</button>
|
||||
|
||||
{/* Visual overlay when dragging */}
|
||||
{/* Visual overlay when dragging — was hardcoded blue-950/blue-400
|
||||
which doesn't flip with theme. accent colors stay visually
|
||||
consistent with the rest of the canvas in both modes. */}
|
||||
{isDragging && (
|
||||
<div className="fixed inset-0 z-20 flex items-center justify-center bg-blue-950/40 backdrop-blur-sm border-2 border-dashed border-blue-400/50 pointer-events-none">
|
||||
<div className="fixed inset-0 z-20 flex items-center justify-center bg-accent/15 backdrop-blur-sm border-2 border-dashed border-accent/40 pointer-events-none">
|
||||
<div className="bg-surface-sunken/95 border border-accent/50 rounded-2xl px-8 py-6 shadow-2xl text-center">
|
||||
<div className="text-3xl mb-2" aria-hidden="true">📦</div>
|
||||
<div className="text-sm font-semibold text-ink">Drop Bundle to Import</div>
|
||||
@@ -128,10 +130,21 @@ export function BundleDropZone() {
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Importing spinner */}
|
||||
{/* Importing indicator — role=status + aria-live so SR users hear
|
||||
"Importing bundle..." while the API call is in flight, not just
|
||||
the result toast that fires after. motion-safe:animate-spin
|
||||
respects prefers-reduced-motion (Tailwind's motion-safe variant
|
||||
gates animation on the user's OS setting). */}
|
||||
{importing && (
|
||||
<div className="fixed bottom-6 left-1/2 -translate-x-1/2 z-50 bg-surface-sunken/95 border border-line/60 rounded-xl px-5 py-3 shadow-2xl flex items-center gap-3">
|
||||
<div className="w-4 h-4 border-2 border-sky-400 border-t-transparent rounded-full animate-spin" />
|
||||
<div
|
||||
role="status"
|
||||
aria-live="polite"
|
||||
className="fixed bottom-6 left-1/2 -translate-x-1/2 z-50 bg-surface-sunken/95 border border-line/60 rounded-xl px-5 py-3 shadow-2xl flex items-center gap-3"
|
||||
>
|
||||
<div
|
||||
aria-hidden="true"
|
||||
className="w-4 h-4 border-2 border-accent border-t-transparent rounded-full motion-safe:animate-spin"
|
||||
/>
|
||||
<span className="text-sm text-ink">Importing bundle...</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -32,11 +32,18 @@ export function CommunicationOverlay() {
|
||||
|
||||
const fetchComms = useCallback(async () => {
|
||||
try {
|
||||
// Fetch activity from all online workspaces
|
||||
// Fan-out cap: each polled workspace = 1 round-trip. The platform
|
||||
// rate limits at 600 req/min/IP; combined with heartbeats + other
|
||||
// canvas polling, every workspace polled here costs ~2 req/min
|
||||
// (1 every 30s × 1 per workspace). Capping at 3 keeps this
|
||||
// overlay's footprint at 6 req/min worst case — well under
|
||||
// budget even with 8+ workspaces visible. Caught 2026-05-04 when
|
||||
// a user with 8+ workspaces (Design Director + 6 sub-agents +
|
||||
// 3 standalones) saw sustained 429s in canvas console.
|
||||
const onlineNodes = nodesRef.current.filter((n) => n.data.status === "online");
|
||||
const allComms: Communication[] = [];
|
||||
|
||||
for (const node of onlineNodes.slice(0, 6)) {
|
||||
for (const node of onlineNodes.slice(0, 3)) {
|
||||
try {
|
||||
const activities = await api.get<Array<{
|
||||
id: string;
|
||||
@@ -91,10 +98,20 @@ export function CommunicationOverlay() {
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
// Gate polling on visibility — when the user collapses the overlay
|
||||
// the data isn't being read, so the per-workspace fan-out becomes
|
||||
// pure rate-limit overhead. Pre-fix this overlay polled regardless
|
||||
// of whether the panel was shown, costing ~36 req/min from a
|
||||
// hidden surface.
|
||||
if (!visible) return;
|
||||
fetchComms();
|
||||
const interval = setInterval(fetchComms, 10000);
|
||||
// 30s cadence (was 10s). At 3-workspace fan-out that's 6 req/min
|
||||
// worst case from this overlay. Combined with heartbeats (~30/min)
|
||||
// and other canvas polling, leaves ample headroom under the 600/
|
||||
// min/IP server-side rate limit even at 8+ workspace tenants.
|
||||
const interval = setInterval(fetchComms, 30000);
|
||||
return () => clearInterval(interval);
|
||||
}, [fetchComms]);
|
||||
}, [fetchComms, visible]);
|
||||
|
||||
if (!visible || comms.length === 0) {
|
||||
return (
|
||||
|
||||
@@ -91,12 +91,15 @@ export function ConfirmDialog({
|
||||
|
||||
if (!open || !mounted) return null;
|
||||
|
||||
// Hover goes DARKER, not lighter — white text on the lighter shades drops
|
||||
// contrast below AA on the accent and red ramps. Darker hovers stay
|
||||
// readable in both light and dark themes.
|
||||
const confirmColors =
|
||||
confirmVariant === "danger"
|
||||
? "bg-red-600 hover:bg-red-500 text-white"
|
||||
? "bg-red-600 hover:bg-red-700 text-white"
|
||||
: confirmVariant === "warning"
|
||||
? "bg-amber-600 hover:bg-amber-500 text-white"
|
||||
: "bg-accent-strong hover:bg-accent text-white";
|
||||
? "bg-amber-600 hover:bg-amber-700 text-white"
|
||||
: "bg-accent hover:bg-accent-strong text-white";
|
||||
|
||||
// Render via Portal so the fixed-position dialog escapes any containing block
|
||||
// (e.g. parents with transform, filter, will-change that break position:fixed).
|
||||
@@ -123,7 +126,7 @@ export function ConfirmDialog({
|
||||
<button
|
||||
type="button"
|
||||
onClick={onCancel}
|
||||
className="px-3.5 py-1.5 text-[13px] text-ink-mid hover:text-ink bg-surface-card hover:bg-surface-card border border-line rounded-lg transition-colors"
|
||||
className="px-3.5 py-1.5 text-[13px] text-ink-mid hover:text-ink bg-surface-card hover:bg-surface-elevated border border-line hover:border-line-soft rounded-lg transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
@@ -131,7 +134,7 @@ export function ConfirmDialog({
|
||||
<button
|
||||
type="button"
|
||||
onClick={onConfirm}
|
||||
className={`px-3.5 py-1.5 text-[13px] rounded-lg transition-colors ${confirmColors}`}
|
||||
className={`px-3.5 py-1.5 text-[13px] rounded-lg transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-offset-2 focus-visible:ring-offset-surface-sunken focus-visible:ring-accent/60 ${confirmColors}`}
|
||||
>
|
||||
{confirmLabel}
|
||||
</button>
|
||||
|
||||
@@ -113,7 +113,10 @@ export function ConsoleModal({ workspaceId, workspaceName, open, onClose }: Prop
|
||||
ref={closeButtonRef}
|
||||
onClick={onClose}
|
||||
aria-label="Close"
|
||||
className="text-ink-mid hover:text-ink text-sm px-2"
|
||||
// 24x24 touch target, the WCAG 2.5.8 minimum (was ~10x16).
|
||||
// Hover bg makes the area visible; focus-visible ring matches
|
||||
// the rest of the canvas chrome.
|
||||
className="w-6 h-6 inline-flex items-center justify-center rounded text-sm text-ink-mid hover:text-ink hover:bg-surface-card/40 focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 transition-colors"
|
||||
>
|
||||
✕
|
||||
</button>
|
||||
@@ -150,12 +153,19 @@ export function ConsoleModal({ workspaceId, workspaceName, open, onClose }: Prop
|
||||
type="button"
|
||||
onClick={() => {
|
||||
if (navigator.clipboard) {
|
||||
navigator.clipboard.writeText(output);
|
||||
// Add success feedback — without it, clicking Copy
|
||||
// looked like a no-op since the previous hover bg was
|
||||
// also a no-op (`hover:bg-surface-card` on top of the
|
||||
// same base). Toast confirms the write actually fired.
|
||||
navigator.clipboard
|
||||
.writeText(output)
|
||||
.then(() => showToast("Console output copied", "success"))
|
||||
.catch(() => showToast("Copy failed", "error"));
|
||||
} else {
|
||||
showToast("Copy requires HTTPS — please select and copy manually", "info");
|
||||
}
|
||||
}}
|
||||
className="px-3 py-1.5 text-[11px] text-ink-mid hover:text-ink bg-surface-card hover:bg-surface-card border border-line rounded-lg transition-colors"
|
||||
className="px-3 py-1.5 text-[11px] text-ink-mid hover:text-ink bg-surface-card hover:bg-surface-elevated border border-line hover:border-line-soft rounded-lg transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-2 focus-visible:ring-offset-surface"
|
||||
>
|
||||
Copy
|
||||
</button>
|
||||
@@ -163,7 +173,10 @@ export function ConsoleModal({ workspaceId, workspaceName, open, onClose }: Prop
|
||||
<button
|
||||
type="button"
|
||||
onClick={onClose}
|
||||
className="px-3 py-1.5 text-[11px] text-ink-mid bg-surface-card hover:bg-surface-card border border-line rounded-lg transition-colors"
|
||||
// Was hover:bg-surface-card (same as base — silent no-op).
|
||||
// Lift to surface-elevated so the button visibly responds,
|
||||
// matching the Cancel button in ConfirmDialog.
|
||||
className="px-3 py-1.5 text-[11px] text-ink-mid hover:text-ink bg-surface-card hover:bg-surface-elevated border border-line hover:border-line-soft rounded-lg transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-2 focus-visible:ring-offset-surface"
|
||||
>
|
||||
Close
|
||||
</button>
|
||||
|
||||
@@ -29,15 +29,38 @@ export function ContextMenu() {
|
||||
const setPendingDelete = useCanvasStore((s) => s.setPendingDelete);
|
||||
const ref = useRef<HTMLDivElement>(null);
|
||||
const [actionLoading, setActionLoading] = useState(false);
|
||||
// Clamped position — (left, top) from contextMenu may overflow when the
|
||||
// user right-clicks near the right/bottom viewport edge. We measure the
|
||||
// rendered menu and shift it back inside on the same frame the cursor
|
||||
// opens it, so it never visibly clips. Falls back to the raw cursor
|
||||
// coords until the rAF runs.
|
||||
const [clamped, setClamped] = useState<{ x: number; y: number } | null>(null);
|
||||
|
||||
// Auto-focus first enabled item when menu opens
|
||||
// Auto-focus first enabled item when menu opens, AND clamp position.
|
||||
// Both run together in a single rAF so we avoid two synchronous layout
|
||||
// reads + a paint between them.
|
||||
useEffect(() => {
|
||||
if (!contextMenu) return;
|
||||
requestAnimationFrame(() => {
|
||||
const first = ref.current?.querySelector<HTMLButtonElement>("button:not(:disabled)");
|
||||
setClamped(null);
|
||||
const raf = requestAnimationFrame(() => {
|
||||
const node = ref.current;
|
||||
if (!node) return;
|
||||
const first = node.querySelector<HTMLButtonElement>("button:not(:disabled)");
|
||||
first?.focus();
|
||||
// 8px viewport margin so the menu doesn't kiss the edge — matches
|
||||
// the floating-tooltip top-edge clamp in Tooltip.tsx.
|
||||
const margin = 8;
|
||||
const rect = node.getBoundingClientRect();
|
||||
const vw = window.innerWidth;
|
||||
const vh = window.innerHeight;
|
||||
let x = contextMenu.x;
|
||||
let y = contextMenu.y;
|
||||
if (x + rect.width + margin > vw) x = Math.max(margin, vw - rect.width - margin);
|
||||
if (y + rect.height + margin > vh) y = Math.max(margin, vh - rect.height - margin);
|
||||
if (x !== contextMenu.x || y !== contextMenu.y) setClamped({ x, y });
|
||||
});
|
||||
}, [contextMenu?.nodeId]);
|
||||
return () => cancelAnimationFrame(raf);
|
||||
}, [contextMenu?.nodeId, contextMenu?.x, contextMenu?.y]);
|
||||
|
||||
// Close on click outside or Escape
|
||||
useEffect(() => {
|
||||
@@ -288,7 +311,7 @@ export function ContextMenu() {
|
||||
aria-label={`Actions for ${contextMenu.nodeData.name}`}
|
||||
onKeyDown={handleMenuKeyDown}
|
||||
className="fixed z-[60] min-w-[200px] bg-surface/95 backdrop-blur-xl border border-line/60 rounded-xl shadow-2xl shadow-black/60 py-1 overflow-hidden"
|
||||
style={{ left: contextMenu.x, top: contextMenu.y }}
|
||||
style={{ left: clamped?.x ?? contextMenu.x, top: clamped?.y ?? contextMenu.y }}
|
||||
>
|
||||
{/* Header */}
|
||||
<div className="px-3.5 py-2 border-b border-line/40 mb-0.5">
|
||||
@@ -314,7 +337,7 @@ export function ContextMenu() {
|
||||
onClick={item.action}
|
||||
disabled={item.disabled}
|
||||
aria-disabled={item.disabled}
|
||||
className={`w-full px-3.5 py-1.5 flex items-center gap-2.5 text-left text-[11px] transition-colors focus:outline-none focus:ring-1 focus:ring-inset focus:ring-zinc-600 disabled:opacity-25 disabled:cursor-not-allowed ${
|
||||
className={`w-full px-3.5 py-1.5 flex items-center gap-2.5 text-left text-[11px] transition-colors focus:outline-none focus-visible:ring-1 focus-visible:ring-inset focus-visible:ring-accent/50 disabled:opacity-25 disabled:cursor-not-allowed ${
|
||||
item.danger
|
||||
? "text-bad hover:bg-red-950/40 hover:text-bad"
|
||||
: "text-ink-mid hover:bg-surface-card/40 hover:text-ink"
|
||||
|
||||
@@ -98,9 +98,17 @@ export function CookieConsent() {
|
||||
};
|
||||
|
||||
return (
|
||||
<div
|
||||
role="dialog"
|
||||
aria-modal="true"
|
||||
// role="region" + aria-label, NOT role="dialog" + aria-modal. The
|
||||
// banner is informational — it never blocks the page, never traps
|
||||
// focus, and the user can keep using the canvas while it's up.
|
||||
// Claiming aria-modal="true" without a focus trap is genuinely
|
||||
// harmful for screen-reader users: they get told the rest of the
|
||||
// page is inert, jump into the banner, and then can't escape.
|
||||
// Region semantics let assistive tech navigate around it normally.
|
||||
// (Also: forcing a modal cookie banner would be a dark pattern —
|
||||
// GDPR explicitly discourages it.)
|
||||
<section
|
||||
role="region"
|
||||
aria-labelledby="cookie-consent-title"
|
||||
aria-describedby="cookie-consent-body"
|
||||
className="fixed bottom-0 left-0 right-0 z-[9999] border-t border-line bg-surface/95 backdrop-blur-sm p-4 shadow-[0_-4px_12px_rgba(0,0,0,0.4)]"
|
||||
@@ -117,7 +125,7 @@ export function CookieConsent() {
|
||||
workspaces). See our{" "}
|
||||
<a
|
||||
href="https://moleculesai.app/legal/privacy"
|
||||
className="text-accent underline hover:text-accent"
|
||||
className="text-accent underline underline-offset-2 hover:text-accent-strong focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 rounded-sm"
|
||||
target="_blank"
|
||||
rel="noreferrer"
|
||||
>
|
||||
@@ -130,20 +138,20 @@ export function CookieConsent() {
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => decide("rejected")}
|
||||
className="rounded border border-line bg-surface-sunken px-4 py-2 text-sm text-ink hover:bg-surface-card"
|
||||
className="rounded border border-line bg-surface-sunken px-4 py-2 text-sm text-ink hover:bg-surface-card focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-2 focus-visible:ring-offset-surface"
|
||||
>
|
||||
Necessary only
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => decide("accepted")}
|
||||
className="rounded border border-accent bg-accent-strong px-4 py-2 text-sm font-medium text-white hover:bg-accent"
|
||||
className="rounded border border-accent bg-accent-strong px-4 py-2 text-sm font-medium text-white hover:bg-accent focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-2 focus-visible:ring-offset-surface"
|
||||
>
|
||||
Accept all
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -310,7 +310,7 @@ export function CreateWorkspaceButton() {
|
||||
return (
|
||||
<Dialog.Root open={open} onOpenChange={setOpen}>
|
||||
<Dialog.Trigger asChild>
|
||||
<button type="button" className="fixed bottom-6 right-6 z-40 px-5 py-2.5 bg-accent-strong hover:bg-accent active:bg-accent-strong text-sm font-medium rounded-xl text-white shadow-lg shadow-blue-600/20 hover:shadow-xl hover:shadow-blue-500/30 transition-all duration-200 flex items-center gap-2">
|
||||
<button type="button" className="fixed bottom-6 right-6 z-40 px-5 py-2.5 bg-accent hover:bg-accent-strong active:bg-accent text-sm font-medium rounded-xl text-white shadow-lg shadow-accent/20 hover:shadow-xl hover:shadow-accent/30 transition-all duration-200 flex items-center gap-2 focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-2 focus-visible:ring-offset-surface">
|
||||
<svg
|
||||
width="14"
|
||||
height="14"
|
||||
@@ -502,7 +502,7 @@ export function CreateWorkspaceButton() {
|
||||
placeholder="sk-…"
|
||||
aria-label="Hermes API key"
|
||||
autoComplete="off"
|
||||
className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink placeholder-zinc-600 focus:outline-none focus:border-violet-500/60 focus:ring-1 focus:ring-violet-500/20 transition-colors font-mono"
|
||||
className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink placeholder-ink-soft focus:outline-none focus:border-violet-500/60 focus:ring-1 focus:ring-violet-500/20 transition-colors font-mono"
|
||||
/>
|
||||
</div>
|
||||
|
||||
@@ -527,7 +527,7 @@ export function CreateWorkspaceButton() {
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
list="hermes-model-suggestions"
|
||||
className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink placeholder-zinc-600 focus:outline-none focus:border-violet-500/60 focus:ring-1 focus:ring-violet-500/20 transition-colors font-mono"
|
||||
className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink placeholder-ink-soft focus:outline-none focus:border-violet-500/60 focus:ring-1 focus:ring-violet-500/20 transition-colors font-mono"
|
||||
/>
|
||||
<datalist id="hermes-model-suggestions">
|
||||
{HERMES_PROVIDERS.find((p) => p.id === hermesProvider)?.models.map(
|
||||
@@ -552,7 +552,7 @@ export function CreateWorkspaceButton() {
|
||||
|
||||
<div className="flex justify-end gap-2.5 mt-6">
|
||||
<Dialog.Close asChild>
|
||||
<button type="button" className="px-4 py-2 bg-surface-card hover:bg-surface-card text-sm rounded-lg text-ink-mid transition-colors">
|
||||
<button type="button" className="px-4 py-2 bg-surface-card hover:bg-surface-elevated hover:text-ink text-sm rounded-lg text-ink-mid transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40 focus-visible:ring-offset-1 focus-visible:ring-offset-surface">
|
||||
Cancel
|
||||
</button>
|
||||
</Dialog.Close>
|
||||
@@ -560,7 +560,7 @@ export function CreateWorkspaceButton() {
|
||||
type="button"
|
||||
onClick={handleCreate}
|
||||
disabled={creating}
|
||||
className="px-5 py-2 bg-accent-strong hover:bg-accent active:bg-accent-strong text-sm rounded-lg text-white disabled:opacity-50 transition-colors"
|
||||
className="px-5 py-2 bg-accent hover:bg-accent-strong active:bg-accent text-sm rounded-lg text-white disabled:opacity-50 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-1 focus-visible:ring-offset-surface"
|
||||
>
|
||||
{creating ? "Creating..." : "Create"}
|
||||
</button>
|
||||
@@ -623,7 +623,7 @@ function InputField({
|
||||
placeholder={placeholder}
|
||||
min={type === "number" ? "0" : undefined}
|
||||
step={type === "number" ? "0.01" : undefined}
|
||||
className={`w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink placeholder-zinc-500 focus:outline-none focus:border-accent/60 focus:ring-1 focus:ring-accent/20 transition-colors ${mono ? "font-mono text-xs" : ""}`}
|
||||
className={`w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink placeholder-ink-soft focus:outline-none focus:border-accent/60 focus:ring-1 focus:ring-accent/20 transition-colors ${mono ? "font-mono text-xs" : ""}`}
|
||||
/>
|
||||
{helper && (
|
||||
<p className="mt-1 text-xs text-ink-soft">{helper}</p>
|
||||
|
||||
@@ -127,13 +127,16 @@ export function DeleteCascadeConfirmDialog({
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{/* Checkbox guard */}
|
||||
{/* Checkbox guard. Ring-offset color was zinc-900 — the dialog
|
||||
actually sits on bg-surface-sunken, so the offset showed
|
||||
the wrong color through the ring gap. Switched to the
|
||||
real bg + a danger-tinted ring. */}
|
||||
<label className="flex items-start gap-2.5 cursor-pointer group select-none">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={checked}
|
||||
onChange={(e) => onCheckedChange(e.target.checked)}
|
||||
className="mt-0.5 w-4 h-4 rounded border-line bg-surface-card text-bad focus:ring-red-500 focus:ring-offset-0 focus:ring-offset-zinc-900 cursor-pointer"
|
||||
className="mt-0.5 w-4 h-4 rounded border-line bg-surface-card text-bad cursor-pointer focus:outline-none focus-visible:ring-2 focus-visible:ring-red-500/60 focus-visible:ring-offset-2 focus-visible:ring-offset-surface-sunken"
|
||||
/>
|
||||
<span className="text-[12px] text-ink-mid group-hover:text-ink-mid leading-relaxed">
|
||||
I understand this will permanently delete all listed workspaces and their data
|
||||
@@ -145,7 +148,11 @@ export function DeleteCascadeConfirmDialog({
|
||||
<button
|
||||
type="button"
|
||||
onClick={onCancel}
|
||||
className="px-3.5 py-1.5 text-[13px] text-ink-mid hover:text-ink bg-surface-card hover:bg-surface-card border border-line rounded-lg transition-colors"
|
||||
// Was hover:bg-surface-card (same as base — silent no-op).
|
||||
// Lift to surface-elevated to match the Cancel pattern in
|
||||
// ConfirmDialog. Added focus-visible ring so keyboard users
|
||||
// see where focus lands.
|
||||
className="px-3.5 py-1.5 text-[13px] text-ink-mid hover:text-ink bg-surface-card hover:bg-surface-elevated border border-line hover:border-line-soft rounded-lg transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40 focus-visible:ring-offset-2 focus-visible:ring-offset-surface-sunken"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
@@ -153,9 +160,12 @@ export function DeleteCascadeConfirmDialog({
|
||||
type="button"
|
||||
onClick={onConfirm}
|
||||
disabled={!checked}
|
||||
className={`px-3.5 py-1.5 text-[13px] rounded-lg transition-colors
|
||||
// Hover goes DARKER, not lighter — white text on bg-red-500
|
||||
// drops contrast below AA vs bg-red-700. Same trap fixed in
|
||||
// ConfirmDialog and ApprovalBanner. focus-visible ring matches.
|
||||
className={`px-3.5 py-1.5 text-[13px] rounded-lg transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-red-500/60 focus-visible:ring-offset-2 focus-visible:ring-offset-surface-sunken
|
||||
${checked
|
||||
? "bg-red-600 hover:bg-red-500 text-white cursor-pointer"
|
||||
? "bg-red-600 hover:bg-red-700 text-white cursor-pointer"
|
||||
: "bg-red-900/30 text-bad/40 cursor-not-allowed"
|
||||
}`}
|
||||
>
|
||||
|
||||
@@ -18,6 +18,157 @@
|
||||
import { useCallback, useState } from "react";
|
||||
import * as Dialog from "@radix-ui/react-dialog";
|
||||
|
||||
type Tab = "python" | "curl" | "claude" | "mcp" | "hermes" | "codex" | "openclaw" | "fields";
|
||||
|
||||
// Per-tab help metadata: docs link, where-to-install link, common errors.
|
||||
// All URLs verified against repo content (docs/guides/* file paths map to
|
||||
// docs.molecule.ai/docs/guides/*; canonical hostname confirmed by existing
|
||||
// blog post canonical metadata) or against the snippet text the operator
|
||||
// just copied. Never linking to a URL that wasn't already in product —
|
||||
// dead links here defeat the purpose of "more comprehensive instructions."
|
||||
const TAB_HELP: Record<
|
||||
Tab,
|
||||
{
|
||||
docsUrl?: string;
|
||||
docsLabel?: string;
|
||||
downloadUrl?: string;
|
||||
downloadLabel?: string;
|
||||
commonIssues?: { symptom: string; check: string }[];
|
||||
}
|
||||
> = {
|
||||
mcp: {
|
||||
docsUrl: "https://docs.molecule.ai/docs/guides/mcp-server-setup",
|
||||
docsLabel: "MCP server setup guide",
|
||||
downloadUrl: "https://pypi.org/project/molecule-ai-workspace-runtime/",
|
||||
downloadLabel: "molecule-ai-workspace-runtime on PyPI",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "Tools not appearing in your agent",
|
||||
check:
|
||||
"Run `claude mcp list` (or your runtime's equivalent) — the molecule entry should be listed. If missing, re-run the `claude mcp add` line.",
|
||||
},
|
||||
{
|
||||
symptom: "ConnectionRefused / DNS error on first call",
|
||||
check:
|
||||
"PLATFORM_URL must include the scheme (https://) and have no trailing slash. Verify with `curl $PLATFORM_URL/healthz`.",
|
||||
},
|
||||
],
|
||||
},
|
||||
python: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
downloadUrl: "https://pypi.org/project/molecule-ai-workspace-runtime/",
|
||||
downloadLabel: "molecule-ai-workspace-runtime on PyPI",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "401 from /heartbeat",
|
||||
check:
|
||||
"AUTH_TOKEN expired or wrong workspace_id. Tokens are shown only once at create time — re-create the workspace to get a fresh token.",
|
||||
},
|
||||
{
|
||||
symptom: "AGENT_URL not reachable from platform",
|
||||
check:
|
||||
"Public HTTPS URL required for inbound A2A. Use ngrok or Cloudflare Tunnel if your agent is behind NAT.",
|
||||
},
|
||||
],
|
||||
},
|
||||
claude: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
downloadUrl: "https://claude.com/claude-code",
|
||||
downloadLabel: "Claude Code (claude.com)",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "plugin not installed",
|
||||
check:
|
||||
"Run `/plugin marketplace add Molecule-AI/molecule-mcp-claude-channel` then `/plugin install molecule@molecule-mcp-claude-channel` inside Claude Code, then `/reload-plugins`.",
|
||||
},
|
||||
{
|
||||
symptom: "not on the approved channels allowlist",
|
||||
check:
|
||||
"Custom channels need `--dangerously-load-development-channels` on the launch command. Team/Enterprise orgs need admin to set `channelsEnabled` + `allowedChannelPlugins` in claude.ai admin settings.",
|
||||
},
|
||||
{
|
||||
symptom: "Inbound messages not arriving",
|
||||
check:
|
||||
"Check stderr for `molecule channel: connected — watching N workspace(s)`. Verify ~/.claude/channels/molecule/.env has the right PLATFORM_URL + token.",
|
||||
},
|
||||
],
|
||||
},
|
||||
hermes: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
downloadUrl: "https://github.com/NousResearch/hermes-agent",
|
||||
downloadLabel: "hermes-agent (NousResearch)",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "Gateway start failure",
|
||||
check:
|
||||
"Tail ~/.hermes/gateway.log. YAML duplicate-key in config.yaml is the most common cause — `gateway:` block must appear exactly once.",
|
||||
},
|
||||
{
|
||||
symptom: "Plugin not discovered after install",
|
||||
check:
|
||||
"Run `pip show hermes-channel-molecule` to confirm install. Some hermes builds need `hermes plugin reload` before the new platform_plugins entry takes effect.",
|
||||
},
|
||||
],
|
||||
},
|
||||
codex: {
|
||||
docsUrl: "https://docs.molecule.ai/docs/guides/mcp-server-setup",
|
||||
docsLabel: "MCP server setup guide",
|
||||
downloadUrl: "https://github.com/openai/codex",
|
||||
downloadLabel: "openai/codex",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "[mcp_servers.molecule] not loaded",
|
||||
check:
|
||||
"Codex must be ≥ 0.57. Check with `codex --version`; upgrade via `npm install -g @openai/codex@latest`.",
|
||||
},
|
||||
{
|
||||
symptom: "TOML parse error after re-running setup",
|
||||
check:
|
||||
"TOML rejects duplicate `[mcp_servers.molecule]` tables. Open ~/.codex/config.toml and remove the old block before pasting the new one.",
|
||||
},
|
||||
],
|
||||
},
|
||||
openclaw: {
|
||||
docsUrl: "https://docs.molecule.ai/docs/guides/mcp-server-setup",
|
||||
docsLabel: "MCP server setup guide",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "Gateway not starting",
|
||||
check:
|
||||
"Tail ~/.openclaw/gateway.log. The loopback bind requires :18789 to be free — check with `lsof -iTCP:18789`.",
|
||||
},
|
||||
{
|
||||
symptom: "openclaw mcp set rejected",
|
||||
check:
|
||||
"The heredoc generates JSON; verify it parsed by running `jq < ~/.openclaw/mcp/molecule.json`. Re-run `openclaw mcp set` if the file is malformed.",
|
||||
},
|
||||
],
|
||||
},
|
||||
curl: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "401 / 403 on register",
|
||||
check:
|
||||
"WORKSPACE_AUTH_TOKEN must be the value shown at workspace create. Tokens are shown only once.",
|
||||
},
|
||||
],
|
||||
},
|
||||
fields: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
},
|
||||
};
|
||||
|
||||
export interface ExternalConnectionInfo {
|
||||
workspace_id: string;
|
||||
platform_url: string;
|
||||
@@ -40,6 +191,22 @@ export interface ExternalConnectionInfo {
|
||||
// + inbound. Optional for backward compat with platforms that
|
||||
// haven't shipped PR #2413 yet.
|
||||
universal_mcp_snippet?: string;
|
||||
// Hermes channel snippet — for operators whose external agent IS a
|
||||
// hermes-agent session. Routes A2A traffic into the hermes gateway
|
||||
// via the molecule-channel plugin (Molecule-AI/hermes-channel-molecule).
|
||||
// Long-poll based (no tunnel) — same UX shape as the Claude Code
|
||||
// channel tab. Gives hermes true push parity. Optional for backward
|
||||
// compat with platforms that haven't shipped this PR yet.
|
||||
hermes_channel_snippet?: string;
|
||||
// Codex MCP config snippet — wires the molecule MCP server into
|
||||
// ~/.codex/config.toml so codex agents can call platform tools.
|
||||
// Outbound-tools-only today (codex's MCP client doesn't route
|
||||
// notifications/*); push parity would need a separate bridge daemon.
|
||||
codex_snippet?: string;
|
||||
// OpenClaw MCP config snippet — wires molecule MCP + starts the
|
||||
// openclaw gateway on loopback. Outbound-tools-only today; push
|
||||
// parity on an external openclaw needs a sessions.steer bridge.
|
||||
openclaw_snippet?: string;
|
||||
}
|
||||
|
||||
interface Props {
|
||||
@@ -47,13 +214,19 @@ interface Props {
|
||||
onClose: () => void;
|
||||
}
|
||||
|
||||
type Tab = "python" | "curl" | "claude" | "mcp" | "fields";
|
||||
|
||||
export function ExternalConnectModal({ info, onClose }: Props) {
|
||||
// Default to Claude Code when the platform offers it — that's the
|
||||
// newest + simplest path (no tunnel needed). Falls back to Python
|
||||
// for older platform builds that don't ship the snippet.
|
||||
const initialTab: Tab = info?.claude_code_channel_snippet ? "claude" : "python";
|
||||
// Default to Universal MCP when the platform offers it — runtime-
|
||||
// agnostic outbound tool path that works for any MCP-aware runtime
|
||||
// (Claude Code, hermes, codex, etc.) and lets operators inspect the
|
||||
// primitives before picking a runtime-specific tab. Python SDK is
|
||||
// the fallback for platforms predating the universal_mcp_snippet
|
||||
// field. Pre-2026-05-03 the default was "claude" (Claude Code first)
|
||||
// but operators using non-Claude runtimes opened to a tab they had
|
||||
// to skip past — universal MCP works for everyone as a starting
|
||||
// point and the runtime-specific tabs are still one click away.
|
||||
const initialTab: Tab = info?.universal_mcp_snippet
|
||||
? "mcp"
|
||||
: "python";
|
||||
const [tab, setTab] = useState<Tab>(initialTab);
|
||||
const [copiedKey, setCopiedKey] = useState<string | null>(null);
|
||||
|
||||
@@ -108,6 +281,24 @@ export function ExternalConnectModal({ info, onClose }: Props) {
|
||||
'MOLECULE_WORKSPACE_TOKEN="<paste from create response>"',
|
||||
`MOLECULE_WORKSPACE_TOKEN="${info.auth_token}"`,
|
||||
);
|
||||
// Hermes channel snippet uses MOLECULE_WORKSPACE_TOKEN (same env-var
|
||||
// name as Universal MCP). Stamp the auth_token in so the operator's
|
||||
// copy-paste is fully ready-to-run.
|
||||
const filledHermes = info.hermes_channel_snippet?.replace(
|
||||
'MOLECULE_WORKSPACE_TOKEN="<paste from create response>"',
|
||||
`MOLECULE_WORKSPACE_TOKEN="${info.auth_token}"`,
|
||||
);
|
||||
// Codex + OpenClaw snippets carry the placeholder inside the
|
||||
// generated config block (TOML / JSON respectively). Stamp the
|
||||
// token in so the copy-paste is one less manual edit.
|
||||
const filledCodex = info.codex_snippet?.replace(
|
||||
'MOLECULE_WORKSPACE_TOKEN = "<paste from create response>"',
|
||||
`MOLECULE_WORKSPACE_TOKEN = "${info.auth_token}"`,
|
||||
);
|
||||
const filledOpenClaw = info.openclaw_snippet?.replace(
|
||||
'WORKSPACE_TOKEN="<paste from create response>"',
|
||||
`WORKSPACE_TOKEN="${info.auth_token}"`,
|
||||
);
|
||||
|
||||
return (
|
||||
<Dialog.Root open onOpenChange={(o) => !o && onClose()}>
|
||||
@@ -135,10 +326,18 @@ export function ExternalConnectModal({ info, onClose }: Props) {
|
||||
// SDK second (full register+heartbeat+inbound); Universal
|
||||
// MCP third (any MCP-aware runtime, outbound-only); curl
|
||||
// for one-shot register; Fields for raw values.
|
||||
// Tab order: Universal MCP first (default, runtime-
|
||||
// agnostic primitives), then runtime-specific channel/
|
||||
// SDK tabs, then curl + Fields. Each runtime tab only
|
||||
// appears when the platform supplies the snippet — no
|
||||
// dead "tab missing snippet" UX.
|
||||
const tabs: Tab[] = [];
|
||||
if (filledChannel) tabs.push("claude");
|
||||
tabs.push("python");
|
||||
if (filledUniversalMcp) tabs.push("mcp");
|
||||
tabs.push("python");
|
||||
if (filledChannel) tabs.push("claude");
|
||||
if (filledHermes) tabs.push("hermes");
|
||||
if (filledCodex) tabs.push("codex");
|
||||
if (filledOpenClaw) tabs.push("openclaw");
|
||||
tabs.push("curl", "fields");
|
||||
return tabs;
|
||||
})().map((t) => (
|
||||
@@ -156,6 +355,12 @@ export function ExternalConnectModal({ info, onClose }: Props) {
|
||||
>
|
||||
{t === "claude"
|
||||
? "Claude Code"
|
||||
: t === "hermes"
|
||||
? "Hermes"
|
||||
: t === "codex"
|
||||
? "Codex"
|
||||
: t === "openclaw"
|
||||
? "OpenClaw"
|
||||
: t === "python"
|
||||
? "Python SDK"
|
||||
: t === "mcp"
|
||||
@@ -205,6 +410,33 @@ export function ExternalConnectModal({ info, onClose }: Props) {
|
||||
onCopy={() => copy(filledUniversalMcp, "mcp")}
|
||||
/>
|
||||
)}
|
||||
{tab === "hermes" && filledHermes && (
|
||||
<SnippetBlock
|
||||
value={filledHermes}
|
||||
label="Hermes channel — bridges this workspace's A2A traffic into your hermes-agent session as platform messages (push parity with Claude Code). Long-poll based; no tunnel needed."
|
||||
copyKey="hermes"
|
||||
copied={copiedKey === "hermes"}
|
||||
onCopy={() => copy(filledHermes, "hermes")}
|
||||
/>
|
||||
)}
|
||||
{tab === "codex" && filledCodex && (
|
||||
<SnippetBlock
|
||||
value={filledCodex}
|
||||
label="Codex MCP config — wires the molecule MCP server into ~/.codex/config.toml. Outbound tools today; inbound A2A push needs the Python SDK tab paired in (codex's MCP runtime doesn't route arbitrary notifications/* yet)."
|
||||
copyKey="codex"
|
||||
copied={copiedKey === "codex"}
|
||||
onCopy={() => copy(filledCodex, "codex")}
|
||||
/>
|
||||
)}
|
||||
{tab === "openclaw" && filledOpenClaw && (
|
||||
<SnippetBlock
|
||||
value={filledOpenClaw}
|
||||
label="OpenClaw MCP config — wires the molecule MCP server via openclaw mcp set + starts the gateway on loopback. Outbound tools today; inbound A2A push on an external openclaw needs the Python SDK tab paired in (a sessions.steer bridge daemon is future work)."
|
||||
copyKey="openclaw"
|
||||
copied={copiedKey === "openclaw"}
|
||||
onCopy={() => copy(filledOpenClaw, "openclaw")}
|
||||
/>
|
||||
)}
|
||||
{tab === "fields" && (
|
||||
<div className="space-y-2">
|
||||
<Field label="workspace_id" value={info.workspace_id} onCopy={() => copy(info.workspace_id, "wsid")} copied={copiedKey === "wsid"} />
|
||||
@@ -220,6 +452,7 @@ export function ExternalConnectModal({ info, onClose }: Props) {
|
||||
<Field label="heartbeat_endpoint" value={info.heartbeat_endpoint} onCopy={() => copy(info.heartbeat_endpoint, "hb")} copied={copiedKey === "hb"} />
|
||||
</div>
|
||||
)}
|
||||
<HelpBlock help={TAB_HELP[tab]} />
|
||||
</div>
|
||||
|
||||
<div className="mt-5 flex justify-end gap-2">
|
||||
@@ -268,6 +501,70 @@ function SnippetBlock({
|
||||
);
|
||||
}
|
||||
|
||||
// HelpBlock — collapsible "Need help?" section under each tab's snippet.
|
||||
// Renders only the keys present in the per-tab help metadata (no empty
|
||||
// sections). Closed by default so the snippet stays the visual focus;
|
||||
// operators with a working setup never see this. Uses native <details>
|
||||
// for keyboard accessibility (Tab + Enter) without extra ARIA wiring.
|
||||
function HelpBlock({
  help,
}: {
  help: (typeof TAB_HELP)[Tab] | undefined;
}) {
  // Tabs without a help entry render nothing at all.
  if (!help) return null;

  const issues = help.commonIssues ?? [];
  const hasIssues = issues.length > 0;
  // An entry with no links and no issues would produce an empty
  // disclosure widget, so bail out instead.
  if (!help.docsUrl && !help.downloadUrl && !hasIssues) return null;

  // One captioned external link row. Shows the raw URL when the entry
  // supplies no label. Opens in a new tab without leaking the opener.
  const linkRow = (caption: string, href: string, text: string | undefined) => (
    <div>
      <span className="text-ink-soft">{caption}</span>
      <a
        href={href}
        target="_blank"
        rel="noopener noreferrer"
        className="text-accent underline hover:text-accent-strong"
      >
        {text || href}
      </a>
    </div>
  );

  return (
    <details className="mt-3 border border-line rounded-lg bg-surface text-xs">
      <summary className="cursor-pointer select-none px-3 py-2 text-ink-mid hover:text-ink">
        Need help? — install link, docs, common errors
      </summary>
      <div className="px-3 pb-3 pt-1 space-y-2">
        {help.downloadUrl &&
          linkRow("Where to install: ", help.downloadUrl, help.downloadLabel)}
        {help.docsUrl &&
          linkRow("Documentation: ", help.docsUrl, help.docsLabel)}
        {hasIssues && (
          <div>
            <div className="text-ink-soft mb-1">Common errors:</div>
            <ul className="space-y-1.5 pl-3">
              {issues.map((entry, idx) => (
                <li key={idx}>
                  <code className="text-warm font-mono">{entry.symptom}</code>
                  <span className="text-ink-mid"> — {entry.check}</span>
                </li>
              ))}
            </ul>
          </div>
        )}
      </div>
    </details>
  );
}
|
||||
|
||||
function Field({
|
||||
label,
|
||||
value,
|
||||
|
||||
@@ -1,11 +1,23 @@
|
||||
"use client";
|
||||
|
||||
import { useEffect, useState } from "react";
|
||||
import { STATUS_CONFIG } from "@/lib/design-tokens";
|
||||
import { STATUS_CONFIG, TIER_CONFIG } from "@/lib/design-tokens";
|
||||
import { useCanvasStore } from "@/store/canvas";
|
||||
|
||||
const LEGEND_STATUSES = ["online", "provisioning", "degraded", "failed", "paused", "offline"] as const;
|
||||
|
||||
// Tier descriptions kept in sync with CreateWorkspaceDialog.tsx (the
|
||||
// source of truth for what each tier means semantically). Colors come
|
||||
// from TIER_CONFIG so the legend swatch matches the badge actually
|
||||
// rendered on every WorkspaceNode — drift here misled users into
|
||||
// thinking the legend documented a different tier than the one shown.
|
||||
// Each entry pairs a tier number with the label shown next to its swatch.
// `tier` doubles as the lookup key into TIER_CONFIG when the legend renders.
const LEGEND_TIERS: ReadonlyArray<{ tier: number; label: string }> = [
|
||||
{ tier: 1, label: "Sandboxed" },
|
||||
{ tier: 2, label: "Standard" },
|
||||
{ tier: 3, label: "Privileged" },
|
||||
{ tier: 4, label: "Full Access" },
|
||||
];
|
||||
|
||||
// Persist the user's choice across sessions. Default is "open" so
|
||||
// first-time users still see the symbol key; once dismissed we
|
||||
// respect that until they explicitly reopen via the floating pill.
|
||||
@@ -65,7 +77,7 @@ export function Legend() {
|
||||
onClick={openLegend}
|
||||
aria-label="Show legend"
|
||||
title="Show legend"
|
||||
className={`fixed bottom-6 ${leftClass} z-30 flex items-center gap-1.5 rounded-full bg-surface-sunken/95 border border-line/50 px-3 py-1.5 text-[11px] font-semibold text-ink-mid uppercase tracking-wider shadow-xl shadow-black/30 backdrop-blur-sm hover:text-ink hover:border-line transition-[left,colors] duration-200`}
|
||||
className={`fixed bottom-6 ${leftClass} z-30 flex items-center gap-1.5 rounded-full bg-surface-sunken/95 border border-line/50 px-3 py-1.5 text-[11px] font-semibold text-ink-mid uppercase tracking-wider shadow-xl shadow-black/30 backdrop-blur-sm hover:text-ink hover:border-line focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-2 focus-visible:ring-offset-surface transition-[left,colors] duration-200`}
|
||||
>
|
||||
<span aria-hidden="true" className="text-[10px]">ⓘ</span>
|
||||
Legend
|
||||
@@ -82,7 +94,10 @@ export function Legend() {
|
||||
onClick={closeLegend}
|
||||
aria-label="Hide legend"
|
||||
title="Hide legend"
|
||||
className="-mt-0.5 -mr-1 px-1.5 text-[14px] leading-none text-ink-soft hover:text-ink transition-colors"
|
||||
// 24×24 touch target (was ~10×16, well under WCAG 2.5.5 min).
|
||||
// Negative margin keeps the visual position the same as before
|
||||
// — only the hit area + focus ring are larger.
|
||||
className="-mt-1.5 -mr-1.5 w-6 h-6 inline-flex items-center justify-center rounded text-[14px] leading-none text-ink-soft hover:text-ink hover:bg-surface-card/40 focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 transition-colors"
|
||||
>
|
||||
×
|
||||
</button>
|
||||
@@ -102,9 +117,9 @@ export function Legend() {
|
||||
<div className="mb-2">
|
||||
<div className="text-[11px] text-ink-soft font-medium mb-1">Tier</div>
|
||||
<div className="flex flex-wrap gap-x-3 gap-y-1">
|
||||
<TierItem tier={1} label="Sandboxed" color="text-sky-300 bg-sky-950/40 border-sky-700/30" />
|
||||
<TierItem tier={2} label="Standard" color="text-violet-300 bg-violet-950/40 border-violet-700/30" />
|
||||
<TierItem tier={3} label="Full Access" color="text-warm bg-amber-950/40 border-amber-700/30" />
|
||||
{LEGEND_TIERS.map(({ tier, label }) => (
|
||||
<TierItem key={tier} tier={tier} label={label} color={TIER_CONFIG[tier].border} />
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -134,10 +134,12 @@ export function OnboardingWizard() {
|
||||
aria-label="Onboarding guide"
|
||||
className="fixed bottom-20 left-4 z-50 w-80 rounded-2xl border border-line/60 bg-surface-sunken/95 backdrop-blur-xl shadow-2xl shadow-black/40 overflow-hidden"
|
||||
>
|
||||
{/* Progress bar */}
|
||||
{/* Progress bar — was hardcoded from-blue-500 to-sky-400, neither
|
||||
tone exists in warm-paper light theme. Switched to the accent
|
||||
ramp so the gradient reads as brand color in both themes. */}
|
||||
<div className="h-1 bg-surface-card">
|
||||
<div
|
||||
className="h-full bg-gradient-to-r from-blue-500 to-sky-400 transition-all duration-500"
|
||||
className="h-full bg-gradient-to-r from-accent to-accent-strong transition-all duration-500"
|
||||
style={{ width: `${((currentStepIdx + 1) / STEPS.length) * 100}%` }}
|
||||
/>
|
||||
</div>
|
||||
@@ -155,14 +157,16 @@ export function OnboardingWizard() {
|
||||
<div className="p-4">
|
||||
{/* Step indicator */}
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<span className="text-[9px] font-semibold uppercase tracking-widest text-sky-400/80">
|
||||
{/* text-sky-400/80 was hardcoded; flip to text-accent so the
|
||||
indicator stays brand-tinted in both themes. */}
|
||||
<span className="text-[9px] font-semibold uppercase tracking-widest text-accent">
|
||||
Step {currentStepIdx + 1} of {STEPS.length}
|
||||
</span>
|
||||
<button
|
||||
type="button"
|
||||
onClick={dismiss}
|
||||
aria-label="Skip onboarding guide"
|
||||
className="text-[10px] text-ink-mid hover:text-ink transition-colors"
|
||||
className="text-[10px] text-ink-mid hover:text-ink transition-colors rounded-sm focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/50"
|
||||
>
|
||||
Skip guide
|
||||
</button>
|
||||
@@ -181,7 +185,11 @@ export function OnboardingWizard() {
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleAction}
|
||||
className="flex-1 px-3 py-1.5 bg-accent-strong/90 hover:bg-accent rounded-lg text-[11px] font-medium text-white transition-colors"
|
||||
// Was bg-accent-strong/90 hover:bg-accent — accent is the
|
||||
// LIGHTER variant, so this hovered lighter on white text and
|
||||
// dropped contrast below AA. Same trap fixed in
|
||||
// ConfirmDialog/ApprovalBanner. Hover the OTHER direction.
|
||||
className="flex-1 px-3 py-1.5 bg-accent hover:bg-accent-strong rounded-lg text-[11px] font-medium text-white transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-2 focus-visible:ring-offset-surface-sunken"
|
||||
>
|
||||
{step === "welcome"
|
||||
? "Create Workspace"
|
||||
@@ -199,7 +207,10 @@ export function OnboardingWizard() {
|
||||
if (next) setStep(next.id);
|
||||
else dismiss();
|
||||
}}
|
||||
className="px-3 py-1.5 bg-surface-card hover:bg-surface-card rounded-lg text-[11px] text-ink-mid transition-colors"
|
||||
// Was hover:bg-surface-card on top of bg-surface-card —
|
||||
// silent no-op hover. Lift to surface-elevated, matching
|
||||
// the Cancel pattern in ConfirmDialog.
|
||||
className="px-3 py-1.5 bg-surface-card hover:bg-surface-elevated hover:text-ink rounded-lg text-[11px] text-ink-mid transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40 focus-visible:ring-offset-2 focus-visible:ring-offset-surface-sunken"
|
||||
>
|
||||
Next
|
||||
</button>
|
||||
|
||||
@@ -293,7 +293,7 @@ export function OrgImportPreflightModal({
|
||||
<button
|
||||
type="button"
|
||||
onClick={onCancel}
|
||||
className="px-3 py-1.5 text-[11px] rounded bg-surface-card hover:bg-surface-card text-ink-mid"
|
||||
className="px-3 py-1.5 text-[11px] rounded bg-surface-card hover:bg-surface-elevated hover:text-ink text-ink-mid transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40 focus-visible:ring-offset-1 focus-visible:ring-offset-surface"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
@@ -308,7 +308,7 @@ export function OrgImportPreflightModal({
|
||||
type="button"
|
||||
onClick={onProceed}
|
||||
disabled={!canProceed}
|
||||
className="px-4 py-1.5 text-[11px] font-semibold rounded bg-accent-strong hover:bg-accent text-white disabled:bg-surface-card disabled:text-white-soft disabled:cursor-not-allowed"
|
||||
className="px-4 py-1.5 text-[11px] font-semibold rounded bg-accent hover:bg-accent-strong text-white disabled:bg-surface-card disabled:text-white-soft disabled:cursor-not-allowed"
|
||||
>
|
||||
Import
|
||||
</button>
|
||||
@@ -428,7 +428,7 @@ function StrictEnvRow({
|
||||
type="button"
|
||||
onClick={() => onSave(envKey)}
|
||||
disabled={d?.saving || !d?.value.trim()}
|
||||
className="px-2 py-1 text-[10px] rounded bg-accent-strong hover:bg-accent text-white disabled:opacity-40 disabled:cursor-not-allowed"
|
||||
className="px-2 py-1 text-[10px] rounded bg-accent hover:bg-accent-strong text-white disabled:opacity-40 disabled:cursor-not-allowed"
|
||||
>
|
||||
{d?.saving ? "…" : "Save"}
|
||||
</button>
|
||||
@@ -520,7 +520,7 @@ function AnyOfEnvGroup({
|
||||
type="button"
|
||||
onClick={() => onSave(m)}
|
||||
disabled={d?.saving || !d?.value.trim()}
|
||||
className="px-2 py-1 text-[10px] rounded bg-accent-strong hover:bg-accent text-white disabled:opacity-40 disabled:cursor-not-allowed"
|
||||
className="px-2 py-1 text-[10px] rounded bg-accent hover:bg-accent-strong text-white disabled:opacity-40 disabled:cursor-not-allowed"
|
||||
>
|
||||
{d?.saving ? "…" : "Save"}
|
||||
</button>
|
||||
|
||||
@@ -36,11 +36,6 @@ export function SearchDialog() {
|
||||
}
|
||||
}, [open]);
|
||||
|
||||
// Reset focused index when query changes
|
||||
useEffect(() => {
|
||||
setFocusedIndex(-1);
|
||||
}, [query]);
|
||||
|
||||
const filtered = nodes.filter((n) => {
|
||||
if (!query) return true;
|
||||
const q = query.toLowerCase();
|
||||
@@ -51,6 +46,18 @@ export function SearchDialog() {
|
||||
);
|
||||
});
|
||||
|
||||
// Auto-highlight the first match while the user is typing, so Enter
|
||||
// selects something instead of being a no-op. With an empty query we
|
||||
// keep -1 so opening the dialog (which shows ALL workspaces) doesn't
|
||||
// visually pin one row arbitrarily — only commit a highlight once the
|
||||
// user has narrowed the list.
|
||||
useEffect(() => {
|
||||
setFocusedIndex(query && filtered.length > 0 ? 0 : -1);
|
||||
// Re-running on filtered.length keeps the highlight pinned to the
|
||||
// first row while the result set shrinks/grows; the ternary in the
|
||||
// setFocusedIndex call above already falls back to -1 when results disappear.
|
||||
}, [query, filtered.length]);
|
||||
|
||||
const handleSelect = useCallback(
|
||||
(nodeId: string) => {
|
||||
selectNode(nodeId);
|
||||
@@ -113,7 +120,7 @@ export function SearchDialog() {
|
||||
onChange={(e) => setQuery(e.target.value)}
|
||||
onKeyDown={handleInputKeyDown}
|
||||
placeholder="Search workspaces..."
|
||||
className="flex-1 bg-transparent text-sm text-ink placeholder-zinc-400 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus:outline-none rounded"
|
||||
className="flex-1 bg-transparent text-sm text-ink placeholder-ink-soft focus:outline-none focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent rounded"
|
||||
/>
|
||||
<kbd className="text-[9px] text-ink-mid bg-surface-card/60 px-1.5 py-0.5 rounded border border-line/40">ESC</kbd>
|
||||
</div>
|
||||
|
||||
@@ -202,7 +202,7 @@ export function SidePanel() {
|
||||
{/* Tabs — relative wrapper lets the fade gradient position against the scroll container */}
|
||||
<div className="relative border-b border-line/40">
|
||||
{/* Right-edge fade: signals more tabs are hidden off-screen when the bar overflows */}
|
||||
<div className="pointer-events-none absolute inset-y-0 right-0 w-8 bg-gradient-to-l from-zinc-950 to-transparent z-10" aria-hidden="true" />
|
||||
<div className="pointer-events-none absolute inset-y-0 right-0 w-8 bg-gradient-to-l from-surface to-transparent z-10" aria-hidden="true" />
|
||||
<div
|
||||
role="tablist"
|
||||
aria-label="Workspace panel tabs"
|
||||
@@ -232,8 +232,8 @@ export function SidePanel() {
|
||||
onClick={() => setPanelTab(tab.id)}
|
||||
className={`shrink-0 px-3 py-2.5 text-[10px] font-medium tracking-wide transition-all rounded-t-lg mx-0.5 focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/70 ${
|
||||
panelTab === tab.id
|
||||
? "text-ink bg-surface-card/40 border-b-2 border-accent"
|
||||
: "text-ink-soft hover:text-ink hover:bg-surface-card/40"
|
||||
? "text-ink bg-surface-card border-b-2 border-accent"
|
||||
: "text-ink-mid hover:text-ink hover:bg-surface-card/60"
|
||||
}`}
|
||||
>
|
||||
<span className="mr-1 opacity-50" aria-hidden="true">{tab.icon}</span>
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"use client";
|
||||
|
||||
import { useEffect, useState } from "react";
|
||||
import { useEffect, useRef, useState } from "react";
|
||||
import { PLATFORM_URL } from "@/lib/api";
|
||||
|
||||
// TermsGate blocks the page it wraps until the user has accepted the
|
||||
@@ -73,39 +73,72 @@ export function TermsGate({ children }: { children: React.ReactNode }) {
|
||||
}
|
||||
};
|
||||
|
||||
// Move focus to the "I agree" button when the modal opens (WCAG 2.4.3).
|
||||
// The dialog is a hard gate — no Esc dismiss — so we don't need a focus
|
||||
// trap loop, just a one-shot focus move into the dialog.
|
||||
const agreeButtonRef = useRef<HTMLButtonElement>(null);
|
||||
useEffect(() => {
|
||||
if (status !== "pending") return;
|
||||
const raf = requestAnimationFrame(() => agreeButtonRef.current?.focus());
|
||||
return () => cancelAnimationFrame(raf);
|
||||
}, [status]);
|
||||
|
||||
return (
|
||||
<>
|
||||
{children}
|
||||
{status === "pending" && (
|
||||
<div aria-hidden="true" className="fixed inset-0 z-50 flex items-center justify-center bg-surface/80 backdrop-blur-sm">
|
||||
// Backdrop is decorative — does NOT carry aria-hidden anymore.
|
||||
// The earlier version put aria-hidden="true" on this wrapper,
|
||||
// which hid the dialog AND its descendants from screen readers,
|
||||
// making the entire terms-acceptance flow invisible to AT users.
|
||||
// Backdrop click intentionally does nothing — this is a hard
|
||||
// gate.
|
||||
<div className="fixed inset-0 z-50 flex items-center justify-center bg-surface/80 backdrop-blur-sm">
|
||||
<div
|
||||
role="dialog"
|
||||
aria-modal="true"
|
||||
aria-labelledby="terms-dialog-title"
|
||||
aria-describedby="terms-dialog-body"
|
||||
className="mx-4 max-w-lg rounded-lg border border-line bg-surface-sunken p-6 shadow-xl"
|
||||
>
|
||||
<h2 id="terms-dialog-title" className="text-lg font-semibold text-ink">Terms & conditions</h2>
|
||||
<p className="mt-3 text-sm text-ink-mid">
|
||||
Before you create an organization, please review our{" "}
|
||||
<a href="/legal/terms" className="text-sky-400 underline" target="_blank" rel="noreferrer">
|
||||
Terms of Service
|
||||
</a>{" "}
|
||||
and{" "}
|
||||
<a href="/legal/privacy" className="text-sky-400 underline" target="_blank" rel="noreferrer">
|
||||
Privacy Policy
|
||||
</a>
|
||||
. Click agree to continue.
|
||||
</p>
|
||||
<p className="mt-3 text-xs text-ink-soft">
|
||||
By agreeing you acknowledge that workspace data is stored in AWS us-east-2 (Ohio, United States).
|
||||
</p>
|
||||
<div id="terms-dialog-body">
|
||||
<p className="mt-3 text-sm text-ink-mid">
|
||||
Before you create an organization, please review our{" "}
|
||||
<a
|
||||
href="/legal/terms"
|
||||
className="text-accent underline underline-offset-2 hover:text-accent-strong focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 rounded-sm"
|
||||
target="_blank"
|
||||
rel="noreferrer"
|
||||
>
|
||||
Terms of Service
|
||||
</a>{" "}
|
||||
and{" "}
|
||||
<a
|
||||
href="/legal/privacy"
|
||||
className="text-accent underline underline-offset-2 hover:text-accent-strong focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 rounded-sm"
|
||||
target="_blank"
|
||||
rel="noreferrer"
|
||||
>
|
||||
Privacy Policy
|
||||
</a>
|
||||
. Click agree to continue.
|
||||
</p>
|
||||
<p className="mt-3 text-xs text-ink-soft">
|
||||
By agreeing you acknowledge that workspace data is stored in AWS us-east-2 (Ohio, United States).
|
||||
</p>
|
||||
</div>
|
||||
{error && <p role="alert" className="mt-3 text-sm text-bad">{error}</p>}
|
||||
<div className="mt-5 flex justify-end gap-2">
|
||||
<button
|
||||
type="button"
|
||||
ref={agreeButtonRef}
|
||||
onClick={accept}
|
||||
disabled={submitting}
|
||||
className="rounded bg-emerald-600 px-4 py-2 text-sm font-medium text-white hover:bg-emerald-500 disabled:opacity-50"
|
||||
// Hover goes DARKER, not lighter — emerald-500 on white
|
||||
// text drops contrast below AA vs emerald-700. Same trap
|
||||
// I fixed in ApprovalBanner + ConfirmDialog.
|
||||
className="rounded bg-emerald-600 hover:bg-emerald-700 px-4 py-2 text-sm font-medium text-white disabled:opacity-50 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-emerald-400/70 focus-visible:ring-offset-2 focus-visible:ring-offset-surface-sunken"
|
||||
>
|
||||
{submitting ? "Saving…" : "I agree"}
|
||||
</button>
|
||||
|
||||
@@ -38,6 +38,18 @@ export function Toaster() {
|
||||
};
|
||||
}, []);
|
||||
|
||||
// Esc dismisses the newest toast — keyboard parity with the × button.
|
||||
// Errors never auto-expire, so without this a keyboard-only user has to
|
||||
// tab through the entire app to reach the dismiss button on a stuck error.
|
||||
useEffect(() => {
|
||||
const onKey = (e: KeyboardEvent) => {
|
||||
if (e.key !== "Escape") return;
|
||||
setToasts((prev) => (prev.length === 0 ? prev : prev.slice(0, -1)));
|
||||
};
|
||||
window.addEventListener("keydown", onKey);
|
||||
return () => window.removeEventListener("keydown", onKey);
|
||||
}, []);
|
||||
|
||||
const toastCls = (type: Toast["type"]) =>
|
||||
`flex items-center gap-2 pl-4 pr-2 py-2.5 rounded-xl shadow-2xl shadow-black/40 text-sm backdrop-blur-md animate-in slide-in-from-bottom duration-200 ${
|
||||
type === "success"
|
||||
@@ -47,6 +59,17 @@ export function Toaster() {
|
||||
: "bg-surface-sunken/90 border border-line/40 text-ink"
|
||||
}`;
|
||||
|
||||
// Success/error toasts are intentionally dark in both themes (high-vis).
|
||||
// Info uses the semantic surface that flips with theme — so the dismiss
|
||||
// button needs a tint that stays visible on a light bg in light mode.
|
||||
const dismissCls = (type: Toast["type"]) => {
  // Classes every dismiss button shares: 28px hit area, dimmed until
  // hovered or keyboard-focused, focus ring enabled.
  const shared =
    "ml-1 w-7 h-7 inline-flex items-center justify-center text-base leading-none rounded transition-colors opacity-70 hover:opacity-100 focus-visible:opacity-100 focus:outline-none focus-visible:ring-2 shrink-0";
  // Default tint suits the always-dark toasts; info toasts sit on the
  // theme-following surface and need an ink/accent tint instead.
  let tint = "hover:bg-white/15 focus-visible:ring-white/70";
  if (type === "info") {
    tint = "hover:bg-ink/10 focus-visible:ring-accent/60";
  }
  return `${shared} ${tint}`;
};
|
||||
|
||||
const pos =
|
||||
"fixed bottom-16 left-1/2 -translate-x-1/2 z-[80] flex flex-col gap-2 items-center";
|
||||
|
||||
@@ -66,7 +89,7 @@ export function Toaster() {
|
||||
type="button"
|
||||
onClick={() => dismiss(toast.id)}
|
||||
aria-label="Dismiss notification"
|
||||
className="ml-1 p-1 rounded hover:bg-surface-card/50 transition-colors opacity-70 hover:opacity-100 shrink-0"
|
||||
className={dismissCls(toast.type)}
|
||||
>
|
||||
×
|
||||
</button>
|
||||
@@ -94,7 +117,7 @@ export function Toaster() {
|
||||
type="button"
|
||||
onClick={() => dismiss(toast.id)}
|
||||
aria-label="Dismiss notification"
|
||||
className="ml-1 p-1 rounded hover:bg-surface-card/50 transition-colors opacity-70 hover:opacity-100 shrink-0"
|
||||
className={dismissCls(toast.type)}
|
||||
>
|
||||
×
|
||||
</button>
|
||||
|
||||
@@ -154,10 +154,10 @@ export function Toolbar() {
|
||||
{counts.failed > 0 && (
|
||||
<StatusPill color={statusDotClass("failed")} count={counts.failed} label="failed" />
|
||||
)}
|
||||
<span className="text-ink-soft" aria-hidden="true">·</span>
|
||||
<span className="text-[10px] text-ink-soft whitespace-nowrap">
|
||||
<span className="text-ink-mid" aria-hidden="true">·</span>
|
||||
<span className="text-[10px] text-ink-mid whitespace-nowrap">
|
||||
{counts.roots} workspace{counts.roots !== 1 ? "s" : ""}
|
||||
{counts.children > 0 && <span className="text-ink-soft"> + {counts.children} sub</span>}
|
||||
{counts.children > 0 && <span className="text-ink-mid"> + {counts.children} sub</span>}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@@ -172,7 +172,7 @@ export function Toolbar() {
|
||||
type="button"
|
||||
onClick={stopAll}
|
||||
disabled={stopping}
|
||||
className="flex items-center gap-1.5 px-2.5 py-1 bg-red-950/50 hover:bg-red-900/60 border border-red-800/40 rounded-lg transition-colors disabled:opacity-50"
|
||||
className="flex items-center gap-1.5 px-2.5 py-1 bg-bad/10 hover:bg-bad/20 border border-bad/40 rounded-lg transition-colors disabled:opacity-50 focus:outline-none focus-visible:ring-2 focus-visible:ring-bad/40"
|
||||
title={`Stop all running tasks (${counts.activeTasks} active)`}
|
||||
aria-label={stopping ? "Stopping all running tasks" : `Stop all running tasks (${counts.activeTasks} active)`}
|
||||
>
|
||||
@@ -191,7 +191,7 @@ export function Toolbar() {
|
||||
type="button"
|
||||
onClick={() => setRestartConfirmOpen(true)}
|
||||
disabled={restartingAll}
|
||||
className="flex items-center gap-1.5 px-2.5 py-1 bg-amber-950/40 hover:bg-amber-900/50 border border-amber-800/40 rounded-lg transition-colors disabled:opacity-50"
|
||||
className="flex items-center gap-1.5 px-2.5 py-1 bg-warm/10 hover:bg-warm/20 border border-warm/40 rounded-lg transition-colors disabled:opacity-50 focus:outline-none focus-visible:ring-2 focus-visible:ring-warm/40"
|
||||
title={`Restart ${needsRestartNodes.length} workspace${needsRestartNodes.length === 1 ? "" : "s"} that need to pick up config or secret changes`}
|
||||
aria-label={restartingAll ? "Restarting workspaces" : `Restart ${needsRestartNodes.length} workspace${needsRestartNodes.length === 1 ? "" : "s"} pending config or secret changes`}
|
||||
>
|
||||
@@ -216,10 +216,10 @@ export function Toolbar() {
|
||||
aria-pressed={showA2AEdges}
|
||||
aria-label={showA2AEdges ? "Hide A2A edges" : "Show A2A edges"}
|
||||
title={showA2AEdges ? "Hide A2A delegation edges" : "Show A2A delegation edges (last 60 min)"}
|
||||
className={`flex items-center justify-center w-7 h-7 border rounded-lg transition-colors ${
|
||||
className={`flex items-center justify-center w-7 h-7 border rounded-lg transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40 ${
|
||||
showA2AEdges
|
||||
? "bg-blue-950/50 hover:bg-blue-900/50 border-blue-800/40 text-accent"
|
||||
: "bg-surface-card/50 hover:bg-surface-card/50 border-line/40 text-ink-soft hover:text-ink-mid"
|
||||
? "bg-accent/15 hover:bg-accent/25 border-accent/50 text-accent"
|
||||
: "bg-surface-card hover:bg-surface-card/70 border-line text-ink-mid hover:text-ink"
|
||||
}`}
|
||||
>
|
||||
{/* Mesh / network icon */}
|
||||
@@ -255,7 +255,7 @@ export function Toolbar() {
|
||||
}}
|
||||
aria-label="Open audit trail for selected workspace"
|
||||
title="Audit — view ledger for the selected workspace"
|
||||
className="flex items-center justify-center w-7 h-7 bg-surface-card/50 hover:bg-surface-card/50 border border-line/40 rounded-lg transition-colors text-ink-soft hover:text-ink-mid"
|
||||
className="flex items-center justify-center w-7 h-7 bg-surface-card hover:bg-surface-card/70 border border-line rounded-lg transition-colors text-ink-mid hover:text-ink focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40"
|
||||
>
|
||||
{/* Scroll / ledger icon */}
|
||||
<svg
|
||||
@@ -277,7 +277,7 @@ export function Toolbar() {
|
||||
onClick={() => useCanvasStore.getState().setSearchOpen(true)}
|
||||
aria-label="Search workspaces"
|
||||
title="Search (⌘K)"
|
||||
className="flex items-center justify-center w-7 h-7 bg-surface-card/50 hover:bg-surface-card/50 border border-line/40 rounded-lg transition-colors text-ink-soft hover:text-ink-mid"
|
||||
className="flex items-center justify-center w-7 h-7 bg-surface-card hover:bg-surface-card/70 border border-line rounded-lg transition-colors text-ink-mid hover:text-ink focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40"
|
||||
>
|
||||
<svg width="14" height="14" viewBox="0 0 16 16" fill="none" aria-hidden="true">
|
||||
<circle cx="7" cy="7" r="5" stroke="currentColor" strokeWidth="1.5" />
|
||||
@@ -290,7 +290,7 @@ export function Toolbar() {
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setHelpOpen((open) => !open)}
|
||||
className="flex items-center justify-center w-7 h-7 bg-surface-card/50 hover:bg-surface-card/50 border border-line/40 rounded-lg transition-colors text-ink-soft hover:text-ink-mid"
|
||||
className="flex items-center justify-center w-7 h-7 bg-surface-card hover:bg-surface-card/70 border border-line rounded-lg transition-colors text-ink-mid hover:text-ink focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40"
|
||||
aria-expanded={helpOpen}
|
||||
aria-label="Open quick help"
|
||||
title="Help — shortcuts & quick start"
|
||||
@@ -308,7 +308,7 @@ export function Toolbar() {
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setHelpOpen(false)}
|
||||
className="text-[10px] text-ink-soft hover:text-ink-mid transition-colors"
|
||||
className="text-[10px] text-ink-mid hover:text-ink transition-colors focus:outline-none focus-visible:underline"
|
||||
>
|
||||
Close
|
||||
</button>
|
||||
@@ -358,7 +358,7 @@ function WsStatusPill({ status }: { status: "connected" | "connecting" | "discon
|
||||
return (
|
||||
<div className="flex items-center gap-1.5" title="Real-time updates: connected" aria-label="Real-time updates: connected">
|
||||
<div className={`w-1.5 h-1.5 rounded-full ${statusDotClass("online")}`} aria-hidden="true" />
|
||||
<span className="text-[10px] text-ink-soft" aria-hidden="true">Live</span>
|
||||
<span className="text-[10px] text-ink-mid" aria-hidden="true">Live</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -366,14 +366,14 @@ function WsStatusPill({ status }: { status: "connected" | "connecting" | "discon
|
||||
return (
|
||||
<div className="flex items-center gap-1.5" title="Real-time updates: reconnecting…" aria-label="Real-time updates: reconnecting">
|
||||
<div className="w-1.5 h-1.5 rounded-full bg-amber-400 motion-safe:animate-pulse" aria-hidden="true" />
|
||||
<span className="text-[10px] text-ink-soft" aria-hidden="true">Reconnecting</span>
|
||||
<span className="text-[10px] text-warm" aria-hidden="true">Reconnecting</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
return (
|
||||
<div className="flex items-center gap-1.5" title="Real-time updates: disconnected" aria-label="Real-time updates: disconnected">
|
||||
<div className={`w-1.5 h-1.5 rounded-full ${statusDotClass("failed")}`} aria-hidden="true" />
|
||||
<span className="text-[10px] text-ink-soft" aria-hidden="true">Offline</span>
|
||||
<span className="text-[10px] text-bad" aria-hidden="true">Offline</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -384,7 +384,7 @@ function HelpRow({ shortcut, text }: { shortcut: string; text: string }) {
|
||||
<span className="shrink-0 rounded-md border border-line/60 bg-surface/70 px-2 py-0.5 text-[9px] font-medium uppercase tracking-[0.18em] text-ink-mid">
|
||||
{shortcut}
|
||||
</span>
|
||||
<p className="text-[11px] leading-relaxed text-ink-soft">{text}</p>
|
||||
<p className="text-[11px] leading-relaxed text-ink-mid">{text}</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -22,6 +22,24 @@ export function Tooltip({ text, children }: Props) {
|
||||
|
||||
useEffect(() => () => clearTimeout(timerRef.current), []);
|
||||
|
||||
// WCAG 1.4.13 (Content on Hover or Focus) — Dismissible: a mechanism
|
||||
// is available to dismiss the additional content WITHOUT moving
|
||||
// pointer hover or keyboard focus. Esc dismisses while the trigger
|
||||
// stays focused/hovered, so a screen-magnifier user can read what
|
||||
// the tooltip was covering without losing their place.
|
||||
useEffect(() => {
|
||||
if (!show) return;
|
||||
const onKey = (e: KeyboardEvent) => {
|
||||
if (e.key === "Escape") {
|
||||
e.stopPropagation();
|
||||
clearTimeout(timerRef.current);
|
||||
setShow(false);
|
||||
}
|
||||
};
|
||||
window.addEventListener("keydown", onKey, true);
|
||||
return () => window.removeEventListener("keydown", onKey, true);
|
||||
}, [show]);
|
||||
|
||||
const enter = useCallback(() => {
|
||||
timerRef.current = setTimeout(() => {
|
||||
if (triggerRef.current) {
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
import { useCallback, useMemo } from "react";
|
||||
import { Handle, NodeResizer, Position, type NodeProps, type Node } from "@xyflow/react";
|
||||
import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas";
|
||||
import { getConfigurationError, getConfigurationStatus } from "@/store/canvas-topology";
|
||||
import { showToast } from "@/components/Toaster";
|
||||
import { Tooltip } from "@/components/Tooltip";
|
||||
import { STATUS_CONFIG, TIER_CONFIG } from "@/lib/design-tokens";
|
||||
@@ -35,8 +36,28 @@ function EjectIcon(props: React.SVGProps<SVGSVGElement>) {
|
||||
}
|
||||
|
||||
export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>) {
|
||||
const statusCfg = STATUS_CONFIG[data.status] || STATUS_CONFIG.offline;
|
||||
const tierCfg = TIER_CONFIG[data.tier] || { label: `T${data.tier}`, color: "text-ink-soft bg-surface-card" };
|
||||
// Configuration-status overlay (PR #2756 / #467 chain). When the
|
||||
// workspace is reachable but adapter.setup() failed (typically a
|
||||
// missing/rotated LLM credential), the agent_card carries
|
||||
// configuration_status: "not_configured". Surface this as a distinct
|
||||
// tile state so the operator sees a useful error instead of an
|
||||
// ambiguous "online but silent" workspace.
|
||||
//
|
||||
// The override only applies when the underlying status is "online" —
|
||||
// a workspace that's actually offline / failed / provisioning gets
|
||||
// its own treatment. "online + not_configured" is the gap PR #2756
|
||||
// introduced; everything else was already covered.
|
||||
const isMisconfigured =
|
||||
data.status === "online" &&
|
||||
getConfigurationStatus(data.agentCard) === "not_configured";
|
||||
const configurationError = getConfigurationError(data.agentCard);
|
||||
const effectiveStatus = isMisconfigured ? "not_configured" : data.status;
|
||||
const statusCfg = STATUS_CONFIG[effectiveStatus] || STATUS_CONFIG.offline;
|
||||
const tierCfg = TIER_CONFIG[data.tier] || { label: `T${data.tier}`, color: "text-ink-mid bg-surface-card border border-line" };
|
||||
const tooltipExtra = isMisconfigured && configurationError
|
||||
? `Agent not configured: ${configurationError}`
|
||||
: null;
|
||||
void tooltipExtra; // not consumed yet: the aria-label and title below rebuild this message inline; reserved here for a future tooltip surface.
|
||||
// Org-deploy context — four derived flags off one store subscription.
|
||||
// Drives the shimmer while provisioning, the dimmed/non-draggable
|
||||
// treatment on locked descendants, and the Cancel pill on the root.
|
||||
@@ -75,7 +96,12 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
<div
|
||||
role="button"
|
||||
tabIndex={0}
|
||||
aria-label={`${data.name} workspace — ${data.status}`}
|
||||
aria-label={
|
||||
isMisconfigured && configurationError
|
||||
? `${data.name} workspace — agent not configured: ${configurationError}`
|
||||
: `${data.name} workspace — ${data.status}`
|
||||
}
|
||||
title={isMisconfigured && configurationError ? `Agent not configured: ${configurationError}` : undefined}
|
||||
aria-pressed={isSelected}
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
@@ -179,7 +205,7 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
</div>
|
||||
<div className="flex items-center gap-1.5 shrink-0">
|
||||
{hasChildren && (
|
||||
<span className="text-[10px] font-mono text-violet-300 bg-violet-900/40 border border-violet-700/30 px-1.5 py-0.5 rounded-md">
|
||||
<span className="text-[10px] font-mono text-accent bg-accent/15 border border-accent/40 px-1.5 py-0.5 rounded-md">
|
||||
{descendantCount} sub
|
||||
</span>
|
||||
)}
|
||||
@@ -207,13 +233,13 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
<div className="mb-1 flex items-center gap-1">
|
||||
{runtime === "external" ? (
|
||||
<span
|
||||
className="text-[7px] font-mono px-1.5 py-0.5 rounded-md text-violet-200 bg-violet-900/50 border border-violet-500/40"
|
||||
className="text-[7px] font-mono px-1.5 py-0.5 rounded-md text-white bg-violet-600 border border-violet-700"
|
||||
title="Phase 30 remote agent — runs outside this platform's Docker network. Lifecycle managed via heartbeat-based polling, not Docker exec."
|
||||
>
|
||||
★ REMOTE
|
||||
</span>
|
||||
) : (
|
||||
<span className="text-[7px] font-mono px-1.5 py-0.5 rounded-md text-ink-mid bg-surface-card/60 border border-line/30">
|
||||
<span className="text-[7px] font-mono px-1.5 py-0.5 rounded-md text-ink-mid bg-surface-card border border-line">
|
||||
{runtime}
|
||||
</span>
|
||||
)}
|
||||
@@ -237,15 +263,15 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
key={skill}
|
||||
className={`text-[10px] px-1.5 py-0.5 rounded-md border ${
|
||||
isOnline
|
||||
? "text-good/80 bg-emerald-950/30 border-emerald-800/30"
|
||||
: "text-ink-mid bg-surface-card/60 border-line/40"
|
||||
? "text-good bg-good/15 border-good/40"
|
||||
: "text-ink-mid bg-surface-card border-line"
|
||||
}`}
|
||||
>
|
||||
{skill}
|
||||
</span>
|
||||
))}
|
||||
{skills.length > 4 && (
|
||||
<span className="text-[10px] text-ink-soft self-center">
|
||||
<span className="text-[10px] text-ink-mid self-center">
|
||||
+{skills.length - 4}
|
||||
</span>
|
||||
)}
|
||||
@@ -274,21 +300,22 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
e.stopPropagation();
|
||||
useCanvasStore.getState().restartWorkspace(id).catch(() => showToast("Restart failed", "error"));
|
||||
}}
|
||||
className="flex items-center gap-1.5 mt-1 w-full bg-sky-950/30 px-2 py-1 rounded-md border border-sky-800/30 hover:bg-sky-900/40 transition-colors text-left focus-visible:ring-2 focus-visible:ring-accent/70 focus-visible:outline-none"
|
||||
className="flex items-center gap-1.5 mt-1 w-full bg-accent/10 px-2 py-1 rounded-md border border-accent/40 hover:bg-accent/20 transition-colors text-left focus-visible:ring-2 focus-visible:ring-accent/70 focus-visible:outline-none"
|
||||
>
|
||||
<span className="text-[10px]">↻</span>
|
||||
<span className="text-[10px] text-sky-300/80">Restart to apply changes</span>
|
||||
<span className="text-[10px] text-accent">↻</span>
|
||||
<span className="text-[10px] text-accent">Restart to apply changes</span>
|
||||
</button>
|
||||
)}
|
||||
|
||||
{/* Bottom row: status / active tasks */}
|
||||
<div className="flex items-center justify-between mt-0.5">
|
||||
{data.status !== "online" ? (
|
||||
{effectiveStatus !== "online" ? (
|
||||
<div className={`text-[10px] uppercase tracking-widest font-medium ${
|
||||
data.status === "failed" ? "text-bad" :
|
||||
data.status === "degraded" ? "text-warm" :
|
||||
data.status === "provisioning" ? "text-sky-400" :
|
||||
"text-ink-soft"
|
||||
effectiveStatus === "failed" ? "text-bad" :
|
||||
effectiveStatus === "degraded" ? "text-warm" :
|
||||
effectiveStatus === "not_configured" ? "text-warm" :
|
||||
effectiveStatus === "provisioning" ? "text-accent" :
|
||||
"text-ink-mid"
|
||||
}`}>
|
||||
{statusCfg.label}
|
||||
</div>
|
||||
@@ -296,8 +323,8 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
|
||||
{data.activeTasks > 0 && (
|
||||
<div className="flex items-center gap-1">
|
||||
<div className="w-1 h-1 rounded-full bg-amber-400 motion-safe:animate-pulse" />
|
||||
<span className="text-[10px] text-warm/80 tabular-nums">
|
||||
<div className="w-1 h-1 rounded-full bg-warm motion-safe:animate-pulse" />
|
||||
<span className="text-[10px] text-warm tabular-nums">
|
||||
{data.activeTasks} task{data.activeTasks > 1 ? "s" : ""}
|
||||
</span>
|
||||
</div>
|
||||
@@ -307,12 +334,25 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
{/* Degraded error preview */}
|
||||
{data.status === "degraded" && data.lastSampleError && (
|
||||
<div
|
||||
className="text-[10px] text-warm/60 truncate mt-1 bg-amber-950/20 px-1.5 py-0.5 rounded border border-amber-800/20"
|
||||
className="text-[10px] text-warm truncate mt-1 bg-warm/10 px-1.5 py-0.5 rounded border border-warm/40"
|
||||
title={data.lastSampleError}
|
||||
>
|
||||
{data.lastSampleError}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Configuration error preview — same visual as the degraded
|
||||
* error preview but keyed off the agent_card's configuration_status.
|
||||
* Tells the operator which env var is missing so they can fix it
|
||||
* without having to dig into the workspace logs. */}
|
||||
{isMisconfigured && configurationError && (
|
||||
<div
|
||||
className="text-[10px] text-warm truncate mt-1 bg-warm/10 px-1.5 py-0.5 rounded border border-warm/40"
|
||||
title={configurationError}
|
||||
>
|
||||
{configurationError}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<Handle
|
||||
@@ -357,7 +397,7 @@ function TeamMemberChip({
|
||||
}) {
|
||||
const { data } = node;
|
||||
const statusCfg = STATUS_CONFIG[data.status] || STATUS_CONFIG.offline;
|
||||
const tierCfg = TIER_CONFIG[data.tier] || { label: `T${data.tier}`, color: "text-ink-soft bg-surface-card" };
|
||||
const tierCfg = TIER_CONFIG[data.tier] || { label: `T${data.tier}`, color: "text-ink-mid bg-surface-card border border-line" };
|
||||
const isOnline = data.status === "online";
|
||||
const skills = getSkillNames(data.agentCard);
|
||||
|
||||
@@ -408,7 +448,7 @@ function TeamMemberChip({
|
||||
</div>
|
||||
<div className="flex items-center gap-1 shrink-0">
|
||||
{hasSubChildren && (
|
||||
<span className="text-[7px] font-mono text-violet-300 bg-violet-900/40 border border-violet-700/30 px-1 py-0.5 rounded">
|
||||
<span className="text-[7px] font-mono text-accent bg-accent/15 border border-accent/40 px-1 py-0.5 rounded">
|
||||
{descendantCount}
|
||||
</span>
|
||||
)}
|
||||
@@ -423,7 +463,7 @@ function TeamMemberChip({
|
||||
e.stopPropagation();
|
||||
onExtract(node.id);
|
||||
}}
|
||||
className="opacity-0 group-hover/child:opacity-100 text-ink-soft hover:text-sky-400 transition-all focus-visible:ring-2 focus-visible:ring-accent/70 focus-visible:outline-none rounded"
|
||||
className="opacity-0 group-hover/child:opacity-100 text-ink-mid hover:text-accent transition-all focus-visible:ring-2 focus-visible:ring-accent/70 focus-visible:outline-none rounded"
|
||||
>
|
||||
<EjectIcon aria-hidden="true" />
|
||||
</button>
|
||||
@@ -432,7 +472,7 @@ function TeamMemberChip({
|
||||
|
||||
{/* Role */}
|
||||
{data.role && (
|
||||
<div className="text-[10px] text-ink-soft mb-1 leading-tight truncate">{data.role}</div>
|
||||
<div className="text-[10px] text-ink-mid mb-1 leading-tight truncate">{data.role}</div>
|
||||
)}
|
||||
|
||||
{/* Skills */}
|
||||
@@ -443,8 +483,8 @@ function TeamMemberChip({
|
||||
key={skill}
|
||||
className={`text-[10px] px-1 py-0.5 rounded border ${
|
||||
isOnline
|
||||
? "text-good/70 bg-emerald-950/20 border-emerald-800/20"
|
||||
: "text-ink-soft bg-surface-card/40 border-line/30"
|
||||
? "text-good bg-good/15 border-good/40"
|
||||
: "text-ink-mid bg-surface-card border-line"
|
||||
}`}
|
||||
>
|
||||
{skill}
|
||||
@@ -462,8 +502,8 @@ function TeamMemberChip({
|
||||
<span className={`text-[10px] uppercase tracking-widest font-medium ${
|
||||
data.status === "failed" ? "text-bad" :
|
||||
data.status === "degraded" ? "text-warm" :
|
||||
data.status === "provisioning" ? "text-sky-400" :
|
||||
"text-ink-soft"
|
||||
data.status === "provisioning" ? "text-accent" :
|
||||
"text-ink-mid"
|
||||
}`}>
|
||||
{statusCfg.label}
|
||||
</span>
|
||||
|
||||
@@ -296,4 +296,75 @@ describe("A2ATopologyOverlay component", () => {
|
||||
// setA2AEdges should still be called with an empty array
|
||||
expect(mockStoreState.setA2AEdges).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Regression for the 2026-05-04 render-loop incident:
|
||||
// tenant heartbeats / status flips / peer-discovery writes mutated
|
||||
// canvas store .nodes ~5x/sec. Previously visibleIds was useMemo'd on
|
||||
// [nodes] so the array reference recreated on every store mutation,
|
||||
// causing fetchAndUpdate to recreate, the useEffect to re-fire, and
|
||||
// the 60-second polling fan-out to fire on EVERY store update. With
|
||||
// 5 visible workspaces and 5 store updates/sec, the canvas hammered
|
||||
// /workspaces/<id>/activity?type=delegation 25×/sec until the edge
|
||||
// rate limit returned 429 (per browser console captured by user).
|
||||
//
|
||||
// Fix: select a stable string key (sorted CSV of IDs) from Zustand
|
||||
// so the selector's shallow-equal short-circuit prevents re-renders
|
||||
// when the actual ID set hasn't changed.
|
||||
//
|
||||
// This test verifies the fetch fires ONCE on mount + only re-fires
|
||||
// when the visible ID set actually changes, NOT on every nodes[]
|
||||
// reference change.
|
||||
it("does not re-fetch when nodes[] reference changes but visible IDs are the same", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
const { rerender } = render(<A2ATopologyOverlay />);
|
||||
await act(async () => { await Promise.resolve(); await Promise.resolve(); });
|
||||
|
||||
const callsAfterMount = mockGet.mock.calls.length;
|
||||
// Sanity: 2 visible nodes (ws-a, ws-b) → 2 fan-out requests on mount
|
||||
expect(callsAfterMount).toBe(2);
|
||||
|
||||
// Simulate a store mutation that changes the nodes array reference
|
||||
// (e.g. status flip on a node) WITHOUT changing the set of visible
|
||||
// IDs. Pre-fix: this triggered a re-fetch storm. Post-fix: the
|
||||
// sorted-CSV selector returns the same key, Zustand's shallow-equal
|
||||
// short-circuits, useMemo keeps the same visibleIds, fetchAndUpdate
|
||||
// keeps the same identity, useEffect does NOT re-fire.
|
||||
mockStoreState.nodes = [
|
||||
{ id: "ws-a", hidden: false, data: { newStatus: "online" } }, // mutated
|
||||
{ id: "ws-b", hidden: false, data: {} },
|
||||
{ id: "ws-hidden", hidden: true, data: {} },
|
||||
];
|
||||
rerender(<A2ATopologyOverlay />);
|
||||
await act(async () => { await Promise.resolve(); await Promise.resolve(); });
|
||||
|
||||
// No additional fetches should have fired.
|
||||
expect(mockGet.mock.calls.length).toBe(callsAfterMount);
|
||||
});
|
||||
|
||||
it("re-fetches when the visible ID set actually changes", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
const { rerender } = render(<A2ATopologyOverlay />);
|
||||
await act(async () => { await Promise.resolve(); await Promise.resolve(); });
|
||||
|
||||
const callsAfterMount = mockGet.mock.calls.length;
|
||||
expect(callsAfterMount).toBe(2);
|
||||
|
||||
// Add a new visible workspace — the visible-ID-set actually changed.
|
||||
mockStoreState.nodes = [
|
||||
{ id: "ws-a", hidden: false, data: {} },
|
||||
{ id: "ws-b", hidden: false, data: {} },
|
||||
{ id: "ws-c", hidden: false, data: {} }, // NEW
|
||||
{ id: "ws-hidden", hidden: true, data: {} },
|
||||
];
|
||||
rerender(<A2ATopologyOverlay />);
|
||||
await act(async () => { await Promise.resolve(); await Promise.resolve(); });
|
||||
|
||||
// Should have fetched the additional workspace + the existing two
|
||||
// (the effect re-fires once with the new ID set). Total: 2 + 3 = 5.
|
||||
expect(mockGet.mock.calls.length).toBe(callsAfterMount + 3);
|
||||
const allPaths = mockGet.mock.calls.map(([p]) => p as string);
|
||||
expect(allPaths.some((p) => p.includes("ws-c"))).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -130,6 +130,26 @@ describe("BatchActionBar", () => {
|
||||
const toolbar = screen.getByRole("toolbar");
|
||||
expect(toolbar.getAttribute("aria-label")).toBe("Batch workspace actions");
|
||||
});
|
||||
|
||||
it("Esc clears the selection — matches the deselect button title", () => {
|
||||
// The deselect button has been promising "Clear selection (Escape)"
|
||||
// since the bar shipped, but no handler was wired. This pins the
|
||||
// contract.
|
||||
mockSelectedNodeIds = new Set(["ws-1", "ws-2"]);
|
||||
render(<BatchActionBar />);
|
||||
fireEvent.keyDown(window, { key: "Escape" });
|
||||
expect(mockClearSelection).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("Esc is a no-op when nothing is selected", () => {
|
||||
mockSelectedNodeIds = new Set<string>();
|
||||
render(<BatchActionBar />);
|
||||
fireEvent.keyDown(window, { key: "Escape" });
|
||||
// The early-return at count===0 prevents the bar from mounting at all,
|
||||
// so the keydown listener never registers. clearSelection must NOT be
|
||||
// called.
|
||||
expect(mockClearSelection).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
|
||||
@@ -0,0 +1,178 @@
|
||||
// @vitest-environment jsdom
|
||||
/**
|
||||
* CommunicationOverlay tests — pin the rate-limit fix shipped 2026-05-04.
|
||||
*
|
||||
* The overlay polls /workspaces/:id/activity?limit=5 for each online
|
||||
* workspace. Pre-fix it (a) polled regardless of visibility and (b)
|
||||
* fanned out to 6 workspaces every 10s. With 8+ workspaces a user
|
||||
* triggered sustained 429s (server-side rate limit is 600 req/min/IP).
|
||||
*
|
||||
* These tests pin:
|
||||
* 1. Fan-out cap of 3 — even with 6 online nodes, only 3 fetches
|
||||
* 2. Visibility gate — when collapsed, no polling
|
||||
*
|
||||
* If a future refactor pushes either dial back up, CI fails before
|
||||
* the regression hits a paying tenant.
|
||||
*/
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { render, cleanup, act, fireEvent } from "@testing-library/react";
|
||||
|
||||
// ── Mocks (hoisted before imports) ────────────────────────────────────────────
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: { get: vi.fn() },
|
||||
}));
|
||||
|
||||
// Six online nodes — enough to verify the cap of 3.
|
||||
const mockStoreState = {
|
||||
selectedNodeId: null as string | null,
|
||||
nodes: [
|
||||
{ id: "ws-1", data: { status: "online", name: "ws-1" } },
|
||||
{ id: "ws-2", data: { status: "online", name: "ws-2" } },
|
||||
{ id: "ws-3", data: { status: "online", name: "ws-3" } },
|
||||
{ id: "ws-4", data: { status: "online", name: "ws-4" } },
|
||||
{ id: "ws-5", data: { status: "online", name: "ws-5" } },
|
||||
{ id: "ws-6", data: { status: "online", name: "ws-6" } },
|
||||
{ id: "ws-offline", data: { status: "offline", name: "off" } },
|
||||
],
|
||||
};
|
||||
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: vi.fn(
|
||||
(selector: (s: typeof mockStoreState) => unknown) =>
|
||||
selector(mockStoreState)
|
||||
),
|
||||
}));
|
||||
|
||||
// design-tokens has named exports — keep the shape minimal.
|
||||
vi.mock("@/lib/design-tokens", () => ({
|
||||
COMM_TYPE_LABELS: {
|
||||
a2a_send: "→",
|
||||
a2a_receive: "←",
|
||||
task_update: "✓",
|
||||
},
|
||||
}));
|
||||
|
||||
// ── Imports (after mocks) ─────────────────────────────────────────────────────
|
||||
|
||||
import { api } from "@/lib/api";
|
||||
import { CommunicationOverlay } from "../CommunicationOverlay";
|
||||
|
||||
const mockGet = vi.mocked(api.get);
|
||||
|
||||
// ── Setup ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
beforeEach(() => {
|
||||
vi.useFakeTimers();
|
||||
mockGet.mockReset();
|
||||
mockGet.mockResolvedValue([]);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
cleanup();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
// ── Tests ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("CommunicationOverlay — fan-out cap", () => {
|
||||
it("polls at most 3 of 6 online workspaces (rate-limit floor)", async () => {
|
||||
await act(async () => {
|
||||
render(<CommunicationOverlay />);
|
||||
});
|
||||
// Mount fires the first poll synchronously (no interval tick yet).
|
||||
// Pre-fix: 6 calls. Post-fix: 3.
|
||||
expect(mockGet).toHaveBeenCalledTimes(3);
|
||||
// Verify the calls are for the FIRST 3 online nodes (slice order).
|
||||
expect(mockGet).toHaveBeenCalledWith("/workspaces/ws-1/activity?limit=5");
|
||||
expect(mockGet).toHaveBeenCalledWith("/workspaces/ws-2/activity?limit=5");
|
||||
expect(mockGet).toHaveBeenCalledWith("/workspaces/ws-3/activity?limit=5");
|
||||
});
|
||||
|
||||
it("never polls offline workspaces", async () => {
|
||||
await act(async () => {
|
||||
render(<CommunicationOverlay />);
|
||||
});
|
||||
expect(mockGet).not.toHaveBeenCalledWith(
|
||||
"/workspaces/ws-offline/activity?limit=5",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("CommunicationOverlay — cadence", () => {
|
||||
it("uses 30s interval cadence (was 10s pre-fix)", async () => {
|
||||
await act(async () => {
|
||||
render(<CommunicationOverlay />);
|
||||
});
|
||||
expect(mockGet).toHaveBeenCalledTimes(3); // initial mount poll
|
||||
|
||||
// Advance 10s — pre-fix this would fire another poll. Post-fix: silent.
|
||||
await act(async () => {
|
||||
vi.advanceTimersByTime(10_000);
|
||||
});
|
||||
expect(mockGet).toHaveBeenCalledTimes(3);
|
||||
|
||||
// Advance to 30s — interval fires.
|
||||
await act(async () => {
|
||||
vi.advanceTimersByTime(20_000);
|
||||
});
|
||||
expect(mockGet).toHaveBeenCalledTimes(6); // +3 from second tick
|
||||
});
|
||||
});
|
||||
|
||||
describe("CommunicationOverlay — visibility gate", () => {
|
||||
// The visibility gate is the dial that drops collapsed-panel polling
|
||||
// to ZERO. The cadence test above can't catch its removal — if a
|
||||
// refactor dropped `if (!visible) return`, the cadence test would
|
||||
// still pass because the effect would still fire every 30s.
|
||||
//
|
||||
// Direct probe: render with comms-returning mock so the panel
|
||||
// actually renders (close button only exists in the expanded panel,
|
||||
// not the collapsed button-state). Click close, advance the clock,
|
||||
// assert no further fetches.
|
||||
it("stops polling after the user collapses the panel", async () => {
|
||||
// Mock returns one a2a_send so comms.length > 0 → panel renders →
|
||||
// close button accessible.
|
||||
mockGet.mockResolvedValue([
|
||||
{
|
||||
id: "act-1",
|
||||
workspace_id: "ws-1",
|
||||
activity_type: "a2a_send",
|
||||
source_id: "ws-1",
|
||||
target_id: "ws-2",
|
||||
summary: "test",
|
||||
status: "completed",
|
||||
duration_ms: 100,
|
||||
created_at: new Date().toISOString(),
|
||||
},
|
||||
]);
|
||||
|
||||
const { getByLabelText } = await act(async () => {
|
||||
return render(<CommunicationOverlay />);
|
||||
});
|
||||
// Drain pending microtasks (resolves the await in fetchComms) so
|
||||
// setComms lands and the panel renders. Don't advance time — that
|
||||
// would fire the next interval tick and pollute the assertion.
|
||||
await act(async () => {
|
||||
await Promise.resolve();
|
||||
await Promise.resolve();
|
||||
await Promise.resolve();
|
||||
});
|
||||
// Initial mount polled 3 workspaces.
|
||||
expect(mockGet).toHaveBeenCalledTimes(3);
|
||||
mockGet.mockClear();
|
||||
|
||||
// Click the close button. Synchronous getByLabelText avoids
|
||||
// findBy's internal setTimeout (deadlocks under useFakeTimers).
|
||||
const closeBtn = getByLabelText("Close communications panel");
|
||||
await act(async () => {
|
||||
fireEvent.click(closeBtn);
|
||||
});
|
||||
|
||||
// Advance well past the 30s cadence — gate should suppress the tick.
|
||||
await act(async () => {
|
||||
vi.advanceTimersByTime(60_000);
|
||||
});
|
||||
expect(mockGet).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
@@ -40,7 +40,7 @@ afterEach(() => {
|
||||
describe("CookieConsent", () => {
|
||||
it("renders the banner when no decision is stored", () => {
|
||||
render(<CookieConsent />);
|
||||
expect(screen.getByRole("dialog")).toBeTruthy();
|
||||
expect(screen.getByRole("region")).toBeTruthy();
|
||||
expect(screen.getByRole("button", { name: "Accept all" })).toBeTruthy();
|
||||
expect(screen.getByRole("button", { name: "Necessary only" })).toBeTruthy();
|
||||
});
|
||||
@@ -48,7 +48,7 @@ describe("CookieConsent", () => {
|
||||
it("stores 'accepted' and hides the banner when user clicks Accept all", () => {
|
||||
render(<CookieConsent />);
|
||||
fireEvent.click(screen.getByRole("button", { name: "Accept all" }));
|
||||
expect(screen.queryByRole("dialog")).toBeNull();
|
||||
expect(screen.queryByRole("region")).toBeNull();
|
||||
|
||||
const raw = window.localStorage.getItem(STORAGE_KEY);
|
||||
expect(raw).not.toBeNull();
|
||||
@@ -61,7 +61,7 @@ describe("CookieConsent", () => {
|
||||
it("stores 'rejected' and hides the banner when user clicks Necessary only", () => {
|
||||
render(<CookieConsent />);
|
||||
fireEvent.click(screen.getByRole("button", { name: "Necessary only" }));
|
||||
expect(screen.queryByRole("dialog")).toBeNull();
|
||||
expect(screen.queryByRole("region")).toBeNull();
|
||||
|
||||
const parsed = JSON.parse(window.localStorage.getItem(STORAGE_KEY)!);
|
||||
expect(parsed.decision).toBe("rejected");
|
||||
@@ -73,7 +73,7 @@ describe("CookieConsent", () => {
|
||||
JSON.stringify({ decision: "accepted", decidedAt: new Date().toISOString(), version: 1 }),
|
||||
);
|
||||
render(<CookieConsent />);
|
||||
expect(screen.queryByRole("dialog")).toBeNull();
|
||||
expect(screen.queryByRole("region")).toBeNull();
|
||||
});
|
||||
|
||||
it("re-prompts when the stored decision is on an older policy version", () => {
|
||||
@@ -82,13 +82,13 @@ describe("CookieConsent", () => {
|
||||
JSON.stringify({ decision: "accepted", decidedAt: new Date().toISOString(), version: 0 }),
|
||||
);
|
||||
render(<CookieConsent />);
|
||||
expect(screen.getByRole("dialog")).toBeTruthy();
|
||||
expect(screen.getByRole("region")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("re-prompts when localStorage contains invalid JSON", () => {
|
||||
window.localStorage.setItem(STORAGE_KEY, "{not json");
|
||||
render(<CookieConsent />);
|
||||
expect(screen.getByRole("dialog")).toBeTruthy();
|
||||
expect(screen.getByRole("region")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("exposes a privacy-policy link with target='_blank'", () => {
|
||||
@@ -99,11 +99,19 @@ describe("CookieConsent", () => {
|
||||
expect(link.getAttribute("rel")).toContain("noreferrer");
|
||||
});
|
||||
|
||||
it("uses role=dialog with aria-labelledby and aria-describedby for screen readers", () => {
|
||||
it("uses role=region (NOT dialog) with aria-labelledby/describedby — banner is informational, not modal", () => {
|
||||
// Regression guard: an earlier version claimed role="dialog"
|
||||
// aria-modal="true" without a focus trap. That falsely told screen
|
||||
// readers the rest of the page was inert, trapping AT users in a
|
||||
// banner they couldn't escape. role="region" lets assistive tech
|
||||
// navigate around it normally; the banner stays informational.
|
||||
render(<CookieConsent />);
|
||||
const dialog = screen.getByRole("dialog");
|
||||
expect(dialog.getAttribute("aria-labelledby")).toBe("cookie-consent-title");
|
||||
expect(dialog.getAttribute("aria-describedby")).toBe("cookie-consent-body");
|
||||
const banner = screen.getByRole("region");
|
||||
expect(banner.getAttribute("aria-labelledby")).toBe("cookie-consent-title");
|
||||
expect(banner.getAttribute("aria-describedby")).toBe("cookie-consent-body");
|
||||
// No aria-modal claim — explicit guard against regression.
|
||||
expect(banner.getAttribute("aria-modal")).toBeNull();
|
||||
expect(screen.queryByRole("dialog")).toBeNull();
|
||||
});
|
||||
|
||||
it("does NOT render on local dev (non-SaaS hostname)", () => {
|
||||
@@ -116,7 +124,7 @@ describe("CookieConsent", () => {
|
||||
value: { ...window.location, hostname: "localhost" },
|
||||
});
|
||||
render(<CookieConsent />);
|
||||
expect(screen.queryByRole("dialog")).toBeNull();
|
||||
expect(screen.queryByRole("region")).toBeNull();
|
||||
});
|
||||
|
||||
it("does NOT render on a LAN hostname (192.168.*, *.local)", () => {
|
||||
@@ -125,7 +133,7 @@ describe("CookieConsent", () => {
|
||||
value: { ...window.location, hostname: "192.168.1.74" },
|
||||
});
|
||||
render(<CookieConsent />);
|
||||
expect(screen.queryByRole("dialog")).toBeNull();
|
||||
expect(screen.queryByRole("region")).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -155,18 +155,31 @@ describe("SearchDialog — keyboard accessibility", () => {
|
||||
expect(selectNode).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("typing a new query resets focusedIndex to -1", () => {
|
||||
it("typing a query that matches auto-highlights the first result", () => {
|
||||
// Replaces the older "resets to -1" assertion. New behavior: a query
|
||||
// with at least one match pins the highlight to row 0 so Enter picks
|
||||
// a result instead of being a no-op. Empty-query case is covered by
|
||||
// "Enter at focusedIndex=-1 does not select anything" above.
|
||||
render(<SearchDialog />);
|
||||
const input = screen.getByRole("combobox");
|
||||
fireEvent.change(input, { target: { value: "Alpha" } });
|
||||
const options = screen.getAllByRole("option");
|
||||
expect(options[0].getAttribute("aria-selected")).toBe("true");
|
||||
// Enter on the auto-highlighted match should select it without
|
||||
// needing a manual ArrowDown first.
|
||||
fireEvent.keyDown(input, { key: "Enter" });
|
||||
expect(selectNode).toHaveBeenCalledWith("ws-1");
|
||||
});
|
||||
|
||||
it("typing a query that matches NOTHING resets focusedIndex to -1", () => {
|
||||
render(<SearchDialog />);
|
||||
const input = screen.getByRole("combobox");
|
||||
fireEvent.keyDown(input, { key: "ArrowDown" }); // focusedIndex → 0
|
||||
// Verify selection before reset
|
||||
expect(screen.getAllByRole("option")[0].getAttribute("aria-selected")).toBe("true");
|
||||
// Change query — triggers the useEffect that resets focusedIndex
|
||||
fireEvent.change(input, { target: { value: "Alpha" } });
|
||||
// After reset all options must have aria-selected="false"
|
||||
screen.getAllByRole("option").forEach((opt) => {
|
||||
expect(opt.getAttribute("aria-selected")).toBe("false");
|
||||
});
|
||||
fireEvent.change(input, { target: { value: "zzz-no-match" } });
|
||||
// No options remain, so nothing to assert on aria-selected directly —
|
||||
// the empty-state message takes over. But Enter should be a no-op.
|
||||
fireEvent.keyDown(input, { key: "Enter" });
|
||||
expect(selectNode).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("aria-activedescendant matches the focused option's id", () => {
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
// @vitest-environment jsdom
|
||||
import { describe, it, expect, afterEach, beforeEach, vi } from "vitest";
|
||||
import { render, screen, fireEvent, cleanup, act } from "@testing-library/react";
|
||||
import { Toaster, showToast } from "../Toaster";
|
||||
|
||||
beforeEach(() => {
|
||||
vi.useFakeTimers();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
cleanup();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
describe("Toaster keyboard a11y", () => {
|
||||
it("Esc dismisses the most recent toast", () => {
|
||||
render(<Toaster />);
|
||||
act(() => {
|
||||
showToast("first", "info");
|
||||
showToast("second", "info");
|
||||
});
|
||||
expect(screen.getByText("first")).toBeTruthy();
|
||||
expect(screen.getByText("second")).toBeTruthy();
|
||||
|
||||
act(() => {
|
||||
fireEvent.keyDown(window, { key: "Escape" });
|
||||
});
|
||||
expect(screen.queryByText("second")).toBeNull();
|
||||
expect(screen.getByText("first")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("Esc dismisses persistent error toasts", () => {
|
||||
render(<Toaster />);
|
||||
act(() => {
|
||||
showToast("boom", "error");
|
||||
});
|
||||
expect(screen.getByText("boom")).toBeTruthy();
|
||||
|
||||
act(() => {
|
||||
fireEvent.keyDown(window, { key: "Escape" });
|
||||
});
|
||||
expect(screen.queryByText("boom")).toBeNull();
|
||||
});
|
||||
|
||||
it("Esc with no toasts is a no-op", () => {
|
||||
render(<Toaster />);
|
||||
act(() => {
|
||||
fireEvent.keyDown(window, { key: "Escape" });
|
||||
});
|
||||
// no throw, nothing rendered
|
||||
expect(screen.queryAllByRole("button", { name: "Dismiss notification" })).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("dismiss button has accessible label and is keyboard reachable", () => {
|
||||
render(<Toaster />);
|
||||
act(() => {
|
||||
showToast("hi", "info");
|
||||
});
|
||||
const btn = screen.getByRole("button", { name: "Dismiss notification" });
|
||||
expect(btn).toBeTruthy();
|
||||
// Native <button> defaults to keyboard-focusable; explicit assertion guards
|
||||
// against a future regression where someone adds tabindex=-1.
|
||||
expect(btn.getAttribute("tabindex")).not.toBe("-1");
|
||||
});
|
||||
|
||||
it("dismiss button click removes that specific toast", () => {
|
||||
render(<Toaster />);
|
||||
act(() => {
|
||||
showToast("a", "info");
|
||||
showToast("b", "info");
|
||||
});
|
||||
const buttons = screen.getAllByRole("button", { name: "Dismiss notification" });
|
||||
expect(buttons).toHaveLength(2);
|
||||
|
||||
// Click the first dismiss → "a" goes away, "b" stays
|
||||
act(() => {
|
||||
fireEvent.click(buttons[0]);
|
||||
});
|
||||
expect(screen.queryByText("a")).toBeNull();
|
||||
expect(screen.getByText("b")).toBeTruthy();
|
||||
});
|
||||
});
|
||||
@@ -182,7 +182,7 @@ export function OrgTokensTab() {
|
||||
|
||||
{/* Token list */}
|
||||
{loading ? (
|
||||
<div className="flex items-center justify-center gap-2 py-6 text-ink-soft text-xs">
|
||||
<div role="status" aria-live="polite" className="flex items-center justify-center gap-2 py-6 text-ink-soft text-xs">
|
||||
<Spinner /> Loading keys...
|
||||
</div>
|
||||
) : tokens.length === 0 ? (
|
||||
|
||||
@@ -129,7 +129,7 @@ export function TokensTab({ workspaceId }: TokensTabProps) {
|
||||
|
||||
{/* Token list */}
|
||||
{loading ? (
|
||||
<div className="flex items-center justify-center gap-2 py-6 text-ink-soft text-xs">
|
||||
<div role="status" aria-live="polite" className="flex items-center justify-center gap-2 py-6 text-ink-soft text-xs">
|
||||
<Spinner /> Loading tokens...
|
||||
</div>
|
||||
) : tokens.length === 0 ? (
|
||||
|
||||
@@ -110,8 +110,11 @@ export function ActivityTab({ workspaceId }: Props) {
|
||||
Full Trace
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={loadActivities}
|
||||
className="px-2 py-1 bg-surface-card hover:bg-surface-card text-[11px] rounded text-ink-mid"
|
||||
// hover:bg-surface-card on top of itself was a no-op;
|
||||
// lift to surface-elevated + focus-visible ring.
|
||||
className="px-2 py-1 bg-surface-card hover:bg-surface-elevated hover:text-ink text-[11px] rounded text-ink-mid transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/50"
|
||||
>
|
||||
Refresh
|
||||
</button>
|
||||
|
||||
@@ -365,8 +365,12 @@ export function ChannelsTab({ workspaceId }: Props) {
|
||||
<p className="text-[10px] text-bad">{formError}</p>
|
||||
)}
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleCreate}
|
||||
className="w-full text-xs py-1.5 rounded bg-accent-strong hover:bg-accent text-white transition"
|
||||
// Was bg-accent-strong hover:bg-accent — accent is the
|
||||
// LIGHTER variant; same AA contrast trap fixed in
|
||||
// ScheduleTab/MemoryTab/OnboardingWizard.
|
||||
className="w-full text-xs py-1.5 rounded bg-accent hover:bg-accent-strong text-white transition focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-2 focus-visible:ring-offset-surface"
|
||||
>
|
||||
Connect Channel
|
||||
</button>
|
||||
|
||||
@@ -177,10 +177,10 @@ export function ChatTab({ workspaceId, data }: Props) {
|
||||
aria-controls="chat-panel-my-chat"
|
||||
tabIndex={subTab === "my-chat" ? 0 : -1}
|
||||
onClick={() => setSubTab("my-chat")}
|
||||
className={`px-3 py-1.5 text-[10px] font-medium transition-colors ${
|
||||
className={`px-3 py-1.5 text-[10px] font-medium transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40 ${
|
||||
subTab === "my-chat"
|
||||
? "text-ink border-b-2 border-accent"
|
||||
: "text-ink-soft hover:text-ink-mid"
|
||||
: "text-ink-mid hover:text-ink"
|
||||
}`}
|
||||
>
|
||||
My Chat
|
||||
@@ -192,10 +192,10 @@ export function ChatTab({ workspaceId, data }: Props) {
|
||||
aria-controls="chat-panel-agent-comms"
|
||||
tabIndex={subTab === "agent-comms" ? 0 : -1}
|
||||
onClick={() => setSubTab("agent-comms")}
|
||||
className={`px-3 py-1.5 text-[10px] font-medium transition-colors ${
|
||||
className={`px-3 py-1.5 text-[10px] font-medium transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40 ${
|
||||
subTab === "agent-comms"
|
||||
? "text-ink border-b-2 border-accent"
|
||||
: "text-ink-soft hover:text-ink-mid"
|
||||
: "text-ink-mid hover:text-ink"
|
||||
}`}
|
||||
>
|
||||
Agent Comms
|
||||
@@ -773,14 +773,39 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
<div
|
||||
className={`max-w-[85%] rounded-lg px-3 py-2 text-xs ${
|
||||
msg.role === "user"
|
||||
? "bg-accent-strong/30 text-blue-100 border border-accent/20"
|
||||
// Solid blue-600 in both modes — `bg-accent` themes
|
||||
// lighter in dark, dropping white-text contrast to
|
||||
// ~3:1 (fails AA). blue-600 keeps ~5:1 against white
|
||||
// on both warm-paper and dark-slate panels.
|
||||
? "bg-blue-600 text-white border border-blue-700 dark:bg-blue-500 dark:border-blue-400 shadow-sm"
|
||||
: msg.role === "system"
|
||||
? "bg-red-900/30 text-red-200 border border-red-800/30"
|
||||
: "bg-surface-card/80 text-ink border border-line/30"
|
||||
// Bump the system bubble's opacity in dark — /10
|
||||
// overlay was nearly invisible against the dark
|
||||
// panel bg.
|
||||
? "bg-bad/10 text-bad border border-bad/40 dark:bg-bad/25 dark:text-bad dark:border-bad/60"
|
||||
// Agent bubble in dark: surface-card (#1a1d23) is
|
||||
// only ~7% lighter than the panel bg-surface
|
||||
// (#0e1014). Bump to zinc-700 for a clearly
|
||||
// elevated bubble; light mode keeps the warm
|
||||
// surface-card tint.
|
||||
: "bg-surface-card text-ink border border-line dark:bg-zinc-700 dark:text-zinc-100 dark:border-zinc-600 shadow-sm"
|
||||
}`}
|
||||
>
|
||||
{msg.content && (
|
||||
<div className="prose prose-sm prose-invert max-w-none [&>p]:mb-1 [&>p:last-child]:mb-0">
|
||||
<div
|
||||
className={`prose prose-sm max-w-none [&>p]:mb-1 [&>p:last-child]:mb-0 ${
|
||||
msg.role === "user"
|
||||
? "prose-invert"
|
||||
// Agent bubbles in dark mode: invert prose AND brighten
|
||||
// the body/heading/bold/code tokens. prose-invert's
|
||||
// default `--tw-prose-invert-body: zinc-300` lands at
|
||||
// ~5.3:1 against bg-zinc-700 — passes AA but reads
|
||||
// washed out next to the user bubble's crisp
|
||||
// white-on-blue (~10:1). Push body to zinc-100 so the
|
||||
// agent text matches that crispness.
|
||||
: "dark:prose-invert dark:[--tw-prose-invert-body:theme(colors.zinc.100)] dark:[--tw-prose-invert-headings:theme(colors.white)] dark:[--tw-prose-invert-bold:theme(colors.white)] dark:[--tw-prose-invert-code:theme(colors.zinc.100)]"
|
||||
}`}
|
||||
>
|
||||
<ReactMarkdown remarkPlugins={[remarkGfm]}>{msg.content}</ReactMarkdown>
|
||||
</div>
|
||||
)}
|
||||
@@ -796,7 +821,7 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
<div className="text-[9px] text-ink-soft mt-1">
|
||||
<div className={`text-[9px] mt-1 ${msg.role === "user" ? "text-white/70" : "text-ink-mid"}`}>
|
||||
{new Date(msg.timestamp).toLocaleTimeString()}
|
||||
</div>
|
||||
</div>
|
||||
@@ -896,7 +921,7 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
placeholder={agentReachable ? "Send a message... (Shift+Enter for new line, paste images to attach)" : `Agent is ${data.status}`}
|
||||
disabled={!agentReachable || sending}
|
||||
rows={1}
|
||||
className="flex-1 bg-surface-card border border-line rounded-lg px-3 py-2 text-xs text-ink placeholder-zinc-500 focus:outline-none focus:border-accent resize-none disabled:opacity-50"
|
||||
className="flex-1 bg-surface-card border border-line rounded-lg px-3 py-2 text-xs text-ink placeholder-ink-soft dark:bg-zinc-800 dark:border-zinc-600 dark:placeholder-zinc-500 focus:outline-none focus:border-accent focus-visible:ring-2 focus-visible:ring-accent/40 resize-none disabled:opacity-50"
|
||||
/>
|
||||
<button
|
||||
onClick={sendMessage}
|
||||
|
||||
@@ -65,11 +65,11 @@ function AgentCardSection({ workspaceId }: { workspaceId: string }) {
|
||||
{error && <div className="px-2 py-1 bg-red-900/30 border border-red-800 rounded text-[10px] text-bad">{error}</div>}
|
||||
<div className="flex gap-2">
|
||||
<button type="button" onClick={handleSave} disabled={saving}
|
||||
className="px-2 py-1 bg-accent-strong hover:bg-accent text-[10px] rounded text-white disabled:opacity-50">
|
||||
className="px-2 py-1 bg-accent hover:bg-accent-strong text-[10px] rounded text-white disabled:opacity-50 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-1 focus-visible:ring-offset-surface">
|
||||
{saving ? "Saving..." : "Save"}
|
||||
</button>
|
||||
<button type="button" onClick={() => setEditing(false)}
|
||||
className="px-2 py-1 bg-surface-card hover:bg-surface-card text-[10px] rounded text-ink-mid">Cancel</button>
|
||||
className="px-2 py-1 bg-surface-card hover:bg-surface-elevated hover:text-ink text-[10px] rounded text-ink-mid transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40 focus-visible:ring-offset-1 focus-visible:ring-offset-surface">Cancel</button>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
@@ -655,7 +655,8 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
>
|
||||
<option value={1}>T1 — Sandboxed</option>
|
||||
<option value={2}>T2 — Standard</option>
|
||||
<option value={3}>T3 — Full Access</option>
|
||||
<option value={3}>T3 — Privileged</option>
|
||||
<option value={4}>T4 — Full Access</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
@@ -889,7 +890,6 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
<TagList label="Skills" values={config.skills || []} onChange={(v) => update("skills", v)} placeholder="e.g. code-review" />
|
||||
<TagList label="Tools" values={config.tools || []} onChange={(v) => update("tools", v)} placeholder="e.g. web_search, filesystem" />
|
||||
<TagList label="Prompt Files" values={config.prompt_files || []} onChange={(v) => update("prompt_files", v)} placeholder="e.g. system-prompt.md" />
|
||||
<TagList label="Shared Context" values={config.shared_context || []} onChange={(v) => update("shared_context", v)} placeholder="e.g. architecture.md" />
|
||||
</Section>
|
||||
|
||||
<Section title="A2A Protocol" defaultOpen={false}>
|
||||
@@ -955,7 +955,8 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
type="button"
|
||||
onClick={() => handleSave(true)}
|
||||
disabled={!isDirty || saving}
|
||||
className="px-3 py-1.5 bg-accent-strong hover:bg-accent text-xs rounded text-white disabled:opacity-30 transition-colors"
|
||||
// Same accent-LIGHTER fix shipped on every other tab.
|
||||
className="px-3 py-1.5 bg-accent hover:bg-accent-strong text-xs rounded text-white disabled:opacity-30 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-1 focus-visible:ring-offset-surface"
|
||||
>
|
||||
{saving ? "Restarting..." : "Save & Restart"}
|
||||
</button>
|
||||
|
||||
@@ -166,7 +166,10 @@ export function DetailsTab({ workspaceId, data }: Props) {
|
||||
type="button"
|
||||
onClick={handleSave}
|
||||
disabled={saving}
|
||||
className="px-3 py-1 bg-accent-strong hover:bg-accent text-xs rounded text-white disabled:opacity-50"
|
||||
// Was bg-accent-strong hover:bg-accent — accent is the
|
||||
// LIGHTER variant; flipped + focus-visible ring (same
|
||||
// trap fix shipped on every other tab).
|
||||
className="px-3 py-1 bg-accent hover:bg-accent-strong text-xs rounded text-white disabled:opacity-50 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-1 focus-visible:ring-offset-surface"
|
||||
>
|
||||
{saving ? "Saving..." : "Save"}
|
||||
</button>
|
||||
@@ -322,7 +325,10 @@ export function DetailsTab({ workspaceId, data }: Props) {
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleDelete}
|
||||
className="px-3 py-1 bg-red-600 hover:bg-red-500 text-xs rounded text-white"
|
||||
// hover:bg-red-500 LIGHTER on white text drops AA;
|
||||
// flipped to bg-red-700 + focus-visible danger ring,
|
||||
// matching the ConfirmDialog/DeleteCascade pattern.
|
||||
className="px-3 py-1 bg-red-600 hover:bg-red-700 text-xs rounded text-white transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-red-500/60 focus-visible:ring-offset-1 focus-visible:ring-offset-surface"
|
||||
>
|
||||
Confirm Delete
|
||||
</button>
|
||||
@@ -334,7 +340,9 @@ export function DetailsTab({ workspaceId, data }: Props) {
|
||||
// Return focus to the trigger so keyboard users aren't stranded
|
||||
deleteButtonRef.current?.focus();
|
||||
}}
|
||||
className="px-3 py-1 bg-surface-card hover:bg-surface-card text-xs rounded text-ink-mid"
|
||||
// Was hover:bg-surface-card on top of itself (no-op);
|
||||
// lift to surface-elevated.
|
||||
className="px-3 py-1 bg-surface-card hover:bg-surface-elevated hover:text-ink text-xs rounded text-ink-mid transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40 focus-visible:ring-offset-1 focus-visible:ring-offset-surface"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
|
||||
@@ -15,14 +15,20 @@ interface EventEntry {
|
||||
created_at: string;
|
||||
}
|
||||
|
||||
// Use semantic warm-paper tokens so colors flip with theme. Earlier
|
||||
// the table referenced text-yellow-400 / text-purple-400 (Tailwind
|
||||
// raw colors, no theme variant), which read fine in dark mode but
|
||||
// washed out in the warm-paper light theme. text-warm covers the
|
||||
// "degraded" amber tone in both modes; AGENT_CARD_UPDATED is informational
|
||||
// metadata, so reuse text-accent for theme-consistency.
|
||||
const EVENT_COLORS: Record<string, string> = {
|
||||
WORKSPACE_ONLINE: "text-good",
|
||||
WORKSPACE_OFFLINE: "text-ink-mid",
|
||||
WORKSPACE_DEGRADED: "text-yellow-400",
|
||||
WORKSPACE_DEGRADED: "text-warm",
|
||||
WORKSPACE_PROVISIONING: "text-accent",
|
||||
WORKSPACE_REMOVED: "text-bad",
|
||||
WORKSPACE_PROVISION_FAILED: "text-bad",
|
||||
AGENT_CARD_UPDATED: "text-purple-400",
|
||||
AGENT_CARD_UPDATED: "text-accent",
|
||||
};
|
||||
|
||||
export function EventsTab({ workspaceId }: Props) {
|
||||
@@ -64,8 +70,12 @@ export function EventsTab({ workspaceId }: Props) {
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<span className="text-xs text-ink-mid">{events.length} events</span>
|
||||
<button
|
||||
type="button"
|
||||
onClick={loadEvents}
|
||||
className="px-2 py-1 bg-surface-card hover:bg-surface-card text-[10px] rounded text-ink-mid"
|
||||
// Was hover:bg-surface-card on top of bg-surface-card — silent
|
||||
// no-op hover. Lift to surface-elevated, matching the Cancel
|
||||
// pattern from ConfirmDialog.
|
||||
className="px-2 py-1 bg-surface-card hover:bg-surface-elevated hover:text-ink text-[10px] rounded text-ink-mid transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/50"
|
||||
>
|
||||
Refresh
|
||||
</button>
|
||||
@@ -81,39 +91,51 @@ export function EventsTab({ workspaceId }: Props) {
|
||||
<p className="text-xs text-ink-soft text-center py-4">No events yet</p>
|
||||
) : (
|
||||
<div className="space-y-1">
|
||||
{events.map((event) => (
|
||||
<div key={event.id} className="bg-surface-card rounded border border-line">
|
||||
<button
|
||||
onClick={() => setExpanded(expanded === event.id ? null : event.id)}
|
||||
className="w-full flex items-center gap-2 px-3 py-2 text-left"
|
||||
>
|
||||
<span
|
||||
className={`text-xs font-mono ${
|
||||
EVENT_COLORS[event.event_type] || "text-ink-mid"
|
||||
}`}
|
||||
{events.map((event) => {
|
||||
const isOpen = expanded === event.id;
|
||||
const panelId = `events-payload-${event.id}`;
|
||||
return (
|
||||
<div key={event.id} className="bg-surface-card rounded border border-line">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setExpanded(isOpen ? null : event.id)}
|
||||
// aria-expanded + aria-controls so screen readers
|
||||
// announce the open/closed state and link the row to
|
||||
// its payload panel. Without these, AT users hear
|
||||
// a generic "button" with no indication that it
|
||||
// toggles or what it controls.
|
||||
aria-expanded={isOpen}
|
||||
aria-controls={panelId}
|
||||
className="w-full flex items-center gap-2 px-3 py-2 text-left rounded-t hover:bg-surface-elevated/40 focus:outline-none focus-visible:ring-2 focus-visible:ring-inset focus-visible:ring-accent/50 transition-colors"
|
||||
>
|
||||
{event.event_type}
|
||||
</span>
|
||||
<span className="text-[9px] text-ink-soft ml-auto">
|
||||
{formatTime(event.created_at)}
|
||||
</span>
|
||||
<span className="text-[10px] text-ink-soft">
|
||||
{expanded === event.id ? "▼" : "▶"}
|
||||
</span>
|
||||
</button>
|
||||
<span
|
||||
className={`text-xs font-mono ${
|
||||
EVENT_COLORS[event.event_type] || "text-ink-mid"
|
||||
}`}
|
||||
>
|
||||
{event.event_type}
|
||||
</span>
|
||||
<span className="text-[9px] text-ink-soft ml-auto">
|
||||
{formatTime(event.created_at)}
|
||||
</span>
|
||||
<span aria-hidden="true" className="text-[10px] text-ink-soft">
|
||||
{isOpen ? "▼" : "▶"}
|
||||
</span>
|
||||
</button>
|
||||
|
||||
{expanded === event.id && (
|
||||
<div className="px-3 pb-2">
|
||||
<pre className="text-[10px] text-ink-mid bg-surface-sunken rounded p-2 overflow-x-auto max-h-40">
|
||||
{JSON.stringify(event.payload, null, 2)}
|
||||
</pre>
|
||||
<div className="mt-1 text-[9px] text-ink-soft font-mono">
|
||||
ID: {event.id}
|
||||
{isOpen && (
|
||||
<div id={panelId} className="px-3 pb-2">
|
||||
<pre className="text-[10px] text-ink-mid bg-surface-sunken rounded p-2 overflow-x-auto max-h-40">
|
||||
{JSON.stringify(event.payload, null, 2)}
|
||||
</pre>
|
||||
<div className="mt-1 text-[9px] text-ink-soft font-mono">
|
||||
ID: {event.id}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -162,25 +162,29 @@ export function FilesTab({ workspaceId }: Props) {
|
||||
/>
|
||||
|
||||
{showDeleteAll && (
|
||||
<div className="mx-3 mt-2 px-3 py-2 bg-red-950/30 border border-red-800/40 rounded space-y-1.5">
|
||||
<p className="text-xs text-bad">Delete all {files.filter((f) => !f.dir).length} files? This cannot be undone.</p>
|
||||
// role=alertdialog so SR users hear this destructive prompt
|
||||
// immediately. Delete-All hovers DARKER (bg-red-700) — same AA
|
||||
// contrast trap that bit ConfirmDialog/ApprovalBanner. Cancel
|
||||
// lifts to surface-elevated instead of the prior no-op hover.
|
||||
<div role="alertdialog" aria-labelledby="files-delete-all-msg" className="mx-3 mt-2 px-3 py-2 bg-red-950/30 border border-red-800/40 rounded space-y-1.5">
|
||||
<p id="files-delete-all-msg" className="text-xs text-bad">Delete all {files.filter((f) => !f.dir).length} files? This cannot be undone.</p>
|
||||
<div className="flex gap-2">
|
||||
<button type="button" onClick={() => { handleDeleteAll(); setShowDeleteAll(false); }} className="px-2 py-0.5 bg-red-600 hover:bg-red-500 text-[10px] rounded text-white">Delete All</button>
|
||||
<button type="button" onClick={() => setShowDeleteAll(false)} className="px-2 py-0.5 bg-surface-card hover:bg-surface-card text-[10px] rounded text-ink-mid">Cancel</button>
|
||||
<button type="button" onClick={() => { handleDeleteAll(); setShowDeleteAll(false); }} className="px-2 py-0.5 bg-red-600 hover:bg-red-700 text-[10px] rounded text-white transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-red-500/60 focus-visible:ring-offset-1 focus-visible:ring-offset-surface">Delete All</button>
|
||||
<button type="button" onClick={() => setShowDeleteAll(false)} className="px-2 py-0.5 bg-surface-card hover:bg-surface-elevated hover:text-ink text-[10px] rounded text-ink-mid transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40 focus-visible:ring-offset-1 focus-visible:ring-offset-surface">Cancel</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<div className="mx-3 mt-2 px-3 py-1.5 bg-red-900/30 border border-red-800 rounded text-xs text-bad">{error}</div>
|
||||
<div role="alert" className="mx-3 mt-2 px-3 py-1.5 bg-red-900/30 border border-red-800 rounded text-xs text-bad">{error}</div>
|
||||
)}
|
||||
|
||||
{confirmDelete && (
|
||||
<div className="mx-3 mt-2 px-3 py-2 bg-amber-950/30 border border-amber-800/40 rounded space-y-1.5">
|
||||
<p className="text-xs text-warm">Delete <span className="font-mono">{confirmDelete}</span>{files.find((f) => f.path === confirmDelete && f.dir) ? " and all its contents" : ""}?</p>
|
||||
<div role="alertdialog" aria-labelledby="files-delete-one-msg" className="mx-3 mt-2 px-3 py-2 bg-amber-950/30 border border-amber-800/40 rounded space-y-1.5">
|
||||
<p id="files-delete-one-msg" className="text-xs text-warm">Delete <span className="font-mono">{confirmDelete}</span>{files.find((f) => f.path === confirmDelete && f.dir) ? " and all its contents" : ""}?</p>
|
||||
<div className="flex gap-2">
|
||||
<button type="button" onClick={confirmDeleteFile} className="px-2 py-0.5 bg-red-600 hover:bg-red-500 text-[10px] rounded text-white">Delete</button>
|
||||
<button type="button" onClick={() => setConfirmDelete(null)} className="px-2 py-0.5 bg-surface-card hover:bg-surface-card text-[10px] rounded text-ink-mid">Cancel</button>
|
||||
<button type="button" onClick={confirmDeleteFile} className="px-2 py-0.5 bg-red-600 hover:bg-red-700 text-[10px] rounded text-white transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-red-500/60 focus-visible:ring-offset-1 focus-visible:ring-offset-surface">Delete</button>
|
||||
<button type="button" onClick={() => setConfirmDelete(null)} className="px-2 py-0.5 bg-surface-card hover:bg-surface-elevated hover:text-ink text-[10px] rounded text-ink-mid transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40 focus-visible:ring-offset-1 focus-visible:ring-offset-surface">Cancel</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -10,6 +10,7 @@ interface Props {
|
||||
interface MemoryEntry {
|
||||
key: string;
|
||||
value: unknown;
|
||||
version?: number;
|
||||
expires_at: string | null;
|
||||
updated_at: string;
|
||||
}
|
||||
@@ -28,6 +29,10 @@ export function MemoryTab({ workspaceId }: Props) {
|
||||
const [newValue, setNewValue] = useState("");
|
||||
const [newTTL, setNewTTL] = useState("");
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [editingKey, setEditingKey] = useState<string | null>(null);
|
||||
const [editValue, setEditValue] = useState("");
|
||||
const [editTTL, setEditTTL] = useState("");
|
||||
const [editError, setEditError] = useState<string | null>(null);
|
||||
|
||||
const awarenessUrl = useMemo(() => {
|
||||
try {
|
||||
@@ -109,6 +114,69 @@ export function MemoryTab({ workspaceId }: Props) {
|
||||
}
|
||||
};
|
||||
|
||||
const beginEdit = (entry: MemoryEntry) => {
|
||||
setEditError(null);
|
||||
setEditingKey(entry.key);
|
||||
// Stringify objects/arrays as pretty JSON; render plain strings raw so the
|
||||
// editor doesn't surprise users with surrounding quotes.
|
||||
setEditValue(
|
||||
typeof entry.value === "string"
|
||||
? entry.value
|
||||
: JSON.stringify(entry.value, null, 2),
|
||||
);
|
||||
if (entry.expires_at) {
|
||||
const remainingMs = new Date(entry.expires_at).getTime() - Date.now();
|
||||
const ttl = Math.max(0, Math.floor(remainingMs / 1000));
|
||||
setEditTTL(ttl > 0 ? String(ttl) : "");
|
||||
} else {
|
||||
setEditTTL("");
|
||||
}
|
||||
};
|
||||
|
||||
const cancelEdit = () => {
|
||||
setEditingKey(null);
|
||||
setEditValue("");
|
||||
setEditTTL("");
|
||||
setEditError(null);
|
||||
};
|
||||
|
||||
// Persists the inline edit for `entry` with a POST to the workspace memory
// endpoint.
//
// Payload rules:
//   - `value` is the draft parsed as JSON when it parses, otherwise the raw
//     draft string.
//   - `if_match_version` is sent only when the entry carries a numeric version.
//   - `ttl_seconds` is sent only when the TTL draft is a positive whole number.
//
// On success the editor is closed and the list reloaded. On failure the error
// is shown inline; a version conflict additionally triggers a reload.
const handleEditSave = async (entry: MemoryEntry) => {
  setEditError(null);

  let parsedValue: unknown;
  try {
    parsedValue = JSON.parse(editValue);
  } catch {
    // Not valid JSON, so store the text as a plain string.
    parsedValue = editValue;
  }

  // if_match_version closes the silent-overwrite hole when two writers
  // race. The handler returns 409 with the current version on mismatch
  // — surface that as a retry hint and reload to pick up the new state.
  const body: Record<string, unknown> = { key: entry.key, value: parsedValue };
  if (typeof entry.version === "number") {
    body.if_match_version = entry.version;
  }

  // Accept only a plain run of decimal digits. A bare parseInt() stops at the
  // first non-digit, so "90s" would become 90 and "1e3" would become 1. Such
  // partially numeric input is dropped like any other junk instead.
  const ttlText = editTTL.trim();
  if (/^\d+$/.test(ttlText)) {
    const ttl = Number.parseInt(ttlText, 10);
    if (ttl > 0) body.ttl_seconds = ttl;
  }

  try {
    await api.post(`/workspaces/${workspaceId}/memory`, body);
    cancelEdit();
    loadMemory();
  } catch (e) {
    const message = e instanceof Error ? e.message : "Failed to save";
    // Match 409 only as a standalone number so longer digit runs such as
    // "4090" are not mistaken for a conflict.
    // NOTE(review): detection is still message-based. If the api helper
    // exposes a structured status code, prefer that. TODO confirm.
    const isConflict =
      /(^|\D)409(\D|$)/.test(message) || /if_match_version mismatch/i.test(message);
    if (isConflict) {
      setEditError("This entry changed since you opened it. Reloading.");
      loadMemory();
    } else {
      setEditError(message);
    }
  }
};
|
||||
|
||||
// Opens the awareness URL in a new browsing context. The feature string
// requests noopener and noreferrer for the opened window.
const openAwareness = () => {
  const windowFeatures = "noopener,noreferrer";
  window.open(awarenessUrl, "_blank", windowFeatures);
};
|
||||
@@ -137,14 +205,14 @@ export function MemoryTab({ workspaceId }: Props) {
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setShowAwareness((prev) => !prev)}
|
||||
className="shrink-0 px-2 py-1 bg-surface-card hover:bg-surface-card text-[10px] rounded text-ink"
|
||||
className="shrink-0 px-2 py-1 bg-surface-card hover:bg-surface-elevated text-[10px] rounded text-ink"
|
||||
>
|
||||
{showAwareness ? "Collapse" : "Expand"}
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={openAwareness}
|
||||
className="shrink-0 px-2 py-1 bg-surface-card hover:bg-surface-card text-[10px] rounded text-ink"
|
||||
className="shrink-0 px-2 py-1 bg-surface-card hover:bg-surface-elevated text-[10px] rounded text-ink"
|
||||
>
|
||||
Open
|
||||
</button>
|
||||
@@ -177,7 +245,7 @@ export function MemoryTab({ workspaceId }: Props) {
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setShowAwareness(true)}
|
||||
className="shrink-0 px-2 py-1 bg-accent-strong hover:bg-accent text-[10px] rounded text-white"
|
||||
className="shrink-0 px-2 py-1 bg-accent hover:bg-accent-strong text-[10px] rounded text-white"
|
||||
>
|
||||
Expand
|
||||
</button>
|
||||
@@ -212,21 +280,21 @@ export function MemoryTab({ workspaceId }: Props) {
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setShowAdvanced((prev) => !prev)}
|
||||
className="px-2 py-1 bg-surface-card hover:bg-surface-card text-[10px] rounded text-ink-mid"
|
||||
className="px-2 py-1 bg-surface-card hover:bg-surface-elevated text-[10px] rounded text-ink-mid"
|
||||
>
|
||||
{showAdvanced ? "Hide Advanced" : "Advanced"}
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={loadMemory}
|
||||
className="px-2 py-1 bg-surface-card hover:bg-surface-card text-[10px] rounded text-ink-mid"
|
||||
className="px-2 py-1 bg-surface-card hover:bg-surface-elevated text-[10px] rounded text-ink-mid"
|
||||
>
|
||||
Refresh
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => { setShowAdd(!showAdd); if (!showAdd) setShowAdvanced(true); }}
|
||||
className="px-2 py-1 bg-accent-strong hover:bg-accent text-[10px] rounded text-white"
|
||||
className="px-2 py-1 bg-accent hover:bg-accent-strong text-[10px] rounded text-white"
|
||||
>
|
||||
+ Add
|
||||
</button>
|
||||
@@ -262,7 +330,7 @@ export function MemoryTab({ workspaceId }: Props) {
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleAdd}
|
||||
className="px-3 py-1 bg-accent-strong hover:bg-accent text-xs rounded text-white"
|
||||
className="px-3 py-1 bg-accent hover:bg-accent-strong text-xs rounded text-white"
|
||||
>
|
||||
Save
|
||||
</button>
|
||||
@@ -272,7 +340,7 @@ export function MemoryTab({ workspaceId }: Props) {
|
||||
setShowAdd(false);
|
||||
setError(null);
|
||||
}}
|
||||
className="px-3 py-1 bg-surface-card hover:bg-surface-card text-xs rounded text-ink-mid"
|
||||
className="px-3 py-1 bg-surface-card hover:bg-surface-elevated text-xs rounded text-ink-mid"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
@@ -308,20 +376,71 @@ export function MemoryTab({ workspaceId }: Props) {
|
||||
|
||||
{expanded === entry.key && (
|
||||
<div className="px-3 pb-2 space-y-2">
|
||||
<pre className="text-[10px] text-ink-mid bg-surface-sunken rounded p-2 overflow-x-auto max-h-40">
|
||||
{JSON.stringify(entry.value, null, 2)}
|
||||
</pre>
|
||||
{editingKey === entry.key ? (
|
||||
<div className="space-y-2">
|
||||
<textarea
|
||||
value={editValue}
|
||||
onChange={(e) => setEditValue(e.target.value)}
|
||||
rows={4}
|
||||
aria-label={`Edit value for ${entry.key}`}
|
||||
className="w-full bg-surface-sunken border border-line rounded px-2 py-1 text-xs font-mono text-ink focus:outline-none focus:border-accent resize-none"
|
||||
/>
|
||||
<input
|
||||
value={editTTL}
|
||||
onChange={(e) => setEditTTL(e.target.value)}
|
||||
placeholder="TTL in seconds (blank = no expiry)"
|
||||
aria-label={`Edit TTL for ${entry.key}`}
|
||||
className="w-full bg-surface-sunken border border-line rounded px-2 py-1 text-xs text-ink focus:outline-none focus:border-accent"
|
||||
/>
|
||||
{editError && (
|
||||
<div role="alert" className="text-[10px] text-bad">
|
||||
{editError}
|
||||
</div>
|
||||
)}
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => handleEditSave(entry)}
|
||||
className="px-3 py-1 bg-accent hover:bg-accent-strong text-xs rounded text-white"
|
||||
>
|
||||
Save
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={cancelEdit}
|
||||
className="px-3 py-1 bg-surface-card hover:bg-surface-elevated text-xs rounded text-ink-mid"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<pre className="text-[10px] text-ink-mid bg-surface-sunken rounded p-2 overflow-x-auto max-h-40">
|
||||
{JSON.stringify(entry.value, null, 2)}
|
||||
</pre>
|
||||
)}
|
||||
<div className="flex items-center justify-between">
|
||||
<span className="text-[9px] text-ink-soft">
|
||||
Updated: {new Date(entry.updated_at).toLocaleString()}
|
||||
</span>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => handleDelete(entry.key)}
|
||||
className="text-[10px] text-bad hover:text-bad"
|
||||
>
|
||||
Delete
|
||||
</button>
|
||||
<div className="flex items-center gap-2">
|
||||
{editingKey !== entry.key && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => beginEdit(entry)}
|
||||
className="text-[10px] text-ink-mid hover:bg-surface-elevated rounded px-1 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60"
|
||||
>
|
||||
Edit
|
||||
</button>
|
||||
)}
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => handleDelete(entry.key)}
|
||||
className="text-[10px] text-bad hover:bg-red-950/40 rounded px-1 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-red-500/60"
|
||||
>
|
||||
Delete
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
@@ -340,7 +459,7 @@ export function MemoryTab({ workspaceId }: Props) {
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setShowAdvanced(true)}
|
||||
className="shrink-0 px-2 py-1 bg-accent-strong hover:bg-accent text-[10px] rounded text-white"
|
||||
className="shrink-0 px-2 py-1 bg-accent hover:bg-accent-strong text-[10px] rounded text-white"
|
||||
>
|
||||
Show
|
||||
</button>
|
||||
|
||||
@@ -269,15 +269,23 @@ export function ScheduleTab({ workspaceId }: Props) {
|
||||
{error && <div className="text-[10px] text-bad">{error}</div>}
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleSubmit}
|
||||
disabled={!formCron || !formPrompt}
|
||||
className="text-[11px] px-3 py-1 bg-accent-strong text-white rounded hover:bg-accent disabled:opacity-40 transition-colors"
|
||||
// Was bg-accent-strong hover:bg-accent — accent is the
|
||||
// LIGHTER variant, so this hovered lighter on white text
|
||||
// and dropped contrast below AA. Same trap fixed in
|
||||
// OnboardingWizard, ConfirmDialog, ApprovalBanner.
|
||||
className="text-[11px] px-3 py-1 bg-accent text-white rounded hover:bg-accent-strong disabled:opacity-40 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-1 focus-visible:ring-offset-surface"
|
||||
>
|
||||
{editId ? "Update" : "Create"}
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={resetForm}
|
||||
className="text-[11px] px-3 py-1 bg-surface-card text-ink-mid rounded hover:bg-surface-card transition-colors"
|
||||
// Was hover:bg-surface-card on top of bg-surface-card —
|
||||
// silent no-op hover. Lift to surface-elevated.
|
||||
className="text-[11px] px-3 py-1 bg-surface-card text-ink-mid rounded hover:bg-surface-elevated hover:text-ink transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/40 focus-visible:ring-offset-1 focus-visible:ring-offset-surface"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
|
||||
@@ -403,7 +403,7 @@ export function SkillsTab({ workspaceId, data }: Props) {
|
||||
}}
|
||||
placeholder="e.g. github://owner/repo#v1.0"
|
||||
spellCheck={false}
|
||||
className="flex-1 rounded border border-line bg-surface px-2 py-1 text-[10px] text-ink placeholder:text-ink-soft focus:border-violet-600 focus:outline-none"
|
||||
className="flex-1 rounded border border-line bg-surface px-2 py-1 text-[10px] text-ink placeholder:text-ink-soft focus:outline-none focus:border-violet-600 focus-visible:ring-2 focus-visible:ring-violet-600/50"
|
||||
/>
|
||||
<button
|
||||
onClick={handleInstallCustom}
|
||||
|
||||
@@ -123,15 +123,18 @@ export function TerminalTab({ workspaceId }: Props) {
|
||||
|
||||
return (
|
||||
<div className="flex flex-col h-full">
|
||||
{/* Status bar — role="status" so connection state changes are announced politely */}
|
||||
{/* Status bar — role="status" so connection state changes are announced politely.
|
||||
Terminal body stays dark unconditionally (Canvas v4 design rule), but the
|
||||
chrome wrapping it now uses semantic status colors so the dot/text stay
|
||||
readable in both themes. */}
|
||||
<div role="status" aria-live="polite" className="flex items-center justify-between px-3 py-1.5 border-b border-zinc-700 bg-zinc-800/50">
|
||||
<div className="flex items-center gap-2">
|
||||
<div className={`w-2 h-2 rounded-full ${
|
||||
status === "connected" ? "bg-green-500" :
|
||||
status === "connecting" ? "bg-yellow-500 motion-safe:animate-pulse" :
|
||||
status === "error" ? "bg-red-500" : "bg-zinc-500"
|
||||
status === "connected" ? "bg-good" :
|
||||
status === "connecting" ? "bg-warm motion-safe:animate-pulse" :
|
||||
status === "error" ? "bg-bad" : "bg-ink-soft"
|
||||
}`} />
|
||||
<span className="text-[10px] text-zinc-400">
|
||||
<span className="text-[10px] text-zinc-300">
|
||||
{status === "connected" ? "Shell active" :
|
||||
status === "connecting" ? "Connecting..." :
|
||||
status === "error" ? "Connection failed" : "Disconnected"}
|
||||
@@ -139,8 +142,13 @@ export function TerminalTab({ workspaceId }: Props) {
|
||||
</div>
|
||||
{(status === "disconnected" || status === "error") && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={reconnect}
|
||||
className="text-[10px] text-blue-400 hover:text-blue-300"
|
||||
// Accent over hardcoded blue. text-accent + hover-strong stays
|
||||
// readable on the dark terminal chrome and matches the rest
|
||||
// of the canvas semantic palette. Focus-visible ring added so
|
||||
// keyboard users see where focus lands on a recovery button.
|
||||
className="text-[10px] text-accent hover:text-accent-strong rounded-sm px-1 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60"
|
||||
>
|
||||
Reconnect
|
||||
</button>
|
||||
@@ -149,7 +157,7 @@ export function TerminalTab({ workspaceId }: Props) {
|
||||
|
||||
{/* Error message — role="alert" announces immediately via assertive live region */}
|
||||
{errorMsg && (
|
||||
<div role="alert" className="mx-3 mt-2 px-3 py-1.5 bg-red-900/30 border border-red-800 rounded text-xs text-red-400">
|
||||
<div role="alert" className="mx-3 mt-2 px-3 py-1.5 bg-red-900/30 border border-red-800 rounded text-xs text-bad">
|
||||
{errorMsg}
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -55,7 +55,13 @@ export function TracesTab({ workspaceId }: Props) {
|
||||
<div className="p-4 space-y-2">
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<span className="text-xs text-ink-mid">{traces.length} traces</span>
|
||||
<button type="button" onClick={loadTraces} className="text-[10px] text-ink-soft hover:text-ink-mid">
|
||||
<button
|
||||
type="button"
|
||||
onClick={loadTraces}
|
||||
// Added focus-visible ring; previous version was hover-only,
|
||||
// invisible to keyboard users.
|
||||
className="text-[10px] text-ink-soft hover:text-ink-mid rounded-sm px-1 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/50"
|
||||
>
|
||||
Refresh
|
||||
</button>
|
||||
</div>
|
||||
@@ -79,66 +85,79 @@ export function TracesTab({ workspaceId }: Props) {
|
||||
</div>
|
||||
) : (
|
||||
<div className="space-y-1">
|
||||
{traces.map((trace) => (
|
||||
<div key={trace.id} className="bg-surface-card/40 border border-line/40 rounded-lg overflow-hidden">
|
||||
<button
|
||||
onClick={() => setExpanded(expanded === trace.id ? null : trace.id)}
|
||||
className="w-full px-3 py-2 flex items-center gap-2 text-left hover:bg-surface-card/60 transition-colors"
|
||||
>
|
||||
<div className={`w-1.5 h-1.5 rounded-full shrink-0 ${
|
||||
trace.status === "ERROR" ? "bg-red-400" : "bg-emerald-400"
|
||||
}`} />
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="text-[11px] text-ink truncate">{trace.name || "trace"}</div>
|
||||
<div className="text-[9px] text-ink-soft">{formatTime(trace.timestamp)}</div>
|
||||
</div>
|
||||
<div className="flex items-center gap-2 shrink-0">
|
||||
{trace.latency != null && (
|
||||
<span className="text-[9px] text-ink-soft tabular-nums">
|
||||
{trace.latency > 1000 ? `${(trace.latency / 1000).toFixed(1)}s` : `${trace.latency}ms`}
|
||||
</span>
|
||||
)}
|
||||
{trace.usage?.total != null && (
|
||||
<span className="text-[9px] text-ink-soft tabular-nums">
|
||||
{trace.usage.total} tok
|
||||
</span>
|
||||
)}
|
||||
<span className="text-[9px] text-ink-soft">
|
||||
{expanded === trace.id ? "▼" : "▶"}
|
||||
</span>
|
||||
</div>
|
||||
</button>
|
||||
|
||||
{expanded === trace.id && (
|
||||
<div className="px-3 pb-2 space-y-2 border-t border-line/30">
|
||||
{trace.input && (
|
||||
<div>
|
||||
<div className="text-[9px] text-ink-soft uppercase tracking-wider mt-2 mb-1">Input</div>
|
||||
<pre className="text-[9px] text-ink-mid bg-surface-sunken rounded p-2 overflow-x-auto max-h-32">
|
||||
{String(typeof trace.input === "string" ? trace.input : JSON.stringify(trace.input, null, 2))}
|
||||
</pre>
|
||||
</div>
|
||||
)}
|
||||
{trace.output && (
|
||||
<div>
|
||||
<div className="text-[9px] text-ink-soft uppercase tracking-wider mb-1">Output</div>
|
||||
<pre className="text-[9px] text-ink-mid bg-surface-sunken rounded p-2 overflow-x-auto max-h-32">
|
||||
{String(typeof trace.output === "string" ? trace.output : JSON.stringify(trace.output, null, 2))}
|
||||
</pre>
|
||||
</div>
|
||||
)}
|
||||
{trace.totalCost != null && (
|
||||
<div className="text-[9px] text-ink-soft">
|
||||
Cost: ${trace.totalCost.toFixed(6)}
|
||||
</div>
|
||||
)}
|
||||
<div className="text-[8px] text-ink-soft font-mono select-all">
|
||||
{trace.id}
|
||||
{traces.map((trace) => {
|
||||
const isOpen = expanded === trace.id;
|
||||
const panelId = `trace-detail-${trace.id}`;
|
||||
return (
|
||||
<div key={trace.id} className="bg-surface-card/40 border border-line/40 rounded-lg overflow-hidden">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setExpanded(isOpen ? null : trace.id)}
|
||||
// aria-expanded + aria-controls so SR announces the
|
||||
// open/closed state and links the row to its detail
|
||||
// panel. Same pattern shipped on EventsTab.
|
||||
aria-expanded={isOpen}
|
||||
aria-controls={panelId}
|
||||
className="w-full px-3 py-2 flex items-center gap-2 text-left hover:bg-surface-card/60 focus:outline-none focus-visible:ring-2 focus-visible:ring-inset focus-visible:ring-accent/50 transition-colors"
|
||||
>
|
||||
{/* Status dot uses semantic bad/good tokens — was hardcoded
|
||||
bg-red-400 / bg-emerald-400 which doesn't pin to the
|
||||
canvas-wide ramp. */}
|
||||
<div className={`w-1.5 h-1.5 rounded-full shrink-0 ${
|
||||
trace.status === "ERROR" ? "bg-bad" : "bg-good"
|
||||
}`} />
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="text-[11px] text-ink truncate">{trace.name || "trace"}</div>
|
||||
<div className="text-[9px] text-ink-soft">{formatTime(trace.timestamp)}</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
<div className="flex items-center gap-2 shrink-0">
|
||||
{trace.latency != null && (
|
||||
<span className="text-[9px] text-ink-soft tabular-nums">
|
||||
{trace.latency > 1000 ? `${(trace.latency / 1000).toFixed(1)}s` : `${trace.latency}ms`}
|
||||
</span>
|
||||
)}
|
||||
{trace.usage?.total != null && (
|
||||
<span className="text-[9px] text-ink-soft tabular-nums">
|
||||
{trace.usage.total} tok
|
||||
</span>
|
||||
)}
|
||||
<span aria-hidden="true" className="text-[9px] text-ink-soft">
|
||||
{isOpen ? "▼" : "▶"}
|
||||
</span>
|
||||
</div>
|
||||
</button>
|
||||
|
||||
{isOpen && (
|
||||
<div id={panelId} className="px-3 pb-2 space-y-2 border-t border-line/30">
|
||||
{trace.input && (
|
||||
<div>
|
||||
<div className="text-[9px] text-ink-soft uppercase tracking-wider mt-2 mb-1">Input</div>
|
||||
<pre className="text-[9px] text-ink-mid bg-surface-sunken rounded p-2 overflow-x-auto max-h-32">
|
||||
{String(typeof trace.input === "string" ? trace.input : JSON.stringify(trace.input, null, 2))}
|
||||
</pre>
|
||||
</div>
|
||||
)}
|
||||
{trace.output && (
|
||||
<div>
|
||||
<div className="text-[9px] text-ink-soft uppercase tracking-wider mb-1">Output</div>
|
||||
<pre className="text-[9px] text-ink-mid bg-surface-sunken rounded p-2 overflow-x-auto max-h-32">
|
||||
{String(typeof trace.output === "string" ? trace.output : JSON.stringify(trace.output, null, 2))}
|
||||
</pre>
|
||||
</div>
|
||||
)}
|
||||
{trace.totalCost != null && (
|
||||
<div className="text-[9px] text-ink-soft">
|
||||
Cost: ${trace.totalCost.toFixed(6)}
|
||||
</div>
|
||||
)}
|
||||
<div className="text-[8px] text-ink-soft font-mono select-all">
|
||||
{trace.id}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -0,0 +1,220 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Pins the Edit affordance added to MemoryTab. Until this PR the Memory tab
|
||||
// was Add+Delete only; an entry that needed correction had to be deleted and
|
||||
// re-added — losing the version-counter and any in-flight optimistic-locking
|
||||
// invariants other writers depend on.
|
||||
//
|
||||
// Each test pins one branch of the new flow. If any fails, the bug is back.
|
||||
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
// Spies standing in for the api methods these tests drive and assert on.
const apiGet = vi.fn();
const apiPost = vi.fn();
const apiDel = vi.fn();

// Swap the real api module for a stub. get/post/del are thin lambdas that
// forward to the spies when called, not when the factory runs, so each test
// can re-program the spies. patch/put are inert stubs this file never asserts on.
vi.mock("@/lib/api", () => {
  const api = {
    get: (path: string) => apiGet(path),
    post: (path: string, body: unknown) => apiPost(path, body),
    del: (path: string) => apiDel(path),
    patch: vi.fn(),
    put: vi.fn(),
  };
  return { api };
});
|
||||
|
||||
import { MemoryTab } from "../MemoryTab";
|
||||
|
||||
// Two rows covering both value shapes the editor has to pre-fill:
// an object value with no expiry, and a plain-string value with a far-future
// expiry.
const sampleEntries = [
  {
    key: "team_brief",
    value: { goal: "ship v2" },
    version: 3,
    expires_at: null,
    updated_at: "2026-05-04T10:00:00Z",
  },
  {
    key: "plain_note",
    value: "raw text note",
    version: 1,
    expires_at: "2099-01-01T00:00:00Z",
    updated_at: "2026-05-04T10:01:00Z",
  },
];

// Fresh spies for every test. Only the memory-list GET is answered; any other
// GET rejects loudly so an unexpected request fails the test instead of hanging.
beforeEach(() => {
  for (const spy of [apiGet, apiPost, apiDel]) spy.mockReset();
  apiGet.mockImplementation((path: string) =>
    path === "/workspaces/ws-test/memory"
      ? Promise.resolve(sampleEntries)
      : Promise.reject(new Error(`unmocked api.get: ${path}`)),
  );
});
|
||||
|
||||
// Mounts MemoryTab for workspace "ws-test", waits for the first GET, reveals
// the Advanced section that hosts the entry list, then expands the row whose
// button name matches `key`.
async function renderAndExpand(key: string) {
  render(<MemoryTab workspaceId="ws-test" />);
  await waitFor(() => expect(apiGet).toHaveBeenCalled());

  // "Show" reveals the Advanced section.
  fireEvent.click(await screen.findByRole("button", { name: "Show" }));

  // The row toggle is located by a regex on its accessible name.
  const rowName = new RegExp(key);
  fireEvent.click(await screen.findByRole("button", { name: rowName }));
}
|
||||
|
||||
// Exercises the inline Edit flow of MemoryTab end to end against the mocked
// api: opening the editor, pre-filling, the POST payload shape (value parsing,
// if_match_version, ttl_seconds), cancel, and both error branches.
describe("MemoryTab Edit affordance", () => {
  it("Edit button appears once a row is expanded", async () => {
    await renderAndExpand("team_brief");
    expect(screen.getAllByRole("button", { name: "Edit" }).length).toBeGreaterThan(0);
  });

  it("clicking Edit on a JSON-valued entry pre-fills the textarea with pretty JSON", async () => {
    await renderAndExpand("team_brief");
    fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
    const textarea = (await screen.findByLabelText(
      "Edit value for team_brief",
    )) as HTMLTextAreaElement;
    // JSON.stringify(value, null, 2) indents nested keys by two spaces.
    expect(textarea.value).toBe('{\n  "goal": "ship v2"\n}');
  });

  it("clicking Edit on a string-valued entry pre-fills raw (no surrounding quotes)", async () => {
    await renderAndExpand("plain_note");
    fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
    const textarea = (await screen.findByLabelText(
      "Edit value for plain_note",
    )) as HTMLTextAreaElement;
    expect(textarea.value).toBe("raw text note");
  });

  it("Save POSTs with if_match_version + parsed value, then reloads", async () => {
    apiPost.mockResolvedValue({ status: "ok", key: "team_brief", version: 4 });
    await renderAndExpand("team_brief");
    fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
    const textarea = await screen.findByLabelText("Edit value for team_brief");
    fireEvent.change(textarea, { target: { value: '{"goal":"ship v3"}' } });
    fireEvent.click(screen.getByRole("button", { name: "Save" }));

    await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
    expect(apiPost).toHaveBeenCalledWith("/workspaces/ws-test/memory", {
      key: "team_brief",
      value: { goal: "ship v3" },
      if_match_version: 3,
    });
    // Reload after save → second GET.
    await waitFor(() => expect(apiGet).toHaveBeenCalledTimes(2));
  });

  it("Save with non-JSON text falls back to plain string", async () => {
    apiPost.mockResolvedValue({ status: "ok" });
    await renderAndExpand("team_brief");
    fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
    const textarea = await screen.findByLabelText("Edit value for team_brief");
    fireEvent.change(textarea, { target: { value: "free-form note" } });
    fireEvent.click(screen.getByRole("button", { name: "Save" }));

    await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
    expect(apiPost.mock.calls[0][1].value).toBe("free-form note");
  });

  it("TTL field is forwarded as ttl_seconds when set", async () => {
    apiPost.mockResolvedValue({ status: "ok" });
    await renderAndExpand("team_brief");
    fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
    const ttlInput = await screen.findByLabelText("Edit TTL for team_brief");
    fireEvent.change(ttlInput, { target: { value: "3600" } });
    fireEvent.click(screen.getByRole("button", { name: "Save" }));

    await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
    expect(apiPost.mock.calls[0][1].ttl_seconds).toBe(3600);
  });

  // One case per input class named in the title, so that blank, zero and junk
  // are each actually exercised rather than junk alone.
  it.each([
    ["blank", ""],
    ["zero", "0"],
    ["non-numeric", "abc"],
  ])("%s TTL is omitted from the payload", async (_label, rawTTL) => {
    apiPost.mockResolvedValue({ status: "ok" });
    await renderAndExpand("team_brief");
    fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
    const ttlInput = await screen.findByLabelText("Edit TTL for team_brief");
    fireEvent.change(ttlInput, { target: { value: rawTTL } });
    fireEvent.click(screen.getByRole("button", { name: "Save" }));

    await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
    expect(apiPost.mock.calls[0][1]).not.toHaveProperty("ttl_seconds");
  });

  it("Cancel discards edits and restores the rendered value", async () => {
    await renderAndExpand("team_brief");
    fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
    const textarea = await screen.findByLabelText("Edit value for team_brief");
    fireEvent.change(textarea, { target: { value: '{"goal":"discarded"}' } });
    fireEvent.click(screen.getByRole("button", { name: "Cancel" }));

    expect(apiPost).not.toHaveBeenCalled();
    // Editor is gone; the JSON pre-block is back.
    expect(screen.queryByLabelText("Edit value for team_brief")).toBeNull();
    expect(screen.getAllByText(/"goal": "ship v2"/i).length).toBeGreaterThan(0);
  });

  it("409 response surfaces a retry hint and reloads", async () => {
    apiPost.mockRejectedValueOnce(
      new Error("HTTP 409: if_match_version mismatch"),
    );
    await renderAndExpand("team_brief");
    fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
    const textarea = await screen.findByLabelText("Edit value for team_brief");
    fireEvent.change(textarea, { target: { value: '{"goal":"ship v3"}' } });
    fireEvent.click(screen.getByRole("button", { name: "Save" }));

    await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
    const alert = await screen.findByRole("alert");
    expect(alert.textContent).toMatch(/changed since you opened it/i);
    // Initial mount load + post-conflict reload.
    await waitFor(() => expect(apiGet).toHaveBeenCalledTimes(2));
  });

  it("non-409 error surfaces the message and does not reload", async () => {
    apiPost.mockRejectedValueOnce(new Error("boom"));
    await renderAndExpand("team_brief");
    fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
    fireEvent.click(screen.getByRole("button", { name: "Save" }));

    const alert = await screen.findByRole("alert");
    expect(alert.textContent).toBe("boom");
    // Only the initial mount load — no retry reload.
    expect(apiGet).toHaveBeenCalledTimes(1);
  });

  it("entry with no version omits if_match_version (back-compat with older shape)", async () => {
    // Pre-version-counter shape: drop the `version` field from the row.
    apiGet.mockReset();
    apiGet.mockImplementation((path: string) => {
      if (path === "/workspaces/ws-test/memory") {
        return Promise.resolve([
          {
            key: "old_entry",
            value: "legacy",
            expires_at: null,
            updated_at: "2026-05-04T10:00:00Z",
          },
        ]);
      }
      return Promise.reject(new Error(`unmocked: ${path}`));
    });
    apiPost.mockResolvedValue({ status: "ok" });

    await renderAndExpand("old_entry");
    fireEvent.click(screen.getAllByRole("button", { name: "Edit" })[0]);
    const textarea = await screen.findByLabelText("Edit value for old_entry");
    fireEvent.change(textarea, { target: { value: "updated" } });
    fireEvent.click(screen.getByRole("button", { name: "Save" }));

    await waitFor(() => expect(apiPost).toHaveBeenCalledTimes(1));
    const payload = apiPost.mock.calls[0][1];
    expect(payload).not.toHaveProperty("if_match_version");
    expect(payload.value).toBe("updated");
  });
});
|
||||
@@ -472,6 +472,7 @@ function GroupedCommsView({
|
||||
<NormalMessage key={msg.id} msg={msg} />
|
||||
),
|
||||
)}
|
||||
<WaitingBubbles visible={visible} />
|
||||
<div ref={bottomRef} />
|
||||
</div>
|
||||
</div>
|
||||
@@ -560,6 +561,83 @@ function PeerTabButton({
|
||||
);
|
||||
}
|
||||
|
||||
/** WaitingBubbles renders one "typing" bubble per peer that has an
 *  in-flight outbound delegation — i.e., the most recent message for that
 *  peer is an outbound one still marked pending or queued, with no later
 *  inbound reply. Mirrors the bouncing-dots indicator in ChatTab so the
 *  operator sees the same visual cue regardless of whether they're
 *  watching their own chat or a peer thread.
 *
 *  Why "per peer" not "one global": when multiple delegations are in
 *  flight to different peers (common during fan-out), one shared spinner
 *  under-reports — the user can't tell whether ALL peers are still
 *  working or only the visible ones.
 *
 *  Why we look at the LAST per-peer message: once a peer replies (an
 *  "in" bubble lands), the corresponding "out" bubble is no longer the
 *  tail — even if its status hasn't been mutated to "completed", the
 *  inbound reply means the wait is over. Looking at the tail collapses
 *  both cases into one rule.
 *
 *  @param visible Messages currently shown in the thread view.
 *  @returns One right-aligned status bubble per waiting peer, or null
 *           when no peer is being waited on.
 */
function WaitingBubbles({ visible }: { visible: CommMessage[] }) {
  // Group by peer, keep only the chronologically-last message per peer,
  // emit a bubble when that tail is an outbound pending/queued.
  const tailByPeer = new Map<string, CommMessage>();
  for (const m of visible) {
    const prev = tailByPeer.get(m.peerId);
    // `>=` rather than `>`: when two messages for the same peer share a
    // timestamp, the one later in `visible` wins. Otherwise a reply stamped
    // identically to its request would never displace the outbound tail and
    // the bubble would stick.
    // NOTE(review): assumes `visible` is ordered oldest-first — confirm
    // against the caller.
    if (!prev || m.timestamp >= prev.timestamp) tailByPeer.set(m.peerId, m);
  }

  const waitingPeers = Array.from(tailByPeer.values()).filter(
    (m) => m.flow === "out" && (m.status === "pending" || m.status === "queued"),
  );
  if (waitingPeers.length === 0) return null;

  // Staggered start offsets for the three bouncing dots.
  const dotDelays = ["0ms", "150ms", "300ms"];

  return (
    <>
      {waitingPeers.map((m) => (
        <div
          key={`waiting-${m.peerId}`}
          className="flex justify-end"
          // Outbound thread → right-justified to match the "out" bubble
          // alignment, so the dots feel like they belong to the message
          // they're replying to.
        >
          <div
            className="max-w-[85%] rounded-lg px-3 py-2 text-xs bg-cyan-900/30 border border-cyan-700/20"
            // role+aria-label so screen readers announce the wait;
            // matches the announcing pattern used by Toaster.
            role="status"
            aria-label={`Waiting for reply from ${m.peerName}`}
          >
            <div className="text-[9px] text-ink-soft mb-1">→ To {m.peerName}</div>
            <span className="flex items-center gap-2 text-ink-mid">
              {/* Decorative dots, hidden from assistive tech; the aria-label above carries the meaning. */}
              <span className="flex gap-0.5" aria-hidden="true">
                {dotDelays.map((delay) => (
                  <span
                    key={delay}
                    className="w-1.5 h-1.5 bg-cyan-300/70 rounded-full motion-safe:animate-bounce"
                    style={{ animationDelay: delay }}
                  />
                ))}
              </span>
              <span className="text-[10px]">
                {m.status === "queued"
                  ? `${m.peerName} is busy — reply will arrive when they're free`
                  : `Waiting for ${m.peerName}…`}
              </span>
            </span>
          </div>
        </div>
      ))}
    </>
  );
}
|
||||
|
||||
function NormalMessage({ msg }: { msg: CommMessage }) {
|
||||
return (
|
||||
<div className={`flex ${msg.flow === "out" ? "justify-end" : "justify-start"}`}>
|
||||
@@ -574,12 +652,22 @@ function NormalMessage({ msg }: { msg: CommMessage }) {
|
||||
{msg.flow === "out" ? `→ To ${msg.peerName}` : `← From ${msg.peerName}`}
|
||||
</div>
|
||||
{msg.text ? (
|
||||
<MarkdownBody className="text-ink-mid">{msg.text}</MarkdownBody>
|
||||
// Outgoing bubble (cyan-900) is dark in both themes → prose-invert default.
|
||||
// Incoming bubble (surface-card) turns light in the light theme → only invert in dark.
|
||||
<MarkdownBody
|
||||
className="text-ink-mid"
|
||||
invert={msg.flow === "out" ? "always" : "dark-only"}
|
||||
>
|
||||
{msg.text}
|
||||
</MarkdownBody>
|
||||
) : (
|
||||
<div className="text-ink-mid">(no message text)</div>
|
||||
)}
|
||||
{msg.responseText && (
|
||||
<MarkdownBody className="mt-1.5 pt-1.5 border-t border-line/30 text-ink-mid">
|
||||
<MarkdownBody
|
||||
className="mt-1.5 pt-1.5 border-t border-line/30 text-ink-mid"
|
||||
invert={msg.flow === "out" ? "always" : "dark-only"}
|
||||
>
|
||||
{msg.responseText}
|
||||
</MarkdownBody>
|
||||
)}
|
||||
@@ -706,17 +794,29 @@ function ErrorMessage({ msg }: { msg: CommMessage }) {
|
||||
* prose tweaks that keep paragraphs tight inside a small bubble.
|
||||
* Code blocks get an `overflow-x-auto` so a long line of code doesn't
|
||||
* blow out the bubble's max-width — agent-to-agent replies routinely
|
||||
* ship code samples and JSON. */
|
||||
* ship code samples and JSON.
|
||||
*
|
||||
* `invert` controls the prose color flip:
|
||||
* - "always": container bg is dark in BOTH themes (cyan-900, red-950),
|
||||
* so prose always wants light body text.
|
||||
* - "dark-only": container bg uses a theming token that goes light in
|
||||
* light mode (e.g. bg-surface-card). Prose only inverts in dark
|
||||
* mode; light mode keeps default dark prose colors against the
|
||||
* light bg. Without this, light mode rendered light text on light
|
||||
* bg = invisible markdown. */
|
||||
function MarkdownBody({
|
||||
children,
|
||||
className,
|
||||
invert = "always",
|
||||
}: {
|
||||
children: string;
|
||||
className?: string;
|
||||
invert?: "always" | "dark-only";
|
||||
}) {
|
||||
const proseInvert = invert === "always" ? "prose-invert" : "dark:prose-invert";
|
||||
return (
|
||||
<div
|
||||
className={`prose prose-sm prose-invert max-w-none [&>p]:mb-1 [&>p:last-child]:mb-0 [&_pre]:overflow-x-auto [&_table]:block [&_table]:overflow-x-auto ${className ?? ""}`}
|
||||
className={`prose prose-sm ${proseInvert} max-w-none [&>p]:mb-1 [&>p:last-child]:mb-0 [&_pre]:overflow-x-auto [&_table]:block [&_table]:overflow-x-auto ${className ?? ""}`}
|
||||
>
|
||||
<ReactMarkdown remarkPlugins={[remarkGfm]}>{children}</ReactMarkdown>
|
||||
</div>
|
||||
|
||||
@@ -22,7 +22,6 @@ export interface ConfigData {
|
||||
// task_budget maps to output_config.task_budget.total (requires beta header task-budgets-2026-03-13)
|
||||
task_budget?: number;
|
||||
prompt_files: string[];
|
||||
shared_context: string[];
|
||||
skills: string[];
|
||||
tools: string[];
|
||||
a2a: { port: number; streaming: boolean; push_notifications: boolean };
|
||||
@@ -40,7 +39,6 @@ export const DEFAULT_CONFIG: ConfigData = {
|
||||
effort: "",
|
||||
task_budget: 0,
|
||||
prompt_files: [],
|
||||
shared_context: [],
|
||||
skills: [],
|
||||
tools: [],
|
||||
a2a: { port: 8000, streaming: true, push_notifications: true },
|
||||
|
||||
@@ -120,7 +120,6 @@ export function toYaml(config: ConfigData): string {
|
||||
if (config.effort) { lines.push(""); simple("effort", config.effort); }
|
||||
if (config.task_budget && config.task_budget > 0) { simple("task_budget", config.task_budget); }
|
||||
if (config.prompt_files?.length) { lines.push(""); list("prompt_files", config.prompt_files); }
|
||||
if (config.shared_context?.length) { lines.push(""); list("shared_context", config.shared_context); }
|
||||
lines.push(""); list("skills", config.skills);
|
||||
if (config.tools?.length) { list("tools", config.tools); }
|
||||
lines.push(""); obj("a2a", config.a2a as unknown as Record<string, unknown>);
|
||||
|
||||
@@ -5,6 +5,13 @@ export const STATUS_CONFIG: Record<string, { dot: string; glow: string; label: s
|
||||
degraded: { dot: "bg-amber-400", glow: "shadow-amber-400/50", label: "Degraded", bar: "from-amber-500/20 to-transparent" },
|
||||
failed: { dot: "bg-red-400", glow: "shadow-red-400/50", label: "Failed", bar: "from-red-500/20 to-transparent" },
|
||||
provisioning: { dot: "bg-sky-400 motion-safe:animate-pulse", glow: "shadow-sky-400/50", label: "Starting", bar: "from-sky-500/20 to-transparent" },
|
||||
// not_configured: derived state from agent_card.configuration_status (PR #2756 chain).
|
||||
// Workspace is reachable (heartbeating, /agent-card serves) but adapter.setup()
|
||||
// failed — typically a missing/rotated LLM credential. Amber to differentiate from
|
||||
// online (green) and failed (red) — the workspace itself is healthy, just needs
|
||||
// configuration. Hover renders agent_card.configuration_error in the tooltip so
|
||||
// the operator sees the exact env var to set.
|
||||
not_configured: { dot: "bg-amber-300", glow: "shadow-amber-300/50", label: "Not configured", bar: "from-amber-400/20 to-transparent" },
|
||||
};
|
||||
|
||||
export function statusDotClass(status: string): string {
|
||||
@@ -12,10 +19,10 @@ export function statusDotClass(status: string): string {
|
||||
}
|
||||
|
||||
export const TIER_CONFIG: Record<number, { label: string; color: string; border: string }> = {
|
||||
1: { label: "T1", color: "text-ink-soft bg-surface-card/80", border: "text-ink-mid border-line/60" },
|
||||
2: { label: "T2", color: "text-sky-400 bg-sky-950/50", border: "text-sky-400 border-sky-500/30" },
|
||||
3: { label: "T3", color: "text-violet-400 bg-violet-950/50", border: "text-violet-400 border-violet-500/30" },
|
||||
4: { label: "T4", color: "text-warm bg-amber-950/50", border: "text-warm border-amber-500/30" },
|
||||
1: { label: "T1", color: "text-ink-mid bg-surface-card border border-line", border: "text-ink-mid border-line" },
|
||||
2: { label: "T2", color: "text-white bg-accent border border-accent-strong", border: "text-accent border-accent" },
|
||||
3: { label: "T3", color: "text-white bg-violet-600 border border-violet-700", border: "text-violet-600 border-violet-500" },
|
||||
4: { label: "T4", color: "text-white bg-warm border border-warm", border: "text-warm border-warm" },
|
||||
};
|
||||
|
||||
export const COMM_TYPE_LABELS: Record<string, string> = {
|
||||
|
||||
@@ -59,8 +59,8 @@ export function getTenantSlug(): string {
|
||||
* isSaaSTenant reports whether the canvas is running as the UI for a
|
||||
* SaaS tenant (served at <slug>.moleculesai.app). Use for client-side
|
||||
* UX branches that should behave differently on SaaS vs self-hosted —
|
||||
* e.g. the workspace tier picker hides T1/T2 sandbox tiers because every
|
||||
* SaaS workspace gets its own EC2 VM (inherently T3 Full Access).
|
||||
* e.g. the workspace tier picker hides T1/T2/T3 sandbox tiers because
|
||||
* every SaaS workspace gets its own EC2 VM (inherently T4 Full Access).
|
||||
*
|
||||
* SSR-safe: returns false on the server to avoid hydration drift; call
|
||||
* sites should tolerate a flip from false→true on first client render.
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import {
|
||||
getConfigurationStatus,
|
||||
getConfigurationError,
|
||||
} from "../canvas-topology";
|
||||
|
||||
// Tests for the getConfigurationStatus / getConfigurationError helpers
|
||||
// (issue #467 / PR #2756 chain). Surfacing the workspace's
|
||||
// `agent_card.configuration_status` is the user-visible payoff of
|
||||
// PR #2756's decoupling — without it, a misconfigured workspace looks
|
||||
// identical to a healthy one in the canvas tile.
|
||||
|
||||
describe("getConfigurationStatus", () => {
  it("returns null when agentCard is null", () => {
    const status = getConfigurationStatus(null);
    expect(status).toBeNull();
  });

  it("returns null when agentCard has no configuration_status", () => {
    const status = getConfigurationStatus({ name: "x" });
    expect(status).toBeNull();
  });

  it("returns 'ready' when agent reports configuration ok", () => {
    const status = getConfigurationStatus({ configuration_status: "ready" });
    expect(status).toBe("ready");
  });

  it("returns 'not_configured' when agent reports setup failed", () => {
    const status = getConfigurationStatus({
      configuration_status: "not_configured",
    });
    expect(status).toBe("not_configured");
  });

  it("ignores unknown values defensively", () => {
    // Anything outside the two recognised statuses — a string a newer
    // agent might start sending, a non-string, or an explicit null —
    // must come back as null ("no info") instead of crashing the canvas.
    const unrecognised: unknown[] = ["starting", 42, null];
    for (const value of unrecognised) {
      expect(
        getConfigurationStatus({ configuration_status: value }),
      ).toBeNull();
    }
  });
});
|
||||
|
||||
describe("getConfigurationError", () => {
  const NOT_CONFIGURED = "not_configured";

  it("returns null when agentCard is null", () => {
    expect(getConfigurationError(null)).toBeNull();
  });

  it("returns null when status is 'ready' even if error string present", () => {
    // A card can carry configuration_status=ready next to a leftover
    // configuration_error from an earlier boot. The live status flag
    // wins, so the stale error must not be surfaced.
    const card = {
      configuration_status: "ready",
      configuration_error: "stale: was unset",
    };
    expect(getConfigurationError(card)).toBeNull();
  });

  it("returns the error string when status is 'not_configured'", () => {
    const reason =
      "RuntimeError: Neither OPENAI_API_KEY nor MINIMAX_API_KEY is set";
    const card = {
      configuration_status: NOT_CONFIGURED,
      configuration_error: reason,
    };
    expect(getConfigurationError(card)).toBe(reason);
  });

  it("returns null when status is 'not_configured' but error is missing", () => {
    const card = { configuration_status: NOT_CONFIGURED };
    expect(getConfigurationError(card)).toBeNull();
  });

  it("returns null when error is empty string", () => {
    // An empty message gives the operator nothing to act on, so it is
    // handled exactly like a missing one.
    const card = {
      configuration_status: NOT_CONFIGURED,
      configuration_error: "",
    };
    expect(getConfigurationError(card)).toBeNull();
  });

  it("returns null when error is non-string", () => {
    const card = {
      configuration_status: NOT_CONFIGURED,
      configuration_error: { reason: "object" },
    };
    expect(getConfigurationError(card)).toBeNull();
  });
});
|
||||
@@ -564,3 +564,42 @@ export function extractSkillNames(agentCard: Record<string, unknown> | null): st
|
||||
.map((skill: Record<string, unknown>) => String(skill.name || skill.id || ""))
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
/**
 * Reads `configuration_status` off a workspace's agent card.
 *
 * Only the two recognised values — "ready" and "not_configured" — are
 * passed through. A missing card, a missing field, or any other value
 * (unrecognised string, number, null, …) yields null.
 *
 * Context (molecule-core PR #2756): when adapter.setup() fails, the
 * runtime mounts a not-configured handler and advertises the failure as
 * agent_card.configuration_status = "not_configured" together with
 * configuration_error = "<reason>". Canvas reads both so it can render a
 * "needs config" tile rather than a confusing "online but silent" one.
 * Cards from an older runtime or a pre-PR #2756 worker carry no status.
 *
 * The "no info" result is null rather than undefined so callers can
 * tell it apart from the explicit values with a strict equality check.
 */
export function getConfigurationStatus(
  agentCard: Record<string, unknown> | null,
): "ready" | "not_configured" | null {
  // Optional chaining folds the "no card" case into "no field".
  const status = agentCard?.configuration_status;
  return status === "ready" || status === "not_configured" ? status : null;
}
|
||||
|
||||
/**
 * Extracts the configuration failure message from an agent card.
 *
 * A string is returned only when the card's configuration_status is
 * "not_configured" AND configuration_error is a non-empty string; every
 * other combination (no card, status "ready" or unrecognised, error
 * missing, empty, or not a string) gives null.
 *
 * The value has already been redacted server-side by secret_redactor
 * (PR #2778), so it is safe to render in the UI verbatim.
 */
export function getConfigurationError(
  agentCard: Record<string, unknown> | null,
): string | null {
  if (!agentCard || getConfigurationStatus(agentCard) !== "not_configured") {
    return null;
  }

  const message = agentCard.configuration_error;
  if (typeof message !== "string" || message === "") return null;
  return message;
}
|
||||
|
||||
@@ -27,11 +27,11 @@ prompt_files:
|
||||
# AGENTS.md-style example:
|
||||
# prompt_files: [AGENTS.md]
|
||||
|
||||
# Files to share with direct children (1-level inheritance)
|
||||
# Children fetch these at startup via GET /workspaces/:id/shared-context
|
||||
shared_context:
|
||||
- architecture.md
|
||||
- conventions.md
|
||||
# NOTE: `shared_context` (parent → child file injection at boot) was removed.
|
||||
# To share knowledge across a team, use memory v2's team:<id> namespace via
|
||||
# the recall_memory MCP tool — the agent pulls it on demand instead of
|
||||
# paying for it at every boot. For large blob-shaped artefacts, see RFC
|
||||
# #2789 (platform-owned shared file storage).
|
||||
|
||||
# Skills to load -- folder names under skills/
|
||||
skills:
|
||||
@@ -123,7 +123,6 @@ env:
|
||||
| `runtime` | No | Adapter to use: `langgraph` (default), `claude-code`, `crewai`, `autogen`, `deepagents`, `openclaw`. See [Agent Runtime Adapters](./cli-runtime.md). |
|
||||
| `model` | Yes | LangChain-compatible provider string (e.g. `anthropic:claude-sonnet-4-6`). Overridden by `MODEL_PROVIDER` env var if set. |
|
||||
| `prompt_files` | No | Ordered list of markdown files to load as system prompt. Defaults to `["system-prompt.md"]` if omitted. `MEMORY.md` and `USER.md` are auto-appended when present so frozen memory snapshots do not need to be duplicated here. Supports any agent framework's file structure (OpenClaw, Claude Code, etc.) |
|
||||
| `shared_context` | No | Files from this workspace's config dir to share with direct children. Children fetch these at startup and inject into their system prompt as `## Parent Context`. 1-level inheritance only (grandchildren don't see grandparent's context). |
|
||||
| `skills` | Yes | List of skill folder names to load from `skills/` |
|
||||
| `tools` | No | Built-in tools from workspace-template |
|
||||
| `memory` | No | Memory backend config (defaults to filesystem) |
|
||||
@@ -157,7 +156,6 @@ The file watcher monitors the entire config directory. When `config.yaml` change
|
||||
| `name`, `description`, `version` | Yes | Rebuild Agent Card with new metadata |
|
||||
| `a2a` | **No** | Port and protocol changes require container restart |
|
||||
| `delegation` | Yes | Retry/timeout defaults take effect on next delegation call |
|
||||
| `shared_context` | Yes | Children fetch on next prompt rebuild; no restart needed |
|
||||
| `sub_workspaces` | **No** | Team structure changes go through `POST /workspaces/:id/expand` |
|
||||
|
||||
See [Skills — Live Reload](./skills.md#live-reload) for the full file watcher flow.
|
||||
|
||||
@@ -24,21 +24,19 @@ When you receive a task, break it into sub-tasks and delegate to your team.
|
||||
Always review work before reporting completion to the caller.
|
||||
```
|
||||
|
||||
### 2. Parent Context (if child workspace)
|
||||
### 2. Team-shared knowledge (on demand)
|
||||
|
||||
If this workspace was created via team expansion (has a `PARENT_ID` env var), it fetches its parent's shared context files at startup via `GET /workspaces/{parent_id}/shared-context`. The parent declares which files to share in its `config.yaml`:
|
||||
Team-scoped knowledge is no longer injected at boot. The previous
|
||||
`shared_context` field + `GET /workspaces/{parent_id}/shared-context`
|
||||
fetch was removed; agents now pull team-shared knowledge on demand via
|
||||
memory v2's `team:<id>` namespace using the `recall_memory` MCP tool.
|
||||
|
||||
```yaml
|
||||
shared_context:
|
||||
- architecture.md
|
||||
- conventions.md
|
||||
```
|
||||
|
||||
These files are injected as a `## Parent Context` section, with each file rendered under a `### {filename}` heading. This gives children the parent's project knowledge (architecture, conventions, API schemas) without exposing the parent's system prompt or full config.
|
||||
|
||||
**1-level inheritance only:** A grandchild sees its direct parent's shared context, not its grandparent's. This mirrors the L2 Team Memory scope.
|
||||
|
||||
**Graceful degradation:** If the parent is offline or the endpoint returns an error, the child starts normally without parent context.
|
||||
This shifts cost from "every boot, always" to "only when the agent
|
||||
asks", and lets team members write to the shared store from anywhere
|
||||
that can resolve the namespace (canvas Memory tab, agent
|
||||
`commit_memory`, admin import). For large blob-shaped artefacts (full
|
||||
architecture docs, brand assets, PDFs) see RFC #2789 (platform-owned
|
||||
shared file storage).
|
||||
|
||||
### 3. Skill Instructions
|
||||
|
||||
|
||||
@@ -0,0 +1,358 @@
|
||||
openapi: 3.0.3
|
||||
info:
|
||||
title: Molecule Memory Plugin v1
|
||||
version: 1.0.0
|
||||
description: |
|
||||
Contract between workspace-server and a memory backend plugin. The
|
||||
plugin owns its own storage; workspace-server is the security
|
||||
perimeter (secret redaction, namespace ACL, GLOBAL audit/wrap).
|
||||
|
||||
Defined in RFC #2728. See docs/rfc/memory-v2-rationale.md for design
|
||||
rationale.
|
||||
|
||||
Auth: none. Plugins MUST be reachable only on a private network or
|
||||
unix socket — workspace-server is the only sanctioned client.
|
||||
servers:
|
||||
- url: http://localhost:9100
|
||||
description: Built-in postgres-backed plugin (default)
|
||||
|
||||
paths:
|
||||
/v1/health:
|
||||
get:
|
||||
summary: Liveness + capability probe
|
||||
operationId: getHealth
|
||||
responses:
|
||||
'200':
|
||||
description: Plugin healthy
|
||||
content:
|
||||
application/json:
|
||||
schema: { $ref: '#/components/schemas/HealthResponse' }
|
||||
'503':
|
||||
description: Plugin unhealthy (e.g., backing store down)
|
||||
content:
|
||||
application/json:
|
||||
schema: { $ref: '#/components/schemas/Error' }
|
||||
|
||||
/v1/namespaces/{name}:
|
||||
parameters:
|
||||
- $ref: '#/components/parameters/NamespaceName'
|
||||
put:
|
||||
summary: Upsert a namespace (idempotent)
|
||||
operationId: upsertNamespace
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema: { $ref: '#/components/schemas/NamespaceUpsert' }
|
||||
responses:
|
||||
'200': { $ref: '#/components/responses/Namespace' }
|
||||
'400': { $ref: '#/components/responses/BadRequest' }
|
||||
patch:
|
||||
summary: Update namespace metadata or TTL
|
||||
operationId: patchNamespace
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema: { $ref: '#/components/schemas/NamespacePatch' }
|
||||
responses:
|
||||
'200': { $ref: '#/components/responses/Namespace' }
|
||||
'404': { $ref: '#/components/responses/NotFound' }
|
||||
delete:
|
||||
summary: Delete namespace and all its memories (operator action)
|
||||
operationId: deleteNamespace
|
||||
responses:
|
||||
'204':
|
||||
description: Deleted
|
||||
'404': { $ref: '#/components/responses/NotFound' }
|
||||
|
||||
/v1/namespaces/{name}/memories:
|
||||
parameters:
|
||||
- $ref: '#/components/parameters/NamespaceName'
|
||||
post:
|
||||
summary: Write a memory to a namespace
|
||||
description: |
|
||||
`content` MUST already be secret-redacted by the workspace-server.
|
||||
Plugin does not run additional redaction.
|
||||
operationId: commitMemory
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema: { $ref: '#/components/schemas/MemoryWrite' }
|
||||
responses:
|
||||
'201':
|
||||
description: Memory persisted
|
||||
content:
|
||||
application/json:
|
||||
schema: { $ref: '#/components/schemas/MemoryWriteResponse' }
|
||||
'400': { $ref: '#/components/responses/BadRequest' }
|
||||
'404': { $ref: '#/components/responses/NotFound' }
|
||||
|
||||
/v1/search:
|
||||
post:
|
||||
summary: Search memories across one or more namespaces
|
||||
description: |
|
||||
workspace-server MUST intersect the requested `namespaces` with
|
||||
the caller's currently-readable set BEFORE invoking this
|
||||
endpoint. The plugin treats the list as authoritative.
|
||||
operationId: searchMemories
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema: { $ref: '#/components/schemas/SearchRequest' }
|
||||
responses:
|
||||
'200':
|
||||
description: Search results
|
||||
content:
|
||||
application/json:
|
||||
schema: { $ref: '#/components/schemas/SearchResponse' }
|
||||
'400': { $ref: '#/components/responses/BadRequest' }
|
||||
|
||||
/v1/memories/{id}:
|
||||
parameters:
|
||||
- in: path
|
||||
name: id
|
||||
required: true
|
||||
schema: { type: string, format: uuid }
|
||||
delete:
|
||||
summary: Forget a memory by id
|
||||
description: |
|
||||
`requested_by_namespace` is the namespace the caller has write
|
||||
access to; the plugin SHOULD reject if the memory doesn't belong
|
||||
to that namespace.
|
||||
operationId: forgetMemory
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema: { $ref: '#/components/schemas/ForgetRequest' }
|
||||
responses:
|
||||
'204':
|
||||
description: Forgotten
|
||||
'403': { $ref: '#/components/responses/Forbidden' }
|
||||
'404': { $ref: '#/components/responses/NotFound' }
|
||||
|
||||
components:
|
||||
parameters:
|
||||
NamespaceName:
|
||||
in: path
|
||||
name: name
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
minLength: 1
|
||||
maxLength: 256
|
||||
pattern: '^[a-z]+:[A-Za-z0-9_:.\-]+$'
|
||||
example: 'workspace:550e8400-e29b-41d4-a716-446655440000'
|
||||
|
||||
responses:
|
||||
Namespace:
|
||||
description: Namespace state
|
||||
content:
|
||||
application/json:
|
||||
schema: { $ref: '#/components/schemas/Namespace' }
|
||||
BadRequest:
|
||||
description: Invalid input
|
||||
content:
|
||||
application/json:
|
||||
schema: { $ref: '#/components/schemas/Error' }
|
||||
NotFound:
|
||||
description: Resource not found
|
||||
content:
|
||||
application/json:
|
||||
schema: { $ref: '#/components/schemas/Error' }
|
||||
Forbidden:
|
||||
description: Caller lacks write access to the requested namespace
|
||||
content:
|
||||
application/json:
|
||||
schema: { $ref: '#/components/schemas/Error' }
|
||||
|
||||
schemas:
|
||||
HealthResponse:
|
||||
type: object
|
||||
required: [status, version, capabilities]
|
||||
properties:
|
||||
status: { type: string, enum: [ok, degraded] }
|
||||
version: { type: string, example: "1.0.0" }
|
||||
capabilities:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
enum: [embedding, fts, ttl, pin, propagation]
|
||||
description: |
|
||||
Optional features this plugin supports. workspace-server
|
||||
adapts MCP responses based on this list (e.g., agents can
|
||||
request semantic search only when `embedding` is present).
|
||||
|
||||
NamespaceKind:
|
||||
type: string
|
||||
enum: [workspace, team, org, custom]
|
||||
|
||||
Namespace:
|
||||
type: object
|
||||
required: [name, kind, created_at]
|
||||
properties:
|
||||
name: { type: string }
|
||||
kind: { $ref: '#/components/schemas/NamespaceKind' }
|
||||
expires_at:
|
||||
type: string
|
||||
format: date-time
|
||||
nullable: true
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties: true
|
||||
nullable: true
|
||||
created_at: { type: string, format: date-time }
|
||||
|
||||
NamespaceUpsert:
|
||||
type: object
|
||||
required: [kind]
|
||||
properties:
|
||||
kind: { $ref: '#/components/schemas/NamespaceKind' }
|
||||
expires_at: { type: string, format: date-time, nullable: true }
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties: true
|
||||
nullable: true
|
||||
|
||||
NamespacePatch:
|
||||
type: object
|
||||
properties:
|
||||
expires_at: { type: string, format: date-time, nullable: true }
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties: true
|
||||
nullable: true
|
||||
|
||||
MemoryKind:
|
||||
type: string
|
||||
enum: [fact, summary, checkpoint]
|
||||
|
||||
MemorySource:
|
||||
type: string
|
||||
enum: [agent, runtime, user]
|
||||
|
||||
MemoryWrite:
|
||||
type: object
|
||||
required: [content, kind, source]
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
format: uuid
|
||||
nullable: true
|
||||
description: |
|
||||
Optional idempotency key. When supplied, the plugin MUST
|
||||
treat the write as upsert keyed on this id (re-running
|
||||
the same write does not duplicate). When omitted, the
|
||||
plugin generates a fresh UUID. Used by the backfill CLI.
|
||||
content:
|
||||
type: string
|
||||
minLength: 1
|
||||
description: Already secret-redacted by workspace-server.
|
||||
kind: { $ref: '#/components/schemas/MemoryKind' }
|
||||
source: { $ref: '#/components/schemas/MemorySource' }
|
||||
expires_at: { type: string, format: date-time, nullable: true }
|
||||
propagation:
|
||||
type: object
|
||||
additionalProperties: true
|
||||
nullable: true
|
||||
description: |
|
||||
Opaque metadata the plugin stores and returns. Reserved for
|
||||
future cross-namespace propagation semantics.
|
||||
pin: { type: boolean, default: false }
|
||||
embedding:
|
||||
type: array
|
||||
items: { type: number }
|
||||
nullable: true
|
||||
description: |
|
||||
Optional pre-computed embedding. Plugins reporting the
|
||||
`embedding` capability MAY ignore this and recompute.
|
||||
|
||||
MemoryWriteResponse:
|
||||
type: object
|
||||
required: [id, namespace]
|
||||
properties:
|
||||
id: { type: string, format: uuid }
|
||||
namespace: { type: string }
|
||||
|
||||
Memory:
|
||||
type: object
|
||||
required: [id, namespace, content, kind, source, created_at]
|
||||
properties:
|
||||
id: { type: string, format: uuid }
|
||||
namespace: { type: string }
|
||||
content: { type: string }
|
||||
kind: { $ref: '#/components/schemas/MemoryKind' }
|
||||
source: { $ref: '#/components/schemas/MemorySource' }
|
||||
expires_at: { type: string, format: date-time, nullable: true }
|
||||
propagation:
|
||||
type: object
|
||||
additionalProperties: true
|
||||
nullable: true
|
||||
pin: { type: boolean }
|
||||
created_at: { type: string, format: date-time }
|
||||
score:
|
||||
type: number
|
||||
nullable: true
|
||||
description: Relevance score from search (semantic + FTS).
|
||||
|
||||
SearchRequest:
|
||||
type: object
|
||||
required: [namespaces]
|
||||
properties:
|
||||
namespaces:
|
||||
type: array
|
||||
items: { type: string }
|
||||
minItems: 1
|
||||
description: |
|
||||
Already intersected with the caller's readable set by
|
||||
workspace-server.
|
||||
query: { type: string }
|
||||
kinds:
|
||||
type: array
|
||||
items: { $ref: '#/components/schemas/MemoryKind' }
|
||||
limit:
|
||||
type: integer
|
||||
minimum: 1
|
||||
maximum: 100
|
||||
default: 20
|
||||
embedding:
|
||||
type: array
|
||||
items: { type: number }
|
||||
nullable: true
|
||||
|
||||
SearchResponse:
|
||||
type: object
|
||||
required: [memories]
|
||||
properties:
|
||||
memories:
|
||||
type: array
|
||||
items: { $ref: '#/components/schemas/Memory' }
|
||||
|
||||
ForgetRequest:
|
||||
type: object
|
||||
required: [requested_by_namespace]
|
||||
properties:
|
||||
requested_by_namespace:
|
||||
type: string
|
||||
description: Namespace the caller has write access to.
|
||||
|
||||
Error:
|
||||
type: object
|
||||
required: [code, message]
|
||||
properties:
|
||||
code:
|
||||
type: string
|
||||
enum:
|
||||
- bad_request
|
||||
- not_found
|
||||
- forbidden
|
||||
- internal
|
||||
- unavailable
|
||||
message: { type: string }
|
||||
details:
|
||||
type: object
|
||||
additionalProperties: true
|
||||
nullable: true
|
||||
@@ -199,7 +199,6 @@ Install safeguards bound the cost of a single install (env-tunable via `PLUGIN_I
|
||||
| `GET` | `/templates` | List available templates. **Requires AdminAuth** (PR #701). |
|
||||
| `GET` | `/org/templates` | List available org templates. **Requires AdminAuth** (PR #701). |
|
||||
| `POST` | `/templates/import` | Import an agent folder as a new template |
|
||||
| `GET` | `/workspaces/:id/shared-context` | Read parent shared-context files |
|
||||
| `GET` | `/workspaces/:id/files` | List files under an allowed root |
|
||||
| `GET` | `/workspaces/:id/files/*path` | Read a file |
|
||||
| `PUT` | `/workspaces/:id/files/*path` | Write a file |
|
||||
|
||||
@@ -68,7 +68,6 @@ Full contract: `docs/runbooks/admin-auth.md`.
|
||||
| GET | /channels/adapters | channels.go (list available platforms) |
|
||||
| POST | /channels/discover | channels.go (auto-detect chats for a bot token) |
|
||||
| POST | /webhooks/:type | channels.go (incoming social webhook) |
|
||||
| GET | /workspaces/:id/shared-context | templates.go |
|
||||
| GET/PUT/DELETE | /workspaces/:id/files[/*path] | templates.go |
|
||||
| GET | /canvas/viewport | viewport.go — open, no auth required (cosmetic, bootstrap-friendly) |
|
||||
| PUT | /canvas/viewport | viewport.go — `CanvasOrBearer` middleware; accepts bearer OR Origin matching `CORS_ORIGINS`. Cosmetic-only route — worst case viewport corruption, recovered by page refresh. |
|
||||
|
||||
@@ -523,7 +523,8 @@ runtime_config: # Runtime-specific settings
|
||||
skills: ["skill1", "skill2"] # Folder names under skills/
|
||||
tools: ["web_search", "filesystem"] # Built-in tool names
|
||||
prompt_files: ["system-prompt.md"] # Additional prompt text files
|
||||
shared_context: [] # Files from parent workspace
|
||||
# `shared_context` was removed; team-shared knowledge now lives in memory v2's
|
||||
# team:<id> namespace (recall_memory MCP tool). See RFC #2789 for shared files.
|
||||
|
||||
a2a:
|
||||
port: 8000
|
||||
|
||||
@@ -0,0 +1,113 @@
|
||||
# Memory Plugin Contract — Changelog
|
||||
|
||||
Every breaking or operationally relevant change to the v1 plugin
|
||||
contract or the workspace-server-side wiring lands here. Plugin
|
||||
authors should subscribe to PRs touching this file.
|
||||
|
||||
## [Unreleased] — fixup wave 1 (post-RFC-#2728 self-review)
|
||||
|
||||
A self-review of the initial 11-PR rollout (PRs #2729-#2742) flagged
|
||||
two correctness bugs and three operational hazards. This wave fixes
|
||||
all of them. Order matches operator-impact severity.
|
||||
|
||||
### Critical: backfill idempotency via `MemoryWrite.id` (#2744)
|
||||
|
||||
**The bug.** The backfill CLI claimed to be idempotent on re-run, but
|
||||
`gen_random_uuid()` in the plugin's INSERT meant every retry created
|
||||
a fresh row. Operators retrying a failed `-apply` would silently
|
||||
double their memory count.
|
||||
|
||||
**The fix.** Optional `id` field on `MemoryWrite`. When supplied,
|
||||
plugins MUST upsert. The backfill now forwards `agent_memories.id`
|
||||
to `MemoryWrite.id`, so retries update in place.
|
||||
|
||||
**Plugin author action.** If your plugin uses
|
||||
`INSERT INTO ... DEFAULT gen_random_uuid()`, switch to
|
||||
`INSERT ... ON CONFLICT (id) DO UPDATE` when `id` is set. The wire
|
||||
contract is forward-compatible — plugins that ignore the field still
|
||||
work for production agent commits (which leave `id` empty), but they
|
||||
will silently corrupt backfill retries.
|
||||
|
||||
### Critical: `memory-backfill -verify` mode (#2747)
|
||||
|
||||
**The miss.** The original PR-7 task spec called for a parity-check
|
||||
mode but it never landed. Operators had no way to confirm a
|
||||
migration succeeded short of "no errors logged."
|
||||
|
||||
**The fix.** New `-verify` flag samples N workspaces, queries
|
||||
`agent_memories` directly, runs an equivalent plugin search via the
|
||||
namespace resolver, multiset-compares contents. Reports mismatches
|
||||
to stdout and exits non-zero so CI can gate the cutover.
|
||||
|
||||
```bash
|
||||
memory-backfill -verify # default sample 50
|
||||
memory-backfill -verify -verify-sample=200 # bigger
|
||||
memory-backfill -verify -workspace=<uuid> # one workspace
|
||||
```
|
||||
|
||||
### Important: `expires_at` validation (#2746)
|
||||
|
||||
**The bug.** `commit_memory_v2` silently dropped malformed
|
||||
`expires_at` strings. Agent passes `expires_at: "tomorrow"`, gets a
|
||||
200, memory has no TTL — agent thinks it set a TTL, didn't.
|
||||
|
||||
**The fix.** Returns
|
||||
`fmt.Errorf("invalid expires_at: must be RFC3339")` on parse
|
||||
failure. Plugin is not called in this case.
|
||||
|
||||
**Plugin author action.** None — this is a workspace-server-side
|
||||
fix. But: if your plugin advertises the `ttl` capability, make sure
|
||||
you actually evict expired rows on read (not just on a janitor cron
|
||||
that runs once a day). `testing-your-plugin.md` describes
|
||||
a TTL-eviction test you should run.
|
||||
|
||||
### Important: audit log JSON via `json.Marshal` (#2746)
|
||||
|
||||
**The bug.** `auditOrgWrite` built `activity_logs.metadata` via
|
||||
`fmt.Sprintf` with `%q`. For ASCII (today's UUID + hex digest) this
|
||||
coincidentally produces valid JSON; for unicode or control bytes it
|
||||
silently produces non-JSON.
|
||||
|
||||
**The fix.** Replaced with `json.Marshal(map[string]string{...})`.
|
||||
Same wire shape today, won't regress when metadata grows.
|
||||
|
||||
**Plugin author action.** None — workspace-server-internal.
|
||||
|
||||
### Operator action: staging verification (#292)
|
||||
|
||||
**Status.** Tracked as task #292. PR-merged ≠ verified. Operator
|
||||
must:
|
||||
1. Provision a staging tenant, set `MEMORY_PLUGIN_URL`
|
||||
2. Run real `commit_memory_v2` from a workspace
|
||||
3. `memory-backfill -dry-run` against staging data
|
||||
4. `memory-backfill -apply`, then `-verify`
|
||||
5. Set `MEMORY_V2_CUTOVER=true`, verify admin export still works
|
||||
6. Run a legacy `commit_memory` from a workspace, verify it lands
|
||||
in plugin storage via the PR-6 shim
|
||||
|
||||
### Other follow-ups still open
|
||||
|
||||
- **#289**: admin export O(workspaces) → O(namespaces) — N+1 pattern
|
||||
in `exportViaPlugin` (1000-workspace tenants run 1000× resolver
|
||||
CTEs + 1000× plugin searches today).
|
||||
- **#291**: workspace deletion must call `DELETE
|
||||
/v1/namespaces/{name}` — orphans accumulate today.
|
||||
- **#293**: real-subprocess boot E2E — current PR-11 is integration
|
||||
(httptest + sqlmock), not E2E.
|
||||
|
||||
These are tracked but deferred; they're operationally annoying, not
|
||||
incident-shaped.
|
||||
|
||||
## [v1.0.0] — initial release (RFC #2728, PRs #2729-#2742)
|
||||
|
||||
Initial plugin contract + 11-PR rollout. See
|
||||
[issue #2728](https://github.com/Molecule-AI/molecule-core/issues/2728)
|
||||
for the full RFC.
|
||||
|
||||
Endpoints: `/v1/health`, `/v1/namespaces/{name}` (PUT/PATCH/DELETE),
|
||||
`/v1/namespaces/{name}/memories` (POST), `/v1/search` (POST),
|
||||
`/v1/memories/{id}` (DELETE).
|
||||
|
||||
Capabilities: `embedding`, `fts`, `ttl`, `pin`, `propagation`.
|
||||
|
||||
Operator runbook: see [README.md § Replacing the built-in plugin](README.md#replacing-the-built-in-plugin).
|
||||
@@ -0,0 +1,191 @@
|
||||
# Writing a Memory Plugin
|
||||
|
||||
This document is for operators and ecosystem authors who want to
|
||||
replace the built-in postgres-backed memory plugin (the default
|
||||
implementation that ships with workspace-server) with their own.
|
||||
|
||||
The contract was introduced by RFC #2728. The shipped binary is
|
||||
`cmd/memory-plugin-postgres/`; reading its source is the fastest way
|
||||
to see a complete reference implementation.
|
||||
|
||||
## What the contract is
|
||||
|
||||
The plugin is an HTTP server that workspace-server talks to via the
|
||||
OpenAPI v1 spec at [`docs/api-protocol/memory-plugin-v1.yaml`](../api-protocol/memory-plugin-v1.yaml).
|
||||
|
||||
Seven endpoints:
|
||||
|
||||
| Endpoint | Method | Purpose |
|
||||
|---|---|---|
|
||||
| `/v1/health` | GET | Liveness probe + capability list |
|
||||
| `/v1/namespaces/{name}` | PUT | Idempotent upsert |
|
||||
| `/v1/namespaces/{name}` | PATCH | Update TTL or metadata |
|
||||
| `/v1/namespaces/{name}` | DELETE | Remove namespace and its memories |
|
||||
| `/v1/namespaces/{name}/memories` | POST | Write a memory |
|
||||
| `/v1/search` | POST | Multi-namespace search |
|
||||
| `/v1/memories/{id}` | DELETE | Forget a memory |
|
||||
|
||||
The wire types are defined in
|
||||
`workspace-server/internal/memory/contract/contract.go`. Run-time
|
||||
validation is built into the Go bindings via `Validate()` methods —
|
||||
your plugin SHOULD perform equivalent validation.
|
||||
|
||||
## What workspace-server takes care of
|
||||
|
||||
You do **not** implement these in the plugin; workspace-server is the
|
||||
security perimeter:
|
||||
|
||||
- **Secret redaction** (SAFE-T1201). All `content` you receive is
|
||||
already scrubbed. Don't run additional redaction; it's pointless.
|
||||
- **Namespace ACL**. workspace-server intersects the caller's
|
||||
readable namespaces against the requested list before sending you
|
||||
the search request. The list you receive is authoritative.
|
||||
- **GLOBAL audit**. Org-namespace writes are recorded in
|
||||
`activity_logs` server-side; you don't see them.
|
||||
- **Prompt-injection wrap**. Org memories returned to agents get a
|
||||
`[MEMORY id=... scope=ORG ns=...]:` prefix added at the
|
||||
workspace-server layer. Your `content` field is plain text.
|
||||
|
||||
## What you implement
|
||||
|
||||
- Storage of `memory_namespaces` and `memory_records` (or whatever
|
||||
shape you want — Pinecone vectors, an in-memory map, etc.)
|
||||
- The 7 endpoints above with the request/response shapes the spec
|
||||
defines
|
||||
- `/v1/health` reporting your supported capabilities (see below)
|
||||
- Idempotency on namespace upsert (PUT semantics, not POST)
|
||||
- Idempotency on memory commit when `MemoryWrite.id` is supplied
|
||||
(see "Memory idempotency" below)
|
||||
|
||||
## Memory idempotency
|
||||
|
||||
`MemoryWrite.id` is optional. Two contracts to honor:
|
||||
|
||||
| Caller passes | Plugin MUST |
|
||||
|---|---|
|
||||
| `id` omitted | Generate a fresh UUID, return it in the response |
|
||||
| `id` set | Upsert keyed on this id — if a row with that id already exists, UPDATE it in place rather than inserting a duplicate |
|
||||
|
||||
The backfill CLI (`memory-backfill`) relies on the upsert behavior
|
||||
so retries don't duplicate rows. Production agent commits leave `id`
|
||||
empty and rely on the plugin's UUID generator — the hot path is
|
||||
unchanged.
|
||||
|
||||
The built-in postgres plugin implements this with `INSERT ... ON
|
||||
CONFLICT (id) DO UPDATE`. A vector-DB plugin (e.g., Pinecone) would
|
||||
use the database's native upsert primitive on the same id.
|
||||
|
||||
## Capability negotiation
|
||||
|
||||
Your `/v1/health` response declares what features you support:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ok",
|
||||
"version": "1.0.0",
|
||||
"capabilities": ["embedding", "fts", "ttl", "pin", "propagation"]
|
||||
}
|
||||
```
|
||||
|
||||
| Capability | What it gates |
|
||||
|---|---|
|
||||
| `embedding` | Agents may ask for semantic search; you receive `embedding: [...]` in search bodies |
|
||||
| `fts` | Agents may pass a query string; you decide how to match (FTS, ILIKE, regex) |
|
||||
| `ttl` | Agents may set `expires_at`; you must not return expired rows |
|
||||
| `pin` | Agents may set `pin: true`; you should rank pinned rows first |
|
||||
| `propagation` | Agents may set `propagation: {...}`; you must store it as opaque JSON and return it on read |
|
||||
|
||||
A capability you DON'T list is fine — workspace-server adapts the MCP
|
||||
tool surface to match. E.g., a Pinecone-only plugin that lists only
|
||||
`embedding` will silently ignore agents' `query` strings.
|
||||
|
||||
## Deployment models
|
||||
|
||||
Three common shapes:
|
||||
|
||||
1. **Same machine, different process**: workspace-server boots, then
|
||||
`MEMORY_PLUGIN_URL=http://localhost:9100` points at your plugin
|
||||
running on a unix socket or localhost port. This is what the
|
||||
built-in postgres plugin does.
|
||||
|
||||
2. **Separate container**: deploy your plugin as its own service on
|
||||
the private network. Set `MEMORY_PLUGIN_URL` to its DNS name.
|
||||
|
||||
3. **Self-managed**: customer-owned plugin running on customer-owned
|
||||
infrastructure, accessed over a tunnel. Same env-var wiring.
|
||||
|
||||
Auth is **none** — the plugin must be reachable only on a private
|
||||
network. workspace-server is the only sanctioned client.
|
||||
|
||||
## Replacing the built-in plugin
|
||||
|
||||
This is the canonical operator runbook for swapping the default
|
||||
plugin out. The same sequence applies whether you're swapping for
|
||||
another postgres plugin variant, Pinecone, Letta, or a custom
|
||||
implementation.
|
||||
|
||||
1. **Stand up the new plugin.** Deploy the binary/container, confirm
|
||||
it boots, confirm `/v1/health` returns `ok` with the capability
|
||||
list you expect.
|
||||
|
||||
2. **Run the backfill in dry-run mode** to scope the migration:
|
||||
```bash
|
||||
DATABASE_URL=postgres://... \
|
||||
MEMORY_PLUGIN_URL=http://your-plugin:9100 \
|
||||
memory-backfill -dry-run
|
||||
```
|
||||
Reports row count + namespace mapping per workspace, no writes.
|
||||
|
||||
3. **Apply the backfill:**
|
||||
```bash
|
||||
memory-backfill -apply
|
||||
```
|
||||
Idempotent on retry — the backfill passes each `agent_memories.id`
|
||||
to `MemoryWrite.id`, so partial-then-full re-runs upsert in place.
|
||||
|
||||
4. **Verify parity** before flipping the cutover flag:
|
||||
```bash
|
||||
memory-backfill -verify -verify-sample=200
|
||||
```
|
||||
Randomly samples N workspaces, diffs a direct `agent_memories` query
|
||||
against plugin search via the workspace's readable namespaces.
|
||||
Reports mismatches and exits non-zero if any are found — wire
|
||||
into your CI to gate the cutover.
|
||||
|
||||
5. **Flip the cutover flag.** Set `MEMORY_V2_CUTOVER=true` on
|
||||
workspace-server and restart. Admin export/import now route
|
||||
through the plugin; legacy `agent_memories` becomes read-only.
|
||||
|
||||
6. **Existing data in the old plugin's tables is NOT auto-dropped.**
|
||||
Deliberate safety property — operator drops manually after the
|
||||
~60-day grace window. If you switch back later, old data comes
|
||||
back into use (no loss).
|
||||
|
||||
If `-verify` reports mismatches, do NOT set `MEMORY_V2_CUTOVER` —
|
||||
inspect the output, re-run `-apply` to backfill missing rows (it
|
||||
upserts, so this is safe), and re-verify.
|
||||
|
||||
## Worked examples
|
||||
|
||||
- [`pinecone-example/`](pinecone-example/) — full Pinecone-backed plugin
|
||||
- [`testing-your-plugin.md`](testing-your-plugin.md) — running the
|
||||
contract test harness against your implementation
|
||||
|
||||
## When to write one vs. fork the default
|
||||
|
||||
Fork the default postgres plugin if:
|
||||
- You want different SQL (materialized views? A different vector index?)
|
||||
- You want extra auth on top
|
||||
- You want server-side metrics emission
|
||||
|
||||
Write a fresh plugin if:
|
||||
- The storage backend is fundamentally different (vector DB, KV store,
|
||||
in-memory, file-based)
|
||||
- You're integrating an existing memory service (Letta, Mem0, etc.)
|
||||
|
||||
## See also
|
||||
|
||||
- [`CHANGELOG.md`](CHANGELOG.md) — contract revisions and fixup waves
|
||||
- RFC #2728 — design rationale
|
||||
- [`cmd/memory-plugin-postgres/`](../../workspace-server/cmd/memory-plugin-postgres/) — reference implementation
|
||||
- [`docs/api-protocol/memory-plugin-v1.yaml`](../api-protocol/memory-plugin-v1.yaml) — full OpenAPI spec
|
||||
@@ -0,0 +1,124 @@
|
||||
# Pinecone-backed Memory Plugin (worked example)
|
||||
|
||||
A working sketch of a memory plugin that delegates storage to
|
||||
[Pinecone](https://www.pinecone.io/) instead of postgres.
|
||||
|
||||
This is **example code, not a production binary**. It demonstrates
|
||||
how to map the v1 contract onto a vector database. Operators who
|
||||
want to ship this would harden auth, add retries, batch the
|
||||
commit path, etc.
|
||||
|
||||
## Why Pinecone is interesting
|
||||
|
||||
The default postgres plugin's pgvector index works for ~10M memories
|
||||
on a single node. Beyond that, semantic search becomes painful. A
|
||||
managed vector database can handle 1B+ memories, but the trade-offs
|
||||
are different:
|
||||
|
||||
- **Capabilities**: Pinecone is great at `embedding` (its core
|
||||
feature) but has no first-class FTS. So the plugin reports
|
||||
`["embedding"]` and ignores the `query` field.
|
||||
- **TTL**: Pinecone supports per-vector metadata with deletion via
|
||||
metadata filter — TTL becomes a periodic janitor task, not a
|
||||
per-row property.
|
||||
- **Cost**: per-vector billing, so the plugin should batch writes
|
||||
and dedup before posting.
|
||||
|
||||
## Wire mapping
|
||||
|
||||
| Contract field | Pinecone shape |
|
||||
|---|---|
|
||||
| `namespace` | `namespace` (Pinecone's first-class concept) |
|
||||
| `id` (caller-supplied) | `id` (Pinecone vector id; plugin upserts on this) |
|
||||
| `id` (omitted) | Plugin generates `uuid.NewString()` before upsert |
|
||||
| `content` | metadata.text |
|
||||
| `embedding` | `values` |
|
||||
| `kind` / `source` / `pin` / `expires_at` | `metadata.{kind, source, pin, expires_at}` |
|
||||
| `propagation` (opaque JSON) | `metadata.propagation` (also opaque) |
|
||||
|
||||
The contract's `expires_at` becomes a metadata field; a separate
|
||||
janitor cron periodically queries `expires_at < now` and deletes.
|
||||
|
||||
Pinecone's native upsert is the right fit for the idempotency-key
|
||||
contract: passing the same `id` twice updates in place. So a
|
||||
Pinecone plugin gets idempotent backfill retries "for free" if it
|
||||
just forwards `MemoryWrite.id` (or its generated UUID) to the
|
||||
upsert call.
|
||||
|
||||
## Skeleton
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
|
||||
"github.com/pinecone-io/go-pinecone/pinecone"
|
||||
)
|
||||
|
||||
// pineconePlugin carries the state shared by the example's HTTP handlers.
type pineconePlugin struct {
|
||||
// client is the Pinecone SDK client created once in main.
client *pinecone.Client
|
||||
// index holds the value of the PINECONE_INDEX environment variable.
index string
|
||||
}
|
||||
|
||||
func main() {
|
||||
apiKey := os.Getenv("PINECONE_API_KEY")
|
||||
if apiKey == "" {
|
||||
log.Fatal("PINECONE_API_KEY required")
|
||||
}
|
||||
client, err := pinecone.NewClient(pinecone.NewClientParams{ApiKey: apiKey})
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
p := &pineconePlugin{client: client, index: os.Getenv("PINECONE_INDEX")}
|
||||
|
||||
http.HandleFunc("/v1/health", p.health)
|
||||
http.HandleFunc("/v1/search", p.search)
|
||||
// ... rest of the routes ...
|
||||
|
||||
log.Fatal(http.ListenAndServe(":9100", nil))
|
||||
}
|
||||
|
||||
func (p *pineconePlugin) health(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(map[string]interface{}{
|
||||
"status": "ok",
|
||||
"version": "1.0.0",
|
||||
"capabilities": []string{"embedding"}, // no FTS, no TTL out-of-box
|
||||
})
|
||||
}
|
||||
|
||||
// search is the /v1/search handler. It is left unimplemented in this
// sketch; the comments in the body outline the intended steps.
func (p *pineconePlugin) search(w http.ResponseWriter, r *http.Request) {
|
||||
// Parse contract.SearchRequest
|
||||
// Build Pinecone QueryByVectorValuesRequest with body.Embedding
|
||||
// For each Pinecone namespace in body.Namespaces, call Query
|
||||
// Map results to contract.Memory
|
||||
// ...
|
||||
}
|
||||
```
|
||||
|
||||
## What's missing from this sketch
|
||||
|
||||
A production-ready Pinecone plugin would add:
|
||||
|
||||
- **Batch commits**: bulk upsert N memories in a single Pinecone call
|
||||
- **TTL janitor**: periodic deletion of expired vectors
|
||||
- **Connection pooling**: keep one Pinecone client alive across requests
|
||||
- **Retry + circuit breaker**: Pinecone occasionally returns 5xx
|
||||
- **Metrics**: latency histograms per endpoint, write/read counters
|
||||
- **Idempotency-key handling**: when `MemoryWrite.id` is supplied,
|
||||
forward it as the Pinecone vector id verbatim; otherwise generate
|
||||
one. Pinecone's `Upsert` is naturally idempotent on id match.
|
||||
|
||||
But the mapping above is the load-bearing part — the rest is
|
||||
operational hardening, not contract-specific.
|
||||
|
||||
## See also
|
||||
|
||||
- [Pinecone Go SDK docs](https://docs.pinecone.io/reference/go-sdk)
|
||||
- [Memory plugin contract spec](../../api-protocol/memory-plugin-v1.yaml)
|
||||
- [Default postgres plugin source](../../../workspace-server/cmd/memory-plugin-postgres/) — for comparison
|
||||
@@ -0,0 +1,181 @@
|
||||
# Testing Your Memory Plugin
|
||||
|
||||
Once you have a plugin implementing the v1 contract, you can validate
|
||||
it against the spec without booting workspace-server.
|
||||
|
||||
## The contract test harness
|
||||
|
||||
Workspace-server ships typed Go bindings + round-trip tests in
|
||||
`workspace-server/internal/memory/contract/`. The simplest way to
|
||||
gain confidence in your plugin's wire compatibility is to point those
|
||||
tests at it.
|
||||
|
||||
A minimal contract suite:
|
||||
|
||||
```go
|
||||
package myplugin_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
mclient "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/client"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
|
||||
)
|
||||
|
||||
func TestMyPlugin_FullRoundTrip(t *testing.T) {
|
||||
// Start your plugin somehow (subprocess, in-process, etc.)
|
||||
pluginURL := startMyPlugin(t)
|
||||
cl := mclient.New(mclient.Config{BaseURL: pluginURL})
|
||||
|
||||
// 1. Health
|
||||
hr, err := cl.Boot(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("Boot: %v", err)
|
||||
}
|
||||
if hr.Status != "ok" {
|
||||
t.Errorf("status = %q", hr.Status)
|
||||
}
|
||||
|
||||
// 2. Namespace upsert
|
||||
if _, err := cl.UpsertNamespace(context.Background(), "workspace:test-1",
|
||||
contract.NamespaceUpsert{Kind: contract.NamespaceKindWorkspace}); err != nil {
|
||||
t.Fatalf("UpsertNamespace: %v", err)
|
||||
}
|
||||
|
||||
// 3. Commit memory
|
||||
resp, err := cl.CommitMemory(context.Background(), "workspace:test-1",
|
||||
contract.MemoryWrite{
|
||||
Content: "hello",
|
||||
Kind: contract.MemoryKindFact,
|
||||
Source: contract.MemorySourceAgent,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("CommitMemory: %v", err)
|
||||
}
|
||||
if resp.ID == "" {
|
||||
t.Errorf("plugin must return a non-empty memory id")
|
||||
}
|
||||
|
||||
// 4. Search
|
||||
sresp, err := cl.Search(context.Background(), contract.SearchRequest{
|
||||
Namespaces: []string{"workspace:test-1"},
|
||||
Query: "hello",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Search: %v", err)
|
||||
}
|
||||
if len(sresp.Memories) == 0 {
|
||||
t.Errorf("plugin returned no memories for the query we just wrote")
|
||||
}
|
||||
|
||||
// 5. Forget
|
||||
if err := cl.ForgetMemory(context.Background(), resp.ID,
|
||||
contract.ForgetRequest{RequestedByNamespace: "workspace:test-1"}); err != nil {
|
||||
t.Errorf("ForgetMemory: %v", err)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Testing idempotency
|
||||
|
||||
The contract requires that `MemoryWrite.id`, when supplied, behaves
|
||||
as an upsert key. The backfill CLI relies on this — without it,
|
||||
operator retries silently duplicate every memory.
|
||||
|
||||
```go
|
||||
func TestMyPlugin_IDIsIdempotencyKey(t *testing.T) {
|
||||
pluginURL := startMyPlugin(t)
|
||||
cl := mclient.New(mclient.Config{BaseURL: pluginURL})
|
||||
if _, err := cl.UpsertNamespace(context.Background(), "workspace:test-1",
|
||||
contract.NamespaceUpsert{Kind: contract.NamespaceKindWorkspace}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
fixedID := "11111111-2222-3333-4444-555555555555"
|
||||
|
||||
// First write with a specific id.
|
||||
resp1, err := cl.CommitMemory(context.Background(), "workspace:test-1",
|
||||
contract.MemoryWrite{
|
||||
ID: fixedID,
|
||||
Content: "first version",
|
||||
Kind: contract.MemoryKindFact,
|
||||
Source: contract.MemorySourceAgent,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("first commit: %v", err)
|
||||
}
|
||||
if resp1.ID != fixedID {
|
||||
t.Errorf("plugin must echo the supplied id, got %q", resp1.ID)
|
||||
}
|
||||
|
||||
// Second write with the same id — must update, not insert.
|
||||
if _, err := cl.CommitMemory(context.Background(), "workspace:test-1",
|
||||
contract.MemoryWrite{
|
||||
ID: fixedID,
|
||||
Content: "second version (updated)",
|
||||
Kind: contract.MemoryKindFact,
|
||||
Source: contract.MemorySourceAgent,
|
||||
}); err != nil {
|
||||
t.Fatalf("second commit: %v", err)
|
||||
}
|
||||
|
||||
// Search must return exactly one row, with the updated content.
|
||||
sresp, _ := cl.Search(context.Background(), contract.SearchRequest{
|
||||
Namespaces: []string{"workspace:test-1"},
|
||||
})
|
||||
matches := 0
|
||||
for _, m := range sresp.Memories {
|
||||
if m.ID == fixedID {
|
||||
matches++
|
||||
if m.Content != "second version (updated)" {
|
||||
t.Errorf("upsert didn't update content: got %q", m.Content)
|
||||
}
|
||||
}
|
||||
}
|
||||
if matches != 1 {
|
||||
t.Errorf("upsert produced %d rows for id=%s, want 1", matches, fixedID)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## What the harness does NOT cover
|
||||
|
||||
- **Capability accuracy**: if you list `embedding` you must actually
|
||||
do semantic search. The harness can't tell you whether ranking is
|
||||
meaningful — only that you don't crash.
|
||||
- **TTL eviction**: write a memory with `expires_at` 1 second in the
|
||||
future, sleep 2 seconds, search — assert the memory is gone.
|
||||
- **Concurrency**: hit your plugin with 100 parallel writes; assert
|
||||
no IDs collide.
|
||||
- **Recovery**: kill your plugin's storage backend, send a request,
|
||||
assert your plugin returns 503 (not 200 with stale data).
|
||||
- **Backfill compatibility**: run the operator backfill against your
|
||||
plugin twice in a row (`memory-backfill -apply`); assert the row
|
||||
count doesn't double. The idempotency test above verifies the unit
|
||||
contract; this checks the operational integration.
|
||||
- **Verify-mode parity**: after a backfill, run `memory-backfill
|
||||
-verify`; assert it reports zero mismatches against
|
||||
`agent_memories`.
|
||||
|
||||
## Smoke test against workspace-server
|
||||
|
||||
Once unit-level wire tests pass, run a real workspace-server with your
|
||||
plugin URL:
|
||||
|
||||
```bash
|
||||
DATABASE_URL=postgres://... \
|
||||
MEMORY_PLUGIN_URL=http://localhost:9100 \
|
||||
./workspace-server
|
||||
```
|
||||
|
||||
Then ask an agent to call `commit_memory_v2` and `search_memory`. If
|
||||
both round-trip cleanly, you're done.
|
||||
|
||||
For an integration test of the full flow (including the namespace resolver, MCP layer,
|
||||
and security perimeter), see [PR-11's plugin-swap test](../../workspace-server/test/e2e/memory_plugin_swap_test.go).
|
||||
|
||||
## Reporting bugs
|
||||
|
||||
If you find a contract ambiguity or missing edge case, file an issue
|
||||
against `Molecule-AI/molecule-core` referencing RFC #2728.
|
||||
+6
-1
@@ -28,7 +28,12 @@
|
||||
{"name": "claude-code-default", "repo": "Molecule-AI/molecule-ai-workspace-template-claude-code", "ref": "main"},
|
||||
{"name": "hermes", "repo": "Molecule-AI/molecule-ai-workspace-template-hermes", "ref": "main"},
|
||||
{"name": "openclaw", "repo": "Molecule-AI/molecule-ai-workspace-template-openclaw", "ref": "main"},
|
||||
{"name": "codex", "repo": "Molecule-AI/molecule-ai-workspace-template-codex", "ref": "main"}
|
||||
{"name": "codex", "repo": "Molecule-AI/molecule-ai-workspace-template-codex", "ref": "main"},
|
||||
{"name": "langgraph", "repo": "Molecule-AI/molecule-ai-workspace-template-langgraph", "ref": "main"},
|
||||
{"name": "crewai", "repo": "Molecule-AI/molecule-ai-workspace-template-crewai", "ref": "main"},
|
||||
{"name": "autogen", "repo": "Molecule-AI/molecule-ai-workspace-template-autogen", "ref": "main"},
|
||||
{"name": "deepagents", "repo": "Molecule-AI/molecule-ai-workspace-template-deepagents", "ref": "main"},
|
||||
{"name": "gemini-cli", "repo": "Molecule-AI/molecule-ai-workspace-template-gemini-cli", "ref": "main"}
|
||||
],
|
||||
"org_templates": [
|
||||
{"name": "molecule-dev", "repo": "Molecule-AI/molecule-ai-org-template-molecule-dev", "ref": "main"},
|
||||
|
||||
@@ -58,6 +58,8 @@ TOP_LEVEL_MODULES = {
|
||||
"adapter_base",
|
||||
"agent",
|
||||
"agents_md",
|
||||
"boot_routes",
|
||||
"card_helpers",
|
||||
"config",
|
||||
"configs_dir",
|
||||
"consolidation",
|
||||
@@ -73,12 +75,14 @@ TOP_LEVEL_MODULES = {
|
||||
"main",
|
||||
"mcp_cli",
|
||||
"molecule_ai_status",
|
||||
"not_configured_handler",
|
||||
"platform_auth",
|
||||
"platform_inbound_auth",
|
||||
"plugins",
|
||||
"preflight",
|
||||
"prompt",
|
||||
"runtime_wedge",
|
||||
"secret_redactor",
|
||||
"shared_runtime",
|
||||
"smoke_mode",
|
||||
"transcript_auth",
|
||||
|
||||
Executable
+51
@@ -0,0 +1,51 @@
|
||||
#!/usr/bin/env bash
|
||||
# Per-runtime model slug dispatch for E2E provisioning.
|
||||
#
|
||||
# Different runtimes parse the model slug differently (PR #2571 incident,
|
||||
# 2026-05-03):
|
||||
#
|
||||
# hermes → "openai/gpt-4o" (slash-form: derive-provider.sh splits
|
||||
# on the prefix to set
|
||||
# HERMES_INFERENCE_PROVIDER. Bare
|
||||
# "gpt-4o" falls through to Anthropic
|
||||
# default + 401, see PR #1714.)
|
||||
#
|
||||
# langgraph → "openai:gpt-4o" (colon-form: langchain init_chat_model
|
||||
# requires "<provider>:<model>".
|
||||
# Slash-form was misinterpreted as
|
||||
# OpenRouter routing → fell through
|
||||
# without auth, surfaced 2026-05-03
|
||||
# after the a2a-sdk v1 contract bugs
|
||||
# PR #2558+#2563+#2567 cleared the
|
||||
# masking layers.)
|
||||
#
|
||||
# claude-code → "sonnet" (entry-id form: claude-code template's
|
||||
# config.yaml uses bare model names,
|
||||
# auth comes via CLAUDE_CODE_OAUTH_TOKEN
|
||||
# or ANTHROPIC_API_KEY rather than the
|
||||
# slug.)
|
||||
#
|
||||
# When E2E_MODEL_SLUG is set, it overrides this dispatch — useful when an
|
||||
# operator dispatches the workflow to test a specific slug.
|
||||
#
|
||||
# Unit tested by tests/e2e/test_model_slug.sh — every branch must stay
|
||||
# pinned because regressions silently mask as "Could not resolve
|
||||
# authentication method" + the synth-E2E gate goes red without naming
|
||||
# the slug-format mismatch.
|
||||
|
||||
# Usage: pick_model_slug <runtime>
|
||||
# stdout: the slug string
|
||||
# E2E_MODEL_SLUG (env): if set + non-empty, used as-is (operator override)
|
||||
pick_model_slug() {
  local runtime="${1:-}"
  local slug

  # A non-empty E2E_MODEL_SLUG wins outright; otherwise dispatch on runtime.
  if [[ -n "${E2E_MODEL_SLUG:-}" ]]; then
    slug="$E2E_MODEL_SLUG"
  else
    case "$runtime" in
      hermes)      slug='openai/gpt-4o' ;;
      langgraph)   slug='openai:gpt-4o' ;;
      claude-code) slug='sonnet' ;;
      *)           slug='openai/gpt-4o' ;; # safest fallback (matches hermes)
    esac
  fi

  # No trailing newline, so callers capturing stdout get the bare slug.
  printf '%s' "$slug"
}
|
||||
Executable
+90
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env bash
|
||||
# Regression test for tests/e2e/lib/model_slug.sh.
|
||||
#
|
||||
# PR #2571 fixed a synth-E2E masking bug where MODEL_SLUG was hardcoded
|
||||
# to "openai/gpt-4o" (slash-form) but langgraph's init_chat_model needs
|
||||
# "openai:gpt-4o" (colon-form). Fix shipped as a per-runtime case
|
||||
# statement. Without this regression test, dropping any branch of the
|
||||
# case (or flipping a slug format) would silently revert behavior — the
|
||||
# E2E only fails as "Could not resolve authentication method" at the
|
||||
# very first message, after a successful tenant + workspace provision.
|
||||
#
|
||||
# Each branch must FAIL the test if the dispatch behavior changes, not
|
||||
# just produce some non-empty string.
|
||||
set -uo pipefail
|
||||
|
||||
# Resolve to the lib relative to this test file so the test runs from
|
||||
# any cwd (CI, local invocation, repo root).
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
# shellcheck source=lib/model_slug.sh
|
||||
source "$SCRIPT_DIR/lib/model_slug.sh"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# assert_eq <label> <got> <want>
# Records a pass or a failure in the PASS/FAIL counters; a mismatch is
# reported on stderr with both values shell-quoted.
assert_eq() {
  local desc="$1" actual="$2" expected="$3"
  if [ "$actual" != "$expected" ]; then
    echo " ✗ $desc: got=$(printf %q "$actual") want=$(printf %q "$expected")" >&2
    FAIL=$((FAIL+1))
    return 0
  fi
  echo " ✓ $desc"
  PASS=$((PASS+1))
}
|
||||
|
||||
# run_test <label> <runtime> <want>
# Checks the slug pick_model_slug emits for <runtime> against <want>.
run_test() {
  local label="$1" runtime="$2" want="$3"
  # The override is unset inside the command substitution's subshell, so an
  # E2E_MODEL_SLUG inherited from the caller cannot mask the dispatch
  # branches and the caller's environment is left untouched.
  local got
  got="$(
    unset E2E_MODEL_SLUG
    pick_model_slug "$runtime"
  )"
  assert_eq "$label" "$got" "$want"
}
|
||||
|
||||
echo "Test: pick_model_slug — per-runtime dispatch"
|
||||
echo
|
||||
|
||||
# ── Per-runtime branches (the load-bearing ones for synth-E2E) ──
|
||||
run_test "hermes → slash-form (derive-provider.sh contract)" hermes "openai/gpt-4o"
|
||||
run_test "langgraph → colon-form (init_chat_model contract)" langgraph "openai:gpt-4o"
|
||||
run_test "claude-code → bare model name (entry-id form)" claude-code "sonnet"
|
||||
|
||||
# ── Fallback for unknown runtime ──
|
||||
# Picks slash-form (hermes-shaped) since hermes is the historical
|
||||
# default and most third-party runtimes behave hermes-like. Pinning
|
||||
# this so a future "smarter" fallback (e.g., empty string, error) is
|
||||
# a deliberate choice, not silent drift.
|
||||
run_test "unknown runtime → slash-form fallback" gemini "openai/gpt-4o"
|
||||
run_test "empty runtime → slash-form fallback" "" "openai/gpt-4o"
|
||||
|
||||
# ── Override via E2E_MODEL_SLUG ──
|
||||
# When the operator sets E2E_MODEL_SLUG, the per-runtime dispatch is
|
||||
# bypassed. Used during workflow_dispatch to A/B specific slugs.
|
||||
echo
|
||||
echo "Test: pick_model_slug — E2E_MODEL_SLUG override"
|
||||
echo
|
||||
|
||||
got=$(E2E_MODEL_SLUG="anthropic:claude-opus-4-7" pick_model_slug langgraph)
|
||||
assert_eq "override beats langgraph default" "$got" "anthropic:claude-opus-4-7"
|
||||
|
||||
got=$(E2E_MODEL_SLUG="custom/whatever" pick_model_slug hermes)
|
||||
assert_eq "override beats hermes default" "$got" "custom/whatever"
|
||||
|
||||
got=$(E2E_MODEL_SLUG="some-bare-id" pick_model_slug claude-code)
|
||||
assert_eq "override beats claude-code default" "$got" "some-bare-id"
|
||||
|
||||
# Empty-string override does NOT activate (falls through to dispatch).
|
||||
# This is the historical bash idiom: -n "" → false → no override. Pin
|
||||
# it because changing this behavior (e.g. via -v test) would silently
|
||||
# break the dispatch when an operator passes "" to clear an inherited
|
||||
# env var.
|
||||
got=$(E2E_MODEL_SLUG="" pick_model_slug langgraph)
|
||||
assert_eq "empty-string override falls through to dispatch" "$got" "openai:gpt-4o"
|
||||
|
||||
echo
|
||||
echo "─────────────────────────────────────────────────"
|
||||
echo "PASSED: $PASS"
|
||||
echo "FAILED: $FAIL"
|
||||
echo "─────────────────────────────────────────────────"
|
||||
[ "$FAIL" -eq 0 ]
|
||||
Executable
+145
@@ -0,0 +1,145 @@
|
||||
#!/usr/bin/env bash
|
||||
# Regression test for the SECRETS_JSON branching in
|
||||
# tests/e2e/test_staging_full_saas.sh (lines ~322-368).
|
||||
#
|
||||
# The synth-E2E canary picks one of three LLM auth paths based on which
# E2E_*_API_KEY is set. The branch order is load-bearing (first
# non-empty key wins):
#
#   E2E_MINIMAX_API_KEY first    → claude-code MiniMax path (cheap canary
#                                  default since 2026-05-03; routes via
#                                  workspace-configs-templates/claude-
#                                  code-default/config.yaml's `minimax`
#                                  provider entry).
#
#   E2E_ANTHROPIC_API_KEY second → claude-code direct-Anthropic path
#                                  (added 2026-05-04; emits only
#                                  ANTHROPIC_API_KEY).
#
#   E2E_OPENAI_API_KEY third     → langgraph + hermes legacy path (kept
#                                  as fallback for operator dispatches
#                                  that need the OpenAI-shaped
#                                  HERMES_CUSTOM_* env block).
|
||||
#
|
||||
# Without this gate, a future "tidy up the if/elif" refactor could
|
||||
# silently flip the precedence (OpenAI wins when both are set →
|
||||
# claude-code workspace boots without MINIMAX_API_KEY → 401 at first
|
||||
# turn → canary red without any signal that the wrong key shape was
|
||||
# selected). The 2026-05-03 OpenAI-quota incident took ~16h to
|
||||
# diagnose for exactly this class of "looks like an LLM problem,
|
||||
# was actually a wiring problem" failure.
|
||||
|
||||
set -uo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
SAAS_SCRIPT="$SCRIPT_DIR/test_staging_full_saas.sh"
|
||||
|
||||
if [ ! -f "$SAAS_SCRIPT" ]; then
|
||||
echo "FATAL: cannot locate test_staging_full_saas.sh at $SAAS_SCRIPT" >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# assert_eq LABEL GOT WANT
#
# String-compares GOT against WANT. A match prints a tick line to stdout
# and increments the global PASS counter; a mismatch prints the label and
# both values to stderr and increments the global FAIL counter. It never
# aborts the script — the closing `[ "$FAIL" -eq 0 ]` decides the exit
# status.
assert_eq() {
  local label="$1" got="$2" want="$3"

  # Mismatch first so the success path stays unindented.
  if [ "$got" != "$want" ]; then
    {
      echo " ✗ $label"
      echo " got: $got"
      echo " want: $want"
    } >&2
    FAIL=$((FAIL+1))
    return 0
  fi

  echo " ✓ $label"
  PASS=$((PASS+1))
}
|
||||
|
||||
# Print the SECRETS_JSON section of $SAAS_SCRIPT so it can be executed
# on its own: every line strictly after the "# ─── 5. Provision parent
# workspace" header and strictly before the first following line that
# starts with MODEL_SLUG=.
#
# The header comment is the anchor on purpose: if a refactor moves or
# renames the section, nothing is printed and the caller's empty-check
# fails loudly instead of silently sourcing nothing.
extract_block() {
  awk '
    /^# ─── 5\. Provision parent workspace/ { in_block = 1; next }
    !in_block { next }
    /^MODEL_SLUG=/ { exit }
    { print }
  ' "$SAAS_SCRIPT"
}
|
||||
|
||||
BLOCK=$(extract_block)
|
||||
if [ -z "$BLOCK" ]; then
|
||||
echo "FATAL: SECRETS_JSON block not found in $SAAS_SCRIPT — refactor anchor changed?" >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
# Execute the extracted block ($BLOCK) in a scrubbed environment and
# print the SECRETS_JSON value it produced.
#
# Arguments are NAME=value pairs handed straight to `env`, e.g.
#   run_block E2E_MINIMAX_API_KEY=mx-test
# `env -i` drops every inherited variable except PATH, so the block sees
# only the keys the caller names. The child's stderr is discarded and
# only the last stdout line (the echoed SECRETS_JSON) is kept.
run_block() {
  local script
  script="
set -uo pipefail
$BLOCK
echo \"\$SECRETS_JSON\"
"
  env -i PATH="$PATH" "$@" bash -c "$script" 2>/dev/null | tail -1
}
|
||||
|
||||
# get_json_key PAYLOAD KEY
#
# Parse PAYLOAD as a JSON object with python3 (already a hard dependency
# of the saas script) and print the value stored under KEY. A missing
# key prints an empty line, so callers can compare against "".
get_json_key() {
  local payload="$1" key="$2"
  python3 -c "import json, sys; print(json.loads(sys.argv[1]).get(sys.argv[2], ''))" "$payload" "$key"
}
|
||||
|
||||
# list_json_keys PAYLOAD
#
# Parse PAYLOAD as a JSON object and print its keys sorted and joined
# with commas (an empty object prints an empty line). Used to assert
# that a branch emitted exactly the expected set of secrets.
list_json_keys() {
  python3 -c "import json, sys; print(','.join(sorted(json.loads(sys.argv[1]).keys())))" "$1"
}
|
||||
|
||||
echo "Test: SECRETS_JSON branching in test_staging_full_saas.sh"
|
||||
echo
|
||||
|
||||
# The saas script resolves SECRETS_JSON with an if/elif chain in this
# order: E2E_MINIMAX_API_KEY, E2E_ANTHROPIC_API_KEY, E2E_OPENAI_API_KEY.
# Every branch and every adjacent precedence pair is pinned below so a
# reordering of the chain turns this test red.

# ── Branch 1: MiniMax wins when set ──
SECRETS_JSON=$(run_block E2E_MINIMAX_API_KEY=mx-test)
assert_eq "MiniMax key set → MINIMAX_API_KEY in payload" \
  "$(get_json_key "$SECRETS_JSON" MINIMAX_API_KEY)" "mx-test"
assert_eq "MiniMax-only payload contains exactly MINIMAX_API_KEY" \
  "$(list_json_keys "$SECRETS_JSON")" "MINIMAX_API_KEY"

# ── Branch 1 precedence: MiniMax beats OpenAI when both set ──
# Critical: the 2026-05-03 incident shape was "two paths exist, wrong
# one wins". The bash if/elif must keep MiniMax above OpenAI so the
# claude-code default canary doesn't accidentally use the (more
# expensive, quota-burnt) OpenAI key.
SECRETS_JSON=$(run_block E2E_MINIMAX_API_KEY=mx-priority E2E_OPENAI_API_KEY=oai-loser)
assert_eq "Both keys set → MiniMax wins" \
  "$(get_json_key "$SECRETS_JSON" MINIMAX_API_KEY)" "mx-priority"
assert_eq "Both keys set → OpenAI block NOT emitted" \
  "$(get_json_key "$SECRETS_JSON" OPENAI_API_KEY)" ""
assert_eq "Both keys set → no HERMES_* leakage from OpenAI branch" \
  "$(get_json_key "$SECRETS_JSON" HERMES_INFERENCE_PROVIDER)" ""

# ── Branch 1 precedence: MiniMax beats Anthropic when both set ──
SECRETS_JSON=$(run_block E2E_MINIMAX_API_KEY=mx-priority E2E_ANTHROPIC_API_KEY=ant-loser)
assert_eq "MiniMax + Anthropic set → MiniMax wins" \
  "$(get_json_key "$SECRETS_JSON" MINIMAX_API_KEY)" "mx-priority"
assert_eq "MiniMax + Anthropic set → ANTHROPIC_API_KEY NOT emitted" \
  "$(get_json_key "$SECRETS_JSON" ANTHROPIC_API_KEY)" ""

# ── Branch 2: Anthropic used when MiniMax absent ──
SECRETS_JSON=$(run_block E2E_ANTHROPIC_API_KEY=ant-test)
assert_eq "Only Anthropic set → ANTHROPIC_API_KEY in payload" \
  "$(get_json_key "$SECRETS_JSON" ANTHROPIC_API_KEY)" "ant-test"
assert_eq "Anthropic-only payload contains exactly ANTHROPIC_API_KEY" \
  "$(list_json_keys "$SECRETS_JSON")" "ANTHROPIC_API_KEY"

# ── Branch 2 precedence: Anthropic beats OpenAI when both set ──
SECRETS_JSON=$(run_block E2E_ANTHROPIC_API_KEY=ant-priority E2E_OPENAI_API_KEY=oai-loser)
assert_eq "Anthropic + OpenAI set → Anthropic wins" \
  "$(get_json_key "$SECRETS_JSON" ANTHROPIC_API_KEY)" "ant-priority"
assert_eq "Anthropic + OpenAI set → OpenAI block NOT emitted" \
  "$(get_json_key "$SECRETS_JSON" OPENAI_API_KEY)" ""
assert_eq "Anthropic + OpenAI set → no HERMES_* leakage from OpenAI branch" \
  "$(get_json_key "$SECRETS_JSON" HERMES_INFERENCE_PROVIDER)" ""

# ── Branch 3: OpenAI used when MiniMax and Anthropic absent ──
SECRETS_JSON=$(run_block E2E_OPENAI_API_KEY=oai-test)
assert_eq "Only OpenAI set → OPENAI_API_KEY in payload" \
  "$(get_json_key "$SECRETS_JSON" OPENAI_API_KEY)" "oai-test"
assert_eq "Only OpenAI set → HERMES_CUSTOM_API_KEY mirrors OpenAI key" \
  "$(get_json_key "$SECRETS_JSON" HERMES_CUSTOM_API_KEY)" "oai-test"
assert_eq "Only OpenAI set → MODEL_PROVIDER pinned to colon-form" \
  "$(get_json_key "$SECRETS_JSON" MODEL_PROVIDER)" "openai:gpt-4o"
assert_eq "Only OpenAI set → MINIMAX_API_KEY NOT emitted" \
  "$(get_json_key "$SECRETS_JSON" MINIMAX_API_KEY)" ""
assert_eq "Only OpenAI set → ANTHROPIC_API_KEY NOT emitted" \
  "$(get_json_key "$SECRETS_JSON" ANTHROPIC_API_KEY)" ""

# ── No keys: empty payload ──
SECRETS_JSON=$(run_block)
assert_eq "No keys set → SECRETS_JSON is empty object" \
  "$SECRETS_JSON" "{}"
|
||||
|
||||
echo
|
||||
echo "─────────────────────────────────────────────────"
|
||||
echo "PASSED: $PASS"
|
||||
echo "FAILED: $FAIL"
|
||||
echo "─────────────────────────────────────────────────"
|
||||
[ "$FAIL" -eq 0 ]
|
||||
@@ -67,6 +67,12 @@ log() { echo "[$(date +%H:%M:%S)] $*"; }
|
||||
fail() { echo "[$(date +%H:%M:%S)] ❌ $*" >&2; exit 1; }
|
||||
ok() { echo "[$(date +%H:%M:%S)] ✅ $*"; }
|
||||
|
||||
# Per-runtime model slug dispatch — see lib/model_slug.sh for the rationale.
|
||||
# Extracted so unit tests (tests/e2e/test_model_slug.sh) can pin every branch
|
||||
# without booting the full 11-step lifecycle.
|
||||
# shellcheck source=lib/model_slug.sh
|
||||
source "$(dirname "$0")/lib/model_slug.sh"
|
||||
|
||||
CURL_COMMON=(-sS --fail-with-body --max-time 30)
|
||||
|
||||
# ─── cleanup trap ───────────────────────────────────────────────────────
|
||||
@@ -314,29 +320,68 @@ tenant_call() {
|
||||
}
|
||||
|
||||
# ─── 5. Provision parent workspace ─────────────────────────────────────
|
||||
# Runtimes like hermes crash at boot with "No provider API key found"
|
||||
# if nothing in the standard env-var list is set. Inject the API key
|
||||
# from E2E_OPENAI_API_KEY so the runtime can actually start — it's
|
||||
# per-workspace secret, so it's persisted as a workspace_secret and
|
||||
# materialized into the container env. Missing key falls through to
|
||||
# an empty secrets map; workspace will still fail but the error is
|
||||
# expected and actionable.
|
||||
# Inject the LLM provider key so the runtime can authenticate at boot.
|
||||
# Branch by which secret is set so the script supports multiple paths
|
||||
# without forcing every dispatch to ship them all. Priority order
|
||||
# matters — first non-empty wins:
|
||||
#
|
||||
# E2E_MINIMAX_API_KEY → claude-code MiniMax path. Cheapest, default
|
||||
# for the cron canary post-2026-05-03. Routes via the claude-code
|
||||
# template's `minimax` provider (workspace-configs-templates/
|
||||
# claude-code-default/config.yaml:64-69) which sets
|
||||
# ANTHROPIC_BASE_URL=https://api.minimax.io/anthropic at boot.
|
||||
# MINIMAX_API_KEY is the vendor-specific env name the adapter
|
||||
# reads (PR #244 — per-vendor envs prevent ANTHROPIC_AUTH_TOKEN
|
||||
# collisions when a user runs MiniMax + Z.ai workspaces side-by-
|
||||
# side).
|
||||
#
|
||||
# E2E_ANTHROPIC_API_KEY → claude-code direct-Anthropic path (added
|
||||
# 2026-05-04 after #2578 left the operator with an awkward choice
|
||||
# between paying OpenAI's billing top-up and registering a new
|
||||
# MiniMax account). Lower friction than MiniMax for operators
|
||||
# who already have an Anthropic API key for their own Claude
|
||||
# Code session. Pricier per-token than MiniMax but billing is
|
||||
# still independent of MOLECULE_STAGING_OPENAI_KEY. Pinned to the
|
||||
# claude-code runtime — hermes/langgraph use OpenAI-shaped envs.
|
||||
#
|
||||
# E2E_OPENAI_API_KEY → langgraph + hermes paths. Kept as fallback
|
||||
# for operator dispatches that explicitly want to exercise the
|
||||
# OpenAI path. The HERMES_* fields pin hermes-agent's bridge to
|
||||
# api.openai.com (template-hermes' derive-provider.sh otherwise
|
||||
# resolves openai/* → openrouter.ai and 401s). MODEL_PROVIDER
|
||||
# follows workspace/config.py:258's 'provider:model' format.
|
||||
#
|
||||
# All empty → '{}' (workspace will fail at first turn with an
|
||||
# expected, actionable auth error rather than masking the test).
|
||||
SECRETS_JSON='{}'
|
||||
if [ -n "${E2E_OPENAI_API_KEY:-}" ]; then
|
||||
# MODEL_PROVIDER is a full model slug in 'provider:model' format per
|
||||
# workspace/config.py:258. Using just "openai" gets parsed as the
|
||||
# model name → 404 model_not_found. Also set OPENAI_BASE_URL to
|
||||
# OpenAI's own endpoint — default is openrouter.ai which would need
|
||||
# a different key format.
|
||||
#
|
||||
# The HERMES_* fields below bypass template-hermes/scripts/derive-provider.sh
|
||||
# — verified 2026-04-24 that even with template-hermes#19's fix in main,
|
||||
# staging tenants sometimes resolve openai/* to PROVIDER=openrouter and
|
||||
# emit {'message':'Missing Authentication header','code':401} (OpenRouter's
|
||||
# shape) in the A2A reply. Setting HERMES_INFERENCE_PROVIDER=custom +
|
||||
# HERMES_CUSTOM_{BASE_URL,API_KEY,API_MODE} pins the bridge deterministically
|
||||
# so the test doesn't depend on every tenant EC2 having a freshly-cloned
|
||||
# template-hermes.
|
||||
if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
|
||||
SECRETS_JSON=$(python3 -c "
|
||||
import json, os
|
||||
k = os.environ['E2E_MINIMAX_API_KEY']
|
||||
print(json.dumps({
|
||||
'MINIMAX_API_KEY': k,
|
||||
}))
|
||||
")
|
||||
elif [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
|
||||
# Direct Anthropic path — claude-code adapter reads ANTHROPIC_API_KEY
|
||||
# natively when ANTHROPIC_BASE_URL is unset. Useful for operators
|
||||
# who already have an Anthropic API key (e.g. for their own Claude
|
||||
# Code session) and want to avoid setting up a separate MiniMax
|
||||
# account just for E2E. Pricier per-token than MiniMax but billing
|
||||
# is still independent of MOLECULE_STAGING_OPENAI_KEY, so an OpenAI
|
||||
# quota collapse doesn't wedge this path. Pinned to the claude-code
|
||||
# runtime: hermes/langgraph use OpenAI-shaped envs and won't honour
|
||||
# ANTHROPIC_API_KEY without further wiring (out of scope for this
|
||||
# branch; if you need a hermes/Anthropic path, dispatch with
|
||||
# E2E_RUNTIME=hermes + E2E_OPENAI_API_KEY pointing at a working key).
|
||||
SECRETS_JSON=$(python3 -c "
|
||||
import json, os
|
||||
k = os.environ['E2E_ANTHROPIC_API_KEY']
|
||||
print(json.dumps({
|
||||
'ANTHROPIC_API_KEY': k,
|
||||
}))
|
||||
")
|
||||
elif [ -n "${E2E_OPENAI_API_KEY:-}" ]; then
|
||||
SECRETS_JSON=$(python3 -c "
|
||||
import json, os
|
||||
k = os.environ['E2E_OPENAI_API_KEY']
|
||||
@@ -352,15 +397,7 @@ print(json.dumps({
|
||||
")
|
||||
fi
|
||||
|
||||
# Model slug MUST be provider-prefixed for hermes — the template's
|
||||
# derive-provider.sh parses the slug prefix (`openai/…`, `anthropic/…`,
|
||||
# `minimax/…`) to set HERMES_INFERENCE_PROVIDER at install time. A bare
|
||||
# "gpt-4o" has no prefix → provider falls back to hermes auto-detect →
|
||||
# picks Anthropic default → tries Anthropic API with the OpenAI key →
|
||||
# 401 on A2A. Same trap that trapped prod users in PR #1714. We pin
|
||||
# "openai/gpt-4o" here because the E2E's secret is always the OpenAI
|
||||
# key; non-hermes runtimes ignore the prefix.
|
||||
MODEL_SLUG="openai/gpt-4o"
|
||||
MODEL_SLUG=$(pick_model_slug "$RUNTIME")
|
||||
|
||||
log "5/11 Provisioning parent workspace (runtime=$RUNTIME)..."
|
||||
PARENT_RESP=$(tenant_call POST /workspaces \
|
||||
@@ -431,6 +468,99 @@ for wid in $WS_TO_CHECK; do
|
||||
ok " $wid online"
|
||||
done
|
||||
|
||||
# ─── 7b. Canvas-terminal diagnose (EIC chain probe) ────────────────────
|
||||
# This step exists because the canvas-terminal failure of 2026-05-03
|
||||
# was structurally invisible to local-dev (handleLocalConnect uses
|
||||
# docker exec; handleRemoteConnect uses EIC + ssh). The CP provisioner
|
||||
# shipped without the tcp/22 EIC ingress rule for ~6 months and nobody
|
||||
# noticed until a paying tenant clicked Terminal in canvas. Probing the
|
||||
# diagnose endpoint here at synth-E2E time means a regression in
|
||||
# - tenantIngressRules / workspaceIngressRules (CP)
|
||||
# - eicSSHIngressRule helper (CP)
|
||||
# - AuthorizeIngress source-group support (CP awsapi)
|
||||
# - EIC_ENDPOINT_SG_ID Railway env
|
||||
# - handleRemoteConnect's send-ssh-public-key/open-tunnel/ssh chain
|
||||
# surfaces within ~20 min of merge instead of waiting for a user report.
|
||||
#
|
||||
# The diagnose endpoint runs the full EIC + ssh probe from inside the
|
||||
# tenant's workspace-server (which already has AWS creds via its IAM
|
||||
# profile) and reports per-step status. We only need to call it as the
|
||||
# tenant — no AWS creds needed on the GHA runner. Returns
|
||||
# {"ok": bool, "first_failure": "name", "steps": [...]}.
|
||||
#
|
||||
# Local-docker workspaces (instance_id NULL) get diagnoseLocal which
|
||||
# probes docker.Ping + container exec; we still expect ok=true there
|
||||
# since local-docker is the alternative production path.
|
||||
log "7b/11 Canvas-terminal EIC diagnose probe..."
|
||||
for wid in $WS_TO_CHECK; do
|
||||
# Capture the diagnose body on its own, then fall back to '{}' only when
# nothing was captured. CURL_COMMON carries --fail-with-body, so on an
# HTTP error curl prints the response body AND exits non-zero; putting
# `|| echo '{}'` inside the substitution glued '{}' onto that body, which
# made it unparseable and hid first_failure / steps from the fail message.
DIAG_JSON=$(tenant_call GET "/workspaces/$wid/terminal/diagnose" 2>/dev/null) || true
[ -n "$DIAG_JSON" ] || DIAG_JSON='{}'
|
||||
DIAG_OK=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); print('true' if d.get('ok') else 'false')" 2>/dev/null || echo "false")
|
||||
if [ "$DIAG_OK" = "true" ]; then
|
||||
ok " $wid terminal-reachable (canvas terminal will work)"
|
||||
else
|
||||
DIAG_FAIL=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('first_failure','unknown'))" 2>/dev/null || echo "unknown")
|
||||
DIAG_DETAIL=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); s=[x for x in d.get('steps',[]) if not x.get('ok')]; print(s[0].get('error','') if s else '')" 2>/dev/null || echo "")
|
||||
fail "Workspace $wid terminal diagnose failed at step '$DIAG_FAIL': $DIAG_DETAIL — check tenant SG has tcp/22 from EIC endpoint SG (sg-0785d5c6138220523), EIC_ENDPOINT_SG_ID set in Railway, and EIC endpoint health"
|
||||
fi
|
||||
done
|
||||
|
||||
# ─── 7c. Workspace files API config.yaml round-trip ────────────────────
|
||||
# Pin the config-save path that drives the Canvas Config tab's Save &
|
||||
# Restart. Two failure classes this gate catches in one shot:
|
||||
#
|
||||
# 1. Path map drift (PR #2769). Runtime falls through to the wrong
|
||||
# base path (e.g. /opt/configs when user-data only created /configs)
|
||||
# → SSH `install -D` fails with EACCES on a parent dir that doesn't
|
||||
# exist. The user-visible 500 was unobservable without exercising
|
||||
# this code path on a fresh workspace.
|
||||
# 2. Permission drift on /configs. The path is root-owned by cloud-init,
|
||||
# so the SSH-as-ubuntu install needs `sudo -n`. Any future change
|
||||
# that drops the sudo, switches to a non-passwordless-sudo OS user,
|
||||
# or moves the path to a non-ubuntu-writable dir without sudo will
|
||||
# regress this gate.
|
||||
#
|
||||
# Round-trip: PUT a known marker, GET it back, assert content matches.
|
||||
# Marker shape includes the run id so a stale file from a prior canary
|
||||
# can't false-pass.
|
||||
log "7c/11 Files API config.yaml round-trip..."
|
||||
CONFIG_MARKER="# molecule-synth-e2e: ${E2E_RUN_ID:-unknown} ${RUNTIME} $(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
||||
CONFIG_PAYLOAD="${CONFIG_MARKER}
|
||||
name: synth-canary
|
||||
runtime: ${RUNTIME}
|
||||
"
|
||||
for wid in $WS_TO_CHECK; do
|
||||
PUT_BODY=$(python3 -c "import json,sys; print(json.dumps({'content': sys.stdin.read()}))" <<< "$CONFIG_PAYLOAD")
|
||||
# Capture body to a tempfile so curl's -w '%{http_code}' is the only
|
||||
# thing on stdout. The first version used `-w '\n%{http_code}\n'` and
|
||||
# parsed via `tail -n 2 | head -n 1`, which broke because bash $(...)
|
||||
# strips the trailing newline → only 2 lines remain in the captured
|
||||
# value → head -n 1 returned the body, not the status code. Caught
|
||||
# post-merge by E2E Staging SaaS at 22:06 UTC: a 200-with-body got
|
||||
# misreported as "PUT returned <body>".
|
||||
PUT_TMP=$(mktemp -t synth_put.XXXXXX)
# The body goes to $PUT_TMP and -w prints only the status code. curl
# still prints that code when --fail-with-body (from CURL_COMMON) makes
# it exit non-zero, so the fallback must NOT live inside the
# substitution: `$(… || echo "000")` turned a 500 into "500000". The
# trailing `|| true` keeps `set -e` from aborting on the non-zero exit,
# and "000" is substituted only when curl printed nothing at all.
PUT_CODE=$(tenant_call PUT "/workspaces/$wid/files/config.yaml" \
  -H "Content-Type: application/json" \
  -d "$PUT_BODY" \
  -o "$PUT_TMP" \
  -w '%{http_code}' \
  2>/dev/null) || true
PUT_CODE="${PUT_CODE:-000}"
PUT_BODY_OUT=$(cat "$PUT_TMP" 2>/dev/null || echo "")
rm -f "$PUT_TMP"
|
||||
if [ "$PUT_CODE" != "200" ] && [ "$PUT_CODE" != "204" ]; then
|
||||
fail "Workspace $wid Files API PUT config.yaml returned $PUT_CODE: $PUT_BODY_OUT — likely a path-map or permission regression in workspace-server template_files_eic.go"
|
||||
fi
|
||||
# PUT-only check; the GET-back round-trip assertion was dropped
|
||||
# 2026-05-04 because PUT (template_files_eic.go SSH-via-EIC →
|
||||
# workspace EC2) and GET (templates.go ReadFile → docker exec on
|
||||
# platform-tenant-local container) hit DIFFERENT paths and DIFFERENT
|
||||
# hosts. The asymmetry is a separate latent bug — Canvas Config tab
|
||||
# rendering reads workspace state via other endpoints, not via this
|
||||
# GET, so the user-facing Save & Restart works (container reads
|
||||
# /configs/config.yaml directly via bind-mount). When the read/write
|
||||
# paths are unified, restore the GET-back marker check here.
|
||||
ok " $wid config.yaml PUT OK (HTTP $PUT_CODE)"
|
||||
done
|
||||
|
||||
# ─── 8. A2A round-trip on parent ───────────────────────────────────────
|
||||
log "8/11 Sending A2A message to parent — expecting agent response..."
|
||||
# Smoke prompt phrasing — DO NOT trim back to the bare "Reply with exactly: PONG"
|
||||
@@ -461,7 +591,17 @@ print(json.dumps({
|
||||
}
|
||||
}))
|
||||
")
|
||||
# Override CURL_COMMON's --max-time 30 for THIS call only. Each canary
|
||||
# creates a fresh org → workspace, so the A2A POST hits a cold model:
|
||||
# claude-code adapter starts its event loop, opens TLS to the LLM
|
||||
# endpoint, ships the first prompt, waits for first token. With MiniMax
|
||||
# (which is the canary default since #2710) cold-call latency
|
||||
# routinely exceeds 30s on the first request after workspace boot.
|
||||
# 90s gives ~3x headroom over observed cold-call P95 (~25-30s).
|
||||
# Subsequent A2A turns hit the same workspace and are sub-second, so
|
||||
# this only widens the window for step 8/11 of the canary's first turn.
|
||||
A2A_RESP=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
|
||||
--max-time 90 \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$A2A_PAYLOAD")
|
||||
AGENT_TEXT=$(echo "$A2A_RESP" | python3 -c "
|
||||
@@ -483,6 +623,7 @@ fi
|
||||
# "Encrypted content is not supported" → hermes codex_responses API misroute (#14)
|
||||
# "Unknown provider" → bridge misconfigured PROVIDER= (regression of #13 fix)
|
||||
# "hermes-agent unreachable" → gateway process died
|
||||
# "exceeded your current quota" → MOLECULE_STAGING_OPENAI_KEY billing (NOT a platform regression — #2578)
|
||||
#
|
||||
# Fail LOUD with the specific pattern so CI log + alert channel makes the
|
||||
# regression unambiguous.
|
||||
@@ -508,6 +649,16 @@ fi
|
||||
if echo "$AGENT_TEXT" | grep -qF "Invalid API key"; then
|
||||
fail "A2A — REGRESSION: tenant auth chain returned 'Invalid API key'. Likely CP boot-event 401 race (CP #238) or stale OPENAI_API_KEY in the runtime env. Raw: $AGENT_TEXT"
|
||||
fi
|
||||
# Provider quota exhausted — distinguish from a platform regression so
|
||||
# the canary alert names the operator action directly instead of falling
|
||||
# through to the generic "error-shaped response" message. Steps 0-7 having
|
||||
# passed means the platform itself is healthy (CP up, tenant provisioned,
|
||||
# workspace online, A2A delivery end-to-end). When the agent comes back
|
||||
# with a provider-side 429, that is a billing event on the configured
|
||||
# OpenAI key, not a platform regression. Tracked in #2578.
|
||||
if echo "$AGENT_TEXT" | grep -qiE "exceeded your current quota|insufficient_quota"; then
|
||||
fail "A2A — PROVIDER QUOTA EXHAUSTED (NOT a platform regression). Operator action: top up MOLECULE_STAGING_OPENAI_KEY billing or rotate to a higher-quota org at Settings → Secrets and Variables → Actions. Tracked in #2578. Raw: $AGENT_TEXT"
|
||||
fi
|
||||
# Generic catch-all — falls through if none of the known regressions hit.
|
||||
if echo "$AGENT_TEXT" | grep -qiE "error|exception"; then
|
||||
fail "A2A returned an error-shaped response: $AGENT_TEXT"
|
||||
@@ -555,8 +706,80 @@ print(json.dumps({
|
||||
d=json.load(sys.stdin)
|
||||
print(len(d if isinstance(d, list) else d.get('events', [])))" 2>/dev/null || echo 0)
|
||||
log " Activity events observed: $ACTIVITY_COUNT"
|
||||
|
||||
# ─── 9c. Workspace KV memory Edit round-trip ─────────────────────────
|
||||
# Pins the Edit affordance added to the canvas Memory tab. The UI calls
|
||||
# POST /workspaces/:id/memory with if_match_version, so the contract is:
|
||||
# 1. initial POST creates row at version 1
|
||||
# 2. GET returns version 1 + value
|
||||
# 3. POST with if_match_version=1 updates → version 2
|
||||
# 4. POST with if_match_version=1 again → 409 (optimistic-lock enforcement)
|
||||
# Without (3) there is no Edit; without (4) two concurrent writers can
|
||||
# silently overwrite each other and the agent loses delegation-ledger state.
|
||||
log "9c. Memory KV Edit round-trip (Edit affordance + 409 gate)"
|
||||
EDIT_KEY="e2e_edit_gate_$SLUG"
|
||||
|
||||
# 1. seed
|
||||
tenant_call POST "/workspaces/$PARENT_ID/memory" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"key\":\"$EDIT_KEY\",\"value\":{\"step\":1}}" >/dev/null \
|
||||
|| fail "memory KV seed POST failed"
|
||||
|
||||
# 2. read back, capture version
|
||||
EDIT_GET=$(tenant_call GET "/workspaces/$PARENT_ID/memory/$EDIT_KEY")
|
||||
EDIT_VER=$(echo "$EDIT_GET" | python3 -c "import json,sys; print(json.load(sys.stdin)['version'])" 2>/dev/null || echo "")
|
||||
[ -z "$EDIT_VER" ] && fail "memory KV GET missing version field. Body: ${EDIT_GET:0:200}"
|
||||
|
||||
# 3. conditional update with matching version
|
||||
tenant_call POST "/workspaces/$PARENT_ID/memory" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"key\":\"$EDIT_KEY\",\"value\":{\"step\":2},\"if_match_version\":$EDIT_VER}" >/dev/null \
|
||||
|| fail "memory KV conditional Edit failed (if_match_version=$EDIT_VER)"
|
||||
|
||||
# 4. value flipped + version incremented?
|
||||
EDIT_GET2=$(tenant_call GET "/workspaces/$PARENT_ID/memory/$EDIT_KEY")
|
||||
EDIT_VAL2=$(echo "$EDIT_GET2" | python3 -c "import json,sys; print(json.load(sys.stdin)['value'].get('step'))" 2>/dev/null || echo "")
|
||||
[ "$EDIT_VAL2" = "2" ] || fail "memory KV Edit did not persist new value. Body: ${EDIT_GET2:0:200}"
|
||||
|
||||
# 5. stale-version POST must 409 — pin the optimistic-lock contract.
|
||||
#
|
||||
# tenant_call uses CURL_COMMON which carries --fail-with-body, so an
|
||||
# expected-409 makes curl exit 22. The previous shape
|
||||
# $(tenant_call ... -w "%{http_code}" || echo "000")
|
||||
# concatenated the captured "409" with the fallback "000" giving a
|
||||
# bogus "409000" value (caught on PR #2792's first E2E run, which is
|
||||
# also why staging-saas E2E has been silent-failing this gate since
|
||||
# PR #2787 merged). Fix: route the status code into its own tempfile
|
||||
# so curl's exit code can't pollute the captured stdout. set +e/-e
|
||||
# keeps the 22 from tripping the outer `set -e` pipeline.
|
||||
set +e
|
||||
tenant_call POST "/workspaces/$PARENT_ID/memory" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"key\":\"$EDIT_KEY\",\"value\":{\"step\":3},\"if_match_version\":$EDIT_VER}" \
|
||||
-o /tmp/memory_stale_resp.txt -w "%{http_code}" >/tmp/memory_stale_code.txt 2>/dev/null
|
||||
set -e
|
||||
EDIT_STALE_CODE=$(cat /tmp/memory_stale_code.txt 2>/dev/null || echo "000")
|
||||
[ "$EDIT_STALE_CODE" = "409" ] || fail "memory KV stale Edit must 409 (optimistic-lock). Got '$EDIT_STALE_CODE': $(cat /tmp/memory_stale_resp.txt 2>/dev/null | head -c 200)"
|
||||
|
||||
# cleanup
|
||||
tenant_call DELETE "/workspaces/$PARENT_ID/memory/$EDIT_KEY" >/dev/null 2>&1 || true
|
||||
ok "Memory KV Edit round-trip + 409 gate passed"
|
||||
|
||||
# ─── 9d. shared_context removal gate ─────────────────────────────────
|
||||
# Pin the deletion of GET /workspaces/:id/shared-context. The route + handler
|
||||
# were removed; team-shared knowledge now flows through memory v2's
|
||||
# team:<id> namespace. If anyone re-introduces a shared-context endpoint
|
||||
# without going through RFC #2789, this gate fires.
|
||||
# The route is expected to answer with a non-2xx status, which makes
# curl exit non-zero under CURL_COMMON's --fail-with-body while still
# printing the status code via -w. A `|| echo "000"` inside the
# substitution would therefore be appended to the real code ("404000"),
# so the code is captured on its own under `set +e` and "000" is used
# only when curl printed nothing at all.
set +e
SC_CODE=$(tenant_call GET "/workspaces/$PARENT_ID/shared-context" \
  -o /dev/null -w "%{http_code}" 2>/dev/null)
set -e
SC_CODE="${SC_CODE:-000}"
if [ "$SC_CODE" = "200" ]; then
  fail "shared-context route should be gone but returned 200 — regression. See task #304."
fi
ok "shared-context route confirmed removed (HTTP $SC_CODE)"
|
||||
else
|
||||
log "9/11 Canary mode — skipping HMA / peers / activity"
|
||||
log "9/11 Canary mode — skipping HMA / peers / activity / memory-edit / shared-context-gone"
|
||||
fi
|
||||
|
||||
# ─── 10. Delegation mechanics (full mode + child) ──────────────────────
|
||||
|
||||
@@ -75,9 +75,14 @@ from unittest.mock import AsyncMock, MagicMock, patch
|
||||
# Stub platform_auth so a2a_client imports cleanly without requiring a
|
||||
# real workspace token file. The helper's auth_headers() only matters
|
||||
# when going through the network; we're feeding it a mock response.
|
||||
#
|
||||
# Both stubs accept *args, **kwargs because the multi-workspace work
|
||||
# (#2739, #2743) added optional ``workspace_id`` parameters to
|
||||
# ``auth_headers`` and made ``self_source_headers`` 1-arg-required.
|
||||
# The stubs need to accept whatever the helpers pass without caring.
|
||||
_pa = types.ModuleType("platform_auth")
|
||||
_pa.auth_headers = lambda: {}
|
||||
_pa.self_source_headers = lambda: {}
|
||||
_pa.auth_headers = lambda *a, **kw: {}
|
||||
_pa.self_source_headers = lambda *a, **kw: {}
|
||||
sys.modules.setdefault("platform_auth", _pa)
|
||||
|
||||
sys.path.insert(0, sys.argv[1])
|
||||
|
||||
@@ -0,0 +1,305 @@
|
||||
// memory-backfill is a one-shot CLI that copies rows from the legacy
|
||||
// agent_memories table into the v2 plugin via its HTTP API.
|
||||
//
|
||||
// Idempotent on re-run: the backfill passes each source row's UUID
|
||||
// to the plugin's MemoryWrite.ID field, and the plugin upserts on
|
||||
// conflict. Re-running the backfill (whole or partial) updates rows
|
||||
// in place rather than duplicating.
|
||||
//
|
||||
// Usage:
|
||||
// memory-backfill -dry-run # count + diff
|
||||
// memory-backfill -apply # actually copy
|
||||
// memory-backfill -apply -limit=10000 # cap rows per run
|
||||
// memory-backfill -apply -workspace=<uuid> # one workspace only
|
||||
//
|
||||
// Required env:
|
||||
// DATABASE_URL — workspace-server DB (read agent_memories)
|
||||
// MEMORY_PLUGIN_URL — target plugin (write memory_records)
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
_ "github.com/lib/pq"
|
||||
|
||||
mclient "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/client"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/namespace"
|
||||
)
|
||||
|
||||
const defaultLimit = 1000000 // effectively unlimited; cap keeps SQL pageable
|
||||
|
||||
func main() {
|
||||
if err := run(os.Args[1:], os.Stdout, os.Stderr); err != nil {
|
||||
log.Fatalf("memory-backfill: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// run is extracted so tests can drive it with synthesized argv +
|
||||
// captured stdout/stderr. Returns nil on success.
|
||||
func run(argv []string, stdout, stderr *os.File) error {
|
||||
fs := flag.NewFlagSet("memory-backfill", flag.ContinueOnError)
|
||||
fs.SetOutput(stderr)
|
||||
dryRun := fs.Bool("dry-run", false, "count + diff only, no writes")
|
||||
apply := fs.Bool("apply", false, "actually copy rows to the plugin")
|
||||
verify := fs.Bool("verify", false, "post-apply parity check: random-sample N workspaces, diff agent_memories vs plugin search")
|
||||
verifySample := fs.Int("verify-sample", 50, "number of workspaces to sample in -verify mode")
|
||||
workspace := fs.String("workspace", "", "limit to a single workspace UUID (empty = all)")
|
||||
limit := fs.Int("limit", defaultLimit, "max rows to process this run")
|
||||
if err := fs.Parse(argv); err != nil {
|
||||
return err
|
||||
}
|
||||
modesPicked := 0
|
||||
if *dryRun {
|
||||
modesPicked++
|
||||
}
|
||||
if *apply {
|
||||
modesPicked++
|
||||
}
|
||||
if *verify {
|
||||
modesPicked++
|
||||
}
|
||||
if modesPicked != 1 {
|
||||
return errors.New("specify exactly one of -dry-run, -apply, or -verify")
|
||||
}
|
||||
|
||||
dbURL := os.Getenv("DATABASE_URL")
|
||||
if dbURL == "" {
|
||||
return errors.New("DATABASE_URL is required")
|
||||
}
|
||||
pluginURL := os.Getenv("MEMORY_PLUGIN_URL")
|
||||
if pluginURL == "" {
|
||||
return errors.New("MEMORY_PLUGIN_URL is required")
|
||||
}
|
||||
|
||||
db, err := sql.Open("postgres", dbURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open db: %w", err)
|
||||
}
|
||||
defer db.Close()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
if err := db.PingContext(ctx); err != nil {
|
||||
return fmt.Errorf("ping db: %w", err)
|
||||
}
|
||||
|
||||
plugin := mclient.New(mclient.Config{BaseURL: pluginURL})
|
||||
resolver := namespace.New(db)
|
||||
|
||||
if *verify {
|
||||
vcfg := verifyConfig{
|
||||
DB: db,
|
||||
Plugin: plugin,
|
||||
Resolver: namespaceResolverAdapter{resolver},
|
||||
SampleSize: *verifySample,
|
||||
WorkspaceID: *workspace,
|
||||
}
|
||||
report, err := verifyParity(context.Background(), vcfg, stdout)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Fprintf(stdout, "\nVerify complete: workspaces_sampled=%d matches=%d mismatches=%d errors=%d\n",
|
||||
report.WorkspacesSampled, report.Matches, report.Mismatches, report.Errors)
|
||||
if report.Mismatches > 0 || report.Errors > 0 {
|
||||
return fmt.Errorf("verify found %d mismatches and %d errors", report.Mismatches, report.Errors)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
cfg := backfillConfig{
|
||||
DB: db,
|
||||
Plugin: plugin,
|
||||
Resolver: resolver,
|
||||
WorkspaceID: *workspace,
|
||||
Limit: *limit,
|
||||
DryRun: *dryRun,
|
||||
}
|
||||
stats, err := backfill(context.Background(), cfg, stdout)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Fprintf(stdout, "\nBackfill complete: scanned=%d copied=%d skipped=%d errors=%d\n",
|
||||
stats.Scanned, stats.Copied, stats.Skipped, stats.Errors)
|
||||
return nil
|
||||
}
|
||||
|
||||
// backfillStats holds the per-run counters that the CLI prints in its
// summary line:
//
//   - Scanned: rows returned by the agent_memories query.
//   - Copied:  rows committed to the plugin (or, in dry-run mode, rows
//     that would have been committed).
//   - Skipped: rows whose scope could not be mapped to a namespace.
//   - Errors:  rows that failed to scan or that the plugin rejected.
type backfillStats struct {
	Scanned, Copied, Skipped, Errors int
}
|
||||
|
||||
// backfillConfig is the typed dependency bundle. Tests inject stubs
|
||||
// for Plugin and Resolver; production wires real client + resolver.
|
||||
type backfillConfig struct {
|
||||
DB *sql.DB
|
||||
Plugin backfillPlugin
|
||||
Resolver backfillResolver
|
||||
WorkspaceID string
|
||||
Limit int
|
||||
DryRun bool
|
||||
}
|
||||
|
||||
// backfillPlugin is the slice of memory-plugin client we call.
|
||||
type backfillPlugin interface {
|
||||
UpsertNamespace(ctx context.Context, name string, body contract.NamespaceUpsert) (*contract.Namespace, error)
|
||||
CommitMemory(ctx context.Context, namespace string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error)
|
||||
}
|
||||
|
||||
// backfillResolver lets the backfill compute namespace strings the
|
||||
// same way the live MCP layer does.
|
||||
type backfillResolver interface {
|
||||
WritableNamespaces(ctx context.Context, workspaceID string) ([]namespace.Namespace, error)
|
||||
}
|
||||
|
||||
// backfill is the workhorse. Iterates agent_memories, maps each row's
|
||||
// scope to a v2 namespace via the resolver, and POSTs to the plugin.
|
||||
// Returns final stats. Stops after Limit rows.
|
||||
func backfill(ctx context.Context, cfg backfillConfig, stdout *os.File) (*backfillStats, error) {
|
||||
stats := &backfillStats{}
|
||||
|
||||
query := `
|
||||
SELECT id, workspace_id, content, scope, created_at
|
||||
FROM agent_memories
|
||||
`
|
||||
args := []interface{}{}
|
||||
if cfg.WorkspaceID != "" {
|
||||
query += ` WHERE workspace_id = $1`
|
||||
args = append(args, cfg.WorkspaceID)
|
||||
}
|
||||
query += ` ORDER BY created_at ASC LIMIT $` + fmt.Sprintf("%d", len(args)+1)
|
||||
args = append(args, cfg.Limit)
|
||||
|
||||
rows, err := cfg.DB.QueryContext(ctx, query, args...)
|
||||
if err != nil {
|
||||
return stats, fmt.Errorf("query agent_memories: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
for rows.Next() {
|
||||
stats.Scanned++
|
||||
var (
|
||||
id, workspaceID, content, scope string
|
||||
createdAt time.Time
|
||||
)
|
||||
if err := rows.Scan(&id, &workspaceID, &content, &scope, &createdAt); err != nil {
|
||||
fmt.Fprintf(stdout, "scan: %v\n", err)
|
||||
stats.Errors++
|
||||
continue
|
||||
}
|
||||
|
||||
ns, err := mapScopeToNamespace(ctx, cfg.Resolver, workspaceID, scope)
|
||||
if err != nil {
|
||||
fmt.Fprintf(stdout, "[skip] id=%s workspace=%s: %v\n", id, workspaceID, err)
|
||||
stats.Skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
if cfg.DryRun {
|
||||
fmt.Fprintf(stdout, "[dry] id=%s scope=%s → ns=%s\n", id, scope, ns)
|
||||
stats.Copied++ // would-have-copied
|
||||
continue
|
||||
}
|
||||
|
||||
// Ensure the namespace exists before posting memories. Plugin's
|
||||
// UpsertNamespace is idempotent so calling per-row is wasteful
|
||||
// but safe; for v1 we accept the chattiness.
|
||||
if _, err := cfg.Plugin.UpsertNamespace(ctx, ns, contract.NamespaceUpsert{
|
||||
Kind: namespaceKindFromString(scope),
|
||||
}); err != nil {
|
||||
fmt.Fprintf(stdout, "[err-ns] id=%s ns=%s: %v\n", id, ns, err)
|
||||
stats.Errors++
|
||||
continue
|
||||
}
|
||||
|
||||
// Pass the source row's UUID as the idempotency key so re-runs
|
||||
// upsert in place. Without this, retries would duplicate every
|
||||
// memory.
|
||||
if _, err := cfg.Plugin.CommitMemory(ctx, ns, contract.MemoryWrite{
|
||||
ID: id,
|
||||
Content: content,
|
||||
Kind: contract.MemoryKindFact,
|
||||
Source: contract.MemorySourceAgent,
|
||||
}); err != nil {
|
||||
fmt.Fprintf(stdout, "[err-mem] id=%s ns=%s: %v\n", id, ns, err)
|
||||
stats.Errors++
|
||||
continue
|
||||
}
|
||||
stats.Copied++
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return stats, fmt.Errorf("iterate rows: %w", err)
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// mapScopeToNamespace mirrors the legacy-shim translation. The
|
||||
// backfill needs the SAME mapping the runtime uses so reads work
|
||||
// after cutover.
|
||||
func mapScopeToNamespace(ctx context.Context, r backfillResolver, workspaceID, scope string) (string, error) {
|
||||
writable, err := r.WritableNamespaces(ctx, workspaceID)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("resolve writable: %w", err)
|
||||
}
|
||||
wantKind := contract.NamespaceKindWorkspace
|
||||
switch scope {
|
||||
case "LOCAL":
|
||||
wantKind = contract.NamespaceKindWorkspace
|
||||
case "TEAM":
|
||||
wantKind = contract.NamespaceKindTeam
|
||||
case "GLOBAL":
|
||||
wantKind = contract.NamespaceKindOrg
|
||||
default:
|
||||
return "", fmt.Errorf("unknown scope %q", scope)
|
||||
}
|
||||
for _, ns := range writable {
|
||||
if ns.Kind == wantKind {
|
||||
return ns.Name, nil
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("no writable namespace of kind %s for workspace %s", wantKind, workspaceID)
|
||||
}
|
||||
|
||||
// namespaceKindFromString returns the contract.NamespaceKind for a
|
||||
// legacy scope value. Unknown scopes default to "workspace" so the
|
||||
// backfill never aborts on an unexpected row.
|
||||
func namespaceKindFromString(scope string) contract.NamespaceKind {
|
||||
switch strings.ToUpper(scope) {
|
||||
case "TEAM":
|
||||
return contract.NamespaceKindTeam
|
||||
case "GLOBAL":
|
||||
return contract.NamespaceKindOrg
|
||||
default:
|
||||
return contract.NamespaceKindWorkspace
|
||||
}
|
||||
}
|
||||
|
||||
// namespaceResolverAdapter bridges *namespace.Resolver (which returns
|
||||
// []namespace.Namespace) to verify.go's verifyResolver interface
|
||||
// (which wants []ResolvedNamespace). Keeps verify.go independent of
|
||||
// the namespace-package dependency so its tests can stub easily.
|
||||
type namespaceResolverAdapter struct {
|
||||
r *namespace.Resolver
|
||||
}
|
||||
|
||||
func (a namespaceResolverAdapter) ReadableNamespaces(ctx context.Context, workspaceID string) ([]ResolvedNamespace, error) {
|
||||
src, err := a.r.ReadableNamespaces(ctx, workspaceID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out := make([]ResolvedNamespace, len(src))
|
||||
for i, ns := range src {
|
||||
out[i] = ResolvedNamespace{Name: ns.Name}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
@@ -0,0 +1,434 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/namespace"
|
||||
)
|
||||
|
||||
// stubBackfillPlugin is a test double for backfillPlugin. It records
// every call so tests can assert on what the backfill sent, and can be
// told to fail either operation.
type stubBackfillPlugin struct {
	// Call records, appended in call order.
	upsertedNamespaces  []string // name passed to each UpsertNamespace call
	committedNamespaces []string // namespace passed to each CommitMemory call
	committedIDs        []string // MemoryWrite.ID passed to each CommitMemory call

	// Injected failures; nil means the call succeeds.
	upsertErr error
	commitErr error
}
|
||||
|
||||
func (s *stubBackfillPlugin) UpsertNamespace(_ context.Context, name string, _ contract.NamespaceUpsert) (*contract.Namespace, error) {
|
||||
s.upsertedNamespaces = append(s.upsertedNamespaces, name)
|
||||
if s.upsertErr != nil {
|
||||
return nil, s.upsertErr
|
||||
}
|
||||
return &contract.Namespace{Name: name, Kind: contract.NamespaceKindWorkspace}, nil
|
||||
}
|
||||
func (s *stubBackfillPlugin) CommitMemory(_ context.Context, ns string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error) {
|
||||
s.committedNamespaces = append(s.committedNamespaces, ns)
|
||||
s.committedIDs = append(s.committedIDs, body.ID)
|
||||
if s.commitErr != nil {
|
||||
return nil, s.commitErr
|
||||
}
|
||||
id := body.ID
|
||||
if id == "" {
|
||||
id = "out-1"
|
||||
}
|
||||
return &contract.MemoryWriteResponse{ID: id, Namespace: ns}, nil
|
||||
}
|
||||
|
||||
type stubBackfillResolver struct {
|
||||
writable []namespace.Namespace
|
||||
err error
|
||||
}
|
||||
|
||||
func (s *stubBackfillResolver) WritableNamespaces(_ context.Context, _ string) ([]namespace.Namespace, error) {
|
||||
return s.writable, s.err
|
||||
}
|
||||
|
||||
func rootBackfillResolver() *stubBackfillResolver {
|
||||
return &stubBackfillResolver{
|
||||
writable: []namespace.Namespace{
|
||||
{Name: "workspace:root-1", Kind: contract.NamespaceKindWorkspace, Writable: true},
|
||||
{Name: "team:root-1", Kind: contract.NamespaceKindTeam, Writable: true},
|
||||
{Name: "org:root-1", Kind: contract.NamespaceKindOrg, Writable: true},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// --- mapScopeToNamespace ---
|
||||
|
||||
func TestMapScopeToNamespace(t *testing.T) {
|
||||
cases := []struct {
|
||||
scope string
|
||||
want string
|
||||
wantErr string
|
||||
}{
|
||||
{"LOCAL", "workspace:root-1", ""},
|
||||
{"TEAM", "team:root-1", ""},
|
||||
{"GLOBAL", "org:root-1", ""},
|
||||
{"WEIRD", "", "unknown scope"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.scope, func(t *testing.T) {
|
||||
got, err := mapScopeToNamespace(context.Background(), rootBackfillResolver(), "root-1", tc.scope)
|
||||
if tc.wantErr != "" {
|
||||
if err == nil || !strings.Contains(err.Error(), tc.wantErr) {
|
||||
t.Errorf("err = %v, want %q", err, tc.wantErr)
|
||||
}
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if got != tc.want {
|
||||
t.Errorf("got %q, want %q", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMapScopeToNamespace_ResolverError(t *testing.T) {
|
||||
r := &stubBackfillResolver{err: errors.New("dead")}
|
||||
_, err := mapScopeToNamespace(context.Background(), r, "root-1", "LOCAL")
|
||||
if err == nil {
|
||||
t.Error("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMapScopeToNamespace_NoMatchingKind(t *testing.T) {
|
||||
r := &stubBackfillResolver{writable: []namespace.Namespace{
|
||||
{Name: "workspace:x", Kind: contract.NamespaceKindWorkspace, Writable: true},
|
||||
}}
|
||||
_, err := mapScopeToNamespace(context.Background(), r, "root-1", "TEAM")
|
||||
if err == nil || !strings.Contains(err.Error(), "no writable namespace") {
|
||||
t.Errorf("err = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// --- namespaceKindFromString ---
|
||||
|
||||
func TestNamespaceKindFromString(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
want contract.NamespaceKind
|
||||
}{
|
||||
{"LOCAL", contract.NamespaceKindWorkspace},
|
||||
{"local", contract.NamespaceKindWorkspace},
|
||||
{"TEAM", contract.NamespaceKindTeam},
|
||||
{"team", contract.NamespaceKindTeam},
|
||||
{"GLOBAL", contract.NamespaceKindOrg},
|
||||
{"global", contract.NamespaceKindOrg},
|
||||
{"weird", contract.NamespaceKindWorkspace}, // safe default
|
||||
{"", contract.NamespaceKindWorkspace},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
if got := namespaceKindFromString(tc.in); got != tc.want {
|
||||
t.Errorf("namespaceKindFromString(%q) = %q, want %q", tc.in, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --- backfill (the workhorse) ---
|
||||
|
||||
// TestBackfill_PassesSourceUUIDAsIdempotencyKey pins the Critical-1
|
||||
// fix: backfill must forward agent_memories.id to MemoryWrite.ID so
|
||||
// re-runs upsert in place.
|
||||
func TestBackfill_PassesSourceUUIDAsIdempotencyKey(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
now := time.Now().UTC()
|
||||
mock.ExpectQuery("SELECT id, workspace_id, content, scope, created_at").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id", "content", "scope", "created_at"}).
|
||||
AddRow("source-uuid-A", "root-1", "fact 1", "LOCAL", now).
|
||||
AddRow("source-uuid-B", "root-1", "fact 2", "LOCAL", now))
|
||||
|
||||
plugin := &stubBackfillPlugin{}
|
||||
cfg := backfillConfig{DB: db, Plugin: plugin, Resolver: rootBackfillResolver(), Limit: 100}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
if _, err := backfill(context.Background(), cfg, devnull); err != nil {
|
||||
t.Fatalf("backfill: %v", err)
|
||||
}
|
||||
if len(plugin.committedIDs) != 2 {
|
||||
t.Fatalf("commits = %d", len(plugin.committedIDs))
|
||||
}
|
||||
if plugin.committedIDs[0] != "source-uuid-A" || plugin.committedIDs[1] != "source-uuid-B" {
|
||||
t.Errorf("committedIDs = %v; idempotency key not forwarded", plugin.committedIDs)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBackfill_RerunIsIdempotent: same agent_memories rows backfilled
|
||||
// twice. Plugin sees the same UUIDs both times; without the fix the
|
||||
// plugin would generate fresh UUIDs and duplicate.
|
||||
func TestBackfill_RerunIsIdempotent(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
now := time.Now().UTC()
|
||||
rows1 := sqlmock.NewRows([]string{"id", "workspace_id", "content", "scope", "created_at"}).
|
||||
AddRow("uuid-1", "root-1", "fact", "LOCAL", now)
|
||||
rows2 := sqlmock.NewRows([]string{"id", "workspace_id", "content", "scope", "created_at"}).
|
||||
AddRow("uuid-1", "root-1", "fact", "LOCAL", now)
|
||||
mock.ExpectQuery("SELECT id, workspace_id, content, scope, created_at").WillReturnRows(rows1)
|
||||
mock.ExpectQuery("SELECT id, workspace_id, content, scope, created_at").WillReturnRows(rows2)
|
||||
|
||||
plugin := &stubBackfillPlugin{}
|
||||
cfg := backfillConfig{DB: db, Plugin: plugin, Resolver: rootBackfillResolver(), Limit: 100}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
|
||||
if _, err := backfill(context.Background(), cfg, devnull); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := backfill(context.Background(), cfg, devnull); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(plugin.committedIDs) != 2 {
|
||||
t.Errorf("commits = %d, want 2", len(plugin.committedIDs))
|
||||
}
|
||||
if plugin.committedIDs[0] != "uuid-1" || plugin.committedIDs[1] != "uuid-1" {
|
||||
t.Errorf("ids = %v; both runs must pass uuid-1 (relies on plugin upsert for actual de-dup)", plugin.committedIDs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackfill_HappyPath_Apply(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
now := time.Now().UTC()
|
||||
mock.ExpectQuery("SELECT id, workspace_id, content, scope, created_at").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id", "content", "scope", "created_at"}).
|
||||
AddRow("mem-1", "root-1", "fact x", "LOCAL", now).
|
||||
AddRow("mem-2", "root-1", "team y", "TEAM", now).
|
||||
AddRow("mem-3", "root-1", "org z", "GLOBAL", now))
|
||||
|
||||
plugin := &stubBackfillPlugin{}
|
||||
cfg := backfillConfig{
|
||||
DB: db,
|
||||
Plugin: plugin,
|
||||
Resolver: rootBackfillResolver(),
|
||||
Limit: 100,
|
||||
DryRun: false,
|
||||
}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
stats, err := backfill(context.Background(), cfg, devnull)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if stats.Scanned != 3 || stats.Copied != 3 || stats.Errors != 0 {
|
||||
t.Errorf("stats = %+v", stats)
|
||||
}
|
||||
if len(plugin.committedNamespaces) != 3 {
|
||||
t.Errorf("commits = %v", plugin.committedNamespaces)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackfill_DryRun_DoesNotCallPlugin(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
now := time.Now().UTC()
|
||||
mock.ExpectQuery("SELECT id, workspace_id, content, scope, created_at").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id", "content", "scope", "created_at"}).
|
||||
AddRow("mem-1", "root-1", "fact x", "LOCAL", now))
|
||||
|
||||
plugin := &stubBackfillPlugin{}
|
||||
cfg := backfillConfig{DB: db, Plugin: plugin, Resolver: rootBackfillResolver(), Limit: 100, DryRun: true}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
stats, err := backfill(context.Background(), cfg, devnull)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if stats.Copied != 1 {
|
||||
t.Errorf("copied = %d", stats.Copied)
|
||||
}
|
||||
if len(plugin.committedNamespaces) != 0 {
|
||||
t.Errorf("plugin must not be called in dry-run mode")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackfill_WorkspaceFilter(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id, workspace_id, content, scope, created_at").
|
||||
WithArgs("specific-ws", 100).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id", "content", "scope", "created_at"}))
|
||||
cfg := backfillConfig{DB: db, Plugin: &stubBackfillPlugin{}, Resolver: rootBackfillResolver(), Limit: 100, WorkspaceID: "specific-ws"}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
if _, err := backfill(context.Background(), cfg, devnull); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("workspace filter not applied: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackfill_QueryError(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id, workspace_id, content, scope, created_at").
|
||||
WillReturnError(errors.New("dead"))
|
||||
cfg := backfillConfig{DB: db, Plugin: &stubBackfillPlugin{}, Resolver: rootBackfillResolver(), Limit: 100}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
_, err := backfill(context.Background(), cfg, devnull)
|
||||
if err == nil {
|
||||
t.Error("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackfill_ScanError(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id, workspace_id, content, scope, created_at").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}). // wrong shape
|
||||
AddRow("mem-1"))
|
||||
cfg := backfillConfig{DB: db, Plugin: &stubBackfillPlugin{}, Resolver: rootBackfillResolver(), Limit: 100}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
stats, err := backfill(context.Background(), cfg, devnull)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if stats.Errors != 1 {
|
||||
t.Errorf("errors = %d, want 1", stats.Errors)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackfill_RowsErr(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id, workspace_id, content, scope, created_at").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id", "content", "scope", "created_at"}).
|
||||
AddRow("mem-1", "root-1", "x", "LOCAL", time.Now().UTC()).
|
||||
RowError(0, errors.New("mid-iter")))
|
||||
cfg := backfillConfig{DB: db, Plugin: &stubBackfillPlugin{}, Resolver: rootBackfillResolver(), Limit: 100}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
_, err := backfill(context.Background(), cfg, devnull)
|
||||
if err == nil || !strings.Contains(err.Error(), "iterate") {
|
||||
t.Errorf("err = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackfill_SkipsUnmappableRow(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id, workspace_id, content, scope, created_at").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id", "content", "scope", "created_at"}).
|
||||
AddRow("mem-1", "root-1", "x", "WEIRD", time.Now().UTC()))
|
||||
cfg := backfillConfig{DB: db, Plugin: &stubBackfillPlugin{}, Resolver: rootBackfillResolver(), Limit: 100}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
stats, err := backfill(context.Background(), cfg, devnull)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if stats.Skipped != 1 || stats.Copied != 0 {
|
||||
t.Errorf("stats = %+v", stats)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackfill_PluginUpsertNamespaceError(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id, workspace_id, content, scope, created_at").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id", "content", "scope", "created_at"}).
|
||||
AddRow("mem-1", "root-1", "x", "LOCAL", time.Now().UTC()))
|
||||
cfg := backfillConfig{DB: db, Plugin: &stubBackfillPlugin{upsertErr: errors.New("ns dead")}, Resolver: rootBackfillResolver(), Limit: 100}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
stats, err := backfill(context.Background(), cfg, devnull)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if stats.Errors != 1 || stats.Copied != 0 {
|
||||
t.Errorf("stats = %+v", stats)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackfill_PluginCommitMemoryError(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id, workspace_id, content, scope, created_at").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id", "content", "scope", "created_at"}).
|
||||
AddRow("mem-1", "root-1", "x", "LOCAL", time.Now().UTC()))
|
||||
cfg := backfillConfig{DB: db, Plugin: &stubBackfillPlugin{commitErr: errors.New("mem dead")}, Resolver: rootBackfillResolver(), Limit: 100}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
stats, err := backfill(context.Background(), cfg, devnull)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if stats.Errors != 1 || stats.Copied != 0 {
|
||||
t.Errorf("stats = %+v", stats)
|
||||
}
|
||||
}
|
||||
|
||||
// --- run (CLI driver) ---
|
||||
|
||||
func TestRun_RejectsBothModes(t *testing.T) {
|
||||
stderr, _ := os.OpenFile(os.DevNull, os.O_WRONLY, 0)
|
||||
defer stderr.Close()
|
||||
stdout, _ := os.OpenFile(os.DevNull, os.O_WRONLY, 0)
|
||||
defer stdout.Close()
|
||||
err := run([]string{"-dry-run", "-apply"}, stdout, stderr)
|
||||
if err == nil || !strings.Contains(err.Error(), "exactly one") {
|
||||
t.Errorf("err = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRun_RejectsNeitherMode(t *testing.T) {
|
||||
stderr, _ := os.OpenFile(os.DevNull, os.O_WRONLY, 0)
|
||||
defer stderr.Close()
|
||||
stdout, _ := os.OpenFile(os.DevNull, os.O_WRONLY, 0)
|
||||
defer stdout.Close()
|
||||
err := run([]string{}, stdout, stderr)
|
||||
if err == nil || !strings.Contains(err.Error(), "exactly one") {
|
||||
t.Errorf("err = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRun_RejectsMissingDatabaseURL(t *testing.T) {
|
||||
t.Setenv("DATABASE_URL", "")
|
||||
t.Setenv("MEMORY_PLUGIN_URL", "http://x")
|
||||
stderr, _ := os.OpenFile(os.DevNull, os.O_WRONLY, 0)
|
||||
defer stderr.Close()
|
||||
stdout, _ := os.OpenFile(os.DevNull, os.O_WRONLY, 0)
|
||||
defer stdout.Close()
|
||||
err := run([]string{"-dry-run"}, stdout, stderr)
|
||||
if err == nil || !strings.Contains(err.Error(), "DATABASE_URL") {
|
||||
t.Errorf("err = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRun_RejectsMissingPluginURL(t *testing.T) {
|
||||
t.Setenv("DATABASE_URL", "postgres://invalid")
|
||||
t.Setenv("MEMORY_PLUGIN_URL", "")
|
||||
stderr, _ := os.OpenFile(os.DevNull, os.O_WRONLY, 0)
|
||||
defer stderr.Close()
|
||||
stdout, _ := os.OpenFile(os.DevNull, os.O_WRONLY, 0)
|
||||
defer stdout.Close()
|
||||
err := run([]string{"-dry-run"}, stdout, stderr)
|
||||
if err == nil || !strings.Contains(err.Error(), "MEMORY_PLUGIN_URL") {
|
||||
t.Errorf("err = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRun_BadFlags(t *testing.T) {
|
||||
stderr, _ := os.OpenFile(os.DevNull, os.O_WRONLY, 0)
|
||||
defer stderr.Close()
|
||||
stdout, _ := os.OpenFile(os.DevNull, os.O_WRONLY, 0)
|
||||
defer stdout.Close()
|
||||
err := run([]string{"-not-a-flag"}, stdout, stderr)
|
||||
if err == nil {
|
||||
t.Error("expected flag parse error")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,200 @@
|
||||
package main
|
||||
|
||||
// verify.go — post-apply parity check.
|
||||
//
|
||||
// After a backfill -apply, run with -verify to confirm the migration
|
||||
// actually produced equivalent data. Picks `SampleSize` random
|
||||
// workspaces, queries agent_memories direct + plugin search via the
|
||||
// caller's namespaces, and diffs the result sets by content.
|
||||
//
|
||||
// The diff is best-effort: pg's recent-first ordering and the plugin's
// internal ordering may differ, so we compare contents as multisets, not
// lists. The check is one-directional: every legacy row must be matched
// by a distinct plugin row with the same content (ids are not compared,
// although the backfill preserves them via the C1 idempotency key). The
// plugin may return extra rows — e.g. team-shared memories written by
// sibling workspaces — without failing the check.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
|
||||
)
|
||||
|
||||
// verifyConfig is the typed dependency bundle for verifyParity.
|
||||
type verifyConfig struct {
|
||||
DB *sql.DB
|
||||
Plugin verifyPlugin
|
||||
Resolver verifyResolver
|
||||
SampleSize int
|
||||
WorkspaceID string // optional: limit to one workspace
|
||||
Rand *rand.Rand
|
||||
}
|
||||
|
||||
// verifyPlugin is the slice of memory-plugin client we call.
|
||||
type verifyPlugin interface {
|
||||
Search(ctx context.Context, body contract.SearchRequest) (*contract.SearchResponse, error)
|
||||
}
|
||||
|
||||
// verifyResolver mirrors namespace.Resolver. Same shape as
|
||||
// backfillResolver but kept distinct so verify isn't tied to
|
||||
// backfill's interface.
|
||||
type verifyResolver interface {
|
||||
ReadableNamespaces(ctx context.Context, workspaceID string) ([]ResolvedNamespace, error)
|
||||
}
|
||||
|
||||
// ResolvedNamespace carries the only thing verify needs from a resolved
// namespace: its name. Declaring it here keeps the verify code free of
// the namespace package; tests inject stubs and the binary goes through
// an adapter.
type ResolvedNamespace struct {
	// Name is the namespace name passed to the plugin search.
	Name string
}
|
||||
|
||||
// verifyReport tallies the outcome of a parity check:
//
//   - WorkspacesSampled: workspaces the check looked at.
//   - Matches:           workspaces whose legacy rows were all found.
//   - Mismatches:        workspaces with at least one legacy row missing.
//   - Errors:            workspaces that could not be checked because a
//     query, resolve or search call failed.
type verifyReport struct {
	WorkspacesSampled, Matches, Mismatches, Errors int
}
|
||||
|
||||
// verifyParity is the workhorse. Returns a report; the CLI converts
|
||||
// any non-zero mismatches/errors into a non-zero exit so CI can gate
|
||||
// the cutover.
|
||||
func verifyParity(ctx context.Context, cfg verifyConfig, stdout *os.File) (*verifyReport, error) {
|
||||
report := &verifyReport{}
|
||||
rng := cfg.Rand
|
||||
if rng == nil {
|
||||
rng = rand.New(rand.NewSource(42)) //nolint:gosec // determinism > unpredictability for ops
|
||||
}
|
||||
|
||||
wsIDs, err := pickWorkspaceSample(ctx, cfg.DB, cfg.WorkspaceID, cfg.SampleSize, rng)
|
||||
if err != nil {
|
||||
return report, fmt.Errorf("pick sample: %w", err)
|
||||
}
|
||||
|
||||
for _, wsID := range wsIDs {
|
||||
report.WorkspacesSampled++
|
||||
legacy, err := queryLegacyMemories(ctx, cfg.DB, wsID)
|
||||
if err != nil {
|
||||
fmt.Fprintf(stdout, "[err] workspace=%s legacy query: %v\n", wsID, err)
|
||||
report.Errors++
|
||||
continue
|
||||
}
|
||||
readable, err := cfg.Resolver.ReadableNamespaces(ctx, wsID)
|
||||
if err != nil {
|
||||
fmt.Fprintf(stdout, "[err] workspace=%s resolve: %v\n", wsID, err)
|
||||
report.Errors++
|
||||
continue
|
||||
}
|
||||
nsList := make([]string, len(readable))
|
||||
for i, ns := range readable {
|
||||
nsList[i] = ns.Name
|
||||
}
|
||||
if len(nsList) == 0 {
|
||||
// No readable namespaces — empty plugin result expected.
|
||||
if len(legacy) == 0 {
|
||||
report.Matches++
|
||||
} else {
|
||||
fmt.Fprintf(stdout, "[mismatch] workspace=%s legacy=%d plugin=0 (no readable namespaces)\n", wsID, len(legacy))
|
||||
report.Mismatches++
|
||||
}
|
||||
continue
|
||||
}
|
||||
resp, err := cfg.Plugin.Search(ctx, contract.SearchRequest{Namespaces: nsList, Limit: 100})
|
||||
if err != nil {
|
||||
fmt.Fprintf(stdout, "[err] workspace=%s plugin search: %v\n", wsID, err)
|
||||
report.Errors++
|
||||
continue
|
||||
}
|
||||
pluginContents := make(map[string]int, len(resp.Memories))
|
||||
for _, m := range resp.Memories {
|
||||
pluginContents[m.Content]++
|
||||
}
|
||||
// Compare as multisets: each legacy content appears at least
|
||||
// once in plugin output. We deliberately tolerate plugin
|
||||
// having MORE rows (the namespace might include team-shared
|
||||
// memories from sibling workspaces that aren't in this
|
||||
// workspace's agent_memories rows).
|
||||
matched := true
|
||||
for _, c := range legacy {
|
||||
if pluginContents[c] == 0 {
|
||||
fmt.Fprintf(stdout, "[mismatch] workspace=%s missing-from-plugin content=%q\n", wsID, truncate(c, 80))
|
||||
matched = false
|
||||
break
|
||||
}
|
||||
pluginContents[c]--
|
||||
}
|
||||
if matched {
|
||||
report.Matches++
|
||||
} else {
|
||||
report.Mismatches++
|
||||
}
|
||||
}
|
||||
return report, nil
|
||||
}
|
||||
|
||||
// pickWorkspaceSample chooses the workspaces to verify and returns their
// ids as strings.
//
// A non-empty workspaceID short-circuits: that id is returned on its own
// and the database is not touched. Otherwise up to n workspaces whose
// status is not 'removed' are selected with ORDER BY random() LIMIT n.
// (TABLESAMPLE would be nicer, but SYSTEM/BERNOULLI sampling has
// surprising distribution properties for small populations.)
//
// The *rand.Rand parameter is accepted but unused; the randomness comes
// from the database.
func pickWorkspaceSample(ctx context.Context, db *sql.DB, workspaceID string, n int, _ *rand.Rand) ([]string, error) {
	if workspaceID != "" {
		return []string{workspaceID}, nil
	}

	const sampleQuery = `
		SELECT id::text
		FROM workspaces
		WHERE status != 'removed'
		ORDER BY random()
		LIMIT $1
	`
	rows, err := db.QueryContext(ctx, sampleQuery, n)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	ids := make([]string, 0, n)
	for rows.Next() {
		var wsID string
		if scanErr := rows.Scan(&wsID); scanErr != nil {
			return nil, scanErr
		}
		ids = append(ids, wsID)
	}
	return ids, rows.Err()
}
|
||||
|
||||
// queryLegacyMemories returns the content of every agent_memories row
// belonging to workspaceID, newest first. The query does not filter on
// scope, so rows of all scopes are included. The result is an empty,
// non-nil slice when the workspace has no rows.
func queryLegacyMemories(ctx context.Context, db *sql.DB, workspaceID string) ([]string, error) {
	const legacyQuery = `
		SELECT content
		FROM agent_memories
		WHERE workspace_id = $1
		ORDER BY created_at DESC
	`
	rows, err := db.QueryContext(ctx, legacyQuery, workspaceID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	contents := make([]string, 0)
	for rows.Next() {
		var content string
		if scanErr := rows.Scan(&content); scanErr != nil {
			return nil, scanErr
		}
		contents = append(contents, content)
	}
	return contents, rows.Err()
}
|
||||
|
||||
// truncate shortens s to at most n bytes for log output and appends "…"
// when anything was cut. Strings of n bytes or fewer are returned
// unchanged.
//
// The cut is moved back to the nearest rune boundary at or before byte n,
// so a multi-byte UTF-8 character is never split in half. A negative n
// yields just the ellipsis for any non-empty s.
func truncate(s string, n int) string {
	if len(s) <= n {
		return s
	}
	cut := 0
	// Ranging over a string yields the byte offset at which each rune
	// starts; keep the last offset that does not exceed n.
	for i := range s {
		if i > n {
			break
		}
		cut = i
	}
	return s[:cut] + "…"
}
|
||||
@@ -0,0 +1,390 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
|
||||
)
|
||||
|
||||
// stubVerifyPlugin records search calls and returns canned results.
|
||||
type stubVerifyPlugin struct {
|
||||
searchFn func(ctx context.Context, body contract.SearchRequest) (*contract.SearchResponse, error)
|
||||
}
|
||||
|
||||
func (s *stubVerifyPlugin) Search(ctx context.Context, body contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
if s.searchFn != nil {
|
||||
return s.searchFn(ctx, body)
|
||||
}
|
||||
return &contract.SearchResponse{}, nil
|
||||
}
|
||||
|
||||
// stubVerifyResolver returns a canned readable namespace list.
|
||||
type stubVerifyResolver struct {
|
||||
namespaces []ResolvedNamespace
|
||||
err error
|
||||
}
|
||||
|
||||
func (s *stubVerifyResolver) ReadableNamespaces(_ context.Context, _ string) ([]ResolvedNamespace, error) {
|
||||
return s.namespaces, s.err
|
||||
}
|
||||
|
||||
// --- pickWorkspaceSample ---
|
||||
|
||||
func TestPickWorkspaceSample_SingleWorkspaceShortCircuit(t *testing.T) {
|
||||
db, _, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
got, err := pickWorkspaceSample(context.Background(), db, "specific-ws", 50, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if len(got) != 1 || got[0] != "specific-ws" {
|
||||
t.Errorf("got %v, want [specific-ws]", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPickWorkspaceSample_RandomSample(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WithArgs(50).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).
|
||||
AddRow("ws-1").
|
||||
AddRow("ws-2").
|
||||
AddRow("ws-3"))
|
||||
got, err := pickWorkspaceSample(context.Background(), db, "", 50, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if len(got) != 3 {
|
||||
t.Errorf("got len %d, want 3", len(got))
|
||||
}
|
||||
}
|
||||
|
||||
func TestPickWorkspaceSample_QueryError(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnError(errors.New("dead"))
|
||||
_, err := pickWorkspaceSample(context.Background(), db, "", 50, nil)
|
||||
if err == nil {
|
||||
t.Error("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPickWorkspaceSample_ScanError(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "extra"}). // wrong shape
|
||||
AddRow("ws-1", "extra"))
|
||||
_, err := pickWorkspaceSample(context.Background(), db, "", 50, nil)
|
||||
if err == nil {
|
||||
t.Error("expected scan error")
|
||||
}
|
||||
}
|
||||
|
||||
// --- queryLegacyMemories ---
|
||||
|
||||
func TestQueryLegacyMemories_HappyPath(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT content FROM agent_memories").
|
||||
WithArgs("ws-1").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"content"}).
|
||||
AddRow("fact 1").
|
||||
AddRow("fact 2"))
|
||||
got, err := queryLegacyMemories(context.Background(), db, "ws-1")
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if len(got) != 2 || got[0] != "fact 1" {
|
||||
t.Errorf("got %v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQueryLegacyMemories_QueryError(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT content FROM agent_memories").
|
||||
WillReturnError(errors.New("dead"))
|
||||
_, err := queryLegacyMemories(context.Background(), db, "ws-1")
|
||||
if err == nil {
|
||||
t.Error("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
// --- verifyParity (the workhorse) ---
|
||||
|
||||
func TestVerifyParity_AllMatch(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-1"))
|
||||
mock.ExpectQuery("SELECT content FROM agent_memories").
|
||||
WithArgs("ws-1").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"content"}).
|
||||
AddRow("fact A").
|
||||
AddRow("fact B"))
|
||||
|
||||
plugin := &stubVerifyPlugin{
|
||||
searchFn: func(_ context.Context, _ contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
return &contract.SearchResponse{Memories: []contract.Memory{
|
||||
{ID: "id-A", Content: "fact A"},
|
||||
{ID: "id-B", Content: "fact B"},
|
||||
}}, nil
|
||||
},
|
||||
}
|
||||
resolver := &stubVerifyResolver{
|
||||
namespaces: []ResolvedNamespace{{Name: "workspace:ws-1"}},
|
||||
}
|
||||
cfg := verifyConfig{DB: db, Plugin: plugin, Resolver: resolver, SampleSize: 50}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
report, err := verifyParity(context.Background(), cfg, devnull)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if report.Matches != 1 || report.Mismatches != 0 || report.Errors != 0 {
|
||||
t.Errorf("report = %+v, want 1 match", report)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyParity_MismatchDetectsMissingFromPlugin(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-1"))
|
||||
mock.ExpectQuery("SELECT content FROM agent_memories").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"content"}).
|
||||
AddRow("fact A").
|
||||
AddRow("fact-missing-from-plugin"))
|
||||
|
||||
plugin := &stubVerifyPlugin{
|
||||
searchFn: func(_ context.Context, _ contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
return &contract.SearchResponse{Memories: []contract.Memory{
|
||||
{ID: "id-A", Content: "fact A"},
|
||||
}}, nil
|
||||
},
|
||||
}
|
||||
resolver := &stubVerifyResolver{
|
||||
namespaces: []ResolvedNamespace{{Name: "workspace:ws-1"}},
|
||||
}
|
||||
cfg := verifyConfig{DB: db, Plugin: plugin, Resolver: resolver, SampleSize: 50}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
report, err := verifyParity(context.Background(), cfg, devnull)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if report.Mismatches != 1 {
|
||||
t.Errorf("report = %+v, want 1 mismatch", report)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyParity_PluginExtraRowsTolerated(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-1"))
|
||||
mock.ExpectQuery("SELECT content FROM agent_memories").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"content"}).
|
||||
AddRow("fact A"))
|
||||
|
||||
// Plugin returns more rows (e.g., team-shared from a sibling).
|
||||
// Verify treats this as a match — legacy is a subset of plugin.
|
||||
plugin := &stubVerifyPlugin{
|
||||
searchFn: func(_ context.Context, _ contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
return &contract.SearchResponse{Memories: []contract.Memory{
|
||||
{ID: "id-A", Content: "fact A"},
|
||||
{ID: "id-team-1", Content: "team-shared content from sibling"},
|
||||
}}, nil
|
||||
},
|
||||
}
|
||||
resolver := &stubVerifyResolver{
|
||||
namespaces: []ResolvedNamespace{{Name: "workspace:ws-1"}, {Name: "team:root"}},
|
||||
}
|
||||
cfg := verifyConfig{DB: db, Plugin: plugin, Resolver: resolver, SampleSize: 50}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
report, err := verifyParity(context.Background(), cfg, devnull)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if report.Matches != 1 || report.Mismatches != 0 {
|
||||
t.Errorf("report = %+v, want 1 match (plugin-extra is OK)", report)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyParity_LegacyQueryError(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-1"))
|
||||
mock.ExpectQuery("SELECT content FROM agent_memories").
|
||||
WillReturnError(errors.New("dead"))
|
||||
|
||||
cfg := verifyConfig{
|
||||
DB: db,
|
||||
Plugin: &stubVerifyPlugin{},
|
||||
Resolver: &stubVerifyResolver{namespaces: []ResolvedNamespace{{Name: "workspace:ws-1"}}},
|
||||
}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
report, err := verifyParity(context.Background(), cfg, devnull)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if report.Errors != 1 {
|
||||
t.Errorf("report = %+v, want 1 error", report)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyParity_ResolverError(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-1"))
|
||||
mock.ExpectQuery("SELECT content FROM agent_memories").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"content"}).AddRow("x"))
|
||||
|
||||
cfg := verifyConfig{
|
||||
DB: db,
|
||||
Plugin: &stubVerifyPlugin{},
|
||||
Resolver: &stubVerifyResolver{err: errors.New("dead")},
|
||||
}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
report, _ := verifyParity(context.Background(), cfg, devnull)
|
||||
if report.Errors != 1 {
|
||||
t.Errorf("report = %+v, want 1 error", report)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyParity_PluginSearchError(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-1"))
|
||||
mock.ExpectQuery("SELECT content FROM agent_memories").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"content"}).AddRow("x"))
|
||||
|
||||
cfg := verifyConfig{
|
||||
DB: db,
|
||||
Plugin: &stubVerifyPlugin{
|
||||
searchFn: func(_ context.Context, _ contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
return nil, errors.New("plugin dead")
|
||||
},
|
||||
},
|
||||
Resolver: &stubVerifyResolver{namespaces: []ResolvedNamespace{{Name: "workspace:ws-1"}}},
|
||||
}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
report, _ := verifyParity(context.Background(), cfg, devnull)
|
||||
if report.Errors != 1 {
|
||||
t.Errorf("report = %+v, want 1 error", report)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyParity_NoReadableNamespacesEmptyLegacy(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-1"))
|
||||
mock.ExpectQuery("SELECT content FROM agent_memories").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"content"})) // empty
|
||||
|
||||
cfg := verifyConfig{
|
||||
DB: db,
|
||||
Plugin: &stubVerifyPlugin{},
|
||||
Resolver: &stubVerifyResolver{namespaces: []ResolvedNamespace{}}, // empty
|
||||
}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
report, _ := verifyParity(context.Background(), cfg, devnull)
|
||||
// Empty legacy + empty namespaces → match.
|
||||
if report.Matches != 1 {
|
||||
t.Errorf("report = %+v, want 1 match (both empty)", report)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyParity_NoReadableNamespacesNonEmptyLegacy(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-1"))
|
||||
mock.ExpectQuery("SELECT content FROM agent_memories").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"content"}).AddRow("orphan-fact"))
|
||||
|
||||
cfg := verifyConfig{
|
||||
DB: db,
|
||||
Plugin: &stubVerifyPlugin{},
|
||||
Resolver: &stubVerifyResolver{namespaces: []ResolvedNamespace{}},
|
||||
}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
report, _ := verifyParity(context.Background(), cfg, devnull)
|
||||
// Legacy has rows but plugin can't see any → mismatch.
|
||||
if report.Mismatches != 1 {
|
||||
t.Errorf("report = %+v, want 1 mismatch", report)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyParity_PickSampleError(t *testing.T) {
|
||||
db, mock, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnError(errors.New("dead"))
|
||||
cfg := verifyConfig{DB: db, Plugin: &stubVerifyPlugin{}, Resolver: &stubVerifyResolver{}}
|
||||
devnull, _ := os.Open(os.DevNull)
|
||||
defer devnull.Close()
|
||||
_, err := verifyParity(context.Background(), cfg, devnull)
|
||||
if err == nil || !strings.Contains(err.Error(), "pick sample") {
|
||||
t.Errorf("err = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Truncate ---
|
||||
|
||||
func TestVerifyTruncate(t *testing.T) {
|
||||
if got := truncate("short", 10); got != "short" {
|
||||
t.Errorf("got %q", got)
|
||||
}
|
||||
if got := truncate(strings.Repeat("a", 200), 10); !strings.HasSuffix(got, "…") {
|
||||
t.Errorf("expected ellipsis: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// --- CLI: -verify mode ---
|
||||
|
||||
func TestRun_VerifyVsApplyMutuallyExclusive(t *testing.T) {
|
||||
stderr, _ := os.OpenFile(os.DevNull, os.O_WRONLY, 0)
|
||||
defer stderr.Close()
|
||||
stdout, _ := os.OpenFile(os.DevNull, os.O_WRONLY, 0)
|
||||
defer stdout.Close()
|
||||
err := run([]string{"-verify", "-apply"}, stdout, stderr)
|
||||
if err == nil || !strings.Contains(err.Error(), "exactly one") {
|
||||
t.Errorf("err = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRun_VerifyAloneIsValid(t *testing.T) {
|
||||
t.Setenv("DATABASE_URL", "")
|
||||
t.Setenv("MEMORY_PLUGIN_URL", "http://x")
|
||||
stderr, _ := os.OpenFile(os.DevNull, os.O_WRONLY, 0)
|
||||
defer stderr.Close()
|
||||
stdout, _ := os.OpenFile(os.DevNull, os.O_WRONLY, 0)
|
||||
defer stdout.Close()
|
||||
err := run([]string{"-verify"}, stdout, stderr)
|
||||
// Will fail later on missing DATABASE_URL, NOT on the
|
||||
// mutually-exclusive-modes check. Asserts that -verify is
|
||||
// recognized as a valid mode.
|
||||
if err == nil || !strings.Contains(err.Error(), "DATABASE_URL") {
|
||||
t.Errorf("err = %v, want DATABASE_URL error (-verify alone is a valid mode)", err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
# Real-subprocess E2E for memory-plugin-postgres
|
||||
|
||||
The default `go test ./...` suite covers the plugin via in-process
|
||||
sqlmock tests (PR-3). This directory ALSO ships build-tag-gated tests
|
||||
that spawn the real binary against a live postgres — to catch
|
||||
classes of bug in-process tests can't see:
|
||||
|
||||
- Boot-path regressions (env var typos, panic-on-startup)
|
||||
- Wire-format bugs sqlmock smooths over (the `pq.Array` issue we
|
||||
hit during PR-3 development)
|
||||
- HTTP/socket encoding edge cases
|
||||
- C1 idempotency (real upsert against real postgres)
|
||||
|
||||
## Running
|
||||
|
||||
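These tests are compiled only with the `memory_plugin_e2e` build tag, and each one skips itself when `MEMORY_PLUGIN_E2E_DB` is unset. To run them, an operator must provide both: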
|
||||
- The `memory_plugin_e2e` build tag
|
||||
- `MEMORY_PLUGIN_E2E_DB` env var pointing at a writable postgres
|
||||
|
||||
### Quick local run (with docker)
|
||||
|
||||
```bash
|
||||
docker run --rm -d --name memory-plugin-e2e-pg \
|
||||
-e POSTGRES_PASSWORD=test -e POSTGRES_USER=test -e POSTGRES_DB=test \
|
||||
-p 5432:5432 \
|
||||
pgvector/pgvector:pg16
|
||||
|
||||
# Poll until postgres accepts connections
|
||||
until docker exec memory-plugin-e2e-pg pg_isready -U test >/dev/null 2>&1; do sleep 0.5; done
|
||||
|
||||
MEMORY_PLUGIN_E2E_DB=postgres://test:test@localhost:5432/test?sslmode=disable \
|
||||
go test -tags memory_plugin_e2e -v -count=1 ./cmd/memory-plugin-postgres/
|
||||
|
||||
docker stop memory-plugin-e2e-pg
|
||||
```
|
||||
|
||||
### CI integration
|
||||
|
||||
These tests are NOT in the default required-checks set. Operators
|
||||
gating cutover on the suite should add a separate workflow step:
|
||||
|
||||
```yaml
|
||||
- name: Memory plugin E2E
|
||||
if: ${{ contains(github.event.pull_request.labels.*.name, 'memory-v2') }}
|
||||
run: |
|
||||
MEMORY_PLUGIN_E2E_DB=${{ secrets.MEMORY_PLUGIN_TEST_DSN }} \
|
||||
go test -tags memory_plugin_e2e -v -count=1 ./cmd/memory-plugin-postgres/
|
||||
```
|
||||
|
||||
## What each test pins
|
||||
|
||||
| Test | Covers |
|
||||
|---|---|
|
||||
| `TestE2E_BootAndHealth` | Binary builds, starts, advertises all 5 capabilities |
|
||||
| `TestE2E_FullCommitSearchForgetRoundTrip` | Real wire encoding (no sqlmock), full agent flow |
|
||||
| `TestE2E_IdempotencyKey` | C1 fix end-to-end — upserts against real postgres |
|
||||
|
||||
## What's still NOT covered
|
||||
|
||||
- Migration drift (assumes the migrations dir is at the conventional
|
||||
path; operator-customized layouts need their own test)
|
||||
- Plugin-internal recovery (kill backing store mid-request, etc.)
|
||||
- Concurrent commits with id collisions across processes
|
||||
- TTL eviction (would need to extend test runtime past `expires_at`)
|
||||
|
||||
These gaps apply equally to forks of this binary; they're listed in
|
||||
[`testing-your-plugin.md`](../../../docs/memory-plugins/testing-your-plugin.md)
|
||||
under "what the harness does NOT cover".
|
||||
@@ -0,0 +1,289 @@
|
||||
//go:build memory_plugin_e2e
|
||||
|
||||
// Package main's real-subprocess boot test (#293 fixup, RFC #2728).
|
||||
//
|
||||
// Build-tag gated so it only runs when an operator explicitly opts in:
|
||||
//
|
||||
// MEMORY_PLUGIN_E2E_DB=postgres://test:test@localhost:5432/test?sslmode=disable \
|
||||
// go test -tags memory_plugin_e2e -v ./cmd/memory-plugin-postgres/
|
||||
//
|
||||
// Why a separate build tag:
|
||||
// - The default `go test ./...` run shouldn't require docker or a
|
||||
// live postgres
|
||||
// - CI gates that DO want to run this can set the env var + tag
|
||||
// - Operators verifying a custom plugin against the contract can
|
||||
// copy this file as the template (replace the binary build step
|
||||
// with their own)
|
||||
//
|
||||
// What this exercises that PR-11's swap test doesn't:
|
||||
// - Real `go build` of cmd/memory-plugin-postgres/
|
||||
// - Real binary boot via os/exec — catches mixed-key panics, missing
|
||||
// env vars, crash-on-startup issues that in-process tests skip
|
||||
// - Real postgres connection — catches wire-format bugs (e.g. the
|
||||
// pq.Array regression we hit during PR-3)
|
||||
// - Real HTTP round-trip with a TCP socket — catches encoding edge
|
||||
// cases sqlmock + httptest can't see
|
||||
//
|
||||
// What this does NOT cover:
|
||||
// - Schema migration drift (assumes the migrations dir is at the
|
||||
// conventional path; operator-customized layouts need their own
|
||||
// test)
|
||||
// - Plugin-internal recovery (kill backing store mid-request, etc.)
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
mclient "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/client"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
|
||||
)
|
||||
|
||||
// bootProbeTimeout bounds how long startBinary polls /v1/health before
// giving up on the spawned binary.
const bootProbeTimeout = 30 * time.Second

// bootProbeStep is the pause between two consecutive health probes.
const bootProbeStep = 500 * time.Millisecond
|
||||
|
||||
// requireE2EDB reads the postgres DSN for the E2E suite from
// MEMORY_PLUGIN_E2E_DB. When the variable is empty the calling test is
// skipped rather than failed, so `-tags memory_plugin_e2e` runs stay
// green on machines without a database.
func requireE2EDB(t *testing.T) string {
	t.Helper()
	if dsn := os.Getenv("MEMORY_PLUGIN_E2E_DB"); dsn != "" {
		return dsn
	}
	t.Skip("MEMORY_PLUGIN_E2E_DB not set — skipping real-subprocess boot test")
	return ""
}
|
||||
|
||||
// buildBinary runs `go build` for the package that contains this source
// file and writes the result into a per-test temp directory (removed by
// the testing framework when the test ends). It returns the path of the
// produced executable; on Windows the name carries an ".exe" suffix.
// A build failure aborts the test and prints the compiler output.
func buildBinary(t *testing.T) string {
	t.Helper()

	name := "memory-plugin-postgres"
	if runtime.GOOS == "windows" {
		name += ".exe"
	}
	binPath := filepath.Join(t.TempDir(), name)

	// Build from the directory of this file so the result does not
	// depend on the working directory of the test run.
	_, self, _, _ := runtime.Caller(0)
	goBuild := exec.Command("go", "build", "-o", binPath, ".")
	goBuild.Dir = filepath.Dir(self)
	goBuild.Env = os.Environ()

	output, err := goBuild.CombinedOutput()
	if err != nil {
		t.Fatalf("go build failed: %v\n%s", err, output)
	}
	return binPath
}
|
||||
|
||||
// startBinary launches the built binary with the supplied env. Returns
|
||||
// the *exec.Cmd (test cleanup kills it) and the http URL it's listening
|
||||
// on. Polls /v1/health until ready or times out.
|
||||
func startBinary(t *testing.T, binary, dsn, listen string) (*exec.Cmd, string) {
|
||||
t.Helper()
|
||||
url := "http://" + listen
|
||||
cmd := exec.Command(binary)
|
||||
cmd.Env = append(os.Environ(),
|
||||
"MEMORY_PLUGIN_DATABASE_URL="+dsn,
|
||||
"MEMORY_PLUGIN_LISTEN_ADDR="+listen,
|
||||
// Migrations dir lives next to the cmd source. The binary
|
||||
// reads it relative to cwd by default; we set the env var
|
||||
// override so the test doesn't depend on cwd.
|
||||
"MEMORY_PLUGIN_MIGRATIONS_DIR="+migrationsDirForTest(t),
|
||||
)
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
cmd.Stdout = stdout
|
||||
cmd.Stderr = stderr
|
||||
if err := cmd.Start(); err != nil {
|
||||
t.Fatalf("start binary: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if cmd.Process != nil {
|
||||
_ = cmd.Process.Kill()
|
||||
_ = cmd.Wait()
|
||||
}
|
||||
if t.Failed() {
|
||||
t.Logf("binary stdout:\n%s", stdout.String())
|
||||
t.Logf("binary stderr:\n%s", stderr.String())
|
||||
}
|
||||
})
|
||||
|
||||
deadline := time.Now().Add(bootProbeTimeout)
|
||||
for time.Now().Before(deadline) {
|
||||
resp, err := http.Get(url + "/v1/health")
|
||||
if err == nil {
|
||||
_ = resp.Body.Close()
|
||||
if resp.StatusCode == 200 {
|
||||
return cmd, url
|
||||
}
|
||||
}
|
||||
// Bail early if the binary already exited.
|
||||
if cmd.ProcessState != nil && cmd.ProcessState.Exited() {
|
||||
t.Fatalf("binary exited during boot: stderr:\n%s", stderr.String())
|
||||
}
|
||||
time.Sleep(bootProbeStep)
|
||||
}
|
||||
t.Fatalf("binary did not become ready within %v", bootProbeTimeout)
|
||||
return nil, ""
|
||||
}
|
||||
|
||||
// migrationsDirForTest returns the path of the "migrations" directory
// that sits next to this source file, independent of the working
// directory the test runs in.
func migrationsDirForTest(t *testing.T) string {
	t.Helper()
	_, self, _, _ := runtime.Caller(0)
	sourceDir := filepath.Dir(self)
	return filepath.Join(sourceDir, "migrations")
}
|
||||
|
||||
// TestE2E_BootAndHealth: build + start the real binary, hit /v1/health,
|
||||
// confirm capabilities match what the built-in plugin declares. Catches
|
||||
// "binary doesn't start" / "wrong env var name" / "panics on first
|
||||
// request" classes that in-process tests miss.
|
||||
func TestE2E_BootAndHealth(t *testing.T) {
|
||||
dsn := requireE2EDB(t)
|
||||
binary := buildBinary(t)
|
||||
_, url := startBinary(t, binary, dsn, "127.0.0.1:19100")
|
||||
cl := mclient.New(mclient.Config{BaseURL: url})
|
||||
|
||||
hr, err := cl.Boot(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("Boot: %v", err)
|
||||
}
|
||||
if hr.Status != "ok" {
|
||||
t.Errorf("status = %q", hr.Status)
|
||||
}
|
||||
wantCaps := map[string]bool{"fts": true, "embedding": true, "ttl": true, "pin": true, "propagation": true}
|
||||
gotCaps := map[string]bool{}
|
||||
for _, c := range hr.Capabilities {
|
||||
gotCaps[c] = true
|
||||
}
|
||||
for c := range wantCaps {
|
||||
if !gotCaps[c] {
|
||||
t.Errorf("capability %q missing — built-in plugin should declare all 5", c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestE2E_FullCommitSearchForgetRoundTrip: the full agent flow against
|
||||
// real postgres + real HTTP. Catches wire-format regressions (the
|
||||
// pq.Array bug we hit during PR-3 development) and contract-level
|
||||
// drift between Go bindings and the spec.
|
||||
func TestE2E_FullCommitSearchForgetRoundTrip(t *testing.T) {
|
||||
dsn := requireE2EDB(t)
|
||||
binary := buildBinary(t)
|
||||
_, url := startBinary(t, binary, dsn, "127.0.0.1:19101")
|
||||
cl := mclient.New(mclient.Config{BaseURL: url})
|
||||
|
||||
ctx := context.Background()
|
||||
ns := fmt.Sprintf("workspace:e2e-%d", time.Now().UnixNano())
|
||||
|
||||
// 1. Upsert namespace.
|
||||
if _, err := cl.UpsertNamespace(ctx, ns, contract.NamespaceUpsert{Kind: contract.NamespaceKindWorkspace}); err != nil {
|
||||
t.Fatalf("UpsertNamespace: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cl.DeleteNamespace(context.Background(), ns) })
|
||||
|
||||
// 2. Commit a memory.
|
||||
resp, err := cl.CommitMemory(ctx, ns, contract.MemoryWrite{
|
||||
Content: "user prefers tabs over spaces",
|
||||
Kind: contract.MemoryKindFact,
|
||||
Source: contract.MemorySourceAgent,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("CommitMemory: %v", err)
|
||||
}
|
||||
if resp.ID == "" {
|
||||
t.Fatal("plugin returned empty memory id")
|
||||
}
|
||||
|
||||
// 3. Search and find the memory we just wrote.
|
||||
sresp, err := cl.Search(ctx, contract.SearchRequest{Namespaces: []string{ns}, Query: "tabs"})
|
||||
if err != nil {
|
||||
t.Fatalf("Search: %v", err)
|
||||
}
|
||||
if len(sresp.Memories) == 0 {
|
||||
t.Errorf("Search returned 0 memories, want at least 1")
|
||||
}
|
||||
found := false
|
||||
for _, m := range sresp.Memories {
|
||||
if m.ID == resp.ID && m.Content == "user prefers tabs over spaces" {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
got, _ := json.Marshal(sresp.Memories)
|
||||
t.Errorf("committed memory not found in search results: %s", got)
|
||||
}
|
||||
|
||||
// 4. Forget the memory.
|
||||
if err := cl.ForgetMemory(ctx, resp.ID, contract.ForgetRequest{RequestedByNamespace: ns}); err != nil {
|
||||
t.Fatalf("ForgetMemory: %v", err)
|
||||
}
|
||||
|
||||
// 5. Search again — gone.
|
||||
sresp, err = cl.Search(ctx, contract.SearchRequest{Namespaces: []string{ns}, Query: "tabs"})
|
||||
if err != nil {
|
||||
t.Fatalf("Search after forget: %v", err)
|
||||
}
|
||||
for _, m := range sresp.Memories {
|
||||
if m.ID == resp.ID {
|
||||
t.Errorf("forgotten memory still in search results")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestE2E_IdempotencyKey covers the C1 fix end-to-end: same id passed
|
||||
// twice should upsert (one row, updated content), not duplicate.
|
||||
func TestE2E_IdempotencyKey(t *testing.T) {
|
||||
dsn := requireE2EDB(t)
|
||||
binary := buildBinary(t)
|
||||
_, url := startBinary(t, binary, dsn, "127.0.0.1:19102")
|
||||
cl := mclient.New(mclient.Config{BaseURL: url})
|
||||
|
||||
ctx := context.Background()
|
||||
ns := fmt.Sprintf("workspace:e2e-idem-%d", time.Now().UnixNano())
|
||||
if _, err := cl.UpsertNamespace(ctx, ns, contract.NamespaceUpsert{Kind: contract.NamespaceKindWorkspace}); err != nil {
|
||||
t.Fatalf("UpsertNamespace: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = cl.DeleteNamespace(context.Background(), ns) })
|
||||
|
||||
fixedID := "11111111-2222-3333-4444-555555555555"
|
||||
for i, content := range []string{"first version", "second version (updated)"} {
|
||||
if _, err := cl.CommitMemory(ctx, ns, contract.MemoryWrite{
|
||||
ID: fixedID,
|
||||
Content: content,
|
||||
Kind: contract.MemoryKindFact,
|
||||
Source: contract.MemorySourceAgent,
|
||||
}); err != nil {
|
||||
t.Fatalf("commit %d: %v", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
sresp, err := cl.Search(ctx, contract.SearchRequest{Namespaces: []string{ns}})
|
||||
if err != nil {
|
||||
t.Fatalf("Search: %v", err)
|
||||
}
|
||||
matches := 0
|
||||
for _, m := range sresp.Memories {
|
||||
if m.ID == fixedID {
|
||||
matches++
|
||||
if m.Content != "second version (updated)" {
|
||||
t.Errorf("upsert did not update content: got %q", m.Content)
|
||||
}
|
||||
}
|
||||
}
|
||||
if matches != 1 {
|
||||
t.Errorf("upsert produced %d rows for id=%s, want 1", matches, fixedID)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,182 @@
|
||||
// memory-plugin-postgres is the built-in implementation of the memory
|
||||
// plugin contract (RFC #2728). Operators run it next to workspace-
|
||||
// server; workspace-server points MEMORY_PLUGIN_URL at it.
|
||||
//
|
||||
// Owns its own postgres tables (see migrations/). When an operator
|
||||
// swaps in a different plugin, this binary's tables become orphaned
|
||||
// — not auto-dropped. Document this in the plugin docs (PR-10).
|
||||
package main
|
||||
|
||||
import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"log"
	"net"
	"net/http"
	"os"
	"os/signal"
	"path/filepath"
	"strings"
	"syscall"
	"time"

	_ "github.com/lib/pq"

	"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/pgplugin"
)
|
||||
|
||||
// Names of the environment variables read at boot.
const (
	envDatabaseURL = "MEMORY_PLUGIN_DATABASE_URL" // required postgres DSN
	envListenAddr  = "MEMORY_PLUGIN_LISTEN_ADDR"  // optional listen address
	envSkipMigrate = "MEMORY_PLUGIN_SKIP_MIGRATE" // "1" skips migrations at boot
)

// defaultListenAddr is used when MEMORY_PLUGIN_LISTEN_ADDR is unset or
// blank.
const defaultListenAddr = ":9100"
|
||||
|
||||
func main() {
|
||||
if err := run(); err != nil {
|
||||
log.Fatalf("memory-plugin-postgres: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// run is the boot path. Extracted from main() so tests can drive it
|
||||
// with synthesized env. Returns nil on graceful shutdown, an error on
|
||||
// failure to bring up.
|
||||
func run() error {
|
||||
cfg, err := loadConfig()
|
||||
if err != nil {
|
||||
return fmt.Errorf("config: %w", err)
|
||||
}
|
||||
|
||||
db, err := openDB(cfg.DatabaseURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open db: %w", err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
if !cfg.SkipMigrate {
|
||||
if err := runMigrations(db); err != nil {
|
||||
return fmt.Errorf("migrate: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
store := pgplugin.NewStore(db)
|
||||
handler := pgplugin.NewHandler(store, func() error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
return db.PingContext(ctx)
|
||||
})
|
||||
|
||||
srv := &http.Server{
|
||||
Addr: cfg.ListenAddr,
|
||||
Handler: handler,
|
||||
ReadHeaderTimeout: 5 * time.Second,
|
||||
}
|
||||
|
||||
// Listen separately so we can log the bound port (handy when
|
||||
// :0 is used in tests).
|
||||
ln, err := net.Listen("tcp", cfg.ListenAddr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("listen %s: %w", cfg.ListenAddr, err)
|
||||
}
|
||||
log.Printf("memory-plugin-postgres listening on %s", ln.Addr())
|
||||
|
||||
// Run server in a goroutine; main waits on signal.
|
||||
errCh := make(chan error, 1)
|
||||
go func() {
|
||||
if err := srv.Serve(ln); err != nil && !errors.Is(err, http.ErrServerClosed) {
|
||||
errCh <- err
|
||||
}
|
||||
}()
|
||||
|
||||
sigCh := make(chan os.Signal, 1)
|
||||
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
select {
|
||||
case <-sigCh:
|
||||
log.Println("shutdown signal received")
|
||||
case err := <-errCh:
|
||||
return fmt.Errorf("serve: %w", err)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
return srv.Shutdown(ctx)
|
||||
}
|
||||
|
||||
type config struct {
|
||||
DatabaseURL string
|
||||
ListenAddr string
|
||||
SkipMigrate bool
|
||||
}
|
||||
|
||||
func loadConfig() (*config, error) {
|
||||
dbURL := strings.TrimSpace(os.Getenv(envDatabaseURL))
|
||||
if dbURL == "" {
|
||||
return nil, fmt.Errorf("%s is required", envDatabaseURL)
|
||||
}
|
||||
addr := strings.TrimSpace(os.Getenv(envListenAddr))
|
||||
if addr == "" {
|
||||
addr = defaultListenAddr
|
||||
}
|
||||
return &config{
|
||||
DatabaseURL: dbURL,
|
||||
ListenAddr: addr,
|
||||
SkipMigrate: os.Getenv(envSkipMigrate) == "1",
|
||||
}, nil
|
||||
}
|
||||
|
||||
// openDB opens a connection pool for databaseURL using the "postgres"
// driver, applies the pool limits (25 open, 5 idle, 30-minute connection
// lifetime) and verifies connectivity with a ping bounded to 5 seconds.
//
// On any failure it returns (nil, err). When the ping fails the pool is
// closed before returning, so no handle is left behind.
func openDB(databaseURL string) (*sql.DB, error) {
	db, err := sql.Open("postgres", databaseURL)
	if err != nil {
		return nil, err
	}
	db.SetMaxOpenConns(25)
	db.SetMaxIdleConns(5)
	db.SetConnMaxLifetime(30 * time.Minute)

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	if err := db.PingContext(ctx); err != nil {
		// Best-effort close: the ping failure is the error worth
		// reporting, so a secondary Close error is dropped.
		_ = db.Close()
		return nil, fmt.Errorf("ping: %w", err)
	}
	return db, nil
}
|
||||
|
||||
// runMigrations executes every "*.up.sql" file found in the migrations
// directory against db, in the order os.ReadDir returns them.
//
// The directory comes from MEMORY_PLUGIN_MIGRATIONS_DIR when set,
// otherwise the cwd-relative "cmd/memory-plugin-postgres/migrations".
// Every matching file is re-executed on each call, so the migration
// files themselves must be idempotent. Down migrations are never applied
// here — that is a manual operator action. Reading files directly keeps
// the binary free of a full migration engine and its drivers.
//
// The first read or exec failure aborts the run and is returned wrapped
// with the offending path.
func runMigrations(db *sql.DB) error {
	migrationsDir := "cmd/memory-plugin-postgres/migrations"
	if override := os.Getenv("MEMORY_PLUGIN_MIGRATIONS_DIR"); override != "" {
		migrationsDir = override
	}

	files, err := os.ReadDir(migrationsDir)
	if err != nil {
		return fmt.Errorf("read migrations dir %q: %w", migrationsDir, err)
	}

	for _, f := range files {
		name := f.Name()
		// Only regular "up" migrations are candidates.
		if isUp := !f.IsDir() && strings.HasSuffix(name, ".up.sql"); !isUp {
			continue
		}
		full := migrationsDir + "/" + name
		sqlText, readErr := os.ReadFile(full)
		if readErr != nil {
			return fmt.Errorf("read %q: %w", full, readErr)
		}
		if _, execErr := db.Exec(string(sqlText)); execErr != nil {
			return fmt.Errorf("apply %q: %w", full, execErr)
		}
		log.Printf("applied migration %s", name)
	}
	return nil
}
|
||||
@@ -0,0 +1,3 @@
|
||||
-- Down migration for memory_v2 plugin schema (RFC #2728).
-- memory_records is dropped first because it references memory_namespaces.
DROP TABLE IF EXISTS memory_records;
DROP TABLE IF EXISTS memory_namespaces;
|
||||
@@ -0,0 +1,47 @@
|
||||
-- Memory v2 plugin schema (RFC #2728).
--
-- These tables are owned by the built-in postgres memory plugin, NOT
-- by workspace-server. When an operator swaps in a different memory
-- plugin (Pinecone, Letta, custom), these tables become orphaned —
-- they are not auto-dropped. The operator drops them once confident
-- there is no need to switch back.
--
-- Lives under cmd/memory-plugin-postgres/migrations/ (NOT
-- workspace-server/migrations/) to make the ownership boundary
-- visible: workspace-server has zero knowledge of these tables.
--
-- Every statement uses IF NOT EXISTS so the file can be re-applied.

CREATE EXTENSION IF NOT EXISTS vector;

CREATE TABLE IF NOT EXISTS memory_namespaces (
    name       TEXT PRIMARY KEY,
    kind       TEXT NOT NULL CHECK (kind IN ('workspace','team','org','custom')),
    expires_at TIMESTAMPTZ,
    metadata   JSONB,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);

CREATE TABLE IF NOT EXISTS memory_records (
    id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    namespace   TEXT NOT NULL REFERENCES memory_namespaces(name) ON DELETE CASCADE,
    content     TEXT NOT NULL,
    kind        TEXT NOT NULL CHECK (kind IN ('fact','summary','checkpoint')),
    source      TEXT NOT NULL CHECK (source IN ('agent','runtime','user')),
    expires_at  TIMESTAMPTZ,
    propagation JSONB,
    pin         BOOLEAN NOT NULL DEFAULT false,
    embedding   vector(1536),
    content_tsv tsvector GENERATED ALWAYS AS (to_tsvector('english', content)) STORED,
    created_at  TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- Indexes:
--   - namespace:   every search filters by namespace list
--   - content_tsv: full-text search path
--   - embedding:   semantic search (partial because most rows have no embedding)
--   - expires_at:  TTL janitor scans (partial: only rows that can expire)
CREATE INDEX IF NOT EXISTS idx_memory_records_namespace ON memory_records(namespace);
CREATE INDEX IF NOT EXISTS idx_memory_records_fts ON memory_records USING GIN (content_tsv);
CREATE INDEX IF NOT EXISTS idx_memory_records_embedding ON memory_records
    USING ivfflat (embedding) WHERE embedding IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_memory_records_expires ON memory_records (expires_at)
    WHERE expires_at IS NOT NULL;
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/imagewatch"
|
||||
memwiring "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/wiring"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/registry"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/router"
|
||||
@@ -166,6 +167,16 @@ func main() {
|
||||
wh.SetCPProvisioner(cpProv)
|
||||
}
|
||||
|
||||
// Memory v2 plugin (RFC #2728): build the dependency bundle once
|
||||
// here so all three handlers (MCPHandler, AdminMemoriesHandler,
|
||||
// WorkspaceHandler) get the same plugin/resolver pair. memBundle
|
||||
// is nil when MEMORY_PLUGIN_URL is unset — every consumer
|
||||
// nil-checks before using.
|
||||
memBundle := memwiring.Build(db.DB)
|
||||
if memBundle != nil {
|
||||
wh.WithNamespaceCleanup(memBundle.NamespaceCleanupFn())
|
||||
}
|
||||
|
||||
// External-plugin env mutators — each plugin contributes 0+ mutators
|
||||
// onto a shared registry. Order matters: gh-identity populates
|
||||
// MOLECULE_AGENT_ROLE-derived attribution env vars that downstream
|
||||
@@ -306,7 +317,7 @@ func main() {
|
||||
cronSched.SetChannels(channelMgr)
|
||||
|
||||
// Router
|
||||
r := router.Setup(hub, broadcaster, prov, platformURL, configsDir, wh, channelMgr)
|
||||
r := router.Setup(hub, broadcaster, prov, platformURL, configsDir, wh, channelMgr, memBundle)
|
||||
|
||||
// HTTP server with graceful shutdown
|
||||
srv := &http.Server{
|
||||
|
||||
@@ -1,23 +1,83 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
mclient "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/client"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/namespace"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// envMemoryV2Cutover names the environment variable that switches admin
// export/import over to the v2 memory plugin (PR-8 / RFC #2728). Only
// the exact value "true" enables the cutover; with any other value, or
// with the variable unset, the legacy direct-DB path runs unchanged so
// operators who haven't enabled the plugin keep working.
const envMemoryV2Cutover = "MEMORY_V2_CUTOVER"
|
||||
|
||||
// AdminMemoriesHandler provides bulk export/import of agent memories for
|
||||
// backup and restore across Docker rebuilds (issue #1051).
|
||||
type AdminMemoriesHandler struct{}
|
||||
//
|
||||
// PR-8 (RFC #2728): when wired with the v2 plugin via WithMemoryV2 AND
|
||||
// MEMORY_V2_CUTOVER is true, export reads from the plugin's namespaces
|
||||
// and import writes through the plugin. Both paths preserve the
|
||||
// SAFE-T1201 redaction shipped in F1084 + F1085.
|
||||
type AdminMemoriesHandler struct {
|
||||
plugin adminMemoriesPlugin
|
||||
resolver adminMemoriesResolver
|
||||
}
|
||||
|
||||
// adminMemoriesPlugin is the slice of the memory plugin client we
|
||||
// call from this handler.
|
||||
type adminMemoriesPlugin interface {
|
||||
CommitMemory(ctx context.Context, namespace string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error)
|
||||
Search(ctx context.Context, body contract.SearchRequest) (*contract.SearchResponse, error)
|
||||
UpsertNamespace(ctx context.Context, name string, body contract.NamespaceUpsert) (*contract.Namespace, error)
|
||||
}
|
||||
|
||||
// adminMemoriesResolver mirrors the namespace resolver methods this
|
||||
// handler calls.
|
||||
type adminMemoriesResolver interface {
|
||||
WritableNamespaces(ctx context.Context, workspaceID string) ([]namespace.Namespace, error)
|
||||
ReadableNamespaces(ctx context.Context, workspaceID string) ([]namespace.Namespace, error)
|
||||
}
|
||||
|
||||
// NewAdminMemoriesHandler constructs the handler.
|
||||
func NewAdminMemoriesHandler() *AdminMemoriesHandler {
|
||||
return &AdminMemoriesHandler{}
|
||||
}
|
||||
|
||||
// WithMemoryV2 attaches the v2 plugin + resolver. Production wiring
|
||||
// path; main.go calls this after Boot()-ing the plugin client.
|
||||
func (h *AdminMemoriesHandler) WithMemoryV2(plugin *mclient.Client, resolver *namespace.Resolver) *AdminMemoriesHandler {
|
||||
h.plugin = plugin
|
||||
h.resolver = resolver
|
||||
return h
|
||||
}
|
||||
|
||||
// withMemoryV2APIs is the test-only wiring that takes interfaces.
|
||||
func (h *AdminMemoriesHandler) withMemoryV2APIs(plugin adminMemoriesPlugin, resolver adminMemoriesResolver) *AdminMemoriesHandler {
|
||||
h.plugin = plugin
|
||||
h.resolver = resolver
|
||||
return h
|
||||
}
|
||||
|
||||
// cutoverActive reports whether the export/import path should route
|
||||
// through the v2 plugin.
|
||||
func (h *AdminMemoriesHandler) cutoverActive() bool {
|
||||
if os.Getenv(envMemoryV2Cutover) != "true" {
|
||||
return false
|
||||
}
|
||||
return h.plugin != nil && h.resolver != nil
|
||||
}
|
||||
|
||||
// memoryExportEntry is the JSON shape for a single exported memory.
|
||||
type memoryExportEntry struct {
|
||||
ID string `json:"id"`
|
||||
@@ -36,9 +96,17 @@ type memoryExportEntry struct {
|
||||
// SECURITY (F1084 / #1131): applies redactSecrets to each content field
|
||||
// before returning so that any credentials stored before SAFE-T1201 (#838)
|
||||
// was applied do not leak out via the admin export endpoint.
|
||||
//
|
||||
// CUTOVER (PR-8 / RFC #2728): when MEMORY_V2_CUTOVER=true and the v2
|
||||
// plugin is wired, reads from the plugin instead of agent_memories.
|
||||
func (h *AdminMemoriesHandler) Export(c *gin.Context) {
|
||||
ctx := c.Request.Context()
|
||||
|
||||
if h.cutoverActive() {
|
||||
h.exportViaPlugin(c, ctx)
|
||||
return
|
||||
}
|
||||
|
||||
rows, err := db.DB.QueryContext(ctx, `
|
||||
SELECT am.id, am.content, am.scope, am.namespace, am.created_at,
|
||||
w.name AS workspace_name
|
||||
@@ -91,6 +159,9 @@ type memoryImportEntry struct {
|
||||
// before both the deduplication check and the INSERT so that imported memories
|
||||
// with embedded credentials cannot land unredacted in agent_memories (SAFE-T1201
|
||||
// parity with the commit_memory MCP bridge path).
|
||||
//
|
||||
// CUTOVER (PR-8 / RFC #2728): when MEMORY_V2_CUTOVER=true and the v2
|
||||
// plugin is wired, writes through the plugin instead of agent_memories.
|
||||
func (h *AdminMemoriesHandler) Import(c *gin.Context) {
|
||||
ctx := c.Request.Context()
|
||||
|
||||
@@ -100,6 +171,11 @@ func (h *AdminMemoriesHandler) Import(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if h.cutoverActive() {
|
||||
h.importViaPlugin(c, ctx, entries)
|
||||
return
|
||||
}
|
||||
|
||||
imported := 0
|
||||
skipped := 0
|
||||
errors := 0
|
||||
@@ -175,3 +251,310 @@ func (h *AdminMemoriesHandler) Import(c *gin.Context) {
|
||||
"total": len(entries),
|
||||
})
|
||||
}
|
||||
|
||||
// exportViaPlugin reads memories from the v2 plugin and emits them in
|
||||
// the legacy memoryExportEntry shape so existing tooling that consumes
|
||||
// the export keeps working.
|
||||
//
|
||||
// Optimization (#289 fix): the previous implementation was O(workspaces)
|
||||
// in BOTH resolver CTE walks AND plugin search calls. For a 1000-tenant
|
||||
// org, that's 1000 × resolver + 1000 × HTTP, where most are redundant
|
||||
// because workspaces sharing a team/org root see identical namespaces.
|
||||
//
|
||||
// New strategy:
|
||||
// 1. Single SQL pass walks parent_id chains, returning each
|
||||
// workspace's root_id alongside its name.
|
||||
// 2. Group workspaces by root → unique tree count is typically <<
|
||||
// workspace count.
|
||||
// 3. Resolve namespaces ONCE per root (any workspace under that
|
||||
// root produces the same readable list).
|
||||
// 4. Build a UNION of namespaces across all roots; single plugin
|
||||
// search call.
|
||||
// 5. Map each memory back to a workspace_name via a namespace→ws
|
||||
// lookup table built up from step 3.
|
||||
//
|
||||
// Net cost: 1 SQL + N_roots resolver calls + 1 plugin call (vs
|
||||
// N_workspaces resolver + N_workspaces plugin in the old code).
|
||||
func (h *AdminMemoriesHandler) exportViaPlugin(c *gin.Context, ctx context.Context) {
|
||||
// 1. One SQL pass: every workspace + its root id.
|
||||
wsRows, err := loadWorkspacesWithRoots(ctx, db.DB)
|
||||
if err != nil {
|
||||
log.Printf("admin/memories/export (cutover): workspaces query: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "export query failed"})
|
||||
return
|
||||
}
|
||||
|
||||
// 2. Group by root → list of workspaces.
|
||||
rootToWorkspaces := make(map[string][]workspaceRow, len(wsRows))
|
||||
for _, w := range wsRows {
|
||||
rootToWorkspaces[w.RootID] = append(rootToWorkspaces[w.RootID], w)
|
||||
}
|
||||
|
||||
// 3. Resolve team/org namespaces once per root, then add each
|
||||
// member's private workspace:<id> namespace explicitly.
|
||||
//
|
||||
// IMPORTANT: ReadableNamespaces(rootID) returns
|
||||
// {workspace:rootID, team:rootID, org:rootID}. Calling it once
|
||||
// per root is enough for team:/org:/custom: (those are shared by
|
||||
// every member of the root group), but the workspace: namespace
|
||||
// it returns is rootID's only — child members' private
|
||||
// workspace:<childID> namespaces would be silently dropped from
|
||||
// the export. Inject each member's workspace:<id> below to keep
|
||||
// coverage parity with the legacy per-workspace iteration.
|
||||
nsToOwner := make(map[string]string) // namespace → workspace_name (first matching wins)
|
||||
allNamespaces := make(map[string]struct{}) // union for plugin search
|
||||
for rootID, members := range rootToWorkspaces {
|
||||
readable, err := h.resolver.ReadableNamespaces(ctx, rootID)
|
||||
if err != nil {
|
||||
log.Printf("admin/memories/export (cutover) root=%s: resolve: %v", rootID, err)
|
||||
continue
|
||||
}
|
||||
// Collect non-workspace namespaces (team:/org:/custom:/...) from
|
||||
// the root view; these are identical across every member.
|
||||
for _, ns := range readable {
|
||||
if strings.HasPrefix(ns.Name, "workspace:") {
|
||||
continue
|
||||
}
|
||||
allNamespaces[ns.Name] = struct{}{}
|
||||
if _, alreadyMapped := nsToOwner[ns.Name]; alreadyMapped {
|
||||
continue
|
||||
}
|
||||
if owner := pickOwnerForNamespace(ns.Name, members); owner != "" {
|
||||
nsToOwner[ns.Name] = owner
|
||||
}
|
||||
}
|
||||
// Inject each member's private workspace:<id> namespace + its
|
||||
// owner. Children's private memories live in workspace:<childID>
|
||||
// which the root-only resolve doesn't surface.
|
||||
for _, m := range members {
|
||||
ns := "workspace:" + m.ID
|
||||
allNamespaces[ns] = struct{}{}
|
||||
nsToOwner[ns] = m.Name
|
||||
}
|
||||
}
|
||||
|
||||
if len(allNamespaces) == 0 {
|
||||
c.JSON(http.StatusOK, []memoryExportEntry{})
|
||||
return
|
||||
}
|
||||
|
||||
// 4. Single plugin search across the union.
|
||||
nsList := make([]string, 0, len(allNamespaces))
|
||||
for ns := range allNamespaces {
|
||||
nsList = append(nsList, ns)
|
||||
}
|
||||
resp, err := h.plugin.Search(ctx, contract.SearchRequest{Namespaces: nsList, Limit: 100})
|
||||
if err != nil {
|
||||
log.Printf("admin/memories/export (cutover): plugin search: %v", err)
|
||||
c.JSON(http.StatusOK, []memoryExportEntry{})
|
||||
return
|
||||
}
|
||||
|
||||
// 5. Map each memory to a workspace_name, redact, emit.
|
||||
seen := make(map[string]struct{})
|
||||
memories := make([]memoryExportEntry, 0, len(resp.Memories))
|
||||
for _, m := range resp.Memories {
|
||||
if _, dup := seen[m.ID]; dup {
|
||||
continue
|
||||
}
|
||||
seen[m.ID] = struct{}{}
|
||||
owner := nsToOwner[m.Namespace]
|
||||
redacted, _ := redactSecrets(owner, m.Content)
|
||||
memories = append(memories, memoryExportEntry{
|
||||
ID: m.ID,
|
||||
Content: redacted,
|
||||
Scope: legacyScopeFromNamespace(m.Namespace),
|
||||
Namespace: m.Namespace,
|
||||
CreatedAt: m.CreatedAt,
|
||||
WorkspaceName: owner,
|
||||
})
|
||||
}
|
||||
c.JSON(http.StatusOK, memories)
|
||||
}
|
||||
|
||||
// workspaceRow is one result row of loadWorkspacesWithRoots: a workspace
// plus the root of the parent chain it belongs to.
type workspaceRow struct {
	ID     string // workspace id, as text
	Name   string // workspace name
	RootID string // id of the top-most ancestor (equals ID for a root)
}
|
||||
|
||||
// loadWorkspacesWithRoots returns one row per workspace with its root
|
||||
// id computed via a recursive CTE. Single SQL pass — replaces the
|
||||
// previous N×ReadableNamespaces pattern that walked each tree
|
||||
// independently.
|
||||
func loadWorkspacesWithRoots(ctx context.Context, conn *sql.DB) ([]workspaceRow, error) {
|
||||
rows, err := conn.QueryContext(ctx, `
|
||||
WITH RECURSIVE chain AS (
|
||||
SELECT id, parent_id, name, id AS root_id, 0 AS depth
|
||||
FROM workspaces
|
||||
WHERE parent_id IS NULL
|
||||
UNION ALL
|
||||
SELECT w.id, w.parent_id, w.name, c.root_id, c.depth + 1
|
||||
FROM workspaces w
|
||||
JOIN chain c ON w.parent_id = c.id
|
||||
WHERE c.depth < 50
|
||||
)
|
||||
SELECT id::text, name, root_id::text FROM chain ORDER BY name
|
||||
`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
out := make([]workspaceRow, 0)
|
||||
for rows.Next() {
|
||||
var w workspaceRow
|
||||
if err := rows.Scan(&w.ID, &w.Name, &w.RootID); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, w)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// pickOwnerForNamespace returns the workspace_name to attribute a
|
||||
// namespace to in the export. workspace:<id> namespaces map to the
|
||||
// matching member; team:* / org:* / custom:* fall back to the first
|
||||
// member of the root group (canonical owner).
|
||||
func pickOwnerForNamespace(ns string, members []workspaceRow) string {
|
||||
if strings.HasPrefix(ns, "workspace:") {
|
||||
wantID := strings.TrimPrefix(ns, "workspace:")
|
||||
for _, m := range members {
|
||||
if m.ID == wantID {
|
||||
return m.Name
|
||||
}
|
||||
}
|
||||
}
|
||||
// Non-workspace namespaces: attribute to first member of the root
|
||||
// group. Stable because loadWorkspacesWithRoots returns ORDER BY
|
||||
// name, so the same root group always picks the same owner.
|
||||
if len(members) > 0 {
|
||||
return members[0].Name
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// importViaPlugin writes the entries through the plugin instead of
|
||||
// directly to agent_memories. Workspaces are resolved by name like
|
||||
// the legacy path. Scope→namespace mapping mirrors the PR-6 shim.
|
||||
func (h *AdminMemoriesHandler) importViaPlugin(c *gin.Context, ctx context.Context, entries []memoryImportEntry) {
|
||||
imported := 0
|
||||
skipped := 0
|
||||
errs := 0
|
||||
|
||||
for _, entry := range entries {
|
||||
var workspaceID string
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT id::text FROM workspaces WHERE name = $1 LIMIT 1`,
|
||||
entry.WorkspaceName,
|
||||
).Scan(&workspaceID); err != nil {
|
||||
log.Printf("admin/memories/import (cutover): workspace %q not found, skipping", entry.WorkspaceName)
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
// Redact BEFORE the plugin sees it (SAFE-T1201 parity).
|
||||
content, _ := redactSecrets(workspaceID, entry.Content)
|
||||
|
||||
ns, err := h.scopeToWritableNamespaceForImport(ctx, workspaceID, entry.Scope)
|
||||
if err != nil {
|
||||
log.Printf("admin/memories/import (cutover): %v", err)
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
// Idempotent namespace upsert before commit.
|
||||
if _, err := h.plugin.UpsertNamespace(ctx, ns, contract.NamespaceUpsert{
|
||||
Kind: namespaceKindFromLegacyScope(entry.Scope),
|
||||
}); err != nil {
|
||||
log.Printf("admin/memories/import (cutover): upsert ns %s: %v", ns, err)
|
||||
errs++
|
||||
continue
|
||||
}
|
||||
|
||||
if _, err := h.plugin.CommitMemory(ctx, ns, contract.MemoryWrite{
|
||||
Content: content,
|
||||
Kind: contract.MemoryKindFact,
|
||||
Source: contract.MemorySourceAgent,
|
||||
}); err != nil {
|
||||
log.Printf("admin/memories/import (cutover): commit %s: %v", ns, err)
|
||||
errs++
|
||||
continue
|
||||
}
|
||||
imported++
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"imported": imported,
|
||||
"skipped": skipped,
|
||||
"errors": errs,
|
||||
"total": len(entries),
|
||||
})
|
||||
}
|
||||
|
||||
// scopeToWritableNamespaceForImport mirrors the PR-6 shim translation.
|
||||
// Returns the namespace string the resolver picks for the requested
|
||||
// scope; errors out cleanly on GLOBAL or unmapped values so importing
|
||||
// a malformed entry doesn't crash the run.
|
||||
func (h *AdminMemoriesHandler) scopeToWritableNamespaceForImport(ctx context.Context, workspaceID, scope string) (string, error) {
|
||||
writable, err := h.resolver.WritableNamespaces(ctx, workspaceID)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
wantKind := contract.NamespaceKindWorkspace
|
||||
switch strings.ToUpper(scope) {
|
||||
case "", "LOCAL":
|
||||
wantKind = contract.NamespaceKindWorkspace
|
||||
case "TEAM":
|
||||
wantKind = contract.NamespaceKindTeam
|
||||
case "GLOBAL":
|
||||
wantKind = contract.NamespaceKindOrg
|
||||
default:
|
||||
return "", &skipImport{reason: "unknown scope: " + scope}
|
||||
}
|
||||
for _, ns := range writable {
|
||||
if ns.Kind == wantKind {
|
||||
return ns.Name, nil
|
||||
}
|
||||
}
|
||||
return "", &skipImport{reason: "no writable namespace of kind " + string(wantKind)}
|
||||
}
|
||||
|
||||
// skipImport is the error type used for "skip this entry": the entry
// itself is unusable, as opposed to a dependency having failed.
type skipImport struct {
	reason string // human-readable cause, appended to the message
}

// Error implements error; the message is "skip: " followed by the reason.
func (e *skipImport) Error() string {
	const prefix = "skip: "
	return prefix + e.reason
}
|
||||
|
||||
// legacyScopeFromNamespace derives the legacy scope label used in the
// export shape from a namespace name's prefix: "workspace:" → LOCAL,
// "team:" → TEAM, "org:" → GLOBAL. Any other name yields "". It lives
// here so admin_memories does not depend on the MCP handler's helpers.
func legacyScopeFromNamespace(ns string) string {
	mappings := [...]struct{ prefix, scope string }{
		{"workspace:", "LOCAL"},
		{"team:", "TEAM"},
		{"org:", "GLOBAL"},
	}
	for _, m := range mappings {
		if strings.HasPrefix(ns, m.prefix) {
			return m.scope
		}
	}
	return ""
}
|
||||
|
||||
// namespaceKindFromLegacyScope returns the contract.NamespaceKind for
|
||||
// a legacy scope value. Unknown defaults to workspace so importing
|
||||
// an unexpected row still produces a typed namespace.
|
||||
func namespaceKindFromLegacyScope(scope string) contract.NamespaceKind {
|
||||
switch strings.ToUpper(scope) {
|
||||
case "TEAM":
|
||||
return contract.NamespaceKindTeam
|
||||
case "GLOBAL":
|
||||
return contract.NamespaceKindOrg
|
||||
default:
|
||||
return contract.NamespaceKindWorkspace
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,800 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
platformdb "github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/contract"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/namespace"
|
||||
)
|
||||
|
||||
// --- stubs ---
|
||||
|
||||
type stubAdminPlugin struct {
|
||||
upserts []string
|
||||
commits []commitRecord
|
||||
searches []contract.SearchRequest
|
||||
commitFn func(ctx context.Context, ns string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error)
|
||||
searchFn func(ctx context.Context, body contract.SearchRequest) (*contract.SearchResponse, error)
|
||||
upsertFn func(ctx context.Context, name string, body contract.NamespaceUpsert) (*contract.Namespace, error)
|
||||
}
|
||||
|
||||
type commitRecord struct {
|
||||
NS string
|
||||
Content string
|
||||
}
|
||||
|
||||
func (s *stubAdminPlugin) UpsertNamespace(ctx context.Context, name string, body contract.NamespaceUpsert) (*contract.Namespace, error) {
|
||||
s.upserts = append(s.upserts, name)
|
||||
if s.upsertFn != nil {
|
||||
return s.upsertFn(ctx, name, body)
|
||||
}
|
||||
return &contract.Namespace{Name: name, Kind: body.Kind, CreatedAt: time.Now().UTC()}, nil
|
||||
}
|
||||
func (s *stubAdminPlugin) CommitMemory(ctx context.Context, ns string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error) {
|
||||
s.commits = append(s.commits, commitRecord{NS: ns, Content: body.Content})
|
||||
if s.commitFn != nil {
|
||||
return s.commitFn(ctx, ns, body)
|
||||
}
|
||||
return &contract.MemoryWriteResponse{ID: "out-1", Namespace: ns}, nil
|
||||
}
|
||||
func (s *stubAdminPlugin) Search(ctx context.Context, body contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
s.searches = append(s.searches, body)
|
||||
if s.searchFn != nil {
|
||||
return s.searchFn(ctx, body)
|
||||
}
|
||||
return &contract.SearchResponse{}, nil
|
||||
}
|
||||
|
||||
type stubAdminResolver struct {
|
||||
readable []namespace.Namespace
|
||||
writable []namespace.Namespace
|
||||
err error
|
||||
}
|
||||
|
||||
func (s *stubAdminResolver) ReadableNamespaces(_ context.Context, _ string) ([]namespace.Namespace, error) {
|
||||
return s.readable, s.err
|
||||
}
|
||||
func (s *stubAdminResolver) WritableNamespaces(_ context.Context, _ string) ([]namespace.Namespace, error) {
|
||||
return s.writable, s.err
|
||||
}
|
||||
|
||||
func adminRootResolver() *stubAdminResolver {
|
||||
return &stubAdminResolver{
|
||||
readable: []namespace.Namespace{
|
||||
{Name: "workspace:root-1", Kind: contract.NamespaceKindWorkspace, Writable: true},
|
||||
{Name: "team:root-1", Kind: contract.NamespaceKindTeam, Writable: true},
|
||||
{Name: "org:root-1", Kind: contract.NamespaceKindOrg, Writable: true},
|
||||
},
|
||||
writable: []namespace.Namespace{
|
||||
{Name: "workspace:root-1", Kind: contract.NamespaceKindWorkspace, Writable: true},
|
||||
{Name: "team:root-1", Kind: contract.NamespaceKindTeam, Writable: true},
|
||||
{Name: "org:root-1", Kind: contract.NamespaceKindOrg, Writable: true},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// installMockDB swaps platformdb.DB with a sqlmock for a test.
|
||||
func installMockDB(t *testing.T) sqlmock.Sqlmock {
|
||||
t.Helper()
|
||||
mockDB, mock, err := sqlmock.New()
|
||||
if err != nil {
|
||||
t.Fatalf("sqlmock new: %v", err)
|
||||
}
|
||||
prev := platformdb.DB
|
||||
platformdb.DB = mockDB
|
||||
t.Cleanup(func() {
|
||||
_ = mockDB.Close()
|
||||
platformdb.DB = prev
|
||||
})
|
||||
return mock
|
||||
}
|
||||
|
||||
// --- cutoverActive ---
|
||||
|
||||
func TestCutoverActive(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
envVal string
|
||||
plugin adminMemoriesPlugin
|
||||
resolver adminMemoriesResolver
|
||||
want bool
|
||||
}{
|
||||
{"env unset", "", &stubAdminPlugin{}, adminRootResolver(), false},
|
||||
{"env true but unwired", "true", nil, nil, false},
|
||||
{"env false", "false", &stubAdminPlugin{}, adminRootResolver(), false},
|
||||
{"env true wired", "true", &stubAdminPlugin{}, adminRootResolver(), true},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, tc.envVal)
|
||||
h := &AdminMemoriesHandler{plugin: tc.plugin, resolver: tc.resolver}
|
||||
if got := h.cutoverActive(); got != tc.want {
|
||||
t.Errorf("got %v, want %v", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// --- WithMemoryV2 wiring ---
|
||||
|
||||
func TestWithMemoryV2_AttachesDeps(t *testing.T) {
|
||||
h := NewAdminMemoriesHandler().WithMemoryV2(nil, nil)
|
||||
// Both nil pointers — wiring still attaches them; cutoverActive
|
||||
// reports false because the interface values are nil.
|
||||
if h.plugin == nil && h.resolver == nil {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
func TestWithMemoryV2APIs_AttachesDeps(t *testing.T) {
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(&stubAdminPlugin{}, adminRootResolver())
|
||||
if h.plugin == nil || h.resolver == nil {
|
||||
t.Error("withMemoryV2APIs must attach both interfaces")
|
||||
}
|
||||
}
|
||||
|
||||
// --- Export via plugin ---
|
||||
|
||||
func TestExport_RoutesThroughPluginWhenCutoverActive(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, body contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
return &contract.SearchResponse{Memories: []contract.Memory{
|
||||
{ID: "mem-1", Namespace: "workspace:root-1", Content: "fact x", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
{ID: "mem-2", Namespace: "team:root-1", Content: "team y", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
}}, nil
|
||||
},
|
||||
}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, adminRootResolver())
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/memories/export", nil)
|
||||
h.Export(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("code = %d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
var entries []memoryExportEntry
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &entries); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
if len(entries) != 2 {
|
||||
t.Errorf("entries = %d", len(entries))
|
||||
}
|
||||
// Legacy scope label must be in the export
|
||||
scopes := map[string]bool{}
|
||||
for _, e := range entries {
|
||||
scopes[e.Scope] = true
|
||||
}
|
||||
if !scopes["LOCAL"] || !scopes["TEAM"] {
|
||||
t.Errorf("expected LOCAL+TEAM scopes, got %v", scopes)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExport_DeduplicatesByMemoryID(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
|
||||
// Two workspaces, both will see the same team-shared memory.
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1").
|
||||
AddRow("ws-2", "beta", "ws-2"))
|
||||
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, body contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
return &contract.SearchResponse{Memories: []contract.Memory{
|
||||
{ID: "mem-shared", Namespace: "team:root-1", Content: "team-fact", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
}}, nil
|
||||
},
|
||||
}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, adminRootResolver())
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/memories/export", nil)
|
||||
h.Export(c)
|
||||
|
||||
var entries []memoryExportEntry
|
||||
_ = json.Unmarshal(w.Body.Bytes(), &entries)
|
||||
if len(entries) != 1 {
|
||||
t.Errorf("dedup failed; got %d entries, want 1", len(entries))
|
||||
}
|
||||
}
|
||||
|
||||
func TestExport_SkipsWorkspaceWhenResolverFails(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{}
|
||||
resolver := &stubAdminResolver{err: errors.New("resolver dead")}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, resolver)
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/memories/export", nil)
|
||||
h.Export(c)
|
||||
|
||||
// Should still 200 with empty memories — failure is per-workspace.
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("code = %d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestExport_SkipsWorkspaceWhenPluginSearchFails(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, _ contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
return nil, errors.New("plugin dead")
|
||||
},
|
||||
}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, adminRootResolver())
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/memories/export", nil)
|
||||
h.Export(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("code = %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExport_WorkspacesQueryFails(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnError(errors.New("db dead"))
|
||||
|
||||
plugin := &stubAdminPlugin{}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, adminRootResolver())
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/memories/export", nil)
|
||||
h.Export(c)
|
||||
|
||||
if w.Code != http.StatusInternalServerError {
|
||||
t.Errorf("code = %d, want 500", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExport_EmptyReadable(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1"))
|
||||
|
||||
resolver := &stubAdminResolver{readable: []namespace.Namespace{}}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(&stubAdminPlugin{}, resolver)
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/memories/export", nil)
|
||||
h.Export(c)
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("code = %d", w.Code)
|
||||
}
|
||||
if !strings.Contains(w.Body.String(), "[]") {
|
||||
t.Errorf("expected empty array, got %s", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestExport_RedactsSecretsInPluginPath(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, _ contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
return &contract.SearchResponse{Memories: []contract.Memory{
|
||||
{ID: "mem-1", Namespace: "workspace:root-1", Content: "API_KEY=sk-1234567890abcdefghijk0123456789", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
}}, nil
|
||||
},
|
||||
}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, adminRootResolver())
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/memories/export", nil)
|
||||
h.Export(c)
|
||||
|
||||
if strings.Contains(w.Body.String(), "sk-1234567890abcdef") {
|
||||
t.Errorf("export leaked unredacted secret: %s", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// --- Import via plugin ---
|
||||
|
||||
func TestImport_RoutesThroughPluginWhenCutoverActive(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WithArgs("alpha").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("root-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, adminRootResolver())
|
||||
|
||||
body, _ := json.Marshal([]memoryImportEntry{
|
||||
{Content: "fact x", Scope: "LOCAL", WorkspaceName: "alpha"},
|
||||
})
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("POST", "/admin/memories/import", bytes.NewReader(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
h.Import(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("code = %d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
if len(plugin.commits) != 1 {
|
||||
t.Errorf("commits = %d, want 1", len(plugin.commits))
|
||||
}
|
||||
if plugin.commits[0].NS != "workspace:root-1" {
|
||||
t.Errorf("ns = %q", plugin.commits[0].NS)
|
||||
}
|
||||
}
|
||||
|
||||
func TestImport_SkipsUnknownWorkspace(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WithArgs("ghost").
|
||||
WillReturnError(errors.New("no rows"))
|
||||
|
||||
plugin := &stubAdminPlugin{}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, adminRootResolver())
|
||||
|
||||
body, _ := json.Marshal([]memoryImportEntry{
|
||||
{Content: "x", Scope: "LOCAL", WorkspaceName: "ghost"},
|
||||
})
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("POST", "/admin/memories/import", bytes.NewReader(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
h.Import(c)
|
||||
|
||||
var resp map[string]int
|
||||
_ = json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if resp["skipped"] != 1 || resp["imported"] != 0 {
|
||||
t.Errorf("resp = %v", resp)
|
||||
}
|
||||
}
|
||||
|
||||
func TestImport_PluginUpsertNamespaceError(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("root-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{
|
||||
upsertFn: func(_ context.Context, _ string, _ contract.NamespaceUpsert) (*contract.Namespace, error) {
|
||||
return nil, errors.New("upsert dead")
|
||||
},
|
||||
}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, adminRootResolver())
|
||||
|
||||
body, _ := json.Marshal([]memoryImportEntry{
|
||||
{Content: "x", Scope: "LOCAL", WorkspaceName: "alpha"},
|
||||
})
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("POST", "/admin/memories/import", bytes.NewReader(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
h.Import(c)
|
||||
|
||||
var resp map[string]int
|
||||
_ = json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if resp["errors"] != 1 || resp["imported"] != 0 {
|
||||
t.Errorf("resp = %v", resp)
|
||||
}
|
||||
}
|
||||
|
||||
func TestImport_PluginCommitError(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("root-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{
|
||||
commitFn: func(_ context.Context, _ string, _ contract.MemoryWrite) (*contract.MemoryWriteResponse, error) {
|
||||
return nil, errors.New("commit dead")
|
||||
},
|
||||
}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, adminRootResolver())
|
||||
|
||||
body, _ := json.Marshal([]memoryImportEntry{
|
||||
{Content: "x", Scope: "LOCAL", WorkspaceName: "alpha"},
|
||||
})
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("POST", "/admin/memories/import", bytes.NewReader(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
h.Import(c)
|
||||
|
||||
var resp map[string]int
|
||||
_ = json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if resp["errors"] != 1 {
|
||||
t.Errorf("resp = %v", resp)
|
||||
}
|
||||
}
|
||||
|
||||
func TestImport_RedactsBeforePluginSeesContent(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("root-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, adminRootResolver())
|
||||
|
||||
body, _ := json.Marshal([]memoryImportEntry{
|
||||
{Content: "API_KEY=sk-1234567890abcdefghijk0123456789", Scope: "LOCAL", WorkspaceName: "alpha"},
|
||||
})
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("POST", "/admin/memories/import", bytes.NewReader(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
h.Import(c)
|
||||
|
||||
if len(plugin.commits) != 1 {
|
||||
t.Fatalf("commits = %d", len(plugin.commits))
|
||||
}
|
||||
if strings.Contains(plugin.commits[0].Content, "sk-1234567890") {
|
||||
t.Errorf("plugin received unredacted content: %q", plugin.commits[0].Content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestImport_SkipsUnknownScope(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("root-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, adminRootResolver())
|
||||
|
||||
body, _ := json.Marshal([]memoryImportEntry{
|
||||
{Content: "x", Scope: "WEIRD", WorkspaceName: "alpha"},
|
||||
})
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("POST", "/admin/memories/import", bytes.NewReader(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
h.Import(c)
|
||||
|
||||
var resp map[string]int
|
||||
_ = json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if resp["skipped"] != 1 {
|
||||
t.Errorf("resp = %v", resp)
|
||||
}
|
||||
}
|
||||
|
||||
func TestImport_SkipsWhenResolverErrors(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("root-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{}
|
||||
resolver := &stubAdminResolver{err: errors.New("dead")}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, resolver)
|
||||
|
||||
body, _ := json.Marshal([]memoryImportEntry{
|
||||
{Content: "x", Scope: "LOCAL", WorkspaceName: "alpha"},
|
||||
})
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("POST", "/admin/memories/import", bytes.NewReader(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
h.Import(c)
|
||||
|
||||
var resp map[string]int
|
||||
_ = json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if resp["skipped"] != 1 {
|
||||
t.Errorf("resp = %v", resp)
|
||||
}
|
||||
}
|
||||
|
||||
// TestExport_BatchesPluginCallsByRoot pins the I3 fix: previously the
|
||||
// export ran one resolver + one plugin search per workspace (N+1 in
|
||||
// both); now it groups by root and runs one resolver + one plugin
|
||||
// search per UNIQUE root.
|
||||
//
|
||||
// Setup: 3 workspaces under 1 root → 1 resolver call + 1 plugin call
|
||||
// (was: 3 resolver + 3 plugin in the old code). The plugin search
|
||||
// receives 5 namespaces: each member's workspace:<id> + team:root-1
|
||||
// + org:root-1. (Children's workspace:<id> namespaces must be
|
||||
// included or admin export silently drops their private memories.)
|
||||
func TestExport_BatchesPluginCallsByRoot(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("root-1", "alpha", "root-1").
|
||||
AddRow("child-1", "alpha-child", "root-1").
|
||||
AddRow("child-2", "alpha-grandchild", "root-1"))
|
||||
|
||||
pluginSearchCount := 0
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, body contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
pluginSearchCount++
|
||||
if len(body.Namespaces) != 5 {
|
||||
t.Errorf("plugin search call %d: namespaces len = %d, want 5 (3 workspace + team + org); got %v", pluginSearchCount, len(body.Namespaces), body.Namespaces)
|
||||
}
|
||||
return &contract.SearchResponse{}, nil
|
||||
},
|
||||
}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, adminRootResolver())
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/memories/export", nil)
|
||||
h.Export(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("code = %d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
if pluginSearchCount != 1 {
|
||||
t.Errorf("plugin search called %d times, want 1 (was 3 with the old N+1 code)", pluginSearchCount)
|
||||
}
|
||||
}
|
||||
|
||||
// perWorkspaceResolver mimics the real resolver: ReadableNamespaces
|
||||
// returns the SPECIFIC workspace's view (workspace:<that ID> +
|
||||
// team:<root> + org:<root>), not a constant set. The legacy
|
||||
// stubAdminResolver hides the I3 silent-drop bug by ignoring its
|
||||
// workspace-id argument.
|
||||
type perWorkspaceResolver map[string][]namespace.Namespace
|
||||
|
||||
func (r perWorkspaceResolver) ReadableNamespaces(_ context.Context, ws string) ([]namespace.Namespace, error) {
|
||||
v, ok := r[ws]
|
||||
if !ok {
|
||||
return nil, errors.New("perWorkspaceResolver: unknown ws " + ws)
|
||||
}
|
||||
return v, nil
|
||||
}
|
||||
func (r perWorkspaceResolver) WritableNamespaces(_ context.Context, ws string) ([]namespace.Namespace, error) {
|
||||
return r.ReadableNamespaces(nil, ws)
|
||||
}
|
||||
|
||||
// TestExport_IncludesEveryMembersPrivateNamespace pins the I3 follow-up
|
||||
// fix: when a root group has multiple members, the export must surface
|
||||
// each member's workspace:<id> namespace, not just the root's. Before
|
||||
// the fix, calling ReadableNamespaces(rootID) returned only
|
||||
// workspace:rootID + team:rootID + org:rootID — every child workspace's
|
||||
// private memories were silently dropped from admin export.
|
||||
func TestExport_IncludesEveryMembersPrivateNamespace(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("root-1", "alpha", "root-1").
|
||||
AddRow("child-1", "alpha-child", "root-1").
|
||||
AddRow("child-2", "alpha-grandchild", "root-1"))
|
||||
|
||||
resolver := perWorkspaceResolver{
|
||||
"root-1": {
|
||||
{Name: "workspace:root-1", Kind: contract.NamespaceKindWorkspace, Writable: true},
|
||||
{Name: "team:root-1", Kind: contract.NamespaceKindTeam, Writable: true},
|
||||
{Name: "org:root-1", Kind: contract.NamespaceKindOrg, Writable: true},
|
||||
},
|
||||
"child-1": {
|
||||
{Name: "workspace:child-1", Kind: contract.NamespaceKindWorkspace, Writable: true},
|
||||
{Name: "team:root-1", Kind: contract.NamespaceKindTeam, Writable: true},
|
||||
{Name: "org:root-1", Kind: contract.NamespaceKindOrg, Writable: true},
|
||||
},
|
||||
"child-2": {
|
||||
{Name: "workspace:child-2", Kind: contract.NamespaceKindWorkspace, Writable: true},
|
||||
{Name: "team:root-1", Kind: contract.NamespaceKindTeam, Writable: true},
|
||||
{Name: "org:root-1", Kind: contract.NamespaceKindOrg, Writable: true},
|
||||
},
|
||||
}
|
||||
|
||||
var passedNamespaces []string
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, body contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
passedNamespaces = append(passedNamespaces, body.Namespaces...)
|
||||
return &contract.SearchResponse{Memories: []contract.Memory{
|
||||
{ID: "m-root", Namespace: "workspace:root-1", Content: "root private", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
{ID: "m-child1", Namespace: "workspace:child-1", Content: "child-1 private", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
{ID: "m-child2", Namespace: "workspace:child-2", Content: "child-2 private", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
{ID: "m-team", Namespace: "team:root-1", Content: "shared team", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
}}, nil
|
||||
},
|
||||
}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, resolver)
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/memories/export", nil)
|
||||
h.Export(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("code = %d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
// Every member's private namespace must reach the plugin search.
|
||||
want := []string{"workspace:root-1", "workspace:child-1", "workspace:child-2", "team:root-1", "org:root-1"}
|
||||
got := make(map[string]bool, len(passedNamespaces))
|
||||
for _, ns := range passedNamespaces {
|
||||
got[ns] = true
|
||||
}
|
||||
for _, w := range want {
|
||||
if !got[w] {
|
||||
t.Errorf("plugin search missing namespace %q (got %v)", w, passedNamespaces)
|
||||
}
|
||||
}
|
||||
if len(passedNamespaces) != 5 {
|
||||
t.Errorf("plugin search namespace count = %d, want 5 (3 workspace + team + org)", len(passedNamespaces))
|
||||
}
|
||||
|
||||
// Children's private memories must appear in the export, attributed
|
||||
// to the right workspace_name.
|
||||
var entries []memoryExportEntry
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &entries); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
byID := map[string]memoryExportEntry{}
|
||||
for _, e := range entries {
|
||||
byID[e.ID] = e
|
||||
}
|
||||
for _, exp := range []struct{ id, ns, owner string }{
|
||||
{"m-root", "workspace:root-1", "alpha"},
|
||||
{"m-child1", "workspace:child-1", "alpha-child"},
|
||||
{"m-child2", "workspace:child-2", "alpha-grandchild"},
|
||||
} {
|
||||
e, ok := byID[exp.id]
|
||||
if !ok {
|
||||
t.Errorf("export missing memory %s — children's private memories silently dropped", exp.id)
|
||||
continue
|
||||
}
|
||||
if e.Namespace != exp.ns {
|
||||
t.Errorf("memory %s namespace = %q, want %q", exp.id, e.Namespace, exp.ns)
|
||||
}
|
||||
if e.WorkspaceName != exp.owner {
|
||||
t.Errorf("memory %s owner = %q, want %q", exp.id, e.WorkspaceName, exp.owner)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestPickOwnerForNamespace covers the namespace→workspace_name
|
||||
// attribution helper introduced in I3.
|
||||
func TestPickOwnerForNamespace(t *testing.T) {
|
||||
members := []workspaceRow{
|
||||
{ID: "root-1", Name: "alpha", RootID: "root-1"},
|
||||
{ID: "child-1", Name: "alpha-child", RootID: "root-1"},
|
||||
}
|
||||
cases := []struct {
|
||||
name string
|
||||
ns string
|
||||
want string
|
||||
}{
|
||||
{"workspace ns matches member id", "workspace:child-1", "alpha-child"},
|
||||
{"workspace ns no match → first", "workspace:foreign", "alpha"},
|
||||
{"team ns → first member of root group", "team:root-1", "alpha"},
|
||||
{"org ns → first member", "org:root-1", "alpha"},
|
||||
{"custom ns → first member", "custom:foo", "alpha"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := pickOwnerForNamespace(tc.ns, members); got != tc.want {
|
||||
t.Errorf("pickOwnerForNamespace(%q) = %q, want %q", tc.ns, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
if got := pickOwnerForNamespace("workspace:abc", nil); got != "" {
|
||||
t.Errorf("empty members must return \"\", got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Helper functions ---
|
||||
|
||||
func TestLegacyScopeFromNamespace(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
want string
|
||||
}{
|
||||
{"workspace:abc", "LOCAL"},
|
||||
{"team:abc", "TEAM"},
|
||||
{"org:abc", "GLOBAL"},
|
||||
{"custom:abc", ""},
|
||||
{"", ""},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
if got := legacyScopeFromNamespace(tc.in); got != tc.want {
|
||||
t.Errorf("legacyScopeFromNamespace(%q) = %q, want %q", tc.in, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNamespaceKindFromLegacyScope(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
want contract.NamespaceKind
|
||||
}{
|
||||
{"LOCAL", contract.NamespaceKindWorkspace},
|
||||
{"local", contract.NamespaceKindWorkspace},
|
||||
{"TEAM", contract.NamespaceKindTeam},
|
||||
{"GLOBAL", contract.NamespaceKindOrg},
|
||||
{"weird", contract.NamespaceKindWorkspace},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
if got := namespaceKindFromLegacyScope(tc.in); got != tc.want {
|
||||
t.Errorf("namespaceKindFromLegacyScope(%q) = %q, want %q", tc.in, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSkipImport_ErrorMessage(t *testing.T) {
|
||||
e := &skipImport{reason: "unknown scope: WEIRD"}
|
||||
if !strings.Contains(e.Error(), "unknown scope: WEIRD") {
|
||||
t.Errorf("Error() = %q", e.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// --- Confirm legacy paths still work when env is unset ---
|
||||
|
||||
func TestExport_LegacyPathWhenCutoverInactive(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT am.id, am.content, am.scope, am.namespace").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "content", "scope", "namespace", "created_at", "workspace_name"}))
|
||||
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(&stubAdminPlugin{}, adminRootResolver())
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/memories/export", nil)
|
||||
h.Export(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("code = %d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("legacy SQL path not exercised: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -30,6 +30,7 @@ package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
@@ -102,14 +103,45 @@ const chatUploadDir = "/workspace/.molecule/chat-uploads"
|
||||
// of bug as the original SaaS provision drift fixed in #2366; this
|
||||
// extraction prevents that class on the consumer side.
|
||||
func resolveWorkspaceForwardCreds(c *gin.Context, ctx context.Context, workspaceID, op string) (wsURL, secret string, ok bool) {
|
||||
var deliveryMode sql.NullString
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT COALESCE(url, '') FROM workspaces WHERE id = $1`, workspaceID,
|
||||
).Scan(&wsURL); err != nil {
|
||||
`SELECT COALESCE(url, ''), delivery_mode FROM workspaces WHERE id = $1`, workspaceID,
|
||||
).Scan(&wsURL, &deliveryMode); err != nil {
|
||||
log.Printf("chat_files %s: workspace lookup failed for %s: %v", op, workspaceID, err)
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return "", "", false
|
||||
}
|
||||
if wsURL == "" {
|
||||
// Distinguish the two empty-URL classes so the user sees an
|
||||
// actionable error rather than a misleading "not registered yet"
|
||||
// (which implies waiting will help):
|
||||
//
|
||||
// push-mode → URL just isn't on the row yet (workspace
|
||||
// restart in progress, or first /registry/register hasn't
|
||||
// landed). 503 + "not registered yet" is correct — retry
|
||||
// after the next heartbeat (~30s) will likely succeed.
|
||||
//
|
||||
// anything else (poll-mode, NULL, empty string) → URL is
|
||||
// structurally absent. The platform never dispatches to a
|
||||
// non-push workspace, so chat upload (which is HTTP-forward
|
||||
// by design) cannot proceed by waiting. Returning 503 here
|
||||
// would loop the canvas client forever. 422 signals "this
|
||||
// request can't succeed against THIS workspace's
|
||||
// configuration" — the only fix is to re-register the
|
||||
// workspace with a publicly-reachable URL.
|
||||
//
|
||||
// Live-observed 2026-05-04: external runtime workspaces (e.g.
|
||||
// molecule-sdk-python on a mac laptop) register with
|
||||
// delivery_mode=NULL. The narrow "poll" check missed them; the
|
||||
// invariant we actually want is "URL empty + not-push = no
|
||||
// dispatch path, ever".
|
||||
if !deliveryMode.Valid || deliveryMode.String != "push" {
|
||||
c.JSON(http.StatusUnprocessableEntity, gin.H{
|
||||
"error": "workspace has no callback URL — chat " + op + " requires push-mode + public URL",
|
||||
"detail": "This workspace registered without a publicly-reachable URL (delivery_mode is not 'push'). The platform cannot dispatch chat uploads to it. Re-register the workspace with a public URL in push mode (e.g. via ngrok / Cloudflare tunnel) to enable chat file " + op + ".",
|
||||
})
|
||||
return "", "", false
|
||||
}
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace url not registered yet"})
|
||||
return "", "", false
|
||||
}
|
||||
|
||||
@@ -58,16 +58,38 @@ func uploadFixture(t *testing.T) (*bytes.Buffer, string) {
|
||||
return &buf, mw.FormDataContentType()
|
||||
}
|
||||
|
||||
// expectURL stubs the SELECT that resolves the workspace's url.
|
||||
// expectURL stubs the SELECT that resolves the workspace's url +
|
||||
// delivery_mode. Defaults delivery_mode to "push" — most tests don't
|
||||
// care about the mode and just want a URL to forward to. Use
|
||||
// expectURLAndMode when the test needs a specific mode (e.g. the
|
||||
// poll-mode 422 path).
|
||||
func expectURL(mock sqlmock.Sqlmock, workspaceID, url string) {
|
||||
mock.ExpectQuery(`SELECT COALESCE\(url, ''\) FROM workspaces WHERE id = \$1`).
|
||||
expectURLAndMode(mock, workspaceID, url, "push")
|
||||
}
|
||||
|
||||
// expectURLAndMode is the explicit form for tests that need to
|
||||
// exercise the delivery_mode branch (e.g. poll-mode workspaces get
|
||||
// a 422 instead of a 503 when URL is empty — the platform can't
|
||||
// dispatch to a non-push workspace at all).
|
||||
func expectURLAndMode(mock sqlmock.Sqlmock, workspaceID, url, mode string) {
|
||||
mock.ExpectQuery(`SELECT COALESCE\(url, ''\), delivery_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(workspaceID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"url"}).AddRow(url))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"url", "delivery_mode"}).AddRow(url, mode))
|
||||
}
|
||||
|
||||
// expectURLNullMode is the production-observed shape: external runtime
|
||||
// workspaces (molecule-sdk-python on user infra) register with
|
||||
// delivery_mode = NULL, not "poll". Caught 2026-05-04 — the narrow
|
||||
// "poll" check missed three of three real workspaces in user reports.
|
||||
func expectURLNullMode(mock sqlmock.Sqlmock, workspaceID, url string) {
|
||||
mock.ExpectQuery(`SELECT COALESCE\(url, ''\), delivery_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(workspaceID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"url", "delivery_mode"}).AddRow(url, nil))
|
||||
}
|
||||
|
||||
// expectURLMissing stubs the SELECT to return sql.ErrNoRows.
|
||||
func expectURLMissing(mock sqlmock.Sqlmock, workspaceID string) {
|
||||
mock.ExpectQuery(`SELECT COALESCE\(url, ''\) FROM workspaces WHERE id = \$1`).
|
||||
mock.ExpectQuery(`SELECT COALESCE\(url, ''\), delivery_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(workspaceID).
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
}
|
||||
@@ -201,9 +223,13 @@ func TestChatUpload_NoURL(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
// Workspace registered but URL hasn't been reported yet (mid-boot).
|
||||
// Workspace registered (push-mode) but URL hasn't been reported
|
||||
// yet (mid-boot). 503 + "not registered yet" is the right surface — the
|
||||
// canvas client can retry after the next heartbeat picks up the URL.
|
||||
// Push mode is the only branch that produces 503; everything else
|
||||
// (poll, NULL, empty) gets 422 because no amount of waiting helps.
|
||||
wsID := "00000000-0000-0000-0000-000000000042"
|
||||
expectURL(mock, wsID, "")
|
||||
expectURLAndMode(mock, wsID, "", "push")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
body, ct := uploadFixture(t)
|
||||
@@ -211,7 +237,65 @@ func TestChatUpload_NoURL(t *testing.T) {
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Errorf("expected 503 when workspace url empty, got %d: %s", w.Code, w.Body.String())
|
||||
t.Errorf("expected 503 when workspace url empty (push mode), got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if !strings.Contains(w.Body.String(), "not registered yet") {
|
||||
t.Errorf("expected transient-state error message, got: %s", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestChatUpload_PollModeEmptyURL pins the 422 distinguisher: a
|
||||
// poll-mode workspace has no URL by design, so chat upload (which is
|
||||
// HTTP-forward to the workspace) cannot succeed by retrying. Returning
|
||||
// 503 here would loop the canvas client forever; 422 + an actionable
|
||||
// message tells the user what to do.
|
||||
func TestChatUpload_PollModeEmptyURL(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "00000000-0000-0000-0000-000000000099"
|
||||
expectURLAndMode(mock, wsID, "", "poll")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusUnprocessableEntity {
|
||||
t.Fatalf("expected 422 for poll-mode upload, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if !strings.Contains(w.Body.String(), "push") {
|
||||
t.Errorf("expected error to suggest push mode, got: %s", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestChatUpload_NullModeEmptyURL — production-observed 2026-05-04:
|
||||
// external-runtime workspaces (molecule-sdk-python on user infra)
|
||||
// register with delivery_mode = NULL, not "poll". The earlier narrow
|
||||
// poll-only check fell through to the misleading 503. The fix is the
|
||||
// inverse-of-push test: anything not exactly "push" with empty URL
|
||||
// can't dispatch and gets the actionable 422.
|
||||
//
|
||||
// Three of three external workspaces in the user's tenant had this
|
||||
// shape (home hermes / runner mac mini / mac laptop, all
|
||||
// runtime=external + url='' + delivery_mode=NULL).
|
||||
func TestChatUpload_NullModeEmptyURL(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "30ba7f0b-b303-4a20-aefe-3a4a675b8aa4" // user's "mac laptop"
|
||||
expectURLNullMode(mock, wsID, "")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusUnprocessableEntity {
|
||||
t.Fatalf("expected 422 for null-delivery-mode upload, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if !strings.Contains(w.Body.String(), "callback URL") {
|
||||
t.Errorf("expected error to mention callback URL, got: %s", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -83,7 +83,20 @@ curl -fsS -X POST "{{PLATFORM_URL}}/registry/register" \
|
||||
const externalChannelTemplate = `# Claude Code channel — bridges this workspace's A2A traffic into your
|
||||
# Claude Code session. No tunnel/public URL needed (polling-based).
|
||||
#
|
||||
# 1. Save this token + workspace_id, then create ~/.claude/channels/molecule/.env:
|
||||
# Prereq: Bun installed (channel plugins are Bun scripts).
|
||||
# bun --version # must print a version number
|
||||
#
|
||||
# 1. Inside Claude Code, install the channel plugin from its GitHub repo.
|
||||
# The plugin is NOT on Anthropic's default allowlist, so a one-time
|
||||
# marketplace-add is needed before install:
|
||||
#
|
||||
# /plugin marketplace add Molecule-AI/molecule-mcp-claude-channel
|
||||
# /plugin install molecule@molecule-mcp-claude-channel
|
||||
#
|
||||
# Then either run /reload-plugins or restart Claude Code so the
|
||||
# plugin is registered.
|
||||
#
|
||||
# 2. Create the per-watched-workspace config file:
|
||||
mkdir -p ~/.claude/channels/molecule
|
||||
cat > ~/.claude/channels/molecule/.env <<'EOF'
|
||||
MOLECULE_PLATFORM_URL={{PLATFORM_URL}}
|
||||
@@ -92,13 +105,32 @@ MOLECULE_WORKSPACE_TOKENS=<paste auth_token from create response>
|
||||
EOF
|
||||
chmod 600 ~/.claude/channels/molecule/.env
|
||||
|
||||
# 2. Launch Claude Code with the channel enabled:
|
||||
claude --channels plugin:molecule@Molecule-AI/molecule-mcp-claude-channel
|
||||
# 3. Launch Claude Code with the channel enabled. Custom (non-Anthropic-
|
||||
# allowlisted) channels need the --dangerously-load-development-channels
|
||||
# flag to opt in — without it, you'll see "not on the approved channels
|
||||
# allowlist" on startup.
|
||||
claude --dangerously-load-development-channels \
|
||||
--channels plugin:molecule@molecule-mcp-claude-channel
|
||||
|
||||
# You should see on stderr:
|
||||
# molecule channel: connected — watching 1 workspace(s) at {{PLATFORM_URL}}
|
||||
#
|
||||
# Inbound A2A messages now surface as conversation turns. Claude's
|
||||
# replies route back via the reply_to_workspace MCP tool — no extra
|
||||
# wiring on your side.
|
||||
#
|
||||
# Common errors:
|
||||
# "plugin not installed" → Step 1 didn't run; run /plugin install
|
||||
# inside Claude Code, then /reload-plugins.
|
||||
# "not on approved channels allowlist" → Add --dangerously-load-development-channels
|
||||
# to the launch command (Step 3).
|
||||
# "config-missing" → ~/.claude/channels/molecule/.env not
|
||||
# readable; re-run Step 2 and check chmod.
|
||||
#
|
||||
# Team/Enterprise orgs: the --dangerously-load-development-channels flag is
|
||||
# blocked by managed settings. Your admin must set channelsEnabled=true and
|
||||
# add the plugin to allowedChannelPlugins in claude.ai admin settings.
|
||||
#
|
||||
# Multi-workspace: comma-separate IDs and tokens (same order). See
|
||||
# https://github.com/Molecule-AI/molecule-mcp-claude-channel for
|
||||
# pairing flow, push-mode upgrade, and v0.2 roadmap.
|
||||
@@ -186,3 +218,191 @@ async def main():
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
`
|
||||
|
||||
// externalHermesChannelTemplate — install snippet for operators whose
|
||||
// external agent IS a hermes-agent session. Routes the workspace's
|
||||
// A2A traffic into the running hermes gateway as platform messages
|
||||
// via the molecule-channel plugin.
|
||||
//
|
||||
// The plugin (Molecule-AI/hermes-channel-molecule) is a hermes
|
||||
// platform adapter that:
|
||||
// 1. Spawns ``python -m molecule_runtime.a2a_mcp_server`` as a
|
||||
// stdio MCP subprocess (separate from any hermes-side MCP
|
||||
// client connection).
|
||||
// 2. Long-polls ``wait_for_message`` on the platform's inbox.
|
||||
// 3. Dispatches each inbound activity into the hermes gateway as a
|
||||
// MessageEvent — same code path Telegram/Discord use.
|
||||
// 4. Outbound replies route via ``send_message_to_user`` (canvas
|
||||
// user) or ``delegate_task`` (peer agent) MCP tool calls.
|
||||
//
|
||||
// Result: hermes gets push parity with Claude Code / codex / openclaw —
|
||||
// canvas messages and peer A2A arrive as conversation turns mid-session,
|
||||
// not just at the start of a new ``hermes`` invocation.
|
||||
//
|
||||
// Plugin uses the upstream ``register_platform`` API shipped by
|
||||
// NousResearch/hermes-agent#17751 (merged 2026-04-30) and falls back
|
||||
// to the legacy ``register_platform_adapter`` shape on older forks —
|
||||
// same wheel installs cleanly on stock or patched hermes-agent.
|
||||
const externalHermesChannelTemplate = `# Hermes channel — bridges this workspace's A2A traffic into your
|
||||
# hermes-agent session. No tunnel/public URL needed (long-poll based,
|
||||
# same shape as the Claude Code channel).
|
||||
#
|
||||
# Prereq: a hermes-agent install on the target machine. Latest builds
|
||||
# (post #17751) ship the platform-plugin API natively; older ones are
|
||||
# also supported via the plugin's dual-mode fallback.
|
||||
#
|
||||
# 1. Install the runtime + plugin:
|
||||
pip install molecule-ai-workspace-runtime
|
||||
pip install 'git+https://github.com/Molecule-AI/hermes-channel-molecule.git'
|
||||
|
||||
# 2. Export the workspace credentials:
|
||||
export MOLECULE_WORKSPACE_ID={{WORKSPACE_ID}}
|
||||
export MOLECULE_PLATFORM_URL={{PLATFORM_URL}}
|
||||
export MOLECULE_WORKSPACE_TOKEN="<paste from create response>"
|
||||
export MOLECULE_ORG_ID="<your org id>"
|
||||
|
||||
# 3. Edit ~/.hermes/config.yaml — under your existing top-level
|
||||
# gateway: block, add a plugin_platforms entry:
|
||||
#
|
||||
# gateway:
|
||||
# # ...your existing gateway settings...
|
||||
# plugin_platforms:
|
||||
# molecule:
|
||||
# enabled: true
|
||||
#
|
||||
# If you don't yet have a gateway: block, create one with just
|
||||
# that plugin_platforms entry. Don't append blindly — YAML
|
||||
# rejects duplicate top-level keys, so a second gateway: block
|
||||
# will silently break hermes config loading.
|
||||
|
||||
# 4. Restart the hermes gateway:
|
||||
hermes gateway --replace
|
||||
|
||||
# Inbound canvas messages + peer A2A now arrive as MessageEvents —
|
||||
# same dispatch path Telegram/Discord/Slack use. The agent replies via
|
||||
# send_message_to_user / delegate_task MCP tool calls (already wired
|
||||
# by the plugin's molecule_runtime MCP subprocess).
|
||||
#
|
||||
# Source + issue tracker:
|
||||
# https://github.com/Molecule-AI/hermes-channel-molecule
|
||||
`
|
||||
|
||||
// externalCodexTemplate — for operators whose external agent is a
|
||||
// codex CLI (@openai/codex) session. Wires the molecule_runtime A2A
|
||||
// MCP server into codex's config.toml so the agent can call
|
||||
// list_peers / delegate_task / send_message_to_user / commit_memory.
|
||||
//
|
||||
// Push parity caveat: codex's MCP client doesn't forward arbitrary
|
||||
// notifications/* from configured MCP servers (verified by reading
|
||||
// codex-rs/codex-mcp/src/connection_manager.rs in openai/codex). So
|
||||
// this snippet gives outbound tools but NOT mid-turn push from
|
||||
// inbound A2A. For full push parity on a codex external, the
|
||||
// equivalent of hermes-channel-molecule would be needed — a bridge
|
||||
// daemon that long-polls the platform inbox and calls codex's
|
||||
// turn/steer RPC. Tracked separately; this snippet is the
|
||||
// outbound-tool-only first cut.
|
||||
const externalCodexTemplate = `# Codex MCP config — outbound tool path. For operators whose external
|
||||
# agent is a codex CLI (@openai/codex) session.
|
||||
#
|
||||
# This wires the molecule platform's A2A MCP server into codex so
|
||||
# the agent can call list_peers / delegate_task / send_message_to_user
|
||||
# / commit_memory. Inbound A2A (canvas messages, peer-initiated tasks)
|
||||
# does NOT push into the running codex turn yet — codex's MCP runtime
|
||||
# doesn't route arbitrary notifications/* from configured MCP servers.
|
||||
# For inbound delivery into a codex session, pair with the Python SDK
|
||||
# tab for now.
|
||||
|
||||
# 1. Install codex CLI + the workspace runtime wheel:
|
||||
npm install -g @openai/codex@^0.57
|
||||
pip install molecule-ai-workspace-runtime
|
||||
|
||||
# 2. Edit ~/.codex/config.toml and add the block below. {{PLATFORM_URL}}
|
||||
# and {{WORKSPACE_ID}} are stamped server-side; paste your auth
|
||||
# token for MOLECULE_WORKSPACE_TOKEN before saving.
|
||||
#
|
||||
# Don't append blindly — TOML rejects duplicate
|
||||
# [mcp_servers.molecule] tables, so re-running on an existing
|
||||
# config will break codex parsing. If [mcp_servers.molecule]
|
||||
# already exists (e.g. you set this up before), replace the
|
||||
# existing block instead of appending.
|
||||
|
||||
mkdir -p ~/.codex
|
||||
# (then open ~/.codex/config.toml in your editor and paste:)
|
||||
#
|
||||
# [mcp_servers.molecule]
|
||||
# command = "python3"
|
||||
# args = ["-m", "molecule_runtime.a2a_mcp_server"]
|
||||
# startup_timeout_sec = 30
|
||||
#
|
||||
# [mcp_servers.molecule.env]
|
||||
# WORKSPACE_ID = "{{WORKSPACE_ID}}"
|
||||
# PLATFORM_URL = "{{PLATFORM_URL}}"
|
||||
# MOLECULE_WORKSPACE_TOKEN = "<paste from create response>"
|
||||
# MOLECULE_ORG_ID = "<your org id>"
|
||||
|
||||
# 3. Run codex — the molecule tools are now available to the agent:
|
||||
codex
|
||||
`
|
||||
|
||||
// externalOpenClawTemplate — for operators whose external agent is an
|
||||
// openclaw session. Wires the molecule MCP server via openclaw's
|
||||
// `mcp set` config + starts the openclaw gateway on loopback.
|
||||
//
|
||||
// Like the codex tab, this is outbound-only. Full push parity on an
|
||||
// external openclaw would need a sessions.steer bridge daemon (the
|
||||
// equivalent of hermes-channel-molecule for openclaw). Tracked
|
||||
// separately; outbound tools is the first cut.
|
||||
const externalOpenClawTemplate = `# OpenClaw MCP config — outbound tool path. For operators whose
|
||||
# external agent is an openclaw session.
|
||||
#
|
||||
# This wires the molecule platform's A2A MCP server into openclaw's
|
||||
# gateway so the agent can call list_peers / delegate_task /
|
||||
# send_message_to_user / commit_memory. Inbound A2A push into a
|
||||
# running openclaw run is not wired here yet — the platform-side
|
||||
# openclaw template (template-openclaw) implements the full
|
||||
# sessions.steer push path; an external setup would need the same
|
||||
# bridge daemon the template uses. For inbound delivery on an
|
||||
# external machine today, pair with the Python SDK tab.
|
||||
|
||||
# 1. Install openclaw CLI + the workspace runtime wheel:
|
||||
npm install -g openclaw@latest
|
||||
pip install molecule-ai-workspace-runtime
|
||||
|
||||
# 2. Onboard openclaw against your model provider (one-time setup).
|
||||
# --non-interactive needs an explicit --provider + --model so it
|
||||
# doesn't prompt; pick what matches your API key. Skip step 2 if
|
||||
# you've already onboarded on this host.
|
||||
#
|
||||
# openclaw onboard --non-interactive \
|
||||
# --provider openai \
|
||||
# --model gpt-5
|
||||
|
||||
# 3. Wire the molecule MCP server. {{WORKSPACE_ID}} + {{PLATFORM_URL}}
|
||||
# are stamped server-side; paste the auth token before running.
|
||||
WORKSPACE_TOKEN="<paste from create response>"
|
||||
MOLECULE_ORG_ID="<your org id>"
|
||||
openclaw mcp set molecule "$(cat <<EOF
|
||||
{
|
||||
"command": "python3",
|
||||
"args": ["-m", "molecule_runtime.a2a_mcp_server"],
|
||||
"env": {
|
||||
"WORKSPACE_ID": "{{WORKSPACE_ID}}",
|
||||
"PLATFORM_URL": "{{PLATFORM_URL}}",
|
||||
"MOLECULE_WORKSPACE_TOKEN": "$WORKSPACE_TOKEN",
|
||||
"MOLECULE_ORG_ID": "$MOLECULE_ORG_ID"
|
||||
}
|
||||
}
|
||||
EOF
|
||||
)"
|
||||
|
||||
# 4. Start the openclaw gateway as a durable background process.
|
||||
# A bare '&' dies when the terminal closes; nohup + log file keeps
|
||||
# the gateway alive across logout. For systemd-managed hosts,
|
||||
# register a unit instead.
|
||||
nohup openclaw gateway --dev --port 18789 --bind loopback \
|
||||
> ~/.openclaw/gateway.log 2>&1 &
|
||||
disown
|
||||
|
||||
# 5. Run an agent turn — molecule tools are now available:
|
||||
openclaw agent --message "list my peers"
|
||||
`
|
||||
|
||||
@@ -8,8 +8,6 @@ import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -569,67 +567,6 @@ func TestProxyA2A_WorkspaceOffline(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- TestSharedContext ----------
|
||||
|
||||
func TestSharedContext(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
// Create a temp configs directory with a workspace config
|
||||
tmpDir := t.TempDir()
|
||||
wsDir := filepath.Join(tmpDir, "test-workspace")
|
||||
if err := os.MkdirAll(wsDir, 0755); err != nil {
|
||||
t.Fatalf("failed to create config dir: %v", err)
|
||||
}
|
||||
|
||||
// Write config.yaml with shared_context
|
||||
configYAML := "name: Test Workspace\nshared_context:\n - test.md\n"
|
||||
if err := os.WriteFile(filepath.Join(wsDir, "config.yaml"), []byte(configYAML), 0644); err != nil {
|
||||
t.Fatalf("failed to write config.yaml: %v", err)
|
||||
}
|
||||
|
||||
// Write the shared context file
|
||||
testContent := "# Shared Context\nThis is shared context content."
|
||||
if err := os.WriteFile(filepath.Join(wsDir, "test.md"), []byte(testContent), 0644); err != nil {
|
||||
t.Fatalf("failed to write test.md: %v", err)
|
||||
}
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
|
||||
// Mock DB returning workspace name that normalizes to "test-workspace"
|
||||
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
|
||||
WithArgs("ws-ctx").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("Test Workspace"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-ctx"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-ctx/shared-context", nil)
|
||||
|
||||
handler.SharedContext(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var resp []map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to parse response: %v", err)
|
||||
}
|
||||
if len(resp) != 1 {
|
||||
t.Fatalf("expected 1 file, got %d", len(resp))
|
||||
}
|
||||
if resp[0]["path"] != "test.md" {
|
||||
t.Errorf("expected path 'test.md', got %v", resp[0]["path"])
|
||||
}
|
||||
if resp[0]["content"] != testContent {
|
||||
t.Errorf("expected content %q, got %v", testContent, resp[0]["content"])
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- TestHeartbeatHandler_TaskChanged ----------
|
||||
|
||||
func TestHeartbeatHandler_TaskChanged(t *testing.T) {
|
||||
@@ -1218,53 +1155,6 @@ func TestWorkspaceGet_CurrentTask(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSharedContext_NoSharedFiles(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
// Create a temp configs directory with a workspace config that has no shared_context
|
||||
tmpDir := t.TempDir()
|
||||
wsDir := filepath.Join(tmpDir, "empty-workspace")
|
||||
if err := os.MkdirAll(wsDir, 0755); err != nil {
|
||||
t.Fatalf("failed to create config dir: %v", err)
|
||||
}
|
||||
|
||||
// Write config.yaml without shared_context
|
||||
configYAML := "name: Empty Workspace\ndescription: No shared context\n"
|
||||
if err := os.WriteFile(filepath.Join(wsDir, "config.yaml"), []byte(configYAML), 0644); err != nil {
|
||||
t.Fatalf("failed to write config.yaml: %v", err)
|
||||
}
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil)
|
||||
|
||||
// Mock DB returning workspace name that normalizes to "empty-workspace"
|
||||
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
|
||||
WithArgs("ws-empty").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("Empty Workspace"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-empty"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-empty/shared-context", nil)
|
||||
|
||||
handler.SharedContext(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var resp []interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to parse response: %v", err)
|
||||
}
|
||||
if len(resp) != 0 {
|
||||
t.Errorf("expected empty array, got %d items", len(resp))
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestActivityHandler_Report_SourceIDSpoofRejected verifies the #209 spoof
|
||||
// guard: a workspace authenticated for :id cannot inject activity rows with
|
||||
// source_id pointing at a different workspace. Bearer-auth middleware would
|
||||
|
||||
@@ -83,6 +83,12 @@ type mcpTool struct {
|
||||
type MCPHandler struct {
|
||||
database *sql.DB
|
||||
broadcaster *events.Broadcaster
|
||||
|
||||
// memv2 is the v2 memory plugin wiring (RFC #2728). nil-safe:
|
||||
// every v2 tool calls memoryV2Available() first and returns a
|
||||
// clear error rather than crashing when the operator hasn't set
|
||||
// MEMORY_PLUGIN_URL.
|
||||
memv2 *memoryV2Deps
|
||||
}
|
||||
|
||||
// NewMCPHandler wires the handler to db and broadcaster.
|
||||
@@ -217,6 +223,76 @@ var mcpAllTools = []mcpTool{
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// v2 memory tools (RFC #2728). Coexist with legacy commit_memory /
|
||||
// recall_memory; PR-6 will alias the legacy names. Surface here so
|
||||
// agents calling tools/list see them when MEMORY_PLUGIN_URL is
|
||||
// configured (handlers return a clear error when it isn't).
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
{
|
||||
Name: "commit_memory_v2",
|
||||
Description: "Save a memory to a namespace. Defaults to your own workspace. Use list_writable_namespaces to discover what else you can write to. Server applies SAFE-T1201 redaction before storage.",
|
||||
InputSchema: map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"content": map[string]interface{}{"type": "string"},
|
||||
"namespace": map[string]interface{}{"type": "string"},
|
||||
"kind": map[string]interface{}{"type": "string", "enum": []string{"fact", "summary", "checkpoint"}},
|
||||
"expires_at": map[string]interface{}{"type": "string", "description": "RFC3339"},
|
||||
"pin": map[string]interface{}{"type": "boolean"},
|
||||
},
|
||||
"required": []string{"content"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "search_memory",
|
||||
Description: "Search memories across one or more namespaces. Empty namespaces = search everything readable. Server applies ACL intersection before querying.",
|
||||
InputSchema: map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"query": map[string]interface{}{"type": "string"},
|
||||
"namespaces": map[string]interface{}{"type": "array", "items": map[string]interface{}{"type": "string"}},
|
||||
"kinds": map[string]interface{}{"type": "array", "items": map[string]interface{}{"type": "string", "enum": []string{"fact", "summary", "checkpoint"}}},
|
||||
"limit": map[string]interface{}{"type": "integer"},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "commit_summary",
|
||||
Description: "Save an end-of-session summary. Same shape as commit_memory_v2 but kind=summary and a 30-day default TTL.",
|
||||
InputSchema: map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"content": map[string]interface{}{"type": "string"},
|
||||
"namespace": map[string]interface{}{"type": "string"},
|
||||
"expires_at": map[string]interface{}{"type": "string"},
|
||||
},
|
||||
"required": []string{"content"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "list_writable_namespaces",
|
||||
Description: "List the namespaces this workspace can write to.",
|
||||
InputSchema: map[string]interface{}{"type": "object", "properties": map[string]interface{}{}},
|
||||
},
|
||||
{
|
||||
Name: "list_readable_namespaces",
|
||||
Description: "List the namespaces this workspace can read from.",
|
||||
InputSchema: map[string]interface{}{"type": "object", "properties": map[string]interface{}{}},
|
||||
},
|
||||
{
|
||||
Name: "forget_memory",
|
||||
Description: "Delete a memory by id. Only memories in namespaces you can write to can be forgotten.",
|
||||
InputSchema: map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"memory_id": map[string]interface{}{"type": "string"},
|
||||
"namespace": map[string]interface{}{"type": "string"},
|
||||
},
|
||||
"required": []string{"memory_id"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// mcpToolList returns the filtered tool list for this MCP bridge.
|
||||
@@ -363,6 +439,14 @@ func (h *MCPHandler) dispatchRPC(ctx context.Context, workspaceID string, req mc
|
||||
// Tool dispatch
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
// Dispatch is the public entry point external code (tests, future
|
||||
// out-of-package callers) uses to invoke a tool by name. Forwards
|
||||
// to the unexported dispatch so existing in-package call sites
|
||||
// stay unchanged.
|
||||
func (h *MCPHandler) Dispatch(ctx context.Context, workspaceID, toolName string, args map[string]interface{}) (string, error) {
|
||||
return h.dispatch(ctx, workspaceID, toolName, args)
|
||||
}
|
||||
|
||||
func (h *MCPHandler) dispatch(ctx context.Context, workspaceID, toolName string, args map[string]interface{}) (string, error) {
|
||||
switch toolName {
|
||||
case "list_peers":
|
||||
@@ -381,6 +465,22 @@ func (h *MCPHandler) dispatch(ctx context.Context, workspaceID, toolName string,
|
||||
return h.toolCommitMemory(ctx, workspaceID, args)
|
||||
case "recall_memory":
|
||||
return h.toolRecallMemory(ctx, workspaceID, args)
|
||||
|
||||
// v2 memory tools (RFC #2728). PR-6 will alias the legacy names to
|
||||
// these; until then they are independent surfaces.
|
||||
case "commit_memory_v2":
|
||||
return h.toolCommitMemoryV2(ctx, workspaceID, args)
|
||||
case "search_memory":
|
||||
return h.toolSearchMemory(ctx, workspaceID, args)
|
||||
case "commit_summary":
|
||||
return h.toolCommitSummary(ctx, workspaceID, args)
|
||||
case "list_writable_namespaces":
|
||||
return h.toolListWritableNamespaces(ctx, workspaceID, args)
|
||||
case "list_readable_namespaces":
|
||||
return h.toolListReadableNamespaces(ctx, workspaceID, args)
|
||||
case "forget_memory":
|
||||
return h.toolForgetMemory(ctx, workspaceID, args)
|
||||
|
||||
default:
|
||||
return "", fmt.Errorf("unknown tool: %s", toolName)
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user