forked from molecule-ai/molecule-core
Compare commits
367 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e7375348e2 | |||
| 81ee0cbd55 | |||
| dca442e87a | |||
| bae34039e2 | |||
| 3d8a0a58fa | |||
| 91766e68e7 | |||
| 77882c920e | |||
| 0064f02c00 | |||
| a15972066b | |||
| 40e09508b6 | |||
| 18edf88d59 | |||
| 3ca2f40e16 | |||
| c275716005 | |||
| 093e5038d2 | |||
| 56a5427709 | |||
| 955755ce1e | |||
| 5cca462843 | |||
| 82cc331517 | |||
| d81319fd6b | |||
| b54968878a | |||
| 47617a93ef | |||
| 4f64c4366f | |||
| 7a19724194 | |||
| 6f0e914521 | |||
| e4452a2a88 | |||
| fe92194584 | |||
| f6a48d593e | |||
| 78ab8e97c6 | |||
| 46daae1ffb | |||
| 1181699482 | |||
| 0b979aed78 | |||
| 88b156a3bc | |||
| 8838f99ed3 | |||
| 9bbf32b526 | |||
| 885eff2350 | |||
| 82beb98fff | |||
| afc01d6995 | |||
| e6eda38318 | |||
| 1282b6f3d6 | |||
| d50d6f4209 | |||
| ff21c26835 | |||
| 15e1ea36de | |||
| 46bc63e373 | |||
| 09e99a09c6 | |||
| 645d687b0a | |||
| b39dc62de6 | |||
| 103ac09aeb | |||
| 59f0a449bd | |||
| c85fac4663 | |||
| 2297c083c8 | |||
| 0fec3d6fe4 | |||
| 050aa33fc1 | |||
| 4343fffcff | |||
| 2d8c45989a | |||
| 9abc9a0487 | |||
| e5a3b5282b | |||
| f96bb9f860 | |||
| f80e054a95 | |||
| c61a6ff9bd | |||
| e9e11213fc | |||
| dbd086c7ad | |||
| ea206043d8 | |||
| a3a496bced | |||
| 94937359d7 | |||
| e6be3c0df0 | |||
| 2588ab27d5 | |||
| bdba75ca43 | |||
| 63ef3b128c | |||
| 06bebc1b35 | |||
| d294f15c88 | |||
| 0a87dec50e | |||
| 3ba924d174 | |||
| 0608e15ab3 | |||
| 141ecc1c16 | |||
| b8fdbd9fab | |||
| de353a5933 | |||
| c636022d2f | |||
| e1496936e9 | |||
| 0b809cfa62 | |||
| 6d23611620 | |||
| 092724b6d7 | |||
| 2e8892ebc4 | |||
| d55360c5d8 | |||
| 517bd0efc5 | |||
| 1a1285171c | |||
| 89a6f27478 | |||
| 258c6bea44 | |||
| 364c70fc71 | |||
| c06c4c0f56 | |||
| b97a346fbf | |||
| 645c1862c4 | |||
| 59902bce83 | |||
| 6dbc36d820 | |||
| 72f0079c10 | |||
| 08d082d466 | |||
| 661eec2659 | |||
| 1a18e9398a | |||
| e6161b15a1 | |||
| aacaba024c | |||
| b9311134cf | |||
| d012a803e4 | |||
| f46c471f9b | |||
| 0b2ea0a50f | |||
| e58e446444 | |||
| f2545fcb57 | |||
| 067ad83ce5 | |||
| 6e92fe0a08 | |||
| 5b70204b01 | |||
| 76c604fb4f | |||
| 3c16c27415 | |||
| 5ad41f63ce | |||
| 0070d0bd59 | |||
| 4e39609ae0 | |||
| bbc994f6c8 | |||
| cda93e3c52 | |||
| 0acdf3bb56 | |||
| f30b3d4476 | |||
| c901d52ee3 | |||
| 0a3ec53f34 | |||
| ed29ad0d2a | |||
| 0c51df989b | |||
| 05de9f5777 | |||
| f6ddcf66ab | |||
| 03d5f80cb6 | |||
| ace3c85708 | |||
| c4bb803329 | |||
| 6cca4c5708 | |||
| 210f6e066a | |||
| 7706db5a93 | |||
| 2a5669788c | |||
| d887ce8e96 | |||
| 98845c8f42 | |||
| ce7698efc3 | |||
| 695d286dba | |||
| c733454a56 | |||
| f035482e0a | |||
| 993f8c494e | |||
| 6445c0ad17 | |||
| a5c5139e3a | |||
| 665582b612 | |||
| aefb44aff2 | |||
| cf6061a6c3 | |||
| cc58e87393 | |||
| 563b31f2af | |||
| d061642cfc | |||
| b47d4ceb00 | |||
| d00c8be8c9 | |||
| b54ceb799f | |||
| 876c0bfcd4 | |||
| 427300f3a4 | |||
| 716589742c | |||
| 74c5e0d7a8 | |||
| 1cdcabecf2 | |||
| 169e284d57 | |||
| d2046c374d | |||
| 36e263a07d | |||
| 0e3544d7b8 | |||
| c68ec23d3c | |||
| 0f0df576f5 | |||
| c8b17ea1ad | |||
| 9dae0503ee | |||
| 630dd0dae7 | |||
| be44e54b77 | |||
| 24cb2a286f | |||
| 41d5f9558f | |||
| b5c7b349d8 | |||
| 3105e87cf7 | |||
| 9c7b14923d | |||
| 8516a8f9c6 | |||
| 235aca9908 | |||
| c03074424e | |||
| fc3b5fd385 | |||
| 6e0a0dba55 | |||
| 0af4012f79 | |||
| 46ca5aa6d3 | |||
| ef206b5be6 | |||
| 3689fd9d82 | |||
| 9b909c4459 | |||
| 281c84fcde | |||
| 6159429634 | |||
| c5aaca2bbe | |||
| ec39fecda2 | |||
| 046eccbb7c | |||
| 22314a6c91 | |||
| d45241cae7 | |||
| f13d2b2b7b | |||
| 3b34dfefbc | |||
| c06e2fec5e | |||
| dea306d267 | |||
| 998e13c4bd | |||
| 188db33794 | |||
| fdf1b5d76a | |||
| f57ebd40f1 | |||
| 17a0f49140 | |||
| 201f39a6d0 | |||
| eacc229e91 | |||
| 36fd658cc0 | |||
| 56a1b659b1 | |||
| 8efb2dae8d | |||
| 26fecb902f | |||
| 79496dcffe | |||
| 904cf31e2c | |||
| 319f85a4b4 | |||
| e081c8335f | |||
| 2507cc00a0 | |||
| e418d32582 | |||
| 8aba565df0 | |||
| c6cb82e1c0 | |||
| 9b061672a0 | |||
| 899a2231d6 | |||
| cdef8932ea | |||
| 830e4aa548 | |||
| 59dec57197 | |||
| 7cb8b476ad | |||
| 21313dcb07 | |||
| 6bd38c2333 | |||
| 70176e6c8f | |||
| 284511f02e | |||
| 4d1156cb8b | |||
| ae0db09857 | |||
| e336688278 | |||
| 7a6ccde7f2 | |||
| 12e39c7311 | |||
| a9391c5900 | |||
| a7ddfbc3b5 | |||
| 8488a188c2 | |||
| 97058d5392 | |||
| 233a912cbe | |||
| 677b4858ab | |||
| 30a569c742 | |||
| eef1969e30 | |||
| f3f5c4537b | |||
| 343e164f5f | |||
| 64822dac49 | |||
| 17760e10d2 | |||
| e403d74a3d | |||
| 264e726672 | |||
| 501a42d753 | |||
| 29368dd749 | |||
| 4ba12668f0 | |||
| ab6bcc030c | |||
| 6c065a02e6 | |||
| d9801d1e62 | |||
| bb52a1a365 | |||
| b9b0a46f2e | |||
| 3f8286ea47 | |||
| 49c3433a70 | |||
| e06ebaefdf | |||
| b5df2126b9 | |||
| 8e508a7a2f | |||
| 2742c3c837 | |||
| 747c12e582 | |||
| 344e3e8914 | |||
| a27cf8f39f | |||
| 28b4e38002 | |||
| 9f35788aee | |||
| 92a29bb37c | |||
| 26d5c5ba1f | |||
| 6ef562ee05 | |||
| d850ec7c8c | |||
| 9a7f61661b | |||
| c1f993ca36 | |||
| 08252b3cd7 | |||
| 5973c9bd63 | |||
| a495b86a06 | |||
| b5bde0399a | |||
| a81b0e1e3d | |||
| c140ad28ae | |||
| 706a388806 | |||
| 91a1d5377d | |||
| 3da2392f95 | |||
| ec6e47cbe3 | |||
| 68f18424f5 | |||
| 664bbd8899 | |||
| 0b83faa33c | |||
| db5d11ffca | |||
| 140fc5fb10 | |||
| e284168c47 | |||
| d5b00d6ac1 | |||
| ea8ff626a9 | |||
| 21ed74c76a | |||
| 4299475746 | |||
| c62d2ac50c | |||
| 856ff89973 | |||
| 360361a0ce | |||
| b8246d54a7 | |||
| b7291e006b | |||
| f21cb54ae4 | |||
| 2e1cef324b | |||
| 86d9cb8b55 | |||
| 82f73b1fa3 | |||
| 75885d6017 | |||
| b6d223cd0a | |||
| d5d8de946f | |||
| 88da3d523b | |||
| 16796431b9 | |||
| 3a6d2f179d | |||
| a603dc449f | |||
| 15b98c4916 | |||
| e3588d4934 | |||
| 0b1d4f294b | |||
| c8205b009a | |||
| d3b2e9e61d | |||
| c79cf1cfa9 | |||
| 66142c1eab | |||
| 5d34abd5b5 | |||
| 5806feadcc | |||
| 01696281b3 | |||
| 57b0b4b4d1 | |||
| f7b9feb34f | |||
| 83e3fe436f | |||
| ecb6ad3c2a | |||
| 142b8e9d5b | |||
| ca6fc55c8b | |||
| e22a56d351 | |||
| 4050999a15 | |||
| e30d870b0f | |||
| da17753dec | |||
| fcb2049f3f | |||
| 51603c7b0a | |||
| 2bff662e37 | |||
| 593f2bd2be | |||
| e8943dffd7 | |||
| e955597a98 | |||
| 055e447355 | |||
| 1557197289 | |||
| c02cb0e1b6 | |||
| e632a31347 | |||
| d1de330152 | |||
| 1c9cea980d | |||
| 1475637dca | |||
| 51e48a267a | |||
| 558a0631f9 | |||
| e779a4ae7b | |||
| 4a6095ee1a | |||
| 240d513ab8 | |||
| 34d467fe8a | |||
| 2dcacb8f6b | |||
| c2191684bf | |||
| 2d1b15ecbc | |||
| 9785f5ebb1 | |||
| 949b1b97a5 | |||
| b733cf46c3 | |||
| 9a0d440fb7 | |||
| 22326d6591 | |||
| d8210514c1 | |||
| bfa54e2ee7 | |||
| 9559118678 | |||
| f75599eba9 | |||
| 80c612d987 | |||
| 4742b6c3f4 | |||
| 499fed5080 | |||
| 59d65ba557 | |||
| 42cf4f444c | |||
| a57382e918 | |||
| aa6c42f042 | |||
| bdfa45572e | |||
| 480b0576f2 | |||
| 3623e975ec | |||
| f088090b27 | |||
| de01544d6b | |||
| 9d159f0a94 | |||
| a18d116606 | |||
| dd5c54dbaa | |||
| ef95628202 | |||
| 00e4766046 | |||
| 592f47694b |
+33
-1
@@ -95,7 +95,39 @@ if [ -n "$STAGED_GO" ]; then
|
||||
fi
|
||||
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# 5. Secrets: No tokens/keys in staged files
|
||||
# 5. Go: build check — catches bot-generated structurally-invalid Go (#1770)
|
||||
# ──────────────────────────────────────────────────────────
|
||||
#
|
||||
# Background: bot agents have produced syntactically-broken Go that the
|
||||
# patch tool happily applied (e.g. PR #1769 commit 66ea0b64 — function
|
||||
# declaration nested inside another function's body). Compilation failed,
|
||||
# staging Platform(Go) was red for hours. CI catches this AT PR-time but
|
||||
# by then the malformed commit is already shared.
|
||||
#
|
||||
# Pre-commit guard: when ANY .go file in workspace-server/ is staged, run
|
||||
# `go build ./...` from workspace-server. If it fails, reject the commit.
|
||||
# Cost: ~5-10s on a warm cache; acceptable for the class of bug it
|
||||
# catches. Skip when go isn't available (CI runners that need to bypass).
|
||||
|
||||
if [ -n "$STAGED_GO" ]; then
|
||||
if command -v go >/dev/null 2>&1; then
|
||||
if ! (cd workspace-server && go build ./... >/tmp/precommit-go-build.log 2>&1); then
|
||||
echo "❌ GO BUILD FAILED — staged Go changes don't compile (workspace-server/)."
|
||||
echo " Output:"
|
||||
sed 's/^/ /' /tmp/precommit-go-build.log | head -20
|
||||
echo " Fix the build error before committing. See #1770 for context."
|
||||
ERRORS=$((ERRORS + 1))
|
||||
fi
|
||||
else
|
||||
# Bots and CI runners may bypass when go isn't installed — surface a
|
||||
# warning so the absence is visible, but don't block. Humans hit this
|
||||
# only if they didn't run setup.sh.
|
||||
echo "⚠️ go not installed — skipping go-build pre-commit check (#1770)"
|
||||
fi
|
||||
fi
|
||||
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# 6. Secrets: No tokens/keys in staged files
|
||||
# ──────────────────────────────────────────────────────────
|
||||
|
||||
ALL_STAGED=$(git diff --cached --name-only --diff-filter=ACM || true)
|
||||
|
||||
@@ -155,6 +155,20 @@ jobs:
|
||||
fi
|
||||
|
||||
# Upstream is publish-workspace-server-image. Check E2E state.
|
||||
# The jq filter must defend against TWO empty cases that gh
|
||||
# CLI emits indistinguishably:
|
||||
# 1. gh exits non-zero (network blip, auth issue) → handled
|
||||
# by the `|| echo "none/none"` fallback below.
|
||||
# 2. gh exits zero but returns `[]` (no E2E run on this
|
||||
# main SHA — the common case for canvas-only / cmd-only
|
||||
# / sweep-only changes whose paths don't trigger E2E).
|
||||
# Without `(.[0] // {})`, jq sees `null` and emits
|
||||
# "null/none" — which the case statement below has no
|
||||
# branch for, so it falls into *) → exit 1.
|
||||
# Surfaced 2026-04-30 the first time the App-token chain
|
||||
# (#2389) actually fired auto-promote-on-e2e from a publish
|
||||
# upstream — every prior run was E2E-upstream which
|
||||
# short-circuits before this gate.
|
||||
RESULT=$(gh run list \
|
||||
--repo "$REPO" \
|
||||
--workflow e2e-staging-saas.yml \
|
||||
@@ -162,7 +176,7 @@ jobs:
|
||||
--commit "$SHA" \
|
||||
--limit 1 \
|
||||
--json status,conclusion \
|
||||
--jq '.[0] | "\(.status)/\(.conclusion // "none")"' \
|
||||
--jq '(.[0] // {}) | "\(.status // "none")/\(.conclusion // "none")"' \
|
||||
2>/dev/null || echo "none/none")
|
||||
|
||||
echo "E2E Staging SaaS for ${SHA:0:7}: $RESULT"
|
||||
@@ -236,6 +250,135 @@ jobs:
|
||||
echo " ok: $tag exists"
|
||||
done
|
||||
|
||||
- name: Ancestry check — refuse to promote :latest backwards
|
||||
# #2244: workflow_run completions arrive in arbitrary order. If
|
||||
# SHA-A and SHA-B both reach main within ~10 min and SHA-B's E2E
|
||||
# completes before SHA-A's, this workflow can fire for SHA-A
|
||||
# AFTER it already promoted SHA-B → :latest goes backwards. The
|
||||
# orphan-reconciler "next run corrects it" doesn't apply: there's
|
||||
# no auto-corrective re-promote, :latest stays wrong until the
|
||||
# next main push lands.
|
||||
#
|
||||
# Detection: read current :latest's `org.opencontainers.image.revision`
|
||||
# label (set by publish-workspace-server-image.yml at build time)
|
||||
# and ask the GitHub compare API whether the candidate SHA is
|
||||
# ahead-of / identical-to / behind / diverged-from current.
|
||||
# Hard-fail on `behind` and `diverged` per the approved design —
|
||||
# silent-bypass is the class we're moving away from. Workflow
|
||||
# goes red, oncall sees it, operator decides how to recover
|
||||
# (manual dispatch with the right SHA, force-promote, etc.).
|
||||
#
|
||||
# Manual dispatch skips this check — operator override semantics
|
||||
# match the gate-check step above.
|
||||
#
|
||||
# Backward-compat: when current :latest carries no revision
|
||||
# label (legacy image pre-publish-with-label), skip-with-warning.
|
||||
# All :latest images on main are post-label as of 2026-04-29, so
|
||||
# this branch will be dead within 90 days; remove then.
|
||||
if: steps.gate.outputs.proceed == 'true' && github.event_name != 'workflow_dispatch'
|
||||
id: ancestry
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
REPO: ${{ github.repository }}
|
||||
TARGET_SHA: ${{ steps.sha.outputs.full }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Read the current :latest config and pull the revision label.
|
||||
# `crane config` returns the OCI image config blob (not the manifest);
|
||||
# labels live under `.config.Labels`. `// empty` makes jq return ""
|
||||
# rather than the literal "null" so the test below works.
|
||||
CURRENT_REVISION=$(crane config "${IMAGE_NAME}:latest" 2>/dev/null \
|
||||
| jq -r '.config.Labels["org.opencontainers.image.revision"] // empty' \
|
||||
|| true)
|
||||
|
||||
if [ -z "$CURRENT_REVISION" ]; then
|
||||
echo "decision=skip-no-label" >> "$GITHUB_OUTPUT"
|
||||
{
|
||||
echo "## ⚠ Ancestry check skipped — current :latest has no revision label"
|
||||
echo
|
||||
echo "Likely a legacy image built before \`org.opencontainers.image.revision\` was set."
|
||||
echo "Falling through to retag. After all \`:latest\` images are post-label (TODO 90 days), this branch is dead and should be removed."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "::warning::Current :latest carries no revision label — skipping ancestry check (legacy image)"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ "$CURRENT_REVISION" = "$TARGET_SHA" ]; then
|
||||
echo "decision=identical" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice:::latest already at ${TARGET_SHA:0:7} — retag will be a no-op"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Ask GitHub which side of the merge graph TARGET_SHA sits on
|
||||
# relative to CURRENT_REVISION. Returns one of: ahead | identical
|
||||
# | behind | diverged. Network or auth errors collapse to "error"
|
||||
# via the explicit fallback so the case below always matches.
|
||||
STATUS=$(gh api \
|
||||
"repos/${REPO}/compare/${CURRENT_REVISION}...${TARGET_SHA}" \
|
||||
--jq '.status' 2>/dev/null || echo "error")
|
||||
|
||||
echo "ancestry compare ${CURRENT_REVISION:0:7} → ${TARGET_SHA:0:7}: $STATUS"
|
||||
|
||||
case "$STATUS" in
|
||||
ahead)
|
||||
echo "decision=ahead" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::Target ${TARGET_SHA:0:7} is ahead of current :latest (${CURRENT_REVISION:0:7}) — proceeding with retag"
|
||||
;;
|
||||
identical)
|
||||
echo "decision=identical" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::Target identical to :latest — retag will be a no-op"
|
||||
;;
|
||||
behind)
|
||||
echo "decision=behind" >> "$GITHUB_OUTPUT"
|
||||
{
|
||||
echo "## ❌ Auto-promote refused — target is BEHIND current :latest"
|
||||
echo
|
||||
echo "| Field | Value |"
|
||||
echo "|---|---|"
|
||||
echo "| Target SHA | \`$TARGET_SHA\` |"
|
||||
echo "| Current :latest revision | \`$CURRENT_REVISION\` |"
|
||||
echo "| GitHub compare status | \`behind\` |"
|
||||
echo
|
||||
echo "This guard catches the workflow_run-completion-order race (#2244):"
|
||||
echo "two rapid main pushes whose E2Es complete out-of-order can otherwise"
|
||||
echo "promote \`:latest\` backwards. \`:latest\` stays on \`${CURRENT_REVISION:0:7}\`."
|
||||
echo
|
||||
echo "**Recovery:** if this is a legitimate revert that should land on \`:latest\`,"
|
||||
echo "manually dispatch this workflow with the target sha as input — the manual-dispatch"
|
||||
echo "path skips the ancestry check (operator override)."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
exit 1
|
||||
;;
|
||||
diverged)
|
||||
echo "decision=diverged" >> "$GITHUB_OUTPUT"
|
||||
{
|
||||
echo "## ❓ Auto-promote refused — history diverged"
|
||||
echo
|
||||
echo "| Field | Value |"
|
||||
echo "|---|---|"
|
||||
echo "| Target SHA | \`$TARGET_SHA\` |"
|
||||
echo "| Current :latest revision | \`$CURRENT_REVISION\` |"
|
||||
echo "| GitHub compare status | \`diverged\` |"
|
||||
echo
|
||||
echo "Likely cause: force-push rewrote main's history, leaving the previous"
|
||||
echo "\`:latest\` revision orphaned. Needs human review before \`:latest\` advances."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
exit 1
|
||||
;;
|
||||
error|*)
|
||||
echo "decision=error" >> "$GITHUB_OUTPUT"
|
||||
{
|
||||
echo "## ❌ Auto-promote aborted — ancestry-check API error"
|
||||
echo
|
||||
echo "\`gh api repos/${REPO}/compare/${CURRENT_REVISION}...${TARGET_SHA}\` returned unexpected status: \`$STATUS\`"
|
||||
echo
|
||||
echo "Manual dispatch with the target sha bypasses this check."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Retag platform :staging-<sha> → :latest
|
||||
if: steps.gate.outputs.proceed == 'true'
|
||||
run: |
|
||||
|
||||
@@ -76,6 +76,27 @@ on:
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
# actions: write is needed by the post-merge dispatch tail step
|
||||
# (#2358 / #2357) — `gh workflow run publish-workspace-server-image.yml`
|
||||
# POSTs to /actions/workflows/.../dispatches which requires this scope.
|
||||
# Without it the call 403s and the publish/canary/redeploy chain still
|
||||
# doesn't run on staging→main promotions, undoing #2358.
|
||||
actions: write
|
||||
|
||||
# Serialize auto-promote runs. Multiple staging gate completions can land
|
||||
# in quick succession (CI + E2E + CodeQL all finish within seconds of
|
||||
# each other on a green PR) — without this, two parallel runs both:
|
||||
# 1. Open / re-use the same promote PR.
|
||||
# 2. Both call `gh pr merge --auto` (idempotent — fine).
|
||||
# 3. Both poll for the same mergedAt and both `gh workflow run` publish
|
||||
# → 2× redundant publish builds racing for the same `:staging-latest`
|
||||
# retag, and 2× canary-verify chains.
|
||||
# cancel-in-progress: false because we don't want a brand-new run to kill
|
||||
# a polling-tail that's about to dispatch — the polling tail's 30 min cap
|
||||
# is the right backstop, not workflow-level cancel.
|
||||
concurrency:
|
||||
group: auto-promote-staging
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
check-all-gates-green:
|
||||
@@ -240,3 +261,124 @@ jobs:
|
||||
echo
|
||||
echo "Merge queue lands the PR once required gates are green; no human action needed unless gates fail."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
# Hand the PR number to the next step so we can dispatch the
|
||||
# tenant-redeploy chain after the merge queue lands the merge.
|
||||
echo "promote_pr_num=${PR_NUM}" >> "$GITHUB_OUTPUT"
|
||||
id: promote_pr
|
||||
|
||||
# Mint a short-lived GitHub App installation token for the dispatch
|
||||
# step below. We CANNOT use `secrets.GITHUB_TOKEN` to dispatch the
|
||||
# downstream publish chain — workflow runs created by GITHUB_TOKEN
|
||||
# do not fire `workflow_run` triggers on completion (the
|
||||
# documented "no recursion" rule —
|
||||
# https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow).
|
||||
#
|
||||
# Symptom this caused (root-caused on 2026-04-30): publish-image
|
||||
# ran successfully twice (21313dc 14:41Z, 59dec57 15:21Z) but
|
||||
# canary-verify and redeploy-tenants-on-main never chained,
|
||||
# because the publish run's `triggering_actor` was
|
||||
# `github-actions[bot]` (i.e. GITHUB_TOKEN). A manual dispatch
|
||||
# earlier in the day with the operator's PAT (d850ec7 06:52Z) did
|
||||
# chain — same workflow file, only the actor differed.
|
||||
#
|
||||
# An App token's triggering_actor is the App user (e.g.
|
||||
# `molecule-ai[bot]`), which IS allowed to fire downstream
|
||||
# workflow_run cascades.
|
||||
- name: Mint App token for downstream dispatch
|
||||
if: steps.promote_pr.outputs.promote_pr_num != ''
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
|
||||
with:
|
||||
app-id: ${{ secrets.MOLECULE_AI_APP_ID }}
|
||||
private-key: ${{ secrets.MOLECULE_AI_APP_PRIVATE_KEY }}
|
||||
|
||||
- name: Wait for promote merge, then dispatch publish + redeploy (#2357)
|
||||
# GITHUB_TOKEN-initiated merges suppress downstream `push` events
|
||||
# (https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow).
|
||||
# Result: when the merge queue lands the promote PR, the resulting
|
||||
# main-branch push DOES NOT fire publish-workspace-server-image,
|
||||
# so canary-verify and redeploy-tenants-on-main never run and
|
||||
# tenants stay on stale code (issue #2357).
|
||||
#
|
||||
# Workaround: poll for the merge to land, then explicitly
|
||||
# `gh workflow run` publish-workspace-server-image. The dispatch
|
||||
# MUST authenticate as the molecule-ai App (App token minted
|
||||
# above) — not GITHUB_TOKEN — so that the resulting publish
|
||||
# run's completion event can fire the workflow_run cascade
|
||||
# into canary-verify + redeploy-tenants-on-main. See the prior
|
||||
# step's comment for the GITHUB_TOKEN no-recursion details.
|
||||
#
|
||||
# Long-term fix: switch the auto-merge call above to use the
|
||||
# same App token, so the merge's push event fires
|
||||
# publish-workspace-server-image naturally and this polling tail
|
||||
# becomes unnecessary. Tracked in #2357.
|
||||
if: steps.promote_pr.outputs.promote_pr_num != ''
|
||||
env:
|
||||
GH_TOKEN: ${{ steps.app-token.outputs.token }}
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUM: ${{ steps.promote_pr.outputs.promote_pr_num }}
|
||||
run: |
|
||||
# Poll for merge — max 30 min (60 × 30s). The merge queue
|
||||
# typically lands within 5-10 min when gates are green. Break
|
||||
# early if the PR is closed without merging (operator action,
|
||||
# gates flipped red post-approval, branch-protection rejection)
|
||||
# so we don't tie up a runner for the full 30 min on a dead PR.
|
||||
MERGED=""
|
||||
STATE=""
|
||||
for _ in $(seq 1 60); do
|
||||
VIEW=$(gh pr view "$PR_NUM" --repo "$REPO" --json mergedAt,state)
|
||||
MERGED=$(echo "$VIEW" | jq -r '.mergedAt // ""')
|
||||
STATE=$(echo "$VIEW" | jq -r '.state // ""')
|
||||
if [ -n "$MERGED" ] && [ "$MERGED" != "null" ]; then
|
||||
echo "::notice::Promote PR #${PR_NUM} merged at ${MERGED}"
|
||||
break
|
||||
fi
|
||||
if [ "$STATE" = "CLOSED" ]; then
|
||||
echo "::warning::Promote PR #${PR_NUM} was closed without merging — skipping deploy dispatch."
|
||||
exit 0
|
||||
fi
|
||||
sleep 30
|
||||
done
|
||||
|
||||
if [ -z "$MERGED" ] || [ "$MERGED" = "null" ]; then
|
||||
echo "::warning::Promote PR #${PR_NUM} didn't merge within 30min — skipping deploy dispatch (manually run \`gh workflow run publish-workspace-server-image.yml --ref main\` once it lands)."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Dispatch publish on main using the App token. App-initiated
|
||||
# workflow_dispatch DOES propagate the workflow_run cascade,
|
||||
# unlike GITHUB_TOKEN-initiated dispatch.
|
||||
# publish completes → canary-verify chains via workflow_run →
|
||||
# redeploy-tenants-on-main chains via workflow_run + branches:[main].
|
||||
if gh workflow run publish-workspace-server-image.yml \
|
||||
--repo "$REPO" --ref main 2>&1; then
|
||||
echo "::notice::Dispatched publish-workspace-server-image on ref=main as molecule-ai App — canary-verify and redeploy-tenants-on-main will chain via workflow_run."
|
||||
{
|
||||
echo "## 🚀 Tenant redeploy chain dispatched"
|
||||
echo
|
||||
echo "- publish-workspace-server-image (workflow_dispatch on \`main\`, actor: \`molecule-ai[bot]\`)"
|
||||
echo "- canary-verify will chain on completion"
|
||||
echo "- redeploy-tenants-on-main will chain on canary green"
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
else
|
||||
echo "::error::Failed to dispatch publish-workspace-server-image. Run manually: gh workflow run publish-workspace-server-image.yml --ref main"
|
||||
fi
|
||||
|
||||
# ALSO dispatch auto-sync-main-to-staging.yml. Same root cause as
|
||||
# publish above (issue #2357): the merge-queue-initiated push to
|
||||
# main is by GITHUB_TOKEN → no `on: push` triggers fire downstream.
|
||||
# Without this dispatch, every staging→main promote leaves staging
|
||||
# one merge commit BEHIND main, which silently dead-locks the NEXT
|
||||
# promote PR as `mergeStateStatus: BEHIND` because main's
|
||||
# branch-protection has `strict: true`. Verified empirically on
|
||||
# 2026-05-02 against PR #2442 (Phase 2 promote): only the explicit
|
||||
# publish-workspace-server-image dispatch fired on the previous
|
||||
# promote SHA 76c604fb, while auto-sync silently no-op'd, leaving
|
||||
# staging behind for ~24h until manually bridged.
|
||||
if gh workflow run auto-sync-main-to-staging.yml \
|
||||
--repo "$REPO" --ref main 2>&1; then
|
||||
echo "::notice::Dispatched auto-sync-main-to-staging on ref=main as molecule-ai App — staging will absorb the new main merge commit via PR + merge queue."
|
||||
else
|
||||
echo "::error::Failed to dispatch auto-sync-main-to-staging. Run manually: gh workflow run auto-sync-main-to-staging.yml --ref main"
|
||||
fi
|
||||
|
||||
@@ -60,6 +60,24 @@ name: Auto-sync main → staging
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
# workflow_dispatch lets:
|
||||
# 1. Operators manually backfill a missed sync (e.g. after a manual
|
||||
# UI merge that the runner missed).
|
||||
# 2. auto-promote-staging.yml's polling tail explicitly invoke us
|
||||
# after the promote PR lands. This is load-bearing: when the
|
||||
# merge queue lands a promote-PR merge, the resulting push to
|
||||
# `main` is "by GITHUB_TOKEN", and per GitHub's no-recursion
|
||||
# rule (https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow)
|
||||
# that push event does NOT fire any downstream workflows. The
|
||||
# `on: push` trigger above is silently dead for the very pattern
|
||||
# we exist to handle. Verified empirically 2026-05-02 against
|
||||
# SHA 76c604fb (PR #2437 staging→main): only ONE workflow fired
|
||||
# (publish-workspace-server-image, dispatched explicitly by
|
||||
# auto-promote's polling tail with an App token). Every other
|
||||
# `on: push: branches: [main]` workflow — including this one —
|
||||
# was suppressed. Until the underlying merge call moves to an
|
||||
# App token, an explicit dispatch is the only reliable path.
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
@@ -71,8 +89,14 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
sync-staging:
|
||||
# Self-hosted Mac mini matches the rest of this repo's workflows.
|
||||
runs-on: [self-hosted, macos, arm64]
|
||||
# ubuntu-latest matches every other workflow in this repo. The
|
||||
# earlier `[self-hosted, macos, arm64]` was a copy-paste artefact
|
||||
# from the molecule-controlplane repo (which IS private and uses a
|
||||
# Mac runner) — molecule-core has no Mac runner registered, so the
|
||||
# job sat unassigned whenever the trigger fired. Verified 2026-05-02:
|
||||
# this is the ONLY workflow in molecule-core/.github/workflows/ with
|
||||
# a non-ubuntu runs-on.
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout staging
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
name: Check migration collisions
|
||||
|
||||
# Hard gate (#2341): fails a PR that adds a migration prefix already
|
||||
# claimed by the base branch or another open PR. Caught manually 2026-04-30
|
||||
# during PR #2276 rebase: 044_runtime_image_pins collided with
|
||||
# 044_platform_inbound_secret from RFC #2312. This workflow makes that
|
||||
# check automatic.
|
||||
#
|
||||
# Trigger model: pull_request only — there's no value running this on
|
||||
# pushes to staging or main (those are post-merge; the gate must fire
|
||||
# pre-merge to be useful). Path filter scopes to PRs that actually touch
|
||||
# migrations.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
paths:
|
||||
- 'workspace-server/migrations/**'
|
||||
- 'scripts/ops/check_migration_collisions.py'
|
||||
- '.github/workflows/check-migration-collisions.yml'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
# gh pr list/diff need read access to other PRs
|
||||
pull-requests: read
|
||||
|
||||
jobs:
|
||||
check:
|
||||
name: Migration version collision check
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
# Need history to diff against base ref
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Detect collisions
|
||||
env:
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
BASE_REF: origin/${{ github.event.pull_request.base.ref }}
|
||||
HEAD_REF: ${{ github.event.pull_request.head.sha }}
|
||||
GITHUB_REPOSITORY: ${{ github.repository }}
|
||||
# gh CLI uses GH_TOKEN from env
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
# Ensure the named base ref exists locally. checkout@v4 with
|
||||
# fetch-depth=0 pulls full history, but the explicit fetch is
|
||||
# cheap insurance against form-of-ref differences across runs.
|
||||
#
|
||||
# IMPORTANT: do NOT pass --depth=1 here. The script below uses
|
||||
# `git diff origin/<base>...<head>` (three-dot, merge-base form),
|
||||
# which fails with "fatal: no merge base" if the base ref is
|
||||
# shallow. The auto-promote staging→main PR (#2361) was blocked
|
||||
# by exactly this for ~5h on 2026-04-30 — the depth=1 fetch
|
||||
# overwrote checkout@v4's full-history clone with a shallow tip.
|
||||
git fetch origin "${{ github.event.pull_request.base.ref }}" || true
|
||||
python3 scripts/ops/check_migration_collisions.py
|
||||
+84
-25
@@ -63,29 +63,42 @@ jobs:
|
||||
echo "python=$(echo "$DIFF" | grep -qE '^workspace/|^\.github/workflows/ci\.yml$' && echo true || echo false)" >> "$GITHUB_OUTPUT"
|
||||
echo "scripts=$(echo "$DIFF" | grep -qE '^tests/e2e/|^scripts/|^infra/scripts/|^\.github/workflows/ci\.yml$' && echo true || echo false)" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# Platform (Go) is a required check on staging. Always-run + per-step
|
||||
# gating (see Canvas (Next.js) for the rationale and the failure mode
|
||||
# this avoids).
|
||||
platform-build:
|
||||
name: Platform (Go)
|
||||
needs: changes
|
||||
if: needs.changes.outputs.platform == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: workspace-server
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
- if: needs.changes.outputs.platform != 'true'
|
||||
working-directory: .
|
||||
run: echo "No platform/** changes — skipping real build steps; this job always runs to satisfy the required-check name on branch protection."
|
||||
- if: needs.changes.outputs.platform == 'true'
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- if: needs.changes.outputs.platform == 'true'
|
||||
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
- run: go mod download
|
||||
- run: go build ./cmd/server
|
||||
- if: needs.changes.outputs.platform == 'true'
|
||||
run: go mod download
|
||||
- if: needs.changes.outputs.platform == 'true'
|
||||
run: go build ./cmd/server
|
||||
# CLI (molecli) moved to standalone repo: github.com/Molecule-AI/molecule-cli
|
||||
- run: go vet ./... || true
|
||||
- name: Run golangci-lint
|
||||
- if: needs.changes.outputs.platform == 'true'
|
||||
run: go vet ./... || true
|
||||
- if: needs.changes.outputs.platform == 'true'
|
||||
name: Run golangci-lint
|
||||
run: golangci-lint run --timeout 3m ./... || true
|
||||
- name: Run tests with race detection and coverage
|
||||
- if: needs.changes.outputs.platform == 'true'
|
||||
name: Run tests with race detection and coverage
|
||||
run: go test -race -coverprofile=coverage.out ./...
|
||||
|
||||
- name: Per-file coverage report
|
||||
- if: needs.changes.outputs.platform == 'true'
|
||||
name: Per-file coverage report
|
||||
# Advisory — lists every source file with its coverage so reviewers
|
||||
# can see at-a-glance where gaps are. Sorted ascending so the worst
|
||||
# offenders float to the top. Does NOT fail the build; the hard
|
||||
@@ -98,7 +111,8 @@ jobs:
|
||||
END {for (f in s) printf "%6.1f%% %s\n", s[f]/c[f], f}' \
|
||||
| sort -n
|
||||
|
||||
- name: Check coverage thresholds
|
||||
- if: needs.changes.outputs.platform == 'true'
|
||||
name: Check coverage thresholds
|
||||
# Enforces two gates from #1823 Layer 1:
|
||||
# 1. Total floor (25% — ratchet plan in COVERAGE_FLOOR.md).
|
||||
# 2. Per-file floor — non-test .go files in security-critical
|
||||
@@ -178,23 +192,55 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Canvas (Next.js) — required check, always runs. See platform-build
|
||||
# comment above for the rationale.
|
||||
#
|
||||
# Supersedes the canvas-build-noop pattern attempted in PR #2321: two
|
||||
# jobs sharing `name:` doesn't actually satisfy branch protection
|
||||
# because the SKIPPED check run sibling is treated as not-passed
|
||||
# regardless of how many SUCCESS siblings it has. Verified empirically
|
||||
# on PR #2314 — mergeStateStatus stayed BLOCKED until I collapsed to
|
||||
# a single-job-with-conditional-steps shape.
|
||||
canvas-build:
|
||||
name: Canvas (Next.js)
|
||||
needs: changes
|
||||
if: needs.changes.outputs.canvas == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: canvas
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
||||
- if: needs.changes.outputs.canvas != 'true'
|
||||
working-directory: .
|
||||
run: echo "No canvas/** changes — skipping real build steps; this job always runs to satisfy the required-check name on branch protection."
|
||||
- if: needs.changes.outputs.canvas == 'true'
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- if: needs.changes.outputs.canvas == 'true'
|
||||
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
||||
with:
|
||||
node-version: '22'
|
||||
- run: rm -f package-lock.json && npm install
|
||||
- run: npm run build
|
||||
- name: Run tests
|
||||
run: npx vitest run
|
||||
- if: needs.changes.outputs.canvas == 'true'
|
||||
run: rm -f package-lock.json && npm install
|
||||
- if: needs.changes.outputs.canvas == 'true'
|
||||
run: npm run build
|
||||
- if: needs.changes.outputs.canvas == 'true'
|
||||
name: Run tests with coverage
|
||||
# Coverage instrumentation is configured in canvas/vitest.config.ts
|
||||
# (provider: v8, reporters: text + html + json-summary). Step 2 of
|
||||
# #1815 — wires coverage into CI so we get a baseline visible on
|
||||
# every PR. No threshold gate yet; thresholds dial in (Step 3, also
|
||||
# tracked in #1815) after the team sees what current coverage is.
|
||||
# Per the inline comment in vitest.config.ts: "first land
|
||||
# observability so we can see the baseline, then dial in
|
||||
# thresholds + a hard gate" — this PR ships the observability half.
|
||||
run: npx vitest run --coverage
|
||||
- name: Upload coverage summary as artifact
|
||||
if: needs.changes.outputs.canvas == 'true' && always()
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: canvas-coverage-${{ github.run_id }}
|
||||
path: canvas/coverage/
|
||||
retention-days: 7
|
||||
if-no-files-found: warn
|
||||
|
||||
# MCP Server + SDK removed from CI — now in standalone repos:
|
||||
# - github.com/Molecule-AI/molecule-mcp-server (npm CI)
|
||||
@@ -204,14 +250,19 @@ jobs:
|
||||
# It now has workflow-level concurrency (cancel-in-progress: false) so
|
||||
# new pushes queue the E2E run rather than cancelling it at the run level.
|
||||
|
||||
# Shellcheck (E2E scripts) — required check, always runs. See
|
||||
# platform-build for the rationale.
|
||||
shellcheck:
|
||||
name: Shellcheck (E2E scripts)
|
||||
needs: changes
|
||||
if: needs.changes.outputs.scripts == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- name: Run shellcheck on tests/e2e/*.sh and infra/scripts/*.sh
|
||||
- if: needs.changes.outputs.scripts != 'true'
|
||||
run: echo "No tests/e2e/ or infra/scripts/ changes — skipping real shellcheck; this job always runs to satisfy the required-check name on branch protection."
|
||||
- if: needs.changes.outputs.scripts == 'true'
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- if: needs.changes.outputs.scripts == 'true'
|
||||
name: Run shellcheck on tests/e2e/*.sh and infra/scripts/*.sh
|
||||
# shellcheck is pre-installed on ubuntu-latest runners (via apt).
|
||||
# infra/scripts/ is included because setup.sh + nuke.sh gate the
|
||||
# README quickstart — a shellcheck regression there silently breaks
|
||||
@@ -265,10 +316,11 @@ jobs:
|
||||
"repos/${{ github.repository }}/commits/${{ github.sha }}/comments" \
|
||||
--field "body=@/tmp/deploy-reminder.md"
|
||||
|
||||
# Python Lint & Test — required check, always runs. See platform-build
|
||||
# for the rationale.
|
||||
python-lint:
|
||||
name: Python Lint & Test
|
||||
needs: changes
|
||||
if: needs.changes.outputs.python == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
WORKSPACE_ID: test
|
||||
@@ -276,16 +328,23 @@ jobs:
|
||||
run:
|
||||
working-directory: workspace
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
- if: needs.changes.outputs.python != 'true'
|
||||
working-directory: .
|
||||
run: echo "No workspace/** changes — skipping real lint+test; this job always runs to satisfy the required-check name on branch protection."
|
||||
- if: needs.changes.outputs.python == 'true'
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- if: needs.changes.outputs.python == 'true'
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: pip
|
||||
cache-dependency-path: workspace/requirements.txt
|
||||
- run: pip install -r requirements.txt pytest pytest-asyncio pytest-cov
|
||||
- if: needs.changes.outputs.python == 'true'
|
||||
run: pip install -r requirements.txt pytest pytest-asyncio pytest-cov
|
||||
# Coverage flags + fail-under floor moved into workspace/pytest.ini
|
||||
# (issue #1817) so local `pytest` and CI use identical config.
|
||||
- run: python -m pytest --tb=short
|
||||
- if: needs.changes.outputs.python == 'true'
|
||||
run: python -m pytest --tb=short
|
||||
|
||||
# SDK + plugin validation moved to standalone repo:
|
||||
# github.com/Molecule-AI/molecule-sdk-python
|
||||
|
||||
@@ -0,0 +1,160 @@
|
||||
name: Continuous synthetic E2E (staging)
|
||||
|
||||
# Hard gate (#2342): cron-driven full-lifecycle E2E that catches
|
||||
# regressions visible only at runtime — schema drift, deployment-pipeline
|
||||
# gaps, vendor outages, env-var rotations, DNS / CF / Railway side-effects.
|
||||
#
|
||||
# Why this gate exists:
|
||||
# PR-time CI catches code-level regressions but not deployment-time or
|
||||
# integration-time ones. Today's empirical data:
|
||||
# • #2345 (A2A v0.2 silent drop) — passed all unit tests, broke at
|
||||
# JSON-RPC parse layer between sender and receiver. Visible only
|
||||
# to a sender exercising the full path.
|
||||
# • RFC #2312 chat upload — landed on staging-branch but never
|
||||
# reached staging tenants because publish-workspace-server-image
|
||||
# was main-only. Caught by manual dogfooding hours after deploy.
|
||||
# Both would have surfaced within 15-20 min of regression if a
|
||||
# continuous synth-E2E was running.
|
||||
#
|
||||
# Cadence: every 20 min (3x/hour). The script is conservatively
|
||||
# bounded at 10 min wall-clock; even on degraded staging it should
|
||||
# finish before the next firing. cron-overlap is guarded by the
|
||||
# concurrency group below.
|
||||
#
|
||||
# Cost: ~3 runs/hour × 5-10 min × $0.008/min GHA = ~$0.50-$1/day.
|
||||
# Plus a fresh tenant provisioned + torn down each run (Railway +
|
||||
# AWS pennies). Negligible.
|
||||
#
|
||||
# Failure handling: when the run fails, the workflow exits non-zero
|
||||
# and GitHub's standard email/notification path fires. Operators
|
||||
# can subscribe to this workflow's failure channel for paging-grade
|
||||
# alerting.
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Every 20 minutes, on the :00 :20 :40. Offsets the existing :15
|
||||
# sweep-cf-orphans and :45 sweep-cf-tunnels so the three
|
||||
# operations don't all hit Cloudflare/AWS at the same minute.
|
||||
- cron: '0,20,40 * * * *'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
runtime:
|
||||
description: "Runtime to provision (langgraph = fastest, default; hermes = slower but covers SDK-native path; claude-code = needs OAUTH token in tenant env)"
|
||||
required: false
|
||||
default: "langgraph"
|
||||
type: string
|
||||
keep_org:
|
||||
description: "Skip teardown for post-mortem debugging (only manual dispatch — never set this for cron runs)"
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
# No issue-write here — failures surface as red runs in the workflow
|
||||
# history. If you want auto-issue-on-fail, add a follow-up step that
|
||||
# uses gh issue create gated on `if: failure()`. Keeping the surface
|
||||
# minimal until that's actually wanted.
|
||||
|
||||
# Serialize so two firings can never overlap. Cron firing every 20 min
|
||||
# but scripts conservatively bounded at 10 min — overlap shouldn't
|
||||
# happen in steady state, but if a run hangs we don't want N more
|
||||
# stacking up.
|
||||
concurrency:
|
||||
group: continuous-synth-e2e
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
synth:
|
||||
name: Synthetic E2E against staging
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 12
|
||||
env:
|
||||
# langgraph default keeps cold-start under 5 min on staging EC2.
|
||||
# hermes is slower (~7-10 min) and isn't needed for the
|
||||
# regression class this gate exists to catch (deployment-pipeline
|
||||
# + schema-drift + integration). Operators can pick hermes via
|
||||
# workflow_dispatch when they need to exercise the SDK-native
|
||||
# session path.
|
||||
E2E_RUNTIME: ${{ github.event.inputs.runtime || 'langgraph' }}
|
||||
# Bound to 10 min so a stuck provision fails the run instead of
|
||||
# holding up the next cron firing. 15-min default in the script
|
||||
# is for the on-PR full lifecycle where we have more headroom.
|
||||
E2E_PROVISION_TIMEOUT_SECS: '600'
|
||||
# Slug suffix — namespaced "synth-" so these runs are
|
||||
# distinguishable from PR-driven runs in CP admin.
|
||||
E2E_RUN_ID: synth-${{ github.run_id }}
|
||||
# Forced false for cron; respected for manual dispatch
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org == 'true' && '1' || '' }}
|
||||
MOLECULE_CP_URL: ${{ vars.STAGING_CP_URL || 'https://staging-api.moleculesai.app' }}
|
||||
MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Verify required secret present
|
||||
run: |
|
||||
# Schedule-vs-dispatch hardening (mirrors the sweep-cf-* and
|
||||
# redeploy-tenants-on-* workflows): hard-fail on missing secret
|
||||
# for cron firing so a misconfigured-repo doesn't silently
|
||||
# report green while doing nothing. Soft-skip on operator
|
||||
# dispatch — operators can dispatch ad-hoc to verify a fix
|
||||
# without setting up the secret first.
|
||||
if [ -z "${MOLECULE_ADMIN_TOKEN:-}" ]; then
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
echo "::warning::CP_STAGING_ADMIN_API_TOKEN not set — synth E2E cannot run"
|
||||
echo "::warning::Set it at Settings → Secrets and Variables → Actions"
|
||||
exit 0
|
||||
fi
|
||||
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret missing — synth E2E cannot run"
|
||||
echo "::error::Set it at Settings → Secrets and Variables → Actions; pull from staging-CP's CP_ADMIN_API_TOKEN env in Railway."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install required tools
|
||||
run: |
|
||||
# The script depends on jq + curl (already on ubuntu-latest)
|
||||
# and python3 (likewise). Verify they're all present so we
|
||||
# fail fast on a runner image regression rather than mid-script.
|
||||
for cmd in jq curl python3; do
|
||||
command -v "$cmd" >/dev/null 2>&1 || {
|
||||
echo "::error::required tool '$cmd' not on PATH — runner image regression?"
|
||||
exit 1
|
||||
}
|
||||
done
|
||||
|
||||
- name: Run synthetic E2E
|
||||
# The script handles its own teardown via EXIT trap; even on
|
||||
# failure (timeout, assertion), the org is deprovisioned and
|
||||
# leaks are reported. Exit code propagates from the script.
|
||||
run: |
|
||||
bash tests/e2e/test_staging_full_saas.sh
|
||||
|
||||
- name: Failure summary
|
||||
# Runs only on failure. Adds a job summary so the workflow run
|
||||
# page shows a quick "what happened" instead of forcing readers
|
||||
# to scroll through script output.
|
||||
if: failure()
|
||||
run: |
|
||||
{
|
||||
echo "## Continuous synth E2E failed"
|
||||
echo ""
|
||||
echo "**Run ID:** ${{ github.run_id }}"
|
||||
echo "**Trigger:** ${{ github.event_name }}"
|
||||
echo "**Runtime:** ${E2E_RUNTIME}"
|
||||
echo "**Slug:** synth-${{ github.run_id }}"
|
||||
echo ""
|
||||
echo "### What this means"
|
||||
echo ""
|
||||
echo "Staging just regressed on a path that previously worked. Likely classes:"
|
||||
echo "- Schema mismatch between sender and receiver (#2345 class)"
|
||||
echo "- Deployment-pipeline gap (RFC #2312 / staging-tenant-image-stale class)"
|
||||
echo "- Vendor outage (Cloudflare, Railway, AWS, GHCR)"
|
||||
echo "- Staging-CP env var rotation"
|
||||
echo ""
|
||||
echo "### Next steps"
|
||||
echo ""
|
||||
echo "1. Check the script output above for the assertion that failed"
|
||||
echo "2. If it's a vendor outage, no action needed — next firing in ~20 min"
|
||||
echo "3. If it's a code regression, find the causing PR via \`git log\` against last green run and revert/fix"
|
||||
echo "4. Keep an eye on the next 1-2 firings — flake vs persistent fail differs in priority"
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
@@ -2,22 +2,16 @@ name: E2E API Smoke Test
|
||||
# Extracted from ci.yml so workflow-level concurrency can protect this job
|
||||
# from run-level cancellation (issue #458).
|
||||
#
|
||||
# Trigger model (changed 2026-04-28 — see auto-promote gap below):
|
||||
# Trigger model (revised 2026-04-29):
|
||||
#
|
||||
# This workflow always FIRES on push/pull_request to staging+main, but
|
||||
# only does real work when paths under `workspace-server/`,
|
||||
# `tests/e2e/`, or this workflow file changed. The detect-changes job
|
||||
# uses dorny/paths-filter to decide; the e2e-api job runs only if
|
||||
# changes match. Otherwise the no-op job emits success so the workflow
|
||||
# always produces a `completed/success` run record.
|
||||
#
|
||||
# Why: auto-promote-staging.yml's gate-check (line 99) treats "workflow
|
||||
# didn't run" as failure, which dead-locked any platform-only or
|
||||
# test-only push to staging that didn't touch workspace-server paths.
|
||||
# Dropping the path filter on the trigger and gating real work
|
||||
# internally guarantees the workflow always emits a result that the
|
||||
# auto-promote chain can read. Same pattern applied to
|
||||
# e2e-staging-canvas.yml in the same PR.
|
||||
# Always FIRES on push/pull_request to staging+main. Real work is gated
|
||||
# per-step on `needs.detect-changes.outputs.api` — when paths under
|
||||
# `workspace-server/`, `tests/e2e/`, or this workflow file haven't
|
||||
# changed, the no-op step alone runs and emits SUCCESS for the
|
||||
# `E2E API Smoke Test` check, satisfying branch protection without
|
||||
# spending CI cycles. See the in-job comment on the `e2e-api` job for
|
||||
# why this is one job (not two-jobs-sharing-name) and the 2026-04-29
|
||||
# PR #2264 incident that drove the consolidation.
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -66,27 +60,20 @@ jobs:
|
||||
echo "api=${{ steps.filter.outputs.api }}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Same `name:` as the real job below so the check-run produced by the
|
||||
# no-op path is indistinguishable from the real one for branch
|
||||
# protection purposes. Without this, the real job was always skipped on
|
||||
# paths-filtered commits → branch protection on `main` saw "E2E API
|
||||
# Smoke Test" as a missing required check → auto-promote-staging's
|
||||
# `git push origin main` got rejected with GH006. Observed 2026-04-28
|
||||
# 00:22 UTC blocking the staging→main promote despite all gates
|
||||
# actually passing at the workflow level.
|
||||
no-op:
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.api != 'true'
|
||||
name: E2E API Smoke Test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- run: |
|
||||
echo "No workspace-server / tests/e2e / workflow changes — E2E API gate satisfied without running tests."
|
||||
echo "::notice::E2E API Smoke Test no-op pass (paths filter excluded this commit)."
|
||||
|
||||
# ONE job (no job-level `if:`) that always runs and reports under the
|
||||
# required-check name `E2E API Smoke Test`. Real work is gated per-step
|
||||
# on `needs.detect-changes.outputs.api`. Reason: GitHub registers a
|
||||
# check run for every job that matches `name:`, and a job-level
|
||||
# `if: false` produces a SKIPPED check run. Branch protection treats
|
||||
# all check runs with a matching context name on the latest commit as a
|
||||
# SET — any SKIPPED in the set fails the required-check eval, even with
|
||||
# SUCCESS siblings. Verified 2026-04-29 on PR #2264 (staging→main):
|
||||
# 4 check runs (2 SKIPPED + 2 SUCCESS) at the head SHA blocked
|
||||
# promotion despite all real work succeeding. Collapsing to a single
|
||||
# always-running job with conditional steps emits exactly one SUCCESS
|
||||
# check run regardless of paths filter — branch-protection-clean.
|
||||
e2e-api:
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
name: E2E API Smoke Test
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
@@ -97,13 +84,21 @@ jobs:
|
||||
PG_CONTAINER: molecule-ci-postgres
|
||||
REDIS_CONTAINER: molecule-ci-redis
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
- name: No-op pass (paths filter excluded this commit)
|
||||
if: needs.detect-changes.outputs.api != 'true'
|
||||
run: |
|
||||
echo "No workspace-server / tests/e2e / workflow changes — E2E API gate satisfied without running tests."
|
||||
echo "::notice::E2E API Smoke Test no-op pass (paths filter excluded this commit)."
|
||||
- if: needs.detect-changes.outputs.api == 'true'
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- if: needs.detect-changes.outputs.api == 'true'
|
||||
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
cache: true
|
||||
cache-dependency-path: workspace-server/go.sum
|
||||
- name: Start Postgres (docker)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: |
|
||||
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
|
||||
docker run -d --name "$PG_CONTAINER" -e POSTGRES_USER=dev -e POSTGRES_PASSWORD=dev -e POSTGRES_DB=molecule -p 15432:5432 postgres:16
|
||||
@@ -118,6 +113,7 @@ jobs:
|
||||
docker logs "$PG_CONTAINER" || true
|
||||
exit 1
|
||||
- name: Start Redis (docker)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: |
|
||||
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
|
||||
docker run -d --name "$REDIS_CONTAINER" -p 16379:6379 redis:7
|
||||
@@ -132,14 +128,17 @@ jobs:
|
||||
docker logs "$REDIS_CONTAINER" || true
|
||||
exit 1
|
||||
- name: Build platform
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
working-directory: workspace-server
|
||||
run: go build -o platform-server ./cmd/server
|
||||
- name: Start platform (background)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
working-directory: workspace-server
|
||||
run: |
|
||||
./platform-server > platform.log 2>&1 &
|
||||
echo $! > platform.pid
|
||||
- name: Wait for /health
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: |
|
||||
for i in $(seq 1 30); do
|
||||
if curl -sf http://localhost:8080/health > /dev/null; then
|
||||
@@ -152,6 +151,7 @@ jobs:
|
||||
cat workspace-server/platform.log || true
|
||||
exit 1
|
||||
- name: Assert migrations applied
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: |
|
||||
tables=$(docker exec "$PG_CONTAINER" psql -U dev -d molecule -tAc "SELECT count(*) FROM information_schema.tables WHERE table_schema='public' AND table_name='workspaces'")
|
||||
if [ "$tables" != "1" ]; then
|
||||
@@ -161,25 +161,28 @@ jobs:
|
||||
fi
|
||||
echo "Migrations OK"
|
||||
- name: Run E2E API tests
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_api.sh
|
||||
- name: Run notify-with-attachments E2E
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_notify_attachments_e2e.sh
|
||||
- name: Run priority-runtimes E2E (claude-code + hermes — skips when keys absent)
|
||||
# Validates the test script itself runs cleanly even with no LLM
|
||||
# keys (both phases skip gracefully). The wire-real coverage with
|
||||
# actual keys runs in canary-staging.yml + e2e-staging-saas.yml.
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_priority_runtimes_e2e.sh
|
||||
- name: Run poll-mode + since_id cursor E2E (#2339)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_poll_mode_e2e.sh
|
||||
- name: Dump platform log on failure
|
||||
if: failure()
|
||||
if: failure() && needs.detect-changes.outputs.api == 'true'
|
||||
run: cat workspace-server/platform.log || true
|
||||
- name: Stop platform
|
||||
if: always()
|
||||
if: always() && needs.detect-changes.outputs.api == 'true'
|
||||
run: |
|
||||
if [ -f workspace-server/platform.pid ]; then
|
||||
kill "$(cat workspace-server/platform.pid)" 2>/dev/null || true
|
||||
fi
|
||||
- name: Stop service containers
|
||||
if: always()
|
||||
if: always() && needs.detect-changes.outputs.api == 'true'
|
||||
run: |
|
||||
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
|
||||
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
|
||||
|
||||
@@ -13,19 +13,14 @@ name: E2E Staging Canvas (Playwright)
|
||||
# workflow — mirrors what PR #1891 does for e2e-api.yml.
|
||||
|
||||
on:
|
||||
# Trigger model (changed 2026-04-28 — see auto-promote gap below):
|
||||
# Trigger model (revised 2026-04-29):
|
||||
#
|
||||
# Always fires on push/pull_request; only does real work when canvas/
|
||||
# or this workflow file changed. The detect-changes job uses
|
||||
# dorny/paths-filter to decide; the playwright job runs only if
|
||||
# changes match. Otherwise no-op emits success so the workflow always
|
||||
# produces a `completed/success` run record.
|
||||
#
|
||||
# Why: auto-promote-staging.yml's gate-check (line 99) treats
|
||||
# "workflow didn't run" as failure, which dead-locked platform-only
|
||||
# pushes to staging. Dropping the trigger path filter and gating real
|
||||
# work internally guarantees a result the auto-promote chain can
|
||||
# read. Same pattern applied to e2e-api.yml in the same PR.
|
||||
# Always fires on push/pull_request; real work is gated per-step on
|
||||
# `needs.detect-changes.outputs.canvas`. When canvas/ paths haven't
|
||||
# changed, the no-op step alone runs and emits SUCCESS for the
|
||||
# `Canvas tabs E2E` check, satisfying branch protection without
|
||||
# spending CI cycles. See e2e-api.yml for the rationale on why this
|
||||
# is a single job rather than two-jobs-sharing-name.
|
||||
push:
|
||||
branches: [main, staging]
|
||||
pull_request:
|
||||
@@ -82,23 +77,14 @@ jobs:
|
||||
echo "canvas=${{ steps.filter.outputs.canvas }}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Same `name:` as the playwright job below so the check-run is
|
||||
# indistinguishable from the real one for branch protection. Mirrors
|
||||
# the e2e-api.yml fix in the same PR — see that file for the
|
||||
# 2026-04-28 incident reference.
|
||||
no-op:
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.canvas != 'true'
|
||||
name: Canvas tabs E2E
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- run: |
|
||||
echo "No canvas / workflow changes — E2E Staging Canvas gate satisfied without running tests."
|
||||
echo "::notice::E2E Staging Canvas no-op pass (paths filter excluded this commit)."
|
||||
|
||||
# ONE job (no job-level `if:`) that always runs and reports under the
|
||||
# required-check name `Canvas tabs E2E`. Real work is gated per-step on
|
||||
# `needs.detect-changes.outputs.canvas`. See e2e-api.yml for the full
|
||||
# rationale — same path-filter check-name parity issue blocked PR #2264
|
||||
# (staging→main) on 2026-04-29 because branch protection treats matching-
|
||||
# name check runs as a SET, and any SKIPPED member fails the eval.
|
||||
playwright:
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
name: Canvas tabs E2E
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 40
|
||||
@@ -113,9 +99,18 @@ jobs:
|
||||
working-directory: canvas
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- name: No-op pass (paths filter excluded this commit)
|
||||
if: needs.detect-changes.outputs.canvas != 'true'
|
||||
working-directory: .
|
||||
run: |
|
||||
echo "No canvas / workflow changes — E2E Staging Canvas gate satisfied without running tests."
|
||||
echo "::notice::E2E Staging Canvas no-op pass (paths filter excluded this commit)."
|
||||
|
||||
- if: needs.detect-changes.outputs.canvas == 'true'
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Verify admin token present
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
run: |
|
||||
if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
|
||||
echo "::error::Missing MOLECULE_STAGING_ADMIN_TOKEN"
|
||||
@@ -123,6 +118,7 @@ jobs:
|
||||
fi
|
||||
|
||||
- name: Set up Node
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
||||
with:
|
||||
node-version: '20'
|
||||
@@ -130,16 +126,19 @@ jobs:
|
||||
cache-dependency-path: canvas/package-lock.json
|
||||
|
||||
- name: Install canvas deps
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
run: npm ci
|
||||
|
||||
- name: Install Playwright browsers
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
run: npx playwright install --with-deps chromium
|
||||
|
||||
- name: Run staging canvas E2E
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
run: npx playwright test --config=playwright.staging.config.ts
|
||||
|
||||
- name: Upload Playwright report on failure
|
||||
if: failure()
|
||||
if: failure() && needs.detect-changes.outputs.canvas == 'true'
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: playwright-report-staging
|
||||
@@ -147,50 +146,46 @@ jobs:
|
||||
retention-days: 14
|
||||
|
||||
- name: Upload screenshots on failure
|
||||
if: failure()
|
||||
if: failure() && needs.detect-changes.outputs.canvas == 'true'
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: playwright-screenshots
|
||||
path: canvas/test-results/
|
||||
retention-days: 14
|
||||
|
||||
# Safety-net teardown mirrors the bash-harness workflow — if
|
||||
# globalTeardown didn't run (worker crash, runner cancel), this
|
||||
# step sweeps any e2e-canvas-* org tagged with today's date.
|
||||
# Safety-net teardown — fires only when Playwright's globalTeardown
|
||||
# didn't (worker crash, runner cancel). Reads the slug from
|
||||
# canvas/.playwright-staging-state.json (written by staging-setup
|
||||
# as its first action, before any CP call) and deletes only that
|
||||
# slug.
|
||||
#
|
||||
# Earlier versions of this step pattern-swept `e2e-canvas-<today>-*`
|
||||
# orgs to compensate for setup-crash-before-state-file-write. That
|
||||
# over-aggressive cleanup raced concurrent canvas-E2E runs and
|
||||
# poisoned each other's tenants — observed 2026-04-30 when three
|
||||
# real-test runs killed each other mid-test, surfacing as
|
||||
# `getaddrinfo ENOTFOUND` once CP had cleaned up the just-deleted
|
||||
# DNS record. Pattern-sweep removed; setup now writes the state
|
||||
# file before any CP work, so the slug is always recoverable.
|
||||
- name: Teardown safety net
|
||||
if: always()
|
||||
if: always() && needs.detect-changes.outputs.canvas == 'true'
|
||||
env:
|
||||
ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
|
||||
run: |
|
||||
set +e
|
||||
# Midnight-UTC rollover guard: a single-date filter misses
|
||||
# orgs created on the prior UTC day when the run crosses
|
||||
# midnight (incident 2026-04-26 23:46Z → 2026-04-27 00:12Z:
|
||||
# slug `e2e-canvas-20260426-1u8nz3` survived because the
|
||||
# safety-net step ran on the 27th, computed `today=20260427`,
|
||||
# and the filter `e2e-canvas-20260427-` never matched). Sweep
|
||||
# both today AND yesterday's dates so a cross-midnight run
|
||||
# still cleans up its own slug.
|
||||
orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
|
||||
| python3 -c "
|
||||
import json, sys, datetime
|
||||
d = json.load(sys.stdin)
|
||||
today = datetime.date.today()
|
||||
yesterday = today - datetime.timedelta(days=1)
|
||||
prefixes = (
|
||||
f'e2e-canvas-{today.strftime(\"%Y%m%d\")}-',
|
||||
f'e2e-canvas-{yesterday.strftime(\"%Y%m%d\")}-',
|
||||
)
|
||||
candidates = [o['slug'] for o in d.get('orgs', [])
|
||||
if any(o.get('slug','').startswith(p) for p in prefixes)
|
||||
and o.get('status') not in ('purged',)]
|
||||
print('\n'.join(candidates))
|
||||
" 2>/dev/null)
|
||||
for slug in $orgs; do
|
||||
curl -sS -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/dev/null || true
|
||||
done
|
||||
STATE_FILE=".playwright-staging-state.json"
|
||||
if [ ! -f "$STATE_FILE" ]; then
|
||||
echo "::notice::No state file at canvas/$STATE_FILE — Playwright globalTeardown handled it (or setup never ran)."
|
||||
exit 0
|
||||
fi
|
||||
slug=$(python3 -c "import json; print(json.load(open('$STATE_FILE')).get('slug',''))")
|
||||
if [ -z "$slug" ]; then
|
||||
echo "::warning::State file present but slug missing; nothing to clean up."
|
||||
exit 0
|
||||
fi
|
||||
echo "Deleting orphan tenant: $slug"
|
||||
curl -sS -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/dev/null || true
|
||||
exit 0
|
||||
|
||||
@@ -0,0 +1,164 @@
|
||||
name: E2E Staging External Runtime
|
||||
|
||||
# Regression for the four/five workspaces.status=awaiting_agent transitions
|
||||
# that silently failed in production for five days before migration 046
|
||||
# extended the workspace_status enum (see
|
||||
# workspace-server/migrations/046_workspace_status_awaiting_agent.up.sql).
|
||||
#
|
||||
# Why this is its own workflow (not folded into e2e-staging-saas.yml):
|
||||
# - The full-saas harness defaults to runtime=hermes, never exercises
|
||||
# external-runtime. Adding an `external` parameter to that script
|
||||
# would force every push to staging through both lifecycles in
|
||||
# series, doubling the EC2 cold-start budget.
|
||||
# - The external lifecycle has unique timing (REMOTE_LIVENESS_STALE_AFTER
|
||||
# window, 90s default + sweep interval), which we wait through
|
||||
# deliberately. Folding it into hermes would make the long path
|
||||
# even longer.
|
||||
# - It can run in parallel with the hermes E2E since both create
|
||||
# fresh tenant orgs with distinct slug prefixes (`e2e-ext-...` vs
|
||||
# `e2e-...`).
|
||||
#
|
||||
# Triggers:
|
||||
# - Push to staging when any source affecting external runtime,
|
||||
# hibernation, or the migration set changes.
|
||||
# - PR review for the same set.
|
||||
# - Manual workflow_dispatch.
|
||||
# - Daily cron at 07:30 UTC (catches drift on quiet days; staggered
|
||||
# 30 min after e2e-staging-saas.yml's 07:00 UTC cron).
|
||||
#
|
||||
# Concurrency: serialized so two staging pushes don't fight for the
|
||||
# same EC2 quota window. cancel-in-progress=false so a half-rolled
|
||||
# tenant always finishes its teardown.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [staging, main]
|
||||
paths:
|
||||
- 'workspace-server/internal/handlers/workspace.go'
|
||||
- 'workspace-server/internal/handlers/registry.go'
|
||||
- 'workspace-server/internal/handlers/workspace_restart.go'
|
||||
- 'workspace-server/internal/registry/healthsweep.go'
|
||||
- 'workspace-server/internal/registry/liveness.go'
|
||||
- 'workspace-server/migrations/**'
|
||||
- 'workspace-server/internal/db/workspace_status_enum_drift_test.go'
|
||||
- 'tests/e2e/test_staging_external_runtime.sh'
|
||||
- '.github/workflows/e2e-staging-external.yml'
|
||||
pull_request:
|
||||
branches: [staging, main]
|
||||
paths:
|
||||
- 'workspace-server/internal/handlers/workspace.go'
|
||||
- 'workspace-server/internal/handlers/registry.go'
|
||||
- 'workspace-server/internal/handlers/workspace_restart.go'
|
||||
- 'workspace-server/internal/registry/healthsweep.go'
|
||||
- 'workspace-server/internal/registry/liveness.go'
|
||||
- 'workspace-server/migrations/**'
|
||||
- 'workspace-server/internal/db/workspace_status_enum_drift_test.go'
|
||||
- 'tests/e2e/test_staging_external_runtime.sh'
|
||||
- '.github/workflows/e2e-staging-external.yml'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
keep_org:
|
||||
description: "Skip teardown for debugging (only via manual dispatch)"
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
stale_wait_secs:
|
||||
description: "Seconds to wait for the heartbeat-staleness sweep (default 180 = 90s window + 90s buffer)"
|
||||
required: false
|
||||
default: "180"
|
||||
schedule:
|
||||
- cron: '30 7 * * *'
|
||||
|
||||
concurrency:
|
||||
group: e2e-staging-external
|
||||
cancel-in-progress: false
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
e2e-staging-external:
|
||||
name: E2E Staging External Runtime
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 25
|
||||
|
||||
env:
|
||||
MOLECULE_CP_URL: https://staging-api.moleculesai.app
|
||||
MOLECULE_ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
|
||||
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
|
||||
E2E_STALE_WAIT_SECS: ${{ github.event.inputs.stale_wait_secs || '180' }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Verify admin token present
|
||||
run: |
|
||||
if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
|
||||
# Schedule + push triggers must hard-fail when the token is
|
||||
# missing — silent skip would mask infra rot. Manual dispatch
|
||||
# gets the same hard-fail; an operator running this on a fork
|
||||
# without secrets configured needs to know up-front.
|
||||
echo "::error::MOLECULE_STAGING_ADMIN_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
|
||||
exit 2
|
||||
fi
|
||||
echo "Admin token present ✓"
|
||||
|
||||
- name: CP staging health preflight
|
||||
run: |
|
||||
code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$MOLECULE_CP_URL/health")
|
||||
if [ "$code" != "200" ]; then
|
||||
echo "::error::Staging CP unhealthy (got HTTP $code). Skipping — not a workspace bug."
|
||||
exit 1
|
||||
fi
|
||||
echo "Staging CP healthy ✓"
|
||||
|
||||
- name: Run external-runtime E2E
|
||||
id: e2e
|
||||
run: bash tests/e2e/test_staging_external_runtime.sh
|
||||
|
||||
# Mirror the e2e-staging-saas.yml safety net: if the runner is
|
||||
# cancelled (e.g. concurrent staging push), the test script's
|
||||
# EXIT trap may not fire, so we sweep e2e-ext-* slugs scoped to
|
||||
# *this* run id.
|
||||
- name: Teardown safety net (runs on cancel/failure)
|
||||
if: always()
|
||||
env:
|
||||
ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
|
||||
run: |
|
||||
set +e
|
||||
orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
|
||||
| python3 -c "
|
||||
import json, sys, os, datetime
|
||||
run_id = os.environ.get('GITHUB_RUN_ID', '')
|
||||
d = json.load(sys.stdin)
|
||||
# Scope STRICTLY to this run id (e2e-ext-YYYYMMDD-<runid>-...)
|
||||
# so concurrent runs and unrelated dev probes are not touched.
|
||||
# Sweep today AND yesterday so a midnight-crossing run still
|
||||
# cleans up its own slug.
|
||||
today = datetime.date.today()
|
||||
yesterday = today - datetime.timedelta(days=1)
|
||||
dates = (today.strftime('%Y%m%d'), yesterday.strftime('%Y%m%d'))
|
||||
if not run_id:
|
||||
# Without a run id we cannot scope safely; bail rather
|
||||
# than risk deleting unrelated tenants.
|
||||
sys.exit(0)
|
||||
prefixes = tuple(f'e2e-ext-{d}-{run_id}-' for d in dates)
|
||||
for o in d.get('orgs', []):
|
||||
s = o.get('slug', '')
|
||||
if s.startswith(prefixes) and o.get('status') != 'purged':
|
||||
print(s)
|
||||
" 2>/dev/null)
|
||||
if [ -n "$orgs" ]; then
|
||||
echo "Safety-net sweep: deleting leftover orgs:"
|
||||
echo "$orgs"
|
||||
for slug in $orgs; do
|
||||
curl -sS -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/dev/null 2>&1
|
||||
done
|
||||
else
|
||||
echo "Safety-net sweep: no leftover orgs to clean."
|
||||
fi
|
||||
@@ -0,0 +1,170 @@
|
||||
name: Harness Replays
|
||||
|
||||
# Boots tests/harness (production-shape compose topology with TenantGuard,
|
||||
# /cp/* proxy, canvas proxy, real production Dockerfile.tenant) and runs
|
||||
# every replay under tests/harness/replays/. Fails the PR if any replay
|
||||
# fails.
|
||||
#
|
||||
# Why this exists: 2026-04-30 we shipped #2398 which added /buildinfo as
|
||||
# a public route in router.go but forgot to add it to TenantGuard's
|
||||
# allowlist. The handler-level test in buildinfo_test.go constructed a
|
||||
# minimal gin engine without TenantGuard — green. The harness's
|
||||
# buildinfo-stale-image.sh replay would have caught it (cf-proxy doesn't
|
||||
# inject X-Molecule-Org-Id, so the curl path is identical to production's
|
||||
# redeploy verifier), but no one ran the harness pre-merge. The bug
|
||||
# shipped; the redeploy verifier silently soft-warned every tenant as
|
||||
# "unreachable" for ~1 day before being noticed.
|
||||
#
|
||||
# This gate makes "did you actually run the harness?" a CI invariant
|
||||
# instead of a memory-discipline thing.
|
||||
#
|
||||
# Trigger model — match e2e-api.yml: always FIRES on push/pull_request
|
||||
# to staging+main, real work is gated per-step on detect-changes output.
|
||||
# One job → one check run → branch-protection-clean (the SKIPPED-in-set
|
||||
# trap from PR #2264 is documented in e2e-api.yml's e2e-api job comment).
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- 'workspace-server/**'
|
||||
- 'canvas/**'
|
||||
- 'tests/harness/**'
|
||||
- '.github/workflows/harness-replays.yml'
|
||||
pull_request:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- 'workspace-server/**'
|
||||
- 'canvas/**'
|
||||
- 'tests/harness/**'
|
||||
- '.github/workflows/harness-replays.yml'
|
||||
workflow_dispatch:
|
||||
merge_group:
|
||||
types: [checks_requested]
|
||||
|
||||
concurrency:
|
||||
# Per-SHA grouping. Per-ref kept hitting the auto-promote-staging
|
||||
# cancellation deadlock — see e2e-api.yml's concurrency block for
|
||||
# the 2026-04-28 incident that codified this pattern.
|
||||
group: harness-replays-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
detect-changes:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
run: ${{ steps.decide.outputs.run }}
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
|
||||
id: filter
|
||||
with:
|
||||
filters: |
|
||||
run:
|
||||
- 'workspace-server/**'
|
||||
- 'canvas/**'
|
||||
- 'tests/harness/**'
|
||||
- '.github/workflows/harness-replays.yml'
|
||||
- id: decide
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
echo "run=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "run=${{ steps.filter.outputs.run }}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# ONE job that always runs. Real work is gated per-step on
|
||||
# detect-changes.outputs.run so an unrelated PR (e.g. doc-only
|
||||
# change to molecule-controlplane wired here later) emits the
|
||||
# required check without spending CI cycles. Single-job pattern
|
||||
# matches e2e-api.yml — see that workflow's comment for why a
|
||||
# job-level `if: false` would block branch protection via the
|
||||
# SKIPPED-in-set bug.
|
||||
harness-replays:
|
||||
needs: detect-changes
|
||||
name: Harness Replays
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
- name: No-op pass (paths filter excluded this commit)
|
||||
if: needs.detect-changes.outputs.run != 'true'
|
||||
run: |
|
||||
echo "No workspace-server / canvas / tests/harness / workflow changes — Harness Replays gate satisfied without running."
|
||||
echo "::notice::Harness Replays no-op pass (paths filter excluded this commit)."
|
||||
|
||||
- if: needs.detect-changes.outputs.run == 'true'
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Checkout sibling plugin repo
|
||||
# Dockerfile.tenant copies molecule-ai-plugin-github-app-auth/
|
||||
# at the build-context root (see workspace-server/Dockerfile.tenant
|
||||
# line 19). PLUGIN_REPO_PAT pattern matches publish-workspace-server-image.yml.
|
||||
if: needs.detect-changes.outputs.run == 'true'
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
repository: Molecule-AI/molecule-ai-plugin-github-app-auth
|
||||
path: molecule-ai-plugin-github-app-auth
|
||||
token: ${{ secrets.PLUGIN_REPO_PAT || secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Install Python deps for replays
|
||||
# peer-discovery-404 (and future replays) eval Python against the
|
||||
# running tenant — importing workspace/a2a_client.py pulls in
|
||||
# httpx. tests/harness/requirements.txt holds just the HTTP-client
|
||||
# surface to keep CI install fast (~3s) vs the full
|
||||
# workspace/requirements.txt (~30s).
|
||||
if: needs.detect-changes.outputs.run == 'true'
|
||||
run: pip install -r tests/harness/requirements.txt
|
||||
|
||||
- name: Run all replays against the harness
|
||||
# run-all-replays.sh: boot via up.sh → seed via seed.sh → run
|
||||
# every replays/*.sh → tear down via down.sh on EXIT (trap).
|
||||
# Non-zero exit on any replay failure.
|
||||
#
|
||||
# KEEP_UP=1: without this, the script's trap-on-EXIT tears
|
||||
# down containers immediately on failure, leaving the dump
|
||||
# step below with nothing to dump (verified on PR #2410's
|
||||
# first run — tenant became unhealthy, trap fired, dump
|
||||
# step saw empty containers). Keeping them up lets the
|
||||
# failure path collect tenant/cp-stub/cf-proxy logs. The
|
||||
# always-run "Force teardown" step does the actual cleanup.
|
||||
if: needs.detect-changes.outputs.run == 'true'
|
||||
working-directory: tests/harness
|
||||
env:
|
||||
KEEP_UP: "1"
|
||||
run: ./run-all-replays.sh
|
||||
|
||||
- name: Dump compose logs on failure
|
||||
# SECRETS_ENCRYPTION_KEY: docker compose validates the entire compose
|
||||
# file even for read-only `logs` calls. up.sh generates a per-run key
|
||||
# and exports it to its OWN shell — this step runs in a fresh shell
|
||||
# that wouldn't see it, so without a placeholder the validate step
|
||||
# errors before logs print (verified against PR #2492's first run:
|
||||
# "required variable SECRETS_ENCRYPTION_KEY is missing a value").
|
||||
# A placeholder is fine — we're only reading log streams, not booting.
|
||||
if: failure() && needs.detect-changes.outputs.run == 'true'
|
||||
working-directory: tests/harness
|
||||
env:
|
||||
SECRETS_ENCRYPTION_KEY: dump-logs-placeholder
|
||||
run: |
|
||||
echo "=== docker compose ps ==="
|
||||
docker compose -f compose.yml ps || true
|
||||
echo "=== tenant-alpha logs ==="
|
||||
docker compose -f compose.yml logs tenant-alpha || true
|
||||
echo "=== tenant-beta logs ==="
|
||||
docker compose -f compose.yml logs tenant-beta || true
|
||||
echo "=== cp-stub logs ==="
|
||||
docker compose -f compose.yml logs cp-stub || true
|
||||
echo "=== cf-proxy logs ==="
|
||||
docker compose -f compose.yml logs cf-proxy || true
|
||||
echo "=== postgres-alpha logs (last 100) ==="
|
||||
docker compose -f compose.yml logs --tail 100 postgres-alpha || true
|
||||
echo "=== postgres-beta logs (last 100) ==="
|
||||
docker compose -f compose.yml logs --tail 100 postgres-beta || true
|
||||
|
||||
- name: Force teardown
|
||||
# We pass KEEP_UP=1 to run-all-replays.sh so the dump step
|
||||
# above sees real containers — that means we own teardown
|
||||
# explicitly here. Always run.
|
||||
if: always() && needs.detect-changes.outputs.run == 'true'
|
||||
working-directory: tests/harness
|
||||
run: ./down.sh || true
|
||||
@@ -154,139 +154,15 @@ jobs:
|
||||
|
||||
- name: Verify package contents (sanity)
|
||||
working-directory: ${{ runner.temp }}/runtime-build
|
||||
# Smoke logic lives in scripts/wheel_smoke.py so the same gate runs
|
||||
# at both PR-time (runtime-prbuild-compat.yml) and publish-time
|
||||
# (here). Splitting the smoke across two heredocs let them drift
|
||||
# apart historically — one script keeps them locked.
|
||||
run: |
|
||||
python -m twine check dist/*
|
||||
# Smoke-import the built wheel to catch import-rewrite mistakes
|
||||
# before they hit PyPI. Asserts on STABLE INVARIANTS only —
|
||||
# symbols + classes that are part of the package's public
|
||||
# contract (BaseAdapter interface, the canonical a2a sentinel,
|
||||
# core submodules). Don't add feature-flag-style assertions
|
||||
# here — they fire false-positive every time staging is mid-
|
||||
# release of that feature.
|
||||
python -m venv /tmp/smoke
|
||||
/tmp/smoke/bin/pip install --quiet dist/*.whl
|
||||
WORKSPACE_ID=00000000-0000-0000-0000-000000000000 \
|
||||
PLATFORM_URL=http://localhost:8080 \
|
||||
/tmp/smoke/bin/python -c "
|
||||
# Importing main is the strongest smoke test we can do here:
|
||||
# main.py is the entry point and pulls every other module
|
||||
# transitively. If the build script missed an import rewrite
|
||||
# (e.g. left a bare \`from transcript_auth import ...\` instead
|
||||
# of \`from molecule_runtime.transcript_auth import ...\` — the
|
||||
# 0.1.16 incident), this fails with ModuleNotFoundError instead
|
||||
# of shipping to PyPI and breaking every workspace startup.
|
||||
# Import the entry-point target by NAME — not just the module.
|
||||
# The wheel's pyproject.toml declares
|
||||
# `molecule-runtime = molecule_runtime.main:main_sync` so if
|
||||
# main_sync goes missing (it did in 0.1.16-0.1.18), every
|
||||
# workspace startup fails with `ImportError: cannot import name
|
||||
# 'main_sync'`. Plain `import molecule_runtime.main` doesn't
|
||||
# catch that because the module loads fine.
|
||||
from molecule_runtime.main import main_sync # noqa: F401
|
||||
from molecule_runtime import a2a_client, a2a_tools
|
||||
from molecule_runtime.builtin_tools import memory
|
||||
from molecule_runtime.adapters import get_adapter, BaseAdapter, AdapterConfig
|
||||
# Stable invariants: package exports + BaseAdapter shape.
|
||||
assert a2a_client._A2A_ERROR_PREFIX, 'a2a_client missing error sentinel'
|
||||
assert callable(get_adapter), 'adapters.get_adapter must be callable'
|
||||
assert hasattr(BaseAdapter, 'name'), 'BaseAdapter interface broken'
|
||||
assert hasattr(AdapterConfig, '__init__'), 'AdapterConfig dataclass missing'
|
||||
|
||||
# Call-shape smoke for AgentCard. Pure imports don't catch
|
||||
# field-shape regressions in upstream SDKs that only surface
|
||||
# at construction time. Two bugs of this exact class shipped
|
||||
# since the a2a-sdk 1.0 migration:
|
||||
# - state_transition_history=True (fixed in #2179)
|
||||
# - supported_protocols=[...] (the protobuf field is
|
||||
# supported_interfaces — caused every workspace boot
|
||||
# to crash with `ValueError: Protocol message AgentCard
|
||||
# has no "supported_protocols" field`; fixed alongside
|
||||
# this smoke)
|
||||
#
|
||||
# This block instantiates the EXACT classes main.py uses,
|
||||
# with the EXACT keyword arguments. If a future a2a-sdk
|
||||
# upgrade renames any of supported_interfaces / streaming /
|
||||
# push_notifications / etc., the publish fails here instead
|
||||
# of breaking every workspace startup. main.py and this
|
||||
# smoke MUST stay in lockstep — adding a kwarg to one
|
||||
# without mirroring it here is the regression vector.
|
||||
from a2a.types import AgentCard, AgentCapabilities, AgentSkill, AgentInterface
|
||||
AgentCard(
|
||||
name='smoke-agent',
|
||||
description='publish-runtime smoke test',
|
||||
version='0.0.0-smoke',
|
||||
supported_interfaces=[
|
||||
AgentInterface(protocol_binding='https://a2a.g/v1', url='http://localhost:8080'),
|
||||
],
|
||||
capabilities=AgentCapabilities(
|
||||
streaming=True,
|
||||
push_notifications=False,
|
||||
),
|
||||
skills=[
|
||||
AgentSkill(
|
||||
id='smoke-skill',
|
||||
name='Smoke',
|
||||
description='no-op',
|
||||
tags=['smoke'],
|
||||
examples=['noop'],
|
||||
),
|
||||
],
|
||||
default_input_modes=['text/plain', 'application/json'],
|
||||
default_output_modes=['text/plain', 'application/json'],
|
||||
)
|
||||
print('✓ AgentCard call-shape smoke passed')
|
||||
|
||||
# Well-known agent-card path probe alignment. main.py's
|
||||
# _send_initial_prompt() polls AGENT_CARD_WELL_KNOWN_PATH
|
||||
# to know when the local A2A server is ready. If the SDK
|
||||
# ever splits the constant value from the path that
|
||||
# create_agent_card_routes() actually mounts at, every
|
||||
# workspace silently drops its initial_prompt:
|
||||
# - Probe gets 404 every attempt.
|
||||
# - Falls through to 'server not ready after 30s,
|
||||
# skipping' even though the server is fine.
|
||||
# - The user hits a fresh chat with no kickoff context.
|
||||
# This was the #2193 incident class — the v0.x → v1.x
|
||||
# rename of /.well-known/agent.json → /.well-known/agent-card.json
|
||||
# plus the constant itself moving to a2a.utils.constants.
|
||||
# source-tree pytest (test_agent_card_well_known_path.py)
|
||||
# catches main.py-side regressions; this catches the
|
||||
# SDK-side ones BEFORE PyPI upload.
|
||||
from a2a.utils.constants import AGENT_CARD_WELL_KNOWN_PATH
|
||||
from a2a.server.routes import create_agent_card_routes
|
||||
mounted_paths = [
|
||||
getattr(r, 'path', None)
|
||||
for r in create_agent_card_routes(
|
||||
AgentCard(
|
||||
name='wk-smoke',
|
||||
description='well-known mount alignment',
|
||||
version='0.0.0-smoke',
|
||||
)
|
||||
)
|
||||
]
|
||||
assert AGENT_CARD_WELL_KNOWN_PATH in mounted_paths, (
|
||||
f'AGENT_CARD_WELL_KNOWN_PATH ({AGENT_CARD_WELL_KNOWN_PATH!r}) '
|
||||
f'is NOT among paths mounted by create_agent_card_routes '
|
||||
f'({mounted_paths!r}). The SDK constant and its own route '
|
||||
f'factory have drifted — workspace probes will 404 forever, '
|
||||
f'silently dropping every workspace initial_prompt.'
|
||||
)
|
||||
print(f'✓ well-known mount alignment OK ({AGENT_CARD_WELL_KNOWN_PATH})')
|
||||
|
||||
# Message helper smoke. a2a-sdk renamed
|
||||
# new_agent_text_message → new_text_message in the v1.x
|
||||
# protobuf-flat migration (per the v0→v1 cheat sheet). main.py
|
||||
# and a2a_executor.py call new_text_message in hot paths; if
|
||||
# the import breaks, every reply errors with ImportError before
|
||||
# the message even leaves the workspace. Importing here
|
||||
# catches a future v2.x rename at publish time.
|
||||
from a2a.helpers import new_text_message
|
||||
msg = new_text_message('smoke')
|
||||
assert msg is not None, 'new_text_message returned None'
|
||||
print('✓ message helper import + call OK')
|
||||
|
||||
print('✓ smoke import passed')
|
||||
"
|
||||
/tmp/smoke/bin/python "$GITHUB_WORKSPACE/scripts/wheel_smoke.py"
|
||||
|
||||
- name: Publish to PyPI (Trusted Publisher / OIDC)
|
||||
# PyPI side is configured: project molecule-ai-workspace-runtime →
|
||||
|
||||
@@ -1,19 +1,60 @@
|
||||
name: publish-workspace-server-image
|
||||
|
||||
# Builds and pushes Docker images to GHCR when staging is promoted to main.
|
||||
# PRs target staging (default branch). Only main push triggers production builds.
|
||||
# Builds and pushes Docker images to GHCR on staging or main pushes.
|
||||
# EC2 tenant instances pull the tenant image from GHCR.
|
||||
#
|
||||
# Branch / tag policy (see Compute tags step for the per-branch logic):
|
||||
#
|
||||
# staging push → builds image, tags :staging-<sha> + :staging-latest.
|
||||
# staging-CP pins TENANT_IMAGE=:staging-latest, so it
|
||||
# picks up staging-branch code automatically. This is
|
||||
# what makes staging-CP actually test staging-branch
|
||||
# code instead of "yesterday's main" — pre-fix, this
|
||||
# workflow only ran on main, so staging tenants
|
||||
# silently served stale code (#2308 fix RFC #2312
|
||||
# landed on staging but never reached tenants because
|
||||
# staging→main was wedged on path-filter parity bugs).
|
||||
#
|
||||
# main push → builds image, tags :staging-<sha> + :staging-latest
|
||||
# (same as before). canary-verify.yml retags
|
||||
# :staging-<sha> → :latest after canary tenants
|
||||
# green-light the digest. The :staging-latest retag
|
||||
# on main push is intentional: when main lands AFTER a
|
||||
# staging push, staging-CP gets the post-promote code
|
||||
# (which equals what it had + any merge resolution),
|
||||
# so the canary-on-staging-CP step still runs against
|
||||
# the prod-bound digest.
|
||||
#
|
||||
# In the steady state both branches refresh :staging-latest; the
|
||||
# semantic is "most recent staging-or-main build of tenant code."
|
||||
# Drift between the two is bounded by the staging→main auto-promote
|
||||
# cadence and is corrected on the next staging push.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
branches: [staging, main]
|
||||
paths:
|
||||
- 'workspace-server/**'
|
||||
- 'canvas/**'
|
||||
- 'manifest.json'
|
||||
- '.github/workflows/publish-platform-image.yml'
|
||||
- '.github/workflows/publish-workspace-server-image.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
# Serialize per-branch so two rapid staging pushes don't race the same
|
||||
# :staging-latest tag retag. Allow staging and main to run in parallel
|
||||
# (different github.ref → different concurrency group) since they
|
||||
# produce different :staging-<sha> tags and last-write-wins on
|
||||
# :staging-latest is acceptable across branches (the post-promote
|
||||
# main code equals current staging code in a healthy flow).
|
||||
#
|
||||
# cancel-in-progress: false → in-flight builds finish; the next push's
|
||||
# build queues. This avoids a partially-pushed image and keeps the
|
||||
# canary fleet pin (:staging-<sha>) consistent with what was actually
|
||||
# tested at canary-verify time.
|
||||
concurrency:
|
||||
group: publish-workspace-server-image-${{ github.ref }}
|
||||
cancel-in-progress: false
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
@@ -63,29 +104,32 @@ jobs:
|
||||
run: |
|
||||
echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# Canary-gated release: we publish :staging-<sha> ONLY here. The
|
||||
# :latest tag (which existing prod tenants auto-pull every 5 min)
|
||||
# is promoted by .github/workflows/canary-verify.yml after the
|
||||
# staging canary fleet green-lights this digest.
|
||||
# That means:
|
||||
# - Every main merge produces a :staging-<sha> image
|
||||
# - Canary tenants (configured to pull :staging-<sha>) pick it up
|
||||
# - canary-verify.yml runs smoke tests against them
|
||||
# - On green → canary-verify retags :staging-<sha> → :latest
|
||||
# - On red → :latest stays on the prior good digest, prod is safe
|
||||
# Every push of :staging-<sha> also retags the same digest as
|
||||
# :staging-latest so staging CP (which pins TENANT_IMAGE at
|
||||
# :staging-latest) picks up new builds automatically — no more manual
|
||||
# Railway env-var edits. Prod's :latest retag still happens in
|
||||
# canary-verify.yml after the canary fleet greenlights this digest;
|
||||
# :staging-latest is strictly the "most recent main build," not a
|
||||
# canary-verified promotion.
|
||||
# Canary-gated release flow:
|
||||
# - This step always publishes :staging-<sha> + :staging-latest.
|
||||
# - On staging push, staging-CP picks up :staging-latest immediately
|
||||
# (its TENANT_IMAGE pin is :staging-latest) — so staging-branch
|
||||
# code reaches staging tenants without waiting for main.
|
||||
# - On main push, canary-verify.yml runs smoke tests against
|
||||
# canary tenants (which pin :staging-<sha>), and on green retags
|
||||
# :staging-<sha> → :latest. Prod tenants pull :latest.
|
||||
# - On red, :latest stays on the prior good digest — prod is safe.
|
||||
#
|
||||
# Before this, TENANT_IMAGE on Railway staging was pinned to a static
|
||||
# :staging-<sha> and drifted months behind (2026-04-24 incident:
|
||||
# canary tenant ran :staging-a14cf86, 10 days stale, which lacked
|
||||
# applyRuntimeModelEnv and caused every E2E to route hermes+openai
|
||||
# through openrouter → 401). See issue filed with this PR.
|
||||
# Why :staging-latest is retagged on main push too: when main lands
|
||||
# after a staging promote, staging-CP gets the post-promote code so
|
||||
# the canary-on-staging-CP step still runs against the prod-bound
|
||||
# digest. In a healthy flow the post-promote main code == the
|
||||
# current staging code, so this is effectively a no-op except for
|
||||
# the canary fleet pin handoff.
|
||||
#
|
||||
# Pre-fix history: this workflow used to only trigger on main. That
|
||||
# meant staging-CP served "yesterday's main" indefinitely whenever
|
||||
# staging→main was wedged. The 2026-04-30 dogfooding session
|
||||
# surfaced this when RFC #2312 (chat upload HTTP-forward) landed on
|
||||
# staging but staging tenants kept failing chat upload because they
|
||||
# were running pre-RFC code. Adding the staging trigger above closes
|
||||
# that gap. Earlier 2026-04-24 incident: a static :staging-<sha> pin
|
||||
# drifted 10 days behind staging — same class of bug, different
|
||||
# mechanism.
|
||||
- name: Build & push platform image to GHCR (staging-<sha> + staging-latest)
|
||||
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
|
||||
with:
|
||||
@@ -98,6 +142,13 @@ jobs:
|
||||
${{ env.IMAGE_NAME }}:staging-latest
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
# GIT_SHA bakes into the Go binary via -ldflags so /buildinfo
|
||||
# returns it at runtime — see Dockerfile + buildinfo/buildinfo.go.
|
||||
# This is the same value as the OCI revision label below; passing
|
||||
# it twice is intentional, the OCI label is for registry tooling
|
||||
# while /buildinfo is for the redeploy verification step.
|
||||
build-args: |
|
||||
GIT_SHA=${{ github.sha }}
|
||||
labels: |
|
||||
org.opencontainers.image.source=https://github.com/${{ github.repository }}
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
@@ -128,6 +179,7 @@ jobs:
|
||||
# NEXT_PUBLIC_PLATFORM_URL=http://localhost:8080).
|
||||
build-args: |
|
||||
NEXT_PUBLIC_PLATFORM_URL=
|
||||
GIT_SHA=${{ github.sha }}
|
||||
labels: |
|
||||
org.opencontainers.image.source=https://github.com/${{ github.repository }}
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
|
||||
@@ -0,0 +1,207 @@
|
||||
name: Railway pin audit (drift detection)
|
||||
|
||||
# Daily audit of Railway env vars for drift-prone image-tag pins —
|
||||
# automation-cadence layer over the detection script + regression test
|
||||
# shipped in PR #2168 (#2001 closure).
|
||||
#
|
||||
# Background: on 2026-04-24 a stale `:staging-a14cf86` SHA pin in CP's
|
||||
# TENANT_IMAGE caused 3+ hours of E2E failure with the appearance that
|
||||
# "every fix didn't propagate" — really the tenant image was so old it
|
||||
# didn't read the env vars those fixes produced. The audit script
|
||||
# (scripts/ops/audit-railway-sha-pins.sh) flags drift; this workflow
|
||||
# runs the same check unattended on a daily cron.
|
||||
#
|
||||
# Cadence: once a day, 13:00 UTC (06:00 PT). Daily is the right
|
||||
# cadence for variables-tier config — Railway env var changes are
|
||||
# deliberate operator actions, low-frequency. Hourly would risk
|
||||
# Railway API rate-limit surprises and is overkill for the change rate.
|
||||
#
|
||||
# Issue-on-failure: drift triggers a priority-high issue, mirroring
|
||||
# .github/workflows/e2e-staging-sanity.yml's pattern. Drift is
|
||||
# medium-priority "config slipped, fix at next ops window," not
|
||||
# active-outage paging.
|
||||
#
|
||||
# Secret hardening: per feedback_schedule_vs_dispatch_secrets_hardening,
|
||||
# the schedule trigger HARD-FAILS on missing RAILWAY_AUDIT_TOKEN
|
||||
# (silent-success on schedule was the failure-mode class that bit the
|
||||
# team before; cron firing without checking anything is worse than no
|
||||
# cron). The workflow_dispatch trigger SOFT-SKIPS on missing secret so
|
||||
# an operator can dry-run the workflow shape during initial provisioning
|
||||
# without tripping a fake red.
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 13 * * *'
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: railway-pin-audit
|
||||
cancel-in-progress: false
|
||||
|
||||
permissions:
|
||||
issues: write
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
audit:
|
||||
name: Audit Railway env vars for drift-prone pins
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Verify RAILWAY_AUDIT_TOKEN present
|
||||
# Schedule trigger: hard-fail when the secret is missing —
|
||||
# otherwise the cron silently runs against the wrong scope (or
|
||||
# exits 2 from the script and we issue-spam) without anyone
|
||||
# noticing the token rot.
|
||||
# Dispatch trigger: soft-skip — operator may be dry-running the
|
||||
# workflow shape before provisioning the secret. Logged as a
|
||||
# workflow notice, not a failure.
|
||||
env:
|
||||
RAILWAY_AUDIT_TOKEN: ${{ secrets.RAILWAY_AUDIT_TOKEN }}
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
id: secret_check
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ -n "${RAILWAY_AUDIT_TOKEN:-}" ]; then
|
||||
echo "have_secret=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
echo "have_secret=false" >> "$GITHUB_OUTPUT"
|
||||
if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
|
||||
echo "::notice::RAILWAY_AUDIT_TOKEN not configured — soft-skipping (manual dispatch)"
|
||||
exit 0
|
||||
fi
|
||||
echo "::error::RAILWAY_AUDIT_TOKEN secret missing — schedule trigger requires it. Provision the token (read-only \`variables\` scope on the molecule-platform Railway project) and store as repo secret RAILWAY_AUDIT_TOKEN."
|
||||
exit 1
|
||||
|
||||
- name: Install Railway CLI
|
||||
if: steps.secret_check.outputs.have_secret == 'true'
|
||||
# Pinned hash matching the public install instructions; bump in
|
||||
# tandem with the audit-script's documented Railway CLI version.
|
||||
run: |
|
||||
set -euo pipefail
|
||||
curl -fsSL https://railway.com/install.sh | sh
|
||||
# The installer drops the binary in ~/.railway/bin
|
||||
echo "$HOME/.railway/bin" >> "$GITHUB_PATH"
|
||||
|
||||
- name: Verify Railway CLI authenticated
|
||||
if: steps.secret_check.outputs.have_secret == 'true'
|
||||
env:
|
||||
RAILWAY_TOKEN: ${{ secrets.RAILWAY_AUDIT_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# `railway whoami` exits non-zero when the token is
|
||||
# unauthenticated or doesn't have any project access.
|
||||
if ! railway whoami >/dev/null 2>&1; then
|
||||
echo "::error::Railway CLI failed to authenticate with RAILWAY_AUDIT_TOKEN — token may be revoked or scoped incorrectly"
|
||||
exit 2
|
||||
fi
|
||||
|
||||
- name: Link molecule-platform project
|
||||
if: steps.secret_check.outputs.have_secret == 'true'
|
||||
env:
|
||||
RAILWAY_TOKEN: ${{ secrets.RAILWAY_AUDIT_TOKEN }}
|
||||
# Project ID from reference_production_stack: molecule-platform
|
||||
# / 7ccc8c68-61f4-42ab-9be5-586eeee11768. Linking is per-process,
|
||||
# so we re-link in this CI shell (the audit script comment says
|
||||
# it deliberately doesn't chdir for you because the linked
|
||||
# project's identity matters).
|
||||
run: |
|
||||
set -euo pipefail
|
||||
railway link --project 7ccc8c68-61f4-42ab-9be5-586eeee11768
|
||||
|
||||
- name: Run drift audit
|
||||
if: steps.secret_check.outputs.have_secret == 'true'
|
||||
id: audit
|
||||
env:
|
||||
RAILWAY_TOKEN: ${{ secrets.RAILWAY_AUDIT_TOKEN }}
|
||||
run: |
|
||||
set +e
|
||||
bash scripts/ops/audit-railway-sha-pins.sh 2>&1 | tee /tmp/audit.log
|
||||
rc=${PIPESTATUS[0]}
|
||||
echo "rc=$rc" >> "$GITHUB_OUTPUT"
|
||||
# Capture the audit log for the issue body.
|
||||
{
|
||||
echo 'log<<AUDIT_EOF'
|
||||
cat /tmp/audit.log
|
||||
echo 'AUDIT_EOF'
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
# Exit codes from the script:
|
||||
# 0 — no drift; workflow goes green
|
||||
# 1 — drift detected; we'll file an issue and fail the run
|
||||
# 2 — railway CLI unauthenticated / project unlinked; fail
|
||||
# Anything else: also fail.
|
||||
case "$rc" in
|
||||
0) exit 0 ;;
|
||||
1) echo "::warning::Drift-prone pin(s) detected — issue will be filed"; exit 1 ;;
|
||||
2) echo "::error::Railway CLI auth/link failed mid-script — token or project ID drift"; exit 2 ;;
|
||||
*) echo "::error::Unexpected audit rc=$rc"; exit 1 ;;
|
||||
esac
|
||||
|
||||
- name: Open / update drift issue
|
||||
if: failure() && steps.audit.outputs.rc == '1'
|
||||
uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7
|
||||
env:
|
||||
AUDIT_LOG: ${{ steps.audit.outputs.log }}
|
||||
with:
|
||||
script: |
|
||||
const title = "🚨 Railway env-var drift detected";
|
||||
const runURL = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
|
||||
const body =
|
||||
`Daily Railway pin audit found drift-prone image-tag pins in the molecule-platform Railway project.\n\n` +
|
||||
`**What this means:** an env var (likely on \`controlplane\`) is pinned to a SHA-shaped or semver tag instead of a floating tag. ` +
|
||||
`Same pattern that caused the 2026-04-24 TENANT_IMAGE incident — fix-PRs land but the running service doesn't pick them up.\n\n` +
|
||||
`**Recovery:** open the Railway dashboard, replace the flagged value with a floating tag (\`:staging-latest\`, \`:main\`) unless the pin is intentional and documented in the ops runbook.\n\n` +
|
||||
`**Audit output:**\n\n\`\`\`\n${process.env.AUDIT_LOG || '(log unavailable)'}\n\`\`\`\n\n` +
|
||||
`Run: ${runURL}\n\n` +
|
||||
`Closes automatically when a subsequent daily run reports clean.`;
|
||||
|
||||
const { data: existing } = await github.rest.issues.listForRepo({
|
||||
owner: context.repo.owner, repo: context.repo.repo,
|
||||
state: 'open', labels: 'railway-drift',
|
||||
});
|
||||
const match = existing.find(i => i.title === title);
|
||||
if (match) {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner, repo: context.repo.repo,
|
||||
issue_number: match.number,
|
||||
body: `Still drifting. ${runURL}\n\n\`\`\`\n${process.env.AUDIT_LOG || '(log unavailable)'}\n\`\`\``,
|
||||
});
|
||||
} else {
|
||||
await github.rest.issues.create({
|
||||
owner: context.repo.owner, repo: context.repo.repo,
|
||||
title, body,
|
||||
labels: ['railway-drift', 'bug', 'priority-high'],
|
||||
});
|
||||
}
|
||||
|
||||
- name: Close stale drift issue on clean run
|
||||
# When a previously-flagged drift gets fixed by an operator,
|
||||
# the next daily run goes green. Close any open `railway-drift`
|
||||
# issue with a confirmation comment so the queue doesn't carry
|
||||
# stale ones.
|
||||
if: success() && steps.audit.outputs.rc == '0'
|
||||
uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7
|
||||
with:
|
||||
script: |
|
||||
const runURL = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
|
||||
const { data: existing } = await github.rest.issues.listForRepo({
|
||||
owner: context.repo.owner, repo: context.repo.repo,
|
||||
state: 'open', labels: 'railway-drift',
|
||||
});
|
||||
for (const issue of existing) {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner, repo: context.repo.repo,
|
||||
issue_number: issue.number,
|
||||
body: `Daily audit clean — drift resolved. ${runURL}`,
|
||||
});
|
||||
await github.rest.issues.update({
|
||||
owner: context.repo.owner, repo: context.repo.repo,
|
||||
issue_number: issue.number,
|
||||
state: 'closed',
|
||||
state_reason: 'completed',
|
||||
});
|
||||
}
|
||||
@@ -64,6 +64,20 @@ permissions:
|
||||
# No write scopes needed — the workflow hits an external CP endpoint,
|
||||
# not the GitHub API.
|
||||
|
||||
# Serialize redeploys so two rapid main pushes' redeploys don't overlap
|
||||
# and cause confusing per-tenant SSM state. Without this, GitHub's
|
||||
# implicit workflow_run queueing would *probably* serialize them, but
|
||||
# the explicit block makes the invariant defensible. Mirrors the
|
||||
# concurrency block on redeploy-tenants-on-staging.yml for shape parity.
|
||||
#
|
||||
# cancel-in-progress: false → aborting a half-rolled-out fleet would
|
||||
# leave tenants stuck on whatever image they happened to be on when
|
||||
# cancelled. Better to finish the in-flight rollout before starting
|
||||
# the next one.
|
||||
concurrency:
|
||||
group: redeploy-tenants-on-main
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
redeploy:
|
||||
# Skip the auto-trigger if publish-workspace-server-image didn't
|
||||
@@ -161,4 +175,151 @@ jobs:
|
||||
echo "::error::redeploy-fleet reported ok=false (see summary for which tenant halted the rollout)"
|
||||
exit 1
|
||||
fi
|
||||
echo "::notice::Tenant fleet redeploy complete."
|
||||
echo "::notice::Tenant fleet redeploy reported ssm_status=Success — verifying actual image roll on each tenant..."
|
||||
|
||||
# Stash the response for the verify step. $RUNNER_TEMP outlasts
|
||||
# the step boundary; $HTTP_RESPONSE doesn't.
|
||||
cp "$HTTP_RESPONSE" "$RUNNER_TEMP/redeploy-response.json"
|
||||
|
||||
- name: Verify each tenant /buildinfo matches published SHA
|
||||
# ROOT FIX FOR #2395.
|
||||
#
|
||||
# `redeploy-fleet`'s `ssm_status=Success` means "the SSM RPC
|
||||
# didn't error" — NOT "the new image is running on the tenant."
|
||||
# `:latest` lives in the local Docker daemon's image cache; if
|
||||
# the SSM document does `docker compose up -d` without an
|
||||
# explicit `docker pull`, the daemon serves the previously-
|
||||
# cached digest and the container restarts on stale code.
|
||||
# 2026-04-30 incident: hongmingwang's tenant reported
|
||||
# ssm_status=Success at 17:00:53Z but kept serving pre-501a42d7
|
||||
# chat_files for 30+ min — the lazy-heal fix never reached the
|
||||
# user despite green deploy + green redeploy.
|
||||
#
|
||||
# This step closes the gap by curling each tenant's /buildinfo
|
||||
# endpoint (added in workspace-server/internal/buildinfo +
|
||||
# /Dockerfile* GIT_SHA build-arg, this PR) and comparing the
|
||||
# returned git_sha to the SHA the workflow expects. Mismatches
|
||||
# fail the workflow, which is what `ok=true` should have
|
||||
# guaranteed all along.
|
||||
#
|
||||
# When the redeploy was triggered by workflow_dispatch with a
|
||||
# specific tag (target_tag != "latest"), the expected SHA may
|
||||
# not equal ${{ github.sha }} — in that case we resolve via
|
||||
# GHCR's manifest. For workflow_run (default :latest) the
|
||||
# workflow_run.head_sha is the SHA that just published.
|
||||
env:
|
||||
EXPECTED_SHA: ${{ github.event.workflow_run.head_sha || github.sha }}
|
||||
TARGET_TAG: ${{ inputs.target_tag || 'latest' }}
|
||||
# Tenant subdomain template — slugs from the response are
|
||||
# appended. Production CP issues `<slug>.moleculesai.app`;
|
||||
# staging CP issues `<slug>.staging.moleculesai.app`. This
|
||||
# workflow runs on main → prod CP → no `staging.` infix.
|
||||
TENANT_DOMAIN: 'moleculesai.app'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
if [ "$TARGET_TAG" != "latest" ] && [ "$TARGET_TAG" != "$EXPECTED_SHA" ]; then
|
||||
# workflow_dispatch with a pinned tag that isn't the head
|
||||
# SHA — operator is rolling back / pinning. Skip the
|
||||
# verification because we don't have the expected SHA in
|
||||
# this context (would need to crane-inspect the GHCR
|
||||
# manifest, which is a follow-up). Failing-open here is
|
||||
# safe: the operator chose the tag deliberately.
|
||||
echo "::notice::target_tag=$TARGET_TAG (operator-pinned) — skipping per-tenant SHA verification."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
RESP="$RUNNER_TEMP/redeploy-response.json"
|
||||
if [ ! -s "$RESP" ]; then
|
||||
echo "::error::redeploy-response.json missing or empty — verify step ran without a response to read"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Pull only successfully-redeployed tenants. Any tenant that
|
||||
# halted the rollout already failed the previous step, so we
|
||||
# don't double-count them here.
|
||||
mapfile -t SLUGS < <(jq -r '.results[]? | select(.healthz_ok == true) | .slug' "$RESP")
|
||||
if [ ${#SLUGS[@]} -eq 0 ]; then
|
||||
echo "::warning::No tenants reported healthz_ok — nothing to verify"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Verifying ${#SLUGS[@]} tenant(s) against EXPECTED_SHA=${EXPECTED_SHA:0:7}..."
|
||||
|
||||
# Two distinct failure modes — STALE (the #2395 bug class, hard-fail)
|
||||
# vs UNREACHABLE (teardown race, soft-warn). See the staging variant's
|
||||
# comment for the full rationale; same logic applies on prod even
|
||||
# though prod has fewer ephemeral tenants — the asymmetry would be a
|
||||
# gratuitous fork.
|
||||
STALE_COUNT=0
|
||||
UNREACHABLE_COUNT=0
|
||||
STALE_LINES=()
|
||||
UNREACHABLE_LINES=()
|
||||
for slug in "${SLUGS[@]}"; do
|
||||
URL="https://${slug}.${TENANT_DOMAIN}/buildinfo"
|
||||
# 30s total: tenant just SSM-restarted, may still be coming
|
||||
# up. Retry-on-empty rather than retry-on-status — we want
|
||||
# to fail fast on "responded with wrong SHA", not "still
|
||||
# warming up".
|
||||
BODY=$(curl -sS --max-time 30 --retry 3 --retry-delay 5 --retry-connrefused "$URL" || true)
|
||||
ACTUAL_SHA=$(echo "$BODY" | jq -r '.git_sha // ""' 2>/dev/null || echo "")
|
||||
if [ -z "$ACTUAL_SHA" ]; then
|
||||
UNREACHABLE_COUNT=$((UNREACHABLE_COUNT + 1))
|
||||
UNREACHABLE_LINES+=("| $slug | (no /buildinfo response) | ${EXPECTED_SHA:0:7} | ⚠ unreachable (likely teardown race) |")
|
||||
continue
|
||||
fi
|
||||
if [ "$ACTUAL_SHA" = "$EXPECTED_SHA" ]; then
|
||||
echo " $slug: ${ACTUAL_SHA:0:7} ✓"
|
||||
else
|
||||
STALE_COUNT=$((STALE_COUNT + 1))
|
||||
STALE_LINES+=("| $slug | ${ACTUAL_SHA:0:7} | ${EXPECTED_SHA:0:7} | ❌ stale |")
|
||||
fi
|
||||
done
|
||||
|
||||
{
|
||||
echo ""
|
||||
echo "### Per-tenant /buildinfo verification"
|
||||
echo ""
|
||||
echo "Expected SHA: \`${EXPECTED_SHA:0:7}\`"
|
||||
echo ""
|
||||
if [ $STALE_COUNT -gt 0 ]; then
|
||||
echo "**${STALE_COUNT} STALE tenant(s) — these did NOT pick up the new image despite ssm_status=Success:**"
|
||||
echo ""
|
||||
echo "| Slug | Actual /buildinfo SHA | Expected | Status |"
|
||||
echo "|------|----------------------|----------|--------|"
|
||||
for line in "${STALE_LINES[@]}"; do echo "$line"; done
|
||||
echo ""
|
||||
fi
|
||||
if [ $UNREACHABLE_COUNT -gt 0 ]; then
|
||||
echo "**${UNREACHABLE_COUNT} unreachable tenant(s) — likely teardown race (soft-warn, not failing):**"
|
||||
echo ""
|
||||
echo "| Slug | Actual /buildinfo SHA | Expected | Status |"
|
||||
echo "|------|----------------------|----------|--------|"
|
||||
for line in "${UNREACHABLE_LINES[@]}"; do echo "$line"; done
|
||||
echo ""
|
||||
fi
|
||||
if [ $STALE_COUNT -eq 0 ] && [ $UNREACHABLE_COUNT -eq 0 ]; then
|
||||
echo "All ${#SLUGS[@]} tenants returned matching SHA. ✓"
|
||||
fi
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
if [ $UNREACHABLE_COUNT -gt 0 ]; then
|
||||
echo "::warning::$UNREACHABLE_COUNT tenant(s) unreachable post-redeploy. Likely benign teardown race — CP healthz monitor catches real outages."
|
||||
fi
|
||||
|
||||
# Belt-and-suspenders sanity floor: same logic as the staging
|
||||
# variant — see that file's comment for the full rationale.
|
||||
# Floor only applies when fleet >= 4; below that, canary-verify
|
||||
# is the actual gate.
|
||||
TOTAL_VERIFIED=${#SLUGS[@]}
|
||||
if [ $TOTAL_VERIFIED -ge 4 ] && [ $UNREACHABLE_COUNT -gt $((TOTAL_VERIFIED / 2)) ]; then
|
||||
echo "::error::$UNREACHABLE_COUNT of $TOTAL_VERIFIED tenant(s) unreachable — exceeds 50% threshold on a fleet large enough that this signals a real outage, not teardown race."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $STALE_COUNT -gt 0 ]; then
|
||||
echo "::error::$STALE_COUNT tenant(s) returned a stale SHA. ssm_status=Success was misleading — see job summary."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "::notice::Tenant fleet redeploy complete — all reachable tenants on ${EXPECTED_SHA:0:7} (${UNREACHABLE_COUNT} unreachable, soft-warned)."
|
||||
|
||||
@@ -0,0 +1,310 @@
|
||||
name: redeploy-tenants-on-staging
|
||||
|
||||
# Auto-refresh staging tenant EC2s after every staging-branch merge.
|
||||
#
|
||||
# Mirror of redeploy-tenants-on-main.yml, with the staging-CP host and
|
||||
# the :staging-latest tag. Sister workflow exists for prod (rolls
|
||||
# :latest after canary-verify). Both share the same shape — just
|
||||
# different CP_URL + target_tag + admin token secret.
|
||||
#
|
||||
# Why this workflow exists: publish-workspace-server-image now builds
|
||||
# on every staging-branch push (PR #2335), pushing
|
||||
# platform-tenant:staging-latest to GHCR. Existing tenants pulled
|
||||
# their image once at boot and never re-pull, so the new image just
|
||||
# sits unused until the tenant is reprovisioned.
|
||||
#
|
||||
# This workflow closes the gap by calling staging-CP's
|
||||
# /cp/admin/tenants/redeploy-fleet, which performs a canary-first,
|
||||
# batched, health-gated SSM redeploy across every live staging tenant.
|
||||
# Same endpoint shape as prod CP — only the host differs.
|
||||
#
|
||||
# Runtime ordering:
|
||||
# 1. publish-workspace-server-image completes on staging branch →
|
||||
# new :staging-latest in GHCR.
|
||||
# 2. This workflow fires via workflow_run, waits 30s for GHCR's CDN
|
||||
# to propagate the new tag.
|
||||
# 3. Calls redeploy-fleet with no canary (staging IS canary; we don't
|
||||
# need a sub-canary inside it). Soak still applies to the first
|
||||
# tenant in case of bad-deploy detection.
|
||||
# 4. Any failure aborts the rollout and leaves older tenants on the
|
||||
# prior image — safer default than half-and-half state.
|
||||
#
|
||||
# Rollback path: re-run with workflow_dispatch + target_tag=staging-<sha>
|
||||
# of a known-good build.
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
workflows: ['publish-workspace-server-image']
|
||||
types: [completed]
|
||||
branches: [staging]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
target_tag:
|
||||
description: 'Tenant image tag to deploy (e.g. "staging-latest" or "staging-a59f1a6c"). Defaults to staging-latest when empty.'
|
||||
required: false
|
||||
type: string
|
||||
default: 'staging-latest'
|
||||
canary_slug:
|
||||
description: 'Tenant slug to deploy first + soak (empty = skip canary, fan out immediately). Default empty for staging since staging itself is the canary.'
|
||||
required: false
|
||||
type: string
|
||||
default: ''
|
||||
soak_seconds:
|
||||
description: 'Seconds to wait after canary before fanning out. Only meaningful if canary_slug is set.'
|
||||
required: false
|
||||
type: string
|
||||
default: '60'
|
||||
batch_size:
|
||||
description: 'How many tenants SSM redeploys in parallel per batch.'
|
||||
required: false
|
||||
type: string
|
||||
default: '3'
|
||||
dry_run:
|
||||
description: 'Plan only — do not actually redeploy.'
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
# No write scopes needed — the workflow hits an external CP endpoint,
|
||||
# not the GitHub API.
|
||||
|
||||
# Serialize per-branch so two rapid staging pushes' redeploys don't
|
||||
# overlap and cause confusing per-tenant SSM state. cancel-in-progress
|
||||
# is false because aborting a half-rolled-out fleet leaves tenants
|
||||
# stuck on whatever image they happened to be on when cancelled.
|
||||
concurrency:
|
||||
group: redeploy-tenants-on-staging
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
redeploy:
|
||||
# Skip the auto-trigger if publish-workspace-server-image didn't
|
||||
# actually succeed. workflow_run fires on any completion state; we
|
||||
# don't want to redeploy against a half-built image.
|
||||
if: |
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 25
|
||||
steps:
|
||||
- name: Wait for GHCR tag propagation
|
||||
# GHCR's edge cache takes ~15-30s to consistently serve the new
|
||||
# :staging-latest manifest after the registry accepts the push.
|
||||
# Same rationale as redeploy-tenants-on-main.yml.
|
||||
run: sleep 30
|
||||
|
||||
- name: Call staging-CP redeploy-fleet
|
||||
# CP_STAGING_ADMIN_API_TOKEN must be set as a repo/org secret
|
||||
# on Molecule-AI/molecule-core, matching staging-CP's
|
||||
# CP_ADMIN_API_TOKEN env var (visible in Railway controlplane
|
||||
# / staging environment). Stored separately from the prod
|
||||
# CP_ADMIN_API_TOKEN so a leak of one doesn't auth the other.
|
||||
env:
|
||||
CP_URL: ${{ vars.STAGING_CP_URL || 'https://staging-api.moleculesai.app' }}
|
||||
CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
TARGET_TAG: ${{ inputs.target_tag || 'staging-latest' }}
|
||||
CANARY_SLUG: ${{ inputs.canary_slug || '' }}
|
||||
SOAK_SECONDS: ${{ inputs.soak_seconds || '60' }}
|
||||
BATCH_SIZE: ${{ inputs.batch_size || '3' }}
|
||||
DRY_RUN: ${{ inputs.dry_run || false }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Schedule-vs-dispatch hardening (mirrors sweep-cf-orphans
|
||||
# and sweep-cf-tunnels): hard-fail on auto-trigger when the
|
||||
# secret is missing so a misconfigured-repo doesn't silently
|
||||
# serve stale staging tenants. Soft-skip on operator dispatch.
|
||||
if [ -z "${CP_STAGING_ADMIN_API_TOKEN:-}" ]; then
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
echo "::warning::CP_STAGING_ADMIN_API_TOKEN secret not set — skipping redeploy"
|
||||
echo "::warning::Set CP_STAGING_ADMIN_API_TOKEN in repo secrets to enable auto-redeploy."
|
||||
echo "::notice::Pull the value from staging-CP's CP_ADMIN_API_TOKEN env in Railway."
|
||||
exit 0
|
||||
fi
|
||||
echo "::error::staging redeploy cannot run — CP_STAGING_ADMIN_API_TOKEN secret missing"
|
||||
echo "::error::set it at Settings → Secrets and Variables → Actions; pull from staging-CP's CP_ADMIN_API_TOKEN env in Railway."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
BODY=$(jq -nc \
|
||||
--arg tag "$TARGET_TAG" \
|
||||
--arg canary "$CANARY_SLUG" \
|
||||
--argjson soak "$SOAK_SECONDS" \
|
||||
--argjson batch "$BATCH_SIZE" \
|
||||
--argjson dry "$DRY_RUN" \
|
||||
'{
|
||||
target_tag: $tag,
|
||||
canary_slug: $canary,
|
||||
soak_seconds: $soak,
|
||||
batch_size: $batch,
|
||||
dry_run: $dry
|
||||
}')
|
||||
|
||||
echo "POST $CP_URL/cp/admin/tenants/redeploy-fleet"
|
||||
echo " body: $BODY"
|
||||
|
||||
HTTP_RESPONSE=$(mktemp)
|
||||
HTTP_CODE=$(curl -sS -o "$HTTP_RESPONSE" -w '%{http_code}' \
|
||||
-m 1200 \
|
||||
-H "Authorization: Bearer $CP_STAGING_ADMIN_API_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-X POST "$CP_URL/cp/admin/tenants/redeploy-fleet" \
|
||||
-d "$BODY" || echo "000")
|
||||
|
||||
echo "HTTP $HTTP_CODE"
|
||||
cat "$HTTP_RESPONSE" | jq . || cat "$HTTP_RESPONSE"
|
||||
|
||||
{
|
||||
echo "## Staging tenant redeploy fleet"
|
||||
echo ""
|
||||
echo "**Target tag:** \`$TARGET_TAG\`"
|
||||
echo "**Canary:** \`${CANARY_SLUG:-(none — staging is itself the canary)}\` (soak ${SOAK_SECONDS}s)"
|
||||
echo "**Batch size:** $BATCH_SIZE"
|
||||
echo "**Dry run:** $DRY_RUN"
|
||||
echo "**HTTP:** $HTTP_CODE"
|
||||
echo ""
|
||||
echo "### Per-tenant result"
|
||||
echo ""
|
||||
echo '| Slug | Phase | SSM Status | Exit | Healthz | Error |'
|
||||
echo '|------|-------|------------|------|---------|-------|'
|
||||
jq -r '.results[]? | "| \(.slug) | \(.phase) | \(.ssm_status // "-") | \(.ssm_exit_code) | \(.healthz_ok) | \(.error // "-") |"' "$HTTP_RESPONSE" || true
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
if [ "$HTTP_CODE" != "200" ]; then
|
||||
echo "::error::redeploy-fleet returned HTTP $HTTP_CODE"
|
||||
exit 1
|
||||
fi
|
||||
OK=$(jq -r '.ok' "$HTTP_RESPONSE")
|
||||
if [ "$OK" != "true" ]; then
|
||||
echo "::error::redeploy-fleet reported ok=false (see summary for which tenant halted the rollout)"
|
||||
exit 1
|
||||
fi
|
||||
echo "::notice::Staging tenant fleet redeploy reported ssm_status=Success — verifying actual image roll on each tenant..."
|
||||
|
||||
cp "$HTTP_RESPONSE" "$RUNNER_TEMP/redeploy-response.json"
|
||||
|
||||
- name: Verify each staging tenant /buildinfo matches published SHA
|
||||
# Mirror of the verify step in redeploy-tenants-on-main.yml — see
|
||||
# there for the rationale (#2395 root fix). Staging has the same
|
||||
# ssm_status-success-but-stale-image hazard and benefits from the
|
||||
# same gate. Diff: TENANT_DOMAIN includes the `staging.` infix.
|
||||
env:
|
||||
EXPECTED_SHA: ${{ github.event.workflow_run.head_sha || github.sha }}
|
||||
TARGET_TAG: ${{ inputs.target_tag || 'staging-latest' }}
|
||||
TENANT_DOMAIN: 'staging.moleculesai.app'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# staging-latest is the staging-side moving tag; treat it the
|
||||
# same way main treats `latest`. Operator-pinned SHAs skip
|
||||
# verification (see main variant for why).
|
||||
if [ "$TARGET_TAG" != "staging-latest" ] && [ "$TARGET_TAG" != "latest" ] && [ "$TARGET_TAG" != "$EXPECTED_SHA" ]; then
|
||||
echo "::notice::target_tag=$TARGET_TAG (operator-pinned) — skipping per-tenant SHA verification."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
RESP="$RUNNER_TEMP/redeploy-response.json"
|
||||
if [ ! -s "$RESP" ]; then
|
||||
echo "::error::redeploy-response.json missing or empty"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mapfile -t SLUGS < <(jq -r '.results[]? | select(.healthz_ok == true) | .slug' "$RESP")
|
||||
if [ ${#SLUGS[@]} -eq 0 ]; then
|
||||
echo "::warning::No staging tenants reported healthz_ok — nothing to verify"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Verifying ${#SLUGS[@]} staging tenant(s) against EXPECTED_SHA=${EXPECTED_SHA:0:7}..."
|
||||
|
||||
# Two distinct failure modes here:
|
||||
# STALE_COUNT — tenant returned a SHA that doesn't match. THIS is
|
||||
# the #2395 bug class: tenant up + serving old code.
|
||||
# Always hard-fail the workflow.
|
||||
# UNREACHABLE_COUNT — tenant didn't respond. Almost always a benign
|
||||
# teardown race: redeploy-fleet snapshot says
|
||||
# healthz_ok=true, then the E2E suite tears the
|
||||
# ephemeral tenant down before this step runs (the
|
||||
# e2e-* fixtures churn 5-10/hour on staging). Soft-
|
||||
# warn so we don't block staging→main on cleanup.
|
||||
# Real "tenant up but unreachable" is caught by CP's
|
||||
# own healthz monitor + the post-redeploy alert; we
|
||||
# don't need to double-count it here.
|
||||
STALE_COUNT=0
|
||||
UNREACHABLE_COUNT=0
|
||||
STALE_LINES=()
|
||||
UNREACHABLE_LINES=()
|
||||
for slug in "${SLUGS[@]}"; do
|
||||
URL="https://${slug}.${TENANT_DOMAIN}/buildinfo"
|
||||
BODY=$(curl -sS --max-time 30 --retry 3 --retry-delay 5 --retry-connrefused "$URL" || true)
|
||||
ACTUAL_SHA=$(echo "$BODY" | jq -r '.git_sha // ""' 2>/dev/null || echo "")
|
||||
if [ -z "$ACTUAL_SHA" ]; then
|
||||
UNREACHABLE_COUNT=$((UNREACHABLE_COUNT + 1))
|
||||
UNREACHABLE_LINES+=("| $slug | (no /buildinfo response) | ${EXPECTED_SHA:0:7} | ⚠ unreachable (likely teardown race) |")
|
||||
continue
|
||||
fi
|
||||
if [ "$ACTUAL_SHA" = "$EXPECTED_SHA" ]; then
|
||||
echo " $slug: ${ACTUAL_SHA:0:7} ✓"
|
||||
else
|
||||
STALE_COUNT=$((STALE_COUNT + 1))
|
||||
STALE_LINES+=("| $slug | ${ACTUAL_SHA:0:7} | ${EXPECTED_SHA:0:7} | ❌ stale |")
|
||||
fi
|
||||
done
|
||||
|
||||
{
|
||||
echo ""
|
||||
echo "### Per-tenant /buildinfo verification (staging)"
|
||||
echo ""
|
||||
echo "Expected SHA: \`${EXPECTED_SHA:0:7}\`"
|
||||
echo ""
|
||||
if [ $STALE_COUNT -gt 0 ]; then
|
||||
echo "**${STALE_COUNT} STALE tenant(s) — these did NOT pick up the new image despite ssm_status=Success:**"
|
||||
echo ""
|
||||
echo "| Slug | Actual /buildinfo SHA | Expected | Status |"
|
||||
echo "|------|----------------------|----------|--------|"
|
||||
for line in "${STALE_LINES[@]}"; do echo "$line"; done
|
||||
echo ""
|
||||
fi
|
||||
if [ $UNREACHABLE_COUNT -gt 0 ]; then
|
||||
echo "**${UNREACHABLE_COUNT} unreachable tenant(s) — likely E2E teardown race (soft-warn, not failing):**"
|
||||
echo ""
|
||||
echo "| Slug | Actual /buildinfo SHA | Expected | Status |"
|
||||
echo "|------|----------------------|----------|--------|"
|
||||
for line in "${UNREACHABLE_LINES[@]}"; do echo "$line"; done
|
||||
echo ""
|
||||
fi
|
||||
if [ $STALE_COUNT -eq 0 ] && [ $UNREACHABLE_COUNT -eq 0 ]; then
|
||||
echo "All ${#SLUGS[@]} staging tenants returned matching SHA. ✓"
|
||||
fi
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
if [ $UNREACHABLE_COUNT -gt 0 ]; then
|
||||
echo "::warning::$UNREACHABLE_COUNT staging tenant(s) unreachable post-redeploy. Likely benign teardown race — CP healthz monitor catches real outages."
|
||||
fi
|
||||
|
||||
# Belt-and-suspenders sanity floor: if MORE than half the fleet is
|
||||
# unreachable AND the fleet is large enough that "half down" is
|
||||
# statistically meaningful, this is a real outage (e.g. new image
|
||||
# crashes on startup), not a teardown race. Hard-fail.
|
||||
#
|
||||
# Floor only applies when TOTAL_VERIFIED >= 4 — below that, the
|
||||
# canary-verify step is the actual gate for "all tenants down"
|
||||
# detection (it runs against the canary first and aborts the
|
||||
# rollout if the canary fails to come up). Without the >=4 gate,
|
||||
# a 1-tenant fleet (e.g. a single ephemeral e2e-* tenant on a
|
||||
# quiet staging push) would re-flake on the exact teardown-race
|
||||
# condition #2402 fixed: 1 of 1 unreachable = 100% > 50% → fail.
|
||||
TOTAL_VERIFIED=${#SLUGS[@]}
|
||||
if [ $TOTAL_VERIFIED -ge 4 ] && [ $UNREACHABLE_COUNT -gt $((TOTAL_VERIFIED / 2)) ]; then
|
||||
echo "::error::$UNREACHABLE_COUNT of $TOTAL_VERIFIED staging tenant(s) unreachable — exceeds 50% threshold on a fleet large enough that this signals a real outage, not teardown race."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $STALE_COUNT -gt 0 ]; then
|
||||
echo "::error::$STALE_COUNT staging tenant(s) returned a stale SHA. ssm_status=Success was misleading — see job summary."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "::notice::Staging tenant fleet redeploy complete — all reachable tenants on ${EXPECTED_SHA:0:7} (${UNREACHABLE_COUNT} unreachable, soft-warned)."
|
||||
@@ -23,53 +23,88 @@ name: Runtime PR-Built Compatibility
|
||||
#
|
||||
# By building from the PR's source and smoke-importing THAT wheel, we
|
||||
# fail at PR-time instead of after publish.
|
||||
#
|
||||
# Required-check shape (2026-05-01): the workflow runs on EVERY push +
|
||||
# PR + merge_group event with no top-level `paths:` filter, then uses a
|
||||
# detect-changes job + per-step `if:` gates inside ONE always-running
|
||||
# job named `PR-built wheel + import smoke`. PRs that don't touch
|
||||
# wheel-relevant paths get a no-op SUCCESS check run, satisfying branch
|
||||
# protection without re-running the heavy build. Same pattern as
|
||||
# e2e-api.yml — see its comment for the full rationale + the 2026-04-29
|
||||
# PR #2264 incident that motivated the always-run-with-if-gates shape.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
# Broad filter: this workflow's verdict can change whenever any
|
||||
# workspace/ source file changes (because the wheel we build is
|
||||
# produced from those files), or when the build script itself
|
||||
# changes (it controls the wheel layout).
|
||||
- 'workspace/**'
|
||||
- 'scripts/build_runtime_package.py'
|
||||
- '.github/workflows/runtime-prbuild-compat.yml'
|
||||
pull_request:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- 'workspace/**'
|
||||
- 'scripts/build_runtime_package.py'
|
||||
- '.github/workflows/runtime-prbuild-compat.yml'
|
||||
workflow_dispatch:
|
||||
# Required-check support: when this becomes a branch-protection gate,
|
||||
# merge_group runs let the queue green-check this in addition to PRs.
|
||||
merge_group:
|
||||
types: [checks_requested]
|
||||
# No cron: the same pre-merge run already covered the commit, and
|
||||
# re-running daily wouldn't surface anything new (workspace/ doesn't
|
||||
# change between cron firings unless a PR already passed this gate).
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
detect-changes:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
wheel: ${{ steps.decide.outputs.wheel }}
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
|
||||
id: filter
|
||||
with:
|
||||
filters: |
|
||||
wheel:
|
||||
- 'workspace/**'
|
||||
- 'scripts/build_runtime_package.py'
|
||||
- 'scripts/wheel_smoke.py'
|
||||
- '.github/workflows/runtime-prbuild-compat.yml'
|
||||
- id: decide
|
||||
# Always run real work for manual dispatch + merge_group — no
|
||||
# diff-against-base in those contexts, and the gate exists to
|
||||
# validate the to-be-merged state regardless of which paths it
|
||||
# touched (paths-filter would default to "no changes" which is
|
||||
# the wrong answer when the queue is composing many PRs).
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ] || [ "${{ github.event_name }}" = "merge_group" ]; then
|
||||
echo "wheel=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "wheel=${{ steps.filter.outputs.wheel }}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# ONE job (no job-level `if:`) that always runs and reports under the
|
||||
# required-check name `PR-built wheel + import smoke`. Real work is
|
||||
# gated per-step on `needs.detect-changes.outputs.wheel`. Same shape
|
||||
# as e2e-api.yml's e2e-api job — see its comment block for the full
|
||||
# rationale (SKIPPED check runs block branch protection even with
|
||||
# SUCCESS siblings; collapsing to one always-run job emits exactly
|
||||
# one SUCCESS check run).
|
||||
local-build-install:
|
||||
# Builds the wheel from THIS PR's workspace/ + scripts/ and tests
|
||||
# IT — the artifact that WOULD be published if this PR merges.
|
||||
needs: detect-changes
|
||||
name: PR-built wheel + import smoke
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
- name: No-op pass (paths filter excluded this commit)
|
||||
if: needs.detect-changes.outputs.wheel != 'true'
|
||||
run: |
|
||||
echo "No workspace/ / scripts/{build_runtime_package,wheel_smoke}.py / workflow changes — wheel gate satisfied without rebuilding."
|
||||
echo "::notice::PR-built wheel + import smoke no-op pass (paths filter excluded this commit)."
|
||||
- if: needs.detect-changes.outputs.wheel == 'true'
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- if: needs.detect-changes.outputs.wheel == 'true'
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: pip
|
||||
cache-dependency-path: workspace/requirements.txt
|
||||
- name: Install build tooling
|
||||
if: needs.detect-changes.outputs.wheel == 'true'
|
||||
run: pip install build
|
||||
- name: Build wheel from PR source (mirrors publish-runtime.yml)
|
||||
if: needs.detect-changes.outputs.wheel == 'true'
|
||||
# Use a fixed test version so the wheel filename is predictable.
|
||||
# Doesn't reach PyPI — this build is local-only for the smoke.
|
||||
# Use the SAME build script with the SAME args as
|
||||
@@ -86,6 +121,7 @@ jobs:
|
||||
--out /tmp/runtime-build
|
||||
cd /tmp/runtime-build && python -m build
|
||||
- name: Install built wheel + workspace requirements
|
||||
if: needs.detect-changes.outputs.wheel == 'true'
|
||||
run: |
|
||||
python -m venv /tmp/venv-built
|
||||
/tmp/venv-built/bin/pip install --upgrade pip
|
||||
@@ -94,7 +130,10 @@ jobs:
|
||||
/tmp/venv-built/bin/pip show molecule-ai-workspace-runtime a2a-sdk \
|
||||
| grep -E '^(Name|Version):'
|
||||
- name: Smoke import the PR-built wheel
|
||||
env:
|
||||
WORKSPACE_ID: 00000000-0000-0000-0000-000000000001
|
||||
if: needs.detect-changes.outputs.wheel == 'true'
|
||||
# Same script publish-runtime.yml runs against the to-be-PyPI wheel.
|
||||
# Closes the PR-time vs publish-time gap: a PR adding a new SDK
|
||||
# call-shape no longer passes here (narrow `import main_sync`) only
|
||||
# to fail post-merge in publish-runtime's broader smoke.
|
||||
run: |
|
||||
/tmp/venv-built/bin/python -c "from molecule_runtime.main import main_sync; print('PR-built runtime imports OK')"
|
||||
/tmp/venv-built/bin/python "$GITHUB_WORKSPACE/scripts/wheel_smoke.py"
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
name: Sweep stale Cloudflare Tunnels
|
||||
|
||||
# Janitor for Cloudflare Tunnels whose backing tenant no longer
|
||||
# exists. Parallel-shape to sweep-cf-orphans.yml (which sweeps DNS
|
||||
# records); same justification, different CF resource.
|
||||
#
|
||||
# Why this exists separately from sweep-cf-orphans:
|
||||
# - DNS records live on the zone (`/zones/<id>/dns_records`).
|
||||
# - Tunnels live on the account (`/accounts/<id>/cfd_tunnel`).
|
||||
# - Different CF API surface, different scopes; the existing CF
|
||||
# token might not have `account:cloudflare_tunnel:edit`. Splitting
|
||||
# the workflows keeps each one's secret-presence gate independent
|
||||
# so neither silent-skips when the other's secret is missing.
|
||||
# - Cleaner blast radius — operators can disable one without the
|
||||
# other if a regression surfaces.
|
||||
#
|
||||
# Safety: the script's MAX_DELETE_PCT gate (default 90% — higher than
|
||||
# the DNS sweep's 50% because tenant-shaped tunnels are mostly
|
||||
# orphans by design) refuses to nuke past the threshold.
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Hourly at :45 — offset from sweep-cf-orphans (:15) so the two
|
||||
# janitors don't issue parallel CF API bursts at the same minute.
|
||||
- cron: '45 * * * *'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
dry_run:
|
||||
description: "Dry run only — list what would be deleted, no deletion"
|
||||
required: false
|
||||
type: boolean
|
||||
default: true
|
||||
max_delete_pct:
|
||||
description: "Override safety gate (default 90, set higher only for major cleanup)"
|
||||
required: false
|
||||
default: "90"
|
||||
|
||||
# Don't let two sweeps race the same account.
|
||||
concurrency:
|
||||
group: sweep-cf-tunnels
|
||||
cancel-in-progress: false
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
sweep:
|
||||
name: Sweep CF tunnels
|
||||
runs-on: ubuntu-latest
|
||||
# 5 min surfaces hangs (CF API stall, slow pagination on busy
|
||||
# accounts). Realistic worst case is ~3 min: 2 CP curls + N CF
|
||||
# list pages + N×CF-DELETE, each capped at 10-15s by curl -m.
|
||||
timeout-minutes: 5
|
||||
env:
|
||||
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
|
||||
CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }}
|
||||
CP_PROD_ADMIN_TOKEN: ${{ secrets.CP_PROD_ADMIN_TOKEN }}
|
||||
CP_STAGING_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_TOKEN }}
|
||||
MAX_DELETE_PCT: ${{ github.event.inputs.max_delete_pct || '90' }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Verify required secrets present
|
||||
id: verify
|
||||
# Schedule-vs-dispatch behaviour split mirrors sweep-cf-orphans
|
||||
# (hardened 2026-04-28 after the silent-no-op incident: the
|
||||
# janitor reported green while doing nothing because secrets
|
||||
# were unset, masking a 152/200 zone-record leak). Same
|
||||
# principle applies here:
|
||||
# - schedule → exit 1 on missing secrets (red CI surfaces it)
|
||||
# - workflow_dispatch → exit 0 with warning (operator-driven,
|
||||
# they already accepted the repo state)
|
||||
run: |
|
||||
missing=()
|
||||
for var in CF_API_TOKEN CF_ACCOUNT_ID CP_PROD_ADMIN_TOKEN CP_STAGING_ADMIN_TOKEN; do
|
||||
if [ -z "${!var:-}" ]; then
|
||||
missing+=("$var")
|
||||
fi
|
||||
done
|
||||
if [ ${#missing[@]} -gt 0 ]; then
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
echo "::warning::skipping sweep — secrets not configured: ${missing[*]}"
|
||||
echo "::warning::set them at Settings → Secrets and Variables → Actions, then rerun."
|
||||
echo "::warning::CF_API_TOKEN must include account:cloudflare_tunnel:edit scope (separate from the zone:dns:edit scope used by sweep-cf-orphans)."
|
||||
echo "skip=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
echo "::error::sweep cannot run — required secrets missing: ${missing[*]}"
|
||||
echo "::error::set them at Settings → Secrets and Variables → Actions, or disable this workflow."
|
||||
echo "::error::CF_API_TOKEN must include account:cloudflare_tunnel:edit scope."
|
||||
exit 1
|
||||
fi
|
||||
echo "All required secrets present ✓"
|
||||
echo "skip=false" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Run sweep
|
||||
if: steps.verify.outputs.skip != 'true'
|
||||
# Schedule-vs-dispatch dry-run asymmetry mirrors sweep-cf-orphans:
|
||||
# - Scheduled: input empty → "false" → --execute (the whole
|
||||
# point of an hourly janitor).
|
||||
# - Manual workflow_dispatch: input default true → dry-run;
|
||||
# operator must flip it to actually delete.
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ "${{ github.event.inputs.dry_run || 'false' }}" = "true" ]; then
|
||||
echo "Running in dry-run mode — no deletions"
|
||||
bash scripts/ops/sweep-cf-tunnels.sh
|
||||
else
|
||||
echo "Running with --execute — will delete identified orphans"
|
||||
bash scripts/ops/sweep-cf-tunnels.sh --execute
|
||||
fi
|
||||
@@ -1,19 +1,27 @@
|
||||
name: Ops Scripts Tests
|
||||
|
||||
# Runs the unittest suite for scripts/ops/ on every PR + push that touches
|
||||
# the directory. Kept separate from the main CI so a script-only change
|
||||
# doesn't trigger the heavier Go/Canvas/Python pipelines.
|
||||
# Runs the unittest suite for scripts/ on every PR + push that touches
|
||||
# anything under scripts/. Kept separate from the main CI so a script-only
|
||||
# change doesn't trigger the heavier Go/Canvas/Python pipelines.
|
||||
#
|
||||
# Discovery layout: tests sit alongside the code they test (see
|
||||
# scripts/ops/test_sweep_cf_decide.py for the pattern; scripts/
|
||||
# test_build_runtime_package.py for the rewriter coverage). The job
|
||||
# below runs `unittest discover` TWICE — once from `scripts/`, once
|
||||
# from `scripts/ops/` — because neither dir has an `__init__.py`, so
|
||||
# a single discover from `scripts/` doesn't recurse into the ops
|
||||
# subdir. Two passes is simpler than retrofitting namespace packages.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- 'scripts/ops/**'
|
||||
- 'scripts/**'
|
||||
- '.github/workflows/test-ops-scripts.yml'
|
||||
pull_request:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- 'scripts/ops/**'
|
||||
- 'scripts/**'
|
||||
- '.github/workflows/test-ops-scripts.yml'
|
||||
merge_group:
|
||||
types: [checks_requested]
|
||||
@@ -31,6 +39,14 @@ jobs:
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
- name: Run unittest
|
||||
- name: Run scripts/ unittests (build_runtime_package, …)
|
||||
# Top-level scripts/ tests live alongside their target file
|
||||
# (e.g. scripts/test_build_runtime_package.py exercises
|
||||
# scripts/build_runtime_package.py). discover from scripts/
|
||||
# picks up only top-level test_*.py because scripts/ops/ has
|
||||
# no __init__.py — that's intentional, so we run two passes.
|
||||
working-directory: scripts
|
||||
run: python -m unittest discover -t . -p 'test_*.py' -v
|
||||
- name: Run scripts/ops/ unittests (sweep_cf_decide, …)
|
||||
working-directory: scripts/ops
|
||||
run: python -m unittest discover -p 'test_*.py' -v
|
||||
|
||||
@@ -146,3 +146,4 @@ backups/
|
||||
*-temp.txt
|
||||
/test-pmm-*.txt
|
||||
/tick-reflections-*.md
|
||||
tests/harness/cp-stub/cp-stub
|
||||
|
||||
@@ -53,6 +53,29 @@ cp .env.example .env
|
||||
|
||||
See `CLAUDE.md` for a full list of environment variables and their purposes.
|
||||
|
||||
## What goes where (content vs code)
|
||||
|
||||
This repo is scoped to **code** (canvas, workspace, workspace-server, related
|
||||
infra). Public content (blog posts, marketing copy, OG images, SEO briefs,
|
||||
DevRel demos) lives in [`Molecule-AI/docs`](https://github.com/Molecule-AI/docs).
|
||||
The `Block forbidden paths` CI gate fails any PR that writes to `marketing/`
|
||||
or other removed paths — open against `Molecule-AI/docs` instead.
|
||||
|
||||
| Content type | Target |
|
||||
|---|---|
|
||||
| Blog posts | `Molecule-AI/docs` → `content/blog/<YYYY-MM-DD-slug>/` |
|
||||
| Doc pages | `Molecule-AI/docs` → `content/docs/` |
|
||||
| Marketing copy / PMM positioning | `Molecule-AI/docs` → `marketing/` |
|
||||
| OG images, visual assets | `Molecule-AI/docs` → `app/` or `marketing/` |
|
||||
| SEO briefs | `Molecule-AI/docs` → `marketing/` |
|
||||
| DevRel demos (runnable code) | Standalone repo under `Molecule-AI/`, OR embedded in `Molecule-AI/docs` |
|
||||
| Launch checklists, internal tracking | GitHub Issues — **not** committed files |
|
||||
| Engineering docs (`docs/adr/`, `docs/architecture/`, `docs/incidents/`) | This repo (internal, not published) |
|
||||
| Live product pages (e.g. `canvas/src/app/pricing/page.tsx`) | This repo (these are app code, not marketing copy) |
|
||||
|
||||
If a PR fails the `Block forbidden paths` check, the contents belong in
|
||||
`Molecule-AI/docs`. No CI drag, no Canvas E2E, content lands in minutes.
|
||||
|
||||
## Development Workflow
|
||||
|
||||
### Branch Naming
|
||||
@@ -152,6 +175,17 @@ and run CI manually.
|
||||
- Type hints on public functions
|
||||
- pytest for all tests
|
||||
|
||||
## External integrations
|
||||
|
||||
Code in this repo lands in molecule-core. Some related runtime artifacts
|
||||
live in their own repos:
|
||||
|
||||
- [`Molecule-AI/molecule-ai-workspace-runtime`](https://github.com/Molecule-AI/molecule-ai-workspace-runtime) — Python adapter SDK (`molecule_runtime`) that runs inside containerized Molecule workspaces. Bridges Claude Code SDK / hermes / langgraph / etc. → A2A queue.
|
||||
- [`Molecule-AI/molecule-sdk-python`](https://github.com/Molecule-AI/molecule-sdk-python) — `A2AServer` + `RemoteAgentClient` for external agents that register over the public `/registry/register` flow.
|
||||
- [`Molecule-AI/molecule-mcp-claude-channel`](https://github.com/Molecule-AI/molecule-mcp-claude-channel) — Claude Code channel plugin. Bridges A2A traffic into a running Claude Code session via MCP `notifications/claude/channel`. Polling-based (no tunnel required); install with `claude --channels plugin:molecule@Molecule-AI/molecule-mcp-claude-channel`.
|
||||
|
||||
When extending the **A2A surface** in molecule-core (`workspace-server/internal/handlers/a2a_proxy.go` etc.), consider whether the change has a downstream impact on the runtime SDK or the channel plugin — they're versioned independently but share the wire shape.
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
See `CLAUDE.md` for detailed architecture documentation, including:
|
||||
|
||||
@@ -39,8 +39,8 @@
|
||||
<a href="./docs/agent-runtime/workspace-runtime.md"><strong>Workspace Runtime</strong></a>
|
||||
</p>
|
||||
|
||||
[](https://railway.app/new/template?template=https://github.com/Molecule-AI/molecule-core)
|
||||
[](https://render.com/deploy?repo=https://github.com/Molecule-AI/molecule-core)
|
||||
[](https://railway.app/new/template?template=https://github.com/Molecule-AI/molecule-monorepo)
|
||||
[](https://render.com/deploy?repo=https://github.com/Molecule-AI/molecule-monorepo)
|
||||
|
||||
</div>
|
||||
|
||||
@@ -249,8 +249,8 @@ Workspace Runtime (Python image with adapters)
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
git clone https://github.com/Molecule-AI/molecule-core.git
|
||||
cd molecule-core
|
||||
git clone https://github.com/Molecule-AI/molecule-monorepo.git
|
||||
cd molecule-monorepo
|
||||
|
||||
cp .env.example .env
|
||||
# Defaults boot the stack locally out of the box. See .env.example for
|
||||
|
||||
@@ -111,6 +111,20 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
const adminAuth = { Authorization: `Bearer ${ADMIN_TOKEN}` };
|
||||
console.log(`[staging-setup] Using slug=${slug}`);
|
||||
|
||||
// Write the state file FIRST, before any CP call. Teardown (both
|
||||
// Playwright globalTeardown and the workflow safety-net) reads this
|
||||
// file to identify the slug it must clean up. If we wait until the
|
||||
// end of setup to write it (the previous behavior), a crash during
|
||||
// any of steps 1-6 leaves the org orphaned in CP with no record on
|
||||
// disk — forcing the workflow safety-net into a pattern-sweep over
|
||||
// every `e2e-canvas-<date>-*` org, which races with concurrent
|
||||
// canvas-E2E runs and deletes their live tenants. Race observed
|
||||
// 2026-04-30 on PR #2264 staging→main: three real-test runs killed
|
||||
// each other's tenants mid-test, surfacing as `getaddrinfo ENOTFOUND`
|
||||
// when CP cleaned up the just-deleted DNS record.
|
||||
const stateFile = join(process.cwd(), ".playwright-staging-state.json");
|
||||
writeFileSync(stateFile, JSON.stringify({ slug }, null, 2));
|
||||
|
||||
// 1. Create org via admin endpoint — no WorkOS session needed
|
||||
const create = await jsonFetch(`${CP_URL}/cp/admin/orgs`, {
|
||||
method: "POST",
|
||||
@@ -245,8 +259,8 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
);
|
||||
console.log(`[staging-setup] Workspace online`);
|
||||
|
||||
// 7. Hand state off to tests + teardown
|
||||
const stateFile = join(process.cwd(), ".playwright-staging-state.json");
|
||||
// 7. Hand state off to tests + teardown — overwrite the slug-only
|
||||
// bootstrap state with the full state spec tests need.
|
||||
writeFileSync(
|
||||
stateFile,
|
||||
JSON.stringify({ slug, tenantURL, workspaceId, tenantToken }, null, 2),
|
||||
|
||||
@@ -24,7 +24,11 @@ export default async function globalTeardown(): Promise<void> {
|
||||
|
||||
const stateFile = join(process.cwd(), ".playwright-staging-state.json");
|
||||
if (!existsSync(stateFile)) {
|
||||
console.warn("[staging-teardown] no state file — setup must have failed before org create; nothing to tear down");
|
||||
// staging-setup writes this file as its first action, before any
|
||||
// CP call. Missing here means setup never ran (CANVAS_E2E_STAGING
|
||||
// unset, or ran in a different cwd) — there's no slug we created
|
||||
// that needs cleaning up.
|
||||
console.warn("[staging-teardown] no state file — nothing to tear down");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
/**
|
||||
* Canvas /api/buildinfo — version-display endpoint mirroring
|
||||
* workspace-server's /buildinfo. Lets `curl <url>/api/buildinfo`
|
||||
* confirm which git SHA is live on a canvas deployment.
|
||||
*/
|
||||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||
import { GET } from "../route";
|
||||
|
||||
const ENV_KEYS = ["VERCEL_GIT_COMMIT_SHA", "VERCEL_GIT_COMMIT_REF", "VERCEL_ENV"];
|
||||
|
||||
describe("GET /api/buildinfo", () => {
|
||||
let saved: Record<string, string | undefined>;
|
||||
|
||||
beforeEach(() => {
|
||||
saved = Object.fromEntries(ENV_KEYS.map((k) => [k, process.env[k]]));
|
||||
for (const k of ENV_KEYS) delete process.env[k];
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
for (const k of ENV_KEYS) {
|
||||
if (saved[k] === undefined) delete process.env[k];
|
||||
else process.env[k] = saved[k];
|
||||
}
|
||||
});
|
||||
|
||||
it("returns dev sentinel when Vercel env vars are unset", async () => {
|
||||
const res = await GET();
|
||||
const body = await res.json();
|
||||
expect(body).toEqual({ git_sha: "dev", git_ref: "", vercel_env: "local" });
|
||||
});
|
||||
|
||||
it("reports the SHA Vercel injected at build time", async () => {
|
||||
process.env.VERCEL_GIT_COMMIT_SHA = "abc1234567890";
|
||||
process.env.VERCEL_GIT_COMMIT_REF = "main";
|
||||
process.env.VERCEL_ENV = "production";
|
||||
const res = await GET();
|
||||
const body = await res.json();
|
||||
expect(body.git_sha).toBe("abc1234567890");
|
||||
expect(body.git_ref).toBe("main");
|
||||
expect(body.vercel_env).toBe("production");
|
||||
});
|
||||
|
||||
it("returns 200 status and JSON content type", async () => {
|
||||
const res = await GET();
|
||||
expect(res.status).toBe(200);
|
||||
expect(res.headers.get("content-type")).toContain("application/json");
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,18 @@
|
||||
import { NextResponse } from "next/server";
|
||||
|
||||
// Mirror of workspace-server's GET /buildinfo (PR #2398). Lets a developer
|
||||
// confirm which git SHA is live on a canvas deployment with the same
|
||||
// `curl <url>/buildinfo` flow they use against tenant workspaces.
|
||||
//
|
||||
// Vercel injects VERCEL_GIT_COMMIT_SHA / _REF / VERCEL_ENV at build time
|
||||
// from the deploying commit; outside Vercel (local `next dev`, harness)
|
||||
// these are unset and the endpoint reports `git_sha: "dev"`. Same sentinel
|
||||
// the workspace-server uses pre-ldflags-injection so both surfaces speak
|
||||
// the same vocabulary.
|
||||
export async function GET() {
|
||||
return NextResponse.json({
|
||||
git_sha: process.env.VERCEL_GIT_COMMIT_SHA ?? "dev",
|
||||
git_ref: process.env.VERCEL_GIT_COMMIT_REF ?? "",
|
||||
vercel_env: process.env.VERCEL_ENV ?? "local",
|
||||
});
|
||||
}
|
||||
@@ -12,6 +12,19 @@ interface WorkspaceOption {
|
||||
tier: number;
|
||||
}
|
||||
|
||||
// Subset of the /templates row used here. Mirrors the shape ConfigTab
|
||||
// reads. `providers` is the per-template declarative list of supported
|
||||
// LLM providers — sourced from the template's
|
||||
// runtime_config.providers (config.yaml). When present, it filters
|
||||
// the modal's provider <select> so an operator can only pick a
|
||||
// provider the template actually supports.
|
||||
interface TemplateSpec {
|
||||
id: string;
|
||||
name?: string;
|
||||
runtime?: string;
|
||||
providers?: string[];
|
||||
}
|
||||
|
||||
interface HermesProvider {
|
||||
id: string;
|
||||
label: string;
|
||||
@@ -55,6 +68,13 @@ export function CreateWorkspaceButton() {
|
||||
const [creating, setCreating] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [workspaces, setWorkspaces] = useState<WorkspaceOption[]>([]);
|
||||
// Templates fetched from /api/templates — drives the dynamic provider
|
||||
// filter below. Same data source ConfigTab uses (PR #2454). When the
|
||||
// selected template declares `runtime_config.providers` in its
|
||||
// config.yaml, the modal surfaces only those providers in the
|
||||
// <select>. Empty/missing list falls back to the full HERMES_PROVIDERS
|
||||
// catalog so older templates without the field keep working.
|
||||
const [templateSpecs, setTemplateSpecs] = useState<TemplateSpec[]>([]);
|
||||
// External-runtime path: skip docker provision, mint a workspace_auth_token,
|
||||
// and surface the connection snippet in a modal after create. When
|
||||
// isExternal is true the template / model / hermes-provider fields are
|
||||
@@ -130,6 +150,52 @@ export function CreateWorkspaceButton() {
|
||||
|
||||
const isHermes = template.trim().toLowerCase() === "hermes";
|
||||
|
||||
// Resolve the selected template's spec from the /templates response.
|
||||
// The `template` input is free-text; templates can be matched by id,
|
||||
// name, or runtime so any of those work. Lower-cased compare keeps
|
||||
// "Hermes" / "hermes" / "HERMES" interchangeable.
|
||||
const selectedTemplateSpec = useMemo<TemplateSpec | null>(() => {
|
||||
const t = template.trim().toLowerCase();
|
||||
if (!t) return null;
|
||||
return (
|
||||
templateSpecs.find(
|
||||
(s) =>
|
||||
(s.id || "").toLowerCase() === t ||
|
||||
(s.name || "").toLowerCase() === t ||
|
||||
(s.runtime || "").toLowerCase() === t,
|
||||
) ?? null
|
||||
);
|
||||
}, [template, templateSpecs]);
|
||||
|
||||
// Filter HERMES_PROVIDERS by what the template declares it supports.
|
||||
// Empty/missing declared list → fall back to the full catalog so
|
||||
// templates that haven't migrated to the explicit `providers:` field
|
||||
// (and self-hosted setups without /templates) keep working unchanged.
|
||||
const availableProviders = useMemo<HermesProvider[]>(() => {
|
||||
const declared = selectedTemplateSpec?.providers;
|
||||
if (!declared || declared.length === 0) return HERMES_PROVIDERS;
|
||||
const allowed = new Set(declared.map((p) => p.toLowerCase()));
|
||||
const filtered = HERMES_PROVIDERS.filter((p) => allowed.has(p.id.toLowerCase()));
|
||||
// Defensive: if the template's declared list doesn't match anything
|
||||
// in our static catalog (e.g. brand-new provider id we don't have
|
||||
// metadata for yet), fall back to the full list rather than render
|
||||
// an empty <select>. Better to over-show than to lock the user out.
|
||||
return filtered.length > 0 ? filtered : HERMES_PROVIDERS;
|
||||
}, [selectedTemplateSpec]);
|
||||
|
||||
// If the currently-selected provider is filtered out by a template
|
||||
// change, snap back to the first available. Without this, the
|
||||
// hermesProvider state could refer to a provider not in the dropdown
|
||||
// — confusing UI + the API key field's envVar would be wrong.
|
||||
useEffect(() => {
|
||||
if (!isHermes) return;
|
||||
if (availableProviders.length === 0) return;
|
||||
if (!availableProviders.some((p) => p.id === hermesProvider)) {
|
||||
setHermesProvider(availableProviders[0].id);
|
||||
}
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [availableProviders, isHermes]);
|
||||
|
||||
// Auto-fill hermesModel with the provider's defaultModel whenever the
|
||||
// provider changes, but only if the user hasn't already typed their own
|
||||
// slug. Prevents the empty-model → "auto" → Anthropic-default 401 trap.
|
||||
@@ -163,6 +229,10 @@ export function CreateWorkspaceButton() {
|
||||
.get<WorkspaceOption[]>("/workspaces")
|
||||
.then((ws) => setWorkspaces(ws))
|
||||
.catch(() => {});
|
||||
api
|
||||
.get<TemplateSpec[]>("/templates")
|
||||
.then((rows) => setTemplateSpecs(Array.isArray(rows) ? rows : []))
|
||||
.catch(() => { /* keep empty — HERMES_PROVIDERS fallback below */ });
|
||||
// defaultTier is stable for the session (derived from window.location),
|
||||
// safe to omit from deps.
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
@@ -405,7 +475,7 @@ export function CreateWorkspaceButton() {
|
||||
aria-label="Hermes provider"
|
||||
className="w-full bg-zinc-800/60 border border-zinc-700/50 rounded-lg px-3 py-2 text-sm text-zinc-100 focus:outline-none focus:border-violet-500/60 focus:ring-1 focus:ring-violet-500/20 transition-colors"
|
||||
>
|
||||
{HERMES_PROVIDERS.map((p) => (
|
||||
{availableProviders.map((p) => (
|
||||
<option key={p.id} value={p.id}>
|
||||
{p.label}
|
||||
</option>
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
'use client';
|
||||
|
||||
// ExternalConnectModal — shown once after creating a runtime="external"
|
||||
// workspace. Surfaces the workspace_auth_token + ready-to-paste snippets
|
||||
// so the operator can hand them to whoever runs their off-host agent
|
||||
@@ -24,6 +26,20 @@ export interface ExternalConnectionInfo {
|
||||
heartbeat_endpoint: string;
|
||||
curl_register_template: string;
|
||||
python_snippet: string;
|
||||
// Claude Code channel plugin snippet — for operators whose external
|
||||
// agent IS a Claude Code session. Polling-based; no tunnel required.
|
||||
// Optional in the type for backward compat with platforms that
|
||||
// haven't shipped molecule-core PR #2304 yet (older response payload
|
||||
// omits the field; tab is hidden if empty).
|
||||
claude_code_channel_snippet?: string;
|
||||
// Universal MCP snippet — runtime-agnostic outbound tool path via
|
||||
// the `molecule-mcp` console script in the
|
||||
// molecule-ai-workspace-runtime PyPI wheel. Works with any MCP-aware
|
||||
// agent runtime (Claude Code, hermes, codex, third-party). Outbound-
|
||||
// only: pair with claude_code_channel or python tabs for heartbeat
|
||||
// + inbound. Optional for backward compat with platforms that
|
||||
// haven't shipped PR #2413 yet.
|
||||
universal_mcp_snippet?: string;
|
||||
}
|
||||
|
||||
interface Props {
|
||||
@@ -31,10 +47,14 @@ interface Props {
|
||||
onClose: () => void;
|
||||
}
|
||||
|
||||
type Tab = "python" | "curl" | "fields";
|
||||
type Tab = "python" | "curl" | "claude" | "mcp" | "fields";
|
||||
|
||||
export function ExternalConnectModal({ info, onClose }: Props) {
|
||||
const [tab, setTab] = useState<Tab>("python");
|
||||
// Default to Claude Code when the platform offers it — that's the
|
||||
// newest + simplest path (no tunnel needed). Falls back to Python
|
||||
// for older platform builds that don't ship the snippet.
|
||||
const initialTab: Tab = info?.claude_code_channel_snippet ? "claude" : "python";
|
||||
const [tab, setTab] = useState<Tab>(initialTab);
|
||||
const [copiedKey, setCopiedKey] = useState<string | null>(null);
|
||||
|
||||
const copy = useCallback(async (value: string, key: string) => {
|
||||
@@ -70,6 +90,24 @@ export function ExternalConnectModal({ info, onClose }: Props) {
|
||||
'WORKSPACE_AUTH_TOKEN="<paste from create response>"',
|
||||
`WORKSPACE_AUTH_TOKEN="${info.auth_token}"`,
|
||||
);
|
||||
// The channel snippet asks the operator to paste the auth_token into
|
||||
// the .env file's MOLECULE_WORKSPACE_TOKENS field. Stamp it server-side
|
||||
// here so the copy-paste-block is truly ready-to-run.
|
||||
const filledChannel = info.claude_code_channel_snippet?.replace(
|
||||
'MOLECULE_WORKSPACE_TOKENS=<paste auth_token from create response>',
|
||||
`MOLECULE_WORKSPACE_TOKENS=${info.auth_token}`,
|
||||
);
|
||||
// Universal MCP snippet uses MOLECULE_WORKSPACE_TOKEN as the env-var
|
||||
// name passed through to molecule-mcp via `claude mcp add ... -- env
|
||||
// MOLECULE_WORKSPACE_TOKEN=...`. The placeholder must match the
|
||||
// template's literal — pre-2026-04-30 polish this looked for
|
||||
// WORKSPACE_AUTH_TOKEN (carryover from the curl tab), which silently
|
||||
// skipped the substitution and left "<paste from create response>"
|
||||
// visible in the operator's clipboard.
|
||||
const filledUniversalMcp = info.universal_mcp_snippet?.replace(
|
||||
'MOLECULE_WORKSPACE_TOKEN="<paste from create response>"',
|
||||
`MOLECULE_WORKSPACE_TOKEN="${info.auth_token}"`,
|
||||
);
|
||||
|
||||
return (
|
||||
<Dialog.Root open onOpenChange={(o) => !o && onClose()}>
|
||||
@@ -91,7 +129,19 @@ export function ExternalConnectModal({ info, onClose }: Props) {
|
||||
aria-label="Connection snippet format"
|
||||
className="mt-4 flex gap-1 border-b border-zinc-800"
|
||||
>
|
||||
{(["python", "curl", "fields"] as Tab[]).map((t) => (
|
||||
{(() => {
|
||||
// Build the tab order dynamically. Claude Code first
|
||||
// (when offered) since it's the simplest setup; Python
|
||||
// SDK second (full register+heartbeat+inbound); Universal
|
||||
// MCP third (any MCP-aware runtime, outbound-only); curl
|
||||
// for one-shot register; Fields for raw values.
|
||||
const tabs: Tab[] = [];
|
||||
if (filledChannel) tabs.push("claude");
|
||||
tabs.push("python");
|
||||
if (filledUniversalMcp) tabs.push("mcp");
|
||||
tabs.push("curl", "fields");
|
||||
return tabs;
|
||||
})().map((t) => (
|
||||
<button
|
||||
key={t}
|
||||
type="button"
|
||||
@@ -104,17 +154,34 @@ export function ExternalConnectModal({ info, onClose }: Props) {
|
||||
: "border-transparent text-zinc-500 hover:text-zinc-300"
|
||||
}`}
|
||||
>
|
||||
{t === "python" ? "Python SDK" : t === "curl" ? "curl" : "Fields"}
|
||||
{t === "claude"
|
||||
? "Claude Code"
|
||||
: t === "python"
|
||||
? "Python SDK"
|
||||
: t === "mcp"
|
||||
? "Universal MCP"
|
||||
: t === "curl"
|
||||
? "curl"
|
||||
: "Fields"}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{/* Snippet area */}
|
||||
<div className="mt-3">
|
||||
{tab === "claude" && filledChannel && (
|
||||
<SnippetBlock
|
||||
value={filledChannel}
|
||||
label="Claude Code channel — polls workspace's A2A; no tunnel needed"
|
||||
copyKey="claude"
|
||||
copied={copiedKey === "claude"}
|
||||
onCopy={() => copy(filledChannel, "claude")}
|
||||
/>
|
||||
)}
|
||||
{tab === "python" && (
|
||||
<SnippetBlock
|
||||
value={filledPython}
|
||||
label="Python (recommended — includes heartbeat loop)"
|
||||
label="Python SDK — includes heartbeat loop (push-mode, needs public URL)"
|
||||
copyKey="python"
|
||||
copied={copiedKey === "python"}
|
||||
onCopy={() => copy(filledPython, "python")}
|
||||
@@ -129,6 +196,15 @@ export function ExternalConnectModal({ info, onClose }: Props) {
|
||||
onCopy={() => copy(filledCurl, "curl")}
|
||||
/>
|
||||
)}
|
||||
{tab === "mcp" && filledUniversalMcp && (
|
||||
<SnippetBlock
|
||||
value={filledUniversalMcp}
|
||||
label="Universal MCP — standalone register + heartbeat + tools for any MCP-aware runtime (Claude Code, hermes, codex). Pair with Python or Claude Code tab if you need inbound A2A delivery."
|
||||
copyKey="mcp"
|
||||
copied={copiedKey === "mcp"}
|
||||
onCopy={() => copy(filledUniversalMcp, "mcp")}
|
||||
/>
|
||||
)}
|
||||
{tab === "fields" && (
|
||||
<div className="space-y-2">
|
||||
<Field label="workspace_id" value={info.workspace_id} onCopy={() => copy(info.workspace_id, "wsid")} copied={copiedKey === "wsid"} />
|
||||
|
||||
@@ -16,14 +16,35 @@ interface Props {
|
||||
/** Runtime slug — used only for the "The <runtime> runtime …"
|
||||
* headline; behavior is driven by providers/missingKeys. */
|
||||
runtime: string;
|
||||
/** Called when all required keys for the chosen provider are saved. */
|
||||
onKeysAdded: () => void;
|
||||
/** Called when all required keys for the chosen provider are saved.
|
||||
* Receives the model slug if the modal collected one (template-deploy
|
||||
* flow); legacy callers ignore it. */
|
||||
onKeysAdded: (model?: string) => void;
|
||||
/** Called when the user cancels the deploy. */
|
||||
onCancel: () => void;
|
||||
/** Optional — open the Settings Panel (Config tab → Secrets). */
|
||||
onOpenSettings?: () => void;
|
||||
/** If provided, secrets save at workspace scope instead of global. */
|
||||
workspaceId?: string;
|
||||
/** Set of env var names already configured in the relevant scope
|
||||
* (global or workspace). When provided, entries whose key is already
|
||||
* in this set start as `saved: true` so the user can confirm without
|
||||
* re-entering. Used by the template-deploy "always ask" flow so a
|
||||
* user can pick a different provider even when global env covers
|
||||
* the default one. */
|
||||
configuredKeys?: Set<string>;
|
||||
/** Model slug suggestions (datalist) — populated from the template's
|
||||
* models[]. When non-empty the picker renders a model input above
|
||||
* the API-key fields. The picker passes the entered slug back via
|
||||
* onKeysAdded. */
|
||||
modelSuggestions?: string[];
|
||||
/** Pre-fill the model input. */
|
||||
initialModel?: string;
|
||||
/** Override the modal's title + description copy. The default
|
||||
* "Missing API Keys" title misreads when the modal is opened to
|
||||
* pick provider/model with keys already configured. */
|
||||
title?: string;
|
||||
description?: string;
|
||||
}
|
||||
|
||||
interface KeyEntry {
|
||||
@@ -60,6 +81,11 @@ export function MissingKeysModal({
|
||||
onCancel,
|
||||
onOpenSettings,
|
||||
workspaceId,
|
||||
configuredKeys,
|
||||
modelSuggestions,
|
||||
initialModel,
|
||||
title,
|
||||
description,
|
||||
}: Props) {
|
||||
const pickerProviders = providers ?? [];
|
||||
const pickerMode = pickerProviders.length > 1;
|
||||
@@ -74,6 +100,11 @@ export function MissingKeysModal({
|
||||
onCancel={onCancel}
|
||||
onOpenSettings={onOpenSettings}
|
||||
workspaceId={workspaceId}
|
||||
configuredKeys={configuredKeys}
|
||||
modelSuggestions={modelSuggestions}
|
||||
initialModel={initialModel}
|
||||
title={title}
|
||||
description={description}
|
||||
/>
|
||||
);
|
||||
}
|
||||
@@ -108,17 +139,41 @@ function ProviderPickerModal({
|
||||
onCancel,
|
||||
onOpenSettings,
|
||||
workspaceId,
|
||||
configuredKeys,
|
||||
modelSuggestions,
|
||||
initialModel,
|
||||
title,
|
||||
description,
|
||||
}: {
|
||||
open: boolean;
|
||||
providers: ProviderChoice[];
|
||||
runtime: string;
|
||||
onKeysAdded: () => void;
|
||||
onKeysAdded: (model?: string) => void;
|
||||
onCancel: () => void;
|
||||
onOpenSettings?: () => void;
|
||||
workspaceId?: string;
|
||||
configuredKeys?: Set<string>;
|
||||
modelSuggestions?: string[];
|
||||
initialModel?: string;
|
||||
title?: string;
|
||||
description?: string;
|
||||
}) {
|
||||
const [selectedId, setSelectedId] = useState(providers[0].id);
|
||||
// Prefer the first provider whose env vars are already satisfied by
|
||||
// the configured set — pre-selecting "the option the user already has
|
||||
// keys for" matches expected UX. Falls back to providers[0] otherwise.
|
||||
const initialSelected = useMemo(() => {
|
||||
if (configuredKeys) {
|
||||
const satisfied = providers.find((p) =>
|
||||
p.envVars.every((k) => configuredKeys.has(k)),
|
||||
);
|
||||
if (satisfied) return satisfied.id;
|
||||
}
|
||||
return providers[0].id;
|
||||
}, [providers, configuredKeys]);
|
||||
|
||||
const [selectedId, setSelectedId] = useState(initialSelected);
|
||||
const [entries, setEntries] = useState<KeyEntry[]>([]);
|
||||
const [model, setModel] = useState(initialModel ?? "");
|
||||
const firstInputRef = useRef<HTMLInputElement>(null);
|
||||
|
||||
const selected = useMemo(
|
||||
@@ -126,10 +181,13 @@ function ProviderPickerModal({
|
||||
[providers, selectedId],
|
||||
);
|
||||
|
||||
const showModelInput = (modelSuggestions?.length ?? 0) > 0 || initialModel !== undefined;
|
||||
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
setSelectedId(providers[0].id);
|
||||
}, [open, providers]);
|
||||
setSelectedId(initialSelected);
|
||||
setModel(initialModel ?? "");
|
||||
}, [open, initialSelected, initialModel]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
@@ -137,12 +195,15 @@ function ProviderPickerModal({
|
||||
selected.envVars.map((key) => ({
|
||||
key,
|
||||
value: "",
|
||||
saved: false,
|
||||
// Pre-mark as saved when the key is already in the configured
|
||||
// set (global or workspace scope). Lets the user click Deploy
|
||||
// without re-entering a key the platform already holds.
|
||||
saved: configuredKeys?.has(key) ?? false,
|
||||
saving: false,
|
||||
error: null,
|
||||
})),
|
||||
);
|
||||
}, [open, selected]);
|
||||
}, [open, selected, configuredKeys]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
@@ -243,16 +304,52 @@ function ProviderPickerModal({
|
||||
</svg>
|
||||
</div>
|
||||
<h3 id="missing-keys-title" className="text-sm font-semibold text-zinc-100">
|
||||
Missing API Keys
|
||||
{title ?? "Missing API Keys"}
|
||||
</h3>
|
||||
</div>
|
||||
<p className="text-[12px] text-zinc-400 leading-relaxed">
|
||||
The <span className="text-amber-300 font-medium">{runtimeLabel}</span>{" "}
|
||||
runtime supports multiple providers. Pick one and paste its API key.
|
||||
{description ?? (
|
||||
<>
|
||||
The <span className="text-amber-300 font-medium">{runtimeLabel}</span>{" "}
|
||||
runtime supports multiple providers. Pick one and paste its API key.
|
||||
</>
|
||||
)}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="px-5 py-4 space-y-3">
|
||||
{showModelInput && (
|
||||
<div>
|
||||
<label
|
||||
htmlFor="provider-picker-model-input"
|
||||
className="text-[10px] uppercase tracking-wide text-zinc-500 font-semibold mb-1.5 block"
|
||||
>
|
||||
Model{" "}
|
||||
<span aria-hidden="true" className="text-red-400">*</span>
|
||||
<span className="sr-only"> (required)</span>
|
||||
</label>
|
||||
<input
|
||||
id="provider-picker-model-input"
|
||||
type="text"
|
||||
value={model}
|
||||
onChange={(e) => setModel(e.target.value)}
|
||||
placeholder="e.g. minimax/MiniMax-M2.7"
|
||||
aria-label="Model slug"
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
list="provider-picker-model-suggestions"
|
||||
className="w-full bg-zinc-900 border border-zinc-600 rounded px-2 py-1.5 text-[11px] text-zinc-100 font-mono focus:outline-none focus:border-blue-500 focus:ring-1 focus:ring-blue-500/20 transition-colors"
|
||||
/>
|
||||
<datalist id="provider-picker-model-suggestions">
|
||||
{modelSuggestions?.map((m) => (
|
||||
<option key={m} value={m} />
|
||||
))}
|
||||
</datalist>
|
||||
<p className="text-[9px] text-zinc-500 mt-1 leading-relaxed">
|
||||
Slug determines provider routing at install time.
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
<fieldset className="space-y-1.5">
|
||||
<legend className="text-[10px] uppercase tracking-wide text-zinc-500 font-semibold mb-1.5">
|
||||
Provider
|
||||
@@ -364,8 +461,12 @@ function ProviderPickerModal({
|
||||
Cancel Deploy
|
||||
</button>
|
||||
<button
|
||||
onClick={onKeysAdded}
|
||||
disabled={!allSaved || anySaving}
|
||||
onClick={() => onKeysAdded(showModelInput ? model.trim() : undefined)}
|
||||
disabled={
|
||||
!allSaved ||
|
||||
anySaving ||
|
||||
(showModelInput && model.trim() === "")
|
||||
}
|
||||
className="px-3.5 py-1.5 text-[12px] bg-blue-600 hover:bg-blue-500 text-white rounded-lg transition-colors disabled:opacity-40"
|
||||
>
|
||||
{allSaved ? "Deploy" : entries.length > 1 ? "Add Keys" : "Add Key"}
|
||||
|
||||
@@ -190,6 +190,91 @@ describe("CreateWorkspaceDialog — Hermes provider picker", () => {
|
||||
expect(ids).toContain("hermes");
|
||||
});
|
||||
|
||||
// Pins the dynamic-providers behavior: when the matched template's
|
||||
// /templates row declares `providers`, the dropdown filters to that
|
||||
// subset instead of showing the full HERMES_PROVIDERS catalog. Same
|
||||
// data source ConfigTab uses (PR #2454) — keeps the modal and the
|
||||
// settings tab honest about which providers a template supports.
|
||||
it("hermes provider dropdown filters to template-declared providers when /templates ships them", async () => {
|
||||
// Per-URL mock: /workspaces returns the existing fixture, /templates
|
||||
// returns a hermes row that only allows anthropic + minimax + openai.
|
||||
mockGet.mockImplementation(async (url: string) => {
|
||||
if (url === "/templates") {
|
||||
return [
|
||||
{ id: "hermes", name: "Hermes", runtime: "hermes", providers: ["anthropic", "minimax", "openai"] },
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
] as any;
|
||||
}
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
return SAMPLE_WORKSPACES as any;
|
||||
});
|
||||
|
||||
await openDialog();
|
||||
await setTemplate("hermes");
|
||||
await waitFor(() =>
|
||||
expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeTruthy()
|
||||
);
|
||||
const providerSelect = document.getElementById("hermes-provider-select") as HTMLSelectElement;
|
||||
// Filtered list arrives async after /templates fetch resolves —
|
||||
// keep waiting until the dropdown shrinks below the full catalog.
|
||||
await waitFor(() => expect(providerSelect.options.length).toBe(3));
|
||||
const ids = Array.from(providerSelect.options).map((o) => o.value);
|
||||
expect(ids).toEqual(expect.arrayContaining(["anthropic", "minimax", "openai"]));
|
||||
expect(ids).not.toContain("gemini");
|
||||
expect(ids).not.toContain("deepseek");
|
||||
});
|
||||
|
||||
// Back-compat: a template that hasn't migrated to runtime_config.providers
|
||||
// (older templates, self-hosted setups without /templates server) keeps
|
||||
// showing the full provider catalog. Operators picking from those
|
||||
// templates can't be locked out of providers we know hermes supports.
|
||||
it("hermes provider dropdown falls back to all providers when template declares no providers list", async () => {
|
||||
mockGet.mockImplementation(async (url: string) => {
|
||||
if (url === "/templates") {
|
||||
// No `providers` field — empty/missing → fall back to full catalog.
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
return [{ id: "hermes", name: "Hermes", runtime: "hermes" }] as any;
|
||||
}
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
return SAMPLE_WORKSPACES as any;
|
||||
});
|
||||
|
||||
await openDialog();
|
||||
await setTemplate("hermes");
|
||||
await waitFor(() =>
|
||||
expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeTruthy()
|
||||
);
|
||||
const providerSelect = document.getElementById("hermes-provider-select") as HTMLSelectElement;
|
||||
expect(providerSelect.options.length).toBe(HERMES_PROVIDERS.length);
|
||||
});
|
||||
|
||||
// Defensive: a template's declared list with NO matches against our
|
||||
// static catalog (e.g. a brand-new provider id we don't have label/
|
||||
// envVar metadata for yet) must not render an empty <select> — the
|
||||
// operator can't pick a provider, the form locks. Component falls
|
||||
// back to the full catalog so the user can still proceed.
|
||||
it("hermes provider dropdown falls back to all providers when template declares only unknown providers", async () => {
|
||||
mockGet.mockImplementation(async (url: string) => {
|
||||
if (url === "/templates") {
|
||||
return [
|
||||
{ id: "hermes", name: "Hermes", runtime: "hermes", providers: ["totally-new-provider-2030"] },
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
] as any;
|
||||
}
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
return SAMPLE_WORKSPACES as any;
|
||||
});
|
||||
|
||||
await openDialog();
|
||||
await setTemplate("hermes");
|
||||
await waitFor(() =>
|
||||
expect(document.querySelector("[data-testid='hermes-provider-section']")).toBeTruthy()
|
||||
);
|
||||
const providerSelect = document.getElementById("hermes-provider-select") as HTMLSelectElement;
|
||||
// Stays at full catalog length — no flapping to 0 then back.
|
||||
expect(providerSelect.options.length).toBe(HERMES_PROVIDERS.length);
|
||||
});
|
||||
|
||||
it("hermes API key field is a password input (masked)", async () => {
|
||||
await openDialog();
|
||||
await setTemplate("hermes");
|
||||
|
||||
@@ -100,6 +100,42 @@ interface RuntimeOption {
|
||||
value: string;
|
||||
label: string;
|
||||
models: ModelSpec[];
|
||||
// providers is the declarative provider list each template ships in
|
||||
// its config.yaml under runtime_config.providers. The /templates API
|
||||
// surfaces it (workspace-server templates.go) so canvas stays
|
||||
// adapter-driven: hermes ships ~20 slugs, claude-code ships
|
||||
// ["anthropic"], gemini-cli ships ["gemini"], etc. Empty list →
|
||||
// canvas falls back to deriving unique vendor prefixes from
|
||||
// models[].id (still adapter-driven, just inferred).
|
||||
providers: string[];
|
||||
}
|
||||
|
||||
// deriveProvidersFromModels — when a template doesn't ship an explicit
|
||||
// providers list, infer suggestions from the vendor prefixes of its
|
||||
// model slugs. e.g. ["anthropic:claude-opus-4-7", "openai:gpt-4o",
|
||||
// "anthropic:claude-sonnet-4-5"] → ["anthropic", "openai"].
|
||||
//
|
||||
// This keeps the dropdown adapter-driven for older templates that
|
||||
// haven't migrated to the explicit `providers:` field yet, AND
|
||||
// continues to be a useful fallback for any future runtime whose
|
||||
// derive-provider semantics happen to match the slug prefix.
|
||||
function deriveProvidersFromModels(models: ModelSpec[]): string[] {
|
||||
const seen = new Set<string>();
|
||||
const out: string[] = [];
|
||||
for (const m of models) {
|
||||
if (!m.id) continue;
|
||||
// Both ":" (anthropic:claude-opus-4-7) and "/" (nousresearch/hermes-4-70b)
|
||||
// are valid vendor separators in our slug taxonomy. Take whichever
|
||||
// appears first and split there.
|
||||
const sep = m.id.match(/[:/]/)?.index ?? -1;
|
||||
if (sep <= 0) continue;
|
||||
const vendor = m.id.slice(0, sep);
|
||||
if (!seen.has(vendor)) {
|
||||
seen.add(vendor);
|
||||
out.push(vendor);
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
// Fallback used when /templates can't be fetched (offline, older backend).
|
||||
@@ -118,14 +154,14 @@ interface RuntimeOption {
|
||||
const RUNTIMES_WITH_OWN_CONFIG = new Set<string>(["external"]);
|
||||
|
||||
const FALLBACK_RUNTIME_OPTIONS: RuntimeOption[] = [
|
||||
{ value: "", label: "LangGraph (default)", models: [] },
|
||||
{ value: "claude-code", label: "Claude Code", models: [] },
|
||||
{ value: "crewai", label: "CrewAI", models: [] },
|
||||
{ value: "autogen", label: "AutoGen", models: [] },
|
||||
{ value: "deepagents", label: "DeepAgents", models: [] },
|
||||
{ value: "openclaw", label: "OpenClaw", models: [] },
|
||||
{ value: "hermes", label: "Hermes", models: [] },
|
||||
{ value: "gemini-cli", label: "Gemini CLI", models: [] },
|
||||
{ value: "", label: "LangGraph (default)", models: [], providers: [] },
|
||||
{ value: "claude-code", label: "Claude Code", models: [], providers: [] },
|
||||
{ value: "crewai", label: "CrewAI", models: [], providers: [] },
|
||||
{ value: "autogen", label: "AutoGen", models: [], providers: [] },
|
||||
{ value: "deepagents", label: "DeepAgents", models: [], providers: [] },
|
||||
{ value: "openclaw", label: "OpenClaw", models: [], providers: [] },
|
||||
{ value: "hermes", label: "Hermes", models: [], providers: [] },
|
||||
{ value: "gemini-cli", label: "Gemini CLI", models: [], providers: [] },
|
||||
];
|
||||
|
||||
export function ConfigTab({ workspaceId }: Props) {
|
||||
@@ -138,6 +174,17 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
const [rawMode, setRawMode] = useState(false);
|
||||
const [rawDraft, setRawDraft] = useState("");
|
||||
const [runtimeOptions, setRuntimeOptions] = useState<RuntimeOption[]>(FALLBACK_RUNTIME_OPTIONS);
|
||||
// Provider override (Option B PR-5): stored separately from config.yaml
|
||||
// because the value lives in workspace_secrets (encrypted), not in the
|
||||
// platform-managed config.yaml. The two endpoints are GET/PUT
|
||||
// /workspaces/:id/provider on workspace-server (handlers/secrets.go).
|
||||
// Empty = "auto-derive from model slug prefix" — pre-Option-B behavior
|
||||
// and what most users want. Setting to a non-empty value writes
|
||||
// LLM_PROVIDER into workspace_secrets and triggers an auto-restart so
|
||||
// the workspace boots with the new provider in env (and via CP user-
|
||||
// data, written into /configs/config.yaml on next provision too).
|
||||
const [provider, setProvider] = useState("");
|
||||
const [originalProvider, setOriginalProvider] = useState("");
|
||||
const successTimerRef = useRef<ReturnType<typeof setTimeout>>(undefined);
|
||||
|
||||
useEffect(() => {
|
||||
@@ -168,6 +215,22 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
wsMetadataModel = (m.model || "").trim();
|
||||
} catch { /* non-fatal */ }
|
||||
|
||||
// Load explicit provider override (Option B PR-5). Endpoint returns
|
||||
// {provider: "", source: "default"} when no override is set, so the
|
||||
// empty string is the legitimate "auto-derive" signal — don't treat
|
||||
// it as a load error. Non-fatal: an older workspace-server that
|
||||
// predates PR-2 returns 404 here; the form falls back to "" and
|
||||
// Save just won't PUT the provider field.
|
||||
try {
|
||||
const p = await api.get<{ provider?: string }>(`/workspaces/${workspaceId}/provider`);
|
||||
const loadedProvider = (p.provider || "").trim();
|
||||
setProvider(loadedProvider);
|
||||
setOriginalProvider(loadedProvider);
|
||||
} catch {
|
||||
setProvider("");
|
||||
setOriginalProvider("");
|
||||
}
|
||||
|
||||
try {
|
||||
const res = await api.get<{ content: string }>(`/workspaces/${workspaceId}/files/config.yaml`);
|
||||
const parsed = parseYaml(res.content);
|
||||
@@ -209,11 +272,11 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
|
||||
useEffect(() => {
|
||||
let cancelled = false;
|
||||
api.get<Array<{ id: string; name?: string; runtime?: string; models?: ModelSpec[] }>>("/templates")
|
||||
api.get<Array<{ id: string; name?: string; runtime?: string; models?: ModelSpec[]; providers?: string[] }>>("/templates")
|
||||
.then((rows) => {
|
||||
if (cancelled || !Array.isArray(rows)) return;
|
||||
const byRuntime = new Map<string, RuntimeOption>();
|
||||
byRuntime.set("", { value: "", label: "LangGraph (default)", models: [] });
|
||||
byRuntime.set("", { value: "", label: "LangGraph (default)", models: [], providers: [] });
|
||||
for (const r of rows) {
|
||||
const v = (r.runtime || "").trim();
|
||||
if (!v || v === "langgraph") continue;
|
||||
@@ -221,8 +284,9 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
// one with the richer models list is probably newer.
|
||||
const existing = byRuntime.get(v);
|
||||
const models = Array.isArray(r.models) ? r.models : [];
|
||||
const providers = Array.isArray(r.providers) ? r.providers : [];
|
||||
if (!existing || models.length > existing.models.length) {
|
||||
byRuntime.set(v, { value: v, label: r.name || v, models });
|
||||
byRuntime.set(v, { value: v, label: r.name || v, models, providers });
|
||||
}
|
||||
}
|
||||
if (byRuntime.size > 1) setRuntimeOptions(Array.from(byRuntime.values()));
|
||||
@@ -234,6 +298,16 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
// Models + env hints for the currently-selected runtime.
|
||||
const selectedRuntime = runtimeOptions.find((o) => o.value === (config.runtime || "")) ?? null;
|
||||
const availableModels: ModelSpec[] = selectedRuntime?.models ?? [];
|
||||
// Provider suggestions: prefer the runtime's declarative providers
|
||||
// list (sourced from its template config.yaml runtime_config.providers
|
||||
// and surfaced via /templates), fall back to deriving from model slug
|
||||
// prefixes when the template hasn't migrated to the explicit field
|
||||
// yet. Either way the data flows from the adapter — no hardcoded
|
||||
// canvas-side enum.
|
||||
const providerSuggestions: string[] =
|
||||
(selectedRuntime?.providers && selectedRuntime.providers.length > 0)
|
||||
? selectedRuntime.providers
|
||||
: deriveProvidersFromModels(availableModels);
|
||||
const currentModelId = config.runtime_config?.model || config.model || "";
|
||||
const currentModelSpec = availableModels.find((m) => m.id === currentModelId) ?? null;
|
||||
|
||||
@@ -301,20 +375,57 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
// partial-save state — we report it as a user-visible warning
|
||||
// rather than lying "Saved" and letting the user discover the
|
||||
// revert on next reload.
|
||||
const oldModel = (oldParsed.model as string) || "";
|
||||
//
|
||||
// Read from runtime_config.model first, then fall back to top-level
|
||||
// model. The dropdown's onChange (above, ~line 475) writes to
|
||||
// runtime_config.model whenever a runtime is selected (hermes,
|
||||
// claude-code, etc.) and only falls back to top-level model when
|
||||
// there's no runtime. handleSave used to diff against top-level
|
||||
// model only, so for any runtime-bearing workspace the user's
|
||||
// model selection never persisted — they'd Save & Restart, the
|
||||
// EC2 would boot with HERMES_DEFAULT_MODEL empty, and hermes
|
||||
// would fall back to nousresearch/hermes-4-70b → "No LLM provider
|
||||
// configured" error in the chat. Caught 2026-04-30 on hongmingwang
|
||||
// hermes workspace 32993ee7-…cb9d75d112a5.
|
||||
const nextModelRaw = (nextSource.runtime_config as Record<string, unknown> | undefined)?.model;
|
||||
const oldModelRaw = (oldParsed.runtime_config as Record<string, unknown> | undefined)?.model;
|
||||
const nextModel =
|
||||
typeof nextModelRaw === "string" && nextModelRaw
|
||||
? nextModelRaw
|
||||
: typeof nextSource.model === "string"
|
||||
? nextSource.model
|
||||
: "";
|
||||
const oldModel =
|
||||
typeof oldModelRaw === "string" && oldModelRaw
|
||||
? oldModelRaw
|
||||
: (oldParsed.model as string) || "";
|
||||
let modelSaveError: string | null = null;
|
||||
if (
|
||||
typeof nextSource.model === "string" &&
|
||||
nextSource.model &&
|
||||
nextSource.model !== oldModel
|
||||
) {
|
||||
if (nextModel && nextModel !== oldModel) {
|
||||
try {
|
||||
await api.put(`/workspaces/${workspaceId}/model`, { model: nextSource.model });
|
||||
await api.put(`/workspaces/${workspaceId}/model`, { model: nextModel });
|
||||
} catch (e) {
|
||||
modelSaveError = e instanceof Error ? e.message : "Model update was rejected";
|
||||
}
|
||||
}
|
||||
|
||||
// Provider override save (Option B PR-5). PUT only when the user
|
||||
// changed the dropdown — otherwise an unrelated Save (e.g. tier
|
||||
// edit) would re-write the provider unchanged and the server-
|
||||
// side auto-restart would fire on every Save, costing the user a
|
||||
// ~30s reboot for a no-op change. Server endpoint accepts an
|
||||
// empty string to clear the override (deletes the
|
||||
// workspace_secrets row); we forward whatever the form holds.
|
||||
let providerSaveError: string | null = null;
|
||||
const providerChanged = provider !== originalProvider;
|
||||
if (providerChanged) {
|
||||
try {
|
||||
await api.put(`/workspaces/${workspaceId}/provider`, { provider });
|
||||
setOriginalProvider(provider);
|
||||
} catch (e) {
|
||||
providerSaveError = e instanceof Error ? e.message : "Provider update was rejected";
|
||||
}
|
||||
}
|
||||
|
||||
setOriginalYaml(content);
|
||||
if (rawMode) {
|
||||
const parsed = parseYaml(content);
|
||||
@@ -322,16 +433,30 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
} else {
|
||||
setRawDraft(content);
|
||||
}
|
||||
if (restart) {
|
||||
// SetProvider on the server already triggers an auto-restart for
|
||||
// the workspace whenever the value actually changed (see
|
||||
// workspace-server/internal/handlers/secrets.go:SetProvider). If
|
||||
// the user also clicked Save+Restart we'd kick off a SECOND
|
||||
// restart here and the two would race in the canvas store —
|
||||
// suppress the redundant call and rely on the server-side one.
|
||||
const providerWillAutoRestart = providerChanged && !providerSaveError;
|
||||
if (restart && !providerWillAutoRestart) {
|
||||
await useCanvasStore.getState().restartWorkspace(workspaceId);
|
||||
} else {
|
||||
useCanvasStore.getState().updateNodeData(workspaceId, { needsRestart: true });
|
||||
} else if (!restart) {
|
||||
useCanvasStore.getState().updateNodeData(workspaceId, { needsRestart: !providerWillAutoRestart });
|
||||
}
|
||||
if (modelSaveError) {
|
||||
// Partial-save UX: surface the model rejection instead of
|
||||
// showing "Saved" — the user would otherwise watch the model
|
||||
// field revert on next reload with no explanation.
|
||||
setError(`Other fields saved, but model update failed: ${modelSaveError}`);
|
||||
// Aggregate partial-save errors. Both modelSaveError and
|
||||
// providerSaveError describe rejected updates from independent
|
||||
// endpoints — show whichever fired so the user knows which
|
||||
// field reverts on next reload (otherwise they'd see "Saved" and
|
||||
// be confused why Provider snapped back).
|
||||
const partialError = providerSaveError
|
||||
? `Other fields saved, but provider update failed: ${providerSaveError}`
|
||||
: modelSaveError
|
||||
? `Other fields saved, but model update failed: ${modelSaveError}`
|
||||
: null;
|
||||
if (partialError) {
|
||||
setError(partialError);
|
||||
} else {
|
||||
setSuccess(true);
|
||||
clearTimeout(successTimerRef.current);
|
||||
@@ -352,7 +477,8 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
const taskBudgetId = useId();
|
||||
const sandboxBackendId = useId();
|
||||
|
||||
const isDirty = rawMode ? rawDraft !== originalYaml : toYaml(config) !== originalYaml;
|
||||
const providerDirty = provider !== originalProvider;
|
||||
const isDirty = (rawMode ? rawDraft !== originalYaml : toYaml(config) !== originalYaml) || providerDirty;
|
||||
|
||||
if (loading) {
|
||||
return <div className="p-4 text-xs text-zinc-500">Loading config...</div>;
|
||||
@@ -499,6 +625,51 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
{/* Provider override (Option B PR-5). Free-text combobox so
|
||||
operators can use any of the 30+ slugs hermes-agent's
|
||||
derive-provider.sh recognizes — the suggestion list is
|
||||
a hint, not a constraint. Empty = "auto-derive from
|
||||
model slug prefix" which is correct for the common case
|
||||
(model "anthropic:claude-opus-4-7" → provider derived
|
||||
as "anthropic"). The override is needed when the model
|
||||
alias has no clean vendor prefix (e.g. hermes default
|
||||
"nousresearch/hermes-4-70b" → derive returns empty →
|
||||
hermes errors "No LLM provider configured"). */}
|
||||
<div>
|
||||
<label htmlFor={`${runtimeId}-provider`} className="text-[10px] text-zinc-500 block mb-1">
|
||||
Provider
|
||||
<span className="ml-1 text-zinc-600">
|
||||
(override — leave empty to auto-derive from model slug)
|
||||
</span>
|
||||
</label>
|
||||
<input
|
||||
id={`${runtimeId}-provider`}
|
||||
type="text"
|
||||
list={providerSuggestions.length > 0 ? `${runtimeId}-providers` : undefined}
|
||||
value={provider}
|
||||
onChange={(e) => setProvider(e.target.value.trim())}
|
||||
placeholder={
|
||||
providerSuggestions.length > 0
|
||||
? `e.g. ${providerSuggestions.slice(0, 3).join(", ")} (empty = auto-derive)`
|
||||
: "empty = auto-derive from model slug"
|
||||
}
|
||||
aria-label="LLM provider override"
|
||||
data-testid="provider-input"
|
||||
className="w-full bg-zinc-800 border border-zinc-700 rounded px-2 py-1 text-xs text-zinc-200 font-mono focus:outline-none focus:border-blue-500"
|
||||
/>
|
||||
{providerSuggestions.length > 0 && (
|
||||
<datalist id={`${runtimeId}-providers`}>
|
||||
{providerSuggestions.map((p) => (
|
||||
<option key={p} value={p} />
|
||||
))}
|
||||
</datalist>
|
||||
)}
|
||||
{provider && provider !== originalProvider && (
|
||||
<p className="text-[10px] text-amber-500 mt-1">
|
||||
Provider change → workspace will auto-restart on Save.
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
<TagList
|
||||
label={
|
||||
currentModelSpec?.required_env?.length &&
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
// Each test pins one invariant. If any fails, the bug is back.
|
||||
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor } from "@testing-library/react";
|
||||
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
@@ -168,6 +168,116 @@ describe("ConfigTab — hermes workspace", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("ConfigTab — Save persists model under runtime_config.model (2026-04-30)", () => {
|
||||
// The dropdown's onChange writes to config.runtime_config.model whenever
|
||||
// a runtime is selected (hermes, claude-code, etc.) and only falls back
|
||||
// to top-level config.model when no runtime is set. The Save handler used
|
||||
// to diff against top-level model only, so for any runtime-bearing
|
||||
// workspace the user's model selection never persisted — Save & Restart
|
||||
// would reboot with HERMES_DEFAULT_MODEL empty, hermes would fall back
|
||||
// to nousresearch/hermes-4-70b → "No LLM provider configured" in chat.
|
||||
// Caught 2026-04-30 on hongmingwang hermes workspace.
|
||||
|
||||
it("PUTs /model when user picks a model on a hermes workspace", async () => {
|
||||
apiGet.mockImplementation((path: string) => {
|
||||
if (path === "/workspaces/ws-test") {
|
||||
return Promise.resolve({ runtime: "hermes" });
|
||||
}
|
||||
if (path === "/workspaces/ws-test/model") {
|
||||
return Promise.resolve({ model: "" });
|
||||
}
|
||||
if (path === "/workspaces/ws-test/files/config.yaml") {
|
||||
return Promise.reject(new Error("not found"));
|
||||
}
|
||||
if (path === "/templates") {
|
||||
return Promise.resolve([
|
||||
{
|
||||
id: "t-hermes",
|
||||
name: "Hermes",
|
||||
runtime: "hermes",
|
||||
models: [
|
||||
{ id: "minimax/MiniMax-M2.7-highspeed", name: "MiniMax M2.7" },
|
||||
],
|
||||
},
|
||||
]);
|
||||
}
|
||||
return Promise.reject(new Error(`unmocked api.get: ${path}`));
|
||||
});
|
||||
apiPut.mockResolvedValue({});
|
||||
apiPatch.mockResolvedValue({});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
|
||||
// Wait for the runtime dropdown to populate so the model textbox renders.
|
||||
await waitFor(() =>
|
||||
expect(
|
||||
(screen.getByRole("combobox", { name: /runtime/i }) as HTMLSelectElement).value,
|
||||
).toBe("hermes"),
|
||||
);
|
||||
|
||||
// The model input is a free-text input wired to a datalist of suggestions.
|
||||
const modelInput = (await waitFor(() =>
|
||||
screen.getByPlaceholderText(/anthropic:claude-sonnet/i),
|
||||
)) as HTMLInputElement;
|
||||
|
||||
fireEvent.change(modelInput, {
|
||||
target: { value: "minimax/MiniMax-M2.7-highspeed" },
|
||||
});
|
||||
|
||||
// Click Save & Restart.
|
||||
fireEvent.click(screen.getByRole("button", { name: /save & restart/i }));
|
||||
|
||||
await waitFor(() => {
|
||||
expect(apiPut).toHaveBeenCalledWith("/workspaces/ws-test/model", {
|
||||
model: "minimax/MiniMax-M2.7-highspeed",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("does NOT PUT /model when the value is unchanged (no-op restart)", async () => {
|
||||
apiGet.mockImplementation((path: string) => {
|
||||
if (path === "/workspaces/ws-test") {
|
||||
return Promise.resolve({ runtime: "hermes" });
|
||||
}
|
||||
if (path === "/workspaces/ws-test/model") {
|
||||
return Promise.resolve({ model: "minimax/MiniMax-M2.7" });
|
||||
}
|
||||
if (path === "/workspaces/ws-test/files/config.yaml") {
|
||||
return Promise.reject(new Error("not found"));
|
||||
}
|
||||
if (path === "/templates") {
|
||||
return Promise.resolve([
|
||||
{ id: "t-hermes", runtime: "hermes", models: [] },
|
||||
]);
|
||||
}
|
||||
return Promise.reject(new Error(`unmocked api.get: ${path}`));
|
||||
});
|
||||
apiPut.mockResolvedValue({});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
|
||||
// Wait for load.
|
||||
await waitFor(() =>
|
||||
expect(
|
||||
(screen.getByRole("combobox", { name: /runtime/i }) as HTMLSelectElement).value,
|
||||
).toBe("hermes"),
|
||||
);
|
||||
|
||||
// Force isDirty by toggling a field that doesn't affect model. (Save is
|
||||
// disabled until isDirty=true; we want to prove that even when Save
|
||||
// fires, /model isn't called for an unchanged model.) Skipped — easier
|
||||
// to just verify apiPut wasn't called with the model URL.
|
||||
|
||||
// Without any user edit, Save & Restart is disabled, so /model is
|
||||
// trivially not PUT. The asserts below verify no /model PUT happens
|
||||
// at any point during load.
|
||||
const modelPuts = apiPut.mock.calls.filter(
|
||||
([path]) => path === "/workspaces/ws-test/model",
|
||||
);
|
||||
expect(modelPuts).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("ConfigTab — config.yaml on disk", () => {
|
||||
it("workspace metadata (DB) wins over config.yaml when both are present (#2061)", async () => {
|
||||
// Priority inversion in #2061: previously config.yaml overrode DB, so
|
||||
|
||||
@@ -0,0 +1,332 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Regression tests for ConfigTab Provider override (Option B PR-5).
|
||||
//
|
||||
// What this pins: a free-text Provider combobox in the Runtime section
|
||||
// that lets the operator override the model→provider derivation hermes-
|
||||
// agent does internally. Without this UI, a fresh signup whose Hermes
|
||||
// workspace defaults to a model with no clean vendor prefix (e.g.
|
||||
// `nousresearch/hermes-4-70b`) hits the runtime's own preflight error:
|
||||
// "No LLM provider configured. Run `hermes model` to select a
|
||||
// provider, or run `hermes setup` for first-time configuration."
|
||||
// — even though tasks #195-198 wired the entire downstream pipe so a
|
||||
// non-empty provider WOULD flow through canvas → workspace-server →
|
||||
// CP user-data → workspace config.yaml → hermes adapter.
|
||||
//
|
||||
// Hongming Wang hit this on hongming.moleculesai.app at signup
|
||||
// 2026-05-01T17:35Z. Backend PRs were green, the gap was the missing
|
||||
// UI to set the value.
|
||||
//
|
||||
// Each test pins one invariant. If any fails, the bug is back.
|
||||
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
const apiGet = vi.fn();
|
||||
const apiPatch = vi.fn();
|
||||
const apiPut = vi.fn();
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (path: string) => apiGet(path),
|
||||
patch: (path: string, body: unknown) => apiPatch(path, body),
|
||||
put: (path: string, body: unknown) => apiPut(path, body),
|
||||
post: vi.fn(),
|
||||
del: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: Object.assign(
|
||||
(selector: (s: unknown) => unknown) => selector({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }),
|
||||
{ getState: () => ({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }) },
|
||||
),
|
||||
}));
|
||||
|
||||
vi.mock("../AgentCardSection", () => ({
|
||||
AgentCardSection: () => <div data-testid="agent-card-stub" />,
|
||||
}));
|
||||
|
||||
import { ConfigTab } from "../ConfigTab";
|
||||
|
||||
// wireApi — same shape as ConfigTab.hermes.test.tsx, extended with the
|
||||
// /provider endpoint. Each test sets `providerValue` to the value the
|
||||
// GET endpoint returns; "missing" means the endpoint rejects (older
|
||||
// workspace-server pre-PR-2 — must not crash the tab).
|
||||
function wireApi(opts: {
|
||||
workspaceRuntime?: string;
|
||||
workspaceModel?: string;
|
||||
configYamlContent?: string | null;
|
||||
templates?: Array<{ id: string; name?: string; runtime?: string; models?: unknown[]; providers?: string[] }>;
|
||||
providerValue?: string | "missing";
|
||||
}) {
|
||||
apiGet.mockImplementation((path: string) => {
|
||||
if (path === `/workspaces/ws-test`) {
|
||||
return Promise.resolve({ runtime: opts.workspaceRuntime ?? "" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/model`) {
|
||||
return Promise.resolve({ model: opts.workspaceModel ?? "" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/provider`) {
|
||||
if (opts.providerValue === "missing") {
|
||||
return Promise.reject(new Error("404"));
|
||||
}
|
||||
return Promise.resolve({ provider: opts.providerValue ?? "", source: opts.providerValue ? "workspace_secrets" : "default" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/files/config.yaml`) {
|
||||
if (opts.configYamlContent === null) return Promise.reject(new Error("not found"));
|
||||
return Promise.resolve({ content: opts.configYamlContent ?? "" });
|
||||
}
|
||||
if (path === "/templates") {
|
||||
return Promise.resolve(opts.templates ?? []);
|
||||
}
|
||||
return Promise.reject(new Error(`unmocked api.get: ${path}`));
|
||||
});
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
apiGet.mockReset();
|
||||
apiPatch.mockReset();
|
||||
apiPut.mockReset();
|
||||
});
|
||||
|
||||
describe("ConfigTab — Provider override (Option B PR-5)", () => {
|
||||
// Empty provider on load is the legitimate default ("auto-derive
|
||||
// from model slug prefix"), NOT an error. The endpoint returning
|
||||
// {provider: "", source: "default"} is the documented happy-path
|
||||
// shape — if the form treated that as "load failed" we'd lose the
|
||||
// ability to render the input at all on fresh workspaces.
|
||||
it("renders an empty Provider input when no override is set", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "nousresearch/hermes-4-70b",
|
||||
configYamlContent: "name: ws\nruntime: hermes\n",
|
||||
providerValue: "",
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
expect((input as HTMLInputElement).value).toBe("");
|
||||
});
|
||||
|
||||
// Pre-existing override loads back into the field on mount. Without
|
||||
// this, an operator who set provider=openrouter yesterday would see
|
||||
// the field blank today, conclude the value didn't stick, and
|
||||
// re-save — the resulting PUT-with-same-value would auto-restart
|
||||
// the workspace for nothing.
|
||||
it("loads an existing provider override from the server", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "nousresearch/hermes-4-70b",
|
||||
configYamlContent: "name: ws\nruntime: hermes\n",
|
||||
providerValue: "openrouter",
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
await waitFor(() => expect((input as HTMLInputElement).value).toBe("openrouter"));
|
||||
});
|
||||
|
||||
// Old workspace-server (pre-PR-2) returns a 404 on /provider. The
|
||||
// tab must keep loading — the fallback is "" (auto-derive), same as
|
||||
// a fresh workspace.
|
||||
it("falls back to empty provider when the endpoint is missing", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "nousresearch/hermes-4-70b",
|
||||
configYamlContent: "name: ws\nruntime: hermes\n",
|
||||
providerValue: "missing",
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
expect((input as HTMLInputElement).value).toBe("");
|
||||
// Tab should be fully rendered, not stuck in loading or error state.
|
||||
expect(screen.queryByText(/Loading config/i)).toBeNull();
|
||||
});
|
||||
|
||||
// Setting a value + Save must PUT to the right endpoint with the
|
||||
// right body shape. Server-side handler (workspace-server
|
||||
// handlers/secrets.go:SetProvider) reads body.provider — any other
|
||||
// key gets silently ignored and the workspace_secrets row stays
|
||||
// unset. This regression would manifest as "Save → Restart →
|
||||
// workspace still says No LLM provider configured."
|
||||
it("PUTs the new provider to /workspaces/:id/provider on Save", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "nousresearch/hermes-4-70b",
|
||||
configYamlContent: "name: ws\nruntime: hermes\n",
|
||||
providerValue: "",
|
||||
});
|
||||
apiPut.mockResolvedValue({ status: "saved", provider: "anthropic" });
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
|
||||
fireEvent.change(input, { target: { value: "anthropic" } });
|
||||
expect((input as HTMLInputElement).value).toBe("anthropic");
|
||||
|
||||
const saveBtn = screen.getByRole("button", { name: /^save$/i });
|
||||
fireEvent.click(saveBtn);
|
||||
|
||||
await waitFor(() => {
|
||||
const providerCalls = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/provider");
|
||||
expect(providerCalls.length).toBe(1);
|
||||
expect(providerCalls[0][1]).toEqual({ provider: "anthropic" });
|
||||
});
|
||||
});
|
||||
|
||||
// No-change Save must NOT PUT /provider. The server-side SetProvider
|
||||
// auto-restarts the workspace on every successful PUT — re-writing
|
||||
// an unchanged value would cost the user a ~30s reboot every time
|
||||
// they tweak some other field.
|
||||
it("does not PUT /provider when the value is unchanged", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "nousresearch/hermes-4-70b",
|
||||
configYamlContent: "name: ws\nruntime: hermes\ntier: 2\n",
|
||||
providerValue: "openrouter",
|
||||
});
|
||||
apiPut.mockResolvedValue({});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
await screen.findByTestId("provider-input");
|
||||
|
||||
// Click Save without touching the provider field. Trigger another
|
||||
// dirty-marker (tier change) so Save is enabled — the test is
|
||||
// about NOT touching /provider, not about Save being disabled.
|
||||
const tierSelect = screen.getByLabelText(/tier/i) as HTMLSelectElement;
|
||||
fireEvent.change(tierSelect, { target: { value: "3" } });
|
||||
|
||||
const saveBtn = screen.getByRole("button", { name: /^save$/i });
|
||||
fireEvent.click(saveBtn);
|
||||
|
||||
await waitFor(() => {
|
||||
// Some PUT(s) may fire (e.g. /model). Just assert /provider is NOT among them.
|
||||
const providerCalls = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/provider");
|
||||
expect(providerCalls.length).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
// The dropdown's suggestion list MUST come from the runtime's own
|
||||
// template (via /templates → runtime_config.providers), not a
|
||||
// hardcoded canvas-side enum. This is the "Native + pluggable
|
||||
// runtime" invariant: a new runtime declaring its own provider
|
||||
// taxonomy in its config.yaml gets a working dropdown without ANY
|
||||
// canvas-side change.
|
||||
//
|
||||
// Pinned by checking that suggestions surfaced in the datalist
|
||||
// exactly mirror what the templates endpoint returned for the
|
||||
// matching runtime. If a future contributor reintroduces a
|
||||
// PROVIDER_SUGGESTIONS-style hardcoded list and the datalist
|
||||
// contents don't follow the template, this test fails.
|
||||
it("populates the provider datalist from the matched runtime's templates entry", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "nousresearch/hermes-4-70b",
|
||||
configYamlContent: "name: ws\nruntime: hermes\n",
|
||||
providerValue: "",
|
||||
templates: [
|
||||
{
|
||||
id: "hermes",
|
||||
name: "Hermes",
|
||||
runtime: "hermes",
|
||||
models: [],
|
||||
// The provider list every runtime adapter ships in its own
|
||||
// config.yaml. Canvas must surface THIS, not its own list.
|
||||
providers: ["nous", "openrouter", "anthropic", "minimax-cn"],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
const listId = (input as HTMLInputElement).getAttribute("list");
|
||||
expect(listId).toBeTruthy();
|
||||
await waitFor(() => {
|
||||
const datalist = document.getElementById(listId!);
|
||||
expect(datalist).not.toBeNull();
|
||||
const optionValues = Array.from(datalist!.querySelectorAll("option")).map(
|
||||
(o) => (o as HTMLOptionElement).value,
|
||||
);
|
||||
// Order matters — most-common-first is part of the contract so
|
||||
// the demo flow lands on a working choice without scrolling.
|
||||
expect(optionValues).toEqual(["nous", "openrouter", "anthropic", "minimax-cn"]);
|
||||
});
|
||||
});
|
||||
|
||||
// Fallback path: when a template hasn't migrated to the explicit
|
||||
// `providers:` field yet, suggestions are derived from model slug
|
||||
// prefixes. Still adapter-driven (the slugs come from the template's
|
||||
// `models:` list), just inferred. This keeps existing templates
|
||||
// working while the platform team migrates them one at a time.
|
||||
it("falls back to model-slug prefixes when the runtime ships no providers list", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "anthropic:claude-opus-4-7",
|
||||
configYamlContent: "name: ws\nruntime: hermes\n",
|
||||
providerValue: "",
|
||||
templates: [
|
||||
{
|
||||
id: "hermes",
|
||||
name: "Hermes",
|
||||
runtime: "hermes",
|
||||
models: [
|
||||
{ id: "anthropic:claude-opus-4-7" },
|
||||
{ id: "openai:gpt-4o" },
|
||||
{ id: "anthropic:claude-sonnet-4-5" }, // dup vendor — must dedupe
|
||||
{ id: "nousresearch/hermes-4-70b" }, // "/" separator
|
||||
],
|
||||
// No `providers:` field → fallback derivation kicks in.
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
const listId = (input as HTMLInputElement).getAttribute("list");
|
||||
expect(listId).toBeTruthy();
|
||||
await waitFor(() => {
|
||||
const datalist = document.getElementById(listId!);
|
||||
const optionValues = Array.from(datalist!.querySelectorAll("option")).map(
|
||||
(o) => (o as HTMLOptionElement).value,
|
||||
);
|
||||
// Order = first-appearance from models[]; dedup keeps anthropic
|
||||
// once even though two model slugs use it.
|
||||
expect(optionValues).toEqual(["anthropic", "openai", "nousresearch"]);
|
||||
});
|
||||
});
|
||||
|
||||
// Empty string is a legitimate save target — it clears the override
|
||||
// (the server-side endpoint deletes the workspace_secrets row).
|
||||
// Operators who picked "anthropic" yesterday and want to revert to
|
||||
// auto-derive today should be able to do so by clearing the field
|
||||
// and clicking Save. Without this PUT path, the only way to clear
|
||||
// would be a direct DB edit.
|
||||
it("PUTs an empty string when the operator clears a previously-set provider", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "anthropic:claude-opus-4-7",
|
||||
configYamlContent: "name: ws\nruntime: hermes\n",
|
||||
providerValue: "openrouter",
|
||||
});
|
||||
apiPut.mockResolvedValue({ status: "cleared" });
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
await waitFor(() => expect((input as HTMLInputElement).value).toBe("openrouter"));
|
||||
|
||||
fireEvent.change(input, { target: { value: "" } });
|
||||
|
||||
const saveBtn = screen.getByRole("button", { name: /^save$/i });
|
||||
fireEvent.click(saveBtn);
|
||||
|
||||
await waitFor(() => {
|
||||
const providerCalls = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/provider");
|
||||
expect(providerCalls.length).toBe(1);
|
||||
expect(providerCalls[0][1]).toEqual({ provider: "" });
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -27,16 +27,16 @@ import { renderHook } from "@testing-library/react";
|
||||
import type { Template } from "@/lib/deploy-preflight";
|
||||
|
||||
// ── Hoisted mocks ────────────────────────────────────────────────────────────
|
||||
const { mockApiPost, mockCheckDeploySecrets, mockResolveRuntime } = vi.hoisted(
|
||||
() => ({
|
||||
const { mockApiPost, mockApiGet, mockCheckDeploySecrets, mockResolveRuntime } =
|
||||
vi.hoisted(() => ({
|
||||
mockApiPost: vi.fn(),
|
||||
mockApiGet: vi.fn(),
|
||||
mockCheckDeploySecrets: vi.fn(),
|
||||
mockResolveRuntime: vi.fn(),
|
||||
}),
|
||||
);
|
||||
}));
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: { post: mockApiPost },
|
||||
api: { post: mockApiPost, get: mockApiGet },
|
||||
}));
|
||||
|
||||
vi.mock("@/lib/deploy-preflight", async () => {
|
||||
@@ -51,20 +51,44 @@ vi.mock("@/lib/deploy-preflight", async () => {
|
||||
};
|
||||
});
|
||||
|
||||
// MissingKeysModal: render a minimal stand-in that exposes the two
|
||||
// callbacks the hook wires up. The real modal pulls in radix + the
|
||||
// secrets store, neither of which is relevant to this hook's behavior.
|
||||
// MissingKeysModal: render a minimal stand-in that exposes the
|
||||
// callbacks the hook wires up + dumps the new template-deploy props
|
||||
// (configuredKeys size, modelSuggestions, initialModel) into the
|
||||
// DOM so tests can assert on them. The real modal pulls in radix +
|
||||
// the secrets store, neither of which is relevant to this hook's
|
||||
// behavior.
|
||||
vi.mock("@/components/MissingKeysModal", () => ({
|
||||
MissingKeysModal: (props: {
|
||||
open: boolean;
|
||||
onKeysAdded: () => void;
|
||||
onKeysAdded: (model?: string) => void;
|
||||
onCancel: () => void;
|
||||
configuredKeys?: Set<string>;
|
||||
modelSuggestions?: string[];
|
||||
initialModel?: string;
|
||||
title?: string;
|
||||
}) =>
|
||||
props.open ? (
|
||||
<div data-testid="missing-keys-modal">
|
||||
<button data-testid="modal-keys-added" onClick={props.onKeysAdded}>
|
||||
<span data-testid="modal-configured-size">
|
||||
{props.configuredKeys?.size ?? 0}
|
||||
</span>
|
||||
<span data-testid="modal-model-suggestions">
|
||||
{(props.modelSuggestions ?? []).join(",")}
|
||||
</span>
|
||||
<span data-testid="modal-initial-model">{props.initialModel ?? ""}</span>
|
||||
<span data-testid="modal-title">{props.title ?? ""}</span>
|
||||
<button
|
||||
data-testid="modal-keys-added"
|
||||
onClick={() => props.onKeysAdded()}
|
||||
>
|
||||
keys added
|
||||
</button>
|
||||
<button
|
||||
data-testid="modal-keys-added-with-model"
|
||||
onClick={() => props.onKeysAdded("minimax/MiniMax-M2.7")}
|
||||
>
|
||||
keys added with model
|
||||
</button>
|
||||
<button data-testid="modal-cancel" onClick={props.onCancel}>
|
||||
cancel
|
||||
</button>
|
||||
@@ -95,6 +119,7 @@ function makeTemplate(over: Partial<Template> = {}): Template {
|
||||
|
||||
beforeEach(() => {
|
||||
mockApiPost.mockReset();
|
||||
mockApiGet.mockReset();
|
||||
mockCheckDeploySecrets.mockReset();
|
||||
mockResolveRuntime.mockReset();
|
||||
// Default: identity-mapped runtime, preflight passes.
|
||||
@@ -104,8 +129,12 @@ beforeEach(() => {
|
||||
missingKeys: [],
|
||||
providers: [],
|
||||
runtime: "claude-code",
|
||||
configuredKeys: new Set(),
|
||||
});
|
||||
mockApiPost.mockResolvedValue({ id: "ws-new" });
|
||||
// Default: secrets endpoint returns nothing so the picker
|
||||
// renders every entry as input. Multi-provider tests override.
|
||||
mockApiGet.mockResolvedValue([]);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
@@ -114,14 +143,38 @@ afterEach(() => {
|
||||
|
||||
// ── Tests ────────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("useTemplateDeploy — happy path", () => {
|
||||
it("preflight ok → POST /workspaces → onDeployed fires with new id", async () => {
|
||||
const onDeployed = vi.fn();
|
||||
const { result } = renderHook(() => useTemplateDeploy({ onDeployed }));
|
||||
/**
|
||||
* Drive the always-show-picker flow to completion: deploy() opens the
|
||||
* modal, then we click "keys added" to fire the actual POST. Centralised
|
||||
* here because as of the always-prompt change, every happy-path test
|
||||
* must click through the modal before asserting on POST.
|
||||
*/
|
||||
async function deployThroughPicker<T>(
|
||||
result: { current: ReturnType<typeof useTemplateDeploy> },
|
||||
rerender: () => void,
|
||||
template: Template,
|
||||
): Promise<void> {
|
||||
await act(async () => {
|
||||
await result.current.deploy(template);
|
||||
});
|
||||
rerender();
|
||||
render(<>{result.current.modal}</>);
|
||||
await act(async () => {
|
||||
fireEvent.click(screen.getByTestId("modal-keys-added"));
|
||||
// Let the fire-and-forget executeDeploy resolve.
|
||||
await Promise.resolve();
|
||||
await Promise.resolve();
|
||||
});
|
||||
}
|
||||
|
||||
await act(async () => {
|
||||
await result.current.deploy(makeTemplate());
|
||||
});
|
||||
describe("useTemplateDeploy — happy path", () => {
|
||||
it("preflight ok → modal opens → keys-added → POST /workspaces → onDeployed fires", async () => {
|
||||
const onDeployed = vi.fn();
|
||||
const { result, rerender } = renderHook(() =>
|
||||
useTemplateDeploy({ onDeployed }),
|
||||
);
|
||||
|
||||
await deployThroughPicker(result, rerender, makeTemplate());
|
||||
|
||||
expect(mockCheckDeploySecrets).toHaveBeenCalledTimes(1);
|
||||
expect(mockApiPost).toHaveBeenCalledWith(
|
||||
@@ -139,11 +192,11 @@ describe("useTemplateDeploy — happy path", () => {
|
||||
|
||||
it("uses caller-supplied canvasCoords when provided", async () => {
|
||||
const canvasCoords = vi.fn(() => ({ x: 42, y: 99 }));
|
||||
const { result } = renderHook(() => useTemplateDeploy({ canvasCoords }));
|
||||
const { result, rerender } = renderHook(() =>
|
||||
useTemplateDeploy({ canvasCoords }),
|
||||
);
|
||||
|
||||
await act(async () => {
|
||||
await result.current.deploy(makeTemplate());
|
||||
});
|
||||
await deployThroughPicker(result, rerender, makeTemplate());
|
||||
|
||||
expect(canvasCoords).toHaveBeenCalledTimes(1);
|
||||
expect(mockApiPost).toHaveBeenCalledWith(
|
||||
@@ -153,11 +206,9 @@ describe("useTemplateDeploy — happy path", () => {
|
||||
});
|
||||
|
||||
it("falls back to random coords inside [100,500] × [100,400] when canvasCoords omitted", async () => {
|
||||
const { result } = renderHook(() => useTemplateDeploy());
|
||||
const { result, rerender } = renderHook(() => useTemplateDeploy());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.deploy(makeTemplate());
|
||||
});
|
||||
await deployThroughPicker(result, rerender, makeTemplate());
|
||||
|
||||
const body = (mockApiPost as Mock).mock.calls[0]?.[1] as {
|
||||
canvas: { x: number; y: number };
|
||||
@@ -204,6 +255,7 @@ describe("useTemplateDeploy — preflight failure modes", () => {
|
||||
missingKeys: ["ANTHROPIC_API_KEY"],
|
||||
providers: [],
|
||||
runtime: "claude-code",
|
||||
configuredKeys: new Set(),
|
||||
});
|
||||
const onDeployed = vi.fn();
|
||||
|
||||
@@ -231,6 +283,7 @@ describe("useTemplateDeploy — modal lifecycle", () => {
|
||||
missingKeys: ["ANTHROPIC_API_KEY"],
|
||||
providers: [],
|
||||
runtime: "claude-code",
|
||||
configuredKeys: new Set(),
|
||||
});
|
||||
const onDeployed = vi.fn();
|
||||
const { result, rerender } = renderHook(() =>
|
||||
@@ -265,6 +318,7 @@ describe("useTemplateDeploy — modal lifecycle", () => {
|
||||
missingKeys: ["ANTHROPIC_API_KEY"],
|
||||
providers: [],
|
||||
runtime: "claude-code",
|
||||
configuredKeys: new Set(),
|
||||
});
|
||||
const { result, rerender } = renderHook(() => useTemplateDeploy());
|
||||
|
||||
@@ -287,16 +341,190 @@ describe("useTemplateDeploy — modal lifecycle", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("useTemplateDeploy — POST failure", () => {
|
||||
it("POST rejection sets error and clears deploying", async () => {
|
||||
mockApiPost.mockRejectedValueOnce(new Error("server 500"));
|
||||
describe("useTemplateDeploy — multi-provider always-ask flow", () => {
|
||||
// The user-reported bug: clicking a hermes template (which has
|
||||
// multiple provider options) deployed silently when global env
|
||||
// covered the API key, producing "No LLM provider configured" 500
|
||||
// because the workspace booted with no explicit model. Fix:
|
||||
// always open the picker for multi-provider templates so the
|
||||
// user picks provider + model per workspace, even when keys are
|
||||
// already saved.
|
||||
function multiProviderTemplate(): Template {
|
||||
return makeTemplate({
|
||||
id: "hermes-template",
|
||||
name: "Hermes",
|
||||
runtime: "hermes",
|
||||
model: "anthropic/claude-sonnet-4-5",
|
||||
models: [
|
||||
{ id: "minimax/MiniMax-M2.7", required_env: ["MINIMAX_API_KEY"] },
|
||||
{ id: "anthropic/claude-sonnet-4-5", required_env: ["ANTHROPIC_API_KEY"] },
|
||||
],
|
||||
});
|
||||
}
|
||||
|
||||
it("opens picker even when preflight.ok=true (≥2 providers)", async () => {
|
||||
mockCheckDeploySecrets.mockResolvedValueOnce({
|
||||
ok: true, // every key is in global env
|
||||
missingKeys: [],
|
||||
providers: [
|
||||
{ id: "MINIMAX_API_KEY", label: "MiniMax", envVars: ["MINIMAX_API_KEY"] },
|
||||
{ id: "ANTHROPIC_API_KEY", label: "Anthropic", envVars: ["ANTHROPIC_API_KEY"] },
|
||||
],
|
||||
runtime: "hermes",
|
||||
configuredKeys: new Set(["MINIMAX_API_KEY", "ANTHROPIC_API_KEY"]),
|
||||
});
|
||||
const { result, rerender } = renderHook(() => useTemplateDeploy());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.deploy(multiProviderTemplate());
|
||||
});
|
||||
|
||||
rerender();
|
||||
render(<>{result.current.modal}</>);
|
||||
|
||||
expect(screen.getByTestId("missing-keys-modal")).toBeTruthy();
|
||||
// Both global keys flowed into the modal as `configuredKeys` so
|
||||
// entries can render as Saved without re-prompting.
|
||||
expect(screen.getByTestId("modal-configured-size").textContent).toBe("2");
|
||||
// Confirm POST has NOT fired yet — the user must explicitly
|
||||
// confirm in the picker even though preflight passed.
|
||||
expect(mockApiPost).not.toHaveBeenCalled();
|
||||
// Title shifts to "Configure Workspace" since keys aren't missing.
|
||||
expect(screen.getByTestId("modal-title").textContent).toBe(
|
||||
"Configure Workspace",
|
||||
);
|
||||
});
|
||||
|
||||
it("threads template.models[].id as model suggestions + template.model as initial value", async () => {
|
||||
mockCheckDeploySecrets.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
missingKeys: [],
|
||||
providers: [
|
||||
{ id: "MINIMAX_API_KEY", label: "MiniMax", envVars: ["MINIMAX_API_KEY"] },
|
||||
{ id: "ANTHROPIC_API_KEY", label: "Anthropic", envVars: ["ANTHROPIC_API_KEY"] },
|
||||
],
|
||||
runtime: "hermes",
|
||||
configuredKeys: new Set(),
|
||||
});
|
||||
const { result, rerender } = renderHook(() => useTemplateDeploy());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.deploy(multiProviderTemplate());
|
||||
});
|
||||
|
||||
rerender();
|
||||
render(<>{result.current.modal}</>);
|
||||
|
||||
expect(screen.getByTestId("modal-model-suggestions").textContent).toBe(
|
||||
"minimax/MiniMax-M2.7,anthropic/claude-sonnet-4-5",
|
||||
);
|
||||
expect(screen.getByTestId("modal-initial-model").textContent).toBe(
|
||||
"anthropic/claude-sonnet-4-5",
|
||||
);
|
||||
});
|
||||
|
||||
it("POST /workspaces includes model when picker confirms with one", async () => {
|
||||
mockCheckDeploySecrets.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
missingKeys: [],
|
||||
providers: [
|
||||
{ id: "MINIMAX_API_KEY", label: "MiniMax", envVars: ["MINIMAX_API_KEY"] },
|
||||
{ id: "ANTHROPIC_API_KEY", label: "Anthropic", envVars: ["ANTHROPIC_API_KEY"] },
|
||||
],
|
||||
runtime: "hermes",
|
||||
configuredKeys: new Set(),
|
||||
});
|
||||
const { result, rerender } = renderHook(() => useTemplateDeploy());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.deploy(multiProviderTemplate());
|
||||
});
|
||||
|
||||
rerender();
|
||||
render(<>{result.current.modal}</>);
|
||||
|
||||
await act(async () => {
|
||||
fireEvent.click(screen.getByTestId("modal-keys-added-with-model"));
|
||||
await Promise.resolve();
|
||||
await Promise.resolve();
|
||||
});
|
||||
|
||||
expect(mockApiPost).toHaveBeenCalledWith(
|
||||
"/workspaces",
|
||||
expect.objectContaining({
|
||||
template: "hermes-template",
|
||||
model: "minimax/MiniMax-M2.7",
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("single-provider template ALSO opens picker when preflight.ok (always-prompt rule)", async () => {
|
||||
// Default preflight mock: ok=true, providers=[]. claude-code is
|
||||
// single-provider, but the always-prompt rule means the user must
|
||||
// still click through the picker to confirm provider+model — even
|
||||
// when keys are saved and the runtime has only one provider option.
|
||||
// Reason: the user needs an explicit chance to override the
|
||||
// template's default model (e.g. opus vs sonnet vs haiku) before
|
||||
// an EC2 boots and burns billing on the wrong tier.
|
||||
const onDeployed = vi.fn();
|
||||
const { result } = renderHook(() => useTemplateDeploy({ onDeployed }));
|
||||
const { result, rerender } = renderHook(() =>
|
||||
useTemplateDeploy({ onDeployed }),
|
||||
);
|
||||
|
||||
await act(async () => {
|
||||
await result.current.deploy(makeTemplate());
|
||||
});
|
||||
|
||||
rerender();
|
||||
render(<>{result.current.modal}</>);
|
||||
|
||||
expect(screen.getByTestId("missing-keys-modal")).toBeTruthy();
|
||||
// POST does NOT fire until the user confirms in the picker.
|
||||
expect(mockApiPost).not.toHaveBeenCalled();
|
||||
expect(onDeployed).not.toHaveBeenCalled();
|
||||
expect(result.current.deploying).toBeNull();
|
||||
});
|
||||
|
||||
it("empty configuredKeys (preflight defensive fallback) still opens picker", async () => {
|
||||
// checkDeploySecrets falls back to an empty Set when the
|
||||
// /settings/secrets endpoint errors — the modal must still
|
||||
// open so the user isn't blocked, just with every entry
|
||||
// rendered as input rather than Saved.
|
||||
mockCheckDeploySecrets.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
missingKeys: [],
|
||||
providers: [
|
||||
{ id: "MINIMAX_API_KEY", label: "MiniMax", envVars: ["MINIMAX_API_KEY"] },
|
||||
{ id: "ANTHROPIC_API_KEY", label: "Anthropic", envVars: ["ANTHROPIC_API_KEY"] },
|
||||
],
|
||||
runtime: "hermes",
|
||||
configuredKeys: new Set(),
|
||||
});
|
||||
const { result, rerender } = renderHook(() => useTemplateDeploy());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.deploy(multiProviderTemplate());
|
||||
});
|
||||
|
||||
rerender();
|
||||
render(<>{result.current.modal}</>);
|
||||
|
||||
expect(screen.getByTestId("missing-keys-modal")).toBeTruthy();
|
||||
expect(screen.getByTestId("modal-configured-size").textContent).toBe("0");
|
||||
expect(mockApiPost).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe("useTemplateDeploy — POST failure", () => {
|
||||
it("POST rejection sets error and clears deploying", async () => {
|
||||
mockApiPost.mockRejectedValueOnce(new Error("server 500"));
|
||||
const onDeployed = vi.fn();
|
||||
const { result, rerender } = renderHook(() =>
|
||||
useTemplateDeploy({ onDeployed }),
|
||||
);
|
||||
|
||||
await deployThroughPicker(result, rerender, makeTemplate());
|
||||
|
||||
expect(result.current.error).toBe("server 500");
|
||||
expect(result.current.deploying).toBeNull();
|
||||
expect(onDeployed).not.toHaveBeenCalled();
|
||||
@@ -304,11 +532,9 @@ describe("useTemplateDeploy — POST failure", () => {
|
||||
|
||||
it("non-Error rejection still surfaces a message (defensive)", async () => {
|
||||
mockApiPost.mockRejectedValueOnce("plain string");
|
||||
const { result } = renderHook(() => useTemplateDeploy());
|
||||
const { result, rerender } = renderHook(() => useTemplateDeploy());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.deploy(makeTemplate());
|
||||
});
|
||||
await deployThroughPicker(result, rerender, makeTemplate());
|
||||
|
||||
expect(result.current.error).toBe("Deploy failed");
|
||||
expect(result.current.deploying).toBeNull();
|
||||
|
||||
@@ -44,7 +44,11 @@ export interface UseTemplateDeployOptions {
|
||||
/** Paired template + preflight result carried through the "user
|
||||
* clicked deploy → modal opens → keys saved → retry" loop. Named
|
||||
* so the `useState` generic and any future signature change have
|
||||
* a single place to track. */
|
||||
* a single place to track. `preflight.configuredKeys` lets the
|
||||
* modal mark pre-saved entries without re-prompting — the
|
||||
* template-deploy "always ask" flow surfaces the picker even when
|
||||
* preflight.ok is true so the user can pick a different provider
|
||||
* per workspace. */
|
||||
interface MissingKeysInfo {
|
||||
template: Template;
|
||||
preflight: PreflightResult;
|
||||
@@ -81,9 +85,14 @@ export function useTemplateDeploy(
|
||||
|
||||
/** Actually execute the POST /workspaces call. Split from `deploy`
|
||||
* so the "modal → keys added → retry" path can reuse it without
|
||||
* re-running preflight (the user just proved the keys are now set). */
|
||||
* re-running preflight (the user just proved the keys are now set).
|
||||
*
|
||||
* `model` (optional) is the user-picked model slug from the picker
|
||||
* modal. When the template is multi-provider, hermes-style routing
|
||||
* reads the slug prefix at install time to pick the upstream
|
||||
* endpoint, so the slug must reach the workspace verbatim. */
|
||||
const executeDeploy = useCallback(
|
||||
async (template: Template) => {
|
||||
async (template: Template, model?: string) => {
|
||||
setDeploying(template.id);
|
||||
setError(null);
|
||||
try {
|
||||
@@ -98,6 +107,7 @@ export function useTemplateDeploy(
|
||||
template: template.id,
|
||||
tier: template.tier,
|
||||
canvas: coords,
|
||||
...(model ? { model } : {}),
|
||||
});
|
||||
onDeployed?.(ws.id);
|
||||
} catch (e) {
|
||||
@@ -133,33 +143,70 @@ export function useTemplateDeploy(
|
||||
setDeploying(null);
|
||||
return;
|
||||
}
|
||||
if (!preflight.ok) {
|
||||
setMissingKeysInfo({ template, preflight });
|
||||
setDeploying(null);
|
||||
return;
|
||||
}
|
||||
await executeDeploy(template);
|
||||
// Always open the picker — every deploy goes through an
|
||||
// explicit confirm-provider/model step. Reasons:
|
||||
// 1. Multi-provider templates (e.g. hermes) need a per-
|
||||
// workspace pick or the adapter falls back to its
|
||||
// compiled-in default and 500s with "No LLM provider
|
||||
// configured".
|
||||
// 2. Single-provider templates (claude-code, langgraph)
|
||||
// still need the model field — the template's default
|
||||
// may be wrong for the user's billing tier or a model
|
||||
// they explicitly want (sonnet vs opus vs haiku).
|
||||
// 3. Even when keys + model are pre-filled, surfacing the
|
||||
// modal one-click-away is the cheapest UX for catching
|
||||
// a misconfigured org BEFORE provisioning an EC2 that
|
||||
// will then sit in degraded.
|
||||
// The picker handles the "all-keys-saved single-provider"
|
||||
// case as a confirm-only prompt (provider radio is hidden,
|
||||
// model input is pre-filled with template.model).
|
||||
setMissingKeysInfo({ template, preflight });
|
||||
setDeploying(null);
|
||||
},
|
||||
[executeDeploy],
|
||||
[],
|
||||
);
|
||||
|
||||
// No useCallback here — consumers call this on every render anyway
|
||||
// (it's placed inline in JSX), and useCallback's deps would
|
||||
// invalidate on every state change, making the memoisation a wash.
|
||||
// Plain ReactNode is simpler and equally performant.
|
||||
const isMultiProvider = (missingKeysInfo?.preflight.providers.length ?? 0) >= 2;
|
||||
// Suggestions for the model field — pull declared model ids from the
|
||||
// template. Templates without `models` declared (e.g. claude-code)
|
||||
// pass [] which suppresses the model field entirely.
|
||||
const modelSuggestions =
|
||||
missingKeysInfo?.template.models?.map((m) => m.id) ?? [];
|
||||
// Pre-fill the model input with the template's default `model` so
|
||||
// confirming without changing it preserves today's behaviour.
|
||||
const initialModel = missingKeysInfo?.template.model;
|
||||
// When the user has keys configured (preflight.ok) we re-purpose the
|
||||
// modal as a "confirm provider/model" prompt — adjust copy
|
||||
// accordingly so it doesn't claim keys are missing.
|
||||
const allConfigured = missingKeysInfo?.preflight.ok ?? false;
|
||||
const modalTitle = allConfigured
|
||||
? "Configure Workspace"
|
||||
: undefined;
|
||||
const modalDescription = allConfigured
|
||||
? "Pick the provider and model for this workspace. Saved API keys are reused automatically."
|
||||
: undefined;
|
||||
const modal: ReactNode = (
|
||||
<MissingKeysModal
|
||||
open={!!missingKeysInfo}
|
||||
missingKeys={missingKeysInfo?.preflight.missingKeys ?? []}
|
||||
providers={missingKeysInfo?.preflight.providers ?? []}
|
||||
runtime={missingKeysInfo?.preflight.runtime ?? ""}
|
||||
onKeysAdded={() => {
|
||||
configuredKeys={missingKeysInfo?.preflight.configuredKeys}
|
||||
modelSuggestions={isMultiProvider ? modelSuggestions : undefined}
|
||||
initialModel={isMultiProvider ? initialModel : undefined}
|
||||
title={modalTitle}
|
||||
description={modalDescription}
|
||||
onKeysAdded={(model?: string) => {
|
||||
if (missingKeysInfo) {
|
||||
const template = missingKeysInfo.template;
|
||||
setMissingKeysInfo(null);
|
||||
// Intentional fire-and-forget — executeDeploy manages
|
||||
// its own error state via setError.
|
||||
void executeDeploy(template);
|
||||
void executeDeploy(template, model);
|
||||
}
|
||||
}}
|
||||
onCancel={() => setMissingKeysInfo(null)}
|
||||
|
||||
@@ -244,5 +244,26 @@ describe("checkDeploySecrets", () => {
|
||||
const result = await checkDeploySecrets(LANGGRAPH);
|
||||
expect(result.ok).toBe(false);
|
||||
expect(result.missingKeys).toEqual(["OPENAI_API_KEY"]);
|
||||
// Empty Set on fetch failure — useTemplateDeploy relies on this
|
||||
// so the picker still opens with every entry rendered as input.
|
||||
expect(result.configuredKeys).toEqual(new Set());
|
||||
});
|
||||
|
||||
it("surfaces configuredKeys (has_value=true entries only) so callers skip a second fetch", async () => {
|
||||
(global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () =>
|
||||
Promise.resolve([
|
||||
{ key: "ANTHROPIC_API_KEY", has_value: true, created_at: "", updated_at: "" },
|
||||
{ key: "OPENROUTER_API_KEY", has_value: false, created_at: "", updated_at: "" },
|
||||
{ key: "RANDOM_OTHER_KEY", has_value: true, created_at: "", updated_at: "" },
|
||||
]),
|
||||
} as Response);
|
||||
|
||||
const result = await checkDeploySecrets(HERMES);
|
||||
// Only has_value=true entries belong in the set.
|
||||
expect(result.configuredKeys).toEqual(
|
||||
new Set(["ANTHROPIC_API_KEY", "RANDOM_OTHER_KEY"]),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -91,6 +91,12 @@ export interface PreflightResult {
|
||||
* required (AllKeysModal renders the N envVars inline). */
|
||||
providers: ProviderChoice[];
|
||||
runtime: string;
|
||||
/** Set of env var names already configured (i.e. `has_value: true`) at
|
||||
* the relevant scope (workspace if `workspaceId` was passed, otherwise
|
||||
* global). Surfaced so callers can mark pre-saved entries in the
|
||||
* picker without making a second `/settings/secrets` round trip.
|
||||
* Empty Set on secrets-endpoint failure (treated as "nothing set"). */
|
||||
configuredKeys: Set<string>;
|
||||
}
|
||||
|
||||
/* ---------- Provider options ---------- */
|
||||
@@ -235,7 +241,13 @@ export async function checkDeploySecrets(
|
||||
|
||||
if (providers.length === 0) {
|
||||
// Template declares no env requirements — nothing to preflight.
|
||||
return { ok: true, missingKeys: [], providers: [], runtime };
|
||||
return {
|
||||
ok: true,
|
||||
missingKeys: [],
|
||||
providers: [],
|
||||
runtime,
|
||||
configuredKeys: new Set(),
|
||||
};
|
||||
}
|
||||
|
||||
let configured: Set<string>;
|
||||
@@ -254,7 +266,13 @@ export async function checkDeploySecrets(
|
||||
}
|
||||
|
||||
if (findSatisfiedProvider(providers, configured)) {
|
||||
return { ok: true, missingKeys: [], providers, runtime };
|
||||
return {
|
||||
ok: true,
|
||||
missingKeys: [],
|
||||
providers,
|
||||
runtime,
|
||||
configuredKeys: configured,
|
||||
};
|
||||
}
|
||||
|
||||
// Nothing configured — surface every candidate env var so the modal
|
||||
@@ -262,5 +280,11 @@ export async function checkDeploySecrets(
|
||||
const missingKeys = Array.from(
|
||||
new Set(providers.flatMap((p) => p.envVars)),
|
||||
);
|
||||
return { ok: false, missingKeys, providers, runtime };
|
||||
return {
|
||||
ok: false,
|
||||
missingKeys,
|
||||
providers,
|
||||
runtime,
|
||||
configuredKeys: configured,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
**Status:** living document — update when you ship a feature that touches one backend.
|
||||
**Owner:** workspace-server + controlplane teams.
|
||||
**Last audit:** 2026-04-23 (Claude agent, PR #TBD).
|
||||
**Last audit:** 2026-05-02 (Claude agent, PR #TBD).
|
||||
|
||||
## Why this exists
|
||||
|
||||
@@ -37,6 +37,12 @@ This document is the canonical matrix. If you are landing a workspace-facing fea
|
||||
| **A2A proxy** | | | | |
|
||||
| Forward | `a2a_proxy.go` | `127.0.0.1:<port>` | EC2 private IP inside tenant VPC | ✅ parity |
|
||||
| Liveness | `a2a_proxy_helpers.go` | `provisioner.IsRunning()` | `cpProv.IsRunning()` (DB-backed) | ✅ parity |
|
||||
| Channel envelope enrichment (peer_name / peer_role / agent_card_url) | `a2a_proxy.go` + workspace-runtime channel emitter (PR #2471) | inbox row carries enriched fields | inbox row carries enriched fields | ✅ parity as of 2026-05-02 |
|
||||
| **MCP tools (a2a)** | | | | |
|
||||
| `chat_history` — fetch prior turns with a peer | `mcp_server.go` + workspace-runtime `a2a_mcp` (PR #2474) | runtime-served, backend-agnostic | runtime-served, backend-agnostic | ✅ parity as of 2026-05-02 |
|
||||
| **Activity API** | | | | |
|
||||
| `before_ts` paging on `/workspaces/:id/activity` | `activity.go` (PR #2476) | DB-driven | DB-driven | ✅ parity as of 2026-05-02 |
|
||||
| `peer_id` filter on `/workspaces/:id/activity` | `activity.go` (PR #2472) | DB-driven | DB-driven | ✅ parity as of 2026-05-02 |
|
||||
| **Config / template injection** | | | | |
|
||||
| Template copy at provision | `provisioner.go:553-648` | host walk → tar → `CopyToContainer(/configs)` | CP user-data bakes template into bootstrap script | ⚠️ divergent — sync (docker) vs async (EC2) |
|
||||
| Runtime config hot-reload | `templates.go` + handlers | no hot-reload — restart required | no hot-reload — restart required | ✅ parity (both require restart; acceptable) |
|
||||
@@ -45,6 +51,9 @@ This document is the canonical matrix. If you are landing a workspace-facing fea
|
||||
| **Bootstrap signals** | | | | |
|
||||
| Ready detection | registry `/registry/register` | container heartbeat | tenant heartbeat + boot-event phone-home (CP `bootevents` table + `wait_platform_health=ok`) | ✅ parity as of molecule-controlplane#235 |
|
||||
| Console / log output | `workspace_bootstrap.go` | `docker logs` | `ec2:GetConsoleOutput` via CP proxy | 🟡 ec2-only (docker has `docker logs` directly; no unified API) |
|
||||
| `runtime_wedge` post-`execute()` smoke gate | workspace-runtime `smoke_mode.py` (PRs #2473 + #2475) | runtime-served, surfaces SDK-init wedges to wheel-smoke + container start | runtime-served, surfaces SDK-init wedges to wheel-smoke + container start | ✅ parity as of 2026-05-02 |
|
||||
| **Test infrastructure** | | | | |
|
||||
| Canvas-E2E `.playwright-staging-state.json` written before any CP call | `tools/e2e-staging-setup` (PR #2327, 2026-04-30) | n/a — staging-only safety net | required so workflow safety-net can find slug; pattern-sweeping by date prefix poisons concurrent runs | ✅ enforced (staging E2E) |
|
||||
| **Orphan cleanup** | | | | |
|
||||
| Detect + terminate stale | `healthsweep.go` + CP `DeprovisionInstance` | Docker daemon scan | CP OrgID-tag cascade (molecule-controlplane#234) | ✅ parity as of 2026-04-23 |
|
||||
| **Health / budget / schedules** | | | | |
|
||||
|
||||
@@ -16,7 +16,11 @@ workspace container running on it) over an [EC2 Instance Connect
|
||||
Endpoint](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-connect-setup-ec2-instance-connect-endpoint.html).
|
||||
End users see a terminal; no direct public SSH ingress is required.
|
||||
|
||||
Tracking: [molecule-core#1528](https://github.com/Molecule-AI/molecule-core/issues/1528) (resolved 2026-04-22).
|
||||
Tracking: originally `molecule-core#1528` (resolved 2026-04-22). The
|
||||
`molecule-core` repo has since been renamed to `molecule-monorepo` and no
|
||||
longer accepts new issues under the old name; future terminal work is
|
||||
tracked in `molecule-monorepo` issues (workspace-server scope) and in
|
||||
`molecule-controlplane` issues for the EIC / per-tenant SG path.
|
||||
|
||||
## Where things are
|
||||
|
||||
|
||||
@@ -17,6 +17,29 @@ distinct from the PyPI package) is no longer the source-of-truth and should
|
||||
be treated as a publish artifact only. It can be archived or used as a
|
||||
read-only mirror.
|
||||
|
||||
## Where to make changes
|
||||
|
||||
**All runtime edits land in `molecule-monorepo/workspace/`. Period.**
|
||||
|
||||
The GitHub repo `Molecule-AI/molecule-ai-workspace-runtime` is **mirror-only**.
|
||||
It exists so external consumers (template repos, downstream operators) have a
|
||||
git-cloneable artifact that mirrors the PyPI wheel — nothing more.
|
||||
|
||||
- **Direct PRs against `molecule-ai-workspace-runtime` are auto-rejected by
|
||||
the `mirror-guard` CI check.** The check fails any push that did not come
|
||||
from the publish pipeline. There is no opt-out — file the change against
|
||||
`molecule-monorepo/workspace/` instead.
|
||||
- **The mirror + the PyPI wheel both auto-regenerate on every push to
|
||||
`staging`** via `.github/workflows/publish-runtime.yml` (which calls
|
||||
`scripts/build_runtime_package.py`, builds wheel + sdist, smoke-imports,
|
||||
uploads to PyPI via Trusted Publisher, and force-pushes the rewritten tree
|
||||
to the mirror repo). You never touch the mirror by hand.
|
||||
|
||||
If you have an old local clone of the mirror and try to push a fix to it
|
||||
directly, expect a CI failure with a message pointing you here. Re-open the
|
||||
change against `molecule-monorepo/workspace/` and let the publish workflow
|
||||
do the rest.
|
||||
|
||||
## Why this shape
|
||||
|
||||
The 8 workspace template repos (claude-code, langgraph, hermes, etc.) each
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
# scripts/
|
||||
|
||||
Operational and one-off scripts for molecule-core. Most are
|
||||
self-documenting — see the header comments in each file.
|
||||
|
||||
## RFC #2251 coordinator task-bound harnesses
|
||||
|
||||
There are three related scripts; pick the right one:
|
||||
|
||||
| Script | Purpose | Targets |
|
||||
|---|---|---|
|
||||
| `measure-coordinator-task-bounds.sh` | **Canonical** v1 harness for the RFC #2251 / Issue 4 reproduction. Provisions a PM coordinator + Researcher child via `claude-code-default` + `langgraph` templates, sends a synthesis-heavy A2A kickoff, observes elapsed time + activity trace. | OSS-shape platform — localhost or any `/workspaces`-shaped endpoint. Has tenant/admin-token guards for non-localhost runs. |
|
||||
| `measure-coordinator-task-bounds-runner.sh` | Generalised runner for the same measurement contract but with **arbitrary template + secret + model combinations** (Hermes/MiniMax, etc.). Useful for cross-runtime variants without modifying the canonical harness. | Same as above (local or SaaS via `MODE=saas`). |
|
||||
| `measure-coordinator-task-bounds.sh` (in [molecule-controlplane](https://github.com/Molecule-AI/molecule-controlplane)) | **Production-shape** variant that bootstraps a real staging tenant via `POST /cp/admin/orgs`, then runs the same measurement against `<slug>.staging.moleculesai.app`. | Staging controlplane only — refuses to run against production. |
|
||||
|
||||
See `reference_harness_pair_pattern` (auto-memory) for when to use which
|
||||
and the cross-repo design rationale.
|
||||
|
||||
### Common safety pattern across all three
|
||||
|
||||
- **Cleanup trap** on EXIT/INT/TERM auto-deletes provisioned resources.
|
||||
- **`DRY_RUN=1`** prints plan + auth fingerprint, exits before any
|
||||
state mutation. Run this before pointing at staging or any shared
|
||||
infrastructure.
|
||||
- **Non-target guard** refuses arbitrary endpoints (the controlplane
|
||||
variant is locked to `staging-api.moleculesai.app`; the OSS variant
|
||||
requires explicit auth + tenant scoping for non-localhost PLATFORM).
|
||||
- **Cleanup failures emit `cleanup_*_failed` events** with remediation
|
||||
hints; no silenced curl. ADMIN_TOKEN expiring mid-run surfaces as a
|
||||
structured event rather than a silent leak.
|
||||
|
||||
### Activity trace caveat
|
||||
|
||||
If `activity_trace.raw == "<endpoint_unavailable>"`, the per-workspace
|
||||
`/activity` endpoint isn't wired on the target build — the bound
|
||||
measurement is INCONCLUSIVE on the platform-ceiling question. Either
|
||||
wire the endpoint or replace with the equivalent Datadog query. Note
|
||||
that `/activity` accepts a `since_secs` query parameter; see the
|
||||
endpoint handler for the supported range.
|
||||
|
||||
## Other scripts
|
||||
|
||||
- `cleanup-rogue-workspaces.sh` — emergency teardown for leaked
|
||||
workspaces. Prompts for confirmation. Pair with the harnesses if a
|
||||
cleanup trap fails (see `cleanup_*_failed` events).
|
||||
- `canary-smoke.sh` — quick smoke test for canary releases.
|
||||
- `dev-start.sh` — local-dev platform bring-up.
|
||||
|
||||
The rest are self-documenting in their header comments.
|
||||
@@ -59,20 +59,27 @@ TOP_LEVEL_MODULES = {
|
||||
"agent",
|
||||
"agents_md",
|
||||
"config",
|
||||
"configs_dir",
|
||||
"consolidation",
|
||||
"coordinator",
|
||||
"events",
|
||||
"executor_helpers",
|
||||
"heartbeat",
|
||||
"inbox",
|
||||
"initial_prompt",
|
||||
"internal_chat_uploads",
|
||||
"internal_file_read",
|
||||
"main",
|
||||
"mcp_cli",
|
||||
"molecule_ai_status",
|
||||
"platform_auth",
|
||||
"platform_inbound_auth",
|
||||
"plugins",
|
||||
"preflight",
|
||||
"prompt",
|
||||
"runtime_wedge",
|
||||
"shared_runtime",
|
||||
"smoke_mode",
|
||||
"transcript_auth",
|
||||
"watcher",
|
||||
}
|
||||
@@ -145,6 +152,13 @@ def rewrite_imports(text: str, regex: re.Pattern) -> str:
|
||||
`import X` → `import molecule_runtime.X as X` (preserve binding)
|
||||
`from X import Y` → `from molecule_runtime.X import Y`
|
||||
`from X.sub import Y` → `from molecule_runtime.X.sub import Y`
|
||||
|
||||
Rejects `import X as Y` because the rewrite would produce
|
||||
`import molecule_runtime.X as X as Y`, a syntax error. The PR #2433
|
||||
incident shipped this exact pattern past `Python Lint & Test` (which
|
||||
runs against pre-rewrite source) but blew up the wheel-smoke gate.
|
||||
Detecting it here turns the silent build failure into a build-time
|
||||
error with a clear path: use `from X import …` or plain `import X`.
|
||||
"""
|
||||
def repl(m: re.Match) -> str:
|
||||
indent, kw, mod, rest = m.group("indent"), m.group("kw"), m.group("mod"), m.group("rest")
|
||||
@@ -158,6 +172,26 @@ def rewrite_imports(text: str, regex: re.Pattern) -> str:
|
||||
# `import X.sub` — rewrite as `import molecule_runtime.X.sub` and
|
||||
# leave the trailing dot pattern intact for the rest of the line.
|
||||
return f"{indent}import molecule_runtime.{mod}{rest}"
|
||||
# Detect `import X as Y` — the regex's `rest` group captures only
|
||||
# the immediate following char (whitespace, comma, or EOL), so we
|
||||
# have to peek at the surrounding line context. The match start is
|
||||
# at the line's `import` keyword; everything after the matched
|
||||
# name on the same line is what the source author wrote.
|
||||
line_start = text.rfind("\n", 0, m.start()) + 1
|
||||
line_end = text.find("\n", m.end())
|
||||
if line_end == -1:
|
||||
line_end = len(text)
|
||||
line_after = text[m.end() - len(rest):line_end]
|
||||
# Strip comments from consideration so `import X # noqa` doesn't trip.
|
||||
line_after_no_comment = line_after.split("#", 1)[0]
|
||||
if re.search(r"^\s*as\s+\w+", line_after_no_comment):
|
||||
raise ValueError(
|
||||
f"rewrite_imports: cannot rewrite 'import {mod} as <alias>' on a "
|
||||
f"workspace module — the regex would produce "
|
||||
f"'import molecule_runtime.{mod} as {mod} as <alias>', invalid syntax. "
|
||||
f"Use 'from {mod} import …' or plain 'import {mod}' instead. "
|
||||
f"Offending line: {text[line_start:line_end]!r}"
|
||||
)
|
||||
# Plain `import X` — alias preserves the local name.
|
||||
return f"{indent}import molecule_runtime.{mod} as {mod}{rest}"
|
||||
return regex.sub(repl, text)
|
||||
@@ -214,6 +248,7 @@ dependencies = [
|
||||
|
||||
[project.scripts]
|
||||
molecule-runtime = "molecule_runtime.main:main_sync"
|
||||
molecule-mcp = "molecule_runtime.mcp_cli:main"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["."]
|
||||
@@ -237,6 +272,31 @@ directory** by the `publish-runtime` GitHub Actions workflow on every
|
||||
`runtime-v*` tag push. **Do not edit this package directly** — edit
|
||||
`workspace/` in the monorepo.
|
||||
|
||||
## External-runtime MCP server (`molecule-mcp`)
|
||||
|
||||
Operators running an agent outside the platform's container fleet
|
||||
(any runtime that supports MCP stdio — Claude Code, hermes, codex,
|
||||
etc.) can install this wheel and run the universal MCP server
|
||||
locally:
|
||||
|
||||
```sh
|
||||
pip install molecule-ai-workspace-runtime
|
||||
WORKSPACE_ID=<uuid> \\
|
||||
PLATFORM_URL=https://<tenant>.staging.moleculesai.app \\
|
||||
MOLECULE_WORKSPACE_TOKEN=<bearer> \\
|
||||
molecule-mcp
|
||||
```
|
||||
|
||||
That exposes the same 8 platform tools (`delegate_task`, `list_peers`,
|
||||
`send_message_to_user`, `commit_memory`, etc.) that container-bound
|
||||
runtimes already get via the workspace's auto-spawned MCP. Register
|
||||
the binary in your agent's MCP config (e.g. Claude Code's
|
||||
`claude mcp add molecule -- molecule-mcp` with the env above).
|
||||
|
||||
The token comes from the canvas → Tokens tab. Restarting an external
|
||||
workspace from the canvas no longer revokes the token (PR #2412), so
|
||||
operator tokens persist across status nudges.
|
||||
|
||||
See [`docs/workspace-runtime-package.md`](https://github.com/Molecule-AI/molecule-core/blob/main/docs/workspace-runtime-package.md)
|
||||
for the publish flow and architecture.
|
||||
"""
|
||||
|
||||
@@ -0,0 +1,306 @@
|
||||
# Demo-day runbook
|
||||
|
||||
Pre-, during-, and post-demo operational procedures for the molecule
|
||||
production stack. Updated 2026-05-01 ahead of the funding-demo on
|
||||
~2026-05-06.
|
||||
|
||||
The whole stack:
|
||||
|
||||
```
|
||||
Vercel canvas (app.moleculesai.app)
|
||||
→ Railway controlplane (api.moleculesai.app)
|
||||
→ CloudFront/Cloudflare per-tenant edge (<slug>.moleculesai.app)
|
||||
→ EC2 tenant instance running platform container
|
||||
→ Docker workspaces pulled from
|
||||
ghcr.io/molecule-ai/workspace-template-<runtime>:latest
|
||||
```
|
||||
|
||||
Every layer has its own deploy/rollback story. This runbook indexes
|
||||
them in the order an operator would touch them during an incident.
|
||||
|
||||
## Pre-demo (T-48h to T-1h)
|
||||
|
||||
### 1. Freeze the runtime + template image cascade
|
||||
|
||||
A merge to `molecule-core/staging` that touches `workspace/**` triggers
|
||||
`publish-runtime.yml` → PyPI bump → repository_dispatch → 8 template
|
||||
repos rebuild and re-tag `:latest`. A merge to any template repo's
|
||||
`main` triggers the same final re-tag directly. Either path means a
|
||||
new workspace provision during the demo pulls whatever `:latest`
|
||||
resolved to seconds earlier.
|
||||
|
||||
Capture current good digests + disable both cascade vectors:
|
||||
|
||||
```bash
|
||||
# Dry-run first — verifies digests can be fetched and tooling is set up
|
||||
scripts/demo-freeze.sh
|
||||
|
||||
# Apply
|
||||
scripts/demo-freeze.sh --execute
|
||||
```
|
||||
|
||||
The script writes two receipts to `scripts/demo-freeze-snapshots/`:
|
||||
|
||||
- `digests-<TS>.txt` — current `:latest` digest per template (rollback target if needed)
|
||||
- `disabled-workflows-<TS>.txt` — workflow paths to re-enable post-demo
|
||||
|
||||
Verify the freeze landed:
|
||||
|
||||
```bash
|
||||
gh workflow list -R Molecule-AI/molecule-core | grep publish-runtime
|
||||
# expect: status = disabled_manually
|
||||
```
|
||||
|
||||
If a critical fix MUST ship during the freeze window:
|
||||
|
||||
1. `gh workflow enable publish-runtime.yml -R Molecule-AI/molecule-core`
|
||||
2. Merge the fix
|
||||
3. Watch the cascade through to GHCR:latest manually
|
||||
4. Smoke-verify against a staging tenant (`scripts/api-smoke.sh` or
|
||||
manual canvas walkthrough)
|
||||
5. `gh workflow disable publish-runtime.yml -R Molecule-AI/molecule-core` to re-freeze
|
||||
|
||||
Don't auto-promote during the freeze — the value of the freeze is that
|
||||
nothing happens automatically.
|
||||
|
||||
### 2. Confirm production CP is on the expected SHA
|
||||
|
||||
```bash
|
||||
gh run list -R Molecule-AI/molecule-controlplane --branch main --limit 5
|
||||
# Last `ci` run should be SUCCESS with the SHA you intend to demo on
|
||||
```
|
||||
|
||||
Railway auto-deploys from main. Spot-check `api.moleculesai.app`:
|
||||
|
||||
```bash
|
||||
curl -fsS -H "Authorization: Bearer $CP_ADMIN_API_TOKEN" \
|
||||
https://api.moleculesai.app/cp/admin/orgs?limit=1
|
||||
# Expect: 200 + a JSON {"orgs": [...]}
|
||||
```
|
||||
|
||||
### 3. Confirm production canvas (Vercel) is on main
|
||||
|
||||
Vercel auto-deploys `main`. Verify in the Vercel dashboard the most
|
||||
recent prod deploy ran from the expected commit SHA.
|
||||
|
||||
### 4. Pre-warm the demo tenant
|
||||
|
||||
Cold-start times on workspace-template images:
|
||||
|
||||
| Runtime | Cold-start (first boot) |
|
||||
|---|---|
|
||||
| claude-code | ~30-60s |
|
||||
| openclaw | ~1-2 min |
|
||||
| langgraph | ~1 min |
|
||||
| hermes | **~7 min** (large image) |
|
||||
|
||||
If the demo will use `hermes`, provision the demo workspace at least
|
||||
10 min before. The cold-start clock starts when the workspace is
|
||||
created, not when it's used.
|
||||
|
||||
## During demo — emergency rollback levers
|
||||
|
||||
### Lever A: Platform-image rollback (canvas/CP layer regression)
|
||||
|
||||
If the canvas or platform container shipped a regression, retag
|
||||
`:latest` to a prior staging SHA without rebuilding:
|
||||
|
||||
```bash
|
||||
# Find a known-good SHA from staging history
|
||||
gh run list -R Molecule-AI/molecule-core --workflow=publish-canvas-image.yml --limit 5
|
||||
|
||||
# Roll both platform + tenant images
|
||||
GITHUB_TOKEN=$(gh auth token) scripts/rollback-latest.sh <good-sha>
|
||||
```
|
||||
|
||||
`rollback-latest.sh` retags both `ghcr.io/molecule-ai/platform:latest`
|
||||
and `ghcr.io/molecule-ai/platform-tenant:latest`. Existing tenants
|
||||
auto-pull `:latest` every 5 min — rollback propagates without manual
|
||||
restart.
|
||||
|
||||
### Lever B: Workspace-template image rollback
|
||||
|
||||
If a specific runtime template (claude-code, hermes, etc.) shipped a
|
||||
broken `:latest`:
|
||||
|
||||
```bash
|
||||
# Get the demo's snapshotted-good digest from the freeze receipt
|
||||
grep claude-code scripts/demo-freeze-snapshots/digests-<TS>.txt
|
||||
|
||||
# Retag :latest back to the snapshotted digest using crane
|
||||
crane auth login ghcr.io -u "$(gh api user --jq .login)" \
|
||||
--password-stdin <<< "$(gh auth token)"
|
||||
crane tag \
|
||||
ghcr.io/molecule-ai/workspace-template-claude-code@sha256:<digest> \
|
||||
latest
|
||||
```
|
||||
|
||||
The next workspace provision pulls the rolled-back image. Existing
|
||||
workspaces are unaffected (their image is already loaded into Docker).
|
||||
|
||||
### Lever C: Wedged demo tenant — redeploy
|
||||
|
||||
If the demo tenant's EC2 instance is wedged (boot succeeded but app
|
||||
not responding, or a stuck workspace), the controlplane has an admin
|
||||
redeploy endpoint:
|
||||
|
||||
```bash
|
||||
# AWS-side: forces a fresh EC2 launch with current image. ~3 min.
|
||||
curl -fsS -X POST \
|
||||
-H "Authorization: Bearer $CP_ADMIN_API_TOKEN" \
|
||||
https://api.moleculesai.app/cp/admin/orgs/<slug>/redeploy
|
||||
```
|
||||
|
||||
WARNING per memory: this triggers real EC2 + SSM actions on production.
|
||||
Double-check `<slug>` against the demo tenant's slug before pressing
|
||||
return. The `/redeploy` endpoint is idempotent on the EC2 side but
|
||||
WILL drop active SSH sessions.
|
||||
|
||||
### Lever D: Specific bad workspace — delete
|
||||
|
||||
If a single workspace inside the demo tenant is misbehaving (e.g.
|
||||
hermes wedged on cold-start, claude-code returning the generic
|
||||
"Agent error (Exception)" message), kill it:
|
||||
|
||||
```bash
|
||||
# Get the demo tenant's per-tenant ADMIN_TOKEN
|
||||
TENANT_ADMIN=$(curl -fsS -H "Authorization: Bearer $CP_ADMIN_API_TOKEN" \
|
||||
https://api.moleculesai.app/cp/admin/orgs/<slug>/admin-token \
|
||||
| jq -r .admin_token)
|
||||
|
||||
ORG_ID=$(curl -fsS -H "Authorization: Bearer $CP_ADMIN_API_TOKEN" \
|
||||
https://api.moleculesai.app/cp/admin/orgs?limit=20 \
|
||||
| jq -r '.orgs[] | select(.slug=="<slug>") | .id')
|
||||
|
||||
# Delete the bad workspace
|
||||
curl -fsS -X DELETE \
|
||||
-H "Origin: https://<slug>.moleculesai.app" \
|
||||
-H "Authorization: Bearer $TENANT_ADMIN" \
|
||||
-H "X-Molecule-Org-Id: $ORG_ID" \
|
||||
https://<slug>.moleculesai.app/workspaces/<workspace-id>
|
||||
```
|
||||
|
||||
Then re-provision a fresh workspace from the canvas. Faster than
|
||||
debugging the wedged one.
|
||||
|
||||
### Lever E: Railway production rollback (CP regression)
|
||||
|
||||
If the last Railway deploy of CP introduced a regression that lever A
|
||||
can't fix (e.g. a logic bug, not a container issue):
|
||||
|
||||
1. Open Railway dashboard → molecule-platform → controlplane → Deployments
|
||||
2. Find the previous-known-good deployment
|
||||
3. Click **Rollback to this deployment**
|
||||
|
||||
Manual step — no CLI equivalent built. Takes ~30s to redeploy from
|
||||
the prior image. Note: rollback restores the prior code AND prior env
|
||||
var snapshot; don't expect any env var changes made since to persist.
|
||||
|
||||
### Lever F: Vercel production rollback (canvas regression)
|
||||
|
||||
If the canvas ships a regression:
|
||||
|
||||
1. Open Vercel dashboard → molecule-app → Deployments
|
||||
2. Find the previous prod deployment
|
||||
3. **Promote to Production**
|
||||
|
||||
Same pattern as Railway — fast revert, no rebuild.
|
||||
|
||||
## Tenant-level read-only diagnostics (not actions)
|
||||
|
||||
Useful during a "is this working?" moment without touching anything:
|
||||
|
||||
```bash
|
||||
# Tenant infra state
|
||||
curl -fsS -H "Authorization: Bearer $CP_ADMIN_API_TOKEN" \
|
||||
"https://api.moleculesai.app/cp/admin/orgs?limit=20" \
|
||||
| jq '.orgs[] | select(.slug=="<slug>")'
|
||||
|
||||
# Tenant boot events (debug a stuck provision)
|
||||
curl -fsS -H "Authorization: Bearer $CP_ADMIN_API_TOKEN" \
|
||||
"https://api.moleculesai.app/cp/admin/tenants/<slug>/boot-events?limit=50" \
|
||||
| jq
|
||||
|
||||
# Workspace activity (debug an unresponsive agent)
|
||||
curl -fsS \
|
||||
-H "Origin: https://<slug>.moleculesai.app" \
|
||||
-H "Authorization: Bearer $TENANT_ADMIN" \
|
||||
-H "X-Molecule-Org-Id: $ORG_ID" \
|
||||
"https://<slug>.moleculesai.app/workspaces/<workspace-id>/activity?limit=20" \
|
||||
| jq
|
||||
```
|
||||
|
||||
## Post-demo (T+30m to T+24h)
|
||||
|
||||
### 1. Thaw the cascades
|
||||
|
||||
```bash
|
||||
# Find the freeze receipt
|
||||
ls scripts/demo-freeze-snapshots/
|
||||
|
||||
# Thaw — pass the timestamp suffix
|
||||
scripts/demo-thaw.sh 20260506-180000
|
||||
```
|
||||
|
||||
The next merge to `molecule-core/staging` (workspace/**) or any
|
||||
template repo's `main` will resume the auto-rebuild cascade.
|
||||
|
||||
### 2. Audit what was held back
|
||||
|
||||
If any merges queued during the freeze:
|
||||
|
||||
```bash
|
||||
gh pr list -R Molecule-AI/molecule-core --base staging --state merged \
|
||||
--search "merged:>=$(date -u -v-7d +%Y-%m-%d)"
|
||||
```
|
||||
|
||||
Verify each merge's CI is green and dispatch the runtime cascade once
|
||||
to ensure all templates rebuild against the post-freeze HEAD.
|
||||
|
||||
### 3. File a post-mortem if anything fired
|
||||
|
||||
If any rollback lever was used during the demo, file a brief doc:
|
||||
|
||||
- Which lever (A through F)
|
||||
- Which SHA was rolled back FROM and TO
|
||||
- Did the rollback fully resolve the issue or was a follow-up needed
|
||||
- Whether the underlying regression should have been caught by CI
|
||||
|
||||
## Common issues + first-line fix
|
||||
|
||||
| Symptom | First lever to try |
|
||||
|---|---|
|
||||
| Workspace boots but agent always errors | Lever D (delete + reprovision) |
|
||||
| Whole tenant unreachable | Lever C (redeploy) |
|
||||
| Canvas crashes on load | Lever F (Vercel rollback) |
|
||||
| Login broken / API errors | Lever E (Railway rollback) |
|
||||
| Specific runtime broken across tenants | Lever B (template image rollback) |
|
||||
| Platform container regression | Lever A (rollback-latest.sh) |
|
||||
| Mid-demo stray PR auto-published a bad image | Lever B + investigate why freeze didn't catch it |
|
||||
|
||||
## Auth fingerprint (rotate post-demo)
|
||||
|
||||
The freeze + rollback procedures assume:
|
||||
|
||||
- `CP_ADMIN_API_TOKEN` available via `railway variables --kv --environment production`
|
||||
- `gh auth token` returns a working PAT with `workflow:write` + `write:packages`
|
||||
- `crane` installed (`brew install crane`)
|
||||
|
||||
After the demo, **rotate** `CP_ADMIN_API_TOKEN` (it's the keys-to-the-kingdom
|
||||
token for production) — it likely got copy-pasted into shells during
|
||||
the demo.
|
||||
|
||||
```bash
|
||||
# Generate a new admin token
|
||||
NEW_TOKEN=$(openssl rand -hex 32)
|
||||
|
||||
# Update Railway production env var (and optionally staging)
|
||||
railway variables --set CP_ADMIN_API_TOKEN="$NEW_TOKEN" --environment production
|
||||
|
||||
# Restart CP service to pick up the change
|
||||
# (Railway auto-restarts on env var change)
|
||||
|
||||
# Verify
|
||||
curl -fsS -H "Authorization: Bearer $NEW_TOKEN" \
|
||||
https://api.moleculesai.app/cp/admin/orgs?limit=1
|
||||
```
|
||||
@@ -0,0 +1,6 @@
|
||||
# Generated by scripts/demo-freeze.sh — receipts are operational state,
|
||||
# not source. Tracked .gitignore + .gitkeep keep the directory itself
|
||||
# in version control so the freeze script's output dir always exists.
|
||||
*
|
||||
!.gitignore
|
||||
!.gitkeep
|
||||
Executable
+214
@@ -0,0 +1,214 @@
|
||||
#!/usr/bin/env bash
|
||||
# demo-freeze.sh — disable the runtime + template image publish cascades
|
||||
# during a demo-prep window so a stray staging merge can't auto-rebuild
|
||||
# `:latest` for the 8 workspace-template images mid-demo.
|
||||
#
|
||||
# Demo prep typically runs T-48h to T+1h. During that window:
|
||||
#
|
||||
# PATH 1: any merge to molecule-core/staging that touches workspace/**
|
||||
# → publish-runtime.yml fires
|
||||
# → PyPI auto-bumps molecule-ai-workspace-runtime patch version
|
||||
# → repository_dispatch fans out to 8 workspace-template-* repos
|
||||
# → each template repo rebuilds and re-tags
|
||||
# ghcr.io/molecule-ai/workspace-template-<runtime>:latest
|
||||
#
|
||||
# PATH 2: any merge to a workspace-template-* repo's main branch
|
||||
# → that repo's publish-image.yml fires
|
||||
# → ghcr.io/molecule-ai/workspace-template-<runtime>:latest
|
||||
# gets re-tagged
|
||||
#
|
||||
# provisioner.go:296 RuntimeImages[runtime] reads `:latest` at every
|
||||
# workspace boot. A new workspace provision during demo pulls whatever
|
||||
# `:latest` resolved to seconds earlier — so a bad merge minutes
|
||||
# before the demo can break a tenant the funder is about to see.
|
||||
#
|
||||
# This script captures the current good `:latest` digests for all 8
|
||||
# templates and disables both cascade vectors. The complementary
|
||||
# demo-thaw.sh re-enables them.
|
||||
#
|
||||
# Usage:
|
||||
# scripts/demo-freeze.sh # dry run — print what would happen
|
||||
# scripts/demo-freeze.sh --execute # actually disable workflows + snapshot
|
||||
#
|
||||
# Prereqs:
|
||||
# - gh CLI authenticated with workflow:write scope on Molecule-AI org
|
||||
# - curl + jq (for digest snapshot via GHCR anonymous registry API)
|
||||
#
|
||||
# Output:
|
||||
# <snapshot dir>/digests-YYYYMMDD-HHMMSS.txt
|
||||
# One line per template: "<runtime>: <digest>"
|
||||
# <snapshot dir>/disabled-workflows-YYYYMMDD-HHMMSS.txt
|
||||
# One line per disabled workflow: "<repo>: <workflow>"
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 — freeze complete (or dry-run successful)
|
||||
# 1 — pre-flight failure (missing tooling, missing auth, etc.)
|
||||
# 2 — partial freeze (some workflows did not disable cleanly; see log)
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
usage() {
|
||||
cat <<'USAGE'
|
||||
demo-freeze.sh — disable the runtime + template image publish cascades
|
||||
during a demo-prep window.
|
||||
|
||||
Captures current :latest digests for all 8 workspace-template-* images
|
||||
and disables the workflows that would otherwise re-tag them.
|
||||
|
||||
Usage:
|
||||
scripts/demo-freeze.sh # dry run — print what would happen
|
||||
scripts/demo-freeze.sh --execute # actually disable workflows + snapshot
|
||||
|
||||
See the comment block at the top of this script for the full procedure.
|
||||
USAGE
|
||||
}
|
||||
|
||||
EXECUTE=0
|
||||
case "${1:-}" in
|
||||
--execute)
|
||||
EXECUTE=1
|
||||
;;
|
||||
--help|-h)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
"")
|
||||
;;
|
||||
*)
|
||||
echo "unknown arg: $1" >&2
|
||||
usage >&2
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
|
||||
# Templates and their GHCR repository slugs. Source of truth for the
|
||||
# runtime → image map is workspace-server/internal/provisioner/provisioner.go
|
||||
# RuntimeImages — keep this list in sync if a runtime is added.
|
||||
TEMPLATES=(
|
||||
"claude-code"
|
||||
"hermes"
|
||||
"openclaw"
|
||||
"langgraph"
|
||||
"deepagents"
|
||||
"crewai"
|
||||
"autogen"
|
||||
"gemini-cli"
|
||||
)
|
||||
|
||||
# Pre-flight: required tooling.
|
||||
need() {
|
||||
command -v "$1" >/dev/null || { echo "ERROR: missing required tool: $1" >&2; exit 1; }
|
||||
}
|
||||
need gh
|
||||
need curl
|
||||
need jq
|
||||
|
||||
# Pre-flight: gh auth. Snapshot via anonymous GHCR token works without
|
||||
# org auth, but workflow disable needs an authenticated gh.
|
||||
if ! gh auth status >/dev/null 2>&1; then
|
||||
echo "ERROR: gh not authenticated. Run 'gh auth login' first." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Snapshot location relative to this script. Keeping it under scripts/
|
||||
# rather than a temp dir means freeze receipts are easy to find again
|
||||
# during the actual demo.
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
SNAPSHOT_DIR="${SCRIPT_DIR}/demo-freeze-snapshots"
|
||||
mkdir -p "$SNAPSHOT_DIR"
|
||||
TS="$(date -u +%Y%m%d-%H%M%S)"
|
||||
DIGESTS_FILE="${SNAPSHOT_DIR}/digests-${TS}.txt"
|
||||
WORKFLOWS_FILE="${SNAPSHOT_DIR}/disabled-workflows-${TS}.txt"
|
||||
|
||||
if [ $EXECUTE -eq 0 ]; then
|
||||
echo "=== DRY RUN (no changes will be made; pass --execute to apply) ==="
|
||||
else
|
||||
echo "=== EXECUTING FREEZE — workflows will be disabled ==="
|
||||
fi
|
||||
echo "Snapshot timestamp: $TS"
|
||||
echo "Digest log: $DIGESTS_FILE"
|
||||
echo "Workflow log: $WORKFLOWS_FILE"
|
||||
echo
|
||||
|
||||
# Step 1: capture current :latest digest for each template.
|
||||
echo "→ Capturing current :latest digests"
|
||||
for tpl in "${TEMPLATES[@]}"; do
|
||||
token=$(curl -fsS "https://ghcr.io/token?scope=repository:molecule-ai/workspace-template-${tpl}:pull" | jq -r .token 2>/dev/null || true)
|
||||
if [ -z "$token" ] || [ "$token" = "null" ]; then
|
||||
echo " WARN: token fetch failed for $tpl — skipping digest capture"
|
||||
continue
|
||||
fi
|
||||
digest=$(curl -fsSI \
|
||||
-H "Authorization: Bearer $token" \
|
||||
-H "Accept: application/vnd.oci.image.index.v1+json" \
|
||||
-H "Accept: application/vnd.docker.distribution.manifest.v2+json" \
|
||||
"https://ghcr.io/v2/molecule-ai/workspace-template-${tpl}/manifests/latest" 2>/dev/null \
|
||||
| grep -i 'docker-content-digest' \
|
||||
| awk '{print $2}' \
|
||||
| tr -d '\r')
|
||||
if [ -z "$digest" ]; then
|
||||
echo " WARN: digest fetch failed for $tpl"
|
||||
continue
|
||||
fi
|
||||
echo " $tpl: $digest"
|
||||
if [ $EXECUTE -eq 1 ]; then
|
||||
echo "$tpl: $digest" >> "$DIGESTS_FILE"
|
||||
fi
|
||||
done
|
||||
echo
|
||||
|
||||
# Step 2: disable publish-runtime.yml in molecule-core (PATH 1 source).
|
||||
echo "→ Disabling publish-runtime.yml in molecule-core (kills runtime → 8-template cascade)"
|
||||
if [ $EXECUTE -eq 1 ]; then
|
||||
if gh workflow disable publish-runtime.yml -R Molecule-AI/molecule-core 2>/tmp/freeze.err; then
|
||||
echo " OK molecule-core/publish-runtime.yml disabled"
|
||||
echo "Molecule-AI/molecule-core: publish-runtime.yml" >> "$WORKFLOWS_FILE"
|
||||
else
|
||||
echo " FAIL molecule-core/publish-runtime.yml: $(cat /tmp/freeze.err)" >&2
|
||||
fi
|
||||
else
|
||||
echo " (dry-run) would disable: gh workflow disable publish-runtime.yml -R Molecule-AI/molecule-core"
|
||||
fi
|
||||
echo
|
||||
|
||||
# Step 3: disable publish-image.yml in each of the 8 template repos (PATH 2 sources).
|
||||
echo "→ Disabling publish-image.yml in each workspace-template-* repo"
|
||||
PARTIAL_FAIL=0
|
||||
for tpl in "${TEMPLATES[@]}"; do
|
||||
repo="Molecule-AI/molecule-ai-workspace-template-${tpl}"
|
||||
if [ $EXECUTE -eq 1 ]; then
|
||||
if gh workflow disable publish-image.yml -R "$repo" 2>/tmp/freeze.err; then
|
||||
echo " OK $repo/publish-image.yml disabled"
|
||||
echo "${repo}: publish-image.yml" >> "$WORKFLOWS_FILE"
|
||||
else
|
||||
echo " FAIL $repo/publish-image.yml: $(cat /tmp/freeze.err)" >&2
|
||||
PARTIAL_FAIL=1
|
||||
fi
|
||||
else
|
||||
echo " (dry-run) would disable: gh workflow disable publish-image.yml -R $repo"
|
||||
fi
|
||||
done
|
||||
echo
|
||||
|
||||
if [ $EXECUTE -eq 0 ]; then
|
||||
echo "=== DRY RUN COMPLETE ==="
|
||||
echo "Re-run with --execute to apply the freeze."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "=== FREEZE COMPLETE ==="
|
||||
echo "Receipts: $DIGESTS_FILE"
|
||||
echo " $WORKFLOWS_FILE"
|
||||
echo
|
||||
echo "Next steps:"
|
||||
echo " - Verify by running: gh workflow list -R Molecule-AI/molecule-core | grep publish-runtime"
|
||||
echo " Status should be 'disabled_manually'."
|
||||
echo " - Demo proceeds; new workspaces pull the snapshotted :latest digests."
|
||||
echo " - Post-demo, run: scripts/demo-thaw.sh ${TS}"
|
||||
echo " to re-enable every workflow this freeze disabled."
|
||||
echo
|
||||
if [ $PARTIAL_FAIL -ne 0 ]; then
|
||||
echo "WARNING: one or more workflows did not disable cleanly. Re-run after fixing." >&2
|
||||
exit 2
|
||||
fi
|
||||
exit 0
|
||||
Executable
+124
@@ -0,0 +1,124 @@
|
||||
#!/usr/bin/env bash
|
||||
# demo-thaw.sh — re-enable workflows that demo-freeze.sh disabled.
|
||||
#
|
||||
# Usage:
|
||||
# scripts/demo-thaw.sh <freeze-timestamp>
|
||||
# scripts/demo-thaw.sh 20260503-180000
|
||||
#
|
||||
# Reads disabled-workflows-<ts>.txt produced by demo-freeze.sh and
|
||||
# runs `gh workflow enable` for each entry. Idempotent — re-enabling
|
||||
# an already-enabled workflow is a no-op.
|
||||
#
|
||||
# Defaults to executing (the inverse of freeze, which defaults to
|
||||
# dry-run). Pass --dry-run to print without executing.
|
||||
#
|
||||
# Prereqs:
|
||||
# - gh CLI authenticated with workflow:write scope on Molecule-AI org
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 — all workflows re-enabled
|
||||
# 1 — pre-flight failure (missing receipt file, missing tooling)
|
||||
# 2 — partial thaw (some workflows did not enable; check output)
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
usage() {
|
||||
cat <<'USAGE'
|
||||
demo-thaw.sh — re-enable workflows that demo-freeze.sh disabled.
|
||||
|
||||
Usage:
|
||||
scripts/demo-thaw.sh <freeze-timestamp> # apply
|
||||
scripts/demo-thaw.sh <freeze-timestamp> --dry-run # print without applying
|
||||
|
||||
ts is the YYYYMMDD-HHMMSS suffix on
|
||||
scripts/demo-freeze-snapshots/disabled-workflows-*.txt produced by
|
||||
demo-freeze.sh.
|
||||
USAGE
|
||||
}
|
||||
|
||||
DRY_RUN=0
|
||||
TS=""
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
--dry-run)
|
||||
DRY_RUN=1
|
||||
;;
|
||||
--help|-h)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
if [ -z "$TS" ]; then
|
||||
TS="$arg"
|
||||
else
|
||||
echo "unknown arg: $arg" >&2
|
||||
usage >&2
|
||||
exit 2
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ -z "$TS" ]; then
|
||||
echo "usage: $0 <freeze-timestamp> [--dry-run]" >&2
|
||||
echo " e.g. $0 20260503-180000" >&2
|
||||
echo " ts is the YYYYMMDD-HHMMSS suffix on demo-freeze-snapshots/disabled-workflows-*.txt" >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
command -v gh >/dev/null || { echo "ERROR: gh CLI required" >&2; exit 1; }
|
||||
if ! gh auth status >/dev/null 2>&1; then
|
||||
echo "ERROR: gh not authenticated. Run 'gh auth login' first." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
WORKFLOWS_FILE="${SCRIPT_DIR}/demo-freeze-snapshots/disabled-workflows-${TS}.txt"
|
||||
|
||||
if [ ! -f "$WORKFLOWS_FILE" ]; then
|
||||
echo "ERROR: receipt not found: $WORKFLOWS_FILE" >&2
|
||||
echo "Available receipts:" >&2
|
||||
ls "${SCRIPT_DIR}/demo-freeze-snapshots/" 2>/dev/null | grep '^disabled-workflows-' >&2 || echo " (none)" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $DRY_RUN -eq 1 ]; then
|
||||
echo "=== DRY RUN (no changes will be made) ==="
|
||||
else
|
||||
echo "=== THAWING — re-enabling workflows ==="
|
||||
fi
|
||||
echo "Reading: $WORKFLOWS_FILE"
|
||||
echo
|
||||
|
||||
PARTIAL_FAIL=0
|
||||
while IFS=': ' read -r repo workflow; do
|
||||
[ -z "$repo" ] && continue
|
||||
if [ $DRY_RUN -eq 1 ]; then
|
||||
echo " (dry-run) would enable: gh workflow enable $workflow -R $repo"
|
||||
else
|
||||
if gh workflow enable "$workflow" -R "$repo" 2>/tmp/thaw.err; then
|
||||
echo " OK $repo/$workflow re-enabled"
|
||||
else
|
||||
echo " FAIL $repo/$workflow: $(cat /tmp/thaw.err)" >&2
|
||||
PARTIAL_FAIL=1
|
||||
fi
|
||||
fi
|
||||
done < "$WORKFLOWS_FILE"
|
||||
|
||||
echo
|
||||
if [ $DRY_RUN -eq 1 ]; then
|
||||
echo "=== DRY RUN COMPLETE ==="
|
||||
echo "Re-run without --dry-run to apply."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "=== THAW COMPLETE ==="
|
||||
echo "Cascades restored. Next workspace/** push to molecule-core/staging will"
|
||||
echo "auto-publish the runtime wheel and fan out to template rebuilds as normal."
|
||||
if [ $PARTIAL_FAIL -ne 0 ]; then
|
||||
echo
|
||||
echo "WARNING: one or more workflows did not re-enable cleanly. Re-run or enable manually:" >&2
|
||||
echo " gh workflow list -R <repo>" >&2
|
||||
exit 2
|
||||
fi
|
||||
exit 0
|
||||
+36
-4
@@ -105,11 +105,43 @@ echo "==> Running infra/scripts/setup.sh (infra + template registry)"
|
||||
"$ROOT/infra/scripts/setup.sh"
|
||||
|
||||
# ─────────────────────────────────────────────── 3. platform
|
||||
#
|
||||
# Two paths:
|
||||
# (a) `go` is on PATH → run the platform directly via `go run`.
|
||||
# Fast iteration, attaches to /tmp/molecule-platform.log.
|
||||
# (b) `go` is NOT on PATH → fall back to the published platform
|
||||
# container image. Slower first run (image pull) but the script
|
||||
# still works on a fresh dev box without forcing the dev to
|
||||
# install Go just to read logs.
|
||||
#
|
||||
# The earlier version of this script silently called `go run` and died
|
||||
# with `go: not found` on dev boxes where Go wasn't installed; the
|
||||
# script's own prerequisite list (line 13-21) said "Go 1.25+" but the
|
||||
# user had no signpost between "open the doc" and "command not found
|
||||
# at line 111." This branch makes the failure path either succeed
|
||||
# (fallback) or fail loud with explicit install guidance.
|
||||
|
||||
echo "==> Starting Platform (Go :8080)"
|
||||
cd "$ROOT/workspace-server"
|
||||
go run ./cmd/server > /tmp/molecule-platform.log 2>&1 &
|
||||
PLATFORM_PID=$!
|
||||
if command -v go >/dev/null 2>&1; then
|
||||
echo "==> Starting Platform (Go :8080)"
|
||||
cd "$ROOT/workspace-server"
|
||||
go run ./cmd/server > /tmp/molecule-platform.log 2>&1 &
|
||||
PLATFORM_PID=$!
|
||||
else
|
||||
echo "==> Go not found on PATH — falling back to docker-compose platform service"
|
||||
echo " (Install Go 1.25+ for faster iteration: https://go.dev/dl/)"
|
||||
cd "$ROOT"
|
||||
# Bring up just the platform service from docker-compose.yml. infra/setup.sh
|
||||
# already brought up postgres+redis+etc on docker-compose.infra.yml; this
|
||||
# adds the platform container on top, mapped to :8080 so the rest of this
|
||||
# script's wait-for-/health loop works unchanged.
|
||||
docker compose up -d --build platform > /tmp/molecule-platform.log 2>&1 || {
|
||||
echo " ✗ docker compose up platform failed — see /tmp/molecule-platform.log"
|
||||
echo " Either install Go 1.25+ (https://go.dev/dl/) and rerun, or fix the docker fallback."
|
||||
exit 1
|
||||
}
|
||||
# PLATFORM_PID is unset on this path; cleanup() handles that with `kill ... 2>/dev/null || true`.
|
||||
PLATFORM_PID=
|
||||
fi
|
||||
|
||||
echo " Waiting for Platform /health..."
|
||||
PLATFORM_READY=0
|
||||
|
||||
+271
@@ -0,0 +1,271 @@
|
||||
#!/usr/bin/env bash
|
||||
# Standalone runner for Issue 4 reproduction (RFC #2251) — exists alongside
|
||||
# `measure-coordinator-task-bounds.sh` to support arbitrary template + secret
|
||||
# combinations without modifying the canonical harness. The canonical harness
|
||||
# stays focused on its v1 contract (claude-code-default + langgraph + OpenRouter);
|
||||
# this runner wraps the same workspace-server API calls but takes everything as
|
||||
# env-var inputs so a Hermes/MiniMax run can share the measurement code path.
|
||||
#
|
||||
# Two routing modes:
|
||||
# MODE=local (default) — direct workspace-server API
|
||||
# MODE=saas — placeholder; populates same vars but expects
|
||||
# PLATFORM=<tenant-subdomain> with X-Tenant-Id +
|
||||
# Authorization headers from CP_ADMIN_API_TOKEN
|
||||
#
|
||||
# Required env:
|
||||
# PLATFORM workspace-server base URL (default http://localhost:8080)
|
||||
# PM_TEMPLATE template slug for coordinator
|
||||
# CHILD_TEMPLATE template slug for researcher child
|
||||
# SECRET_NAME workspace_secrets key (e.g. MINIMAX_API_KEY)
|
||||
# SECRET_VALUE the secret value (or read from $SECRET_NAME if unset)
|
||||
#
|
||||
# Optional:
|
||||
# MODEL PUT /workspaces/:id/model after provision
|
||||
# SYNTHESIS_DEPTH=3 number of delegation rounds in the kickoff task
|
||||
# A2A_TIMEOUT=600 ceiling on measurement-side wait (seconds)
|
||||
# KEEP_WORKSPACES=0 skip cleanup-on-exit when 1 (for log inspection)
|
||||
# MODE=local|saas local-dev vs SaaS routing posture
|
||||
# CP_ADMIN_API_TOKEN required when MODE=saas; sent as Authorization bearer
|
||||
# TENANT_ID required when MODE=saas; sent as X-Tenant-Id
|
||||
#
|
||||
# Output: NDJSON event stream on stdout + a human summary on stderr.
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
PLATFORM="${PLATFORM:-http://localhost:8080}"
|
||||
MODE="${MODE:-local}"
|
||||
PM_TEMPLATE="${PM_TEMPLATE:?PM_TEMPLATE is required (e.g. claude-code-default, hermes)}"
|
||||
CHILD_TEMPLATE="${CHILD_TEMPLATE:?CHILD_TEMPLATE is required}"
|
||||
SECRET_NAME="${SECRET_NAME:?SECRET_NAME is required (e.g. MINIMAX_API_KEY)}"
|
||||
MODEL="${MODEL:-}"
|
||||
SYNTHESIS_DEPTH="${SYNTHESIS_DEPTH:-3}"
|
||||
A2A_TIMEOUT="${A2A_TIMEOUT:-600}"
|
||||
KEEP_WORKSPACES="${KEEP_WORKSPACES:-0}"
|
||||
|
||||
# SaaS-mode auth chain: workspace-server (per-tenant Go binary on EC2)
|
||||
# requires BOTH headers:
|
||||
# Authorization: Bearer <tenant-admin-token> (per-tenant secret)
|
||||
# X-Molecule-Org-Id: <org-uuid> (TenantGuard middleware)
|
||||
# The tenant-admin-token is provisioned by controlplane and retrievable via:
|
||||
# GET /cp/admin/orgs/<slug>/admin-token (CP_ADMIN_API_TOKEN bearer-gated)
|
||||
# The runner can either:
|
||||
# 1. Take ORG_SLUG + CP_ADMIN_API_TOKEN and fetch the tenant token itself, or
|
||||
# 2. Take ORG_ID + TENANT_ADMIN_TOKEN directly.
|
||||
ORG_ID="${ORG_ID:-}"
|
||||
ORG_SLUG="${ORG_SLUG:-}"
|
||||
TENANT_ADMIN_TOKEN="${TENANT_ADMIN_TOKEN:-}"
|
||||
CP_ADMIN_API_TOKEN="${CP_ADMIN_API_TOKEN:-}"
|
||||
CP_API_URL="${CP_API_URL:-https://staging-api.moleculesai.app}"
|
||||
|
||||
# Resolve secret value: ${SECRET_VALUE} > $${SECRET_NAME} > error.
|
||||
SECRET_VALUE="${SECRET_VALUE:-}"
|
||||
if [ -z "$SECRET_VALUE" ]; then
|
||||
SECRET_VALUE="$(printenv "$SECRET_NAME" 2>/dev/null || true)"
|
||||
fi
|
||||
[ -n "$SECRET_VALUE" ] || { echo "ERROR: set \$$SECRET_NAME or \$SECRET_VALUE" >&2; exit 1; }
|
||||
|
||||
# SaaS-mode preflight + format validation.
|
||||
# Validating ORG_ID + ORG_SLUG client-side gives an actionable error
|
||||
# before the request hits TenantGuard's intentionally-opaque 404
|
||||
# (which doesn't tell the operator whether the slug is wrong, the
|
||||
# UUID is wrong, or auth is wrong).
|
||||
if [ "$MODE" = "saas" ]; then
|
||||
[ -n "$ORG_ID" ] || { echo "ERROR: MODE=saas requires ORG_ID (the org UUID)" >&2; exit 1; }
|
||||
case "$ORG_ID" in
|
||||
[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]-[0-9a-f][0-9a-f][0-9a-f][0-9a-f]-[0-9a-f][0-9a-f][0-9a-f][0-9a-f]-[0-9a-f][0-9a-f][0-9a-f][0-9a-f]-[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]) ;;
|
||||
*) echo "ERROR: ORG_ID must be a UUID (got '$ORG_ID')" >&2; exit 1;;
|
||||
esac
|
||||
if [ -n "$ORG_SLUG" ]; then
|
||||
case "$ORG_SLUG" in
|
||||
*[!a-z0-9-]* | -* | *-) echo "ERROR: ORG_SLUG must match ^[a-z0-9][a-z0-9-]*[a-z0-9]\$ (got '$ORG_SLUG')" >&2; exit 1;;
|
||||
esac
|
||||
fi
|
||||
if [ -z "$TENANT_ADMIN_TOKEN" ]; then
|
||||
[ -n "$ORG_SLUG" ] || { echo "ERROR: MODE=saas needs TENANT_ADMIN_TOKEN or ORG_SLUG (to fetch it via CP)" >&2; exit 1; }
|
||||
[ -n "$CP_ADMIN_API_TOKEN" ] || { echo "ERROR: ORG_SLUG path needs CP_ADMIN_API_TOKEN to fetch tenant token from $CP_API_URL" >&2; exit 1; }
|
||||
TENANT_ADMIN_TOKEN=$(curl -s -H "Authorization: Bearer $CP_ADMIN_API_TOKEN" \
|
||||
"$CP_API_URL/cp/admin/orgs/$ORG_SLUG/admin-token" \
|
||||
| python3 -c "import sys,json; print(json.load(sys.stdin).get('admin_token',''))" 2>/dev/null || echo "")
|
||||
[ -n "$TENANT_ADMIN_TOKEN" ] || { echo "ERROR: failed to resolve tenant admin token via $CP_API_URL/cp/admin/orgs/$ORG_SLUG/admin-token" >&2; exit 1; }
|
||||
fi
|
||||
fi
|
||||
|
||||
ts() { date -u +%Y-%m-%dT%H:%M:%S.%3NZ 2>/dev/null || date -u +%Y-%m-%dT%H:%M:%SZ; }
|
||||
emit() { printf '{"ts":"%s","event":"%s","data":%s}\n' "$(ts)" "$1" "${2:-null}"; }
|
||||
|
||||
api() {
|
||||
local args=()
|
||||
if [ "$MODE" = "saas" ]; then
|
||||
args+=(-H "Authorization: Bearer $TENANT_ADMIN_TOKEN")
|
||||
args+=(-H "X-Molecule-Org-Id: $ORG_ID")
|
||||
fi
|
||||
curl -s ${args[@]+"${args[@]}"} "$@"
|
||||
}
|
||||
|
||||
PM_ID=""
|
||||
CHILD_ID=""
|
||||
cleanup() {
|
||||
local rc=$?
|
||||
set +e
|
||||
if [ "$KEEP_WORKSPACES" = "1" ]; then
|
||||
emit "cleanup_skipped" "{\"reason\":\"KEEP_WORKSPACES=1\",\"pm_id\":\"$PM_ID\",\"child_id\":\"$CHILD_ID\"}"
|
||||
return $rc
|
||||
fi
|
||||
for id in "$CHILD_ID" "$PM_ID"; do
|
||||
[ -z "$id" ] && continue
|
||||
code=$(api -o /dev/null -w '%{http_code}' -X DELETE "$PLATFORM/workspaces/$id" 2>/dev/null || echo "curl_err")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ] || [ "$code" = "404" ]; then
|
||||
emit "cleanup_deleted" "{\"workspace_id\":\"$id\",\"http_code\":\"$code\"}"
|
||||
else
|
||||
emit "cleanup_failed" "{\"workspace_id\":\"$id\",\"http_code\":\"$code\"}"
|
||||
fi
|
||||
done
|
||||
return $rc
|
||||
}
|
||||
trap cleanup EXIT INT TERM
|
||||
|
||||
emit "run_started" "{\"platform\":\"$PLATFORM\",\"mode\":\"$MODE\",\"pm_template\":\"$PM_TEMPLATE\",\"child_template\":\"$CHILD_TEMPLATE\",\"model\":\"$MODEL\",\"secret_name\":\"$SECRET_NAME\",\"synthesis_depth\":$SYNTHESIS_DEPTH,\"a2a_timeout_secs\":$A2A_TIMEOUT}"
|
||||
|
||||
# ---- Provision via JSON-encoded bodies (defends against templates/values
|
||||
# with embedded shell-special chars). ----
|
||||
pm_body=$(python3 -c '
|
||||
import json, sys
|
||||
print(json.dumps({"name":"PM","role":"Coordinator — delegates and synthesizes","tier":2,"template":sys.argv[1]}))' "$PM_TEMPLATE")
|
||||
child_body=$(python3 -c '
|
||||
import json, sys
|
||||
print(json.dumps({"name":"Researcher","role":"Returns short research findings","tier":2,"template":sys.argv[1]}))' "$CHILD_TEMPLATE")
|
||||
secret_body=$(python3 -c '
|
||||
import json, sys
|
||||
print(json.dumps({"key":sys.argv[1],"value":sys.argv[2]}))' "$SECRET_NAME" "$SECRET_VALUE")
|
||||
|
||||
emit "provisioning_pm" "{\"template\":\"$PM_TEMPLATE\"}"
|
||||
R=$(api -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' -d "$pm_body")
|
||||
PM_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "")
|
||||
[ -n "$PM_ID" ] || { echo "ERROR: PM create failed — response: $R" >&2; exit 1; }
|
||||
emit "pm_provisioned" "{\"workspace_id\":\"$PM_ID\"}"
|
||||
|
||||
emit "provisioning_child" "{\"template\":\"$CHILD_TEMPLATE\"}"
|
||||
R=$(api -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' -d "$child_body")
|
||||
CHILD_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "")
|
||||
[ -n "$CHILD_ID" ] || { echo "ERROR: child create failed — response: $R" >&2; exit 1; }
|
||||
emit "child_provisioned" "{\"workspace_id\":\"$CHILD_ID\"}"
|
||||
|
||||
api -X PATCH "$PLATFORM/workspaces/$CHILD_ID" -H 'Content-Type: application/json' \
|
||||
-d "{\"parent_id\":\"$PM_ID\"}" > /dev/null
|
||||
|
||||
# Seed secret on BOTH workspaces. Hermes/MiniMax both sides need it; templates
|
||||
# that ignore unknown env vars treat extras as no-op.
|
||||
for id in "$PM_ID" "$CHILD_ID"; do
|
||||
api -X POST "$PLATFORM/workspaces/$id/secrets" -H 'Content-Type: application/json' -d "$secret_body" > /dev/null
|
||||
done
|
||||
emit "secrets_seeded" "{\"key\":\"$SECRET_NAME\",\"workspaces\":[\"$PM_ID\",\"$CHILD_ID\"]}"
|
||||
|
||||
if [ -n "$MODEL" ]; then
|
||||
model_body=$(python3 -c 'import json,sys; print(json.dumps({"model":sys.argv[1]}))' "$MODEL")
|
||||
for id in "$PM_ID" "$CHILD_ID"; do
|
||||
api -X PUT "$PLATFORM/workspaces/$id/model" -H 'Content-Type: application/json' -d "$model_body" > /dev/null
|
||||
done
|
||||
emit "model_set" "{\"model\":\"$MODEL\",\"workspaces\":[\"$PM_ID\",\"$CHILD_ID\"]}"
|
||||
fi
|
||||
|
||||
# ---- Wait for both online ----
|
||||
WAIT_ONLINE_SECS="${WAIT_ONLINE_SECS:-180}"
|
||||
wait_online() {
|
||||
local id="$1" label="$2"
|
||||
# Round up so a non-multiple-of-3 budget waits at least the requested
|
||||
# seconds (200 → 67 polls × 3s = 201s, not 198s).
|
||||
local polls=$(( (WAIT_ONLINE_SECS + 2) / 3 ))
|
||||
local last_status=""
|
||||
for i in $(seq 1 "$polls"); do
|
||||
s=$(api "$PLATFORM/workspaces/$id" | python3 -c "import sys,json; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "")
|
||||
if [ "$s" != "$last_status" ]; then
|
||||
emit "status_change" "{\"workspace\":\"$label\",\"from\":\"$last_status\",\"to\":\"$s\",\"poll\":$i}"
|
||||
last_status="$s"
|
||||
fi
|
||||
[ "$s" = "online" ] && { emit "online" "{\"workspace\":\"$label\",\"after_polls\":$i,\"after_secs\":$((i * 3))}"; return 0; }
|
||||
[ "$s" = "failed" ] && { emit "failed" "{\"workspace\":\"$label\"}"; return 1; }
|
||||
sleep 3
|
||||
done
|
||||
emit "online_timeout" "{\"workspace\":\"$label\",\"last_status\":\"$last_status\",\"waited_secs\":$WAIT_ONLINE_SECS}"
|
||||
return 1
|
||||
}
|
||||
wait_online "$PM_ID" "PM" || exit 2
|
||||
wait_online "$CHILD_ID" "child" || exit 2
|
||||
|
||||
# ---- Build a synthesis-heavy kickoff task ----
|
||||
TASK="You are coordinating a research analysis. Delegate $SYNTHESIS_DEPTH separate sub-questions to the Researcher (one at a time, sequentially — wait for each response before sending the next), then synthesize all findings into a single coherent report. Sub-questions: (a) historical context of distributed consensus, (b) modern Byzantine-fault-tolerant protocols, (c) practical trade-offs between Raft and Paxos. After all delegations complete, write a 600-word synthesis comparing the three responses and drawing one cross-cutting insight. Do not respond until the synthesis is complete."
|
||||
|
||||
# ---- A2A kickoff round-trip ----
|
||||
emit "a2a_kickoff_sent" "{\"to\":\"$PM_ID\",\"task_chars\":${#TASK}}"
|
||||
START_NS=$(python3 -c 'import time; print(int(time.time_ns()))')
|
||||
|
||||
a2a_body=$(python3 -c '
|
||||
import json, sys
|
||||
print(json.dumps({"method":"message/send","params":{"message":{"role":"user","parts":[{"type":"text","text":sys.argv[1]}]}}}))' "$TASK")
|
||||
|
||||
RESP=$(api --max-time "$A2A_TIMEOUT" -X POST "$PLATFORM/workspaces/$PM_ID/a2a" \
|
||||
-H "Content-Type: application/json" -d "$a2a_body" || echo "<curl_failed_or_timed_out>")
|
||||
|
||||
END_NS=$(python3 -c 'import time; print(int(time.time_ns()))')
|
||||
ELAPSED_SECS=$(python3 -c "print(round(($END_NS - $START_NS) / 1e9, 2))")
|
||||
|
||||
emit "a2a_response_observed" "{\"elapsed_secs\":$ELAPSED_SECS,\"response_chars\":${#RESP},\"response_head\":$(python3 -c "import json,sys; print(json.dumps(sys.argv[1][:200]))" "$RESP")}"
|
||||
|
||||
# ---- Activity trace ----
|
||||
# Earlier versions of this runner called /workspaces/:id/heartbeat-history,
|
||||
# which doesn't exist on workspace-server. On local dev that returned 404,
|
||||
# on tenant builds the platform's canvas-proxy fallback intercepted it and
|
||||
# returned 28KB of Next.js HTML — neither of which is useful trace data.
|
||||
# /workspaces/:id/activity is the existing endpoint that reads the
|
||||
# activity_logs table (a2a_send / a2a_receive / task_update / agent_log /
|
||||
# error events with duration_ms + status). That's the data the RFC's
|
||||
# §V1.0 step 6 'platform-side transition' check actually needs.
|
||||
emit "fetching_activity_trace" "{\"mode\":\"$MODE\"}"
|
||||
ACTIVITY=$(api "$PLATFORM/workspaces/$PM_ID/activity?since_secs=$A2A_TIMEOUT" 2>&1 || echo "<endpoint_unavailable>")
|
||||
emit "activity_trace" "{\"raw\":$(python3 -c "import json,sys; print(json.dumps(sys.argv[1]))" "$ACTIVITY")}"
|
||||
|
||||
# ---- rfc2251_phase log lines from the workspace container ----
|
||||
# Local Docker provisioner: workspace container name is workspace-<id>.
|
||||
# SaaS: container is on EC2 — skip log capture, fall back to heartbeat only.
|
||||
if [ "$MODE" = "local" ] && command -v docker >/dev/null 2>&1; then
|
||||
for id in "$PM_ID"; do
|
||||
container=$(docker ps --filter "name=workspace-$id" --format '{{.Names}}' | head -1)
|
||||
if [ -n "$container" ]; then
|
||||
phase_log=$(docker logs --since "${A2A_TIMEOUT}s" "$container" 2>&1 | grep 'rfc2251_phase=' || echo "<no rfc2251_phase log lines — container running stale image without #2255 instrumentation>")
|
||||
emit "phase_log" "{\"workspace_id\":\"$id\",\"container\":\"$container\",\"raw\":$(python3 -c "import json,sys; print(json.dumps(sys.argv[1]))" "$phase_log")}"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
emit "run_completed" "{\"elapsed_secs\":$ELAPSED_SECS,\"pm_id\":\"$PM_ID\",\"child_id\":\"$CHILD_ID\"}"
|
||||
|
||||
cat <<EOF >&2
|
||||
|
||||
=========================================
|
||||
Measurement complete. (RFC #2251 / Issue 4 repro)
|
||||
Mode: $MODE
|
||||
Coordinator template: $PM_TEMPLATE
|
||||
Child template: $CHILD_TEMPLATE
|
||||
Model: ${MODEL:-<template default>}
|
||||
Coordinator response: ${ELAPSED_SECS}s
|
||||
PM workspace: $PM_ID
|
||||
Child workspace: $CHILD_ID
|
||||
=========================================
|
||||
|
||||
Interpretation:
|
||||
|
||||
ELAPSED < 60 → Synthesis fast; not informative about platform bounds.
|
||||
Re-run with SYNTHESIS_DEPTH=8 for longer synthesis.
|
||||
|
||||
60 <= ELAPSED < 300 → Within DELEGATION_TIMEOUT. Doesn't prove or refute
|
||||
Issue 4 — HTTP-level timeout would be sufficient.
|
||||
|
||||
ELAPSED >= 300 → BUG CONFIRMED IF activity_trace shows no platform-side
|
||||
transition. Coordinator ran past DELEGATION_TIMEOUT without
|
||||
any platform ceiling kicking in — exactly the gap V1.0
|
||||
plans to close with MAX_TASK_EXECUTION_SECS.
|
||||
|
||||
curl_failed_or_timed_out → \$A2A_TIMEOUT exceeded. Coordinator likely hung
|
||||
or synthesis is just very slow.
|
||||
|
||||
EOF
|
||||
Executable
+112
@@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env bash
|
||||
# Check whether production tenants and canvas are running latest main.
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/ops/check-prod-versions.sh # production
|
||||
# ENV=staging ./scripts/ops/check-prod-versions.sh # staging tenants
|
||||
#
|
||||
# Outputs a table of {surface, current_sha, expected_sha, status}. Returns
|
||||
# non-zero if any surface is stale so this can be wired into a periodic
|
||||
# alert.
|
||||
#
|
||||
# Why this exists: every time someone hits a "is the fix live?" question,
|
||||
# they have to remember the curl pattern + cross-reference with
|
||||
# `git rev-parse origin/main`. This script does that check uniformly across
|
||||
# every public surface (workspace tenants + canvas) and gives a one-line
|
||||
# verdict instead of a stack of one-off curls.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
ENV="${ENV:-production}"
|
||||
EXPECTED_REF="${EXPECTED_REF:-main}"
|
||||
|
||||
case "$ENV" in
|
||||
production)
|
||||
TENANT_DOMAIN="moleculesai.app"
|
||||
CANVAS_URL="https://canvas.moleculesai.app"
|
||||
# Default canary tenant for production. Override via TENANT_SLUGS=
|
||||
# to cover a custom set.
|
||||
DEFAULT_TENANTS="hongmingwang reno-stars"
|
||||
;;
|
||||
staging)
|
||||
TENANT_DOMAIN="staging.moleculesai.app"
|
||||
CANVAS_URL="https://canvas-staging.moleculesai.app"
|
||||
DEFAULT_TENANTS="" # staging tenants are ephemeral; user must specify
|
||||
;;
|
||||
*)
|
||||
echo "Unknown ENV=$ENV (expected: production | staging)" >&2
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
|
||||
TENANT_SLUGS="${TENANT_SLUGS:-$DEFAULT_TENANTS}"
|
||||
|
||||
# Pull EXPECTED_SHA from GitHub. Falls back to local git if gh isn't
|
||||
# logged in — local main may lag origin but is usually close enough for
|
||||
# debugging, and we still report the comparison clearly.
|
||||
EXPECTED_SHA=""
|
||||
if command -v gh >/dev/null 2>&1; then
|
||||
EXPECTED_SHA=$(gh api "repos/Molecule-AI/molecule-core/commits/${EXPECTED_REF}" --jq '.sha' 2>/dev/null || true)
|
||||
fi
|
||||
if [ -z "$EXPECTED_SHA" ]; then
|
||||
if git rev-parse "origin/${EXPECTED_REF}" >/dev/null 2>&1; then
|
||||
EXPECTED_SHA=$(git rev-parse "origin/${EXPECTED_REF}")
|
||||
echo "[check-prod-versions] WARN: gh unavailable, using local origin/${EXPECTED_REF}=${EXPECTED_SHA:0:7} (may lag)"
|
||||
else
|
||||
echo "[check-prod-versions] ERROR: cannot resolve expected SHA — gh not logged in and origin/${EXPECTED_REF} not fetched" >&2
|
||||
exit 2
|
||||
fi
|
||||
fi
|
||||
EXPECTED_SHORT="${EXPECTED_SHA:0:7}"
|
||||
|
||||
echo "Checking ${ENV} surfaces against ${EXPECTED_REF}=${EXPECTED_SHORT}"
|
||||
echo ""
|
||||
printf "%-25s %-9s %-9s %s\n" "Surface" "Live" "Expected" "Status"
|
||||
printf "%-25s %-9s %-9s %s\n" "-------" "----" "--------" "------"
|
||||
|
||||
STALE_COUNT=0
|
||||
UNREACHABLE_COUNT=0
|
||||
|
||||
# Tenant surfaces — workspace-server /buildinfo (added in PR #2398).
|
||||
for slug in $TENANT_SLUGS; do
|
||||
URL="https://${slug}.${TENANT_DOMAIN}/buildinfo"
|
||||
BODY=$(curl -sS --max-time 15 "$URL" 2>/dev/null || echo "")
|
||||
ACTUAL_SHA=$(echo "$BODY" | jq -r '.git_sha // ""' 2>/dev/null || echo "")
|
||||
if [ -z "$ACTUAL_SHA" ]; then
|
||||
printf "%-25s %-9s %-9s ⚠ unreachable\n" "tenant: $slug" "—" "$EXPECTED_SHORT"
|
||||
UNREACHABLE_COUNT=$((UNREACHABLE_COUNT + 1))
|
||||
elif [ "$ACTUAL_SHA" = "$EXPECTED_SHA" ]; then
|
||||
printf "%-25s %-9s %-9s ✓ current\n" "tenant: $slug" "${ACTUAL_SHA:0:7}" "$EXPECTED_SHORT"
|
||||
else
|
||||
printf "%-25s %-9s %-9s ✗ stale\n" "tenant: $slug" "${ACTUAL_SHA:0:7}" "$EXPECTED_SHORT"
|
||||
STALE_COUNT=$((STALE_COUNT + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
# Canvas — Next.js /api/buildinfo (PR #2407). Vercel injects
|
||||
# VERCEL_GIT_COMMIT_SHA at build time so this reflects the deployed
|
||||
# commit, not the request time.
|
||||
CANVAS_BODY=$(curl -sS --max-time 15 "${CANVAS_URL}/api/buildinfo" 2>/dev/null || echo "")
|
||||
CANVAS_SHA=$(echo "$CANVAS_BODY" | jq -r '.git_sha // ""' 2>/dev/null || echo "")
|
||||
if [ -z "$CANVAS_SHA" ]; then
|
||||
printf "%-25s %-9s %-9s ⚠ unreachable (route may not be deployed yet)\n" "canvas" "—" "$EXPECTED_SHORT"
|
||||
UNREACHABLE_COUNT=$((UNREACHABLE_COUNT + 1))
|
||||
elif [ "$CANVAS_SHA" = "dev" ]; then
|
||||
printf "%-25s %-9s %-9s ⚠ dev sentinel (Vercel env not injected — check VERCEL_GIT_COMMIT_SHA)\n" "canvas" "dev" "$EXPECTED_SHORT"
|
||||
UNREACHABLE_COUNT=$((UNREACHABLE_COUNT + 1))
|
||||
elif [ "$CANVAS_SHA" = "$EXPECTED_SHA" ]; then
|
||||
printf "%-25s %-9s %-9s ✓ current\n" "canvas" "${CANVAS_SHA:0:7}" "$EXPECTED_SHORT"
|
||||
else
|
||||
printf "%-25s %-9s %-9s ✗ stale\n" "canvas" "${CANVAS_SHA:0:7}" "$EXPECTED_SHORT"
|
||||
STALE_COUNT=$((STALE_COUNT + 1))
|
||||
fi
|
||||
|
||||
echo ""
|
||||
if [ $STALE_COUNT -eq 0 ] && [ $UNREACHABLE_COUNT -eq 0 ]; then
|
||||
echo "All surfaces current."
|
||||
exit 0
|
||||
fi
|
||||
echo "Summary: ${STALE_COUNT} stale, ${UNREACHABLE_COUNT} unreachable."
|
||||
# Stale is a deploy gap; unreachable is operational (DNS, CF, route absent).
|
||||
# Both are signal — exit non-zero so cron / CI can alert.
|
||||
exit 1
|
||||
Executable
+206
@@ -0,0 +1,206 @@
|
||||
#!/usr/bin/env python3
|
||||
"""check_migration_collisions.py — fail-loud detector for two open PRs adding
|
||||
the same migration version number.
|
||||
|
||||
Why this exists: two PRs targeting staging can each add a migration with the
|
||||
same numeric prefix (e.g. 044_*.up.sql). Each passes CI independently. They
|
||||
collide at merge time. Worst-case the second migration silently doesn't apply
|
||||
and the schema drifts from what the code expects. Caught manually 2026-04-30
|
||||
during PR #2276 rebase: 044_runtime_image_pins collided with
|
||||
044_platform_inbound_secret from RFC #2312.
|
||||
|
||||
This check runs on every PR and asserts the migration prefixes added by THIS
|
||||
PR don't collide with:
|
||||
|
||||
1. The base branch's tip (someone else already used this number)
|
||||
2. Any other open PR (race-window collision — both pass CI independently)
|
||||
|
||||
Exit codes:
|
||||
0 — no collisions
|
||||
1 — collision detected; output names the conflicting PR(s) for the author
|
||||
|
||||
Designed to run from a GitHub Actions PR check. Reads PR metadata via the
|
||||
GitHub CLI (gh) which is preinstalled on ubuntu-latest runners. Runs in
|
||||
under 10s against a typical PR.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
MIGRATIONS_DIR = "workspace-server/migrations"
|
||||
MIGRATION_FILE_RE = re.compile(r"^(\d+)_[^/]+\.(up|down)\.sql$")
|
||||
|
||||
|
||||
def run(cmd: list[str], check: bool = True) -> str:
|
||||
"""Run a subprocess and return stdout. Raise on non-zero when check=True."""
|
||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||
if check and result.returncode != 0:
|
||||
sys.stderr.write(f"command failed: {' '.join(cmd)}\n{result.stderr}\n")
|
||||
sys.exit(1)
|
||||
return result.stdout
|
||||
|
||||
|
||||
def migrations_in_diff(base_ref: str, head_ref: str) -> set[int]:
|
||||
"""Return the set of migration prefixes added or modified between two refs.
|
||||
|
||||
Uses --diff-filter=AM (Added or Modified) so a deleted migration doesn't
|
||||
count. Renames (--diff-filter=R) appear as A on the new path and D on the
|
||||
old, so we'd catch a renumbering correctly.
|
||||
"""
|
||||
out = run([
|
||||
"git", "diff", "--name-only", "--diff-filter=AM",
|
||||
f"{base_ref}...{head_ref}", "--", MIGRATIONS_DIR,
|
||||
])
|
||||
prefixes: set[int] = set()
|
||||
for line in out.splitlines():
|
||||
path = Path(line.strip())
|
||||
if not path.name:
|
||||
continue
|
||||
m = MIGRATION_FILE_RE.match(path.name)
|
||||
if not m:
|
||||
# Files like the workflow_checkpoints.up.sql with non-numeric
|
||||
# prefix are intentional — skip without complaint.
|
||||
continue
|
||||
prefixes.add(int(m.group(1)))
|
||||
return prefixes
|
||||
|
||||
|
||||
def migrations_on_ref(ref: str) -> set[int]:
|
||||
"""Return the set of numeric migration prefixes existing at the given git ref.
|
||||
|
||||
Walks the migrations dir at that ref via `git ls-tree`, not the working
|
||||
tree, so it works against any branch / SHA without checking it out.
|
||||
"""
|
||||
out = run([
|
||||
"git", "ls-tree", "-r", "--name-only", ref, "--", MIGRATIONS_DIR,
|
||||
])
|
||||
prefixes: set[int] = set()
|
||||
for line in out.splitlines():
|
||||
path = Path(line.strip())
|
||||
if not path.name:
|
||||
continue
|
||||
m = MIGRATION_FILE_RE.match(path.name)
|
||||
if not m:
|
||||
continue
|
||||
prefixes.add(int(m.group(1)))
|
||||
return prefixes
|
||||
|
||||
|
||||
def open_prs_with_migration_prefix(
|
||||
repo: str, prefix: int, exclude_pr: int
|
||||
) -> list[dict]:
|
||||
"""Return open PRs (other than `exclude_pr`) that add a migration with
|
||||
`prefix`. Uses `gh pr diff` per PR — we only need to walk PRs that are
|
||||
actually in flight, so the cost is bounded by open-PR count.
|
||||
"""
|
||||
out = run([
|
||||
"gh", "pr", "list", "--repo", repo, "--state", "open",
|
||||
"--json", "number,headRefName", "--limit", "100",
|
||||
])
|
||||
prs = json.loads(out)
|
||||
matches: list[dict] = []
|
||||
for pr in prs:
|
||||
num = pr["number"]
|
||||
if num == exclude_pr:
|
||||
continue
|
||||
try:
|
||||
files = run([
|
||||
"gh", "pr", "diff", str(num), "--repo", repo, "--name-only",
|
||||
], check=False)
|
||||
except Exception: # noqa: BLE001
|
||||
continue
|
||||
for raw in files.splitlines():
|
||||
path = Path(raw.strip())
|
||||
if not path.name:
|
||||
continue
|
||||
m = MIGRATION_FILE_RE.match(path.name)
|
||||
if m and int(m.group(1)) == prefix:
|
||||
matches.append(pr)
|
||||
break
|
||||
return matches
|
||||
|
||||
|
||||
def main() -> int:
|
||||
pr_number_env = os.environ.get("PR_NUMBER", "").strip()
|
||||
if not pr_number_env:
|
||||
sys.stderr.write(
|
||||
"PR_NUMBER not set — this script is intended to run from a PR "
|
||||
"context. Set PR_NUMBER (e.g. ${{ github.event.pull_request.number }}) "
|
||||
"and BASE_REF (target branch) and HEAD_REF (PR head SHA).\n"
|
||||
)
|
||||
return 1
|
||||
pr_number = int(pr_number_env)
|
||||
base_ref = os.environ.get("BASE_REF", "origin/staging")
|
||||
head_ref = os.environ.get("HEAD_REF", "HEAD")
|
||||
repo = os.environ.get("GITHUB_REPOSITORY", "Molecule-AI/molecule-core")
|
||||
|
||||
added = migrations_in_diff(base_ref, head_ref)
|
||||
if not added:
|
||||
print("no migrations added or modified by this PR — nothing to check")
|
||||
return 0
|
||||
|
||||
print(f"this PR adds/modifies migrations: {sorted(added)}")
|
||||
|
||||
# Collision check 1: base branch already has this prefix on a different
|
||||
# filename. This happens when the PR was branched off an old base and
|
||||
# didn't rebase — base advanced and another PR landed the same number.
|
||||
base_prefixes = migrations_on_ref(base_ref)
|
||||
base_collisions = added & base_prefixes
|
||||
# Filter to "different filename, same prefix" — same filename means the
|
||||
# PR is updating an existing migration in place, which is fine.
|
||||
real_base_collisions: set[int] = set()
|
||||
for prefix in base_collisions:
|
||||
# List filenames at base for this prefix
|
||||
out = run([
|
||||
"git", "ls-tree", "-r", "--name-only", base_ref, "--",
|
||||
MIGRATIONS_DIR,
|
||||
])
|
||||
base_names = {
|
||||
Path(line).name for line in out.splitlines()
|
||||
if (m := MIGRATION_FILE_RE.match(Path(line).name)) and int(m.group(1)) == prefix
|
||||
}
|
||||
# And in the PR
|
||||
diff_out = run([
|
||||
"git", "diff", "--name-only", "--diff-filter=AM",
|
||||
f"{base_ref}...{head_ref}", "--", MIGRATIONS_DIR,
|
||||
])
|
||||
pr_names = {
|
||||
Path(line).name for line in diff_out.splitlines()
|
||||
if (m := MIGRATION_FILE_RE.match(Path(line).name)) and int(m.group(1)) == prefix
|
||||
}
|
||||
if pr_names - base_names:
|
||||
real_base_collisions.add(prefix)
|
||||
|
||||
# Collision check 2: another open PR claims the same prefix.
|
||||
open_pr_collisions: dict[int, list[dict]] = {}
|
||||
for prefix in added:
|
||||
peers = open_prs_with_migration_prefix(repo, prefix, pr_number)
|
||||
if peers:
|
||||
open_pr_collisions[prefix] = peers
|
||||
|
||||
if not real_base_collisions and not open_pr_collisions:
|
||||
print("no migration version collisions detected")
|
||||
return 0
|
||||
|
||||
print()
|
||||
print("::error::migration version collision detected")
|
||||
if real_base_collisions:
|
||||
print(f"::error::these prefixes already exist on {base_ref} with different filenames: "
|
||||
f"{sorted(real_base_collisions)}")
|
||||
print("::error::rebase onto current base and renumber to the next available prefix")
|
||||
for prefix, peers in sorted(open_pr_collisions.items()):
|
||||
peer_str = ", ".join(f"#{p['number']} ({p['headRefName']})" for p in peers)
|
||||
print(f"::error::migration prefix {prefix:03d} also claimed by open PR(s): {peer_str}")
|
||||
print(f"::error::rebase coordination needed — only one PR can land a given prefix; "
|
||||
f"renumber yours or theirs")
|
||||
return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Executable
+257
@@ -0,0 +1,257 @@
|
||||
#!/usr/bin/env bash
|
||||
# sweep-cf-tunnels.sh — safe, targeted sweep of Cloudflare Tunnels
|
||||
# whose corresponding tenant no longer exists.
|
||||
#
|
||||
# Why this exists: CP's tenant-delete cascade removes the DNS record
|
||||
# (caught by sweep-cf-orphans.sh as a backstop) but does NOT delete
|
||||
# the underlying Cloudflare Tunnel. Each E2E provision creates one
|
||||
# Tunnel named `tenant-<slug>`; without cleanup these accumulate
|
||||
# indefinitely on the account, consuming the account's tunnel quota
|
||||
# and cluttering the Cloudflare dashboard.
|
||||
#
|
||||
# Observed 2026-04-30: dozens of `tenant-e2e-canvas-*` tunnels in
|
||||
# Down state with zero replicas, weeks past their tenant's deletion.
|
||||
#
|
||||
# This script is a parallel-shape janitor to sweep-cf-orphans.sh:
|
||||
# 1. Query CP admin API to enumerate live org slugs (prod + staging)
|
||||
# 2. Enumerate Cloudflare Tunnels via the account-scoped API
|
||||
# 3. For each tunnel matching `tenant-<slug>`, check if <slug>
|
||||
# appears in the live set
|
||||
# 4. Skip tunnels with active connections (defense-in-depth — never
|
||||
# delete a healthy tunnel even if CP claims the org is gone)
|
||||
# 5. Only delete tunnels with NO live counterpart AND NO active
|
||||
# connections
|
||||
#
|
||||
# Dry-run by default; must pass --execute to actually delete.
|
||||
#
|
||||
# Env vars required:
|
||||
# CF_API_TOKEN — Cloudflare token with
|
||||
# account:cloudflare_tunnel:edit scope.
|
||||
# (Same secret as sweep-cf-orphans, but the
|
||||
# token must include the tunnel scope.)
|
||||
# CF_ACCOUNT_ID — the account that owns the tunnels (visible
|
||||
# in dash.cloudflare.com URL path)
|
||||
# CP_PROD_ADMIN_TOKEN — CP admin bearer for api.moleculesai.app
|
||||
# CP_STAGING_ADMIN_TOKEN — CP admin bearer for staging-api.moleculesai.app
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 — dry-run completed or sweep executed successfully
|
||||
# 1 — missing required env, API failure, or unexpected state
|
||||
# 2 — safety check failed (would delete >MAX_DELETE_PCT% of
|
||||
# tenant-shaped tunnels; refusing)
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
DRY_RUN=1
|
||||
# Tenant tunnels are short-lived by design — most of them at any
|
||||
# given moment are orphans from finished E2E runs. The default is
|
||||
# tuned higher than sweep-cf-orphans (50%) to reflect that the
|
||||
# steady-state for tenant-* tunnels is mostly-orphan, not mostly-live.
|
||||
MAX_DELETE_PCT="${MAX_DELETE_PCT:-90}"
|
||||
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
--execute|--no-dry-run) DRY_RUN=0 ;;
|
||||
--help|-h)
|
||||
grep '^#' "$0" | head -45 | sed 's/^# \{0,1\}//'
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "unknown arg: $arg (use --help)" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
need() {
|
||||
local var="$1"
|
||||
if [ -z "${!var:-}" ]; then
|
||||
echo "ERROR: $var is required" >&2
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
need CF_API_TOKEN
|
||||
need CF_ACCOUNT_ID
|
||||
need CP_PROD_ADMIN_TOKEN
|
||||
need CP_STAGING_ADMIN_TOKEN
|
||||
|
||||
log() { echo "[$(date -u +%H:%M:%S)] $*"; }
|
||||
|
||||
# --- Gather live sets ------------------------------------------------------
|
||||
|
||||
log "Fetching CP prod org slugs..."
|
||||
PROD_SLUGS=$(curl -sS -m 15 -H "Authorization: Bearer $CP_PROD_ADMIN_TOKEN" \
|
||||
"https://api.moleculesai.app/cp/admin/orgs?limit=500" \
|
||||
| python3 -c "import json,sys; print(' '.join(o['slug'] for o in json.load(sys.stdin).get('orgs',[])))")
|
||||
log " prod orgs: $(echo "$PROD_SLUGS" | wc -w | tr -d ' ')"
|
||||
|
||||
log "Fetching CP staging org slugs..."
|
||||
STAGING_SLUGS=$(curl -sS -m 15 -H "Authorization: Bearer $CP_STAGING_ADMIN_TOKEN" \
|
||||
"https://staging-api.moleculesai.app/cp/admin/orgs?limit=500" \
|
||||
| python3 -c "import json,sys; print(' '.join(o['slug'] for o in json.load(sys.stdin).get('orgs',[])))")
|
||||
log " staging orgs: $(echo "$STAGING_SLUGS" | wc -w | tr -d ' ')"
|
||||
|
||||
log "Fetching Cloudflare tunnels..."
|
||||
# The cfd_tunnel list endpoint is paginated; per_page max is 50.
|
||||
# Walk all pages so we don't silently miss orphans on busy accounts.
|
||||
PAGE=1
|
||||
TUNNEL_JSON='{"result":[]}'
|
||||
while :; do
|
||||
page_json=$(curl -sS -m 15 -H "Authorization: Bearer $CF_API_TOKEN" \
|
||||
"https://api.cloudflare.com/client/v4/accounts/$CF_ACCOUNT_ID/cfd_tunnel?per_page=50&page=$PAGE&is_deleted=false")
|
||||
page_count=$(echo "$page_json" | python3 -c "import json,sys; print(len(json.load(sys.stdin).get('result') or []))")
|
||||
if [ "$page_count" = "0" ]; then break; fi
|
||||
# Merge pages
|
||||
TUNNEL_JSON=$(python3 -c "
|
||||
import json, sys
|
||||
acc = json.loads(sys.argv[1])
|
||||
new = json.loads(sys.argv[2])
|
||||
acc['result'].extend(new.get('result') or [])
|
||||
print(json.dumps(acc))
|
||||
" "$TUNNEL_JSON" "$page_json")
|
||||
PAGE=$((PAGE + 1))
|
||||
if [ "$PAGE" -gt 20 ]; then
|
||||
log "::warning::stopping pagination at page 20 (1000 tunnels) — re-run if more"
|
||||
break
|
||||
fi
|
||||
done
|
||||
TOTAL_TUNNELS=$(echo "$TUNNEL_JSON" | python3 -c "import json,sys; print(len(json.load(sys.stdin)['result']))")
|
||||
log " total tunnels: $TOTAL_TUNNELS"
|
||||
|
||||
# --- Compute orphans -------------------------------------------------------
|
||||
#
|
||||
# Rules (in order):
|
||||
# 1. Name doesn't match `tenant-<slug>` → keep (unknown — never sweep
|
||||
# arbitrary tunnels that might belong to platform infra).
|
||||
# 2. Tunnel has active connections (status=healthy or non-empty
|
||||
# connections array) → keep (defense-in-depth: don't kill a live
|
||||
# tunnel even if CP forgot the org).
|
||||
# 3. Slug ∈ {prod_slugs ∪ staging_slugs} → keep (live tenant).
|
||||
# 4. Otherwise → delete (orphan).
|
||||
|
||||
export PROD_SLUGS STAGING_SLUGS
|
||||
DECISIONS=$(echo "$TUNNEL_JSON" | python3 -c '
|
||||
import json, os, re, sys
|
||||
|
||||
prod_slugs = set(os.environ["PROD_SLUGS"].split())
|
||||
staging_slugs = set(os.environ["STAGING_SLUGS"].split())
|
||||
all_slugs = prod_slugs | staging_slugs
|
||||
|
||||
_TENANT_RE = re.compile(r"^tenant-(.+)$")
|
||||
|
||||
def decide(t, all_slugs):
|
||||
name = t.get("name", "")
|
||||
tid = t.get("id", "")
|
||||
status = t.get("status", "")
|
||||
conns = t.get("connections") or []
|
||||
|
||||
m = _TENANT_RE.match(name)
|
||||
if not m:
|
||||
return ("keep", "not-a-tenant-tunnel", tid, name, status)
|
||||
|
||||
slug = m.group(1)
|
||||
|
||||
# Defense-in-depth: never delete a tunnel with live connectors.
|
||||
# The CF tunnel "status" field is one of inactive/degraded/healthy/down.
|
||||
# "down" with empty connections is the orphan state we sweep.
|
||||
if status == "healthy" or len(conns) > 0:
|
||||
return ("keep", "active-connections", tid, name, status)
|
||||
|
||||
if slug in all_slugs:
|
||||
return ("keep", "live-tenant", tid, name, status)
|
||||
|
||||
return ("delete", "orphan-tenant", tid, name, status)
|
||||
|
||||
d = json.loads(sys.stdin.read())
|
||||
for t in d.get("result", []):
|
||||
action, reason, tid, name, status = decide(t, all_slugs)
|
||||
print(json.dumps({"action": action, "reason": reason, "id": tid, "name": name, "status": status}))
|
||||
')
|
||||
|
||||
# --- Summarize + safety gate ----------------------------------------------
|
||||
|
||||
DELETE_COUNT=$(echo "$DECISIONS" | python3 -c "import json,sys; print(sum(1 for l in sys.stdin if json.loads(l)['action']=='delete'))")
|
||||
KEEP_COUNT=$((TOTAL_TUNNELS - DELETE_COUNT))
|
||||
TENANT_TUNNELS=$(echo "$DECISIONS" | python3 -c "
|
||||
import json, sys
|
||||
n = sum(1 for l in sys.stdin if json.loads(l)['reason'] != 'not-a-tenant-tunnel')
|
||||
print(n)
|
||||
")
|
||||
|
||||
log ""
|
||||
log "== Sweep plan =="
|
||||
log " total tunnels: $TOTAL_TUNNELS"
|
||||
log " tenant-shaped tunnels: $TENANT_TUNNELS"
|
||||
log " would delete: $DELETE_COUNT"
|
||||
log " would keep: $KEEP_COUNT"
|
||||
log ""
|
||||
|
||||
# Per-reason breakdown of deletes
|
||||
echo "$DECISIONS" | python3 -c "
|
||||
import json,sys,collections
|
||||
c = collections.Counter()
|
||||
for l in sys.stdin:
|
||||
d = json.loads(l)
|
||||
if d['action'] == 'delete':
|
||||
c[d['reason']] += 1
|
||||
for reason, n in c.most_common():
|
||||
print(f' delete/{reason}: {n}')
|
||||
"
|
||||
|
||||
# Safety gate operates against the tenant-shaped subset (the reasonable
|
||||
# "all of these could conceivably be ours" denominator), not the total.
|
||||
# A miscount of platform-infra tunnels shouldn't relax the gate.
|
||||
if [ "$TENANT_TUNNELS" -gt 0 ]; then
|
||||
PCT=$(( DELETE_COUNT * 100 / TENANT_TUNNELS ))
|
||||
if [ "$PCT" -gt "$MAX_DELETE_PCT" ]; then
|
||||
log ""
|
||||
log "SAFETY: would delete $PCT% of tenant-shaped tunnels (threshold $MAX_DELETE_PCT%) — refusing."
|
||||
log " If this is expected (e.g. major cleanup after incident), rerun with"
|
||||
log " MAX_DELETE_PCT=$((PCT+5)) $0 $*"
|
||||
exit 2
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "$DRY_RUN" = "1" ]; then
|
||||
log ""
|
||||
log "Dry run complete. Pass --execute to actually delete $DELETE_COUNT tunnels."
|
||||
log ""
|
||||
log "First 20 tunnels that would be deleted:"
|
||||
echo "$DECISIONS" | python3 -c "
|
||||
import json, sys
|
||||
shown = 0
|
||||
for l in sys.stdin:
|
||||
d = json.loads(l)
|
||||
if d['action'] == 'delete':
|
||||
print(f\" {d['reason']:25s} {d['name']:40s} status={d['status']}\")
|
||||
shown += 1
|
||||
if shown >= 20: break
|
||||
"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# --- Execute deletes -------------------------------------------------------
|
||||
|
||||
log ""
|
||||
log "Executing $DELETE_COUNT deletions..."
|
||||
DELETED=0
|
||||
FAILED=0
|
||||
while IFS= read -r line; do
|
||||
action=$(echo "$line" | python3 -c "import json,sys; print(json.loads(sys.stdin.read())['action'])")
|
||||
[ "$action" = "delete" ] || continue
|
||||
tid=$(echo "$line" | python3 -c "import json,sys; print(json.loads(sys.stdin.read())['id'])")
|
||||
name=$(echo "$line" | python3 -c "import json,sys; print(json.loads(sys.stdin.read())['name'])")
|
||||
if curl -sS -m 10 -X DELETE \
|
||||
-H "Authorization: Bearer $CF_API_TOKEN" \
|
||||
"https://api.cloudflare.com/client/v4/accounts/$CF_ACCOUNT_ID/cfd_tunnel/$tid" \
|
||||
| grep -q '"success":true'; then
|
||||
DELETED=$((DELETED+1))
|
||||
else
|
||||
FAILED=$((FAILED+1))
|
||||
log " FAILED: $name ($tid)"
|
||||
fi
|
||||
done <<< "$DECISIONS"
|
||||
|
||||
log ""
|
||||
log "Done. deleted=$DELETED failed=$FAILED"
|
||||
[ "$FAILED" -eq 0 ]
|
||||
@@ -0,0 +1,65 @@
|
||||
"""Unit tests for check_migration_collisions.py — focuses on the regex
|
||||
classifier + the diff/base-set logic that runs without git.
|
||||
|
||||
The end-to-end git diff + gh pr list path is exercised manually (running
|
||||
the workflow against test PRs). These tests pin the pure-logic surface
|
||||
so a regression in migration-name parsing fails immediately at PR time.
|
||||
|
||||
Run locally: ``python3 -m unittest scripts/ops/test_check_migration_collisions.py -v``
|
||||
"""
|
||||
|
||||
import importlib.util
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
# Load the script as a module without invoking main(). We import the
|
||||
# regex + helpers directly so we can test them without setting up git.
|
||||
SCRIPT_PATH = Path(__file__).parent / "check_migration_collisions.py"
|
||||
spec = importlib.util.spec_from_file_location("ccm", SCRIPT_PATH)
|
||||
ccm = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(ccm)
|
||||
|
||||
|
||||
class TestMigrationFileRe(unittest.TestCase):
|
||||
"""The regex classifier — the load-bearing piece of the detector."""
|
||||
|
||||
def test_matches_standard_three_digit_prefix(self):
|
||||
m = ccm.MIGRATION_FILE_RE.match("044_platform_inbound_secret.up.sql")
|
||||
assert m is not None
|
||||
assert int(m.group(1)) == 44
|
||||
assert m.group(2) == "up"
|
||||
|
||||
def test_matches_down_migration(self):
|
||||
m = ccm.MIGRATION_FILE_RE.match("044_platform_inbound_secret.down.sql")
|
||||
assert m is not None
|
||||
assert int(m.group(1)) == 44
|
||||
assert m.group(2) == "down"
|
||||
|
||||
def test_matches_date_shaped_prefix(self):
|
||||
# Real example from the repo: 20260417000000_workflow_checkpoints
|
||||
m = ccm.MIGRATION_FILE_RE.match("20260417000000_workflow_checkpoints.up.sql")
|
||||
assert m is not None
|
||||
assert int(m.group(1)) == 20260417000000
|
||||
|
||||
def test_matches_long_compound_name(self):
|
||||
m = ccm.MIGRATION_FILE_RE.match("042_a2a_queue.up.sql")
|
||||
assert m is not None
|
||||
assert int(m.group(1)) == 42
|
||||
|
||||
def test_rejects_no_prefix(self):
|
||||
assert ccm.MIGRATION_FILE_RE.match("readme.md") is None
|
||||
|
||||
def test_rejects_alpha_prefix(self):
|
||||
assert ccm.MIGRATION_FILE_RE.match("abc_migration.up.sql") is None
|
||||
|
||||
def test_rejects_wrong_extension(self):
|
||||
assert ccm.MIGRATION_FILE_RE.match("044_test.sql") is None
|
||||
assert ccm.MIGRATION_FILE_RE.match("044_test.up.txt") is None
|
||||
|
||||
def test_rejects_path_separator(self):
|
||||
# Filename only — paths come pre-split via Path(line).name
|
||||
assert ccm.MIGRATION_FILE_RE.match("044/test.up.sql") is None
|
||||
|
||||
def test_rejects_no_underscore(self):
|
||||
# Naming convention requires <digits>_<name>
|
||||
assert ccm.MIGRATION_FILE_RE.match("044.up.sql") is None
|
||||
@@ -0,0 +1,201 @@
|
||||
"""Tests for scripts/build_runtime_package.py — the wheel-build import rewriter.
|
||||
|
||||
Run locally: ``python3 -m unittest scripts/test_build_runtime_package.py -v``
|
||||
|
||||
Why this exists: PR #2433 shipped ``import inbox as _inbox_module`` inside
|
||||
the workspace runtime, and the rewriter expanded it to
|
||||
``import molecule_runtime.inbox as inbox as _inbox_module`` — invalid
|
||||
Python. The wheel-smoke gate caught it post-merge but couldn't block
|
||||
the merge (not a required check yet — see PR #2439). PR #2436 added a
|
||||
build-time gate that raises ``ValueError`` on this pattern; this file
|
||||
locks the rewriter's documented contract under unit test so the gate
|
||||
itself can't silently regress.
|
||||
|
||||
Coverage:
|
||||
- ``import X`` → ``import molecule_runtime.X as X``
|
||||
- ``import X.sub`` → ``import molecule_runtime.X.sub``
|
||||
- ``import X`` + trailing comment is preserved
|
||||
- ``from X import Y`` → ``from molecule_runtime.X import Y``
|
||||
- ``from X.sub import Y`` → ``from molecule_runtime.X.sub import Y``
|
||||
- ``from X import Y, Z`` → ``from molecule_runtime.X import Y, Z``
|
||||
- ``import X as Y`` → raises ValueError (the rewriter would
|
||||
produce ``import molecule_runtime.X as X as Y``, syntax error)
|
||||
- non-allowlist module names → not rewritten (regex anchors on the closed set)
|
||||
- Indented imports (inside def/class) keep their indentation.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
# scripts/build_runtime_package.py lives at scripts/ — add scripts/ to sys.path
|
||||
# so the import works whether unittest is invoked from repo root or scripts/.
|
||||
HERE = os.path.dirname(os.path.abspath(__file__))
|
||||
if HERE not in sys.path:
|
||||
sys.path.insert(0, HERE)
|
||||
|
||||
import build_runtime_package as M # noqa: E402
|
||||
|
||||
|
||||
def rewrite(text: str) -> str:
|
||||
"""Run the rewriter end-to-end so the test exercises the same path
|
||||
used by the wheel build (regex compile + substitution)."""
|
||||
regex = M.build_import_rewriter()
|
||||
return M.rewrite_imports(text, regex)
|
||||
|
||||
|
||||
class TestBareImportRewriting(unittest.TestCase):
|
||||
def test_plain_import_aliases_to_preserve_binding(self):
|
||||
self.assertEqual(
|
||||
rewrite("import inbox\n"),
|
||||
"import molecule_runtime.inbox as inbox\n",
|
||||
)
|
||||
|
||||
def test_plain_import_with_trailing_comment_is_preserved(self):
|
||||
# Real-world shape from a2a_mcp_server.py — the comment must
|
||||
# survive the rewrite without losing its leading-space buffer.
|
||||
self.assertEqual(
|
||||
rewrite("import inbox # noqa: E402\n"),
|
||||
"import molecule_runtime.inbox as inbox # noqa: E402\n",
|
||||
)
|
||||
|
||||
def test_import_dotted_keeps_dotted_form(self):
|
||||
# `import X.sub` is rare for our modules but the rewriter must
|
||||
# not double-alias — we want `import molecule_runtime.X.sub`,
|
||||
# not `import molecule_runtime.X.sub as X.sub` (invalid).
|
||||
self.assertEqual(
|
||||
rewrite("import platform_tools.registry\n"),
|
||||
"import molecule_runtime.platform_tools.registry\n",
|
||||
)
|
||||
|
||||
def test_indented_import_preserves_indentation(self):
|
||||
src = "def foo():\n import inbox\n return inbox.x\n"
|
||||
out = rewrite(src)
|
||||
self.assertIn(" import molecule_runtime.inbox as inbox\n", out)
|
||||
|
||||
|
||||
class TestFromImportRewriting(unittest.TestCase):
|
||||
def test_from_module_import_simple(self):
|
||||
self.assertEqual(
|
||||
rewrite("from inbox import InboxState\n"),
|
||||
"from molecule_runtime.inbox import InboxState\n",
|
||||
)
|
||||
|
||||
def test_from_dotted_import(self):
|
||||
self.assertEqual(
|
||||
rewrite("from platform_tools.registry import TOOLS\n"),
|
||||
"from molecule_runtime.platform_tools.registry import TOOLS\n",
|
||||
)
|
||||
|
||||
def test_from_import_multiple_symbols(self):
|
||||
# Multi-import statement — the rewriter only touches the module
|
||||
# prefix, not the names being imported.
|
||||
self.assertEqual(
|
||||
rewrite("from a2a_tools import (foo, bar, baz)\n"),
|
||||
"from molecule_runtime.a2a_tools import (foo, bar, baz)\n",
|
||||
)
|
||||
|
||||
def test_from_import_block_form(self):
|
||||
src = (
|
||||
"from a2a_tools import (\n"
|
||||
" tool_check_task_status,\n"
|
||||
" tool_commit_memory,\n"
|
||||
")\n"
|
||||
)
|
||||
out = rewrite(src)
|
||||
self.assertIn("from molecule_runtime.a2a_tools import (\n", out)
|
||||
# Trailing names + closer are unchanged.
|
||||
self.assertIn(" tool_check_task_status,\n", out)
|
||||
self.assertIn(")\n", out)
|
||||
|
||||
|
||||
class TestImportAsAliasRejection(unittest.TestCase):
|
||||
"""The key regression class — the failure mode that shipped in PR #2433."""
|
||||
|
||||
def test_import_as_alias_raises_value_error(self):
|
||||
with self.assertRaises(ValueError) as ctx:
|
||||
rewrite("import inbox as _inbox_module\n")
|
||||
msg = str(ctx.exception)
|
||||
# Error must name the offending module + suggest the fix.
|
||||
self.assertIn("inbox", msg)
|
||||
self.assertIn("as <alias>", msg)
|
||||
self.assertIn("from", msg) # suggests `from X import …`
|
||||
|
||||
def test_import_as_alias_indented_still_rejected(self):
|
||||
# Indented (inside def/class) — same hazard, same rejection.
|
||||
with self.assertRaises(ValueError):
|
||||
rewrite("def foo():\n import inbox as _x\n")
|
||||
|
||||
def test_import_as_alias_with_trailing_comment_still_rejected(self):
|
||||
with self.assertRaises(ValueError):
|
||||
rewrite("import inbox as _x # comment\n")
|
||||
|
||||
def test_plain_import_with_as_in_comment_does_not_trip(self):
|
||||
# The detection strips comments before pattern-matching, so a
|
||||
# comment containing "as foo" must NOT trigger the rejection.
|
||||
self.assertEqual(
|
||||
rewrite("import inbox # rewriter produces alias as inbox\n"),
|
||||
"import molecule_runtime.inbox as inbox # rewriter produces alias as inbox\n",
|
||||
)
|
||||
|
||||
def test_import_followed_by_comma_is_not_an_alias(self):
|
||||
# `import inbox, os` — comma is not `as`, must not be rejected.
|
||||
# Our regex captures `inbox` then `,` — only `inbox` gets prefixed.
|
||||
# `os` is not in TOP_LEVEL_MODULES so it's left alone.
|
||||
out = rewrite("import inbox, os\n")
|
||||
# The first module is rewritten; the second (non-allowlist) is not.
|
||||
self.assertIn("import molecule_runtime.inbox as inbox", out)
|
||||
|
||||
|
||||
class TestOutsideAllowlistModules(unittest.TestCase):
|
||||
def test_third_party_imports_unchanged(self):
|
||||
# `httpx`, `os`, `re` etc. are not in TOP_LEVEL_MODULES — the
|
||||
# regex must not match them. This is the closed-list invariant
|
||||
# that prevents accidental rewrites of stdlib / third-party.
|
||||
src = "import httpx\nimport os\nfrom re import match\n"
|
||||
self.assertEqual(rewrite(src), src)
|
||||
|
||||
def test_short_name_collision_avoided(self):
|
||||
# `from a2a.server.X import Y` must not match the bare `a2a`
|
||||
# prefix — `a2a` isn't in our allowlist (we allow `a2a_tools`,
|
||||
# `a2a_client`, etc., but not bare `a2a`). Belt-and-suspenders.
|
||||
src = "from a2a.server.routes import create_agent_card_routes\n"
|
||||
self.assertEqual(rewrite(src), src)
|
||||
|
||||
|
||||
class TestEndToEndShape(unittest.TestCase):
|
||||
"""Reproduces the PR #2433 → #2436 incident shape."""
|
||||
|
||||
def test_pr_2433_pattern_now_rejected(self):
|
||||
# The exact line PR #2433 added (inside main()), which produced
|
||||
# `import molecule_runtime.inbox as inbox as _inbox_module` —
|
||||
# invalid syntax in the published wheel.
|
||||
with self.assertRaises(ValueError) as ctx:
|
||||
rewrite(
|
||||
" import inbox as _inbox_module\n"
|
||||
" _inbox_module.set_notification_callback(_on_inbox_message)\n"
|
||||
)
|
||||
# Error message includes the offending line so the operator
|
||||
# knows exactly where to fix.
|
||||
self.assertIn("inbox", str(ctx.exception))
|
||||
|
||||
def test_pr_2436_fix_pattern_works(self):
|
||||
# The fix-forward shape (#2436): top-level `import inbox`,
|
||||
# bridge wired in main() via `inbox.set_notification_callback`.
|
||||
src = (
|
||||
"import inbox\n"
|
||||
"\n"
|
||||
"def main():\n"
|
||||
" inbox.set_notification_callback(cb)\n"
|
||||
)
|
||||
out = rewrite(src)
|
||||
self.assertIn("import molecule_runtime.inbox as inbox\n", out)
|
||||
# The callable reference inside main() is left alone — only
|
||||
# imports get rewritten, not arbitrary `inbox.foo` callsites
|
||||
# (those resolve via the module binding the rewrite preserves).
|
||||
self.assertIn(" inbox.set_notification_callback(cb)\n", out)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,173 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Smoke-test an installed molecule-ai-workspace-runtime wheel.
|
||||
|
||||
Runs the same invariant assertions in two workflows:
|
||||
* publish-runtime.yml — after building dist/*.whl, before PyPI upload
|
||||
* runtime-prbuild-compat.yml — after building the PR's wheel, before merge
|
||||
|
||||
Splitting the smoke across two inline heredocs let PR-time and publish-time
|
||||
drift apart. After 2026-04 we kept hitting publish-time failures for
|
||||
regressions a PR-time check could have caught. One script, both gates.
|
||||
|
||||
Failure here intentionally exits non-zero so the workflow's `run:` step fails.
|
||||
Each block prints a single ✓ line on success so the GH summary log stays
|
||||
readable; assertion errors propagate with their own message.
|
||||
|
||||
Run directly: `python scripts/wheel_smoke.py` after `pip install <wheel>`.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def smoke_imports_and_invariants() -> None:
|
||||
"""Module imports + stable contract assertions.
|
||||
|
||||
Importing main_sync by name is the strongest pre-PyPI gate we have for
|
||||
import-rewrite mistakes (the 0.1.16 incident, where main.py loaded but
|
||||
main_sync was missing because the build script dropped a re-export).
|
||||
"""
|
||||
from molecule_runtime.main import main_sync # noqa: F401
|
||||
from molecule_runtime import a2a_client, a2a_tools # noqa: F401
|
||||
from molecule_runtime.builtin_tools import memory # noqa: F401
|
||||
from molecule_runtime.adapters import get_adapter, BaseAdapter, AdapterConfig
|
||||
|
||||
# cli_main + mcp_cli.main are the molecule-mcp console-script entry
|
||||
# points — the external-runtime universal MCP path. Same regression
|
||||
# class as the 0.1.16 main_sync incident: a silent rename or missed
|
||||
# rewrite here would break every external operator's MCP install on
|
||||
# the next wheel publish. Pin both names because pyproject points
|
||||
# at mcp_cli.main, which then imports a2a_mcp_server.cli_main.
|
||||
from molecule_runtime.a2a_mcp_server import cli_main # noqa: F401
|
||||
from molecule_runtime.mcp_cli import main as mcp_cli_main # noqa: F401
|
||||
assert callable(cli_main), "a2a_mcp_server.cli_main must be callable"
|
||||
assert callable(mcp_cli_main), "mcp_cli.main must be callable"
|
||||
|
||||
# inbox.activate / get_state / start_poller_thread form the inbound
|
||||
# delivery path for the standalone molecule-mcp wrapper. mcp_cli.main
|
||||
# imports + activates these at startup; if a wheel ships without
|
||||
# them, the standalone agent silently loses the wait_for_message /
|
||||
# inbox_peek / inbox_pop tools and reverts to outbound-only.
|
||||
from molecule_runtime.inbox import ( # noqa: F401
|
||||
InboxState,
|
||||
activate as inbox_activate,
|
||||
get_state as inbox_get_state,
|
||||
set_notification_callback as inbox_set_notification_callback,
|
||||
start_poller_thread as inbox_start_poller_thread,
|
||||
)
|
||||
assert callable(inbox_activate), "inbox.activate must be callable"
|
||||
assert callable(inbox_get_state), "inbox.get_state must be callable"
|
||||
assert callable(inbox_start_poller_thread), "inbox.start_poller_thread must be callable"
|
||||
assert callable(inbox_set_notification_callback), "inbox.set_notification_callback must be callable"
|
||||
|
||||
assert a2a_client._A2A_ERROR_PREFIX, "a2a_client missing error sentinel"
|
||||
assert callable(get_adapter), "adapters.get_adapter must be callable"
|
||||
assert hasattr(BaseAdapter, "name"), "BaseAdapter interface broken"
|
||||
assert hasattr(AdapterConfig, "__init__"), "AdapterConfig dataclass missing"
|
||||
print("✓ module imports + invariants OK")
|
||||
|
||||
|
||||
def smoke_agent_card_call_shape() -> None:
|
||||
"""Construct AgentCard with the EXACT kwargs main.py uses.
|
||||
|
||||
Pure imports don't catch field-shape regressions in upstream SDKs that
|
||||
only surface at construction time. Two bugs of this exact class shipped
|
||||
since the a2a-sdk 1.0 migration:
|
||||
- state_transition_history=True (#2179)
|
||||
- supported_protocols=[...] (the protobuf field is supported_interfaces;
|
||||
every workspace boot crashed with `ValueError: Protocol message
|
||||
AgentCard has no "supported_protocols" field`)
|
||||
|
||||
main.py and this block MUST stay in lockstep — adding a kwarg there
|
||||
without mirroring it here is the regression vector.
|
||||
"""
|
||||
from a2a.types import AgentCard, AgentCapabilities, AgentSkill, AgentInterface
|
||||
|
||||
AgentCard(
|
||||
name="smoke-agent",
|
||||
description="wheel-smoke: AgentCard call-shape",
|
||||
version="0.0.0-smoke",
|
||||
supported_interfaces=[
|
||||
AgentInterface(protocol_binding="https://a2a.g/v1", url="http://localhost:8080"),
|
||||
],
|
||||
capabilities=AgentCapabilities(
|
||||
streaming=True,
|
||||
push_notifications=False,
|
||||
),
|
||||
skills=[
|
||||
AgentSkill(
|
||||
id="smoke-skill",
|
||||
name="Smoke",
|
||||
description="no-op",
|
||||
tags=["smoke"],
|
||||
examples=["noop"],
|
||||
),
|
||||
],
|
||||
default_input_modes=["text/plain", "application/json"],
|
||||
default_output_modes=["text/plain", "application/json"],
|
||||
)
|
||||
print("✓ AgentCard call-shape smoke passed")
|
||||
|
||||
|
||||
def smoke_well_known_path_alignment() -> None:
|
||||
"""The SDK's published constant must match the path it actually mounts.
|
||||
|
||||
main.py polls AGENT_CARD_WELL_KNOWN_PATH to detect server readiness. If
|
||||
the constant and create_agent_card_routes() drift, every workspace's
|
||||
initial_prompt silently drops (probe 404s, falls through to "skipping").
|
||||
This was the #2193 incident class.
|
||||
"""
|
||||
from a2a.types import AgentCard
|
||||
from a2a.utils.constants import AGENT_CARD_WELL_KNOWN_PATH
|
||||
from a2a.server.routes import create_agent_card_routes
|
||||
|
||||
mounted_paths = [
|
||||
getattr(r, "path", None)
|
||||
for r in create_agent_card_routes(
|
||||
AgentCard(
|
||||
name="wk-smoke",
|
||||
description="well-known mount alignment",
|
||||
version="0.0.0-smoke",
|
||||
)
|
||||
)
|
||||
]
|
||||
assert AGENT_CARD_WELL_KNOWN_PATH in mounted_paths, (
|
||||
f"AGENT_CARD_WELL_KNOWN_PATH ({AGENT_CARD_WELL_KNOWN_PATH!r}) is NOT among "
|
||||
f"paths mounted by create_agent_card_routes ({mounted_paths!r}). The SDK "
|
||||
"constant and its own route factory have drifted — workspace probes will "
|
||||
"404 forever, silently dropping every workspace initial_prompt."
|
||||
)
|
||||
print(f"✓ well-known mount alignment OK ({AGENT_CARD_WELL_KNOWN_PATH})")
|
||||
|
||||
|
||||
def smoke_message_helper() -> None:
|
||||
"""new_text_message is the v1.x rename of new_agent_text_message.
|
||||
|
||||
main.py and a2a_executor.py call new_text_message in hot paths; if the
|
||||
import breaks, every reply errors with ImportError before the message
|
||||
even leaves the workspace. Importing here catches a future v2.x rename
|
||||
at publish time.
|
||||
"""
|
||||
from a2a.helpers import new_text_message
|
||||
|
||||
msg = new_text_message("smoke")
|
||||
assert msg is not None, "new_text_message returned None"
|
||||
print("✓ message helper import + call OK")
|
||||
|
||||
|
||||
def main() -> int:
|
||||
# main.py validates WORKSPACE_ID at module-import time via platform_auth.
|
||||
# Set placeholders so the smoke doesn't trip on the env-var guard.
|
||||
os.environ.setdefault("WORKSPACE_ID", "00000000-0000-0000-0000-000000000000")
|
||||
os.environ.setdefault("PLATFORM_URL", "http://localhost:8080")
|
||||
|
||||
smoke_imports_and_invariants()
|
||||
smoke_agent_card_call_shape()
|
||||
smoke_well_known_path_alignment()
|
||||
smoke_message_helper()
|
||||
print("✓ wheel smoke passed")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
+275
@@ -0,0 +1,275 @@
|
||||
#!/usr/bin/env bash
|
||||
# Staging E2E for #2307 — create fresh tenant, test peer visibility, tear down.
|
||||
#
|
||||
# Mirrors tests/e2e/test_staging_full_saas.sh's pattern (org create via
|
||||
# /cp/admin/orgs, EXIT-trap teardown via DELETE /cp/admin/tenants/:slug
|
||||
# with required {"confirm":slug} body).
|
||||
#
|
||||
# Required: MOLECULE_ADMIN_TOKEN exported (CP admin bearer).
|
||||
# Optional:
|
||||
# MOLECULE_CP_URL default https://staging-api.moleculesai.app
|
||||
# PARENT_RUNTIME default claude-code
|
||||
|
||||
set -uo pipefail
|
||||
|
||||
CP_URL="${MOLECULE_CP_URL:-https://staging-api.moleculesai.app}"
|
||||
ADMIN_TOKEN="${MOLECULE_ADMIN_TOKEN:?MOLECULE_ADMIN_TOKEN required}"
|
||||
PARENT_RUNTIME="${PARENT_RUNTIME:-claude-code}"
|
||||
|
||||
RUN_ID=$(date +%s | tail -c 8)
|
||||
SLUG="e2e-2307-$RUN_ID"
|
||||
ORG_ID=""
|
||||
TENANT_URL=""
|
||||
TENANT_TOKEN=""
|
||||
PARENT=""
|
||||
CHILD=""
|
||||
CTOK=""
|
||||
|
||||
admin_call() {
|
||||
local method="$1" path="$2"
|
||||
shift 2
|
||||
curl -sS -X "$method" "$CP_URL$path" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
"$@"
|
||||
}
|
||||
|
||||
tenant_call() {
|
||||
local method="$1" path="$2"
|
||||
shift 2
|
||||
curl -sS -X "$method" "$TENANT_URL$path" \
|
||||
-H "Authorization: Bearer $TENANT_TOKEN" \
|
||||
-H "X-Molecule-Org-Id: $ORG_ID" \
|
||||
-H "Content-Type: application/json" \
|
||||
"$@"
|
||||
}
|
||||
|
||||
teardown() {
|
||||
local rc=$?
|
||||
set +e
|
||||
echo ""
|
||||
echo "[teardown] DELETE /cp/admin/tenants/$SLUG ..."
|
||||
admin_call DELETE "/cp/admin/tenants/$SLUG" \
|
||||
--max-time 120 \
|
||||
-d "{\"confirm\":\"$SLUG\"}" >/dev/null 2>&1
|
||||
# Poll up to 60s for purge
|
||||
for j in $(seq 1 12); do
|
||||
LIST=$(admin_call GET /cp/admin/orgs 2>/dev/null)
|
||||
LEAK=$(echo "$LIST" | python3 -c "
|
||||
import sys, json
|
||||
try:
|
||||
d = json.load(sys.stdin)
|
||||
except Exception:
|
||||
print(1); sys.exit(0)
|
||||
orgs = d if isinstance(d, list) else d.get('orgs', [])
|
||||
n = sum(1 for o in orgs if o.get('slug') == '$SLUG' and o.get('status') != 'purged')
|
||||
print(n)
|
||||
" 2>/dev/null || echo 1)
|
||||
if [ "$LEAK" = "0" ]; then
|
||||
echo " ✓ tenant purged (after ${j}x5s)"
|
||||
exit $rc
|
||||
fi
|
||||
sleep 5
|
||||
done
|
||||
echo " ⚠ LEAK: $SLUG still in /cp/admin/orgs after 60s — manual cleanup needed"
|
||||
[ $rc -eq 0 ] && rc=4
|
||||
exit $rc
|
||||
}
|
||||
trap teardown EXIT INT TERM
|
||||
|
||||
# ─── 1. Create the org ────────────────────────────────────────────────
|
||||
echo "[1/8] POST /cp/admin/orgs — slug=$SLUG"
|
||||
CREATE=$(admin_call POST /cp/admin/orgs \
|
||||
-d "{\"slug\":\"$SLUG\",\"name\":\"E2E #2307 $SLUG\",\"owner_user_id\":\"e2e-runner:$SLUG\"}")
|
||||
echo " resp: $(echo "$CREATE" | head -c 300)"
|
||||
ORG_ID=$(echo "$CREATE" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null)
|
||||
[ -n "$ORG_ID" ] || { echo " ✗ org creation failed"; exit 1; }
|
||||
echo " ✓ ORG_ID=$ORG_ID"
|
||||
|
||||
# ─── 2. Wait for tenant ready ─────────────────────────────────────────
|
||||
echo "[2/8] waiting for tenant to come up (cold-start ~5-10min)..."
|
||||
for i in $(seq 1 180); do
|
||||
STATUS=$(admin_call GET /cp/admin/orgs 2>/dev/null | python3 -c "
|
||||
import sys, json
|
||||
try: d = json.load(sys.stdin)
|
||||
except Exception: sys.exit(0)
|
||||
orgs = d if isinstance(d, list) else d.get('orgs', [])
|
||||
for o in orgs:
|
||||
if o.get('slug') == '$SLUG':
|
||||
print(o.get('instance_status') or o.get('status') or 'unknown')
|
||||
break
|
||||
" 2>/dev/null)
|
||||
[ $((i % 6)) -eq 1 ] && echo " attempt $i: status=$STATUS"
|
||||
case "$STATUS" in running|online|ready) break ;; esac
|
||||
sleep 5
|
||||
done
|
||||
case "$STATUS" in running|online|ready) ;;
|
||||
*) echo " ✗ tenant never came up (last=$STATUS)"; exit 2 ;; esac
|
||||
echo " ✓ tenant status=$STATUS"
|
||||
|
||||
# ─── 3. Per-tenant admin token ────────────────────────────────────────
|
||||
echo "[3/8] fetching per-tenant admin token..."
|
||||
TT_RESP=$(admin_call GET "/cp/admin/orgs/$SLUG/admin-token")
|
||||
TENANT_TOKEN=$(echo "$TT_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('admin_token',''))" 2>/dev/null)
|
||||
[ -n "$TENANT_TOKEN" ] || { echo " ✗ tenant token fetch failed: $TT_RESP"; exit 2; }
|
||||
echo " ✓ got tenant admin token (len ${#TENANT_TOKEN})"
|
||||
|
||||
CP_HOST=$(echo "$CP_URL" | sed -E 's#^https?://##; s#/.*$##')
|
||||
case "$CP_HOST" in
|
||||
api.*) DERIVED_DOMAIN="${CP_HOST#api.}" ;;
|
||||
staging-api.*) DERIVED_DOMAIN="staging.${CP_HOST#staging-api.}" ;;
|
||||
*) DERIVED_DOMAIN="$CP_HOST" ;;
|
||||
esac
|
||||
TENANT_URL="https://${SLUG}.${DERIVED_DOMAIN}"
|
||||
echo " tenant url: $TENANT_URL"
|
||||
|
||||
# ─── 4. Wait for tenant TLS/DNS readiness ─────────────────────────────
|
||||
echo "[4/8] waiting for tenant /health (TLS/DNS, up to 10min)..."
|
||||
for i in $(seq 1 120); do
|
||||
if curl -fsS "$TENANT_URL/health" -m 5 -k >/dev/null 2>&1; then
|
||||
echo " ✓ /health ok (attempt $i)"
|
||||
break
|
||||
fi
|
||||
sleep 5
|
||||
done
|
||||
|
||||
# ─── 5. Provision parent CEO workspace ────────────────────────────────
|
||||
echo "[5/8] creating parent CEO ($PARENT_RUNTIME)..."
|
||||
P_RESP=$(tenant_call POST /workspaces \
|
||||
-d "{\"name\":\"e2e-CEO\",\"runtime\":\"$PARENT_RUNTIME\",\"tier\":3}")
|
||||
echo " parent resp: $(echo "$P_RESP" | head -c 300)"
|
||||
PARENT=$(echo "$P_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null)
|
||||
PTOK=$(echo "$P_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('auth_token',''))" 2>/dev/null)
|
||||
[ -n "$PARENT" ] || { echo " ✗ parent create failed"; exit 3; }
|
||||
echo " ✓ PARENT=$PARENT (parent_token_returned=$([ -n "$PTOK" ] && echo yes || echo no))"
|
||||
|
||||
# ─── 6. Wait for parent online ────────────────────────────────────────
|
||||
echo "[6/8] waiting for parent to come online (up to 12min)..."
|
||||
for i in $(seq 1 144); do
|
||||
WS_JSON=$(tenant_call GET "/workspaces/$PARENT" 2>/dev/null)
|
||||
S=$(echo "$WS_JSON" | python3 -c "
|
||||
import sys, json
|
||||
try: d = json.load(sys.stdin)
|
||||
except Exception: sys.exit(0)
|
||||
w = d.get('workspace') if isinstance(d.get('workspace'), dict) else d
|
||||
print(w.get('status') or '')
|
||||
" 2>/dev/null)
|
||||
[ $((i % 6)) -eq 1 ] && echo " attempt $i: parent status=$S"
|
||||
[ "$S" = "online" ] && break
|
||||
sleep 5
|
||||
done
|
||||
[ "$S" = "online" ] || { echo " ✗ parent never online (last=$S)"; exit 3; }
|
||||
echo " ✓ parent online"
|
||||
|
||||
# ─── 7. Create external child + register URL ──────────────────────────
|
||||
echo "[7/8] creating external child + registering..."
|
||||
C_RESP=$(tenant_call POST /workspaces \
|
||||
-d "{\"name\":\"e2e-Reno-Server\",\"runtime\":\"external\",\"external\":true,\"tier\":2,\"parent_id\":\"$PARENT\"}")
|
||||
echo " child resp: $(echo "$C_RESP" | head -c 400)"
|
||||
CHILD=$(echo "$C_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null)
|
||||
# External-runtime token is nested under `connection.auth_token` (verified
|
||||
# 2026-04-29 against staging response shape). Fall back to top-level for
|
||||
# parity with older clients.
|
||||
CTOK=$(echo "$C_RESP" | python3 -c "
|
||||
import sys, json
|
||||
d = json.load(sys.stdin)
|
||||
print(d.get('connection', {}).get('auth_token') or d.get('auth_token') or '')
|
||||
" 2>/dev/null)
|
||||
[ -n "$CHILD" ] || { echo " ✗ child create failed"; exit 3; }
|
||||
echo " ✓ CHILD=$CHILD (child_token_returned=$([ -n "$CTOK" ] && echo yes || echo no))"
|
||||
|
||||
# Try register with child's own token (bootstrap path); fall back to tenant_call
|
||||
if [ -n "$CTOK" ]; then
|
||||
REG_RESP=$(curl -sS -X POST "$TENANT_URL/registry/register" \
|
||||
-H "Authorization: Bearer $CTOK" \
|
||||
-H "X-Molecule-Org-Id: $ORG_ID" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"id\":\"$CHILD\",\"url\":\"https://example.com/molecule-test\",\"agent_card\":{\"name\":\"Reno Server\",\"description\":\"Mock\",\"version\":\"0.1.0\"}}")
|
||||
else
|
||||
REG_RESP=$(tenant_call POST /registry/register \
|
||||
-d "{\"id\":\"$CHILD\",\"url\":\"https://example.com/molecule-test\",\"agent_card\":{\"name\":\"Reno Server\",\"description\":\"Mock\",\"version\":\"0.1.0\"}}")
|
||||
fi
|
||||
echo " register resp: $(echo "$REG_RESP" | head -c 300)"
|
||||
|
||||
# ─── 8. THE TEST — peer visibility ────────────────────────────────────
|
||||
echo ""
|
||||
echo "[8/8] === Verdict — does parent see external child? ==="
|
||||
echo ""
|
||||
echo "(a) DB shape via admin: GET /cp/admin/orgs/$SLUG (workspaces listing if exposed)"
|
||||
|
||||
# Check children listing — most direct DB-shape signal we can get from outside
|
||||
LIST=$(tenant_call GET "/workspaces?parent_id=$PARENT")
|
||||
echo " /workspaces?parent_id=$PARENT response: $(echo "$LIST" | head -c 500)"
|
||||
echo ""
|
||||
|
||||
CHILD_LISTED=$(echo "$LIST" | python3 -c "
|
||||
import sys, json
|
||||
try:
|
||||
d = json.load(sys.stdin)
|
||||
except Exception:
|
||||
print('parse_error'); sys.exit(0)
|
||||
ws = d if isinstance(d, list) else d.get('workspaces', d.get('items', []))
|
||||
print('yes' if any(w.get('id') == '$CHILD' for w in ws) else 'no')
|
||||
" 2>/dev/null)
|
||||
echo " child appears in parent's children listing: $CHILD_LISTED"
|
||||
|
||||
# (b) /peers from PARENT side using PTOK if provided
|
||||
if [ -n "$PTOK" ]; then
|
||||
PEERS=$(curl -sS "$TENANT_URL/registry/$PARENT/peers" \
|
||||
-H "Authorization: Bearer $PTOK" \
|
||||
-H "X-Molecule-Org-Id: $ORG_ID")
|
||||
echo ""
|
||||
echo "(b) GET /registry/$PARENT/peers (parent's bearer):"
|
||||
echo " $(echo "$PEERS" | head -c 600)"
|
||||
if echo "$PEERS" | grep -q "$CHILD"; then
|
||||
echo " ✓ child IS in parent's /peers"
|
||||
VERDICT_B=ok
|
||||
else
|
||||
echo " ✗ child is NOT in parent's /peers — bug REPRODUCES at API layer"
|
||||
VERDICT_B=fail
|
||||
fi
|
||||
else
|
||||
echo ""
|
||||
echo "(b) parent's auth_token not exposed by /workspaces create — skipping direct /peers check"
|
||||
VERDICT_B=skipped
|
||||
fi
|
||||
|
||||
# (c) /peers from CHILD side using CTOK
|
||||
if [ -n "$CTOK" ]; then
|
||||
PEERS_C=$(curl -sS "$TENANT_URL/registry/$CHILD/peers" \
|
||||
-H "Authorization: Bearer $CTOK" \
|
||||
-H "X-Molecule-Org-Id: $ORG_ID")
|
||||
echo ""
|
||||
echo "(c) GET /registry/$CHILD/peers (child's bearer):"
|
||||
echo " $(echo "$PEERS_C" | head -c 600)"
|
||||
if echo "$PEERS_C" | grep -q "$PARENT"; then
|
||||
echo " ✓ parent IS in child's /peers"
|
||||
VERDICT_C=ok
|
||||
else
|
||||
echo " ✗ parent is NOT in child's /peers"
|
||||
VERDICT_C=fail
|
||||
fi
|
||||
else
|
||||
VERDICT_C=skipped
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=== SUMMARY for #2307 staging E2E ==="
|
||||
echo " child listed under parent: $CHILD_LISTED"
|
||||
echo " /peers parent→child: $VERDICT_B"
|
||||
echo " /peers child→parent: $VERDICT_C"
|
||||
|
||||
# Exit code: 0 if everything visible, 10 if bug reproduces, 11 if inconclusive
|
||||
if [ "$CHILD_LISTED" = "yes" ] && [ "$VERDICT_B" = "ok" ]; then
|
||||
echo ""
|
||||
echo "✓ STAGING: parent fully sees external child — bug is downstream (agent code, not platform API)"
|
||||
exit 0
|
||||
elif [ "$VERDICT_B" = "fail" ] || [ "$CHILD_LISTED" = "no" ]; then
|
||||
echo ""
|
||||
echo "✗ STAGING: bug REPRODUCES at platform-API layer"
|
||||
exit 10
|
||||
else
|
||||
echo ""
|
||||
echo "? STAGING: inconclusive (need parent token to call /peers definitively)"
|
||||
exit 11
|
||||
fi
|
||||
Executable
+135
@@ -0,0 +1,135 @@
|
||||
#!/usr/bin/env bash
|
||||
# E2E for the v2 chat upload path (RFC #2312):
|
||||
#
|
||||
# POST /workspaces/:id/chat/uploads
|
||||
# └─▶ platform Go workspace-server (proxies)
|
||||
# └─▶ workspace's own /internal/chat/uploads/ingest
|
||||
# └─▶ writes to /workspace/.molecule/chat-uploads
|
||||
#
|
||||
# The same script runs against ANY environment because the architecture
|
||||
# is now uniform — local docker-compose, staging tenant, production
|
||||
# health-probe — all hit the same call site with the same expected
|
||||
# behavior. This is the design goal RFC #2312 set: "test local will
|
||||
# pretty much match production."
|
||||
#
|
||||
# Required env:
|
||||
# BASE default http://localhost:8080
|
||||
# override to https://<id>.<tenant>.staging...
|
||||
# WORKSPACE_RUNTIME default langgraph (any internal runtime)
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 upload + read-back round-trip succeeded
|
||||
# 1 setup failed (couldn't create workspace, never came online, etc.)
|
||||
# 2 upload returned non-2xx
|
||||
# 3 upload succeeded but the file isn't readable via download
|
||||
|
||||
set -uo pipefail
|
||||
|
||||
BASE="${BASE:-http://localhost:8080}"
|
||||
RUNTIME="${WORKSPACE_RUNTIME:-langgraph}"
|
||||
|
||||
PARENT=""
|
||||
PARENT_TOK=""
|
||||
|
||||
# shellcheck disable=SC1091
|
||||
source "$(dirname "$0")/_lib.sh"
|
||||
|
||||
cleanup() {
|
||||
local rc=$?
|
||||
set +e
|
||||
if [ -n "$PARENT" ]; then
|
||||
curl -sS -X DELETE "$BASE/workspaces/$PARENT?confirm=true&purge=true" \
|
||||
${PARENT_TOK:+-H "Authorization: Bearer $PARENT_TOK"} >/dev/null 2>&1
|
||||
fi
|
||||
exit $rc
|
||||
}
|
||||
trap cleanup EXIT INT TERM
|
||||
|
||||
# ─── 1. Create workspace ───────────────────────────────────────────────
|
||||
echo "[1/5] POST /workspaces (runtime=$RUNTIME)..."
|
||||
P_RESP=$(curl -sS -X POST "$BASE/workspaces" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"e2e-chat-upload\",\"runtime\":\"$RUNTIME\",\"tier\":2}")
|
||||
PARENT=$(echo "$P_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null)
|
||||
[ -n "$PARENT" ] || { echo " ✗ workspace create failed: $P_RESP"; exit 1; }
|
||||
echo " ✓ workspace=$PARENT"
|
||||
|
||||
# ─── 2. Wait for online ────────────────────────────────────────────────
|
||||
echo "[2/5] waiting for workspace online (up to 5min)..."
|
||||
for i in $(seq 1 60); do
|
||||
S=$(curl -sS "$BASE/workspaces/$PARENT" 2>/dev/null \
|
||||
| python3 -c "import sys,json; d=json.load(sys.stdin); w=d.get('workspace') if isinstance(d.get('workspace'),dict) else d; print(w.get('status') or '')" 2>/dev/null)
|
||||
[ $((i % 6)) -eq 1 ] && echo " attempt $i: status=$S"
|
||||
[ "$S" = "online" ] && break
|
||||
sleep 5
|
||||
done
|
||||
[ "$S" = "online" ] || { echo " ✗ workspace never online (last=$S)"; exit 1; }
|
||||
echo " ✓ online"
|
||||
|
||||
# Mint a workspace bearer for the test (the auth needed to call
|
||||
# /workspaces/:id/chat/uploads, which is wsAuth-gated).
|
||||
PARENT_TOK=$(e2e_mint_test_token "$PARENT") || {
|
||||
echo " ✗ couldn't mint test token (MOLECULE_ENV=production?)"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# ─── 3. Upload a fixture ───────────────────────────────────────────────
|
||||
echo "[3/5] POST /workspaces/$PARENT/chat/uploads ..."
|
||||
FIXTURE=$(mktemp)
|
||||
echo "e2e fixture content $(date +%s)" > "$FIXTURE"
|
||||
EXPECTED=$(cat "$FIXTURE")
|
||||
|
||||
UPLOAD=$(curl -sS -X POST "$BASE/workspaces/$PARENT/chat/uploads" \
|
||||
-H "Authorization: Bearer $PARENT_TOK" \
|
||||
-F "files=@$FIXTURE;filename=greeting.txt;type=text/plain" \
|
||||
-w "\nHTTP_CODE=%{http_code}\n")
|
||||
CODE=$(echo "$UPLOAD" | grep -oE 'HTTP_CODE=[0-9]+' | cut -d= -f2)
|
||||
BODY=$(echo "$UPLOAD" | sed '/^HTTP_CODE=/,$d')
|
||||
echo " status=$CODE"
|
||||
echo " body=$(echo "$BODY" | head -c 300)"
|
||||
|
||||
if [ "$CODE" != "200" ]; then
|
||||
echo " ✗ upload returned $CODE"
|
||||
rm -f "$FIXTURE"
|
||||
exit 2
|
||||
fi
|
||||
|
||||
URI=$(echo "$BODY" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d['files'][0]['uri'])" 2>/dev/null)
|
||||
NAME=$(echo "$BODY" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d['files'][0]['name'])" 2>/dev/null)
|
||||
SIZE=$(echo "$BODY" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d['files'][0]['size'])" 2>/dev/null)
|
||||
[ -n "$URI" ] || { echo " ✗ no URI in response"; rm -f "$FIXTURE"; exit 2; }
|
||||
[ "$NAME" = "greeting.txt" ] || { echo " ✗ name mismatch: $NAME"; rm -f "$FIXTURE"; exit 2; }
|
||||
[ "$SIZE" = "$(wc -c <"$FIXTURE" | tr -d ' ')" ] || { echo " ✗ size mismatch: $SIZE"; rm -f "$FIXTURE"; exit 2; }
|
||||
echo " ✓ uri=$URI"
|
||||
echo " ✓ name=$NAME size=$SIZE"
|
||||
|
||||
# Extract the absolute path inside the workspace (strip workspace: scheme).
|
||||
PATH_IN_WS="${URI#workspace:}"
|
||||
|
||||
# ─── 4. Read it back via /chat/download ────────────────────────────────
|
||||
echo "[4/5] GET /workspaces/$PARENT/chat/download?path=$PATH_IN_WS"
|
||||
DOWNLOADED=$(curl -sS "$BASE/workspaces/$PARENT/chat/download?path=$PATH_IN_WS" \
|
||||
-H "Authorization: Bearer $PARENT_TOK")
|
||||
if [ "$DOWNLOADED" != "$EXPECTED" ]; then
|
||||
echo " ✗ content mismatch"
|
||||
echo " expected: $EXPECTED"
|
||||
echo " got: $DOWNLOADED"
|
||||
rm -f "$FIXTURE"
|
||||
exit 3
|
||||
fi
|
||||
echo " ✓ round-trip content matches"
|
||||
|
||||
# ─── 5. Auth: bare upload without bearer is rejected ───────────────────
|
||||
echo "[5/5] POST without bearer must be 401..."
|
||||
NA_CODE=$(curl -sS -o /dev/null -w "%{http_code}" -X POST "$BASE/workspaces/$PARENT/chat/uploads" \
|
||||
-F "files=@$FIXTURE")
|
||||
if [ "$NA_CODE" != "401" ]; then
|
||||
echo " ✗ expected 401 without bearer, got $NA_CODE"
|
||||
rm -f "$FIXTURE"
|
||||
exit 2
|
||||
fi
|
||||
echo " ✓ 401 without bearer"
|
||||
|
||||
rm -f "$FIXTURE"
|
||||
echo ""
|
||||
echo "✓ chat upload v2 (RFC #2312) end-to-end passed against $BASE"
|
||||
Executable
+308
@@ -0,0 +1,308 @@
|
||||
#!/usr/bin/env bash
|
||||
# E2E for delivery_mode=poll + since_id cursor (#2339).
|
||||
#
|
||||
# Round-trip: register a workspace as poll-mode (no URL) → POST A2A to it →
|
||||
# verify the proxy short-circuits to {status:"queued"} → verify the message
|
||||
# appears in /activity → verify the since_id cursor returns ONLY new events
|
||||
# in ASC order → verify a stale cursor returns 410.
|
||||
#
|
||||
# Requires: platform running on localhost:8080 with migrations applied.
|
||||
# bash workspace-server/scripts/dev-start.sh
|
||||
# bash workspace-server/scripts/run-migrations.sh
|
||||
#
|
||||
# Idempotent: each run uses fresh per-script workspace ids so reruns don't
|
||||
# collide. Does NOT call e2e_cleanup_all_workspaces — see
|
||||
# `feedback_never_run_cluster_cleanup_tests_on_live_platform.md`.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
source "$(dirname "$0")/_lib.sh"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
TIMEOUT="${A2A_TIMEOUT:-30}"
|
||||
|
||||
# Per-run unique ids — workspaces.id is a UUID column, so we generate
|
||||
# real v4 UUIDs. A "ws-<tag>" string fails the pq UUID cast and surfaces
|
||||
# as opaque "registration failed" (caught against this very test in CI
|
||||
# before merge — the failure mode that motivates the helper).
|
||||
gen_uuid() {
|
||||
if command -v uuidgen >/dev/null 2>&1; then
|
||||
uuidgen | tr '[:upper:]' '[:lower:]'
|
||||
else
|
||||
python3 -c 'import uuid; print(uuid.uuid4())'
|
||||
fi
|
||||
}
|
||||
POLL_WS_ID="$(gen_uuid)"
|
||||
CALLER_WS_ID="$(gen_uuid)"
|
||||
# Phase 2 uses a separate UUID for its invalid-mode probe so a rerun
|
||||
# can't poison POLL_WS_ID's row with a bad upsert (the 400 path doesn't
|
||||
# touch DB, but defense in depth).
|
||||
INVALID_PROBE_ID="$(gen_uuid)"
|
||||
|
||||
cleanup() {
|
||||
local rc=$?
|
||||
# Best-effort delete; non-fatal if the row was never created.
|
||||
curl -s -X DELETE "$BASE/workspaces/$POLL_WS_ID" >/dev/null || true
|
||||
curl -s -X DELETE "$BASE/workspaces/$CALLER_WS_ID" >/dev/null || true
|
||||
exit $rc
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
check() {
|
||||
local desc="$1"
|
||||
local expected="$2"
|
||||
local actual="$3"
|
||||
if echo "$actual" | grep -qF -- "$expected"; then
|
||||
echo "PASS: $desc"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo "FAIL: $desc"
|
||||
echo " expected to contain: $expected"
|
||||
echo " got: $(echo "$actual" | head -10)"
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
check_eq() {
|
||||
local desc="$1"
|
||||
local expected="$2"
|
||||
local actual="$3"
|
||||
if [ "$actual" = "$expected" ]; then
|
||||
echo "PASS: $desc"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo "FAIL: $desc"
|
||||
echo " expected: $expected"
|
||||
echo " got: $actual"
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
check_not_contains() {
|
||||
local desc="$1"
|
||||
local unexpected="$2"
|
||||
local actual="$3"
|
||||
if echo "$actual" | grep -qF -- "$unexpected"; then
|
||||
echo "FAIL: $desc"
|
||||
echo " should NOT contain: $unexpected"
|
||||
FAIL=$((FAIL + 1))
|
||||
else
|
||||
echo "PASS: $desc"
|
||||
PASS=$((PASS + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
echo "=== Poll-Mode + since_id Cursor E2E (#2339) ==="
|
||||
echo " base: $BASE"
|
||||
echo " poll workspace: $POLL_WS_ID"
|
||||
echo " caller workspace: $CALLER_WS_ID"
|
||||
echo ""
|
||||
|
||||
# ---------- Phase 1: register as poll-mode ----------
|
||||
echo "--- Phase 1: Register poll-mode workspace (no URL) ---"
|
||||
|
||||
# A poll-mode workspace registers WITHOUT a URL — that's the contract from
|
||||
# PR 1 (#2348). The agent_card is required; everything else is optional.
|
||||
REG_RESP=$(curl -s -X POST "$BASE/registry/register" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{
|
||||
\"id\": \"$POLL_WS_ID\",
|
||||
\"delivery_mode\": \"poll\",
|
||||
\"agent_card\": {\"name\": \"poll-mode-test\"}
|
||||
}")
|
||||
|
||||
check "register accepts poll mode without URL" '"status":"registered"' "$REG_RESP"
|
||||
check "register response echoes delivery_mode=poll" '"delivery_mode":"poll"' "$REG_RESP"
|
||||
|
||||
# Capture the auth token for subsequent /activity reads (Phase 30.1).
|
||||
POLL_TOKEN=$(echo "$REG_RESP" | e2e_extract_token || true)
|
||||
if [ -z "$POLL_TOKEN" ]; then
|
||||
echo "WARN: no auth_token in register response — token-required reads will fail"
|
||||
fi
|
||||
|
||||
# ---------- Phase 2: invalid mode rejected ----------
|
||||
echo ""
|
||||
echo "--- Phase 2: Invalid delivery_mode rejected ---"
|
||||
|
||||
INVALID_RESP=$(curl -s -w '\n%{http_code}' -X POST "$BASE/registry/register" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{
|
||||
\"id\": \"$INVALID_PROBE_ID\",
|
||||
\"delivery_mode\": \"webhook\",
|
||||
\"agent_card\": {\"name\": \"bad\"}
|
||||
}")
|
||||
INVALID_CODE=$(printf '%s' "$INVALID_RESP" | tail -n1)
|
||||
INVALID_BODY=$(printf '%s' "$INVALID_RESP" | sed '$d')
|
||||
|
||||
check_eq "register rejects unknown delivery_mode (HTTP 400)" "400" "$INVALID_CODE"
|
||||
check "error mentions delivery_mode" "delivery_mode" "$INVALID_BODY"
|
||||
|
||||
# ---------- Phase 3: A2A short-circuits to {status:"queued"} ----------
|
||||
echo ""
|
||||
echo "--- Phase 3: A2A to poll-mode workspace short-circuits ---"
|
||||
|
||||
A2A_RESP=$(curl -s --max-time "$TIMEOUT" -X POST "$BASE/workspaces/$POLL_WS_ID/a2a" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "msg-1",
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"message": {
|
||||
"role": "user",
|
||||
"parts": [{"type": "text", "text": "hello-from-e2e-1"}]
|
||||
}
|
||||
}
|
||||
}')
|
||||
|
||||
check "poll-mode A2A returns queued status" '"status":"queued"' "$A2A_RESP"
|
||||
check "queued response echoes delivery_mode=poll" '"delivery_mode":"poll"' "$A2A_RESP"
|
||||
check "queued response echoes the JSON-RPC method" '"method":"message/send"' "$A2A_RESP"
|
||||
|
||||
# ---------- Phase 4: queued message appears in /activity ----------
|
||||
echo ""
|
||||
echo "--- Phase 4: Queued message visible via /activity ---"
|
||||
|
||||
# The activity_logs INSERT runs in a goroutine — give it a moment.
|
||||
sleep 1
|
||||
|
||||
# Use bearer token if we got one; some platforms require it on /activity.
|
||||
ACTIVITY_AUTH=()
|
||||
[ -n "${POLL_TOKEN:-}" ] && ACTIVITY_AUTH=(-H "Authorization: Bearer $POLL_TOKEN")
|
||||
|
||||
ACT_RESP=$(curl -s --max-time "$TIMEOUT" "${ACTIVITY_AUTH[@]}" \
|
||||
"$BASE/workspaces/$POLL_WS_ID/activity?type=a2a_receive&limit=10")
|
||||
|
||||
check "activity feed has the queued message text" "hello-from-e2e-1" "$ACT_RESP"
|
||||
check "activity_type is a2a_receive" '"activity_type":"a2a_receive"' "$ACT_RESP"
|
||||
check "method preserved on the activity row" '"method":"message/send"' "$ACT_RESP"
|
||||
|
||||
# Pull the most-recent activity_id for use as a cursor.
|
||||
FIRST_ACTIVITY_ID=$(echo "$ACT_RESP" | python3 -c "
|
||||
import json, sys
|
||||
rows = json.load(sys.stdin)
|
||||
if not rows:
|
||||
print('')
|
||||
else:
|
||||
# Default ordering is DESC (newest-first) when no since_id is set.
|
||||
print(rows[0]['id'])
|
||||
")
|
||||
|
||||
if [ -z "$FIRST_ACTIVITY_ID" ]; then
|
||||
echo "FAIL: could not extract activity_id from /activity response"
|
||||
FAIL=$((FAIL + 1))
|
||||
exit 1
|
||||
fi
|
||||
echo " cursor candidate: $FIRST_ACTIVITY_ID"
|
||||
|
||||
# ---------- Phase 5: since_id returns only events strictly after ----------
|
||||
echo ""
|
||||
echo "--- Phase 5: since_id cursor returns ASC, strictly-after ---"
|
||||
|
||||
# Send a SECOND A2A message; it must appear in the cursor-filtered feed,
|
||||
# the FIRST message must NOT (cursor is strictly-after).
|
||||
A2A_RESP2=$(curl -s --max-time "$TIMEOUT" -X POST "$BASE/workspaces/$POLL_WS_ID/a2a" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "msg-2",
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"message": {
|
||||
"role": "user",
|
||||
"parts": [{"type": "text", "text": "hello-from-e2e-2"}]
|
||||
}
|
||||
}
|
||||
}')
|
||||
check "second A2A also queues" '"status":"queued"' "$A2A_RESP2"
|
||||
|
||||
sleep 1
|
||||
|
||||
CURSOR_RESP=$(curl -s --max-time "$TIMEOUT" "${ACTIVITY_AUTH[@]}" \
|
||||
"$BASE/workspaces/$POLL_WS_ID/activity?type=a2a_receive&since_id=$FIRST_ACTIVITY_ID&limit=10")
|
||||
|
||||
check "since_id feed includes the new message" "hello-from-e2e-2" "$CURSOR_RESP"
|
||||
check_not_contains "since_id feed excludes the cursor row itself" "hello-from-e2e-1" "$CURSOR_RESP"
|
||||
|
||||
# Verify ASC ordering: in a fresh cursor window with two new events the
|
||||
# array's first element must be the OLDER one (the test only sends one
|
||||
# event after the cursor, so this case is trivially "exactly one row";
|
||||
# the next sub-phase strengthens this with a second event).
|
||||
A2A_RESP3=$(curl -s --max-time "$TIMEOUT" -X POST "$BASE/workspaces/$POLL_WS_ID/a2a" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "msg-3",
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"message": {
|
||||
"role": "user",
|
||||
"parts": [{"type": "text", "text": "hello-from-e2e-3"}]
|
||||
}
|
||||
}
|
||||
}')
|
||||
check "third A2A queues" '"status":"queued"' "$A2A_RESP3"
|
||||
|
||||
sleep 1
|
||||
|
||||
ASC_RESP=$(curl -s --max-time "$TIMEOUT" "${ACTIVITY_AUTH[@]}" \
|
||||
"$BASE/workspaces/$POLL_WS_ID/activity?type=a2a_receive&since_id=$FIRST_ACTIVITY_ID&limit=10")
|
||||
|
||||
# rows[0] should be msg-2 (older), rows[-1] should be msg-3 (newer) — that's
|
||||
# ASC. If the server still defaulted to DESC, rows[0] would be msg-3.
|
||||
ASC_FIRST=$(echo "$ASC_RESP" | python3 -c "
|
||||
import json, sys
|
||||
rows = json.load(sys.stdin)
|
||||
def text_of(r):
|
||||
body = r.get('request_body') or {}
|
||||
parts = (body.get('params') or {}).get('message', {}).get('parts') or []
|
||||
return ''.join(p.get('text','') for p in parts if p.get('type')=='text')
|
||||
if len(rows) < 2:
|
||||
print('NEED2_GOT_'+str(len(rows)))
|
||||
else:
|
||||
print(text_of(rows[0]) + '|' + text_of(rows[-1]))
|
||||
")
|
||||
check_eq "since_id feed orders ASC (oldest-new first, newest-new last)" \
|
||||
"hello-from-e2e-2|hello-from-e2e-3" "$ASC_FIRST"
|
||||
|
||||
# ---------- Phase 6: stale cursor returns 410 ----------
|
||||
echo ""
|
||||
echo "--- Phase 6: Stale / unknown cursor returns 410 ---"
|
||||
|
||||
GONE_RESP=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" "${ACTIVITY_AUTH[@]}" \
|
||||
"$BASE/workspaces/$POLL_WS_ID/activity?since_id=00000000-0000-0000-0000-000000000000")
|
||||
GONE_CODE=$(printf '%s' "$GONE_RESP" | tail -n1)
|
||||
GONE_BODY=$(printf '%s' "$GONE_RESP" | sed '$d')
|
||||
|
||||
check_eq "unknown since_id returns HTTP 410 Gone" "410" "$GONE_CODE"
|
||||
check "410 body explains how to recover" "since_id" "$GONE_BODY"
|
||||
|
||||
# ---------- Phase 7: cross-workspace cursor isolation ----------
|
||||
echo ""
|
||||
echo "--- Phase 7: Cross-workspace cursor isolation ---"
|
||||
|
||||
# Register a SECOND poll-mode workspace and try to read its activity
|
||||
# feed using a cursor from the FIRST workspace. Must 410 — the cursor
|
||||
# is workspace-scoped to prevent UUID-guessing peeks.
|
||||
REG2=$(curl -s -X POST "$BASE/registry/register" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{
|
||||
\"id\": \"$CALLER_WS_ID\",
|
||||
\"delivery_mode\": \"poll\",
|
||||
\"agent_card\": {\"name\": \"poll-cross-test\"}
|
||||
}")
|
||||
check "second poll-mode workspace registers" '"status":"registered"' "$REG2"
|
||||
CALLER_TOKEN=$(echo "$REG2" | e2e_extract_token || true)
|
||||
CROSS_AUTH=()
|
||||
[ -n "${CALLER_TOKEN:-}" ] && CROSS_AUTH=(-H "Authorization: Bearer $CALLER_TOKEN")
|
||||
|
||||
CROSS_RESP=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" "${CROSS_AUTH[@]}" \
|
||||
"$BASE/workspaces/$CALLER_WS_ID/activity?since_id=$FIRST_ACTIVITY_ID")
|
||||
CROSS_CODE=$(printf '%s' "$CROSS_RESP" | tail -n1)
|
||||
check_eq "cross-workspace cursor blocked with 410 (no info leak)" "410" "$CROSS_CODE"
|
||||
|
||||
# ---------- Results ----------
|
||||
echo ""
|
||||
echo "=== Results: $PASS passed, $FAIL failed ==="
|
||||
[ "$FAIL" -eq 0 ]
|
||||
Executable
+348
@@ -0,0 +1,348 @@
|
||||
#!/bin/bash
|
||||
# test_staging_external_runtime.sh — E2E regression for the
|
||||
# external-runtime workspace lifecycle on a real staging tenant.
|
||||
#
|
||||
# Why this test exists: the four/five sites that write 'awaiting_agent'
|
||||
# / 'hibernating' to workspaces.status had been silently failing in
|
||||
# production for five days (see migration 046) before a static drift
|
||||
# gate caught the enum gap. Unit tests passed because sqlmock matched
|
||||
# the SQL by regex but didn't enforce the live enum constraint, and
|
||||
# every existing E2E exercised hermes (not external) so the silent
|
||||
# failures never surfaced. This test pins the four awaiting_agent
|
||||
# transitions in real Postgres on a real staging tenant.
|
||||
#
|
||||
# Verification path:
|
||||
# 1. Provision a fresh tenant (test_staging_full_saas.sh harness shape).
|
||||
# 2. Create an external-runtime workspace with NO URL → assert
|
||||
# response status == 'awaiting_agent' AND GET on the workspace
|
||||
# returns the same. (Pre-fix the row stuck on 'provisioning'
|
||||
# because the UPDATE in workspace.go:333 silently failed.)
|
||||
# 3. Register a fake URL via /registry/register → assert transition
|
||||
# to 'online'. (Pre-fix this branch worked because it writes
|
||||
# 'online' which IS in the enum.)
|
||||
# 4. Stop heartbeating; wait past REMOTE_LIVENESS_STALE_AFTER (90s
|
||||
# default) + a sweep interval → assert transition back to
|
||||
# 'awaiting_agent'. (Pre-fix the sweep UPDATE failed silently and
|
||||
# the workspace stuck on 'online' indefinitely.)
|
||||
#
|
||||
# Hibernation is intentionally NOT covered here — it has its own timing
|
||||
# model (idle threshold) and warrants a separate harness.
|
||||
#
|
||||
# Required env (mirrors test_staging_full_saas.sh):
|
||||
# MOLECULE_CP_URL default: https://staging-api.moleculesai.app
|
||||
# MOLECULE_ADMIN_TOKEN CP admin bearer (Railway CP_ADMIN_API_TOKEN)
|
||||
#
|
||||
# Optional env:
|
||||
# E2E_PROVISION_TIMEOUT_SECS default 900 (15 min cold EC2 budget)
|
||||
# E2E_KEEP_ORG 1 → skip teardown (debugging only)
|
||||
# E2E_RUN_ID Slug suffix; CI: ${GITHUB_RUN_ID}
|
||||
# E2E_STALE_WAIT_SECS default 180 (90s window + 90s buffer)
|
||||
# E2E_INTENTIONAL_FAILURE 1 → break a step on purpose to verify
|
||||
# the EXIT trap still tears down (mirrors
|
||||
# the full-saas harness's safety net).
|
||||
#
|
||||
# Exit codes: 0 happy, 1 generic, 2 missing env, 3 provision timeout,
|
||||
# 4 teardown leak.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
CP_URL="${MOLECULE_CP_URL:-https://staging-api.moleculesai.app}"
|
||||
ADMIN_TOKEN="${MOLECULE_ADMIN_TOKEN:?MOLECULE_ADMIN_TOKEN required — Railway staging CP_ADMIN_API_TOKEN}"
|
||||
PROVISION_TIMEOUT_SECS="${E2E_PROVISION_TIMEOUT_SECS:-900}"
|
||||
RUN_ID_SUFFIX="${E2E_RUN_ID:-$(date +%H%M%S)-$$}"
|
||||
STALE_WAIT_SECS="${E2E_STALE_WAIT_SECS:-180}"
|
||||
|
||||
SLUG="e2e-ext-$(date +%Y%m%d)-${RUN_ID_SUFFIX}"
|
||||
SLUG=$(echo "$SLUG" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9-' | head -c 32)
|
||||
|
||||
log() { echo "[$(date +%H:%M:%S)] $*"; }
|
||||
fail() { echo "[$(date +%H:%M:%S)] ❌ $*" >&2; exit 1; }
|
||||
ok() { echo "[$(date +%H:%M:%S)] ✅ $*"; }
|
||||
|
||||
CURL_COMMON=(-sS --fail-with-body --max-time 30)
|
||||
|
||||
# ─── cleanup trap (mirrors full-saas) ────────────────────────────────────
|
||||
CLEANUP_DONE=0
|
||||
cleanup_org() {
|
||||
local entry_rc=$?
|
||||
if [ "$CLEANUP_DONE" = "1" ]; then return 0; fi
|
||||
CLEANUP_DONE=1
|
||||
|
||||
if [ "${E2E_KEEP_ORG:-0}" = "1" ]; then
|
||||
log "E2E_KEEP_ORG=1 → leaving $SLUG behind for inspection"
|
||||
return 0
|
||||
fi
|
||||
|
||||
log "Cleanup: deleting tenant $SLUG..."
|
||||
curl "${CURL_COMMON[@]}" --max-time 120 -X DELETE "$CP_URL/cp/admin/tenants/$SLUG" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$SLUG\"}" >/dev/null 2>&1 \
|
||||
&& ok "Teardown request accepted" \
|
||||
|| log "Teardown returned non-2xx (may already be gone)"
|
||||
|
||||
local leak_count=1 elapsed=0
|
||||
while [ "$elapsed" -lt 60 ]; do
|
||||
leak_count=$(curl "${CURL_COMMON[@]}" "$CP_URL/cp/admin/orgs" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
|
||||
| python3 -c "import json,sys; d=json.load(sys.stdin); print(sum(1 for o in d.get('orgs', []) if o.get('slug')=='$SLUG' and o.get('status') != 'purged'))" \
|
||||
2>/dev/null || echo 1)
|
||||
[ "$leak_count" = "0" ] && break
|
||||
sleep 5
|
||||
elapsed=$((elapsed + 5))
|
||||
done
|
||||
|
||||
if [ "$leak_count" != "0" ]; then
|
||||
echo "⚠️ LEAK: org $SLUG still present post-teardown (count=$leak_count)" >&2
|
||||
exit 4
|
||||
fi
|
||||
ok "Teardown clean — no orphan resources for $SLUG (${elapsed}s)"
|
||||
|
||||
case "$entry_rc" in
|
||||
0|1|2|3|4) ;;
|
||||
*) exit 1 ;;
|
||||
esac
|
||||
}
|
||||
trap cleanup_org EXIT INT TERM
|
||||
|
||||
# ─── 0. Preflight ───────────────────────────────────────────────────────
|
||||
log "═══════════════════════════════════════════════════════════════════"
|
||||
log " Staging external-runtime E2E (regression for migration 046)"
|
||||
log " CP: $CP_URL"
|
||||
log " Slug: $SLUG"
|
||||
log " Stale: ${STALE_WAIT_SECS}s wait window"
|
||||
log "═══════════════════════════════════════════════════════════════════"
|
||||
|
||||
curl "${CURL_COMMON[@]}" "$CP_URL/health" >/dev/null || fail "CP health check failed"
|
||||
ok "CP reachable"
|
||||
|
||||
admin_call() {
|
||||
local method="$1"; shift; local path="$1"; shift
|
||||
curl "${CURL_COMMON[@]}" -X "$method" "$CP_URL$path" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" "$@"
|
||||
}
|
||||
|
||||
# ─── 1. Create org ──────────────────────────────────────────────────────
|
||||
log "1/8 Creating org $SLUG..."
|
||||
CREATE_RESP=$(admin_call POST /cp/admin/orgs \
|
||||
-d "{\"slug\":\"$SLUG\",\"name\":\"E2E ext $SLUG\",\"owner_user_id\":\"e2e-runner:$SLUG\"}")
|
||||
ORG_ID=$(echo "$CREATE_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))")
|
||||
[ -z "$ORG_ID" ] && fail "Org create response missing 'id'"
|
||||
ok "Org created (id=$ORG_ID)"
|
||||
|
||||
# ─── 2. Wait for tenant provisioning ────────────────────────────────────
|
||||
# Terminal status from /cp/admin/orgs is 'running' (org_instances.status),
|
||||
# NOT 'ready' — same field the full-saas harness polls. 'failed' surfaces
|
||||
# diagnostic dump and aborts. See test_staging_full_saas.sh step 2 for
|
||||
# the field-bugfix history (2026-04-21, last_error path).
|
||||
log "2/8 Waiting for tenant (up to ${PROVISION_TIMEOUT_SECS}s)..."
|
||||
DEADLINE=$(( $(date +%s) + PROVISION_TIMEOUT_SECS ))
|
||||
LAST_STATUS=""
|
||||
while true; do
|
||||
if [ "$(date +%s)" -gt "$DEADLINE" ]; then
|
||||
fail "Tenant provisioning timed out (last: $LAST_STATUS)"
|
||||
fi
|
||||
LIST_JSON=$(admin_call GET /cp/admin/orgs 2>/dev/null || echo '{"orgs":[]}')
|
||||
STATUS=$(echo "$LIST_JSON" | python3 -c "
|
||||
import json, sys
|
||||
d = json.load(sys.stdin)
|
||||
for o in d.get('orgs', []):
|
||||
if o.get('slug') == '$SLUG':
|
||||
print(o.get('instance_status', ''))
|
||||
sys.exit(0)
|
||||
print('')
|
||||
" 2>/dev/null || echo "")
|
||||
if [ "$STATUS" != "$LAST_STATUS" ]; then
|
||||
log " instance_status: $STATUS"
|
||||
LAST_STATUS="$STATUS"
|
||||
fi
|
||||
case "$STATUS" in
|
||||
running) break ;;
|
||||
failed)
|
||||
log "── DIAGNOSTIC BURST (step 2 — tenant provisioning failed) ──"
|
||||
echo "$LIST_JSON" | python3 -c "
|
||||
import json, sys
|
||||
d = json.load(sys.stdin)
|
||||
for o in d.get('orgs', []):
|
||||
if o.get('slug') == '$SLUG':
|
||||
print(json.dumps(o, indent=2))
|
||||
sys.exit(0)
|
||||
print('(no org row found for slug=$SLUG — DB drift?)')
|
||||
" 2>&1 | sed 's/^/ /'
|
||||
log "── END DIAGNOSTIC ──"
|
||||
fail "Tenant provisioning failed for $SLUG (see diagnostic above)"
|
||||
;;
|
||||
*) sleep 15 ;;
|
||||
esac
|
||||
done
|
||||
ok "Tenant provisioning complete"
|
||||
|
||||
# Derive tenant URL the same way the full-saas harness does.
|
||||
CP_HOST=$(echo "$CP_URL" | sed -E 's#^https?://##; s#/.*$##')
|
||||
case "$CP_HOST" in
|
||||
api.*) DERIVED_DOMAIN="${CP_HOST#api.}" ;;
|
||||
staging-api.*) DERIVED_DOMAIN="staging.${CP_HOST#staging-api.}" ;;
|
||||
*) DERIVED_DOMAIN="$CP_HOST" ;;
|
||||
esac
|
||||
TENANT_DOMAIN="${MOLECULE_TENANT_DOMAIN:-$DERIVED_DOMAIN}"
|
||||
TENANT_URL="https://$SLUG.$TENANT_DOMAIN"
|
||||
log " TENANT_URL=$TENANT_URL"
|
||||
|
||||
# ─── 3. Per-tenant admin token + TLS readiness ──────────────────────────
|
||||
log "3/8 Fetching per-tenant admin token..."
|
||||
TENANT_TOKEN_RESP=$(admin_call GET "/cp/admin/orgs/$SLUG/admin-token")
|
||||
TENANT_TOKEN=$(echo "$TENANT_TOKEN_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('admin_token',''))")
|
||||
[ -z "$TENANT_TOKEN" ] && fail "Could not retrieve per-tenant admin token"
|
||||
ok "Token retrieved (len=${#TENANT_TOKEN})"
|
||||
|
||||
log "Waiting for tenant TLS / DNS..."
|
||||
TLS_DEADLINE=$(( $(date +%s) + 15 * 60 ))
|
||||
while true; do
|
||||
if curl -sSfk --max-time 5 "$TENANT_URL/health" >/dev/null 2>&1; then break; fi
|
||||
if [ "$(date +%s)" -gt "$TLS_DEADLINE" ]; then
|
||||
fail "Tenant URL never responded 2xx on /health within 15min"
|
||||
fi
|
||||
sleep 5
|
||||
done
|
||||
ok "Tenant reachable"
|
||||
|
||||
tenant_call() {
|
||||
local method="$1"; shift; local path="$1"; shift
|
||||
curl "${CURL_COMMON[@]}" -X "$method" "$TENANT_URL$path" \
|
||||
-H "Authorization: Bearer $TENANT_TOKEN" \
|
||||
-H "X-Molecule-Org-Id: $ORG_ID" \
|
||||
"$@"
|
||||
}
|
||||
|
||||
# ─── 4. Create external workspace (no URL) ──────────────────────────────
|
||||
# This is the FIRST silent-failure path (workspace.go:333). Pre-migration
|
||||
# 046, the response would say status=awaiting_agent but the row stuck
|
||||
# on whatever the create handler set first (typically 'provisioning')
|
||||
# because the follow-up UPDATE failed the enum cast.
|
||||
log "4/8 Creating external workspace (no URL — exercises workspace.go:333)..."
|
||||
WS_CREATE_RESP=$(tenant_call POST /workspaces \
|
||||
-d '{"name":"ext-e2e","runtime":"external","external":true}')
|
||||
|
||||
WS_ID=$(echo "$WS_CREATE_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))")
|
||||
WS_RESP_STATUS=$(echo "$WS_CREATE_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
|
||||
WS_AUTH_TOKEN=$(echo "$WS_CREATE_RESP" | python3 -c "
|
||||
import json,sys
|
||||
try:
|
||||
d = json.load(sys.stdin)
|
||||
conn = d.get('connection') or {}
|
||||
print(conn.get('auth_token','') or d.get('auth_token',''))
|
||||
except Exception:
|
||||
print('')
|
||||
")
|
||||
[ -z "$WS_ID" ] && fail "Workspace create missing id: $WS_CREATE_RESP"
|
||||
[ "$WS_RESP_STATUS" != "awaiting_agent" ] && fail "Expected response status=awaiting_agent, got $WS_RESP_STATUS"
|
||||
ok "Workspace created (id=$WS_ID, response status=awaiting_agent)"
|
||||
|
||||
# This GET is the proof that the row actually has the value (not just
|
||||
# the response body lying). Pre-migration-046 the UPDATE would have
|
||||
# silently failed and this would return whatever 'provisioning' the
|
||||
# initial INSERT left. Post-fix it must be 'awaiting_agent'.
|
||||
log " Verifying DB row..."
|
||||
GET_RESP=$(tenant_call GET "/workspaces/$WS_ID")
|
||||
DB_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
|
||||
[ "$DB_STATUS" != "awaiting_agent" ] && fail "DB row status=$DB_STATUS (expected awaiting_agent — migration 046 likely not applied)"
|
||||
ok "DB row stored as awaiting_agent (proof migration 046 applied)"
|
||||
|
||||
# ─── 5. Register the workspace (transitions to online) ──────────────────
|
||||
# Pre-fix this path was actually fine because it writes 'online', a value
|
||||
# already in the enum. We exercise it anyway because the registration
|
||||
# implicitly walks resolveDeliveryMode (registry.go:resolveDeliveryMode),
|
||||
# which DOES read runtime + apply the new poll-default introduced by
|
||||
# PR #2382.
|
||||
log "5/8 Registering workspace via /registry/register..."
|
||||
[ -z "$WS_AUTH_TOKEN" ] && fail "No workspace auth token returned — register impossible"
|
||||
# Payload contract (workspace-server/internal/models/workspace.go RegisterPayload):
|
||||
# id — required, the workspace UUID (NOT "workspace_id" — that's the
|
||||
# heartbeat payload field; mixing them yields a 400 from
|
||||
# ShouldBindJSON because `id` has binding:"required").
|
||||
# agent_card — required (binding:"required"); minimal valid card is name+skills.
|
||||
# delivery_mode — set explicitly to "poll" so url validation is skipped
|
||||
# regardless of whether the deployed image has the
|
||||
# runtime=external→poll default from PR #2382. Observed
|
||||
# 2026-04-30 17:18Z: a freshly-provisioned staging tenant
|
||||
# was running an older workspace-server :latest image
|
||||
# that lacked resolveDeliveryMode's external→poll branch,
|
||||
# so the implicit default was push and validateAgentURL
|
||||
# 400'd on example.invalid. Asserting on the implicit
|
||||
# default makes the *register call* itself fragile to
|
||||
# image-tag drift on the fleet — verify the default
|
||||
# separately (step 5b assertion) without depending on it
|
||||
# here.
|
||||
# url — accepted but not dispatched-to in poll mode, so
|
||||
# example.invalid is a valid sentinel.
|
||||
REGISTER_BODY=$(printf '{"id":"%s","url":"https://example.invalid:443","delivery_mode":"poll","agent_card":{"name":"e2e-ext","skills":[{"id":"echo","name":"Echo"}]}}' "$WS_ID")
|
||||
# Disable --fail-with-body for this one call so a 4xx surfaces the response
|
||||
# body (the bare CURL_COMMON would `set -e`-kill before we could log it).
|
||||
REGISTER_RESP=$(curl -sS --max-time 30 -w "\nHTTP_CODE=%{http_code}" -X POST "$TENANT_URL/registry/register" \
|
||||
-H "Authorization: Bearer $WS_AUTH_TOKEN" \
|
||||
-H "X-Molecule-Org-Id: $ORG_ID" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$REGISTER_BODY") || true
|
||||
log " register response: $(echo "$REGISTER_RESP" | head -c 300)"
|
||||
echo "$REGISTER_RESP" | grep -q "HTTP_CODE=200" || fail "register returned non-200 — see body above"
|
||||
|
||||
GET_RESP=$(tenant_call GET "/workspaces/$WS_ID")
|
||||
ONLINE_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
|
||||
[ "$ONLINE_STATUS" != "online" ] && fail "Expected online after register, got $ONLINE_STATUS"
|
||||
ok "Workspace transitioned to online"
|
||||
|
||||
# Confirm the register handler echoed back delivery_mode=poll. We read
|
||||
# this from the register RESPONSE, not the workspace GET response, because
|
||||
# the GET handler's SELECT (workspace.go:597) doesn't fetch delivery_mode
|
||||
# — its column list pre-dates the delivery_mode column from #2339 PR 1.
|
||||
# Surfacing delivery_mode in GET is tracked separately; not gating on it
|
||||
# here keeps this test focused on the awaiting_agent transitions.
|
||||
REGISTER_BODY_JSON=$(echo "$REGISTER_RESP" | head -n 1)
|
||||
REGISTER_DELIVERY_MODE=$(echo "$REGISTER_BODY_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('delivery_mode',''))")
|
||||
if [ "$REGISTER_DELIVERY_MODE" = "poll" ]; then
|
||||
ok "delivery_mode=poll (register response echoed explicit value)"
|
||||
else
|
||||
fail "Register response delivery_mode=$REGISTER_DELIVERY_MODE (expected poll). Body: $REGISTER_BODY_JSON"
|
||||
fi
|
||||
|
||||
# ─── 6. Stop heartbeating; wait past REMOTE_LIVENESS_STALE_AFTER ────────
|
||||
# This is the SECOND silent-failure path (registry/healthsweep.go's
|
||||
# sweepStaleRemoteWorkspaces). Pre-migration-046 the heartbeat-staleness
|
||||
# UPDATE silently failed and the workspace stuck on 'online' forever
|
||||
# even though no agent was alive. We wait the full window + a sweep
|
||||
# interval and assert the row transitions back to 'awaiting_agent'.
|
||||
log "6/8 Waiting ${STALE_WAIT_SECS}s for heartbeat-staleness sweep (no heartbeat sent)..."
|
||||
sleep "$STALE_WAIT_SECS"
|
||||
|
||||
GET_RESP=$(tenant_call GET "/workspaces/$WS_ID")
|
||||
STALE_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
|
||||
[ "$STALE_STATUS" != "awaiting_agent" ] && \
|
||||
fail "After ${STALE_WAIT_SECS}s with no heartbeat, expected status=awaiting_agent (sweep transition), got $STALE_STATUS — migration 046 likely not applied OR sweep not running"
|
||||
ok "Heartbeat-staleness sweep transitioned online → awaiting_agent (proof healthsweep.go fix working)"
|
||||
|
||||
# ─── 7. Re-register and confirm we can come back online ─────────────────
|
||||
# This proves the awaiting_agent state is recoverable (re-registrable),
|
||||
# which is the whole point of using it instead of 'offline'.
|
||||
log "7/8 Re-registering after stale → confirming recovery to online..."
|
||||
# Same payload contract as step 5 (id + agent_card both required). See note
|
||||
# there for why workspace_id would 400.
|
||||
REREG_RESP=$(curl -sS --max-time 30 -w "\nHTTP_CODE=%{http_code}" -X POST "$TENANT_URL/registry/register" \
|
||||
-H "Authorization: Bearer $WS_AUTH_TOKEN" \
|
||||
-H "X-Molecule-Org-Id: $ORG_ID" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$REGISTER_BODY") || true
|
||||
log " re-register response: $(echo "$REREG_RESP" | head -c 300)"
|
||||
echo "$REREG_RESP" | grep -q "HTTP_CODE=200" || fail "re-register returned non-200 — see body above"
|
||||
|
||||
GET_RESP=$(tenant_call GET "/workspaces/$WS_ID")
|
||||
RECOVERED_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
|
||||
[ "$RECOVERED_STATUS" != "online" ] && \
|
||||
fail "Expected re-register to return workspace to online, got $RECOVERED_STATUS"
|
||||
ok "Re-register succeeded — awaiting_agent → online (operator-recoverable)"
|
||||
|
||||
# ─── 8. Done — cleanup runs in the EXIT trap ───────────────────────────
|
||||
log "8/8 All four awaiting_agent transitions verified."
|
||||
log "═══════════════════════════════════════════════════════════════════"
|
||||
ok "External-runtime E2E PASSED on $SLUG"
|
||||
log "═══════════════════════════════════════════════════════════════════"
|
||||
@@ -0,0 +1,2 @@
|
||||
# Harness ephemeral state. Re-generated by ./seed.sh on every boot.
|
||||
.seed.env
|
||||
@@ -0,0 +1,156 @@
|
||||
# Production-shape local harness
|
||||
|
||||
The harness brings up the SaaS tenant topology on localhost using the
|
||||
same `Dockerfile.tenant` image that ships to production. Tests target
|
||||
the cf-proxy on `http://localhost:8080` and pass the tenant identity
|
||||
via a `Host:` header — exactly the way production CF tunnel routes by
|
||||
Host header. The cf-proxy nginx then rewrites headers and proxies to
|
||||
the right tenant container, exercising the SAME code path a real tenant
|
||||
takes including TenantGuard middleware, the `/cp/*` reverse proxy, the
|
||||
canvas reverse proxy, and a Cloudflare-tunnel-shape header rewrite
|
||||
layer.
|
||||
|
||||
Since Phase 2 the harness runs **two tenants in parallel** (alpha and
|
||||
beta) with their own Postgres instance and distinct
|
||||
`MOLECULE_ORG_ID`s — same shape as production, where each tenant gets
|
||||
its own EC2 + DB. This is what cross-tenant isolation replays need to
|
||||
prove TenantGuard actually 404s a misrouted request.
|
||||
|
||||
`tests/harness/_curl.sh` is the helper sourced by every replay. Per
|
||||
tenant: `curl_alpha_anon` / `curl_alpha_admin` / `curl_beta_anon` /
|
||||
`curl_beta_admin` / `psql_exec_alpha` / `psql_exec_beta`. Plus
|
||||
deliberately-wrong cross-tenant negative-test helpers for isolation
|
||||
replays: `curl_alpha_creds_at_beta` / `curl_beta_creds_at_alpha`.
|
||||
Legacy single-tenant aliases (`curl_anon`, `curl_admin`, `psql_exec`)
|
||||
default to alpha so pre-Phase-2 replays continue to work. New replays
|
||||
should source `_curl.sh` rather than rolling their own curl.
|
||||
|
||||
## Why this exists
|
||||
|
||||
Local `go run ./cmd/server` skips:
|
||||
- `TenantGuard` middleware (no `MOLECULE_ORG_ID` env)
|
||||
- `/cp/*` reverse proxy mount (no `CP_UPSTREAM_URL` env)
|
||||
- `CANVAS_PROXY_URL` (canvas runs separately on `:3000`)
|
||||
- Header rewrites that production's CF tunnel + LB perform
|
||||
- Strict-auth mode (no live `ADMIN_TOKEN`)
|
||||
|
||||
Bugs that survive `go run` and ship to production almost always live
|
||||
in one of those layers. The harness activates ALL of them.
|
||||
|
||||
## Topology
|
||||
|
||||
```
|
||||
client
|
||||
↓
|
||||
cf-proxy nginx, mirrors CF tunnel header rewrites
|
||||
↓ (routes by Host header)
|
||||
┌─────────────────────────┴─────────────────────────┐
|
||||
↓ ↓
|
||||
tenant-alpha tenant-beta
|
||||
Host: harness-tenant-alpha.localhost Host: harness-tenant-beta.localhost
|
||||
MOLECULE_ORG_ID=harness-org-alpha MOLECULE_ORG_ID=harness-org-beta
|
||||
↓ ↓
|
||||
postgres-alpha postgres-beta
|
||||
↓ ↓
|
||||
└─────────────────────────┬─────────────────────────┘
|
||||
↓
|
||||
cp-stub + redis (shared)
|
||||
```
|
||||
|
||||
Each tenant runs the production `Dockerfile.tenant` image with its own
|
||||
admin token, org id, and Postgres instance — identical isolation
|
||||
boundaries to production where each tenant gets a dedicated EC2 + DB.
|
||||
cp-stub and redis are shared because they model the per-region
|
||||
multi-tenant CP and a single Redis cluster.
|
||||
|
||||
## Quickstart
|
||||
|
||||
```bash
|
||||
cd tests/harness
|
||||
./up.sh # builds + starts all services (both tenants)
|
||||
./seed.sh # registers parent+child workspaces in BOTH tenants
|
||||
./replays/tenant-isolation.sh
|
||||
./replays/per-tenant-independence.sh
|
||||
./down.sh # tear down + remove volumes
|
||||
```
|
||||
|
||||
To run every replay in one shot (boot, seed, run-all, teardown):
|
||||
|
||||
```bash
|
||||
cd tests/harness
|
||||
./run-all-replays.sh # full lifecycle; non-zero exit if any replay fails
|
||||
KEEP_UP=1 ./run-all-replays.sh # leave harness up for debugging
|
||||
REBUILD=1 ./run-all-replays.sh # rebuild images before booting
|
||||
```
|
||||
|
||||
No `/etc/hosts` edit required — replays use the cf-proxy's loopback
|
||||
port and pass the per-tenant `Host:` header (`_curl.sh` handles this
|
||||
automatically). This matches how production CF tunnel routes: the URL
|
||||
is the public CF endpoint, the Host header carries the per-tenant
|
||||
identity. Quick check:
|
||||
|
||||
```bash
|
||||
curl -H "Host: harness-tenant-alpha.localhost" http://localhost:8080/health
|
||||
curl -H "Host: harness-tenant-beta.localhost" http://localhost:8080/health
|
||||
```
|
||||
|
||||
(If you have a legacy `/etc/hosts` entry from older docs, it still
|
||||
works — `BASE`, `ALPHA_HOST`, `BETA_HOST` all honor env-var overrides.
|
||||
The legacy `harness-tenant.localhost` host alias maps to alpha.)
|
||||
|
||||
## Replay scripts
|
||||
|
||||
Each replay script reproduces a real bug class against the harness so
|
||||
fixes can be verified locally before deploy. The bar for adding a
|
||||
replay is "this bug shipped to production despite local E2E being
|
||||
green" — the script becomes the regression gate that closes that gap.
|
||||
|
||||
| Replay | Closes | What it proves |
|
||||
|--------|--------|----------------|
|
||||
| `peer-discovery-404.sh` | #2397 | tool_list_peers surfaces the actual reason instead of "may be isolated" |
|
||||
| `buildinfo-stale-image.sh` | #2395 | GIT_SHA reaches the binary; verify-step comparison logic works |
|
||||
| `chat-history.sh` | #2472 + #2474 + #2476 | `peer_id` filter (incl. OR over source/target) + `before_ts` paging + UUID/RFC3339 trust boundary on the activity route |
|
||||
| `channel-envelope-trust-boundary.sh` | #2471 + #2481 | published wheel scrubs malformed `peer_id` from the channel envelope and from `agent_card_url` (path-traversal + XML-attr injection) |
|
||||
| `tenant-isolation.sh` | Phase 2 | TenantGuard 404s any request whose `X-Molecule-Org-Id` doesn't match the container's `MOLECULE_ORG_ID` (covers cross-tenant routing bug + allowlist drift); per-tenant `/workspaces` listings stay partitioned |
|
||||
| `per-tenant-independence.sh` | Phase 2 | parallel A2A workflows in both tenants don't bleed into each other's `activity_logs` / `workspaces`, including under a concurrent INSERT race (catches lib/pq prepared-statement cache collision + shared-pool poisoning) |
|
||||
|
||||
To add a new replay:
|
||||
1. Drop a script under `replays/` named after the issue.
|
||||
2. The script's purpose: reproduce the production failure mode against
|
||||
the harness, then assert the fix is present. PASS criterion is the
|
||||
post-fix behavior.
|
||||
3. The `run-all-replays.sh` runner picks up every `replays/*.sh` script
|
||||
automatically — no per-replay registration needed.
|
||||
|
||||
## Extending the cp-stub
|
||||
|
||||
`cp-stub/main.go` serves the minimum surface for the existing replays
|
||||
plus a catch-all that returns 501 + a clear message when the tenant
|
||||
asks for a route the stub doesn't implement. To add a new CP route:
|
||||
|
||||
1. Add a `mux.HandleFunc` in `cp-stub/main.go` for the path.
|
||||
2. Return the same wire shape the real CP returns. The contract is
|
||||
"wire compatibility with the staging CP at the time of writing" —
|
||||
document it with a comment pointing at the real CP handler.
|
||||
3. Add a replay script that exercises the path.
|
||||
|
||||
## What the harness does NOT cover
|
||||
|
||||
- Real TLS / cert handling (CF terminates TLS in production; harness is
|
||||
HTTP-only).
|
||||
- Cloudflare API edge cases (rate limits, DNS propagation timing).
|
||||
- Real EC2 / SSM / EBS behavior (image-cache replay simulates the
|
||||
outcome but not the AWS API surface).
|
||||
- Cross-region or multi-AZ topology.
|
||||
- Real production data scale.
|
||||
|
||||
These are intentional Phase 1 limits. If a bug class hits one of these
|
||||
gaps, escalate to staging E2E rather than expanding the harness past
|
||||
its mandate of "exercise the tenant binary in production-shape topology."
|
||||
|
||||
## Roadmap
|
||||
|
||||
- **Phase 1 (shipped):** harness + cp-stub + cf-proxy + 4 replays + `run-all-replays.sh` runner. No-sudo `Host`-header path via `_curl.sh`. Per-replay psql seeding for tests that need DB-side fixtures.
|
||||
- **Phase 2 (shipped):** multi-tenant — `tenant-alpha` + `tenant-beta` with their own Postgres instances and distinct `MOLECULE_ORG_ID`s; cf-proxy nginx routes by Host header (prod CF tunnel parity); `seed.sh` registers parent+child workspaces in both tenants; `_curl.sh` exposes per-tenant + cross-tenant-negative helpers; new replays cover TenantGuard isolation (`tenant-isolation.sh`) and per-tenant independence under concurrent load (`per-tenant-independence.sh`). `harness-replays.yml` runs `run-all-replays.sh` as a required check on every PR touching `workspace-server/**`, `canvas/**`, `tests/harness/**`, or the workflow itself.
|
||||
- **Phase 3:** replace `cp-stub/` with the real `molecule-controlplane` Docker build. Add a config-coherence lint that diffs harness env list against production CP's env list and fails CI on drift. Convert `tests/e2e/test_api.sh` to target the harness instead of localhost.
|
||||
- **Phase 4 (long-term):** Miniflare in front of cf-proxy for real CF emulation (WAF, BotID, rate-limit, cf-tunnel headers). LocalStack for the EC2 provisioner. Anonymized prod-traffic recording/replay for SaaS-scale regression detection.
|
||||
@@ -0,0 +1,159 @@
|
||||
# Sourceable helper for harness replays. Centralises the
|
||||
# curl-against-cf-proxy pattern so scripts don't depend on /etc/hosts.
|
||||
#
|
||||
# Production CF tunnel routes by Host header, not by DNS — the request
|
||||
# URL is to a public CF endpoint and the Host header carries the
|
||||
# per-tenant identity. We replay the same shape locally:
|
||||
#
|
||||
# curl -H "Host: harness-tenant-alpha.localhost" http://localhost:8080/health
|
||||
#
|
||||
# This matches what cf-proxy/nginx.conf already routes (`server_name
|
||||
# *.localhost` + `map $host $tenant_upstream`) and avoids the macOS
|
||||
# /etc/hosts requirement that previously gated the harness behind a
|
||||
# sudo step.
|
||||
#
|
||||
# Multi-tenant since Phase 2: alpha and beta tenants run in parallel.
|
||||
# `curl_alpha_admin` and `curl_beta_admin` target each tenant's URL
|
||||
# with that tenant's ADMIN_TOKEN + MOLECULE_ORG_ID. The legacy
|
||||
# `curl_admin` is aliased to alpha for backwards compat with the
|
||||
# pre-Phase-2 single-tenant replays.
|
||||
#
|
||||
# Usage:
|
||||
# HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
# source "$HERE/../_curl.sh" # from replays/<name>.sh
|
||||
# curl_alpha_admin "$BASE/health"
|
||||
# curl_beta_admin "$BASE/health"
|
||||
|
||||
# Bind to the cf-proxy's loopback port — the proxy front-doors every
|
||||
# tenant and routes by Host header, exactly like production's CF tunnel.
|
||||
: "${BASE:=http://localhost:8080}"
|
||||
|
||||
# Per-tenant identity. Each pair must match the corresponding tenant
|
||||
# container's environment in compose.yml or auth/TenantGuard will fail
|
||||
# in non-obvious ways (401 vs 403 vs silent route to wrong tenant).
|
||||
: "${ALPHA_HOST:=harness-tenant-alpha.localhost}"
|
||||
: "${ALPHA_ADMIN_TOKEN:=harness-admin-token-alpha}"
|
||||
: "${ALPHA_ORG_ID:=harness-org-alpha}"
|
||||
|
||||
: "${BETA_HOST:=harness-tenant-beta.localhost}"
|
||||
: "${BETA_ADMIN_TOKEN:=harness-admin-token-beta}"
|
||||
: "${BETA_ORG_ID:=harness-org-beta}"
|
||||
|
||||
# Legacy single-tenant aliases — pre-Phase-2 replays use these without
|
||||
# knowing the topology grew. They map to alpha. New replays should use
|
||||
# the explicit alpha/beta variants for clarity.
|
||||
: "${TENANT_HOST:=$ALPHA_HOST}"
|
||||
: "${ADMIN_TOKEN:=$ALPHA_ADMIN_TOKEN}"
|
||||
: "${ORG_ID:=$ALPHA_ORG_ID}"
|
||||
|
||||
# ─── Anonymous (no auth) ──────────────────────────────────────────────
|
||||
|
||||
# Anonymous request to alpha. Use for /health, /buildinfo, etc.
|
||||
curl_alpha_anon() {
|
||||
curl -sS -H "Host: ${ALPHA_HOST}" "$@"
|
||||
}
|
||||
|
||||
# Anonymous request to beta.
|
||||
curl_beta_anon() {
|
||||
curl -sS -H "Host: ${BETA_HOST}" "$@"
|
||||
}
|
||||
|
||||
# Legacy alias for single-tenant replays.
|
||||
curl_anon() {
|
||||
curl -sS -H "Host: ${TENANT_HOST}" "$@"
|
||||
}
|
||||
|
||||
# ─── Admin-token requests ─────────────────────────────────────────────
|
||||
|
||||
# Admin-token request to alpha tenant. SaaS-shape auth: bearer token,
|
||||
# tenant org header (TenantGuard activates), JSON content type.
|
||||
curl_alpha_admin() {
|
||||
curl -sS \
|
||||
-H "Host: ${ALPHA_HOST}" \
|
||||
-H "Authorization: Bearer ${ALPHA_ADMIN_TOKEN}" \
|
||||
-H "X-Molecule-Org-Id: ${ALPHA_ORG_ID}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"$@"
|
||||
}
|
||||
|
||||
# Admin-token request to beta tenant.
|
||||
curl_beta_admin() {
|
||||
curl -sS \
|
||||
-H "Host: ${BETA_HOST}" \
|
||||
-H "Authorization: Bearer ${BETA_ADMIN_TOKEN}" \
|
||||
-H "X-Molecule-Org-Id: ${BETA_ORG_ID}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"$@"
|
||||
}
|
||||
|
||||
# Legacy alias.
|
||||
curl_admin() {
|
||||
curl_alpha_admin "$@"
|
||||
}
|
||||
|
||||
# ─── Cross-tenant negative-test helpers ───────────────────────────────
|
||||
# These exist to MAKE WRONG calls — replays use them to assert
|
||||
# TenantGuard rejects them. Names spell out what's mismatched.
|
||||
|
||||
# alpha bearer + alpha org, but talking to beta's URL. TenantGuard
|
||||
# should reject because the org header doesn't match beta's MOLECULE_ORG_ID.
|
||||
curl_alpha_creds_at_beta() {
|
||||
curl -sS \
|
||||
-H "Host: ${BETA_HOST}" \
|
||||
-H "Authorization: Bearer ${ALPHA_ADMIN_TOKEN}" \
|
||||
-H "X-Molecule-Org-Id: ${ALPHA_ORG_ID}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"$@"
|
||||
}
|
||||
|
||||
# beta bearer + beta org, but talking to alpha's URL.
|
||||
curl_beta_creds_at_alpha() {
|
||||
curl -sS \
|
||||
-H "Host: ${ALPHA_HOST}" \
|
||||
-H "Authorization: Bearer ${BETA_ADMIN_TOKEN}" \
|
||||
-H "X-Molecule-Org-Id: ${BETA_ORG_ID}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"$@"
|
||||
}
|
||||
|
||||
# ─── Workspace-scoped (per-workspace bearer) ──────────────────────────
|
||||
|
||||
# Workspace-scoped request to alpha — uses a per-workspace bearer
|
||||
# minted from /admin/workspaces/:id/test-token. Caller must export
|
||||
# WORKSPACE_TOKEN.
|
||||
curl_workspace() {
|
||||
: "${WORKSPACE_TOKEN:?WORKSPACE_TOKEN must be set — mint via /admin/workspaces/:id/test-token}"
|
||||
curl -sS \
|
||||
-H "Host: ${TENANT_HOST}" \
|
||||
-H "Authorization: Bearer ${WORKSPACE_TOKEN}" \
|
||||
-H "X-Molecule-Org-Id: ${ORG_ID}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"$@"
|
||||
}
|
||||
|
||||
# ─── Postgres exec (per-tenant) ───────────────────────────────────────
|
||||
|
||||
# Direct postgres exec — for replays that need to seed activity_logs
|
||||
# rows or read DB state that has no public HTTP route.
|
||||
#
|
||||
# SECRETS_ENCRYPTION_KEY placeholder lets compose validate without
|
||||
# requiring up.sh's per-run key (exec doesn't actually use it but
|
||||
# compose validates the file).
|
||||
psql_exec_alpha() {
|
||||
SECRETS_ENCRYPTION_KEY="${SECRETS_ENCRYPTION_KEY:-exec-placeholder}" \
|
||||
docker compose -f "${HARNESS_COMPOSE:-$(dirname "${BASH_SOURCE[0]}")/compose.yml}" \
|
||||
exec -T postgres-alpha \
|
||||
psql -U harness -d molecule -At "$@"
|
||||
}
|
||||
|
||||
psql_exec_beta() {
|
||||
SECRETS_ENCRYPTION_KEY="${SECRETS_ENCRYPTION_KEY:-exec-placeholder}" \
|
||||
docker compose -f "${HARNESS_COMPOSE:-$(dirname "${BASH_SOURCE[0]}")/compose.yml}" \
|
||||
exec -T postgres-beta \
|
||||
psql -U harness -d molecule -At "$@"
|
||||
}
|
||||
|
||||
# Legacy alias — single-tenant replays default to alpha's DB.
|
||||
psql_exec() {
|
||||
psql_exec_alpha "$@"
|
||||
}
|
||||
@@ -0,0 +1,97 @@
|
||||
# cf-proxy — Cloudflare-tunnel-shape reverse proxy for the local harness.
|
||||
#
|
||||
# Production path: agent → CF tunnel → AWS LB → tenant container.
|
||||
# This config replays the same header rewrites the CF tunnel does so
|
||||
# the tenant sees the same Host + X-Forwarded-* it would in production.
|
||||
#
|
||||
# Multi-tenant: nginx routes by Host header to the right tenant
|
||||
# container — exactly the same way the production CF tunnel does
|
||||
# (URL is the public CF endpoint, Host carries the tenant identity).
|
||||
#
|
||||
# How tests reach it (no /etc/hosts required):
|
||||
# curl -H 'Host: harness-tenant-alpha.localhost' http://localhost:8080/health
|
||||
# curl -H 'Host: harness-tenant-beta.localhost' http://localhost:8080/health
|
||||
#
|
||||
# Backwards-compat: harness-tenant.localhost (no -alpha/-beta suffix) maps
|
||||
# to alpha for legacy single-tenant replays.
|
||||
|
||||
worker_processes 1;
|
||||
events { worker_connections 256; }
|
||||
|
||||
http {
|
||||
# Docker's embedded DNS at 127.0.0.11. Required because the
|
||||
# `proxy_pass http://$tenant_upstream:8080` below uses a variable —
|
||||
# nginx needs an explicit resolver to do per-request DNS lookups
|
||||
# (literal hostnames are resolved once at startup, variables are
|
||||
# resolved per-request). Without this, nginx fails closed with
|
||||
# "no resolver defined" + 502.
|
||||
#
|
||||
# `valid=30s` caps cache life so a tenant container restart picks
|
||||
# up a new IP within 30 seconds. ipv6=off skips AAAA lookups that
|
||||
# Docker DNS doesn't always serve cleanly.
|
||||
resolver 127.0.0.11 valid=30s ipv6=off;
|
||||
|
||||
# Reusable proxy block so each tenant server only carries the
|
||||
# upstream-pointer + its identity-specific tweaks. Keeping the
|
||||
# header rewrites + buffering settings centralised prevents drift
|
||||
# between alpha and beta as the harness grows.
|
||||
map $host $tenant_upstream {
|
||||
default tenant-alpha;
|
||||
harness-tenant.localhost tenant-alpha;
|
||||
harness-tenant-alpha.localhost tenant-alpha;
|
||||
harness-tenant-beta.localhost tenant-beta;
|
||||
}
|
||||
|
||||
server {
|
||||
listen 8080 default_server;
|
||||
|
||||
# Reject Host headers we don't recognise — without this, an
|
||||
# unknown Host would silently route to the default tenant and
|
||||
# mask cross-tenant routing bugs in test output.
|
||||
server_name harness-tenant.localhost
|
||||
harness-tenant-alpha.localhost
|
||||
harness-tenant-beta.localhost
|
||||
localhost;
|
||||
|
||||
# Cap upload at 50MB to mirror the staging tenant nginx limit;
|
||||
# chat upload tests will fail closed if the platform handler
|
||||
# ever silently expands its limit (catches the failure mode
|
||||
# opposite of the chat-files lazy-heal incident).
|
||||
client_max_body_size 50m;
|
||||
|
||||
location / {
|
||||
# The map above resolves $tenant_upstream to the right
|
||||
# container based on the Host header — production CF tunnel
|
||||
# behavior in one line.
|
||||
proxy_pass http://$tenant_upstream:8080;
|
||||
|
||||
# Header parity with CF tunnel + AWS LB. Production CF sets
|
||||
# X-Forwarded-Proto=https; we keep http here because TLS
|
||||
# termination in compose is unnecessary for testing the
|
||||
# tenant logic — TLS is a CF concern, not a tenant bug
|
||||
# surface. If TLS-specific bugs ever bite, add cert-manager
|
||||
# + listen 8443 ssl here.
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Host $host;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# Streamable HTTP / SSE / WebSocket — the tenant exposes /ws
|
||||
# and /events/stream + MCP /mcp/stream. Disabling buffering
|
||||
# reproduces CF tunnel's pass-through streaming semantics
|
||||
# (CF tunnel = no buffering by default; nginx default IS
|
||||
# buffering, which would mask issue #2397-class streaming
|
||||
# bugs by accumulating output until the client disconnects).
|
||||
proxy_buffering off;
|
||||
proxy_request_buffering off;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Connection "";
|
||||
|
||||
# Read timeout — CF tunnel default is 100s. Setting this to
|
||||
# the same value catches "long agent run finishes after the
|
||||
# proxy already closed the upstream" failure mode.
|
||||
proxy_read_timeout 100s;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,173 @@
|
||||
# Production-shape harness for local E2E. Multi-tenant.
|
||||
#
|
||||
# Reproduces the SaaS tenant topology on localhost using the SAME
|
||||
# images that ship to production:
|
||||
#
|
||||
# client → cf-proxy (nginx, mimics CF tunnel headers, routes by Host)
|
||||
# ├─ Host: harness-tenant-alpha.localhost → tenant-alpha
|
||||
# │ ↓ (CP_UPSTREAM_URL=http://cp-stub:9090)
|
||||
# │ tenant-alpha (workspace-server/Dockerfile.tenant)
|
||||
# │ ↓
|
||||
# │ postgres-alpha (per-tenant DB, matches prod)
|
||||
# ├─ Host: harness-tenant-beta.localhost → tenant-beta
|
||||
# │ ↓
|
||||
# │ tenant-beta + postgres-beta
|
||||
# └─ cp-stub + redis (shared infra; CP is Railway-singleton in prod,
|
||||
# redis is shared cluster)
|
||||
#
|
||||
# The two-tenant topology catches:
|
||||
# - TenantGuard cross-tenant escape (alpha-org token shouldn't see
|
||||
# beta-tenant data even with a valid bearer)
|
||||
# - cf-proxy Host-header routing correctness
|
||||
# - Per-tenant DB isolation (workspaces table, activity_logs)
|
||||
# - Concurrent multi-tenant operation (no shared mutable state)
|
||||
#
|
||||
# Quickstart (no /etc/hosts edits — see README):
|
||||
# cd tests/harness && ./up.sh && ./seed.sh
|
||||
# ./replays/peer-discovery-404.sh
|
||||
# ./run-all-replays.sh
|
||||
#
|
||||
# Env config:
|
||||
# GIT_SHA — passed to BOTH tenant builds for /buildinfo verification.
|
||||
# CP_STUB_PEERS_MODE — peers failure mode for replay scripts.
|
||||
|
||||
services:
|
||||
# ─── Shared infra (matches prod: CP is Railway-singleton, redis shared) ───
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
networks: [harness-net]
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 2s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
cp-stub:
|
||||
build:
|
||||
context: ./cp-stub
|
||||
environment:
|
||||
PORT: "9090"
|
||||
CP_STUB_PEERS_MODE: "${CP_STUB_PEERS_MODE:-}"
|
||||
networks: [harness-net]
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -q -O- http://localhost:9090/healthz || exit 1"]
|
||||
interval: 2s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
# ─── Tenant alpha: postgres + workspace-server ────────────────────────
|
||||
postgres-alpha:
|
||||
image: postgres:16-alpine
|
||||
environment:
|
||||
POSTGRES_USER: harness
|
||||
POSTGRES_PASSWORD: harness
|
||||
POSTGRES_DB: molecule
|
||||
networks: [harness-net]
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U harness"]
|
||||
interval: 2s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
tenant-alpha:
|
||||
build:
|
||||
context: ../..
|
||||
dockerfile: workspace-server/Dockerfile.tenant
|
||||
args:
|
||||
GIT_SHA: "${GIT_SHA:-harness}"
|
||||
depends_on:
|
||||
postgres-alpha:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
cp-stub:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
DATABASE_URL: "postgres://harness:harness@postgres-alpha:5432/molecule?sslmode=disable"
|
||||
REDIS_URL: "redis://redis:6379"
|
||||
PORT: "8080"
|
||||
PLATFORM_URL: "http://tenant-alpha:8080"
|
||||
MOLECULE_ENV: "production"
|
||||
SECRETS_ENCRYPTION_KEY: "${SECRETS_ENCRYPTION_KEY:?must be set — run via tests/harness/up.sh, which generates one per run}"
|
||||
ADMIN_TOKEN: "harness-admin-token-alpha"
|
||||
MOLECULE_ORG_ID: "harness-org-alpha"
|
||||
CP_UPSTREAM_URL: "http://cp-stub:9090"
|
||||
RATE_LIMIT: "1000"
|
||||
CANVAS_PROXY_URL: "http://localhost:3000"
|
||||
networks: [harness-net]
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/health || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 20
|
||||
|
||||
# ─── Tenant beta: postgres + workspace-server (parallel to alpha) ─────
|
||||
postgres-beta:
|
||||
image: postgres:16-alpine
|
||||
environment:
|
||||
POSTGRES_USER: harness
|
||||
POSTGRES_PASSWORD: harness
|
||||
POSTGRES_DB: molecule
|
||||
networks: [harness-net]
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U harness"]
|
||||
interval: 2s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
tenant-beta:
|
||||
build:
|
||||
context: ../..
|
||||
dockerfile: workspace-server/Dockerfile.tenant
|
||||
args:
|
||||
GIT_SHA: "${GIT_SHA:-harness}"
|
||||
depends_on:
|
||||
postgres-beta:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
cp-stub:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
DATABASE_URL: "postgres://harness:harness@postgres-beta:5432/molecule?sslmode=disable"
|
||||
REDIS_URL: "redis://redis:6379"
|
||||
PORT: "8080"
|
||||
PLATFORM_URL: "http://tenant-beta:8080"
|
||||
MOLECULE_ENV: "production"
|
||||
SECRETS_ENCRYPTION_KEY: "${SECRETS_ENCRYPTION_KEY:?must be set — run via tests/harness/up.sh, which generates one per run}"
|
||||
# Distinct ADMIN_TOKEN — replays use this to verify TenantGuard
|
||||
# blocks alpha-token presented at beta's URL.
|
||||
ADMIN_TOKEN: "harness-admin-token-beta"
|
||||
MOLECULE_ORG_ID: "harness-org-beta"
|
||||
CP_UPSTREAM_URL: "http://cp-stub:9090"
|
||||
RATE_LIMIT: "1000"
|
||||
CANVAS_PROXY_URL: "http://localhost:3000"
|
||||
networks: [harness-net]
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/health || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 20
|
||||
|
||||
# ─── cf-proxy: routes by Host to the right tenant container ───────────
|
||||
# Production shape: same single CF tunnel front-doors every tenant
|
||||
# subdomain — the Host header carries the tenant identity, not the
|
||||
# routing destination. Local cf-proxy mirrors this exactly.
|
||||
cf-proxy:
|
||||
image: nginx:1.27-alpine
|
||||
depends_on:
|
||||
tenant-alpha:
|
||||
condition: service_healthy
|
||||
tenant-beta:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- ./cf-proxy/nginx.conf:/etc/nginx/nginx.conf:ro
|
||||
# Bind to 127.0.0.1 only — hardcoded ADMIN_TOKENs make 0.0.0.0
|
||||
# exposure unsafe even on a local network.
|
||||
ports:
|
||||
- "127.0.0.1:8080:8080"
|
||||
networks: [harness-net]
|
||||
|
||||
networks:
|
||||
harness-net:
|
||||
name: molecule-harness-net
|
||||
@@ -0,0 +1,14 @@
|
||||
# cp-stub — minimal CP stand-in for the local production-shape harness.
|
||||
# See main.go for the rationale. Self-contained build, no module deps.
|
||||
|
||||
FROM golang:1.25-alpine AS builder
|
||||
WORKDIR /src
|
||||
COPY go.mod ./
|
||||
COPY main.go ./
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /cp-stub .
|
||||
|
||||
FROM alpine:3.20
|
||||
RUN apk add --no-cache ca-certificates
|
||||
COPY --from=builder /cp-stub /cp-stub
|
||||
EXPOSE 9090
|
||||
ENTRYPOINT ["/cp-stub"]
|
||||
@@ -0,0 +1,3 @@
|
||||
module github.com/Molecule-AI/molecule-monorepo/tests/harness/cp-stub
|
||||
|
||||
go 1.25
|
||||
@@ -0,0 +1,113 @@
|
||||
// cp-stub — minimal control-plane stand-in for the local production-shape harness.
|
||||
//
|
||||
// In production, the tenant Go server reverse-proxies /cp/* to the SaaS
|
||||
// control-plane (molecule-controlplane). This stub plays that role on
|
||||
// localhost so we can exercise the SAME code path the tenant takes in
|
||||
// production — `if cpURL := os.Getenv("CP_UPSTREAM_URL"); cpURL != ""`
|
||||
// in workspace-server/internal/router/router.go fires, the proxy mount
|
||||
// activates, and tests exercise the real tenant→CP wire.
|
||||
//
|
||||
// This is NOT a CP reimplementation. It serves the minimum surface to:
|
||||
// 1. Boot the tenant image without /cp/* breaking the canvas bootstrap.
|
||||
// 2. Replay specific bug classes (e.g. /cp/* returns 404, returns 5xx,
|
||||
// returns malformed JSON) by toggling env vars.
|
||||
//
|
||||
// Scope is bounded by what the tenant + canvas actually call. Add new
|
||||
// handlers as new replay scenarios demand them. Drift from real CP is
|
||||
// tolerated because each handler is named for the exact path it serves —
|
||||
// when the real CP changes, the failing scenario tells us where to look.
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// redeployFleetCalls tracks how many times /cp/admin/tenants/redeploy-fleet
|
||||
// was invoked. Replay scripts assert > 0 to confirm the workflow's redeploy
|
||||
// step actually reached the stub (catches misrouted CP_URL configs).
|
||||
var redeployFleetCalls atomic.Int64
|
||||
|
||||
func main() {
|
||||
mux := http.NewServeMux()
|
||||
|
||||
// /cp/auth/me — canvas calls this on bootstrap; minimal user record
|
||||
// keeps the canvas from redirecting to login during local E2E.
|
||||
mux.HandleFunc("/cp/auth/me", func(w http.ResponseWriter, r *http.Request) {
|
||||
writeJSON(w, 200, map[string]any{
|
||||
"id": "harness-user",
|
||||
"email": "harness@local",
|
||||
"org_id": "harness-org",
|
||||
"roles": []string{"admin"},
|
||||
})
|
||||
})
|
||||
|
||||
// /cp/admin/tenants/redeploy-fleet — exercised by the
|
||||
// redeploy-tenants-on-{staging,main} workflow's local replay. Returns
|
||||
// the same shape the real CP returns so the verify-fleet logic in CI
|
||||
// can be tested without spinning up a real EC2 fleet.
|
||||
mux.HandleFunc("/cp/admin/tenants/redeploy-fleet", func(w http.ResponseWriter, r *http.Request) {
|
||||
redeployFleetCalls.Add(1)
|
||||
writeJSON(w, 200, map[string]any{
|
||||
"ok": true,
|
||||
"results": []map[string]any{
|
||||
{
|
||||
"slug": "harness-tenant",
|
||||
"phase": "redeploy",
|
||||
"ssm_status": "Success",
|
||||
"ssm_exit_code": 0,
|
||||
"healthz_ok": true,
|
||||
},
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
// __stub/state — expose stub state (counters) so replay scripts can
|
||||
// assert the tenant actually reached us. Read-only.
|
||||
mux.HandleFunc("/__stub/state", func(w http.ResponseWriter, r *http.Request) {
|
||||
writeJSON(w, 200, map[string]any{
|
||||
"redeploy_fleet_calls": redeployFleetCalls.Load(),
|
||||
})
|
||||
})
|
||||
|
||||
// Catch-all for any /cp/* the tenant proxies. Keeps the harness from
|
||||
// crashing the canvas when a new CP route is added — surfaces a clear
|
||||
// "stub doesn't implement X" error instead of opaque 502 from the
|
||||
// reverse proxy.
|
||||
mux.HandleFunc("/cp/", func(w http.ResponseWriter, r *http.Request) {
|
||||
writeJSON(w, 501, map[string]any{
|
||||
"error": "cp-stub: handler not implemented for " + r.Method + " " + r.URL.Path,
|
||||
"hint": "add a handler in tests/harness/cp-stub/main.go for the scenario you're testing",
|
||||
})
|
||||
})
|
||||
|
||||
// /healthz — readiness probe for compose's depends_on.
|
||||
mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
|
||||
writeJSON(w, 200, map[string]any{"status": "ok"})
|
||||
})
|
||||
|
||||
addr := ":" + envOr("PORT", "9090")
|
||||
log.Printf("cp-stub listening on %s", addr)
|
||||
if err := http.ListenAndServe(addr, mux); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func writeJSON(w http.ResponseWriter, code int, body any) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(code)
|
||||
if err := json.NewEncoder(w).Encode(body); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "cp-stub: write json: %v\n", err)
|
||||
}
|
||||
}
|
||||
|
||||
func envOr(k, def string) string {
|
||||
if v := os.Getenv(k); v != "" {
|
||||
return v
|
||||
}
|
||||
return def
|
||||
}
|
||||
Executable
+17
@@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tear down the harness and wipe per-tenant volumes.
|
||||
#
|
||||
# SECRETS_ENCRYPTION_KEY placeholder: docker compose validates the entire
|
||||
# compose file even for `down -v` (a destructive read-only operation that
|
||||
# doesn't read the env). up.sh generates a per-run key into its own
|
||||
# shell — this script runs in a fresh shell that wouldn't see it. Without
|
||||
# the placeholder, `compose down` exits non-zero before removing volumes,
|
||||
# silently leaking workspaces+activity_logs into the next ./up.sh + seed.sh
|
||||
# (verified 2026-05-02: tenant-isolation.sh F1/F2 saw 3× duplicate
|
||||
# alpha-parent + alpha-child rows accumulated across three prior boots).
|
||||
set -euo pipefail
|
||||
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
cd "$HERE"
|
||||
SECRETS_ENCRYPTION_KEY="${SECRETS_ENCRYPTION_KEY:-down-placeholder}" \
|
||||
docker compose -f compose.yml down -v --remove-orphans
|
||||
echo "[harness] down + volumes removed."
|
||||
+75
@@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env bash
|
||||
# Replay for issue #2395 — local proof that the /buildinfo verify gate
|
||||
# closes the SaaS deploy-chain blindness.
|
||||
#
|
||||
# Prior behavior: redeploy-fleet returned ssm_status=Success based on
|
||||
# the SSM RPC return code alone. EC2 tenants kept serving the cached
|
||||
# :latest digest because `docker compose up -d` is a no-op when the
|
||||
# tag hasn't been invalidated. ssm_status=Success was lying.
|
||||
#
|
||||
# This replay simulates that condition locally:
|
||||
# 1. Boot the harness with GIT_SHA=fix-applied.
|
||||
# 2. Curl /buildinfo and assert it returns "fix-applied" (the new code
|
||||
# actually shipped).
|
||||
# 3. Negative test: curl with a different EXPECTED_SHA and assert the
|
||||
# mismatch detection logic the workflow uses returns failure.
|
||||
#
|
||||
# This proves the verify-step's jq lookup + comparison logic works
|
||||
# against the SAME Dockerfile.tenant production builds. If the
|
||||
# /buildinfo route ever stops being wired through, this replay
|
||||
# catches it before it reaches a production tenant.
|
||||
|
||||
set -euo pipefail
|
||||
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
HARNESS_ROOT="$(dirname "$HERE")"
|
||||
# shellcheck source=../_curl.sh
|
||||
source "$HARNESS_ROOT/_curl.sh"
|
||||
|
||||
# 1. Confirm /buildinfo wire shape — same shape the workflow's jq lookup expects.
|
||||
echo "[replay] curl $BASE/buildinfo ..."
|
||||
BUILD_JSON=$(curl_anon "$BASE/buildinfo")
|
||||
echo "[replay] $BUILD_JSON"
|
||||
|
||||
ACTUAL_SHA=$(echo "$BUILD_JSON" | jq -r '.git_sha // ""')
|
||||
if [ -z "$ACTUAL_SHA" ]; then
|
||||
echo "[replay] FAIL: /buildinfo response missing git_sha field — workflow's jq lookup would null"
|
||||
exit 1
|
||||
fi
|
||||
echo "[replay] git_sha=$ACTUAL_SHA"
|
||||
|
||||
# 2. Assert the harness build threaded GIT_SHA through. If we got "dev",
|
||||
# the Dockerfile arg / ldflags wiring is broken — same regression
|
||||
# class that made #2395 invisible until production.
|
||||
EXPECTED_FROM_HARNESS="${HARNESS_GIT_SHA:-harness}"
|
||||
if [ "$ACTUAL_SHA" = "dev" ]; then
|
||||
echo "[replay] FAIL: /buildinfo returned 'dev' — Dockerfile.tenant ARG GIT_SHA isn't reaching the binary"
|
||||
echo "[replay] This regresses #2395 by silencing the deploy-verify gate."
|
||||
exit 1
|
||||
fi
|
||||
if [ "$ACTUAL_SHA" != "$EXPECTED_FROM_HARNESS" ]; then
|
||||
echo "[replay] WARN: /buildinfo returned '$ACTUAL_SHA' but harness was built with GIT_SHA='$EXPECTED_FROM_HARNESS'"
|
||||
echo "[replay] Image may be cached from a previous run. Run ./up.sh --rebuild to force a fresh build."
|
||||
fi
|
||||
|
||||
# 3. Negative test — replay the workflow's mismatch detection by
|
||||
# comparing the actual SHA to a deliberately-wrong expected SHA.
|
||||
WRONG_EXPECTED="0000000000000000000000000000000000000000"
|
||||
if [ "$ACTUAL_SHA" = "$WRONG_EXPECTED" ]; then
|
||||
echo "[replay] FAIL: /buildinfo returned all-zero SHA — wiring inverted"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 4. Replay the workflow's exact comparison logic so a regression in
|
||||
# the verify step's bash gets caught here.
|
||||
MISMATCH_DETECTED=0
|
||||
if [ "$ACTUAL_SHA" != "$WRONG_EXPECTED" ]; then
|
||||
MISMATCH_DETECTED=1
|
||||
fi
|
||||
if [ "$MISMATCH_DETECTED" != "1" ]; then
|
||||
echo "[replay] FAIL: workflow comparison logic would not flag a real mismatch"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "[replay] PASS: /buildinfo wire shape, GIT_SHA injection, and mismatch detection all work in"
|
||||
echo " production-shape topology. The redeploy-fleet verify-step covers what it claims to."
|
||||
+182
@@ -0,0 +1,182 @@
|
||||
#!/usr/bin/env bash
|
||||
# Replay for the channel envelope peer_id trust-boundary fix
|
||||
# (PR #2481, follow-up to PR #2471). Verifies that the PUBLISHED wheel
|
||||
# installed on this machine — not local source — gates malformed peer_id
|
||||
# at both the envelope builder and the agent_card_url builder.
|
||||
#
|
||||
# Why this matters:
|
||||
# - Unit tests in workspace/tests/ run against local source. They
|
||||
# prove the fix works in source. They DO NOT prove the published
|
||||
# wheel contains the fix.
|
||||
# - The wheel rewriter (scripts/build_runtime_package.py) renames
|
||||
# symbols + paths. Any rewrite drift could silently strip the
|
||||
# guard from the shipped artifact.
|
||||
# - This replay imports from `molecule_runtime.a2a_mcp_server` (the
|
||||
# wheel-rewritten path), exercises the actual published code, and
|
||||
# asserts the envelope shape. If the wheel build ever ships without
|
||||
# the guard, this fails — even if unit tests on local source pass.
|
||||
#
|
||||
# Phases:
|
||||
# A. Confirm an installed molecule-runtime version that contains the
|
||||
# #2481 fix (>= 0.1.78).
|
||||
# B. Call `_build_channel_notification` with peer_id="../../foo" and
|
||||
# assert (1) meta["peer_id"] == "", (2) no agent_card_url field,
|
||||
# (3) no peer_name/peer_role.
|
||||
# C. Symmetric case: peer_id with embedded XML-attribute injection
|
||||
# bytes — assert the same scrubbing.
|
||||
# D. Happy path: a valid UUID peer_id is preserved (proves we didn't
|
||||
# regress legitimate enrichment).
|
||||
# E. Direct check on the URL builder — `_agent_card_url_for("../../foo")`
|
||||
# must return "" and never an unsanitised URL.
|
||||
|
||||
set -euo pipefail
|
||||
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
HARNESS_ROOT="$(dirname "$HERE")"
|
||||
cd "$HARNESS_ROOT"
|
||||
# shellcheck source=../_curl.sh
|
||||
source "$HARNESS_ROOT/_curl.sh"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
assert() {
|
||||
local desc="$1" expected="$2" actual="$3"
|
||||
if [ "$expected" = "$actual" ]; then
|
||||
printf " PASS %s\n" "$desc"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
printf " FAIL %s\n expected: %s\n got : %s\n" "$desc" "$expected" "$actual" >&2
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
# ─── Phase A: wheel version contains the fix ───────────────────────────
|
||||
echo "[replay] A. confirming installed molecule-ai-workspace-runtime contains #2481..."
|
||||
INSTALLED=$(pip3 show molecule-ai-workspace-runtime 2>/dev/null | awk -F': ' '/^Version:/ {print $2}')
|
||||
if [ -z "$INSTALLED" ]; then
|
||||
echo "[replay] FAIL A: molecule-ai-workspace-runtime not installed."
|
||||
echo " Install: pip3 install molecule-ai-workspace-runtime"
|
||||
exit 2
|
||||
fi
|
||||
echo "[replay] installed version: $INSTALLED"
|
||||
|
||||
# 0.1.78 is the first published version after #2481 merged to staging.
|
||||
# Compare via Python distutils-style version sort (works across patch
|
||||
# bumps without sed-fragility).
|
||||
HAS_FIX=$(python3 -c "
|
||||
from packaging.version import parse
|
||||
print('yes' if parse('$INSTALLED') >= parse('0.1.78') else 'no')
|
||||
" 2>/dev/null || echo "unknown")
|
||||
if [ "$HAS_FIX" != "yes" ]; then
|
||||
echo "[replay] FAIL A: installed $INSTALLED < 0.1.78 (the version that shipped the #2481 fix)."
|
||||
echo " Upgrade: pip3 install --upgrade molecule-ai-workspace-runtime"
|
||||
exit 2
|
||||
fi
|
||||
echo "[replay] ✓ contains #2481 trust-boundary fix"
|
||||
|
||||
# ─── Phase B-E: in-process assertions against the installed wheel ──────
|
||||
# We don't need WORKSPACE_ID/PLATFORM_URL/MOLECULE_WORKSPACE_TOKEN to
|
||||
# import the module — the env validation only fires at console-script
|
||||
# entry. We use molecule_runtime.* (the wheel-rewritten import path)
|
||||
# rather than workspace.a2a_mcp_server (local source) so this exercises
|
||||
# the SHIPPED code.
|
||||
echo ""
|
||||
echo "[replay] B-E. exercising _build_channel_notification + _agent_card_url_for from the installed wheel..."
|
||||
|
||||
OUT=$(WORKSPACE_ID=00000000-0000-0000-0000-000000000000 \
|
||||
PLATFORM_URL=http://localhost:8080 \
|
||||
MOLECULE_WORKSPACE_TOKEN=stub \
|
||||
MOLECULE_MCP_DISABLE_HEARTBEAT=1 \
|
||||
python3 - <<'PYEOF'
|
||||
import json
|
||||
import sys
|
||||
|
||||
from molecule_runtime.a2a_mcp_server import _build_channel_notification
|
||||
from molecule_runtime.a2a_client import _agent_card_url_for
|
||||
|
||||
results = []
|
||||
|
||||
def emit(name, value):
|
||||
results.append({"name": name, "value": value})
|
||||
|
||||
# ── B: path-traversal peer_id stripped from envelope ──
|
||||
payload = _build_channel_notification({
|
||||
"peer_id": "../../foo",
|
||||
"kind": "peer_agent",
|
||||
"text": "redirect-attempt",
|
||||
"activity_id": "act-1",
|
||||
"method": "message/send",
|
||||
"created_at": "2026-05-01T00:00:00Z",
|
||||
})
|
||||
meta = payload["params"]["meta"]
|
||||
emit("B1_peer_id_scrubbed", meta.get("peer_id", "<missing>"))
|
||||
emit("B2_agent_card_url_absent", "absent" if "agent_card_url" not in meta else meta["agent_card_url"])
|
||||
emit("B3_peer_name_absent", "absent" if "peer_name" not in meta else meta["peer_name"])
|
||||
emit("B4_peer_role_absent", "absent" if "peer_role" not in meta else meta["peer_role"])
|
||||
|
||||
# ── C: XML-attribute-injection-shape peer_id ──
|
||||
payload = _build_channel_notification({
|
||||
"peer_id": 'aaa" onclick="alert(1)',
|
||||
"kind": "peer_agent",
|
||||
"text": "xss",
|
||||
})
|
||||
meta = payload["params"]["meta"]
|
||||
emit("C1_peer_id_scrubbed", meta.get("peer_id", "<missing>"))
|
||||
emit("C2_agent_card_url_absent", "absent" if "agent_card_url" not in meta else "leaked")
|
||||
|
||||
# ── D: legitimate UUID is preserved ──
|
||||
valid_uuid = "11111111-2222-3333-4444-555555555555"
|
||||
payload = _build_channel_notification({
|
||||
"peer_id": valid_uuid,
|
||||
"kind": "peer_agent",
|
||||
"text": "legit",
|
||||
})
|
||||
meta = payload["params"]["meta"]
|
||||
emit("D1_peer_id_preserved", meta.get("peer_id", "<missing>"))
|
||||
# agent_card_url IS present (we don't gate the URL itself on whether the registry is reachable)
|
||||
emit("D2_agent_card_url_present", "yes" if meta.get("agent_card_url", "").endswith(valid_uuid) else "no")
|
||||
|
||||
# ── E: direct URL builder gate ──
|
||||
emit("E1_url_builder_strips_traversal", _agent_card_url_for("../../foo"))
|
||||
emit("E2_url_builder_strips_xml", _agent_card_url_for('a" onclick="x'))
|
||||
emit("E3_url_builder_accepts_uuid_endswith", "yes" if _agent_card_url_for(valid_uuid).endswith(valid_uuid) else "no")
|
||||
|
||||
print(json.dumps(results))
|
||||
PYEOF
|
||||
)
|
||||
|
||||
# Parse and assert each result.
|
||||
echo "$OUT" | python3 -c "
|
||||
import json, sys
|
||||
results = json.loads(sys.stdin.read())
|
||||
for r in results:
|
||||
print(f\"{r['name']}={r['value']}\")
|
||||
" > /tmp/cha-envelope-results.txt
|
||||
|
||||
while IFS='=' read -r key value; do
|
||||
case "$key" in
|
||||
B1_peer_id_scrubbed) assert "B1: malicious peer_id scrubbed to \"\"" "" "$value" ;;
|
||||
B2_agent_card_url_absent) assert "B2: agent_card_url not emitted" "absent" "$value" ;;
|
||||
B3_peer_name_absent) assert "B3: peer_name not enriched" "absent" "$value" ;;
|
||||
B4_peer_role_absent) assert "B4: peer_role not enriched" "absent" "$value" ;;
|
||||
C1_peer_id_scrubbed) assert "C1: XML-injection peer_id scrubbed" "" "$value" ;;
|
||||
C2_agent_card_url_absent) assert "C2: XML-injection URL not emitted" "absent" "$value" ;;
|
||||
D1_peer_id_preserved) assert "D1: valid UUID peer_id preserved" "11111111-2222-3333-4444-555555555555" "$value" ;;
|
||||
D2_agent_card_url_present) assert "D2: agent_card_url present for valid id" "yes" "$value" ;;
|
||||
E1_url_builder_strips_traversal) assert "E1: _agent_card_url_for(\"../../foo\") returns \"\"" "" "$value" ;;
|
||||
E2_url_builder_strips_xml) assert "E2: _agent_card_url_for(XML-injection) returns \"\"" "" "$value" ;;
|
||||
E3_url_builder_accepts_uuid_endswith) assert "E3: _agent_card_url_for(valid uuid) builds canonical URL" "yes" "$value" ;;
|
||||
esac
|
||||
done < /tmp/cha-envelope-results.txt
|
||||
|
||||
echo ""
|
||||
if [ "$FAIL" -gt 0 ]; then
|
||||
echo "[replay] FAIL: $PASS pass, $FAIL fail"
|
||||
echo ""
|
||||
echo "[replay] If B/C/E failed: the published wheel does NOT contain the #2481 fix."
|
||||
echo "[replay] Likely causes:"
|
||||
echo " - Wheel rewriter dropped _validate_peer_id from molecule_runtime.a2a_client"
|
||||
echo " - publish-runtime.yml regressed to a SHA before #2481 (check pip install version)"
|
||||
exit 1
|
||||
fi
|
||||
echo "[replay] PASS: $PASS/$PASS — channel envelope peer_id trust boundary holds in published wheel $INSTALLED"
|
||||
Executable
+175
@@ -0,0 +1,175 @@
|
||||
#!/usr/bin/env bash
|
||||
# Replay for the chat_history MCP tool — exercises the full SaaS-shape
|
||||
# wire that PRs #2472 (peer_id filter), #2474 (chat_history client), and
|
||||
# #2476 (before_ts paging) ride on. Runs against the prod-shape tenant
|
||||
# image, not unit-mock'd handlers, so any drift between the Go handler
|
||||
# and the Python tool's expectations surfaces here.
|
||||
#
|
||||
# What this catches that unit tests don't:
|
||||
# - Real Postgres planner behaviour on the (source_id = $X OR target_id = $X)
|
||||
# OR clause (issue #2478 — both indexes missing).
|
||||
# - cf-proxy header rewrites + TenantGuard middleware in the path.
|
||||
# - lib/pq + Postgres driver type binding for time.Time parameters.
|
||||
# - JSON encoding of created_at across the wire (timezone, precision).
|
||||
#
|
||||
# Phases:
|
||||
# A. Seed three a2a_receive rows for alpha with peer_id=beta, spread
|
||||
# across distinct timestamps.
|
||||
# B. Basic peer_id filter: GET ?type=a2a_receive&peer_id=beta&limit=10
|
||||
# → assert 3 rows DESC.
|
||||
# C. Limit cap: limit=2 → assert 2 newest rows.
|
||||
# D. before_ts paging: take the 2nd-newest's created_at, GET with
|
||||
# before_ts=that → assert the 1 strictly-older row.
|
||||
# E. OR clause (target side): seed an a2a_send row where source=alpha,
|
||||
# target=beta. GET with type unset, peer_id=beta → assert that row
|
||||
# surfaces too (target_id match, not just source_id).
|
||||
# F. Trust-boundary: peer_id="not-a-uuid" → 400 + "peer_id must be a UUID".
|
||||
# G. Trust-boundary: before_ts="garbage" → 400 + RFC3339 example.
|
||||
# H. URL-encoded SQL-injection-shape peer_id → 400 (matches activity_test.go's
|
||||
# malicious-peer-id panel).
|
||||
|
||||
set -euo pipefail
|
||||
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
HARNESS_ROOT="$(dirname "$HERE")"
|
||||
cd "$HARNESS_ROOT"
|
||||
|
||||
if [ ! -f .seed.env ]; then
|
||||
echo "[replay] no .seed.env — running ./seed.sh first..."
|
||||
./seed.sh
|
||||
fi
|
||||
# shellcheck source=/dev/null
|
||||
source .seed.env
|
||||
# shellcheck source=../_curl.sh
|
||||
source "$HARNESS_ROOT/_curl.sh"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
assert() {
|
||||
local desc="$1" expected="$2" actual="$3"
|
||||
if [ "$expected" = "$actual" ]; then
|
||||
printf " PASS %s\n" "$desc"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
printf " FAIL %s\n expected: %s\n got : %s\n" "$desc" "$expected" "$actual" >&2
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
assert_contains() {
|
||||
local desc="$1" needle="$2" haystack="$3"
|
||||
if echo "$haystack" | grep -qF "$needle"; then
|
||||
printf " PASS %s\n" "$desc"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
printf " FAIL %s\n expected to contain: %s\n got: %s\n" "$desc" "$needle" "$haystack" >&2
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
echo "[replay] alpha=$ALPHA_ID beta=$BETA_ID"
|
||||
|
||||
# ─── Phase A: seed the activity_logs table ─────────────────────────────
|
||||
# Inserted via psql so the seed is independent of the platform's HTTP
|
||||
# Notify path — that path itself ships through the same handler chain
|
||||
# we want to test, and seeding through it would conflate setup and
|
||||
# assertion.
|
||||
echo ""
|
||||
echo "[replay] A. seeding 3 a2a_receive rows for alpha←beta at distinct timestamps..."
|
||||
psql_exec >/dev/null <<SQL
|
||||
DELETE FROM activity_logs WHERE workspace_id = '$ALPHA_ID';
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, source_id, target_id, method, summary, created_at)
|
||||
VALUES
|
||||
('$ALPHA_ID', 'a2a_receive', '$BETA_ID', '$ALPHA_ID', 'message/send', 'oldest from beta', NOW() - INTERVAL '4 hours'),
|
||||
('$ALPHA_ID', 'a2a_receive', '$BETA_ID', '$ALPHA_ID', 'message/send', 'middle from beta', NOW() - INTERVAL '2 hours'),
|
||||
('$ALPHA_ID', 'a2a_receive', '$BETA_ID', '$ALPHA_ID', 'message/send', 'newest from beta', NOW() - INTERVAL '1 hour');
|
||||
SQL
|
||||
echo "[replay] inserted 3 rows"
|
||||
|
||||
# ─── Phase B: basic peer_id filter ─────────────────────────────────────
|
||||
echo ""
|
||||
echo "[replay] B. GET ?type=a2a_receive&peer_id=beta&limit=10 ..."
|
||||
RESP=$(curl_admin "$BASE/workspaces/$ALPHA_ID/activity?type=a2a_receive&peer_id=$BETA_ID&limit=10")
|
||||
COUNT=$(echo "$RESP" | jq 'length')
|
||||
assert "B1: returns 3 rows" "3" "$COUNT"
|
||||
|
||||
# DESC order — newest first
|
||||
NEWEST_SUMMARY=$(echo "$RESP" | jq -r '.[0].summary')
|
||||
assert "B2: newest first (DESC ordering)" "newest from beta" "$NEWEST_SUMMARY"
|
||||
|
||||
OLDEST_SUMMARY=$(echo "$RESP" | jq -r '.[2].summary')
|
||||
assert "B3: oldest last" "oldest from beta" "$OLDEST_SUMMARY"
|
||||
|
||||
# ─── Phase C: limit cap ────────────────────────────────────────────────
|
||||
echo ""
|
||||
echo "[replay] C. limit=2 (expecting 2 newest) ..."
|
||||
RESP=$(curl_admin "$BASE/workspaces/$ALPHA_ID/activity?type=a2a_receive&peer_id=$BETA_ID&limit=2")
|
||||
assert "C1: limit clamps to 2" "2" "$(echo "$RESP" | jq 'length')"
|
||||
assert "C2: kept newest" "newest from beta" "$(echo "$RESP" | jq -r '.[0].summary')"
|
||||
assert "C3: kept middle" "middle from beta" "$(echo "$RESP" | jq -r '.[1].summary')"
|
||||
|
||||
# ─── Phase D: before_ts paging ─────────────────────────────────────────
|
||||
echo ""
|
||||
echo "[replay] D. before_ts paging — walk backwards from middle row's created_at ..."
|
||||
# Take the newest row's created_at, page from there.
|
||||
NEWEST_TS=$(curl_admin "$BASE/workspaces/$ALPHA_ID/activity?type=a2a_receive&peer_id=$BETA_ID&limit=1" \
|
||||
| jq -r '.[0].created_at')
|
||||
# RFC3339 with timezone — Go's time.Parse(RFC3339) handles `2026-...Z` AND
|
||||
# `2026-...+00:00`. Postgres returns the latter; URL-encode the +.
|
||||
NEWEST_TS_ENCODED=$(echo "$NEWEST_TS" | python3 -c 'import sys, urllib.parse; print(urllib.parse.quote(sys.stdin.read().strip(), safe=""))')
|
||||
RESP=$(curl_admin "$BASE/workspaces/$ALPHA_ID/activity?type=a2a_receive&peer_id=$BETA_ID&before_ts=$NEWEST_TS_ENCODED&limit=10")
|
||||
assert "D1: 2 rows older than newest" "2" "$(echo "$RESP" | jq 'length')"
|
||||
assert "D2: middle is now newest in the slice" "middle from beta" "$(echo "$RESP" | jq -r '.[0].summary')"
|
||||
# Strict less-than — the row at exactly NEWEST_TS must NOT come back.
|
||||
NOT_INCLUDED=$(echo "$RESP" | jq -r '[.[].summary] | index("newest from beta") // "absent"')
|
||||
assert "D3: strictly older — newest excluded" "absent" "$NOT_INCLUDED"
|
||||
|
||||
# ─── Phase E: OR clause covers target_id direction ─────────────────────
|
||||
echo ""
|
||||
echo "[replay] E. OR clause: seed an a2a_send row (alpha→beta) and confirm it surfaces ..."
|
||||
psql_exec >/dev/null <<SQL
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, source_id, target_id, method, summary, created_at)
|
||||
VALUES ('$ALPHA_ID', 'a2a_send', '$ALPHA_ID', '$BETA_ID', 'message/send', 'sent to beta', NOW());
|
||||
SQL
|
||||
# No type filter — we want both a2a_receive AND a2a_send rows back.
|
||||
RESP=$(curl_admin "$BASE/workspaces/$ALPHA_ID/activity?peer_id=$BETA_ID&limit=10")
|
||||
HAS_SENT=$(echo "$RESP" | jq '[.[].summary] | any(. == "sent to beta")')
|
||||
assert "E1: a2a_send (alpha→beta) returned via target_id match" "true" "$HAS_SENT"
|
||||
TOTAL=$(echo "$RESP" | jq 'length')
|
||||
assert "E2: total = 4 (3 receives + 1 send)" "4" "$TOTAL"
|
||||
|
||||
# ─── Phase F: malformed peer_id → 400 ──────────────────────────────────
|
||||
echo ""
|
||||
echo "[replay] F. malformed peer_id → 400 ..."
|
||||
HTTP_CODE=$(curl_admin -o /tmp/cha-bad-peer.json -w '%{http_code}' \
|
||||
"$BASE/workspaces/$ALPHA_ID/activity?type=a2a_receive&peer_id=not-a-uuid")
|
||||
assert "F1: HTTP 400" "400" "$HTTP_CODE"
|
||||
assert_contains "F2: error names the param" "peer_id must be a UUID" "$(cat /tmp/cha-bad-peer.json)"
|
||||
|
||||
# ─── Phase G: malformed before_ts → 400 ────────────────────────────────
|
||||
echo ""
|
||||
echo "[replay] G. malformed before_ts → 400 ..."
|
||||
HTTP_CODE=$(curl_admin -o /tmp/cha-bad-ts.json -w '%{http_code}' \
|
||||
"$BASE/workspaces/$ALPHA_ID/activity?type=a2a_receive&before_ts=garbage")
|
||||
assert "G1: HTTP 400" "400" "$HTTP_CODE"
|
||||
assert_contains "G2: error mentions RFC3339" "RFC3339" "$(cat /tmp/cha-bad-ts.json)"
|
||||
|
||||
# ─── Phase H: SQL-injection-shape peer_id is rejected ──────────────────
|
||||
echo ""
|
||||
echo "[replay] H. URL-encoded SQLi-shape peer_id → 400 ..."
|
||||
SQLI_ENCODED="%27%20OR%201%3D1%20--" # ' OR 1=1 --
|
||||
HTTP_CODE=$(curl_admin -o /tmp/cha-sqli.json -w '%{http_code}' \
|
||||
"$BASE/workspaces/$ALPHA_ID/activity?type=a2a_receive&peer_id=$SQLI_ENCODED")
|
||||
assert "H1: HTTP 400 (UUID validation rejects before SQL builder sees it)" "400" "$HTTP_CODE"
|
||||
|
||||
# ─── Cleanup: tear down seeded rows so subsequent runs don't accumulate ─
|
||||
psql_exec >/dev/null <<SQL
|
||||
DELETE FROM activity_logs WHERE workspace_id = '$ALPHA_ID';
|
||||
SQL
|
||||
|
||||
echo ""
|
||||
if [ "$FAIL" -gt 0 ]; then
|
||||
echo "[replay] FAIL: $PASS pass, $FAIL fail"
|
||||
exit 1
|
||||
fi
|
||||
echo "[replay] PASS: $PASS/$PASS — chat_history wire (peer_id filter + before_ts paging + trust boundary + OR clause)"
|
||||
Executable
+135
@@ -0,0 +1,135 @@
|
||||
#!/usr/bin/env bash
|
||||
# Replay for issue #2397 — local proof that peer-discovery surfaces
|
||||
# actionable diagnostics instead of "may be isolated".
|
||||
#
|
||||
# Prior behavior: tool_list_peers returned "No peers available (this
|
||||
# workspace may be isolated)" regardless of WHY peers were empty —
|
||||
# five distinct conditions (200+empty, 401, 403, 404, 5xx, network)
|
||||
# collapsed to one ambiguous message.
|
||||
#
|
||||
# This replay proves two things, separately:
|
||||
# (a) WIRE: the platform side of the contract — the tenant's
|
||||
# /registry/<unregistered>/peers returns 404. If this regresses
|
||||
# (e.g. tenant starts returning 200 with empty list, or 500),
|
||||
# the runtime helper would parse it differently and the agent
|
||||
# would see a different diagnostic. The harness catches that here.
|
||||
# (b) PARSE: the runtime helper, given a 404, produces a diagnostic
|
||||
# containing "404" + "register" hints. Done in unit tests against
|
||||
# a mock httpx response (test_a2a_client.py::TestGetPeersWithDiagnostic
|
||||
# — the harness re-asserts the same contract here against a real
|
||||
# Python eval that does NOT depend on workspace auth tokens.
|
||||
#
|
||||
# Why split the assertion: the Python eval here doesn't have the
|
||||
# workspace's auth token file, so going through get_peers_with_diagnostic
|
||||
# directly would hit the platform without auth and produce a different
|
||||
# branch (401 instead of 404). Splitting (a) from (b) keeps each
|
||||
# assertion targeting exactly what it claims to test.
|
||||
|
||||
set -euo pipefail
|
||||
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
HARNESS_ROOT="$(dirname "$HERE")"
|
||||
cd "$HARNESS_ROOT"
|
||||
|
||||
if [ ! -f .seed.env ]; then
|
||||
echo "[replay] no .seed.env — running ./seed.sh first..."
|
||||
./seed.sh
|
||||
fi
|
||||
# shellcheck source=/dev/null
|
||||
source .seed.env
|
||||
# shellcheck source=../_curl.sh
|
||||
source "$HARNESS_ROOT/_curl.sh"
|
||||
|
||||
# ─── (a) WIRE: tenant returns 404 for an unregistered workspace ────────
|
||||
ROGUE_ID="$(uuidgen | tr '[:upper:]' '[:lower:]')"
|
||||
echo "[replay] (a) WIRE: querying /registry/$ROGUE_ID/peers (unregistered workspace)..."
|
||||
HTTP_CODE=$(curl_admin -o /tmp/peer-replay.json -w '%{http_code}' \
|
||||
-H "X-Workspace-ID: $ROGUE_ID" \
|
||||
"$BASE/registry/$ROGUE_ID/peers")
|
||||
|
||||
echo "[replay] tenant responded HTTP $HTTP_CODE"
|
||||
if [ "$HTTP_CODE" != "404" ]; then
|
||||
echo "[replay] FAIL (a): expected 404 from /registry/<unregistered>/peers, got $HTTP_CODE"
|
||||
echo "[replay] This is a platform-side regression — the runtime's diagnostic helper"
|
||||
echo "[replay] would see a different status code than the unit tests cover."
|
||||
cat /tmp/peer-replay.json
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# ─── (b) PARSE: helper converts a synthetic 404 to actionable diagnostic ─
|
||||
#
|
||||
# We construct a synthetic httpx 404 response and run the helper against
|
||||
# it directly. This isolates the parse branch we want to test from the
|
||||
# auth-context concerns of going through the network. The helper's network
|
||||
# branches are exhaustively covered by tests/test_a2a_client.py — this is
|
||||
# a regression-guard that the helper IS in the install, IS importable in
|
||||
# the harness's Python env, and IS reading the status code.
|
||||
|
||||
WORKSPACE_PATH="$(cd "$HARNESS_ROOT/../../workspace" && pwd)"
|
||||
DIAGNOSTIC=$(WORKSPACE_ID="harness-rogue" PYTHONPATH="$WORKSPACE_PATH" \
|
||||
python3 - "$WORKSPACE_PATH" <<'PYEOF'
|
||||
import asyncio
|
||||
import sys
|
||||
import types
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
# Stub platform_auth so a2a_client imports cleanly without requiring a
|
||||
# real workspace token file. The helper's auth_headers() only matters
|
||||
# when going through the network; we're feeding it a mock response.
|
||||
_pa = types.ModuleType("platform_auth")
|
||||
_pa.auth_headers = lambda: {}
|
||||
_pa.self_source_headers = lambda: {}
|
||||
sys.modules.setdefault("platform_auth", _pa)
|
||||
|
||||
sys.path.insert(0, sys.argv[1])
|
||||
import a2a_client # noqa: E402
|
||||
|
||||
# This replay validates PR #2399's diagnostic helper. If the workspace
|
||||
# runtime in the current checkout pre-dates that fix, fail with a
|
||||
# clear message instead of an opaque AttributeError.
|
||||
if not hasattr(a2a_client, "get_peers_with_diagnostic"):
|
||||
print("__SKIP__: workspace/a2a_client.py is pre-#2399 (no get_peers_with_diagnostic).")
|
||||
sys.exit(0)
|
||||
|
||||
resp = MagicMock()
|
||||
resp.status_code = 404
|
||||
resp.json = MagicMock(return_value={"detail": "not found"})
|
||||
|
||||
mock_client = AsyncMock()
|
||||
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
||||
mock_client.__aexit__ = AsyncMock(return_value=False)
|
||||
mock_client.get = AsyncMock(return_value=resp)
|
||||
|
||||
async def main():
|
||||
with patch("a2a_client.httpx.AsyncClient", return_value=mock_client):
|
||||
peers, diag = await a2a_client.get_peers_with_diagnostic()
|
||||
print(repr(diag))
|
||||
|
||||
asyncio.run(main())
|
||||
PYEOF
|
||||
)
|
||||
|
||||
if [[ "$DIAGNOSTIC" == __SKIP__:* ]]; then
|
||||
echo "[replay] (b) SKIP: ${DIAGNOSTIC#__SKIP__: }"
|
||||
echo "[replay] Re-run after #2399 lands on staging."
|
||||
echo ""
|
||||
echo "[replay] PASS (a) only: peer-discovery wire returns 404 (parse branch skipped — see above)."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "[replay] (b) PARSE: helper diagnostic = $DIAGNOSTIC"
|
||||
|
||||
if ! echo "$DIAGNOSTIC" | grep -q "404"; then
|
||||
echo "[replay] FAIL (b): diagnostic missing '404' — helper regressed to swallow-the-status-code"
|
||||
exit 1
|
||||
fi
|
||||
if ! echo "$DIAGNOSTIC" | grep -qi "regist"; then
|
||||
echo "[replay] FAIL (b): diagnostic missing 'register' guidance — helper regressed to opaque message"
|
||||
exit 1
|
||||
fi
|
||||
if echo "$DIAGNOSTIC" | grep -qi "may be isolated"; then
|
||||
echo "[replay] FAIL (b): diagnostic still says 'may be isolated' — fix didn't reach this code path"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "[replay] PASS: peer-discovery (a) wire returns 404, (b) helper produces actionable diagnostic."
|
||||
+185
@@ -0,0 +1,185 @@
|
||||
#!/usr/bin/env bash
|
||||
# Replay for per-tenant independence — each tenant runs the same
|
||||
# workflow concurrently with no cross-bleed in workspaces table or
|
||||
# activity_logs.
|
||||
#
|
||||
# What this proves that tenant-isolation.sh doesn't:
|
||||
# tenant-isolation.sh proves that REQUESTS get rejected at the
|
||||
# middleware layer when they target the wrong tenant. THIS replay
|
||||
# proves that even when both tenants are doing legitimate work
|
||||
# simultaneously, the back-end state stays partitioned: no row in
|
||||
# alpha's activity_logs ever shows up in beta's, no FK-resolution
|
||||
# ever crosses tenants, etc.
|
||||
#
|
||||
# Test shape: seed activity_logs in BOTH tenants in parallel using
|
||||
# distinct row counts (3 vs 5) so we can distinguish them. Then
|
||||
# fetch each tenant's history and assert the count + content match
|
||||
# the seed exactly — proves no leak in either direction.
|
||||
#
|
||||
# Phases:
|
||||
# A. Seed alpha tenant: 3 a2a_receive rows (parent ← child).
|
||||
# B. Seed beta tenant: 5 a2a_receive rows (parent ← child).
|
||||
# C. GET alpha history → exactly 3 rows, all alpha-summary.
|
||||
# D. GET beta history → exactly 5 rows, all beta-summary.
|
||||
# E. Direct DB sanity — alpha PG has only alpha rows, beta PG only beta.
|
||||
# F. Concurrent write race — both tenants take turns INSERTing
|
||||
# simultaneously; each tenant's count after the race matches what
|
||||
# it INSERTed. Catches "shared cache poison" / "shared connection
|
||||
# pool" failure modes that don't show up in single-tenant tests.
|
||||
|
||||
set -euo pipefail
|
||||
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
HARNESS_ROOT="$(dirname "$HERE")"
|
||||
cd "$HARNESS_ROOT"
|
||||
|
||||
if [ ! -f .seed.env ]; then
|
||||
echo "[replay] no .seed.env — running ./seed.sh first..."
|
||||
./seed.sh
|
||||
fi
|
||||
# shellcheck source=/dev/null
|
||||
source .seed.env
|
||||
# shellcheck source=../_curl.sh
|
||||
source "$HARNESS_ROOT/_curl.sh"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
assert() {
|
||||
local desc="$1" expected="$2" actual="$3"
|
||||
if [ "$expected" = "$actual" ]; then
|
||||
printf " PASS %s\n" "$desc"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
printf " FAIL %s\n expected: %s\n got : %s\n" "$desc" "$expected" "$actual" >&2
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
# ─── Cleanup (idempotent) ──────────────────────────────────────────────
|
||||
psql_exec_alpha >/dev/null <<SQL
|
||||
DELETE FROM activity_logs WHERE workspace_id = '$ALPHA_PARENT_ID';
|
||||
SQL
|
||||
psql_exec_beta >/dev/null <<SQL
|
||||
DELETE FROM activity_logs WHERE workspace_id = '$BETA_PARENT_ID';
|
||||
SQL
|
||||
|
||||
# ─── Phase A: seed alpha (3 rows) ──────────────────────────────────────
|
||||
echo "[replay] A. seeding alpha tenant: 3 a2a_receive rows for alpha-parent ←alpha-child"
|
||||
psql_exec_alpha >/dev/null <<SQL
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, source_id, target_id, method, summary, created_at)
|
||||
VALUES
|
||||
('$ALPHA_PARENT_ID', 'a2a_receive', '$ALPHA_CHILD_ID', '$ALPHA_PARENT_ID', 'message/send', 'alpha-msg-1', NOW() - INTERVAL '3 hours'),
|
||||
('$ALPHA_PARENT_ID', 'a2a_receive', '$ALPHA_CHILD_ID', '$ALPHA_PARENT_ID', 'message/send', 'alpha-msg-2', NOW() - INTERVAL '2 hours'),
|
||||
('$ALPHA_PARENT_ID', 'a2a_receive', '$ALPHA_CHILD_ID', '$ALPHA_PARENT_ID', 'message/send', 'alpha-msg-3', NOW() - INTERVAL '1 hour');
|
||||
SQL
|
||||
|
||||
# ─── Phase B: seed beta (5 rows — distinct count) ──────────────────────
|
||||
echo "[replay] B. seeding beta tenant: 5 a2a_receive rows for beta-parent ← beta-child"
|
||||
psql_exec_beta >/dev/null <<SQL
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, source_id, target_id, method, summary, created_at)
|
||||
VALUES
|
||||
('$BETA_PARENT_ID', 'a2a_receive', '$BETA_CHILD_ID', '$BETA_PARENT_ID', 'message/send', 'beta-msg-1', NOW() - INTERVAL '5 hours'),
|
||||
('$BETA_PARENT_ID', 'a2a_receive', '$BETA_CHILD_ID', '$BETA_PARENT_ID', 'message/send', 'beta-msg-2', NOW() - INTERVAL '4 hours'),
|
||||
('$BETA_PARENT_ID', 'a2a_receive', '$BETA_CHILD_ID', '$BETA_PARENT_ID', 'message/send', 'beta-msg-3', NOW() - INTERVAL '3 hours'),
|
||||
('$BETA_PARENT_ID', 'a2a_receive', '$BETA_CHILD_ID', '$BETA_PARENT_ID', 'message/send', 'beta-msg-4', NOW() - INTERVAL '2 hours'),
|
||||
('$BETA_PARENT_ID', 'a2a_receive', '$BETA_CHILD_ID', '$BETA_PARENT_ID', 'message/send', 'beta-msg-5', NOW() - INTERVAL '1 hour');
|
||||
SQL
|
||||
|
||||
# ─── Phase C: alpha tenant sees only its 3 rows ────────────────────────
|
||||
echo ""
|
||||
echo "[replay] C. alpha history via /activity ..."
|
||||
ALPHA_RESP=$(curl_alpha_admin "$BASE/workspaces/$ALPHA_PARENT_ID/activity?type=a2a_receive&peer_id=$ALPHA_CHILD_ID&limit=20")
|
||||
assert "C1: alpha row count = 3" "3" "$(echo "$ALPHA_RESP" | jq 'length')"
|
||||
|
||||
# Every summary must start with "alpha-msg-" — beta leak would manifest
|
||||
# as a beta-msg-* string in this list.
|
||||
ALPHA_NON_ALPHA=$(echo "$ALPHA_RESP" | jq -r '[.[].summary | select(startswith("alpha-msg-") | not)] | length')
|
||||
assert "C2: zero non-alpha summaries leaked into alpha" "0" "$ALPHA_NON_ALPHA"
|
||||
|
||||
# ─── Phase D: beta tenant sees only its 5 rows ─────────────────────────
|
||||
echo ""
|
||||
echo "[replay] D. beta history via /activity ..."
|
||||
BETA_RESP=$(curl_beta_admin "$BASE/workspaces/$BETA_PARENT_ID/activity?type=a2a_receive&peer_id=$BETA_CHILD_ID&limit=20")
|
||||
assert "D1: beta row count = 5" "5" "$(echo "$BETA_RESP" | jq 'length')"
|
||||
|
||||
BETA_NON_BETA=$(echo "$BETA_RESP" | jq -r '[.[].summary | select(startswith("beta-msg-") | not)] | length')
|
||||
assert "D2: zero non-beta summaries leaked into beta" "0" "$BETA_NON_BETA"
|
||||
|
||||
# ─── Phase E: direct DB-side sanity ────────────────────────────────────
|
||||
echo ""
|
||||
echo "[replay] E. direct DB-side counts ..."
|
||||
ALPHA_DB=$(psql_exec_alpha -c "SELECT COUNT(*) FROM activity_logs WHERE workspace_id = '$ALPHA_PARENT_ID';")
|
||||
BETA_DB=$(psql_exec_beta -c "SELECT COUNT(*) FROM activity_logs WHERE workspace_id = '$BETA_PARENT_ID';")
|
||||
assert "E1: postgres-alpha has exactly 3 alpha rows" "3" "$ALPHA_DB"
|
||||
assert "E2: postgres-beta has exactly 5 beta rows" "5" "$BETA_DB"
|
||||
|
||||
# Cross-DB sanity: alpha PG has zero beta-named workspaces, vice versa.
|
||||
ALPHA_HAS_BETA=$(psql_exec_alpha -c "SELECT COUNT(*) FROM workspaces WHERE name LIKE 'beta-%';")
|
||||
BETA_HAS_ALPHA=$(psql_exec_beta -c "SELECT COUNT(*) FROM workspaces WHERE name LIKE 'alpha-%';")
|
||||
assert "E3: postgres-alpha has zero beta-named workspaces" "0" "$ALPHA_HAS_BETA"
|
||||
assert "E4: postgres-beta has zero alpha-named workspaces" "0" "$BETA_HAS_ALPHA"
|
||||
|
||||
# ─── Phase F: concurrent INSERT race ───────────────────────────────────
|
||||
# Both tenants insert 10 rows concurrently. Race shape catches the
|
||||
# failure modes that CAN cross tenants in this topology:
|
||||
# - redis cross-keyspace bleed (shared redis container).
|
||||
# - shared-cp-stub state corruption (single Go process serves both).
|
||||
# - cf-proxy buffer mixup under simultaneous in-flight writes.
|
||||
# Does NOT catch lib/pq prepared-statement cache collision or shared
|
||||
# *sql.DB pool poisoning — each tenant has its own DATABASE_URL and
|
||||
# its own postgres-{alpha,beta} container, so there is no shared pool
|
||||
# to corrupt. A future replay variant on a single shared Postgres
|
||||
# would be the right place to assert that failure mode.
|
||||
# Each side must end with EXACTLY +10 rows from its own writes.
|
||||
echo ""
|
||||
echo "[replay] F. concurrent insert race — 10 rows per tenant in parallel"
|
||||
|
||||
(
|
||||
for i in $(seq 1 10); do
|
||||
psql_exec_alpha >/dev/null <<SQL
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, source_id, target_id, method, summary)
|
||||
VALUES ('$ALPHA_PARENT_ID', 'a2a_receive', '$ALPHA_CHILD_ID', '$ALPHA_PARENT_ID', 'message/send', 'alpha-race-$i');
|
||||
SQL
|
||||
done
|
||||
) &
|
||||
ALPHA_PID=$!
|
||||
|
||||
(
|
||||
for i in $(seq 1 10); do
|
||||
psql_exec_beta >/dev/null <<SQL
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, source_id, target_id, method, summary)
|
||||
VALUES ('$BETA_PARENT_ID', 'a2a_receive', '$BETA_CHILD_ID', '$BETA_PARENT_ID', 'message/send', 'beta-race-$i');
|
||||
SQL
|
||||
done
|
||||
) &
|
||||
BETA_PID=$!
|
||||
|
||||
wait $ALPHA_PID $BETA_PID
|
||||
|
||||
ALPHA_AFTER=$(psql_exec_alpha -c "SELECT COUNT(*) FROM activity_logs WHERE workspace_id = '$ALPHA_PARENT_ID';")
|
||||
BETA_AFTER=$(psql_exec_beta -c "SELECT COUNT(*) FROM activity_logs WHERE workspace_id = '$BETA_PARENT_ID';")
|
||||
assert "F1: alpha has 13 rows after race (3 + 10)" "13" "$ALPHA_AFTER"
|
||||
assert "F2: beta has 15 rows after race (5 + 10)" "15" "$BETA_AFTER"
|
||||
|
||||
# Concurrency leak check: alpha's "race" rows must all be alpha-race-*,
|
||||
# beta's must all be beta-race-*. A pool/cache cross-bleed would surface
|
||||
# as some tenant getting the other's writes.
|
||||
ALPHA_RACE_NAMES=$(psql_exec_alpha -c "SELECT COUNT(*) FROM activity_logs WHERE workspace_id = '$ALPHA_PARENT_ID' AND summary LIKE 'beta-race-%';")
|
||||
BETA_RACE_NAMES=$(psql_exec_beta -c "SELECT COUNT(*) FROM activity_logs WHERE workspace_id = '$BETA_PARENT_ID' AND summary LIKE 'alpha-race-%';")
|
||||
assert "F3: zero beta-race rows leaked into alpha PG" "0" "$ALPHA_RACE_NAMES"
|
||||
assert "F4: zero alpha-race rows leaked into beta PG" "0" "$BETA_RACE_NAMES"
|
||||
|
||||
# ─── Cleanup ───────────────────────────────────────────────────────────
|
||||
psql_exec_alpha >/dev/null <<SQL
|
||||
DELETE FROM activity_logs WHERE workspace_id = '$ALPHA_PARENT_ID';
|
||||
SQL
|
||||
psql_exec_beta >/dev/null <<SQL
|
||||
DELETE FROM activity_logs WHERE workspace_id = '$BETA_PARENT_ID';
|
||||
SQL
|
||||
|
||||
echo ""
|
||||
if [ "$FAIL" -gt 0 ]; then
|
||||
echo "[replay] FAIL: $PASS pass, $FAIL fail"
|
||||
exit 1
|
||||
fi
|
||||
echo "[replay] PASS: $PASS/$PASS — per-tenant independence holds (DB partition + concurrent race)"
|
||||
Executable
+186
@@ -0,0 +1,186 @@
|
||||
#!/usr/bin/env bash
|
||||
# Replay for cross-tenant isolation — TenantGuard middleware MUST 404
|
||||
# any request whose X-Molecule-Org-Id (or Fly-Replay state, or
|
||||
# same-origin Canvas trust) doesn't match the tenant container's
|
||||
# configured MOLECULE_ORG_ID.
|
||||
#
|
||||
# Why this matters in production:
|
||||
# - One Cloudflare tunnel front-doors every tenant subdomain.
|
||||
# - DNS/routing layer can mis-direct a request (CF cache poisoning,
|
||||
# misconfigured CNAME, internal traffic mirror).
|
||||
# - TenantGuard is the last-line defense — it 404s any request whose
|
||||
# declared org doesn't match what the tenant binary was provisioned
|
||||
# with. Returning 404 (not 403) is intentional: the existence of a
|
||||
# tenant on this machine must not be probable by an outsider.
|
||||
#
|
||||
# What this replay catches:
|
||||
# - A regression where TenantGuard accidentally allows requests with
|
||||
# a different org id (e.g. someone removes the strict equality check).
|
||||
# - cf-proxy routing-by-Host bug that sends alpha's request to beta's
|
||||
# container (the negative test would suddenly succeed).
|
||||
# - Allowlist drift — if /workspaces is added to tenantGuardAllowlist
|
||||
# it would silently be cross-tenant readable.
|
||||
#
|
||||
# Phases:
|
||||
# A. Positive controls — each tenant accepts its own valid creds.
|
||||
# B. Org-header mismatch — alpha-org header at beta's URL → 404.
|
||||
# C. Reverse — beta-org header at alpha's URL → 404.
|
||||
# D. Right URL, wrong org header (typo) → 404.
|
||||
# E. Bearer present but no org header → 404 (TenantGuard rejects).
|
||||
# F. Per-tenant DB isolation — alpha's /workspaces enumerates only
|
||||
# alpha workspaces; beta's only beta. Confirms cf-proxy + TenantGuard
|
||||
# really did partition the request to the right backing DB.
|
||||
# G. Allowlisted /health stays public on both tenants (sanity check —
|
||||
# a regression that put /health behind the guard would 404 too).
|
||||
|
||||
set -euo pipefail
|
||||
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
HARNESS_ROOT="$(dirname "$HERE")"
|
||||
cd "$HARNESS_ROOT"
|
||||
|
||||
if [ ! -f .seed.env ]; then
|
||||
echo "[replay] no .seed.env — running ./seed.sh first..."
|
||||
./seed.sh
|
||||
fi
|
||||
# shellcheck source=/dev/null
|
||||
source .seed.env
|
||||
# shellcheck source=../_curl.sh
|
||||
source "$HARNESS_ROOT/_curl.sh"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
assert_status() {
|
||||
local desc="$1" expected="$2" actual="$3"
|
||||
if [ "$expected" = "$actual" ]; then
|
||||
printf " PASS %s (HTTP %s)\n" "$desc" "$actual"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
printf " FAIL %s\n expected HTTP %s, got HTTP %s\n" "$desc" "$expected" "$actual" >&2
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
# Plain equality check — for non-HTTP values (counts, names, etc.).
|
||||
# Distinct from assert_status so output reads naturally instead of
|
||||
# claiming "(HTTP 0)" for what is really a count.
|
||||
assert() {
|
||||
local desc="$1" expected="$2" actual="$3"
|
||||
if [ "$expected" = "$actual" ]; then
|
||||
printf " PASS %s\n" "$desc"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
printf " FAIL %s\n expected: %s\n got : %s\n" "$desc" "$expected" "$actual" >&2
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
# ─── Phase A: positive controls ────────────────────────────────────────
|
||||
echo "[replay] A. positive controls — each tenant accepts its own valid creds"
|
||||
|
||||
ALPHA_OWN=$(curl_alpha_admin -o /dev/null -w '%{http_code}' "$BASE/workspaces")
|
||||
assert_status "A1: alpha creds at alpha returns 200" "200" "$ALPHA_OWN"
|
||||
|
||||
BETA_OWN=$(curl_beta_admin -o /dev/null -w '%{http_code}' "$BASE/workspaces")
|
||||
assert_status "A2: beta creds at beta returns 200" "200" "$BETA_OWN"
|
||||
|
||||
# ─── Phase B: alpha creds at beta's URL → 404 ──────────────────────────
|
||||
echo ""
|
||||
echo "[replay] B. alpha-org header at beta's URL — TenantGuard must 404"
|
||||
|
||||
CROSS_AB=$(curl_alpha_creds_at_beta -o /tmp/iso-ab.json -w '%{http_code}' "$BASE/workspaces")
|
||||
assert_status "B1: alpha-org header at beta URL → 404" "404" "$CROSS_AB"
|
||||
|
||||
# Body must be a generic 404 — never reveal that beta exists or that
|
||||
# the org check fired (TenantGuard is intentionally indistinguishable
|
||||
# from "no such route" to an outside scanner).
|
||||
B_BODY=$(cat /tmp/iso-ab.json)
|
||||
if echo "$B_BODY" | grep -qiE "tenant|org|forbidden|denied"; then
|
||||
printf " FAIL B2: 404 body leaks tenant/org/auth keywords (info disclosure)\n body: %s\n" "$B_BODY" >&2
|
||||
FAIL=$((FAIL + 1))
|
||||
else
|
||||
printf " PASS B2: 404 body has no tenant/org leak\n"
|
||||
PASS=$((PASS + 1))
|
||||
fi
|
||||
|
||||
# ─── Phase C: beta creds at alpha's URL → 404 ──────────────────────────
|
||||
echo ""
|
||||
echo "[replay] C. beta-org header at alpha's URL — TenantGuard must 404"
|
||||
|
||||
CROSS_BA=$(curl_beta_creds_at_alpha -o /tmp/iso-ba.json -w '%{http_code}' "$BASE/workspaces")
|
||||
assert_status "C1: beta-org header at alpha URL → 404" "404" "$CROSS_BA"
|
||||
|
||||
# ─── Phase D: right URL, garbage org header ────────────────────────────
|
||||
echo ""
|
||||
echo "[replay] D. right URL, garbage org header → 404"
|
||||
|
||||
GARBAGE=$(curl -sS -o /dev/null -w '%{http_code}' \
|
||||
-H "Host: ${ALPHA_HOST}" \
|
||||
-H "Authorization: Bearer ${ALPHA_ADMIN_TOKEN}" \
|
||||
-H "X-Molecule-Org-Id: not-the-right-org" \
|
||||
"$BASE/workspaces")
|
||||
assert_status "D1: garbage org id at alpha URL → 404" "404" "$GARBAGE"
|
||||
|
||||
# ─── Phase E: bearer present but no org header at all → 404 ────────────
|
||||
echo ""
|
||||
echo "[replay] E. valid bearer but missing X-Molecule-Org-Id → 404"
|
||||
|
||||
NO_ORG=$(curl -sS -o /dev/null -w '%{http_code}' \
|
||||
-H "Host: ${ALPHA_HOST}" \
|
||||
-H "Authorization: Bearer ${ALPHA_ADMIN_TOKEN}" \
|
||||
"$BASE/workspaces")
|
||||
assert_status "E1: missing X-Molecule-Org-Id → 404" "404" "$NO_ORG"
|
||||
|
||||
# ─── Phase F: per-tenant DB isolation via list_workspaces ──────────────
|
||||
echo ""
|
||||
echo "[replay] F. per-tenant DB isolation via /workspaces listing"
|
||||
|
||||
ALPHA_LIST=$(curl_alpha_admin "$BASE/workspaces")
|
||||
ALPHA_NAMES=$(echo "$ALPHA_LIST" | jq -r '.[].name' | sort | tr '\n' ',' | sed 's/,$//')
|
||||
echo "[replay] alpha tenant sees: $ALPHA_NAMES"
|
||||
|
||||
if [ "$ALPHA_NAMES" = "alpha-child,alpha-parent" ]; then
|
||||
printf " PASS F1: alpha enumerates only alpha workspaces\n"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
printf " FAIL F1: alpha enumerated unexpected workspaces\n expected: alpha-child,alpha-parent\n got : %s\n" "$ALPHA_NAMES" >&2
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
|
||||
BETA_LIST=$(curl_beta_admin "$BASE/workspaces")
|
||||
BETA_NAMES=$(echo "$BETA_LIST" | jq -r '.[].name' | sort | tr '\n' ',' | sed 's/,$//')
|
||||
echo "[replay] beta tenant sees: $BETA_NAMES"
|
||||
|
||||
if [ "$BETA_NAMES" = "beta-child,beta-parent" ]; then
|
||||
printf " PASS F2: beta enumerates only beta workspaces\n"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
printf " FAIL F2: beta enumerated unexpected workspaces\n expected: beta-child,beta-parent\n got : %s\n" "$BETA_NAMES" >&2
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
|
||||
# Cross-check: neither tenant's list contains the other's workspace ids.
|
||||
LEAKED_INTO_ALPHA=$(echo "$ALPHA_LIST" | jq -r --arg b1 "$BETA_PARENT_ID" --arg b2 "$BETA_CHILD_ID" \
|
||||
'[.[] | select(.id == $b1 or .id == $b2)] | length')
|
||||
assert "F3: alpha list contains zero beta workspace ids" "0" "$LEAKED_INTO_ALPHA"
|
||||
|
||||
LEAKED_INTO_BETA=$(echo "$BETA_LIST" | jq -r --arg a1 "$ALPHA_PARENT_ID" --arg a2 "$ALPHA_CHILD_ID" \
|
||||
'[.[] | select(.id == $a1 or .id == $a2)] | length')
|
||||
assert "F4: beta list contains zero alpha workspace ids" "0" "$LEAKED_INTO_BETA"
|
||||
|
||||
# ─── Phase G: /health is allowlisted (sanity) ──────────────────────────
|
||||
echo ""
|
||||
echo "[replay] G. /health stays public on both tenants (TenantGuard allowlist sanity)"
|
||||
|
||||
ALPHA_HEALTH=$(curl -sS -o /dev/null -w '%{http_code}' -H "Host: ${ALPHA_HOST}" "$BASE/health")
|
||||
assert_status "G1: alpha /health public → 200" "200" "$ALPHA_HEALTH"
|
||||
|
||||
BETA_HEALTH=$(curl -sS -o /dev/null -w '%{http_code}' -H "Host: ${BETA_HOST}" "$BASE/health")
|
||||
assert_status "G2: beta /health public → 200" "200" "$BETA_HEALTH"
|
||||
|
||||
echo ""
|
||||
if [ "$FAIL" -gt 0 ]; then
|
||||
echo "[replay] FAIL: $PASS pass, $FAIL fail"
|
||||
exit 1
|
||||
fi
|
||||
echo "[replay] PASS: $PASS/$PASS — TenantGuard isolation + per-tenant DB partitioning hold"
|
||||
@@ -0,0 +1,20 @@
|
||||
# Harness-replay Python deps — minimal set for replays/*.sh scripts that
|
||||
# eval Python against the running tenant (e.g. importing
|
||||
# workspace/a2a_client.py to assert parser behavior).
|
||||
#
|
||||
# This is intentionally smaller than workspace/requirements.txt: the
|
||||
# replays don't need a2a-sdk, langchain, opentelemetry, etc. — only the
|
||||
# HTTP client surface that the imported helpers depend on. Adding the
|
||||
# full workspace deps would slow every harness CI run by ~30s for no
|
||||
# gain.
|
||||
#
|
||||
# Add a line here (with a version constraint matching workspace/requirements.txt)
|
||||
# when a new replay introduces a new Python import.
|
||||
|
||||
httpx>=0.28.1
|
||||
|
||||
# channel-envelope-trust-boundary.sh imports from `molecule_runtime.*` (the
|
||||
# wheel-rewritten path) so it catches the failure mode where the wheel
|
||||
# build silently strips a fix that unit tests on local source still pass.
|
||||
# >= 0.1.78 ships PR #2481's peer_id trust-boundary guard.
|
||||
molecule-ai-workspace-runtime>=0.1.78
|
||||
Executable
+90
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env bash
|
||||
# Run every replay under tests/harness/replays/ against a fresh harness.
|
||||
#
|
||||
# Boots the harness (up.sh + seed.sh), runs each `replays/*.sh` in
|
||||
# alphabetical order, tracks pass/fail, and tears down on exit. Returns
|
||||
# non-zero if any replay failed.
|
||||
#
|
||||
# Usage:
|
||||
# ./run-all-replays.sh # boot, run, teardown
|
||||
# KEEP_UP=1 ./run-all-replays.sh # leave harness running on exit (debug)
|
||||
# REBUILD=1 ./run-all-replays.sh # rebuild images before booting
|
||||
#
|
||||
# CI usage: invoke without flags. The trap-on-EXIT teardown ensures we
|
||||
# don't leak Docker resources when a replay fails partway through.
|
||||
|
||||
set -euo pipefail
|
||||
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
cd "$HERE"
|
||||
|
||||
REPLAYS_DIR="$HERE/replays"
|
||||
if [ ! -d "$REPLAYS_DIR" ]; then
|
||||
echo "[run-all] no replays/ directory at $REPLAYS_DIR — nothing to run"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
shopt -s nullglob
|
||||
REPLAYS=("$REPLAYS_DIR"/*.sh)
|
||||
shopt -u nullglob
|
||||
if [ ${#REPLAYS[@]} -eq 0 ]; then
|
||||
echo "[run-all] replays/ is empty — nothing to run"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cleanup() {
|
||||
local exit_code=$?
|
||||
if [ "${KEEP_UP:-0}" = "1" ]; then
|
||||
echo ""
|
||||
echo "[run-all] KEEP_UP=1 — leaving harness up. Tear down manually with ./down.sh"
|
||||
else
|
||||
echo ""
|
||||
echo "[run-all] tearing down harness..."
|
||||
./down.sh >/dev/null 2>&1 || echo "[run-all] WARN: ./down.sh exited non-zero"
|
||||
fi
|
||||
exit "$exit_code"
|
||||
}
|
||||
trap cleanup EXIT INT TERM
|
||||
|
||||
echo "[run-all] booting harness..."
|
||||
if [ "${REBUILD:-0}" = "1" ]; then
|
||||
./up.sh --rebuild
|
||||
else
|
||||
./up.sh
|
||||
fi
|
||||
|
||||
echo "[run-all] seeding workspaces..."
|
||||
./seed.sh
|
||||
|
||||
PASS_COUNT=0
|
||||
FAIL_COUNT=0
|
||||
SKIP_COUNT=0
|
||||
FAILED_NAMES=()
|
||||
|
||||
for replay in "${REPLAYS[@]}"; do
|
||||
name=$(basename "$replay" .sh)
|
||||
echo ""
|
||||
echo "[run-all] ━━━ $name ━━━"
|
||||
if bash "$replay"; then
|
||||
# Replays signal "skip" by exiting 0 with a __SKIP__ marker in stdout —
|
||||
# but we capture that as a pass here since the script exited 0. The
|
||||
# skip is documented in the script's own output. CI uses pass/fail.
|
||||
PASS_COUNT=$((PASS_COUNT + 1))
|
||||
echo "[run-all] PASS: $name"
|
||||
else
|
||||
FAIL_COUNT=$((FAIL_COUNT + 1))
|
||||
FAILED_NAMES+=("$name")
|
||||
echo "[run-all] FAIL: $name"
|
||||
fi
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "[run-all] ============================="
|
||||
echo "[run-all] Replay summary: ${PASS_COUNT} passed, ${FAIL_COUNT} failed (of ${#REPLAYS[@]} total)"
|
||||
if [ ${FAIL_COUNT} -gt 0 ]; then
|
||||
echo "[run-all] Failed:"
|
||||
for name in "${FAILED_NAMES[@]}"; do
|
||||
echo "[run-all] - $name"
|
||||
done
|
||||
exit 1
|
||||
fi
|
||||
echo "[run-all] All replays passed."
|
||||
Executable
+89
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env bash
|
||||
# Seed BOTH tenants with parent + child workspaces so peer-discovery
|
||||
# and cross-tenant replays have something to discover.
|
||||
#
|
||||
# Tenant alpha:
|
||||
# - alpha-parent (tier 0)
|
||||
# - alpha-child (tier 1, child of alpha-parent)
|
||||
# Tenant beta:
|
||||
# - beta-parent (tier 0)
|
||||
# - beta-child (tier 1, child of beta-parent)
|
||||
#
|
||||
# IDs are server-generated (POST /workspaces ignores body.id) — we
|
||||
# capture the returned id rather than minting client-side. Older
|
||||
# versions silently desynced from the workspaces table, breaking
|
||||
# FK-dependent replays.
|
||||
#
|
||||
# All four IDs persist to .seed.env so replays can target any of them.
|
||||
|
||||
set -euo pipefail
|
||||
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
cd "$HERE"
|
||||
|
||||
# shellcheck source=_curl.sh
|
||||
source "$HERE/_curl.sh"
|
||||
|
||||
create_workspace() {
|
||||
local tenant="$1" name="$2" tier="$3" parent="${4:-}"
|
||||
local body
|
||||
if [ -n "$parent" ]; then
|
||||
body="{\"name\":\"$name\",\"tier\":$tier,\"parent_id\":\"$parent\",\"runtime\":\"langgraph\"}"
|
||||
else
|
||||
body="{\"name\":\"$name\",\"tier\":$tier,\"runtime\":\"langgraph\"}"
|
||||
fi
|
||||
local id
|
||||
if [ "$tenant" = "alpha" ]; then
|
||||
id=$(curl_alpha_admin -X POST "$BASE/workspaces" -d "$body" | jq -r '.id')
|
||||
else
|
||||
id=$(curl_beta_admin -X POST "$BASE/workspaces" -d "$body" | jq -r '.id')
|
||||
fi
|
||||
if [ -z "$id" ] || [ "$id" = "null" ]; then
|
||||
echo "[seed] FAIL: $tenant/$name workspace creation returned no id" >&2
|
||||
return 1
|
||||
fi
|
||||
echo "$id"
|
||||
}
|
||||
|
||||
echo "[seed] confirming both tenants reachable..."
|
||||
ALPHA_HEALTH=$(curl_alpha_anon "$BASE/health" || echo "")
|
||||
BETA_HEALTH=$(curl_beta_anon "$BASE/health" || echo "")
|
||||
if [ -z "$ALPHA_HEALTH" ] || [ -z "$BETA_HEALTH" ]; then
|
||||
echo "[seed] FAIL: tenant unreachable. alpha='$ALPHA_HEALTH' beta='$BETA_HEALTH'"
|
||||
echo " Did ./up.sh complete cleanly?"
|
||||
exit 1
|
||||
fi
|
||||
echo "[seed] alpha: $ALPHA_HEALTH"
|
||||
echo "[seed] beta : $BETA_HEALTH"
|
||||
|
||||
echo ""
|
||||
echo "[seed] tenant alpha — creating alpha-parent + alpha-child ..."
|
||||
ALPHA_PARENT_ID=$(create_workspace alpha alpha-parent 0)
|
||||
echo "[seed] alpha-parent id=$ALPHA_PARENT_ID"
|
||||
ALPHA_CHILD_ID=$(create_workspace alpha alpha-child 1 "$ALPHA_PARENT_ID")
|
||||
echo "[seed] alpha-child id=$ALPHA_CHILD_ID"
|
||||
|
||||
echo ""
|
||||
echo "[seed] tenant beta — creating beta-parent + beta-child ..."
|
||||
BETA_PARENT_ID=$(create_workspace beta beta-parent 0)
|
||||
echo "[seed] beta-parent id=$BETA_PARENT_ID"
|
||||
BETA_CHILD_ID=$(create_workspace beta beta-child 1 "$BETA_PARENT_ID")
|
||||
echo "[seed] beta-child id=$BETA_CHILD_ID"
|
||||
|
||||
# Stash IDs for replay scripts.
|
||||
#
|
||||
# Backwards-compat: ALPHA_ID + BETA_ID aliases keep pre-Phase-2 replays
|
||||
# working (they used these names for the alpha tenant's parent + child).
|
||||
{
|
||||
echo "ALPHA_PARENT_ID=$ALPHA_PARENT_ID"
|
||||
echo "ALPHA_CHILD_ID=$ALPHA_CHILD_ID"
|
||||
echo "BETA_PARENT_ID=$BETA_PARENT_ID"
|
||||
echo "BETA_CHILD_ID=$BETA_CHILD_ID"
|
||||
echo "# legacy aliases — pre-Phase-2 replays expect these names"
|
||||
echo "ALPHA_ID=$ALPHA_PARENT_ID"
|
||||
echo "BETA_ID=$ALPHA_CHILD_ID"
|
||||
} > "$HERE/.seed.env"
|
||||
|
||||
echo ""
|
||||
echo "[seed] done. IDs persisted to tests/harness/.seed.env"
|
||||
echo "[seed] alpha: parent=$ALPHA_PARENT_ID child=$ALPHA_CHILD_ID"
|
||||
echo "[seed] beta : parent=$BETA_PARENT_ID child=$BETA_CHILD_ID"
|
||||
Executable
+59
@@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env bash
|
||||
# Bring the production-shape harness up.
|
||||
#
|
||||
# Usage: ./up.sh [--rebuild]
|
||||
#
|
||||
# Always operates in tests/harness/ regardless of where it's invoked
|
||||
# from — test scripts under tests/harness/replays/ source it via the
|
||||
# absolute path, so cd-ing first prevents compose-context surprises.
|
||||
|
||||
set -euo pipefail
|
||||
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
cd "$HERE"
|
||||
|
||||
REBUILD=false
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
--rebuild) REBUILD=true ;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Generate a per-run encryption key. The tenant runs with
|
||||
# MOLECULE_ENV=production (intentional, to replay prod-shape bugs), and
|
||||
# crypto.InitStrict() refuses to boot without SECRETS_ENCRYPTION_KEY.
|
||||
# Generate fresh so:
|
||||
# - No key-shaped string lives in the repo (avoids muscle-memorying a
|
||||
# hardcoded value into other places + secret-scanner false positives).
|
||||
# - Each harness lifetime gets a unique key, mimicking prod's per-tenant
|
||||
# isolation. Persistence across runs isn't required — the harness DB
|
||||
# is wiped on every ./down.sh.
|
||||
# Honor a caller-supplied value if already exported (lets a debug session
|
||||
# pin a key for reproducibility).
|
||||
if [ -z "${SECRETS_ENCRYPTION_KEY:-}" ]; then
|
||||
SECRETS_ENCRYPTION_KEY=$(openssl rand -base64 32)
|
||||
export SECRETS_ENCRYPTION_KEY
|
||||
fi
|
||||
|
||||
if [ "$REBUILD" = true ]; then
|
||||
docker compose -f compose.yml build --no-cache tenant cp-stub
|
||||
fi
|
||||
|
||||
echo "[harness] starting redis + cp-stub + tenant-alpha + tenant-beta + cf-proxy ..."
|
||||
docker compose -f compose.yml up -d --wait
|
||||
|
||||
# Sudo-free reachability: cf-proxy/nginx routes by Host header to the
|
||||
# right tenant container (matches production CF tunnel: same URL,
|
||||
# different Host = different tenant). Replays target loopback :8080
|
||||
# with a per-tenant Host header. _curl.sh centralises the helper
|
||||
# functions (curl_alpha_admin, curl_beta_admin, etc.).
|
||||
echo ""
|
||||
echo "[harness] up. Multi-tenant topology:"
|
||||
echo " tenant-alpha: Host: harness-tenant-alpha.localhost"
|
||||
echo " tenant-beta: Host: harness-tenant-beta.localhost"
|
||||
echo " legacy alias: Host: harness-tenant.localhost → alpha"
|
||||
echo ""
|
||||
echo " Quick check (no /etc/hosts needed):"
|
||||
echo " curl -H 'Host: harness-tenant-alpha.localhost' http://localhost:8080/health"
|
||||
echo " curl -H 'Host: harness-tenant-beta.localhost' http://localhost:8080/health"
|
||||
echo ""
|
||||
echo "Next: ./seed.sh # register parent+child workspaces in BOTH tenants"
|
||||
@@ -16,7 +16,11 @@ RUN echo 'replace github.com/Molecule-AI/molecule-ai-plugin-github-app-auth => /
|
||||
RUN sed -i 's|replace github.com/Molecule-AI/molecule-monorepo/platform => .*|replace github.com/Molecule-AI/molecule-monorepo/platform => /app|' /plugin/go.mod
|
||||
RUN go mod download
|
||||
COPY workspace-server/ .
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /platform ./cmd/server
|
||||
# GIT_SHA mirror of Dockerfile.tenant — see that file for the rationale.
|
||||
ARG GIT_SHA=dev
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build \
|
||||
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
|
||||
-o /platform ./cmd/server
|
||||
|
||||
# Clone templates + plugins at build time from manifest.json
|
||||
FROM alpine:3.20 AS templates
|
||||
|
||||
@@ -21,7 +21,19 @@ COPY workspace-server/go.mod workspace-server/go.sum ./
|
||||
RUN echo 'replace github.com/Molecule-AI/molecule-ai-plugin-github-app-auth => /plugin' >> go.mod
|
||||
RUN go mod download
|
||||
COPY workspace-server/ .
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /platform ./cmd/server
|
||||
|
||||
# GIT_SHA is baked into the binary via -ldflags so /buildinfo can return
|
||||
# it at runtime. CI passes ${{ github.sha }}; local builds default to
|
||||
# "dev" so an unset value never reads as a real SHA.
|
||||
#
|
||||
# Why this matters: the redeploy verification step compares each tenant's
|
||||
# /buildinfo against the SHA the workflow expects. If GIT_SHA isn't
|
||||
# threaded through here, every tenant returns "dev" and the verification
|
||||
# fails closed — which is the correct fail-direction (#2395 root fix).
|
||||
ARG GIT_SHA=dev
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build \
|
||||
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
|
||||
-o /platform ./cmd/server
|
||||
|
||||
# ── Stage 2: Canvas Next.js standalone ────────────────────────────────
|
||||
FROM node:20-alpine AS canvas-builder
|
||||
|
||||
@@ -223,13 +223,24 @@ func main() {
|
||||
registry.StartLivenessMonitor(c, onWorkspaceOffline)
|
||||
})
|
||||
|
||||
// Proactive container health sweep — detects dead containers faster than Redis TTL.
|
||||
// Checks all "online" workspaces against Docker every 15 seconds.
|
||||
if prov != nil {
|
||||
go supervised.RunWithRecover(ctx, "health-sweep", func(c context.Context) {
|
||||
registry.StartHealthSweep(c, prov, 15*time.Second, onWorkspaceOffline)
|
||||
})
|
||||
}
|
||||
// Proactive health sweep — two passes per tick:
|
||||
// 1. Docker-side: checks "online" workspaces against the local Docker
|
||||
// daemon (only runs when prov is non-nil, i.e. self-hosted mode).
|
||||
// 2. Remote-side: scans runtime='external' rows whose last_heartbeat_at
|
||||
// is past REMOTE_LIVENESS_STALE_AFTER and flips them to
|
||||
// awaiting_agent. Runs regardless of provisioner mode — SaaS
|
||||
// tenants need this even though they don't run Docker locally,
|
||||
// because external-runtime workspaces are operator-managed and
|
||||
// the platform-side liveness sweep is the only thing that
|
||||
// transitions them off 'online' when the operator's CLI dies.
|
||||
//
|
||||
// Pre-2026-04-30 this goroutine was gated on prov != nil, which silently
|
||||
// disabled the remote-side sweep on every SaaS tenant. The function in
|
||||
// healthsweep.go has always handled nil checker correctly; only the
|
||||
// orchestration was wrong. See #2392's CI failure for the trace.
|
||||
go supervised.RunWithRecover(ctx, "health-sweep", func(c context.Context) {
|
||||
registry.StartHealthSweep(c, prov, 15*time.Second, onWorkspaceOffline)
|
||||
})
|
||||
|
||||
// Orphan-container reconcile sweep — finds running containers
|
||||
// whose workspace row is already status='removed' and stops
|
||||
@@ -249,7 +260,13 @@ func main() {
|
||||
// and the state is incoherent (e.g. user sees "Retry" after 15min but
|
||||
// backend still thinks provisioning is in progress).
|
||||
go supervised.RunWithRecover(ctx, "provision-timeout-sweep", func(c context.Context) {
|
||||
registry.StartProvisioningTimeoutSweep(c, broadcaster, registry.DefaultProvisionSweepInterval)
|
||||
// Pass the handler's per-runtime template-manifest lookup so the
|
||||
// sweeper honours `runtime_config.provision_timeout_seconds`
|
||||
// declared in any template's config.yaml — the same value the
|
||||
// canvas already reads via addProvisionTimeoutMs. Without this
|
||||
// the sweeper killed claude-code at the 10-min hardcoded floor
|
||||
// regardless of the manifest. See registry.RuntimeTimeoutLookup.
|
||||
registry.StartProvisioningTimeoutSweep(c, broadcaster, registry.DefaultProvisionSweepInterval, wh.ProvisionTimeoutSecondsForRuntime)
|
||||
})
|
||||
|
||||
// Cron Scheduler — fires A2A messages to workspaces on user-defined schedules
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
// Package buildinfo exposes the git SHA the binary was built from.
|
||||
//
|
||||
// Set at link time:
|
||||
//
|
||||
// go build -ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=<sha>"
|
||||
//
|
||||
// CI passes ${{ github.sha }} via Dockerfile.tenant ARG GIT_SHA; local
|
||||
// dev builds default to "dev" so unset never reads as success.
|
||||
//
|
||||
// Why this package exists: redeploy-fleet (CP) returns ssm_status=Success
|
||||
// when the SSM RPC didn't error — that's "the deploy command ran",
|
||||
// NOT "the new code is running on every tenant." Image-tag-as-tag
|
||||
// (`:latest`) caches in the local Docker daemon so `docker compose up -d`
|
||||
// without an explicit `docker pull` is a no-op when the tag hasn't been
|
||||
// invalidated. Both observed 2026-04-30: the user's tenant kept serving
|
||||
// pre-501a42d7 chat_files even after main published the lazy-heal fix
|
||||
// (#2395). Exposing GitSHA at /buildinfo lets the redeploy workflow
|
||||
// verify EVERY tenant is actually running the published SHA before
|
||||
// reporting success.
|
||||
package buildinfo
|
||||
|
||||
// GitSHA is overwritten at build time via -ldflags. Default catches
|
||||
// dev builds + any deploy that forgot to wire the build-arg through.
|
||||
// "dev" is intentional — comparing it to a real SHA always fails,
|
||||
// which is what we want for an unconfigured deploy.
|
||||
var GitSHA = "dev"
|
||||
@@ -0,0 +1,81 @@
|
||||
package buildinfo_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// TestGitSHA_DefaultDevSentinel pins the contract that an unset
|
||||
// GIT_SHA at build time reads as "dev", NOT as an empty string. The
|
||||
// redeploy verification step compares the deployed /buildinfo against
|
||||
// the workflow's expected SHA — if GitSHA were "" by default, a
|
||||
// misconfigured deploy would round-trip "" successfully if the
|
||||
// expected SHA were also somehow ""; "dev" guarantees the comparison
|
||||
// always fails closed for an unset deploy.
|
||||
//
|
||||
// Linker tests can't directly exercise -ldflags injection from inside
|
||||
// `go test`, but they can pin the default the linker overrides.
|
||||
func TestGitSHA_DefaultDevSentinel(t *testing.T) {
|
||||
if buildinfo.GitSHA != "dev" {
|
||||
t.Errorf("GitSHA default = %q, want %q (CI ldflags override expected to set this; tests run without ldflags so this should be the dev sentinel)", buildinfo.GitSHA, "dev")
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuildInfoEndpoint_ReturnsGitSHA pins the wire shape of the
|
||||
// /buildinfo response. The redeploy verification step reads
|
||||
// `.git_sha` from this JSON; renaming the field would silently break
|
||||
// every tenant verification (the jq lookup would return null + the
|
||||
// step would interpret it as "tenant unreachable" and fail closed,
|
||||
// which is correct but noisy).
|
||||
//
|
||||
// Test routes the handler against an httptest server rather than
|
||||
// constructing a router.Setup() — that constructor takes a Hub +
|
||||
// Broadcaster + Provisioner + WorkspaceHandler + ChannelMgr, and
|
||||
// /buildinfo doesn't depend on any of them. Using a minimal gin
|
||||
// engine here keeps the test fast and isolated to the contract under
|
||||
// test.
|
||||
func TestBuildInfoEndpoint_ReturnsGitSHA(t *testing.T) {
|
||||
// Stash + restore so other tests that read GitSHA see a stable
|
||||
// value. The package-level var is mutable by design (-ldflags),
|
||||
// so test isolation requires explicit save/restore.
|
||||
prev := buildinfo.GitSHA
|
||||
t.Cleanup(func() { buildinfo.GitSHA = prev })
|
||||
buildinfo.GitSHA = "abc1234deadbeef"
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
r := gin.New()
|
||||
r.GET("/buildinfo", func(c *gin.Context) {
|
||||
c.JSON(200, gin.H{"git_sha": buildinfo.GitSHA})
|
||||
})
|
||||
|
||||
srv := httptest.NewServer(r)
|
||||
t.Cleanup(srv.Close)
|
||||
|
||||
resp, err := http.Get(srv.URL + "/buildinfo")
|
||||
if err != nil {
|
||||
t.Fatalf("GET /buildinfo: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = resp.Body.Close() })
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
t.Fatalf("status = %d, want 200", resp.StatusCode)
|
||||
}
|
||||
|
||||
var body map[string]string
|
||||
if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
|
||||
got, ok := body["git_sha"]
|
||||
if !ok {
|
||||
t.Fatalf("response missing git_sha field — would break the redeploy verification jq lookup. Body: %+v", body)
|
||||
}
|
||||
if got != "abc1234deadbeef" {
|
||||
t.Errorf("git_sha = %q, want %q", got, "abc1234deadbeef")
|
||||
}
|
||||
}
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
@@ -131,7 +132,8 @@ func buildBundleConfigFiles(b *Bundle) map[string][]byte {
|
||||
|
||||
func markFailed(ctx context.Context, wsID string, broadcaster *events.Broadcaster, err error) {
|
||||
db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET status = 'failed', updated_at = now() WHERE id = $1`, wsID)
|
||||
`UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2`,
|
||||
models.StatusFailed, wsID)
|
||||
broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_PROVISION_FAILED", wsID, map[string]interface{}{
|
||||
"error": err.Error(),
|
||||
})
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
package db_test
|
||||
|
||||
// Architecture test (#2344): db is a leaf — DB pool + migrations + raw
|
||||
// SQL helpers, no business-logic dependencies. The DB layer must be
|
||||
// testable with sqlmock in isolation. If db starts importing handlers
|
||||
// or provisioner, every db unit test would need to bring up that
|
||||
// subsystem, and the layering becomes circular.
|
||||
//
|
||||
// If this test fails: you put business logic in the db package. Move
|
||||
// it to a higher-tier package that imports db, not the reverse.
|
||||
|
||||
import (
|
||||
"go/parser"
|
||||
"go/token"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
const moduleInternalPrefix = "github.com/Molecule-AI/molecule-monorepo/platform/internal/"
|
||||
|
||||
func TestDBHasNoInternalDependencies(t *testing.T) {
|
||||
t.Parallel()
|
||||
for path, file := range listImports(t, ".") {
|
||||
if strings.HasPrefix(path, moduleInternalPrefix) {
|
||||
t.Errorf(
|
||||
"db must not import other internal packages "+
|
||||
"(found %q in %s) — db is the foundation layer and a "+
|
||||
"reverse dep creates a cycle (everything imports db). "+
|
||||
"See workspace-server/internal/db/architecture_test.go.",
|
||||
path, file,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func listImports(t *testing.T, dir string) map[string]string {
|
||||
t.Helper()
|
||||
fset := token.NewFileSet()
|
||||
entries, err := os.ReadDir(dir)
|
||||
if err != nil {
|
||||
t.Fatalf("read %s: %v", dir, err)
|
||||
}
|
||||
out := make(map[string]string)
|
||||
for _, e := range entries {
|
||||
name := e.Name()
|
||||
if e.IsDir() || !strings.HasSuffix(name, ".go") || strings.HasSuffix(name, "_test.go") {
|
||||
continue
|
||||
}
|
||||
f, err := parser.ParseFile(fset, filepath.Join(dir, name), nil, parser.ImportsOnly)
|
||||
if err != nil {
|
||||
t.Fatalf("parse %s: %v", name, err)
|
||||
}
|
||||
for _, imp := range f.Imports {
|
||||
path := strings.Trim(imp.Path.Value, "\"")
|
||||
if _, seen := out[path]; !seen {
|
||||
out[path] = name
|
||||
}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,360 @@
|
||||
package db_test
|
||||
|
||||
// Static drift gate: every value declared in models.AllWorkspaceStatuses
|
||||
// must exist in the workspace_status enum after every migration applies.
|
||||
//
|
||||
// Why this exists: the workspace_status enum (migration 043) initially
|
||||
// shipped without 'awaiting_agent' and 'hibernating' even though
|
||||
// application code already wrote both. Every UPDATE silently failed in
|
||||
// production for five days because:
|
||||
//
|
||||
// - Status values were ad-hoc string literals scattered across raw
|
||||
// SQL strings in 8+ files, with no compile-time check.
|
||||
// - sqlmock matched SQL by regex, not against the live enum.
|
||||
// - Errors were dropped or log-and-continued at every call site.
|
||||
//
|
||||
// The fix is layered. This gate is the static layer:
|
||||
//
|
||||
// - models.AllWorkspaceStatuses is the source of truth for the
|
||||
// codebase side. Every status write goes through one of those
|
||||
// typed constants (the parameterized-write refactor enforces this).
|
||||
// - The migrations are the source of truth for the DB side.
|
||||
// - This test parses both and asserts the codebase set ⊆ migration set.
|
||||
//
|
||||
// If you add a new status:
|
||||
//
|
||||
// 1. Add a `Status…` constant in models/workspace_status.go AND
|
||||
// append it to AllWorkspaceStatuses.
|
||||
// 2. Open a migration `ALTER TYPE workspace_status ADD VALUE 'X'`.
|
||||
// 3. This test confirms both happened in the same PR.
|
||||
//
|
||||
// If you intend to retire a status: keep it in the enum as long as any
|
||||
// row could legitimately still hold it, then drop it from
|
||||
// AllWorkspaceStatuses (the gate runs the inclusion in one direction
|
||||
// only — extras in the enum are fine).
|
||||
|
||||
import (
|
||||
"go/ast"
|
||||
"go/parser"
|
||||
"go/token"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestWorkspaceStatusEnum_NoLiteralDrift(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
repoRoot := findRepoRoot(t)
|
||||
migrationsDir := filepath.Join(repoRoot, "workspace-server", "migrations")
|
||||
statusFile := filepath.Join(repoRoot, "workspace-server", "internal", "models", "workspace_status.go")
|
||||
srcRoot := filepath.Join(repoRoot, "workspace-server")
|
||||
|
||||
enum := loadWorkspaceStatusEnum(t, migrationsDir)
|
||||
if len(enum) == 0 {
|
||||
t.Fatalf("could not parse workspace_status enum from %s — gate is non-functional", migrationsDir)
|
||||
}
|
||||
|
||||
codebase := loadAllWorkspaceStatuses(t, statusFile)
|
||||
if len(codebase) == 0 {
|
||||
t.Fatalf("could not parse models.AllWorkspaceStatuses from %s — gate is non-functional", statusFile)
|
||||
}
|
||||
|
||||
var rogue []string
|
||||
for lit := range codebase {
|
||||
if _, ok := enum[lit]; !ok {
|
||||
rogue = append(rogue, lit)
|
||||
}
|
||||
}
|
||||
if len(rogue) > 0 {
|
||||
sort.Strings(rogue)
|
||||
t.Errorf(
|
||||
"workspace status constants %v are declared in models.AllWorkspaceStatuses but not in the workspace_status enum.\n"+
|
||||
"Add a migration `ALTER TYPE workspace_status ADD VALUE 'X';` (see migration 046 for shape).\n"+
|
||||
"Enum currently: %v\nCodebase declares: %v",
|
||||
rogue, sortedKeys(enum), sortedKeys(codebase),
|
||||
)
|
||||
}
|
||||
|
||||
// Second axis: scan production .go files for hard-coded
|
||||
// `UPDATE workspaces SET status = '<literal>'`. Every status write must
|
||||
// flow through models.Status* constants — the typed-constants refactor
|
||||
// (PR #2396) made this enforceable. Without this scan, a future
|
||||
// site-update can silently re-introduce a literal that bypasses
|
||||
// AllWorkspaceStatuses + the migration gate above. The hard-coded site
|
||||
// in workspace_bootstrap.go:62 was missed in the initial sweep and
|
||||
// only caught by manual grep — this gate makes that automatic.
|
||||
if hits := findHardCodedStatusWrites(t, srcRoot); len(hits) > 0 {
|
||||
t.Errorf(
|
||||
"hard-coded `SET status = '<literal>'` found in production code — replace with a parameterized $N + models.Status* constant:\n %s",
|
||||
strings.Join(hits, "\n "),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// loadWorkspaceStatusEnum scans every *.up.sql file for either:
|
||||
//
|
||||
// CREATE TYPE workspace_status AS ENUM ('a', 'b', ...)
|
||||
// ALTER TYPE workspace_status ADD VALUE [IF NOT EXISTS] 'X' [BEFORE|AFTER 'Y']
|
||||
//
|
||||
// and returns the union of every value the enum will hold after all
|
||||
// migrations apply.
|
||||
func loadWorkspaceStatusEnum(t *testing.T, migrationsDir string) map[string]struct{} {
|
||||
t.Helper()
|
||||
|
||||
out := make(map[string]struct{})
|
||||
|
||||
files, err := filepath.Glob(filepath.Join(migrationsDir, "*.up.sql"))
|
||||
if err != nil {
|
||||
t.Fatalf("glob migrations: %v", err)
|
||||
}
|
||||
sort.Strings(files)
|
||||
|
||||
createRE := regexp.MustCompile(`(?is)CREATE\s+TYPE\s+workspace_status\s+AS\s+ENUM\s*\(([^)]+)\)`)
|
||||
addValueRE := regexp.MustCompile(`(?i)ALTER\s+TYPE\s+workspace_status\s+ADD\s+VALUE(?:\s+IF\s+NOT\s+EXISTS)?\s+'([^']+)'`)
|
||||
literalRE := regexp.MustCompile(`'([^']+)'`)
|
||||
|
||||
for _, f := range files {
|
||||
body, err := os.ReadFile(f)
|
||||
if err != nil {
|
||||
t.Fatalf("read %s: %v", f, err)
|
||||
}
|
||||
for _, m := range createRE.FindAllStringSubmatch(string(body), -1) {
|
||||
for _, lit := range literalRE.FindAllStringSubmatch(m[1], -1) {
|
||||
out[lit[1]] = struct{}{}
|
||||
}
|
||||
}
|
||||
for _, m := range addValueRE.FindAllStringSubmatch(string(body), -1) {
|
||||
out[m[1]] = struct{}{}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// loadAllWorkspaceStatuses parses workspace_status.go and extracts:
|
||||
//
|
||||
// - Every `Status… WorkspaceStatus = "..."` declaration in the const block.
|
||||
// - Every entry in the AllWorkspaceStatuses slice literal.
|
||||
//
|
||||
// The gate asserts the slice's set equals (or is a subset of) the const
|
||||
// block's set, so a new status added to the const block but forgotten
|
||||
// in AllWorkspaceStatuses surfaces here. AllWorkspaceStatuses is the
|
||||
// canonical "what the codebase expects the DB to accept" list — any
|
||||
// const not in the slice is unenforced by the gate.
|
||||
func loadAllWorkspaceStatuses(t *testing.T, statusFile string) map[string]struct{} {
|
||||
t.Helper()
|
||||
|
||||
fset := token.NewFileSet()
|
||||
f, err := parser.ParseFile(fset, statusFile, nil, parser.ParseComments)
|
||||
if err != nil {
|
||||
t.Fatalf("parse %s: %v", statusFile, err)
|
||||
}
|
||||
|
||||
consts := make(map[string]string) // const name → string value
|
||||
var sliceEntries []string // identifiers used in AllWorkspaceStatuses
|
||||
allWorkspaceStatusesFound := false
|
||||
|
||||
ast.Inspect(f, func(n ast.Node) bool {
|
||||
switch decl := n.(type) {
|
||||
case *ast.GenDecl:
|
||||
if decl.Tok == token.CONST {
|
||||
for _, spec := range decl.Specs {
|
||||
vs, ok := spec.(*ast.ValueSpec)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for i, name := range vs.Names {
|
||||
if !strings.HasPrefix(name.Name, "Status") {
|
||||
continue
|
||||
}
|
||||
if i >= len(vs.Values) {
|
||||
continue
|
||||
}
|
||||
lit, ok := vs.Values[i].(*ast.BasicLit)
|
||||
if !ok || lit.Kind != token.STRING {
|
||||
continue
|
||||
}
|
||||
unquoted := strings.Trim(lit.Value, `"`)
|
||||
consts[name.Name] = unquoted
|
||||
}
|
||||
}
|
||||
}
|
||||
if decl.Tok == token.VAR {
|
||||
for _, spec := range decl.Specs {
|
||||
vs, ok := spec.(*ast.ValueSpec)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for i, name := range vs.Names {
|
||||
if name.Name != "AllWorkspaceStatuses" {
|
||||
continue
|
||||
}
|
||||
allWorkspaceStatusesFound = true
|
||||
if i >= len(vs.Values) {
|
||||
continue
|
||||
}
|
||||
composite, ok := vs.Values[i].(*ast.CompositeLit)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, elt := range composite.Elts {
|
||||
ident, ok := elt.(*ast.Ident)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
sliceEntries = append(sliceEntries, ident.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
if !allWorkspaceStatusesFound {
|
||||
t.Fatalf("AllWorkspaceStatuses not found in %s", statusFile)
|
||||
}
|
||||
|
||||
// Cross-check: every slice entry must resolve to a known const.
|
||||
out := make(map[string]struct{})
|
||||
for _, entry := range sliceEntries {
|
||||
v, ok := consts[entry]
|
||||
if !ok {
|
||||
t.Errorf("AllWorkspaceStatuses references undefined identifier %q in %s", entry, statusFile)
|
||||
continue
|
||||
}
|
||||
out[v] = struct{}{}
|
||||
}
|
||||
|
||||
// Cross-check: every const must be in the slice (otherwise the
|
||||
// gate runs against an outdated source-of-truth list).
|
||||
sliceSet := make(map[string]struct{}, len(sliceEntries))
|
||||
for _, e := range sliceEntries {
|
||||
sliceSet[e] = struct{}{}
|
||||
}
|
||||
for name := range consts {
|
||||
if _, ok := sliceSet[name]; !ok {
|
||||
t.Errorf(
|
||||
"const %q is declared but missing from AllWorkspaceStatuses in %s — "+
|
||||
"add it to the slice or the drift gate cannot enforce migration coverage for it",
|
||||
name, statusFile,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// findHardCodedStatusWrites walks workspace-server/ production .go files
|
||||
// (excluding *_test.go) and returns any string literal that contains a
|
||||
// `SET status = '<literal>'` write against the workspaces table. Uses Go
|
||||
// AST so quoted snippets in comments don't false-positive.
|
||||
func findHardCodedStatusWrites(t *testing.T, srcRoot string) []string {
|
||||
t.Helper()
|
||||
|
||||
// Match `SET status = '<lit>'` only in strings that also reference
|
||||
// the workspaces table — narrows out a2a_queue / agents / approvals
|
||||
// which have their own status enums.
|
||||
literalRE := regexp.MustCompile(`(?is)UPDATE\s+workspaces\b[^']*?SET\s+status\s*=\s*'([^']+)'`)
|
||||
|
||||
var hits []string
|
||||
walkErr := filepath.Walk(srcRoot, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
// Skip vendor + .git + migrations (literals there are intentional).
|
||||
base := filepath.Base(path)
|
||||
if base == "vendor" || base == ".git" || base == "migrations" {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if !strings.HasSuffix(path, ".go") || strings.HasSuffix(path, "_test.go") {
|
||||
return nil
|
||||
}
|
||||
|
||||
fset := token.NewFileSet()
|
||||
f, parseErr := parser.ParseFile(fset, path, nil, parser.ParseComments)
|
||||
if parseErr != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
ast.Inspect(f, func(n ast.Node) bool {
|
||||
lit, ok := n.(*ast.BasicLit)
|
||||
if !ok || lit.Kind != token.STRING {
|
||||
return true
|
||||
}
|
||||
s := lit.Value
|
||||
if !strings.Contains(s, "UPDATE workspaces") && !strings.Contains(s, "UPDATE\nworkspaces") && !strings.Contains(s, "UPDATE\n\t\t\tworkspaces") {
|
||||
return true
|
||||
}
|
||||
for _, m := range literalRE.FindAllStringSubmatch(s, -1) {
|
||||
pos := fset.Position(lit.Pos())
|
||||
rel, _ := filepath.Rel(srcRoot, path)
|
||||
hits = append(hits, rel+":"+itoa(pos.Line)+" → SET status = '"+m[1]+"'")
|
||||
}
|
||||
return true
|
||||
})
|
||||
return nil
|
||||
})
|
||||
if walkErr != nil {
|
||||
t.Fatalf("walk %s: %v", srcRoot, walkErr)
|
||||
}
|
||||
sort.Strings(hits)
|
||||
return hits
|
||||
}
|
||||
|
||||
func itoa(n int) string {
|
||||
if n == 0 {
|
||||
return "0"
|
||||
}
|
||||
neg := n < 0
|
||||
if neg {
|
||||
n = -n
|
||||
}
|
||||
var b [20]byte
|
||||
i := len(b)
|
||||
for n > 0 {
|
||||
i--
|
||||
b[i] = byte('0' + n%10)
|
||||
n /= 10
|
||||
}
|
||||
if neg {
|
||||
i--
|
||||
b[i] = '-'
|
||||
}
|
||||
return string(b[i:])
|
||||
}
|
||||
|
||||
func findRepoRoot(t *testing.T) string {
|
||||
t.Helper()
|
||||
dir, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("getwd: %v", err)
|
||||
}
|
||||
for i := 0; i < 8; i++ {
|
||||
if _, err := os.Stat(filepath.Join(dir, "workspace-server", "migrations")); err == nil {
|
||||
return dir
|
||||
}
|
||||
parent := filepath.Dir(dir)
|
||||
if parent == dir {
|
||||
break
|
||||
}
|
||||
dir = parent
|
||||
}
|
||||
t.Fatalf("could not locate repo root with workspace-server/migrations from %s", dir)
|
||||
return ""
|
||||
}
|
||||
|
||||
func sortedKeys(m map[string]struct{}) []string {
|
||||
out := make([]string, 0, len(m))
|
||||
for k := range m {
|
||||
out = append(out, k)
|
||||
}
|
||||
sort.Strings(out)
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,318 @@
|
||||
package handlers
|
||||
|
||||
// a2a_corpus_test.go — backward-compat replay gate for the A2A
|
||||
// JSON-RPC protocol surface. Every PR that touches
|
||||
// normalizeA2APayload OR bumps the a-2-a-sdk version pin runs
|
||||
// every shape in testdata/a2a_corpus/ through the current code
|
||||
// and asserts:
|
||||
//
|
||||
// valid/ — every shape MUST parse without error and produce a
|
||||
// canonical v0.3 payload (params.message.parts list).
|
||||
//
|
||||
// invalid/ — every shape MUST be rejected with the documented
|
||||
// status code and error substring. Pins the
|
||||
// rejection contract so a future PR doesn't silently
|
||||
// start accepting malformed payloads.
|
||||
//
|
||||
// Closes the gap that allowed the 2026-04-29 v0.2 → v0.3 silent-
|
||||
// drop bug (PR #2349). That bug shipped because the SDK bump PR
|
||||
// didn't replay v0.2-shaped inputs against the new code; the
|
||||
// shape-mismatch surfaced only in production when the receiver's
|
||||
// Pydantic validator silently rejected inbound messages.
|
||||
//
|
||||
// Adding to the corpus: see testdata/a2a_corpus/README.md.
|
||||
// Removing from valid/: breaking change, requires explicit
|
||||
// approval per the README.
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
const (
|
||||
corpusValidDir = "testdata/a2a_corpus/valid"
|
||||
corpusInvalidDir = "testdata/a2a_corpus/invalid"
|
||||
)
|
||||
|
||||
// metadataFields are the documentation-only keys the corpus loader
|
||||
// strips before passing the payload to normalizeA2APayload. They
|
||||
// are required for every corpus entry per the README policy.
|
||||
var metadataFields = []string{
|
||||
"_comment",
|
||||
"_added",
|
||||
"_source",
|
||||
"_expect_error",
|
||||
"_expect_status",
|
||||
}
|
||||
|
||||
// loadCorpusEntry reads one JSON file, parses it as a generic map,
|
||||
// extracts the metadata fields (including expected error/status for
|
||||
// invalid entries), strips them from the payload, and returns the
|
||||
// stripped JSON bytes ready for normalizeA2APayload.
|
||||
func loadCorpusEntry(t *testing.T, path string) (payload []byte, expectErr string, expectStatus int) {
|
||||
t.Helper()
|
||||
raw, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatalf("read %s: %v", path, err)
|
||||
}
|
||||
var doc map[string]interface{}
|
||||
if err := json.Unmarshal(raw, &doc); err != nil {
|
||||
t.Fatalf("parse %s as JSON: %v", path, err)
|
||||
}
|
||||
// Pull metadata before strip.
|
||||
if v, ok := doc["_expect_error"].(string); ok {
|
||||
expectErr = v
|
||||
}
|
||||
if v, ok := doc["_expect_status"].(float64); ok {
|
||||
expectStatus = int(v)
|
||||
}
|
||||
for _, f := range metadataFields {
|
||||
delete(doc, f)
|
||||
}
|
||||
payload, err = json.Marshal(doc)
|
||||
if err != nil {
|
||||
t.Fatalf("re-marshal %s after strip: %v", path, err)
|
||||
}
|
||||
return payload, expectErr, expectStatus
|
||||
}
|
||||
|
||||
// listCorpus enumerates every .json file under dir and returns
|
||||
// (filename → full path). Sorted for stable test ordering.
|
||||
func listCorpus(t *testing.T, dir string) map[string]string {
|
||||
t.Helper()
|
||||
out := map[string]string{}
|
||||
entries, err := os.ReadDir(dir)
|
||||
if err != nil {
|
||||
t.Fatalf("read %s: %v", dir, err)
|
||||
}
|
||||
for _, e := range entries {
|
||||
if e.IsDir() || !strings.HasSuffix(e.Name(), ".json") {
|
||||
continue
|
||||
}
|
||||
out[e.Name()] = filepath.Join(dir, e.Name())
|
||||
}
|
||||
if len(out) == 0 {
|
||||
t.Fatalf("corpus dir %s is empty — at least one entry is required", dir)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// TestA2ACorpus_ValidShapesParse replays every entry in valid/
|
||||
// through normalizeA2APayload and asserts:
|
||||
// 1. No error returned.
|
||||
// 2. The output's params.message.parts is a non-empty list
|
||||
// (v0.3 canonical shape — the compat shim must have converted
|
||||
// any v0.2 content field into parts).
|
||||
// 3. The output's params.message.messageId is non-empty (the
|
||||
// normalizer auto-fills if the sender omitted it).
|
||||
// 4. The output's method matches the input's method (the
|
||||
// normalizer is method-agnostic).
|
||||
//
|
||||
// One subtest per corpus entry — failures point directly at the
|
||||
// offending shape file.
|
||||
func TestA2ACorpus_ValidShapesParse(t *testing.T) {
|
||||
t.Parallel()
|
||||
for name, path := range listCorpus(t, corpusValidDir) {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
payload, _, _ := loadCorpusEntry(t, path)
|
||||
|
||||
normalized, method, perr := normalizeA2APayload(payload)
|
||||
if perr != nil {
|
||||
t.Fatalf("valid/%s: normalizeA2APayload returned error %d: %v",
|
||||
name, perr.Status, perr.Response)
|
||||
}
|
||||
|
||||
// Read back the normalized payload to verify shape invariants.
|
||||
var parsed map[string]interface{}
|
||||
if err := json.Unmarshal(normalized, &parsed); err != nil {
|
||||
t.Fatalf("valid/%s: normalized output not valid JSON: %v", name, err)
|
||||
}
|
||||
|
||||
// Method-agnostic check — input method survives normalization.
|
||||
if input := mustGetString(t, parsed, "method"); input != method {
|
||||
t.Errorf("valid/%s: method mismatch — got %q, want %q",
|
||||
name, method, input)
|
||||
}
|
||||
|
||||
// Canonical v0.3 shape invariants: params.message.parts is a
|
||||
// non-empty list, messageId is non-empty.
|
||||
params := mustGetMap(t, parsed, "params")
|
||||
msg := mustGetMap(t, params, "message")
|
||||
|
||||
parts, ok := msg["parts"].([]interface{})
|
||||
if !ok {
|
||||
t.Errorf("valid/%s: params.message.parts is not a list (got %T)",
|
||||
name, msg["parts"])
|
||||
return
|
||||
}
|
||||
if len(parts) == 0 {
|
||||
t.Errorf("valid/%s: params.message.parts is empty — compat shim should have converted content", name)
|
||||
}
|
||||
|
||||
if id := mustGetString(t, msg, "messageId"); id == "" {
|
||||
t.Errorf("valid/%s: params.message.messageId is empty after normalization", name)
|
||||
}
|
||||
|
||||
// content must NOT survive into the output — the shim
|
||||
// deletes it after converting to parts. If the shim left
|
||||
// content in place, downstream pydantic v0.3 would still
|
||||
// reject because it doesn't know that field.
|
||||
if _, hasContent := msg["content"]; hasContent {
|
||||
t.Errorf("valid/%s: params.message.content survived normalization (compat shim should delete it)", name)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestA2ACorpus_InvalidShapesRejected replays every entry in
|
||||
// invalid/ through normalizeA2APayload and asserts the rejection
|
||||
// matches the documented contract — same status code AND error
|
||||
// substring as recorded in the corpus entry's metadata.
|
||||
//
|
||||
// Catches the regression class "future PR adds permissive defaults
|
||||
// that silently accept what we used to reject loud."
|
||||
func TestA2ACorpus_InvalidShapesRejected(t *testing.T) {
|
||||
t.Parallel()
|
||||
for name, path := range listCorpus(t, corpusInvalidDir) {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
payload, expectErr, expectStatus := loadCorpusEntry(t, path)
|
||||
|
||||
if expectErr == "" {
|
||||
t.Fatalf("invalid/%s: missing _expect_error metadata", name)
|
||||
}
|
||||
if expectStatus == 0 {
|
||||
t.Fatalf("invalid/%s: missing _expect_status metadata", name)
|
||||
}
|
||||
|
||||
_, _, perr := normalizeA2APayload(payload)
|
||||
if perr == nil {
|
||||
t.Fatalf("invalid/%s: normalizeA2APayload returned no error — should have rejected", name)
|
||||
}
|
||||
if perr.Status != expectStatus {
|
||||
t.Errorf("invalid/%s: status = %d, want %d", name, perr.Status, expectStatus)
|
||||
}
|
||||
|
||||
body, _ := json.Marshal(perr.Response)
|
||||
if !strings.Contains(string(body), expectErr) {
|
||||
t.Errorf("invalid/%s: error response %q does not contain expected substring %q",
|
||||
name, string(body), expectErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestA2ACorpus_MalformedJSONRejected covers the case where the
|
||||
// body isn't valid JSON at all. The corpus is JSON-only so this
|
||||
// can't be expressed as a corpus entry; pin the contract inline.
|
||||
func TestA2ACorpus_MalformedJSONRejected(t *testing.T) {
|
||||
t.Parallel()
|
||||
cases := []struct {
|
||||
name string
|
||||
payload []byte
|
||||
}{
|
||||
{"truncated_object", []byte(`{"jsonrpc":"2.0","method":"message/send"`)},
|
||||
{"not_json_at_all", []byte(`this is not json`)},
|
||||
{"empty_body", []byte(``)},
|
||||
{"only_whitespace", []byte(` `)},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
_, _, perr := normalizeA2APayload(tc.payload)
|
||||
if perr == nil {
|
||||
t.Fatalf("expected error for %s, got none", tc.name)
|
||||
}
|
||||
if perr.Status != http.StatusBadRequest {
|
||||
t.Errorf("status = %d, want %d", perr.Status, http.StatusBadRequest)
|
||||
}
|
||||
body, _ := json.Marshal(perr.Response)
|
||||
if !strings.Contains(string(body), "invalid JSON") {
|
||||
t.Errorf("expected 'invalid JSON' in response, got %q", string(body))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestA2ACorpus_HasMinimumCoverage pins the corpus's
|
||||
// representativeness. The corpus must have at least one v0.2
|
||||
// entry (string content) and at least one v0.3 entry (parts list)
|
||||
// — losing either side of the schema bridge would silently drop
|
||||
// the most important coverage.
|
||||
func TestA2ACorpus_HasMinimumCoverage(t *testing.T) {
|
||||
t.Parallel()
|
||||
files := listCorpus(t, corpusValidDir)
|
||||
hasV02 := false
|
||||
hasV03 := false
|
||||
for name := range files {
|
||||
if strings.Contains(name, "v0_2_") {
|
||||
hasV02 = true
|
||||
}
|
||||
if strings.Contains(name, "v0_3_") {
|
||||
hasV03 = true
|
||||
}
|
||||
}
|
||||
if !hasV02 {
|
||||
t.Error("corpus has no v0_2_*.json entries — backward-compat coverage missing")
|
||||
}
|
||||
if !hasV03 {
|
||||
t.Error("corpus has no v0_3_*.json entries — forward (canonical) coverage missing")
|
||||
}
|
||||
}
|
||||
|
||||
// TestA2ACorpus_EveryEntryHasMetadata pins the README policy:
|
||||
// every corpus entry MUST have _comment, _added, _source. Catches
|
||||
// the bad commit shape "added entry without explanation" before
|
||||
// review.
|
||||
func TestA2ACorpus_EveryEntryHasMetadata(t *testing.T) {
|
||||
t.Parallel()
|
||||
for _, dir := range []string{corpusValidDir, corpusInvalidDir} {
|
||||
for name, path := range listCorpus(t, dir) {
|
||||
t.Run(filepath.Base(dir)+"/"+name, func(t *testing.T) {
|
||||
raw, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatalf("read %s: %v", path, err)
|
||||
}
|
||||
var doc map[string]interface{}
|
||||
if err := json.Unmarshal(raw, &doc); err != nil {
|
||||
t.Fatalf("parse %s: %v", path, err)
|
||||
}
|
||||
required := []string{"_comment", "_added", "_source"}
|
||||
if dir == corpusInvalidDir {
|
||||
required = append(required, "_expect_error", "_expect_status")
|
||||
}
|
||||
for _, key := range required {
|
||||
if _, ok := doc[key]; !ok {
|
||||
t.Errorf("missing required metadata field %q", key)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func mustGetMap(t *testing.T, m map[string]interface{}, key string) map[string]interface{} {
|
||||
t.Helper()
|
||||
v, ok := m[key].(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("expected %q to be a map, got %T", key, m[key])
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
func mustGetString(t *testing.T, m map[string]interface{}, key string) string {
|
||||
t.Helper()
|
||||
v, ok := m[key].(string)
|
||||
if !ok {
|
||||
t.Fatalf("expected %q to be a string, got %T", key, m[key])
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// _ silences the unused-import linter for fmt in case future
|
||||
// helpers don't use it. Currently used by the t.Helper-style
|
||||
// formatters above (kept inline for clarity).
|
||||
var _ = fmt.Sprintf
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"errors"
|
||||
"io"
|
||||
"log"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"strconv"
|
||||
@@ -21,8 +22,10 @@ import (
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/registry"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
@@ -90,13 +93,47 @@ func isSystemCaller(callerID string) bool {
|
||||
const maxProxyResponseBody = 10 << 20
|
||||
|
||||
// a2aClient is a shared HTTP client for proxying A2A requests to workspace agents.
|
||||
// No client-level timeout — timeouts are enforced per-request via context
|
||||
// deadlines: canvas = 5 min (Rule 3), agent-to-agent = 30 min (DoS cap). Do NOT
|
||||
// set a Client.Timeout here: it is enforced independently of ctx deadlines and
|
||||
// would pre-empt legitimate slow cold-start flows (e.g. Claude Code first-token
|
||||
// over OAuth can take 30-60s on boot). Callers that want a safety net should
|
||||
// build a context.WithTimeout themselves.
|
||||
var a2aClient = &http.Client{}
|
||||
//
|
||||
// Timeout model — three independent budgets, none of which gets in each other's way:
|
||||
//
|
||||
// 1. Client.Timeout — DELIBERATELY UNSET. Client.Timeout is a hard wall on
|
||||
// the entire request including streamed body reads, and would pre-empt
|
||||
// legitimate slow cold-start flows (Claude Code first-token over OAuth
|
||||
// can take 30-60s on boot; long-running agent synthesis can stream
|
||||
// tokens for minutes). Total-request budget is enforced per-request
|
||||
// via context deadline (canvas = idle-only, agent-to-agent = 30 min ceiling).
|
||||
//
|
||||
// 2. Transport.DialContext — 10s connect timeout. When a workspace's EC2
|
||||
// black-holes TCP connects (instance terminated mid-flight, security group
|
||||
// flipped, NACL bug), the OS default is 75s on Linux / 21s on macOS — long
|
||||
// enough that Cloudflare's ~100s edge timeout can fire first and surface
|
||||
// a generic 502 page to canvas. 10s is well above realistic intra-region
|
||||
// latencies and well below CF's edge timeout.
|
||||
//
|
||||
// 3. Transport.ResponseHeaderTimeout — 60s. From request-body-end to
|
||||
// response-headers-start. Covers cold-start first-byte (the 30-60s OAuth
|
||||
// flow above), with margin. Body streaming after headers is governed by
|
||||
// the per-request context deadline, NOT this timeout — so multi-minute
|
||||
// agent responses still work fine.
|
||||
//
|
||||
// The point of (2) and (3) is to surface a *structured* 503 from
|
||||
// handleA2ADispatchError when the workspace agent is unreachable, so canvas
|
||||
// gets `{"error":"workspace agent unreachable","restarting":true}` instead
|
||||
// of Cloudflare's opaque 502 error page. Without these, dead workspaces hang
|
||||
// long enough that CF gives up first and shows its own page.
|
||||
var a2aClient = &http.Client{
|
||||
Transport: &http.Transport{
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: 10 * time.Second,
|
||||
KeepAlive: 30 * time.Second,
|
||||
}).DialContext,
|
||||
ResponseHeaderTimeout: 60 * time.Second,
|
||||
TLSHandshakeTimeout: 10 * time.Second,
|
||||
// MaxIdleConns / IdleConnTimeout: stdlib defaults are fine; agent
|
||||
// fan-in is bounded by the platform's broadcaster fan-out, not by
|
||||
// connection-pool sizing.
|
||||
},
|
||||
}
|
||||
|
||||
type proxyA2AError struct {
|
||||
Status int
|
||||
@@ -144,6 +181,35 @@ func isUpstreamBusyError(err error) bool {
|
||||
strings.Contains(msg, "connection reset")
|
||||
}
|
||||
|
||||
// isUpstreamDeadStatus returns true when the upstream HTTP status indicates
|
||||
// the workspace agent is unreachable / unresponsive at the network layer
|
||||
// (vs an agent-authored 5xx with a real body). Used by the proxy to gate
|
||||
// reactive container-dead detection + auto-restart.
|
||||
//
|
||||
// - 502 Bad Gateway, 503 Service Unavailable, 504 Gateway Timeout: standard
|
||||
// proxy-layer "upstream is broken" codes (Cloudflare, ELB, agent tunnel).
|
||||
// - 521 Web Server Is Down: Cloudflare can't open TCP to origin (most
|
||||
// direct dead-EC2 signal).
|
||||
// - 522 Connection Timed Out: Cloudflare opened TCP but no response within
|
||||
// ~15s — typical of SG/NACL flap or agent process hung.
|
||||
// - 523 Origin Is Unreachable: Cloudflare can't route to origin (DNS or
|
||||
// network-path failure).
|
||||
// - 524 A Timeout Occurred: TCP succeeded, but origin didn't return
|
||||
// headers within ~100s — agent process alive but wedged.
|
||||
//
|
||||
// We always probe IsRunning before acting, so a transient false positive
|
||||
// from this set just costs one CP API call.
|
||||
func isUpstreamDeadStatus(status int) bool {
|
||||
switch status {
|
||||
case http.StatusBadGateway, // 502
|
||||
http.StatusServiceUnavailable, // 503
|
||||
http.StatusGatewayTimeout, // 504
|
||||
521, 522, 523, 524: // CF dead-origin family
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (e *proxyA2AError) Error() string {
|
||||
if e == nil || e.Response == nil {
|
||||
return "proxy a2a error"
|
||||
@@ -192,6 +258,27 @@ func (h *WorkspaceHandler) ProxyA2A(c *gin.Context) {
|
||||
|
||||
callerID := c.GetHeader("X-Workspace-ID")
|
||||
|
||||
// #2306: when X-Workspace-ID isn't set, derive callerID from the bearer
|
||||
// token's owning workspace. External callers (third-party SDKs, the
|
||||
// channel plugin, etc.) authenticate purely via bearer and frequently
|
||||
// don't set the header — without this, activity_logs.source_id ends up
|
||||
// NULL and downstream consumers (notification peer_id, "Agent Comms by
|
||||
// peer" tab, analytics) can't identify the sender. The bearer is the
|
||||
// authoritative caller identity per the wsauth contract; the header is
|
||||
// just a display/routing hint that must agree with it.
|
||||
//
|
||||
// Skip when an org-level token is in play (canvas/admin path) — those
|
||||
// tokens grant org-wide access and don't bind to a single workspace.
|
||||
if callerID == "" {
|
||||
if _, isOrg := c.Get("org_token_id"); !isOrg {
|
||||
if tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization")); tok != "" {
|
||||
if wsID, err := wsauth.WorkspaceFromToken(ctx, db.DB, tok); err == nil {
|
||||
callerID = wsID
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// #761 SECURITY: reject requests where the client-supplied X-Workspace-ID
|
||||
// contains a system-caller prefix. isSystemCaller() bypasses both token
|
||||
// validation and CanCommunicate. On the public /a2a endpoint, system-caller
|
||||
@@ -283,17 +370,54 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
|
||||
return 0, nil, proxyErr
|
||||
}
|
||||
|
||||
agentURL, proxyErr := h.resolveAgentURL(ctx, workspaceID)
|
||||
if proxyErr != nil {
|
||||
return 0, nil, proxyErr
|
||||
}
|
||||
|
||||
// Normalize the JSON-RPC envelope BEFORE the poll-mode short-circuit
|
||||
// so the activity_logs entry carries the protocol method name (initialize,
|
||||
// message/send, etc.) — the polling agent uses that to dispatch the
|
||||
// request body to the right handler. Doing it here also means a
|
||||
// malformed payload fails the same way for push and poll callers
|
||||
// (consistent 400 instead of "queued garbage").
|
||||
normalizedBody, a2aMethod, proxyErr := normalizeA2APayload(body)
|
||||
if proxyErr != nil {
|
||||
return 0, nil, proxyErr
|
||||
}
|
||||
body = normalizedBody
|
||||
|
||||
// #2339 PR 2 — poll-mode short-circuit. When the target workspace
|
||||
// is registered as delivery_mode=poll (e.g. an operator's laptop
|
||||
// running molecule-mcp-claude-channel), the platform does NOT
|
||||
// dispatch over HTTP — the agent has no public URL. Instead we record
|
||||
// the A2A request to activity_logs and the agent picks it up via
|
||||
// GET /activity?since_id= (PR 3).
|
||||
//
|
||||
// Returning here means we skip resolveAgentURL entirely (no SSRF check
|
||||
// needed — there's no URL to validate; no DNS lookup against potentially-
|
||||
// changing operator-side IPs) and skip the dispatch path completely
|
||||
// (no Do(), no maybeMarkContainerDead). The response is a synthetic
|
||||
// {status:"queued"} envelope so the caller (canvas, another workspace)
|
||||
// knows delivery is acknowledged but pending consumption.
|
||||
if lookupDeliveryMode(ctx, workspaceID) == models.DeliveryModePoll {
|
||||
if logActivity {
|
||||
h.logA2AReceiveQueued(ctx, workspaceID, callerID, body, a2aMethod)
|
||||
}
|
||||
respBody, marshalErr := json.Marshal(gin.H{
|
||||
"status": "queued",
|
||||
"delivery_mode": models.DeliveryModePoll,
|
||||
"method": a2aMethod,
|
||||
})
|
||||
if marshalErr != nil {
|
||||
return 0, nil, &proxyA2AError{
|
||||
Status: http.StatusInternalServerError,
|
||||
Response: gin.H{"error": "failed to marshal poll-mode response"},
|
||||
}
|
||||
}
|
||||
return http.StatusOK, respBody, nil
|
||||
}
|
||||
|
||||
agentURL, proxyErr := h.resolveAgentURL(ctx, workspaceID)
|
||||
if proxyErr != nil {
|
||||
return 0, nil, proxyErr
|
||||
}
|
||||
|
||||
startTime := time.Now()
|
||||
resp, cancelFwd, err := h.dispatchA2A(ctx, workspaceID, agentURL, body, callerID)
|
||||
if cancelFwd != nil {
|
||||
@@ -362,6 +486,43 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
|
||||
if errMsg == "" {
|
||||
errMsg = http.StatusText(resp.StatusCode)
|
||||
}
|
||||
|
||||
// Upstream returned 502/503/504 (gateway/proxy failure). This is
|
||||
// the "agent process is dead but the tunnel between us and the
|
||||
// workspace is still up" signal — handleA2ADispatchError's
|
||||
// network-error path doesn't run because Do() succeeded at the
|
||||
// HTTP layer. Without this branch, the dead-agent failure mode
|
||||
// surfaces to canvas as a generic 502 (and CF in front of the
|
||||
// platform masks it with its own error page, hiding any
|
||||
// structured response we might write).
|
||||
//
|
||||
// Treatment matches handleA2ADispatchError's container-dead path:
|
||||
// 1. Probe IsRunning via maybeMarkContainerDead. If the
|
||||
// container truly is dead, mark workspace offline + kick
|
||||
// a restart goroutine.
|
||||
// 2. Return a structured 503 with restarting=true + Retry-After
|
||||
// so canvas shows a useful "agent is restarting" message
|
||||
// (and CF doesn't intercept the 503 the way it does 502).
|
||||
// If IsRunning reports the container is alive, we leave the
|
||||
// upstream status untouched — the agent legitimately returned
|
||||
// 502/503/504 (e.g. it's returning its own Bad-Gateway from
|
||||
// some downstream call) and we shouldn't mistakenly recycle it.
|
||||
//
|
||||
// Empty body is the strong signal here — a CF-tunnel "no-origin"
|
||||
// 502 has 0 bytes; an agent-authored 502 typically has a JSON
|
||||
// error body. We probe IsRunning regardless (it's the
|
||||
// authoritative check) but the empty-body case is what makes
|
||||
// this fix necessary.
|
||||
if isUpstreamDeadStatus(resp.StatusCode) {
|
||||
if h.maybeMarkContainerDead(ctx, workspaceID) {
|
||||
return 0, nil, &proxyA2AError{
|
||||
Status: http.StatusServiceUnavailable,
|
||||
Headers: map[string]string{"Retry-After": "15"},
|
||||
Response: gin.H{"error": "workspace agent unreachable — container restart triggered", "restarting": true, "retry_after": 15},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return resp.StatusCode, respBody, &proxyA2AError{
|
||||
Status: resp.StatusCode,
|
||||
Response: gin.H{"error": errMsg},
|
||||
@@ -464,11 +625,54 @@ func normalizeA2APayload(body []byte) ([]byte, string, *proxyA2AError) {
|
||||
}
|
||||
|
||||
// Ensure params.message.messageId exists (required by a2a-sdk)
|
||||
// AND v0.2→v0.3 compat (#2345): when sender supplies
|
||||
// params.message.content (v0.2) instead of params.message.parts
|
||||
// (v0.3), wrap the content as a single text Part so the downstream
|
||||
// a2a-sdk's v0.3 Pydantic validator accepts the message.
|
||||
//
|
||||
// Pre-fix: Design Director silently dropped briefs whose sender
|
||||
// used v0.2 shape — Pydantic rejected at parse time, the rejection
|
||||
// went only to logs, and the sender saw a happy 200/202.
|
||||
//
|
||||
// Reject loud (HTTP 400) when neither content nor parts is present;
|
||||
// previously the SDK's own rejection happened post-handler-dispatch
|
||||
// and was invisible to the original sender.
|
||||
if params, ok := payload["params"].(map[string]interface{}); ok {
|
||||
if msg, ok := params["message"].(map[string]interface{}); ok {
|
||||
if _, hasID := msg["messageId"]; !hasID {
|
||||
msg["messageId"] = uuid.New().String()
|
||||
}
|
||||
_, hasParts := msg["parts"]
|
||||
rawContent, hasContent := msg["content"]
|
||||
if !hasParts {
|
||||
if hasContent {
|
||||
switch v := rawContent.(type) {
|
||||
case string:
|
||||
msg["parts"] = []interface{}{
|
||||
map[string]interface{}{"kind": "text", "text": v},
|
||||
}
|
||||
case []interface{}:
|
||||
msg["parts"] = v
|
||||
default:
|
||||
return nil, "", &proxyA2AError{
|
||||
Status: http.StatusBadRequest,
|
||||
Response: gin.H{
|
||||
"error": "invalid params.message.content type",
|
||||
"hint": "content must be a string (v0.2 compat) or omitted in favour of parts (v0.3)",
|
||||
},
|
||||
}
|
||||
}
|
||||
delete(msg, "content")
|
||||
} else {
|
||||
return nil, "", &proxyA2AError{
|
||||
Status: http.StatusBadRequest,
|
||||
Response: gin.H{
|
||||
"error": "params.message must contain either 'parts' (v0.3) or 'content' (v0.2 compat)",
|
||||
"hint": "v0.3 example: {\"parts\":[{\"kind\":\"text\",\"text\":\"...\"}]}",
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"log"
|
||||
@@ -13,6 +14,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
@@ -97,8 +99,16 @@ func (h *WorkspaceHandler) handleA2ADispatchError(ctx context.Context, workspace
|
||||
}
|
||||
|
||||
idempotencyKey := extractIdempotencyKey(body)
|
||||
// Honor params.expires_in_seconds when the caller specifies one. Zero
|
||||
// (the unset default) → expiresAt = nil → infinite TTL preserved by
|
||||
// DequeueNext. RFC #2331 Tier 1.
|
||||
var expiresAt *time.Time
|
||||
if secs := extractExpiresInSeconds(body); secs > 0 {
|
||||
t := time.Now().Add(time.Duration(secs) * time.Second)
|
||||
expiresAt = &t
|
||||
}
|
||||
if qid, depth, qerr := EnqueueA2A(
|
||||
ctx, workspaceID, callerID, PriorityTask, body, a2aMethod, idempotencyKey,
|
||||
ctx, workspaceID, callerID, PriorityTask, body, a2aMethod, idempotencyKey, expiresAt,
|
||||
); qerr == nil {
|
||||
log.Printf("ProxyA2A: target %s busy — enqueued as %s (depth=%d)", workspaceID, qid, depth)
|
||||
respBody, _ := json.Marshal(gin.H{
|
||||
@@ -131,29 +141,53 @@ func (h *WorkspaceHandler) handleA2ADispatchError(ctx context.Context, workspace
|
||||
}
|
||||
|
||||
// maybeMarkContainerDead runs the reactive health check after a forward error.
|
||||
// If the workspace's Docker container is no longer running (and the workspace
|
||||
// isn't external), it marks the workspace offline, clears Redis state,
|
||||
// broadcasts WORKSPACE_OFFLINE, and triggers an async restart. Returns true
|
||||
// when the container was found dead.
|
||||
// If the workspace's compute (Docker container OR EC2 instance) is no longer
|
||||
// running (and the workspace isn't external), it marks the workspace offline,
|
||||
// clears Redis state, broadcasts WORKSPACE_OFFLINE, and triggers an async
|
||||
// restart. Returns true when the compute was found dead.
|
||||
//
|
||||
// Provisioner selection (mutually exclusive in production):
|
||||
// - h.provisioner != nil → local Docker deployment; IsRunning does docker inspect.
|
||||
// - h.cpProv != nil → SaaS / EC2 deployment; IsRunning calls CP's
|
||||
// /cp/workspaces/:id/status to read the EC2 state.
|
||||
//
|
||||
// Pre-fix this function ONLY consulted h.provisioner — for SaaS tenants
|
||||
// (h.provisioner=nil, h.cpProv=set) it short-circuited to false on every
|
||||
// call, so a dead EC2 agent would propagate upstream 502/503/504 to canvas
|
||||
// with no auto-recovery and Cloudflare in front would mask the response with
|
||||
// its own error page. The 2026-04-30 hongmingwang.moleculesai.app
|
||||
// canvas-chat-to-dead-workspace incident traces to exactly this gap.
|
||||
func (h *WorkspaceHandler) maybeMarkContainerDead(ctx context.Context, workspaceID string) bool {
|
||||
var wsRuntime string
|
||||
db.DB.QueryRowContext(ctx, `SELECT COALESCE(runtime, 'langgraph') FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsRuntime)
|
||||
if h.provisioner == nil || wsRuntime == "external" {
|
||||
if wsRuntime == "external" {
|
||||
return false
|
||||
}
|
||||
running, inspectErr := h.provisioner.IsRunning(ctx, workspaceID)
|
||||
if h.provisioner == nil && h.cpProv == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
var running bool
|
||||
var inspectErr error
|
||||
if h.provisioner != nil {
|
||||
running, inspectErr = h.provisioner.IsRunning(ctx, workspaceID)
|
||||
} else {
|
||||
// SaaS path: ask the CP about the EC2 state. Same (true, err) on
|
||||
// transport errors contract — keeps the caller on the alive path
|
||||
// instead of triggering a restart cascade on a flaky CP call.
|
||||
running, inspectErr = h.cpProv.IsRunning(ctx, workspaceID)
|
||||
}
|
||||
if inspectErr != nil {
|
||||
// Transient Docker-daemon error (timeout, socket EOF, etc.). Post-
|
||||
// #386, IsRunning returns (true, err) in this case — caller stays
|
||||
// on the alive path and does not trigger a restart cascade. Log
|
||||
// so the defect is visible without being destructive.
|
||||
// Transient backend error (Docker daemon EOF, CP HTTP 5xx, etc.).
|
||||
// IsRunning's contract returns (true, err) in this case so we stay
|
||||
// on the alive path without triggering a restart cascade.
|
||||
log.Printf("ProxyA2A: IsRunning for %s returned transient error (assuming alive): %v", workspaceID, inspectErr)
|
||||
}
|
||||
if running {
|
||||
return false
|
||||
}
|
||||
log.Printf("ProxyA2A: container for %s is dead — marking offline and triggering restart", workspaceID)
|
||||
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = 'offline', updated_at = now() WHERE id = $1 AND status NOT IN ('removed', 'provisioning')`, workspaceID); err != nil {
|
||||
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2 AND status NOT IN ('removed', 'provisioning')`, models.StatusOffline, workspaceID); err != nil {
|
||||
log.Printf("ProxyA2A: failed to mark workspace %s offline: %v", workspaceID, err)
|
||||
}
|
||||
db.ClearWorkspaceKeys(ctx, workspaceID)
|
||||
@@ -368,6 +402,74 @@ func parseUsageFromA2AResponse(body []byte) (inputTokens, outputTokens int64) {
|
||||
return 0, 0
|
||||
}
|
||||
|
||||
// lookupDeliveryMode returns the workspace's delivery_mode. On any DB
|
||||
// error or missing row it returns DeliveryModePush — the fail-closed
|
||||
// default. "Closed" here means "fall back to today's behavior (synchronous
|
||||
// dispatch)" rather than "fall back to drop the request silently into
|
||||
// activity_logs where the agent might never see it." A poll-mode workspace
|
||||
// that briefly reads as push will get its A2A request dispatched to the
|
||||
// stored URL (or a 502 if no URL); a push-mode workspace that briefly
|
||||
// reads as poll would get its request silently queued with no dispatch.
|
||||
// The first failure is loud + recoverable; the second is silent.
|
||||
//
|
||||
// The function is intentionally lookup-only — it never mutates the row.
|
||||
// The register handler (registry.go) is the only writer for delivery_mode.
|
||||
//
|
||||
// See #2339 PR 1 for the column + register-flow side; this is the
|
||||
// proxy-side read used for the short-circuit in proxyA2ARequest.
|
||||
func lookupDeliveryMode(ctx context.Context, workspaceID string) string {
|
||||
var mode sql.NullString
|
||||
err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT delivery_mode FROM workspaces WHERE id = $1`, workspaceID,
|
||||
).Scan(&mode)
|
||||
if err != nil {
|
||||
if !errors.Is(err, sql.ErrNoRows) {
|
||||
log.Printf("ProxyA2A: lookupDeliveryMode(%s) failed (%v) — defaulting to push", workspaceID, err)
|
||||
}
|
||||
return models.DeliveryModePush
|
||||
}
|
||||
if !mode.Valid || mode.String == "" {
|
||||
return models.DeliveryModePush
|
||||
}
|
||||
if !models.IsValidDeliveryMode(mode.String) {
|
||||
log.Printf("ProxyA2A: workspace %s has invalid delivery_mode=%q — defaulting to push", workspaceID, mode.String)
|
||||
return models.DeliveryModePush
|
||||
}
|
||||
return mode.String
|
||||
}
|
||||
|
||||
// logA2AReceiveQueued records a poll-mode "queued" A2A receive into
|
||||
// activity_logs. Same shape as logA2ASuccess but without ResponseBody
|
||||
// (there is no response yet — the polling agent will produce one when
|
||||
// it picks the request up). status="ok" because the request was
|
||||
// successfully queued; the consume side reports its own outcome.
|
||||
//
|
||||
// The activity_logs row is what the polling agent's GET /activity?since_id=
|
||||
// reads in PR 3 — that's how a poll-mode workspace receives inbound A2A
|
||||
// without a public URL.
|
||||
func (h *WorkspaceHandler) logA2AReceiveQueued(ctx context.Context, workspaceID, callerID string, body []byte, a2aMethod string) {
|
||||
var wsName string
|
||||
db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsName)
|
||||
if wsName == "" {
|
||||
wsName = workspaceID
|
||||
}
|
||||
summary := a2aMethod + " → " + wsName + " (queued for poll)"
|
||||
go func(parent context.Context) {
|
||||
logCtx, cancel := context.WithTimeout(context.WithoutCancel(parent), 30*time.Second)
|
||||
defer cancel()
|
||||
LogActivity(logCtx, h.broadcaster, ActivityParams{
|
||||
WorkspaceID: workspaceID,
|
||||
ActivityType: "a2a_receive",
|
||||
SourceID: nilIfEmpty(callerID),
|
||||
TargetID: &workspaceID,
|
||||
Method: &a2aMethod,
|
||||
Summary: &summary,
|
||||
RequestBody: json.RawMessage(body),
|
||||
Status: "ok",
|
||||
})
|
||||
}(ctx)
|
||||
}
|
||||
|
||||
// readUsageMap extracts input_tokens / output_tokens from the "usage" key of m.
|
||||
// Returns (0, 0, false) when the key is absent or contains no non-zero values.
|
||||
func readUsageMap(m map[string]json.RawMessage) (inputTokens, outputTokens int64, ok bool) {
|
||||
|
||||
@@ -11,10 +11,13 @@ import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
@@ -243,6 +246,117 @@ func TestProxyA2A_AgentReturnsError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestProxyA2A_Upstream502_TriggersContainerDeadCheck — when the agent
|
||||
// tunnel returns 502 (the "tunnel up but no origin" failure mode that
|
||||
// surfaces a Cloudflare error page to canvas), proxyA2A must consult
|
||||
// IsRunning on cpProv. If the EC2 instance truly is dead, the response
|
||||
// becomes a structured 503 with restarting=true (not the upstream 502
|
||||
// which CF would mask), and the workspace flips to status='offline' so
|
||||
// the next reactive poll sees the right state. This is the
|
||||
// 2026-04-30 hongmingwang.moleculesai.app canvas-chat-to-dead-workspace
|
||||
// regression: upstream 502 was previously propagated as-is, CF masked
|
||||
// it, and no auto-restart fired.
|
||||
func TestProxyA2A_Upstream502_TriggersContainerDeadCheck(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
mr := setupTestRedis(t)
|
||||
allowLoopbackForTest(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
cp := &fakeCPProv{running: false}
|
||||
handler.SetCPProvisioner(cp)
|
||||
|
||||
// Agent tunnel returns 502 with empty body — the CF "no-origin" shape.
|
||||
agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusBadGateway)
|
||||
}))
|
||||
defer agentServer.Close()
|
||||
|
||||
mr.Set(fmt.Sprintf("ws:%s:url", "ws-tunnel-dead"), agentServer.URL)
|
||||
expectBudgetCheck(mock, "ws-tunnel-dead")
|
||||
// Activity log fires (delivery_confirmed is true on Do() success regardless
|
||||
// of upstream status — handler's existing logA2ASuccess path runs first
|
||||
// and logs as success because the dispatch did get a response).
|
||||
mock.ExpectExec("INSERT INTO activity_logs").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
// maybeMarkContainerDead's runtime lookup, then the offline-flip UPDATE.
|
||||
mock.ExpectQuery(`SELECT COALESCE\(runtime, 'langgraph'\) FROM workspaces WHERE id =`).
|
||||
WithArgs("ws-tunnel-dead").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("hermes"))
|
||||
mock.ExpectExec(`UPDATE workspaces SET status =`).
|
||||
WithArgs(models.StatusOffline, "ws-tunnel-dead").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-tunnel-dead"}}
|
||||
body := `{"method":"message/send","params":{"message":{"role":"user","parts":[{"text":"hi"}]}}}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/ws-tunnel-dead/a2a", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.ProxyA2A(c)
|
||||
|
||||
time.Sleep(80 * time.Millisecond)
|
||||
|
||||
// Caller sees a structured 503 (NOT the upstream 502 which CF would mask).
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Fatalf("upstream 502 should translate to 503 once cpProv reports dead; got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if !strings.Contains(w.Body.String(), "restarting") {
|
||||
t.Errorf("response body should mention restart trigger; got %s", w.Body.String())
|
||||
}
|
||||
if w.Header().Get("Retry-After") != "15" {
|
||||
t.Errorf("Retry-After header should be 15 to throttle canvas-side retry loop; got %q", w.Header().Get("Retry-After"))
|
||||
}
|
||||
if cp.calls != 1 {
|
||||
t.Errorf("cpProv.IsRunning must be consulted exactly once; got %d calls", cp.calls)
|
||||
}
|
||||
}
|
||||
|
||||
// TestProxyA2A_Upstream502_AliveAgent_PropagatesAsIs — the safety check:
|
||||
// if cpProv reports the EC2 IS running, the upstream 502 is propagated
|
||||
// as-is. Don't recycle a healthy agent on a transient hiccup — the agent
|
||||
// might have legitimately returned 502 (e.g. a downstream service it
|
||||
// called returned 502 and it forwarded). Net behavior matches pre-fix
|
||||
// for the alive-agent case.
|
||||
func TestProxyA2A_Upstream502_AliveAgent_PropagatesAsIs(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
mr := setupTestRedis(t)
|
||||
allowLoopbackForTest(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
cp := &fakeCPProv{running: true}
|
||||
handler.SetCPProvisioner(cp)
|
||||
|
||||
agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusBadGateway)
|
||||
fmt.Fprint(w, `{"error":"downstream service returned 502"}`)
|
||||
}))
|
||||
defer agentServer.Close()
|
||||
|
||||
mr.Set(fmt.Sprintf("ws:%s:url", "ws-alive-502"), agentServer.URL)
|
||||
expectBudgetCheck(mock, "ws-alive-502")
|
||||
mock.ExpectExec("INSERT INTO activity_logs").WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
// IsRunning runtime lookup runs but no UPDATE follows (running=true).
|
||||
mock.ExpectQuery(`SELECT COALESCE\(runtime, 'langgraph'\) FROM workspaces WHERE id =`).
|
||||
WithArgs("ws-alive-502").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("hermes"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-alive-502"}}
|
||||
body := `{"method":"message/send","params":{"message":{"role":"user","parts":[{"text":"hi"}]}}}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/ws-alive-502/a2a", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.ProxyA2A(c)
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
if w.Code != http.StatusBadGateway {
|
||||
t.Fatalf("alive agent 502 should propagate as 502; got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== ProxyA2A — messageId injection ====================
|
||||
|
||||
func TestProxyA2A_MessageIDInjected(t *testing.T) {
|
||||
@@ -504,6 +618,182 @@ func TestA2AProxy_SystemCallerForge_IsRejected(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== ProxyA2A — bearer-derived callerID (#2306) ====================
|
||||
|
||||
// TestProxyA2A_CallerIDDerivedFromBearer verifies that when X-Workspace-ID
|
||||
// is absent, ProxyA2A derives the callerID from the bearer token's owning
|
||||
// workspace. Without this, third-party SDKs that authenticate purely via
|
||||
// bearer end up with activity_logs.source_id=NULL, breaking peer_id and
|
||||
// "Agent Comms by peer" downstream signals.
|
||||
func TestProxyA2A_CallerIDDerivedFromBearer(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
mr := setupTestRedis(t)
|
||||
allowLoopbackForTest(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
fmt.Fprint(w, `{"jsonrpc":"2.0","id":"1","result":{}}`)
|
||||
}))
|
||||
defer agentServer.Close()
|
||||
mr.Set(fmt.Sprintf("ws:%s:url", "ws-target"), agentServer.URL)
|
||||
|
||||
// 1. Bearer-derive lookup → returns ws-caller
|
||||
mock.ExpectQuery(`SELECT t\.id, t\.workspace_id.*FROM workspace_auth_tokens t.*JOIN workspaces`).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id"}).AddRow("tok-1", "ws-caller"))
|
||||
|
||||
// 2. validateCallerToken's HasAnyLiveToken / ValidateToken queries fall
|
||||
// through to fail-open (no expectations set) — same pattern as
|
||||
// TestProxyA2A_CallerIDPropagated.
|
||||
|
||||
// 3. CanCommunicate — siblings under same parent
|
||||
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
|
||||
WithArgs("ws-caller").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-caller", "ws-parent"))
|
||||
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
|
||||
WithArgs("ws-target").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-target", "ws-parent"))
|
||||
|
||||
expectBudgetCheck(mock, "ws-target")
|
||||
|
||||
// 4. activity_logs INSERT — verify source_id arg is the derived ws-caller
|
||||
// (column order: workspace_id, activity_type, source_id, target_id, ...)
|
||||
mock.ExpectExec("INSERT INTO activity_logs").
|
||||
WithArgs(
|
||||
"ws-target", // $1 workspace_id
|
||||
"a2a_receive", // $2 activity_type
|
||||
sqlmock.AnyArg(), // $3 source_id — *string("ws-caller"), checked below
|
||||
sqlmock.AnyArg(), // $4 target_id
|
||||
sqlmock.AnyArg(), // $5 method
|
||||
sqlmock.AnyArg(), // $6 summary
|
||||
sqlmock.AnyArg(), // $7 request_body
|
||||
sqlmock.AnyArg(), // $8 response_body
|
||||
sqlmock.AnyArg(), // $9 tool_trace
|
||||
sqlmock.AnyArg(), // $10 duration_ms
|
||||
sqlmock.AnyArg(), // $11 status
|
||||
sqlmock.AnyArg(), // $12 error_detail
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-target"}}
|
||||
|
||||
body := `{"method":"message/send","params":{"message":{"role":"user","parts":[{"text":"test"}]}}}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/ws-target/a2a", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
// NOTE: no X-Workspace-ID — the bearer must be the only callerID source.
|
||||
c.Request.Header.Set("Authorization", "Bearer some-bearer-token")
|
||||
|
||||
handler.ProxyA2A(c)
|
||||
time.Sleep(50 * time.Millisecond) // allow LogActivity goroutine to flush
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestProxyA2A_OrgTokenSkipsBearerDerive verifies that when an org-level
|
||||
// token is in play (canvas/admin path), the bearer-derive logic is skipped
|
||||
// even if the bearer matches a workspace token. Org tokens grant org-wide
|
||||
// access and don't bind to a single workspace; treating them as a workspace
|
||||
// caller would mis-attribute activity logs.
|
||||
func TestProxyA2A_OrgTokenSkipsBearerDerive(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
mr := setupTestRedis(t)
|
||||
allowLoopbackForTest(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
fmt.Fprint(w, `{"jsonrpc":"2.0","id":"1","result":{}}`)
|
||||
}))
|
||||
defer agentServer.Close()
|
||||
mr.Set(fmt.Sprintf("ws:%s:url", "ws-target"), agentServer.URL)
|
||||
|
||||
// No WorkspaceFromToken expectation — the bearer-derive branch must NOT
|
||||
// fire when org_token_id is set.
|
||||
expectBudgetCheck(mock, "ws-target")
|
||||
|
||||
// Activity log INSERT with NULL source_id (canvas-class semantics).
|
||||
mock.ExpectExec("INSERT INTO activity_logs").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-target"}}
|
||||
c.Set("org_token_id", "org-token-123") // org-level auth
|
||||
|
||||
body := `{"method":"message/send","params":{"message":{"role":"user","parts":[{"text":"hi"}]}}}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/ws-target/a2a", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
c.Request.Header.Set("Authorization", "Bearer org-bearer")
|
||||
|
||||
handler.ProxyA2A(c)
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestProxyA2A_BearerDeriveFailureFallsThrough verifies that if the bearer
|
||||
// is present but doesn't resolve (e.g. revoked, removed workspace), the
|
||||
// callerID stays empty and the request is treated as canvas-class — we
|
||||
// don't 401, we don't error; we just lose the source_id signal. Mirrors
|
||||
// the canvas-bypass shape so legacy/anonymous paths aren't broken.
|
||||
func TestProxyA2A_BearerDeriveFailureFallsThrough(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
mr := setupTestRedis(t)
|
||||
allowLoopbackForTest(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
fmt.Fprint(w, `{"jsonrpc":"2.0","id":"1","result":{}}`)
|
||||
}))
|
||||
defer agentServer.Close()
|
||||
mr.Set(fmt.Sprintf("ws:%s:url", "ws-target"), agentServer.URL)
|
||||
|
||||
// Bearer-derive lookup fails (no live row) — collapses to ErrInvalidToken
|
||||
// inside WorkspaceFromToken; ProxyA2A swallows the error and proceeds with
|
||||
// callerID="".
|
||||
mock.ExpectQuery(`SELECT t\.id, t\.workspace_id.*FROM workspace_auth_tokens t.*JOIN workspaces`).
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
|
||||
expectBudgetCheck(mock, "ws-target")
|
||||
mock.ExpectExec("INSERT INTO activity_logs").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-target"}}
|
||||
|
||||
body := `{"method":"message/send","params":{"message":{"role":"user","parts":[{"text":"hi"}]}}}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/ws-target/a2a", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
c.Request.Header.Set("Authorization", "Bearer revoked-or-stale")
|
||||
|
||||
handler.ProxyA2A(c)
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200 (canvas-fallback), got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsSystemCaller(t *testing.T) {
|
||||
cases := []struct {
|
||||
caller string
|
||||
@@ -630,6 +920,46 @@ func TestIsUpstreamBusyError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestIsUpstreamDeadStatus locks in the status-code matrix that gates
|
||||
// reactive container-dead detection. Order matters: the helper exists so
|
||||
// the proxy + any future caller (e.g. a sweeper) classify CF dead-origin
|
||||
// codes the same way. Drift here would re-introduce the SaaS-blind bug
|
||||
// for whichever code we forgot.
|
||||
func TestIsUpstreamDeadStatus(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
status int
|
||||
want bool
|
||||
}{
|
||||
// Standard proxy-layer dead-upstream codes
|
||||
{"502 BadGateway", 502, true},
|
||||
{"503 ServiceUnavailable", 503, true},
|
||||
{"504 GatewayTimeout", 504, true},
|
||||
// Cloudflare dead-origin family
|
||||
{"521 WebServerDown", 521, true},
|
||||
{"522 ConnectionTimedOut", 522, true},
|
||||
{"523 OriginUnreachable", 523, true},
|
||||
{"524 OriginTimedOut", 524, true},
|
||||
// Negative cases — must NOT trigger restart
|
||||
{"200 OK", 200, false},
|
||||
{"400 BadRequest (agent rejected payload)", 400, false},
|
||||
{"401 Unauthorized", 401, false},
|
||||
{"404 NotFound (no such session)", 404, false},
|
||||
{"408 RequestTimeout (client-side)", 408, false},
|
||||
{"429 TooManyRequests (rate limited, agent alive)", 429, false},
|
||||
{"500 InternalServerError (agent crashed mid-request)", 500, false},
|
||||
{"501 NotImplemented", 501, false},
|
||||
{"505 HTTPVersionNotSupported", 505, false},
|
||||
{"520 WebServerReturnedUnknown (agent returned malformed)", 520, false},
|
||||
{"525 SSLHandshakeFailed (TLS misconfig, not dead origin)", 525, false},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
if got := isUpstreamDeadStatus(tc.status); got != tc.want {
|
||||
t.Errorf("%s: isUpstreamDeadStatus(%d) = %v, want %v", tc.name, tc.status, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== ProxyA2A — upstream timeout returns 503 busy + Retry-After ====================
|
||||
|
||||
// Verifies the full error-shaping contract for the 503-busy path:
|
||||
@@ -961,7 +1291,10 @@ func TestNormalizeA2APayload_PreservesExistingMessageId(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestNormalizeA2APayload_MissingMethodReturnsEmpty(t *testing.T) {
|
||||
raw := []byte(`{"params":{"message":{"role":"user"}}}`)
|
||||
// Method extraction returns empty string when method is absent,
|
||||
// regardless of message validity. Include parts: [] so the v0.2→v0.3
|
||||
// compat check (#2345) doesn't reject before method extraction.
|
||||
raw := []byte(`{"params":{"message":{"role":"user","parts":[]}}}`)
|
||||
_, method, perr := normalizeA2APayload(raw)
|
||||
if perr != nil {
|
||||
t.Fatalf("unexpected error: %+v", perr)
|
||||
@@ -971,6 +1304,102 @@ func TestNormalizeA2APayload_MissingMethodReturnsEmpty(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// --- v0.2 → v0.3 compat shim (#2345) ---
|
||||
|
||||
func TestNormalizeA2APayload_ConvertsV02StringContentToParts(t *testing.T) {
|
||||
raw := []byte(`{"method":"message/send","params":{"message":{"role":"user","content":"hello world"}}}`)
|
||||
out, _, perr := normalizeA2APayload(raw)
|
||||
if perr != nil {
|
||||
t.Fatalf("unexpected error: %+v", perr)
|
||||
}
|
||||
var parsed map[string]interface{}
|
||||
if err := json.Unmarshal(out, &parsed); err != nil {
|
||||
t.Fatalf("output not valid JSON: %v", err)
|
||||
}
|
||||
msg := parsed["params"].(map[string]interface{})["message"].(map[string]interface{})
|
||||
if _, stillHasContent := msg["content"]; stillHasContent {
|
||||
t.Error("v0.2 'content' field should be removed after conversion")
|
||||
}
|
||||
parts, ok := msg["parts"].([]interface{})
|
||||
if !ok || len(parts) != 1 {
|
||||
t.Fatalf("expected 1 part, got %v", msg["parts"])
|
||||
}
|
||||
part := parts[0].(map[string]interface{})
|
||||
if part["kind"] != "text" || part["text"] != "hello world" {
|
||||
t.Errorf("expected {kind:text, text:'hello world'}, got %v", part)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeA2APayload_ConvertsV02ListContentToParts(t *testing.T) {
|
||||
raw := []byte(`{"method":"message/send","params":{"message":{"role":"user","content":[{"kind":"text","text":"hi"}]}}}`)
|
||||
out, _, perr := normalizeA2APayload(raw)
|
||||
if perr != nil {
|
||||
t.Fatalf("unexpected error: %+v", perr)
|
||||
}
|
||||
var parsed map[string]interface{}
|
||||
_ = json.Unmarshal(out, &parsed)
|
||||
msg := parsed["params"].(map[string]interface{})["message"].(map[string]interface{})
|
||||
parts, ok := msg["parts"].([]interface{})
|
||||
if !ok || len(parts) != 1 {
|
||||
t.Fatalf("expected list preserved as parts, got %v", msg["parts"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeA2APayload_PreservesV03Parts(t *testing.T) {
|
||||
raw := []byte(`{"method":"message/send","params":{"message":{"role":"user","parts":[{"kind":"text","text":"hi"}]}}}`)
|
||||
out, _, perr := normalizeA2APayload(raw)
|
||||
if perr != nil {
|
||||
t.Fatalf("unexpected error: %+v", perr)
|
||||
}
|
||||
var parsed map[string]interface{}
|
||||
_ = json.Unmarshal(out, &parsed)
|
||||
msg := parsed["params"].(map[string]interface{})["message"].(map[string]interface{})
|
||||
if _, hasContent := msg["content"]; hasContent {
|
||||
t.Error("did not expect content field in v0.3-shaped payload output")
|
||||
}
|
||||
parts := msg["parts"].([]interface{})
|
||||
if len(parts) != 1 {
|
||||
t.Errorf("expected 1 part preserved, got %d", len(parts))
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeA2APayload_RejectsMessageWithNeitherContentNorParts(t *testing.T) {
|
||||
raw := []byte(`{"method":"message/send","params":{"message":{"role":"user","metadata":{}}}}`)
|
||||
_, _, perr := normalizeA2APayload(raw)
|
||||
if perr == nil {
|
||||
t.Fatal("expected error for message with neither content nor parts")
|
||||
}
|
||||
if perr.Status != http.StatusBadRequest {
|
||||
t.Errorf("expected 400, got %d", perr.Status)
|
||||
}
|
||||
errMsg, _ := perr.Response["error"].(string)
|
||||
if !strings.Contains(errMsg, "parts") || !strings.Contains(errMsg, "content") {
|
||||
t.Errorf("error message should mention both 'parts' and 'content', got: %q", errMsg)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeA2APayload_RejectsContentWithUnsupportedType(t *testing.T) {
|
||||
raw := []byte(`{"method":"message/send","params":{"message":{"role":"user","content":42}}}`)
|
||||
_, _, perr := normalizeA2APayload(raw)
|
||||
if perr == nil {
|
||||
t.Fatal("expected error for non-string non-list content")
|
||||
}
|
||||
if perr.Status != http.StatusBadRequest {
|
||||
t.Errorf("expected 400, got %d", perr.Status)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeA2APayload_NoMessageNoCheck(t *testing.T) {
|
||||
raw := []byte(`{"method":"tasks/list","params":{}}`)
|
||||
_, method, perr := normalizeA2APayload(raw)
|
||||
if perr != nil {
|
||||
t.Fatalf("unexpected error on params-message-absent payload: %+v", perr)
|
||||
}
|
||||
if method != "tasks/list" {
|
||||
t.Errorf("expected method=tasks/list, got %q", method)
|
||||
}
|
||||
}
|
||||
|
||||
// --- resolveAgentURL direct unit tests ---
|
||||
|
||||
func TestResolveAgentURL_CacheHit(t *testing.T) {
|
||||
@@ -1364,6 +1793,143 @@ func TestMaybeMarkContainerDead_NilProvisioner(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// SaaS path: h.provisioner=nil but h.cpProv is wired and reports the EC2
|
||||
// instance is NOT running. maybeMarkContainerDead must consult cpProv,
|
||||
// flip the workspace to status='offline', clear keys, broadcast OFFLINE,
|
||||
// and return true so the caller surfaces the structured 503. Pre-fix
|
||||
// (#NNN) it returned false unconditionally on h.provisioner==nil, so
|
||||
// dead EC2 agents leaked upstream 502 to canvas with no recovery.
|
||||
func TestMaybeMarkContainerDead_CPOnly_NotRunning(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
|
||||
cp := &fakeCPProv{running: false}
|
||||
handler.SetCPProvisioner(cp)
|
||||
|
||||
mock.ExpectQuery(`SELECT COALESCE\(runtime, 'langgraph'\) FROM workspaces WHERE id =`).
|
||||
WithArgs("ws-saas-dead").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("hermes"))
|
||||
mock.ExpectExec(`UPDATE workspaces SET status =`).
|
||||
WithArgs(models.StatusOffline, "ws-saas-dead").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
got := handler.maybeMarkContainerDead(context.Background(), "ws-saas-dead")
|
||||
if !got {
|
||||
t.Fatal("expected true (cpProv reports not running) — without cpProv consultation, SaaS dead-agent recovery is impossible")
|
||||
}
|
||||
if cp.calls != 1 {
|
||||
t.Errorf("expected exactly 1 IsRunning call on cpProv; got %d", cp.calls)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// SaaS path: h.cpProv reports running=true → maybeMarkContainerDead must
|
||||
// return false (don't restart a healthy agent on a transient upstream
|
||||
// hiccup). This is the safety check that prevents over-eager recycling.
|
||||
func TestMaybeMarkContainerDead_CPOnly_Running(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
|
||||
cp := &fakeCPProv{running: true}
|
||||
handler.SetCPProvisioner(cp)
|
||||
|
||||
mock.ExpectQuery(`SELECT COALESCE\(runtime, 'langgraph'\) FROM workspaces WHERE id =`).
|
||||
WithArgs("ws-saas-alive").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("hermes"))
|
||||
|
||||
if got := handler.maybeMarkContainerDead(context.Background(), "ws-saas-alive"); got {
|
||||
t.Error("expected false when cpProv reports running — must not recycle a healthy agent")
|
||||
}
|
||||
if cp.calls != 1 {
|
||||
t.Errorf("expected exactly 1 IsRunning call on cpProv; got %d", cp.calls)
|
||||
}
|
||||
}
|
||||
|
||||
// SaaS-path runRestartCycle: when h.provisioner is nil and h.cpProv is set,
|
||||
// the auto-restart cycle MUST call cpProv.Stop (not Docker provisioner.Stop).
|
||||
// Pre-fix this dispatched only to h.provisioner.Stop, NPE'd on nil, was
|
||||
// silently swallowed by coalesceRestart's recover-without-re-raise, and
|
||||
// left the workspace stuck in status='provisioning' forever — making
|
||||
// reactive auto-restart on SaaS effectively dead code. The independent
|
||||
// review of PR #2362 caught this gap.
|
||||
//
|
||||
// We drive runRestartCycle directly (not via RestartByID/coalesceRestart)
|
||||
// so we don't fight the goroutine's timing in a unit test. The full
|
||||
// restart chain (provisionWorkspaceCP) needs its own mocked DB rows that
|
||||
// would explode the surface area of this test; what we care about here
|
||||
// is the dispatch decision, which is observable on cpProv.stopCalls.
|
||||
// stopForRestart is the dispatch helper extracted from runRestartCycle so the
|
||||
// branch logic can be tested without spawning the async sendRestartContext
|
||||
// goroutine that the full cycle fires. Pre-fix runRestartCycle's Stop dispatch
|
||||
// only called the Docker path, so on SaaS (h.provisioner=nil) the cycle NPE'd
|
||||
// silently and left the workspace stuck in status='provisioning'.
|
||||
func TestStopForRestart_SaaSPath_DispatchesViaCPProv(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
|
||||
cp := &fakeCPProv{}
|
||||
handler.SetCPProvisioner(cp)
|
||||
|
||||
handler.stopForRestart(context.Background(), "ws-saas-restart")
|
||||
|
||||
if cp.stopCalls != 1 {
|
||||
t.Fatalf("expected cpProv.Stop to be called once on SaaS auto-restart; got %d", cp.stopCalls)
|
||||
}
|
||||
if cp.startCalls != 0 {
|
||||
t.Fatalf("expected cpProv.Start NOT to be called by stopForRestart; got %d", cp.startCalls)
|
||||
}
|
||||
}
|
||||
|
||||
// Both nil → no-op, no panic, no DB / broadcast side effects. Guards the
|
||||
// dispatcher against being invoked on a misconfigured handler. Important
|
||||
// because runRestartCycle's surrounding flow (status='provisioning' UPDATE
|
||||
// + broadcast) MUST happen even when both provisioners are nil — but
|
||||
// stopForRestart itself is a pure dispatcher and shouldn't touch state.
|
||||
func TestStopForRestart_NoProvisioner_NoOp(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
|
||||
// no provisioner, no cpProv, no DB expectations set on mock — any
|
||||
// unexpected query/exec will produce a sqlmock error.
|
||||
handler.stopForRestart(context.Background(), "ws-orphan")
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("stopForRestart no-provisioner path should not touch DB: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// fakeCPProv satisfies provisioner.CPProvisionerAPI for tests that exercise
|
||||
// the SaaS / EC2-backed reactive-health path.
|
||||
//
|
||||
// Methods all record calls. Start/Stop/GetConsoleOutput return nil/empty by
|
||||
// default — the maybeMarkContainerDead happy path triggers an async
|
||||
// `go h.RestartByID(...)` which calls Stop, so the previous "panic on
|
||||
// unexpected call" pattern was unsafe (the panic fires on a goroutine,
|
||||
// after the assertions ran). Tests that want to ASSERT a method is unused
|
||||
// can check `calls == 0` after a sync barrier.
|
||||
type fakeCPProv struct {
|
||||
running bool
|
||||
calls int
|
||||
stopCalls int
|
||||
startCalls int
|
||||
}
|
||||
|
||||
func (f *fakeCPProv) Start(_ context.Context, _ provisioner.WorkspaceConfig) (string, error) {
|
||||
f.startCalls++
|
||||
return "", nil
|
||||
}
|
||||
func (f *fakeCPProv) Stop(_ context.Context, _ string) error {
|
||||
f.stopCalls++
|
||||
return nil
|
||||
}
|
||||
func (f *fakeCPProv) GetConsoleOutput(_ context.Context, _ string) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
func (f *fakeCPProv) IsRunning(_ context.Context, _ string) (bool, error) {
|
||||
f.calls++
|
||||
return f.running, nil
|
||||
}
|
||||
|
||||
// external runtime → false regardless of provisioner.
|
||||
func TestMaybeMarkContainerDead_ExternalRuntime(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
@@ -1528,3 +2094,185 @@ func TestResolveAgentURL_HibernatedWorkspace_NullURLVariant(t *testing.T) {
|
||||
t.Errorf("unmet DB expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== ProxyA2A — poll-mode short-circuit (#2339 PR 2) ====================
|
||||
|
||||
// TestProxyA2A_PollMode_ShortCircuits_NoSSRF_NoDispatch verifies the core
|
||||
// invariant of #2339 PR 2: when delivery_mode=poll, ProxyA2A must NOT
|
||||
// hit resolveAgentURL (which would SSRF-check or 502 on a missing URL)
|
||||
// and must NOT dispatch over HTTP. It records the request to activity_logs
|
||||
// and returns 200 {status:"queued"} instead.
|
||||
//
|
||||
// Without this short-circuit, the canvas chat fails for any workspace
|
||||
// running molecule-mcp-claude-channel (operator's laptop, no public URL):
|
||||
// resolveAgentURL would 502 on the missing URL and the polling agent
|
||||
// would never see the inbound message. That's the bug PR 2 fixes.
|
||||
func TestProxyA2A_PollMode_ShortCircuits_NoSSRF_NoDispatch(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
const wsID = "ws-poll-shortcircuit"
|
||||
|
||||
// Budget check still runs (above the short-circuit) — affirms the
|
||||
// budget guard is mode-agnostic, which is correct: a poll-mode
|
||||
// workspace shouldn't burn unmetered platform CPU/storage either.
|
||||
expectBudgetCheck(mock, wsID)
|
||||
|
||||
// lookupDeliveryMode SELECT — returns poll, triggering the short-circuit.
|
||||
// Note: NO ExpectQuery for `SELECT url, status FROM workspaces` (that's
|
||||
// resolveAgentURL's query) — the short-circuit must skip resolveAgentURL.
|
||||
mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"delivery_mode"}).AddRow("poll"))
|
||||
|
||||
// Activity log: the queued receive (logA2AReceiveQueued in helpers.go).
|
||||
mock.ExpectExec("INSERT INTO activity_logs").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: wsID}}
|
||||
|
||||
body := `{"jsonrpc":"2.0","id":"poll-1","method":"message/send","params":{"message":{"role":"user","parts":[{"text":"hi"}]}}}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/"+wsID+"/a2a", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.ProxyA2A(c)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 (queued), got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var resp map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("response is not valid JSON: %v", err)
|
||||
}
|
||||
if resp["status"] != "queued" {
|
||||
t.Errorf("response.status = %v, want %q", resp["status"], "queued")
|
||||
}
|
||||
if resp["delivery_mode"] != "poll" {
|
||||
t.Errorf("response.delivery_mode = %v, want %q", resp["delivery_mode"], "poll")
|
||||
}
|
||||
if resp["method"] != "message/send" {
|
||||
t.Errorf("response.method = %v, want %q (the JSON-RPC method that was queued)", resp["method"], "message/send")
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestProxyA2A_PushMode_NoShortCircuit verifies the symmetric contract:
|
||||
// a push-mode workspace (default) is NOT affected by the new short-circuit.
|
||||
// It still proceeds to resolveAgentURL + dispatch. Without this guard, a
|
||||
// regression in lookupDeliveryMode could silently break the entire fleet.
|
||||
func TestProxyA2A_PushMode_NoShortCircuit(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
mr := setupTestRedis(t)
|
||||
allowLoopbackForTest(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
const wsID = "ws-push-default"
|
||||
|
||||
dispatched := false
|
||||
agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
dispatched = true
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
fmt.Fprint(w, `{"jsonrpc":"2.0","id":"1","result":{"status":"ok"}}`)
|
||||
}))
|
||||
defer agentServer.Close()
|
||||
|
||||
mr.Set(fmt.Sprintf("ws:%s:url", wsID), agentServer.URL)
|
||||
expectBudgetCheck(mock, wsID)
|
||||
|
||||
// lookupDeliveryMode returns "push" — short-circuit must NOT fire.
|
||||
mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"delivery_mode"}).AddRow("push"))
|
||||
|
||||
mock.ExpectExec("INSERT INTO activity_logs").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: wsID}}
|
||||
|
||||
body := `{"jsonrpc":"2.0","id":"push-1","method":"message/send","params":{"message":{"role":"user","parts":[{"text":"hi"}]}}}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/"+wsID+"/a2a", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.ProxyA2A(c)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 (dispatched), got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if !dispatched {
|
||||
t.Error("push-mode workspace: expected the agent server to receive the request, but it did not")
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err == nil {
|
||||
if resp["status"] == "queued" {
|
||||
t.Error("push-mode response leaked queued envelope — short-circuit fired when it shouldn't have")
|
||||
}
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestProxyA2A_PollMode_FailsClosedToPush verifies the safety contract:
|
||||
// a DB error reading delivery_mode must default to push (the existing
|
||||
// behavior), NOT poll. Failing to push means a poll-mode workspace
|
||||
// briefly attempts a real dispatch — visible failure (502 / SSRF
|
||||
// rejection / restart cascade), not a silent drop into activity_logs
|
||||
// where the agent might never look. Loud > silent, recoverable > lost.
|
||||
func TestProxyA2A_PollMode_FailsClosedToPush(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t) // empty Redis — forces resolveAgentURL DB lookup
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
const wsID = "ws-mode-db-error"
|
||||
|
||||
expectBudgetCheck(mock, wsID)
|
||||
|
||||
// lookupDeliveryMode hits a transient DB error → must default push.
|
||||
mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id").
|
||||
WithArgs(wsID).
|
||||
WillReturnError(sql.ErrConnDone)
|
||||
|
||||
// Push path proceeds to resolveAgentURL — empty result → 502 path.
|
||||
mock.ExpectQuery("SELECT url, status FROM workspaces WHERE id =").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"url", "status"}))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: wsID}}
|
||||
|
||||
body := `{"jsonrpc":"2.0","id":"x","method":"message/send","params":{}}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/"+wsID+"/a2a", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.ProxyA2A(c)
|
||||
|
||||
if w.Code == http.StatusOK {
|
||||
var resp map[string]interface{}
|
||||
_ = json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if resp["status"] == "queued" {
|
||||
t.Errorf("DB error on delivery_mode lookup silently queued the request — must fail-closed-to-push, got body: %s", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
)
|
||||
@@ -39,6 +40,33 @@ func extractIdempotencyKey(body []byte) string {
|
||||
return envelope.Params.Message.MessageID
|
||||
}
|
||||
|
||||
// extractExpiresInSeconds pulls params.expires_in_seconds out of an A2A
|
||||
// JSON-RPC body and returns it as a positive integer. A zero return means
|
||||
// "no caller-specified TTL" — caller should leave expires_at NULL on the
|
||||
// queue row, preserving today's infinite-TTL behaviour (the
|
||||
// DropStaleQueueItems admin sweeper still drops entries past the
|
||||
// platform-default age). Negative values and parse errors collapse to 0.
|
||||
//
|
||||
// Why params-level (not metadata): expires_in_seconds is a delivery
|
||||
// directive, not a peer-to-peer message attribute. Putting it under
|
||||
// `params` keeps it adjacent to other delivery hints (priority,
|
||||
// idempotency) and out of `params.message.metadata` which the receiving
|
||||
// agent can read.
|
||||
func extractExpiresInSeconds(body []byte) int {
|
||||
var envelope struct {
|
||||
Params struct {
|
||||
ExpiresInSeconds int `json:"expires_in_seconds"`
|
||||
} `json:"params"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &envelope); err != nil {
|
||||
return 0
|
||||
}
|
||||
if envelope.Params.ExpiresInSeconds < 0 {
|
||||
return 0
|
||||
}
|
||||
return envelope.Params.ExpiresInSeconds
|
||||
}
|
||||
|
||||
const (
|
||||
PriorityCritical = 100
|
||||
PriorityTask = 50
|
||||
@@ -70,6 +98,7 @@ func EnqueueA2A(
|
||||
priority int,
|
||||
body []byte,
|
||||
method, idempotencyKey string,
|
||||
expiresAt *time.Time,
|
||||
) (id string, depth int, err error) {
|
||||
var keyArg interface{}
|
||||
if idempotencyKey != "" {
|
||||
@@ -83,6 +112,13 @@ func EnqueueA2A(
|
||||
if method != "" {
|
||||
methodArg = method
|
||||
}
|
||||
// expiresAtArg stays NULL when caller didn't specify a TTL. DequeueNext's
|
||||
// `expires_at IS NULL OR expires_at > now()` filter then preserves today's
|
||||
// infinite-TTL semantics for un-flagged messages.
|
||||
var expiresAtArg interface{}
|
||||
if expiresAt != nil {
|
||||
expiresAtArg = *expiresAt
|
||||
}
|
||||
|
||||
// INSERT ... ON CONFLICT DO NOTHING RETURNING id. The conflict target
|
||||
// must reference the partial unique INDEX columns + WHERE clause directly
|
||||
@@ -91,13 +127,13 @@ func EnqueueA2A(
|
||||
// then look up the existing row's id so the caller always receives a
|
||||
// valid queue entry reference.
|
||||
err = db.DB.QueryRowContext(ctx, `
|
||||
INSERT INTO a2a_queue (workspace_id, caller_id, priority, body, method, idempotency_key)
|
||||
VALUES ($1, $2, $3, $4::jsonb, $5, $6)
|
||||
INSERT INTO a2a_queue (workspace_id, caller_id, priority, body, method, idempotency_key, expires_at)
|
||||
VALUES ($1, $2, $3, $4::jsonb, $5, $6, $7)
|
||||
ON CONFLICT (workspace_id, idempotency_key)
|
||||
WHERE idempotency_key IS NOT NULL AND status IN ('queued','dispatched')
|
||||
DO NOTHING
|
||||
RETURNING id
|
||||
`, workspaceID, callerArg, priority, string(body), methodArg, keyArg).Scan(&id)
|
||||
`, workspaceID, callerArg, priority, string(body), methodArg, keyArg, expiresAtArg).Scan(&id)
|
||||
|
||||
if errors.Is(err, sql.ErrNoRows) && idempotencyKey != "" {
|
||||
// Conflict — look up the existing active row and use its id.
|
||||
|
||||
@@ -0,0 +1,231 @@
|
||||
package handlers
|
||||
|
||||
// a2a_queue_status.go — RFC #2331 Tier 1: public per-queue-id status endpoint.
|
||||
//
|
||||
// Closes the gap surfaced in #2329 item 5: callers receive `queue_id` in
|
||||
// the 202 enqueue response but had no public lookup endpoint. The only
|
||||
// observability path was through `check_task_status` which joins via
|
||||
// `request_body->>'delegation_id'` in `activity_logs` — works only for
|
||||
// delegation-flavored A2A. Cross-workspace peer-direct A2A had no
|
||||
// observability after enqueue.
|
||||
//
|
||||
// Auth model:
|
||||
//
|
||||
// - The caller's workspace token must match the `caller_id` recorded
|
||||
// on the queue row at enqueue time, OR the caller's token must be
|
||||
// for the target workspace_id (target can see what's queued for it),
|
||||
// OR an org-level token (canvas/admin) can see anything.
|
||||
//
|
||||
// - 404 — not 403 — when the caller has no read access. The queue_id
|
||||
// UUID is the access token; revealing "this queue_id exists but
|
||||
// you can't see it" leaks the existence-of-other-callers' state.
|
||||
//
|
||||
// What the response body excludes:
|
||||
//
|
||||
// - `body` (the original JSON-RPC request body) — could contain
|
||||
// prompts/PII the caller's authority shouldn't include in poll-loop
|
||||
// responses. The body is only relevant to the dispatching agent.
|
||||
// - `caller_id` — exposes the existence of other callers.
|
||||
//
|
||||
// What it includes:
|
||||
//
|
||||
// - status, attempts, last_error, enqueued_at, dispatched_at,
|
||||
// completed_at, expires_at, priority — the delivery state machine
|
||||
// observables.
|
||||
// - response_body when status == completed — so the caller can
|
||||
// retrieve the response without polling check_task_status.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"log"
|
||||
"net/http"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// QueueStatus is the public projection of an a2a_queue row.
|
||||
type QueueStatus struct {
|
||||
ID string `json:"queue_id"`
|
||||
WorkspaceID string `json:"workspace_id"`
|
||||
Status string `json:"status"`
|
||||
Priority int `json:"priority"`
|
||||
Attempts int `json:"attempts"`
|
||||
LastError *string `json:"last_error,omitempty"`
|
||||
EnqueuedAt string `json:"enqueued_at"`
|
||||
DispatchedAt *string `json:"dispatched_at,omitempty"`
|
||||
CompletedAt *string `json:"completed_at,omitempty"`
|
||||
ExpiresAt *string `json:"expires_at,omitempty"`
|
||||
ResponseBody []byte `json:"response_body,omitempty"`
|
||||
}
|
||||
|
||||
// QueueStatusByID looks up the queue row and projects it for the public
|
||||
// endpoint. Returns ErrNoQueueRow when the row doesn't exist OR the
|
||||
// caller has no read access — collapsing the two surfaces a single 404
|
||||
// from the handler so an attacker can't probe queue_id existence.
|
||||
//
|
||||
// Access rules — caller must satisfy at least one of:
|
||||
//
|
||||
// (a) callerID == queue.caller_id (sender can read own enqueue)
|
||||
// (b) callerID == queue.workspace_id (target can read queued-for-me)
|
||||
// (c) isAdmin == true (canvas/admin token)
|
||||
//
|
||||
// Internal helper; the HTTP handler enforces the auth checks before
|
||||
// calling this — by the time we get here we already know the caller
|
||||
// is authorized, so this just runs the SELECT.
|
||||
func QueueStatusByID(ctx context.Context, queueID string) (*QueueStatus, error) {
|
||||
var qs QueueStatus
|
||||
var lastError, dispatchedAt, completedAt, expiresAt sql.NullString
|
||||
var responseBody []byte
|
||||
|
||||
// response_body lives on activity_logs (the stitched delegation row), not
|
||||
// on a2a_queue itself. We pull both here in one round-trip via LEFT JOIN
|
||||
// so a completed delegation surfaces its result inline — non-delegation
|
||||
// queue rows simply won't have a matching activity_logs row and the field
|
||||
// stays null.
|
||||
err := db.DB.QueryRowContext(ctx, `
|
||||
SELECT
|
||||
q.id,
|
||||
q.workspace_id,
|
||||
q.status,
|
||||
q.priority,
|
||||
q.attempts,
|
||||
q.last_error,
|
||||
q.enqueued_at::text,
|
||||
q.dispatched_at::text,
|
||||
q.completed_at::text,
|
||||
q.expires_at::text,
|
||||
al.response_body::text
|
||||
FROM a2a_queue q
|
||||
LEFT JOIN activity_logs al
|
||||
ON al.method = 'delegate_result'
|
||||
AND al.target_id = q.workspace_id
|
||||
AND al.workspace_id = q.caller_id
|
||||
AND al.response_body->>'delegation_id' = (q.body->'params'->'message'->'metadata'->>'delegation_id')
|
||||
WHERE q.id = $1
|
||||
`, queueID).Scan(
|
||||
&qs.ID, &qs.WorkspaceID, &qs.Status, &qs.Priority, &qs.Attempts,
|
||||
&lastError, &qs.EnqueuedAt, &dispatchedAt, &completedAt, &expiresAt,
|
||||
&responseBody,
|
||||
)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return nil, sql.ErrNoRows
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if lastError.Valid && lastError.String != "" {
|
||||
s := lastError.String
|
||||
qs.LastError = &s
|
||||
}
|
||||
if dispatchedAt.Valid && dispatchedAt.String != "" {
|
||||
s := dispatchedAt.String
|
||||
qs.DispatchedAt = &s
|
||||
}
|
||||
if completedAt.Valid && completedAt.String != "" {
|
||||
s := completedAt.String
|
||||
qs.CompletedAt = &s
|
||||
}
|
||||
if expiresAt.Valid && expiresAt.String != "" {
|
||||
s := expiresAt.String
|
||||
qs.ExpiresAt = &s
|
||||
}
|
||||
if len(responseBody) > 0 && qs.Status == "completed" {
|
||||
qs.ResponseBody = responseBody
|
||||
}
|
||||
|
||||
return &qs, nil
|
||||
}
|
||||
|
||||
// queueRowAuthFields returns the (caller_id, workspace_id) of the queue row
|
||||
// for access control. Separate from QueueStatusByID so the handler can do
|
||||
// the auth check without first projecting the public response.
|
||||
func queueRowAuthFields(ctx context.Context, queueID string) (callerID, workspaceID string, err error) {
|
||||
var callerNS, workspaceNS sql.NullString
|
||||
err = db.DB.QueryRowContext(ctx,
|
||||
`SELECT caller_id, workspace_id FROM a2a_queue WHERE id = $1`,
|
||||
queueID,
|
||||
).Scan(&callerNS, &workspaceNS)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
return callerNS.String, workspaceNS.String, nil
|
||||
}
|
||||
|
||||
// GetA2AQueueStatus handles GET /workspaces/:id/a2a/queue/:queue_id.
|
||||
//
|
||||
// The :id path param is the workspace context (matches the proxy pattern
|
||||
// /workspaces/:id/a2a). :queue_id is the row's UUID returned from the
|
||||
// 202 enqueue response.
|
||||
//
|
||||
// Auth flow:
|
||||
//
|
||||
// 1. Extract caller's workspace from bearer (org tokens grant org-wide
|
||||
// access and short-circuit the per-row check).
|
||||
// 2. Look up queue row's (caller_id, workspace_id).
|
||||
// 3. Allow when caller's workspace == queue.caller_id OR
|
||||
// == queue.workspace_id, OR caller has org-level access.
|
||||
// 4. Otherwise 404 (not 403) — see file-header rationale.
|
||||
func (h *WorkspaceHandler) GetA2AQueueStatus(c *gin.Context) {
|
||||
ctx := c.Request.Context()
|
||||
queueID := c.Param("queue_id")
|
||||
if queueID == "" {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "queue_id required"})
|
||||
return
|
||||
}
|
||||
|
||||
// Org-level token (canvas/admin)? Bypass per-row caller match.
|
||||
_, isOrg := c.Get("org_token_id")
|
||||
|
||||
// Derive caller workspace from bearer when not org-token.
|
||||
callerWorkspace := c.GetHeader("X-Workspace-ID")
|
||||
if !isOrg && callerWorkspace == "" {
|
||||
if tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization")); tok != "" {
|
||||
if wsID, err := wsauth.WorkspaceFromToken(ctx, db.DB, tok); err == nil {
|
||||
callerWorkspace = wsID
|
||||
}
|
||||
}
|
||||
}
|
||||
if !isOrg && callerWorkspace == "" {
|
||||
// No identity — treat as not-found rather than 401, matching the
|
||||
// file-header existence-non-inference policy. A 401 would tell
|
||||
// an attacker that the queue_id at least might exist.
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "queue item not found"})
|
||||
return
|
||||
}
|
||||
|
||||
rowCallerID, rowWorkspaceID, err := queueRowAuthFields(ctx, queueID)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "queue item not found"})
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("GetA2AQueueStatus: row lookup failed for %s: %v", queueID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "lookup failed"})
|
||||
return
|
||||
}
|
||||
|
||||
// Access check.
|
||||
if !isOrg && callerWorkspace != rowCallerID && callerWorkspace != rowWorkspaceID {
|
||||
// Collapse to 404 — see header.
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "queue item not found"})
|
||||
return
|
||||
}
|
||||
|
||||
status, err := QueueStatusByID(ctx, queueID)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "queue item not found"})
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("GetA2AQueueStatus: status fetch failed for %s: %v", queueID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "status fetch failed"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, status)
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestExtractExpiresInSeconds covers the JSON parser used at enqueue time
|
||||
// to honor a caller-specified TTL. Zero return = "no TTL" — caller leaves
|
||||
// expires_at NULL on the queue row.
|
||||
func TestExtractExpiresInSeconds(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
body string
|
||||
want int
|
||||
}{
|
||||
{
|
||||
name: "absent",
|
||||
body: `{"params":{"message":{"messageId":"x"}}}`,
|
||||
want: 0,
|
||||
},
|
||||
{
|
||||
name: "positive",
|
||||
body: `{"params":{"expires_in_seconds":300,"message":{"messageId":"x"}}}`,
|
||||
want: 300,
|
||||
},
|
||||
{
|
||||
name: "zero",
|
||||
body: `{"params":{"expires_in_seconds":0,"message":{"messageId":"x"}}}`,
|
||||
want: 0,
|
||||
},
|
||||
{
|
||||
name: "negative coerced to zero",
|
||||
body: `{"params":{"expires_in_seconds":-30,"message":{"messageId":"x"}}}`,
|
||||
want: 0,
|
||||
},
|
||||
{
|
||||
name: "invalid JSON returns zero",
|
||||
body: `not json`,
|
||||
want: 0,
|
||||
},
|
||||
{
|
||||
name: "wrong type silently zero (json.Unmarshal returns err on type mismatch)",
|
||||
body: `{"params":{"expires_in_seconds":"not-a-number"}}`,
|
||||
want: 0,
|
||||
},
|
||||
{
|
||||
name: "params absent entirely",
|
||||
body: `{}`,
|
||||
want: 0,
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := extractExpiresInSeconds([]byte(tc.body))
|
||||
if got != tc.want {
|
||||
t.Errorf("extractExpiresInSeconds(%q) = %d, want %d", tc.body, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -2,7 +2,9 @@ package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
@@ -13,6 +15,7 @@ import (
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
type ActivityHandler struct {
|
||||
@@ -23,12 +26,74 @@ func NewActivityHandler(b *events.Broadcaster) *ActivityHandler {
|
||||
return &ActivityHandler{broadcaster: b}
|
||||
}
|
||||
|
||||
// List handles GET /workspaces/:id/activity?type=&limit=
|
||||
// List handles GET /workspaces/:id/activity?type=&source=&limit=&since_secs=&since_id=
|
||||
//
|
||||
// since_secs filters to activity_logs.created_at >= NOW() - INTERVAL '$N seconds'.
|
||||
// Optional, additive — callers that don't pass it get today's behavior (the
|
||||
// most-recent N events regardless of time). The harness runner
|
||||
// (scripts/measure-coordinator-task-bounds-runner.sh) uses this to scope a
|
||||
// trace to a specific test window; RFC #2251 §V1.0 step 6 also depends on it.
|
||||
// Capped at 30 days (2_592_000s) — anything older has typically been paged
|
||||
// out anyway, and a defensive ceiling keeps a paranoid client from triggering
|
||||
// a full-table scan via since_secs=99999999999. Closes #2268.
|
||||
//
|
||||
// since_id is a CURSOR for poll-mode workspaces (#2339 PR 3). The agent
|
||||
// passes the id of the last activity_logs row it has consumed; the server
|
||||
// returns rows STRICTLY AFTER that cursor in chronological (ASC) order so
|
||||
// the agent processes events in the order they were recorded. Telegram
|
||||
// getUpdates / Slack RTM shape — same proven pattern.
|
||||
//
|
||||
// Cross-workspace safety: the cursor lookup is scoped by workspace_id, so a
|
||||
// caller cannot peek at another workspace's activity by guessing its UUIDs.
|
||||
//
|
||||
// Cursor-not-found: returns 410 Gone. The client should reset its cursor
|
||||
// (omit since_id) and re-fetch the recent backlog. This avoids the silent
|
||||
// loss-window where a pruned cursor silently filters everything out.
|
||||
//
|
||||
// since_id + since_secs together: both filters apply (AND). Output is ASC
|
||||
// when since_id is set (polling order), DESC otherwise (recent feed order).
|
||||
func (h *ActivityHandler) List(c *gin.Context) {
|
||||
workspaceID := c.Param("id")
|
||||
activityType := c.Query("type")
|
||||
source := c.Query("source") // "canvas" = source_id IS NULL, "agent" = source_id IS NOT NULL
|
||||
peerID := c.Query("peer_id") // optional UUID — restrict to rows where this peer is sender OR target
|
||||
limitStr := c.DefaultQuery("limit", "100")
|
||||
sinceSecsStr := c.Query("since_secs")
|
||||
sinceID := c.Query("since_id")
|
||||
beforeTSStr := c.Query("before_ts") // optional RFC3339 — return rows strictly older than this timestamp
|
||||
|
||||
// Validate peer_id as a UUID at the trust boundary so a malformed
|
||||
// caller (the agent or a downstream MCP tool) can't smuggle SQL
|
||||
// fragments into the WHERE clause via the parameter, even though
|
||||
// args are bound. UUID-shape rejection is also the cleanest 400
|
||||
// signal for the wheel-side chat_history MCP tool — clearer than a
|
||||
// generic "no rows" empty list when the agent passed an obviously
|
||||
// wrong id.
|
||||
if peerID != "" {
|
||||
if _, err := uuid.Parse(peerID); err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "peer_id must be a UUID"})
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Parse before_ts as the wall-clock paging knob for the wheel-side
|
||||
// `chat_history` MCP tool. The agent passes the oldest `created_at`
|
||||
// from a previous response to walk backward through long histories.
|
||||
// Validated as RFC3339 at the trust boundary so a typoed value
|
||||
// surfaces as a clean 400 instead of being silently ignored.
|
||||
var beforeTS time.Time
|
||||
usingBeforeTS := false
|
||||
if beforeTSStr != "" {
|
||||
t, err := time.Parse(time.RFC3339, beforeTSStr)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{
|
||||
"error": "before_ts must be an RFC3339 timestamp (e.g. 2026-05-01T00:00:00Z)",
|
||||
})
|
||||
return
|
||||
}
|
||||
beforeTS = t
|
||||
usingBeforeTS = true
|
||||
}
|
||||
|
||||
limit := 100
|
||||
if n, err := strconv.Atoi(limitStr); err == nil && n > 0 {
|
||||
@@ -38,6 +103,54 @@ func (h *ActivityHandler) List(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// Parse since_secs. Reject negative or non-integer values rather than
|
||||
// silently ignoring them — a typoed param shouldn't be lost as
|
||||
// most-recent-100, that's exactly the bug this fixes.
|
||||
var sinceSecs int
|
||||
if sinceSecsStr != "" {
|
||||
n, err := strconv.Atoi(sinceSecsStr)
|
||||
if err != nil || n <= 0 {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "since_secs must be a positive integer"})
|
||||
return
|
||||
}
|
||||
const maxSinceSecs = 30 * 24 * 60 * 60 // 30 days
|
||||
if n > maxSinceSecs {
|
||||
n = maxSinceSecs
|
||||
}
|
||||
sinceSecs = n
|
||||
}
|
||||
|
||||
// Resolve since_id cursor (if set) BEFORE building the main query so we
|
||||
// can 410 cleanly when the cursor row is gone — and so the cursor's
|
||||
// created_at is bound as a regular timestamp parameter (not a subquery)
|
||||
// for clean sqlmock matching and to keep the planner predictable.
|
||||
//
|
||||
// The lookup is scoped by workspace_id: a caller cannot enumerate or
|
||||
// peek at another workspace's events by passing a UUID belonging to a
|
||||
// different workspace. Mismatched-workspace cursor → 410, same as
|
||||
// "row not found" — both indicate the cursor is no longer usable for
|
||||
// this caller, no information leak.
|
||||
var cursorTime time.Time
|
||||
usingCursor := false
|
||||
if sinceID != "" {
|
||||
err := db.DB.QueryRowContext(c.Request.Context(),
|
||||
`SELECT created_at FROM activity_logs WHERE id = $1 AND workspace_id = $2`,
|
||||
sinceID, workspaceID,
|
||||
).Scan(&cursorTime)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
c.JSON(http.StatusGone, gin.H{
|
||||
"error": "since_id cursor not found (row may have been pruned or belongs to a different workspace); omit since_id to reset",
|
||||
})
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("Activity since_id cursor lookup error for ws=%s id=%s: %v", workspaceID, sinceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "cursor lookup failed"})
|
||||
return
|
||||
}
|
||||
usingCursor = true
|
||||
}
|
||||
|
||||
// Build query with optional filters
|
||||
query := `SELECT id, workspace_id, activity_type, source_id, target_id, method,
|
||||
summary, request_body, response_body, tool_trace, duration_ms, status, error_detail, created_at
|
||||
@@ -58,8 +171,55 @@ func (h *ActivityHandler) List(c *gin.Context) {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "source must be 'canvas' or 'agent'"})
|
||||
return
|
||||
}
|
||||
if peerID != "" {
|
||||
// Restrict to rows where this peer is either the sender (source_id)
|
||||
// or the recipient (target_id) of an A2A turn. This is the
|
||||
// "conversation history with peer X" view the wheel-side
|
||||
// chat_history MCP tool surfaces — agent receives a peer_agent
|
||||
// push, wants to see the prior 20 turns with that workspace
|
||||
// without paging through every other peer's traffic.
|
||||
//
|
||||
// Bound as a single arg, matched twice — keeps argIdx accurate
|
||||
// and avoids duplicate parameter binding (some drivers reject the
|
||||
// same arg slot reused, ours is fine but the explicit form is
|
||||
// clearer to read and matches the rest of the builder.)
|
||||
query += fmt.Sprintf(" AND (source_id = $%d OR target_id = $%d)", argIdx, argIdx)
|
||||
args = append(args, peerID)
|
||||
argIdx++
|
||||
}
|
||||
if usingBeforeTS {
|
||||
// Strictly older — never replay a row with the exact same
|
||||
// timestamp, mirrors the `created_at > cursorTime` shape
|
||||
// `since_id` uses for forward paging.
|
||||
query += fmt.Sprintf(" AND created_at < $%d", argIdx)
|
||||
args = append(args, beforeTS)
|
||||
argIdx++
|
||||
}
|
||||
if sinceSecs > 0 {
|
||||
// Use a parameterized interval so the value is bound, not
|
||||
// interpolated into the SQL string. `make_interval(secs => $N)`
|
||||
// avoids the lib/pq quirk where INTERVAL '$N seconds' won't
|
||||
// substitute a placeholder inside the literal.
|
||||
query += fmt.Sprintf(" AND created_at >= NOW() - make_interval(secs => $%d)", argIdx)
|
||||
args = append(args, sinceSecs)
|
||||
argIdx++
|
||||
}
|
||||
if usingCursor {
|
||||
// Strictly after — never replay the cursor row itself.
|
||||
query += fmt.Sprintf(" AND created_at > $%d", argIdx)
|
||||
args = append(args, cursorTime)
|
||||
argIdx++
|
||||
}
|
||||
|
||||
query += fmt.Sprintf(" ORDER BY created_at DESC LIMIT $%d", argIdx)
|
||||
// Polling clients (since_id) need oldest-first within the new window so
|
||||
// they process events in recorded order. The recent-feed view (no
|
||||
// since_id) keeps DESC — that's the canvas/UI shape and changing it
|
||||
// would surprise existing callers.
|
||||
if usingCursor {
|
||||
query += fmt.Sprintf(" ORDER BY created_at ASC LIMIT $%d", argIdx)
|
||||
} else {
|
||||
query += fmt.Sprintf(" ORDER BY created_at DESC LIMIT $%d", argIdx)
|
||||
}
|
||||
args = append(args, limit)
|
||||
|
||||
rows, err := db.DB.QueryContext(c.Request.Context(), query, args...)
|
||||
|
||||
@@ -0,0 +1,156 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// Tests for the since_id cursor on GET /workspaces/:id/activity (#2339 PR 3).
|
||||
//
|
||||
// Cursor shape: Telegram getUpdates / Slack RTM. The polling agent passes
|
||||
// the id of the last activity_logs row it processed; the server returns
|
||||
// rows STRICTLY AFTER that cursor in ASC order. Cross-workspace lookups
|
||||
// return 410 to prevent UUID-guessing peeks at other workspaces' events.
|
||||
|
||||
// TestActivityHandler_SinceID_ReturnsNewerASC: with a valid cursor the
|
||||
// handler does the cursor lookup, then queries with the cursor's
|
||||
// created_at as a > filter and ASC ordering — the polling shape.
|
||||
func TestActivityHandler_SinceID_ReturnsNewerASC(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
cursorID := "act-cursor-42"
|
||||
cursorTime := time.Date(2026, 4, 30, 5, 0, 0, 0, time.UTC)
|
||||
|
||||
// Step 1: cursor lookup — must include workspace_id scope so a UUID
|
||||
// from another workspace can't be used.
|
||||
mock.ExpectQuery(`SELECT created_at FROM activity_logs WHERE id = \$1 AND workspace_id = \$2`).
|
||||
WithArgs(cursorID, "ws-1").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"created_at"}).AddRow(cursorTime))
|
||||
|
||||
// Step 2: main query with the cursor's created_at as a > filter,
|
||||
// ASC ordering. Args: workspace_id, cursorTime, limit.
|
||||
mock.ExpectQuery("SELECT id, workspace_id, activity_type").
|
||||
WithArgs("ws-1", cursorTime, 100).
|
||||
WillReturnRows(newActivityRows())
|
||||
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewActivityHandler(broadcaster)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-1/activity?since_id="+cursorID, nil)
|
||||
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestActivityHandler_SinceID_CursorNotFound_410: cursor row doesn't exist
|
||||
// (pruned, never existed, or wrong UUID). Server returns 410 Gone so the
|
||||
// client knows to reset its cursor — silent empty results would cause a
|
||||
// stuck-poll bug where the agent never sees new events.
|
||||
func TestActivityHandler_SinceID_CursorNotFound_410(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
mock.ExpectQuery(`SELECT created_at FROM activity_logs WHERE id = \$1 AND workspace_id = \$2`).
|
||||
WithArgs("act-gone", "ws-1").
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewActivityHandler(broadcaster)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-1/activity?since_id=act-gone", nil)
|
||||
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusGone {
|
||||
t.Fatalf("expected 410, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestActivityHandler_SinceID_CrossWorkspaceCursor_410: a caller passes a
|
||||
// UUID that belongs to a different workspace. The cursor lookup is scoped
|
||||
// by workspace_id so the row is "not found" from this caller's perspective —
|
||||
// same 410 path as the pruned case. No information leak (caller cannot tell
|
||||
// whether the UUID belongs to nobody or to another workspace).
|
||||
func TestActivityHandler_SinceID_CrossWorkspaceCursor_410(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
// Cursor exists in DB but the WHERE workspace_id = $2 filter excludes
|
||||
// it — sqlmock returns no rows, which is what Postgres would do.
|
||||
mock.ExpectQuery(`SELECT created_at FROM activity_logs WHERE id = \$1 AND workspace_id = \$2`).
|
||||
WithArgs("act-other-ws", "ws-1").
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewActivityHandler(broadcaster)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-1/activity?since_id=act-other-ws", nil)
|
||||
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusGone {
|
||||
t.Fatalf("cross-workspace cursor: expected 410, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestActivityHandler_SinceID_CombinedWithSinceSecs: both filters apply
|
||||
// together (AND). Argument order in the main query: workspace_id,
|
||||
// since_secs, cursorTime, limit. Sanity-checks the placeholder index
|
||||
// arithmetic in the query builder.
|
||||
func TestActivityHandler_SinceID_CombinedWithSinceSecs(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
cursorID := "act-c"
|
||||
cursorTime := time.Date(2026, 4, 30, 4, 0, 0, 0, time.UTC)
|
||||
|
||||
mock.ExpectQuery(`SELECT created_at FROM activity_logs WHERE id = \$1 AND workspace_id = \$2`).
|
||||
WithArgs(cursorID, "ws-1").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"created_at"}).AddRow(cursorTime))
|
||||
|
||||
mock.ExpectQuery("SELECT id, workspace_id, activity_type").
|
||||
WithArgs("ws-1", 600, cursorTime, 100).
|
||||
WillReturnRows(newActivityRows())
|
||||
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewActivityHandler(broadcaster)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
c.Request = httptest.NewRequest("GET",
|
||||
"/workspaces/ws-1/activity?since_secs=600&since_id="+cursorID, nil)
|
||||
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,163 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// Tests for the since_secs query parameter on GET /workspaces/:id/activity.
|
||||
// Closes #2268 — the harness runner was passing this param and it was
|
||||
// silently ignored, capping the trace at most-recent-100 events. The new
|
||||
// shape: parse since_secs, add a parameterised `created_at >= NOW() -
|
||||
// make_interval(secs => $N)` clause, cap at 30 days, reject invalid input
|
||||
// with 400.
|
||||
|
||||
const activityCols = `id, workspace_id, activity_type, source_id, target_id, method, ` +
|
||||
`summary, request_body, response_body, tool_trace, duration_ms, status, error_detail, created_at`
|
||||
|
||||
func newActivityRows() *sqlmock.Rows {
|
||||
cols := []string{
|
||||
"id", "workspace_id", "activity_type", "source_id", "target_id", "method",
|
||||
"summary", "request_body", "response_body", "tool_trace", "duration_ms", "status", "error_detail", "created_at",
|
||||
}
|
||||
return sqlmock.NewRows(cols).
|
||||
AddRow("act-1", "ws-1", "a2a_send", nil, nil, nil,
|
||||
"sent", nil, nil, nil, nil, "ok", nil,
|
||||
time.Date(2026, 4, 29, 10, 0, 0, 0, time.UTC))
|
||||
}
|
||||
|
||||
// TestActivityHandler_SinceSecs_Accepted verifies that a valid since_secs
|
||||
// query param adds the make_interval clause to the SQL with the parsed
|
||||
// value as a bound parameter — exactly what the runner needs to scope a
|
||||
// trace to a test window.
|
||||
func TestActivityHandler_SinceSecs_Accepted(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
mock.ExpectQuery("SELECT id, workspace_id, activity_type").
|
||||
WithArgs("ws-1", 600, 100). // workspaceID, since_secs, limit
|
||||
WillReturnRows(newActivityRows())
|
||||
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewActivityHandler(broadcaster)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-1/activity?since_secs=600", nil)
|
||||
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestActivityHandler_SinceSecs_ClampedAt30Days verifies the defensive
|
||||
// ceiling so a paranoid client can't trigger a multi-month full-table
|
||||
// scan via since_secs=999999999.
|
||||
func TestActivityHandler_SinceSecs_ClampedAt30Days(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
const cap30Days = 30 * 24 * 60 * 60
|
||||
mock.ExpectQuery("SELECT id, workspace_id, activity_type").
|
||||
WithArgs("ws-1", cap30Days, 100).
|
||||
WillReturnRows(newActivityRows())
|
||||
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewActivityHandler(broadcaster)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-1/activity?since_secs=999999999", nil)
|
||||
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestActivityHandler_SinceSecs_InvalidRejected covers the loud-fail path:
|
||||
// a typoed param (non-int, zero, negative) returns 400 instead of being
|
||||
// silently dropped — that's the bug this whole feature is fixing.
|
||||
func TestActivityHandler_SinceSecs_InvalidRejected(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
val string
|
||||
}{
|
||||
{"non-integer", "abc"},
|
||||
{"zero", "0"},
|
||||
{"negative", "-1"},
|
||||
{"hex-prefix", "0x10"},
|
||||
{"float", "60.5"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// No DB call expected; bad input must be caught before the query.
|
||||
setupTestDB(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewActivityHandler(broadcaster)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
c.Request = httptest.NewRequest("GET",
|
||||
"/workspaces/ws-1/activity?since_secs="+tc.val, nil)
|
||||
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400 for %q, got %d: %s", tc.val, w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]string
|
||||
_ = json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if resp["error"] == "" {
|
||||
t.Errorf("expected error message in response body for %q", tc.val)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestActivityHandler_SinceSecs_Omitted verifies backward compat — callers
|
||||
// that don't pass since_secs see the original behavior (no extra WHERE
|
||||
// clause, just workspace_id + limit).
|
||||
func TestActivityHandler_SinceSecs_Omitted(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
// Only workspace_id + limit; the query must NOT include the
|
||||
// make_interval clause. sqlmock's WithArgs is strict on count, so a
|
||||
// since_secs leak would surface as "expected 2 args, got 3".
|
||||
mock.ExpectQuery("SELECT id, workspace_id, activity_type").
|
||||
WithArgs("ws-1", 100).
|
||||
WillReturnRows(newActivityRows())
|
||||
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewActivityHandler(broadcaster)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-1/activity", nil)
|
||||
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -167,6 +167,223 @@ func TestActivityList_SourceWithType(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- Activity List peer_id filter ----------
|
||||
//
|
||||
// peer_id surfaces the conversation history with one specific peer
|
||||
// for the wheel-side chat_history MCP tool. The filter joins
|
||||
// (source_id = $X OR target_id = $X) so both inbound (where this
|
||||
// peer was the sender) and outbound (where this peer was the
|
||||
// recipient) turns appear in the same view, ordered by created_at.
|
||||
|
||||
const testPeerUUID = "11111111-2222-3333-4444-555555555555"
|
||||
|
||||
func TestActivityList_PeerIDFilter(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewActivityHandler(broadcaster)
|
||||
|
||||
// peer_id binds twice in the query (source_id OR target_id) but is
|
||||
// added to args once — sqlmock matches positional args, so the
|
||||
// binding shape is what matters.
|
||||
mock.ExpectQuery(
|
||||
`SELECT .+ FROM activity_logs WHERE workspace_id = .+ AND \(source_id = .+ OR target_id = .+\)`,
|
||||
).
|
||||
WithArgs("ws-1", testPeerUUID, 100).
|
||||
WillReturnRows(sqlmock.NewRows([]string{
|
||||
"id", "workspace_id", "activity_type", "source_id", "target_id",
|
||||
"method", "summary", "request_body", "response_body",
|
||||
"tool_trace", "duration_ms", "status", "error_detail", "created_at",
|
||||
}))
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
c.Request = httptest.NewRequest(
|
||||
"GET", "/workspaces/ws-1/activity?peer_id="+testPeerUUID, nil,
|
||||
)
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestActivityList_PeerIDComposesWithType(t *testing.T) {
|
||||
// peer_id + type + source must compose into a single AND-chain so
|
||||
// the wheel can fetch e.g. "all peer_agent inbound from peer X" in
|
||||
// one round-trip. Pin both args + arg order so a future refactor
|
||||
// of the builder can't silently rearrange placeholders.
|
||||
mock := setupTestDB(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewActivityHandler(broadcaster)
|
||||
|
||||
mock.ExpectQuery(
|
||||
`SELECT .+ FROM activity_logs WHERE workspace_id = .+ AND activity_type = .+ AND source_id IS NOT NULL AND \(source_id = .+ OR target_id = .+\)`,
|
||||
).
|
||||
WithArgs("ws-1", "a2a_receive", testPeerUUID, 100).
|
||||
WillReturnRows(sqlmock.NewRows([]string{
|
||||
"id", "workspace_id", "activity_type", "source_id", "target_id",
|
||||
"method", "summary", "request_body", "response_body",
|
||||
"tool_trace", "duration_ms", "status", "error_detail", "created_at",
|
||||
}))
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
c.Request = httptest.NewRequest(
|
||||
"GET",
|
||||
"/workspaces/ws-1/activity?type=a2a_receive&source=agent&peer_id="+testPeerUUID,
|
||||
nil,
|
||||
)
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestActivityList_PeerIDRejectsNonUUID(t *testing.T) {
|
||||
// Trust-boundary check: a malformed peer_id must 400 before any
|
||||
// query is built. Defends against caller bugs (typoed UUID,
|
||||
// leading whitespace) and against any future code path that might
|
||||
// otherwise interpolate the value into the URL or another query.
|
||||
gin.SetMode(gin.TestMode)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewActivityHandler(broadcaster)
|
||||
|
||||
for _, bad := range []string{
|
||||
"not-a-uuid",
|
||||
"%27%20OR%201%3D1%20--", // URL-encoded ' OR 1=1 --
|
||||
"11111111-2222-3333-4444", // truncated
|
||||
"11111111-2222-3333-4444-555555555555-extra", // overlong
|
||||
"11111111-2222-3333-4444-55555555555G", // non-hex
|
||||
} {
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
c.Request = httptest.NewRequest(
|
||||
"GET", "/workspaces/ws-1/activity?peer_id="+bad, nil,
|
||||
)
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("peer_id=%q: expected 400, got %d (%s)", bad, w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- before_ts paging knob ----------
|
||||
//
|
||||
// before_ts is the wall-clock paging companion to peer_id — the agent
|
||||
// walks backward through long histories by passing the oldest
|
||||
// `created_at` from the previous response. Validated as RFC3339 at the
|
||||
// trust boundary; mirrors the strict-inequality shape since_id uses
|
||||
// for forward paging.
|
||||
|
||||
func TestActivityList_BeforeTSFilter(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewActivityHandler(broadcaster)
|
||||
|
||||
cutoff, _ := time.Parse(time.RFC3339, "2026-05-01T00:00:00Z")
|
||||
mock.ExpectQuery(
|
||||
`SELECT .+ FROM activity_logs WHERE workspace_id = .+ AND created_at < .+`,
|
||||
).
|
||||
WithArgs("ws-1", cutoff, 100).
|
||||
WillReturnRows(sqlmock.NewRows([]string{
|
||||
"id", "workspace_id", "activity_type", "source_id", "target_id",
|
||||
"method", "summary", "request_body", "response_body",
|
||||
"tool_trace", "duration_ms", "status", "error_detail", "created_at",
|
||||
}))
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
c.Request = httptest.NewRequest(
|
||||
"GET", "/workspaces/ws-1/activity?before_ts=2026-05-01T00%3A00%3A00Z", nil,
|
||||
)
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestActivityList_BeforeTSComposesWithPeerID(t *testing.T) {
|
||||
// peer_id + before_ts: the canonical wheel-side chat_history paging
|
||||
// shape. Pin both args + arg order so a future builder refactor
|
||||
// can't silently drop one filter or reorder placeholders.
|
||||
mock := setupTestDB(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewActivityHandler(broadcaster)
|
||||
|
||||
cutoff, _ := time.Parse(time.RFC3339, "2026-05-01T00:00:00Z")
|
||||
mock.ExpectQuery(
|
||||
`SELECT .+ FROM activity_logs WHERE workspace_id = .+ AND \(source_id = .+ OR target_id = .+\) AND created_at < .+`,
|
||||
).
|
||||
WithArgs("ws-1", testPeerUUID, cutoff, 100).
|
||||
WillReturnRows(sqlmock.NewRows([]string{
|
||||
"id", "workspace_id", "activity_type", "source_id", "target_id",
|
||||
"method", "summary", "request_body", "response_body",
|
||||
"tool_trace", "duration_ms", "status", "error_detail", "created_at",
|
||||
}))
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
c.Request = httptest.NewRequest(
|
||||
"GET",
|
||||
"/workspaces/ws-1/activity?peer_id="+testPeerUUID+"&before_ts=2026-05-01T00%3A00%3A00Z",
|
||||
nil,
|
||||
)
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestActivityList_BeforeTSRejectsInvalidFormat(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewActivityHandler(broadcaster)
|
||||
|
||||
for _, bad := range []string{
|
||||
"yesterday",
|
||||
"2026-05-01", // missing time component
|
||||
"2026-05-01%2000%3A00%3A00", // URL-encoded space instead of T
|
||||
"%27%20OR%201%3D1%20--", // URL-encoded SQL injection
|
||||
} {
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
c.Request = httptest.NewRequest(
|
||||
"GET", "/workspaces/ws-1/activity?before_ts="+bad, nil,
|
||||
)
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("before_ts=%q: expected 400, got %d (%s)", bad, w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- Activity type allowlist (#125: memory_write added) ----------
|
||||
|
||||
func TestActivityReport_AcceptsMemoryWriteType(t *testing.T) {
|
||||
|
||||
@@ -129,3 +129,97 @@ func TestAdminTestToken_HappyPath_TokenValidates(t *testing.T) {
|
||||
}
|
||||
|
||||
func sqlErrNoRows() error { return sql.ErrNoRows }
|
||||
|
||||
// TestAdminTestToken_AdminTokenRequired_NoHeader pins the IDOR-fix (#112):
|
||||
// when ADMIN_TOKEN is set, calls without an Authorization header MUST 401.
|
||||
// Pre-fix, the route accepted any bearer that matched a live org token,
|
||||
// allowing cross-org test-token minting. The current code uses
|
||||
// subtle.ConstantTimeCompare against ADMIN_TOKEN explicitly. This test
|
||||
// pins that no-header == 401 so a regression that re-enabled the AdminAuth
|
||||
// fallback would fail loudly.
|
||||
func TestAdminTestToken_AdminTokenRequired_NoHeader(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
t.Setenv("MOLECULE_ENV", "development")
|
||||
t.Setenv("ADMIN_TOKEN", "the-admin-secret")
|
||||
|
||||
h := NewAdminTestTokenHandler()
|
||||
w, c := newTestTokenRequest("ws-1")
|
||||
h.GetTestToken(c)
|
||||
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Fatalf("expected 401 with ADMIN_TOKEN set + no Authorization, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestAdminTestToken_AdminTokenRequired_WrongHeader pins that a non-matching
|
||||
// bearer is rejected. Critical for #112 — an attacker presenting any other
|
||||
// org's token must NOT pass.
|
||||
func TestAdminTestToken_AdminTokenRequired_WrongHeader(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
t.Setenv("MOLECULE_ENV", "development")
|
||||
t.Setenv("ADMIN_TOKEN", "the-admin-secret")
|
||||
|
||||
h := NewAdminTestTokenHandler()
|
||||
w, c := newTestTokenRequest("ws-1")
|
||||
c.Request.Header.Set("Authorization", "Bearer wrong-token")
|
||||
h.GetTestToken(c)
|
||||
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Fatalf("expected 401 with wrong Authorization, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestAdminTestToken_AdminTokenRequired_CorrectHeader pins the success
|
||||
// path through the ADMIN_TOKEN gate. Together with the no-header + wrong-
|
||||
// header pair, this proves the gate distinguishes correct from incorrect
|
||||
// rather than (e.g.) erroring on every request.
|
||||
func TestAdminTestToken_AdminTokenRequired_CorrectHeader(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
t.Setenv("MOLECULE_ENV", "development")
|
||||
t.Setenv("ADMIN_TOKEN", "the-admin-secret")
|
||||
|
||||
mock.ExpectQuery("SELECT id FROM workspaces WHERE id =").
|
||||
WithArgs("ws-1").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-1"))
|
||||
mock.ExpectExec("INSERT INTO workspace_auth_tokens").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
h := NewAdminTestTokenHandler()
|
||||
w, c := newTestTokenRequest("ws-1")
|
||||
c.Request.Header.Set("Authorization", "Bearer the-admin-secret")
|
||||
h.GetTestToken(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 with correct ADMIN_TOKEN, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations not met — INSERT into workspace_auth_tokens did not run, suggesting the gate short-circuited the success path: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAdminTestToken_AdminTokenEmpty_GateBypassedSafely pins that when
|
||||
// ADMIN_TOKEN is unset (typical local-dev setup), the explicit gate is
|
||||
// bypassed and the route works without an Authorization header. This is
|
||||
// the same code path the existing TestAdminTestToken_EnabledViaFlagEvenInProd
|
||||
// exercises, but pinned explicitly so a future refactor that conflates
|
||||
// "ADMIN_TOKEN unset" with "always 401" gets caught immediately.
|
||||
func TestAdminTestToken_AdminTokenEmpty_GateBypassedSafely(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
t.Setenv("MOLECULE_ENV", "development")
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
|
||||
mock.ExpectQuery("SELECT id FROM workspaces WHERE id =").
|
||||
WithArgs("ws-1").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-1"))
|
||||
mock.ExpectExec("INSERT INTO workspace_auth_tokens").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
h := NewAdminTestTokenHandler()
|
||||
w, c := newTestTokenRequest("ws-1")
|
||||
// Note: NO Authorization header — the gate is unset, so this MUST work.
|
||||
h.GetTestToken(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 with ADMIN_TOKEN empty + no Authorization, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,26 @@
|
||||
package handlers
|
||||
|
||||
// chat_files.go — file upload/download for workspace chat.
|
||||
// chat_files.go — file upload + download for workspace chat,
|
||||
// both HTTP-forward (RFC #2312, fully landed).
|
||||
//
|
||||
// Architecture (v2, post-RFC-#2312):
|
||||
//
|
||||
// - Upload (POST /workspaces/:id/uploads): the platform proxies the
|
||||
// multipart request straight to the workspace's own
|
||||
// /internal/chat/uploads/ingest endpoint. The workspace agent then
|
||||
// writes to local /workspace/.molecule/chat-uploads.
|
||||
//
|
||||
// - Download (GET /workspaces/:id/files): the platform makes an HTTP
|
||||
// GET to the workspace's /internal/file/read?path=<abs> endpoint
|
||||
// and streams the response body to the caller.
|
||||
//
|
||||
// Same code path on local Docker and SaaS — the v1 docker-exec /
|
||||
// docker-cp paths were structurally broken in SaaS because
|
||||
// workspace-server's local Docker client has no visibility into
|
||||
// EC2-hosted workspaces (#2308 root cause). Both surfaces now use the
|
||||
// per-workspace platform_inbound_secret minted at provision time
|
||||
// (RFC #2312 PR-F) for auth, and the workspace's HTTP server mounts
|
||||
// the corresponding receiver at workspace/main.py.
|
||||
//
|
||||
// Split from templates.go because these endpoints have a different
|
||||
// security model (no /configs write, no template fallback) and a
|
||||
@@ -9,61 +29,141 @@ package handlers
|
||||
// conversation payloads.
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"mime"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/api/types/container"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// ChatFilesHandler serves file upload + download for chat. It
|
||||
// composes the existing TemplatesHandler's Docker plumbing
|
||||
// (findContainer, execInContainer, copyFilesToContainer) rather than
|
||||
// duplicating them, so a bug fix in the Docker layer propagates to
|
||||
// both endpoints.
|
||||
// ChatFilesHandler serves file upload + download for chat. Holds a
|
||||
// reference to TemplatesHandler so the (still docker-exec) Download
|
||||
// path keeps using the shared findContainer/CopyFromContainer helpers
|
||||
// without duplicating them. Upload no longer reaches into Docker.
|
||||
type ChatFilesHandler struct {
|
||||
templates *TemplatesHandler
|
||||
|
||||
// httpClient is broken out so tests can swap in an httptest.Server
|
||||
// transport. Prod uses a default with a generous Timeout to cover
|
||||
// the 50 MB worst case on a slow EC2 link without leaving a
|
||||
// connection hanging forever on a sick workspace.
|
||||
httpClient *http.Client
|
||||
}
|
||||
|
||||
func NewChatFilesHandler(t *TemplatesHandler) *ChatFilesHandler {
|
||||
return &ChatFilesHandler{templates: t}
|
||||
return &ChatFilesHandler{
|
||||
templates: t,
|
||||
httpClient: &http.Client{
|
||||
// 50 MB total body cap / ~1 MB/s slow-network floor → ~60s.
|
||||
// Doubled for headroom on the legitimate-but-slow case.
|
||||
Timeout: 120 * time.Second,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// chatUploadMaxBytes caps the full multipart request body so a
|
||||
// malicious / runaway client can't OOM the server. 50 MB covers most
|
||||
// documents + a handful of images per message; larger artefacts
|
||||
// should go through git/S3 rather than chat.
|
||||
// malicious / runaway client can't OOM the proxy hop. 50 MB matches
|
||||
// the workspace-side limit; anything larger is rejected at the
|
||||
// network boundary before forwarding.
|
||||
const chatUploadMaxBytes = 50 * 1024 * 1024
|
||||
|
||||
// chatUploadMaxFileBytes caps individual files in a multi-file upload.
|
||||
// Keeping the per-file cap below the total lets a user send, say, a
|
||||
// 5 MB PDF + 10 screenshots without tripping the batch limit on any
|
||||
// single attachment.
|
||||
const chatUploadMaxFileBytes = 25 * 1024 * 1024
|
||||
|
||||
// chatUploadDir is the in-container path where user-uploaded chat
|
||||
// attachments land. Under /workspace so the file persists with the
|
||||
// workspace volume and is readable by the agent without any extra
|
||||
// plumbing — the agent just reads from the URI path we return.
|
||||
// attachments land. Kept here for documentation parity with the
|
||||
// workspace-side handler — the platform no longer writes files
|
||||
// directly, but the URI scheme returned in responses still uses this
|
||||
// path, so any consumer parsing those URIs has the constant to
|
||||
// reference.
|
||||
const chatUploadDir = "/workspace/.molecule/chat-uploads"
|
||||
|
||||
// unsafeFilenameChars matches anything outside the conservative
|
||||
// {alnum, dot, underscore, dash} set. Filenames get rewritten
|
||||
// character-class at a time, so embedded paths, control chars,
|
||||
// newlines, quotes, and shell metachars never reach the filesystem.
|
||||
var unsafeFilenameChars = regexp.MustCompile(`[^a-zA-Z0-9._\-]`)
|
||||
// resolveWorkspaceForwardCreds resolves the workspace's URL +
|
||||
// platform_inbound_secret for an /internal/* forward, applying
|
||||
// lazy-heal on a missing inbound secret (RFC #2312 backfill — the
|
||||
// 2026-04-30 fix that closes the existing-workspace gap left by the
|
||||
// shared-mint refactor).
|
||||
//
|
||||
// On any failure path the function HAS ALREADY written the appropriate
|
||||
// status + JSON body to c (404 / 503 / 500) and returns ok=false.
|
||||
// On success returns the URL + secret + ok=true.
|
||||
//
|
||||
// op is the human-readable feature label ("upload"/"download") used
|
||||
// in log messages and the 503 RFC-#2312 detail copy so operators can
|
||||
// distinguish which feature ran.
|
||||
//
|
||||
// Centralized here (rather than inline in Upload + Download) so the
|
||||
// next forward-time condition we add — secret rotation, audit, etc. —
|
||||
// goes in ONE place. Drift between the two handlers is the same class
|
||||
// of bug as the original SaaS provision drift fixed in #2366; this
|
||||
// extraction prevents that class on the consumer side.
|
||||
func resolveWorkspaceForwardCreds(c *gin.Context, ctx context.Context, workspaceID, op string) (wsURL, secret string, ok bool) {
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT COALESCE(url, '') FROM workspaces WHERE id = $1`, workspaceID,
|
||||
).Scan(&wsURL); err != nil {
|
||||
log.Printf("chat_files %s: workspace lookup failed for %s: %v", op, workspaceID, err)
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return "", "", false
|
||||
}
|
||||
if wsURL == "" {
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace url not registered yet"})
|
||||
return "", "", false
|
||||
}
|
||||
// Trust note: workspaces.url passes validateAgentURL at /registry/
|
||||
// register write time, blocking SSRF-shaped URLs. We rely on that
|
||||
// upstream gate rather than re-validating here. Tracked at #2316
|
||||
// for follow-up: forward-time re-validation as defense-in-depth.
|
||||
|
||||
secret, healed, err := readOrLazyHealInboundSecret(ctx, workspaceID, "chat_files "+op)
|
||||
if err != nil {
|
||||
// Either a non-NoInboundSecret read error (DB hiccup) or a mint
|
||||
// failure during lazy-heal. The chat_files contract is to surface
|
||||
// 503 with the RFC-#2312 reprovision hint in both cases — the user
|
||||
// can't proceed and needs ops attention.
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{
|
||||
"error": "workspace not yet enrolled in v2 " + op + " (RFC #2312)",
|
||||
"detail": "Failed to mint inbound secret. Reprovision the workspace if this persists.",
|
||||
})
|
||||
return "", "", false
|
||||
}
|
||||
if healed {
|
||||
// The platform now has the secret but the workspace's
|
||||
// /configs/.platform_inbound_secret is still empty until the next
|
||||
// /registry/register response propagates it. User retries after
|
||||
// the workspace's next heartbeat picks up the new secret (~30s).
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{
|
||||
"error": "workspace re-registering — please retry in 30 seconds",
|
||||
"detail": "Inbound secret was just minted. Workspace will pick it up on its next heartbeat.",
|
||||
"retry_after_seconds": 30,
|
||||
})
|
||||
return "", "", false
|
||||
}
|
||||
return wsURL, secret, true
|
||||
}
|
||||
|
||||
// urlPathEscape percent-encodes every byte outside the RFC 3986
|
||||
// unreserved set — stricter than net/url.PathEscape (which leaves
|
||||
// "/" unescaped because it's legal in URL paths). Filenames must
|
||||
// never contain "/" anyway, so escaping it is defence-in-depth
|
||||
// against an agent that writes a path-like name.
|
||||
//
|
||||
// Used by Download's Content-Disposition header.
|
||||
func urlPathEscape(s string) string {
|
||||
const unreserved = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
|
||||
var b strings.Builder
|
||||
for _, c := range []byte(s) {
|
||||
if strings.IndexByte(unreserved, c) >= 0 {
|
||||
b.WriteByte(c)
|
||||
} else {
|
||||
fmt.Fprintf(&b, "%%%02X", c)
|
||||
}
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// contentDispositionAttachment produces a safe `attachment; filename=...`
|
||||
// header. Quotes, CR, and LF in the filename are escaped per RFC 6266 /
|
||||
@@ -97,60 +197,23 @@ func contentDispositionAttachment(name string) string {
|
||||
asciiSafe, urlPathEscape(name))
|
||||
}
|
||||
|
||||
// urlPathEscape percent-encodes every byte outside the RFC 3986
|
||||
// unreserved set — stricter than net/url.PathEscape (which leaves
|
||||
// "/" unescaped because it's legal in URL paths). Filenames must
|
||||
// never contain "/" anyway, so escaping it is defence-in-depth
|
||||
// against an agent that writes a path-like name.
|
||||
func urlPathEscape(s string) string {
|
||||
const unreserved = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
|
||||
var b strings.Builder
|
||||
for _, c := range []byte(s) {
|
||||
if strings.IndexByte(unreserved, c) >= 0 {
|
||||
b.WriteByte(c)
|
||||
} else {
|
||||
fmt.Fprintf(&b, "%%%02X", c)
|
||||
}
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func sanitizeFilename(in string) string {
|
||||
base := filepath.Base(in)
|
||||
base = strings.ReplaceAll(base, " ", "_")
|
||||
base = unsafeFilenameChars.ReplaceAllString(base, "_")
|
||||
if len(base) > 100 {
|
||||
ext := filepath.Ext(base)
|
||||
if len(ext) > 16 {
|
||||
ext = ""
|
||||
}
|
||||
base = base[:100-len(ext)] + ext
|
||||
}
|
||||
if base == "" || base == "." || base == ".." {
|
||||
return "file"
|
||||
}
|
||||
return base
|
||||
}
|
||||
|
||||
// ChatUploadedFile is the per-file response returned from POST
|
||||
// /workspaces/:id/chat/uploads. Clients include this payload (or a
|
||||
// trimmed subset) in their outgoing A2A `message/send` parts.
|
||||
type ChatUploadedFile struct {
|
||||
// URI uses a custom "workspace:" scheme so clients can resolve it
|
||||
// against the streaming Download endpoint regardless of where the
|
||||
// canvas itself is hosted. The path component is always absolute
|
||||
// within the workspace container.
|
||||
URI string `json:"uri"`
|
||||
Name string `json:"name"`
|
||||
MimeType string `json:"mimeType,omitempty"`
|
||||
Size int64 `json:"size"`
|
||||
}
|
||||
|
||||
// Upload handles POST /workspaces/:id/chat/uploads.
|
||||
// Accepts multipart/form-data with one or more `files` fields, stages
|
||||
// each under /workspace/.molecule/chat-uploads with a UUID prefix,
|
||||
// and returns the list of URIs for the caller to attach to an A2A
|
||||
// message.
|
||||
//
|
||||
// Streams the multipart body straight to the workspace's own
|
||||
// /internal/chat/uploads/ingest endpoint with the platform_inbound_secret
|
||||
// (RFC #2312, migration 044) in the Authorization header. The workspace
|
||||
// validates and writes to its local /workspace/.molecule/chat-uploads;
|
||||
// the response (containing one ChatUploadedFile per upload) is streamed
|
||||
// back unchanged.
|
||||
//
|
||||
// Why streaming, not parse-then-re-encode:
|
||||
// - Eliminates the 50 MB intermediate buffer on the platform.
|
||||
// - Per-file size + path-safety enforcement is the workspace's job;
|
||||
// duplicating it here just creates two places to keep in sync.
|
||||
// - The error responses from the workspace (413 with the offending
|
||||
// filename, 400 on missing files field, etc.) propagate through
|
||||
// unchanged, so the user sees the same shapes regardless of where
|
||||
// the failure originated.
|
||||
func (h *ChatFilesHandler) Upload(c *gin.Context) {
|
||||
workspaceID := c.Param("id")
|
||||
if err := validateWorkspaceID(workspaceID); err != nil {
|
||||
@@ -158,172 +221,62 @@ func (h *ChatFilesHandler) Upload(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Hard cap the request body BEFORE ParseMultipartForm — otherwise
|
||||
// a client could chunk-upload past the cap before Go notices.
|
||||
// Hard cap the request body BEFORE forwarding. http.MaxBytesReader
|
||||
// enforces lazily as the body is read; a malicious client cannot
|
||||
// chunk-upload past the cap, the wrapped reader returns an error
|
||||
// when the cap is exceeded and the workspace receives a truncated
|
||||
// stream that fails its own multipart parser.
|
||||
c.Request.Body = http.MaxBytesReader(c.Writer, c.Request.Body, chatUploadMaxBytes)
|
||||
if err := c.Request.ParseMultipartForm(chatUploadMaxBytes); err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "failed to parse multipart form"})
|
||||
return
|
||||
}
|
||||
|
||||
form := c.Request.MultipartForm
|
||||
var headers []*multipart.FileHeader
|
||||
if form != nil && form.File != nil {
|
||||
headers = form.File["files"]
|
||||
}
|
||||
if len(headers) == 0 {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "expected at least one 'files' field"})
|
||||
return
|
||||
}
|
||||
|
||||
ctx := c.Request.Context()
|
||||
containerName := h.templates.findContainer(ctx, workspaceID)
|
||||
if containerName == "" {
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace container not running"})
|
||||
|
||||
wsURL, secret, ok := resolveWorkspaceForwardCreds(c, ctx, workspaceID, "upload")
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
// Build the archive in memory. Files are byte-preserving through
|
||||
// Go's string<->[]byte (the tar helper takes map[string]string but
|
||||
// the conversion is a literal copy, not a UTF-8 reinterpretation).
|
||||
archive := map[string]string{}
|
||||
uploaded := make([]ChatUploadedFile, 0, len(headers))
|
||||
for _, fh := range headers {
|
||||
if fh.Size > chatUploadMaxFileBytes {
|
||||
c.JSON(http.StatusRequestEntityTooLarge, gin.H{
|
||||
"error": fmt.Sprintf("%s exceeds per-file limit (%d MB)", fh.Filename, chatUploadMaxFileBytes/(1024*1024)),
|
||||
})
|
||||
return
|
||||
}
|
||||
f, err := fh.Open()
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "failed to read upload"})
|
||||
return
|
||||
}
|
||||
// LimitReader guards against a truthful-but-lying Size header:
|
||||
// if the multipart stream carries more bytes than declared, we
|
||||
// stop at the cap instead of growing the buffer.
|
||||
data, err := io.ReadAll(io.LimitReader(f, chatUploadMaxFileBytes+1))
|
||||
f.Close()
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "failed to read upload"})
|
||||
return
|
||||
}
|
||||
if int64(len(data)) > chatUploadMaxFileBytes {
|
||||
c.JSON(http.StatusRequestEntityTooLarge, gin.H{
|
||||
"error": fmt.Sprintf("%s exceeds per-file limit (%d MB)", fh.Filename, chatUploadMaxFileBytes/(1024*1024)),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
name := sanitizeFilename(fh.Filename)
|
||||
// 16-byte (UUID-equivalent) random prefix. Within a single
|
||||
// batch we also check for collisions — birthday on 128 bits
|
||||
// is astronomical, but a bad PRNG or single re-used draw
|
||||
// would silently overwrite a sibling upload with its own
|
||||
// content and return two URIs pointing at one file.
|
||||
var stored string
|
||||
for attempt := 0; attempt < 4; attempt++ {
|
||||
idBytes := make([]byte, 16)
|
||||
if _, err := rand.Read(idBytes); err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to allocate upload ID"})
|
||||
return
|
||||
}
|
||||
candidate := hex.EncodeToString(idBytes) + "-" + name
|
||||
if _, taken := archive[candidate]; !taken {
|
||||
stored = candidate
|
||||
break
|
||||
}
|
||||
}
|
||||
if stored == "" {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to allocate unique upload ID"})
|
||||
return
|
||||
}
|
||||
archive[stored] = string(data)
|
||||
|
||||
mt := fh.Header.Get("Content-Type")
|
||||
if mt == "" {
|
||||
mt = mime.TypeByExtension(filepath.Ext(name))
|
||||
}
|
||||
uploaded = append(uploaded, ChatUploadedFile{
|
||||
URI: "workspace:" + chatUploadDir + "/" + stored,
|
||||
Name: name,
|
||||
MimeType: mt,
|
||||
Size: int64(len(data)),
|
||||
})
|
||||
}
|
||||
|
||||
// mkdir -p is idempotent; we fire it every upload instead of
|
||||
// caching state here so container restarts don't surprise us.
|
||||
_, _ = h.templates.execInContainer(ctx, containerName, []string{"mkdir", "-p", chatUploadDir})
|
||||
|
||||
// Defence in depth: pre-remove each target path before extracting
|
||||
// the tar. An agent with write access to /workspace could in
|
||||
// theory race-create a symlink at <chatUploadDir>/<stored-name>
|
||||
// pointing at a sensitive in-container path (its own /etc/*,
|
||||
// mounted secrets). Docker's tar extraction on some drivers
|
||||
// follows pre-existing symlinks at the destination. `rm -f` the
|
||||
// exact stored-name closes that window — the UUID prefix on the
|
||||
// name makes a successful race effectively impossible, but this
|
||||
// guard costs nothing and documents the intent.
|
||||
rmArgs := []string{"rm", "-f", "--"}
|
||||
for stored := range archive {
|
||||
rmArgs = append(rmArgs, chatUploadDir+"/"+stored)
|
||||
}
|
||||
_, _ = h.templates.execInContainer(ctx, containerName, rmArgs)
|
||||
|
||||
if err := h.copyFlatToContainer(ctx, containerName, chatUploadDir, archive); err != nil {
|
||||
log.Printf("Chat upload copy failed for %s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to stage files in workspace"})
|
||||
// Build the forward request. Body is the (capped) reader from the
|
||||
// inbound request — Go's http.Client streams it directly to the
|
||||
// workspace, no intermediate buffering on the platform.
|
||||
forwardURL := strings.TrimRight(wsURL, "/") + "/internal/chat/uploads/ingest"
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, forwardURL, c.Request.Body)
|
||||
if err != nil {
|
||||
log.Printf("chat_files Upload: build request failed for %s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to construct forward request"})
|
||||
return
|
||||
}
|
||||
// Forward the multipart Content-Type (with boundary) verbatim;
|
||||
// without it the workspace's parser cannot find part boundaries.
|
||||
if ct := c.Request.Header.Get("Content-Type"); ct != "" {
|
||||
req.Header.Set("Content-Type", ct)
|
||||
}
|
||||
req.Header.Set("Authorization", "Bearer "+secret)
|
||||
// Pass through Content-Length so the workspace can short-circuit
|
||||
// the total-body cap before parsing. ContentLength on the request
|
||||
// struct also lets Go's transport know whether to stream or send
|
||||
// chunked-encoded.
|
||||
if c.Request.ContentLength > 0 {
|
||||
req.ContentLength = c.Request.ContentLength
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"files": uploaded})
|
||||
}
|
||||
|
||||
// copyFlatToContainer extracts one tar of flat files into destPath
|
||||
// inside the container. Unlike the shared copyFilesToContainer helper
|
||||
// (which prepends destPath into tar entry names — correct for its
|
||||
// callers whose files relative-live inside a nested tree), this
|
||||
// helper writes tar entries with ONLY the flat filename so Docker's
|
||||
// extraction at destPath lands them directly in destPath, not at
|
||||
// destPath/destPath/... as the shared helper would.
|
||||
// Filenames are validated to contain no path separator so nothing
|
||||
// can escape destPath via an embedded "../" or a leading "/".
|
||||
func (h *ChatFilesHandler) copyFlatToContainer(ctx context.Context, containerName, destPath string, files map[string]string) error {
|
||||
if h.templates.docker == nil {
|
||||
return fmt.Errorf("docker not available")
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
tw := tar.NewWriter(&buf)
|
||||
for name, content := range files {
|
||||
if strings.ContainsAny(name, "/\\") || name == ".." || name == "." || name == "" {
|
||||
return fmt.Errorf("unsafe flat filename: %q", name)
|
||||
}
|
||||
data := []byte(content)
|
||||
if err := tw.WriteHeader(&tar.Header{
|
||||
Name: name, // relative — Docker resolves against destPath
|
||||
Mode: 0644,
|
||||
Size: int64(len(data)),
|
||||
Typeflag: tar.TypeReg,
|
||||
}); err != nil {
|
||||
return fmt.Errorf("tar header %q: %w", name, err)
|
||||
}
|
||||
if _, err := tw.Write(data); err != nil {
|
||||
return fmt.Errorf("tar write %q: %w", name, err)
|
||||
}
|
||||
}
|
||||
if err := tw.Close(); err != nil {
|
||||
return fmt.Errorf("tar close: %w", err)
|
||||
}
|
||||
return h.templates.docker.CopyToContainer(ctx, containerName, destPath, &buf, container.CopyToContainerOptions{})
|
||||
h.streamWorkspaceResponse(c, "upload", workspaceID, forwardURL, req, []string{"Content-Type"})
|
||||
}
|
||||
|
||||
// Download handles GET /workspaces/:id/chat/download?path=<abs path>.
|
||||
// Streams the file bytes from the container with a correct
|
||||
// Content-Type and attachment Content-Disposition. Binary-safe —
|
||||
// unlike the existing JSON ReadFile endpoint which carries content
|
||||
// as a string (lossy for non-UTF-8 bytes).
|
||||
// Forwards over HTTP to the workspace's own /internal/file/read endpoint
|
||||
// (RFC #2312 PR-D), replacing the docker-cp tar-stream extraction that
|
||||
// only worked when the platform binary had local Docker socket access.
|
||||
//
|
||||
// Same path-safety contract as the legacy version: caller-side validation
|
||||
// is duplicated on the workspace side (internal_file_read.py) so a
|
||||
// platform bug or malicious caller bypassing one layer still hits the
|
||||
// other. This is "defence in depth via two parallel checks," not "trust
|
||||
// the workspace to validate" — the workspace doesn't trust the platform
|
||||
// either.
|
||||
//
|
||||
// Body is streamed end-to-end (no buffering on the platform), preserving
|
||||
// binary safety and arbitrary file size (the 50 MB cap on Upload doesn't
|
||||
// apply to artefacts the agent produced).
|
||||
func (h *ChatFilesHandler) Download(c *gin.Context) {
|
||||
workspaceID := c.Param("id")
|
||||
if err := validateWorkspaceID(workspaceID); err != nil {
|
||||
@@ -362,54 +315,61 @@ func (h *ChatFilesHandler) Download(c *gin.Context) {
|
||||
}
|
||||
|
||||
ctx := c.Request.Context()
|
||||
if h.templates.docker == nil {
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "docker unavailable"})
|
||||
return
|
||||
}
|
||||
containerName := h.templates.findContainer(ctx, workspaceID)
|
||||
if containerName == "" {
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace container not running"})
|
||||
|
||||
wsURL, secret, ok := resolveWorkspaceForwardCreds(c, ctx, workspaceID, "download")
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
// docker cp returns a tar stream containing the requested path.
|
||||
// For a regular file that's a single tar entry; we extract and
|
||||
// stream the body through.
|
||||
reader, _, err := h.templates.docker.CopyFromContainer(ctx, containerName, path)
|
||||
// Build forward URL with the validated path encoded as a query param.
|
||||
// url.Values handles all the percent-encoding correctly — a path with
|
||||
// special chars (spaces, &, +) round-trips through both the platform's
|
||||
// validator and the workspace-side validator.
|
||||
forwardURL := strings.TrimRight(wsURL, "/") + "/internal/file/read?path=" + url.QueryEscape(path)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, forwardURL, nil)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "file not found"})
|
||||
log.Printf("chat_files Download: build request failed for %s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to construct forward request"})
|
||||
return
|
||||
}
|
||||
defer reader.Close()
|
||||
req.Header.Set("Authorization", "Bearer "+secret)
|
||||
|
||||
tr := tar.NewReader(reader)
|
||||
hdr, err := tr.Next()
|
||||
h.streamWorkspaceResponse(c, "download", workspaceID, forwardURL, req,
|
||||
[]string{"Content-Type", "Content-Length", "Content-Disposition"})
|
||||
}
|
||||
|
||||
// streamWorkspaceResponse executes the prepared forward request and
|
||||
// streams the workspace's response back to the inbound caller.
|
||||
// Forwards the named response headers verbatim. Centralizes the
|
||||
// "do request → check err → defer close → copy headers → set status →
|
||||
// io.Copy" tail that's identical between Upload and Download.
|
||||
//
|
||||
// op is the human-readable feature label ("upload"/"download") used
|
||||
// in log messages so operators can distinguish which feature ran.
|
||||
func (h *ChatFilesHandler) streamWorkspaceResponse(
|
||||
c *gin.Context,
|
||||
op, workspaceID, forwardURL string,
|
||||
req *http.Request,
|
||||
forwardHeaders []string,
|
||||
) {
|
||||
resp, err := h.httpClient.Do(req)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read archive"})
|
||||
return
|
||||
}
|
||||
if hdr.Typeflag != tar.TypeReg {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "path is not a regular file"})
|
||||
log.Printf("chat_files %s: forward to %s failed: %v", op, forwardURL, err)
|
||||
c.JSON(http.StatusBadGateway, gin.H{"error": "workspace unreachable"})
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
name := filepath.Base(path)
|
||||
mt := mime.TypeByExtension(filepath.Ext(name))
|
||||
if mt == "" {
|
||||
mt = "application/octet-stream"
|
||||
for _, hdr := range forwardHeaders {
|
||||
if v := resp.Header.Get(hdr); v != "" {
|
||||
c.Header(hdr, v)
|
||||
}
|
||||
}
|
||||
c.Header("Content-Type", mt)
|
||||
c.Header("Content-Length", fmt.Sprintf("%d", hdr.Size))
|
||||
c.Header("Content-Disposition", contentDispositionAttachment(name))
|
||||
c.Status(http.StatusOK)
|
||||
|
||||
// Stream exactly hdr.Size bytes. CopyN was chosen over LimitReader
|
||||
// because it returns an error when the source is short — that
|
||||
// surfaces a bug in the tar extraction path immediately instead
|
||||
// of silently truncating. Agents can legitimately produce files
|
||||
// larger than the 50 MB upload cap (that's a per-request inbound
|
||||
// cap, not a per-artifact one), so we cannot clamp here.
|
||||
if _, err := io.CopyN(c.Writer, tr, hdr.Size); err != nil {
|
||||
log.Printf("Chat download stream error for %s (%s): %v", workspaceID, path, err)
|
||||
c.Status(resp.StatusCode)
|
||||
if _, err := io.Copy(c.Writer, resp.Body); err != nil {
|
||||
// Mid-stream failure — too late to write a JSON error, just
|
||||
// log so ops can correlate with the workspace's logs.
|
||||
log.Printf("chat_files %s: stream response back failed for %s: %v", op, workspaceID, err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,67 +1,91 @@
|
||||
package handlers
|
||||
|
||||
// Unit tests for chat_files.go. The Docker-touching paths (Upload
|
||||
// actually copying into a container, Download actually streaming tar)
|
||||
// are exercised via integration tests — docker-in-docker is out of
|
||||
// scope for the unit suite. These tests cover the validation + error
|
||||
// surfaces that a caller can reach without a running container.
|
||||
// Unit tests for chat_files.go.
|
||||
//
|
||||
// Upload (HTTP-forward, RFC #2312 PR-C): exercised against an httptest
|
||||
// mock workspace + sqlmock-backed db.DB. The platform-side handler is
|
||||
// now a streaming proxy; assertions focus on:
|
||||
// * input validation (400 on bad workspace id)
|
||||
// * resolution failures (404 missing row, 503 missing secret/url)
|
||||
// * forward shape (Authorization, Content-Type, body)
|
||||
// * pass-through of the workspace's status + body
|
||||
//
|
||||
// Path-safety + sanitization that lived on the platform pre-#2312 is
|
||||
// now the workspace-side handler's concern; covered in the Python
|
||||
// suite (workspace/tests/test_internal_chat_uploads.py).
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"database/sql"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func TestSanitizeFilename(t *testing.T) {
|
||||
cases := []struct {
|
||||
in, want string
|
||||
}{
|
||||
{"report.pdf", "report.pdf"},
|
||||
{"my file.pdf", "my_file.pdf"},
|
||||
{"../../etc/passwd", "passwd"},
|
||||
{"weird;$name`.txt", "weird__name_.txt"},
|
||||
{"", "file"},
|
||||
{".", "file"},
|
||||
{"..", "file"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
got := sanitizeFilename(tc.in)
|
||||
if got != tc.want {
|
||||
t.Errorf("sanitizeFilename(%q) = %q, want %q", tc.in, got, tc.want)
|
||||
}
|
||||
}
|
||||
// makeUploadRequest builds a gin context for POST /workspaces/:id/chat/uploads
|
||||
// with the given multipart body. The recorder is returned so callers can
|
||||
// assert status + body after invoking h.Upload(c).
|
||||
func makeUploadRequest(t *testing.T, workspaceID string, body *bytes.Buffer, contentType string) (*gin.Context, *httptest.ResponseRecorder) {
|
||||
t.Helper()
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: workspaceID}}
|
||||
req := httptest.NewRequest("POST", "/workspaces/"+workspaceID+"/chat/uploads", body)
|
||||
req.Header.Set("Content-Type", contentType)
|
||||
c.Request = req
|
||||
return c, w
|
||||
}
|
||||
|
||||
func TestSanitizeFilename_LongNamePreservesExtension(t *testing.T) {
|
||||
// 120-char base + .pdf — the helper should truncate the base but
|
||||
// keep the extension intact so content-type inference still works.
|
||||
longBase := strings.Repeat("a", 120)
|
||||
got := sanitizeFilename(longBase + ".pdf")
|
||||
if len(got) > 100 {
|
||||
t.Errorf("filename not truncated: len=%d", len(got))
|
||||
}
|
||||
if !strings.HasSuffix(got, ".pdf") {
|
||||
t.Errorf("extension stripped: %q", got)
|
||||
// uploadFixture builds a minimal multipart/form-data body with a single
|
||||
// `files` part. The exact bytes don't matter for proxy tests — only that
|
||||
// the workspace receives the same boundary + headers we sent.
|
||||
func uploadFixture(t *testing.T) (*bytes.Buffer, string) {
|
||||
t.Helper()
|
||||
var buf bytes.Buffer
|
||||
mw := multipart.NewWriter(&buf)
|
||||
fw, err := mw.CreateFormFile("files", "fixture.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("CreateFormFile: %v", err)
|
||||
}
|
||||
_, _ = fw.Write([]byte("fixture-payload"))
|
||||
mw.Close()
|
||||
return &buf, mw.FormDataContentType()
|
||||
}
|
||||
|
||||
// expectURL stubs the SELECT that resolves the workspace's url.
|
||||
func expectURL(mock sqlmock.Sqlmock, workspaceID, url string) {
|
||||
mock.ExpectQuery(`SELECT COALESCE\(url, ''\) FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(workspaceID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"url"}).AddRow(url))
|
||||
}
|
||||
|
||||
// expectURLMissing stubs the SELECT to return sql.ErrNoRows.
|
||||
func expectURLMissing(mock sqlmock.Sqlmock, workspaceID string) {
|
||||
mock.ExpectQuery(`SELECT COALESCE\(url, ''\) FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(workspaceID).
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
}
|
||||
|
||||
// expectInboundSecret stubs the SELECT performed by ReadPlatformInboundSecret.
|
||||
func expectInboundSecret(mock sqlmock.Sqlmock, workspaceID string, secret interface{}) {
|
||||
mock.ExpectQuery(`SELECT platform_inbound_secret FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(workspaceID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"platform_inbound_secret"}).AddRow(secret))
|
||||
}
|
||||
|
||||
func TestChatUpload_InvalidWorkspaceID(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
tmplh := NewTemplatesHandler(t.TempDir(), nil)
|
||||
h := NewChatFilesHandler(tmplh)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "not-a-uuid"}}
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/not-a-uuid/chat/uploads", nil)
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
|
||||
c, w := makeUploadRequest(t, "not-a-uuid", &bytes.Buffer{}, "")
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
@@ -69,33 +93,240 @@ func TestChatUpload_InvalidWorkspaceID(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestChatUpload_MissingFiles(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
func TestChatUpload_WorkspaceNotInDB(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
tmplh := NewTemplatesHandler(t.TempDir(), nil)
|
||||
h := NewChatFilesHandler(tmplh)
|
||||
|
||||
// Multipart body with no `files` field — only a text field.
|
||||
var buf bytes.Buffer
|
||||
mw := multipart.NewWriter(&buf)
|
||||
_ = mw.WriteField("other", "value")
|
||||
mw.Close()
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000001"}}
|
||||
req := httptest.NewRequest("POST", "/workspaces/00000000-0000-0000-0000-000000000001/chat/uploads", &buf)
|
||||
req.Header.Set("Content-Type", mw.FormDataContentType())
|
||||
c.Request = req
|
||||
wsID := "00000000-0000-0000-0000-000000000099"
|
||||
expectURLMissing(mock, wsID)
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400 when files field missing, got %d: %s", w.Code, w.Body.String())
|
||||
// QueryRow returning sql.ErrNoRows surfaces as 404. The validate-id
|
||||
// step already passed; this is the next layer.
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Errorf("expected 404 when workspace row missing, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if !strings.Contains(w.Body.String(), "files") {
|
||||
t.Errorf("expected error to mention files field: %s", w.Body.String())
|
||||
}
|
||||
|
||||
// TestChatUpload_NoInboundSecret_LazyHeal pins the lazy-heal flow
|
||||
// added 2026-04-30 alongside the SaaS shared-prepare refactor:
|
||||
//
|
||||
// 1. Reading the workspace's platform_inbound_secret returns NULL
|
||||
// (legacy row from before RFC #2312).
|
||||
// 2. Handler MUST call wsauth.IssuePlatformInboundSecret (an UPDATE
|
||||
// on the workspaces row) to backfill the secret, so the next
|
||||
// upload after the workspace's heartbeat picks it up succeeds
|
||||
// without operator action.
|
||||
// 3. Response is 503 with retry_after_seconds=30 — the workspace's
|
||||
// local /configs/.platform_inbound_secret is also empty, so the
|
||||
// forward this request would do still fails. The user retries
|
||||
// after the next register response delivers the new secret.
|
||||
//
|
||||
// Pre-fix (before the lazy-heal): handlers returned 503 with
|
||||
// "Reprovision the workspace" — accurate, but every legacy workspace
|
||||
// would 503 forever until ops manually triggered a reprovision.
|
||||
func TestChatUpload_NoInboundSecret_LazyHeal(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
// Legacy row: URL set but platform_inbound_secret is NULL.
|
||||
wsID := "00000000-0000-0000-0000-000000000041"
|
||||
expectURL(mock, wsID, "http://127.0.0.1:1")
|
||||
expectInboundSecret(mock, wsID, nil) // NULL — triggers lazy-heal
|
||||
// Lazy-heal mint MUST land. If this expectation isn't matched,
|
||||
// the upload handler skipped the backfill and ops would have to
|
||||
// manually reprovision every legacy workspace.
|
||||
mock.ExpectExec(`UPDATE workspaces SET platform_inbound_secret = \$1 WHERE id = \$2`).
|
||||
WithArgs(sqlmock.AnyArg(), wsID).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Errorf("expected 503 when platform_inbound_secret missing, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
// Lazy-heal-success body steers the user to retry; the failure
|
||||
// body steers them to reprovision. Distinguishing them pins which
|
||||
// branch ran.
|
||||
if !strings.Contains(w.Body.String(), "retry") {
|
||||
t.Errorf("expected lazy-heal success response (retry hint), got: %s", w.Body.String())
|
||||
}
|
||||
if !strings.Contains(w.Body.String(), "30") {
|
||||
t.Errorf("expected retry_after_seconds=30 in body, got: %s", w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations not met — lazy-heal mint did NOT run, regression of #2312 backfill: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestChatUpload_NoInboundSecret_LazyHealFailure pins the alternate
|
||||
// branch: the platform_inbound_secret is NULL AND the lazy-heal mint
|
||||
// itself fails (e.g. DB unreachable). Handler must surface the
|
||||
// reprovision-steering error rather than silently swallowing.
|
||||
func TestChatUpload_NoInboundSecret_LazyHealFailure(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "00000000-0000-0000-0000-000000000042"
|
||||
expectURL(mock, wsID, "http://127.0.0.1:1")
|
||||
expectInboundSecret(mock, wsID, nil) // NULL — triggers lazy-heal
|
||||
mock.ExpectExec(`UPDATE workspaces SET platform_inbound_secret = \$1 WHERE id = \$2`).
|
||||
WithArgs(sqlmock.AnyArg(), wsID).
|
||||
WillReturnError(sql.ErrConnDone) // mint fails
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Errorf("expected 503 when lazy-heal fails, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if !strings.Contains(w.Body.String(), "RFC #2312") {
|
||||
t.Errorf("expected detail to reference RFC #2312 on lazy-heal failure, got: %s", w.Body.String())
|
||||
}
|
||||
if !strings.Contains(w.Body.String(), "Reprovision") {
|
||||
t.Errorf("expected reprovision hint on mint failure, got: %s", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestChatUpload_NoURL(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
// Workspace registered but URL hasn't been reported yet (mid-boot).
|
||||
wsID := "00000000-0000-0000-0000-000000000042"
|
||||
expectURL(mock, wsID, "")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Errorf("expected 503 when workspace url empty, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// captured snapshots everything the forwarder sent to the workspace so
|
||||
// we can assert auth + body + content-type forwarded correctly.
|
||||
type captured struct {
|
||||
authorization string
|
||||
contentType string
|
||||
method string
|
||||
path string
|
||||
body []byte
|
||||
}
|
||||
|
||||
func newCapturingWorkspace(t *testing.T, status int, response string) (*httptest.Server, *captured) {
|
||||
t.Helper()
|
||||
cap := &captured{}
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
cap.authorization = r.Header.Get("Authorization")
|
||||
cap.contentType = r.Header.Get("Content-Type")
|
||||
cap.method = r.Method
|
||||
cap.path = r.URL.Path
|
||||
body, _ := io.ReadAll(r.Body)
|
||||
cap.body = body
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(status)
|
||||
_, _ = w.Write([]byte(response))
|
||||
}))
|
||||
t.Cleanup(srv.Close)
|
||||
return srv, cap
|
||||
}
|
||||
|
||||
func TestChatUpload_ForwardsToWorkspace_HappyPath(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
srv, captured := newCapturingWorkspace(t, http.StatusOK, `{"files":[{"uri":"workspace:/workspace/.molecule/chat-uploads/abc-fixture.txt","name":"fixture.txt","size":15}]}`)
|
||||
|
||||
wsID := "00000000-0000-0000-0000-000000000043"
|
||||
expectURL(mock, wsID, srv.URL)
|
||||
expectInboundSecret(mock, wsID, "super-secret-123")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 from happy forward, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if captured.method != "POST" {
|
||||
t.Errorf("expected POST, got %s", captured.method)
|
||||
}
|
||||
if captured.path != "/internal/chat/uploads/ingest" {
|
||||
t.Errorf("expected /internal/chat/uploads/ingest, got %s", captured.path)
|
||||
}
|
||||
if captured.authorization != "Bearer super-secret-123" {
|
||||
t.Errorf("expected secret in Authorization header, got %q", captured.authorization)
|
||||
}
|
||||
if !strings.HasPrefix(captured.contentType, "multipart/form-data") {
|
||||
t.Errorf("expected multipart Content-Type forwarded, got %q", captured.contentType)
|
||||
}
|
||||
// Body shape: must contain the multipart-encoded fixture content.
|
||||
if !bytes.Contains(captured.body, []byte("fixture-payload")) {
|
||||
t.Errorf("expected body to contain fixture payload, got %d bytes", len(captured.body))
|
||||
}
|
||||
// Response body streamed back unchanged.
|
||||
if !strings.Contains(w.Body.String(), "fixture.txt") {
|
||||
t.Errorf("expected workspace response forwarded back, got: %s", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestChatUpload_ForwardsErrorStatusUnchanged(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
// Workspace returns 413 with its standard "exceeds per-file limit"
|
||||
// shape. Platform must propagate, NOT remap to 500.
|
||||
srv, _ := newCapturingWorkspace(t, http.StatusRequestEntityTooLarge, `{"error":"big.bin exceeds per-file limit (25 MB)"}`)
|
||||
|
||||
wsID := "00000000-0000-0000-0000-000000000044"
|
||||
expectURL(mock, wsID, srv.URL)
|
||||
expectInboundSecret(mock, wsID, "tok")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusRequestEntityTooLarge {
|
||||
t.Errorf("expected 413 propagated unchanged, got %d", w.Code)
|
||||
}
|
||||
if !strings.Contains(w.Body.String(), "exceeds per-file limit") {
|
||||
t.Errorf("expected workspace's 413 body verbatim, got: %s", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestChatUpload_WorkspaceUnreachable(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "00000000-0000-0000-0000-000000000045"
|
||||
// 127.0.0.1:1 — port 1 has no listener → connect refused.
|
||||
expectURL(mock, wsID, "http://127.0.0.1:1")
|
||||
expectInboundSecret(mock, wsID, "tok")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
// Connect-refused → BadGateway. NOT 500 — the platform itself is
|
||||
// fine; the upstream is broken.
|
||||
if w.Code != http.StatusBadGateway {
|
||||
t.Errorf("expected 502 on workspace unreachable, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,8 +334,7 @@ func TestChatDownload_InvalidPath(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
tmplh := NewTemplatesHandler(t.TempDir(), nil)
|
||||
h := NewChatFilesHandler(tmplh)
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
|
||||
cases := []struct {
|
||||
name, path, wantSubstr string
|
||||
@@ -173,22 +403,158 @@ func TestContentDispositionAttachment_Escapes(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestChatDownload_DockerUnavailable(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
tmplh := NewTemplatesHandler(t.TempDir(), nil) // docker=nil
|
||||
h := NewChatFilesHandler(tmplh)
|
||||
|
||||
// makeDownloadRequest builds a gin context for GET /workspaces/:id/chat/download
|
||||
// with the given path query param.
|
||||
func makeDownloadRequest(t *testing.T, workspaceID, path string) (*gin.Context, *httptest.ResponseRecorder) {
|
||||
t.Helper()
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000001"}}
|
||||
req := httptest.NewRequest("GET", "/workspaces/xxx/chat/download?path=/workspace/report.pdf", nil)
|
||||
c.Request = req
|
||||
c.Params = gin.Params{{Key: "id", Value: workspaceID}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/"+workspaceID+"/chat/download?path="+path, nil)
|
||||
return c, w
|
||||
}
|
||||
|
||||
func TestChatDownload_WorkspaceNotInDB(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "00000000-0000-0000-0000-000000000099"
|
||||
mock.ExpectQuery(`SELECT COALESCE\(url, ''\) FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt")
|
||||
h.Download(c)
|
||||
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Errorf("expected 404 when workspace row missing, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestChatDownload_NoInboundSecret_LazyHeal — same lazy-heal flow
|
||||
// as TestChatUpload_NoInboundSecret_LazyHeal but on the Download
|
||||
// handler. Pinned separately because Upload + Download have
|
||||
// independent code paths into ReadPlatformInboundSecret; a partial
|
||||
// regression that healed Upload but skipped Download is the kind of
|
||||
// drift we want to fail the test, not ship.
|
||||
func TestChatDownload_NoInboundSecret_LazyHeal(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "00000000-0000-0000-0000-000000000051"
|
||||
expectURL(mock, wsID, "http://127.0.0.1:1")
|
||||
expectInboundSecret(mock, wsID, nil)
|
||||
mock.ExpectExec(`UPDATE workspaces SET platform_inbound_secret = \$1 WHERE id = \$2`).
|
||||
WithArgs(sqlmock.AnyArg(), wsID).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt")
|
||||
h.Download(c)
|
||||
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Errorf("expected 503 when docker is nil, got %d: %s", w.Code, w.Body.String())
|
||||
t.Errorf("expected 503 when platform_inbound_secret missing, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if !strings.Contains(w.Body.String(), "retry") {
|
||||
t.Errorf("expected lazy-heal success response (retry hint), got: %s", w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations not met — Download lazy-heal mint did NOT run: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestChatDownload_NoInboundSecret_LazyHealFailure(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "00000000-0000-0000-0000-000000000052"
|
||||
expectURL(mock, wsID, "http://127.0.0.1:1")
|
||||
expectInboundSecret(mock, wsID, nil)
|
||||
mock.ExpectExec(`UPDATE workspaces SET platform_inbound_secret = \$1 WHERE id = \$2`).
|
||||
WithArgs(sqlmock.AnyArg(), wsID).
|
||||
WillReturnError(sql.ErrConnDone)
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt")
|
||||
h.Download(c)
|
||||
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Errorf("expected 503 when lazy-heal fails, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if !strings.Contains(w.Body.String(), "RFC #2312") {
|
||||
t.Errorf("expected detail to reference RFC #2312 on lazy-heal failure, got: %s", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestChatDownload_ForwardsToWorkspace_HappyPath(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
body := []byte("file-contents-here\nmultiline\n")
|
||||
cap := &captured{}
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
cap.authorization = r.Header.Get("Authorization")
|
||||
cap.method = r.Method
|
||||
cap.path = r.URL.Path
|
||||
w.Header().Set("Content-Type", "text/plain")
|
||||
w.Header().Set("Content-Disposition", `attachment; filename="report.txt"`)
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write(body)
|
||||
}))
|
||||
t.Cleanup(srv.Close)
|
||||
|
||||
wsID := "00000000-0000-0000-0000-000000000052"
|
||||
expectURL(mock, wsID, srv.URL)
|
||||
expectInboundSecret(mock, wsID, "the-secret")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
c, w := makeDownloadRequest(t, wsID, "/workspace/report.txt")
|
||||
h.Download(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if cap.authorization != "Bearer the-secret" {
|
||||
t.Errorf("expected secret in Authorization header, got %q", cap.authorization)
|
||||
}
|
||||
if cap.method != "GET" {
|
||||
t.Errorf("expected GET, got %s", cap.method)
|
||||
}
|
||||
if cap.path != "/internal/file/read" {
|
||||
t.Errorf("expected /internal/file/read, got %s", cap.path)
|
||||
}
|
||||
if got := w.Header().Get("Content-Type"); got != "text/plain" {
|
||||
t.Errorf("Content-Type not forwarded: %q", got)
|
||||
}
|
||||
if got := w.Header().Get("Content-Disposition"); got != `attachment; filename="report.txt"` {
|
||||
t.Errorf("Content-Disposition not forwarded: %q", got)
|
||||
}
|
||||
if got := w.Body.Bytes(); !bytes.Equal(got, body) {
|
||||
t.Errorf("body mismatch: got %q, want %q", got, body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestChatDownload_404FromWorkspacePropagated(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
_, _ = w.Write([]byte(`{"error":"file not found"}`))
|
||||
}))
|
||||
t.Cleanup(srv.Close)
|
||||
|
||||
wsID := "00000000-0000-0000-0000-000000000053"
|
||||
expectURL(mock, wsID, srv.URL)
|
||||
expectInboundSecret(mock, wsID, "tok")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
c, w := makeDownloadRequest(t, wsID, "/workspace/missing.txt")
|
||||
h.Download(c)
|
||||
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Errorf("expected 404 propagated, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user