Compare commits
325 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d75b73e713 | |||
| 7420631c32 | |||
| caf19e8980 | |||
| 6748035720 | |||
| c74d0ecc94 | |||
| 9dd29882e2 | |||
| e342d0c5a7 | |||
| 166ad20cd7 | |||
| 8b9f809966 | |||
| a869bc1536 | |||
| d3e115cb06 | |||
| b372c265ab | |||
| 146c0e7c60 | |||
| 5d8b5e96e3 | |||
| dc6e1ac2bf | |||
| c2e12f3fb6 | |||
| dd5df70e59 | |||
| f1dc721eeb | |||
| 5b78bea10d | |||
| a5903af459 | |||
| 07d09f3696 | |||
| f7c270bf24 | |||
| 0301f90183 | |||
| feef80423b | |||
| 469b24ff8f | |||
| c4d3c9a451 | |||
| 2652ea8342 | |||
| 1e01083e55 | |||
| eab36e217e | |||
| 7ee696ec9a | |||
| decec9b9a1 | |||
| ada27fdb5d | |||
| f0f4d0e761 | |||
| e0df90c294 | |||
| f01f374072 | |||
| 1edee1131b | |||
| d99b3f2aec | |||
| f5ea812e9d | |||
| 3b7ed9cf53 | |||
| da9061c131 | |||
| c4807a930d | |||
| d22fbb29b8 | |||
| 899c53550d | |||
| cdfc9f743f | |||
| 7a2664523c | |||
| 632e906640 | |||
| 475da5b64c | |||
| 1ad107cc15 | |||
| e4bd1e4293 | |||
| 01deeb36cf | |||
| b906e1da61 | |||
| 226e57a942 | |||
| abc3affcb6 | |||
| 3322524b0f | |||
| de01ff51b0 | |||
| f3782662bd | |||
| e9eb3868d5 | |||
| cb70d3d437 | |||
| a1d202723d | |||
| 0d0840d9d9 | |||
| fc30b5c9de | |||
| ef67dc513e | |||
| 23d3f057d3 | |||
| 8ca027ddf3 | |||
| 46a4ef83bb | |||
| a6afc18de5 | |||
| 423d58d42c | |||
| 9386f1d399 | |||
| a766e5ce48 | |||
| 5ad2669f88 | |||
| 0ca4e431c1 | |||
| 184ce7ae4e | |||
| 2bf6a7005f | |||
| 16ead69641 | |||
| 60afcd43c9 | |||
| ff75aeb43e | |||
| 81cf0cbf98 | |||
| 412dec0d87 | |||
| 9a53529047 | |||
| 39931acd9c | |||
| 6f19b88fa7 | |||
| 83454e5efd | |||
| 575f893f4e | |||
| 4cac4e7710 | |||
| 8254bedf30 | |||
| ec72f199e6 | |||
| ae22a55675 | |||
| 08648bf4b1 | |||
| eec4ea2e7d | |||
| 6201d12533 | |||
| 81e83c05b7 | |||
| 5b5eacbb29 | |||
| c8fca1467e | |||
| 7c8b81c6eb | |||
| fc1c45789e | |||
| e3a18ed8e8 | |||
| 9f551319d2 | |||
| 1052f8bdb0 | |||
| 30fb507165 | |||
| 77e9a965ac | |||
| 5334d60de4 | |||
| d6c0227e3f | |||
| 27db090d3d | |||
| 0f25f6de97 | |||
| 9991057ad1 | |||
| b89a49ec93 | |||
| 3d0a7c381b | |||
| f5613bf099 | |||
| 9bd2a2c45f | |||
| a489ee1a7c | |||
| c79ba05ed5 | |||
| 6470e5f41b | |||
| aa560c0314 | |||
| 7644e82f2f | |||
| 33fabdf483 | |||
| abba16beb4 | |||
| 9c752e0673 | |||
| 8e5d193761 | |||
| 3e0d2e650a | |||
| 210a26d31a | |||
| be18b9c8f9 | |||
| 2cb1b26512 | |||
| 48d1945269 | |||
| a04a49f7aa | |||
| bbec4cfcfb | |||
| 19c25a9278 | |||
| e50799bc29 | |||
| 07839580a0 | |||
| 2227a14b1e | |||
| e72f9ad107 | |||
| 17aec22f9b | |||
| 8388144098 | |||
| a327d207da | |||
| afe5a0cfe9 | |||
| 529c3f3922 | |||
| c778b62202 | |||
| d80bffe3e3 | |||
| 0c461eb9f1 | |||
| 86015412eb | |||
| f81813f708 | |||
| 58253f0673 | |||
| 28ef75d25e | |||
| 243f9bc2b1 | |||
| 43bf94a07c | |||
| 55f5c0b0ff | |||
| 86fdaad111 | |||
| 6125700c39 | |||
| 89ee8e4d04 | |||
| db14191bc9 | |||
| 26e2e97006 | |||
| ec574f3d4b | |||
| 42f2ea3f4f | |||
| e0e9201142 | |||
| 90d202c80a | |||
| 1e8d7ae17c | |||
| ecf5f6fbf3 | |||
| fcdf79774d | |||
| d6337a1ae9 | |||
| 471dff25e9 | |||
| 3d2a50e2a2 | |||
| 9e678ccd5e | |||
| 191ef3be91 | |||
| 25fd6b021d | |||
| a959feae84 | |||
| c661ea4cd3 | |||
| 49027af419 | |||
| 4c9f12258d | |||
| da46bdeded | |||
| d890fd9a3f | |||
| ec1f21922c | |||
| ca61213578 | |||
| 118b8e47ad | |||
| ab164c1967 | |||
| b5f530e27a | |||
| 44bb35a926 | |||
| 024ef260db | |||
| d175d0c4c1 | |||
| d21ac991c1 | |||
| c85783fbee | |||
| b375252dc8 | |||
| 3d226a2c68 | |||
| da6d319c48 | |||
| 76e9656a7b | |||
| 35017c5452 | |||
| d10c1a1a36 | |||
| 61b7755c3c | |||
| 21a7e7b0e7 | |||
| 9a772bf946 | |||
| 0a90d7ae1a | |||
| 5b7f4d260b | |||
| f0fd7b4d9e | |||
| 7993693cf1 | |||
| 789d705866 | |||
| cb820acbd6 | |||
| 52915268b2 | |||
| 82e7059e0e | |||
| 5950d4cd81 | |||
| 1e12ed7e9f | |||
| 4f67fe59fb | |||
| 410275e5af | |||
| 1557743ef9 | |||
| e727b31246 | |||
| ae05f91bd8 | |||
| c89f17a2aa | |||
| cbe48c2225 | |||
| b0bcd97781 | |||
| 56149f8a24 | |||
| 0134353a48 | |||
| aca7d99152 | |||
| aec0fb35d2 | |||
| b5c0b4d371 | |||
| 2ed4f4fb41 | |||
| 02b325063b | |||
| 43caac911a | |||
| 2e505e7748 | |||
| ae79b9e9fe | |||
| b3b9a242d6 | |||
| ed6dfe01e5 | |||
| 4c9309e801 | |||
| 20f76c4fdf | |||
| ca6e7c39cf | |||
| ba63f76e10 | |||
| b037d555fa | |||
| 62fc25757c | |||
| a345adacad | |||
| 7cc1c39c49 | |||
| 8152cfc81e | |||
| 111c3d2c01 | |||
| 46d79a3e3b | |||
| 2198f92dcb | |||
| beab899501 | |||
| b851cfc813 | |||
| 3cb72b1df0 | |||
| 11c9ed2a46 | |||
| c0bfd19b9e | |||
| e0f9434eaf | |||
| 80e4b9ac9a | |||
| daefdd21c5 | |||
| 8df8487bbe | |||
| 9a835ef631 | |||
| 174e594690 | |||
| 856c967950 | |||
| 73f7e0c03b | |||
| 31f9a5e85e | |||
| c5dd14d8db | |||
| 7e1fdf5847 | |||
| d084d7e61a | |||
| 9c9be4cf12 | |||
| f256bfa9c6 | |||
| 463316772b | |||
| dfd0bc528c | |||
| 4ea6f437e9 | |||
| a872202fe7 | |||
| 2b862f65f9 | |||
| 53760a8a2f | |||
| 0f389ba325 | |||
| 472862bc50 | |||
| 461e5dcad0 | |||
| b5435b4732 | |||
| 4b16c95450 | |||
| f1b72af97e | |||
| 31facfc5c4 | |||
| 19e7acdc22 | |||
| 1ce51abea4 | |||
| 0ec226e119 | |||
| 872b781f64 | |||
| 0dd1244510 | |||
| 26fa220bef | |||
| 5559e96400 | |||
| 3bc7749e84 | |||
| 6d7a7fc86f | |||
| ecb3c75d74 | |||
| 2f7beb9bce | |||
| bd881f8756 | |||
| e39d818ac4 | |||
| ed4d24fb8c | |||
| 3a5544a9e6 | |||
| 095171f163 | |||
| 9c7b34cb7f | |||
| 8514ff1a96 | |||
| 1785732bbb | |||
| 066a0772ee | |||
| 3f2cc8cdd6 | |||
| 5c80b9c3d6 | |||
| a8850bac55 | |||
| adfa34c4ae | |||
| 7692dd4975 | |||
| 28f22609d9 | |||
| e67a854a33 | |||
| 3e7d483b8c | |||
| 4f4b6c4f90 | |||
| fc10386a78 | |||
| 1282c1c8ff | |||
| a242ca8b01 | |||
| ac9b07b7ad | |||
| 41ae4ec50b | |||
| 02960209a0 | |||
| d866d3aa5f | |||
| 61d5908817 | |||
| 89bdf29d6f | |||
| 700d44ec3d | |||
| f70071e1e1 | |||
| 63ac99788b | |||
| 28472f0d2d | |||
| f42feb4ed7 | |||
| 99e7f13149 | |||
| 6488ba09e7 | |||
| 8176b5142d | |||
| 314277769e | |||
| e0b567e992 | |||
| 707e4d7342 | |||
| 4f9e3feece | |||
| 10752fe330 | |||
| 8f7122a9b6 | |||
| b3982035b3 | |||
| d1122f8d28 | |||
| 4b35d25d86 | |||
| 46731729d4 | |||
| 6dc2d907a2 | |||
| 849bc97349 | |||
| e13dcab5e0 | |||
| 721010307c | |||
| 9f47ecf86e | |||
| ebc20794f3 | |||
| 9a64aeaa2c |
@@ -186,7 +186,7 @@ jobs:
|
||||
echo "proceed=true" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::E2E green for this SHA — proceeding with promote"
|
||||
;;
|
||||
completed/failure|completed/cancelled|completed/timed_out)
|
||||
completed/failure|completed/timed_out)
|
||||
echo "proceed=false" >> "$GITHUB_OUTPUT"
|
||||
{
|
||||
echo "## ❌ Auto-promote aborted — E2E Staging SaaS failed"
|
||||
@@ -198,6 +198,27 @@ jobs:
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
exit 1
|
||||
;;
|
||||
completed/cancelled)
|
||||
# cancelled ≠ failure. Per-SHA concurrency cancels older E2E
|
||||
# runs when a newer push lands (memory:
|
||||
# feedback_concurrency_group_per_sha) — the newer SHA will
|
||||
# have its own E2E + promote chain. Treat the same as
|
||||
# in_progress: defer without aborting, let the next E2E run
|
||||
# promote when it lands.
|
||||
#
|
||||
# Caught 2026-05-05 02:03 on sha 31f9a5e — auto-promote
|
||||
# blocked the whole chain because this case fell through to
|
||||
# exit 1 instead of clean defer.
|
||||
echo "proceed=false" >> "$GITHUB_OUTPUT"
|
||||
{
|
||||
echo "## ⏭ Auto-promote deferred — E2E Staging SaaS was cancelled"
|
||||
echo
|
||||
echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\`"
|
||||
echo "Likely per-SHA concurrency (newer push superseded this E2E run)."
|
||||
echo "The newer SHA's E2E will fire its own promote when it lands."
|
||||
echo "If you need this specific SHA promoted, manually dispatch."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
;;
|
||||
in_progress/*|queued/*|requested/*|waiting/*|pending/*)
|
||||
echo "proceed=false" >> "$GITHUB_OUTPUT"
|
||||
{
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
name: auto-promote-stale-alarm
|
||||
|
||||
# Hourly cron + on-demand alarm for the silent-block failure mode that
|
||||
# motivated issue #2975:
|
||||
# - The auto-promote-staging.yml workflow opened a PR + armed
|
||||
# auto-merge, but main's branch protection requires a human review
|
||||
# (reviewDecision=REVIEW_REQUIRED). The PR sat BLOCKED with no
|
||||
# surface-up-the-stack for 12+ hours, holding 25 commits hostage
|
||||
# including the Memory v2 redesign and a reno-stars data-loss fix.
|
||||
#
|
||||
# This workflow runs `scripts/check-stale-promote-pr.sh` against the
|
||||
# repo's open auto-promote PRs (base=main head=staging). When a PR has
|
||||
# been BLOCKED on REVIEW_REQUIRED for >4h, it:
|
||||
# 1. Emits a workflow-level warning (visible in run summary + the
|
||||
# Actions UI feed).
|
||||
# 2. Posts a comment on the PR (idempotent — one alarm per PR).
|
||||
#
|
||||
# The detection logic lives in scripts/check-stale-promote-pr.sh so
|
||||
# it's unit-testable with stubbed `gh` (see test-check-stale-promote-pr.sh).
|
||||
# This file is the schedule + invocation surface only — SSOT for the
|
||||
# detector itself.
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Hourly. Cheap (one `gh pr list` + jq), and 1h granularity is
|
||||
# plenty for a 4h staleness threshold — operators see the alarm
|
||||
# within at most 1h of crossing the threshold.
|
||||
- cron: "27 * * * *" # at :27 to dodge the cron herd at :00
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
stale_hours:
|
||||
description: "Hours after which a BLOCKED+REVIEW_REQUIRED PR is stale (default 4)"
|
||||
required: false
|
||||
default: "4"
|
||||
post_comment:
|
||||
description: "Post a comment on stale PRs (default true)"
|
||||
required: false
|
||||
default: "true"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write # post comments on stale PRs
|
||||
|
||||
# Serialize so the on-demand and scheduled runs don't double-comment
|
||||
# the same PR. cancel-in-progress=false because the script is idempotent
|
||||
# (existing comment marker prevents dupes), but a scheduled run firing
|
||||
# while a manual one runs would just re-list the same PR set.
|
||||
concurrency:
|
||||
group: auto-promote-stale-alarm
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
scan:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout (need scripts/ only)
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
sparse-checkout: |
|
||||
scripts/check-stale-promote-pr.sh
|
||||
sparse-checkout-cone-mode: false
|
||||
- name: Run stale-PR detector
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GITHUB_REPOSITORY: ${{ github.repository }}
|
||||
STALE_HOURS: ${{ inputs.stale_hours || '4' }}
|
||||
POST_COMMENT: ${{ inputs.post_comment || 'true' }}
|
||||
run: |
|
||||
# The script's exit code reflects the count of stale PRs.
|
||||
# We don't want a stale finding to fail the workflow run —
|
||||
# the warning + comment are the signal, the green/red is
|
||||
# noise. So convert any non-zero exit to a workflow notice
|
||||
# and exit 0.
|
||||
set +e
|
||||
bash scripts/check-stale-promote-pr.sh
|
||||
rc=$?
|
||||
set -e
|
||||
if [ "$rc" -ne 0 ]; then
|
||||
echo "::notice::Stale PR detector found $rc PR(s) needing attention. See warnings above + comments on the PRs."
|
||||
fi
|
||||
# Always succeed — operator-facing surface is the warning,
|
||||
# not the workflow status.
|
||||
exit 0
|
||||
@@ -0,0 +1,81 @@
|
||||
name: branch-protection drift check
|
||||
|
||||
# Catches out-of-band edits to branch protection (UI clicks, manual gh
|
||||
# api PATCH from a one-off ops session) by comparing live state against
|
||||
# tools/branch-protection/apply.sh's desired state every day. Fails the
|
||||
# workflow when they drift; the failure is the signal.
|
||||
#
|
||||
# When it fails: re-run apply.sh to put the live state back to the
|
||||
# script's intent, OR update apply.sh to encode the new intent and
|
||||
# commit. Either way the script is the source of truth.
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# 14:00 UTC daily. Off-hours for most teams; gives a fresh signal
|
||||
# at the start of every working day.
|
||||
- cron: '0 14 * * *'
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
branches: [staging, main]
|
||||
paths:
|
||||
- 'tools/branch-protection/**'
|
||||
- '.github/workflows/branch-protection-drift.yml'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
drift:
|
||||
name: Branch protection drift
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
# Token strategy by trigger:
|
||||
#
|
||||
# - schedule (daily canary): hard-fail when the admin token is
|
||||
# missing. This is the *only* trigger where silent soft-skip is
|
||||
# dangerous — a missing secret on the cron run means the drift
|
||||
# gate has effectively disappeared with no human in the loop to
|
||||
# notice. Per feedback_schedule_vs_dispatch_secrets_hardening.md
|
||||
# the rule is "schedule/automated triggers must hard-fail".
|
||||
#
|
||||
# - pull_request (touching tools/branch-protection/**): soft-skip
|
||||
# with a prominent warning. A PR cannot retroactively drift the
|
||||
# live state — drift happens *between* PRs (UI clicks, manual
|
||||
# gh api PATCH) and is the schedule's job to catch. The PR-time
|
||||
# gate would only catch typos in apply.sh, which the apply.sh
|
||||
# *_payload unit tests catch better. A human is reviewing the
|
||||
# PR and will see the warning in the workflow log.
|
||||
#
|
||||
# - workflow_dispatch (operator one-off): soft-skip with warning,
|
||||
# so an operator can run a diagnostic without configuring the
|
||||
# secret first.
|
||||
- name: Verify admin token present (hard-fail on schedule only)
|
||||
env:
|
||||
GH_TOKEN_FOR_ADMIN_API: ${{ secrets.GH_TOKEN_FOR_ADMIN_API }}
|
||||
run: |
|
||||
if [[ -n "$GH_TOKEN_FOR_ADMIN_API" ]]; then
|
||||
echo "GH_TOKEN_FOR_ADMIN_API present — drift_check will run with admin scope."
|
||||
exit 0
|
||||
fi
|
||||
if [[ "${{ github.event_name }}" == "schedule" ]]; then
|
||||
echo "::error::GH_TOKEN_FOR_ADMIN_API secret missing on the daily canary." >&2
|
||||
echo "" >&2
|
||||
echo "The schedule run is the SoT for branch-protection drift detection." >&2
|
||||
echo "Without admin scope it silently passes, hiding any out-of-band edits." >&2
|
||||
echo "Set GH_TOKEN_FOR_ADMIN_API at Settings → Secrets and variables → Actions." >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "::warning::GH_TOKEN_FOR_ADMIN_API secret missing — drift_check will be SKIPPED."
|
||||
echo "::warning::PR drift checks need repo-admin scope to read /branches/:b/protection."
|
||||
echo "::warning::This is non-fatal: the daily schedule run is the canonical drift gate."
|
||||
echo "SKIP_DRIFT_CHECK=1" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Run drift check
|
||||
if: env.SKIP_DRIFT_CHECK != '1'
|
||||
env:
|
||||
# Repo-admin scope, needed for /branches/:b/protection.
|
||||
GH_TOKEN: ${{ secrets.GH_TOKEN_FOR_ADMIN_API }}
|
||||
run: bash tools/branch-protection/drift_check.sh
|
||||
@@ -295,12 +295,16 @@ jobs:
|
||||
# See molecule-controlplane#420.
|
||||
leaks=()
|
||||
for slug in $orgs; do
|
||||
code=$(curl -sS -o /tmp/canary-cleanup.out -w "%{http_code}" \
|
||||
# Tempfile-routed -w + set +e/-e prevents curl-exit-code
|
||||
# pollution of the captured status (lint-curl-status-capture.yml).
|
||||
set +e
|
||||
curl -sS -o /tmp/canary-cleanup.out -w "%{http_code}" \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" \
|
||||
|| echo "000")
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/canary-cleanup.code
|
||||
set -e
|
||||
code=$(cat /tmp/canary-cleanup.code 2>/dev/null || echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
echo "[teardown] deleted $slug (HTTP $code)"
|
||||
else
|
||||
|
||||
@@ -272,6 +272,14 @@ jobs:
|
||||
find tests/e2e infra/scripts -type f -name '*.sh' -print0 \
|
||||
| xargs -0 shellcheck --severity=warning
|
||||
|
||||
- if: needs.changes.outputs.scripts == 'true'
|
||||
name: Lint cleanup-trap hygiene (RFC #2873)
|
||||
# Asserts every shell E2E test that calls `mktemp` also installs
|
||||
# an EXIT trap. Catches the /tmp-leak class — a missing trap
|
||||
# silently leaks scratch into CI runners (~10-100KB per run).
|
||||
# See tests/e2e/lint_cleanup_traps.sh for the rule + fix pattern.
|
||||
run: bash tests/e2e/lint_cleanup_traps.sh
|
||||
|
||||
- if: needs.changes.outputs.scripts == 'true'
|
||||
name: Run E2E bash unit tests (no live infra)
|
||||
# Pure-bash unit tests for E2E helper libs (lib/*.sh). These pin
|
||||
@@ -358,6 +366,73 @@ jobs:
|
||||
- if: needs.changes.outputs.python == 'true'
|
||||
run: python -m pytest --tb=short
|
||||
|
||||
- if: needs.changes.outputs.python == 'true'
|
||||
name: Per-file critical-path coverage (MCP / inbox / auth)
|
||||
# MCP-critical Python files have a per-file floor on top of the
|
||||
# 86% total floor in pytest.ini. Rationale (issue #2790, after
|
||||
# the PR #2766 → PR #2771 cycle): the total floor averages ~6000
|
||||
# lines, so a single MCP file could regress to ~50% with no
|
||||
# complaint as long as other modules compensate. These six
|
||||
# files handle multi-tenant routing + auth + inbox dispatch —
|
||||
# a coverage drop here is the same risk shape as a Go-side
|
||||
# workspace-server token/secrets file dropping below 10%.
|
||||
#
|
||||
# Floor 75% sits below current actuals (80-96%) so this gate is
|
||||
# strictly additive — no existing PR fails. Ratchet plan in
|
||||
# COVERAGE_FLOOR.md.
|
||||
run: |
|
||||
set -e
|
||||
PER_FILE_FLOOR=75
|
||||
CRITICAL_FILES=(
|
||||
"a2a_mcp_server.py"
|
||||
"mcp_cli.py"
|
||||
"a2a_tools.py"
|
||||
"a2a_tools_inbox.py"
|
||||
"inbox.py"
|
||||
"platform_auth.py"
|
||||
)
|
||||
|
||||
# pytest already wrote .coverage; emit a JSON view scoped to
|
||||
# the critical files so jq/python can read the per-file pct
|
||||
# without parsing tabular text. --include uses fnmatch, and
|
||||
# the leading "*" allows the file to live anywhere under the
|
||||
# workspace root (today they sit at workspace/<name>.py).
|
||||
INCLUDES=$(printf '*%s,' "${CRITICAL_FILES[@]}")
|
||||
INCLUDES="${INCLUDES%,}"
|
||||
python -m coverage json -o /tmp/critical-cov.json --include="$INCLUDES"
|
||||
|
||||
FAILED=0
|
||||
for f in "${CRITICAL_FILES[@]}"; do
|
||||
# Match by top-level path key (e.g. "a2a_tools.py", not
|
||||
# "builtin_tools/a2a_tools.py" — different file at 100%).
|
||||
# The keys in coverage.json are paths relative to the run
|
||||
# cwd (workspace/), so the critical-path entry sits at the
|
||||
# bare basename.
|
||||
pct=$(jq -r --arg f "$f" '.files | to_entries | map(select(.key == $f)) | .[0].value.summary.percent_covered // "MISSING"' /tmp/critical-cov.json)
|
||||
if [ "$pct" = "MISSING" ]; then
|
||||
echo "::error file=workspace/$f::No coverage data — file may have moved or test exclusion mis-set."
|
||||
FAILED=$((FAILED+1))
|
||||
continue
|
||||
fi
|
||||
echo "$f: ${pct}%"
|
||||
if awk "BEGIN{exit !($pct < $PER_FILE_FLOOR)}"; then
|
||||
echo "::error file=workspace/$f::${pct}% < ${PER_FILE_FLOOR}% per-file floor (MCP critical path). See COVERAGE_FLOOR.md."
|
||||
FAILED=$((FAILED+1))
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$FAILED" -gt 0 ]; then
|
||||
echo ""
|
||||
echo "$FAILED MCP critical-path file(s) below the ${PER_FILE_FLOOR}% per-file floor."
|
||||
echo "These paths handle multi-tenant routing, auth tokens, and inbox dispatch."
|
||||
echo "A coverage drop here is the same risk shape as Go-side tokens/secrets files"
|
||||
echo "dropping below 10% (see COVERAGE_FLOOR.md). Either:"
|
||||
echo " (a) add tests to raise coverage back above ${PER_FILE_FLOOR}%, or"
|
||||
echo " (b) if this is unavoidable historical debt, file an issue and propose"
|
||||
echo " adjusting the floor with rationale in COVERAGE_FLOOR.md."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# SDK + plugin validation moved to standalone repo:
|
||||
# github.com/Molecule-AI/molecule-sdk-python
|
||||
|
||||
|
||||
@@ -172,6 +172,9 @@ jobs:
|
||||
- name: Run poll-mode + since_id cursor E2E (#2339)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_poll_mode_e2e.sh
|
||||
- name: Run poll-mode chat upload E2E (RFC #2891)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_poll_mode_chat_upload_e2e.sh
|
||||
- name: Dump platform log on failure
|
||||
if: failure() && needs.detect-changes.outputs.api == 'true'
|
||||
run: cat workspace-server/platform.log || true
|
||||
|
||||
@@ -192,12 +192,16 @@ jobs:
|
||||
# cleanup miss shouldn't fail-flag the canvas test when the
|
||||
# actual smoke check passed; the sweeper is the safety net.
|
||||
# See molecule-controlplane#420.
|
||||
code=$(curl -sS -o /tmp/canvas-cleanup.out -w "%{http_code}" \
|
||||
# Tempfile-routed -w + set +e/-e prevents curl-exit-code
|
||||
# pollution of the captured status (lint-curl-status-capture.yml).
|
||||
set +e
|
||||
curl -sS -o /tmp/canvas-cleanup.out -w "%{http_code}" \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" \
|
||||
|| echo "000")
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/canvas-cleanup.code
|
||||
set -e
|
||||
code=$(cat /tmp/canvas-cleanup.code 2>/dev/null || echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
echo "[teardown] deleted $slug (HTTP $code)"
|
||||
else
|
||||
|
||||
@@ -159,12 +159,16 @@ jobs:
|
||||
# leaked. Sweeper catches the rest within ~45 min.
|
||||
leaks=()
|
||||
for slug in $orgs; do
|
||||
code=$(curl -sS -o /tmp/external-cleanup.out -w "%{http_code}" \
|
||||
# Tempfile-routed -w + set +e/-e prevents curl-exit-code
|
||||
# pollution of the captured status (lint-curl-status-capture.yml).
|
||||
set +e
|
||||
curl -sS -o /tmp/external-cleanup.out -w "%{http_code}" \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" \
|
||||
|| echo "000")
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/external-cleanup.code
|
||||
set -e
|
||||
code=$(cat /tmp/external-cleanup.code 2>/dev/null || echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
echo "[teardown] deleted $slug (HTTP $code)"
|
||||
else
|
||||
|
||||
@@ -224,12 +224,16 @@ jobs:
|
||||
leaks=()
|
||||
for slug in $orgs; do
|
||||
echo "Safety-net teardown: $slug"
|
||||
code=$(curl -sS -o /tmp/saas-cleanup.out -w "%{http_code}" \
|
||||
# Tempfile-routed -w + set +e/-e prevents curl-exit-code
|
||||
# pollution of the captured status (lint-curl-status-capture.yml).
|
||||
set +e
|
||||
curl -sS -o /tmp/saas-cleanup.out -w "%{http_code}" \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" \
|
||||
|| echo "000")
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/saas-cleanup.code
|
||||
set -e
|
||||
code=$(cat /tmp/saas-cleanup.code 2>/dev/null || echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
echo "[teardown] deleted $slug (HTTP $code)"
|
||||
else
|
||||
|
||||
@@ -148,12 +148,16 @@ jobs:
|
||||
# safety net within ~45 min.
|
||||
leaks=()
|
||||
for slug in $orgs; do
|
||||
code=$(curl -sS -o /tmp/sanity-cleanup.out -w "%{http_code}" \
|
||||
# Tempfile-routed -w + set +e/-e prevents curl-exit-code
|
||||
# pollution of the captured status (lint-curl-status-capture.yml).
|
||||
set +e
|
||||
curl -sS -o /tmp/sanity-cleanup.out -w "%{http_code}" \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" \
|
||||
|| echo "000")
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/sanity-cleanup.code
|
||||
set -e
|
||||
code=$(cat /tmp/sanity-cleanup.code 2>/dev/null || echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
echo "[teardown] deleted $slug (HTTP $code)"
|
||||
else
|
||||
|
||||
@@ -0,0 +1,160 @@
|
||||
name: Handlers Postgres Integration
|
||||
|
||||
# Real-Postgres integration tests for workspace-server/internal/handlers/.
|
||||
# Triggered on every PR/push that touches the handlers package.
|
||||
#
|
||||
# Why this workflow exists
|
||||
# ------------------------
|
||||
# Strict-sqlmock unit tests pin which SQL statements fire — they're fast
|
||||
# and let us iterate without a DB. But sqlmock CANNOT detect bugs that
|
||||
# depend on the row state AFTER the SQL runs. The result_preview-lost
|
||||
# bug shipped to staging in PR #2854 because every unit test was
|
||||
# satisfied with "an UPDATE statement fired" — none verified the row's
|
||||
# preview field actually landed. The local-postgres E2E that caught it
|
||||
# during retrofit self-review took 2 minutes to set up and would have caught
|
||||
# the bug at PR-time.
|
||||
#
|
||||
# This job spins a Postgres service container, applies the migration,
|
||||
# and runs `go test -tags=integration` against a live DB. Required
|
||||
# check on staging branch protection — backend handler PRs cannot
|
||||
# merge without a real-DB regression gate.
|
||||
#
|
||||
# Cost: ~30s job (postgres pull from GH cache + go build + 4 tests).
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main, staging]
|
||||
pull_request:
|
||||
branches: [main, staging]
|
||||
merge_group:
|
||||
types: [checks_requested]
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: handlers-pg-integ-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
detect-changes:
|
||||
name: detect-changes
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
handlers: ${{ steps.filter.outputs.handlers }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
|
||||
id: filter
|
||||
with:
|
||||
filters: |
|
||||
handlers:
|
||||
- 'workspace-server/internal/handlers/**'
|
||||
- 'workspace-server/internal/wsauth/**'
|
||||
- 'workspace-server/migrations/**'
|
||||
- '.github/workflows/handlers-postgres-integration.yml'
|
||||
|
||||
# Single-job-with-per-step-if pattern: always runs to satisfy the
|
||||
# required-check name on branch protection; real work gates on the
|
||||
# paths filter. See ci.yml's Platform (Go) for the same shape.
|
||||
integration:
|
||||
name: Handlers Postgres Integration
|
||||
needs: detect-changes
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
postgres:
|
||||
image: postgres:15-alpine
|
||||
env:
|
||||
POSTGRES_PASSWORD: test
|
||||
POSTGRES_DB: molecule
|
||||
ports:
|
||||
- 5432:5432
|
||||
# GHA spins this with --health-cmd built in for postgres images.
|
||||
options: >-
|
||||
--health-cmd pg_isready
|
||||
--health-interval 5s
|
||||
--health-timeout 5s
|
||||
--health-retries 10
|
||||
defaults:
|
||||
run:
|
||||
working-directory: workspace-server
|
||||
steps:
|
||||
- if: needs.detect-changes.outputs.handlers != 'true'
|
||||
working-directory: .
|
||||
run: echo "No handlers/migrations changes — skipping; this job always runs to satisfy the required-check name."
|
||||
|
||||
- if: needs.detect-changes.outputs.handlers == 'true'
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- if: needs.detect-changes.outputs.handlers == 'true'
|
||||
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
|
||||
- if: needs.detect-changes.outputs.handlers == 'true'
|
||||
name: Apply migrations to Postgres service
|
||||
env:
|
||||
PGPASSWORD: test
|
||||
run: |
|
||||
# Wait for postgres to actually accept connections (the
|
||||
# GHA --health-cmd is best-effort but psql can still race).
|
||||
for i in {1..15}; do
|
||||
if pg_isready -h localhost -p 5432 -U postgres -q; then break; fi
|
||||
echo "waiting for postgres..."; sleep 2
|
||||
done
|
||||
|
||||
# Apply every .up.sql in lexicographic order with
|
||||
# per-file ON_ERROR_STOP=1 under `set +e` — failing migrations are SKIPPED rather than
|
||||
# blocking the suite. This handles the current schema state
|
||||
# where a few historical migrations (e.g. 017_memories_fts_*)
|
||||
# depend on tables that were later renamed/dropped and so
|
||||
# cannot replay from scratch. The migrations that DO succeed
|
||||
# land their tables, which is sufficient for the integration
|
||||
# tests in handlers/.
|
||||
#
|
||||
# Why not maintain a curated allowlist: every new migration
|
||||
# touching a handlers/-tested table would have to update this
|
||||
# workflow. With apply-all-or-skip, a future migration that
|
||||
# adds a column to delegations runs automatically (its base
|
||||
# table 049_delegations.up.sql already succeeded above it in
|
||||
# the order). Operators only need to revisit this if the
|
||||
# migration chain becomes legitimately replayable end-to-end.
|
||||
#
|
||||
# Per-migration result is logged so a failed migration that
|
||||
# SHOULD have been replayable surfaces in the CI log instead
|
||||
# of silently failing.
|
||||
set +e
|
||||
for migration in migrations/*.up.sql; do
|
||||
if psql -h localhost -U postgres -d molecule -v ON_ERROR_STOP=1 \
|
||||
-f "$migration" >/dev/null 2>&1; then
|
||||
echo "✓ $(basename "$migration")"
|
||||
else
|
||||
echo "⊘ $(basename "$migration") (skipped — see comment in workflow)"
|
||||
fi
|
||||
done
|
||||
set -e
|
||||
|
||||
# Sanity: the delegations table MUST exist for the integration
|
||||
# tests to be meaningful. Hard-fail if 049 didn't land — that
|
||||
# would be a real regression we want loud.
|
||||
if ! psql -h localhost -U postgres -d molecule -tA \
|
||||
-c "SELECT 1 FROM information_schema.tables WHERE table_name = 'delegations'" \
|
||||
| grep -q 1; then
|
||||
echo "::error::delegations table missing after migration replay — handler integration tests would be meaningless"
|
||||
exit 1
|
||||
fi
|
||||
echo "✓ delegations table present"
|
||||
|
||||
- if: needs.detect-changes.outputs.handlers == 'true'
|
||||
name: Run integration tests
|
||||
env:
|
||||
INTEGRATION_DB_URL: postgres://postgres:test@localhost:5432/molecule?sslmode=disable
|
||||
run: |
|
||||
go test -tags=integration -timeout 5m -v ./internal/handlers/ -run "^TestIntegration_"
|
||||
|
||||
- if: needs.detect-changes.outputs.handlers == 'true' && failure()
|
||||
name: Diagnostic dump on failure
|
||||
env:
|
||||
PGPASSWORD: test
|
||||
run: |
|
||||
echo "::group::delegations table state"
|
||||
psql -h localhost -U postgres -d molecule -c "SELECT * FROM delegations LIMIT 50;" || true
|
||||
echo "::endgroup::"
|
||||
@@ -0,0 +1,94 @@
|
||||
name: Lint curl status-code capture
|
||||
|
||||
# Pins the workflow-bash anti-pattern that produced "HTTP 000000" on the
|
||||
# 2026-05-04 redeploy-tenants-on-main run for sha 2b862f6:
|
||||
#
|
||||
# HTTP_CODE=$(curl ... -w '%{http_code}' ... || echo "000")
|
||||
#
|
||||
# When curl exits non-zero (connection reset → 56, --fail-with-body 4xx/5xx
|
||||
# → 22), the `-w '%{http_code}'` already wrote a status to stdout — usually
|
||||
# "000" for connection failures or the actual code for HTTP errors. The
|
||||
# `|| echo "000"` then fires AND appends ANOTHER "000" to the captured
|
||||
# stdout, producing values like "000000" or "409000" that fail string
|
||||
# comparisons against "200" while looking superficially right.
|
||||
#
|
||||
# Same class of bug the synth-E2E §7c gate hit twice (PRs #2779/#2783 +
|
||||
# #2797). Memory: feedback_curl_status_capture_pollution.md.
|
||||
#
|
||||
# Fix shape (route -w into a tempfile so curl's exit code can't pollute):
|
||||
#
|
||||
# set +e
|
||||
# curl ... -w '%{http_code}' >code.txt 2>/dev/null
|
||||
# set -e
|
||||
# HTTP_CODE=$(cat code.txt 2>/dev/null)
|
||||
# [ -z "$HTTP_CODE" ] && HTTP_CODE="000"
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths: ['.github/workflows/**']
|
||||
push:
|
||||
branches: [main, staging]
|
||||
paths: ['.github/workflows/**']
|
||||
merge_group:
|
||||
types: [checks_requested]
|
||||
|
||||
jobs:
|
||||
scan:
|
||||
name: Scan workflows for curl status-capture pollution
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- name: Find curl ... -w '%{http_code}' ... || echo "000" subshells
|
||||
run: |
|
||||
set -uo pipefail
|
||||
# Multi-line aware: look for `$(curl ... -w '%{http_code}' ... || echo "000")`
|
||||
# subshell where the entire command-substitution wraps a curl that
|
||||
# ends with `|| echo "000"`. Must distinguish from the SAFE shape
|
||||
# `$(cat tempfile 2>/dev/null || echo "000")` — `cat` with a missing
|
||||
# tempfile produces empty stdout, no pollution.
|
||||
python3 <<'PY'
|
||||
import os, re, sys, glob

# Lint: fail CI when a workflow captures curl's HTTP status with the shape
#
#     HTTP_CODE=$(curl ... -w '%{http_code}' ... || echo "000")
#
# curl's -w output and the echo fallback both land on the captured stdout,
# so a failing curl yields values such as "000000" or "409000".

# Curl arguments between the markers we care about. A `)` normally ends the
# command substitution, but one level of nested parentheses (for example an
# inner `$(cat body.json)`) is tolerated so those invocations are scanned too.
_CURL_ARGS = r'(?:\([^()]*\)|[^)])*?'

# The buggy substitution, matched on text whose bash line-continuations have
# already been collapsed (see find_pollution_sites). The negated character
# classes also match newlines, so no DOTALL flag is needed.
#   - `-w` / `--write-out` with any quoted format that contains %{http_code}
#   - fallback written as `echo "000"`, `echo '000'` or `echo 000`
PATTERN = re.compile(
    r'\$\(\s*curl\b' + _CURL_ARGS
    + r'(?:-w|--write-out)\s*[\'"][^\'"]*%\{http_code\}[^\'"]*[\'"]' + _CURL_ARGS
    + r'\|\|\s*echo\s+(?:"000"|\'000\'|000)\s*\)'
)

# Self-skip: this lint workflow contains the literal anti-pattern in
# its own docstring — that's intentional, not a bug.
SELF = ".github/workflows/lint-curl-status-capture.yml"


def find_pollution_sites(content):
    """Return a snippet (at most 120 chars) for every polluting capture in `content`.

    `content` is the full text of one workflow file. A backslash followed by a
    newline is bash's line-continuation; those (plus the indentation of the
    next line) are collapsed to a single space first so that a curl invocation
    spread over several lines is seen as one logical command.
    """
    flat = re.sub(r'\\\s*\n\s*', ' ', content)
    return [m.group(0)[:120] for m in PATTERN.finditer(flat)]


def main():
    """Scan every workflow file; print GitHub annotations; return the exit code."""
    bad_files = []

    # Workflows may use either extension.
    workflows = glob.glob(".github/workflows/*.yml") + glob.glob(".github/workflows/*.yaml")
    for f in sorted(workflows):
        if f == SELF:
            continue
        with open(f, encoding="utf-8") as fh:
            content = fh.read()
        bad_files.extend((f, snippet) for snippet in find_pollution_sites(content))

    if not bad_files:
        print("✓ No curl-status-capture pollution patterns detected")
        return 0

    print(f"::error::Found {len(bad_files)} curl-status-capture pollution site(s):")
    for f, snippet in bad_files:
        print(f"::error file={f}::Curl status-capture pollution: '|| echo \"000\"' inside a $(curl ... -w '%{{http_code}}' ...) subshell. On non-2xx or connection failure, curl's -w writes a status, then exits non-zero, then the || echo appends another '000' — producing 'HTTP 000000' or '409000' that fails comparisons silently. Fix: route -w into a tempfile so the exit code can't pollute stdout. See memory feedback_curl_status_capture_pollution.md.")
        print(f" matched: {snippet}…")
    print()
    print("Fix template:")
    print(' set +e')
    print(' curl ... -w \'%{http_code}\' >code.txt 2>/dev/null')
    print(' set -e')
    print(' HTTP_CODE=$(cat code.txt 2>/dev/null)')
    print(' [ -z "$HTTP_CODE" ] && HTTP_CODE="000"')
    return 1


# The workflow feeds this script to `python3` on stdin, where __name__ is
# "__main__"; the guard keeps an import of this code free of side effects.
if __name__ == "__main__":
    sys.exit(main())
|
||||
PY
|
||||
@@ -184,12 +184,29 @@ jobs:
|
||||
echo " body: $BODY"
|
||||
|
||||
HTTP_RESPONSE=$(mktemp)
|
||||
HTTP_CODE=$(curl -sS -o "$HTTP_RESPONSE" -w '%{http_code}' \
|
||||
HTTP_CODE_FILE=$(mktemp)
|
||||
# Route -w into its own tempfile so curl's exit code (e.g. 56
|
||||
# on connection-reset, 22 on --fail-with-body 4xx/5xx) can't
|
||||
# pollute the captured stdout. The previous inline-substitution
|
||||
# shape produced "000000" on connection reset (curl wrote
|
||||
# "000" via -w, then the inline echo-fallback appended another
|
||||
# "000") — caught on the 2026-05-04 redeploy of sha 2b862f6.
|
||||
# set +e/-e keeps the non-zero curl exit from tripping the
|
||||
# outer pipeline. See lint-curl-status-capture.yml for the
|
||||
# CI gate that pins this fix shape.
|
||||
set +e
|
||||
curl -sS -o "$HTTP_RESPONSE" -w '%{http_code}' \
|
||||
-m 1200 \
|
||||
-H "Authorization: Bearer $CP_ADMIN_API_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-X POST "$CP_URL/cp/admin/tenants/redeploy-fleet" \
|
||||
-d "$BODY" || echo "000")
|
||||
-d "$BODY" >"$HTTP_CODE_FILE"
|
||||
set -e
|
||||
# Stderr from curl (e.g. dial errors with -sS) goes to the runner
|
||||
# log so operators can see WHY a connection failed. Stdout is
|
||||
# captured to $HTTP_CODE_FILE because that's where -w writes.
|
||||
HTTP_CODE=$(cat "$HTTP_CODE_FILE" 2>/dev/null || echo "000")
|
||||
[ -z "$HTTP_CODE" ] && HTTP_CODE="000"
|
||||
|
||||
echo "HTTP $HTTP_CODE"
|
||||
cat "$HTTP_RESPONSE" | jq . || cat "$HTTP_RESPONSE"
|
||||
|
||||
@@ -146,12 +146,26 @@ jobs:
|
||||
echo " body: $BODY"
|
||||
|
||||
HTTP_RESPONSE=$(mktemp)
|
||||
HTTP_CODE=$(curl -sS -o "$HTTP_RESPONSE" -w '%{http_code}' \
|
||||
HTTP_CODE_FILE=$(mktemp)
|
||||
# Route -w into its own tempfile so curl's exit code (e.g. 56
|
||||
# on connection-reset) can't pollute the captured stdout. The
|
||||
# previous inline-substitution shape produced "000000" on
|
||||
# connection reset — caught on main variant 2026-05-04
|
||||
# redeploying sha 2b862f6. Same fix shape as the synth-E2E
|
||||
# §9c gate (PR #2797). See lint-curl-status-capture.yml for
|
||||
# the CI gate that pins this fix shape.
|
||||
set +e
|
||||
curl -sS -o "$HTTP_RESPONSE" -w '%{http_code}' \
|
||||
-m 1200 \
|
||||
-H "Authorization: Bearer $CP_STAGING_ADMIN_API_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-X POST "$CP_URL/cp/admin/tenants/redeploy-fleet" \
|
||||
-d "$BODY" || echo "000")
|
||||
-d "$BODY" >"$HTTP_CODE_FILE"
|
||||
set -e
|
||||
# Stderr from curl (-sS shows dial errors etc.) goes to the
|
||||
# runner log so operators can see WHY a connection failed.
|
||||
HTTP_CODE=$(cat "$HTTP_CODE_FILE" 2>/dev/null || echo "000")
|
||||
[ -z "$HTTP_CODE" ] && HTTP_CODE="000"
|
||||
|
||||
echo "HTTP $HTTP_CODE"
|
||||
cat "$HTTP_RESPONSE" | jq . || cat "$HTTP_RESPONSE"
|
||||
|
||||
@@ -43,7 +43,20 @@ on:
|
||||
types: [checks_requested]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
# Include event_name so a PR sync (event=pull_request) and the
|
||||
# subsequent staging push (event=push) on the SAME merge SHA don't
|
||||
# collide in one group. Without event_name, both runs hashed to
|
||||
# the same key and cancel-in-progress=true cancelled whichever
|
||||
# arrived second — usually the push run, which staging branch-
|
||||
# protection then sees as a CANCELLED required check and refuses
|
||||
# to mark merged. Caught 2026-05-05 across PR #2869's runs (run
|
||||
# ids 25371863455 / 25371811486 / 25371078157 / 25370403142 — every
|
||||
# staging push run cancelled, every matching PR run green).
|
||||
#
|
||||
# Per memory `feedback_concurrency_group_per_sha.md` — same drift
|
||||
# class that broke auto-promote-staging on 2026-04-28. Pin invariant:
|
||||
# event_name + sha is the minimum unique key for these workflows.
|
||||
group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
|
||||
@@ -159,12 +159,18 @@ jobs:
|
||||
# The DELETE handler requires {"confirm": "<slug>"} matching
|
||||
# the URL slug — fat-finger guard. Idempotent: re-issuing
|
||||
# picks up via org_purges.last_step.
|
||||
http_code=$(curl -sS -o /tmp/del_resp -w "%{http_code}" \
|
||||
# Tempfile-routed -w + set +e/-e prevents curl-exit-code
|
||||
# pollution of the captured status (lint-curl-status-capture.yml).
|
||||
set +e
|
||||
curl -sS -o /tmp/del_resp -w "%{http_code}" \
|
||||
--max-time 60 \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" || echo "000")
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/del_code
|
||||
set -e
|
||||
# Stderr from curl (-sS shows dial errors etc.) goes to runner log.
|
||||
http_code=$(cat /tmp/del_code 2>/dev/null || echo "000")
|
||||
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
|
||||
deleted=$((deleted+1))
|
||||
echo " deleted: $slug"
|
||||
|
||||
+50
-2
@@ -1,7 +1,7 @@
|
||||
# Coverage Floor
|
||||
|
||||
CI enforces three coverage gates on `workspace-server` (Go). All defined in
|
||||
`.github/workflows/ci.yml` → `platform-build` job.
|
||||
CI enforces coverage gates on two surfaces — `workspace-server` (Go) and
|
||||
`workspace/` (Python). All defined in `.github/workflows/ci.yml`.
|
||||
|
||||
## Current floors (2026-04-23)
|
||||
|
||||
@@ -76,3 +76,51 @@ This gate makes "no untested critical paths merged" a mechanical property of
|
||||
the CI, not a behavioural property of QA agents or individual reviewers —
|
||||
which is the only way to make it survive fleet outages, agent rotations, or
|
||||
QA process changes.
|
||||
|
||||
## Python (workspace/) — added 2026-05-04 from #2790
|
||||
|
||||
The Python side has its own gates in the `python-lint` job:
|
||||
|
||||
| Gate | Threshold | Where |
|
||||
|---|---|---|
|
||||
| **Total floor** | `86%` | `workspace/pytest.ini` `--cov-fail-under=86` (issue #1817) |
|
||||
| **Critical-path per-file floor** | `75%` | Inline shell step after the pytest run |
|
||||
|
||||
### Critical-path Python files
|
||||
|
||||
These handle multi-tenant routing, auth tokens, and inbox dispatch. A
|
||||
coverage drop here is the same risk shape as a Go-side `tokens*` /
|
||||
`secrets*` file regressing below 10%.
|
||||
|
||||
- `workspace/a2a_mcp_server.py` — MCP dispatcher (PR #2766 / #2771)
|
||||
- `workspace/mcp_cli.py` — molecule-mcp standalone CLI entry
|
||||
- `workspace/a2a_tools.py` — workspace-scoped tool implementations
|
||||
- `workspace/inbox.py` — multi-workspace inbox + per-workspace cursors
|
||||
- `workspace/platform_auth.py` — per-workspace token resolver
|
||||
|
||||
### Why 75% (vs 86% total)
|
||||
|
||||
The total floor averages ~6000 lines across `workspace/`. A single MCP
|
||||
file could drop to ~50% with no CI complaint as long as other modules
|
||||
compensate. The per-file floor closes that distribution gap. 75% sits
|
||||
below current actuals (80–96% as of 2026-05-04) — strictly additive,
|
||||
no existing PR fails.
|
||||
|
||||
### Python ratchet plan
|
||||
|
||||
| Date | Total | Per-file critical | Notes |
|
||||
|---|---|---|---|
|
||||
| 2026-05-04 | 86% | 75% | Initial gate (this file). |
|
||||
| 2026-06-04 | 86% | 80% | First ratchet — at-floor files must catch up. |
|
||||
| 2026-07-04 | 88% | 85% | |
|
||||
| 2026-08-04 | 90% | 90% | Target steady-state. |
|
||||
|
||||
### Why this Python gate exists
|
||||
|
||||
Issue #2790, after the PR #2766 → PR #2771 cycle. PR #2766 added
|
||||
multi-workspace routing through `a2a_tools.py` + `a2a_mcp_server.py`,
|
||||
shipped to main with green CI, but the dispatcher silently dropped a
|
||||
load-bearing kwarg for 4 of 9 tools — caught only by post-merge code
|
||||
review. The structural drift gate (`test_dispatcher_schema_drift.py`,
|
||||
PR #2791) catches the schema↔dispatcher mismatch class; this floor
|
||||
catches the broader "MCP-critical file regressed" class.
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
// quick bounce between signup and either Checkout or the tenant UI.
|
||||
|
||||
import { useEffect, useState } from "react";
|
||||
import { fetchSession, redirectToLogin, type Session } from "@/lib/auth";
|
||||
import { fetchSession, redirectToLogin, signOut, type Session } from "@/lib/auth";
|
||||
import { PLATFORM_URL } from "@/lib/api";
|
||||
import { formatCredits, pillTone, bannerKind } from "@/lib/credits";
|
||||
import { TermsGate } from "@/components/TermsGate";
|
||||
@@ -129,7 +129,7 @@ export default function OrgsPage() {
|
||||
return <EmptyState banner={justCheckedOut ? <CheckoutBanner /> : null} />;
|
||||
}
|
||||
return (
|
||||
<Shell>
|
||||
<Shell session={session}>
|
||||
{justCheckedOut && <CheckoutBanner />}
|
||||
<ul className="space-y-3">
|
||||
{orgs.map((o) => (
|
||||
@@ -160,11 +160,21 @@ function CheckoutBanner() {
|
||||
);
|
||||
}
|
||||
|
||||
function Shell({ children }: { children: React.ReactNode }) {
|
||||
function Shell({
|
||||
children,
|
||||
session,
|
||||
}: {
|
||||
children: React.ReactNode;
|
||||
// Optional: when present, the header renders the signed-in email +
|
||||
// a Sign-out button. The empty-state Shell call doesn't have a
|
||||
// session in scope, so accept null and skip the header chrome there.
|
||||
session?: Session | null;
|
||||
}) {
|
||||
return (
|
||||
<main className="min-h-screen bg-surface text-ink">
|
||||
<TermsGate>
|
||||
<div className="mx-auto max-w-2xl px-6 pt-20 pb-12">
|
||||
{session ? <AccountBar session={session} /> : null}
|
||||
<h1 className="text-3xl font-bold text-ink">Your organizations</h1>
|
||||
<p className="mt-2 text-ink-mid">
|
||||
Each org is an isolated Molecule workspace.
|
||||
@@ -177,6 +187,40 @@ function Shell({ children }: { children: React.ReactNode }) {
|
||||
);
|
||||
}
|
||||
|
||||
// AccountBar renders the signed-in email + a Sign-out button at the
|
||||
// top of the page. Without this the user has no way to log out — the
|
||||
// /cp/auth/signout endpoint exists on the control plane but no UI ever
|
||||
// called it. Reported externally on 2026-05-05; this is the fix.
|
||||
//
|
||||
// Click → calls signOut() which POSTs /cp/auth/signout (clears the
|
||||
// WorkOS session cookie + revokes at the provider) then bounces to
|
||||
// /cp/auth/login. The signOut helper is best-effort — even on a 5xx
|
||||
// or network failure the redirect fires so the user never gets stuck
|
||||
// on an authed-looking page after they clicked Sign out.
|
||||
function AccountBar({ session }: { session: Session }) {
  // True while a sign-out request is in flight; disables the button and
  // swaps its label so a double click cannot fire a second request.
  const [signingOut, setSigningOut] = useState(false);

  const handleSignOut = async () => {
    setSigningOut(true);
    try {
      await signOut();
    } finally {
      // Redirect happens inside signOut; this reset is for tests +
      // edge cases (jsdom, blocked navigation) where it doesn't.
      // Running it in `finally` also re-enables the button if signOut()
      // rejects, so the user can retry instead of facing a dead button.
      setSigningOut(false);
    }
  };

  return (
    <div className="mb-6 flex items-center justify-between text-sm text-ink-mid">
      <span title="Signed-in user">{session.email}</span>
      <button
        type="button"
        disabled={signingOut}
        onClick={handleSignOut}
        className="rounded border border-line bg-surface-card px-3 py-1 text-xs text-ink hover:bg-surface-card disabled:opacity-50"
        aria-label="Sign out"
      >
        {signingOut ? "Signing out…" : "Sign out"}
      </button>
    </div>
  );
}
|
||||
|
||||
// DataResidencyNotice surfaces where workspace data lives so EU-based
|
||||
// signups can make an informed choice (GDPR Art. 13 disclosure
|
||||
// requirement). Plain text, no icon — the goal is clarity, not
|
||||
|
||||
@@ -215,16 +215,6 @@ export function ContextMenu() {
|
||||
closeContextMenu();
|
||||
}, [contextMenu, selectNode, setPanelTab, closeContextMenu]);
|
||||
|
||||
const handleExpand = useCallback(async () => {
|
||||
if (!contextMenu) return;
|
||||
try {
|
||||
await api.post(`/workspaces/${contextMenu.nodeId}/expand`, {});
|
||||
} catch (e) {
|
||||
showToast("Expand failed", "error");
|
||||
}
|
||||
closeContextMenu();
|
||||
}, [contextMenu, closeContextMenu]);
|
||||
|
||||
const setCollapsed = useCanvasStore((s) => s.setCollapsed);
|
||||
const handleCollapse = useCallback(async () => {
|
||||
if (!contextMenu) return;
|
||||
@@ -295,7 +285,7 @@ export function ContextMenu() {
|
||||
},
|
||||
{ label: "Zoom to Team", icon: "⊕", action: handleZoomToTeam },
|
||||
]
|
||||
: [{ label: "Expand to Team", icon: "▷", action: handleExpand }]),
|
||||
: []),
|
||||
{ label: "", icon: "", action: () => {}, divider: true },
|
||||
...(isPaused
|
||||
? [{ label: "Resume", icon: "▶", action: handleResume }]
|
||||
|
||||
@@ -48,16 +48,21 @@ export function EmptyState() {
|
||||
});
|
||||
|
||||
// "Create blank" bypasses templates entirely — no preflight, no
|
||||
// modal, just POST /workspaces with a default name and tier.
|
||||
// Deliberately NOT routed through useTemplateDeploy because it
|
||||
// has no `template.id` to deploy against.
|
||||
// modal, just POST /workspaces with a default name. Deliberately
|
||||
// NOT routed through useTemplateDeploy because it has no
|
||||
// `template.id` to deploy against.
|
||||
//
|
||||
// tier is omitted so the backend picks a SaaS-aware default
|
||||
// (T4 on SaaS, T3 on self-hosted — see WorkspaceHandler.DefaultTier).
|
||||
// The previous hardcoded `tier: 2` shipped every fresh-tenant agent
|
||||
// at Standard regardless of host, which surprised SaaS users whose
|
||||
// CreateWorkspaceDialog already defaults to T4.
|
||||
const createBlank = async () => {
|
||||
setBlankCreating(true);
|
||||
setBlankError(null);
|
||||
try {
|
||||
const ws = await api.post<{ id: string }>("/workspaces", {
|
||||
name: "My First Agent",
|
||||
tier: 2,
|
||||
canvas: firstDeployCoords(),
|
||||
});
|
||||
handleDeployed(ws.id);
|
||||
|
||||
@@ -20,155 +20,6 @@ import * as Dialog from "@radix-ui/react-dialog";
|
||||
|
||||
type Tab = "python" | "curl" | "claude" | "mcp" | "hermes" | "codex" | "openclaw" | "fields";
|
||||
|
||||
// Per-tab help metadata: docs link, where-to-install link, common errors.
|
||||
// All URLs verified against repo content (docs/guides/* file paths map to
|
||||
// docs.molecule.ai/docs/guides/*; canonical hostname confirmed by existing
|
||||
// blog post canonical metadata) or against the snippet text the operator
|
||||
// just copied. Never linking to a URL that wasn't already in product —
|
||||
// dead links here defeat the purpose of "more comprehensive instructions."
|
||||
const TAB_HELP: Record<
|
||||
Tab,
|
||||
{
|
||||
docsUrl?: string;
|
||||
docsLabel?: string;
|
||||
downloadUrl?: string;
|
||||
downloadLabel?: string;
|
||||
commonIssues?: { symptom: string; check: string }[];
|
||||
}
|
||||
> = {
|
||||
mcp: {
|
||||
docsUrl: "https://docs.molecule.ai/docs/guides/mcp-server-setup",
|
||||
docsLabel: "MCP server setup guide",
|
||||
downloadUrl: "https://pypi.org/project/molecule-ai-workspace-runtime/",
|
||||
downloadLabel: "molecule-ai-workspace-runtime on PyPI",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "Tools not appearing in your agent",
|
||||
check:
|
||||
"Run `claude mcp list` (or your runtime's equivalent) — the molecule entry should be listed. If missing, re-run the `claude mcp add` line.",
|
||||
},
|
||||
{
|
||||
symptom: "ConnectionRefused / DNS error on first call",
|
||||
check:
|
||||
"PLATFORM_URL must include the scheme (https://) and have no trailing slash. Verify with `curl $PLATFORM_URL/healthz`.",
|
||||
},
|
||||
],
|
||||
},
|
||||
python: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
downloadUrl: "https://pypi.org/project/molecule-ai-workspace-runtime/",
|
||||
downloadLabel: "molecule-ai-workspace-runtime on PyPI",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "401 from /heartbeat",
|
||||
check:
|
||||
"AUTH_TOKEN expired or wrong workspace_id. Tokens are shown only once at create time — re-create the workspace to get a fresh token.",
|
||||
},
|
||||
{
|
||||
symptom: "AGENT_URL not reachable from platform",
|
||||
check:
|
||||
"Public HTTPS URL required for inbound A2A. Use ngrok or Cloudflare Tunnel if your agent is behind NAT.",
|
||||
},
|
||||
],
|
||||
},
|
||||
claude: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
downloadUrl: "https://claude.com/claude-code",
|
||||
downloadLabel: "Claude Code (claude.com)",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "plugin not installed",
|
||||
check:
|
||||
"Run `/plugin marketplace add Molecule-AI/molecule-mcp-claude-channel` then `/plugin install molecule@molecule-mcp-claude-channel` inside Claude Code, then `/reload-plugins`.",
|
||||
},
|
||||
{
|
||||
symptom: "not on the approved channels allowlist",
|
||||
check:
|
||||
"Custom channels need `--dangerously-load-development-channels` on the launch command. Team/Enterprise orgs need admin to set `channelsEnabled` + `allowedChannelPlugins` in claude.ai admin settings.",
|
||||
},
|
||||
{
|
||||
symptom: "Inbound messages not arriving",
|
||||
check:
|
||||
"Check stderr for `molecule channel: connected — watching N workspace(s)`. Verify ~/.claude/channels/molecule/.env has the right PLATFORM_URL + token.",
|
||||
},
|
||||
],
|
||||
},
|
||||
hermes: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
downloadUrl: "https://github.com/NousResearch/hermes-agent",
|
||||
downloadLabel: "hermes-agent (NousResearch)",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "Gateway start failure",
|
||||
check:
|
||||
"Tail ~/.hermes/gateway.log. YAML duplicate-key in config.yaml is the most common cause — `gateway:` block must appear exactly once.",
|
||||
},
|
||||
{
|
||||
symptom: "Plugin not discovered after install",
|
||||
check:
|
||||
"Run `pip show hermes-channel-molecule` to confirm install. Some hermes builds need `hermes plugin reload` before the new platform_plugins entry takes effect.",
|
||||
},
|
||||
],
|
||||
},
|
||||
codex: {
|
||||
docsUrl: "https://docs.molecule.ai/docs/guides/mcp-server-setup",
|
||||
docsLabel: "MCP server setup guide",
|
||||
downloadUrl: "https://github.com/openai/codex",
|
||||
downloadLabel: "openai/codex",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "[mcp_servers.molecule] not loaded",
|
||||
check:
|
||||
"Codex must be ≥ 0.57. Check with `codex --version`; upgrade via `npm install -g @openai/codex@latest`.",
|
||||
},
|
||||
{
|
||||
symptom: "TOML parse error after re-running setup",
|
||||
check:
|
||||
"TOML rejects duplicate `[mcp_servers.molecule]` tables. Open ~/.codex/config.toml and remove the old block before pasting the new one.",
|
||||
},
|
||||
],
|
||||
},
|
||||
openclaw: {
|
||||
docsUrl: "https://docs.molecule.ai/docs/guides/mcp-server-setup",
|
||||
docsLabel: "MCP server setup guide",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "Gateway not starting",
|
||||
check:
|
||||
"Tail ~/.openclaw/gateway.log. The loopback bind requires :18789 to be free — check with `lsof -iTCP:18789`.",
|
||||
},
|
||||
{
|
||||
symptom: "openclaw mcp set rejected",
|
||||
check:
|
||||
"The heredoc generates JSON; verify it parsed by running `jq < ~/.openclaw/mcp/molecule.json`. Re-run `openclaw mcp set` if the file is malformed.",
|
||||
},
|
||||
],
|
||||
},
|
||||
curl: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
commonIssues: [
|
||||
{
|
||||
symptom: "401 / 403 on register",
|
||||
check:
|
||||
"WORKSPACE_AUTH_TOKEN must be the value shown at workspace create. Tokens are shown only once.",
|
||||
},
|
||||
],
|
||||
},
|
||||
fields: {
|
||||
docsUrl:
|
||||
"https://docs.molecule.ai/docs/guides/external-agent-registration",
|
||||
docsLabel: "External agent registration guide",
|
||||
},
|
||||
};
|
||||
|
||||
export interface ExternalConnectionInfo {
|
||||
workspace_id: string;
|
||||
platform_url: string;
|
||||
@@ -452,7 +303,6 @@ export function ExternalConnectModal({ info, onClose }: Props) {
|
||||
<Field label="heartbeat_endpoint" value={info.heartbeat_endpoint} onCopy={() => copy(info.heartbeat_endpoint, "hb")} copied={copiedKey === "hb"} />
|
||||
</div>
|
||||
)}
|
||||
<HelpBlock help={TAB_HELP[tab]} />
|
||||
</div>
|
||||
|
||||
<div className="mt-5 flex justify-end gap-2">
|
||||
@@ -501,70 +351,6 @@ function SnippetBlock({
|
||||
);
|
||||
}
|
||||
|
||||
// HelpBlock — collapsible "Need help?" section under each tab's snippet.
|
||||
// Renders only the keys present in the per-tab help metadata (no empty
|
||||
// sections). Closed by default so the snippet stays the visual focus;
|
||||
// operators with a working setup never see this. Uses native <details>
|
||||
// for keyboard accessibility (Tab + Enter) without extra ARIA wiring.
|
||||
function HelpBlock({
|
||||
help,
|
||||
}: {
|
||||
help: (typeof TAB_HELP)[Tab] | undefined;
|
||||
}) {
|
||||
if (!help) return null;
|
||||
const { docsUrl, docsLabel, downloadUrl, downloadLabel, commonIssues } = help;
|
||||
if (!docsUrl && !downloadUrl && !commonIssues?.length) return null;
|
||||
|
||||
return (
|
||||
<details className="mt-3 border border-line rounded-lg bg-surface text-xs">
|
||||
<summary className="cursor-pointer select-none px-3 py-2 text-ink-mid hover:text-ink">
|
||||
Need help? — install link, docs, common errors
|
||||
</summary>
|
||||
<div className="px-3 pb-3 pt-1 space-y-2">
|
||||
{downloadUrl && (
|
||||
<div>
|
||||
<span className="text-ink-soft">Where to install: </span>
|
||||
<a
|
||||
href={downloadUrl}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-accent underline hover:text-accent-strong"
|
||||
>
|
||||
{downloadLabel || downloadUrl}
|
||||
</a>
|
||||
</div>
|
||||
)}
|
||||
{docsUrl && (
|
||||
<div>
|
||||
<span className="text-ink-soft">Documentation: </span>
|
||||
<a
|
||||
href={docsUrl}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-accent underline hover:text-accent-strong"
|
||||
>
|
||||
{docsLabel || docsUrl}
|
||||
</a>
|
||||
</div>
|
||||
)}
|
||||
{commonIssues && commonIssues.length > 0 && (
|
||||
<div>
|
||||
<div className="text-ink-soft mb-1">Common errors:</div>
|
||||
<ul className="space-y-1.5 pl-3">
|
||||
{commonIssues.map((issue, i) => (
|
||||
<li key={i}>
|
||||
<code className="text-warm font-mono">{issue.symptom}</code>
|
||||
<span className="text-ink-mid"> — {issue.check}</span>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</details>
|
||||
);
|
||||
}
|
||||
|
||||
function Field({
|
||||
label,
|
||||
value,
|
||||
|
||||
@@ -1,29 +1,81 @@
|
||||
'use client';
|
||||
|
||||
import { useState, useEffect, useCallback } from "react";
|
||||
import { api } from "@/lib/api";
|
||||
import { ConfirmDialog } from "@/components/ConfirmDialog";
|
||||
/**
|
||||
* MemoryInspectorPanel — Memory v2 redesign.
|
||||
*
|
||||
* Renders the canvas Memory tab from v2 plugin data, fetched via the
|
||||
* workspace-server proxy at /v2/{namespaces,memories}, replacing the
|
||||
* v1 LOCAL/TEAM/GLOBAL trio that mapped to the deprecated
|
||||
* shared_context model.
|
||||
*
|
||||
* Surface differences from v1:
|
||||
* - Namespace dropdown driven by GET /v2/namespaces (workspace /
|
||||
* team / org / custom — labels rendered server-side).
|
||||
* - Per-row badges for kind (fact|summary|checkpoint), source
|
||||
* (agent|runtime|user), pin (📌), and TTL countdown. The "from peer"
|
||||
* source-workspace badge is omitted until propagation is implemented.
|
||||
* - No Edit affordance — v2's plugin contract has no PATCH; the
|
||||
* model is forget + recommit. Delete (Forget) stays.
|
||||
*
|
||||
* Shipping note: when the plugin isn't wired (MEMORY_PLUGIN_URL
|
||||
* unset), every endpoint returns 503 with a clear hint. The panel
|
||||
* surfaces that as a banner so operators know to set the env var,
|
||||
* rather than rendering a perpetual empty state that looks like
|
||||
* "no memories yet".
|
||||
*/
|
||||
|
||||
import { useCallback, useEffect, useMemo, useState } from 'react';
|
||||
import { api } from '@/lib/api';
|
||||
import { ConfirmDialog } from '@/components/ConfirmDialog';
|
||||
|
||||
// ── Types ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Memory entry returned by GET /workspaces/:id/memories */
|
||||
export interface MemoryEntry {
|
||||
id: string;
|
||||
workspace_id: string;
|
||||
content: string;
|
||||
scope: "LOCAL" | "TEAM" | "GLOBAL";
|
||||
namespace: string;
|
||||
created_at: string;
|
||||
/**
|
||||
* Semantic similarity score (0–1). Only present when the API is queried
|
||||
* with ?q=<query> and the pgvector backend has been deployed.
|
||||
* Absent on plain list fetches — renders gracefully without a badge.
|
||||
*/
|
||||
similarity_score?: number;
|
||||
export type NamespaceKind = 'workspace' | 'team' | 'org' | 'custom';
|
||||
|
||||
export interface NamespaceView {
|
||||
name: string;
|
||||
kind: NamespaceKind;
|
||||
label: string;
|
||||
}
|
||||
|
||||
type Scope = "LOCAL" | "TEAM" | "GLOBAL";
|
||||
const SCOPES: Scope[] = ["LOCAL", "TEAM", "GLOBAL"];
|
||||
export interface NamespacesResponse {
|
||||
readable: NamespaceView[];
|
||||
writable: NamespaceView[];
|
||||
}
|
||||
|
||||
export type MemoryKind = 'fact' | 'summary' | 'checkpoint';
|
||||
export type MemorySource = 'agent' | 'runtime' | 'user';
|
||||
|
||||
export interface MemoryV2 {
|
||||
id: string;
|
||||
namespace: string;
|
||||
content: string;
|
||||
kind: MemoryKind;
|
||||
source: MemorySource;
|
||||
pin: boolean;
|
||||
expires_at?: string | null;
|
||||
created_at: string;
|
||||
/** 0..1 plugin similarity score; only present when ?q= is set. */
|
||||
score?: number | null;
|
||||
// Note: an earlier iteration of this type carried a `source_workspace_id`
|
||||
// field rendered as a "from peer" badge. The propagation contract that
|
||||
// would have populated it ("Reserved for future cross-namespace
|
||||
// propagation semantics" in memory-plugin-v1.yaml) is unimplemented —
|
||||
// nothing in the codebase writes that key. Removed in self-review.
|
||||
// Re-add when propagation gains a concrete shape.
|
||||
}
|
||||
|
||||
interface MemoriesResponse {
|
||||
memories: MemoryV2[];
|
||||
}
|
||||
|
||||
// MemoryEntry kept as a back-compat type alias so any other component
|
||||
// still importing it doesn't break the build. New consumers should
|
||||
// prefer MemoryV2 — the v1 shape (LOCAL/TEAM/GLOBAL scope) is gone.
|
||||
//
|
||||
// The alias resolves to MemoryV2, so TS still flags accidental access
|
||||
// to v1-only fields such as `scope`.
|
||||
export type MemoryEntry = MemoryV2;
|
||||
|
||||
interface Props {
|
||||
workspaceId: string;
|
||||
@@ -31,11 +83,26 @@ interface Props {
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Sanitise a memory id for use in an HTML id attribute.
|
||||
*/
|
||||
function sanitizeId(id: string): string {
|
||||
return id.replace(/[^a-zA-Z0-9]/g, "-");
|
||||
return id.replace(/[^a-zA-Z0-9]/g, '-');
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect a memory-plugin-503 error from the api wrapper's stringified
|
||||
* Error message. Matches on the literal env-var name rather than the
|
||||
* status code, because the api shim renders status codes inside a
|
||||
* larger formatted message and a future status-code reformat would
|
||||
* silently break the detection.
|
||||
*
|
||||
* The substring `MEMORY_PLUGIN_URL` is hard-coded in the handler at
|
||||
* `workspace-server/internal/handlers/memories_v2.go:available()`,
|
||||
* so this is a pinned cross-layer contract — drift is caught by both
|
||||
* the Go test (TestMemoriesV2_PluginUnwired_All503) and the canvas
|
||||
* test (TestMemoryInspectorPanel — plugin unavailable).
|
||||
*/
|
||||
export function isPluginUnavailableError(err: unknown): boolean {
|
||||
const msg = err instanceof Error ? err.message : '';
|
||||
return msg.includes('MEMORY_PLUGIN_URL');
|
||||
}
|
||||
|
||||
function formatRelativeTime(iso: string): string {
|
||||
@@ -46,6 +113,24 @@ function formatRelativeTime(iso: string): string {
|
||||
return new Date(iso).toLocaleDateString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Render a TTL countdown like "12h", "3d", or "expired" (when the
|
||||
* stored expires_at is in the past). Non-fatal if expires_at is null
|
||||
* or invalid — falls through to empty string so the badge doesn't
|
||||
* render.
|
||||
*/
|
||||
export function formatTTL(expiresAt: string | null | undefined): string {
|
||||
if (!expiresAt) return '';
|
||||
const ts = new Date(expiresAt).getTime();
|
||||
if (Number.isNaN(ts)) return '';
|
||||
const diff = ts - Date.now();
|
||||
if (diff <= 0) return 'expired';
|
||||
if (diff < 60_000) return `${Math.floor(diff / 1000)}s`;
|
||||
if (diff < 3_600_000) return `${Math.floor(diff / 60_000)}m`;
|
||||
if (diff < 86_400_000) return `${Math.floor(diff / 3_600_000)}h`;
|
||||
return `${Math.floor(diff / 86_400_000)}d`;
|
||||
}
|
||||
|
||||
// ── Skeleton rows ──────────────────────────────────────────────────────────────
|
||||
|
||||
function MemorySkeletonRows() {
|
||||
@@ -70,56 +155,92 @@ function MemorySkeletonRows() {
|
||||
|
||||
// ── Component ─────────────────────────────────────────────────────────────────
|
||||
|
||||
const ALL_NAMESPACES = '__all__';
|
||||
|
||||
export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
const [activeScope, setActiveScope] = useState<Scope>("LOCAL");
|
||||
const [activeNamespace, setActiveNamespace] = useState("");
|
||||
const [entries, setEntries] = useState<MemoryEntry[]>([]);
|
||||
const [namespaces, setNamespaces] = useState<NamespacesResponse | null>(null);
|
||||
const [activeNamespace, setActiveNamespace] = useState<string>(ALL_NAMESPACES);
|
||||
const [entries, setEntries] = useState<MemoryV2[]>([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
// ── Search state (debounced) ────────────────────────────────────────────────
|
||||
const [searchQuery, setSearchQuery] = useState("");
|
||||
const [debouncedQuery, setDebouncedQuery] = useState("");
|
||||
// Plugin-disabled banner (503 from server). Stored separately so we
|
||||
// can keep showing the namespace dropdown empty rather than
|
||||
// hiding the whole panel.
|
||||
const [pluginUnavailable, setPluginUnavailable] = useState(false);
|
||||
|
||||
// Search state (debounced)
|
||||
const [searchQuery, setSearchQuery] = useState('');
|
||||
const [debouncedQuery, setDebouncedQuery] = useState('');
|
||||
|
||||
useEffect(() => {
|
||||
const timer = setTimeout(
|
||||
() => setDebouncedQuery(searchQuery.trim()),
|
||||
300
|
||||
);
|
||||
const timer = setTimeout(() => setDebouncedQuery(searchQuery.trim()), 300);
|
||||
return () => clearTimeout(timer);
|
||||
}, [searchQuery]);
|
||||
|
||||
// ── Delete state ─────────────────────────────────────────────────────────────
|
||||
// Delete state
|
||||
const [pendingDeleteId, setPendingDeleteId] = useState<string | null>(null);
|
||||
|
||||
// ── Data loading ────────────────────────────────────────────────────────────
|
||||
// ── Namespace loading ──────────────────────────────────────────────────────
|
||||
|
||||
const loadNamespaces = useCallback(async () => {
|
||||
try {
|
||||
const data = await api.get<NamespacesResponse>(
|
||||
`/workspaces/${workspaceId}/v2/namespaces`,
|
||||
);
|
||||
setNamespaces(data);
|
||||
setPluginUnavailable(false);
|
||||
} catch (e) {
|
||||
// Plugin-unavailable (503) indicates MEMORY_PLUGIN_URL isn't set.
|
||||
// Anything else stays as a generic load failure that the
|
||||
// entries-load path will also flag.
|
||||
if (isPluginUnavailableError(e)) {
|
||||
setPluginUnavailable(true);
|
||||
}
|
||||
setNamespaces({ readable: [], writable: [] });
|
||||
}
|
||||
}, [workspaceId]);
|
||||
|
||||
// ── Entries loading ────────────────────────────────────────────────────────
|
||||
|
||||
const loadEntries = useCallback(async () => {
|
||||
setLoading(true);
|
||||
setError(null);
|
||||
try {
|
||||
const params = new URLSearchParams();
|
||||
params.set("scope", activeScope);
|
||||
if (debouncedQuery) params.set("q", debouncedQuery);
|
||||
if (activeNamespace) params.set("namespace", activeNamespace);
|
||||
if (activeNamespace !== ALL_NAMESPACES) {
|
||||
params.set('namespace', activeNamespace);
|
||||
}
|
||||
if (debouncedQuery) params.set('q', debouncedQuery);
|
||||
|
||||
const url = `/workspaces/${workspaceId}/memories?${params.toString()}`;
|
||||
const data = await api.get<MemoryEntry[]>(url);
|
||||
const url = `/workspaces/${workspaceId}/v2/memories?${params.toString()}`;
|
||||
const data = await api.get<MemoriesResponse>(url);
|
||||
|
||||
// When a semantic query is active, sort by similarity_score descending.
|
||||
// When a semantic query is active and the plugin returns
|
||||
// scores, sort by score descending so the most-relevant hit
|
||||
// sits at the top. Empty score → push to bottom.
|
||||
const sorted = debouncedQuery
|
||||
? [...data].sort(
|
||||
(a, b) => (b.similarity_score ?? 0) - (a.similarity_score ?? 0)
|
||||
? [...data.memories].sort(
|
||||
(a, b) => (b.score ?? 0) - (a.score ?? 0),
|
||||
)
|
||||
: data;
|
||||
: data.memories;
|
||||
setEntries(sorted);
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : "Failed to load memories");
|
||||
if (isPluginUnavailableError(e)) {
|
||||
setPluginUnavailable(true);
|
||||
setError(null); // surfaced via banner, not row error
|
||||
} else {
|
||||
setError(e instanceof Error ? e.message : 'Failed to load memories');
|
||||
}
|
||||
setEntries([]);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}, [workspaceId, activeScope, debouncedQuery, activeNamespace]);
|
||||
}, [workspaceId, activeNamespace, debouncedQuery]);
|
||||
|
||||
useEffect(() => {
|
||||
loadNamespaces();
|
||||
}, [loadNamespaces]);
|
||||
|
||||
useEffect(() => {
|
||||
loadEntries();
|
||||
@@ -136,16 +257,35 @@ export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
setEntries((prev) => prev.filter((e) => e.id !== id));
|
||||
|
||||
try {
|
||||
await api.del(`/workspaces/${workspaceId}/memories/${encodeURIComponent(id)}`);
|
||||
await api.del(`/workspaces/${workspaceId}/v2/memories/${encodeURIComponent(id)}`);
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : "Delete failed — reloading...");
|
||||
// Reload first (which clears any stale error), THEN set the
|
||||
// delete-failure message — otherwise loadEntries' own
|
||||
// `setError(null)` wipes our error before the user sees it.
|
||||
// Caught by the rollback test in MemoryInspectorPanel.test.tsx.
|
||||
const msg = e instanceof Error ? e.message : 'Delete failed — reloading…';
|
||||
await loadEntries();
|
||||
setError(msg);
|
||||
}
|
||||
}, [pendingDeleteId, workspaceId, loadEntries]);
|
||||
|
||||
// ── Namespace dropdown options ─────────────────────────────────────────────
|
||||
|
||||
const dropdownOptions = useMemo(() => {
|
||||
const opts: Array<{ value: string; label: string; kind?: NamespaceKind }> = [
|
||||
{ value: ALL_NAMESPACES, label: 'All namespaces' },
|
||||
];
|
||||
if (namespaces) {
|
||||
for (const ns of namespaces.readable) {
|
||||
opts.push({ value: ns.name, label: ns.label, kind: ns.kind });
|
||||
}
|
||||
}
|
||||
return opts;
|
||||
}, [namespaces]);
|
||||
|
||||
// ── Render ──────────────────────────────────────────────────────────────────
|
||||
|
||||
if (loading && entries.length === 0 && !error) {
|
||||
if (loading && entries.length === 0 && !error && !pluginUnavailable) {
|
||||
return (
|
||||
<div className="flex items-center justify-center h-32">
|
||||
<span className="text-xs text-ink-soft">Loading memories…</span>
|
||||
@@ -155,32 +295,44 @@ export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
|
||||
return (
|
||||
<div className="flex flex-col h-full">
|
||||
{/* Scope tabs */}
|
||||
<div className="px-4 pt-3 pb-2 border-b border-line/40 shrink-0">
|
||||
<div className="flex items-center gap-1">
|
||||
{SCOPES.map((scope) => (
|
||||
<button
|
||||
type="button"
|
||||
key={scope}
|
||||
onClick={() => setActiveScope(scope)}
|
||||
aria-pressed={activeScope === scope}
|
||||
className={[
|
||||
"px-3 py-1 text-[11px] rounded transition-colors",
|
||||
activeScope === scope
|
||||
? "bg-accent-strong text-white"
|
||||
: "bg-surface-card text-ink-mid hover:bg-surface-card hover:text-ink",
|
||||
].join(" ")}
|
||||
>
|
||||
{scope}
|
||||
</button>
|
||||
))}
|
||||
{/* Plugin-unavailable banner */}
|
||||
{pluginUnavailable && (
|
||||
<div
|
||||
role="alert"
|
||||
aria-live="polite"
|
||||
className="mx-4 mt-3 px-3 py-2 bg-amber-950/30 border border-amber-800/40 rounded text-xs text-amber-300 shrink-0"
|
||||
data-testid="plugin-unavailable-banner"
|
||||
>
|
||||
Memory plugin not configured. Set <code>MEMORY_PLUGIN_URL</code> on the
|
||||
workspace-server to enable v2 memory.
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Search bar + namespace filter */}
|
||||
{/* Namespace dropdown */}
|
||||
<div className="px-4 pt-3 pb-2 border-b border-line/40 shrink-0 space-y-2">
|
||||
<div className="flex items-center gap-2">
|
||||
<label htmlFor="namespace-dropdown" className="text-[10px] text-ink-soft shrink-0">
|
||||
Namespace:
|
||||
</label>
|
||||
<select
|
||||
id="namespace-dropdown"
|
||||
value={activeNamespace}
|
||||
onChange={(e) => setActiveNamespace(e.target.value)}
|
||||
aria-label="Filter by namespace"
|
||||
disabled={pluginUnavailable}
|
||||
className="flex-1 bg-surface-sunken border border-line/60 focus:border-accent/60 rounded px-2 py-1 text-[11px] text-ink focus:outline-none transition-colors min-w-0 disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
{dropdownOptions.map((opt) => (
|
||||
<option key={opt.value} value={opt.value}>
|
||||
{opt.label}
|
||||
{opt.kind ? ` (${opt.kind})` : ''}
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
{/* Search bar */}
|
||||
<div className="relative flex items-center">
|
||||
{/* Magnifying glass icon */}
|
||||
<svg
|
||||
width="12"
|
||||
height="12"
|
||||
@@ -198,14 +350,15 @@ export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
onChange={(e) => setSearchQuery(e.target.value)}
|
||||
placeholder="Semantic search…"
|
||||
aria-label="Search memories"
|
||||
className="w-full bg-surface-sunken border border-line/60 focus:border-accent/60 rounded-lg pl-8 pr-7 py-1.5 text-[11px] text-ink placeholder-zinc-600 focus:outline-none transition-colors"
|
||||
disabled={pluginUnavailable}
|
||||
className="w-full bg-surface-sunken border border-line/60 focus:border-accent/60 rounded-lg pl-8 pr-7 py-1.5 text-[11px] text-ink placeholder-zinc-600 focus:outline-none transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
/>
|
||||
{searchQuery && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => {
|
||||
setSearchQuery("");
|
||||
setDebouncedQuery("");
|
||||
setSearchQuery('');
|
||||
setDebouncedQuery('');
|
||||
}}
|
||||
aria-label="Clear search"
|
||||
className="absolute right-2 text-ink-soft hover:text-ink transition-colors text-sm leading-none"
|
||||
@@ -214,37 +367,22 @@ export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Namespace filter */}
|
||||
<div className="flex items-center gap-2">
|
||||
<label htmlFor="namespace-filter" className="text-[10px] text-ink-soft shrink-0">
|
||||
Namespace:
|
||||
</label>
|
||||
<input
|
||||
id="namespace-filter"
|
||||
type="text"
|
||||
value={activeNamespace}
|
||||
onChange={(e) => setActiveNamespace(e.target.value)}
|
||||
placeholder="all namespaces"
|
||||
aria-label="Filter by namespace"
|
||||
className="flex-1 bg-surface-sunken border border-line/60 focus:border-accent/60 rounded px-2 py-1 text-[11px] text-ink placeholder-zinc-600 focus:outline-none transition-colors min-w-0"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Toolbar */}
|
||||
<div className="px-4 py-2.5 border-b border-line/40 flex items-center justify-between shrink-0">
|
||||
<span className="text-[11px] text-ink-soft">
|
||||
{debouncedQuery
|
||||
? `${entries.length} result${entries.length !== 1 ? "s" : ""}`
|
||||
? `${entries.length} result${entries.length !== 1 ? 's' : ''}`
|
||||
: entries.length === 1
|
||||
? "1 memory"
|
||||
: `${entries.length} memories`}
|
||||
? '1 memory'
|
||||
: `${entries.length} memories`}
|
||||
</span>
|
||||
<button
|
||||
type="button"
|
||||
onClick={loadEntries}
|
||||
className="px-2 py-1 text-[11px] bg-surface-card hover:bg-surface-card text-ink-mid rounded transition-colors"
|
||||
disabled={pluginUnavailable}
|
||||
className="px-2 py-1 text-[11px] bg-surface-card hover:bg-surface-card text-ink-mid rounded transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
aria-label="Refresh memories"
|
||||
>
|
||||
↻ Refresh
|
||||
@@ -267,40 +405,7 @@ export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
{loading ? (
|
||||
<MemorySkeletonRows />
|
||||
) : entries.length === 0 ? (
|
||||
debouncedQuery ? (
|
||||
<div className="flex flex-col items-center justify-center py-16 gap-3 text-center">
|
||||
<span className="text-4xl text-ink-soft" aria-hidden="true">◇</span>
|
||||
<p className="text-sm font-medium text-ink-mid">
|
||||
No memories match your search
|
||||
</p>
|
||||
<p className="text-[11px] text-ink-soft max-w-[200px] leading-relaxed">
|
||||
Try a different query or{" "}
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => {
|
||||
setSearchQuery("");
|
||||
setDebouncedQuery("");
|
||||
}}
|
||||
className="text-accent hover:text-accent underline transition-colors"
|
||||
>
|
||||
clear the search
|
||||
</button>
|
||||
.
|
||||
</p>
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex flex-col items-center justify-center py-16 gap-3 text-center">
|
||||
<span className="text-4xl text-ink-soft" aria-hidden="true">◇</span>
|
||||
<p className="text-sm font-medium text-ink-mid">No {activeScope} memories</p>
|
||||
<p className="text-[11px] text-ink-soft max-w-[200px] leading-relaxed">
|
||||
{activeScope === "LOCAL"
|
||||
? "This workspace has not written any local memories yet."
|
||||
: activeScope === "TEAM"
|
||||
? "No team memories shared with this workspace yet."
|
||||
: "No global memories exist yet."}
|
||||
</p>
|
||||
</div>
|
||||
)
|
||||
<EmptyState query={debouncedQuery} pluginUnavailable={pluginUnavailable} />
|
||||
) : (
|
||||
<div className="space-y-1.5">
|
||||
{entries.map((entry) => (
|
||||
@@ -317,9 +422,9 @@ export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
{/* Delete confirmation dialog */}
|
||||
<ConfirmDialog
|
||||
open={pendingDeleteId !== null}
|
||||
title="Delete memory"
|
||||
message={`Delete this ${activeScope} memory? This cannot be undone.`}
|
||||
confirmLabel="Delete"
|
||||
title="Forget memory"
|
||||
message="Forget this memory? This cannot be undone."
|
||||
confirmLabel="Forget"
|
||||
confirmVariant="danger"
|
||||
onConfirm={confirmDelete}
|
||||
onCancel={() => setPendingDeleteId(null)}
|
||||
@@ -328,19 +433,86 @@ export function MemoryInspectorPanel({ workspaceId }: Props) {
|
||||
);
|
||||
}
|
||||
|
||||
// ── Empty state ─────────────────────────────────────────────────────────────
|
||||
|
||||
function EmptyState({
|
||||
query,
|
||||
pluginUnavailable,
|
||||
}: {
|
||||
query: string;
|
||||
pluginUnavailable: boolean;
|
||||
}) {
|
||||
if (pluginUnavailable) {
|
||||
// The banner already explains the problem; the empty rows just
|
||||
// mirror it so the operator sees both signals.
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-16 gap-3 text-center">
|
||||
<span className="text-4xl text-ink-soft" aria-hidden="true">
|
||||
◇
|
||||
</span>
|
||||
<p className="text-sm font-medium text-ink-mid">Memory plugin disabled</p>
|
||||
<p className="text-[11px] text-ink-soft max-w-[220px] leading-relaxed">
|
||||
See banner above for the operator-side fix.
|
||||
</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
if (query) {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-16 gap-3 text-center">
|
||||
<span className="text-4xl text-ink-soft" aria-hidden="true">
|
||||
◇
|
||||
</span>
|
||||
<p className="text-sm font-medium text-ink-mid">No memories match your search</p>
|
||||
<p className="text-[11px] text-ink-soft max-w-[200px] leading-relaxed">
|
||||
Try a different query or clear the search.
|
||||
</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center py-16 gap-3 text-center">
|
||||
<span className="text-4xl text-ink-soft" aria-hidden="true">
|
||||
◇
|
||||
</span>
|
||||
<p className="text-sm font-medium text-ink-mid">No memories yet</p>
|
||||
<p className="text-[11px] text-ink-soft max-w-[220px] leading-relaxed">
|
||||
Agents commit memories via MCP tools (commit_memory, commit_summary). They
|
||||
appear here once written.
|
||||
</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// ── MemoryEntryRow sub-component ──────────────────────────────────────────────
|
||||
|
||||
interface MemoryEntryRowProps {
|
||||
entry: MemoryEntry;
|
||||
entry: MemoryV2;
|
||||
onDelete: () => void;
|
||||
}
|
||||
|
||||
const KIND_BADGE_CLASS: Record<MemoryKind, string> = {
|
||||
fact: 'bg-surface-card text-ink-mid',
|
||||
summary: 'bg-blue-950 text-accent',
|
||||
checkpoint: 'bg-violet-950 text-violet-400',
|
||||
};
|
||||
|
||||
const SOURCE_BADGE_CLASS: Record<MemorySource, string> = {
|
||||
agent: 'bg-surface-card text-ink-mid',
|
||||
runtime: 'bg-amber-950 text-amber-300',
|
||||
user: 'bg-emerald-950 text-emerald-400',
|
||||
};
|
||||
|
||||
function MemoryEntryRow({ entry, onDelete }: MemoryEntryRowProps) {
|
||||
const [expanded, setExpanded] = useState(false);
|
||||
const bodyId = `mem-body-${sanitizeId(entry.id)}`;
|
||||
const ttl = formatTTL(entry.expires_at);
|
||||
|
||||
return (
|
||||
<div className="rounded-lg border border-line/60 bg-surface-sunken/50 overflow-hidden">
|
||||
<div
|
||||
className="rounded-lg border border-line/60 bg-surface-sunken/50 overflow-hidden"
|
||||
data-testid={`memory-row-${entry.id}`}
|
||||
>
|
||||
{/* Header row */}
|
||||
<button
|
||||
type="button"
|
||||
@@ -349,52 +521,89 @@ function MemoryEntryRow({ entry, onDelete }: MemoryEntryRowProps) {
|
||||
aria-expanded={expanded}
|
||||
aria-controls={bodyId}
|
||||
>
|
||||
{/* Scope badge */}
|
||||
{/* Kind badge */}
|
||||
<span
|
||||
className={[
|
||||
"text-[9px] shrink-0 font-mono px-1 py-0.5 rounded",
|
||||
entry.scope === "LOCAL"
|
||||
? "bg-surface-card text-ink-mid"
|
||||
: entry.scope === "TEAM"
|
||||
? "bg-blue-950 text-accent"
|
||||
: "bg-violet-950 text-violet-400",
|
||||
].join(" ")}
|
||||
title={`Scope: ${entry.scope}`}
|
||||
'text-[9px] shrink-0 font-mono px-1 py-0.5 rounded',
|
||||
KIND_BADGE_CLASS[entry.kind] ?? 'bg-surface-card text-ink-mid',
|
||||
].join(' ')}
|
||||
title={`Kind: ${entry.kind}`}
|
||||
data-testid="kind-badge"
|
||||
>
|
||||
{entry.scope[0]}
|
||||
{entry.kind[0].toUpperCase()}
|
||||
</span>
|
||||
|
||||
{/* Source badge */}
|
||||
<span
|
||||
className={[
|
||||
'text-[9px] shrink-0 font-mono px-1 py-0.5 rounded',
|
||||
SOURCE_BADGE_CLASS[entry.source] ?? 'bg-surface-card text-ink-mid',
|
||||
].join(' ')}
|
||||
title={`Source: ${entry.source}`}
|
||||
data-testid="source-badge"
|
||||
>
|
||||
{entry.source}
|
||||
</span>
|
||||
|
||||
{/* Pin indicator */}
|
||||
{entry.pin && (
|
||||
<span
|
||||
className="text-[9px] shrink-0"
|
||||
title="Pinned"
|
||||
data-testid="pin-badge"
|
||||
aria-label="Pinned"
|
||||
>
|
||||
📌
|
||||
</span>
|
||||
)}
|
||||
|
||||
{/* Namespace tag */}
|
||||
<span className="text-[9px] shrink-0 font-mono text-ink-soft truncate max-w-[80px]" title={entry.namespace}>
|
||||
<span
|
||||
className="text-[9px] shrink-0 font-mono text-ink-soft truncate max-w-[100px]"
|
||||
title={entry.namespace}
|
||||
>
|
||||
{entry.namespace}
|
||||
</span>
|
||||
|
||||
{/* Content preview */}
|
||||
<span className="flex-1 min-w-0 text-[10px] font-mono text-ink-mid truncate text-left">
|
||||
{entry.content.length > 60 ? entry.content.slice(0, 60) + "…" : entry.content}
|
||||
{entry.content.length > 60 ? entry.content.slice(0, 60) + '…' : entry.content}
|
||||
</span>
|
||||
|
||||
{/* Similarity badge */}
|
||||
{entry.similarity_score != null && (
|
||||
{/* Score badge (semantic search only) */}
|
||||
{entry.score != null && (
|
||||
<span
|
||||
className={[
|
||||
"text-[9px] shrink-0 font-mono tabular-nums",
|
||||
entry.similarity_score >= 0.8
|
||||
? "text-accent"
|
||||
: "text-ink-mid",
|
||||
].join(" ")}
|
||||
title={`Similarity: ${(entry.similarity_score * 100).toFixed(1)}%`}
|
||||
data-testid="similarity-badge"
|
||||
'text-[9px] shrink-0 font-mono tabular-nums',
|
||||
entry.score >= 0.8 ? 'text-accent' : 'text-ink-mid',
|
||||
].join(' ')}
|
||||
title={`Similarity: ${(entry.score * 100).toFixed(1)}%`}
|
||||
data-testid="score-badge"
|
||||
>
|
||||
{Math.round(entry.similarity_score * 100)}%
|
||||
{Math.round(entry.score * 100)}%
|
||||
</span>
|
||||
)}
|
||||
|
||||
{/* TTL countdown */}
|
||||
{ttl && (
|
||||
<span
|
||||
className={[
|
||||
'text-[9px] shrink-0 font-mono',
|
||||
ttl === 'expired' ? 'text-bad' : 'text-amber-400',
|
||||
].join(' ')}
|
||||
title={`Expires: ${entry.expires_at}`}
|
||||
data-testid="ttl-badge"
|
||||
>
|
||||
⌛{ttl}
|
||||
</span>
|
||||
)}
|
||||
|
||||
|
||||
<span className="text-[9px] text-ink-soft shrink-0">
|
||||
{formatRelativeTime(entry.created_at)}
|
||||
</span>
|
||||
<span className="text-[9px] text-ink-soft shrink-0" aria-hidden="true">
|
||||
{expanded ? "▼" : "▶"}
|
||||
{expanded ? '▼' : '▶'}
|
||||
</span>
|
||||
</button>
|
||||
|
||||
@@ -412,6 +621,7 @@ function MemoryEntryRow({ entry, onDelete }: MemoryEntryRowProps) {
|
||||
<div className="flex items-center justify-between gap-2">
|
||||
<span className="text-[9px] text-ink-soft">
|
||||
Created: {new Date(entry.created_at).toLocaleString()}
|
||||
{entry.expires_at && ` · Expires: ${new Date(entry.expires_at).toLocaleString()}`}
|
||||
</span>
|
||||
<button
|
||||
type="button"
|
||||
@@ -419,10 +629,10 @@ function MemoryEntryRow({ entry, onDelete }: MemoryEntryRowProps) {
|
||||
e.stopPropagation();
|
||||
onDelete();
|
||||
}}
|
||||
aria-label="Delete memory"
|
||||
aria-label="Forget memory"
|
||||
className="text-[10px] px-2 py-0.5 bg-red-950/40 hover:bg-red-900/50 border border-red-900/30 rounded text-bad transition-colors shrink-0"
|
||||
>
|
||||
Delete
|
||||
Forget
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -283,7 +283,7 @@ export function SidePanel() {
|
||||
{panelTab === "skills" && <SkillsTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
|
||||
{panelTab === "activity" && <ActivityTab key={selectedNodeId} workspaceId={selectedNodeId} />}
|
||||
{panelTab === "chat" && <ChatTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
|
||||
{panelTab === "terminal" && <TerminalTab key={selectedNodeId} workspaceId={selectedNodeId} />}
|
||||
{panelTab === "terminal" && <TerminalTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
|
||||
{panelTab === "config" && <ConfigTab key={selectedNodeId} workspaceId={selectedNodeId} />}
|
||||
{panelTab === "schedule" && <ScheduleTab key={selectedNodeId} workspaceId={selectedNodeId} />}
|
||||
{panelTab === "channels" && <ChannelsTab key={selectedNodeId} workspaceId={selectedNodeId} />}
|
||||
|
||||
@@ -316,7 +316,7 @@ export function Toolbar() {
|
||||
<div className="space-y-2">
|
||||
<HelpRow shortcut="⌘K" text="Search workspaces and jump straight into Details or Chat." />
|
||||
<HelpRow shortcut="Palette" text="Open the template palette to deploy a new workspace." />
|
||||
<HelpRow shortcut="Right-click" text="Use node actions for expand, duplicate, export, restart, or delete." />
|
||||
<HelpRow shortcut="Right-click" text="Use node actions for duplicate, export, restart, or delete." />
|
||||
<HelpRow shortcut="Chat" text="If a task is still running, the chat tab resumes that session automatically." />
|
||||
<HelpRow shortcut="Config" text="Use the Config tab for skills, model, secrets, and runtime settings." />
|
||||
<HelpRow shortcut="Dbl-click / Z" text="Zoom canvas to fit a team node and all its sub-workspaces." />
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
import { useCallback, useMemo } from "react";
|
||||
import { Handle, NodeResizer, Position, type NodeProps, type Node } from "@xyflow/react";
|
||||
import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas";
|
||||
import { getConfigurationError, getConfigurationStatus } from "@/store/canvas-topology";
|
||||
import { showToast } from "@/components/Toaster";
|
||||
import { Tooltip } from "@/components/Tooltip";
|
||||
import { STATUS_CONFIG, TIER_CONFIG } from "@/lib/design-tokens";
|
||||
@@ -35,8 +36,28 @@ function EjectIcon(props: React.SVGProps<SVGSVGElement>) {
|
||||
}
|
||||
|
||||
export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>) {
|
||||
const statusCfg = STATUS_CONFIG[data.status] || STATUS_CONFIG.offline;
|
||||
// Configuration-status overlay (PR #2756 / #467 chain). When the
|
||||
// workspace is reachable but adapter.setup() failed (typically a
|
||||
// missing/rotated LLM credential), the agent_card carries
|
||||
// configuration_status: "not_configured". Surface this as a distinct
|
||||
// tile state so the operator sees a useful error instead of an
|
||||
// ambiguous "online but silent" workspace.
|
||||
//
|
||||
// The override only applies when the underlying status is "online" —
|
||||
// a workspace that's actually offline / failed / provisioning gets
|
||||
// its own treatment. "online + not_configured" is the gap PR #2756
|
||||
// introduced; everything else was already covered.
|
||||
const isMisconfigured =
|
||||
data.status === "online" &&
|
||||
getConfigurationStatus(data.agentCard) === "not_configured";
|
||||
const configurationError = getConfigurationError(data.agentCard);
|
||||
const effectiveStatus = isMisconfigured ? "not_configured" : data.status;
|
||||
const statusCfg = STATUS_CONFIG[effectiveStatus] || STATUS_CONFIG.offline;
|
||||
const tierCfg = TIER_CONFIG[data.tier] || { label: `T${data.tier}`, color: "text-ink-mid bg-surface-card border border-line" };
|
||||
const tooltipExtra = isMisconfigured && configurationError
|
||||
? `Agent not configured: ${configurationError}`
|
||||
: null;
|
||||
void tooltipExtra; // wired in via aria-label below; reserved here for future tooltip surface.
|
||||
// Org-deploy context — four derived flags off one store subscription.
|
||||
// Drives the shimmer while provisioning, the dimmed/non-draggable
|
||||
// treatment on locked descendants, and the Cancel pill on the root.
|
||||
@@ -75,7 +96,12 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
<div
|
||||
role="button"
|
||||
tabIndex={0}
|
||||
aria-label={`${data.name} workspace — ${data.status}`}
|
||||
aria-label={
|
||||
isMisconfigured && configurationError
|
||||
? `${data.name} workspace — agent not configured: ${configurationError}`
|
||||
: `${data.name} workspace — ${data.status}`
|
||||
}
|
||||
title={isMisconfigured && configurationError ? `Agent not configured: ${configurationError}` : undefined}
|
||||
aria-pressed={isSelected}
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
@@ -283,11 +309,12 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
|
||||
{/* Bottom row: status / active tasks */}
|
||||
<div className="flex items-center justify-between mt-0.5">
|
||||
{data.status !== "online" ? (
|
||||
{effectiveStatus !== "online" ? (
|
||||
<div className={`text-[10px] uppercase tracking-widest font-medium ${
|
||||
data.status === "failed" ? "text-bad" :
|
||||
data.status === "degraded" ? "text-warm" :
|
||||
data.status === "provisioning" ? "text-accent" :
|
||||
effectiveStatus === "failed" ? "text-bad" :
|
||||
effectiveStatus === "degraded" ? "text-warm" :
|
||||
effectiveStatus === "not_configured" ? "text-warm" :
|
||||
effectiveStatus === "provisioning" ? "text-accent" :
|
||||
"text-ink-mid"
|
||||
}`}>
|
||||
{statusCfg.label}
|
||||
@@ -313,6 +340,19 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
{data.lastSampleError}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Configuration error preview — same visual as the degraded
|
||||
* error preview but keyed off the agent_card's configuration_status.
|
||||
* Tells the operator which env var is missing so they can fix it
|
||||
* without having to dig into the workspace logs. */}
|
||||
{isMisconfigured && configurationError && (
|
||||
<div
|
||||
className="text-[10px] text-warm truncate mt-1 bg-warm/10 px-1.5 py-0.5 rounded border border-warm/40"
|
||||
title={configurationError}
|
||||
>
|
||||
{configurationError}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<Handle
|
||||
|
||||
@@ -228,4 +228,38 @@ describe("ContextMenu — keyboard accessibility", () => {
|
||||
);
|
||||
expect(closeContextMenu).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// The "Expand to Team" right-click action was removed in Phase 2 of
|
||||
// RFC #2857 — every workspace can already have children via the
|
||||
// regular CreateWorkspace flow with parent_id, so a separate
|
||||
// backend bulk-create handler (which was non-idempotent and leaked
|
||||
// EC2s on every duplicate call) was deleted in PR #2856 and the
|
||||
// canvas affordance is gone with it.
|
||||
it("'Expand to Team' menu item is gone (childless workspace)", () => {
|
||||
// Default mockStore.nodes = [] → no children → workspace is childless.
|
||||
render(<ContextMenu />);
|
||||
const items = screen.getAllByRole("menuitem");
|
||||
const labels = items.map((el) => el.textContent?.trim() ?? "");
|
||||
// Literal absence — vitest's toContain uses Object.is/===, so the
|
||||
// earlier `.not.toContain(expect.stringMatching(...))` shape passed
|
||||
// for ANY string array (asymmetric matchers only work with toEqual /
|
||||
// arrayContaining). Pin the production string verbatim.
|
||||
expect(labels.some((l) => l.includes("Expand to Team"))).toBe(false);
|
||||
// Sanity: childless menu still has the regular actions.
|
||||
expect(labels.some((l) => l.includes("Delete"))).toBe(true);
|
||||
expect(labels.some((l) => l.includes("Restart"))).toBe(true);
|
||||
});
|
||||
|
||||
it("'Collapse Team' is still present when the workspace HAS children", () => {
|
||||
// Mark a child belonging to ws-1 so hasChildren() returns true.
|
||||
mockStore.nodes = [{ id: "child-1", data: { parentId: "ws-1" } }];
|
||||
render(<ContextMenu />);
|
||||
const items = screen.getAllByRole("menuitem");
|
||||
const labels = items.map((el) => el.textContent?.trim() ?? "");
|
||||
expect(labels.some((l) => /Collapse Team|Expand Team/.test(l))).toBe(true);
|
||||
expect(labels.some((l) => l.includes("Arrange Children"))).toBe(true);
|
||||
expect(labels.some((l) => l.includes("Zoom to Team"))).toBe(true);
|
||||
// Cleanup for other tests.
|
||||
mockStore.nodes = [];
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,16 +1,29 @@
|
||||
// @vitest-environment jsdom
|
||||
/**
|
||||
* MemoryInspectorPanel tests — issue #909
|
||||
* MemoryInspectorPanel — v2 redesign tests.
|
||||
*
|
||||
* Covers: loading, empty state, scope tabs, namespace filter,
|
||||
* entry list, expand, delete flow, optimistic updates, Refresh, semantic search.
|
||||
* Coverage targets every behavior the panel surfaces:
|
||||
* - Initial load wires GET /v2/namespaces + GET /v2/memories
|
||||
* - Plugin-unavailable banner (503) renders + disables interactions
|
||||
* - Generic error renders in the error banner
|
||||
* - Namespace dropdown populates from /v2/namespaces.readable; "All
|
||||
* namespaces" is the default
|
||||
* - Selecting a namespace re-fetches with ?namespace=...
|
||||
* - Search input debounces + scopes the request to ?q=
|
||||
* - Search results sort by score descending
|
||||
* - Empty-state copy differs by query / plugin-state / no-data
|
||||
* - Per-row badges render (kind / source / pin / TTL / score /
|
||||
* score) and TTL countdown handles past/future/null
|
||||
* - Delete (Forget) flow: optimistic removal, confirmation dialog,
|
||||
* server failure rolls back via reload
|
||||
* - formatTTL helper covers s/m/h/d/expired/null/invalid branches
|
||||
*/
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { render, screen, fireEvent, waitFor, cleanup, act } from "@testing-library/react";
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { render, screen, fireEvent, waitFor, cleanup } from '@testing-library/react';
|
||||
|
||||
// ── Mocks ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
vi.mock('@/lib/api', () => ({
|
||||
api: {
|
||||
get: vi.fn(),
|
||||
post: vi.fn(),
|
||||
@@ -18,7 +31,7 @@ vi.mock("@/lib/api", () => ({
|
||||
},
|
||||
}));
|
||||
|
||||
vi.mock("@/components/ConfirmDialog", () => ({
|
||||
vi.mock('@/components/ConfirmDialog', () => ({
|
||||
ConfirmDialog: ({
|
||||
open,
|
||||
title,
|
||||
@@ -33,435 +46,473 @@ vi.mock("@/components/ConfirmDialog", () => ({
|
||||
confirmVariant?: string;
|
||||
onConfirm: () => void;
|
||||
onCancel: () => void;
|
||||
singleButton?: boolean;
|
||||
}) =>
|
||||
open ? (
|
||||
<div data-testid="confirm-dialog">
|
||||
<p data-testid="dialog-title">{title}</p>
|
||||
<p data-testid="dialog-message">{message}</p>
|
||||
<button onClick={onConfirm}>Confirm Delete</button>
|
||||
<button onClick={onCancel}>Cancel Delete</button>
|
||||
<button onClick={onConfirm}>Confirm</button>
|
||||
<button onClick={onCancel}>Cancel</button>
|
||||
</div>
|
||||
) : null,
|
||||
}));
|
||||
|
||||
import { api } from "@/lib/api";
|
||||
import { MemoryInspectorPanel } from "../MemoryInspectorPanel";
|
||||
|
||||
// ── Typed mock helpers ────────────────────────────────────────────────────────
|
||||
import { api } from '@/lib/api';
|
||||
import {
|
||||
MemoryInspectorPanel,
|
||||
formatTTL,
|
||||
isPluginUnavailableError,
|
||||
type MemoryV2,
|
||||
type NamespacesResponse,
|
||||
} from '../MemoryInspectorPanel';
|
||||
|
||||
const mockGet = vi.mocked(api.get);
|
||||
const mockDel = vi.mocked(api.del);
|
||||
|
||||
// ── Sample fixtures ───────────────────────────────────────────────────────────
|
||||
// ── Fixtures ──────────────────────────────────────────────────────────────────
|
||||
|
||||
const NOW = "2026-04-17T12:00:00.000Z";
|
||||
|
||||
const MEMORY_A: import("../MemoryInspectorPanel").MemoryEntry = {
|
||||
id: "mem-a",
|
||||
workspace_id: "ws-1",
|
||||
content: "Remember to review PRs before merging",
|
||||
scope: "LOCAL",
|
||||
namespace: "general",
|
||||
created_at: NOW,
|
||||
const NS_RESPONSE: NamespacesResponse = {
|
||||
readable: [
|
||||
{ name: 'workspace:ws-1', kind: 'workspace', label: 'Workspace (ws-1)' },
|
||||
{ name: 'team:t-1', kind: 'team', label: 'Team (t-1)' },
|
||||
],
|
||||
writable: [{ name: 'workspace:ws-1', kind: 'workspace', label: 'Workspace (ws-1)' }],
|
||||
};
|
||||
|
||||
const MEMORY_B: import("../MemoryInspectorPanel").MemoryEntry = {
|
||||
id: "mem-b",
|
||||
workspace_id: "ws-1",
|
||||
content: "Team knowledge: deploy happens on Fridays",
|
||||
scope: "TEAM",
|
||||
namespace: "procedures",
|
||||
created_at: NOW,
|
||||
const MEM_BASIC: MemoryV2 = {
|
||||
id: 'mem-a',
|
||||
namespace: 'workspace:ws-1',
|
||||
content: 'Remember the standup is at 10am',
|
||||
kind: 'fact',
|
||||
source: 'agent',
|
||||
pin: false,
|
||||
created_at: '2026-04-17T12:00:00.000Z',
|
||||
};
|
||||
|
||||
const TWO_MEMORIES = [MEMORY_A, MEMORY_B];
|
||||
const MEM_PINNED: MemoryV2 = {
|
||||
id: 'mem-pinned',
|
||||
namespace: 'team:t-1',
|
||||
content: 'Team retro every Friday',
|
||||
kind: 'summary',
|
||||
source: 'user',
|
||||
pin: true,
|
||||
expires_at: new Date(Date.now() + 86_400_000).toISOString(),
|
||||
created_at: '2026-04-17T12:00:00.000Z',
|
||||
};
|
||||
|
||||
const MEM_RUNTIME_CHECKPOINT: MemoryV2 = {
|
||||
id: 'mem-checkpoint',
|
||||
namespace: 'team:t-1',
|
||||
content: 'Runtime checkpoint',
|
||||
kind: 'checkpoint',
|
||||
source: 'runtime',
|
||||
pin: false,
|
||||
created_at: '2026-04-17T12:00:00.000Z',
|
||||
};
|
||||
|
||||
const MEM_EXPIRED: MemoryV2 = {
|
||||
id: 'mem-expired',
|
||||
namespace: 'workspace:ws-1',
|
||||
content: 'Stale memory',
|
||||
kind: 'fact',
|
||||
source: 'agent',
|
||||
pin: false,
|
||||
expires_at: new Date(Date.now() - 1000).toISOString(),
|
||||
created_at: '2026-04-17T12:00:00.000Z',
|
||||
};
|
||||
|
||||
// ── Setup / teardown ──────────────────────────────────────────────────────────
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
mockGet.mockReset();
|
||||
mockDel.mockReset();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
cleanup();
|
||||
});
|
||||
|
||||
// ── Helper: flush microtasks + React state updates ─────────────────────────────
|
||||
async function flushUpdates(): Promise<void> {
|
||||
await act(async () => {});
|
||||
// Helper: stub a basic two-call flow (namespaces + memories).
|
||||
function stubFetch(memories: MemoryV2[], namespaces: NamespacesResponse = NS_RESPONSE) {
|
||||
mockGet.mockImplementation(((url: string) => {
|
||||
if (url.includes('/v2/namespaces')) {
|
||||
return Promise.resolve(namespaces);
|
||||
}
|
||||
return Promise.resolve({ memories });
|
||||
}) as typeof api.get);
|
||||
}
|
||||
|
||||
// ── Loading & empty state ─────────────────────────────────────────────────────
|
||||
// ── isPluginUnavailableError helper ─────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — loading and empty state", () => {
|
||||
it("shows loading indicator before data arrives", () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockReturnValue(new Promise(() => {}) as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
expect(screen.getByText(/loading memories/i)).toBeTruthy();
|
||||
});
|
||||
|
||||
it("renders empty state when API returns []", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByText("No LOCAL memories")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("fetches from the correct workspace memories endpoint with scope=LOCAL", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-abc-123" />);
|
||||
await flushUpdates();
|
||||
expect(mockGet).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-abc-123/memories?scope=LOCAL"
|
||||
);
|
||||
});
|
||||
|
||||
it("shows error banner when fetch throws", async () => {
|
||||
mockGet.mockRejectedValue(new Error("Network error"));
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByText("Network error")).toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Scope tabs ────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — scope tabs", () => {
|
||||
it("renders LOCAL, TEAM, GLOBAL tabs", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByRole("button", { name: "LOCAL" })).toBeTruthy();
|
||||
expect(screen.getByRole("button", { name: "TEAM" })).toBeTruthy();
|
||||
expect(screen.getByRole("button", { name: "GLOBAL" })).toBeTruthy();
|
||||
});
|
||||
|
||||
it("LOCAL is active by default", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByRole("button", { name: "LOCAL" }).getAttribute("aria-pressed")).toBe("true");
|
||||
});
|
||||
|
||||
it("clicking TEAM tab re-fetches with scope=TEAM", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
|
||||
mockGet.mockClear();
|
||||
fireEvent.click(screen.getByRole("button", { name: "TEAM" }));
|
||||
await flushUpdates();
|
||||
expect(mockGet).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-1/memories?scope=TEAM"
|
||||
);
|
||||
});
|
||||
|
||||
it("clicking GLOBAL tab re-fetches with scope=GLOBAL", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
|
||||
mockGet.mockClear();
|
||||
fireEvent.click(screen.getByRole("button", { name: "GLOBAL" }));
|
||||
await flushUpdates();
|
||||
expect(mockGet).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-1/memories?scope=GLOBAL"
|
||||
);
|
||||
});
|
||||
|
||||
it("shows scope-specific empty state when switching tabs", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: "TEAM" }));
|
||||
await flushUpdates();
|
||||
expect(screen.getByText("No TEAM memories")).toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Namespace filter ──────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — namespace filter", () => {
|
||||
it("renders namespace filter input", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByLabelText("Filter by namespace")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("includes namespace param in API call when set", async () => {
|
||||
vi.useFakeTimers();
|
||||
try {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
|
||||
mockGet.mockClear();
|
||||
fireEvent.change(screen.getByLabelText("Filter by namespace"), {
|
||||
target: { value: "facts" },
|
||||
});
|
||||
// Advance past the 300ms debounce
|
||||
act(() => { vi.advanceTimersByTime(350); });
|
||||
await flushUpdates();
|
||||
|
||||
expect(mockGet).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-1/memories?scope=LOCAL&namespace=facts"
|
||||
);
|
||||
} finally {
|
||||
vi.useRealTimers();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ── Entry list ───────────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — entry list", () => {
|
||||
beforeEach(() => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue(TWO_MEMORIES as any);
|
||||
});
|
||||
|
||||
it("renders a row for every memory", async () => {
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByText(/Remember to review PRs before merging/)).toBeTruthy();
|
||||
expect(screen.getByText(/Team knowledge: deploy happens on Fridays/)).toBeTruthy();
|
||||
});
|
||||
|
||||
it("displays memory count in toolbar", async () => {
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByText("2 memories")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("displays scope badge for each entry", async () => {
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByTitle("Scope: LOCAL")).toBeTruthy();
|
||||
expect(screen.getByTitle("Scope: TEAM")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("entries are collapsed by default (pre region not visible)", async () => {
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
// Expanded region (pre tag) should not exist in DOM yet
|
||||
expect(screen.queryByRole("region")).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Expand / collapse ─────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — expand/collapse", () => {
|
||||
beforeEach(() => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue(TWO_MEMORIES as any);
|
||||
});
|
||||
|
||||
it("clicking a row header expands it and shows the full content in a pre tag", async () => {
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
|
||||
fireEvent.click(
|
||||
screen.getByText(/Remember to review PRs before merging/).closest("button")!
|
||||
);
|
||||
await flushUpdates();
|
||||
// After expand, a region with the full content <pre> should appear
|
||||
expect(screen.getByRole("region")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("clicking the header again collapses the row (pre region removed)", async () => {
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
|
||||
const headerBtn = screen
|
||||
.getByText(/Remember to review PRs before merging/)
|
||||
.closest("button")!;
|
||||
fireEvent.click(headerBtn); // expand
|
||||
await flushUpdates();
|
||||
expect(screen.getByRole("region")).toBeTruthy();
|
||||
|
||||
fireEvent.click(headerBtn); // collapse
|
||||
await flushUpdates();
|
||||
// After collapse, the region (pre) is removed from the DOM
|
||||
expect(screen.queryByRole("region")).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Delete flow ───────────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — delete flow", () => {
|
||||
beforeEach(() => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue(TWO_MEMORIES as any);
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockDel.mockResolvedValue({ status: "deleted" } as any);
|
||||
});
|
||||
|
||||
/** Helper: expand memory-A and click its Delete button */
|
||||
async function openDeleteForMemoryA() {
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
fireEvent.click(
|
||||
screen.getByText(/Remember to review PRs before merging/).closest("button")!
|
||||
);
|
||||
await flushUpdates();
|
||||
fireEvent.click(screen.getByRole("button", { name: "Delete memory" }));
|
||||
await flushUpdates();
|
||||
}
|
||||
|
||||
it("opens ConfirmDialog when Delete is clicked", async () => {
|
||||
await openDeleteForMemoryA();
|
||||
expect(screen.getByTestId("confirm-dialog")).toBeTruthy();
|
||||
expect(screen.getByTestId("dialog-title").textContent).toBe("Delete memory");
|
||||
});
|
||||
|
||||
it("calls api.del with the correct URL-encoded path on confirm", async () => {
|
||||
await openDeleteForMemoryA();
|
||||
fireEvent.click(screen.getByText("Confirm Delete"));
|
||||
await flushUpdates();
|
||||
expect(mockDel).toHaveBeenCalledWith("/workspaces/ws-1/memories/mem-a");
|
||||
});
|
||||
|
||||
it("removes the entry optimistically after confirm", async () => {
|
||||
await openDeleteForMemoryA();
|
||||
fireEvent.click(screen.getByText("Confirm Delete"));
|
||||
await flushUpdates();
|
||||
expect(screen.queryByText(/Remember to review PRs before merging/)).toBeNull();
|
||||
// Sibling entry unaffected
|
||||
expect(screen.getByText(/Team knowledge: deploy happens on Fridays/)).toBeTruthy();
|
||||
});
|
||||
|
||||
it("closes ConfirmDialog without deleting when Cancel is clicked", async () => {
|
||||
await openDeleteForMemoryA();
|
||||
fireEvent.click(screen.getByText("Cancel Delete"));
|
||||
await flushUpdates();
|
||||
expect(screen.queryByTestId("confirm-dialog")).toBeNull();
|
||||
expect(mockDel).not.toHaveBeenCalled();
|
||||
// Sibling memory entry (MEMORY_B) is still in the list
|
||||
expect(screen.getByText(/Team knowledge: deploy happens on Fridays/)).toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Refresh ───────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — Refresh button", () => {
|
||||
it("re-fetches entries when Refresh is clicked", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
expect(screen.getByText("No LOCAL memories")).toBeTruthy();
|
||||
|
||||
expect(mockGet).toHaveBeenCalledTimes(1);
|
||||
fireEvent.click(screen.getByRole("button", { name: "Refresh memories" }));
|
||||
await flushUpdates();
|
||||
expect(mockGet).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
|
||||
// ── role=alert a11y ──────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — error elements have role=alert", () => {
|
||||
it("fetch error banner has role='alert'", async () => {
|
||||
mockGet.mockRejectedValue(new Error("Network error"));
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
const alert = screen.getByRole("alert");
|
||||
expect(alert).toBeTruthy();
|
||||
expect(alert.textContent).toContain("Network error");
|
||||
});
|
||||
});
|
||||
|
||||
// ── Semantic search ──────────────────────────────────────────────────────────
|
||||
|
||||
describe("MemoryInspectorPanel — semantic search", () => {
|
||||
afterEach(() => {
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it("debounces search input by 300ms before calling API", async () => {
|
||||
vi.useFakeTimers();
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
|
||||
mockGet.mockClear();
|
||||
|
||||
fireEvent.change(screen.getByLabelText("Search memories"), {
|
||||
target: { value: "deploy" },
|
||||
});
|
||||
|
||||
// 200ms — debounce has NOT fired yet
|
||||
act(() => { vi.advanceTimersByTime(200); });
|
||||
await flushUpdates();
|
||||
expect(mockGet).not.toHaveBeenCalled();
|
||||
|
||||
// 350ms total — debounce fires
|
||||
act(() => { vi.advanceTimersByTime(150); });
|
||||
await flushUpdates();
|
||||
|
||||
expect(mockGet).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-1/memories?scope=LOCAL&q=deploy"
|
||||
);
|
||||
});
|
||||
|
||||
it("renders similarity-badge when entry has similarity_score", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([{ ...MEMORY_A, similarity_score: 0.87 }] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
const badge = document.querySelector('[data-testid="similarity-badge"]');
|
||||
expect(badge).toBeTruthy();
|
||||
expect(badge?.textContent).toBe("87%");
|
||||
});
|
||||
|
||||
it("does not render similarity-badge when entry has no similarity_score", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([MEMORY_A] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
describe('isPluginUnavailableError', () => {
|
||||
it('matches the literal env var contract from the server handler', () => {
|
||||
expect(
|
||||
document.querySelector('[data-testid="similarity-badge"]')
|
||||
).toBeNull();
|
||||
isPluginUnavailableError(
|
||||
new Error('API GET /workspaces/x/v2/memories: 503 {"error":"memory plugin is not configured (set MEMORY_PLUGIN_URL)"}'),
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("clear button resets query immediately and re-fetches without ?q=", async () => {
|
||||
vi.useFakeTimers();
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValue([] as any);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await flushUpdates();
|
||||
it('does not false-match on generic 503 errors that don\'t mention the env var', () => {
|
||||
expect(isPluginUnavailableError(new Error('API GET /foo: 503 something else'))).toBe(false);
|
||||
});
|
||||
|
||||
fireEvent.change(screen.getByLabelText("Search memories"), {
|
||||
target: { value: "deploy" },
|
||||
it('does not false-match on plain 4xx errors', () => {
|
||||
expect(isPluginUnavailableError(new Error('API GET /foo: 401 unauthorized'))).toBe(false);
|
||||
});
|
||||
|
||||
it('returns false for non-Error inputs', () => {
|
||||
expect(isPluginUnavailableError(null)).toBe(false);
|
||||
expect(isPluginUnavailableError(undefined)).toBe(false);
|
||||
expect(isPluginUnavailableError('a string')).toBe(false);
|
||||
expect(isPluginUnavailableError({ message: 'MEMORY_PLUGIN_URL' })).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
// ── formatTTL helper ─────────────────────────────────────────────────────────
|
||||
|
||||
describe('formatTTL', () => {
|
||||
it('returns empty string for null/undefined/empty', () => {
|
||||
expect(formatTTL(null)).toBe('');
|
||||
expect(formatTTL(undefined)).toBe('');
|
||||
expect(formatTTL('')).toBe('');
|
||||
});
|
||||
|
||||
it('returns empty for invalid date strings', () => {
|
||||
expect(formatTTL('not-a-date')).toBe('');
|
||||
});
|
||||
|
||||
it('returns "expired" for past timestamps', () => {
|
||||
const past = new Date(Date.now() - 5000).toISOString();
|
||||
expect(formatTTL(past)).toBe('expired');
|
||||
});
|
||||
|
||||
it('formats <60s as seconds', () => {
|
||||
const future = new Date(Date.now() + 30_000).toISOString();
|
||||
expect(formatTTL(future)).toMatch(/^\d{1,2}s$/);
|
||||
});
|
||||
|
||||
it('formats <60m as minutes', () => {
|
||||
const future = new Date(Date.now() + 30 * 60_000).toISOString();
|
||||
expect(formatTTL(future)).toMatch(/^\d{1,2}m$/);
|
||||
});
|
||||
|
||||
it('formats <24h as hours', () => {
|
||||
const future = new Date(Date.now() + 5 * 3_600_000).toISOString();
|
||||
expect(formatTTL(future)).toMatch(/^\d{1,2}h$/);
|
||||
});
|
||||
|
||||
it('formats >24h as days', () => {
|
||||
const future = new Date(Date.now() + 3 * 86_400_000).toISOString();
|
||||
expect(formatTTL(future)).toMatch(/^\d{1,2}d$/);
|
||||
});
|
||||
});
|
||||
|
||||
// ── Initial load + dropdown ─────────────────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — initial load', () => {
|
||||
it('fetches namespaces and memories on mount', async () => {
|
||||
stubFetch([MEM_BASIC]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
|
||||
await waitFor(() => {
|
||||
const calls = mockGet.mock.calls.map((c) => c[0]);
|
||||
expect(calls.some((u) => u.includes('/v2/namespaces'))).toBe(true);
|
||||
expect(calls.some((u) => u.includes('/v2/memories'))).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
it('renders the row contents from the memories response', async () => {
|
||||
stubFetch([MEM_BASIC]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText(/Remember the standup is at 10am/)).toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
it('populates the namespace dropdown with readable entries + "All namespaces"', async () => {
|
||||
stubFetch([]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByLabelText('Filter by namespace'));
|
||||
const select = screen.getByLabelText('Filter by namespace') as HTMLSelectElement;
|
||||
const optionLabels = Array.from(select.options).map((o) => o.textContent ?? '');
|
||||
expect(optionLabels[0]).toContain('All namespaces');
|
||||
expect(optionLabels.join('|')).toContain('Workspace (ws-1)');
|
||||
expect(optionLabels.join('|')).toContain('Team (t-1)');
|
||||
});
|
||||
|
||||
it('selecting a namespace re-fetches with ?namespace=', async () => {
|
||||
stubFetch([MEM_BASIC]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByLabelText('Filter by namespace'));
|
||||
|
||||
const select = screen.getByLabelText('Filter by namespace') as HTMLSelectElement;
|
||||
fireEvent.change(select, { target: { value: 'team:t-1' } });
|
||||
|
||||
await waitFor(() => {
|
||||
const calls = mockGet.mock.calls.map((c) => c[0] as string);
|
||||
expect(calls.some((u) => u.includes('namespace=team%3At-1'))).toBe(true);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// ── Plugin unavailable (503) ────────────────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — plugin unavailable', () => {
|
||||
it('renders the operator-hint banner and disables search input', async () => {
|
||||
mockGet.mockRejectedValue(new Error('HTTP 503: memory plugin is not configured (set MEMORY_PLUGIN_URL)'));
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByTestId('plugin-unavailable-banner'));
|
||||
|
||||
const searchInput = screen.getByLabelText('Search memories') as HTMLInputElement;
|
||||
expect(searchInput.disabled).toBe(true);
|
||||
});
|
||||
|
||||
it('shows the empty-state explaining plugin disabled', async () => {
|
||||
mockGet.mockRejectedValue(new Error('API GET /workspaces/x/v2/memories: 503 {"error":"memory plugin is not configured (set MEMORY_PLUGIN_URL)"}'));
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByText(/Memory plugin disabled/i));
|
||||
});
|
||||
});
|
||||
|
||||
// ── Generic error (non-503) ─────────────────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — generic errors', () => {
|
||||
it('surfaces a non-503 error in the error banner', async () => {
|
||||
mockGet.mockImplementation(((url: string) => {
|
||||
if (url.includes('/v2/namespaces')) {
|
||||
return Promise.resolve(NS_RESPONSE);
|
||||
}
|
||||
return Promise.reject(new Error('upstream timeout'));
|
||||
}) as typeof api.get);
|
||||
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => {
|
||||
// Error banner has role=alert
|
||||
const alerts = screen.getAllByRole('alert');
|
||||
const found = alerts.some((a) => a.textContent?.includes('upstream timeout'));
|
||||
expect(found).toBe(true);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// ── Search ──────────────────────────────────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — search', () => {
|
||||
it('eventually fires query with ?q= after debounce', async () => {
|
||||
stubFetch([MEM_BASIC]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByLabelText('Search memories'));
|
||||
|
||||
fireEvent.change(screen.getByLabelText('Search memories'), {
|
||||
target: { value: 'standup' },
|
||||
});
|
||||
|
||||
act(() => { vi.advanceTimersByTime(350); });
|
||||
await flushUpdates();
|
||||
|
||||
expect(mockGet).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-1/memories?scope=LOCAL&q=deploy"
|
||||
await waitFor(
|
||||
() => {
|
||||
const calls = mockGet.mock.calls.map((c) => c[0] as string);
|
||||
expect(calls.some((u) => u.includes('q=standup'))).toBe(true);
|
||||
},
|
||||
{ timeout: 1500 },
|
||||
);
|
||||
mockGet.mockClear();
|
||||
});
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: "Clear search" }));
|
||||
await flushUpdates();
|
||||
it('sorts results by score descending when query active', async () => {
|
||||
const lowScore: MemoryV2 = { ...MEM_BASIC, id: 'low', score: 0.2, content: 'low' };
|
||||
const highScore: MemoryV2 = { ...MEM_BASIC, id: 'high', score: 0.95, content: 'high' };
|
||||
// Plugin returns in arbitrary order; component sorts.
|
||||
mockGet.mockImplementation(((url: string) => {
|
||||
if (url.includes('/v2/namespaces')) return Promise.resolve(NS_RESPONSE);
|
||||
return Promise.resolve({ memories: [lowScore, highScore] });
|
||||
}) as typeof api.get);
|
||||
|
||||
expect(mockGet).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-1/memories?scope=LOCAL"
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByLabelText('Search memories'));
|
||||
fireEvent.change(screen.getByLabelText('Search memories'), {
|
||||
target: { value: 'something' },
|
||||
});
|
||||
|
||||
await waitFor(
|
||||
() => {
|
||||
const rows = screen.getAllByTestId(/^memory-row-/);
|
||||
// First row should be the high-score one
|
||||
expect(rows[0].getAttribute('data-testid')).toBe('memory-row-high');
|
||||
},
|
||||
{ timeout: 1500 },
|
||||
);
|
||||
});
|
||||
|
||||
it('clear-button resets the query', async () => {
|
||||
stubFetch([MEM_BASIC]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByLabelText('Search memories'));
|
||||
|
||||
fireEvent.change(screen.getByLabelText('Search memories'), {
|
||||
target: { value: 'foo' },
|
||||
});
|
||||
fireEvent.click(screen.getByLabelText('Clear search'));
|
||||
expect((screen.getByLabelText('Search memories') as HTMLInputElement).value).toBe('');
|
||||
});
|
||||
|
||||
it('renders no-results empty-state when search has no matches', async () => {
|
||||
stubFetch([]);
|
||||
render(<MemoryInspectorPanel workspaceId="ws-1" />);
|
||||
await waitFor(() => screen.getByLabelText('Search memories'));
|
||||
fireEvent.change(screen.getByLabelText('Search memories'), {
|
||||
target: { value: 'nothing' },
|
||||
});
|
||||
await waitFor(
|
||||
() => {
|
||||
expect(screen.getByText(/No memories match your search/i)).toBeTruthy();
|
||||
},
|
||||
{ timeout: 1500 },
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
// ── Per-row badges ───────────────────────────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — row badges', () => {
  // Looks up a badge element inside a memory row by its data-testid.
  const badgeIn = (rowEl: HTMLElement, badgeId: string) =>
    rowEl.querySelector(`[data-testid="${badgeId}"]`);

  it('renders kind, source, pin, TTL badges per shape', async () => {
    stubFetch([MEM_PINNED, MEM_RUNTIME_CHECKPOINT]);
    render(<MemoryInspectorPanel workspaceId="ws-1" />);

    await waitFor(() => {
      // Pinned fixture: expects kind "S", source "user", a pin badge and a
      // countdown-style TTL badge.
      const pinned = screen.getByTestId('memory-row-mem-pinned');
      expect(badgeIn(pinned, 'kind-badge')?.textContent).toBe('S');
      expect(badgeIn(pinned, 'source-badge')?.textContent).toBe('user');
      expect(badgeIn(pinned, 'pin-badge')).toBeTruthy();
      expect(badgeIn(pinned, 'ttl-badge')?.textContent).toMatch(/^⌛\d+[hd]$/);

      // Checkpoint fixture: expects kind "C", source "runtime", and neither
      // a pin badge nor a TTL badge.
      const checkpoint = screen.getByTestId('memory-row-mem-checkpoint');
      expect(badgeIn(checkpoint, 'kind-badge')?.textContent).toBe('C');
      expect(badgeIn(checkpoint, 'source-badge')?.textContent).toBe('runtime');
      expect(badgeIn(checkpoint, 'pin-badge')).toBeNull();
      expect(badgeIn(checkpoint, 'ttl-badge')).toBeNull();
    });
  });

  it('TTL badge shows "expired" for past expires_at', async () => {
    stubFetch([MEM_EXPIRED]);
    render(<MemoryInspectorPanel workspaceId="ws-1" />);

    await waitFor(() => {
      const expiredRow = screen.getByTestId('memory-row-mem-expired');
      const ttlText = badgeIn(expiredRow, 'ttl-badge')?.textContent;
      expect(ttlText).toBe('⌛expired');
    });
  });

  it('expanding a row shows full content + Forget button', async () => {
    stubFetch([MEM_BASIC]);
    render(<MemoryInspectorPanel workspaceId="ws-1" />);
    await waitFor(() => screen.getByTestId('memory-row-mem-a'));

    // The first <button> inside the row is the header toggle.
    const toggle = screen.getByTestId('memory-row-mem-a').querySelector('button');
    expect(toggle).toBeTruthy();
    fireEvent.click(toggle!);

    await waitFor(() => {
      const forgetControl = screen.getByLabelText('Forget memory');
      expect(forgetControl).toBeTruthy();
    });
  });
});
|
||||
|
||||
// ── Delete (Forget) flow ──────────────────────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — forget flow', () => {
  // Renders the panel, expands the mem-a row and clicks its Forget control,
  // leaving the confirm dialog open for the calling test to act on.
  async function openForgetDialog() {
    render(<MemoryInspectorPanel workspaceId="ws-1" />);
    await waitFor(() => screen.getByTestId('memory-row-mem-a'));
    fireEvent.click(screen.getByTestId('memory-row-mem-a').querySelector('button')!);
    await waitFor(() => screen.getByLabelText('Forget memory'));
    fireEvent.click(screen.getByLabelText('Forget memory'));
  }

  it('opens the confirm dialog on Forget click and removes optimistically on confirm', async () => {
    stubFetch([MEM_BASIC]);
    mockDel.mockResolvedValue({ status: 'deleted' });

    await openForgetDialog();

    // Dialog appears with v2-shaped copy (Forget, not Delete)
    expect(screen.getByTestId('dialog-title').textContent).toBe('Forget memory');
    fireEvent.click(screen.getByText('Confirm'));

    // Optimistic removal happens immediately
    await waitFor(() => {
      expect(screen.queryByTestId('memory-row-mem-a')).toBeNull();
    });
    // DELETE called with the right path
    await waitFor(() => {
      const delPaths = mockDel.mock.calls.map((c) => c[0] as string);
      expect(delPaths.some((p) => p.includes('/v2/memories/mem-a'))).toBe(true);
    });
  });

  it('cancelling the dialog leaves the row in place', async () => {
    stubFetch([MEM_BASIC]);

    await openForgetDialog();
    fireEvent.click(screen.getByText('Cancel'));

    expect(screen.queryByTestId('memory-row-mem-a')).toBeTruthy();
    expect(mockDel).not.toHaveBeenCalled();
  });

  it('rolls back on server failure by reloading entries', async () => {
    stubFetch([MEM_BASIC]);
    mockDel.mockRejectedValue(new Error('upstream 502'));

    await openForgetDialog();
    fireEvent.click(screen.getByText('Confirm'));

    // After failure, the error banner surfaces the server message…
    await waitFor(() => {
      const alerts = screen.getAllByRole('alert');
      const found = alerts.some((a) => a.textContent?.includes('upstream 502'));
      expect(found).toBe(true);
    });
    // …and the rollback the test name promises must be observable: the
    // stubbed fetch still returns MEM_BASIC, so the row has to be back once
    // the panel has reloaded. Without this check a regression that shows the
    // banner but leaves the row deleted would pass.
    await waitFor(() => {
      expect(screen.queryByTestId('memory-row-mem-a')).toBeTruthy();
    });
  });
});
|
||||
|
||||
// ── Empty state when no memories at all ────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — empty state', () => {
  // No memories and no active query: the generic empty state is expected.
  it('renders the "no memories yet" empty state when not searching', async () => {
    stubFetch([]);
    render(<MemoryInspectorPanel workspaceId="ws-1" />);

    const assertPlaceholder = () => {
      const placeholder = screen.getByText('No memories yet');
      expect(placeholder).toBeTruthy();
    };
    await waitFor(assertPlaceholder);
  });
});
|
||||
|
||||
// ── Refresh ─────────────────────────────────────────────────────────────────
|
||||
|
||||
describe('MemoryInspectorPanel — refresh', () => {
  // Number of GET calls recorded so far whose URL targets the memories endpoint.
  const memoryFetchCount = () => {
    let hits = 0;
    for (const call of mockGet.mock.calls) {
      if ((call[0] as string).includes('/v2/memories')) hits += 1;
    }
    return hits;
  };

  it('Refresh button refetches memories', async () => {
    stubFetch([MEM_BASIC]);
    render(<MemoryInspectorPanel workspaceId="ws-1" />);
    await waitFor(() => screen.getByLabelText('Refresh memories'));

    // Snapshot the count before clicking so the assertion measures only the
    // fetch triggered by the Refresh button.
    const before = memoryFetchCount();
    fireEvent.click(screen.getByLabelText('Refresh memories'));

    await waitFor(() => {
      expect(memoryFetchCount()).toBe(before + 1);
    });
  });
});
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useRef, useEffect, useCallback } from "react";
|
||||
import { useState, useRef, useEffect, useCallback, useLayoutEffect } from "react";
|
||||
import ReactMarkdown from "react-markdown";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import { api } from "@/lib/api";
|
||||
import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas";
|
||||
import { useSocketEvent } from "@/hooks/useSocketEvent";
|
||||
import { type ChatMessage, type ChatAttachment, createMessage, appendMessageDeduped } from "./chat/types";
|
||||
import { uploadChatFiles, downloadChatFile } from "./chat/uploads";
|
||||
import { uploadChatFiles, downloadChatFile, isPlatformAttachment } from "./chat/uploads";
|
||||
import { AttachmentChip, PendingAttachmentPill } from "./chat/AttachmentViews";
|
||||
import { extractFilesFromTask } from "./chat/message-parser";
|
||||
import { AgentCommsPanel } from "./chat/AgentCommsPanel";
|
||||
@@ -124,14 +124,43 @@ function extractReplyText(resp: A2AResponse): string {
|
||||
// doesn't). Single source of truth for file-part parsing across
|
||||
// live chat, activity log replay, and any future consumers.
|
||||
|
||||
/** Initial chat history page size. The newest N messages are rendered
|
||||
* on first paint; older history is fetched on demand via loadOlder()
|
||||
* when the user scrolls the top sentinel into view. */
|
||||
const INITIAL_HISTORY_LIMIT = 10;
|
||||
/** Subsequent older-history batch size. Larger than INITIAL so a long
|
||||
* scroll-back doesn't fan out into many round-trips. */
|
||||
const OLDER_HISTORY_BATCH = 20;
|
||||
|
||||
/**
|
||||
* Load chat history from the activity_logs database via the platform API.
|
||||
* Uses source=canvas to only get user-initiated messages (not agent-to-agent).
|
||||
*
|
||||
* Pagination:
|
||||
* - Pass `limit` to bound the page size (newest-first from server).
|
||||
* - Pass `beforeTs` (RFC3339) to fetch rows STRICTLY OLDER than that
|
||||
* timestamp. Combined with limit, this yields the next-older page
|
||||
* when scrolling backward through history.
|
||||
*
|
||||
* `reachedEnd` is true when the server returned fewer rows than asked
|
||||
* for — caller uses this to disable further older-batch fetches.
|
||||
* (Counts row-level returns, not chat-bubble count: each row may
|
||||
* produce 1-2 bubbles.)
|
||||
*/
|
||||
async function loadMessagesFromDB(workspaceId: string): Promise<{ messages: ChatMessage[]; error: string | null }> {
|
||||
async function loadMessagesFromDB(
|
||||
workspaceId: string,
|
||||
limit: number,
|
||||
beforeTs?: string,
|
||||
): Promise<{ messages: ChatMessage[]; error: string | null; reachedEnd: boolean }> {
|
||||
try {
|
||||
const params = new URLSearchParams({
|
||||
type: "a2a_receive",
|
||||
source: "canvas",
|
||||
limit: String(limit),
|
||||
});
|
||||
if (beforeTs) params.set("before_ts", beforeTs);
|
||||
const activities = await api.get<ActivityRowForHydration[]>(
|
||||
`/workspaces/${workspaceId}/activity?type=a2a_receive&source=canvas&limit=50`,
|
||||
`/workspaces/${workspaceId}/activity?${params.toString()}`,
|
||||
);
|
||||
|
||||
const messages: ChatMessage[] = [];
|
||||
@@ -142,11 +171,12 @@ async function loadMessagesFromDB(workspaceId: string): Promise<{ messages: Chat
|
||||
for (const a of [...activities].reverse()) {
|
||||
messages.push(...activityRowToMessages(a, isInternalSelfMessage));
|
||||
}
|
||||
return { messages, error: null };
|
||||
return { messages, error: null, reachedEnd: activities.length < limit };
|
||||
} catch (err) {
|
||||
return {
|
||||
messages: [],
|
||||
error: err instanceof Error ? err.message : "Failed to load chat history",
|
||||
reachedEnd: true,
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -256,6 +286,60 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [confirmRestart, setConfirmRestart] = useState(false);
|
||||
const bottomRef = useRef<HTMLDivElement>(null);
|
||||
// First-mount scroll-to-bottom needs `behavior: "instant"` — long
|
||||
// conversations smooth-animate for ~300ms which any concurrent
|
||||
// re-render can interrupt, leaving the user stuck mid-conversation
|
||||
// when the chat tab opens. Subsequent appends (new agent messages)
|
||||
// keep `smooth` for the visual "landing" feel. Flipped the first
|
||||
// time messages.length goes positive, so a workspace switch (which
|
||||
// remounts ChatTab) gets a fresh instant jump too.
|
||||
const hasInitialScrollRef = useRef(false);
|
||||
// Lazy-load older history on scroll-up.
|
||||
// - containerRef = the scrollable messages viewport
|
||||
// - topRef = sentinel above the messages list; IO observes it
|
||||
// and triggers loadOlder() when it enters view
|
||||
// - hasMore = false once a fetch returns < limit rows; stops IO
|
||||
// - loadingOlder = drives the "Loading older messages…" UI label
|
||||
// - inflightRef = synchronous guard against double-entry of loadOlder
|
||||
// when the IO callback fires twice in the same
|
||||
// microtask (state-based guard would be stale until
|
||||
// the next React commit)
|
||||
// - scrollAnchorRef = saves distance-from-bottom before a prepend
|
||||
// so the useLayoutEffect below can restore the
|
||||
// user's exact viewport position. Without this,
|
||||
// prepending older messages would jump the scroll
|
||||
// position by the height of the new content.
|
||||
// - oldestMessageRef / hasMoreRef = let the loadOlder closure read
|
||||
// the latest values without taking them as deps —
|
||||
// every live agent push mutates `messages`, and
|
||||
// having loadOlder depend on `messages` would tear
|
||||
// down + re-arm the IntersectionObserver on every
|
||||
// push. Refs decouple the observer lifecycle from
|
||||
// message-list updates.
|
||||
const containerRef = useRef<HTMLDivElement>(null);
|
||||
const topRef = useRef<HTMLDivElement>(null);
|
||||
const [hasMore, setHasMore] = useState(true);
|
||||
const [loadingOlder, setLoadingOlder] = useState(false);
|
||||
const inflightRef = useRef(false);
|
||||
// The scroll anchor includes the first-message id as it was BEFORE
|
||||
// the prepend — see useLayoutEffect below for why. Without this tag,
|
||||
// a live agent push that appends WHILE loadOlder is in flight would
|
||||
// run useLayoutEffect against the append (anchor still set), the
|
||||
// "restore" math would scroll the user to a stale offset, AND the
|
||||
// append's normal scroll-to-bottom would be swallowed.
|
||||
const scrollAnchorRef = useRef<
|
||||
{ savedDistanceFromBottom: number; expectFirstIdNotEqual: string | null } | null
|
||||
>(null);
|
||||
const oldestMessageRef = useRef<ChatMessage | null>(null);
|
||||
const hasMoreRef = useRef(true);
|
||||
// Monotonic token bumped on workspace switch + on every loadOlder
|
||||
// entry. Each fetch's .then() captures its own token; if the token
|
||||
// has moved, the resolved messages belong to a stale workspace or a
|
||||
// superseded fetch and we silently drop them. Without this guard, a
|
||||
// workspace switch mid-fetch would have the in-flight promise
|
||||
// resolve into the new workspace's setMessages — the user sees
|
||||
// someone else's history briefly.
|
||||
const fetchTokenRef = useRef(0);
|
||||
// Files the user has picked but not yet sent. Cleared on send
|
||||
// (upload success) or by the × on each pill.
|
||||
const [pendingFiles, setPendingFiles] = useState<File[]>([]);
|
||||
@@ -294,17 +378,144 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
sendInFlightRef.current = false;
|
||||
}, []);
|
||||
|
||||
// Load chat history from database on mount
|
||||
useEffect(() => {
|
||||
// Initial-load fetch — used by the mount effect and the "Retry"
|
||||
// button below. Single source of truth so the two paths can't drift
|
||||
// (e.g. INITIAL_HISTORY_LIMIT bumped in the effect but not the
|
||||
// retry, leading to inconsistent first-paint sizes).
|
||||
const loadInitial = useCallback(() => {
|
||||
setLoading(true);
|
||||
setLoadError(null);
|
||||
loadMessagesFromDB(workspaceId).then(({ messages: msgs, error: fetchErr }) => {
|
||||
setMessages(msgs);
|
||||
setLoadError(fetchErr);
|
||||
setLoading(false);
|
||||
});
|
||||
setHasMore(true);
|
||||
// Bump the token; any in-flight fetch from the previous workspace
|
||||
// (or a previous retry) will see token != myToken in its .then()
|
||||
// and silently bail — the late response can't clobber the new
|
||||
// workspace's state.
|
||||
fetchTokenRef.current += 1;
|
||||
const myToken = fetchTokenRef.current;
|
||||
loadMessagesFromDB(workspaceId, INITIAL_HISTORY_LIMIT).then(
|
||||
({ messages: msgs, error: fetchErr, reachedEnd }) => {
|
||||
if (fetchTokenRef.current !== myToken) return;
|
||||
setMessages(msgs);
|
||||
setLoadError(fetchErr);
|
||||
setHasMore(!reachedEnd);
|
||||
setLoading(false);
|
||||
},
|
||||
);
|
||||
}, [workspaceId]);
|
||||
|
||||
// Load chat history on mount / workspace switch.
|
||||
// Initial load is bounded to INITIAL_HISTORY_LIMIT (newest 10) — the
|
||||
// rest streams in as the user scrolls up via loadOlder() below. Pre-
|
||||
// 2026-05-05 this fetched the newest 50 in one shot; on a long-running
|
||||
// workspace that meant 50× message-bubble paint + DOM cost on every
|
||||
// tab-open even when the user only wanted to read the last few.
|
||||
useEffect(() => {
|
||||
loadInitial();
|
||||
}, [loadInitial]);
|
||||
|
||||
// Mirror the latest oldest-message + hasMore into refs so loadOlder
|
||||
// can read them without taking `messages` as a dep. Every live push
|
||||
// through agentMessages would otherwise recreate loadOlder and tear
|
||||
// down the IO observer.
|
||||
useEffect(() => {
|
||||
oldestMessageRef.current = messages[0] ?? null;
|
||||
}, [messages]);
|
||||
useEffect(() => {
|
||||
hasMoreRef.current = hasMore;
|
||||
}, [hasMore]);
|
||||
|
||||
// Fetch the next-older batch and prepend. Stable identity (deps =
|
||||
// [workspaceId]) so the IntersectionObserver effect below doesn't
|
||||
// re-arm on every messages update.
|
||||
// On fetch failure the error is surfaced and `hasMore` is left untouched, so
// a transient network error does not permanently disable older-history paging.
const loadOlder = useCallback(async () => {
  // inflightRef is the load-bearing guard — synchronous, set BEFORE
  // any await, so two IO callbacks dispatched back-to-back can't both
  // pass. The ref-mirrored state checks are secondary gates.
  if (inflightRef.current || !hasMoreRef.current) return;
  const oldest = oldestMessageRef.current;
  if (!oldest) return;
  const container = containerRef.current;
  if (!container) return;
  inflightRef.current = true;
  // Capture the user's distance-from-bottom BEFORE we prepend so the
  // useLayoutEffect can restore it after the new DOM lands. The
  // expectFirstIdNotEqual tag is what the layout effect checks
  // against `messages[0].id` to disambiguate prepend (id changed) vs
  // append (id unchanged → live message landed mid-fetch).
  scrollAnchorRef.current = {
    savedDistanceFromBottom: container.scrollHeight - container.scrollTop,
    expectFirstIdNotEqual: oldest.id,
  };
  fetchTokenRef.current += 1;
  const myToken = fetchTokenRef.current;
  setLoadingOlder(true);
  try {
    const { messages: older, error: fetchErr, reachedEnd } = await loadMessagesFromDB(
      workspaceId,
      OLDER_HISTORY_BATCH,
      oldest.timestamp,
    );
    // The token moved while we were awaiting (e.g. loadInitial ran for a
    // retry / workspace change) — drop these results, they are stale.
    if (fetchTokenRef.current !== myToken) {
      scrollAnchorRef.current = null;
      return;
    }
    // loadMessagesFromDB reports failures as { messages: [], reachedEnd: true }.
    // Treating that like a genuine end-of-history would flip hasMore to
    // false and the user could never page back again after one failed
    // request. Surface the error instead and keep hasMore as it was; the
    // observer fires again the next time the sentinel re-enters view.
    if (fetchErr) {
      scrollAnchorRef.current = null;
      setError(`Failed to load older messages: ${fetchErr}`);
      return;
    }
    if (older.length > 0) {
      setMessages((prev) => [...older, ...prev]);
    } else {
      // Nothing came back — clear the anchor so the next paint doesn't
      // try to "restore" against a no-op prepend.
      scrollAnchorRef.current = null;
    }
    setHasMore(!reachedEnd);
  } finally {
    // Runs on every exit path above, including the early returns.
    setLoadingOlder(false);
    inflightRef.current = false;
  }
}, [workspaceId]);
|
||||
|
||||
// IntersectionObserver on the top sentinel. Fires loadOlder() the
|
||||
// moment the user scrolls within 200px of the top. AbortController
|
||||
// unwires cleanly on workspace switch / unmount; root is the
|
||||
// scrollable container so we observe only what's visible inside it.
|
||||
//
|
||||
// Dependencies:
|
||||
// - loadOlder — stable per workspaceId (refs decouple it from
|
||||
// message updates), so this dep is here for the
|
||||
// workspace-switch case only
|
||||
// - hasMore — re-run when older history runs out so we
|
||||
// disconnect cleanly
|
||||
// - hasMessages — load-bearing: the sentinel JSX is gated on
|
||||
// `messages.length > 0`, so topRef.current is null
|
||||
// on the empty-messages render. We re-arm exactly
|
||||
// once when messages first land. NOT depending on
|
||||
// `messages.length` (or `messages`) directly so
|
||||
// each subsequent message append doesn't tear down
|
||||
// + re-arm the observer.
|
||||
const hasMessages = messages.length > 0;
|
||||
// Arms an IntersectionObserver on the top sentinel while older history may
// still exist; teardown disconnects it via an AbortController.
useEffect(() => {
  const sentinel = topRef.current;
  const viewport = containerRef.current;
  // Nothing to observe until both nodes are mounted, and no reason to
  // observe once older history is exhausted.
  if (!sentinel || !viewport || !hasMore) return;

  const teardown = new AbortController();
  const handleIntersect: IntersectionObserverCallback = (entries) => {
    // Ignore callbacks delivered after cleanup has run.
    if (teardown.signal.aborted) return;
    if (entries[0]?.isIntersecting) loadOlder();
  };
  const observerOptions: IntersectionObserverInit = {
    root: viewport,
    rootMargin: "200px 0px 0px 0px",
    threshold: 0,
  };
  const observer = new IntersectionObserver(handleIntersect, observerOptions);
  observer.observe(sentinel);
  teardown.signal.addEventListener("abort", () => observer.disconnect());

  return () => teardown.abort();
}, [loadOlder, hasMore, hasMessages]);
|
||||
|
||||
// Agent reachability
|
||||
useEffect(() => {
|
||||
const reachable = data.status === "online" || data.status === "degraded";
|
||||
@@ -316,7 +527,41 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
currentTaskRef.current = data.currentTask;
|
||||
}, [data.currentTask]);
|
||||
|
||||
useEffect(() => {
|
||||
// Scroll behavior across messages updates:
|
||||
// - Prepend (loadOlder landed) → restore the user's saved
|
||||
// distance-from-bottom so their reading position is unchanged.
|
||||
// - Append / initial → pin to latest bubble.
|
||||
// useLayoutEffect (not useEffect) so scroll restoration runs BEFORE
|
||||
// paint — otherwise the user sees the page jump for one frame.
|
||||
// Runs after every `messages` change and decides where the viewport should
// sit: restored to the saved reading position after a prepend, otherwise
// pinned to the newest bubble.
useLayoutEffect(() => {
  const viewport = containerRef.current;
  const pendingAnchor = scrollAnchorRef.current;

  // A saved anchor is only honored when this update is the prepend it was
  // saved for, i.e. the first message id differs from the one recorded
  // before the fetch. An append leaves messages[0] unchanged and falls
  // through to the bottom-pin logic below.
  if (pendingAnchor && viewport && messages.length > 0) {
    const prependLanded = messages[0].id !== pendingAnchor.expectFirstIdNotEqual;
    if (prependLanded) {
      viewport.scrollTop = viewport.scrollHeight - pendingAnchor.savedDistanceFromBottom;
      scrollAnchorRef.current = null;
      return;
    }
  }

  // The first non-empty render jumps instantly; every later update animates.
  const isFirstArrival = !hasInitialScrollRef.current && messages.length > 0;
  if (isFirstArrival) {
    hasInitialScrollRef.current = true;
  }
  bottomRef.current?.scrollIntoView({
    behavior: isFirstArrival ? ("instant" as ScrollBehavior) : "smooth",
  });
}, [messages]);
|
||||
|
||||
@@ -735,7 +980,7 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
</div>
|
||||
)}
|
||||
{/* Messages */}
|
||||
<div className="flex-1 overflow-y-auto p-3 space-y-3">
|
||||
<div ref={containerRef} className="flex-1 overflow-y-auto p-3 space-y-3">
|
||||
{loading && (
|
||||
<div className="text-xs text-ink-soft text-center py-4">Loading chat history...</div>
|
||||
)}
|
||||
@@ -748,15 +993,7 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
Failed to load chat history: {loadError}
|
||||
</p>
|
||||
<button
|
||||
onClick={() => {
|
||||
setLoading(true);
|
||||
setLoadError(null);
|
||||
loadMessagesFromDB(workspaceId).then(({ messages: msgs, error: fetchErr }) => {
|
||||
setMessages(msgs);
|
||||
setLoadError(fetchErr);
|
||||
setLoading(false);
|
||||
});
|
||||
}}
|
||||
onClick={loadInitial}
|
||||
className="text-[10px] px-2 py-0.5 rounded bg-red-800/40 text-bad hover:bg-red-700/50 transition-colors"
|
||||
>
|
||||
Retry
|
||||
@@ -768,6 +1005,24 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
No messages yet. Send a message to start chatting with this agent.
|
||||
</div>
|
||||
)}
|
||||
{/* Top sentinel for lazy-loading older history. The IO observer
|
||||
in the effect above watches this; entering view triggers the
|
||||
next-older batch fetch. Sits ABOVE messages.map so it's the
|
||||
first thing the user reaches when scrolling up.
|
||||
|
||||
Only mounted when there might be more history (hasMore) so a
|
||||
short conversation doesn't pay an idle observer. The
|
||||
sentinel itself renders "Loading older messages…" during
|
||||
the fetch so the user sees feedback for the scroll-up
|
||||
gesture. Once we hit the end, we drop the sentinel entirely
|
||||
instead of showing a "no more messages" footer — the user's
|
||||
scroll resting against the top of the conversation IS the
|
||||
signal. */}
|
||||
{hasMore && messages.length > 0 && (
|
||||
<div ref={topRef} className="text-xs text-ink-soft text-center py-1">
|
||||
{loadingOlder ? "Loading older messages…" : " "}
|
||||
</div>
|
||||
)}
|
||||
{messages.map((msg) => (
|
||||
<div key={msg.id} className={`flex ${msg.role === "user" ? "justify-end" : "justify-start"}`}>
|
||||
<div
|
||||
@@ -806,7 +1061,77 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
: "dark:prose-invert dark:[--tw-prose-invert-body:theme(colors.zinc.100)] dark:[--tw-prose-invert-headings:theme(colors.white)] dark:[--tw-prose-invert-bold:theme(colors.white)] dark:[--tw-prose-invert-code:theme(colors.zinc.100)]"
|
||||
}`}
|
||||
>
|
||||
<ReactMarkdown remarkPlugins={[remarkGfm]}>{msg.content}</ReactMarkdown>
|
||||
<ReactMarkdown
|
||||
remarkPlugins={[remarkGfm]}
|
||||
components={{
|
||||
// Default ReactMarkdown renders `<a href="...">`
|
||||
// with no target and no scheme handling, so:
|
||||
//
|
||||
// 1. http/https links navigate the canvas tab
|
||||
// itself away — user loses canvas state.
|
||||
// 2. workspace://, file://, and bare /workspace/
|
||||
// paths from agent-authored markdown produce
|
||||
// an unhandled-protocol click → browser ends
|
||||
// up at about:blank with no download (the
|
||||
// reported bug from 2026-05-05).
|
||||
//
|
||||
// Override: external URLs open in a new tab with
|
||||
// rel="noopener noreferrer"; in-container paths
|
||||
// route through downloadChatFile so the browser
|
||||
// gets a real Blob with proper auth headers.
|
||||
// Link renderer for agent-authored markdown.
//   - Platform attachment URIs (per isPlatformAttachment) are intercepted
//     and routed through downloadChatFile instead of navigating.
//   - Everything else opens in a new tab with rel="noopener noreferrer".
// `node` is react-markdown's AST node for the element; it is pulled out of
// the props here so it is not spread onto the DOM <a> as a bogus attribute.
a: ({ href, children, node: _node, ...rest }) => {
  const url = String(href ?? "");
  // Use the SSOT helper isPlatformAttachment so the markdown link
  // override and the chip download path agree on which schemes need
  // the auth-routed download.
  if (isPlatformAttachment(url)) {
    return (
      <a
        href={url}
        {...rest}
        onClick={(e) => {
          e.preventDefault();
          // Construct a synthetic ChatAttachment and route through the
          // same download path the download chips use. Filename is the
          // last path segment so Save-As prefills sensibly.
          const name = url.split(/[\\/]/).pop() || "download";
          downloadChatFile(workspaceId, {
            uri: url,
            name,
          }).catch((err) => {
            setError(
              err instanceof Error
                ? `Download failed: ${err.message}`
                : "Download failed",
            );
          });
        }}
      >
        {children}
      </a>
    );
  }
  // External (http(s) / mailto / unknown scheme): open in new tab so
  // canvas state survives. `rest` is spread FIRST so nothing carried in
  // from the markdown pipeline can override target/rel.
  return (
    <a
      {...rest}
      href={url}
      target="_blank"
      rel="noopener noreferrer"
    >
      {children}
    </a>
  );
},
|
||||
}}
|
||||
>{msg.content}</ReactMarkdown>
|
||||
</div>
|
||||
)}
|
||||
{msg.attachments && msg.attachments.length > 0 && (
|
||||
@@ -912,7 +1237,22 @@ function MyChatPanel({ workspaceId, data }: Props) {
|
||||
value={input}
|
||||
onChange={(e) => setInput(e.target.value)}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === "Enter" && !e.shiftKey) {
|
||||
// IME-safe send: while a CJK / Japanese / Korean IME is
|
||||
// composing, Enter accepts the candidate selection — not a
|
||||
// newline, not a send. `e.nativeEvent.isComposing` is the
|
||||
// standard signal (modern WebKit/Blink/Gecko); the keyCode
|
||||
// 229 fallback covers older Safari / WebKit-based mobile
|
||||
// browsers that delay setting isComposing on the
|
||||
// composition-end Enter. Reported 2026-05-05: typing
|
||||
// Chinese with the system IME, pressing Enter to commit
|
||||
// a candidate would inadvertently send the half-typed
|
||||
// message.
|
||||
if (
|
||||
e.key === "Enter" &&
|
||||
!e.shiftKey &&
|
||||
!e.nativeEvent.isComposing &&
|
||||
e.keyCode !== 229
|
||||
) {
|
||||
e.preventDefault();
|
||||
sendMessage();
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import { useCanvasStore } from "@/store/canvas";
|
||||
import { type ConfigData, DEFAULT_CONFIG, TextInput, NumberInput, Toggle, TagList, Section } from "./config/form-inputs";
|
||||
import { parseYaml, toYaml } from "./config/yaml-utils";
|
||||
import { SecretsSection } from "./config/secrets-section";
|
||||
import { ExternalConnectionSection } from "./ExternalConnectionSection";
|
||||
import {
|
||||
ProviderModelSelector,
|
||||
buildProviderCatalog,
|
||||
@@ -886,11 +887,24 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
</Section>
|
||||
)}
|
||||
|
||||
<Section title="Skills & Tools" defaultOpen={false}>
|
||||
<TagList label="Skills" values={config.skills || []} onChange={(v) => update("skills", v)} placeholder="e.g. code-review" />
|
||||
<TagList label="Tools" values={config.tools || []} onChange={(v) => update("tools", v)} placeholder="e.g. web_search, filesystem" />
|
||||
<TagList label="Prompt Files" values={config.prompt_files || []} onChange={(v) => update("prompt_files", v)} placeholder="e.g. system-prompt.md" />
|
||||
<TagList label="Shared Context" values={config.shared_context || []} onChange={(v) => update("shared_context", v)} placeholder="e.g. architecture.md" />
|
||||
{/* Skills + Tools used to live here as TagList inputs. They were
|
||||
redundant with their dedicated tabs:
|
||||
- Skills → managed via SkillsTab (per-workspace skill folders)
|
||||
- Tools → managed via the Plugins tab (install/uninstall)
|
||||
Editing them here only set the config.yaml field; the
|
||||
actual install/load happened elsewhere. Removed to stop
|
||||
showing the misnamed list-input affordance. */}
|
||||
|
||||
<Section title="Prompt Files" defaultOpen={false}>
|
||||
<p className="text-[10px] text-ink-soft px-1 pb-1">
|
||||
Markdown files that compose this workspace's system prompt.
|
||||
Loaded in order at boot from the workspace config dir
|
||||
(e.g. <code className="font-mono">system-prompt.md</code>,{' '}
|
||||
<code className="font-mono">CLAUDE.md</code>,{' '}
|
||||
<code className="font-mono">AGENTS.md</code>). Edit the file
|
||||
contents directly via the Files tab.
|
||||
</p>
|
||||
<TagList label="Files (load order)" values={config.prompt_files || []} onChange={(v) => update("prompt_files", v)} placeholder="e.g. system-prompt.md" />
|
||||
</Section>
|
||||
|
||||
<Section title="A2A Protocol" defaultOpen={false}>
|
||||
@@ -947,6 +961,9 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
: "This runtime manages its own config outside the platform template."}
|
||||
</div>
|
||||
)}
|
||||
{!error && config.runtime === "external" && (
|
||||
<ExternalConnectionSection workspaceId={workspaceId} />
|
||||
)}
|
||||
{success && (
|
||||
<div className="mx-3 mb-2 px-3 py-1.5 bg-green-900/30 border border-green-800 rounded text-xs text-good">Saved</div>
|
||||
)}
|
||||
|
||||
@@ -0,0 +1,146 @@
|
||||
'use client';
|
||||
|
||||
// ExternalConnectionSection — credential lifecycle controls for runtime=external
|
||||
// workspaces. Surfaced inside ConfigTab when the workspace's runtime is
|
||||
// "external"; ignored for hermes/claude-code/etc. (those have their own
|
||||
// restart-mints-token path).
|
||||
//
|
||||
// Two affordances:
|
||||
//
|
||||
// 1. "Show connection info" (read-only)
|
||||
// Fetches GET /workspaces/:id/external/connection. Returns the
|
||||
// connect block (PLATFORM_URL, WORKSPACE_ID, all 7 snippets) WITH
|
||||
// auth_token="". The modal masks the token field and labels it
|
||||
// "rotate to reveal a new token — current token is unrecoverable".
|
||||
//
|
||||
// 2. "Rotate credentials" (destructive)
|
||||
// POST /workspaces/:id/external/rotate. Revokes any prior live
|
||||
// tokens, mints a fresh one, returns the same connect block with
|
||||
// auth_token populated. Old credentials stop working IMMEDIATELY —
|
||||
// the previously-paired agent will fail auth on its next heartbeat.
|
||||
// Confirm dialog explains this before firing.
|
||||
//
|
||||
// Reuses the existing ExternalConnectModal so the snippet UX is the
|
||||
// same as on Create — operators don't have to learn a second modal.
|
||||
|
||||
import { useState } from "react";
|
||||
import * as Dialog from "@radix-ui/react-dialog";
|
||||
|
||||
import { api } from "@/lib/api";
|
||||
import {
|
||||
ExternalConnectModal,
|
||||
type ExternalConnectionInfo,
|
||||
} from "../ExternalConnectModal";
|
||||
|
||||
/** Props accepted by ExternalConnectionSection. */
interface Props {
|
||||
/** Workspace ID; interpolated into the `/workspaces/:id/external/*` request paths. */
workspaceId: string;
|
||||
}
|
||||
|
||||
/**
 * ExternalConnectionSection — "Show connection info" and "Rotate credentials"
 * controls for a workspace.
 *
 * Both actions resolve to a connect block that is handed to
 * ExternalConnectModal; they differ only in the HTTP call they make:
 *   - show   → GET  /workspaces/:id/external/connection
 *   - rotate → POST /workspaces/:id/external/rotate (behind a confirm dialog)
 *
 * While either request is in flight both buttons are disabled, so the two
 * actions can never overlap.
 *
 * @param workspaceId - workspace whose external connection is being managed.
 */
export function ExternalConnectionSection({ workspaceId }: Props) {
  type ConnectionResponse = { connection: ExternalConnectionInfo };
  type Action = "show" | "rotate";

  const [info, setInfo] = useState<ExternalConnectionInfo | null>(null);
  const [busy, setBusy] = useState<Action | null>(null);
  const [confirmRotate, setConfirmRotate] = useState(false);
  const [error, setError] = useState<string | null>(null);

  // Shared request lifecycle: clear the previous error, mark the action as
  // busy, open the modal with the returned connect block on success, surface
  // the failure message otherwise, and always release the busy flag.
  async function runAction(action: Action, request: () => Promise<ConnectionResponse>) {
    setError(null);
    setBusy(action);
    try {
      const resp = await request();
      setInfo(resp.connection);
    } catch (e) {
      setError(e instanceof Error ? e.message : String(e));
    } finally {
      setBusy(null);
    }
  }

  // Read-only: fetches the connect block for display.
  function showConnection() {
    return runAction("show", () =>
      api.get<ConnectionResponse>(`/workspaces/${workspaceId}/external/connection`),
    );
  }

  // Destructive: only reachable through the confirm dialog, which is closed
  // as soon as the request starts.
  function doRotate() {
    setConfirmRotate(false);
    return runAction("rotate", () =>
      api.post<ConnectionResponse>(`/workspaces/${workspaceId}/external/rotate`, {}),
    );
  }

  return (
    <div className="mx-3 mt-3 p-3 bg-surface-sunken/50 border border-line rounded">
      <h3 className="text-xs text-ink-mid font-medium mb-1">External Connection</h3>
      <p className="text-[10px] text-ink-soft mb-2">
        This workspace runs an external agent. Use these controls to
        re-show the setup snippets or rotate the workspace token.
      </p>

      <div className="flex gap-2 flex-wrap">
        <button
          type="button"
          onClick={showConnection}
          disabled={busy !== null}
          // hover:bg-surface-card on top of bg-surface-card was a no-op;
          // use the elevated surface so the button visibly responds.
          className="px-3 py-1.5 bg-surface-card hover:bg-surface-elevated text-xs rounded text-ink-mid disabled:opacity-30 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60"
        >
          {busy === "show" ? "Loading…" : "Show connection info"}
        </button>
        <button
          type="button"
          onClick={() => setConfirmRotate(true)}
          disabled={busy !== null}
          className="px-3 py-1.5 bg-red-900/30 hover:bg-red-900/50 border border-red-800/60 text-xs rounded text-bad disabled:opacity-30 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-red-600/60"
        >
          {busy === "rotate" ? "Rotating…" : "Rotate credentials"}
        </button>
      </div>

      {error && (
        // role="alert" so assistive tech announces the failure when it appears.
        <div
          role="alert"
          className="mt-2 px-2 py-1 bg-red-900/30 border border-red-800 rounded text-[10px] text-bad"
        >
          {error}
        </div>
      )}

      <Dialog.Root open={confirmRotate} onOpenChange={setConfirmRotate}>
        <Dialog.Portal>
          <Dialog.Overlay className="fixed inset-0 bg-black/60 z-50" />
          <Dialog.Content className="fixed left-1/2 top-1/2 z-50 w-[min(440px,92vw)] -translate-x-1/2 -translate-y-1/2 rounded-xl bg-surface-sunken border border-line p-5 shadow-2xl">
            <Dialog.Title className="text-sm font-medium text-ink mb-2">
              Rotate workspace credentials?
            </Dialog.Title>
            <Dialog.Description className="text-xs text-ink-mid mb-4 leading-relaxed">
              This will mint a new <code className="font-mono">workspace_auth_token</code> and{' '}
              <strong>immediately invalidate the current one</strong>. Your external
              agent will start failing authentication on its next heartbeat
              until you redeploy it with the new token.
            </Dialog.Description>
            <div className="flex justify-end gap-2">
              <button
                type="button"
                onClick={() => setConfirmRotate(false)}
                className="px-3 py-1.5 bg-surface-card hover:bg-surface-elevated text-xs rounded text-ink-mid"
              >
                Cancel
              </button>
              <button
                type="button"
                onClick={doRotate}
                className="px-3 py-1.5 bg-red-700 hover:bg-red-600 text-xs rounded text-white"
              >
                Rotate
              </button>
            </div>
          </Dialog.Content>
        </Dialog.Portal>
      </Dialog.Root>

      <ExternalConnectModal info={info} onClose={() => setInfo(null)} />
    </div>
  );
}
|
||||
@@ -10,6 +10,7 @@ interface Props {
|
||||
/** One workspace memory entry as this tab reads, renders, and edits it. */
interface MemoryEntry {
|
||||
/** Entry identifier; sent back as `key` when the entry is saved or deleted. */
key: string;
|
||||
/** Stored value. Rendered with JSON.stringify; plain strings are edited raw. */
value: unknown;
|
||||
/** When numeric, sent as `if_match_version` on save so a concurrent write is detected. */
version?: number;
|
||||
/** Expiry timestamp parsed with `new Date(...)` to prefill the remaining TTL; null when unset. */
expires_at: string | null;
|
||||
/** Last-update timestamp; shown via `new Date(...).toLocaleString()`. */
updated_at: string;
|
||||
}
|
||||
@@ -28,6 +29,10 @@ export function MemoryTab({ workspaceId }: Props) {
|
||||
const [newValue, setNewValue] = useState("");
|
||||
const [newTTL, setNewTTL] = useState("");
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [editingKey, setEditingKey] = useState<string | null>(null);
|
||||
const [editValue, setEditValue] = useState("");
|
||||
const [editTTL, setEditTTL] = useState("");
|
||||
const [editError, setEditError] = useState<string | null>(null);
|
||||
|
||||
const awarenessUrl = useMemo(() => {
|
||||
try {
|
||||
@@ -109,6 +114,69 @@ export function MemoryTab({ workspaceId }: Props) {
|
||||
}
|
||||
};
|
||||
|
||||
const beginEdit = (entry: MemoryEntry) => {
  const { key, value, expires_at: expiresAt } = entry;

  setEditError(null);
  setEditingKey(key);

  // Plain strings go into the editor verbatim (no surrounding quotes);
  // anything else is shown as pretty-printed JSON.
  const editorText = typeof value === "string" ? value : JSON.stringify(value, null, 2);
  setEditValue(editorText);

  // Prefill the TTL with the whole seconds left until expiry. An entry with
  // no expiry, or one whose expiry is not in the future, starts blank.
  let ttlText = "";
  if (expiresAt) {
    const msLeft = new Date(expiresAt).getTime() - Date.now();
    const secondsLeft = Math.max(0, Math.floor(msLeft / 1000));
    if (secondsLeft > 0) {
      ttlText = String(secondsLeft);
    }
  }
  setEditTTL(ttlText);
};
|
||||
|
||||
// Leave edit mode and drop every piece of in-progress editor state.
const cancelEdit = () => {
  setEditError(null);
  setEditTTL("");
  setEditValue("");
  setEditingKey(null);
};
|
||||
|
||||
/**
 * Persist the editor contents for `entry`.
 *
 * - Value: the editor text is parsed as JSON when possible and otherwise sent
 *   as a raw string. A string entry whose text was not touched is sent back
 *   unchanged, so a TTL-only edit cannot silently retype "123" into 123 or
 *   "true" into true.
 * - TTL: blank means no `ttl_seconds` is sent. Anything else must be a
 *   positive whole number of seconds; invalid input is reported inline
 *   instead of being dropped silently.
 * - Concurrency: when the entry carries a numeric `version` it is sent as
 *   `if_match_version`; a conflict is surfaced as a retry hint and the list
 *   is reloaded.
 */
const handleEditSave = async (entry: MemoryEntry) => {
  setEditError(null);

  // Validate the TTL before touching the network so a typo ("10abc", "0",
  // "-5") never results in a save that quietly ignores it.
  const ttlText = editTTL.trim();
  let ttlSeconds: number | null = null;
  if (ttlText !== "") {
    const candidate = /^\d+$/.test(ttlText) ? Number.parseInt(ttlText, 10) : Number.NaN;
    if (!Number.isSafeInteger(candidate) || candidate <= 0) {
      setEditError("TTL must be a positive whole number of seconds (leave blank for no expiry).");
      return;
    }
    ttlSeconds = candidate;
  }

  let parsedValue: unknown;
  if (typeof entry.value === "string" && editValue === entry.value) {
    // Untouched string value: keep its type exactly as stored.
    parsedValue = entry.value;
  } else {
    try {
      parsedValue = JSON.parse(editValue);
    } catch {
      // Not valid JSON — store the text as a plain string.
      parsedValue = editValue;
    }
  }

  // if_match_version closes the silent-overwrite hole when two writers
  // race. The handler returns 409 with the current version on mismatch
  // — surface that as a retry hint and reload to pick up the new state.
  const body: Record<string, unknown> = { key: entry.key, value: parsedValue };
  if (typeof entry.version === "number") {
    body.if_match_version = entry.version;
  }
  if (ttlSeconds !== null) {
    body.ttl_seconds = ttlSeconds;
  }

  try {
    await api.post(`/workspaces/${workspaceId}/memory`, body);
    cancelEdit();
    loadMemory();
  } catch (e) {
    const message = e instanceof Error ? e.message : "Failed to save";
    if (message.includes("409") || /if_match_version mismatch/i.test(message)) {
      setEditError("This entry changed since you opened it. Reloading.");
      loadMemory();
    } else {
      setEditError(message);
    }
  }
};
|
||||
|
||||
// Open the awareness URL in a new tab, with the opener and referrer withheld.
const openAwareness = () => {
  const target = "_blank";
  const features = "noopener,noreferrer";
  window.open(awarenessUrl, target, features);
};
|
||||
@@ -308,24 +376,71 @@ export function MemoryTab({ workspaceId }: Props) {
|
||||
|
||||
{expanded === entry.key && (
|
||||
<div className="px-3 pb-2 space-y-2">
|
||||
<pre className="text-[10px] text-ink-mid bg-surface-sunken rounded p-2 overflow-x-auto max-h-40">
|
||||
{JSON.stringify(entry.value, null, 2)}
|
||||
</pre>
|
||||
{editingKey === entry.key ? (
|
||||
<div className="space-y-2">
|
||||
<textarea
|
||||
value={editValue}
|
||||
onChange={(e) => setEditValue(e.target.value)}
|
||||
rows={4}
|
||||
aria-label={`Edit value for ${entry.key}`}
|
||||
className="w-full bg-surface-sunken border border-line rounded px-2 py-1 text-xs font-mono text-ink focus:outline-none focus:border-accent resize-none"
|
||||
/>
|
||||
<input
|
||||
value={editTTL}
|
||||
onChange={(e) => setEditTTL(e.target.value)}
|
||||
placeholder="TTL in seconds (blank = no expiry)"
|
||||
aria-label={`Edit TTL for ${entry.key}`}
|
||||
className="w-full bg-surface-sunken border border-line rounded px-2 py-1 text-xs text-ink focus:outline-none focus:border-accent"
|
||||
/>
|
||||
{editError && (
|
||||
<div role="alert" className="text-[10px] text-bad">
|
||||
{editError}
|
||||
</div>
|
||||
)}
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => handleEditSave(entry)}
|
||||
className="px-3 py-1 bg-accent hover:bg-accent-strong text-xs rounded text-white"
|
||||
>
|
||||
Save
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={cancelEdit}
|
||||
className="px-3 py-1 bg-surface-card hover:bg-surface-elevated text-xs rounded text-ink-mid"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<pre className="text-[10px] text-ink-mid bg-surface-sunken rounded p-2 overflow-x-auto max-h-40">
|
||||
{JSON.stringify(entry.value, null, 2)}
|
||||
</pre>
|
||||
)}
|
||||
<div className="flex items-center justify-between">
|
||||
<span className="text-[9px] text-ink-soft">
|
||||
Updated: {new Date(entry.updated_at).toLocaleString()}
|
||||
</span>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => handleDelete(entry.key)}
|
||||
// hover:text-bad on top of text-bad was a no-op.
|
||||
// Switch to a hover bg + focus-visible ring so
|
||||
// the destructive button visibly responds and
|
||||
// keyboard users see focus.
|
||||
className="text-[10px] text-bad hover:bg-red-950/40 rounded px-1 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-red-500/60"
|
||||
>
|
||||
Delete
|
||||
</button>
|
||||
<div className="flex items-center gap-2">
|
||||
{editingKey !== entry.key && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => beginEdit(entry)}
|
||||
className="text-[10px] text-ink-mid hover:bg-surface-elevated rounded px-1 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/60"
|
||||
>
|
||||
Edit
|
||||
</button>
|
||||
)}
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => handleDelete(entry.key)}
|
||||
className="text-[10px] text-bad hover:bg-red-950/40 rounded px-1 transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-red-500/60"
|
||||
>
|
||||
Delete
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -1,16 +1,105 @@
|
||||
"use client";
|
||||
|
||||
import { useEffect, useRef, useState, useCallback } from "react";
|
||||
import type { WorkspaceNodeData } from "@/store/canvas";
|
||||
|
||||
/** Props accepted by TerminalTab. */
interface Props {
|
||||
/** ID of the workspace this tab belongs to. */
workspaceId: string;
|
||||
/** Workspace metadata from the canvas store. Optional for back-compat
|
||||
* with any caller that still mounts <TerminalTab workspaceId=... />
|
||||
* without threading data through (e.g. tests). When present, the
|
||||
* runtime field gates the early-return below. */
|
||||
data?: WorkspaceNodeData;
|
||||
}
|
||||
|
||||
import { deriveWsBaseUrl } from "@/lib/ws-url";
|
||||
|
||||
const WS_URL = deriveWsBaseUrl();
|
||||
|
||||
export function TerminalTab({ workspaceId }: Props) {
|
||||
/**
|
||||
* NotAvailablePanel — full-tab placeholder with a big terminal-off icon
|
||||
* for runtimes that don't expose a TTY (e.g. external workspaces, where
|
||||
* the platform doesn't own the process). Pre-fix the tab tried to open
|
||||
* a WebSocket against /ws/terminal/<id> for these workspaces, the server
|
||||
* 404'd, and the user saw "Connection failed" — which reads as a bug,
|
||||
* not as "this runtime intentionally has no shell". This banner makes
|
||||
* the absence intentional.
|
||||
*/
|
||||
function NotAvailablePanel({ runtime }: { runtime: string }) {
  // Inline "terminal with a slash through it" glyph: a rounded frame, a
  // prompt chevron, a cursor underscore and a diagonal cancel stroke. Drawn
  // by hand so the panel needs no icon package.
  const terminalOffIcon = (
    <svg
      width="72"
      height="72"
      viewBox="0 0 72 72"
      fill="none"
      aria-hidden="true"
      className="text-ink-soft mb-4"
    >
      {/* Window frame */}
      <rect
        x="10"
        y="14"
        width="52"
        height="44"
        rx="4"
        stroke="currentColor"
        strokeWidth="2.5"
        fill="none"
        opacity="0.6"
      />
      {/* Prompt chevron */}
      <path
        d="M22 30 L30 36 L22 42"
        stroke="currentColor"
        strokeWidth="2.5"
        strokeLinecap="round"
        strokeLinejoin="round"
        opacity="0.7"
      />
      {/* Cursor underscore */}
      <path
        d="M34 44 L44 44"
        stroke="currentColor"
        strokeWidth="2.5"
        strokeLinecap="round"
        opacity="0.7"
      />
      {/* Diagonal cancel slash */}
      <path
        d="M14 14 L58 58"
        stroke="currentColor"
        strokeWidth="3"
        strokeLinecap="round"
      />
    </svg>
  );

  const runtimeLabel = <span className="font-mono text-ink-mid">{runtime}</span>;

  return (
    <div className="flex flex-col items-center justify-center h-full p-8 text-center bg-surface-sunken/30">
      {terminalOffIcon}
      <h3 className="text-sm font-medium text-ink mb-1.5">Terminal not available</h3>
      <p className="text-[11px] text-ink-soft max-w-xs leading-relaxed">
        This workspace runs the{" "}
        {runtimeLabel} runtime,
        which doesn't expose a shell. Use the Chat tab to interact with the
        agent directly.
      </p>
    </div>
  );
}
|
||||
|
||||
/** Runtimes that don't expose a TTY. Keep narrow — only add a runtime
|
||||
* here when its provisioner genuinely has no shell endpoint, otherwise
|
||||
* the user loses access to a real debugging surface. */
|
||||
const RUNTIMES_WITHOUT_TERMINAL = new Set(["external"]);
|
||||
|
||||
export function TerminalTab({ workspaceId, data }: Props) {
|
||||
// Early-return for runtimes that have no shell. Skips the entire
|
||||
// xterm + WebSocket dance below — without this, mounting the tab
|
||||
// for an external workspace pops the WS, gets a 404 from the
|
||||
// workspace-server (no /ws/terminal/<id> route registered for it),
|
||||
// and shows "Connection failed" with a Reconnect button — confusing
|
||||
// because the workspace IS healthy, just doesn't have a TTY.
|
||||
if (data && RUNTIMES_WITHOUT_TERMINAL.has(data.runtime)) {
|
||||
return <NotAvailablePanel runtime={data.runtime} />;
|
||||
}
|
||||
|
||||
const containerRef = useRef<HTMLDivElement>(null);
|
||||
const termRef = useRef<{ dispose: () => void } | null>(null);
|
||||
const wsRef = useRef<WebSocket | null>(null);
|
||||
|
||||
@@ -0,0 +1,141 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Pins two regressions reported on production 2026-05-05:
|
||||
//
|
||||
// 1. IME composition + Enter key: typing Chinese (or any CJK / IME-
|
||||
// composed text) and pressing Enter to commit the candidate
|
||||
// selection used to send the half-typed message. The fix checks
|
||||
// `event.nativeEvent.isComposing` (and a `keyCode === 229`
|
||||
// fallback for older WebKit) before treating Enter as send.
|
||||
//
|
||||
// 2. Markdown link clicks: the agent's ReactMarkdown-rendered links
|
||||
// used to:
|
||||
// - http/https → navigate canvas tab away (user lost canvas state)
|
||||
// - workspace://path / file:///workspace/... / /workspace/... →
|
||||
// browser hit about:blank (unhandled protocol).
|
||||
// Fix: external links get target="_blank" + noopener; in-container
|
||||
// paths route through downloadChatFile (same auth path as chips).
|
||||
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, fireEvent, waitFor } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
// Mock the api module so render doesn't try to talk to a real CP.
|
||||
// Default stubs: every GET resolves to an empty list, every POST to an
// empty object.
const apiGet = vi.fn(
  (_path: string): Promise<unknown> => Promise.resolve([]),
);
const apiPost = vi.fn(
  (_path: string, _body: unknown): Promise<unknown> => Promise.resolve({}),
);

// The factory forwards through arrow functions rather than handing over the
// spies directly, so apiGet/apiPost are only looked up when a call happens.
vi.mock("@/lib/api", () => {
  const api = {
    get: (path: string) => apiGet(path),
    post: (path: string, body: unknown) => apiPost(path, body),
    del: vi.fn(),
    patch: vi.fn(),
    put: vi.fn(),
  };
  return { api };
});

// Canvas store stub: selectors see a state with no queued agent messages;
// a call without a selector gets an empty object.
vi.mock("@/store/canvas", () => ({
  useCanvasStore: vi.fn((selector?: (s: unknown) => unknown) => {
    if (!selector) return {};
    return selector({ agentMessages: {}, consumeAgentMessages: () => [] });
  }),
}));
|
||||
|
||||
// Capture the downloadChatFile call so the markdown-link test can
|
||||
// assert in-container paths route through the authenticated download
|
||||
// path rather than the browser's bare anchor click.
|
||||
const downloadChatFileMock = vi.fn(
  (_workspaceId: string, _att: { uri: string; name: string }) => Promise.resolve(),
);

// Keep every real export of the uploads module and swap out only
// downloadChatFile, which forwards to the spy above.
vi.mock("../chat/uploads", async () => {
  const realUploads = await vi.importActual<typeof import("../chat/uploads")>("../chat/uploads");
  const downloadChatFile = (workspaceId: string, att: { uri: string; name: string }) =>
    downloadChatFileMock(workspaceId, att);
  return { ...realUploads, downloadChatFile };
});
|
||||
|
||||
beforeEach(() => {
  // Fresh call history for every spy.
  apiGet.mockClear();
  apiPost.mockClear();
  downloadChatFileMock.mockClear();

  // jsdom has no scrollIntoView, and ChatTab calls it after every render
  // that adds a message.
  Element.prototype.scrollIntoView = vi.fn();

  // The lazy-history sentinel constructs an IntersectionObserver; a do-nothing
  // stand-in is enough for these tests.
  class NoopIntersectionObserver {
    observe(): void {}
    unobserve(): void {}
    disconnect(): void {}
  }
  const hosts = [window, globalThis] as unknown as Array<{ IntersectionObserver: unknown }>;
  for (const host of hosts) {
    host.IntersectionObserver = NoopIntersectionObserver;
  }
});
|
||||
|
||||
import { ChatTab } from "../ChatTab";
|
||||
|
||||
// Smallest `data` prop these tests supply: only status, runtime and
// currentTask are set, so the literal is cast through `unknown` to the
// type of ChatTab's `data` prop.
const minimalData = {
|
||||
status: "online" as const,
|
||||
runtime: "claude-code",
|
||||
currentTask: null,
|
||||
} as unknown as Parameters<typeof ChatTab>[0]["data"];
|
||||
|
||||
describe("ChatTab — IME-safe Enter key", () => {
  // How long a negative test waits for a send that must never happen.
  const NO_SEND_WINDOW_MS = 150;

  // Asserts that api.post is NOT called within NO_SEND_WINDOW_MS.
  //
  // `waitFor(() => expect(apiPost).not.toHaveBeenCalled())` resolves on its
  // very first poll, so it cannot catch a send that lands a few effects
  // later (the happy-path test below shows the send is asynchronous).
  // Instead, poll for the positive condition and require that the wait
  // times out.
  const expectNoSend = () =>
    expect(
      waitFor(() => expect(apiPost).toHaveBeenCalled(), { timeout: NO_SEND_WINDOW_MS }),
    ).rejects.toThrow();

  it("does NOT send the message when Enter fires during IME composition (isComposing)", async () => {
    render(<ChatTab workspaceId="ws-ime" data={minimalData} />);

    // Find the textarea by its aria-label.
    const textarea = await screen.findByLabelText(/Message to agent/i);
    fireEvent.change(textarea, { target: { value: "你好" } });

    // Simulate the Enter that commits an IME selection: isComposing=true.
    fireEvent.keyDown(textarea, { key: "Enter", isComposing: true });

    // sendMessage POSTs via api.post; it must never fire.
    await expectNoSend();
    // And the input is preserved — ChatTab clears it only on actual send.
    expect((textarea as HTMLTextAreaElement).value).toBe("你好");
  });

  it("does NOT send when keyCode is 229 (older Safari IME fallback)", async () => {
    render(<ChatTab workspaceId="ws-ime2" data={minimalData} />);
    const textarea = await screen.findByLabelText(/Message to agent/i);
    fireEvent.change(textarea, { target: { value: "한국어" } });

    // keyCode 229 is the older-Safari signal that an IME is composing.
    // Some mobile WebKit-based browsers delay setting isComposing on
    // the composition-end Enter; the keyCode fallback covers that.
    fireEvent.keyDown(textarea, { key: "Enter", keyCode: 229 });

    await expectNoSend();
  });

  it("DOES send on a non-composing Enter (the happy path stays intact)", async () => {
    render(<ChatTab workspaceId="ws-ok" data={minimalData} />);
    const textarea = await screen.findByLabelText(/Message to agent/i);
    fireEvent.change(textarea, { target: { value: "hello world" } });

    fireEvent.keyDown(textarea, { key: "Enter" /* no isComposing, no 229 */ });

    // The api.post for /a2a fires inside sendMessage. waitFor since
    // the call goes through several effects.
    await waitFor(() => {
      expect(apiPost).toHaveBeenCalled();
    });
  });

  it("Shift+Enter inserts newline regardless (no send)", async () => {
    render(<ChatTab workspaceId="ws-shift" data={minimalData} />);
    const textarea = await screen.findByLabelText(/Message to agent/i);
    fireEvent.change(textarea, { target: { value: "line 1" } });

    fireEvent.keyDown(textarea, { key: "Enter", shiftKey: true });

    await expectNoSend();
  });
});
|
||||
@@ -0,0 +1,340 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Pins the lazy-loading chat-history pagination added 2026-05-05.
|
||||
//
|
||||
// Pre-fix: ChatTab fetched the newest 50 messages on every mount and
|
||||
// scrolled to bottom, paying full DOM cost up-front even when the user
|
||||
// only wanted to read the last few bubbles. Post-fix: initial load is
|
||||
// bounded to 10 newest, and an IntersectionObserver on a top sentinel
|
||||
// triggers loadOlder() (batch of 20 with `before_ts` cursor) when the
|
||||
// user scrolls up.
|
||||
//
|
||||
// Pinned branches:
|
||||
// 1. Initial fetch carries `limit=10` and NO before_ts (newest-first
|
||||
// slice). Pre-fix this was limit=50.
|
||||
// 2. Server returning fewer than `limit` rows clears `hasMore` so the
|
||||
// top sentinel is removed and the IO observer disconnects — no
|
||||
// "Loading older messages…" spinner on a short conversation.
|
||||
// 3. Server returning exactly `limit` rows on the first batch keeps
|
||||
// hasMore=true so the sentinel mounts (verified indirectly by
|
||||
// asserting the rendered bubble count matches the full page).
|
||||
// 4. The retry button after a failed initial load uses the same
|
||||
// INITIAL_HISTORY_LIMIT (10), not the legacy 50.
|
||||
//
|
||||
// IntersectionObserver / scroll-anchor restoration is exercised by the
|
||||
// E2E synth-canary suite — pinning it in jsdom would require mocking
|
||||
// the observer and faking layout, which is brittler than trusting a
|
||||
// live-DOM canary against the staging tenant.
|
||||
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
// Both ChatTab sub-panels (MyChat + AgentComms) mount simultaneously so
|
||||
// keyboard tab order and aria-controls land on a real DOM. Both fire
|
||||
// /activity GETs on mount: MyChat's hits `type=a2a_receive&source=canvas`,
|
||||
// AgentComms's hits a different filter. Route the mock by URL so each
|
||||
// gets a sensible default and only MyChat's call is what the assertions
|
||||
// scrutinise.
|
||||
// URLs of every MyChat history request, in call order.
const myChatActivityCalls: string[] = [];

// What the next MyChat history request settles with: rows on success, or an
// error to reject with.
let myChatNextResponse: { ok: true; rows: unknown[] } | { ok: false; err: Error } = {
  ok: true,
  rows: [],
};

const apiGet = vi.fn((path: string): Promise<unknown> => {
  const isMyChatHistory =
    path.includes("type=a2a_receive") && path.includes("source=canvas");

  // AgentComms / heartbeat / anything else — an empty array is a safe
  // default that won't blow up the corresponding component's .then().
  if (!isMyChatHistory) {
    return Promise.resolve([]);
  }

  myChatActivityCalls.push(path);
  return myChatNextResponse.ok
    ? Promise.resolve(myChatNextResponse.rows)
    : Promise.reject(myChatNextResponse.err);
});
const apiPost = vi.fn();

// The factory forwards through arrow functions rather than handing over the
// spies directly, so apiGet/apiPost are only looked up when a call happens.
vi.mock("@/lib/api", () => {
  const api = {
    get: (path: string) => apiGet(path),
    post: (path: string, body: unknown) => apiPost(path, body),
    del: vi.fn(),
    patch: vi.fn(),
    put: vi.fn(),
  };
  return { api };
});

// Canvas store stub: selectors see a state with no queued agent messages;
// a call without a selector gets an empty object.
vi.mock("@/store/canvas", () => ({
  useCanvasStore: vi.fn((selector?: (s: unknown) => unknown) => {
    if (!selector) return {};
    return selector({ agentMessages: {}, consumeAgentMessages: () => [] });
  }),
}));
|
||||
|
||||
// Capture IntersectionObserver instances so tests can drive callbacks
|
||||
// directly (jsdom has no layout, so nothing crosses thresholds on its
|
||||
// own) AND assert observer-instance count to pin the perf invariant
|
||||
// that live-message churn doesn't tear down + re-arm the observer.
|
||||
/** Bookkeeping record for one fake IntersectionObserver created during a test. */
type IOInstance = {
|
||||
/** Callback handed to the observer's constructor; tests invoke it directly. */
callback: IntersectionObserverCallback;
|
||||
/** Elements passed to observe(), in call order. */
observed: Element[];
|
||||
/** Set to true once disconnect() has been called. */
disconnected: boolean;
|
||||
};
|
||||
// Every fake observer constructed in the current test; emptied in beforeEach.
const ioInstances: IOInstance[] = [];
|
||||
|
||||
beforeEach(() => {
  // Reset spies and all per-test bookkeeping.
  apiGet.mockClear();
  apiPost.mockReset();
  myChatActivityCalls.length = 0;
  myChatNextResponse = { ok: true, rows: [] };
  ioInstances.length = 0;

  // Fake observer that records its callback, the elements it was asked to
  // observe, and whether it was disconnected, so tests can fire the callback
  // by hand and count instances.
  class RecordingIntersectionObserver {
    private inst: IOInstance;

    constructor(cb: IntersectionObserverCallback) {
      const record: IOInstance = { callback: cb, observed: [], disconnected: false };
      this.inst = record;
      ioInstances.push(record);
    }

    observe(el: Element) {
      this.inst.observed.push(el);
    }

    unobserve() {}

    disconnect() {
      this.inst.disconnected = true;
    }
  }

  // Install on both `window` and `globalThis` so the fake is found however
  // the component under test resolves `IntersectionObserver`.
  const hosts = [window, globalThis] as unknown as Array<{ IntersectionObserver: unknown }>;
  for (const host of hosts) {
    host.IntersectionObserver = RecordingIntersectionObserver;
  }

  // jsdom doesn't implement scrollIntoView; ChatTab calls it after every
  // messages update.
  Element.prototype.scrollIntoView = vi.fn();
});
|
||||
|
||||
/**
 * Fire the callback of a recorded fake observer as if its first observed
 * element had just scrolled into view.
 *
 * @param instanceIdx index into ioInstances, resolved with Array.prototype.at:
 *   the default -1 picks the most recently created observer; pass a
 *   non-negative index to target an earlier one.
 * @throws Error when no observer exists at that index.
 */
function triggerIntersection(instanceIdx = -1) {
  const record = ioInstances.at(instanceIdx);
  if (record === undefined) {
    throw new Error(`no IO instance at ${instanceIdx}`);
  }

  const entry = {
    isIntersecting: true,
    target: record.observed[0],
  } as IntersectionObserverEntry;

  record.callback([entry], record as unknown as IntersectionObserver);
}
|
||||
|
||||
import { ChatTab } from "../ChatTab";
|
||||
|
||||
/**
 * Build one server-shaped `a2a_receive` activity row for test fixtures.
 *
 * `seq` is treated as "minutes after 2026-05-05T00:00:00Z", so rows with a
 * larger seq are strictly newer. For seq 0–59 the timestamp is
 * `2026-05-05T00:MM:00Z`; larger values roll over into the hour slot
 * (seq=60 → `01:00`), so the timestamp stays valid for any seq within the
 * day.
 *
 * @param seq whole number in [0, 1439]; also embedded in the message texts.
 * @returns the activity row with request/response bodies mentioning `seq`.
 * @throws RangeError when `seq` cannot be encoded as a valid time of day.
 */
function makeActivityRow(seq: number): Record<string, unknown> {
  const MINUTES_PER_DAY = 24 * 60;
  if (!Number.isInteger(seq) || seq < 0 || seq >= MINUTES_PER_DAY) {
    throw new RangeError(`seq must be an integer in [0, ${MINUTES_PER_DAY - 1}], got ${seq}`);
  }

  // Zero-pad both slots: an unpadded "10" in a one-digit template, or a
  // minute value of 60 or more, would produce an invalid ISO timestamp.
  const hh = String(Math.floor(seq / 60)).padStart(2, "0");
  const mm = String(seq % 60).padStart(2, "0");

  return {
    activity_type: "a2a_receive",
    status: "ok",
    created_at: `2026-05-05T${hh}:${mm}:00Z`,
    request_body: { params: { message: { parts: [{ kind: "text", text: `user msg ${seq}` }] } } },
    response_body: { result: `agent reply ${seq}` },
  };
}
|
||||
|
||||
// Server returns newest-first; the helper builds a server-shape page
|
||||
// so the order in the rendered messages array matches production.
|
||||
/**
 * Build a page of `count` activity rows covering seq `start` through
 * `start + count - 1`, ordered highest seq first.
 */
function newestFirstPage(start: number, count: number): unknown[] {
  const page: unknown[] = [];
  for (let seq = start + count - 1; seq >= start; seq--) {
    page.push(makeActivityRow(seq));
  }
  return page;
}
|
||||
|
||||
// Smallest `data` prop these tests supply: only status, runtime and
// currentTask are set, so the literal is cast through `unknown` to the
// type of ChatTab's `data` prop.
const minimalData = {
|
||||
status: "online" as const,
|
||||
runtime: "claude-code",
|
||||
currentTask: null,
|
||||
} as unknown as Parameters<typeof ChatTab>[0]["data"];
|
||||
|
||||
describe("ChatTab lazy history pagination", () => {
|
||||
it("initial fetch carries limit=10 (not the legacy 50)", async () => {
|
||||
myChatNextResponse = { ok: true, rows: [makeActivityRow(1)] };
|
||||
render(<ChatTab workspaceId="ws-1" data={minimalData} />);
|
||||
await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
|
||||
const url = myChatActivityCalls[0];
|
||||
expect(url).toContain("limit=10");
|
||||
expect(url).not.toContain("limit=50");
|
||||
// before_ts should NOT be set on the initial fetch — that's the
|
||||
// newest-first slice the user lands on.
|
||||
expect(url).not.toContain("before_ts");
|
||||
});
|
||||
|
||||
it("hides the top sentinel when initial fetch returns fewer than the limit", async () => {
|
||||
// 3 < 10 → server says "no more older history exists"; sentinel
|
||||
// should NOT mount and the "Loading older messages…" line should
|
||||
// never appear (it can't, since the sentinel is what triggers it).
|
||||
myChatNextResponse = {
|
||||
ok: true,
|
||||
rows: [makeActivityRow(1), makeActivityRow(2), makeActivityRow(3)],
|
||||
};
|
||||
render(<ChatTab workspaceId="ws-2" data={minimalData} />);
|
||||
await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
|
||||
await waitFor(() => {
|
||||
expect(screen.queryByText(/Loading chat history/i)).toBeNull();
|
||||
});
|
||||
expect(screen.queryByText(/Loading older messages/i)).toBeNull();
|
||||
});
|
||||
|
||||
it("renders all messages when initial fetch returns exactly the limit", async () => {
|
||||
// 10 == limit → server might have more older rows; sentinel SHOULD
|
||||
// mount so the IO observer can fire loadOlder() on scroll-up. We
|
||||
// verify by checking the rendered bubble count — if hasMore stayed
|
||||
// true the sentinel render path doesn't crash and all 10 rows
|
||||
// produced their pair of bubbles.
|
||||
const fullPage = Array.from({ length: 10 }, (_, i) => makeActivityRow(i + 1));
|
||||
myChatNextResponse = { ok: true, rows: fullPage };
|
||||
render(<ChatTab workspaceId="ws-3" data={minimalData} />);
|
||||
await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
|
||||
await waitFor(() => {
|
||||
expect(screen.queryByText(/Loading chat history/i)).toBeNull();
|
||||
});
|
||||
expect(screen.getAllByText(/user msg/).length).toBe(10);
|
||||
expect(screen.getAllByText(/agent reply/).length).toBe(10);
|
||||
});
|
||||
|
||||
it("retry-after-failure uses limit=10, not the legacy 50", async () => {
|
||||
myChatNextResponse = { ok: false, err: new Error("network down") };
|
||||
render(<ChatTab workspaceId="ws-4" data={minimalData} />);
|
||||
const retry = await screen.findByText(/Retry/);
|
||||
myChatNextResponse = { ok: true, rows: [makeActivityRow(1)] };
|
||||
fireEvent.click(retry);
|
||||
await waitFor(() => expect(myChatActivityCalls.length).toBe(2));
|
||||
const retryUrl = myChatActivityCalls[1];
|
||||
expect(retryUrl).toContain("limit=10");
|
||||
expect(retryUrl).not.toContain("limit=50");
|
||||
});
|
||||
|
||||
it("loadOlder fetches limit=20 with before_ts=oldest.timestamp", async () => {
|
||||
// Initial page = 10 rows in newest-first order (seq 10..1). After
|
||||
// the component reverses to oldest-first for display, messages[0]
|
||||
// is built from seq=1 — the oldest — and its timestamp is what
|
||||
// before_ts should carry.
|
||||
myChatNextResponse = { ok: true, rows: newestFirstPage(1, 10) };
|
||||
render(<ChatTab workspaceId="ws-load-older" data={minimalData} />);
|
||||
await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
|
||||
await waitFor(() => expect(ioInstances.length).toBeGreaterThan(0));
|
||||
|
||||
// Stage the older-batch response, then fire the IO callback.
|
||||
myChatNextResponse = { ok: true, rows: newestFirstPage(0, 1) };
|
||||
triggerIntersection();
|
||||
|
||||
await waitFor(() => expect(myChatActivityCalls.length).toBe(2));
|
||||
const olderUrl = myChatActivityCalls[1];
|
||||
expect(olderUrl).toContain("limit=20");
|
||||
expect(olderUrl).toContain("before_ts=");
|
||||
expect(decodeURIComponent(olderUrl)).toContain("before_ts=2026-05-05T00:01:00Z");
|
||||
});
|
||||
|
||||
it("inflight guard rejects a second IO trigger while first loadOlder is in flight", async () => {
|
||||
myChatNextResponse = { ok: true, rows: newestFirstPage(1, 10) };
|
||||
render(<ChatTab workspaceId="ws-inflight" data={minimalData} />);
|
||||
await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
|
||||
await waitFor(() => expect(ioInstances.length).toBeGreaterThan(0));
|
||||
|
||||
// Hold the next loadOlder fetch open with a manual deferred so we
|
||||
// can fire the second trigger while the first is in-flight.
|
||||
let release!: (rows: unknown[]) => void;
|
||||
const deferred = new Promise<unknown[]>((res) => {
|
||||
release = res;
|
||||
});
|
||||
apiGet.mockImplementationOnce((path: string): Promise<unknown> => {
|
||||
myChatActivityCalls.push(path);
|
||||
return deferred;
|
||||
});
|
||||
|
||||
triggerIntersection(); // start loadOlder #1
|
||||
await waitFor(() => expect(myChatActivityCalls.length).toBe(2));
|
||||
|
||||
// Second IO trigger lands while #1 is still pending.
|
||||
triggerIntersection();
|
||||
triggerIntersection();
|
||||
triggerIntersection();
|
||||
// Without the inflight guard, each of these would have started a
|
||||
// new fetch. With the guard, none of them do — call count stays 2.
|
||||
await new Promise((r) => setTimeout(r, 10));
|
||||
expect(myChatActivityCalls.length).toBe(2);
|
||||
|
||||
// Release the first fetch so it settles (inflight clears in the
|
||||
// finally block). NOTE: no follow-up IO trigger is fired below — we
|
||||
// only confirm the call count is still 2 once the fetch has resolved.
|
||||
release([]);
|
||||
await waitFor(() => expect(myChatActivityCalls.length).toBe(2));
|
||||
});
|
||||
|
||||
it("empty older response clears the scroll anchor and unmounts the sentinel", async () => {
|
||||
// The bug we're pinning: if loadOlder returns 0 rows, the
|
||||
// scrollAnchorRef must be cleared so the next paint doesn't try to
|
||||
// restore against a no-op prepend (which would fight the natural
|
||||
// bottom-pin for any subsequent live message). hasMore flipping to
|
||||
// false is the same flag-flip path; sentinel disappearing is the
|
||||
// observable proxy.
|
||||
myChatNextResponse = { ok: true, rows: newestFirstPage(1, 10) };
|
||||
render(<ChatTab workspaceId="ws-anchor" data={minimalData} />);
|
||||
await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
|
||||
await waitFor(() => expect(ioInstances.length).toBeGreaterThan(0));
|
||||
|
||||
myChatNextResponse = { ok: true, rows: [] }; // empty → reachedEnd
|
||||
triggerIntersection();
|
||||
await waitFor(() => expect(myChatActivityCalls.length).toBe(2));
|
||||
|
||||
// After reachedEnd the sentinel unmounts (hasMore=false). We can't
|
||||
// peek scrollAnchorRef directly, and this test does not drive a live
|
||||
// append to observe the bottom-pin, so the anchor clearing itself is
|
||||
// not asserted here. What we assert is the observable proxy: the
|
||||
// "Loading older messages" line is absent once the empty response has
|
||||
// propagated to hasMore (the same flag-flip path as anchor clearing).
|
||||
await waitFor(() => {
|
||||
expect(screen.queryByText(/Loading older messages/i)).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
it("IntersectionObserver does not churn when older messages prepend", async () => {
|
||||
// Whole-PR perf invariant: prepending older history (the load-bearing
|
||||
// user gesture) must NOT tear down + re-arm the IO observer.
|
||||
// Triggering loadOlder is the cleanest way to drive a messages
|
||||
// mutation from inside the test, since live agent push goes through
|
||||
// a Zustand store that's harder to drive reliably from jsdom.
|
||||
//
|
||||
// Pre-fix, loadOlder depended on `messages`, so every prepend
|
||||
// recreated loadOlder → re-ran the IO effect → new observer. Each
|
||||
// call to triggerIntersection() produced a fresh disconnected
|
||||
// observer + a new live one. Post-fix, the observer survives.
|
||||
myChatNextResponse = { ok: true, rows: newestFirstPage(1, 10) };
|
||||
render(<ChatTab workspaceId="ws-stable-io" data={minimalData} />);
|
||||
await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
|
||||
await waitFor(() => expect(ioInstances.length).toBeGreaterThan(0));
|
||||
|
||||
// Snapshot the observer instance after first paint stabilises.
|
||||
const observerBefore = ioInstances.at(-1);
|
||||
expect(observerBefore).toBeDefined();
|
||||
expect(observerBefore!.disconnected).toBe(false);
|
||||
|
||||
// Trigger three older-batch prepends. Each batch returns the full
|
||||
// OLDER_HISTORY_BATCH (20 rows) so reachedEnd stays false and the
|
||||
// sentinel keeps mounting. Pre-fix, each prepend mutated `messages`
|
||||
// → recreated loadOlder → re-ran the IO effect → new observer.
|
||||
for (let batch = 0; batch < 3; batch++) {
|
||||
myChatNextResponse = {
|
||||
ok: true,
|
||||
rows: newestFirstPage(-(batch + 1) * 20, 20),
|
||||
};
|
||||
const callsBefore = myChatActivityCalls.length;
|
||||
triggerIntersection();
|
||||
await waitFor(() =>
|
||||
expect(myChatActivityCalls.length).toBe(callsBefore + 1),
|
||||
);
|
||||
}
|
||||
|
||||
// The original observer is still the live one — no churn.
|
||||
expect(observerBefore!.disconnected).toBe(false);
|
||||
expect(ioInstances.at(-1)).toBe(observerBefore);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,125 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Regression tests for the ConfigTab section restructure (user feedback
|
||||
// 2026-05-04: "Skills and Tools are having their own tab as plugin, and
|
||||
// Prompt Files are in the file system which can be directly edited. Am
|
||||
// I missing something?" + "Tools should be merged into plugin then, and
|
||||
// for prompt files... should be in another section than in skill& tools").
|
||||
//
|
||||
// What this pins:
|
||||
// 1. The "Skills & Tools" section title is gone.
|
||||
// 2. Editable Skills + Tools tag inputs are gone (managed elsewhere).
|
||||
// 3. A dedicated "Prompt Files" section exists with explanatory text.
|
||||
//
|
||||
// If a future PR re-adds the Skills/Tools tag inputs to ConfigTab, this
|
||||
// suite catches it.
|
||||
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
const apiGet = vi.fn();
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (path: string) => apiGet(path),
|
||||
patch: vi.fn(),
|
||||
put: vi.fn(),
|
||||
post: vi.fn(),
|
||||
del: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
const storeUpdateNodeData = vi.fn();
|
||||
const storeRestartWorkspace = vi.fn();
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: Object.assign(
|
||||
(selector: (s: unknown) => unknown) =>
|
||||
selector({ restartWorkspace: storeRestartWorkspace, updateNodeData: storeUpdateNodeData }),
|
||||
{
|
||||
getState: () => ({
|
||||
restartWorkspace: storeRestartWorkspace,
|
||||
updateNodeData: storeUpdateNodeData,
|
||||
}),
|
||||
},
|
||||
),
|
||||
}));
|
||||
|
||||
vi.mock("../AgentCardSection", () => ({
|
||||
AgentCardSection: () => <div data-testid="agent-card-stub" />,
|
||||
}));
|
||||
|
||||
import { ConfigTab } from "../ConfigTab";
|
||||
|
||||
beforeEach(() => {
|
||||
apiGet.mockReset();
|
||||
apiGet.mockImplementation((path: string) => {
|
||||
if (path === `/workspaces/ws-test`) {
|
||||
return Promise.resolve({ runtime: "claude-code" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/model`) {
|
||||
return Promise.resolve({ model: "claude-opus-4-7" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/provider`) {
|
||||
return Promise.resolve({ provider: "anthropic-oauth", source: "default" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/files/config.yaml`) {
|
||||
return Promise.resolve({ content: "name: test\nruntime: claude-code\n" });
|
||||
}
|
||||
if (path === "/templates") {
|
||||
return Promise.resolve([
|
||||
{ id: "claude-code", name: "Claude Code", runtime: "claude-code", providers: [] },
|
||||
]);
|
||||
}
|
||||
return Promise.reject(new Error(`unmocked api.get: ${path}`));
|
||||
});
|
||||
});
|
||||
|
||||
describe("ConfigTab section restructure", () => {
|
||||
it("does not render a 'Skills & Tools' section title", async () => {
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
await waitFor(() => expect(apiGet).toHaveBeenCalled());
|
||||
// Section button uses the title as its accessible name; should be absent.
|
||||
expect(screen.queryByRole("button", { name: /Skills\s*&\s*Tools/i })).toBeNull();
|
||||
});
|
||||
|
||||
it("does not render an editable Skills tag input", async () => {
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
await waitFor(() => expect(apiGet).toHaveBeenCalled());
|
||||
// TagList renders its label; check no input labelled "Skills" in the form.
|
||||
// (Skills are managed via the dedicated Skills tab.)
|
||||
const skillsLabels = screen
|
||||
.queryAllByText(/^Skills$/)
|
||||
.filter((el) => el.tagName.toLowerCase() === "label");
|
||||
expect(skillsLabels).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("does not render an editable Tools tag input", async () => {
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
await waitFor(() => expect(apiGet).toHaveBeenCalled());
|
||||
// Tools are managed via the Plugins tab — install a plugin → its tools
|
||||
// become available. No reason to type tool names here.
|
||||
const toolsLabels = screen
|
||||
.queryAllByText(/^Tools$/)
|
||||
.filter((el) => el.tagName.toLowerCase() === "label");
|
||||
expect(toolsLabels).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("renders a dedicated 'Prompt Files' section with explanatory copy", async () => {
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
await waitFor(() => expect(apiGet).toHaveBeenCalled());
|
||||
// Section is collapsed by default — find + expand first.
|
||||
const sectionButton = screen.getByRole("button", { name: /Prompt Files/i });
|
||||
expect(sectionButton).toBeTruthy();
|
||||
fireEvent.click(sectionButton);
|
||||
// Explanatory copy mentions system-prompt.md (split across <code> tags
|
||||
// so use textContent on any element rather than the default text matcher).
|
||||
await waitFor(() => {
|
||||
const matches = screen.queryAllByText((_, el) =>
|
||||
(el?.textContent || "").includes("system-prompt.md"),
|
||||
);
|
||||
expect(matches.length).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,156 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// ExternalConnectionSection — coverage for the credential-rotate +
|
||||
// re-show-instructions UI on the Config tab.
|
||||
//
|
||||
// What this pins:
|
||||
// 1. "Show connection info" → GET /external/connection, opens modal
|
||||
// with auth_token=""
|
||||
// 2. "Rotate credentials" → confirm dialog → POST /external/rotate,
|
||||
// opens modal with the returned auth_token
|
||||
// 3. Confirm dialog cancels without firing the POST
|
||||
// 4. API failure surfaces an error chip (no silent loss)
|
||||
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import {
|
||||
render,
|
||||
screen,
|
||||
cleanup,
|
||||
fireEvent,
|
||||
waitFor,
|
||||
} from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
const apiGet = vi.fn();
|
||||
const apiPost = vi.fn();
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (path: string) => apiGet(path),
|
||||
post: (path: string, body?: unknown) => apiPost(path, body),
|
||||
patch: vi.fn(),
|
||||
put: vi.fn(),
|
||||
del: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
import { ExternalConnectionSection } from "../ExternalConnectionSection";
|
||||
|
||||
beforeEach(() => {
|
||||
apiGet.mockReset();
|
||||
apiPost.mockReset();
|
||||
});
|
||||
|
||||
const SAMPLE_INFO = {
|
||||
workspace_id: "ws-test",
|
||||
platform_url: "https://platform.example.test",
|
||||
auth_token: "",
|
||||
registry_endpoint: "https://platform.example.test/registry/register",
|
||||
heartbeat_endpoint: "https://platform.example.test/registry/heartbeat",
|
||||
// In production these snippets are stamped server-side; for the test
|
||||
// we bake workspace_id into each one so the rendered DOM contains a
|
||||
// findable token after the modal mounts.
|
||||
curl_register_template: "# curl ws=ws-test",
|
||||
python_snippet: "# py ws=ws-test",
|
||||
claude_code_channel_snippet: "# claude ws=ws-test",
|
||||
universal_mcp_snippet: "# mcp ws=ws-test",
|
||||
hermes_channel_snippet: "# hermes ws=ws-test",
|
||||
codex_snippet: "# codex ws=ws-test",
|
||||
openclaw_snippet: "# openclaw ws=ws-test",
|
||||
};
|
||||
|
||||
describe("ExternalConnectionSection", () => {
|
||||
it("renders both action buttons", () => {
|
||||
render(<ExternalConnectionSection workspaceId="ws-test" />);
|
||||
expect(screen.getByRole("button", { name: /show connection info/i })).toBeTruthy();
|
||||
expect(screen.getByRole("button", { name: /rotate credentials/i })).toBeTruthy();
|
||||
});
|
||||
|
||||
it("'Show connection info' calls GET /external/connection and opens modal with blank token", async () => {
|
||||
apiGet.mockResolvedValue({ connection: { ...SAMPLE_INFO, auth_token: "" } });
|
||||
render(<ExternalConnectionSection workspaceId="ws-test" />);
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /show connection info/i }));
|
||||
|
||||
await waitFor(() =>
|
||||
expect(apiGet).toHaveBeenCalledWith("/workspaces/ws-test/external/connection"),
|
||||
);
|
||||
// The ExternalConnectModal renders the workspace_id field in its
|
||||
// copy-block. document.body covers Radix's portal mount point.
|
||||
await waitFor(() => {
|
||||
expect(document.body.textContent || "").toContain("ws-test");
|
||||
});
|
||||
});
|
||||
|
||||
it("'Rotate credentials' opens confirm dialog before firing POST", async () => {
|
||||
render(<ExternalConnectionSection workspaceId="ws-test" />);
|
||||
fireEvent.click(screen.getByRole("button", { name: /rotate credentials/i }));
|
||||
|
||||
// Confirm dialog appears with the destructive copy.
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByText(/Rotate workspace credentials\?/i),
|
||||
).toBeTruthy();
|
||||
});
|
||||
expect(screen.getByText(/immediately invalidate the current one/i)).toBeTruthy();
|
||||
|
||||
// POST must NOT have fired yet — only on confirm.
|
||||
expect(apiPost).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("Cancel in confirm dialog dismisses without rotating", async () => {
|
||||
render(<ExternalConnectionSection workspaceId="ws-test" />);
|
||||
fireEvent.click(screen.getByRole("button", { name: /rotate credentials/i }));
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.getByText(/Rotate workspace credentials\?/i)).toBeTruthy(),
|
||||
);
|
||||
fireEvent.click(screen.getByRole("button", { name: /^cancel$/i }));
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.queryByText(/Rotate workspace credentials\?/i)).toBeNull(),
|
||||
);
|
||||
expect(apiPost).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("Confirm in dialog POSTs to /external/rotate and opens modal with returned token", async () => {
|
||||
apiPost.mockResolvedValue({
|
||||
connection: { ...SAMPLE_INFO, auth_token: "fresh-tok-123" },
|
||||
});
|
||||
render(<ExternalConnectionSection workspaceId="ws-test" />);
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /rotate credentials/i }));
|
||||
await waitFor(() =>
|
||||
expect(screen.getByText(/Rotate workspace credentials\?/i)).toBeTruthy(),
|
||||
);
|
||||
// Click the dialog's Rotate button (NOT the section's — the section's
|
||||
// "Rotate credentials" stays mounted; the dialog's "Rotate" is the
|
||||
// commit button). The anchored /^rotate$/i name match excludes
|
||||
// "Rotate credentials"; if several buttons match, click the last one.
|
||||
const rotateBtns = screen.getAllByRole("button", { name: /^rotate$/i });
|
||||
expect(rotateBtns.length).toBeGreaterThanOrEqual(1);
|
||||
fireEvent.click(rotateBtns[rotateBtns.length - 1]);
|
||||
|
||||
await waitFor(() =>
|
||||
expect(apiPost).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-test/external/rotate",
|
||||
{},
|
||||
),
|
||||
);
|
||||
});
|
||||
|
||||
it("Surfaces API errors as a visible chip, not silent loss", async () => {
|
||||
apiGet.mockRejectedValue(new Error("forbidden"));
|
||||
render(<ExternalConnectionSection workspaceId="ws-test" />);
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /show connection info/i }));
|
||||
|
||||
await waitFor(() => {
|
||||
const matches = screen.queryAllByText((_, el) =>
|
||||
(el?.textContent || "").toLowerCase().includes("forbidden"),
|
||||
);
|
||||
expect(matches.length).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,107 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Pins the "Terminal not available" early-return added 2026-05-05.
|
||||
//
|
||||
// Pre-fix: TerminalTab tried to open /ws/terminal/<id> for every
|
||||
// workspace including external runtimes (which have no shell endpoint).
|
||||
// The server returned 404, status flipped to "error", user saw
|
||||
// "Connection failed" with a Reconnect button — reading as a bug
|
||||
// when really the runtime intentionally has no TTY. Now: when
|
||||
// data.runtime is in RUNTIMES_WITHOUT_TERMINAL, render a banner +
|
||||
// big icon instead of mounting xterm/WS.
|
||||
//
|
||||
// Pinned branches:
|
||||
// 1. external runtime → "Terminal not available" banner renders,
|
||||
// runtime name surfaces in the body so the user knows WHY.
|
||||
// 2. external runtime → xterm + WebSocket are NOT initialised.
|
||||
// Verified by checking the global WebSocket constructor isn't
|
||||
// called.
|
||||
// 3. claude-code (or any other runtime) → no banner, normal mount
|
||||
// proceeds. Pre-fix regression cover.
|
||||
// 4. data prop omitted (back-compat with any caller that doesn't
|
||||
// thread it through) → no early-return, falls through to normal
|
||||
// mount. Tested via the absence of the banner.
|
||||
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { render, screen, cleanup } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
// xterm + addon-fit are dynamically imported by TerminalTab. Stub them
|
||||
// so the tests don't pull a 200KB+ dependency just to verify the
|
||||
// not-available banner. The stubs only matter for the non-banner
|
||||
// branches; the banner returns BEFORE the dynamic import.
|
||||
vi.mock("xterm", () => ({
|
||||
Terminal: vi.fn().mockImplementation(() => ({
|
||||
loadAddon: vi.fn(),
|
||||
open: vi.fn(),
|
||||
onData: vi.fn(),
|
||||
write: vi.fn(),
|
||||
dispose: vi.fn(),
|
||||
onResize: vi.fn(),
|
||||
cols: 80,
|
||||
rows: 24,
|
||||
})),
|
||||
}));
|
||||
vi.mock("@xterm/addon-fit", () => ({
|
||||
FitAddon: vi.fn().mockImplementation(() => ({
|
||||
fit: vi.fn(),
|
||||
})),
|
||||
}));
|
||||
|
||||
// Track WebSocket constructor calls — this is the load-bearing
|
||||
// assertion for "external doesn't even try to connect".
|
||||
let wsConstructed = 0;
|
||||
beforeEach(() => {
|
||||
wsConstructed = 0;
|
||||
(globalThis as unknown as { WebSocket: unknown }).WebSocket = vi
|
||||
.fn()
|
||||
.mockImplementation(() => {
|
||||
wsConstructed++;
|
||||
return {
|
||||
addEventListener: vi.fn(),
|
||||
removeEventListener: vi.fn(),
|
||||
send: vi.fn(),
|
||||
close: vi.fn(),
|
||||
readyState: 0,
|
||||
};
|
||||
});
|
||||
});
|
||||
|
||||
import { TerminalTab } from "../TerminalTab";
|
||||
|
||||
const externalData = { runtime: "external", status: "online" } as unknown as Parameters<
|
||||
typeof TerminalTab
|
||||
>[0]["data"];
|
||||
|
||||
const claudeData = { runtime: "claude-code", status: "online" } as unknown as Parameters<
|
||||
typeof TerminalTab
|
||||
>[0]["data"];
|
||||
|
||||
describe("TerminalTab not-available early-return for runtimes without TTY", () => {
|
||||
it("external runtime renders the not-available banner with runtime name", () => {
|
||||
render(<TerminalTab workspaceId="ws-ext" data={externalData} />);
|
||||
expect(screen.getByText(/Terminal not available/i)).not.toBeNull();
|
||||
// Runtime name surfaces so user knows WHY there's no terminal.
|
||||
expect(screen.getByText(/external/)).not.toBeNull();
|
||||
});
|
||||
|
||||
it("external runtime does NOT open a WebSocket", async () => {
|
||||
render(<TerminalTab workspaceId="ws-ext" data={externalData} />);
|
||||
// Wait one timer tick for any deferred init (there shouldn't be any,
|
||||
// but tolerate work queued behind a setTimeout(0) boundary).
|
||||
await new Promise((r) => setTimeout(r, 0));
|
||||
expect(wsConstructed).toBe(0);
|
||||
});
|
||||
|
||||
it("claude-code runtime does NOT render the banner (normal mount)", () => {
|
||||
render(<TerminalTab workspaceId="ws-claude" data={claudeData} />);
|
||||
expect(screen.queryByText(/Terminal not available/i)).toBeNull();
|
||||
});
|
||||
|
||||
it("data prop omitted falls through to normal mount (back-compat)", () => {
|
||||
render(<TerminalTab workspaceId="ws-no-data" />);
|
||||
expect(screen.queryByText(/Terminal not available/i)).toBeNull();
|
||||
});
|
||||
});
|
||||
@@ -1,6 +1,6 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useEffect, useMemo, useRef } from "react";
|
||||
import { useState, useEffect, useLayoutEffect, useMemo, useRef, useCallback } from "react";
|
||||
import ReactMarkdown from "react-markdown";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import { api } from "@/lib/api";
|
||||
@@ -184,13 +184,23 @@ function unwrapErrorText(raw: string | null): string {
|
||||
export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
|
||||
const [messages, setMessages] = useState<CommMessage[]>([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [loadError, setLoadError] = useState<string | null>(null);
|
||||
// Dedup by timestamp+type+peer to handle API load + WebSocket race
|
||||
const seenKeys = useRef(new Set<string>());
|
||||
const bottomRef = useRef<HTMLDivElement>(null);
|
||||
// Mirrors the my-chat scroll behaviour from ChatTab (PR #2903) —
|
||||
// smooth-scroll on a long history gets interrupted by concurrent
|
||||
// renders and lands the panel mid-conversation. Switch the first
|
||||
// arrival to instant; subsequent appends animate.
|
||||
const hasInitialScrollRef = useRef(false);
|
||||
|
||||
// Load history
|
||||
useEffect(() => {
|
||||
// Load history. Extracted so the error-state retry button can
|
||||
// re-invoke without remount. ChatTab uses the same shape
|
||||
// (loadInitial → loadError state → retry button).
|
||||
const loadInitial = useCallback(() => {
|
||||
setLoading(true);
|
||||
setLoadError(null);
|
||||
seenKeys.current.clear();
|
||||
api.get<ActivityEntry[]>(`/workspaces/${workspaceId}/activity?source=agent&limit=50`)
|
||||
.then((entries) => {
|
||||
const filtered = (entries ?? [])
|
||||
@@ -234,10 +244,15 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
|
||||
// the .then body) — the panel just sat on the empty state
|
||||
// with zero signal.
|
||||
console.warn("AgentCommsPanel: load activity failed", err);
|
||||
setLoadError(err instanceof Error ? err.message : String(err));
|
||||
setLoading(false);
|
||||
});
|
||||
}, [workspaceId]);
|
||||
|
||||
useEffect(() => {
|
||||
loadInitial();
|
||||
}, [loadInitial]);
|
||||
|
||||
// Live updates routed through the global ReconnectingSocket. The
|
||||
// previous pattern of `new WebSocket(WS_URL)` per panel had no
|
||||
// onclose / no reconnect, so any drop (idle timeout, browser
|
||||
@@ -358,7 +373,18 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
|
||||
} catch { /* ignore */ }
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
// useLayoutEffect (not useEffect) so the scroll runs BEFORE paint —
|
||||
// otherwise the user sees the panel jump for one frame on every
|
||||
// append. Mirrors ChatTab's MyChatPanel scroll block.
|
||||
useLayoutEffect(() => {
|
||||
if (!hasInitialScrollRef.current && messages.length > 0) {
|
||||
// Instant on first arrival — smooth-scroll on a long history
|
||||
// gets interrupted by concurrent renders and lands the panel
|
||||
// mid-conversation (the chat-opens-in-middle bug class).
|
||||
hasInitialScrollRef.current = true;
|
||||
bottomRef.current?.scrollIntoView({ behavior: "instant" as ScrollBehavior });
|
||||
return;
|
||||
}
|
||||
bottomRef.current?.scrollIntoView({ behavior: "smooth" });
|
||||
}, [messages]);
|
||||
|
||||
@@ -366,6 +392,27 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
|
||||
return <div className="text-xs text-ink-soft text-center py-8">Loading agent communications...</div>;
|
||||
}
|
||||
|
||||
if (loadError !== null && messages.length === 0) {
|
||||
// Mirrors ChatTab my-chat error UI — surfaces the load failure
|
||||
// with a retry button instead of silently rendering empty state.
|
||||
return (
|
||||
<div
|
||||
role="alert"
|
||||
className="mx-2 mt-2 rounded-lg border border-red-800/50 bg-red-950/30 px-3 py-2.5"
|
||||
>
|
||||
<p className="text-[11px] text-bad mb-1.5">
|
||||
Failed to load agent communications: {loadError}
|
||||
</p>
|
||||
<button
|
||||
onClick={loadInitial}
|
||||
className="text-[10px] px-2 py-0.5 rounded bg-red-800/40 text-bad hover:bg-red-700/50 transition-colors"
|
||||
>
|
||||
Retry
|
||||
</button>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (messages.length === 0) {
|
||||
return (
|
||||
<div className="text-xs text-ink-soft text-center py-8">
|
||||
|
||||
@@ -0,0 +1,115 @@
|
||||
// @vitest-environment jsdom
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { render, screen, fireEvent, waitFor } from "@testing-library/react";
|
||||
|
||||
// API mock — tests override per case via apiGetMock.mock*Once (mockReturnValueOnce / mockResolvedValueOnce / mockRejectedValueOnce).
|
||||
const apiGetMock = vi.fn<(url: string) => Promise<unknown>>();
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (url: string) => apiGetMock(url),
|
||||
},
|
||||
}));
|
||||
|
||||
// useSocketEvent — no-op for these render tests; live updates aren't
|
||||
// what we're verifying here.
|
||||
vi.mock("@/hooks/useSocketEvent", () => ({
|
||||
useSocketEvent: () => {},
|
||||
}));
|
||||
|
||||
// Canvas store — peer name resolution.
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: {
|
||||
getState: () => ({
|
||||
nodes: [
|
||||
{ id: "ws-self", data: { name: "Self" } },
|
||||
{ id: "ws-peer", data: { name: "Peer Agent" } },
|
||||
],
|
||||
}),
|
||||
},
|
||||
}));
|
||||
|
||||
// Toaster shim — AgentCommsPanel imports showToast.
|
||||
vi.mock("../../Toaster", () => ({
|
||||
showToast: vi.fn(),
|
||||
}));
|
||||
|
||||
import { AgentCommsPanel } from "../AgentCommsPanel";
|
||||
|
||||
// jsdom doesn't implement scrollIntoView. Tests that observe the call
|
||||
// install a spy here; tests that don't care still need a no-op stub
|
||||
// so the component doesn't throw.
|
||||
const scrollSpy = vi.fn<(opts?: ScrollIntoViewOptions | boolean) => void>();
|
||||
beforeEach(() => {
|
||||
apiGetMock.mockReset();
|
||||
scrollSpy.mockReset();
|
||||
Element.prototype.scrollIntoView = scrollSpy as unknown as Element["scrollIntoView"];
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
describe("AgentCommsPanel — initial-state parity with ChatTab my-chat", () => {
|
||||
it("shows loading text while history fetch is in flight", () => {
|
||||
apiGetMock.mockReturnValueOnce(new Promise(() => { /* never resolves */ }));
|
||||
render(<AgentCommsPanel workspaceId="ws-self" />);
|
||||
expect(screen.getByText("Loading agent communications...")).toBeDefined();
|
||||
});
|
||||
|
||||
it("renders error UI with a Retry button when the history fetch rejects", async () => {
|
||||
apiGetMock.mockRejectedValueOnce(new Error("network down"));
|
||||
render(<AgentCommsPanel workspaceId="ws-self" />);
|
||||
|
||||
// Wait for the error state to render — loading→error transition is async.
|
||||
const alert = await waitFor(() => screen.getByRole("alert"));
|
||||
expect(alert.textContent).toMatch(/Failed to load agent communications/);
|
||||
expect(alert.textContent).toMatch(/network down/);
|
||||
|
||||
// Retry button must be present and trigger a refetch.
|
||||
const retry = screen.getByRole("button", { name: "Retry" });
|
||||
apiGetMock.mockResolvedValueOnce([]); // success on retry
|
||||
fireEvent.click(retry);
|
||||
|
||||
// Two calls total: initial load + retry. Pin via mock call count.
|
||||
await waitFor(() => expect(apiGetMock.mock.calls.length).toBe(2));
|
||||
});
|
||||
|
||||
it("falls back to empty-state copy when load succeeds with zero rows", async () => {
|
||||
apiGetMock.mockResolvedValueOnce([]);
|
||||
render(<AgentCommsPanel workspaceId="ws-self" />);
|
||||
await waitFor(() =>
|
||||
expect(screen.getByText("No agent-to-agent communications yet.")).toBeDefined(),
|
||||
);
|
||||
});
|
||||
|
||||
it("scrollIntoView is called with behavior=instant on the first message arrival", async () => {
|
||||
apiGetMock.mockResolvedValueOnce([
|
||||
{
|
||||
id: "act-1",
|
||||
activity_type: "a2a_send",
|
||||
source_id: "ws-self",
|
||||
target_id: "ws-peer",
|
||||
method: "message/send",
|
||||
summary: "Delegating",
|
||||
request_body: { message: { parts: [{ text: "hi" }] } },
|
||||
response_body: null,
|
||||
status: "ok",
|
||||
created_at: "2026-04-25T18:00:00Z",
|
||||
},
|
||||
]);
|
||||
render(<AgentCommsPanel workspaceId="ws-self" />);
|
||||
|
||||
// The first-arrival scroll is issued from the panel's layout effect
|
||||
// once messages exist — wait for at least one scrollIntoView call.
|
||||
await waitFor(() => expect(scrollSpy.mock.calls.length).toBeGreaterThan(0));
|
||||
|
||||
// The pinned contract: SOME call uses behavior: "instant" — the
|
||||
// first-arrival case. Subsequent appends use "smooth", but those
|
||||
// can't fire here (no live update yet).
|
||||
const sawInstant = scrollSpy.mock.calls.some((args) => {
|
||||
const opts = args[0];
|
||||
return typeof opts === "object" && opts !== null && "behavior" in opts && opts.behavior === "instant";
|
||||
});
|
||||
expect(sawInstant).toBe(true);
|
||||
});
|
||||
});
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { resolveAttachmentHref } from "../uploads";
|
||||
import { isPlatformAttachment, resolveAttachmentHref } from "../uploads";
|
||||
|
||||
describe("resolveAttachmentHref — URI scheme normalisation", () => {
|
||||
const wsId = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee";
|
||||
@@ -39,3 +39,128 @@ describe("resolveAttachmentHref — URI scheme normalisation", () => {
|
||||
expect(resolveAttachmentHref(wsId, "s3://bucket/key")).toBe("s3://bucket/key");
|
||||
});
|
||||
});
|
||||
|
||||
// #2973 follow-up to #2968: exercises the platform-pending: scheme branch
// (poll-mode chat uploads) plus the isPlatformAttachment SSOT helper that
// both the chip-download and markdown-link paths consume.
//
// Before the fix a platform-pending: URI fell through as the raw URI, so
// the browser saw an unhandled-protocol click and landed on about:blank.
// The fix resolves it to the platform pending-uploads endpoint with auth
// headers attached.
describe("resolveAttachmentHref — platform-pending: scheme (poll-mode uploads)", () => {
  // The chat workspace ID deliberately DIFFERS from the one embedded in
  // the URI so each test can tell which of the two the resolver used.
  // Cross-workspace forwarding is real production behavior — a file
  // dragged into one workspace's chat can be referenced from another.
  const chatWs = "chat-ws-aaaaaaaa";
  const sourceWs = "source-ws-bbbbbbbb";

  // Asserts that a malformed URI is handed back exactly as it came in.
  const expectRawPassthrough = (rawUri: string): void => {
    expect(resolveAttachmentHref(chatWs, rawUri)).toBe(rawUri);
  };

  it("resolves a well-formed platform-pending: URI to /pending-uploads/<file>/content", () => {
    const pendingUri = `platform-pending:${sourceWs}/file-12345`;
    const resolved = resolveAttachmentHref(chatWs, pendingUri);
    expect(resolved).toContain(`/workspaces/${sourceWs}/pending-uploads/file-12345/content`);
  });

  it("uses the URI's wsid, NOT the chat workspace_id (cross-workspace forwarding)", () => {
    // The two ids differ — the case PR #2968's commit explicitly calls
    // out. Flipping them would silently route the download to the WRONG
    // workspace's pending-uploads store: a 404 at best, a leak at worst.
    const pendingUri = `platform-pending:${sourceWs}/file-xyz`;
    const resolved = resolveAttachmentHref(chatWs, pendingUri);
    expect(resolved).toContain(`/workspaces/${sourceWs}/`);
    expect(resolved).not.toContain(`/workspaces/${chatWs}/`);
  });

  it("falls back to raw URI when platform-pending: is missing the slash", () => {
    // Defensive: a URI that drifted from the wsid/fileid shape comes back
    // raw instead of becoming a broken /pending-uploads// path. Pinned to
    // catch a future "helpful" change that synthesizes an empty
    // wsid/fileID.
    expectRawPassthrough("platform-pending:no-slash");
  });

  it("falls back to raw URI when platform-pending: has empty fileID", () => {
    expectRawPassthrough("platform-pending:abc/");
  });

  it("falls back to raw URI when platform-pending: has empty wsid", () => {
    expectRawPassthrough("platform-pending:/file-xyz");
  });

  it("regression: exact production repro from #2968 (reno-stars)", () => {
    // Taken from the original PR #2968 body: the chat's markdown-link
    // override fell through on exactly this shape and the browser went
    // to about:blank. Pinning the post-fix output stops a later refactor
    // from reintroducing the bug.
    const reproUri = "platform-pending:d76977b1-uuid/bb0dcaf3-uuid";
    const resolved = resolveAttachmentHref("chat-ws", reproUri);
    expect(resolved).toContain("/workspaces/d76977b1-uuid/pending-uploads/bb0dcaf3-uuid/content");
    expect(resolved).not.toContain("chat-ws");
  });
});
|
||||
|
||||
describe("isPlatformAttachment", () => {
  // Runs the helper over each URI in order and checks it yields `expected`.
  const expectAll = (uris: string[], expected: boolean): void => {
    for (const uri of uris) {
      expect(isPlatformAttachment(uri)).toBe(expected);
    }
  };

  it("returns true for platform-pending: URIs", () => {
    expectAll(["platform-pending:abc/file"], true);
  });

  it("returns true even for malformed platform-pending: URIs", () => {
    // The helper is a SHAPE check only: the caller routes through
    // downloadChatFile, which deals with the malformed case downstream.
    // Pinned so a future helper that "validates" the wsid/fileID shape
    // cannot silently break the auth-attached download flow for
    // in-flight URIs.
    expectAll(["platform-pending:no-slash"], true);
  });

  it("returns true for workspace:<allowed-root> URIs", () => {
    expectAll(["workspace:/configs/foo", "workspace:/workspace/x.pdf"], true);
  });

  it("returns true for file:///<allowed-root> URIs", () => {
    expectAll(["file:///workspace/x"], true);
  });

  it("returns true for absolute paths under allowed roots", () => {
    expectAll(["/home/user/x", "/configs/y"], true);
  });

  it("returns FALSE for bare HTTPS URLs to other origins", () => {
    // Auth-leak class regression: a helper that always answered true
    // would attach workspace tokens to third-party requests, so the
    // negative case is pinned explicitly.
    expectAll(["https://example.com/file", "http://example.com/file"], false);
  });

  it("returns FALSE for non-allowlisted root paths", () => {
    expectAll(["/etc/passwd", "/var/log/x", "/tmp/x"], false);
  });

  it("returns FALSE for empty string", () => {
    expectAll([""], false);
  });

  it("returns FALSE for unrecognised schemes", () => {
    expectAll(["s3://bucket/key", "ftp://server/file"], false);
  });
});
|
||||
|
||||
@@ -44,6 +44,8 @@ export async function uploadChatFiles(
|
||||
* - `workspace:<abs-path>` (our canonical form)
|
||||
* - `file:///workspace/...` (some agents emit this)
|
||||
* - `/workspace/...` (bare absolute path inside the container)
|
||||
* - `platform-pending:<wsid>/<file_id>` (poll-mode upload, staged
|
||||
* on platform side; resolves to /pending-uploads/<file_id>/content)
|
||||
* Everything that looks like an allowed-root container path is
|
||||
* rewritten to the authenticated /chat/download endpoint. HTTP(S)
|
||||
* URIs pass through unchanged so we can also render links to
|
||||
@@ -53,6 +55,35 @@ export function resolveAttachmentHref(
|
||||
workspaceId: string,
|
||||
uri: string,
|
||||
): string {
|
||||
// platform-pending: agent-emitted URI that lives in the platform-side
|
||||
// staging layer (poll-mode chat uploads, see workspace-server's
|
||||
// chat_files.go ~line 690 + pendinguploads.Storage). The wire shape
|
||||
// is `platform-pending:<workspace_id>/<file_id>`. Resolving it
|
||||
// requires hitting GET /workspaces/<wsid>/pending-uploads/<file_id>/content
|
||||
// which streams the bytes with full workspace auth. Without this
|
||||
// case the browser sees an unhandled-protocol click → about:blank,
|
||||
// which was the user-visible bug from 2026-05-05 (reno-stars).
|
||||
if (uri.startsWith("platform-pending:")) {
|
||||
const rest = uri.slice("platform-pending:".length);
|
||||
const slash = rest.indexOf("/");
|
||||
// Defensive: if the URI doesn't have the expected wsid/fileid
|
||||
// shape, fall through to raw-URI handling so the consumer can
|
||||
// still try to render it (rather than producing a broken /pending-
|
||||
// uploads/// path).
|
||||
if (slash > 0) {
|
||||
const wsid = rest.slice(0, slash);
|
||||
const fileID = rest.slice(slash + 1);
|
||||
if (wsid && fileID) {
|
||||
// Use the URI's own workspace_id (the bytes live in THAT
|
||||
// workspace's pending-uploads store), not the chat's
|
||||
// workspace_id — these CAN differ when a user drags a file
|
||||
// into one workspace's chat that gets forwarded to another
|
||||
// (cross-workspace delegation, agent forwarding).
|
||||
return `${PLATFORM_URL}/workspaces/${wsid}/pending-uploads/${fileID}/content`;
|
||||
}
|
||||
}
|
||||
return uri;
|
||||
}
|
||||
const containerPath = normalizeWorkspaceUri(uri);
|
||||
if (containerPath) {
|
||||
return `${PLATFORM_URL}/workspaces/${workspaceId}/chat/download?path=${encodeURIComponent(containerPath)}`;
|
||||
@@ -60,6 +91,14 @@ export function resolveAttachmentHref(
|
||||
return uri;
|
||||
}
|
||||
|
||||
/**
 * Reports whether `uri` refers to a platform-side resource that needs our
 * auth headers. When it does, the caller should fetch it through
 * downloadChatFile instead of letting the browser navigate to it.
 *
 * Any URI starting with `platform-pending:` qualifies on its prefix alone
 * (no wsid/file-id shape validation happens here); everything else
 * qualifies only if normalizeWorkspaceUri yields a non-null result.
 */
export function isPlatformAttachment(uri: string): boolean {
  return uri.startsWith("platform-pending:") || normalizeWorkspaceUri(uri) !== null;
}
|
||||
|
||||
/** Extracts the absolute container path from a workspace-scoped URI,
|
||||
* or null if the URI isn't a container path. The matching roots
|
||||
* mirror the server's `allowedRoots` allowlist. */
|
||||
@@ -96,8 +135,7 @@ export async function downloadChatFile(
|
||||
attachment: ChatAttachment,
|
||||
): Promise<void> {
|
||||
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
||||
const isContainerPath = normalizeWorkspaceUri(attachment.uri) !== null;
|
||||
if (!isContainerPath) {
|
||||
if (!isPlatformAttachment(attachment.uri)) {
|
||||
// External URL — let the browser navigate. Opens in new tab so
|
||||
// the canvas context survives a navigation. `href` here is the
|
||||
// raw URI (http(s), or anything else the agent sent back).
|
||||
|
||||
@@ -22,7 +22,6 @@ export interface ConfigData {
|
||||
// task_budget maps to output_config.task_budget.total (requires beta header task-budgets-2026-03-13)
|
||||
task_budget?: number;
|
||||
prompt_files: string[];
|
||||
shared_context: string[];
|
||||
skills: string[];
|
||||
tools: string[];
|
||||
a2a: { port: number; streaming: boolean; push_notifications: boolean };
|
||||
@@ -40,7 +39,6 @@ export const DEFAULT_CONFIG: ConfigData = {
|
||||
effort: "",
|
||||
task_budget: 0,
|
||||
prompt_files: [],
|
||||
shared_context: [],
|
||||
skills: [],
|
||||
tools: [],
|
||||
a2a: { port: 8000, streaming: true, push_notifications: true },
|
||||
|
||||
@@ -120,7 +120,6 @@ export function toYaml(config: ConfigData): string {
|
||||
if (config.effort) { lines.push(""); simple("effort", config.effort); }
|
||||
if (config.task_budget && config.task_budget > 0) { simple("task_budget", config.task_budget); }
|
||||
if (config.prompt_files?.length) { lines.push(""); list("prompt_files", config.prompt_files); }
|
||||
if (config.shared_context?.length) { lines.push(""); list("shared_context", config.shared_context); }
|
||||
lines.push(""); list("skills", config.skills);
|
||||
if (config.tools?.length) { list("tools", config.tools); }
|
||||
lines.push(""); obj("a2a", config.a2a as unknown as Record<string, unknown>);
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* @vitest-environment jsdom
|
||||
*/
|
||||
import { describe, it, expect, vi, afterEach } from "vitest";
|
||||
import { fetchSession, redirectToLogin } from "../auth";
|
||||
import { fetchSession, redirectToLogin, signOut } from "../auth";
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllGlobals();
|
||||
@@ -110,3 +110,157 @@ describe("redirectToLogin", () => {
|
||||
expect((window.location as unknown as { href: string }).href).toBe(signupHref);
|
||||
});
|
||||
});
|
||||
|
||||
describe("signOut", () => {
  // Where every no-hosted-logout path must land. The stubbed page lives on
  // the tenant subdomain acme.moleculesai.app, whose auth origin is
  // app.moleculesai.app.
  const loginFallback = "https://app.moleculesai.app/cp/auth/login";

  // Shared window.location stub — most tests need the same one.
  function stubLocation(): void {
    const fakeLocation = {
      href: "https://acme.moleculesai.app/orgs",
      pathname: "/orgs",
      hostname: "acme.moleculesai.app",
      protocol: "https:",
    };
    Object.defineProperty(window, "location", { writable: true, value: fakeLocation });
  }

  // Installs a global fetch that resolves to a minimal Response-like
  // object, and hands the mock back so callers can inspect its calls.
  function stubFetchResponse(ok: boolean, status: number, json: () => Promise<unknown>) {
    const fetchMock = vi.fn().mockResolvedValue({ ok, status, json });
    vi.stubGlobal("fetch", fetchMock);
    return fetchMock;
  }

  // Reads back whatever signOut assigned to window.location.href.
  function landedOn(): string {
    return (window.location as unknown as { href: string }).href;
  }

  it("POSTs to /cp/auth/signout with credentials:include", async () => {
    stubLocation();
    const fetchMock = stubFetchResponse(true, 200, async () => ({ ok: true, logout_url: "" }));

    await signOut();

    expect(fetchMock).toHaveBeenCalledTimes(1);
    expect(fetchMock).toHaveBeenCalledWith(
      expect.stringContaining("/cp/auth/signout"),
      expect.objectContaining({ method: "POST", credentials: "include" }),
    );
  });

  it("navigates to provider logout_url when the response includes one", async () => {
    // The hosted-logout path is what actually breaks the SSO re-auth loop
    // reported on PR #2913. Without it, AuthKit's browser cookie keeps the
    // user signed in via SSO and any later /cp/auth/login silently
    // re-auths.
    stubLocation();
    const hostedLogout =
      "https://api.workos.com/user_management/sessions/logout?session_id=cookie&return_to=https%3A%2F%2Fapp.moleculesai.app%2Forgs";
    stubFetchResponse(true, 200, async () => ({ ok: true, logout_url: hostedLogout }));

    await signOut();

    expect(landedOn()).toBe(hostedLogout);
  });

  it("falls back to /cp/auth/login when logout_url is empty (DisabledProvider / dev)", async () => {
    // DisabledProvider returns "" — the local /cp/auth/login redirect is
    // fine in dev/test, where there is no SSO session to escape.
    stubLocation();
    stubFetchResponse(true, 200, async () => ({ ok: true, logout_url: "" }));

    await signOut();

    expect(landedOn()).toBe(loginFallback);
  });

  it("redirects even when the POST fails so the user isn't stuck on an authed page", async () => {
    // Critical UX invariant: clicking 'Sign out' MUST leave the
    // authenticated app even if the network is down or the cookie is
    // already invalid. Anything else looks like a broken button — the
    // exact complaint that triggered this fix.
    stubLocation();
    vi.stubGlobal("fetch", vi.fn().mockRejectedValue(new Error("network down")));

    await signOut();

    expect(landedOn()).toBe(loginFallback);
  });

  it("redirects on 401 (session already invalid) just like 200", async () => {
    // A user whose cookie is already invalid should still see the logout
    // flow complete — no error, no stuck-on-app dead end. A 401 means
    // res.ok=false, so .json() is never read and a missing body is fine.
    stubLocation();
    stubFetchResponse(false, 401, async () => ({}));

    await signOut();

    expect(landedOn()).toBe(loginFallback);
  });

  it("falls back to /cp/auth/login when the response body is malformed", async () => {
    // Defensive parsing: a body that is not valid JSON, lacks logout_url,
    // or carries it with the wrong type must never strand the user on the
    // authed page. The fallback path takes over.
    stubLocation();
    stubFetchResponse(true, 200, async () => {
      throw new Error("not json");
    });

    await signOut();

    expect(landedOn()).toBe(loginFallback);
  });

  it("falls back to /cp/auth/login when logout_url is the wrong type", async () => {
    // Valid JSON still gets type-checked: a non-string logout_url (e.g.
    // server-side bug, version drift) must not crash or open-redirect the
    // user.
    stubLocation();
    stubFetchResponse(true, 200, async () => ({ ok: true, logout_url: 42 }));

    await signOut();

    expect(landedOn()).toBe(loginFallback);
  });
});
|
||||
|
||||
@@ -67,3 +67,80 @@ export function redirectToLogin(screenHint: "sign-up" | "sign-in" = "sign-in"):
|
||||
const dest = `${authOrigin}${AUTH_BASE}/${path}?return_to=${encodeURIComponent(returnTo)}`;
|
||||
window.location.href = dest;
|
||||
}
|
||||
|
||||
/**
 * Signs the user out and always navigates the browser away from the
 * authenticated app.
 *
 * The flow has two layers:
 *   1. POST /cp/auth/signout on the auth origin. CP clears OUR session
 *      cookie and revokes the session at the provider; the response body
 *      (`{"ok": true, "logout_url": "..."}`) carries the AuthKit hosted
 *      logout URL.
 *   2. Navigate the browser to that `logout_url` so the provider's own
 *      BROWSER-side SSO cookie is cleared as well. Skipping this lets
 *      AuthKit silently re-auth via SSO on the next /cp/auth/login, which
 *      leaves the user "still signed in" after pressing Sign out. AuthKit
 *      then redirects to the server-configured return_to (defaults to
 *      APP_URL/orgs).
 *
 * Best-effort by design. A non-2xx response, a network failure, an
 * unparseable body, or a missing/empty/non-string `logout_url`
 * (DisabledProvider, dev) all lead to the fallback: navigate to
 * /cp/auth/login on the auth origin. Leaving the user on a
 * logged-in-looking page after they clicked "Sign out" is the worst
 * outcome, so some navigation always happens when a `window` exists.
 *
 * Failures of the POST never propagate, so callers may disable the button
 * optimistically or simply await this. Without a `window` the function
 * returns after the POST and does not navigate.
 */
export async function signOut(): Promise<void> {
  // Hosted-logout destination; remains "" on every failure path.
  let hostedLogout = "";

  // Fire-and-tolerate the POST. credentials:"include" is mandatory for the
  // cross-origin case, so the SaaS canvas (acme.moleculesai.app) can reach
  // app.moleculesai.app/cp/auth/signout with the session cookie attached.
  try {
    const response = await fetch(`${getAuthOrigin()}${AUTH_BASE}/signout`, {
      method: "POST",
      credentials: "include",
    });
    if (response.ok) {
      // A body that fails to parse becomes null instead of throwing — a
      // malformed response must not strand the user on the authed page.
      const payload: unknown = await response.json().catch(() => null);
      if (typeof payload === "object" && payload !== null) {
        const candidate = (payload as { logout_url?: unknown }).logout_url;
        // Only a non-empty string counts; DisabledProvider (dev/local)
        // sends "" and drops to the login fallback below.
        if (typeof candidate === "string" && candidate !== "") {
          hostedLogout = candidate;
        }
      }
    }
  } catch {
    // Ignored on purpose — the redirect below still happens.
  }

  if (typeof window === "undefined") return;

  if (hostedLogout !== "") {
    // Hosted logout: AuthKit clears its SSO cookie, then redirects to the
    // return_to configured server-side. This is the path that actually
    // breaks the SSO re-auth loop.
    window.location.href = hostedLogout;
    return;
  }

  // Fallback (dev, DisabledProvider, network failure): go straight to the
  // login screen rather than back to the current URL. Returning to a
  // tenant URL would only bounce through /cp/auth/login via AuthGate, and
  // omitting return_to keeps the user from looping back into the org they
  // just left.
  window.location.href = `${getAuthOrigin()}${AUTH_BASE}/login`;
}
|
||||
|
||||
@@ -5,6 +5,13 @@ export const STATUS_CONFIG: Record<string, { dot: string; glow: string; label: s
|
||||
degraded: { dot: "bg-amber-400", glow: "shadow-amber-400/50", label: "Degraded", bar: "from-amber-500/20 to-transparent" },
|
||||
failed: { dot: "bg-red-400", glow: "shadow-red-400/50", label: "Failed", bar: "from-red-500/20 to-transparent" },
|
||||
provisioning: { dot: "bg-sky-400 motion-safe:animate-pulse", glow: "shadow-sky-400/50", label: "Starting", bar: "from-sky-500/20 to-transparent" },
|
||||
// not_configured: derived state from agent_card.configuration_status (PR #2756 chain).
|
||||
// Workspace is reachable (heartbeating, /agent-card serves) but adapter.setup()
|
||||
// failed — typically a missing/rotated LLM credential. Amber to differentiate from
|
||||
// online (green) and failed (red) — the workspace itself is healthy, just needs
|
||||
// configuration. Hover renders agent_card.configuration_error in the tooltip so
|
||||
// the operator sees the exact env var to set.
|
||||
not_configured: { dot: "bg-amber-300", glow: "shadow-amber-300/50", label: "Not configured", bar: "from-amber-400/20 to-transparent" },
|
||||
};
|
||||
|
||||
export function statusDotClass(status: string): string {
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import {
|
||||
getConfigurationStatus,
|
||||
getConfigurationError,
|
||||
} from "../canvas-topology";
|
||||
|
||||
// Tests for the getConfigurationStatus / getConfigurationError helpers
// (issue #467 / PR #2756 chain). Exposing the workspace's
// `agent_card.configuration_status` is the user-visible payoff of
// PR #2756's decoupling — without it a misconfigured workspace looks
// exactly like a healthy one in the canvas tile.

describe("getConfigurationStatus", () => {
  it("returns null when agentCard is null", () => {
    const status = getConfigurationStatus(null);
    expect(status).toBe(null);
  });

  it("returns null when agentCard has no configuration_status", () => {
    const status = getConfigurationStatus({ name: "x" });
    expect(status).toBe(null);
  });

  it("returns 'ready' when agent reports configuration ok", () => {
    const status = getConfigurationStatus({ configuration_status: "ready" });
    expect(status).toBe("ready");
  });

  it("returns 'not_configured' when agent reports setup failed", () => {
    const status = getConfigurationStatus({ configuration_status: "not_configured" });
    expect(status).toBe("not_configured");
  });

  it("ignores unknown values defensively", () => {
    // A future agent that reports a status string we do not recognise yet
    // must not crash the canvas — it is treated as 'no info' (null). The
    // same goes for values that are not strings at all.
    const unrecognised: unknown[] = ["starting", 42, null];
    for (const value of unrecognised) {
      expect(getConfigurationStatus({ configuration_status: value })).toBe(null);
    }
  });
});
|
||||
|
||||
describe("getConfigurationError", () => {
  // Builds a card that reports not_configured plus the given error value.
  const notConfiguredWith = (configuration_error: unknown): Record<string, unknown> => ({
    configuration_status: "not_configured",
    configuration_error,
  });

  it("returns null when agentCard is null", () => {
    expect(getConfigurationError(null)).toBe(null);
  });

  it("returns null when status is 'ready' even if error string present", () => {
    // Defensive: should the agent ship configuration_status=ready next to
    // a stale configuration_error from an earlier boot, the live status
    // flag wins and the stale error is not surfaced.
    const card = {
      configuration_status: "ready",
      configuration_error: "stale: was unset",
    };
    expect(getConfigurationError(card)).toBe(null);
  });

  it("returns the error string when status is 'not_configured'", () => {
    const reason = "RuntimeError: Neither OPENAI_API_KEY nor MINIMAX_API_KEY is set";
    expect(getConfigurationError(notConfiguredWith(reason))).toBe(
      "RuntimeError: Neither OPENAI_API_KEY nor MINIMAX_API_KEY is set",
    );
  });

  it("returns null when status is 'not_configured' but error is missing", () => {
    const card = { configuration_status: "not_configured" };
    expect(getConfigurationError(card)).toBe(null);
  });

  it("returns null when error is empty string", () => {
    // An empty string gives the operator nothing to act on — handled the
    // same as a missing error.
    expect(getConfigurationError(notConfiguredWith(""))).toBe(null);
  });

  it("returns null when error is non-string", () => {
    expect(getConfigurationError(notConfiguredWith({ reason: "object" }))).toBe(null);
  });
});
|
||||
@@ -564,3 +564,42 @@ export function extractSkillNames(agentCard: Record<string, unknown> | null): st
|
||||
.map((skill: Record<string, unknown>) => String(skill.name || skill.id || ""))
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
/**
 * Reads the workspace's self-reported configuration status off its agent
 * card.
 *
 * Counterpart of molecule-core PR #2756: when adapter.setup() fails the
 * runtime mounts a not-configured handler and advertises the failure as
 * agent_card.configuration_status = "not_configured" together with
 * configuration_error = "<reason>". Canvas reads both so it can render a
 * "needs config" tile instead of a confusing "online but silent" one.
 *
 * @param agentCard The agent card object, or null when none is available.
 * @returns "ready" or "not_configured" when the card carries exactly one
 *   of those strings; null for a missing card, a missing field (older
 *   runtime, pre-PR #2756 worker), or any other value. null rather than
 *   undefined, so callers can tell "no info" apart from explicit values
 *   with a strict equality check.
 */
export function getConfigurationStatus(
  agentCard: Record<string, unknown> | null,
): "ready" | "not_configured" | null {
  const reported = agentCard?.configuration_status;
  return reported === "ready" || reported === "not_configured" ? reported : null;
}
|
||||
|
||||
/**
 * Extracts the human-readable configuration error from an agent card.
 *
 * The error is surfaced only while the card's configuration_status is
 * "not_configured"; an error string left beside any other status is
 * ignored. The value is already redacted server-side via secret_redactor
 * (PR #2778), so it is safe to render verbatim in the UI.
 *
 * @param agentCard The agent card object, or null when none is available.
 * @returns The non-empty configuration_error string, or null when the
 *   card is missing, the status is not "not_configured", or the error is
 *   absent, empty, or not a string.
 */
export function getConfigurationError(
  agentCard: Record<string, unknown> | null,
): string | null {
  // A null card also lands here: its status resolves to null.
  if (getConfigurationStatus(agentCard) !== "not_configured") return null;
  const message = agentCard?.configuration_error;
  if (typeof message !== "string" || message === "") return null;
  return message;
}
|
||||
|
||||
@@ -27,11 +27,11 @@ prompt_files:
|
||||
# AGENTS.md-style example:
|
||||
# prompt_files: [AGENTS.md]
|
||||
|
||||
# Files to share with direct children (1-level inheritance)
|
||||
# Children fetch these at startup via GET /workspaces/:id/shared-context
|
||||
shared_context:
|
||||
- architecture.md
|
||||
- conventions.md
|
||||
# NOTE: `shared_context` (parent → child file injection at boot) was removed.
|
||||
# To share knowledge across a team, use memory v2's team:<id> namespace via
|
||||
# the recall_memory MCP tool — the agent pulls it on demand instead of
|
||||
# paying for it at every boot. For large blob-shaped artefacts, see RFC
|
||||
# #2789 (platform-owned shared file storage).
|
||||
|
||||
# Skills to load -- folder names under skills/
|
||||
skills:
|
||||
@@ -123,7 +123,6 @@ env:
|
||||
| `runtime` | No | Adapter to use: `langgraph` (default), `claude-code`, `crewai`, `autogen`, `deepagents`, `openclaw`. See [Agent Runtime Adapters](./cli-runtime.md). |
|
||||
| `model` | Yes | LangChain-compatible provider string (e.g. `anthropic:claude-sonnet-4-6`). Overridden by `MODEL_PROVIDER` env var if set. |
|
||||
| `prompt_files` | No | Ordered list of markdown files to load as system prompt. Defaults to `["system-prompt.md"]` if omitted. `MEMORY.md` and `USER.md` are auto-appended when present so frozen memory snapshots do not need to be duplicated here. Supports any agent framework's file structure (OpenClaw, Claude Code, etc.) |
|
||||
| `shared_context` | No | Files from this workspace's config dir to share with direct children. Children fetch these at startup and inject into their system prompt as `## Parent Context`. 1-level inheritance only (grandchildren don't see grandparent's context). |
|
||||
| `skills` | Yes | List of skill folder names to load from `skills/` |
|
||||
| `tools` | No | Built-in tools from workspace-template |
|
||||
| `memory` | No | Memory backend config (defaults to filesystem) |
|
||||
@@ -157,7 +156,6 @@ The file watcher monitors the entire config directory. When `config.yaml` change
|
||||
| `name`, `description`, `version` | Yes | Rebuild Agent Card with new metadata |
|
||||
| `a2a` | **No** | Port and protocol changes require container restart |
|
||||
| `delegation` | Yes | Retry/timeout defaults take effect on next delegation call |
|
||||
| `shared_context` | Yes | Children fetch on next prompt rebuild; no restart needed |
|
||||
| `sub_workspaces` | **No** | Team structure changes go through `POST /workspaces/:id/expand` |
|
||||
|
||||
See [Skills — Live Reload](./skills.md#live-reload) for the full file watcher flow.
|
||||
|
||||
@@ -24,21 +24,19 @@ When you receive a task, break it into sub-tasks and delegate to your team.
|
||||
Always review work before reporting completion to the caller.
|
||||
```
|
||||
|
||||
### 2. Parent Context (if child workspace)
|
||||
### 2. Team-shared knowledge (on demand)
|
||||
|
||||
If this workspace was created via team expansion (has a `PARENT_ID` env var), it fetches its parent's shared context files at startup via `GET /workspaces/{parent_id}/shared-context`. The parent declares which files to share in its `config.yaml`:
|
||||
Team-scoped knowledge is no longer injected at boot. The previous
|
||||
`shared_context` field + `GET /workspaces/{parent_id}/shared-context`
|
||||
fetch was removed; agents now pull team-shared knowledge on demand via
|
||||
memory v2's `team:<id>` namespace using the `recall_memory` MCP tool.
|
||||
|
||||
```yaml
|
||||
shared_context:
|
||||
- architecture.md
|
||||
- conventions.md
|
||||
```
|
||||
|
||||
These files are injected as a `## Parent Context` section, with each file rendered under a `### {filename}` heading. This gives children the parent's project knowledge (architecture, conventions, API schemas) without exposing the parent's system prompt or full config.
|
||||
|
||||
**1-level inheritance only:** A grandchild sees its direct parent's shared context, not its grandparent's. This mirrors the L2 Team Memory scope.
|
||||
|
||||
**Graceful degradation:** If the parent is offline or the endpoint returns an error, the child starts normally without parent context.
|
||||
This shifts cost from "every boot, always" to "only when the agent
|
||||
asks", and lets team members write to the shared store from anywhere
|
||||
that can resolve the namespace (canvas Memory tab, agent
|
||||
`commit_memory`, admin import). For large blob-shaped artefacts (full
|
||||
architecture docs, brand assets, PDFs) see RFC #2789 (platform-owned
|
||||
shared file storage).
|
||||
|
||||
### 3. Skill Instructions
|
||||
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
# Team Expansion (Recursive Workspaces)
|
||||
|
||||
When a workspace is expanded into a team, it gains sub-workspaces while its own agent remains as the **team lead** (coordinator). This is recursive — sub-workspaces can themselves be expanded into teams, infinitely deep.
|
||||
|
||||
## How It Works
|
||||
|
||||
When Developer PM is expanded into a team:
|
||||
|
||||
```
|
||||
Business Core
|
||||
|
|
||||
+-- Developer PM (agent stays, becomes coordinator)
|
||||
|
|
||||
+-- Frontend Agent (sub-workspace, private scope)
|
||||
+-- Backend Agent (sub-workspace, private scope)
|
||||
+-- QA Agent (sub-workspace, private scope)
|
||||
```
|
||||
|
||||
- Developer PM's agent **still exists** and acts as coordinator
|
||||
- Developer PM receives incoming A2A messages from Business Core
|
||||
- Developer PM's agent decides how to delegate to sub-workspaces
|
||||
- Sub-workspaces talk to Developer PM and to each other (same level)
|
||||
- Sub-workspaces **cannot** talk to Business Core or any workspace outside the team
|
||||
|
||||
## Communication Rules
|
||||
|
||||
| Direction | Allowed? | Example |
|
||||
|-----------|----------|---------|
|
||||
| Parent level -> team lead | Yes | Business Core -> Developer PM |
|
||||
| Team lead -> sub-workspaces | Yes | Developer PM -> Frontend Agent |
|
||||
| Sub-workspace -> team lead | Yes | Frontend Agent -> Developer PM |
|
||||
| Sub-workspace <-> sibling | Yes | Frontend Agent <-> Backend Agent |
|
||||
| Outside -> sub-workspace directly | No (403) | Business Core -> Frontend Agent |
|
||||
| Sub-workspace -> outside directly | No | Frontend Agent -> Business Core |
|
||||
|
||||
The team lead (Developer PM) is the **only** bridge between the team's internal world and the outside.
|
||||
|
||||
## Scoped Registry
|
||||
|
||||
Sub-workspaces register in the platform registry but with a **private scope**. The registry knows about them but enforces access control.
|
||||
|
||||
```
|
||||
Registry:
|
||||
Business Core :8001 scope: public
|
||||
Developer PM :8002 scope: public
|
||||
Frontend Agent :8010 scope: private, parent=Developer PM
|
||||
Backend Agent :8011 scope: private, parent=Developer PM
|
||||
QA Agent :8012 scope: private, parent=Developer PM
|
||||
```
|
||||
|
||||
- The platform can always discover any workspace (for provisioning, monitoring)
|
||||
- The parent workspace can discover its sub-workspaces
|
||||
- Sub-workspaces can discover their siblings (same parent)
|
||||
- Outside workspaces get a **403 Forbidden** if they try to discover a private sub-workspace
|
||||
|
||||
## How to Expand
|
||||
|
||||
Expansion is triggered via `POST /workspaces/:id/expand`. The platform reads the `sub_workspaces` list from the workspace's config and provisions each one. On the canvas, users right-click a workspace node and select "Expand into team."
|
||||
|
||||
Collapsing is the inverse: `POST /workspaces/:id/collapse`. Sub-workspaces are stopped and removed.
|
||||
|
||||
## What Happens on Expansion
|
||||
|
||||
When Developer PM is expanded into a team, the hierarchy changes but the outside view doesn't. Business Core's parent/child relationship to Developer PM is unaffected — Developer PM still responds to the same A2A endpoint.
|
||||
|
||||
The events fired:
|
||||
- `WORKSPACE_EXPANDED` with the new `sub_workspace_ids` in the payload
|
||||
- `WORKSPACE_PROVISIONING` for each new sub-workspace
|
||||
- `WORKSPACE_ONLINE` for each sub-workspace as they come up
|
||||
|
||||
Communication rules are automatically derived from the new hierarchy — no manual wiring needed.
|
||||
|
||||
## Canvas Behavior
|
||||
|
||||
- Children render as embedded mini-cards (`TeamMemberChip`) inside the parent node, not as separate canvas nodes
|
||||
- Each mini-card shows full status: gradient bar, name, tier badge, skills pills, active tasks, descendant count
|
||||
- **Recursive rendering** up to 3 levels deep (`MAX_NESTING_DEPTH = 3`) — sub-cards can contain their own "Team" sections
|
||||
- Parent node dynamically resizes: 210-280px (no children), 320-450px (children), 400-560px (grandchildren)
|
||||
- Eject button (sky-blue arrow icon) on hover extracts a child from the team
|
||||
- "Extract from Team" also available in the right-click context menu
|
||||
- Double-click a team node to zoom/fit to the parent area
|
||||
- The parent workspace node shows a badge with total descendant count
|
||||
|
||||
## Collapsing a Team
|
||||
|
||||
The inverse of expansion, triggered via `POST /workspaces/:id/collapse`:
|
||||
|
||||
1. Each sub-workspace agent wraps up current work and writes a handoff document to memory
|
||||
2. Sub-workspaces are stopped and removed
|
||||
3. The team lead's agent goes back to handling everything directly
|
||||
4. A `WORKSPACE_COLLAPSED` event fires
|
||||
|
||||
Sub-workspace memory is cleaned up based on backend (see [Memory — Cleanup](../architecture/memory.md#cleanup-on-workspace-deletion)).
|
||||
|
||||
## Deleting a Team Workspace
|
||||
|
||||
When a team workspace is deleted:
|
||||
1. Platform shows a warning listing all sub-workspaces that will be deleted
|
||||
2. User can **drag sub-workspaces out** of the team before confirming (promotes them to the parent level)
|
||||
3. On confirmation, cascade delete removes the parent and all remaining sub-workspaces
|
||||
4. `WORKSPACE_REMOVED` events fire for each deleted workspace
|
||||
|
||||
## Related Docs
|
||||
|
||||
- [Communication Rules](../api-protocol/communication-rules.md) — Full access control model
|
||||
- [Core Concepts](../product/core-concepts.md) — Workspace fundamentals
|
||||
- [System Prompt Structure](./system-prompt-structure.md) — How peer capabilities are injected
|
||||
- [Provisioner](../architecture/provisioner.md) — How sub-workspaces are deployed
|
||||
- [Registry & Heartbeat](../api-protocol/registry-and-heartbeat.md) — How registration works
|
||||
- [Event Log](../architecture/event-log.md) — Events fired during expansion
|
||||
- [Canvas UI](../frontend/canvas.md) — Visual behavior of teams
|
||||
@@ -199,7 +199,6 @@ Install safeguards bound the cost of a single install (env-tunable via `PLUGIN_I
|
||||
| `GET` | `/templates` | List available templates. **Requires AdminAuth** (PR #701). |
|
||||
| `GET` | `/org/templates` | List available org templates. **Requires AdminAuth** (PR #701). |
|
||||
| `POST` | `/templates/import` | Import an agent folder as a new template |
|
||||
| `GET` | `/workspaces/:id/shared-context` | Read parent shared-context files |
|
||||
| `GET` | `/workspaces/:id/files` | List files under an allowed root |
|
||||
| `GET` | `/workspaces/:id/files/*path` | Read a file |
|
||||
| `PUT` | `/workspaces/:id/files/*path` | Write a file |
|
||||
|
||||
@@ -41,8 +41,6 @@ Full contract: `docs/runbooks/admin-auth.md`.
|
||||
| GET | /admin/workspaces/:id/test-token | admin_test_token.go — mint a fresh bearer token for E2E scripts; returns 404 unless `MOLECULE_ENV != production` or `MOLECULE_ENABLE_TEST_TOKENS=1` |
|
||||
| GET/POST/DELETE | /admin/secrets[/:key] | secrets.go — legacy aliases for /settings/secrets |
|
||||
| WS | /workspaces/:id/terminal | terminal.go |
|
||||
| POST | /workspaces/:id/expand | team.go |
|
||||
| POST | /workspaces/:id/collapse | team.go |
|
||||
| POST/GET | /workspaces/:id/approvals | approvals.go |
|
||||
| POST | /workspaces/:id/approvals/:id/decide | approvals.go |
|
||||
| GET | /approvals/pending | approvals.go |
|
||||
@@ -68,7 +66,6 @@ Full contract: `docs/runbooks/admin-auth.md`.
|
||||
| GET | /channels/adapters | channels.go (list available platforms) |
|
||||
| POST | /channels/discover | channels.go (auto-detect chats for a bot token) |
|
||||
| POST | /webhooks/:type | channels.go (incoming social webhook) |
|
||||
| GET | /workspaces/:id/shared-context | templates.go |
|
||||
| GET/PUT/DELETE | /workspaces/:id/files[/*path] | templates.go |
|
||||
| GET | /canvas/viewport | viewport.go — open, no auth required (cosmetic, bootstrap-friendly) |
|
||||
| PUT | /canvas/viewport | viewport.go — `CanvasOrBearer` middleware; accepts bearer OR Origin matching `CORS_ORIGINS`. Cosmetic-only route — worst case viewport corruption, recovered by page refresh. |
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
**Status:** living document — update when you ship a feature that touches one backend.
|
||||
**Owner:** workspace-server + controlplane teams.
|
||||
**Last audit:** 2026-05-02 (Claude agent, PR #TBD).
|
||||
**Last audit:** 2026-05-05 (Claude agent — `provisionWorkspaceAuto` / `StopWorkspaceAuto` / `HasProvisioner` SoT pattern landed in PRs #2811 + #2824).
|
||||
|
||||
## Why this exists
|
||||
|
||||
@@ -15,16 +15,39 @@ Every user-visible workspace feature should work on both backends unless it is f
|
||||
|
||||
This document is the canonical matrix. If you are landing a workspace-facing feature, update the row before you merge.
|
||||
|
||||
## How to dispatch (the SoT pattern)
|
||||
|
||||
When a handler needs to start a workspace, stop a workspace, or check whether any backend is available to run one, it MUST go through the centralized dispatcher on `WorkspaceHandler`:
|
||||
|
||||
| Need | Use | Source |
|
||||
|---|---|---|
|
||||
| Start a workspace | `provisionWorkspaceAuto(ctx, ...)` | `workspace.go:130` |
|
||||
| Stop a workspace | `StopWorkspaceAuto(ctx, wsID)` | `workspace.go:172` |
|
||||
| Gate "do we have any backend wired?" | `HasProvisioner()` | `workspace.go:115` |
|
||||
|
||||
Each dispatcher routes to `cpProv.X()` when the SaaS backend is wired, then `provisioner.X()` when the Docker backend is wired, then a defined fallback (`provisionWorkspaceAuto` self-marks-failed; `StopWorkspaceAuto` no-ops; `HasProvisioner` returns false).
|
||||
|
||||
**Rule: do not call `h.cpProv.Stop`, `h.provisioner.Stop`, `h.cpProv.Start`, or `h.provisioner.Start` directly from a handler.** Source-level pins (`TestNoCallSiteCallsDirectProvisionerExceptAuto`, `TestNoCallSiteCallsBareStop`) gate this at CI; they exist because the same drift class shipped twice — TeamHandler.Expand (#2367) bypassed routing on Start, then `team.go:208` + `workspace_crud.go:432` bypassed it on Stop (#2813, #2814) for ~6 months.
|
||||
|
||||
Allowed exceptions (in the source-pin allowlists):
|
||||
- `workspace.go` and `workspace_provision.go` — define the per-backend bodies the dispatcher routes between.
|
||||
- `workspace_restart.go` — pre-dates the dispatchers and uses manual if-cpProv-else dispatch with retry semantics tuned for the restart hot path. Consolidation tracked in #2799.
|
||||
- `container_files.go` — drives the Docker daemon directly for short-lived file-copy containers; no workspace-level Stop semantics involved.
|
||||
|
||||
For "do we have any backend?", use `HasProvisioner()`, never bare `h.provisioner == nil && h.cpProv == nil`. Source-level pin `TestNoBareBothNilCheck` enforces this — added 2026-05-05 after the hongming org-import incident showed the bare check shape was a recurring drift target.
|
||||
|
||||
## The matrix
|
||||
|
||||
| Feature | File(s) | Docker | EC2 | Verdict |
|
||||
|---|---|---|---|---|
|
||||
| **Lifecycle** | | | | |
|
||||
| Create | `workspace_provision.go:19-214` | `provisionWorkspace()` → `provisioner.Start()` | `provisionWorkspaceCP()` → `cpProv.Start()` | ✅ parity |
|
||||
| Create | `workspace.go:130` `provisionWorkspaceAuto` → `provisionWorkspace()` (Docker) / `provisionWorkspaceCP()` (CP) | dispatched | dispatched | ✅ parity (single source of truth, PR #2811) |
|
||||
| Start | `provisioner.go:140-325` | container create + image pull | EC2 `RunInstance` via CP | ✅ parity |
|
||||
| Stop | `provisioner.go:772-785` | `ContainerRemove(force=true)` + optional volume rm | `DELETE /cp/workspaces/:id` | ✅ parity |
|
||||
| Stop | `workspace.go:172` `StopWorkspaceAuto` → `provisioner.Stop()` (Docker) / `cpProv.Stop()` (CP) | dispatched | dispatched | ✅ parity (single source of truth, PR #2824) |
|
||||
| Restart | `workspace_restart.go:45-210` | reads runtime from live container before stop | reads runtime from DB only | ⚠️ divergent — config-change + crash window can boot old runtime on EC2 |
|
||||
| Delete | `workspace_crud.go` | stop + volume rm | stop only (stateless) | ✅ parity (expected divergence on volume cleanup) |
|
||||
| Delete | `workspace_crud.go` `stopAndRemove` → `StopWorkspaceAuto` + Docker-only `RemoveVolume` | stop + volume rm | stop only (stateless — CP has no volumes) | ✅ parity (PR #2824 closed the SaaS-leak gap) |
|
||||
| Org-import (bulk Create) | `org_import.go:178` gates on `h.workspace.HasProvisioner()`; routes through `provisionWorkspaceAuto` per workspace | dispatched | dispatched | ✅ parity (PR #2811 closed the SaaS-skip gate) |
|
||||
| Team-collapse (bulk Stop) | `team.go:206` calls `StopWorkspaceAuto` for each child | dispatched | dispatched | ✅ parity (PR #2824 closed the SaaS-leak gap) |
|
||||
| **Secrets** | | | | |
|
||||
| Create / update | `secrets.go` | DB insert, injected at container start | DB insert, injected via user-data at boot | ✅ parity |
|
||||
| Redaction | `workspace_provision.go:251` | applied at memory-seed time | applied at agent runtime | ⚠️ divergent — timing differs |
|
||||
@@ -76,7 +99,23 @@ This document is the canonical matrix. If you are landing a workspace-facing fea
|
||||
|
||||
- **`tools/check-template-parity.sh`** (this repo) — ensures `install.sh` and `start.sh` in a template repo forward identical sets of provider keys. Wire into each template repo's CI as `bash $MONOREPO/tools/check-template-parity.sh install.sh start.sh`.
|
||||
- **Contract tests** (stub) — `workspace-server/internal/provisioner/backend_contract_test.go` defines the behaviors every `provisioner.Provisioner` implementation must satisfy. Compilation fails when a method drifts between `Docker` and `CPProvisioner`. Scenario-level runs are `t.Skip`'d today pending drift risk #6 (see above) — compile-time assertions still catch method drift.
|
||||
- **Source-level dispatcher pins** — `workspace_provision_auto_test.go` enforces the SoT pattern documented above:
|
||||
- `TestNoCallSiteCallsDirectProvisionerExceptAuto` — no handler calls `.provisionWorkspace(` or `.provisionWorkspaceCP(` directly outside the dispatcher's allowlist.
|
||||
- `TestNoCallSiteCallsBareStop` — no handler calls `.provisioner.Stop(` or `.cpProv.Stop(` directly outside the dispatcher's allowlist (strips Go comments before substring match so archaeology in code comments doesn't trip the gate).
|
||||
- `TestNoBareBothNilCheck` — no production code uses `h.provisioner == nil && h.cpProv == nil`; must use `!h.HasProvisioner()`.
|
||||
- `TestOrgImportGate_UsesHasProvisionerNotBareField` — pins the org-import provisioning gate against the bare-Docker-check shape that caused the 2026-05-05 hongming incident.
|
||||
|
||||
## How to update this doc
|
||||
|
||||
When you land a feature that touches a handler dispatch on `h.cpProv != nil`, add or update the matching row. If you can't implement both backends in the same PR, mark the row `docker-only` or `ec2-only` and file an issue tracking the gap.
|
||||
|
||||
### When you add a NEW dispatch site
|
||||
|
||||
If you find yourself writing `if h.cpProv != nil { ... } else if h.provisioner != nil { ... }` for a new operation (Pause, Hibernate, Snapshot, etc.):
|
||||
|
||||
1. Add a `<Op>WorkspaceAuto` method on `WorkspaceHandler` next to the existing dispatchers. Mirror the docstring shape: routing, no-backend fallback, ordering rationale.
|
||||
2. Add a source-level pin in `workspace_provision_auto_test.go` that matches the bare-call shape your dispatcher replaces and fails when a handler reintroduces it.
|
||||
3. Add a row to the matrix above with the dispatcher reference.
|
||||
4. If your operation has retry semantics specific to a hot path, leave them in the original location for now and file a follow-up under #2799 — don't bake retry into the generic dispatcher unless every caller benefits.
|
||||
|
||||
The pattern is "one dispatcher per verb." Don't fold every operation into `provisionWorkspaceAuto` — different verbs have different no-backend fallbacks (mark-failed for Start, no-op for Stop, false for Has).
|
||||
|
||||
@@ -336,8 +336,6 @@ This same logic governs: A2A delegation, memory scope enforcement, activity visi
|
||||
|
||||
| Method | Endpoint | Purpose |
|
||||
|--------|----------|---------|
|
||||
| `POST` | `/workspaces/:id/expand` | Expand workspace into team (become coordinator) |
|
||||
| `POST` | `/workspaces/:id/collapse` | Collapse team back to single workspace |
|
||||
|
||||
### Files, Terminal, Templates, Bundles (8 endpoints)
|
||||
|
||||
@@ -523,7 +521,8 @@ runtime_config: # Runtime-specific settings
|
||||
skills: ["skill1", "skill2"] # Folder names under skills/
|
||||
tools: ["web_search", "filesystem"] # Built-in tool names
|
||||
prompt_files: ["system-prompt.md"] # Additional prompt text files
|
||||
shared_context: [] # Files from parent workspace
|
||||
# `shared_context` was removed; team-shared knowledge now lives in memory v2's
|
||||
# team:<id> namespace (recall_memory MCP tool). See RFC #2789 for shared files.
|
||||
|
||||
a2a:
|
||||
port: 8000
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
# E2E coverage matrix
|
||||
|
||||
This document is the source of truth for which E2E suites guard which surfaces and which gates are wired up where. Read this before adding a new E2E or moving a check between branches.
|
||||
|
||||
## Suites
|
||||
|
||||
| Workflow file | Job (= required-check name) | What it covers | Cron |
|
||||
|---|---|---|---|
|
||||
| `e2e-api.yml` | `E2E API Smoke Test` | A2A handshake, registry/register, /workspaces/:id/a2a forward, structured-event emission. Lightweight enough to run on every PR. | — |
|
||||
| `e2e-staging-canvas.yml` | `Canvas tabs E2E` | Canvas-tab Playwright UX checks against staging — config tab, secrets tab, agent-card tab, Activity hydration. | weekly Sun 08:00 UTC |
|
||||
| `e2e-staging-saas.yml` | `E2E Staging SaaS` | Full lifecycle: org creation → workspace provision (CP path) → A2A delegation → status/heartbeat → workspace delete → EC2 termination. The integration test that catches the silent-drop bug class (#2486 / #2811 / #2813 / #2814). | daily 07:00 UTC |
|
||||
| `e2e-staging-external.yml` | `E2E Staging External Runtime` | External-runtime registration + heartbeat staleness sweep + `/registry/peers` resolution. Validates the OSS-templated workspace path. | daily 07:30 UTC |
|
||||
| `e2e-staging-sanity.yml` | `Intentional-failure teardown sanity` | Inverted assertion — the run MUST fail. Validates the leak-detection self-check itself; not for general gating. | weekly Mon 06:00 UTC |
|
||||
| `continuous-synth-e2e.yml` | `Synthetic E2E against staging` | Standing background coverage between PR runs. Catches drift in production-like staging that PR-time E2Es miss. | every 15 min |
|
||||
|
||||
## Required-check status (branch protection)
|
||||
|
||||
| Suite | staging required | main required |
|
||||
|---|---|---|
|
||||
| `E2E API Smoke Test` | ✅ this PR | ✅ |
|
||||
| `Canvas tabs E2E` | ✅ this PR | (see follow-up) |
|
||||
| `E2E Staging SaaS` | ❌ — needs always-emit refactor | ❌ |
|
||||
| `E2E Staging External Runtime` | ❌ — needs always-emit refactor | ❌ |
|
||||
| `Intentional-failure teardown sanity` | ❌ inverted assertion, never required | ❌ |
|
||||
| `Synthetic E2E against staging` | ❌ cron-only, not a per-PR gate | ❌ |
|
||||
|
||||
## Why the always-emit pattern matters
|
||||
|
||||
Branch protection requires a *check name* to land at SUCCESS for every PR. Workflows with `paths:` filters that exclude a PR never run, so the check name never appears, and the PR sits BLOCKED forever.
|
||||
|
||||
The pattern that supports being required is:
|
||||
|
||||
1. Workflow always triggers on push/PR to the protected branch.
|
||||
2. A `detect-changes` job uses `dorny/paths-filter` to decide if real work runs.
|
||||
3. The protected job runs unconditionally and either (a) does real work when the paths matched, or (b) emits a no-op SUCCESS step when they did not.
|
||||
|
||||
`e2e-api.yml` and `e2e-staging-canvas.yml` already have this shape. `e2e-staging-saas.yml` and `e2e-staging-external.yml` use plain `paths:` filters and need the refactor before they can be required (filed as follow-up).
|
||||
|
||||
## Adding a new E2E suite
|
||||
|
||||
1. Pick a category: smoke test, full lifecycle, fault injection, or drift detection. Pre-existing suites split along these lines.
|
||||
2. Use the always-emit shape so the check name can be made required.
|
||||
3. Add a row to the matrix above.
|
||||
4. Decide cron cadence based on cost + how fast drift would otherwise be caught.
|
||||
5. If you want it required, add to the relevant branch protection via `tools/branch-protection/apply.sh` (this PR adds the script).
|
||||
|
||||
## When to break glass — temporarily skip a required E2E
|
||||
|
||||
Don't. If an E2E is intermittently flaky, fix the test or move it out of required. The point of a required check is that it's load-bearing; bypassing one with admin override teaches the next operator the gate is optional.
|
||||
|
||||
If a production incident requires bypassing a required check, document the override in the incident postmortem, with a same-week follow-up to either fix the test or remove the check from the required list.
|
||||
|
||||
## Related issues / PRs
|
||||
|
||||
- #2486 — silent-drop bug class that the SaaS E2E now catches
|
||||
- PR #2811 — `provisionWorkspaceAuto` consolidation (org-import SaaS gate)
|
||||
- PR #2824 — `StopWorkspaceAuto` mirror (closes #2813 + #2814)
|
||||
- Follow-up: refactor `e2e-staging-saas` + `e2e-staging-external` to always-emit (so they can be required)
|
||||
@@ -186,4 +186,3 @@ So the UI now exposes more operational failure state directly instead of silentl
|
||||
- [Quickstart](../quickstart.md)
|
||||
- [Platform API](../api-protocol/platform-api.md)
|
||||
- [Workspace Runtime](../agent-runtime/workspace-runtime.md)
|
||||
- [Team Expansion](../agent-runtime/team-expansion.md)
|
||||
|
||||
+1
-1
@@ -18,7 +18,7 @@ lands in the watch list with a colliding term, add a row here.
|
||||
| **plugin** | A directory under `plugins/` packaging one or more skills or an MCP server wrapper, installable per-workspace via `POST /workspaces/:id/plugins`. Governed by `plugin.yaml`. | **Langflow**: a visual UI node / component in a flowchart. **CrewAI**: a Python-importable callable registered as a capability. |
|
||||
| **agent** | A persistent containerized workspace running continuously — an identity with memory, a role, and a schedule. Not a one-shot invocation. | Most frameworks (AutoGPT, LangChain agents, OpenAI Assistants): a stateless function-call loop. No persistence between invocations unless explicitly checkpointed. |
|
||||
| **flow** | A task execution within a workspace — a request enters, the agent runs tools, emits a response, logs activity. No explicit graph abstraction. | **Langflow**: a directed graph of nodes you author visually. **LangGraph**: a stateful graph of callable nodes. Our "flow" is an imperative timeline, not a graph. |
|
||||
| **team** | A named cluster of workspaces under a PM (org template `expand_team`). Used for role grouping in Canvas. | **CrewAI**: a "crew" is a sequence of agents that pass a task through a declared order. Our "team" is an org-chart abstraction, not an execution order. |
|
||||
| **team** | A named cluster of workspaces under a PM. Used for role grouping in Canvas. | **CrewAI**: a "crew" is a sequence of agents that pass a task through a declared order. Our "team" is an org-chart abstraction, not an execution order. |
|
||||
| **skill** | A directory with `SKILL.md` that an agent invokes via the `Skill` tool. Skills are documentation + optional scripts that teach an agent a recipe. | **Anthropic Skills API**: nearly identical. **CrewAI tool**: closer to our plugin's MCP tool, not our skill. |
|
||||
| **channel** | An outbound/inbound social integration (Telegram, Slack, …) per-workspace, wired in `workspace_channels`. | Slack's "channel": the container for messages. We use "channel" for the adapter + credentials, not the conversation itself. |
|
||||
| **runtime** | The execution engine image tag for a workspace: one of `langgraph`, `claude-code`, `openclaw`, `crewai`, `autogen`, `deepagents`, `hermes`. | **LangGraph runtime**: the Python process running the graph. We use "runtime" for the Docker image + adapter pairing, not the inner process. |
|
||||
|
||||
@@ -166,8 +166,6 @@ list_workspaces
|
||||
|
||||
| MCP Tool | API Route | Method | Description |
|
||||
|----------|-----------|--------|-------------|
|
||||
| `expand_team` | `/workspaces/:id/expand` | POST | Expand team node |
|
||||
| `collapse_team` | `/workspaces/:id/collapse` | POST | Collapse team node |
|
||||
|
||||
### Templates & Bundles
|
||||
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
# Workspace Runtime PyPI Package
|
||||
|
||||
## Requires Python >= 3.11
|
||||
|
||||
The wheel pins `requires_python>=3.11`. On Python 3.10 or older, `pip install
|
||||
molecule-ai-workspace-runtime` fails with `Could not find a version that
|
||||
satisfies the requirement (from versions: none)` — the pin filters the only
|
||||
available artifact before pip even attempts install. Upgrade the interpreter
|
||||
(`brew install python@3.12` / `apt install python3.12` / etc.) or use a
|
||||
3.11+ venv.
|
||||
|
||||
## Overview
|
||||
|
||||
The shared workspace runtime infrastructure has **one editable source** and
|
||||
|
||||
@@ -54,10 +54,18 @@ TOP_LEVEL_MODULES = {
|
||||
"a2a_client",
|
||||
"a2a_executor",
|
||||
"a2a_mcp_server",
|
||||
"a2a_response",
|
||||
"a2a_tools",
|
||||
"a2a_tools_delegation",
|
||||
"a2a_tools_inbox",
|
||||
"a2a_tools_memory",
|
||||
"a2a_tools_messaging",
|
||||
"a2a_tools_rbac",
|
||||
"adapter_base",
|
||||
"agent",
|
||||
"agents_md",
|
||||
"boot_routes",
|
||||
"card_helpers",
|
||||
"config",
|
||||
"configs_dir",
|
||||
"consolidation",
|
||||
@@ -67,18 +75,25 @@ TOP_LEVEL_MODULES = {
|
||||
"executor_helpers",
|
||||
"heartbeat",
|
||||
"inbox",
|
||||
"inbox_uploads",
|
||||
"initial_prompt",
|
||||
"internal_chat_uploads",
|
||||
"internal_file_read",
|
||||
"main",
|
||||
"mcp_cli",
|
||||
"mcp_doctor",
|
||||
"mcp_heartbeat",
|
||||
"mcp_inbox_pollers",
|
||||
"mcp_workspace_resolver",
|
||||
"molecule_ai_status",
|
||||
"not_configured_handler",
|
||||
"platform_auth",
|
||||
"platform_inbound_auth",
|
||||
"plugins",
|
||||
"preflight",
|
||||
"prompt",
|
||||
"runtime_wedge",
|
||||
"secret_redactor",
|
||||
"shared_runtime",
|
||||
"smoke_mode",
|
||||
"transcript_auth",
|
||||
@@ -278,10 +293,37 @@ directory** by the `publish-runtime` GitHub Actions workflow on every
|
||||
Operators running an agent outside the platform's container fleet
|
||||
(any runtime that supports MCP stdio — Claude Code, hermes, codex,
|
||||
etc.) can install this wheel and run the universal MCP server
|
||||
locally:
|
||||
locally.
|
||||
|
||||
### Requirements
|
||||
|
||||
* **Python ≥3.11.** The wheel sets `requires-python = ">=3.11"`. On
|
||||
older interpreters `pip install` returns the cryptic
|
||||
`Could not find a version that satisfies the requirement` — that
|
||||
message is pip filtering this wheel out, NOT the package missing
|
||||
from PyPI. Upgrade with `brew install python@3.12` /
|
||||
`apt install python3.12` / `pyenv install 3.12` first.
|
||||
* **`pipx` recommended over `pip`.** `pipx install` puts
|
||||
`molecule-mcp` on PATH automatically and isolates the runtime's
|
||||
deps from your system Python. Plain `pip install --user` works
|
||||
but the binary lands in `~/.local/bin` (Linux) or
|
||||
`~/Library/Python/3.X/bin` (macOS) which is often not on PATH on
|
||||
a fresh shell — `claude mcp add molecule -- molecule-mcp` then
|
||||
fails with "command not found" at first use.
|
||||
|
||||
### Install
|
||||
|
||||
```sh
|
||||
# Recommended:
|
||||
pipx install molecule-ai-workspace-runtime
|
||||
|
||||
# Alternative (manage PATH yourself):
|
||||
pip install --user molecule-ai-workspace-runtime
|
||||
```
|
||||
|
||||
### Run
|
||||
|
||||
```sh
|
||||
pip install molecule-ai-workspace-runtime
|
||||
WORKSPACE_ID=<uuid> \\
|
||||
PLATFORM_URL=https://<tenant>.staging.moleculesai.app \\
|
||||
MOLECULE_WORKSPACE_TOKEN=<bearer> \\
|
||||
@@ -294,10 +336,66 @@ runtimes already get via the workspace's auto-spawned MCP. Register
|
||||
the binary in your agent's MCP config (e.g. Claude Code's
|
||||
`claude mcp add molecule -- molecule-mcp` with the env above).
|
||||
|
||||
### Keeping the token out of shell history
|
||||
|
||||
Inline `MOLECULE_WORKSPACE_TOKEN=<bearer>` ends up in `~/.zsh_history`
|
||||
and (when registered via `claude mcp add`) plaintext in
|
||||
`~/.claude.json`. To avoid that, write the token to a 0600 file and
|
||||
point `MOLECULE_WORKSPACE_TOKEN_FILE` at it:
|
||||
|
||||
```sh
|
||||
umask 077
|
||||
printf '%s' "<bearer>" > ~/.config/molecule/token
|
||||
WORKSPACE_ID=<uuid> \\
|
||||
PLATFORM_URL=https://<tenant>.staging.moleculesai.app \\
|
||||
MOLECULE_WORKSPACE_TOKEN_FILE=$HOME/.config/molecule/token \\
|
||||
molecule-mcp
|
||||
```
|
||||
|
||||
Token resolution order: `MOLECULE_WORKSPACE_TOKEN` (inline env) →
|
||||
`MOLECULE_WORKSPACE_TOKEN_FILE` (path) → `${CONFIGS_DIR}/.auth_token`
|
||||
(in-container default).
|
||||
|
||||
The token comes from the canvas → Tokens tab. Restarting an external
|
||||
workspace from the canvas no longer revokes the token (PR #2412), so
|
||||
operator tokens persist across status nudges.
|
||||
|
||||
### Push vs poll delivery (Claude Code specifics)
|
||||
|
||||
By default the inbox runs in **poll mode** — every turn the agent
|
||||
calls `wait_for_message`, which blocks up to ~60s on
|
||||
`/activity?since_id=…`. Real-time push delivery is also supported,
|
||||
but on Claude Code it requires THREE conditions, ALL of which must
|
||||
hold:
|
||||
|
||||
1. **The MCP server declares `experimental.claude/channel`** — this
|
||||
wheel does (see `_build_initialize_result`). Nothing for you to
|
||||
do.
|
||||
2. **Claude Code installs the server as a marketplace plugin** — a
|
||||
plain `claude mcp add molecule -- molecule-mcp` produces a
|
||||
non-plugin-sourced server, which Claude Code rejects with
|
||||
`channel_enable requires a marketplace plugin`. Until the
|
||||
official `moleculesai/claude-code-plugin` marketplace lands
|
||||
(tracking [#2936](https://github.com/Molecule-AI/molecule-core/issues/2936)),
|
||||
operators who want push must scaffold their own local marketplace
|
||||
under
|
||||
`~/.claude/marketplaces/molecule-local/` containing a
|
||||
`marketplace.json` + `plugin.json` that points at this wheel.
|
||||
3. **Claude Code is launched with the dev-channels flag** — pass
|
||||
`--dangerously-load-development-channels plugin:molecule@<marketplace>`
|
||||
on the `claude` invocation. Without this flag the channel
|
||||
capability is silently ignored.
|
||||
|
||||
Symptom of any condition failing: messages arrive but only via the
|
||||
poll path (every ~1–60s), not real-time. There's currently no
|
||||
diagnostic surfaced — `molecule-mcp doctor` (tracking
|
||||
[#2937](https://github.com/Molecule-AI/molecule-core/issues/2937)) is
|
||||
planned.
|
||||
|
||||
If you don't need real-time push, the default poll path works
|
||||
universally with no extra setup; both modes converge on the same
|
||||
`inbox_pop` ack so messages never duplicate.
|
||||
|
||||
See [`docs/workspace-runtime-package.md`](https://github.com/Molecule-AI/molecule-core/blob/main/docs/workspace-runtime-package.md)
|
||||
for the publish flow and architecture.
|
||||
"""
|
||||
|
||||
Executable
+216
@@ -0,0 +1,216 @@
|
||||
#!/usr/bin/env bash
|
||||
# scripts/check-stale-promote-pr.sh
|
||||
#
|
||||
# Scan open auto-promote PRs (base=main head=staging) for the
|
||||
# silent-block failure mode that motivated issue #2975:
|
||||
# - PR sat for hours with mergeStateStatus=BLOCKED
|
||||
# - reviewDecision=REVIEW_REQUIRED (auto-merge armed but waiting
|
||||
# on a human approval that never comes)
|
||||
#
|
||||
# When found, emit:
|
||||
# - GitHub Actions notice/warning lines (workflow summary surface)
|
||||
# - Optionally post a comment on the PR (--comment)
|
||||
#
|
||||
# Exit code is the count of stale PRs found, capped at 125 so callers
|
||||
# can detect "alarm fired" via `if ! check-stale-promote-pr.sh; then …`.
|
||||
# Exit 0 = clean; exit N (≥1) = N stale PRs need attention (125 = 125 or more).
|
||||
#
|
||||
# Used by .github/workflows/auto-promote-stale-alarm.yml. Logic lives
|
||||
# here (not inline in the workflow YAML) so we can:
|
||||
# - Unit-test it with a stubbed `gh` (see test-check-stale-promote-pr.sh)
|
||||
# - Run it ad-hoc by an operator: `scripts/check-stale-promote-pr.sh`
|
||||
# - Reuse the same surface in any sibling workflow that needs the same
|
||||
# check (SSOT — one detector, many callers).
|
||||
#
|
||||
# Requires: `gh` CLI, `jq`. `GH_TOKEN` env in the workflow context.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Inputs
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Threshold beyond which a BLOCKED+REVIEW_REQUIRED promote PR is "stale"
|
||||
# enough to alarm. 4 hours is the floor: most legitimate gates clear
|
||||
# inside an hour, so 4× headroom is plenty for slow CI without false-
|
||||
# alarming. Override via env for tests + edge ops.
|
||||
STALE_HOURS="${STALE_HOURS:-4}"
|
||||
|
||||
# Repo defaults to $GITHUB_REPOSITORY; override with --repo. Tests use --fixture instead.
|
||||
REPO="${GITHUB_REPOSITORY:-}"
|
||||
|
||||
# Whether to post a comment to the PR. Off by default to avoid noise on
|
||||
# manual ad-hoc runs; the cron workflow turns it on.
|
||||
POST_COMMENT="${POST_COMMENT:-false}"
|
||||
|
||||
# Where to read the open-PR JSON from. Empty = call `gh` live. Tests
|
||||
# point this at a fixture file.
|
||||
PR_FIXTURE="${PR_FIXTURE:-}"
|
||||
|
||||
# Where to read "now" from. Empty = real clock. Tests freeze time so
|
||||
# the staleness math is deterministic.
|
||||
NOW_OVERRIDE="${NOW_OVERRIDE:-}"
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--repo) REPO="$2"; shift 2 ;;
|
||||
--comment) POST_COMMENT="true"; shift ;;
|
||||
--no-comment) POST_COMMENT="false"; shift ;;
|
||||
--fixture) PR_FIXTURE="$2"; shift 2 ;;
|
||||
--stale-hours) STALE_HOURS="$2"; shift 2 ;;
|
||||
-h|--help)
|
||||
sed -n '1,/^set /p' "$0" | grep '^# ' | sed 's/^# //'
|
||||
exit 0
|
||||
;;
|
||||
*) echo "unknown arg: $1" >&2; exit 64 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ -z "$REPO" ] && [ -z "$PR_FIXTURE" ]; then
|
||||
echo "::error::REPO env (or GITHUB_REPOSITORY) required when no fixture given" >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Clock helpers — split out so tests can freeze time
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
now_epoch() {
|
||||
if [ -n "$NOW_OVERRIDE" ]; then
|
||||
printf '%s\n' "$NOW_OVERRIDE"
|
||||
else
|
||||
date -u +%s
|
||||
fi
|
||||
}
|
||||
|
||||
# Parse RFC3339 timestamps the way GitHub emits them (e.g.
|
||||
# "2026-05-05T23:15:00Z"). gnu-date uses -d, bsd-date uses -j -f. Cover
|
||||
# both because the workflow runs on ubuntu-latest (gnu) but operators
|
||||
# may run this script on macOS (bsd).
|
||||
to_epoch() {
|
||||
local ts="$1"
|
||||
# gnu-date path first.
|
||||
if date -u -d "$ts" +%s 2>/dev/null; then
|
||||
return 0
|
||||
fi
|
||||
# bsd-date fallback — strip optional fractional seconds before %S.
|
||||
local ts_clean="${ts%%.*}"
|
||||
ts_clean="${ts_clean%Z}Z"
|
||||
date -u -j -f "%Y-%m-%dT%H:%M:%SZ" "$ts_clean" +%s 2>/dev/null || {
|
||||
echo "::error::cannot parse timestamp: $ts" >&2
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Fetch open auto-promote PRs
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
fetch_prs() {
|
||||
if [ -n "$PR_FIXTURE" ]; then
|
||||
cat "$PR_FIXTURE"
|
||||
return 0
|
||||
fi
|
||||
gh pr list --repo "$REPO" \
|
||||
--base main --head staging --state open \
|
||||
--json number,title,createdAt,mergeStateStatus,reviewDecision,url
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Stale detection
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Read PR list from stdin, emit one TSV line per stale PR:
|
||||
# <num>\t<age_hours>\t<url>\t<title>
|
||||
# Caller decides what to do (warn, comment, escalate).
|
||||
detect_stale() {
|
||||
local now_ts
|
||||
now_ts="$(now_epoch)"
|
||||
local stale_seconds=$((STALE_HOURS * 3600))
|
||||
|
||||
jq -r '.[] | [.number, .createdAt, .mergeStateStatus, .reviewDecision, .url, .title] | @tsv' \
|
||||
| while IFS=$'\t' read -r num created_at merge_state review_decision url title; do
|
||||
# Only alarm on the specific failure mode: BLOCKED + REVIEW_REQUIRED.
|
||||
# Other BLOCKED reasons (DIRTY, BEHIND, failed checks) are the
|
||||
# author's signal-to-fix; this script targets the silent
|
||||
# "no human reviewed yet" wedge specifically.
|
||||
[ "$merge_state" = "BLOCKED" ] || continue
|
||||
[ "$review_decision" = "REVIEW_REQUIRED" ] || continue
|
||||
|
||||
local created_ts
|
||||
created_ts="$(to_epoch "$created_at")" || continue
|
||||
local age=$((now_ts - created_ts))
|
||||
if [ "$age" -ge "$stale_seconds" ]; then
|
||||
local age_h=$((age / 3600))
|
||||
printf '%s\t%d\t%s\t%s\n' "$num" "$age_h" "$url" "$title"
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Reporting
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Comment body — kept short; the issue body has the full design.
|
||||
comment_body() {
|
||||
local age_h="$1"
|
||||
cat <<EOF
|
||||
⚠️ This auto-promote PR has been BLOCKED on \`REVIEW_REQUIRED\` for **${age_h}h**.
|
||||
|
||||
Auto-merge is armed, but main's branch protection requires 1 review and no human has approved. Until someone reviews, the staging→main promote chain is wedged and downstream consumers (canvas builds, tenant redeploys) won't see new code.
|
||||
|
||||
**Action**: a human reviewer on \`@Molecule-AI/maintainers\` should approve this PR (or mark it as not ready and close).
|
||||
|
||||
Detected by \`scripts/check-stale-promote-pr.sh\` per issue #2975.
|
||||
EOF
|
||||
}
|
||||
|
||||
post_comment() {
|
||||
local pr_num="$1"
|
||||
local age_h="$2"
|
||||
if [ "$POST_COMMENT" != "true" ]; then
|
||||
return 0
|
||||
fi
|
||||
# Idempotency: only one alarm comment per PR. Look for the marker
|
||||
# string in existing comments before posting a new one.
|
||||
local existing
|
||||
existing="$(gh pr view "$pr_num" --repo "$REPO" --json comments \
|
||||
--jq '.comments[] | select(.body | test("scripts/check-stale-promote-pr.sh per issue #2975")) | .databaseId' \
|
||||
| head -n1)"
|
||||
if [ -n "$existing" ]; then
|
||||
echo "::notice::PR #$pr_num already has a stale-alarm comment ($existing) — not re-posting"
|
||||
return 0
|
||||
fi
|
||||
comment_body "$age_h" | gh pr comment "$pr_num" --repo "$REPO" --body-file -
|
||||
echo "::notice::Posted stale-alarm comment on PR #$pr_num (age=${age_h}h)"
|
||||
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Main
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
stale_count=0
|
||||
while IFS=$'\t' read -r num age_h url title; do
|
||||
[ -n "$num" ] || continue
|
||||
stale_count=$((stale_count + 1))
|
||||
echo "::warning title=Stale auto-promote PR::PR #$num — BLOCKED on REVIEW_REQUIRED for ${age_h}h. $url"
|
||||
{
|
||||
echo "## ⚠️ Stale auto-promote PR detected"
|
||||
echo
|
||||
echo "- PR: #$num — \`$title\`"
|
||||
echo "- Age: ${age_h}h"
|
||||
echo "- State: BLOCKED on REVIEW_REQUIRED"
|
||||
echo "- URL: $url"
|
||||
echo
|
||||
echo "Auto-merge is armed but waiting on a human review. See issue #2975."
|
||||
} >> "${GITHUB_STEP_SUMMARY:-/dev/null}"
|
||||
post_comment "$num" "$age_h"
|
||||
done < <(fetch_prs | detect_stale)
|
||||
|
||||
if [ "$stale_count" -eq 0 ]; then
|
||||
echo "::notice::No stale auto-promote PRs detected (threshold: ${STALE_HOURS}h)"
|
||||
fi
|
||||
|
||||
# Cap exit code so we don't accidentally break shells that interpret
|
||||
# >125 as signal-style. 1..N maps to "1..N stale PRs".
|
||||
exit $(( stale_count > 125 ? 125 : stale_count ))
|
||||
Executable
+257
@@ -0,0 +1,257 @@
|
||||
#!/usr/bin/env bash
|
||||
# scripts/test-check-stale-promote-pr.sh
|
||||
#
|
||||
# Exhaustive bash unit tests for check-stale-promote-pr.sh.
|
||||
# Goal: 100% branch coverage on the detector logic.
|
||||
#
|
||||
# Each case writes a fixture JSON, freezes the clock with NOW_OVERRIDE,
|
||||
# runs the script with --fixture + --no-comment (so we don't try to
|
||||
# actually call `gh pr comment`), and asserts on stdout/exit code.
|
||||
#
|
||||
# Run: bash scripts/test-check-stale-promote-pr.sh
|
||||
# Expected: "All tests passed." + exit 0.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT="$(cd "$(dirname "$0")" && pwd)/check-stale-promote-pr.sh"
|
||||
TMP="$(mktemp -d)"
|
||||
trap 'rm -rf "$TMP"' EXIT
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Helpers
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Frozen "now" — 2026-05-06T05:00:00Z. Compute dynamically so the
|
||||
# tests stay correct regardless of platform-specific date semantics
|
||||
# (gnu vs bsd) and any author math errors on the epoch.
|
||||
if FROZEN_NOW="$(date -u -d '2026-05-06T05:00:00Z' +%s 2>/dev/null)"; then
|
||||
: # gnu-date worked
|
||||
elif FROZEN_NOW="$(date -u -j -f '%Y-%m-%dT%H:%M:%SZ' '2026-05-06T05:00:00Z' +%s 2>/dev/null)"; then
|
||||
: # bsd-date worked
|
||||
else
|
||||
echo "FATAL: cannot compute FROZEN_NOW on this platform" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
run_script() {
|
||||
# Args: <fixture-file>
|
||||
# Returns stdout + exit code via a known marker.
|
||||
local fixture="$1"
|
||||
shift
|
||||
set +e
|
||||
NOW_OVERRIDE="$FROZEN_NOW" \
|
||||
POST_COMMENT="false" \
|
||||
bash "$SCRIPT" --fixture "$fixture" "$@" 2>&1
|
||||
local rc=$?
|
||||
set -e
|
||||
echo "EXIT_CODE=$rc"
|
||||
}
|
||||
|
||||
assert_pass() {
|
||||
local name="$1"
|
||||
local got="$2"
|
||||
local want_pattern="$3"
|
||||
if printf '%s' "$got" | grep -qE "$want_pattern"; then
|
||||
PASS=$((PASS + 1))
|
||||
printf ' ✓ %s\n' "$name"
|
||||
else
|
||||
FAIL=$((FAIL + 1))
|
||||
printf ' ✗ %s\n want pattern: %s\n got:\n%s\n' "$name" "$want_pattern" "$got"
|
||||
fi
|
||||
}
|
||||
|
||||
assert_no_match() {
|
||||
local name="$1"
|
||||
local got="$2"
|
||||
local bad_pattern="$3"
|
||||
if printf '%s' "$got" | grep -qE "$bad_pattern"; then
|
||||
FAIL=$((FAIL + 1))
|
||||
printf ' ✗ %s\n bad pattern matched: %s\n got:\n%s\n' "$name" "$bad_pattern" "$got"
|
||||
else
|
||||
PASS=$((PASS + 1))
|
||||
printf ' ✓ %s\n' "$name"
|
||||
fi
|
||||
}
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Test cases
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
echo "1. Empty PR list — clean exit"
|
||||
echo '[]' > "$TMP/empty.json"
|
||||
got=$(run_script "$TMP/empty.json")
|
||||
assert_pass "empty-no-warning" "$got" "No stale auto-promote PRs detected"
|
||||
assert_pass "empty-exit-zero" "$got" "EXIT_CODE=0"
|
||||
|
||||
echo
|
||||
echo "2. Single PR, BLOCKED+REVIEW_REQUIRED, 5h old — fires alarm"
|
||||
cat > "$TMP/stale1.json" <<EOF
|
||||
[{
|
||||
"number": 2963,
|
||||
"title": "staging → main",
|
||||
"createdAt": "2026-05-06T00:00:00Z",
|
||||
"mergeStateStatus": "BLOCKED",
|
||||
"reviewDecision": "REVIEW_REQUIRED",
|
||||
"url": "https://github.com/test/test/pull/2963"
|
||||
}]
|
||||
EOF
|
||||
got=$(run_script "$TMP/stale1.json")
|
||||
assert_pass "stale1-warning" "$got" "Stale auto-promote PR"
|
||||
assert_pass "stale1-pr-number" "$got" "PR #2963"
|
||||
assert_pass "stale1-age" "$got" "for 5h"
|
||||
assert_pass "stale1-exit-1" "$got" "EXIT_CODE=1"
|
||||
|
||||
echo
|
||||
echo "3. Same PR but only 3h old — under threshold, NO alarm"
|
||||
cat > "$TMP/young.json" <<EOF
|
||||
[{
|
||||
"number": 100,
|
||||
"title": "fresh promote",
|
||||
"createdAt": "2026-05-06T02:00:00Z",
|
||||
"mergeStateStatus": "BLOCKED",
|
||||
"reviewDecision": "REVIEW_REQUIRED",
|
||||
"url": "https://github.com/test/test/pull/100"
|
||||
}]
|
||||
EOF
|
||||
got=$(run_script "$TMP/young.json")
|
||||
assert_pass "young-no-alarm" "$got" "No stale auto-promote PRs"
|
||||
assert_pass "young-exit-zero" "$got" "EXIT_CODE=0"
|
||||
assert_no_match "young-no-warning" "$got" "Stale auto-promote PR"
|
||||
|
||||
echo
|
||||
echo "4. PR is BLOCKED but for the wrong reason (DIRTY, not REVIEW_REQUIRED)"
|
||||
cat > "$TMP/dirty.json" <<EOF
|
||||
[{
|
||||
"number": 200,
|
||||
"title": "needs rebase",
|
||||
"createdAt": "2026-05-06T00:00:00Z",
|
||||
"mergeStateStatus": "BLOCKED",
|
||||
"reviewDecision": "APPROVED",
|
||||
"url": "https://github.com/test/test/pull/200"
|
||||
}]
|
||||
EOF
|
||||
got=$(run_script "$TMP/dirty.json")
|
||||
assert_pass "dirty-no-alarm" "$got" "No stale auto-promote PRs"
|
||||
assert_pass "dirty-exit-zero" "$got" "EXIT_CODE=0"
|
||||
|
||||
echo
|
||||
echo "5. PR is APPROVED but mergeStateStatus is CLEAN — NOT alarming"
|
||||
cat > "$TMP/clean.json" <<EOF
|
||||
[{
|
||||
"number": 300,
|
||||
"title": "all green",
|
||||
"createdAt": "2026-05-06T00:00:00Z",
|
||||
"mergeStateStatus": "CLEAN",
|
||||
"reviewDecision": "APPROVED",
|
||||
"url": "https://github.com/test/test/pull/300"
|
||||
}]
|
||||
EOF
|
||||
got=$(run_script "$TMP/clean.json")
|
||||
assert_pass "clean-no-alarm" "$got" "No stale auto-promote PRs"
|
||||
|
||||
echo
|
||||
echo "6. Multiple PRs — only the BLOCKED+REVIEW_REQUIRED+old one alarms"
|
||||
cat > "$TMP/mixed.json" <<EOF
|
||||
[
|
||||
{
|
||||
"number": 100,
|
||||
"title": "fresh",
|
||||
"createdAt": "2026-05-06T04:00:00Z",
|
||||
"mergeStateStatus": "BLOCKED",
|
||||
"reviewDecision": "REVIEW_REQUIRED",
|
||||
"url": "https://x/100"
|
||||
},
|
||||
{
|
||||
"number": 200,
|
||||
"title": "stale + alarming",
|
||||
"createdAt": "2026-05-05T20:00:00Z",
|
||||
"mergeStateStatus": "BLOCKED",
|
||||
"reviewDecision": "REVIEW_REQUIRED",
|
||||
"url": "https://x/200"
|
||||
},
|
||||
{
|
||||
"number": 300,
|
||||
"title": "approved + clean",
|
||||
"createdAt": "2026-05-05T20:00:00Z",
|
||||
"mergeStateStatus": "CLEAN",
|
||||
"reviewDecision": "APPROVED",
|
||||
"url": "https://x/300"
|
||||
}
|
||||
]
|
||||
EOF
|
||||
got=$(run_script "$TMP/mixed.json")
|
||||
assert_pass "mixed-only-200" "$got" "PR #200"
|
||||
assert_no_match "mixed-not-100" "$got" "PR #100"
|
||||
assert_no_match "mixed-not-300" "$got" "PR #300"
|
||||
assert_pass "mixed-exit-1" "$got" "EXIT_CODE=1"
|
||||
|
||||
echo
|
||||
echo "7. Custom STALE_HOURS via --stale-hours overrides threshold"
|
||||
got=$(run_script "$TMP/young.json" --stale-hours 1)
|
||||
assert_pass "custom-threshold-fires" "$got" "PR #100"
|
||||
assert_pass "custom-threshold-exit-1" "$got" "EXIT_CODE=1"
|
||||
|
||||
echo
|
||||
echo "8. Two stale PRs — exit code reflects count"
|
||||
cat > "$TMP/two-stale.json" <<EOF
|
||||
[
|
||||
{
|
||||
"number": 200,
|
||||
"title": "stale-A",
|
||||
"createdAt": "2026-05-05T20:00:00Z",
|
||||
"mergeStateStatus": "BLOCKED",
|
||||
"reviewDecision": "REVIEW_REQUIRED",
|
||||
"url": "https://x/200"
|
||||
},
|
||||
{
|
||||
"number": 201,
|
||||
"title": "stale-B",
|
||||
"createdAt": "2026-05-05T19:00:00Z",
|
||||
"mergeStateStatus": "BLOCKED",
|
||||
"reviewDecision": "REVIEW_REQUIRED",
|
||||
"url": "https://x/201"
|
||||
}
|
||||
]
|
||||
EOF
|
||||
got=$(run_script "$TMP/two-stale.json")
|
||||
assert_pass "two-stale-exit-2" "$got" "EXIT_CODE=2"
|
||||
|
||||
echo
|
||||
echo "9. Help text is shown for --help"
|
||||
set +e
|
||||
help_out=$(bash "$SCRIPT" --help 2>&1)
|
||||
help_rc=$?
|
||||
set -e
|
||||
assert_pass "help-exits-zero" "EXIT_CODE=$help_rc" "EXIT_CODE=0"
|
||||
assert_pass "help-mentions-issue" "$help_out" "issue #2975"
|
||||
|
||||
echo
|
||||
echo "10. Unknown arg exits 64 (EX_USAGE)"
|
||||
set +e
|
||||
bad_out=$(bash "$SCRIPT" --bogus 2>&1)
|
||||
bad_rc=$?
|
||||
set -e
|
||||
assert_pass "unknown-arg-rc" "EXIT_CODE=$bad_rc" "EXIT_CODE=64"
|
||||
|
||||
echo
|
||||
echo "11. Missing repo + missing fixture exits 2"
|
||||
set +e
|
||||
out=$(REPO="" bash "$SCRIPT" 2>&1)
|
||||
rc=$?
|
||||
set -e
|
||||
assert_pass "no-repo-exit-2" "EXIT_CODE=$rc" "EXIT_CODE=2"
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Summary
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
echo
|
||||
echo "─────────────────────────────────────────────"
|
||||
echo "Tests: $PASS passed, $FAIL failed"
|
||||
if [ "$FAIL" -gt 0 ]; then
|
||||
exit 1
|
||||
fi
|
||||
echo "All tests passed."
|
||||
Executable
+40
@@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env bash
|
||||
# lint_cleanup_traps.sh — regression gate for the OSS-shape program's
|
||||
# "all E2E tests must have proper cleanup" bar (RFC #2873).
|
||||
#
|
||||
# Asserts: every tests/e2e/test_*.sh file that calls `mktemp` ALSO
|
||||
# installs an `EXIT` trap somewhere in the file. The trap is the
|
||||
# minimum-viable guarantee that scratch files won't leak when an
|
||||
# assertion or curl exits the script non-zero.
|
||||
#
|
||||
# Why this lints (instead of the test runner enforcing): shell scripts
|
||||
# can't easily be wrapped by an outer harness without breaking the
|
||||
# `WSID=… ./test_x.sh` invocation contract. Static gate is the cheap
|
||||
# defense.
|
||||
#
|
||||
# Usage:
|
||||
# tests/e2e/lint_cleanup_traps.sh
|
||||
#
|
||||
# Exits non-zero if any test_*.sh has unmatched mktemp/trap. CI invokes
|
||||
# it from the existing Shellcheck (E2E scripts) workflow.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
cd "$(dirname "$0")"
|
||||
|
||||
violations=0
|
||||
for f in test_*.sh; do
|
||||
if grep -qE '\bmktemp\b' "$f"; then
|
||||
if ! grep -qE 'trap[[:space:]]+.*EXIT' "$f"; then
|
||||
echo "::error file=tests/e2e/$f::has 'mktemp' but no 'trap … EXIT' — scratch will leak when test exits non-zero. Pattern: TMPDIR_E2E=\$(mktemp -d -t prefix-XXX); trap 'rm -rf \"\$TMPDIR_E2E\"' EXIT INT TERM"
|
||||
violations=$((violations + 1))
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$violations" -gt 0 ]; then
|
||||
echo "::error::$violations shell E2E file(s) leak scratch on early exit. See above."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✓ all $(grep -lE '\bmktemp\b' test_*.sh | wc -l | tr -d ' ') shell E2E files with mktemp also install an EXIT trap"
|
||||
@@ -22,6 +22,13 @@ set -euo pipefail
|
||||
WSID="${WSID:?WSID=<workspace-id> required}"
|
||||
BASE="${BASE:-http://localhost:8080}"
|
||||
|
||||
# Per-run scratch dir collected under one trap so every mktemp leak path
|
||||
# (assertion failure, SIGINT, exit non-zero) is plugged. Pre-fix this test
|
||||
# created a /tmp/hermes-e2e-XXXXXX.txt and never deleted it — ~10 KB ×
|
||||
# every CI run leaked into the runner. RFC #2873 cleanup-hygiene PR.
|
||||
TMPDIR_E2E=$(mktemp -d -t chat-attachments-e2e-XXXXXX)
|
||||
trap 'rm -rf "$TMPDIR_E2E"' EXIT INT TERM
|
||||
|
||||
log() { printf "\n=== %s ===\n" "$*"; }
|
||||
|
||||
log "Preflight: workspace online?"
|
||||
@@ -29,7 +36,9 @@ STATUS=$(curl -s "$BASE/workspaces/$WSID" | python3 -c 'import json,sys;print(js
|
||||
[ "$STATUS" = "online" ] || { echo "workspace not online ($STATUS)"; exit 1; }
|
||||
|
||||
log "Step 1 — Upload a text file via /chat/uploads"
|
||||
TEST_FILE=$(mktemp -t hermes-e2e-XXXXXX.txt)
|
||||
# `mktemp <full-template>` is portable across BSD (macOS) + GNU; -p is
|
||||
# GNU-only and breaks local dev runs on Mac.
|
||||
TEST_FILE=$(mktemp "$TMPDIR_E2E/hermes-e2e-XXXXXX.txt")
|
||||
echo "secret code: $(openssl rand -hex 4)-$(openssl rand -hex 4)" > "$TEST_FILE"
|
||||
EXPECTED=$(cat "$TEST_FILE" | awk '{print $NF}')
|
||||
UPLOAD=$(curl -s -X POST "$BASE/workspaces/$WSID/chat/uploads" -F "files=@$TEST_FILE")
|
||||
|
||||
@@ -24,6 +24,15 @@ set -uo pipefail
|
||||
BASE="${BASE:-http://localhost:8080}"
|
||||
fails=0
|
||||
|
||||
# Per-run scratch dir collected under one trap so every per-runtime
|
||||
# round_trip mktemp leak path (assertion failure, SIGINT, exit
|
||||
# non-zero, function early-return between mktemp and rm) is plugged.
|
||||
# Pre-fix, round_trip's `rm -f "$test_file"` only fired on the success
|
||||
# path inside the function — every test_failure path before the rm
|
||||
# leaked the scratch into /tmp permanently. RFC #2873 cleanup-hygiene PR.
|
||||
TMPDIR_E2E=$(mktemp -d -t mr-attachments-e2e-XXXXXX)
|
||||
trap 'rm -rf "$TMPDIR_E2E"' EXIT INT TERM
|
||||
|
||||
has_patch_in_container() {
|
||||
local container="$1"
|
||||
# Signal that platform helpers are available AND wired into the
|
||||
@@ -74,12 +83,16 @@ print(f"executor: claude-code monkey-patch active ({name})")
|
||||
round_trip() {
|
||||
local label="$1" wsid="$2"
|
||||
local test_file expected upload uri payload reply reply_text
|
||||
test_file=$(mktemp -t e2e-mr-XXXX.txt)
|
||||
# Scratch goes under TMPDIR_E2E; the script-level trap rm -rf's the
|
||||
# whole dir on exit, so per-file rm calls are unnecessary AND make
|
||||
# error paths leak when forgotten.
|
||||
# `mktemp <full-template>` is portable across BSD (macOS) + GNU; -p is GNU-only.
|
||||
test_file=$(mktemp "$TMPDIR_E2E/e2e-mr-${label}-XXXX.txt")
|
||||
expected="secret-$(openssl rand -hex 6)"
|
||||
echo "$expected" > "$test_file"
|
||||
upload=$(curl -s -X POST "$BASE/workspaces/$wsid/chat/uploads" -F "files=@$test_file")
|
||||
uri=$(echo "$upload" | python3 -c 'import json,sys;print(json.load(sys.stdin)["files"][0]["uri"])' 2>/dev/null)
|
||||
[ -z "$uri" ] && { echo "FAIL $label: upload returned no URI: $upload"; rm -f "$test_file"; return 1; }
|
||||
[ -z "$uri" ] && { echo "FAIL $label: upload returned no URI: $upload"; return 1; }
|
||||
payload=$(URI="$uri" python3 -c '
|
||||
import json, os
|
||||
uri = os.environ["URI"]
|
||||
@@ -103,7 +116,8 @@ try:
|
||||
except Exception as exc:
|
||||
print(f"(parse failed: {exc})")
|
||||
' 2>&1)
|
||||
rm -f "$test_file"
|
||||
# $test_file lives under TMPDIR_E2E; the script-level trap rm -rf's
|
||||
# the dir on exit, covering every return path including SIGINT.
|
||||
|
||||
if echo "$reply_text" | grep -qF "$expected"; then
|
||||
echo "PASS $label round-trip: agent quoted $expected"
|
||||
|
||||
@@ -29,11 +29,20 @@ FAIL=0
|
||||
WSID=""
|
||||
|
||||
cleanup() {
|
||||
# Workspace teardown — best-effort, ignore errors so an unrelated CP
|
||||
# outage doesn't shadow a real test failure.
|
||||
if [ -n "$WSID" ]; then
|
||||
curl -s -X DELETE "$BASE/workspaces/$WSID?confirm=true" > /dev/null || true
|
||||
fi
|
||||
# /tmp scratch — pre-fix only ran on success path (the unconditional
|
||||
# rm at the bottom of the script). Without the trap the file leaks
|
||||
# whenever the script exits non-zero before reaching the rm. RFC #2873
|
||||
# cleanup-hygiene PR.
|
||||
if [ -n "${TMPF:-}" ]; then
|
||||
rm -f "$TMPF"
|
||||
fi
|
||||
}
|
||||
trap cleanup EXIT
|
||||
trap cleanup EXIT INT TERM
|
||||
|
||||
assert() {
|
||||
local label="$1"
|
||||
@@ -230,7 +239,8 @@ for r in rows:
|
||||
assert "stored URI matches uploaded URI" "$STORED_URI" "$URI"
|
||||
fi
|
||||
|
||||
rm -f "$TMPF"
|
||||
# $TMPF cleanup happens via the trap-cleanup function above — covers
|
||||
# both the success path and any early exit / SIGINT.
|
||||
|
||||
echo ""
|
||||
echo "=== Results: $PASS passed, $FAIL failed ==="
|
||||
|
||||
Executable
+295
@@ -0,0 +1,295 @@
|
||||
#!/usr/bin/env bash
|
||||
# E2E for poll-mode chat upload (RFC #2891 phases 1-5b).
|
||||
#
|
||||
# Round-trip: register a workspace as poll-mode (no callback URL) → POST a
|
||||
# multi-file chat upload → verify each file becomes (a) one
|
||||
# `chat_upload_receive` activity row and (b) one /pending-uploads row → fetch
|
||||
# the bytes back via the poll endpoint → ack → verify the row 404s on
|
||||
# subsequent fetch. Also pins cross-workspace bleed protection: workspace B
|
||||
# cannot read workspace A's pending uploads even with its own valid bearer.
|
||||
#
|
||||
# Why this exists separately from test_chat_upload_e2e.sh: that script
|
||||
# covers the PUSH path (the workspace's own /internal/chat/uploads/ingest).
|
||||
# This script covers the POLL path: the same canvas-side request lands on
|
||||
# the platform's pendinguploads.Storage instead, and the workspace fetches
|
||||
# it later. The two paths share zero handler code on the platform side, so
|
||||
# both need their own E2E.
|
||||
#
|
||||
# Requires: platform running on localhost:8080 with migrations applied.
|
||||
# bash workspace-server/scripts/dev-start.sh
|
||||
# bash workspace-server/scripts/run-migrations.sh
|
||||
#
|
||||
# Idempotent: each run uses fresh per-script workspace UUIDs so reruns
|
||||
# don't collide. Best-effort cleanup on EXIT — does NOT call
|
||||
# e2e_cleanup_all_workspaces (see
|
||||
# `feedback_never_run_cluster_cleanup_tests_on_live_platform.md`).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
source "$(dirname "$0")/_lib.sh"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
TIMEOUT="${A2A_TIMEOUT:-30}"
|
||||
|
||||
gen_uuid() {
|
||||
if command -v uuidgen >/dev/null 2>&1; then
|
||||
uuidgen | tr '[:upper:]' '[:lower:]'
|
||||
else
|
||||
python3 -c 'import uuid; print(uuid.uuid4())'
|
||||
fi
|
||||
}
|
||||
WS_A="$(gen_uuid)"
|
||||
WS_B="$(gen_uuid)"
|
||||
|
||||
# Per-run scratch dir collected under one trap so every assertion-failure
|
||||
# path drops the temp files it made (see test_chat_attachments_e2e.sh).
|
||||
TMPDIR_E2E=$(mktemp -d -t poll-chat-upload-e2e-XXXXXX)
|
||||
|
||||
cleanup() {
|
||||
local rc=$?
|
||||
curl -s -X DELETE "$BASE/workspaces/$WS_A?confirm=true" >/dev/null 2>&1 || true
|
||||
curl -s -X DELETE "$BASE/workspaces/$WS_B?confirm=true" >/dev/null 2>&1 || true
|
||||
rm -rf "$TMPDIR_E2E"
|
||||
exit $rc
|
||||
}
|
||||
trap cleanup EXIT INT TERM
|
||||
|
||||
check() {
|
||||
local desc="$1" expected="$2" actual="$3"
|
||||
if echo "$actual" | grep -qF -- "$expected"; then
|
||||
echo "PASS: $desc"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo "FAIL: $desc"
|
||||
echo " expected to contain: $expected"
|
||||
echo " got: $(echo "$actual" | head -10)"
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
check_eq() {
|
||||
local desc="$1" expected="$2" actual="$3"
|
||||
if [ "$actual" = "$expected" ]; then
|
||||
echo "PASS: $desc"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo "FAIL: $desc"
|
||||
echo " expected: $expected"
|
||||
echo " got: $actual"
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
echo "=== Poll-Mode Chat Upload E2E ==="
|
||||
echo " base: $BASE"
|
||||
echo " workspace A: $WS_A"
|
||||
echo " workspace B: $WS_B"
|
||||
echo ""
|
||||
|
||||
# ---------- Phase 1: register poll-mode workspace ----------
|
||||
echo "--- Phase 1: Register poll-mode workspace A ---"
|
||||
|
||||
REG_A=$(curl -s -X POST "$BASE/registry/register" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{
|
||||
\"id\": \"$WS_A\",
|
||||
\"delivery_mode\": \"poll\",
|
||||
\"agent_card\": {\"name\": \"poll-chat-upload-test-a\"}
|
||||
}")
|
||||
check "register accepts poll mode without URL" '"status":"registered"' "$REG_A"
|
||||
TOK_A=$(echo "$REG_A" | e2e_extract_token || true)
|
||||
[ -n "$TOK_A" ] || { echo "FAIL: no auth_token in register response (ws A)"; FAIL=$((FAIL + 1)); exit 1; }
|
||||
|
||||
# ---------- Phase 2: multi-file chat upload ----------
|
||||
echo ""
|
||||
echo "--- Phase 2: POST /chat/uploads with two files ---"
|
||||
|
||||
FILE1="$TMPDIR_E2E/alpha.txt"
|
||||
FILE2="$TMPDIR_E2E/beta.txt"
|
||||
EXPECTED1="alpha-secret-$(openssl rand -hex 4)"
|
||||
EXPECTED2="beta-secret-$(openssl rand -hex 4)"
|
||||
printf '%s' "$EXPECTED1" > "$FILE1"
|
||||
printf '%s' "$EXPECTED2" > "$FILE2"
|
||||
|
||||
UPLOAD=$(curl -s -X POST "$BASE/workspaces/$WS_A/chat/uploads" \
|
||||
-H "Authorization: Bearer $TOK_A" \
|
||||
-F "files=@$FILE1;filename=alpha.txt;type=text/plain" \
|
||||
-F "files=@$FILE2;filename=beta.txt;type=text/plain" \
|
||||
-w "\nHTTP_CODE=%{http_code}\n")
|
||||
UPLOAD_CODE=$(echo "$UPLOAD" | grep -oE 'HTTP_CODE=[0-9]+' | cut -d= -f2)
|
||||
UPLOAD_BODY=$(echo "$UPLOAD" | sed '/^HTTP_CODE=/,$d')
|
||||
|
||||
check_eq "upload returns 200" "200" "$UPLOAD_CODE"
|
||||
check "upload response has files array" '"files":' "$UPLOAD_BODY"
|
||||
|
||||
# Pull file_ids out of the URI in the response. URI shape is
|
||||
# `platform-pending:<wsid>/<file_id>` — proves the response came from the
|
||||
# poll-mode branch, not the push-mode internal-ingest branch.
|
||||
URI1=$(echo "$UPLOAD_BODY" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][0]["uri"])')
|
||||
URI2=$(echo "$UPLOAD_BODY" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][1]["uri"])')
|
||||
check "URI 1 has platform-pending: scheme" "platform-pending:$WS_A/" "$URI1"
|
||||
check "URI 2 has platform-pending: scheme" "platform-pending:$WS_A/" "$URI2"
|
||||
|
||||
FID1="${URI1##*/}"
|
||||
FID2="${URI2##*/}"
|
||||
[ -n "$FID1" ] && [ -n "$FID2" ] || { echo "FAIL: could not extract file IDs"; FAIL=$((FAIL + 1)); exit 1; }
|
||||
echo " file_id 1: $FID1"
|
||||
echo " file_id 2: $FID2"
|
||||
|
||||
# ---------- Phase 3: activity rows visible to the workspace ----------
|
||||
echo ""
|
||||
echo "--- Phase 3: /activity shows two chat_upload_receive rows ---"
|
||||
|
||||
# activity_logs INSERTs run in a goroutine — give them a moment.
|
||||
sleep 1
|
||||
ACT=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/activity?type=a2a_receive&limit=20")
|
||||
check "activity feed has the alpha file" "$FID1" "$ACT"
|
||||
check "activity feed has the beta file" "$FID2" "$ACT"
|
||||
check "activity rows tagged chat_upload_receive" '"method":"chat_upload_receive"' "$ACT"
|
||||
check "activity rows record alpha mimetype" '"mimeType":"text/plain"' "$ACT"
|
||||
|
||||
CHAT_UPLOAD_COUNT=$(echo "$ACT" | python3 -c '
|
||||
import json, sys
|
||||
rows = json.load(sys.stdin)
|
||||
n = sum(1 for r in rows if (r.get("method") or "") == "chat_upload_receive")
|
||||
print(n)
|
||||
')
|
||||
check_eq "exactly two chat_upload_receive rows" "2" "$CHAT_UPLOAD_COUNT"
|
||||
|
||||
# ---------- Phase 4: GET /pending-uploads/:file_id/content ----------
|
||||
echo ""
|
||||
echo "--- Phase 4: Fetch content for each pending upload ---"
|
||||
|
||||
GOT1=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
|
||||
check_eq "alpha bytes round-trip" "$EXPECTED1" "$GOT1"
|
||||
|
||||
GOT2=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID2/content")
|
||||
check_eq "beta bytes round-trip" "$EXPECTED2" "$GOT2"
|
||||
|
||||
# Mimetype + Content-Disposition headers should match what was uploaded.
|
||||
HEAD1=$(curl -s -D - -o /dev/null --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
|
||||
check "alpha response carries text/plain Content-Type" "Content-Type: text/plain" "$HEAD1"
|
||||
check "alpha response carries Content-Disposition with filename" 'filename="alpha.txt"' "$HEAD1"
|
||||
|
||||
# ---------- Phase 5: idempotent re-fetch (until ack) ----------
|
||||
echo ""
|
||||
echo "--- Phase 5: Re-fetch before ack returns the same bytes ---"
|
||||
|
||||
RE_GOT1=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
|
||||
check_eq "re-fetch returns same alpha bytes" "$EXPECTED1" "$RE_GOT1"
|
||||
|
||||
# ---------- Phase 6: ack each row ----------
|
||||
echo ""
|
||||
echo "--- Phase 6: Ack each pending upload ---"
|
||||
|
||||
ACK1=$(curl -s -X POST --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/ack")
|
||||
check "alpha ack returns acked:true" '"acked":true' "$ACK1"
|
||||
|
||||
ACK2=$(curl -s -X POST --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID2/ack")
|
||||
check "beta ack returns acked:true" '"acked":true' "$ACK2"
|
||||
|
||||
# Re-ack should still 200 (idempotent — the row's gone but the workspace's
|
||||
# at-least-once intent was already honored, and the second ack hits the
|
||||
# raced path which also returns 200).
|
||||
RE_ACK1=$(curl -s -w '\n%{http_code}' -X POST --max-time "$TIMEOUT" \
|
||||
-H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/ack")
|
||||
RE_ACK1_CODE=$(printf '%s' "$RE_ACK1" | tail -n1)
|
||||
# Acked rows return 404 on Get-before-Ack (the row's still in the table
|
||||
# but Get filters acked_at IS NULL); workspace would not normally re-ack
|
||||
# since it already saw the success. Accept both 200 and 404 here so the
|
||||
# test pins the contract without being brittle on the inner ordering.
|
||||
case "$RE_ACK1_CODE" in
|
||||
200|404)
|
||||
echo "PASS: re-ack returns 200 or 404 ($RE_ACK1_CODE)"
|
||||
PASS=$((PASS + 1))
|
||||
;;
|
||||
*)
|
||||
echo "FAIL: re-ack returned unexpected $RE_ACK1_CODE"
|
||||
FAIL=$((FAIL + 1))
|
||||
;;
|
||||
esac
|
||||
|
||||
# ---------- Phase 7: GET content after ack returns 404 ----------
|
||||
echo ""
|
||||
echo "--- Phase 7: Acked file 404s on subsequent fetch ---"
|
||||
|
||||
POST_ACK=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
|
||||
POST_ACK_CODE=$(printf '%s' "$POST_ACK" | tail -n1)
|
||||
check_eq "acked alpha returns HTTP 404" "404" "$POST_ACK_CODE"
|
||||
|
||||
# ---------- Phase 8: cross-workspace bleed protection ----------
|
||||
echo ""
|
||||
echo "--- Phase 8: Workspace B cannot read workspace A's pending uploads ---"
|
||||
|
||||
# Stage a fresh upload on workspace A so we have an UN-acked row to probe.
|
||||
# Write a small uniquely-tagged probe file and upload it to workspace A so
# there is an un-acked pending-upload row to probe from workspace B.
PROBE_FILE="$TMPDIR_E2E/probe.txt"
printf '%s' "probe-bytes-$(openssl rand -hex 4)" > "$PROBE_FILE"
# --max-time matches every other request in this script so a stalled server
# fails the run instead of hanging it.
PROBE_UP=$(curl -s --max-time "$TIMEOUT" -X POST "$BASE/workspaces/$WS_A/chat/uploads" \
  -H "Authorization: Bearer $TOK_A" \
  -F "files=@$PROBE_FILE;filename=probe.txt;type=text/plain")
# The file_id is the last path segment of the returned URI. A parse failure
# is swallowed here so the explicit FAIL line below reports it (same
# `|| true` pattern as the token extraction further down).
PROBE_FID=$(echo "$PROBE_UP" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][0]["uri"].split("/")[-1])' 2>/dev/null || true)
[ -n "$PROBE_FID" ] || { echo "FAIL: probe upload returned no file_id"; FAIL=$((FAIL + 1)); exit 1; }

# Register a SECOND poll-mode workspace and capture its bearer.
REG_B=$(curl -s --max-time "$TIMEOUT" -X POST "$BASE/registry/register" \
  -H "Content-Type: application/json" \
  -d "{
    \"id\": \"$WS_B\",
    \"delivery_mode\": \"poll\",
    \"agent_card\": {\"name\": \"poll-chat-upload-test-b\"}
  }")
check "second workspace registers" '"status":"registered"' "$REG_B"
TOK_B=$(echo "$REG_B" | e2e_extract_token || true)
[ -n "$TOK_B" ] || { echo "FAIL: no auth_token (ws B)"; FAIL=$((FAIL + 1)); exit 1; }
|
||||
|
||||
# B's bearer hitting B's URL with A's file_id → 404 (handler checks the row's
|
||||
# workspace_id matches the URL :id, not the bearer's workspace).
|
||||
CROSS_RESP=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \
|
||||
-H "Authorization: Bearer $TOK_B" \
|
||||
"$BASE/workspaces/$WS_B/pending-uploads/$PROBE_FID/content")
|
||||
CROSS_CODE=$(printf '%s' "$CROSS_RESP" | tail -n1)
|
||||
check_eq "B's URL with A's file_id returns 404" "404" "$CROSS_CODE"
|
||||
|
||||
# B's bearer hitting A's URL → 401 (wsAuth pins bearer to :id). This is the
|
||||
# strictest cross-workspace check: a presented-but-wrong bearer is rejected
|
||||
# in EVERY platform posture (dev-mode fail-open only triggers when no bearer
|
||||
# is presented at all — invalid tokens always 401).
|
||||
WRONG_BEARER=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \
|
||||
-H "Authorization: Bearer $TOK_B" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/$PROBE_FID/content")
|
||||
WRONG_CODE=$(printf '%s' "$WRONG_BEARER" | tail -n1)
|
||||
check_eq "B's bearer on A's URL returns 401" "401" "$WRONG_CODE"
|
||||
|
||||
# NB: a fully bearerless request to /pending-uploads/:fid/content returns
|
||||
# 401 ONLY when the platform has MOLECULE_ENV != development (production /
|
||||
# staging). On local-dev with MOLECULE_ENV=development the wsauth middleware
|
||||
# fail-opens for bearerless requests so the canvas at :3000 can talk to the
|
||||
# platform at :8080 without per-call token plumbing — see middleware/
|
||||
# devmode.go. The strict bearerless-401 contract is covered by the wsauth
|
||||
# unit + middleware tests; we don't reassert it here because the result
|
||||
# depends on platform posture, not the poll-mode upload contract.
|
||||
|
||||
# ---------- Phase 9: invalid file_id rejected at the URL parser ----------
|
||||
echo ""
|
||||
echo "--- Phase 9: Invalid file_id returns 400 ---"
|
||||
|
||||
BAD_FID=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \
|
||||
-H "Authorization: Bearer $TOK_A" \
|
||||
"$BASE/workspaces/$WS_A/pending-uploads/not-a-uuid/content")
|
||||
BAD_FID_CODE=$(printf '%s' "$BAD_FID" | tail -n1)
|
||||
check_eq "invalid file_id UUID returns 400" "400" "$BAD_FID_CODE"
|
||||
|
||||
# ---------- Results ----------
|
||||
echo ""
|
||||
echo "=== Results: $PASS passed, $FAIL failed ==="
|
||||
[ "$FAIL" -eq 0 ]
|
||||
@@ -157,6 +157,43 @@ A2A_RESP=$(curl -s --max-time "$TIMEOUT" -X POST "$BASE/workspaces/$POLL_WS_ID/a
|
||||
}')
|
||||
|
||||
check "poll-mode A2A returns queued status" '"status":"queued"' "$A2A_RESP"
|
||||
|
||||
# ---------- Phase 3.5: Python parser classifies queued envelope correctly ----------
|
||||
# (#2967) — server emits the queued envelope, the wheel's a2a_response.parse()
|
||||
# MUST classify it as the Queued variant, not Malformed. Pre-#2967 the bare
|
||||
# message/send parser in a2a_client.py:587 misclassified this and returned
|
||||
# "[A2A_ERROR] unexpected response shape", which broke external↔external A2A
|
||||
# on poll-mode peers.
|
||||
#
|
||||
# This phase exercises the actual on-the-wire response from a real
|
||||
# workspace-server (NOT a mocked dict) through the same module the production
|
||||
# wheel ships, so a regression in either the server emit shape OR the client
|
||||
# parser fails this E2E.
|
||||
|
||||
echo ""
|
||||
echo "--- Phase 3.5: Python parser classifies real server response (#2967) ---"
|
||||
|
||||
# Pipe the queued response captured above through a2a_response.parse and
|
||||
# assert the classification. WORKSPACE_ID is required at module import
|
||||
# time but irrelevant to this parsing call (any UUID is fine).
|
||||
# The captured response body is fed to Python on stdin and the module
# directory through the environment, instead of being spliced into the
# program text. A body containing quotes (or a checkout path containing a
# quote) therefore cannot break or alter the Python source.
# Output: line 1 is the variant class name; for a Queued variant, line 2
# carries "method=… delivery_mode=…".
PARSE_RESULT=$(printf '%s' "$A2A_RESP" | \
  WORKSPACE_ID="00000000-0000-0000-0000-000000000001" \
  A2A_MODULE_DIR="$(cd "$(dirname "$0")/../../workspace" && pwd)" \
  python3 -c '
import json, os, sys
sys.path.insert(0, os.environ["A2A_MODULE_DIR"])
import a2a_response
data = json.load(sys.stdin)
v = a2a_response.parse(data)
print(type(v).__name__)
if isinstance(v, a2a_response.Queued):
    print(f"method={v.method} delivery_mode={v.delivery_mode}")
')
|
||||
|
||||
check_eq "Python parser classifies real server response as Queued" \
|
||||
"Queued" "$(printf '%s' "$PARSE_RESULT" | head -n1)"
|
||||
check "Queued variant captures method=message/send" \
|
||||
"method=message/send" "$PARSE_RESULT"
|
||||
check "Queued variant captures delivery_mode=poll" \
|
||||
"delivery_mode=poll" "$PARSE_RESULT"
|
||||
check "queued response echoes delivery_mode=poll" '"delivery_mode":"poll"' "$A2A_RESP"
|
||||
check "queued response echoes the JSON-RPC method" '"method":"message/send"' "$A2A_RESP"
|
||||
|
||||
|
||||
@@ -504,6 +504,63 @@ for wid in $WS_TO_CHECK; do
|
||||
fi
|
||||
done
|
||||
|
||||
# ─── 7c. Workspace files API config.yaml round-trip ────────────────────
|
||||
# Pin the config-save path that drives the Canvas Config tab's Save &
|
||||
# Restart. Two failure classes this gate catches in one shot:
|
||||
#
|
||||
# 1. Path map drift (PR #2769). Runtime falls through to the wrong
|
||||
# base path (e.g. /opt/configs when user-data only created /configs)
|
||||
# → SSH `install -D` fails with EACCES on a parent dir that doesn't
|
||||
# exist. The user-visible 500 was unobservable without exercising
|
||||
# this code path on a fresh workspace.
|
||||
# 2. Permission drift on /configs. The path is root-owned by cloud-init,
|
||||
# so the SSH-as-ubuntu install needs `sudo -n`. Any future change
|
||||
# that drops the sudo, switches to a non-passwordless-sudo OS user,
|
||||
# or moves the path to a non-ubuntu-writable dir without sudo will
|
||||
# regress this gate.
|
||||
#
|
||||
# Intended round-trip: PUT a known marker, GET it back, assert content matches (the GET-back half is currently disabled — see the note inside the loop below).
|
||||
# Marker shape includes the run id so a stale file from a prior canary
|
||||
# can't false-pass.
|
||||
log "7c/11 Files API config.yaml round-trip..."
|
||||
CONFIG_MARKER="# molecule-synth-e2e: ${E2E_RUN_ID:-unknown} ${RUNTIME} $(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
||||
CONFIG_PAYLOAD="${CONFIG_MARKER}
|
||||
name: synth-canary
|
||||
runtime: ${RUNTIME}
|
||||
"
|
||||
# The JSON request body depends only on CONFIG_PAYLOAD, so build it once
# rather than once per workspace.
PUT_BODY=$(python3 -c "import json,sys; print(json.dumps({'content': sys.stdin.read()}))" <<< "$CONFIG_PAYLOAD")
for wid in $WS_TO_CHECK; do
  # Capture body to a tempfile so curl's -w '%{http_code}' is the only
  # thing on stdout. The first version used `-w '\n%{http_code}\n'` and
  # parsed via `tail -n 2 | head -n 1`, which broke because bash $(...)
  # strips the trailing newline → only 2 lines remain in the captured
  # value → head -n 1 returned the body, not the status code. Caught
  # post-merge by E2E Staging SaaS at 22:06 UTC: a 200-with-body got
  # misreported as "PUT returned <body>".
  PUT_TMP=$(mktemp -t synth_put.XXXXXX)
  # Do NOT append `|| echo "000"` inside the substitution: when curl exits
  # non-zero on an HTTP error it has already printed the status, so the
  # fallback would be concatenated onto it (e.g. "500000"). Tolerate the
  # exit code outside the substitution and default only an empty capture.
  PUT_CODE=$(tenant_call PUT "/workspaces/$wid/files/config.yaml" \
    -H "Content-Type: application/json" \
    -d "$PUT_BODY" \
    -o "$PUT_TMP" \
    -w '%{http_code}' \
    2>/dev/null) || true
  PUT_CODE=${PUT_CODE:-000}
  PUT_BODY_OUT=$(cat "$PUT_TMP" 2>/dev/null || echo "")
  rm -f "$PUT_TMP"
  if [ "$PUT_CODE" != "200" ] && [ "$PUT_CODE" != "204" ]; then
    fail "Workspace $wid Files API PUT config.yaml returned $PUT_CODE: $PUT_BODY_OUT — likely a path-map or permission regression in workspace-server template_files_eic.go"
  fi
  # PUT-only check; the GET-back round-trip assertion was dropped
  # 2026-05-04 because PUT (template_files_eic.go SSH-via-EIC →
  # workspace EC2) and GET (templates.go ReadFile → docker exec on
  # platform-tenant-local container) hit DIFFERENT paths and DIFFERENT
  # hosts. The asymmetry is a separate latent bug — Canvas Config tab
  # rendering reads workspace state via other endpoints, not via this
  # GET, so the user-facing Save & Restart works (container reads
  # /configs/config.yaml directly via bind-mount). When the read/write
  # paths are unified, restore the GET-back marker check here.
  ok " $wid config.yaml PUT OK (HTTP $PUT_CODE)"
done
|
||||
|
||||
# ─── 8. A2A round-trip on parent ───────────────────────────────────────
|
||||
log "8/11 Sending A2A message to parent — expecting agent response..."
|
||||
# Smoke prompt phrasing — DO NOT trim back to the bare "Reply with exactly: PONG"
|
||||
@@ -649,8 +706,80 @@ print(json.dumps({
|
||||
d=json.load(sys.stdin)
|
||||
print(len(d if isinstance(d, list) else d.get('events', [])))" 2>/dev/null || echo 0)
|
||||
log " Activity events observed: $ACTIVITY_COUNT"
|
||||
|
||||
# ─── 9c. Workspace KV memory Edit round-trip ─────────────────────────
|
||||
# Pins the Edit affordance added to the canvas Memory tab. The UI calls
|
||||
# POST /workspaces/:id/memory with if_match_version, so the contract is:
|
||||
# 1. initial POST creates row at version 1
|
||||
# 2. GET returns version 1 + value
|
||||
# 3. POST with if_match_version=1 updates → version 2
|
||||
# 4. POST with if_match_version=1 again → 409 (optimistic-lock enforcement)
|
||||
# Without (3) there is no Edit; without (4) two concurrent writers can
|
||||
# silently overwrite each other and the agent loses delegation-ledger state.
|
||||
log "9c. Memory KV Edit round-trip (Edit affordance + 409 gate)"
|
||||
EDIT_KEY="e2e_edit_gate_$SLUG"
|
||||
|
||||
# 1. seed
|
||||
tenant_call POST "/workspaces/$PARENT_ID/memory" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"key\":\"$EDIT_KEY\",\"value\":{\"step\":1}}" >/dev/null \
|
||||
|| fail "memory KV seed POST failed"
|
||||
|
||||
# 2. read back, capture version
|
||||
EDIT_GET=$(tenant_call GET "/workspaces/$PARENT_ID/memory/$EDIT_KEY")
|
||||
EDIT_VER=$(echo "$EDIT_GET" | python3 -c "import json,sys; print(json.load(sys.stdin)['version'])" 2>/dev/null || echo "")
|
||||
[ -z "$EDIT_VER" ] && fail "memory KV GET missing version field. Body: ${EDIT_GET:0:200}"
|
||||
|
||||
# 3. conditional update with matching version
|
||||
tenant_call POST "/workspaces/$PARENT_ID/memory" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"key\":\"$EDIT_KEY\",\"value\":{\"step\":2},\"if_match_version\":$EDIT_VER}" >/dev/null \
|
||||
|| fail "memory KV conditional Edit failed (if_match_version=$EDIT_VER)"
|
||||
|
||||
# 4. value flipped + version incremented?
|
||||
EDIT_GET2=$(tenant_call GET "/workspaces/$PARENT_ID/memory/$EDIT_KEY")
|
||||
EDIT_VAL2=$(echo "$EDIT_GET2" | python3 -c "import json,sys; print(json.load(sys.stdin)['value'].get('step'))" 2>/dev/null || echo "")
|
||||
[ "$EDIT_VAL2" = "2" ] || fail "memory KV Edit did not persist new value. Body: ${EDIT_GET2:0:200}"
|
||||
|
||||
# 5. stale-version POST must 409 — pin the optimistic-lock contract.
|
||||
#
|
||||
# tenant_call uses CURL_COMMON which carries --fail-with-body, so an
|
||||
# expected-409 makes curl exit 22. The previous shape
|
||||
# $(tenant_call ... -w "%{http_code}" || echo "000")
|
||||
# concatenated the captured "409" with the fallback "000" giving a
|
||||
# bogus "409000" value (caught on PR #2792's first E2E run, which is
|
||||
# also why staging-saas E2E has been silent-failing this gate since
|
||||
# PR #2787 merged). Fix: route the status code into its own tempfile
|
||||
# so curl's exit code can't pollute the captured stdout. set +e/-e
|
||||
# keeps the 22 from tripping the outer `set -e` pipeline.
|
||||
# Per-run tempfiles (same mktemp pattern as the 7c PUT check) instead of
# fixed /tmp names: concurrent runs on one host cannot clobber each other,
# and the files are removed once their contents have been read.
EDIT_STALE_RESP_FILE=$(mktemp -t memory_stale_resp.XXXXXX)
EDIT_STALE_CODE_FILE=$(mktemp -t memory_stale_code.XXXXXX)
# The expected 409 makes curl exit non-zero; set +e keeps that from
# tripping the outer `set -e`.
set +e
tenant_call POST "/workspaces/$PARENT_ID/memory" \
  -H "Content-Type: application/json" \
  -d "{\"key\":\"$EDIT_KEY\",\"value\":{\"step\":3},\"if_match_version\":$EDIT_VER}" \
  -o "$EDIT_STALE_RESP_FILE" -w "%{http_code}" >"$EDIT_STALE_CODE_FILE" 2>/dev/null
set -e
EDIT_STALE_CODE=$(cat "$EDIT_STALE_CODE_FILE" 2>/dev/null || echo "000")
# An empty code file (request never reached curl's -w) is reported as 000.
EDIT_STALE_CODE=${EDIT_STALE_CODE:-000}
EDIT_STALE_RESP=$(head -c 200 "$EDIT_STALE_RESP_FILE" 2>/dev/null || true)
rm -f "$EDIT_STALE_RESP_FILE" "$EDIT_STALE_CODE_FILE"
[ "$EDIT_STALE_CODE" = "409" ] || fail "memory KV stale Edit must 409 (optimistic-lock). Got '$EDIT_STALE_CODE': $EDIT_STALE_RESP"
|
||||
|
||||
# cleanup
|
||||
tenant_call DELETE "/workspaces/$PARENT_ID/memory/$EDIT_KEY" >/dev/null 2>&1 || true
|
||||
ok "Memory KV Edit round-trip + 409 gate passed"
|
||||
|
||||
# ─── 9d. shared_context removal gate ─────────────────────────────────
|
||||
# Pin the deletion of GET /workspaces/:id/shared-context. The route + handler
|
||||
# were removed; team-shared knowledge now flows through memory v2's
|
||||
# team:<id> namespace. If anyone re-introduces a shared-context endpoint
|
||||
# without going through RFC #2789, this gate fires.
|
||||
# Probe the removed route and capture only the HTTP status. No
# `|| echo "000"` inside the substitution: on an HTTP error curl has already
# printed the status before exiting non-zero, so the fallback would be
# appended to it (e.g. "404000"). set +e already tolerates the exit code.
set +e
SC_CODE=$(tenant_call GET "/workspaces/$PARENT_ID/shared-context" \
  -o /dev/null -w "%{http_code}" 2>/dev/null)
set -e
# Nothing captured at all means the request never produced a status.
SC_CODE=${SC_CODE:-000}
if [ "$SC_CODE" = "200" ]; then
  fail "shared-context route should be gone but returned 200 — regression. See task #304."
fi
ok "shared-context route confirmed removed (HTTP $SC_CODE)"
|
||||
else
|
||||
log "9/11 Canary mode — skipping HMA / peers / activity"
|
||||
log "9/11 Canary mode — skipping HMA / peers / activity / memory-edit / shared-context-gone"
|
||||
fi
|
||||
|
||||
# ─── 10. Delegation mechanics (full mode + child) ──────────────────────
|
||||
|
||||
@@ -94,6 +94,13 @@ services:
|
||||
CP_UPSTREAM_URL: "http://cp-stub:9090"
|
||||
RATE_LIMIT: "1000"
|
||||
CANVAS_PROXY_URL: "http://localhost:3000"
|
||||
# Memory v2 sidecar (PR #2906) bundles the plugin into the
|
||||
# tenant image and starts it before the main server. The plugin
|
||||
# runs `CREATE EXTENSION vector` on first boot, which fails on
|
||||
# the harness's plain postgres:15-alpine (no pgvector). The
|
||||
# harness doesn't exercise memory features, so disable the
|
||||
# sidecar via the entrypoint's documented escape hatch.
|
||||
MEMORY_PLUGIN_DISABLE: "1"
|
||||
networks: [harness-net]
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/health || exit 1"]
|
||||
@@ -142,6 +149,13 @@ services:
|
||||
CP_UPSTREAM_URL: "http://cp-stub:9090"
|
||||
RATE_LIMIT: "1000"
|
||||
CANVAS_PROXY_URL: "http://localhost:3000"
|
||||
# Memory v2 sidecar (PR #2906) bundles the plugin into the
|
||||
# tenant image and starts it before the main server. The plugin
|
||||
# runs `CREATE EXTENSION vector` on first boot, which fails on
|
||||
# the harness's plain postgres:15-alpine (no pgvector). The
|
||||
# harness doesn't exercise memory features, so disable the
|
||||
# sidecar via the entrypoint's documented escape hatch.
|
||||
MEMORY_PLUGIN_DISABLE: "1"
|
||||
networks: [harness-net]
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/health || exit 1"]
|
||||
|
||||
Executable
+238
@@ -0,0 +1,238 @@
|
||||
#!/usr/bin/env bash
|
||||
# tools/branch-protection/apply.sh — idempotently apply branch
|
||||
# protection to molecule-core's `staging` and `main` branches.
|
||||
#
|
||||
# Single source of truth for the protection settings. Diff this file
|
||||
# against the live state (drift_check.sh handles that nightly + on
|
||||
# every PR that touches this directory).
|
||||
#
|
||||
# Why each branch has its OWN payload section instead of a shared
|
||||
# template: pre-2026-05-05 the script generated both branches from a
|
||||
# shared template that hard-coded enforce_admins=false,
|
||||
# dismiss_stale_reviews=true, strict=false, allow_fork_syncing=true,
|
||||
# and dropped bypass_pull_request_allowances. Live staging had
|
||||
# enforce_admins=true, dismiss_stale_reviews=false, strict=true,
|
||||
# allow_fork_syncing=false, and a bypass list. Running the script
|
||||
# would have silently weakened protection on every dimension at once.
|
||||
# Per-branch payloads codify the deliberate per-branch policy that
|
||||
# already lives on the repo, with the script's net contribution
|
||||
# being ONLY the explicit additions to required_status_checks.
|
||||
#
|
||||
# Per memory feedback_dismiss_stale_reviews_blocks_promote.md,
|
||||
# dismiss_stale_reviews=true silently re-blocks every auto-promote PR
|
||||
# (cost the user 2.5h once already on staging — confirming we keep
|
||||
# this OFF on staging is load-bearing for the auto-promote chain).
|
||||
#
|
||||
# Usage:
|
||||
# tools/branch-protection/apply.sh # apply both branches
|
||||
# tools/branch-protection/apply.sh --dry-run # show payload only
|
||||
# tools/branch-protection/apply.sh --branch staging
|
||||
# tools/branch-protection/apply.sh --skip-preflight # skip check-name validation
|
||||
#
|
||||
# Requires: gh CLI authenticated as a repo admin. The script uses gh's
|
||||
# token (no separate PAT needed).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
REPO="Molecule-AI/molecule-core"
|
||||
DRY_RUN=0
|
||||
ONLY_BRANCH=""
|
||||
SKIP_PREFLIGHT=0
|
||||
|
||||
# Parse command-line flags into DRY_RUN / ONLY_BRANCH / SKIP_PREFLIGHT.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run) DRY_RUN=1; shift ;;
    --branch)
      # Under `set -u` a trailing --branch would otherwise die with a bare
      # "unbound variable" error; say what is actually wrong.
      if [[ $# -lt 2 ]]; then
        echo "--branch requires a value (staging or main)" >&2
        exit 1
      fi
      ONLY_BRANCH="$2"; shift 2 ;;
    --skip-preflight) SKIP_PREFLIGHT=1; shift ;;
    -h|--help)
      echo "Usage: $0 [--dry-run] [--branch <name>] [--skip-preflight]"
      exit 0
      ;;
    *) echo "Unknown arg: $1" >&2; exit 1 ;;
  esac
done

# Only staging and main have payloads. Any other name would match neither
# apply step, so the script would exit 0 having applied nothing — reject it.
case "$ONLY_BRANCH" in
  ""|staging|main) ;;
  *) echo "Unknown branch: $ONLY_BRANCH (expected staging or main)" >&2; exit 1 ;;
esac
|
||||
|
||||
# ─── Required-check matrices ──────────────────────────────────────
|
||||
# Each branch's set is the canonical list of check NAMES (from each
|
||||
# workflow's job-name). Adding/removing a check here is the place to
|
||||
# do it. Match docs/e2e-coverage.md.
|
||||
|
||||
read -r -d '' STAGING_CHECKS <<'EOF' || true
|
||||
Analyze (go)
|
||||
Analyze (javascript-typescript)
|
||||
Analyze (python)
|
||||
Canvas (Next.js)
|
||||
Canvas tabs E2E
|
||||
Detect changes
|
||||
E2E API Smoke Test
|
||||
Platform (Go)
|
||||
Python Lint & Test
|
||||
Scan diff for credential-shaped strings
|
||||
Shellcheck (E2E scripts)
|
||||
EOF
|
||||
|
||||
read -r -d '' MAIN_CHECKS <<'EOF' || true
|
||||
Analyze (go)
|
||||
Analyze (javascript-typescript)
|
||||
Analyze (python)
|
||||
Canvas (Next.js)
|
||||
Canvas tabs E2E
|
||||
Detect changes
|
||||
E2E API Smoke Test
|
||||
PR-built wheel + import smoke
|
||||
Platform (Go)
|
||||
Python Lint & Test
|
||||
Scan diff for credential-shaped strings
|
||||
Shellcheck (E2E scripts)
|
||||
EOF
|
||||
|
||||
# checks_to_json NAMES
# Turn a newline-separated list of check names into the JSON array used for
# required_status_checks.checks: one {context, app_id: -1} object per
# non-empty line, in input order. Printed on stdout.
checks_to_json() {
  local names="$1"
  # The here-string supplies the same trailing newline the printf form did.
  jq -Rs '[ split("\n")[] | select(length > 0) | {context: ., app_id: -1} ]' <<< "$names"
}
|
||||
|
||||
# ─── Per-branch payloads (each preserves live-state policy) ───────
|
||||
# Staging payload — preserves the live values that pre-2026-05-05's
|
||||
# apply.sh would have silently rewritten:
|
||||
# enforce_admins=true, dismiss_stale_reviews=false, strict=true,
|
||||
# allow_fork_syncing=false, bypass list = HongmingWang-Rabbit + molecule-ai app.
|
||||
# build_staging_payload
# Print the full branch-protection payload for `staging` on stdout. The
# required checks come from STAGING_CHECKS; every other value is a literal.
build_staging_payload() {
  local required
  required=$(checks_to_json "$STAGING_CHECKS")
  jq -n --argjson required "$required" '
    {
      "required_status_checks": {"strict": true, "checks": $required},
      "enforce_admins": true,
      "required_pull_request_reviews": {
        "required_approving_review_count": 1,
        "dismiss_stale_reviews": false,
        "require_code_owner_reviews": false,
        "require_last_push_approval": false,
        "bypass_pull_request_allowances": {
          "users": ["HongmingWang-Rabbit"],
          "teams": [],
          "apps": ["molecule-ai"]
        }
      },
      "restrictions": null,
      "allow_deletions": false,
      "allow_force_pushes": false,
      "block_creations": false,
      "required_conversation_resolution": true,
      "required_linear_history": false,
      "lock_branch": false,
      "allow_fork_syncing": false
    }'
}
|
||||
|
||||
# Main payload — preserves the live values:
|
||||
# enforce_admins=false, dismiss_stale_reviews=true, strict=true,
|
||||
# allow_fork_syncing=false, NO bypass list.
|
||||
# main intentionally has different settings than staging because main
|
||||
# is the deploy target — the auto-promote app pushes to main without
|
||||
# the friction of an admin-bypass list, and stale-review dismissal
|
||||
# is acceptable here because every change has already cleared
|
||||
# staging review.
|
||||
# build_main_payload
# Print the full branch-protection payload for `main` on stdout. The
# required checks come from MAIN_CHECKS; every other value is a literal.
# Unlike staging, it sets no bypass_pull_request_allowances key.
build_main_payload() {
  local required
  required=$(checks_to_json "$MAIN_CHECKS")
  jq -n --argjson required "$required" '
    {
      "required_status_checks": {"strict": true, "checks": $required},
      "enforce_admins": false,
      "required_pull_request_reviews": {
        "required_approving_review_count": 1,
        "dismiss_stale_reviews": true,
        "require_code_owner_reviews": false,
        "require_last_push_approval": false
      },
      "restrictions": null,
      "allow_deletions": false,
      "allow_force_pushes": false,
      "block_creations": false,
      "required_conversation_resolution": true,
      "required_linear_history": false,
      "lock_branch": false,
      "allow_fork_syncing": false
    }'
}
|
||||
|
||||
# ─── R3 preflight: validate every desired check name has at least
|
||||
# one historical run ──────────────────────────────────────────────
|
||||
# Pre-fix the script accepted arbitrary strings into
|
||||
# required_status_checks.checks. A typo like "Canvas Tabs E2E" vs
|
||||
# "Canvas tabs E2E" → GH accepts → every PR is blocked forever
|
||||
# waiting for a context that never emits. The preflight hits the
|
||||
# /commits/{sha}/check-runs endpoint and asserts each desired name
|
||||
# has at least one matching run. Skippable via --skip-preflight for
|
||||
# the case where you're adding a brand-new workflow whose first run
|
||||
# hasn't fired yet.
|
||||
# preflight_check_names BRANCH CHECKS
# Verify that every name in CHECKS (newline-separated) appears among the
# check-runs recorded for BRANCH's tip commit.
# Returns 0 when all names are found, or when the tip SHA cannot be resolved
# (warning only). Returns 1 when any name is missing or when the check-run
# list cannot be fetched. Diagnostics go to stderr.
preflight_check_names() {
  local branch="$1"
  local checks="$2"
  local sha
  sha=$(gh api "repos/$REPO/commits/$branch" --jq '.sha' 2>/dev/null || echo "")
  if [[ -z "$sha" ]]; then
    echo "preflight: WARN cannot resolve $branch tip SHA, skipping check-name validation" >&2
    return 0
  fi
  # --paginate walks every page: a single per_page=100 request would cut the
  # list off at 100 runs and report valid names on later pages as typos.
  # The jq filter runs per page and prints one check name per line.
  local known_names
  if ! known_names=$(gh api --paginate "repos/$REPO/commits/$sha/check-runs?per_page=100" \
      --jq '.check_runs[].name' 2>/dev/null); then
    # Report an API failure as such rather than as "every name is missing".
    echo "preflight: $branch — could not list check-runs for $sha; cannot validate check names" >&2
    return 1
  fi
  local missing=()
  local name
  while IFS= read -r name; do
    [[ -z "$name" ]] && continue
    # Exact, whole-line, fixed-string match: names contain parentheses and
    # must not be treated as patterns.
    if ! grep -Fxq -- "$name" <<< "$known_names"; then
      missing+=("$name")
    fi
  done <<< "$checks"
  if [[ ${#missing[@]} -gt 0 ]]; then
    echo "preflight: $branch — these check names are NOT in the historical check-runs for the tip SHA:" >&2
    printf ' - %s\n' "${missing[@]}" >&2
    echo "If they're truly new (workflow added but never run), re-run with --skip-preflight." >&2
    echo "Otherwise typos here will permanently block every PR — fix the names." >&2
    return 1
  fi
  return 0
}
|
||||
|
||||
# apply_branch BRANCH CHECKS PAYLOAD_FN
# Build the payload by calling PAYLOAD_FN, then either print it (dry-run) or
# PUT it to BRANCH's protection endpoint. Unless preflight is skipped, the
# names in CHECKS are validated first and a failure aborts with status 1.
apply_branch() {
  local target="$1" wanted_checks="$2" builder="$3"
  local body
  body=$($builder)

  # Dry-run: show what would be sent and stop before any API call.
  if [[ "$DRY_RUN" -eq 1 ]]; then
    echo "=== branch: $target ==="
    echo "$body" | jq .
    return
  fi

  if [[ "$SKIP_PREFLIGHT" -eq 0 ]] && ! preflight_check_names "$target" "$wanted_checks"; then
    echo "FAIL: preflight on $target caught typos or missing workflows. Aborting." >&2
    return 1
  fi

  echo "Applying branch protection on $target..."
  printf '%s' "$body" \
    | gh api -X PUT "repos/$REPO/branches/$target/protection" --input -
  echo "Applied: $target"
}
|
||||
|
||||
# Apply staging, then main. An empty ONLY_BRANCH selects both; otherwise
# only the step whose name matches runs.
case "$ONLY_BRANCH" in
  ""|staging) apply_branch staging "$STAGING_CHECKS" build_staging_payload ;;
esac
case "$ONLY_BRANCH" in
  ""|main) apply_branch main "$MAIN_CHECKS" build_main_payload ;;
esac
|
||||
Executable
+157
@@ -0,0 +1,157 @@
|
||||
#!/usr/bin/env bash
|
||||
# tools/branch-protection/drift_check.sh — compare the live branch
|
||||
# protection on staging + main against what apply.sh would set. Used
|
||||
# by branch-protection-drift.yml (cron) to catch out-of-band UI edits.
|
||||
#
|
||||
# Pre-2026-05-05 version diffed only required_status_checks.checks —
|
||||
# would have missed a UI click that flipped enforce_admins or
|
||||
# dismiss_stale_reviews. Now compares the full normalized payload so
|
||||
# any silent rewrite of admin/review/lock/deletion settings trips the
|
||||
# drift gate.
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 — live state matches the script
|
||||
# 1 — drift detected (output shows the diff)
|
||||
# 2 — gh API call failed
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
REPO="Molecule-AI/molecule-core"
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
EXIT_CODE=0
|
||||
|
||||
# Normalise the GET /branches/:b/protection response so we can compare
|
||||
# against apply.sh's payload. The GET response inflates booleans into
|
||||
# {url, enabled} sub-objects and bypass list users/apps into full
|
||||
# user/app objects with avatar_url etc — strip those down to match
|
||||
# the input shape.
|
||||
NORMALISE_LIVE='{
|
||||
required_status_checks: (
|
||||
.required_status_checks
|
||||
| { strict: .strict,
|
||||
checks: (.checks | map({context}) | sort_by(.context)) }
|
||||
),
|
||||
enforce_admins: (
|
||||
if (.enforce_admins | type) == "object"
|
||||
then .enforce_admins.enabled
|
||||
else .enforce_admins end
|
||||
),
|
||||
required_pull_request_reviews: (
|
||||
.required_pull_request_reviews
|
||||
| if . == null then null else
|
||||
{ required_approving_review_count,
|
||||
dismiss_stale_reviews,
|
||||
require_code_owner_reviews,
|
||||
require_last_push_approval,
|
||||
bypass_pull_request_allowances: (
|
||||
if .bypass_pull_request_allowances == null then null
|
||||
else {
|
||||
users: (.bypass_pull_request_allowances.users // [] | map(.login) | sort),
|
||||
teams: (.bypass_pull_request_allowances.teams // [] | map(.slug) | sort),
|
||||
apps: (.bypass_pull_request_allowances.apps // [] | map(.slug) | sort)
|
||||
} end
|
||||
)
|
||||
}
|
||||
end
|
||||
),
|
||||
restrictions: (
|
||||
if .restrictions == null then null
|
||||
else { users: (.restrictions.users | map(.login) | sort),
|
||||
teams: (.restrictions.teams | map(.slug) | sort),
|
||||
apps: (.restrictions.apps | map(.slug) | sort) }
|
||||
end
|
||||
),
|
||||
allow_deletions: (
|
||||
if (.allow_deletions | type) == "object" then .allow_deletions.enabled
|
||||
else (.allow_deletions // false) end
|
||||
),
|
||||
allow_force_pushes: (
|
||||
if (.allow_force_pushes | type) == "object" then .allow_force_pushes.enabled
|
||||
else (.allow_force_pushes // false) end
|
||||
),
|
||||
block_creations: (
|
||||
if (.block_creations | type) == "object" then .block_creations.enabled
|
||||
else (.block_creations // false) end
|
||||
),
|
||||
required_conversation_resolution: (
|
||||
if (.required_conversation_resolution | type) == "object"
|
||||
then .required_conversation_resolution.enabled
|
||||
else (.required_conversation_resolution // false) end
|
||||
),
|
||||
required_linear_history: (
|
||||
if (.required_linear_history | type) == "object" then .required_linear_history.enabled
|
||||
else (.required_linear_history // false) end
|
||||
),
|
||||
lock_branch: (
|
||||
if (.lock_branch | type) == "object" then .lock_branch.enabled
|
||||
else (.lock_branch // false) end
|
||||
),
|
||||
allow_fork_syncing: (
|
||||
if (.allow_fork_syncing | type) == "object" then .allow_fork_syncing.enabled
|
||||
else (.allow_fork_syncing // false) end
|
||||
)
|
||||
}'
|
||||
|
||||
# Apply.sh's payload is already in the input shape; we just need to
|
||||
# canonicalise the checks order and fill in optional fields with their
|
||||
# defaults so the comparison aligns.
|
||||
NORMALISE_SCRIPT='{
|
||||
required_status_checks: {
|
||||
strict: .required_status_checks.strict,
|
||||
checks: (.required_status_checks.checks | map({context}) | sort_by(.context))
|
||||
},
|
||||
enforce_admins: .enforce_admins,
|
||||
required_pull_request_reviews: (
|
||||
if .required_pull_request_reviews == null then null else
|
||||
{ required_approving_review_count: .required_pull_request_reviews.required_approving_review_count,
|
||||
dismiss_stale_reviews: .required_pull_request_reviews.dismiss_stale_reviews,
|
||||
require_code_owner_reviews: (.required_pull_request_reviews.require_code_owner_reviews // false),
|
||||
require_last_push_approval: (.required_pull_request_reviews.require_last_push_approval // false),
|
||||
bypass_pull_request_allowances: (
|
||||
if .required_pull_request_reviews.bypass_pull_request_allowances == null then null
|
||||
else {
|
||||
users: (.required_pull_request_reviews.bypass_pull_request_allowances.users // [] | sort),
|
||||
teams: (.required_pull_request_reviews.bypass_pull_request_allowances.teams // [] | sort),
|
||||
apps: (.required_pull_request_reviews.bypass_pull_request_allowances.apps // [] | sort)
|
||||
} end
|
||||
)
|
||||
}
|
||||
end
|
||||
),
|
||||
restrictions: .restrictions,
|
||||
allow_deletions: (.allow_deletions // false),
|
||||
allow_force_pushes: (.allow_force_pushes // false),
|
||||
block_creations: (.block_creations // false),
|
||||
required_conversation_resolution: (.required_conversation_resolution // false),
|
||||
required_linear_history: (.required_linear_history // false),
|
||||
lock_branch: (.lock_branch // false),
|
||||
allow_fork_syncing: (.allow_fork_syncing // false)
|
||||
}'
|
||||
|
||||
# check_branch BRANCH
#
# Compares the desired protection for BRANCH (apply.sh --dry-run output,
# normalised with NORMALISE_SCRIPT) against the live protection fetched via
# gh (normalised with NORMALISE_LIVE).
#
# Returns:
#   0 — normalised payloads are identical
#   1 — they differ (a diff is printed)
#   2 — the gh API call failed
check_branch() {
  local branch="$1"
  local desired live_json live

  # apply.sh prints a banner line before the JSON; sed keeps only the span
  # from the opening "{" line to the closing "}" line.
  desired=$(bash "$SCRIPT_DIR/apply.sh" --dry-run --branch "$branch" 2>&1 \
    | sed -n '/^{$/,/^}$/p' \
    | jq -S "$NORMALISE_SCRIPT")

  if ! live_json=$(gh api "repos/$REPO/branches/$branch/protection" 2>/dev/null); then
    echo "drift_check: FAIL to fetch $branch protection (gh API error)"
    return 2
  fi
  live=$(jq -S "$NORMALISE_LIVE" <<< "$live_json")

  if [[ "$desired" == "$live" ]]; then
    echo "OK: $branch matches desired state"
    return 0
  fi

  echo "=== DRIFT on $branch ==="
  # diff exits non-zero when inputs differ; that is the expected case here.
  diff <(echo "$desired") <(echo "$live") || true
  return 1
}
|
||||
|
||||
# Check every protected branch and exit with the most severe result seen, so
# the exit codes documented in the header hold:
#   0 — all branches match, 1 — drift detected, 2 — gh API call failed.
# Collapsing every failure to 1 would make an API outage indistinguishable
# from real drift for the cron workflow consuming this script.
for b in staging main; do
  rc=0
  # `|| rc=$?` captures check_branch's status without tripping `set -e`.
  check_branch "$b" || rc=$?
  if (( rc > EXIT_CODE )); then
    EXIT_CODE=$rc
  fi
done

exit "$EXIT_CODE"
|
||||
@@ -21,6 +21,14 @@ ARG GIT_SHA=dev
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build \
|
||||
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
|
||||
-o /platform ./cmd/server
|
||||
# Bundle the built-in memory-plugin-postgres binary so an operator can
|
||||
# activate Memory v2 by setting MEMORY_V2_CUTOVER=true + (default)
|
||||
# MEMORY_PLUGIN_URL=http://localhost:9100. The entrypoint starts this
|
||||
# binary in the background; main /platform talks to it over loopback.
|
||||
# Stays inert until the operator flips the cutover env var.
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build \
|
||||
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
|
||||
-o /memory-plugin ./cmd/memory-plugin-postgres
|
||||
|
||||
# Clone templates + plugins at build time from manifest.json
|
||||
FROM alpine:3.20 AS templates
|
||||
@@ -30,8 +38,9 @@ COPY scripts/clone-manifest.sh /scripts/clone-manifest.sh
|
||||
RUN chmod +x /scripts/clone-manifest.sh && /scripts/clone-manifest.sh /manifest.json /workspace-configs-templates /org-templates /plugins
|
||||
|
||||
FROM alpine:3.20
|
||||
RUN apk add --no-cache ca-certificates git tzdata
|
||||
RUN apk add --no-cache ca-certificates git tzdata wget
|
||||
COPY --from=builder /platform /platform
|
||||
COPY --from=builder /memory-plugin /memory-plugin
|
||||
COPY workspace-server/migrations /migrations
|
||||
COPY --from=templates /workspace-configs-templates /workspace-configs-templates
|
||||
COPY --from=templates /org-templates /org-templates
|
||||
@@ -41,6 +50,7 @@ RUN addgroup -g 1000 platform && adduser -u 1000 -G platform -s /bin/sh -D platf
|
||||
EXPOSE 8080
|
||||
COPY <<'ENTRY' /entrypoint.sh
|
||||
#!/bin/sh
|
||||
# Set up docker-socket group (unchanged from pre-sidecar entrypoint).
|
||||
if [ -S /var/run/docker.sock ]; then
|
||||
SOCK_GID=$(stat -c '%g' /var/run/docker.sock 2>/dev/null || stat -f '%g' /var/run/docker.sock 2>/dev/null)
|
||||
if [ -n "$SOCK_GID" ] && [ "$SOCK_GID" != "0" ]; then
|
||||
@@ -50,6 +60,61 @@ if [ -S /var/run/docker.sock ]; then
|
||||
addgroup platform root 2>/dev/null || true
|
||||
fi
|
||||
fi
|
||||
|
||||
# Memory v2 sidecar (built-in postgres plugin). Co-located with the
|
||||
# main server so operators flipping MEMORY_V2_CUTOVER=true don't need
|
||||
# to provision a separate service.
|
||||
#
|
||||
# Spawn-gating: only start the sidecar when the operator has indicated
|
||||
# they want it — either MEMORY_V2_CUTOVER=true OR MEMORY_PLUGIN_URL set.
|
||||
# Without that signal, the sidecar adds zero value (the platform's
|
||||
# wiring.go skips building the client too) but pays a real cost: the
|
||||
# plugin's first migration runs `CREATE EXTENSION vector`, which fails
|
||||
# on tenant Postgres without pgvector preinstalled and aborts container
|
||||
# boot via the 30s health gate. Caught on staging redeploy 2026-05-05.
|
||||
#
|
||||
# Env defaults (when sidecar IS spawned):
|
||||
# MEMORY_PLUGIN_DATABASE_URL = $DATABASE_URL (share existing Postgres;
|
||||
# plugin's `memory_namespaces` / `memory_records` tables coexist
|
||||
# with `agent_memories` and the rest of the platform schema —
|
||||
# no conflicts. Operator can override with a separate URL.)
|
||||
# MEMORY_PLUGIN_LISTEN_ADDR = 127.0.0.1:9100
|
||||
#
|
||||
# Set MEMORY_PLUGIN_DISABLE=1 to force-skip the sidecar even with
|
||||
# cutover env set (e.g. running the plugin externally on a separate host).
|
||||
# Decide whether the operator asked for the Memory v2 sidecar at all.
memory_plugin_wanted=""
if [ "$MEMORY_V2_CUTOVER" = "true" ] || [ -n "$MEMORY_PLUGIN_URL" ]; then
  memory_plugin_wanted=1
fi

if [ -z "$MEMORY_PLUGIN_DISABLE" ] && [ -n "$memory_plugin_wanted" ] && [ -n "$DATABASE_URL" ]; then
  : "${MEMORY_PLUGIN_DATABASE_URL:=$DATABASE_URL}"
  # Loopback-only by default, matching the documented default above and the
  # plugin binary's own default. Exporting ":9100" here would override that
  # and bind the unauthenticated plugin on every interface.
  : "${MEMORY_PLUGIN_LISTEN_ADDR:=127.0.0.1:9100}"
  export MEMORY_PLUGIN_DATABASE_URL MEMORY_PLUGIN_LISTEN_ADDR
  echo "memory-plugin: starting sidecar on $MEMORY_PLUGIN_LISTEN_ADDR" >&2
  # Drop privs to the platform user — the plugin doesn't need root and
  # runs unprivileged elsewhere (tenant image already starts as canvas).
  su-exec platform /memory-plugin &
  MEMORY_PLUGIN_PID=$!

  # Wait up to 30s for the plugin's /v1/health to return 200. Boot
  # failure here is fatal — better to crash-loop than to silently
  # serve cutover traffic against a dead plugin.
  #
  # The port is everything after the LAST colon, so ":9100",
  # "127.0.0.1:9100" and "0.0.0.0:9100" all resolve to 9100. Stripping only
  # a leading colon would turn "127.0.0.1:9100" into an invalid URL.
  health_port=${MEMORY_PLUGIN_LISTEN_ADDR##*:}
  ready=0
  for _ in $(seq 1 30); do
    # Probe the IPv4 loopback explicitly: with a 127.0.0.1 bind, "localhost"
    # may resolve to ::1 first and never reach the listener.
    if wget -qO- --timeout=2 "http://127.0.0.1:${health_port}/v1/health" >/dev/null 2>&1; then
      ready=1
      break
    fi
    sleep 1
  done
  if [ "$ready" != "1" ]; then
    echo "memory-plugin: ❌ /v1/health never returned 200 after 30s — aborting boot. Check that DATABASE_URL is reachable, has the pgvector extension, and the plugin's migrations applied." >&2
    kill "$MEMORY_PLUGIN_PID" 2>/dev/null || true
    exit 1
  fi
  echo "memory-plugin: ✅ sidecar healthy on :$health_port" >&2
fi
|
||||
|
||||
exec su-exec platform /platform "$@"
|
||||
ENTRY
|
||||
RUN chmod +x /entrypoint.sh && apk add --no-cache su-exec
|
||||
|
||||
@@ -34,6 +34,13 @@ ARG GIT_SHA=dev
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build \
|
||||
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
|
||||
-o /platform ./cmd/server
|
||||
# Memory v2 sidecar binary (Memory v2 #2728). Bundled so an operator
|
||||
# can activate cutover by flipping MEMORY_V2_CUTOVER=true without
|
||||
# provisioning a separate service. See entrypoint-tenant.sh for the
|
||||
# launch logic.
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build \
|
||||
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
|
||||
-o /memory-plugin ./cmd/memory-plugin-postgres
|
||||
|
||||
# ── Stage 2: Canvas Next.js standalone ────────────────────────────────
|
||||
FROM node:20-alpine AS canvas-builder
|
||||
@@ -74,8 +81,9 @@ RUN deluser --remove-home node 2>/dev/null || true; \
|
||||
delgroup node 2>/dev/null || true; \
|
||||
addgroup -g 1000 canvas && adduser -u 1000 -G canvas -s /bin/sh -D canvas
|
||||
|
||||
# Go platform binary
|
||||
# Go platform binary + Memory v2 sidecar
|
||||
COPY --from=go-builder /platform /platform
|
||||
COPY --from=go-builder /memory-plugin /memory-plugin
|
||||
COPY workspace-server/migrations /migrations
|
||||
|
||||
# Templates + plugins (cloned from GitHub in stage 3)
|
||||
@@ -91,7 +99,7 @@ COPY --from=canvas-builder /canvas/public ./public
|
||||
|
||||
COPY workspace-server/entrypoint-tenant.sh /entrypoint.sh
|
||||
RUN chmod +x /entrypoint.sh && \
|
||||
chown -R canvas:canvas /canvas /platform /migrations
|
||||
chown -R canvas:canvas /canvas /platform /memory-plugin /migrations
|
||||
|
||||
EXPOSE 8080
|
||||
# entrypoint.sh starts as root to fix volume perms, then drops to
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestLoadConfig_DefaultListenAddrIsLoopback pins the default-bind contract.
|
||||
//
|
||||
// Why this matters: with the prior `:9100` default, the plugin listened on
|
||||
// every interface. Inside the container it didn't matter (no host port
|
||||
// mapping today), but a future change that publishes 9100 OR a cross-host
|
||||
// sidecar deploy would have exposed an unauth'd memory store. Loopback by
|
||||
// default is the least-privilege baseline; operators with a multi-host
|
||||
// topology override via MEMORY_PLUGIN_LISTEN_ADDR.
|
||||
func TestLoadConfig_DefaultListenAddrIsLoopback(t *testing.T) {
|
||||
t.Setenv("MEMORY_PLUGIN_DATABASE_URL", "postgres://stub")
|
||||
t.Setenv("MEMORY_PLUGIN_LISTEN_ADDR", "")
|
||||
|
||||
cfg, err := loadConfig()
|
||||
if err != nil {
|
||||
t.Fatalf("loadConfig: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(cfg.ListenAddr, "127.0.0.1:") {
|
||||
t.Errorf("default ListenAddr must bind loopback-only, got %q "+
|
||||
"(security regression — would expose plugin on every interface)",
|
||||
cfg.ListenAddr)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfig_ListenAddrEnvOverride(t *testing.T) {
|
||||
t.Setenv("MEMORY_PLUGIN_DATABASE_URL", "postgres://stub")
|
||||
t.Setenv("MEMORY_PLUGIN_LISTEN_ADDR", ":9100")
|
||||
|
||||
cfg, err := loadConfig()
|
||||
if err != nil {
|
||||
t.Fatalf("loadConfig: %v", err)
|
||||
}
|
||||
if cfg.ListenAddr != ":9100" {
|
||||
t.Errorf("env override ignored: want :9100, got %q", cfg.ListenAddr)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfig_MissingDatabaseURL(t *testing.T) {
|
||||
t.Setenv("MEMORY_PLUGIN_DATABASE_URL", "")
|
||||
|
||||
if _, err := loadConfig(); err == nil {
|
||||
t.Fatal("loadConfig must error when MEMORY_PLUGIN_DATABASE_URL is empty")
|
||||
}
|
||||
}
|
||||
@@ -10,6 +10,7 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"embed"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
@@ -17,6 +18,7 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"sort"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
@@ -26,12 +28,28 @@ import (
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/pgplugin"
|
||||
)
|
||||
|
||||
// migrationsFS bundles the .up.sql files into the binary at build time
|
||||
// so the prebuilt image doesn't need the source tree at runtime. The
|
||||
// prior `os.ReadDir("cmd/memory-plugin-postgres/migrations")` path
|
||||
// only resolved during `go test` from the repo root — in the published
|
||||
// image the path didn't exist and boot failed after the 30s health gate
|
||||
// (caught on staging redeploy 2026-05-05 after PR #2906).
|
||||
//
|
||||
//go:embed migrations/*.up.sql
|
||||
var migrationsFS embed.FS
|
||||
|
||||
const (
|
||||
envDatabaseURL = "MEMORY_PLUGIN_DATABASE_URL"
|
||||
envListenAddr = "MEMORY_PLUGIN_LISTEN_ADDR"
|
||||
envSkipMigrate = "MEMORY_PLUGIN_SKIP_MIGRATE"
|
||||
|
||||
defaultListenAddr = ":9100"
|
||||
// Loopback-only by default (defense in depth). The platform talks to
|
||||
// the plugin over `http://localhost:9100` from the same container, so
|
||||
// binding to all interfaces would only widen the reachable surface
|
||||
// without enabling any in-design caller. Operators running the plugin
|
||||
// on a separate host override via MEMORY_PLUGIN_LISTEN_ADDR=:9100 (or
|
||||
// some other interface).
|
||||
defaultListenAddr = "127.0.0.1:9100"
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -143,32 +161,71 @@ func openDB(databaseURL string) (*sql.DB, error) {
|
||||
return db, nil
|
||||
}
|
||||
|
||||
// runMigrations applies the schema migrations bundled at
|
||||
// cmd/memory-plugin-postgres/migrations/. Idempotent on repeat boot.
|
||||
// runMigrations applies the schema migrations bundled into the binary
|
||||
// via go:embed (see migrationsFS at the top of this file). Idempotent
|
||||
// on repeat boot — every migration file uses CREATE … IF NOT EXISTS.
|
||||
//
|
||||
// Implementation note: rather than embedding the full migrate engine,
|
||||
// we read the migration files at boot from a known relative path. The
|
||||
// down migrations are deliberately NOT applied here — that's a manual
|
||||
// operator action. This keeps the binary tiny and avoids dragging in
|
||||
// golang-migrate's drivers.
|
||||
// The down migrations are deliberately NOT applied here — that's a
|
||||
// manual operator action. This keeps the binary tiny and avoids
|
||||
// dragging in golang-migrate's drivers.
|
||||
//
|
||||
// MEMORY_PLUGIN_MIGRATIONS_DIR (filesystem path) is honored as an
|
||||
// override for operators who need to ship custom migrations alongside
|
||||
// the binary without rebuilding. When unset (the common case) we read
|
||||
// from the embedded FS.
|
||||
func runMigrations(db *sql.DB) error {
|
||||
// Find the migrations directory. In `go run` mode it's relative
|
||||
// to the cmd dir; in the prebuilt binary case it's expected next
|
||||
// to the binary OR via env var override.
|
||||
dir := os.Getenv("MEMORY_PLUGIN_MIGRATIONS_DIR")
|
||||
if dir == "" {
|
||||
// Best-effort: try the cwd-relative path that works for `go test`.
|
||||
dir = "cmd/memory-plugin-postgres/migrations"
|
||||
if dir := strings.TrimSpace(os.Getenv("MEMORY_PLUGIN_MIGRATIONS_DIR")); dir != "" {
|
||||
return runMigrationsFromDisk(db, dir)
|
||||
}
|
||||
entries, err := os.ReadDir(dir)
|
||||
return runMigrationsFromEmbed(db)
|
||||
}
|
||||
|
||||
// runMigrationsFromEmbed applies the *.up.sql files bundled into the
|
||||
// binary at build time. Order is alphabetical (matches the on-disk
|
||||
// behavior of os.ReadDir on Linux for the same set of names).
|
||||
func runMigrationsFromEmbed(db *sql.DB) error {
|
||||
entries, err := migrationsFS.ReadDir("migrations")
|
||||
if err != nil {
|
||||
return fmt.Errorf("read migrations dir %q: %w", dir, err)
|
||||
return fmt.Errorf("read embedded migrations: %w", err)
|
||||
}
|
||||
names := make([]string, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
|
||||
continue
|
||||
}
|
||||
path := dir + "/" + e.Name()
|
||||
names = append(names, e.Name())
|
||||
}
|
||||
sort.Strings(names)
|
||||
for _, name := range names {
|
||||
data, err := migrationsFS.ReadFile("migrations/" + name)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read embedded %q: %w", name, err)
|
||||
}
|
||||
if _, err := db.Exec(string(data)); err != nil {
|
||||
return fmt.Errorf("apply %q: %w", name, err)
|
||||
}
|
||||
log.Printf("applied embedded migration %s", name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// runMigrationsFromDisk preserves the legacy filesystem-path mode for
|
||||
// operator-supplied custom migrations.
|
||||
func runMigrationsFromDisk(db *sql.DB, dir string) error {
|
||||
entries, err := os.ReadDir(dir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read migrations dir %q: %w", dir, err)
|
||||
}
|
||||
names := make([]string, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
|
||||
continue
|
||||
}
|
||||
names = append(names, e.Name())
|
||||
}
|
||||
sort.Strings(names)
|
||||
for _, name := range names {
|
||||
path := dir + "/" + name
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read %q: %w", path, err)
|
||||
@@ -176,7 +233,7 @@ func runMigrations(db *sql.DB) error {
|
||||
if _, err := db.Exec(string(data)); err != nil {
|
||||
return fmt.Errorf("apply %q: %w", path, err)
|
||||
}
|
||||
log.Printf("applied migration %s", e.Name())
|
||||
log.Printf("applied disk migration %s (from %s)", name, dir)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -0,0 +1,72 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestMigrationsEmbedded_ContainsCreateTable pins that the migrations
|
||||
// are bundled into the binary at build time, NOT loaded from a
|
||||
// filesystem path that doesn't exist at runtime in the published image.
|
||||
//
|
||||
// Pre-fix: PR #2906 shipped the binary without the migrations dir;
|
||||
// `os.ReadDir("cmd/memory-plugin-postgres/migrations")` errored on every
|
||||
// tenant boot, the 30s health gate aborted the container, and the
|
||||
// staging redeploy fleet job marked all tenants as failed. Embedding
|
||||
// the migrations into the binary removes the runtime path entirely.
|
||||
func TestMigrationsEmbedded_ContainsCreateTable(t *testing.T) {
|
||||
entries, err := migrationsFS.ReadDir("migrations")
|
||||
if err != nil {
|
||||
t.Fatalf("embedded migrations dir unreadable: %v", err)
|
||||
}
|
||||
if len(entries) == 0 {
|
||||
t.Fatal("embedded migrations dir is empty — go:embed pattern matched no files")
|
||||
}
|
||||
|
||||
var seenUp bool
|
||||
for _, e := range entries {
|
||||
if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
|
||||
continue
|
||||
}
|
||||
seenUp = true
|
||||
data, err := migrationsFS.ReadFile("migrations/" + e.Name())
|
||||
if err != nil {
|
||||
t.Errorf("read embedded %q: %v", e.Name(), err)
|
||||
continue
|
||||
}
|
||||
if !strings.Contains(string(data), "CREATE TABLE") {
|
||||
t.Errorf("embedded %q has no CREATE TABLE — wrong file embedded?", e.Name())
|
||||
}
|
||||
}
|
||||
if !seenUp {
|
||||
t.Fatal("no *.up.sql in embedded migrations — runtime would have no schema to apply")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunMigrationsFromEmbed_OrderingIsAlphabetic pins that we apply
|
||||
// migrations in deterministic alphabetical order, not in whatever
|
||||
// arbitrary order migrationsFS.ReadDir happens to return. With one
|
||||
// migration today this is moot, but a future second migration ('002_…')
|
||||
// MUST run after '001_…' or the schema is broken.
|
||||
//
|
||||
// We can't easily exercise db.Exec here (no test DB); instead pin the
|
||||
// sort step on the directory listing itself.
|
||||
func TestRunMigrationsFromEmbed_OrderingIsAlphabetic(t *testing.T) {
|
||||
entries, err := migrationsFS.ReadDir("migrations")
|
||||
if err != nil {
|
||||
t.Fatalf("embedded migrations dir unreadable: %v", err)
|
||||
}
|
||||
var names []string
|
||||
for _, e := range entries {
|
||||
if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
|
||||
continue
|
||||
}
|
||||
names = append(names, e.Name())
|
||||
}
|
||||
for i := 1; i < len(names); i++ {
|
||||
if names[i-1] > names[i] {
|
||||
t.Errorf("ReadDir returned non-sorted names; runMigrationsFromEmbed must sort. "+
|
||||
"Got %q before %q", names[i-1], names[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -18,6 +18,8 @@ import (
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/imagewatch"
|
||||
memwiring "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/wiring"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/registry"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/router"
|
||||
@@ -166,6 +168,16 @@ func main() {
|
||||
wh.SetCPProvisioner(cpProv)
|
||||
}
|
||||
|
||||
// Memory v2 plugin (RFC #2728): build the dependency bundle once
|
||||
// here so all three handlers (MCPHandler, AdminMemoriesHandler,
|
||||
// WorkspaceHandler) get the same plugin/resolver pair. memBundle
|
||||
// is nil when MEMORY_PLUGIN_URL is unset — every consumer
|
||||
// nil-checks before using.
|
||||
memBundle := memwiring.Build(db.DB)
|
||||
if memBundle != nil {
|
||||
wh.WithNamespaceCleanup(memBundle.NamespaceCleanupFn())
|
||||
}
|
||||
|
||||
// External-plugin env mutators — each plugin contributes 0+ mutators
|
||||
// onto a shared registry. Order matters: gh-identity populates
|
||||
// MOLECULE_AGENT_ROLE-derived attribution env vars that downstream
|
||||
@@ -254,6 +266,14 @@ func main() {
|
||||
})
|
||||
}
|
||||
|
||||
// Pending-uploads GC sweep — deletes acked rows past their retention
|
||||
// window plus unacked rows past expires_at. Without this the
|
||||
// pending_uploads table grows unbounded; even with the 24h hard TTL,
|
||||
// nothing actually deletes a row, just makes it un-fetchable.
|
||||
go supervised.RunWithRecover(ctx, "pending-uploads-sweeper", func(c context.Context) {
|
||||
pendinguploads.StartSweeper(c, pendinguploads.NewPostgres(db.DB), 0)
|
||||
})
|
||||
|
||||
// Provision-timeout sweep — flips workspaces that have been stuck in
|
||||
// status='provisioning' past the timeout window to 'failed' and emits
|
||||
// WORKSPACE_PROVISION_TIMEOUT. Without this the UI banner is cosmetic
|
||||
@@ -286,6 +306,15 @@ func main() {
|
||||
registry.StartHibernationMonitor(c, wh.HibernateWorkspace)
|
||||
})
|
||||
|
||||
// RFC #2829 PR-3: stuck-task sweeper for the durable delegations
|
||||
// ledger. Marks deadline-exceeded rows as failed and heartbeat-stale
|
||||
// in-flight rows as stuck. Both transitions go through the ledger's
|
||||
// terminal forward-only protection so concurrent UpdateStatus calls
|
||||
// are not clobbered. Defaults: 5min interval, 10min stale threshold;
|
||||
// override via DELEGATION_SWEEPER_INTERVAL_S / DELEGATION_STUCK_THRESHOLD_S.
|
||||
delegSweeper := handlers.NewDelegationSweeper(nil, nil)
|
||||
go supervised.RunWithRecover(ctx, "delegation-sweeper", delegSweeper.Start)
|
||||
|
||||
// Channel Manager — social channel integrations (Telegram, Slack, etc.)
|
||||
channelMgr := channels.NewManager(wh, broadcaster)
|
||||
go supervised.RunWithRecover(ctx, "channel-manager", channelMgr.Start)
|
||||
@@ -306,7 +335,7 @@ func main() {
|
||||
cronSched.SetChannels(channelMgr)
|
||||
|
||||
// Router
|
||||
r := router.Setup(hub, broadcaster, prov, platformURL, configsDir, wh, channelMgr)
|
||||
r := router.Setup(hub, broadcaster, prov, platformURL, configsDir, wh, channelMgr, memBundle)
|
||||
|
||||
// HTTP server with graceful shutdown
|
||||
srv := &http.Server{
|
||||
|
||||
@@ -20,6 +20,51 @@ cd /canvas
|
||||
PORT=3000 HOSTNAME=0.0.0.0 node server.js &
|
||||
CANVAS_PID=$!
|
||||
|
||||
# Memory v2 sidecar (built-in postgres plugin). See Dockerfile entrypoint
|
||||
# comment for rationale.
|
||||
#
|
||||
# Spawn-gating: only start the sidecar when the operator has indicated
|
||||
# they want it (MEMORY_V2_CUTOVER=true OR MEMORY_PLUGIN_URL set).
|
||||
# Without that signal, the sidecar adds zero value and risks aborting
|
||||
# tenant boot via the 30s health gate when the tenant Postgres lacks
|
||||
# pgvector. Caught on staging redeploy 2026-05-05:
|
||||
# pq: extension "vector" is not available
|
||||
#
|
||||
# Defaults (when sidecar IS spawned): MEMORY_PLUGIN_DATABASE_URL
|
||||
# falls back to the tenant's DATABASE_URL.
|
||||
# Empty PID means "sidecar not started"; later cleanup/wait logic keys off it.
MEMORY_PLUGIN_PID=""
memory_plugin_wanted=""
if [ "$MEMORY_V2_CUTOVER" = "true" ] || [ -n "$MEMORY_PLUGIN_URL" ]; then
  memory_plugin_wanted=1
fi

if [ -z "$MEMORY_PLUGIN_DISABLE" ] && [ -n "$memory_plugin_wanted" ] && [ -n "$DATABASE_URL" ]; then
  : "${MEMORY_PLUGIN_DATABASE_URL:=$DATABASE_URL}"
  # Loopback-only by default. Exporting ":9100" here would bind the
  # unauthenticated plugin on every interface of the tenant container.
  : "${MEMORY_PLUGIN_LISTEN_ADDR:=127.0.0.1:9100}"
  export MEMORY_PLUGIN_DATABASE_URL MEMORY_PLUGIN_LISTEN_ADDR
  echo "memory-plugin: starting sidecar on $MEMORY_PLUGIN_LISTEN_ADDR" >&2
  /memory-plugin &
  MEMORY_PLUGIN_PID=$!

  # Wait up to 30s for /v1/health. Boot failure is fatal so a misconfigured
  # tenant crash-loops instead of silently serving cutover traffic against
  # a dead plugin.
  #
  # The port is everything after the LAST colon, so ":9100" and
  # "127.0.0.1:9100" both resolve to 9100. Stripping only a leading colon
  # would turn any host:port value into an invalid health URL.
  health_port=${MEMORY_PLUGIN_LISTEN_ADDR##*:}
  ready=0
  for _ in $(seq 1 30); do
    # Probe the IPv4 loopback explicitly: with a 127.0.0.1 bind, "localhost"
    # may resolve to ::1 first and never reach the listener.
    if wget -qO- --timeout=2 "http://127.0.0.1:${health_port}/v1/health" >/dev/null 2>&1; then
      ready=1
      break
    fi
    sleep 1
  done
  if [ "$ready" != "1" ]; then
    echo "memory-plugin: ❌ /v1/health never returned 200 after 30s — aborting boot. Check DATABASE_URL reachability + pgvector extension + migrations." >&2
    # Tear down everything already started so the container exits cleanly.
    kill "$MEMORY_PLUGIN_PID" 2>/dev/null || true
    kill "$CANVAS_PID" 2>/dev/null || true
    exit 1
  fi
  echo "memory-plugin: ✅ sidecar healthy on :$health_port" >&2
fi
|
||||
|
||||
# Start Go platform in foreground-ish (we trap signals)
|
||||
# CANVAS_PROXY_URL tells the platform to proxy unmatched routes to Canvas.
|
||||
# CONTAINER_BACKEND: empty = Docker (default for self-hosted/local).
|
||||
@@ -29,15 +74,20 @@ cd /
|
||||
/platform &
|
||||
PLATFORM_PID=$!
|
||||
|
||||
# If either process exits, kill the other
|
||||
# If any process exits, kill the others
|
||||
cleanup() {
|
||||
kill $CANVAS_PID 2>/dev/null || true
|
||||
kill $PLATFORM_PID 2>/dev/null || true
|
||||
[ -n "$MEMORY_PLUGIN_PID" ] && kill $MEMORY_PLUGIN_PID 2>/dev/null || true
|
||||
}
|
||||
trap cleanup EXIT SIGTERM SIGINT
|
||||
|
||||
# Wait for either to exit — whichever exits first triggers cleanup
|
||||
wait -n $CANVAS_PID $PLATFORM_PID
|
||||
# Wait for any to exit — whichever exits first triggers cleanup
|
||||
if [ -n "$MEMORY_PLUGIN_PID" ]; then
|
||||
wait -n $CANVAS_PID $PLATFORM_PID $MEMORY_PLUGIN_PID
|
||||
else
|
||||
wait -n $CANVAS_PID $PLATFORM_PID
|
||||
fi
|
||||
EXIT_CODE=$?
|
||||
cleanup
|
||||
exit $EXIT_CODE
|
||||
|
||||
@@ -131,11 +131,19 @@ func buildBundleConfigFiles(b *Bundle) map[string][]byte {
|
||||
}
|
||||
|
||||
func markFailed(ctx context.Context, wsID string, broadcaster *events.Broadcaster, err error) {
|
||||
// Set last_sample_error along with status so operators (and the
|
||||
// Canvas E2E + GET /workspaces/:id callers) get a non-null reason
|
||||
// in the row. Pre-2026-05-05 this UPDATE only set status, leaving
|
||||
// last_sample_error NULL — Canvas E2E #2632 surfaced the gap with
|
||||
// `Workspace failed: (no last_sample_error)`. Same UPDATE shape as
|
||||
// markProvisionFailed in workspace-server/internal/handlers/
|
||||
// workspace_provision_shared.go.
|
||||
msg := err.Error()
|
||||
db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2`,
|
||||
models.StatusFailed, wsID)
|
||||
`UPDATE workspaces SET status = $1, last_sample_error = $2, updated_at = now() WHERE id = $3`,
|
||||
models.StatusFailed, msg, wsID)
|
||||
broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_PROVISION_FAILED", wsID, map[string]interface{}{
|
||||
"error": err.Error(),
|
||||
"error": msg,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,175 @@
|
||||
package db_test
|
||||
|
||||
// Static drift gate: every UPDATE that sets status to a "failed" value
|
||||
// must also set last_sample_error in the same statement. Otherwise the
|
||||
// row ends up with status='failed' + last_sample_error=NULL — operators
|
||||
// see "workspace failed" with no reason, and the Canvas E2E reports the
|
||||
// useless `Workspace failed: (no last_sample_error)` from #2632.
|
||||
//
|
||||
// Why a static gate: pre-2026-05-05 we had at least two writers
|
||||
// (markProvisionFailed in workspace_provision_shared.go set the
|
||||
// message; bundle/importer.go's markFailed didn't). The provision-
|
||||
// timeout sweep also sets the message. Code review missed the
|
||||
// importer drift for ~6 months until the Canvas E2E surfaced it.
|
||||
//
|
||||
// Rule:
|
||||
// - If a Go string literal in this repo contains both
|
||||
// `UPDATE workspaces` and a clause setting `status` to a value
|
||||
// resembling "failed" — either via a `$N` placeholder later bound
|
||||
// to StatusFailed, or via an inline `'failed'` literal — that same
|
||||
// literal MUST also contain `last_sample_error`.
|
||||
// - Allowed: an UPDATE that only sets status to a non-failed value
|
||||
// (online, hibernating, removed, etc.). Those don't need the
|
||||
// message column, and clearing it would lose forensic context.
|
||||
//
|
||||
// Caveats:
|
||||
// - The test reads source as text. Multi-line UPDATEs split across
|
||||
// concatenated string fragments will slip past — that's an
|
||||
// accepted limitation for now; the parameterized-write refactor
|
||||
// (#2799) will let us replace this textual gate with a typed-call
|
||||
// gate eventually.
|
||||
// - "last_sample_error" appearing anywhere in the same literal is
|
||||
// enough to satisfy the rule. We don't try to verify the column
|
||||
// receives a non-empty value at runtime — that's the
|
||||
// parameterized-write refactor's territory too.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"go/ast"
|
||||
"go/parser"
|
||||
"go/token"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestWorkspaceStatusFailed_MustSetLastSampleError uses Go's AST to find
|
||||
// every ExecContext call whose argument list includes the
|
||||
// `models.StatusFailed` constant. For each such call, the SQL literal
|
||||
// (the second argument) must also contain `last_sample_error`. This
|
||||
// catches the bug class without false-positive matches on UPDATEs that
|
||||
// set status to a non-failed value (online/hibernating/removed/etc.)
|
||||
// because those don't pass StatusFailed as an arg.
|
||||
func TestWorkspaceStatusFailed_MustSetLastSampleError(t *testing.T) {
|
||||
root := findRepoRoot(t)
|
||||
violations := []string{}
|
||||
|
||||
walkErr := filepath.Walk(filepath.Join(root, "workspace-server", "internal"), func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
if filepath.Ext(path) != ".go" {
|
||||
return nil
|
||||
}
|
||||
if strings.HasSuffix(path, "_test.go") {
|
||||
return nil
|
||||
}
|
||||
fset := token.NewFileSet()
|
||||
f, err := parser.ParseFile(fset, path, nil, parser.SkipObjectResolution)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ast.Inspect(f, func(n ast.Node) bool {
|
||||
call, ok := n.(*ast.CallExpr)
|
||||
if !ok {
|
||||
return true
|
||||
}
|
||||
sel, ok := call.Fun.(*ast.SelectorExpr)
|
||||
if !ok {
|
||||
return true
|
||||
}
|
||||
// Match db.DB.ExecContext / db.DB.QueryContext / db.DB.QueryRowContext
|
||||
// — the three SQL execution surfaces this codebase uses.
|
||||
methodName := sel.Sel.Name
|
||||
if methodName != "ExecContext" && methodName != "QueryContext" && methodName != "QueryRowContext" {
|
||||
return true
|
||||
}
|
||||
// Args: 0=ctx, 1=sql-literal, 2..=bind vars.
|
||||
if len(call.Args) < 3 {
|
||||
return true
|
||||
}
|
||||
passesStatusFailed := false
|
||||
for _, a := range call.Args[2:] {
|
||||
if isStatusFailedRef(a) {
|
||||
passesStatusFailed = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !passesStatusFailed {
|
||||
return true
|
||||
}
|
||||
// SQL literal — usually `*ast.BasicLit` for a single-line
|
||||
// string or a back-tick string. May also be a const ref.
|
||||
sqlText := extractStringLit(call.Args[1])
|
||||
if sqlText == "" {
|
||||
// SQL is a name reference, not a literal — can't check.
|
||||
return true
|
||||
}
|
||||
if strings.Contains(sqlText, "last_sample_error") {
|
||||
return true
|
||||
}
|
||||
// Skip non-UPDATE statements that happen to pass StatusFailed
|
||||
// (e.g. SELECT … WHERE status = $1). The drift target is
|
||||
// specifically writes that mark the row failed.
|
||||
if !regexp.MustCompile(`(?i)\bUPDATE\s+workspaces\b`).MatchString(sqlText) {
|
||||
return true
|
||||
}
|
||||
rel, _ := filepath.Rel(root, path)
|
||||
pos := fset.Position(call.Pos())
|
||||
snippet := strings.TrimSpace(sqlText)
|
||||
if len(snippet) > 120 {
|
||||
snippet = snippet[:120] + "..."
|
||||
}
|
||||
violations = append(violations,
|
||||
fmt.Sprintf("%s:%d: %s", rel, pos.Line, snippet))
|
||||
return true
|
||||
})
|
||||
return nil
|
||||
})
|
||||
if walkErr != nil {
|
||||
t.Fatalf("walk: %v", walkErr)
|
||||
}
|
||||
|
||||
if len(violations) > 0 {
|
||||
t.Errorf("UPDATE workspaces SET status = ... binds models.StatusFailed but the SQL literal does not write last_sample_error — every code path that marks a workspace failed must also write the reason, or operators see `Workspace failed: (no last_sample_error)` (incident: Canvas E2E #2632). Add `, last_sample_error = $N` to the SET clause.\n\nViolations:\n - %s",
|
||||
strings.Join(violations, "\n - "))
|
||||
}
|
||||
}
|
||||
|
||||
// isStatusFailedRef reports whether expr names a `StatusFailed` selector
// anywhere inside it. That covers the bare `models.StatusFailed` as well as
// wrapped usages such as `models.StatusFailed.String()` or
// `string(models.StatusFailed)` — anything that names the constant.
//
// Only the selected name is compared; the package qualifier is not checked,
// so an aliased import of the models package still matches. A nil expr
// yields false.
func isStatusFailedRef(expr ast.Expr) bool {
	if expr == nil {
		return false
	}
	found := false
	ast.Inspect(expr, func(n ast.Node) bool {
		if found {
			// Already matched; stop descending.
			return false
		}
		if sel, ok := n.(*ast.SelectorExpr); ok && sel.Sel.Name == "StatusFailed" {
			found = true
			return false
		}
		return true
	})
	return found
}
|
||||
|
||||
// extractStringLit yields the text between the delimiters of a string
// literal expression. It returns "" when expr is not a string BasicLit
// (concatenation, call, identifier, non-string literal, ...). Escape
// sequences are not decoded: the surrounding pair of back-ticks or double
// quotes is simply dropped. A value without a matching delimiter pair is
// returned unchanged.
func extractStringLit(expr ast.Expr) string {
	basic, isBasic := expr.(*ast.BasicLit)
	if !isBasic {
		return ""
	}
	if basic.Kind != token.STRING {
		return ""
	}

	raw := basic.Value
	n := len(raw)
	if n < 2 {
		return raw
	}
	head, tail := raw[0], raw[n-1]
	if head != tail || (head != '`' && head != '"') {
		return raw
	}
	return raw[1 : n-1]
}
|
||||
|
||||
|
||||
@@ -0,0 +1,125 @@
|
||||
package events
|
||||
|
||||
// types.go — typed taxonomy of WebSocket event names emitted by the
|
||||
// workspace-server.
|
||||
//
|
||||
// RFC #2945 PR-B. Pre-consolidation, every BroadcastOnly /
|
||||
// RecordAndBroadcast call site passed a bare string literal:
|
||||
//
|
||||
// h.broadcaster.BroadcastOnly(workspaceID, "AGENT_MESSAGE", payload)
|
||||
//
|
||||
// Producers (Go workspace-server, ~30 call sites across handlers/,
|
||||
// scheduler/, registry/, bundle/) and consumers (canvas TS store +
|
||||
// component listeners) duplicated the same string with no shared
|
||||
// definition. A producer renaming an event silently broke every
|
||||
// consumer — same drift class that produced the reno-stars data-loss
|
||||
// regression on the persistence side. The fix on that side was the
|
||||
// AgentMessageWriter SSOT (PR-A); the fix on this side is named
|
||||
// constants.
|
||||
//
|
||||
// Why a typed string (not a plain enum / iota): the event name
|
||||
// crosses the wire to TypeScript consumers as the literal string in
|
||||
// `WSMessage.Event`. Iota integers would break the canvas store's
|
||||
// switch (`case "AGENT_MESSAGE":`); a typed string preserves the
|
||||
// wire contract while giving Go callers compile-time discipline.
|
||||
//
|
||||
// Mirror in canvas: a parity gate (PR-B-2 follow-up) will assert this
|
||||
// constant set ≡ the TypeScript union members in
|
||||
// `canvas/src/lib/ws-events.ts`. Today the canvas consumes the names
|
||||
// via bare-string comparisons; the mirror lands separately to keep
|
||||
// PR-B narrow.
|
||||
|
||||
// EventType is the wire-typed name of a WebSocket event the platform
|
||||
// broadcasts. Always emit constants from this file rather than bare
|
||||
// strings — the AST gate in events_types_drift_test.go guards
|
||||
// against bare-string usage in the broadcaster surfaces.
|
||||
type EventType string
|
||||
|
||||
// Event constants — the canonical taxonomy. New events MUST be added
|
||||
// here AND mirrored in canvas/src/lib/ws-events.ts (parity gate
|
||||
// pending in PR-B-2). Group by semantic family so the list stays
|
||||
// scan-friendly as it grows.
|
||||
const (
|
||||
// Chat / agent messaging — surfaces in canvas chat panels.
|
||||
EventAgentMessage EventType = "AGENT_MESSAGE"
|
||||
EventA2AResponse EventType = "A2A_RESPONSE"
|
||||
EventActivityLogged EventType = "ACTIVITY_LOGGED"
|
||||
EventChannelMessage EventType = "CHANNEL_MESSAGE"
|
||||
|
||||
// Workspace lifecycle.
|
||||
EventWorkspaceProvisioning EventType = "WORKSPACE_PROVISIONING"
|
||||
EventWorkspaceProvisionFailed EventType = "WORKSPACE_PROVISION_FAILED"
|
||||
EventWorkspaceOnline EventType = "WORKSPACE_ONLINE"
|
||||
EventWorkspaceOffline EventType = "WORKSPACE_OFFLINE"
|
||||
EventWorkspaceDegraded EventType = "WORKSPACE_DEGRADED"
|
||||
EventWorkspaceHibernated EventType = "WORKSPACE_HIBERNATED"
|
||||
EventWorkspacePaused EventType = "WORKSPACE_PAUSED"
|
||||
EventWorkspaceRemoved EventType = "WORKSPACE_REMOVED"
|
||||
EventWorkspaceAwaitingAgent EventType = "WORKSPACE_AWAITING_AGENT"
|
||||
EventWorkspaceHeartbeat EventType = "WORKSPACE_HEARTBEAT"
|
||||
|
||||
// Agent assignment + identity.
|
||||
EventAgentAssigned EventType = "AGENT_ASSIGNED"
|
||||
EventAgentReplaced EventType = "AGENT_REPLACED"
|
||||
EventAgentRemoved EventType = "AGENT_REMOVED"
|
||||
EventAgentMoved EventType = "AGENT_MOVED"
|
||||
EventAgentCardUpdated EventType = "AGENT_CARD_UPDATED"
|
||||
|
||||
// Delegation lifecycle.
|
||||
EventDelegationSent EventType = "DELEGATION_SENT"
|
||||
EventDelegationStatus EventType = "DELEGATION_STATUS"
|
||||
EventDelegationComplete EventType = "DELEGATION_COMPLETE"
|
||||
EventDelegationFailed EventType = "DELEGATION_FAILED"
|
||||
|
||||
// Task progression + scheduler.
|
||||
EventTaskUpdated EventType = "TASK_UPDATED"
|
||||
EventCronExecuted EventType = "CRON_EXECUTED"
|
||||
EventCronSkipped EventType = "CRON_SKIPPED"
|
||||
|
||||
// Approvals.
|
||||
EventApprovalRequested EventType = "APPROVAL_REQUESTED"
|
||||
EventApprovalEscalated EventType = "APPROVAL_ESCALATED"
|
||||
|
||||
// Auth / credentials.
|
||||
EventExternalCredentialsRotated EventType = "EXTERNAL_CREDENTIALS_ROTATED"
|
||||
)
|
||||
|
||||
// AllEventTypes lists every constant in this file. Used by the
// snapshot test (events_types_drift_test.go) to detect when a new
// constant is added without updating the snapshot — the catch-up
// step is mirroring the addition into canvas/src/lib/ws-events.ts so
// canvas consumers can switch on it.
//
// Keep in lexicographic order so the snapshot diff is stable on
// renames and the parity-with-TS comparison is order-independent.
// The entries below are ordered by Go identifier, not by wire value
// (e.g. EventWorkspaceProvisionFailed precedes
// EventWorkspaceProvisioning).
var AllEventTypes = []EventType{
	EventA2AResponse,
	EventActivityLogged,
	EventAgentAssigned,
	EventAgentCardUpdated,
	EventAgentMessage,
	EventAgentMoved,
	EventAgentRemoved,
	EventAgentReplaced,
	EventApprovalEscalated,
	EventApprovalRequested,
	EventChannelMessage,
	EventCronExecuted,
	EventCronSkipped,
	EventDelegationComplete,
	EventDelegationFailed,
	EventDelegationSent,
	EventDelegationStatus,
	EventExternalCredentialsRotated,
	EventTaskUpdated,
	EventWorkspaceAwaitingAgent,
	EventWorkspaceDegraded,
	EventWorkspaceHeartbeat,
	EventWorkspaceHibernated,
	EventWorkspaceOffline,
	EventWorkspaceOnline,
	EventWorkspacePaused,
	EventWorkspaceProvisionFailed,
	EventWorkspaceProvisioning,
	EventWorkspaceRemoved,
}
|
||||
@@ -0,0 +1,117 @@
|
||||
package events
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestAllEventTypes_IsSnapshot pins the canonical event taxonomy.
|
||||
// Adding a new constant in types.go without updating AllEventTypes
|
||||
// (or vice versa) fails this test.
|
||||
//
|
||||
// The snapshot is also the authoritative input to the canvas-side
|
||||
// parity gate (PR-B-2 follow-up): the TypeScript union members in
|
||||
// canvas/src/lib/ws-events.ts MUST match this list exactly. A drift
|
||||
// gate at CI time will assert set equality once the TS file lands.
|
||||
func TestAllEventTypes_IsSnapshot(t *testing.T) {
|
||||
// Every named constant must appear in AllEventTypes. Walk via
|
||||
// reflection over the package-level vars would over-include test
|
||||
// fixtures, so list the canonical names here. When a constant
|
||||
// is added in types.go, append the EventType's literal value
|
||||
// to the expected list below — the failure message names
|
||||
// exactly what's missing so the diff is one-line obvious.
|
||||
expected := []string{
|
||||
"A2A_RESPONSE",
|
||||
"ACTIVITY_LOGGED",
|
||||
"AGENT_ASSIGNED",
|
||||
"AGENT_CARD_UPDATED",
|
||||
"AGENT_MESSAGE",
|
||||
"AGENT_MOVED",
|
||||
"AGENT_REMOVED",
|
||||
"AGENT_REPLACED",
|
||||
"APPROVAL_ESCALATED",
|
||||
"APPROVAL_REQUESTED",
|
||||
"CHANNEL_MESSAGE",
|
||||
"CRON_EXECUTED",
|
||||
"CRON_SKIPPED",
|
||||
"DELEGATION_COMPLETE",
|
||||
"DELEGATION_FAILED",
|
||||
"DELEGATION_SENT",
|
||||
"DELEGATION_STATUS",
|
||||
"EXTERNAL_CREDENTIALS_ROTATED",
|
||||
"TASK_UPDATED",
|
||||
"WORKSPACE_AWAITING_AGENT",
|
||||
"WORKSPACE_DEGRADED",
|
||||
"WORKSPACE_HEARTBEAT",
|
||||
"WORKSPACE_HIBERNATED",
|
||||
"WORKSPACE_OFFLINE",
|
||||
"WORKSPACE_ONLINE",
|
||||
"WORKSPACE_PAUSED",
|
||||
"WORKSPACE_PROVISIONING",
|
||||
"WORKSPACE_PROVISION_FAILED",
|
||||
"WORKSPACE_REMOVED",
|
||||
}
|
||||
sort.Strings(expected)
|
||||
|
||||
actual := make([]string, 0, len(AllEventTypes))
|
||||
for _, e := range AllEventTypes {
|
||||
actual = append(actual, string(e))
|
||||
}
|
||||
sort.Strings(actual)
|
||||
|
||||
if len(actual) != len(expected) {
|
||||
t.Errorf("AllEventTypes count = %d, want %d\nactual: %s\nexpected: %s",
|
||||
len(actual), len(expected),
|
||||
strings.Join(actual, ", "),
|
||||
strings.Join(expected, ", "))
|
||||
return
|
||||
}
|
||||
for i, want := range expected {
|
||||
if actual[i] != want {
|
||||
t.Errorf("AllEventTypes[%d] = %q, want %q (full diff:\n actual: %v\n expected: %v\n)",
|
||||
i, actual[i], want, actual, expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestEventType_NoEmptyConstants pins that no constant declared in
|
||||
// types.go has an accidentally-empty value. The catch is the
|
||||
// "WORKSPACE_X" → forgot-to-fill pattern: a typo in the literal
|
||||
// would surface as the empty string, and broadcast pipelines would
|
||||
// silently filter empty-name events without any error signal.
|
||||
func TestEventType_NoEmptyConstants(t *testing.T) {
|
||||
for _, e := range AllEventTypes {
|
||||
if string(e) == "" {
|
||||
t.Errorf("found empty EventType in AllEventTypes — typo in types.go?")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestEventType_AllUppercaseSnakeCase pins the wire format. Mixed
|
||||
// case or kebab-case would break the canvas TypeScript switch
|
||||
// statements (every consumer's `case "AGENT_MESSAGE":` is upper-
|
||||
// snake). The check is the catch for an accidental
|
||||
// `"agent_message"` typo that wouldn't fail the snapshot gate.
|
||||
func TestEventType_AllUppercaseSnakeCase(t *testing.T) {
|
||||
for _, e := range AllEventTypes {
|
||||
s := string(e)
|
||||
// Allowed chars: A-Z, 0-9, _ — nothing else, no leading/
|
||||
// trailing underscores, no consecutive underscores.
|
||||
if s != strings.ToUpper(s) {
|
||||
t.Errorf("EventType %q is not all-uppercase — wire format requires upper-snake", s)
|
||||
}
|
||||
if strings.HasPrefix(s, "_") || strings.HasSuffix(s, "_") {
|
||||
t.Errorf("EventType %q has leading/trailing underscore — disallowed", s)
|
||||
}
|
||||
if strings.Contains(s, "__") {
|
||||
t.Errorf("EventType %q has consecutive underscores — disallowed", s)
|
||||
}
|
||||
for _, r := range s {
|
||||
if !((r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '_') {
|
||||
t.Errorf("EventType %q contains disallowed char %q", s, r)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -163,7 +163,7 @@ func (h *WorkspaceHandler) maybeMarkContainerDead(ctx context.Context, workspace
|
||||
if wsRuntime == "external" {
|
||||
return false
|
||||
}
|
||||
if h.provisioner == nil && h.cpProv == nil {
|
||||
if !h.HasProvisioner() {
|
||||
return false
|
||||
}
|
||||
|
||||
|
||||
@@ -465,78 +465,30 @@ func (h *ActivityHandler) Notify(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// Verify workspace exists
|
||||
var wsName string
|
||||
err := db.DB.QueryRowContext(c.Request.Context(),
|
||||
`SELECT name FROM workspaces WHERE id = $1 AND status != 'removed'`, workspaceID,
|
||||
).Scan(&wsName)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return
|
||||
// Single source of truth for chat-bearing agent → user messages —
|
||||
// see agent_message_writer.go for the contract. Pre-RFC-#2945, the
|
||||
// broadcast + INSERT pair was inlined here and again in
|
||||
// mcp_tools.go's send_message_to_user, and the duplication is what
|
||||
// produced the reno-stars data-loss regression. Both paths now
|
||||
// route through the same writer; future channels (Slack, Discord,
|
||||
// Lark) hook in here too.
|
||||
attachments := make([]AgentMessageAttachment, 0, len(body.Attachments))
|
||||
for _, a := range body.Attachments {
|
||||
attachments = append(attachments, AgentMessageAttachment{
|
||||
URI: a.URI,
|
||||
Name: a.Name,
|
||||
MimeType: a.MimeType,
|
||||
Size: a.Size,
|
||||
})
|
||||
}
|
||||
|
||||
broadcastPayload := map[string]interface{}{
|
||||
"message": body.Message,
|
||||
"workspace_id": workspaceID,
|
||||
"name": wsName,
|
||||
}
|
||||
if len(body.Attachments) > 0 {
|
||||
broadcastPayload["attachments"] = body.Attachments
|
||||
}
|
||||
h.broadcaster.BroadcastOnly(workspaceID, "AGENT_MESSAGE", broadcastPayload)
|
||||
|
||||
// Persist to activity_logs so the chat history loader restores this
|
||||
// message after a page reload. Pre-fix, send_message_to_user pushes
|
||||
// were broadcast-only — survived the WebSocket session but vanished
|
||||
// when the user refreshed because nothing wrote them to the DB.
|
||||
//
|
||||
// Shape chosen to match the existing loader query
|
||||
// (`type=a2a_receive&source=canvas`):
|
||||
// - activity_type='a2a_receive' so it joins the same query path
|
||||
// - source_id=NULL so the canvas-source filter accepts it
|
||||
// - method='notify' to distinguish from real A2A receives in audits
|
||||
// - request_body=NULL so the loader doesn't append a duplicate
|
||||
// "user message" bubble for it
|
||||
// - response_body={"result": "<text>"} matches extractResponseText's
|
||||
// simplest branch ({result: string} → take verbatim)
|
||||
//
|
||||
// Errors are logged-only — broadcast already succeeded, the user
|
||||
// sees the message; persistence failure just means the message
|
||||
// won't survive reload (pre-fix behavior). Don't fail the whole
|
||||
// notify on a DB hiccup.
|
||||
// response_body shape — chosen to feed BOTH:
|
||||
// - extractResponseText: looks at body.result (string) and returns it
|
||||
// - extractFilesFromTask: looks at body.parts[] for kind=file
|
||||
// so a chat reload after a notify-with-attachments restores both
|
||||
// the text bubble AND the download chips.
|
||||
respPayload := map[string]interface{}{"result": body.Message}
|
||||
if len(body.Attachments) > 0 {
|
||||
fileParts := make([]map[string]interface{}, 0, len(body.Attachments))
|
||||
for _, a := range body.Attachments {
|
||||
fileMeta := map[string]interface{}{"uri": a.URI, "name": a.Name}
|
||||
if a.MimeType != "" {
|
||||
fileMeta["mimeType"] = a.MimeType
|
||||
}
|
||||
if a.Size > 0 {
|
||||
fileMeta["size"] = a.Size
|
||||
}
|
||||
fileParts = append(fileParts, map[string]interface{}{
|
||||
"kind": "file",
|
||||
"file": fileMeta,
|
||||
})
|
||||
writer := NewAgentMessageWriter(db.DB, h.broadcaster)
|
||||
if err := writer.Send(c.Request.Context(), workspaceID, body.Message, attachments); err != nil {
|
||||
if errors.Is(err, ErrWorkspaceNotFound) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return
|
||||
}
|
||||
respPayload["parts"] = fileParts
|
||||
}
|
||||
respJSON, _ := json.Marshal(respPayload)
|
||||
preview := body.Message
|
||||
if len(preview) > 80 {
|
||||
preview = preview[:80] + "…"
|
||||
}
|
||||
if _, err := db.DB.ExecContext(c.Request.Context(), `
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, method, summary, response_body, status)
|
||||
VALUES ($1, 'a2a_receive', 'notify', $2, $3::jsonb, 'ok')
|
||||
`, workspaceID, "Agent message: "+preview, string(respJSON)); err != nil {
|
||||
log.Printf("Notify: failed to persist message for %s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "internal error"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"status": "sent"})
|
||||
|
||||
@@ -0,0 +1,236 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"log"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// admin_delegations.go — RFC #2829 PR-4: operator dashboard endpoint
|
||||
// over the durable delegations ledger (PR-1 schema, PR-3 sweeper).
|
||||
//
|
||||
// What this endpoint serves
|
||||
// -------------------------
|
||||
//
|
||||
// GET /admin/delegations[?status=in_flight|stuck|failed|completed&limit=N]
|
||||
//
|
||||
// Returns the rows the operator needs to triage delegation health:
|
||||
// - in_flight : status IN (queued, dispatched, in_progress) — the
|
||||
// things actively churning right now. Default view.
|
||||
// - stuck : status='stuck' — sweeper found these wedged. Operator
|
||||
// can investigate the callee + decide whether to retry
|
||||
// (RFC #2829 PR-5 plan).
|
||||
// - failed : status='failed' — terminal failures, recent. Useful
|
||||
// for spotting trends like "callee X is failing 50% of
|
||||
// delegations since 14:00".
|
||||
//
|
||||
// Why an admin endpoint at all
|
||||
// ----------------------------
|
||||
// Without this, post-incident investigation requires direct DB access —
|
||||
// only the on-call SRE can answer "is workspace X delegating to a wedged
|
||||
// callee?". The dashboard endpoint moves that visibility into the same
|
||||
// surface as /admin/queue, /admin/schedules-health, /admin/memories etc.
|
||||
//
|
||||
// Out of scope (deferred to a follow-up PR per RFC #2829)
|
||||
// -------------------------------------------------------
|
||||
// - "retry this stuck task" mutation: needs careful interaction with
|
||||
// the agent-side cutover (PR-5) before it can be safely re-fired
|
||||
// - p95 / p99 duration aggregates: separate metric exposure, not a
|
||||
// row-level read
|
||||
// - Canvas UI: this is the JSON contract; the canvas operator panel
|
||||
// consumes it in a follow-up canvas PR
|
||||
|
||||
// AdminDelegationsHandler serves the operator dashboard read endpoint.
|
||||
type AdminDelegationsHandler struct {
|
||||
db *sql.DB
|
||||
}
|
||||
|
||||
func NewAdminDelegationsHandler(handle *sql.DB) *AdminDelegationsHandler {
|
||||
if handle == nil {
|
||||
handle = db.DB
|
||||
}
|
||||
return &AdminDelegationsHandler{db: handle}
|
||||
}
|
||||
|
||||
// delegationRow mirrors the row shape of the `delegations` table that the
|
||||
// operator dashboard cares about. Order matches the SELECT below — keep
|
||||
// the two in sync if you add a column.
|
||||
type delegationRow struct {
|
||||
DelegationID string `json:"delegation_id"`
|
||||
CallerID string `json:"caller_id"`
|
||||
CalleeID string `json:"callee_id"`
|
||||
TaskPreview string `json:"task_preview"`
|
||||
Status string `json:"status"`
|
||||
LastHeartbeat *time.Time `json:"last_heartbeat,omitempty"`
|
||||
Deadline time.Time `json:"deadline"`
|
||||
ResultPreview *string `json:"result_preview,omitempty"`
|
||||
ErrorDetail *string `json:"error_detail,omitempty"`
|
||||
RetryCount int `json:"retry_count"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
}
|
||||
|
||||
// statusFilters maps the query-string `status` value to the SQL set.
|
||||
// Keep tight — operators don't get to query arbitrary status — so a
|
||||
// new status name added to the schema needs an explicit allowlist
|
||||
// entry here. Caught when a future status name doesn't pin to a UI
|
||||
// expectation (forward-defense).
|
||||
var statusFilters = map[string][]string{
|
||||
"in_flight": {"queued", "dispatched", "in_progress"},
|
||||
"stuck": {"stuck"},
|
||||
"failed": {"failed"},
|
||||
"completed": {"completed"},
|
||||
}
|
||||
|
||||
const defaultListLimit = 100
|
||||
const maxListLimit = 1000
|
||||
|
||||
// List handles GET /admin/delegations
|
||||
//
|
||||
// Query params:
|
||||
// - status — one of `in_flight` (default) / `stuck` / `failed` / `completed`
|
||||
// - limit — int, 1..1000 (default 100)
|
||||
//
|
||||
// Returns 200 with `{"delegations": [...], "count": N}`.
|
||||
func (h *AdminDelegationsHandler) List(c *gin.Context) {
|
||||
statusKey := c.DefaultQuery("status", "in_flight")
|
||||
statuses, ok := statusFilters[statusKey]
|
||||
if !ok {
|
||||
c.JSON(http.StatusBadRequest, gin.H{
|
||||
"error": "unknown status filter",
|
||||
"allowed": []string{"in_flight", "stuck", "failed", "completed"},
|
||||
"requested_status": statusKey,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
limit := defaultListLimit
|
||||
if v := c.Query("limit"); v != "" {
|
||||
n, err := strconv.Atoi(v)
|
||||
if err != nil || n < 1 || n > maxListLimit {
|
||||
c.JSON(http.StatusBadRequest, gin.H{
|
||||
"error": "limit must be 1..1000",
|
||||
"requested": v,
|
||||
})
|
||||
return
|
||||
}
|
||||
limit = n
|
||||
}
|
||||
|
||||
// Build the IN list as a parameterized expression — never string-
|
||||
// concatenate user-controlled values into the SQL. statusKey came
|
||||
// from the allowlist above so the slice is fully bounded.
|
||||
args := make([]any, 0, len(statuses)+1)
|
||||
placeholders := ""
|
||||
for i, s := range statuses {
|
||||
if i > 0 {
|
||||
placeholders += ","
|
||||
}
|
||||
args = append(args, s)
|
||||
placeholders += "$" + strconv.Itoa(i+1)
|
||||
}
|
||||
args = append(args, limit)
|
||||
limitPlaceholder := "$" + strconv.Itoa(len(statuses)+1)
|
||||
|
||||
rows, err := h.db.QueryContext(c.Request.Context(), `
|
||||
SELECT delegation_id, caller_id::text, callee_id::text, task_preview,
|
||||
status, last_heartbeat, deadline, result_preview, error_detail,
|
||||
retry_count, created_at, updated_at
|
||||
FROM delegations
|
||||
WHERE status IN (`+placeholders+`)
|
||||
ORDER BY created_at DESC
|
||||
LIMIT `+limitPlaceholder, args...)
|
||||
if err != nil {
|
||||
log.Printf("AdminDelegations.List: query failed: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
out := make([]delegationRow, 0)
|
||||
for rows.Next() {
|
||||
var r delegationRow
|
||||
var lastBeat sql.NullTime
|
||||
var resultPreview, errorDetail sql.NullString
|
||||
if err := rows.Scan(
|
||||
&r.DelegationID, &r.CallerID, &r.CalleeID, &r.TaskPreview,
|
||||
&r.Status, &lastBeat, &r.Deadline, &resultPreview, &errorDetail,
|
||||
&r.RetryCount, &r.CreatedAt, &r.UpdatedAt,
|
||||
); err != nil {
|
||||
log.Printf("AdminDelegations.List: scan failed: %v", err)
|
||||
continue
|
||||
}
|
||||
if lastBeat.Valid {
|
||||
t := lastBeat.Time
|
||||
r.LastHeartbeat = &t
|
||||
}
|
||||
if resultPreview.Valid {
|
||||
s := resultPreview.String
|
||||
r.ResultPreview = &s
|
||||
}
|
||||
if errorDetail.Valid {
|
||||
s := errorDetail.String
|
||||
r.ErrorDetail = &s
|
||||
}
|
||||
out = append(out, r)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
log.Printf("AdminDelegations.List: rows.Err: %v", err)
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"delegations": out,
|
||||
"count": len(out),
|
||||
"status": statusKey,
|
||||
"limit": limit,
|
||||
})
|
||||
}
|
||||
|
||||
// Stats handles GET /admin/delegations/stats — at-a-glance counts per
|
||||
// status. Useful for the dashboard summary card at the top of the
|
||||
// operator panel without paying for a row-level fetch.
|
||||
//
|
||||
// Returns 200 with `{"queued": N, "dispatched": N, "in_progress": N,
|
||||
// "completed": N, "failed": N, "stuck": N}`.
|
||||
func (h *AdminDelegationsHandler) Stats(c *gin.Context) {
|
||||
rows, err := h.db.QueryContext(c.Request.Context(), `
|
||||
SELECT status, COUNT(*) FROM delegations GROUP BY status
|
||||
`)
|
||||
if err != nil {
|
||||
log.Printf("AdminDelegations.Stats: query failed: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
// Initialise to zero so the response always has every known status
|
||||
// key — the dashboard card doesn't need to handle "missing key vs
|
||||
// zero" branching.
|
||||
stats := map[string]int{
|
||||
"queued": 0,
|
||||
"dispatched": 0,
|
||||
"in_progress": 0,
|
||||
"completed": 0,
|
||||
"failed": 0,
|
||||
"stuck": 0,
|
||||
}
|
||||
for rows.Next() {
|
||||
var status string
|
||||
var count int
|
||||
if err := rows.Scan(&status, &count); err != nil {
|
||||
log.Printf("AdminDelegations.Stats: scan failed: %v", err)
|
||||
continue
|
||||
}
|
||||
stats[status] = count
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
log.Printf("AdminDelegations.Stats: rows.Err: %v", err)
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, stats)
|
||||
}
|
||||
@@ -0,0 +1,332 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// admin_delegations_test.go — RFC #2829 PR-4 dashboard endpoint coverage.
|
||||
//
|
||||
// - List: status filter + limit defaults + bad-input rejection
|
||||
// - Stats: per-status counts + zero-fill for missing statuses
|
||||
|
||||
// ---------- List ----------
|
||||
|
||||
func TestAdminDelegations_List_DefaultStatusInFlight(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
h := NewAdminDelegationsHandler(nil)
|
||||
|
||||
now := time.Now()
|
||||
mock.ExpectQuery(`SELECT delegation_id, caller_id::text, callee_id::text, task_preview,\s+status, last_heartbeat, deadline, result_preview, error_detail,\s+retry_count, created_at, updated_at\s+FROM delegations\s+WHERE status IN \(\$1,\$2,\$3\)\s+ORDER BY created_at DESC\s+LIMIT \$4`).
|
||||
WithArgs("queued", "dispatched", "in_progress", 100).
|
||||
WillReturnRows(sqlmock.NewRows([]string{
|
||||
"delegation_id", "caller_id", "callee_id", "task_preview",
|
||||
"status", "last_heartbeat", "deadline", "result_preview", "error_detail",
|
||||
"retry_count", "created_at", "updated_at",
|
||||
}).AddRow(
|
||||
"deleg-1", "caller-uuid", "callee-uuid", "task body",
|
||||
"in_progress", now, now.Add(2*time.Hour), nil, nil,
|
||||
0, now.Add(-5*time.Minute), now.Add(-1*time.Minute),
|
||||
))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/delegations", nil)
|
||||
h.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var body map[string]any
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil {
|
||||
t.Fatalf("body parse: %v", err)
|
||||
}
|
||||
if got := body["count"]; got != float64(1) {
|
||||
t.Errorf("count: expected 1, got %v", got)
|
||||
}
|
||||
if got := body["status"]; got != "in_flight" {
|
||||
t.Errorf("status: expected in_flight, got %v", got)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminDelegations_List_StatusStuck(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
h := NewAdminDelegationsHandler(nil)
|
||||
|
||||
mock.ExpectQuery(`SELECT delegation_id`).
|
||||
WithArgs("stuck", 100).
|
||||
WillReturnRows(sqlmock.NewRows([]string{
|
||||
"delegation_id", "caller_id", "callee_id", "task_preview",
|
||||
"status", "last_heartbeat", "deadline", "result_preview", "error_detail",
|
||||
"retry_count", "created_at", "updated_at",
|
||||
}))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/delegations?status=stuck", nil)
|
||||
h.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d", w.Code)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminDelegations_List_StatusFailed(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
h := NewAdminDelegationsHandler(nil)
|
||||
|
||||
mock.ExpectQuery(`SELECT delegation_id`).
|
||||
WithArgs("failed", 100).
|
||||
WillReturnRows(sqlmock.NewRows([]string{
|
||||
"delegation_id", "caller_id", "callee_id", "task_preview",
|
||||
"status", "last_heartbeat", "deadline", "result_preview", "error_detail",
|
||||
"retry_count", "created_at", "updated_at",
|
||||
}))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/delegations?status=failed", nil)
|
||||
h.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d", w.Code)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminDelegations_List_RejectsUnknownStatus(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
h := NewAdminDelegationsHandler(nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/delegations?status=garbage", nil)
|
||||
h.List(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminDelegations_List_RejectsNegativeLimit(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
h := NewAdminDelegationsHandler(nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/delegations?limit=-5", nil)
|
||||
h.List(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminDelegations_List_RejectsLimitOverCap(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
h := NewAdminDelegationsHandler(nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/delegations?limit=99999", nil)
|
||||
h.List(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminDelegations_List_AcceptsCustomLimit(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
h := NewAdminDelegationsHandler(nil)
|
||||
|
||||
mock.ExpectQuery(`SELECT delegation_id`).
|
||||
WithArgs("queued", "dispatched", "in_progress", 25).
|
||||
WillReturnRows(sqlmock.NewRows([]string{
|
||||
"delegation_id", "caller_id", "callee_id", "task_preview",
|
||||
"status", "last_heartbeat", "deadline", "result_preview", "error_detail",
|
||||
"retry_count", "created_at", "updated_at",
|
||||
}))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/delegations?limit=25", nil)
|
||||
h.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var body map[string]any
|
||||
_ = json.Unmarshal(w.Body.Bytes(), &body)
|
||||
if body["limit"] != float64(25) {
|
||||
t.Errorf("expected limit=25 echo, got %v", body["limit"])
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminDelegations_List_PopulatesNullableFields(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
h := NewAdminDelegationsHandler(nil)
|
||||
|
||||
now := time.Now()
|
||||
resultStr := "all done"
|
||||
mock.ExpectQuery(`SELECT delegation_id`).
|
||||
WithArgs("completed", 100).
|
||||
WillReturnRows(sqlmock.NewRows([]string{
|
||||
"delegation_id", "caller_id", "callee_id", "task_preview",
|
||||
"status", "last_heartbeat", "deadline", "result_preview", "error_detail",
|
||||
"retry_count", "created_at", "updated_at",
|
||||
}).AddRow(
|
||||
"deleg-2", "c", "ca", "t",
|
||||
"completed", now, now.Add(2*time.Hour), resultStr, nil,
|
||||
0, now, now,
|
||||
))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/delegations?status=completed", nil)
|
||||
h.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
var body struct {
|
||||
Delegations []struct {
|
||||
ResultPreview *string `json:"result_preview"`
|
||||
ErrorDetail *string `json:"error_detail"`
|
||||
LastHeartbeat *string `json:"last_heartbeat"`
|
||||
} `json:"delegations"`
|
||||
}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil {
|
||||
t.Fatalf("parse: %v", err)
|
||||
}
|
||||
if len(body.Delegations) != 1 {
|
||||
t.Fatalf("expected 1 row, got %d", len(body.Delegations))
|
||||
}
|
||||
row := body.Delegations[0]
|
||||
if row.ResultPreview == nil || *row.ResultPreview != "all done" {
|
||||
t.Errorf("result_preview not populated correctly: %+v", row.ResultPreview)
|
||||
}
|
||||
if row.ErrorDetail != nil {
|
||||
t.Errorf("error_detail should be nil for completed-no-error: %+v", row.ErrorDetail)
|
||||
}
|
||||
if row.LastHeartbeat == nil {
|
||||
t.Errorf("last_heartbeat should be present (non-NULL); got nil")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- Stats ----------
|
||||
|
||||
func TestAdminDelegations_Stats_ZeroFillsMissingStatuses(t *testing.T) {
|
||||
// Stats response must always include every status key. If no rows
|
||||
// exist for status='stuck', the response still shows "stuck": 0.
|
||||
mock := setupTestDB(t)
|
||||
h := NewAdminDelegationsHandler(nil)
|
||||
|
||||
mock.ExpectQuery(`SELECT status, COUNT\(\*\) FROM delegations GROUP BY status`).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"status", "count"}).
|
||||
AddRow("in_progress", 7).
|
||||
AddRow("completed", 130))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/delegations/stats", nil)
|
||||
h.Stats(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
var stats map[string]int
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &stats); err != nil {
|
||||
t.Fatalf("parse: %v", err)
|
||||
}
|
||||
|
||||
expectedKeys := []string{"queued", "dispatched", "in_progress", "completed", "failed", "stuck"}
|
||||
for _, k := range expectedKeys {
|
||||
if _, ok := stats[k]; !ok {
|
||||
t.Errorf("stats missing key %q (zero-fill contract broken)", k)
|
||||
}
|
||||
}
|
||||
if stats["in_progress"] != 7 {
|
||||
t.Errorf("in_progress count: expected 7, got %d", stats["in_progress"])
|
||||
}
|
||||
if stats["completed"] != 130 {
|
||||
t.Errorf("completed count: expected 130, got %d", stats["completed"])
|
||||
}
|
||||
if stats["stuck"] != 0 {
|
||||
t.Errorf("stuck must be zero-filled: got %d", stats["stuck"])
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminDelegations_Stats_EmptyTable(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
h := NewAdminDelegationsHandler(nil)
|
||||
|
||||
mock.ExpectQuery(`SELECT status, COUNT\(\*\) FROM delegations GROUP BY status`).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"status", "count"}))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/delegations/stats", nil)
|
||||
h.Stats(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
var stats map[string]int
|
||||
_ = json.Unmarshal(w.Body.Bytes(), &stats)
|
||||
for k, v := range stats {
|
||||
if v != 0 {
|
||||
t.Errorf("empty table → all counts zero; %s=%d", k, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// statusFilters is a contract surface — every key here is documented in
|
||||
// the endpoint comment + accepted by the validator. Pin it.
|
||||
func TestStatusFiltersTableShape(t *testing.T) {
|
||||
expected := map[string][]string{
|
||||
"in_flight": {"queued", "dispatched", "in_progress"},
|
||||
"stuck": {"stuck"},
|
||||
"failed": {"failed"},
|
||||
"completed": {"completed"},
|
||||
}
|
||||
for k, want := range expected {
|
||||
got, ok := statusFilters[k]
|
||||
if !ok {
|
||||
t.Errorf("statusFilters missing key %q", k)
|
||||
continue
|
||||
}
|
||||
if len(got) != len(want) {
|
||||
t.Errorf("statusFilters[%q]: want %v, got %v", k, want, got)
|
||||
continue
|
||||
}
|
||||
for i := range want {
|
||||
if got[i] != want[i] {
|
||||
t.Errorf("statusFilters[%q][%d]: want %q, got %q", k, i, want[i], got[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
@@ -255,68 +256,185 @@ func (h *AdminMemoriesHandler) Import(c *gin.Context) {
|
||||
// the legacy memoryExportEntry shape so existing tooling that consumes
|
||||
// the export keeps working.
|
||||
//
|
||||
// Strategy: enumerate workspaces, ask the resolver for each one's
|
||||
// readable namespaces, search each namespace once. Deduplicate by
|
||||
// memory id (a single memory in team:X is visible to every workspace
|
||||
// under root X — we want one row per memory, not N).
|
||||
// Optimization (#289 fix): the previous implementation was O(workspaces)
|
||||
// in BOTH resolver CTE walks AND plugin search calls. For a 1000-tenant
|
||||
// org, that's 1000 × resolver + 1000 × HTTP, where most are redundant
|
||||
// because workspaces sharing a team/org root see identical namespaces.
|
||||
//
|
||||
// New strategy:
|
||||
// 1. Single SQL pass walks parent_id chains, returning each
|
||||
// workspace's root_id alongside its name.
|
||||
// 2. Group workspaces by root → unique tree count is typically <<
|
||||
// workspace count.
|
||||
// 3. Resolve namespaces ONCE per root (any workspace under that
|
||||
// root produces the same readable list).
|
||||
// 4. Build a UNION of namespaces across all roots; single plugin
|
||||
// search call.
|
||||
// 5. Map each memory back to a workspace_name via a namespace→ws
|
||||
// lookup table built up from step 3.
|
||||
//
|
||||
// Net cost: 1 SQL + N_roots resolver calls + 1 plugin call (vs
|
||||
// N_workspaces resolver + N_workspaces plugin in the old code).
|
||||
func (h *AdminMemoriesHandler) exportViaPlugin(c *gin.Context, ctx context.Context) {
|
||||
rows, err := db.DB.QueryContext(ctx, `SELECT id::text, name FROM workspaces ORDER BY created_at`)
|
||||
// 1. One SQL pass: every workspace + its root id.
|
||||
wsRows, err := loadWorkspacesWithRoots(ctx, db.DB)
|
||||
if err != nil {
|
||||
log.Printf("admin/memories/export (cutover): workspaces query: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "export query failed"})
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
type wsRow struct{ ID, Name string }
|
||||
var workspaces []wsRow
|
||||
for rows.Next() {
|
||||
var w wsRow
|
||||
if err := rows.Scan(&w.ID, &w.Name); err != nil {
|
||||
continue
|
||||
}
|
||||
workspaces = append(workspaces, w)
|
||||
// 2. Group by root → list of workspaces.
|
||||
rootToWorkspaces := make(map[string][]workspaceRow, len(wsRows))
|
||||
for _, w := range wsRows {
|
||||
rootToWorkspaces[w.RootID] = append(rootToWorkspaces[w.RootID], w)
|
||||
}
|
||||
|
||||
seen := make(map[string]struct{})
|
||||
memories := make([]memoryExportEntry, 0)
|
||||
for _, w := range workspaces {
|
||||
readable, err := h.resolver.ReadableNamespaces(ctx, w.ID)
|
||||
// 3. Resolve team/org namespaces once per root, then add each
|
||||
// member's private workspace:<id> namespace explicitly.
|
||||
//
|
||||
// IMPORTANT: ReadableNamespaces(rootID) returns
|
||||
// {workspace:rootID, team:rootID, org:rootID}. Calling it once
|
||||
// per root is enough for team:/org:/custom: (those are shared by
|
||||
// every member of the root group), but the workspace: namespace
|
||||
// it returns is rootID's only — child members' private
|
||||
// workspace:<childID> namespaces would be silently dropped from
|
||||
// the export. Inject each member's workspace:<id> below to keep
|
||||
// coverage parity with the legacy per-workspace iteration.
|
||||
nsToOwner := make(map[string]string) // namespace → workspace_name (first matching wins)
|
||||
allNamespaces := make(map[string]struct{}) // union for plugin search
|
||||
for rootID, members := range rootToWorkspaces {
|
||||
readable, err := h.resolver.ReadableNamespaces(ctx, rootID)
|
||||
if err != nil {
|
||||
log.Printf("admin/memories/export (cutover) workspace=%s: resolve: %v", w.Name, err)
|
||||
log.Printf("admin/memories/export (cutover) root=%s: resolve: %v", rootID, err)
|
||||
continue
|
||||
}
|
||||
nsList := make([]string, len(readable))
|
||||
for i, ns := range readable {
|
||||
nsList[i] = ns.Name
|
||||
}
|
||||
if len(nsList) == 0 {
|
||||
continue
|
||||
}
|
||||
resp, err := h.plugin.Search(ctx, contract.SearchRequest{Namespaces: nsList, Limit: 100})
|
||||
if err != nil {
|
||||
log.Printf("admin/memories/export (cutover) workspace=%s: plugin search: %v", w.Name, err)
|
||||
continue
|
||||
}
|
||||
for _, m := range resp.Memories {
|
||||
if _, dup := seen[m.ID]; dup {
|
||||
// Collect non-workspace namespaces (team:/org:/custom:/...) from
|
||||
// the root view; these are identical across every member.
|
||||
for _, ns := range readable {
|
||||
if strings.HasPrefix(ns.Name, "workspace:") {
|
||||
continue
|
||||
}
|
||||
seen[m.ID] = struct{}{}
|
||||
redacted, _ := redactSecrets(w.Name, m.Content)
|
||||
memories = append(memories, memoryExportEntry{
|
||||
ID: m.ID,
|
||||
Content: redacted,
|
||||
Scope: legacyScopeFromNamespace(m.Namespace),
|
||||
Namespace: m.Namespace,
|
||||
CreatedAt: m.CreatedAt,
|
||||
WorkspaceName: w.Name,
|
||||
})
|
||||
allNamespaces[ns.Name] = struct{}{}
|
||||
if _, alreadyMapped := nsToOwner[ns.Name]; alreadyMapped {
|
||||
continue
|
||||
}
|
||||
if owner := pickOwnerForNamespace(ns.Name, members); owner != "" {
|
||||
nsToOwner[ns.Name] = owner
|
||||
}
|
||||
}
|
||||
// Inject each member's private workspace:<id> namespace + its
|
||||
// owner. Children's private memories live in workspace:<childID>
|
||||
// which the root-only resolve doesn't surface.
|
||||
for _, m := range members {
|
||||
ns := "workspace:" + m.ID
|
||||
allNamespaces[ns] = struct{}{}
|
||||
nsToOwner[ns] = m.Name
|
||||
}
|
||||
}
|
||||
|
||||
if len(allNamespaces) == 0 {
|
||||
c.JSON(http.StatusOK, []memoryExportEntry{})
|
||||
return
|
||||
}
|
||||
|
||||
// 4. Single plugin search across the union.
|
||||
nsList := make([]string, 0, len(allNamespaces))
|
||||
for ns := range allNamespaces {
|
||||
nsList = append(nsList, ns)
|
||||
}
|
||||
resp, err := h.plugin.Search(ctx, contract.SearchRequest{Namespaces: nsList, Limit: 100})
|
||||
if err != nil {
|
||||
log.Printf("admin/memories/export (cutover): plugin search: %v", err)
|
||||
c.JSON(http.StatusOK, []memoryExportEntry{})
|
||||
return
|
||||
}
|
||||
|
||||
// 5. Map each memory to a workspace_name, redact, emit.
|
||||
seen := make(map[string]struct{})
|
||||
memories := make([]memoryExportEntry, 0, len(resp.Memories))
|
||||
for _, m := range resp.Memories {
|
||||
if _, dup := seen[m.ID]; dup {
|
||||
continue
|
||||
}
|
||||
seen[m.ID] = struct{}{}
|
||||
owner := nsToOwner[m.Namespace]
|
||||
redacted, _ := redactSecrets(owner, m.Content)
|
||||
memories = append(memories, memoryExportEntry{
|
||||
ID: m.ID,
|
||||
Content: redacted,
|
||||
Scope: legacyScopeFromNamespace(m.Namespace),
|
||||
Namespace: m.Namespace,
|
||||
CreatedAt: m.CreatedAt,
|
||||
WorkspaceName: owner,
|
||||
})
|
||||
}
|
||||
c.JSON(http.StatusOK, memories)
|
||||
}
|
||||
|
||||
// workspaceRow is the per-workspace tuple used by the optimized export:
// the workspace's own id and name, plus the id of the root of its
// parent chain (the grouping key).
type workspaceRow struct {
	ID     string
	Name   string
	RootID string
}

// loadWorkspacesWithRoots runs one recursive-CTE query that starts from
// every workspace whose parent_id is NULL and walks down through
// children, tagging each row with the id of the root it descends from.
// Rows come back ordered by name. The recursion only extends from rows
// whose depth is below 50, and a workspace that is not reachable from a
// NULL-parent row does not appear in the result.
//
// On a query or scan error it returns (nil, err). An error reported by
// rows.Err() after iteration is returned together with the rows read so
// far.
func loadWorkspacesWithRoots(ctx context.Context, conn *sql.DB) ([]workspaceRow, error) {
	const rootedWorkspacesSQL = `
		WITH RECURSIVE chain AS (
			SELECT id, parent_id, name, id AS root_id, 0 AS depth
			FROM workspaces
			WHERE parent_id IS NULL
			UNION ALL
			SELECT w.id, w.parent_id, w.name, c.root_id, c.depth + 1
			FROM workspaces w
			JOIN chain c ON w.parent_id = c.id
			WHERE c.depth < 50
		)
		SELECT id::text, name, root_id::text FROM chain ORDER BY name
	`
	rows, err := conn.QueryContext(ctx, rootedWorkspacesSQL)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	// Non-nil even when the table is empty.
	result := []workspaceRow{}
	for rows.Next() {
		var row workspaceRow
		if scanErr := rows.Scan(&row.ID, &row.Name, &row.RootID); scanErr != nil {
			return nil, scanErr
		}
		result = append(result, row)
	}
	return result, rows.Err()
}

// pickOwnerForNamespace chooses the workspace_name an exported memory in
// namespace ns is attributed to.
//
// For a "workspace:<id>" namespace it returns the name of the member
// whose ID equals <id>. In every other case — including a workspace
// namespace that matches no member — it returns the name of the first
// member, or "" when members is empty. Callers that pass members in a
// stable order (loadWorkspacesWithRoots orders by name) therefore get a
// stable owner for shared namespaces.
func pickOwnerForNamespace(ns string, members []workspaceRow) string {
	const workspacePrefix = "workspace:"
	if strings.HasPrefix(ns, workspacePrefix) {
		targetID := ns[len(workspacePrefix):]
		for i := range members {
			if members[i].ID == targetID {
				return members[i].Name
			}
		}
	}
	if len(members) == 0 {
		return ""
	}
	return members[0].Name
}
|
||||
|
||||
// importViaPlugin writes the entries through the plugin instead of
|
||||
// directly to agent_memories. Workspaces are resolved by name like
|
||||
// the legacy path. Scope→namespace mapping mirrors the PR-6 shim.
|
||||
|
||||
@@ -151,9 +151,9 @@ func TestExport_RoutesThroughPluginWhenCutoverActive(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
|
||||
mock.ExpectQuery("SELECT id::text, name FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}).
|
||||
AddRow("ws-1", "alpha"))
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, body contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
@@ -196,10 +196,10 @@ func TestExport_DeduplicatesByMemoryID(t *testing.T) {
|
||||
mock := installMockDB(t)
|
||||
|
||||
// Two workspaces, both will see the same team-shared memory.
|
||||
mock.ExpectQuery("SELECT id::text, name FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}).
|
||||
AddRow("ws-1", "alpha").
|
||||
AddRow("ws-2", "beta"))
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1").
|
||||
AddRow("ws-2", "beta", "ws-2"))
|
||||
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, body contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
@@ -225,9 +225,9 @@ func TestExport_DeduplicatesByMemoryID(t *testing.T) {
|
||||
func TestExport_SkipsWorkspaceWhenResolverFails(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text, name FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}).
|
||||
AddRow("ws-1", "alpha"))
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{}
|
||||
resolver := &stubAdminResolver{err: errors.New("resolver dead")}
|
||||
@@ -247,9 +247,9 @@ func TestExport_SkipsWorkspaceWhenResolverFails(t *testing.T) {
|
||||
func TestExport_SkipsWorkspaceWhenPluginSearchFails(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text, name FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}).
|
||||
AddRow("ws-1", "alpha"))
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, _ contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
@@ -271,7 +271,7 @@ func TestExport_SkipsWorkspaceWhenPluginSearchFails(t *testing.T) {
|
||||
func TestExport_WorkspacesQueryFails(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text, name FROM workspaces").
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnError(errors.New("db dead"))
|
||||
|
||||
plugin := &stubAdminPlugin{}
|
||||
@@ -290,9 +290,9 @@ func TestExport_WorkspacesQueryFails(t *testing.T) {
|
||||
func TestExport_EmptyReadable(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text, name FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}).
|
||||
AddRow("ws-1", "alpha"))
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1"))
|
||||
|
||||
resolver := &stubAdminResolver{readable: []namespace.Namespace{}}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(&stubAdminPlugin{}, resolver)
|
||||
@@ -312,9 +312,9 @@ func TestExport_EmptyReadable(t *testing.T) {
|
||||
func TestExport_RedactsSecretsInPluginPath(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
mock.ExpectQuery("SELECT id::text, name FROM workspaces").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}).
|
||||
AddRow("ws-1", "alpha"))
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("ws-1", "alpha", "ws-1"))
|
||||
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, _ contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
@@ -535,6 +535,202 @@ func TestImport_SkipsWhenResolverErrors(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestExport_BatchesPluginCallsByRoot pins the I3 fix: previously the
|
||||
// export ran one resolver + one plugin search per workspace (N+1 in
|
||||
// both); now it groups by root and runs one resolver + one plugin
|
||||
// search per UNIQUE root.
|
||||
//
|
||||
// Setup: 3 workspaces under 1 root → 1 resolver call + 1 plugin call
|
||||
// (was: 3 resolver + 3 plugin in the old code). The plugin search
|
||||
// receives 5 namespaces: each member's workspace:<id> + team:root-1
|
||||
// + org:root-1. (Children's workspace:<id> namespaces must be
|
||||
// included or admin export silently drops their private memories.)
|
||||
func TestExport_BatchesPluginCallsByRoot(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("root-1", "alpha", "root-1").
|
||||
AddRow("child-1", "alpha-child", "root-1").
|
||||
AddRow("child-2", "alpha-grandchild", "root-1"))
|
||||
|
||||
pluginSearchCount := 0
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, body contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
pluginSearchCount++
|
||||
if len(body.Namespaces) != 5 {
|
||||
t.Errorf("plugin search call %d: namespaces len = %d, want 5 (3 workspace + team + org); got %v", pluginSearchCount, len(body.Namespaces), body.Namespaces)
|
||||
}
|
||||
return &contract.SearchResponse{}, nil
|
||||
},
|
||||
}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, adminRootResolver())
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/memories/export", nil)
|
||||
h.Export(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("code = %d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
if pluginSearchCount != 1 {
|
||||
t.Errorf("plugin search called %d times, want 1 (was 3 with the old N+1 code)", pluginSearchCount)
|
||||
}
|
||||
}
|
||||
|
||||
// perWorkspaceResolver mimics the real resolver: ReadableNamespaces
|
||||
// returns the SPECIFIC workspace's view (workspace:<that ID> +
|
||||
// team:<root> + org:<root>), not a constant set. The legacy
|
||||
// stubAdminResolver hides the I3 silent-drop bug by ignoring its
|
||||
// workspace-id argument.
|
||||
type perWorkspaceResolver map[string][]namespace.Namespace
|
||||
|
||||
func (r perWorkspaceResolver) ReadableNamespaces(_ context.Context, ws string) ([]namespace.Namespace, error) {
|
||||
v, ok := r[ws]
|
||||
if !ok {
|
||||
return nil, errors.New("perWorkspaceResolver: unknown ws " + ws)
|
||||
}
|
||||
return v, nil
|
||||
}
|
||||
func (r perWorkspaceResolver) WritableNamespaces(_ context.Context, ws string) ([]namespace.Namespace, error) {
|
||||
return r.ReadableNamespaces(nil, ws)
|
||||
}
|
||||
|
||||
// TestExport_IncludesEveryMembersPrivateNamespace pins the I3 follow-up
|
||||
// fix: when a root group has multiple members, the export must surface
|
||||
// each member's workspace:<id> namespace, not just the root's. Before
|
||||
// the fix, calling ReadableNamespaces(rootID) returned only
|
||||
// workspace:rootID + team:rootID + org:rootID — every child workspace's
|
||||
// private memories were silently dropped from admin export.
|
||||
func TestExport_IncludesEveryMembersPrivateNamespace(t *testing.T) {
|
||||
t.Setenv(envMemoryV2Cutover, "true")
|
||||
mock := installMockDB(t)
|
||||
|
||||
mock.ExpectQuery("WITH RECURSIVE chain").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "root_id"}).
|
||||
AddRow("root-1", "alpha", "root-1").
|
||||
AddRow("child-1", "alpha-child", "root-1").
|
||||
AddRow("child-2", "alpha-grandchild", "root-1"))
|
||||
|
||||
resolver := perWorkspaceResolver{
|
||||
"root-1": {
|
||||
{Name: "workspace:root-1", Kind: contract.NamespaceKindWorkspace, Writable: true},
|
||||
{Name: "team:root-1", Kind: contract.NamespaceKindTeam, Writable: true},
|
||||
{Name: "org:root-1", Kind: contract.NamespaceKindOrg, Writable: true},
|
||||
},
|
||||
"child-1": {
|
||||
{Name: "workspace:child-1", Kind: contract.NamespaceKindWorkspace, Writable: true},
|
||||
{Name: "team:root-1", Kind: contract.NamespaceKindTeam, Writable: true},
|
||||
{Name: "org:root-1", Kind: contract.NamespaceKindOrg, Writable: true},
|
||||
},
|
||||
"child-2": {
|
||||
{Name: "workspace:child-2", Kind: contract.NamespaceKindWorkspace, Writable: true},
|
||||
{Name: "team:root-1", Kind: contract.NamespaceKindTeam, Writable: true},
|
||||
{Name: "org:root-1", Kind: contract.NamespaceKindOrg, Writable: true},
|
||||
},
|
||||
}
|
||||
|
||||
var passedNamespaces []string
|
||||
plugin := &stubAdminPlugin{
|
||||
searchFn: func(_ context.Context, body contract.SearchRequest) (*contract.SearchResponse, error) {
|
||||
passedNamespaces = append(passedNamespaces, body.Namespaces...)
|
||||
return &contract.SearchResponse{Memories: []contract.Memory{
|
||||
{ID: "m-root", Namespace: "workspace:root-1", Content: "root private", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
{ID: "m-child1", Namespace: "workspace:child-1", Content: "child-1 private", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
{ID: "m-child2", Namespace: "workspace:child-2", Content: "child-2 private", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
{ID: "m-team", Namespace: "team:root-1", Content: "shared team", Kind: contract.MemoryKindFact, Source: contract.MemorySourceAgent, CreatedAt: time.Now().UTC()},
|
||||
}}, nil
|
||||
},
|
||||
}
|
||||
h := NewAdminMemoriesHandler().withMemoryV2APIs(plugin, resolver)
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/memories/export", nil)
|
||||
h.Export(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("code = %d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
// Every member's private namespace must reach the plugin search.
|
||||
want := []string{"workspace:root-1", "workspace:child-1", "workspace:child-2", "team:root-1", "org:root-1"}
|
||||
got := make(map[string]bool, len(passedNamespaces))
|
||||
for _, ns := range passedNamespaces {
|
||||
got[ns] = true
|
||||
}
|
||||
for _, w := range want {
|
||||
if !got[w] {
|
||||
t.Errorf("plugin search missing namespace %q (got %v)", w, passedNamespaces)
|
||||
}
|
||||
}
|
||||
if len(passedNamespaces) != 5 {
|
||||
t.Errorf("plugin search namespace count = %d, want 5 (3 workspace + team + org)", len(passedNamespaces))
|
||||
}
|
||||
|
||||
// Children's private memories must appear in the export, attributed
|
||||
// to the right workspace_name.
|
||||
var entries []memoryExportEntry
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &entries); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
byID := map[string]memoryExportEntry{}
|
||||
for _, e := range entries {
|
||||
byID[e.ID] = e
|
||||
}
|
||||
for _, exp := range []struct{ id, ns, owner string }{
|
||||
{"m-root", "workspace:root-1", "alpha"},
|
||||
{"m-child1", "workspace:child-1", "alpha-child"},
|
||||
{"m-child2", "workspace:child-2", "alpha-grandchild"},
|
||||
} {
|
||||
e, ok := byID[exp.id]
|
||||
if !ok {
|
||||
t.Errorf("export missing memory %s — children's private memories silently dropped", exp.id)
|
||||
continue
|
||||
}
|
||||
if e.Namespace != exp.ns {
|
||||
t.Errorf("memory %s namespace = %q, want %q", exp.id, e.Namespace, exp.ns)
|
||||
}
|
||||
if e.WorkspaceName != exp.owner {
|
||||
t.Errorf("memory %s owner = %q, want %q", exp.id, e.WorkspaceName, exp.owner)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestPickOwnerForNamespace covers the namespace→workspace_name
|
||||
// attribution helper introduced in I3.
|
||||
func TestPickOwnerForNamespace(t *testing.T) {
|
||||
members := []workspaceRow{
|
||||
{ID: "root-1", Name: "alpha", RootID: "root-1"},
|
||||
{ID: "child-1", Name: "alpha-child", RootID: "root-1"},
|
||||
}
|
||||
cases := []struct {
|
||||
name string
|
||||
ns string
|
||||
want string
|
||||
}{
|
||||
{"workspace ns matches member id", "workspace:child-1", "alpha-child"},
|
||||
{"workspace ns no match → first", "workspace:foreign", "alpha"},
|
||||
{"team ns → first member of root group", "team:root-1", "alpha"},
|
||||
{"org ns → first member", "org:root-1", "alpha"},
|
||||
{"custom ns → first member", "custom:foo", "alpha"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := pickOwnerForNamespace(tc.ns, members); got != tc.want {
|
||||
t.Errorf("pickOwnerForNamespace(%q) = %q, want %q", tc.ns, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
if got := pickOwnerForNamespace("workspace:abc", nil); got != "" {
|
||||
t.Errorf("empty members must return \"\", got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Helper functions ---
|
||||
|
||||
func TestLegacyScopeFromNamespace(t *testing.T) {
|
||||
|
||||
@@ -0,0 +1,177 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"go/ast"
|
||||
"go/parser"
|
||||
"go/token"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestAgentMessageBroadcastsArePersisted is a forward-looking AST
|
||||
// gate: every function in this package that broadcasts an
|
||||
// `AGENT_MESSAGE` WebSocket event MUST also call
|
||||
// `INSERT INTO activity_logs` somewhere in its body.
|
||||
//
|
||||
// The reno-stars production data-loss bug (CEO Ryan PC's long-form
|
||||
// onboarding-friction message visible live but missing on reload)
|
||||
// happened because mcp_tools.go:toolSendMessageToUser broadcast WS
|
||||
// without a paired INSERT — while the HTTP /notify sibling DID
|
||||
// persist. The fix added the INSERT; this gate prevents the regression
|
||||
// class from re-emerging in any future chat-bearing tool.
|
||||
//
|
||||
// Why an AST gate vs a code-review checklist (per memory
|
||||
// feedback_behavior_based_ast_gates.md): "pin invariants by what a
|
||||
// function calls, not what it's named". The shape that loses data is:
|
||||
//
|
||||
// BroadcastOnly(_, "AGENT_MESSAGE", _) without an INSERT companion
|
||||
//
|
||||
// Any new tool that emits AGENT_MESSAGE must persist or the next
|
||||
// canvas refresh drops the message — same shape as reno-stars. A
|
||||
// reviewer can miss this; the AST walk can't.
|
||||
//
|
||||
// Allowlist: empty by intent. If a future use case genuinely needs
|
||||
// fire-and-forget broadcast (e.g., transient typing indicators that
|
||||
// should NOT survive reload), add an entry here AND document why.
|
||||
// "Doesn't need to persist" is rarely the right answer for chat —
|
||||
// the canvas history is the source of truth.
|
||||
func TestAgentMessageBroadcastsArePersisted(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("getwd: %v", err)
|
||||
}
|
||||
entries, err := os.ReadDir(wd)
|
||||
if err != nil {
|
||||
t.Fatalf("readdir %s: %v", wd, err)
|
||||
}
|
||||
|
||||
type violation struct {
|
||||
file string
|
||||
fn string
|
||||
}
|
||||
var violations []violation
|
||||
|
||||
for _, ent := range entries {
|
||||
name := ent.Name()
|
||||
if ent.IsDir() || !strings.HasSuffix(name, ".go") || strings.HasSuffix(name, "_test.go") {
|
||||
continue
|
||||
}
|
||||
path := filepath.Join(wd, name)
|
||||
fset := token.NewFileSet()
|
||||
file, err := parser.ParseFile(fset, path, nil, parser.ParseComments)
|
||||
if err != nil {
|
||||
t.Fatalf("parse %s: %v", path, err)
|
||||
}
|
||||
|
||||
for _, decl := range file.Decls {
|
||||
fn, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fn.Body == nil {
|
||||
continue
|
||||
}
|
||||
if !funcEmitsAgentMessageBroadcast(fn) {
|
||||
continue
|
||||
}
|
||||
if !funcInsertsIntoActivityLogs(fn) {
|
||||
violations = append(violations, violation{file: name, fn: fn.Name.Name})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(violations) > 0 {
|
||||
sort.Slice(violations, func(i, j int) bool {
|
||||
if violations[i].file != violations[j].file {
|
||||
return violations[i].file < violations[j].file
|
||||
}
|
||||
return violations[i].fn < violations[j].fn
|
||||
})
|
||||
var buf strings.Builder
|
||||
for _, v := range violations {
|
||||
buf.WriteString(" - ")
|
||||
buf.WriteString(v.file)
|
||||
buf.WriteString(":")
|
||||
buf.WriteString(v.fn)
|
||||
buf.WriteString("\n")
|
||||
}
|
||||
t.Errorf(`function(s) broadcast `+"`AGENT_MESSAGE`"+` without persisting to activity_logs:
|
||||
|
||||
%s
|
||||
This is the reno-stars data-loss regression class: live message
|
||||
visible to the user, but missing on reload because activity_log was
|
||||
never written. Every chat-bearing broadcast MUST be paired with:
|
||||
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, method,
|
||||
summary, response_body, status)
|
||||
VALUES ($1, 'a2a_receive', 'notify', $2, $3::jsonb, 'ok')
|
||||
|
||||
See activity.go:Notify and mcp_tools.go:toolSendMessageToUser for
|
||||
the canonical shapes. Don't add an allowlist entry without a
|
||||
documented reason — the canvas chat history is the source of truth
|
||||
and silently dropping messages is a P0 user trust break.`,
|
||||
buf.String())
|
||||
}
|
||||
}
|
||||
|
||||
// funcEmitsAgentMessageBroadcast reports whether fn's body contains a
// method-style call `<recv>.BroadcastOnly(_, <event>, ...)` whose second
// argument names the AGENT_MESSAGE event. A nil fn or a declaration
// without a body reports false.
//
// The event argument is recognised in any of these spellings:
//
//   - a string literal (interpreted or raw) equal to "AGENT_MESSAGE";
//   - the identifier EventAgentMessage, bare or package-qualified
//     (e.g. events.EventAgentMessage);
//   - either of the above wrapped in parentheses or in a string(...)
//     conversion, e.g. string(events.EventAgentMessage).
//
// Recognising the constant matters: a caller that passes the typed
// constant instead of the literal would otherwise be invisible to the
// persistence gate.
//
// The match is purely syntactic. No type information is consulted, so an
// unrelated identifier that happens to be called EventAgentMessage also
// matches, and an event name held in a local variable does not.
func funcEmitsAgentMessageBroadcast(fn *ast.FuncDecl) bool {
	if fn == nil || fn.Body == nil {
		return false
	}
	found := false
	ast.Inspect(fn.Body, func(n ast.Node) bool {
		if found {
			// Already decided; stop descending.
			return false
		}
		call, ok := n.(*ast.CallExpr)
		if !ok {
			return true
		}
		sel, ok := call.Fun.(*ast.SelectorExpr)
		if !ok || sel.Sel.Name != "BroadcastOnly" {
			return true
		}
		// BroadcastOnly(workspaceID, eventType, payload): the second
		// argument is the event name.
		if len(call.Args) < 2 {
			return true
		}
		if isAgentMessageEventExpr(call.Args[1]) {
			found = true
			return false
		}
		return true
	})
	return found
}

// isAgentMessageEventExpr reports whether expr syntactically denotes the
// AGENT_MESSAGE event name: the string literal itself, the
// EventAgentMessage identifier (bare or selector-qualified), or either
// one nested inside parentheses or a single-argument string(...) call.
func isAgentMessageEventExpr(expr ast.Expr) bool {
	switch e := expr.(type) {
	case *ast.ParenExpr:
		return isAgentMessageEventExpr(e.X)
	case *ast.BasicLit:
		if e.Kind != token.STRING {
			return false
		}
		val, err := strconv.Unquote(e.Value)
		if err != nil {
			// Fall back to the raw token text, as the literal could
			// not be unquoted.
			val = e.Value
		}
		return val == "AGENT_MESSAGE"
	case *ast.Ident:
		return e.Name == "EventAgentMessage"
	case *ast.SelectorExpr:
		return e.Sel.Name == "EventAgentMessage"
	case *ast.CallExpr:
		conv, ok := e.Fun.(*ast.Ident)
		return ok && conv.Name == "string" && len(e.Args) == 1 &&
			isAgentMessageEventExpr(e.Args[0])
	}
	return false
}
|
||||
|
||||
// funcInsertsIntoActivityLogs reports whether any string literal inside
// fn's body contains the text `INSERT INTO activity_logs`, i.e. the SQL
// handed to the database call that persists the message.
//
// A plain substring test is used on purpose. The gate only cares that
// the function persists at all, not about the precise INSERT shape,
// which is pinned by the per-handler tests (see
// TestMCPHandler_SendMessageToUser_*). Literals are unquoted before the
// comparison; if unquoting fails the raw token text is searched instead.
func funcInsertsIntoActivityLogs(fn *ast.FuncDecl) bool {
	const marker = "INSERT INTO activity_logs"

	persists := false
	ast.Inspect(fn.Body, func(node ast.Node) bool {
		if persists {
			return false
		}
		lit, isLit := node.(*ast.BasicLit)
		if !isLit || lit.Kind != token.STRING {
			return true
		}
		text, err := strconv.Unquote(lit.Value)
		if err != nil {
			text = lit.Value
		}
		persists = strings.Contains(text, marker)
		return !persists
	})
	return persists
}
|
||||
@@ -0,0 +1,203 @@
|
||||
package handlers
|
||||
|
||||
// AgentMessageWriter is the SSOT for "agent → user" message delivery in the
|
||||
// workspace-server. Every chat-bearing path that surfaces a message to the
|
||||
// canvas — HTTP /notify (Notify handler), MCP tools/call
|
||||
// send_message_to_user (toolSendMessageToUser), any future channel — MUST
|
||||
// route through this writer rather than re-implement the broadcast +
|
||||
// persist contract inline.
|
||||
//
|
||||
// Why: pre-consolidation, two handlers duplicated the same "broadcast then
|
||||
// INSERT activity_logs" sequence. The reno-stars production data-loss
|
||||
// incident (2026-05-05, RFC #2945, PR #2944) was the symptom — the
|
||||
// persistence half landed for /notify but lagged for the MCP bridge by
|
||||
// months, silently dropping every long-form external-agent message until
|
||||
// reload. The AST gate from #2944 catches drift; this writer eliminates
|
||||
// the *possibility* of drift by giving both call sites a single
|
||||
// well-tested function to call.
|
||||
//
|
||||
// Contract:
|
||||
// 1. Look up the workspace by id; ErrWorkspaceNotFound on miss so the
|
||||
// caller can return 404 with a clean message.
|
||||
// 2. Broadcast a WS AGENT_MESSAGE event with {message, workspace_id,
|
||||
// name, attachments?}.
|
||||
// 3. INSERT a row into activity_logs:
|
||||
// type='a2a_receive', method='notify', source_id NULL,
|
||||
// response_body={"result": message[, "parts": [file kind...]]},
|
||||
// status='ok'
|
||||
// Best-effort — INSERT failure logs only, returns nil so the broadcast
|
||||
// success isn't undone on the caller side.
|
||||
// 4. Returns nil on success.
|
||||
//
|
||||
// The shape (especially the JSON response_body) is the wire contract the
|
||||
// canvas's chat-history hydrator (canvas/src/.../historyHydration.ts)
|
||||
// reads. Drift here silently breaks chat replay across all consumers, so
|
||||
// changes to the JSON shape MUST be cross-verified against the hydrator
|
||||
// in the same PR.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
)
|
||||
|
||||
// ErrWorkspaceNotFound is the sentinel AgentMessageWriter.Send returns
// when its workspace lookup yields no row, which covers both an unknown
// id and a workspace whose status is 'removed' (the lookup filters those
// out). Match it with errors.Is and translate it to whatever "not found"
// looks like on the caller's surface (HTTP 404, JSON-RPC error, ...).
//
// Genuine database failures are NOT reported through this sentinel; Send
// returns them wrapped so callers can treat them as a 503-class error.
var ErrWorkspaceNotFound = errors.New("agent_message: workspace not found")
|
||||
|
||||
// truncatePreviewRunes returns s unchanged when it holds at most
// maxRunes runes; otherwise it returns the first maxRunes runes of s
// followed by an ellipsis ("…").
//
// The cut is made on a rune (code point) boundary rather than at a byte
// index, so well-formed UTF-8 input always yields well-formed UTF-8
// output: truncatePreviewRunes("你好", 1) is "你…", never "\xe4…". A
// byte-slice cut can land mid-code-point (CJK, emoji, accented
// characters in agent-authored chat), and the invalid UTF-8 it produces
// makes the activity_logs INSERT fail, so the message disappears from
// chat history.
//
// maxRunes counts visible characters, not stored bytes. A negative
// maxRunes is treated as 0, so a non-empty s collapses to just the
// ellipsis and an empty s stays empty. (Before this guard a negative cap
// returned the entire string plus an ellipsis, because the cut condition
// was never reached.)
//
// NOTE(review): bytes that are already invalid UTF-8 in s are counted as
// one rune each and copied through untouched; sanitising the input is
// the caller's job if that can occur.
func truncatePreviewRunes(s string, maxRunes int) string {
	if maxRunes < 0 {
		maxRunes = 0
	}
	// A string never has more runes than bytes, so the cheap length test
	// settles the common short-message case without decoding anything.
	if len(s) <= maxRunes || utf8.RuneCountInString(s) <= maxRunes {
		return s
	}
	// Ranging over a string yields the byte offset at which each rune
	// starts; the offset of rune number maxRunes is the cut point.
	seen := 0
	for offset := range s {
		if seen == maxRunes {
			return s[:offset] + "…"
		}
		seen++
	}
	// Not reachable: the rune count above guarantees the loop cuts.
	return s
}
|
||||
|
||||
// AgentMessageAttachment describes one file attached to an agent → user
// message. It carries the same fields as handlers.NotifyAttachment but
// is a distinct type, so the writer's API does not depend on a handler
// type that carries HTTP binding tags.
//
// The JSON tags matter because AgentMessageWriter.Send places the slice
// directly into the AGENT_MESSAGE broadcast payload. Untagged, the
// fields would serialise under their Go names (URI, Name, MimeType,
// Size). Tagged, the live event uses the same key names that Send
// persists for each file part in activity_logs.response_body (uri, name,
// mimeType, size), and the optional mimeType/size are omitted when
// empty, exactly as in the persisted form.
//
// NOTE(review): the canvas AGENT_MESSAGE consumer is not part of this
// file; confirm it reads these lowercase keys.
type AgentMessageAttachment struct {
	// URI locates the file (the tests use workspace:// URIs).
	URI string `json:"uri"`
	// Name is the file name associated with the attachment.
	Name string `json:"name"`
	// MimeType is optional; empty means "not provided".
	MimeType string `json:"mimeType,omitempty"`
	// Size is optional; zero means "not provided".
	Size int64 `json:"size,omitempty"`
}
|
||||
|
||||
// AgentMessageWriter persists + broadcasts agent → user messages. Construct
|
||||
// once per process via NewAgentMessageWriter; pass the same instance to
|
||||
// every handler that delivers chat (Notify, toolSendMessageToUser, etc.).
|
||||
//
|
||||
// Takes events.EventEmitter (not the *Broadcaster concrete type) so tests
|
||||
// can substitute a fake emitter and producers in other packages can wrap
|
||||
// the real broadcaster behind their own metrics / retries without leaking
|
||||
// the concrete dependency.
|
||||
type AgentMessageWriter struct {
|
||||
db *sql.DB
|
||||
broadcaster events.EventEmitter
|
||||
}
|
||||
|
||||
// NewAgentMessageWriter binds the writer to the platform's DB pool +
|
||||
// WebSocket broadcaster.
|
||||
func NewAgentMessageWriter(db *sql.DB, broadcaster events.EventEmitter) *AgentMessageWriter {
|
||||
return &AgentMessageWriter{db: db, broadcaster: broadcaster}
|
||||
}
|
||||
|
||||
// Send delivers a single agent → user message. Look up + broadcast +
|
||||
// persist in that order; ErrWorkspaceNotFound short-circuits before any
|
||||
// broadcast or DB write so callers can 404 cleanly.
|
||||
//
|
||||
// Returns nil on success — including on DB-INSERT failure (the broadcast
|
||||
// already returned successfully and the user has seen the message; the
|
||||
// persistence-failure mode is logged at WARN but the caller's response
|
||||
// stays 200 so the agent doesn't retry and double-broadcast).
|
||||
func (w *AgentMessageWriter) Send(
|
||||
ctx context.Context,
|
||||
workspaceID, message string,
|
||||
attachments []AgentMessageAttachment,
|
||||
) error {
|
||||
// 1. Workspace lookup. status='removed' filter is the same shape /notify
|
||||
// used pre-consolidation; deleted workspaces don't get notifications.
|
||||
//
|
||||
// Distinguish sql.ErrNoRows ("workspace genuinely not present" — caller
|
||||
// should 404) from real DB errors (connection drop, statement timeout,
|
||||
// pool exhaustion — caller should 503). Pre-fix this branch returned
|
||||
// ErrWorkspaceNotFound for any error, so during a DB outage every
|
||||
// notify call surfaced as "workspace not found" and masked real
|
||||
// incidents in the alert path.
|
||||
var wsName string
|
||||
err := w.db.QueryRowContext(ctx,
|
||||
`SELECT name FROM workspaces WHERE id = $1 AND status != 'removed'`,
|
||||
workspaceID,
|
||||
).Scan(&wsName)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return ErrWorkspaceNotFound
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("agent_message: workspace lookup: %w", err)
|
||||
}
|
||||
|
||||
// 2. Build broadcast payload + WS-emit. Same shape that ChatTab's
|
||||
// AGENT_MESSAGE handler in canvas/src/store/canvas-events.ts has
|
||||
// consumed since the canvas chat shipped — drift here would orphan
|
||||
// every live chat panel.
|
||||
broadcastPayload := map[string]interface{}{
|
||||
"message": message,
|
||||
"workspace_id": workspaceID,
|
||||
"name": wsName,
|
||||
}
|
||||
if len(attachments) > 0 {
|
||||
broadcastPayload["attachments"] = attachments
|
||||
}
|
||||
w.broadcaster.BroadcastOnly(workspaceID, string(events.EventAgentMessage), broadcastPayload)
|
||||
|
||||
// 3. Persist for chat-history hydration. response_body shape MUST stay
|
||||
// in sync with extractResponseText + extractFilesFromTask in
|
||||
// canvas/src/components/tabs/chat/historyHydration.ts:
|
||||
// - extractResponseText reads body.result (string) → renders text
|
||||
// - extractFilesFromTask reads body.parts[] (kind=file) → renders chips
|
||||
respPayload := map[string]interface{}{"result": message}
|
||||
if len(attachments) > 0 {
|
||||
fileParts := make([]map[string]interface{}, 0, len(attachments))
|
||||
for _, a := range attachments {
|
||||
fileMeta := map[string]interface{}{"uri": a.URI, "name": a.Name}
|
||||
if a.MimeType != "" {
|
||||
fileMeta["mimeType"] = a.MimeType
|
||||
}
|
||||
if a.Size > 0 {
|
||||
fileMeta["size"] = a.Size
|
||||
}
|
||||
fileParts = append(fileParts, map[string]interface{}{
|
||||
"kind": "file",
|
||||
"file": fileMeta,
|
||||
})
|
||||
}
|
||||
respPayload["parts"] = fileParts
|
||||
}
|
||||
respJSON, _ := json.Marshal(respPayload)
|
||||
preview := truncatePreviewRunes(message, 80)
|
||||
if _, err := w.db.ExecContext(ctx, `
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, method, summary, response_body, status)
|
||||
VALUES ($1, 'a2a_receive', 'notify', $2, $3::jsonb, 'ok')
|
||||
`, workspaceID, "Agent message: "+preview, string(respJSON)); err != nil {
|
||||
// Best-effort: the broadcast already returned ok and the user
|
||||
// has seen the message. Logging a structured line lets operators
|
||||
// notice persistence-failure rates spike if the DB is unhealthy,
|
||||
// without breaking the tool response or causing the agent to
|
||||
// retry-and-double-broadcast.
|
||||
log.Printf("agent_message: failed to persist for %s: %v", workspaceID, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,448 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql/driver"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
)
|
||||
|
||||
// AgentMessageWriter is the SSOT for agent → user chat delivery
|
||||
// (RFC #2945 PR-A). These tests pin the contract the writer
|
||||
// guarantees: workspace lookup, broadcast, INSERT, error semantics —
|
||||
// every shape that producers (Notify, toolSendMessageToUser, future
|
||||
// channels) rely on.
|
||||
//
|
||||
// Pre-consolidation, the broadcast-then-INSERT logic was duplicated
|
||||
// across two handlers and they drifted (reno-stars, 2026-05-05). With
|
||||
// the writer being the only place this logic lives, these tests are
|
||||
// the regression line for every chat-bearing path simultaneously.
|
||||
|
||||
// jsonMatcher is a sqlmock argument matcher for JSON-encoded string
// arguments. It decodes the actual SQL argument into a map and hands the
// result to a caller-supplied predicate. That is stricter than a
// substring check (which can pass on a renamed key) and, unlike an exact
// string comparison, indifferent to map-key ordering.
type jsonMatcher struct {
	// predicate decides whether the decoded JSON object is acceptable.
	predicate func(parsed map[string]any) bool
	// desc is a human-readable label for the expectation; Match does not
	// read it.
	desc string
}

// Match implements sqlmock's argument-matcher contract. It reports false
// when v is not a string or is not JSON decodable into an object, and
// otherwise returns whatever the predicate says about the decoded map.
func (m jsonMatcher) Match(v driver.Value) bool {
	var decoded map[string]any
	if raw, isString := v.(string); !isString || json.Unmarshal([]byte(raw), &decoded) != nil {
		return false
	}
	return m.predicate(decoded)
}
|
||||
|
||||
// stringMatcher adapts a plain func(string) bool into a sqlmock argument
// matcher, for prefix / suffix / equality checks on arguments that are
// passed to the driver as strings.
type stringMatcher func(string) bool

// Match reports false for any non-string driver value; for strings it
// returns the result of the wrapped function.
func (f stringMatcher) Match(v driver.Value) bool {
	if text, isString := v.(string); isString {
		return f(text)
	}
	return false
}
|
||||
|
||||
// capturingEmitter is a test double for the event emitter. It appends
// every BroadcastOnly call to events so tests can assert on the
// WebSocket event shape without a real hub. RecordAndBroadcast is
// captured the same way: the writer does not call it today, but
// recording a call is easier to diagnose than a nil panic should a
// future producer start using it.
//
// It is not synchronised; use it from a single goroutine.
type capturingEmitter struct {
	events []capturedEvent
}

// capturedEvent is one recorded emitter call.
type capturedEvent struct {
	workspaceID string
	eventType   string
	payload     interface{}
}

// BroadcastOnly records the call and does nothing else.
func (c *capturingEmitter) BroadcastOnly(workspaceID string, eventType string, payload interface{}) {
	c.events = append(c.events, capturedEvent{
		workspaceID: workspaceID,
		eventType:   eventType,
		payload:     payload,
	})
}

// RecordAndBroadcast records the call and always reports success. Note
// the parameter order (eventType before workspaceID) differs from
// BroadcastOnly; the recorded event is normalised to the same fields.
func (c *capturingEmitter) RecordAndBroadcast(_ context.Context, eventType string, workspaceID string, payload interface{}) error {
	c.events = append(c.events, capturedEvent{
		workspaceID: workspaceID,
		eventType:   eventType,
		payload:     payload,
	})
	return nil
}
|
||||
|
||||
// TestAgentMessageWriter_Send_Success_NoAttachments pins the happy
|
||||
// path: workspace lookup, broadcast, INSERT, return nil.
|
||||
func TestAgentMessageWriter_Send_Success_NoAttachments(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
w := NewAgentMessageWriter(db.DB, newTestBroadcaster())
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-1").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("CEO Ryan PC"))
|
||||
|
||||
mock.ExpectExec(`INSERT INTO activity_logs.*'a2a_receive'.*'notify'`).
|
||||
WithArgs(
|
||||
"ws-1",
|
||||
sqlmock.AnyArg(), // summary
|
||||
`{"result":"hi"}`,
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
if err := w.Send(context.Background(), "ws-1", "hi", nil); err != nil {
|
||||
t.Fatalf("Send returned %v, want nil", err)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("DB expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_Success_WithAttachments pins the file
|
||||
// attachment shape — response_body MUST contain a parts[] array with
|
||||
// kind=file entries so the canvas hydrater renders download chips.
|
||||
// Drift here = chips disappear on chat reload.
|
||||
func TestAgentMessageWriter_Send_Success_WithAttachments(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
w := NewAgentMessageWriter(db.DB, newTestBroadcaster())
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-att").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("Ryan"))
|
||||
|
||||
mock.ExpectExec(`INSERT INTO activity_logs.*'a2a_receive'.*'notify'`).
|
||||
WithArgs(
|
||||
"ws-att",
|
||||
sqlmock.AnyArg(),
|
||||
jsonMatcher{
|
||||
desc: "response_body has result + parts with kind=file metadata",
|
||||
predicate: func(p map[string]any) bool {
|
||||
if p["result"] != "see attached" {
|
||||
return false
|
||||
}
|
||||
parts, ok := p["parts"].([]any)
|
||||
if !ok || len(parts) != 1 {
|
||||
return false
|
||||
}
|
||||
part, ok := parts[0].(map[string]any)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
if part["kind"] != "file" {
|
||||
return false
|
||||
}
|
||||
file, ok := part["file"].(map[string]any)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
return file["uri"] == "workspace://x.zip" &&
|
||||
file["name"] == "x.zip" &&
|
||||
file["mimeType"] == "application/zip" &&
|
||||
file["size"].(float64) == 1234
|
||||
},
|
||||
},
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
atts := []AgentMessageAttachment{
|
||||
{URI: "workspace://x.zip", Name: "x.zip", MimeType: "application/zip", Size: 1234},
|
||||
}
|
||||
if err := w.Send(context.Background(), "ws-att", "see attached", atts); err != nil {
|
||||
t.Fatalf("Send returned %v, want nil", err)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("DB expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_WorkspaceNotFound pins ErrWorkspaceNotFound
|
||||
// short-circuit. Must NOT broadcast, MUST NOT INSERT — caller will 404
|
||||
// or surface a JSON-RPC error.
|
||||
func TestAgentMessageWriter_Send_WorkspaceNotFound(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
emitter := &capturingEmitter{}
|
||||
w := NewAgentMessageWriter(db.DB, emitter)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-missing").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}))
|
||||
|
||||
err := w.Send(context.Background(), "ws-missing", "lost in the void", nil)
|
||||
if !errors.Is(err, ErrWorkspaceNotFound) {
|
||||
t.Errorf("Send returned %v, want ErrWorkspaceNotFound", err)
|
||||
}
|
||||
if len(emitter.events) != 0 {
|
||||
t.Errorf("workspace-not-found path MUST NOT broadcast, got %d events", len(emitter.events))
|
||||
}
|
||||
// Implicit: no INSERT expectation registered, so a stray INSERT
|
||||
// would fail ExpectationsWereMet.
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("DB expectations (INSERT must NOT fire on workspace-not-found): %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_DBInsertFailureStillReturnsNil pins the
|
||||
// "best-effort persistence" contract: when the activity_log INSERT
|
||||
// fails (DB hiccup, transient connection, constraint), the writer
|
||||
// MUST still return nil. The broadcast already succeeded; the user
|
||||
// has seen the message; returning an error here would cause the
|
||||
// caller (and the agent calling the tool) to retry and double-
|
||||
// broadcast.
|
||||
func TestAgentMessageWriter_Send_DBInsertFailureStillReturnsNil(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
w := NewAgentMessageWriter(db.DB, newTestBroadcaster())
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-dbfail").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("CEO Ryan PC"))
|
||||
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WillReturnError(errors.New("transient db error"))
|
||||
|
||||
err := w.Send(context.Background(), "ws-dbfail", "should not be lost from live chat", nil)
|
||||
if err != nil {
|
||||
t.Errorf("DB INSERT failure must return nil (broadcast already succeeded), got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_PreviewTruncation pins the summary
|
||||
// preview cap. Long messages (Ryan's onboarding-friction report was
|
||||
// ~2k chars) must summarise to ≤80 chars + ellipsis so the activity
|
||||
// table doesn't carry multi-KB summaries that bloat list queries.
|
||||
func TestAgentMessageWriter_Send_PreviewTruncation(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
w := NewAgentMessageWriter(db.DB, newTestBroadcaster())
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-trunc").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("Ryan"))
|
||||
|
||||
longMsg := strings.Repeat("x", 200)
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WithArgs(
|
||||
"ws-trunc",
|
||||
stringMatcher(func(s string) bool {
|
||||
if !strings.HasPrefix(s, "Agent message: ") {
|
||||
return false
|
||||
}
|
||||
preview := strings.TrimPrefix(s, "Agent message: ")
|
||||
if !strings.HasSuffix(preview, "…") {
|
||||
return false
|
||||
}
|
||||
body := strings.TrimSuffix(preview, "…")
|
||||
return len(body) == 80
|
||||
}),
|
||||
sqlmock.AnyArg(),
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
if err := w.Send(context.Background(), "ws-trunc", longMsg, nil); err != nil {
|
||||
t.Fatalf("Send: %v", err)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("preview truncation drift: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_BroadcastsAgentMessageEvent pins the
|
||||
// WS event name + payload shape. The canvas's
|
||||
// canvas-events.ts:AGENT_MESSAGE handler reads {message, workspace_id,
|
||||
// name, attachments?} — drift here orphans every live chat panel.
|
||||
func TestAgentMessageWriter_Send_BroadcastsAgentMessageEvent(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
emitter := &capturingEmitter{}
|
||||
w := NewAgentMessageWriter(db.DB, emitter)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-bc").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("Workspace Name"))
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
atts := []AgentMessageAttachment{
|
||||
{URI: "workspace://a.txt", Name: "a.txt"},
|
||||
}
|
||||
if err := w.Send(context.Background(), "ws-bc", "hi", atts); err != nil {
|
||||
t.Fatalf("Send: %v", err)
|
||||
}
|
||||
|
||||
if len(emitter.events) != 1 {
|
||||
t.Fatalf("expected exactly 1 broadcast, got %d", len(emitter.events))
|
||||
}
|
||||
ev := emitter.events[0]
|
||||
if ev.eventType != "AGENT_MESSAGE" {
|
||||
t.Errorf("event type = %q, want AGENT_MESSAGE", ev.eventType)
|
||||
}
|
||||
if ev.workspaceID != "ws-bc" {
|
||||
t.Errorf("workspace_id = %q, want ws-bc", ev.workspaceID)
|
||||
}
|
||||
pl, ok := ev.payload.(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("payload not a map: %T", ev.payload)
|
||||
}
|
||||
if pl["message"] != "hi" {
|
||||
t.Errorf("payload.message = %v, want hi", pl["message"])
|
||||
}
|
||||
if pl["workspace_id"] != "ws-bc" {
|
||||
t.Errorf("payload.workspace_id = %v, want ws-bc", pl["workspace_id"])
|
||||
}
|
||||
if pl["name"] != "Workspace Name" {
|
||||
t.Errorf("payload.name = %v, want Workspace Name", pl["name"])
|
||||
}
|
||||
if pl["attachments"] == nil {
|
||||
t.Error("payload.attachments missing on attachment-bearing send")
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_DBErrorOnLookupReturnsWrapped pins the
|
||||
// distinction between sql.ErrNoRows (legit not-found → 404) and real
|
||||
// DB errors (connection drop → 503). Pre-followup the lookup branch
|
||||
// returned ErrWorkspaceNotFound for ANY error, so during a DB outage
|
||||
// every notify call surfaced as "workspace not found" and masked
|
||||
// real incidents in alerting.
|
||||
func TestAgentMessageWriter_Send_DBErrorOnLookupReturnsWrapped(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
w := NewAgentMessageWriter(db.DB, newTestBroadcaster())
|
||||
|
||||
transientErr := errors.New("connection refused")
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-dbdown").
|
||||
WillReturnError(transientErr)
|
||||
|
||||
err := w.Send(context.Background(), "ws-dbdown", "hi", nil)
|
||||
if err == nil {
|
||||
t.Fatal("expected wrapped DB error, got nil")
|
||||
}
|
||||
if errors.Is(err, ErrWorkspaceNotFound) {
|
||||
t.Errorf("DB outage MUST NOT surface as ErrWorkspaceNotFound (masks incidents in alerting); got %v", err)
|
||||
}
|
||||
if !errors.Is(err, transientErr) {
|
||||
t.Errorf("expected wrapped %v, got %v", transientErr, err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTruncatePreviewRunes_RuneBoundary pins the multi-byte-safe
|
||||
// truncation. The previous byte-slice version produced invalid UTF-8
|
||||
// when the cut landed mid-codepoint (CJK, emoji, accented), and
|
||||
// Postgres JSONB rejects invalid UTF-8 — INSERT fails, log.Printf
|
||||
// fires, message vanishes from chat history. Per memory
|
||||
// feedback_assert_exact_not_substring.md, pin the boundary cases
|
||||
// directly.
|
||||
func TestTruncatePreviewRunes_RuneBoundary(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
in string
|
||||
max int
|
||||
want string
|
||||
}{
|
||||
{"under-max ASCII", "hi", 80, "hi"},
|
||||
{"under-max CJK", "你好", 80, "你好"},
|
||||
{"exactly-at-max", "abcde", 5, "abcde"},
|
||||
{"truncate ASCII", "abcdefghij", 5, "abcde…"},
|
||||
{"truncate CJK at rune boundary", "你好世界你好世界", 4, "你好世界…"},
|
||||
{"truncate emoji at rune boundary", "😀😀😀😀😀😀", 3, "😀😀😀…"},
|
||||
// The pre-fix bug shape: byte-slice on non-ASCII would have
|
||||
// mangled the codepoint here. With rune-boundary truncation
|
||||
// the result is well-formed UTF-8.
|
||||
{"non-zero with emoji prefix", "🚀abcdefghijk", 5, "🚀abcd…"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
got := truncatePreviewRunes(c.in, c.max)
|
||||
if got != c.want {
|
||||
t.Errorf("truncatePreviewRunes(%q, %d) = %q, want %q", c.in, c.max, got, c.want)
|
||||
}
|
||||
// Always-valid UTF-8 invariant. A byte-slice truncation
|
||||
// could leave partial codepoints; this version must not.
|
||||
if !utf8.ValidString(got) {
|
||||
t.Errorf("truncatePreviewRunes(%q, %d) returned invalid UTF-8: %q", c.in, c.max, got)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_NonASCIIMessagePersists pins the end-to-end
|
||||
// path for non-ASCII messages — the original reno-stars regression
|
||||
// surfaced via byte-slice truncation breaking JSONB INSERT. Every
|
||||
// handler-level test had ASCII content, so this branch had no
|
||||
// coverage. Now it does.
|
||||
func TestAgentMessageWriter_Send_NonASCIIMessagePersists(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
w := NewAgentMessageWriter(db.DB, newTestBroadcaster())
|
||||
|
||||
// 200-rune CJK message — exceeds the 80-rune cap, would have hit
|
||||
// the byte-slice bug.
|
||||
msg := strings.Repeat("你", 200)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-cjk").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("CEO Ryan PC"))
|
||||
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WithArgs(
|
||||
"ws-cjk",
|
||||
stringMatcher(func(s string) bool {
|
||||
if !strings.HasPrefix(s, "Agent message: ") {
|
||||
return false
|
||||
}
|
||||
preview := strings.TrimPrefix(s, "Agent message: ")
|
||||
if !strings.HasSuffix(preview, "…") {
|
||||
return false
|
||||
}
|
||||
body := strings.TrimSuffix(preview, "…")
|
||||
// 80 runes of 你 = 80 codepoints. Each is 3 bytes UTF-8.
|
||||
if utf8.RuneCountInString(body) != 80 {
|
||||
return false
|
||||
}
|
||||
// MUST be valid UTF-8 — pre-fix byte-slice would have
|
||||
// returned half a codepoint here.
|
||||
return utf8.ValidString(body)
|
||||
}),
|
||||
sqlmock.AnyArg(),
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
if err := w.Send(context.Background(), "ws-cjk", msg, nil); err != nil {
|
||||
t.Fatalf("Send: %v", err)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("non-ASCII path drift: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentMessageWriter_Send_OmitsAttachmentsKeyWhenEmpty pins the
|
||||
// "no key when nil" wire contract — extra empty fields would force
|
||||
// canvas consumers to defensively check for [] vs undefined; the
|
||||
// existing AGENT_MESSAGE handler treats absence as "no attachments".
|
||||
func TestAgentMessageWriter_Send_OmitsAttachmentsKeyWhenEmpty(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
emitter := &capturingEmitter{}
|
||||
w := NewAgentMessageWriter(db.DB, emitter)
|
||||
|
||||
mock.ExpectQuery("SELECT name FROM workspaces").
|
||||
WithArgs("ws-noatt").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("X"))
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
if err := w.Send(context.Background(), "ws-noatt", "plain text", nil); err != nil {
|
||||
t.Fatalf("Send: %v", err)
|
||||
}
|
||||
if len(emitter.events) != 1 {
|
||||
t.Fatalf("expected 1 event, got %d", len(emitter.events))
|
||||
}
|
||||
pl := emitter.events[0].payload.(map[string]interface{})
|
||||
if _, present := pl["attachments"]; present {
|
||||
t.Errorf("attachments key MUST NOT be present when empty (canvas treats absence as 'none'); payload=%v", pl)
|
||||
}
|
||||
}
|
||||
@@ -31,23 +31,37 @@ package handlers
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
|
||||
)
|
||||
|
||||
// ChatFilesHandler serves file upload + download for chat. Holds a
|
||||
// reference to TemplatesHandler so the (still docker-exec) Download
|
||||
// path keeps using the shared findContainer/CopyFromContainer helpers
|
||||
// without duplicating them. Upload no longer reaches into Docker.
|
||||
//
|
||||
// pendingUploads + broadcaster are wired only when the platform's
|
||||
// migration 20260505100000 has run; nil values fall back to the
|
||||
// pre-poll-mode behavior (422 on poll-mode upload, same as before).
|
||||
// This lets the binary keep booting in environments where the
|
||||
// migration hasn't run yet — the poll branch is gated by a not-nil
|
||||
// check at the call site.
|
||||
type ChatFilesHandler struct {
|
||||
templates *TemplatesHandler
|
||||
|
||||
@@ -56,6 +70,19 @@ type ChatFilesHandler struct {
|
||||
// the 50 MB worst case on a slow EC2 link without leaving a
|
||||
// connection hanging forever on a sick workspace.
|
||||
httpClient *http.Client
|
||||
|
||||
// pendingUploads is the platform-side staging layer for poll-mode
|
||||
// uploads. nil → poll branch returns 422 unchanged (the pre-feature
|
||||
// behavior); non-nil → poll branch parses multipart, persists each
|
||||
// file via storage.Put, logs a chat_upload_receive activity row,
|
||||
// and returns 200 with synthetic platform-pending: URIs.
|
||||
pendingUploads pendinguploads.Storage
|
||||
|
||||
// broadcaster is the events.EventEmitter used to notify the canvas
|
||||
// when an activity row lands (so the Agent Comms panel updates
|
||||
// live). Same emitter the rest of the platform uses; nil = no
|
||||
// broadcast (tests).
|
||||
broadcaster events.EventEmitter
|
||||
}
|
||||
|
||||
func NewChatFilesHandler(t *TemplatesHandler) *ChatFilesHandler {
|
||||
@@ -69,6 +96,16 @@ func NewChatFilesHandler(t *TemplatesHandler) *ChatFilesHandler {
|
||||
}
|
||||
}
|
||||
|
||||
// WithPendingUploads enables the poll-mode upload branch by wiring a
|
||||
// Storage + broadcaster. Call site (router.go) does this at
|
||||
// construction; tests set the fields directly when they want the
|
||||
// poll path exercised. Returns the handler for chained construction.
|
||||
func (h *ChatFilesHandler) WithPendingUploads(storage pendinguploads.Storage, broadcaster events.EventEmitter) *ChatFilesHandler {
|
||||
h.pendingUploads = storage
|
||||
h.broadcaster = broadcaster
|
||||
return h
|
||||
}
|
||||
|
||||
// chatUploadMaxBytes caps the full multipart request body so a
|
||||
// malicious / runaway client can't OOM the proxy hop. 50 MB matches
|
||||
// the workspace-side limit; anything larger is rejected at the
|
||||
@@ -262,6 +299,24 @@ func (h *ChatFilesHandler) Upload(c *gin.Context) {
|
||||
|
||||
ctx := c.Request.Context()
|
||||
|
||||
// Branch on delivery_mode BEFORE attempting the HTTP forward.
|
||||
// Push-mode workspaces continue to do the streaming forward
|
||||
// unchanged. Poll-mode workspaces (typically external runtimes
|
||||
// on a laptop, no public callback URL) get the platform-side
|
||||
// staging path — the file lands in pending_uploads, an activity
|
||||
// row goes into the inbox queue, and the workspace pulls on its
|
||||
// next poll cycle.
|
||||
if h.pendingUploads != nil {
|
||||
mode, modeOK := lookupUploadDeliveryMode(c, ctx, workspaceID)
|
||||
if !modeOK {
|
||||
return
|
||||
}
|
||||
if mode == "poll" {
|
||||
h.uploadPollMode(c, ctx, workspaceID)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
wsURL, secret, ok := resolveWorkspaceForwardCreds(c, ctx, workspaceID, "upload")
|
||||
if !ok {
|
||||
return
|
||||
@@ -405,3 +460,317 @@ func (h *ChatFilesHandler) streamWorkspaceResponse(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// lookupUploadDeliveryMode returns the workspace's delivery_mode
|
||||
// for the chat upload branch. Returns ("", false) and writes the
|
||||
// HTTP error response on lookup failure (caller stops). NULL or
|
||||
// empty delivery_mode is treated as "push" — that's the schema
|
||||
// default and matches the legacy pre-#2339 behavior. Only the
|
||||
// explicit string "poll" routes the upload through the poll-mode
|
||||
// branch.
|
||||
//
|
||||
// Why a dedicated helper instead of reusing lookupDeliveryMode
|
||||
// from a2a_proxy_helpers.go: that one swallows errors and falls
|
||||
// back to "push" so the proxy keeps working on a transient DB
|
||||
// hiccup. For upload we want to surface the not-found case as 404
|
||||
// (which the workspace-poll branch wouldn't otherwise hit, since
|
||||
// the workspace-side row IS the source of truth for the mode).
|
||||
func lookupUploadDeliveryMode(c *gin.Context, ctx context.Context, workspaceID string) (string, bool) {
|
||||
var mode sql.NullString
|
||||
err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT delivery_mode FROM workspaces WHERE id = $1`, workspaceID,
|
||||
).Scan(&mode)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return "", false
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("chat_files Upload: delivery_mode lookup failed for %s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "delivery_mode lookup failed"})
|
||||
return "", false
|
||||
}
|
||||
if !mode.Valid || mode.String == "" {
|
||||
return "push", true
|
||||
}
|
||||
return mode.String, true
|
||||
}
|
||||
|
||||
// unsafeFilenameChars matches any single character outside the safe
// set of ASCII letters, digits, dot, underscore and dash. It mirrors
// the Python regex _UNSAFE_FILENAME_CHARS in
// workspace/internal_chat_uploads.py; if the two drift, the same upload
// would yield different canvas-visible URIs on the push and poll paths.
var unsafeFilenameChars = regexp.MustCompile(`[^a-zA-Z0-9._\-]`)

// SanitizeFilename reduces a user-supplied filename to a safe form and
// is intended to behave like sanitize_filename in
// workspace/internal_chat_uploads.py. It is exported so tests in other
// packages can pin parity and so a future shared library can host both
// implementations.
//
// Steps, in order:
//  1. Keep only the final path component (filepath.Base), so a
//     traversal input such as "../../etc/passwd" becomes "passwd".
//  2. Replace spaces, then every other unsafe character, with "_".
//  3. Cap the result at 100 bytes, preserving a trailing extension of
//     at most 16 bytes (dot included) when one is present.
//  4. Return "file" when nothing usable remains ("", "." or "..").
func SanitizeFilename(name string) string {
	const (
		maxNameLen = 100
		maxExtLen  = 16
	)

	cleaned := strings.ReplaceAll(filepath.Base(name), " ", "_")
	cleaned = unsafeFilenameChars.ReplaceAllString(cleaned, "_")

	// After step 2 the string is pure ASCII, so slicing by byte index
	// cannot split a multi-byte character.
	if len(cleaned) > maxNameLen {
		var suffix string
		if idx := strings.LastIndex(cleaned, "."); idx >= 0 && len(cleaned)-idx <= maxExtLen {
			suffix = cleaned[idx:]
		}
		cleaned = cleaned[:maxNameLen-len(suffix)] + suffix
	}

	switch cleaned {
	case "", ".", "..":
		return "file"
	}
	return cleaned
}
|
||||
|
||||
// uploadedFile is one entry of the upload response. Its JSON shape is
// meant to match what the workspace-side /internal/chat/uploads/ingest
// endpoint produces, so the canvas client does not need to know which
// path handled the upload. Field order and tags determine the encoded
// output and must not be changed casually.
type uploadedFile struct {
	// URI is the reference the client uses for the stored file.
	URI string `json:"uri"`
	// Name is the sanitized filename.
	Name string `json:"name"`
	// Mimetype is the sanitized content type (encoded as "mimeType").
	Mimetype string `json:"mimeType"`
	// Size is the content length in bytes.
	Size int64 `json:"size"`
}
|
||||
|
||||
// uploadPollMode handles a chat upload bound for a poll-mode
|
||||
// workspace. Parses the multipart in-place, persists each file via
|
||||
// pendinguploads.Storage, and logs one chat_upload_receive activity
|
||||
// row per file so the workspace's inbox poller picks them up on its
|
||||
// next cycle.
|
||||
//
|
||||
// Why one activity row per file (not one per multipart batch):
|
||||
// - Each row carries one URI; agents that consume the inbox treat
|
||||
// each row as one inbound event. A batch row would force every
|
||||
// consumer to deserialize a list, doubling the field-shape
|
||||
// surface for no UX win.
|
||||
// - At-least-once semantics: a workspace can ack files
|
||||
// individually. Batch ack would leak partial-success state on
|
||||
// a fetcher crash mid-batch.
|
||||
//
|
||||
// Limits enforced here mirror the workspace-side ingest_handler:
|
||||
// - Total body cap: 50 MB (set on c.Request.Body before reaching us)
|
||||
// - Per-file cap: 25 MB (pendinguploads.MaxFileBytes; rejected as 413)
|
||||
// - Filename: sanitized + capped at 100 chars (SanitizeFilename)
|
||||
//
|
||||
// Logging: every persisted file logs an INFO line with workspace_id,
|
||||
// file_id, size, and sanitized name. Failure modes (oversize, missing
|
||||
// files field, malformed multipart) log at WARN with the same fields.
|
||||
// Phase 3 metrics will hook these structured logs.
|
||||
func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, workspaceID string) {
|
||||
// Parse multipart with the same per-file/per-form limits the
|
||||
// workspace-side handler uses (workspace/internal_chat_uploads.py:
|
||||
// max_files=64, max_fields=32). gin's MultipartForm does not
|
||||
// expose those limits directly — the underlying ParseMultipartForm
|
||||
// caps memory at 32 MB by default and spills to disk. For poll-
|
||||
// mode we read each file into memory to hand to Storage.Put;
|
||||
// 25 MB-per-file × 64-files ceiling means worst-case is 1.6 GB of
|
||||
// peak memory. Bound the per-file size at the multipart layer so
|
||||
// the spill never gets close.
|
||||
if err := c.Request.ParseMultipartForm(32 << 20); err != nil {
|
||||
log.Printf("chat_files uploadPollMode: parse multipart failed for %s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "malformed multipart body"})
|
||||
return
|
||||
}
|
||||
form := c.Request.MultipartForm
|
||||
if form == nil || len(form.File["files"]) == 0 {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "no files field in request"})
|
||||
return
|
||||
}
|
||||
headers := form.File["files"]
|
||||
if len(headers) > 64 {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "too many files (limit 64)"})
|
||||
return
|
||||
}
|
||||
|
||||
wsUUID, err := uuid.Parse(workspaceID)
|
||||
if err != nil {
|
||||
// validateWorkspaceID at the top of Upload already gates this;
|
||||
// the re-parse is defence in depth in case validateWorkspaceID
|
||||
// drifts. Keep the error class consistent so a bad-id reaches
|
||||
// the same 400 path. Not separately tested because the gate at
|
||||
// the call site is structurally the same uuid.Parse.
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"})
|
||||
return
|
||||
}
|
||||
|
||||
// Phase 1: pre-validate + read every part BEFORE any DB write.
|
||||
// A multi-file upload must commit all-or-nothing; a per-file
|
||||
// failure halfway through used to leave rows 1..K-1 in the table
|
||||
// while the client got a 500 and retried the whole batch — duplicate
|
||||
// rows, orphan activity rows. Validating up-front + atomic PutBatch
|
||||
// closes that gap.
|
||||
type prepped struct {
|
||||
Sanitized string
|
||||
Mimetype string
|
||||
Content []byte
|
||||
Original string // original (unsanitized) filename for error messages
|
||||
}
|
||||
prepReady := make([]prepped, 0, len(headers))
|
||||
items := make([]pendinguploads.PutItem, 0, len(headers))
|
||||
for _, fh := range headers {
|
||||
if fh.Size > pendinguploads.MaxFileBytes {
|
||||
log.Printf("chat_files uploadPollMode: per-file cap exceeded for %s: %s (%d bytes)",
|
||||
workspaceID, fh.Filename, fh.Size)
|
||||
c.JSON(http.StatusRequestEntityTooLarge, gin.H{
|
||||
"error": "file exceeds per-file cap",
|
||||
"filename": fh.Filename,
|
||||
"size": fh.Size,
|
||||
"max": pendinguploads.MaxFileBytes,
|
||||
})
|
||||
return
|
||||
}
|
||||
content, err := readMultipartFile(fh)
|
||||
if err != nil {
|
||||
log.Printf("chat_files uploadPollMode: read part failed for %s/%s: %v",
|
||||
workspaceID, fh.Filename, err)
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "could not read file part"})
|
||||
return
|
||||
}
|
||||
// Belt-and-braces post-read cap (multipart.FileHeader.Size can lie
|
||||
// on some clients that don't set Content-Length per part).
|
||||
if len(content) > pendinguploads.MaxFileBytes {
|
||||
log.Printf("chat_files uploadPollMode: per-file cap exceeded post-read for %s: %s (%d bytes)",
|
||||
workspaceID, fh.Filename, len(content))
|
||||
c.JSON(http.StatusRequestEntityTooLarge, gin.H{
|
||||
"error": "file exceeds per-file cap",
|
||||
"filename": fh.Filename,
|
||||
"size": len(content),
|
||||
"max": pendinguploads.MaxFileBytes,
|
||||
})
|
||||
return
|
||||
}
|
||||
sanitized := SanitizeFilename(fh.Filename)
|
||||
mimetype := safeMimetype(fh.Header.Get("Content-Type"))
|
||||
prepReady = append(prepReady, prepped{
|
||||
Sanitized: sanitized, Mimetype: mimetype, Content: content, Original: fh.Filename,
|
||||
})
|
||||
items = append(items, pendinguploads.PutItem{
|
||||
Content: content, Filename: sanitized, Mimetype: mimetype,
|
||||
})
|
||||
}
|
||||
|
||||
// Phase 2: atomic batch insert. On failure no rows commit.
|
||||
fileIDs, err := h.pendingUploads.PutBatch(ctx, wsUUID, items)
|
||||
if err != nil {
|
||||
if errors.Is(err, pendinguploads.ErrTooLarge) {
|
||||
// Belt + suspenders: pre-validation above already caught
|
||||
// this; surface a clean 413 if a malformed FileHeader
|
||||
// somehow slipped through.
|
||||
c.JSON(http.StatusRequestEntityTooLarge, gin.H{
|
||||
"error": "one or more files exceed per-file cap",
|
||||
"max": pendinguploads.MaxFileBytes,
|
||||
})
|
||||
return
|
||||
}
|
||||
log.Printf("chat_files uploadPollMode: storage.PutBatch failed for %s: %v",
|
||||
workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "could not stage files"})
|
||||
return
|
||||
}
|
||||
|
||||
// Phase 3: write per-file activity rows and build the response. Activity
|
||||
// rows are written individually (not part of the same Tx as PutBatch)
|
||||
// because LogActivity is shared across many handlers and threading the
|
||||
// Tx through would be a bigger refactor. The trade-off: if an activity
|
||||
// write fails after the PutBatch commits, the pending_uploads rows
|
||||
// orphan until the 24h TTL — significantly better than the previous
|
||||
// "every multi-file upload could orphan" behavior, and the workspace's
|
||||
// fetcher handles soft-404 cleanly when activity rows reference a row
|
||||
// the platform later expired.
|
||||
out := make([]uploadedFile, 0, len(prepReady))
|
||||
for i, p := range prepReady {
|
||||
fileID := fileIDs[i]
|
||||
uri := fmt.Sprintf("platform-pending:%s/%s", workspaceID, fileID)
|
||||
summary := "chat_upload_receive: " + p.Sanitized
|
||||
method := "chat_upload_receive"
|
||||
LogActivity(ctx, h.broadcaster, ActivityParams{
|
||||
WorkspaceID: workspaceID,
|
||||
ActivityType: "a2a_receive",
|
||||
TargetID: &workspaceID,
|
||||
Method: &method,
|
||||
Summary: &summary,
|
||||
RequestBody: map[string]interface{}{
|
||||
"file_id": fileID.String(),
|
||||
"name": p.Sanitized,
|
||||
"mimeType": p.Mimetype,
|
||||
"size": len(p.Content),
|
||||
"uri": uri,
|
||||
},
|
||||
Status: "ok",
|
||||
})
|
||||
|
||||
log.Printf("chat_files uploadPollMode: staged %s/%s (file_id=%s size=%d mimetype=%q)",
|
||||
workspaceID, p.Sanitized, fileID, len(p.Content), p.Mimetype)
|
||||
|
||||
out = append(out, uploadedFile{
|
||||
URI: uri,
|
||||
Name: p.Sanitized,
|
||||
Mimetype: p.Mimetype,
|
||||
Size: int64(len(p.Content)),
|
||||
})
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"files": out})
|
||||
}
|
||||
|
||||
// safeMimetype validates a client-supplied Content-Type value and
// returns a form that is safe to store and echo back verbatim.
//
// The stored value is later reflected as a response Content-Type, so a
// hostile header could otherwise smuggle CR/LF (header injection) or an
// unexpected parameter such as a charset. Processing:
//
//  1. Drop everything from the first ';' onward, then trim surrounding
//     whitespace.
//  2. If nothing is left, return "" (NOT the fallback).
//  3. If any byte lies outside printable, non-space ASCII
//     (0x21..0x7e), return "application/octet-stream".
//  4. Require exactly one '/' with a non-empty type and subtype;
//     otherwise return "application/octet-stream".
func safeMimetype(raw string) string {
	const fallback = "application/octet-stream"

	// `text/html; charset=utf-8` → `text/html`.
	if semi := strings.Index(raw, ";"); semi >= 0 {
		raw = raw[:semi]
	}
	raw = strings.TrimSpace(raw)
	if len(raw) == 0 {
		return ""
	}

	// Byte-wise scan: every non-ASCII or invalid sequence contains a
	// byte >= 0x80, so this rejects exactly the inputs a rune-wise
	// scan over the same range would.
	for i := 0; i < len(raw); i++ {
		if b := raw[i]; b < 0x21 || b > 0x7e {
			return fallback
		}
	}

	// Exactly one slash, and not at either end.
	malformed := strings.Count(raw, "/") != 1 ||
		strings.HasPrefix(raw, "/") ||
		strings.HasSuffix(raw, "/")
	if malformed {
		return fallback
	}
	return raw
}
|
||||
|
||||
// readMultipartFile reads one multipart part fully into memory. It
// wraps the open + io.ReadAll + close sequence so call sites stay
// short and a later change (chunked reads, hashing) has a single place
// to land.
//
// It returns the part's bytes, or a nil slice and a wrapped error:
// "open part: …" when the part cannot be opened, "read part: …" when
// reading fails. Partial data is never returned alongside an error.
// The function applies no size limit; callers enforce their own cap.
func readMultipartFile(fh *multipartFileHeader) ([]byte, error) {
	f, err := fh.Open()
	if err != nil {
		return nil, fmt.Errorf("open part: %w", err)
	}
	defer f.Close()

	data, err := io.ReadAll(f)
	if err != nil {
		return nil, fmt.Errorf("read part: %w", err)
	}
	return data, nil
}

// multipartFileHeader is a local alias for multipart.FileHeader so the
// readMultipartFile signature doesn't pull "mime/multipart" into every
// test that touches uploadPollMode.
type multipartFileHeader = multipart.FileHeader
|
||||
|
||||
@@ -0,0 +1,750 @@
|
||||
package handlers
|
||||
|
||||
// chat_files_poll_test.go — Upload poll-mode branch tests.
|
||||
//
|
||||
// Pinned in their own file so the existing chat_files_test.go stays
|
||||
// focused on the push-mode forward proxy. Same setupTestDB / sqlmock
|
||||
// scaffolding as the rest of the package, plus an in-memory
|
||||
// pendinguploads.Storage so we don't have to mock six SQL statements
|
||||
// per assertion.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
|
||||
)
|
||||
|
||||
// inMemStorage is a process-local pendinguploads.Storage for branch
|
||||
// tests. Records every Put for assertion. Failure modes (Put error,
|
||||
// MarkFetched / Ack tested elsewhere) are injected via fields.
|
||||
type inMemStorage struct {
|
||||
mu sync.Mutex
|
||||
rows map[uuid.UUID]pendinguploads.Record
|
||||
puts []putCall
|
||||
putErr error
|
||||
}
|
||||
|
||||
type putCall struct {
|
||||
WorkspaceID uuid.UUID
|
||||
Filename string
|
||||
Mimetype string
|
||||
Size int
|
||||
}
|
||||
|
||||
func newInMemStorage() *inMemStorage {
|
||||
return &inMemStorage{rows: map[uuid.UUID]pendinguploads.Record{}}
|
||||
}
|
||||
|
||||
func (s *inMemStorage) Put(_ context.Context, ws uuid.UUID, content []byte, filename, mimetype string) (uuid.UUID, error) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.putErr != nil {
|
||||
return uuid.Nil, s.putErr
|
||||
}
|
||||
id := uuid.New()
|
||||
s.rows[id] = pendinguploads.Record{
|
||||
FileID: id, WorkspaceID: ws, Content: content,
|
||||
Filename: filename, Mimetype: mimetype,
|
||||
SizeBytes: int64(len(content)), CreatedAt: time.Now(),
|
||||
ExpiresAt: time.Now().Add(24 * time.Hour),
|
||||
}
|
||||
s.puts = append(s.puts, putCall{
|
||||
WorkspaceID: ws, Filename: filename, Mimetype: mimetype, Size: len(content),
|
||||
})
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// PutBatch mirrors the production atomic-batch contract: any per-item
|
||||
// failure leaves the in-memory state unchanged, simulating Tx rollback.
|
||||
// Pre-validation matches PostgresStorage.PutBatch; oversized items
|
||||
// return ErrTooLarge before any row is added.
|
||||
func (s *inMemStorage) PutBatch(_ context.Context, ws uuid.UUID, items []pendinguploads.PutItem) ([]uuid.UUID, error) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.putErr != nil {
|
||||
return nil, s.putErr
|
||||
}
|
||||
// Pre-validate so an oversized item rejects the whole batch before
|
||||
// any state mutation — matches the Tx-rollback semantics.
|
||||
for _, it := range items {
|
||||
if len(it.Content) > pendinguploads.MaxFileBytes {
|
||||
return nil, pendinguploads.ErrTooLarge
|
||||
}
|
||||
}
|
||||
ids := make([]uuid.UUID, 0, len(items))
|
||||
stagedRows := make(map[uuid.UUID]pendinguploads.Record, len(items))
|
||||
stagedPuts := make([]putCall, 0, len(items))
|
||||
for _, it := range items {
|
||||
id := uuid.New()
|
||||
stagedRows[id] = pendinguploads.Record{
|
||||
FileID: id, WorkspaceID: ws, Content: it.Content,
|
||||
Filename: it.Filename, Mimetype: it.Mimetype,
|
||||
SizeBytes: int64(len(it.Content)), CreatedAt: time.Now(),
|
||||
ExpiresAt: time.Now().Add(24 * time.Hour),
|
||||
}
|
||||
stagedPuts = append(stagedPuts, putCall{
|
||||
WorkspaceID: ws, Filename: it.Filename, Mimetype: it.Mimetype, Size: len(it.Content),
|
||||
})
|
||||
ids = append(ids, id)
|
||||
}
|
||||
for id, r := range stagedRows {
|
||||
s.rows[id] = r
|
||||
}
|
||||
s.puts = append(s.puts, stagedPuts...)
|
||||
return ids, nil
|
||||
}
|
||||
|
||||
func (s *inMemStorage) Get(context.Context, uuid.UUID) (pendinguploads.Record, error) {
|
||||
return pendinguploads.Record{}, pendinguploads.ErrNotFound
|
||||
}
|
||||
// MarkFetched is a no-op stub that always succeeds.
func (s *inMemStorage) MarkFetched(context.Context, uuid.UUID) error {
	return nil
}
|
||||
// Ack is a no-op stub that always succeeds.
func (s *inMemStorage) Ack(context.Context, uuid.UUID) error {
	return nil
}
|
||||
|
||||
// Sweep is required by the Storage interface (Phase 3 GC). Not
|
||||
// exercised by upload-branch tests — the dedicated sweeper_test.go +
|
||||
// storage_sweep_test.go cover it.
|
||||
func (s *inMemStorage) Sweep(context.Context, time.Duration) (pendinguploads.SweepResult, error) {
|
||||
return pendinguploads.SweepResult{}, nil
|
||||
}
|
||||
|
||||
// expectPollDeliveryMode stubs the SELECT delivery_mode lookup that
|
||||
// uploadPollMode does (separate from the one resolveWorkspaceForwardCreds
|
||||
// does — this is the new helper introduced for the poll branch).
|
||||
func expectPollDeliveryMode(mock sqlmock.Sqlmock, workspaceID, mode string) {
|
||||
rows := sqlmock.NewRows([]string{"delivery_mode"}).AddRow(mode)
|
||||
mock.ExpectQuery(`SELECT delivery_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(workspaceID).
|
||||
WillReturnRows(rows)
|
||||
}
|
||||
|
||||
func expectPollDeliveryModeMissing(mock sqlmock.Sqlmock, workspaceID string) {
|
||||
mock.ExpectQuery(`SELECT delivery_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(workspaceID).
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
}
|
||||
|
||||
// expectActivityInsert stubs the LogActivity INSERT so the poll branch's
|
||||
// per-file activity row write doesn't fail the sqlmock expectations.
|
||||
func expectActivityInsert(mock sqlmock.Sqlmock) {
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
}
|
||||
|
||||
// expectActivityInsertWithTypeAndMethod is a strict variant that pins
|
||||
// the activity_type and method positional args. Used in the discriminator
|
||||
// regression test below — the workspace inbox poller filters
|
||||
// `?type=a2a_receive`, so writing any other activity_type silently breaks
|
||||
// poll-mode delivery without a build/test error. Pin the two discriminator
|
||||
// fields so a refactor that flips activity_type back to a custom value is
|
||||
// caught here instead of at runtime by a confused poller.
|
||||
//
|
||||
// Positional args (LogActivity uses ExecContext with 12 positional params):
|
||||
// $1 workspace_id, $2 activity_type, $3 source_id, $4 target_id,
|
||||
// $5 method, $6 summary, $7 request_body, $8 response_body,
|
||||
// $9 tool_trace, $10 duration_ms, $11 status, $12 error_detail.
|
||||
func expectActivityInsertWithTypeAndMethod(mock sqlmock.Sqlmock, workspaceID, activityType, method string) {
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WithArgs(
|
||||
workspaceID, // $1 workspace_id
|
||||
activityType, // $2 activity_type ← pinned
|
||||
sqlmock.AnyArg(), // $3 source_id
|
||||
sqlmock.AnyArg(), // $4 target_id (workspaceID, but already covered)
|
||||
method, // $5 method ← pinned
|
||||
sqlmock.AnyArg(), // $6 summary
|
||||
sqlmock.AnyArg(), // $7 request_body
|
||||
sqlmock.AnyArg(), // $8 response_body
|
||||
sqlmock.AnyArg(), // $9 tool_trace
|
||||
sqlmock.AnyArg(), // $10 duration_ms
|
||||
sqlmock.AnyArg(), // $11 status
|
||||
sqlmock.AnyArg(), // $12 error_detail
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
}
|
||||
|
||||
// pollUploadFixture builds a multipart/form-data body containing one
// "files" part per entry of files (key = filename, value = content).
// It returns the encoded body and the matching Content-Type header
// value, including the boundary.
//
// Any failure while writing a part or closing the writer fails the
// test immediately, so a broken fixture cannot masquerade as a handler
// bug. Parts are emitted in map iteration order, which Go does not
// define; tests that depend on part order should pass a single file.
func pollUploadFixture(t *testing.T, files map[string][]byte) (*bytes.Buffer, string) {
	t.Helper()
	var buf bytes.Buffer
	mw := multipart.NewWriter(&buf)
	for name, data := range files {
		fw, err := mw.CreateFormFile("files", name)
		if err != nil {
			t.Fatalf("CreateFormFile: %v", err)
		}
		if _, err := fw.Write(data); err != nil {
			t.Fatalf("write part %q: %v", name, err)
		}
	}
	// Close writes the terminating boundary; without it the body is
	// not a valid multipart message.
	if err := mw.Close(); err != nil {
		t.Fatalf("close multipart writer: %v", err)
	}
	return &buf, mw.FormDataContentType()
}
|
||||
|
||||
// ---- happy path ----
|
||||
|
||||
func TestPollUpload_HappyPath_OneFile_StagesAndLogs(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "11111111-2222-3333-4444-555555555555"
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
expectActivityInsert(mock)
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"report.pdf": []byte("PDF-bytes")})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
if len(store.puts) != 1 {
|
||||
t.Fatalf("expected 1 storage Put, got %d", len(store.puts))
|
||||
}
|
||||
put := store.puts[0]
|
||||
if put.Filename != "report.pdf" || put.Size != 9 {
|
||||
t.Errorf("unexpected put: %+v", put)
|
||||
}
|
||||
|
||||
// Response shape must match the workspace-side
|
||||
// /internal/chat/uploads/ingest schema so canvas can't tell which
|
||||
// path handled the upload.
|
||||
var resp struct {
|
||||
Files []struct {
|
||||
URI string `json:"uri"`
|
||||
Name string `json:"name"`
|
||||
Mimetype string `json:"mimeType"`
|
||||
Size int `json:"size"`
|
||||
} `json:"files"`
|
||||
}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("decode response: %v body=%s", err, w.Body.String())
|
||||
}
|
||||
if len(resp.Files) != 1 {
|
||||
t.Fatalf("response files count = %d, want 1", len(resp.Files))
|
||||
}
|
||||
got := resp.Files[0]
|
||||
if got.Name != "report.pdf" || got.Size != 9 {
|
||||
t.Errorf("response file mismatch: %+v", got)
|
||||
}
|
||||
if !strings.HasPrefix(got.URI, "platform-pending:"+wsID+"/") {
|
||||
t.Errorf("URI %q does not start with platform-pending:%s/", got.URI, wsID)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPollUpload_MultipleFiles_AllStagedAndLogged(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "11111111-aaaa-bbbb-cccc-555555555555"
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
expectActivityInsert(mock)
|
||||
expectActivityInsert(mock)
|
||||
expectActivityInsert(mock)
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{
|
||||
"a.txt": []byte("aaaa"),
|
||||
"b.txt": []byte("bbbbb"),
|
||||
"c.txt": []byte("cccccc"),
|
||||
})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
if len(store.puts) != 3 {
|
||||
t.Fatalf("expected 3 storage Puts, got %d", len(store.puts))
|
||||
}
|
||||
}
|
||||
|
||||
// ---- regression: push-mode unchanged ----
|
||||
|
||||
func TestPollUpload_PushModeFallsThroughToForward(t *testing.T) {
|
||||
// With pendingUploads wired but the workspace's mode is push,
|
||||
// the poll branch must NOT activate — flow falls through to the
|
||||
// existing resolveWorkspaceForwardCreds path. Pinned via the
|
||||
// "delivery_mode lookup happened, then the URL+mode SELECT
|
||||
// happened, then we 503 because no inbound secret" sequence.
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "22222222-2222-3333-4444-555555555555"
|
||||
expectPollDeliveryMode(mock, wsID, "push")
|
||||
// After the poll branch is bypassed, we hit
|
||||
// resolveWorkspaceForwardCreds which selects url+delivery_mode.
|
||||
expectURL(mock, wsID, "")
|
||||
// URL empty + mode=push → 503 (no inbound secret check needed).
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("data")})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Fatalf("status=%d body=%s — expected push-mode 503 fall-through", w.Code, w.Body.String())
|
||||
}
|
||||
if len(store.puts) != 0 {
|
||||
t.Errorf("push-mode should NOT have hit storage, got %d puts", len(store.puts))
|
||||
}
|
||||
}
|
||||
|
||||
func TestPollUpload_NotConfigured_FallsThrough(t *testing.T) {
|
||||
// Backwards compat: a binary running without WithPendingUploads
|
||||
// behaves exactly as before — the poll branch is dead code.
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "33333333-2222-3333-4444-555555555555"
|
||||
expectURLAndMode(mock, wsID, "", "poll") // resolveWorkspaceForwardCreds emits 422
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
// No WithPendingUploads — pendingUploads is nil.
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("data")})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusUnprocessableEntity {
|
||||
t.Errorf("status=%d, want 422 (legacy poll-mode rejection)", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// ---- error paths ----
|
||||
|
||||
func TestPollUpload_WorkspaceMissing_404(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "44444444-2222-3333-4444-555555555555"
|
||||
expectPollDeliveryModeMissing(mock, wsID)
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(newInMemStorage(), nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("d")})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Errorf("status=%d, want 404", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPollUpload_DeliveryModeLookupDBError_500(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "55555555-2222-3333-4444-555555555555"
|
||||
mock.ExpectQuery(`SELECT delivery_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).WillReturnError(errors.New("connection lost"))
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(newInMemStorage(), nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("d")})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusInternalServerError {
|
||||
t.Errorf("status=%d, want 500", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPollUpload_NoFilesField_400(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "66666666-2222-3333-4444-555555555555"
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
// Multipart with a non-files field — no actual files.
|
||||
var buf bytes.Buffer
|
||||
mw := multipart.NewWriter(&buf)
|
||||
mw.WriteField("not_files", "hi")
|
||||
mw.Close()
|
||||
|
||||
c, w := makeUploadRequest(t, wsID, &buf, mw.FormDataContentType())
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("status=%d, want 400 on no files field", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPollUpload_MalformedMultipart_400(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "77777777-2222-3333-4444-555555555555"
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
// Body that doesn't match the boundary in Content-Type.
|
||||
c, w := makeUploadRequest(t, wsID, bytes.NewBufferString("garbage"), "multipart/form-data; boundary=fake")
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("status=%d, want 400 on malformed multipart", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPollUpload_StorageError_500(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "88888888-2222-3333-4444-555555555555"
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
|
||||
store := newInMemStorage()
|
||||
store.putErr = errors.New("disk full")
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x.bin": []byte("data")})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusInternalServerError {
|
||||
t.Errorf("status=%d, want 500", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPollUpload_StorageTooLarge_413(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "99999999-2222-3333-4444-555555555555"
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
|
||||
store := newInMemStorage()
|
||||
store.putErr = pendinguploads.ErrTooLarge
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x.bin": []byte("data")})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusRequestEntityTooLarge {
|
||||
t.Errorf("status=%d, want 413", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPollUpload_TooManyFiles_400(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "aaaaaaaa-2222-3333-4444-555555555555"
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
// 65 files — over the per-batch cap.
|
||||
files := map[string][]byte{}
|
||||
for i := 0; i < 65; i++ {
|
||||
files[uuid.New().String()] = []byte("x")
|
||||
}
|
||||
body, ct := pollUploadFixture(t, files)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("status=%d, want 400 on too many files", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPollUpload_NullDeliveryMode_TreatedAsPush(t *testing.T) {
|
||||
// Production-observed 2026-05-04: external runtime workspaces
|
||||
// (molecule-sdk-python on user infra) sometimes register with
|
||||
// delivery_mode = NULL — the schema default for legacy rows from
|
||||
// before #2339. The poll branch must NOT activate on NULL — only
|
||||
// the explicit "poll" string. This is the same defensive posture
|
||||
// resolveWorkspaceForwardCreds takes for legacy rows.
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "cccccccc-2222-3333-4444-555555555555"
|
||||
mock.ExpectQuery(`SELECT delivery_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"delivery_mode"}).AddRow(nil))
|
||||
// Falls through to resolveWorkspaceForwardCreds:
|
||||
expectURLAndMode(mock, wsID, "", "")
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x.bin": []byte("data")})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
// resolveWorkspaceForwardCreds with empty url + NULL mode = 422
|
||||
// (the legacy "no callback URL" rejection — exactly what we're
|
||||
// fixing for ACTUAL poll-mode rows but want to preserve for
|
||||
// NULL ones until the row gets a real mode value via the next
|
||||
// /registry/register).
|
||||
if w.Code != http.StatusUnprocessableEntity {
|
||||
t.Errorf("status=%d, want 422 for NULL delivery_mode (legacy fallthrough)", w.Code)
|
||||
}
|
||||
if len(store.puts) != 0 {
|
||||
t.Errorf("NULL mode should NOT have hit storage, got %d puts", len(store.puts))
|
||||
}
|
||||
}
|
||||
|
||||
func TestPollUpload_PerFileCapPreStorage_413(t *testing.T) {
|
||||
// Pin the early-reject branch (fh.Size > MaxFileBytes) BEFORE we
|
||||
// read the part into memory. Without this, an oversize file
|
||||
// would hit the storage layer's belt-and-suspenders check, which
|
||||
// works but burns ~25 MB of memory + DB round-trip first. Send
|
||||
// 25 MB + 1 byte → 413 with the file size in the response.
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "dddddddd-2222-3333-4444-555555555555"
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
// 25 MB + 1 byte. Single file, large enough to trip the early
|
||||
// size check.
|
||||
oversize := make([]byte, pendinguploads.MaxFileBytes+1)
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"big.bin": oversize})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusRequestEntityTooLarge {
|
||||
t.Fatalf("status=%d, want 413 on per-file size cap", w.Code)
|
||||
}
|
||||
if len(store.puts) != 0 {
|
||||
t.Errorf("per-file cap reject should NOT have called storage.Put, got %d puts", len(store.puts))
|
||||
}
|
||||
// Sanity: response carries the size we tried to upload + the cap.
|
||||
var body_ map[string]any
|
||||
json.Unmarshal(w.Body.Bytes(), &body_)
|
||||
if got := body_["max"]; got == nil {
|
||||
t.Errorf("expected max field in response, got %v", body_)
|
||||
}
|
||||
}
|
||||
|
||||
// SanitizeFilename is exercised in the upload chain — pin one
|
||||
// end-to-end case that exercises the URI path through the response.
|
||||
func TestPollUpload_SanitizesFilenameInResponse(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "bbbbbbbb-2222-3333-4444-555555555555"
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
expectActivityInsert(mock)
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"hello world!.pdf": []byte("data")})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp struct {
|
||||
Files []struct {
|
||||
Name string `json:"name"`
|
||||
URI string `json:"uri"`
|
||||
}
|
||||
}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if len(resp.Files) == 0 || resp.Files[0].Name != "hello_world_.pdf" {
|
||||
t.Errorf("expected sanitized name 'hello_world_.pdf', got: %+v", resp.Files)
|
||||
}
|
||||
if len(store.puts) == 0 || store.puts[0].Filename != "hello_world_.pdf" {
|
||||
t.Errorf("storage Put didn't receive sanitized filename: %+v", store.puts)
|
||||
}
|
||||
}
|
||||
|
||||
// TestPollUpload_AtomicRollbackOnSecondFileTooLarge pins the
|
||||
// transactional contract introduced in phase 5: when one file in a
|
||||
// multi-file batch fails pre-validation (oversize), NONE of the files
|
||||
// in the batch land in storage. Previously a per-file Put loop would
|
||||
// stage rows 1..K-1 before failing on row K, leaving orphan
|
||||
// pending_uploads + activity rows the client would re-create on retry.
|
||||
//
|
||||
// Pinned via inMemStorage's PutBatch (which mirrors PostgresStorage's
|
||||
// Tx-rollback behavior on a per-item validation failure) — but the
|
||||
// real atomicity guarantee is the integration test in
|
||||
// pending_uploads_integration_test.go.
|
||||
func TestPollUpload_AtomicRollbackOnSecondFileTooLarge(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "aaaaaaaa-3333-3333-4444-555555555555"
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
// Two files: first OK, second over the per-file cap. Pre-validation
|
||||
// in uploadPollMode catches it BEFORE any Put — store.puts must
|
||||
// stay empty. (If the test ever sees len=1, the regression is
|
||||
// "first file slipped through into storage on a partial-failure
|
||||
// batch.")
|
||||
tooBig := bytes.Repeat([]byte{0x42}, pendinguploads.MaxFileBytes+1)
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{
|
||||
"ok.txt": []byte("small"),
|
||||
"huge.bin": tooBig,
|
||||
})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusRequestEntityTooLarge {
|
||||
t.Errorf("status=%d body=%s, want 413", w.Code, w.Body.String())
|
||||
}
|
||||
if len(store.puts) != 0 {
|
||||
t.Errorf("expected zero Puts on rollback, got %d: %+v", len(store.puts), store.puts)
|
||||
}
|
||||
}
|
||||
|
||||
// TestPollUpload_AtomicRollbackOnPutBatchError validates that an in-
|
||||
// flight PutBatch failure (e.g. simulated DB error) leaves zero rows
|
||||
// — same guarantee as the pre-validation path, but exercises the
|
||||
// "Tx-Rollback after BEGIN" branch via the fake.
|
||||
func TestPollUpload_AtomicRollbackOnPutBatchError(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "bbbbbbbb-3333-3333-4444-555555555555"
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
|
||||
store := newInMemStorage()
|
||||
store.putErr = errors.New("db down mid-batch")
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{
|
||||
"a.txt": []byte("aaa"),
|
||||
"b.txt": []byte("bbb"),
|
||||
"c.txt": []byte("ccc"),
|
||||
})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusInternalServerError {
|
||||
t.Errorf("status=%d, want 500", w.Code)
|
||||
}
|
||||
if len(store.puts) != 0 {
|
||||
t.Errorf("expected zero Puts after PutBatch error, got %d", len(store.puts))
|
||||
}
|
||||
}
|
||||
|
||||
// TestPollUpload_MimetypeWithCRLFInjectionStripped pins the safeMimetype
|
||||
// hardening: a multipart-supplied Content-Type header with CR/LF is
|
||||
// rewritten to application/octet-stream so the eventual /content
|
||||
// response can't be header-split on the wire.
|
||||
func TestPollUpload_MimetypeWithCRLFInjectionStripped(t *testing.T) {
|
||||
got := safeMimetype("text/html\r\nX-Injected: pwn")
|
||||
if got != "application/octet-stream" {
|
||||
t.Errorf("CRLF mimetype not stripped, got %q", got)
|
||||
}
|
||||
got = safeMimetype("image/png\x00")
|
||||
if got != "application/octet-stream" {
|
||||
t.Errorf("NUL byte mimetype not stripped, got %q", got)
|
||||
}
|
||||
got = safeMimetype("text/plain; charset=utf-8")
|
||||
if got != "text/plain" {
|
||||
t.Errorf("parameter not stripped, got %q", got)
|
||||
}
|
||||
got = safeMimetype("application/pdf")
|
||||
if got != "application/pdf" {
|
||||
t.Errorf("clean mime modified, got %q", got)
|
||||
}
|
||||
got = safeMimetype("")
|
||||
if got != "" {
|
||||
t.Errorf("empty input should pass through, got %q", got)
|
||||
}
|
||||
got = safeMimetype("notamime")
|
||||
if got != "application/octet-stream" {
|
||||
t.Errorf("non-type/subtype not coerced, got %q", got)
|
||||
}
|
||||
got = safeMimetype("/empty-type")
|
||||
if got != "application/octet-stream" {
|
||||
t.Errorf("missing type half not coerced, got %q", got)
|
||||
}
|
||||
got = safeMimetype("type/")
|
||||
if got != "application/octet-stream" {
|
||||
t.Errorf("missing subtype half not coerced, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestPollUpload_ActivityRowDiscriminator pins the
|
||||
// activity_type / method shape that the workspace inbox poller depends
|
||||
// on. The poller filters `GET /workspaces/:id/activity?type=a2a_receive`
|
||||
// so the handler MUST write activity_type=a2a_receive (NOT a custom
|
||||
// type), and use method=chat_upload_receive as the
|
||||
// upload-vs-message-vs-task discriminator.
|
||||
//
|
||||
// Why pinned: a previous iteration of this handler used
|
||||
// activity_type="chat_upload_receive" — silently invisible to the
|
||||
// existing poller. The branch passed every push-mode test, every
|
||||
// storage test, and every per-file content test; the bug only
|
||||
// surfaced at runtime when the workspace polled and got nothing.
|
||||
// Encode the contract in a unit test so the next refactor can't
|
||||
// re-break it without a red CI.
|
||||
func TestPollUpload_ActivityRowDiscriminator(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
wsID := "abc12345-6789-4abc-8def-000000000999"
|
||||
expectPollDeliveryMode(mock, wsID, "poll")
|
||||
expectActivityInsertWithTypeAndMethod(mock, wsID, "a2a_receive", "chat_upload_receive")
|
||||
|
||||
store := newInMemStorage()
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
|
||||
WithPendingUploads(store, nil)
|
||||
|
||||
body, ct := pollUploadFixture(t, map[string][]byte{"x.pdf": []byte("xx")})
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("expectations: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -105,7 +105,7 @@ func TestChatUpload_InvalidWorkspaceID(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
|
||||
c, w := makeUploadRequest(t, "not-a-uuid", &bytes.Buffer{}, "")
|
||||
h.Upload(c)
|
||||
@@ -122,7 +122,7 @@ func TestChatUpload_WorkspaceNotInDB(t *testing.T) {
|
||||
wsID := "00000000-0000-0000-0000-000000000099"
|
||||
expectURLMissing(mock, wsID)
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -166,7 +166,7 @@ func TestChatUpload_NoInboundSecret_LazyHeal(t *testing.T) {
|
||||
WithArgs(sqlmock.AnyArg(), wsID).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -203,7 +203,7 @@ func TestChatUpload_NoInboundSecret_LazyHealFailure(t *testing.T) {
|
||||
WithArgs(sqlmock.AnyArg(), wsID).
|
||||
WillReturnError(sql.ErrConnDone) // mint fails
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -231,7 +231,7 @@ func TestChatUpload_NoURL(t *testing.T) {
|
||||
wsID := "00000000-0000-0000-0000-000000000042"
|
||||
expectURLAndMode(mock, wsID, "", "push")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -256,7 +256,7 @@ func TestChatUpload_PollModeEmptyURL(t *testing.T) {
|
||||
wsID := "00000000-0000-0000-0000-000000000099"
|
||||
expectURLAndMode(mock, wsID, "", "poll")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -286,7 +286,7 @@ func TestChatUpload_NullModeEmptyURL(t *testing.T) {
|
||||
wsID := "30ba7f0b-b303-4a20-aefe-3a4a675b8aa4" // user's "mac laptop"
|
||||
expectURLNullMode(mock, wsID, "")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -338,7 +338,7 @@ func TestChatUpload_ForwardsToWorkspace_HappyPath(t *testing.T) {
|
||||
expectURL(mock, wsID, srv.URL)
|
||||
expectInboundSecret(mock, wsID, "super-secret-123")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -380,7 +380,7 @@ func TestChatUpload_ForwardsErrorStatusUnchanged(t *testing.T) {
|
||||
expectURL(mock, wsID, srv.URL)
|
||||
expectInboundSecret(mock, wsID, "tok")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -402,7 +402,7 @@ func TestChatUpload_WorkspaceUnreachable(t *testing.T) {
|
||||
expectURL(mock, wsID, "http://127.0.0.1:1")
|
||||
expectInboundSecret(mock, wsID, "tok")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
body, ct := uploadFixture(t)
|
||||
c, w := makeUploadRequest(t, wsID, body, ct)
|
||||
h.Upload(c)
|
||||
@@ -418,7 +418,7 @@ func TestChatDownload_InvalidPath(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
|
||||
cases := []struct {
|
||||
name, path, wantSubstr string
|
||||
@@ -507,7 +507,7 @@ func TestChatDownload_WorkspaceNotInDB(t *testing.T) {
|
||||
WithArgs(wsID).
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt")
|
||||
h.Download(c)
|
||||
|
||||
@@ -533,7 +533,7 @@ func TestChatDownload_NoInboundSecret_LazyHeal(t *testing.T) {
|
||||
WithArgs(sqlmock.AnyArg(), wsID).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt")
|
||||
h.Download(c)
|
||||
|
||||
@@ -559,7 +559,7 @@ func TestChatDownload_NoInboundSecret_LazyHealFailure(t *testing.T) {
|
||||
WithArgs(sqlmock.AnyArg(), wsID).
|
||||
WillReturnError(sql.ErrConnDone)
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt")
|
||||
h.Download(c)
|
||||
|
||||
@@ -592,7 +592,7 @@ func TestChatDownload_ForwardsToWorkspace_HappyPath(t *testing.T) {
|
||||
expectURL(mock, wsID, srv.URL)
|
||||
expectInboundSecret(mock, wsID, "the-secret")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
c, w := makeDownloadRequest(t, wsID, "/workspace/report.txt")
|
||||
h.Download(c)
|
||||
|
||||
@@ -634,7 +634,7 @@ func TestChatDownload_404FromWorkspacePropagated(t *testing.T) {
|
||||
expectURL(mock, wsID, srv.URL)
|
||||
expectInboundSecret(mock, wsID, "tok")
|
||||
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
|
||||
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
|
||||
c, w := makeDownloadRequest(t, wsID, "/workspace/missing.txt")
|
||||
h.Download(c)
|
||||
|
||||
|
||||
@@ -0,0 +1,468 @@
|
||||
package handlers
|
||||
|
||||
// class1_ast_gate_test.go — generic Class 1 leak gate per #2867 PR-A.
|
||||
//
|
||||
// What this gate prevents:
|
||||
// The tenant-hongming leak class — a handler iterates a YAML-derived
|
||||
// slice (ws.Children, sub_workspaces, etc.) and calls
|
||||
// `INSERT INTO workspaces` inside the loop body without first
|
||||
// checking whether a workspace with the same (parent_id, name) is
|
||||
// already there. Each call to such a handler doubles the tree.
|
||||
//
|
||||
// Why this is broader than TestCreateWorkspaceTree_CallsLookupBeforeInsert:
|
||||
// The existing gate is hard-coded to org_import.go's createWorkspaceTree.
|
||||
// That catches the specific function that triggered the original
|
||||
// incident — but a future handler written from scratch in a different
|
||||
// file would not be covered. This gate walks every production handler
|
||||
// .go file and applies a structural rule that does not depend on
|
||||
// function or file names.
|
||||
//
|
||||
// The rule (verbatim from #2867 PR-A):
|
||||
//
|
||||
// "No handler in handlers/ may iterate a slice (any RangeStmt) AND
|
||||
// call INSERT INTO workspaces inside the loop body without a
|
||||
// preceding SELECT id FROM workspaces WHERE name=$1 AND parent_id IS
|
||||
// NOT DISTINCT FROM $2 in the same function (== a lookupExistingChild
|
||||
// call, OR an ON CONFLICT clause baked into the same INSERT, OR an
|
||||
// explicit allowlist annotation)."
|
||||
//
|
||||
// Allowlist mechanism: a function whose body contains the exact comment
|
||||
// string `// class1-gate: idempotent-by-design` is treated as safe.
|
||||
// Use this only after writing a unit test that pins WHY the function
|
||||
// is safe. The annotation is intentionally awkward to type — it should
|
||||
// be rare.
|
||||
|
||||
import (
|
||||
"go/ast"
|
||||
"go/parser"
|
||||
"go/token"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// gateAllowlistComment is the magic comment a function author writes to opt
// out of this gate. Requiring it forces an explicit decision.
const gateAllowlistComment = "// class1-gate: idempotent-by-design"

var (
	// reINSERTWorkspaces matches the exact statement shape the gate cares
	// about: "INSERT INTO workspaces" at the start of a line (optionally
	// indented) followed by an opening parenthesis. It is deliberately
	// tighter than a plain substring search for "INSERT INTO workspaces" so
	// that a `workspaces_audit` literal — or any other lookalike — does not
	// trigger a false positive. The existing createWorkspaceTree gate
	// (workspaces_insert_allowlist_test.go) uses the same regex — keep the
	// two in sync if either changes.
	reINSERTWorkspaces = regexp.MustCompile(`(?m)^\s*INSERT INTO workspaces\s*\(`)

	// reONCONFLICT matches an ON CONFLICT clause, case-insensitively,
	// anywhere in a SQL literal. An UPSERT (INSERT ... ON CONFLICT ... DO
	// UPDATE) is idempotent by definition, so the gate exempts it.
	reONCONFLICT = regexp.MustCompile(`(?i)\bON CONFLICT\b`)

	// preflightCallNames lists the function names whose presence in a
	// function body counts as "did a SELECT-by-(parent_id, name) preflight".
	// Add names here as new preflight helpers are introduced, and keep the
	// list TIGHT — any sloppy addition weakens the gate.
	preflightCallNames = map[string]bool{
		"lookupExistingChild": true,
	}
)
|
||||
|
||||
// TestClass1_NoUnpreflightedInsertInsideRange walks every production
|
||||
// .go file in this package, parses the AST, and fails the test if any
|
||||
// FuncDecl violates the rule above.
|
||||
//
|
||||
// Failure message must include: file path, function name, line of
|
||||
// the offending INSERT, line of the enclosing range, and a hint at
|
||||
// the three escape hatches (preflight call, ON CONFLICT, allowlist
|
||||
// comment).
|
||||
func TestClass1_NoUnpreflightedInsertInsideRange(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("getwd: %v", err)
|
||||
}
|
||||
|
||||
entries, err := os.ReadDir(wd)
|
||||
if err != nil {
|
||||
t.Fatalf("readdir %s: %v", wd, err)
|
||||
}
|
||||
|
||||
type violation struct {
|
||||
file string
|
||||
fn string
|
||||
insertLine int
|
||||
rangeLine int
|
||||
}
|
||||
var violations []violation
|
||||
scanned := 0
|
||||
|
||||
for _, e := range entries {
|
||||
name := e.Name()
|
||||
if e.IsDir() || !strings.HasSuffix(name, ".go") {
|
||||
continue
|
||||
}
|
||||
if strings.HasSuffix(name, "_test.go") {
|
||||
continue
|
||||
}
|
||||
path := filepath.Join(wd, name)
|
||||
src, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatalf("read %s: %v", path, err)
|
||||
}
|
||||
fset := token.NewFileSet()
|
||||
file, err := parser.ParseFile(fset, name, src, parser.ParseComments)
|
||||
if err != nil {
|
||||
t.Fatalf("parse %s: %v", path, err)
|
||||
}
|
||||
scanned++
|
||||
|
||||
// Walk every function declaration and apply the rule.
|
||||
for _, decl := range file.Decls {
|
||||
fd, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fd.Body == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Allowlist: skip if the function body contains the magic
|
||||
// comment. We check via the source range of the function
|
||||
// — comments inside the body are in file.Comments and
|
||||
// must overlap the function's Pos/End range.
|
||||
if functionHasAllowlistComment(file, fd) {
|
||||
continue
|
||||
}
|
||||
|
||||
// First pass: locate every INSERT INTO workspaces literal
|
||||
// in this function. We treat each such literal as a
|
||||
// candidate violation and try to clear it via the rules.
|
||||
candidates := findInsertWorkspacesLiterals(fd, src, fset)
|
||||
if len(candidates) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// Has the function called a preflight helper? Single
|
||||
// pass — if any preflight name appears, every INSERT in
|
||||
// the function is considered preflighted. This is more
|
||||
// permissive than position-aware (preflight could be
|
||||
// AFTER the INSERT and still satisfy the gate), but the
|
||||
// existing org_import.go gate already pins the position
|
||||
// invariant for createWorkspaceTree, and a function that
|
||||
// preflights AFTER inserting would fail the position
|
||||
// gate in a separate test.
|
||||
hasPreflight := functionCallsAny(fd, preflightCallNames)
|
||||
|
||||
for _, c := range candidates {
|
||||
if c.hasONCONFLICT {
|
||||
continue
|
||||
}
|
||||
if hasPreflight {
|
||||
continue
|
||||
}
|
||||
if c.enclosingRangeLine == 0 {
|
||||
// INSERT not inside any RangeStmt — single-shot,
|
||||
// not the bug pattern.
|
||||
continue
|
||||
}
|
||||
violations = append(violations, violation{
|
||||
file: name,
|
||||
fn: fd.Name.Name,
|
||||
insertLine: c.insertLine,
|
||||
rangeLine: c.enclosingRangeLine,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if scanned == 0 {
|
||||
t.Fatal("scanned 0 .go files — wrong working directory? gate would always pass")
|
||||
}
|
||||
|
||||
if len(violations) > 0 {
|
||||
// Stable sort so the failure message is deterministic across
|
||||
// reruns.
|
||||
sort.Slice(violations, func(i, j int) bool {
|
||||
if violations[i].file != violations[j].file {
|
||||
return violations[i].file < violations[j].file
|
||||
}
|
||||
return violations[i].insertLine < violations[j].insertLine
|
||||
})
|
||||
var b strings.Builder
|
||||
b.WriteString("Class 1 leak gate (#2867 PR-A) — these handler functions iterate a slice and INSERT INTO workspaces inside the loop body without a (parent_id, name) preflight.\n\n")
|
||||
b.WriteString("This is the bug shape that triggered the tenant-hongming leak (TeamHandler.Expand re-inserting the entire sub_workspaces tree on every call). To fix any reported violation, choose ONE of:\n")
|
||||
b.WriteString(" 1. Call h.lookupExistingChild(ctx, name, parentID) before the INSERT and skip the INSERT when it returns existing=true. (preferred)\n")
|
||||
b.WriteString(" 2. Use INSERT ... ON CONFLICT ... DO ... (idempotent UPSERT, like registry.go).\n")
|
||||
b.WriteString(" 3. Annotate the function with a `// class1-gate: idempotent-by-design` comment AND a unit test that pins why the function is structurally idempotent. (rare; require code review)\n\n")
|
||||
b.WriteString("Violations:\n")
|
||||
for _, v := range violations {
|
||||
b.WriteString(" - ")
|
||||
b.WriteString(v.file)
|
||||
b.WriteString(":")
|
||||
b.WriteString(itoa(v.insertLine))
|
||||
b.WriteString(" — function ")
|
||||
b.WriteString(v.fn)
|
||||
b.WriteString("() INSERTs inside RangeStmt at line ")
|
||||
b.WriteString(itoa(v.rangeLine))
|
||||
b.WriteString("\n")
|
||||
}
|
||||
t.Fatal(b.String())
|
||||
}
|
||||
}
|
||||
|
||||
// itoa formats n in base 10. It exists so this test file does not need a
// strconv import for a single use.
//
// The digits are produced from the unsigned magnitude of n rather than
// from -n, because negating the most negative int overflows back to
// itself and would leave no digits to emit.
func itoa(n int) string {
	if n == 0 {
		return "0"
	}
	neg := n < 0
	mag := uint64(n)
	if neg {
		// Two's-complement negation in the unsigned domain yields |n|,
		// including for the most negative int.
		mag = -mag
	}
	// 20 bytes holds a sign plus the 19 digits of a 64-bit magnitude.
	var buf [20]byte
	i := len(buf)
	for mag > 0 {
		i--
		buf[i] = byte('0' + mag%10)
		mag /= 10
	}
	if neg {
		i--
		buf[i] = '-'
	}
	return string(buf[i:])
}
|
||||
|
||||
// candidateInsert records what the gate needs to know about a single
// `INSERT INTO workspaces` string literal in order to decide whether it
// is a violation.
type candidateInsert struct {
	// insertLine is the source line on which the literal starts.
	insertLine int
	// hasONCONFLICT reports whether the same literal contains an
	// ON CONFLICT clause.
	hasONCONFLICT bool
	// enclosingRangeLine is the line of the innermost range body that
	// contains the literal; 0 means not inside any range.
	enclosingRangeLine int
}
|
||||
|
||||
// findInsertWorkspacesLiterals walks fd's body and returns one
|
||||
// candidateInsert per INSERT INTO workspaces string literal.
|
||||
//
|
||||
// Position-based detection: collect every RangeStmt's body span first,
|
||||
// then for each INSERT literal check if its position is inside any
|
||||
// span. ast.Inspect's nil-call ordering does NOT give per-node pop
|
||||
// semantics, so a stack-based approach against ast.Inspect would
|
||||
// silently miscount. Position spans are deterministic and easy to
|
||||
// reason about.
|
||||
func findInsertWorkspacesLiterals(fd *ast.FuncDecl, src []byte, fset *token.FileSet) []candidateInsert {
|
||||
var out []candidateInsert
|
||||
|
||||
type span struct{ start, end token.Pos }
|
||||
var ranges []span
|
||||
ast.Inspect(fd.Body, func(n ast.Node) bool {
|
||||
rs, ok := n.(*ast.RangeStmt)
|
||||
if !ok || rs.Body == nil {
|
||||
return true
|
||||
}
|
||||
ranges = append(ranges, span{rs.Body.Lbrace, rs.Body.Rbrace})
|
||||
return true
|
||||
})
|
||||
|
||||
enclosingRangeLineFor := func(p token.Pos) int {
|
||||
// Pick the innermost enclosing range — i.e., the one with the
|
||||
// largest start that still covers p. Innermost is the one
|
||||
// whose body actually contains the INSERT, which is the line
|
||||
// most useful in a violation message.
|
||||
bestStart := token.NoPos
|
||||
bestLine := 0
|
||||
for _, s := range ranges {
|
||||
if p > s.start && p < s.end && s.start > bestStart {
|
||||
bestStart = s.start
|
||||
bestLine = fset.Position(s.start).Line
|
||||
}
|
||||
}
|
||||
return bestLine
|
||||
}
|
||||
|
||||
ast.Inspect(fd.Body, func(n ast.Node) bool {
|
||||
bl, ok := n.(*ast.BasicLit)
|
||||
if !ok || bl.Kind != token.STRING {
|
||||
return true
|
||||
}
|
||||
// Strip surrounding backticks/quotes — value includes them.
|
||||
lit := bl.Value
|
||||
if len(lit) >= 2 {
|
||||
lit = lit[1 : len(lit)-1]
|
||||
}
|
||||
if !reINSERTWorkspaces.MatchString(lit) {
|
||||
return true
|
||||
}
|
||||
out = append(out, candidateInsert{
|
||||
insertLine: fset.Position(bl.Pos()).Line,
|
||||
hasONCONFLICT: reONCONFLICT.MatchString(lit),
|
||||
enclosingRangeLine: enclosingRangeLineFor(bl.Pos()),
|
||||
})
|
||||
return true
|
||||
})
|
||||
return out
|
||||
}
|
||||
|
||||
// functionCallsAny reports whether fd's body contains a call whose callee
// name is a true-valued key in names. For a plain call `f(...)` the name
// is the identifier; for a selector call `x.f(...)` it is the selected
// name `f`. Calls through any other expression form are ignored.
func functionCallsAny(fd *ast.FuncDecl, names map[string]bool) bool {
	var matched bool
	ast.Inspect(fd.Body, func(node ast.Node) bool {
		if matched {
			// Already decided; stop descending.
			return false
		}
		call, isCall := node.(*ast.CallExpr)
		if !isCall {
			return true
		}
		var callee string
		switch target := call.Fun.(type) {
		case *ast.Ident:
			callee = target.Name
		case *ast.SelectorExpr:
			callee = target.Sel.Name
		default:
			return true
		}
		if names[callee] {
			matched = true
			return false
		}
		return true
	})
	return matched
}
|
||||
|
||||
// functionHasAllowlistComment returns true if the function body
|
||||
// (between fd.Body.Lbrace and fd.Body.Rbrace) contains a comment
|
||||
// equal to gateAllowlistComment.
|
||||
func functionHasAllowlistComment(file *ast.File, fd *ast.FuncDecl) bool {
|
||||
if fd.Body == nil {
|
||||
return false
|
||||
}
|
||||
start := fd.Body.Lbrace
|
||||
end := fd.Body.Rbrace
|
||||
for _, cg := range file.Comments {
|
||||
for _, c := range cg.List {
|
||||
if c.Pos() < start || c.Pos() > end {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(c.Text) == gateAllowlistComment {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// TestClass1_GateFiresOnSyntheticBuggySource — proves the gate actually
|
||||
// catches the bug shape it's named after. Without this, a regression
|
||||
// to "always pass" would not be noticed until the leak shipped again.
|
||||
// Per memory feedback_assert_exact_not_substring.md: tighten the test
|
||||
// + verify it FAILS on old-shape source before merging.
|
||||
func TestClass1_GateFiresOnSyntheticBuggySource(t *testing.T) {
|
||||
const buggySrc = `package handlers
|
||||
|
||||
import "context"
|
||||
|
||||
type fakeDB struct{}
|
||||
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
|
||||
|
||||
func buggyExpand(db fakeDB, ctx context.Context, children []string) {
|
||||
for _, child := range children {
|
||||
// Bug shape: INSERT inside the range body, no preflight.
|
||||
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", child)
|
||||
}
|
||||
}
|
||||
`
|
||||
fset := token.NewFileSet()
|
||||
file, err := parser.ParseFile(fset, "buggy.go", buggySrc, parser.ParseComments)
|
||||
if err != nil {
|
||||
t.Fatalf("parse synthetic source: %v", err)
|
||||
}
|
||||
for _, decl := range file.Decls {
|
||||
fd, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fd.Name.Name != "buggyExpand" {
|
||||
continue
|
||||
}
|
||||
candidates := findInsertWorkspacesLiterals(fd, []byte(buggySrc), fset)
|
||||
if len(candidates) != 1 {
|
||||
t.Fatalf("expected 1 INSERT literal, got %d", len(candidates))
|
||||
}
|
||||
c := candidates[0]
|
||||
if c.enclosingRangeLine == 0 {
|
||||
t.Errorf("synthetic INSERT inside `for _, child := range` should be detected as enclosed by range, got enclosingRangeLine=0 — gate would miss the bug shape")
|
||||
}
|
||||
if c.hasONCONFLICT {
|
||||
t.Errorf("synthetic INSERT has no ON CONFLICT, gate falsely treated it as idempotent")
|
||||
}
|
||||
if functionCallsAny(fd, preflightCallNames) {
|
||||
t.Errorf("synthetic function does not call lookupExistingChild — gate falsely treated it as preflighted")
|
||||
}
|
||||
// All three guards say the gate WOULD fire. Pass.
|
||||
return
|
||||
}
|
||||
t.Fatal("buggyExpand FuncDecl not found in synthetic source")
|
||||
}
|
||||
|
||||
// TestClass1_GateAllowsONCONFLICT — pins that an INSERT with ON
|
||||
// CONFLICT inside a range body is NOT flagged. registry.go's
|
||||
// upsert pattern is the prod example.
|
||||
func TestClass1_GateAllowsONCONFLICT(t *testing.T) {
|
||||
const safeSrc = `package handlers
|
||||
|
||||
import "context"
|
||||
|
||||
type fakeDB struct{}
|
||||
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
|
||||
|
||||
func upsertLoop(db fakeDB, ctx context.Context, children []string) {
|
||||
for _, child := range children {
|
||||
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2) ON CONFLICT (id) DO UPDATE SET name = $2`" + `, "x", child)
|
||||
}
|
||||
}
|
||||
`
|
||||
fset := token.NewFileSet()
|
||||
file, _ := parser.ParseFile(fset, "safe.go", safeSrc, parser.ParseComments)
|
||||
for _, decl := range file.Decls {
|
||||
fd, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fd.Name.Name != "upsertLoop" {
|
||||
continue
|
||||
}
|
||||
candidates := findInsertWorkspacesLiterals(fd, []byte(safeSrc), fset)
|
||||
if len(candidates) != 1 {
|
||||
t.Fatalf("expected 1 candidate, got %d", len(candidates))
|
||||
}
|
||||
if !candidates[0].hasONCONFLICT {
|
||||
t.Errorf("ON CONFLICT clause should be detected, was missed — gate would falsely flag idempotent UPSERTs")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestClass1_GateAllowsAllowlistAnnotation — pins the escape hatch
|
||||
// works. Annotated functions are skipped at the FuncDecl level.
|
||||
func TestClass1_GateAllowsAllowlistAnnotation(t *testing.T) {
|
||||
const annotatedSrc = `package handlers
|
||||
|
||||
import "context"
|
||||
|
||||
type fakeDB struct{}
|
||||
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
|
||||
|
||||
func intentionallyUnpreflighted(db fakeDB, ctx context.Context, children []string) {
|
||||
// class1-gate: idempotent-by-design
|
||||
for _, child := range children {
|
||||
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", child)
|
||||
}
|
||||
}
|
||||
`
|
||||
fset := token.NewFileSet()
|
||||
file, _ := parser.ParseFile(fset, "annotated.go", annotatedSrc, parser.ParseComments)
|
||||
for _, decl := range file.Decls {
|
||||
fd, ok := decl.(*ast.FuncDecl)
|
||||
if !ok || fd.Name.Name != "intentionallyUnpreflighted" {
|
||||
continue
|
||||
}
|
||||
if !functionHasAllowlistComment(file, fd) {
|
||||
t.Error("allowlist comment should be detected for the intentionallyUnpreflighted function — escape hatch not working")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"encoding/json"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
@@ -13,6 +14,68 @@ import (
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// delegationResultInboxPushEnabled reports whether the RFC #2829 PR-2
// result-push is switched on, i.e. whether the environment variable
// DELEGATION_RESULT_INBOX_PUSH is exactly "1". Any other value, including
// unset, means off.
//
// When on, a callee reporting `status=completed` (or `failed`) via
// /workspaces/:id/delegations/:delegation_id/update ALSO produces an
// `activity_type='a2a_receive'` row in the caller's activity_logs.
//
// Why this is flagged: the caller's inbox poller (workspace/inbox.py)
// queries `?type=a2a_receive` to surface inbound messages to the agent.
// Writing delegation results as a2a_receive rows lets long-running
// delegations stop depending on the proxy holding an HTTP connection
// open (the 600s message/send timeout class), but it is an observable
// behavior change — agents start seeing delegation results in their
// inbox — so it gets a staging burn-in before the default flips.
//
// Default: off. Staging-canary first; flip to on after RFC #2829 PR-3
// (agent-side cutover) lands and proves the round-trip end-to-end.
func delegationResultInboxPushEnabled() bool {
	flag := os.Getenv("DELEGATION_RESULT_INBOX_PUSH")
	return flag == "1"
}
|
||||
|
||||
// pushDelegationResultToInbox writes the inbox-visible row for a
|
||||
// completed/failed delegation. Best-effort: a failure logs but does NOT
|
||||
// fail the parent UpdateStatus — the existing delegate_result row in
|
||||
// activity_logs is still authoritative for the dashboard.
|
||||
//
|
||||
// Caller (sourceID) is the workspace that initiated the delegation; the
|
||||
// inbox row lands in their activity_logs so wait_for_message picks it up.
|
||||
//
|
||||
// Body shape mirrors a2a_receive rows produced by the proxy on a
|
||||
// successful synchronous reply: response_body.text carries the agent's
|
||||
// answer, request_body.delegation_id correlates back to the originating
|
||||
// row.
|
||||
func pushDelegationResultToInbox(ctx context.Context, sourceID, delegationID, status, responsePreview, errorDetail string) {
|
||||
if !delegationResultInboxPushEnabled() {
|
||||
return
|
||||
}
|
||||
respPayload := map[string]interface{}{
|
||||
"text": responsePreview,
|
||||
"delegation_id": delegationID,
|
||||
}
|
||||
respJSON, _ := json.Marshal(respPayload)
|
||||
reqJSON, _ := json.Marshal(map[string]interface{}{
|
||||
"delegation_id": delegationID,
|
||||
})
|
||||
logStatus := "ok"
|
||||
if status == "failed" {
|
||||
logStatus = "error"
|
||||
}
|
||||
summary := "Delegation result delivered"
|
||||
if status == "failed" {
|
||||
summary = "Delegation failed"
|
||||
}
|
||||
if _, err := db.DB.ExecContext(ctx, `
|
||||
INSERT INTO activity_logs (
|
||||
workspace_id, activity_type, method, source_id,
|
||||
summary, request_body, response_body, status, error_detail
|
||||
) VALUES ($1, 'a2a_receive', 'delegate_result', $2, $3, $4::jsonb, $5::jsonb, $6, NULLIF($7, ''))
|
||||
`, sourceID, sourceID, summary, string(reqJSON), string(respJSON), logStatus, errorDetail); err != nil {
|
||||
log.Printf("Delegation %s: inbox-push insert failed: %v", delegationID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Delegation status lifecycle:
|
||||
// pending → dispatched → received → in_progress → completed | failed
|
||||
//
|
||||
@@ -206,6 +269,9 @@ func insertDelegationRow(ctx context.Context, c *gin.Context, sourceID string, b
|
||||
VALUES ($1, 'delegation', 'delegate', $2, $3, $4, $5::jsonb, 'pending', $6)
|
||||
`, sourceID, sourceID, body.TargetID, "Delegating to "+body.TargetID, string(taskJSON), idemArg)
|
||||
if err == nil {
|
||||
// RFC #2829 #318 — mirror to the durable delegations ledger
|
||||
// (gated by DELEGATION_LEDGER_WRITE; default off → no-op).
|
||||
recordLedgerInsert(ctx, sourceID, body.TargetID, delegationID, body.Task, body.IdempotencyKey)
|
||||
return insertOK
|
||||
}
|
||||
// A unique-constraint hit means a concurrent request just took the
|
||||
@@ -289,6 +355,8 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s
|
||||
h.broadcaster.RecordAndBroadcast(ctx, "DELEGATION_FAILED", sourceID, map[string]interface{}{
|
||||
"delegation_id": delegationID, "target_id": targetID, "error": proxyErr.Error(),
|
||||
})
|
||||
// RFC #2829 PR-2 result-push (see UpdateStatus for rationale).
|
||||
pushDelegationResultToInbox(ctx, sourceID, delegationID, "failed", "", proxyErr.Error())
|
||||
return
|
||||
}
|
||||
|
||||
@@ -343,17 +411,28 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s
|
||||
log.Printf("Delegation %s: failed to insert success log: %v", delegationID, err)
|
||||
}
|
||||
|
||||
// RFC #2829 #318: write the ledger row with result_preview FIRST,
|
||||
// THEN updateDelegationStatus. Order matters: SetStatus has a
|
||||
// same-status replay no-op — if updateDelegationStatus's nested
|
||||
// recordLedgerStatus(completed, "", "") fires first, the outer call
|
||||
// hits the no-op branch and result_preview is never written.
|
||||
// Caught by the local-Postgres integration test in
|
||||
// delegation_ledger_integration_test.go.
|
||||
recordLedgerStatus(ctx, delegationID, "completed", "", responseText)
|
||||
h.updateDelegationStatus(sourceID, delegationID, "completed", "")
|
||||
h.broadcaster.RecordAndBroadcast(ctx, "DELEGATION_COMPLETE", sourceID, map[string]interface{}{
|
||||
"delegation_id": delegationID,
|
||||
"target_id": targetID,
|
||||
"response_preview": truncate(responseText, 200),
|
||||
})
|
||||
// RFC #2829 PR-2 result-push (see UpdateStatus for rationale).
|
||||
pushDelegationResultToInbox(ctx, sourceID, delegationID, "completed", responseText, "")
|
||||
}
|
||||
|
||||
// updateDelegationStatus updates the status of a delegation record in activity_logs.
|
||||
func (h *DelegationHandler) updateDelegationStatus(workspaceID, delegationID, status, errorDetail string) {
|
||||
if _, err := db.DB.ExecContext(context.Background(), `
|
||||
ctx := context.Background()
|
||||
if _, err := db.DB.ExecContext(ctx, `
|
||||
UPDATE activity_logs
|
||||
SET status = $1, error_detail = CASE WHEN $2 = '' THEN error_detail ELSE $2 END
|
||||
WHERE workspace_id = $3
|
||||
@@ -362,6 +441,14 @@ func (h *DelegationHandler) updateDelegationStatus(workspaceID, delegationID, st
|
||||
`, status, errorDetail, workspaceID, delegationID); err != nil {
|
||||
log.Printf("Delegation %s: status update failed: %v", delegationID, err)
|
||||
}
|
||||
// RFC #2829 #318 — mirror status transition to the durable ledger
|
||||
// (gated). Note: the ledger uses different vocabulary for "pending"
|
||||
// (its initial state is `queued`); map "received" / unknown values
|
||||
// the ledger doesn't accept by skipping them rather than failing.
|
||||
switch status {
|
||||
case "queued", "dispatched", "in_progress", "completed", "failed", "stuck":
|
||||
recordLedgerStatus(ctx, delegationID, status, errorDetail, "")
|
||||
}
|
||||
}
|
||||
|
||||
// Record handles POST /workspaces/:id/delegations/record — the agent-initiated
|
||||
@@ -407,6 +494,15 @@ func (h *DelegationHandler) Record(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// RFC #2829 #318 — mirror to durable ledger (gated). Record always
|
||||
// reflects an A2A request the agent already fired itself, so the
|
||||
// initial activity_logs status is 'dispatched' — but the ledger's
|
||||
// CHECK constraint only accepts 'queued' as the initial state via
|
||||
// Insert. Insert as queued first; the very next SetStatus(...,
|
||||
// dispatched) below promotes it to dispatched on the same row.
|
||||
recordLedgerInsert(ctx, sourceID, body.TargetID, body.DelegationID, body.Task, "")
|
||||
recordLedgerStatus(ctx, body.DelegationID, "dispatched", "", "")
|
||||
|
||||
h.broadcaster.RecordAndBroadcast(ctx, "DELEGATION_SENT", sourceID, map[string]interface{}{
|
||||
"delegation_id": body.DelegationID,
|
||||
"target_id": body.TargetID,
|
||||
@@ -442,6 +538,13 @@ func (h *DelegationHandler) UpdateStatus(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// RFC #2829 #318 — same ordering pin as executeDelegation completion:
|
||||
// write the with-preview ledger row FIRST so updateDelegationStatus's
|
||||
// inner same-status no-op doesn't clobber preview.
|
||||
if body.Status == "completed" {
|
||||
recordLedgerStatus(ctx, delegationID, "completed", "", body.ResponsePreview)
|
||||
}
|
||||
|
||||
h.updateDelegationStatus(sourceID, delegationID, body.Status, body.Error)
|
||||
|
||||
if body.Status == "completed" {
|
||||
@@ -459,11 +562,19 @@ func (h *DelegationHandler) UpdateStatus(c *gin.Context) {
|
||||
"delegation_id": delegationID,
|
||||
"response_preview": truncate(body.ResponsePreview, 200),
|
||||
})
|
||||
// RFC #2829 PR-2 result-push: when the gate is on, also write an
|
||||
// a2a_receive row so the caller's inbox poller surfaces this to
|
||||
// the agent. Foundational for getting rid of the proxy-blocked
|
||||
// sync path that hits the 600s message/send timeout — once the
|
||||
// agent-side cutover lands, the caller polls its own inbox for
|
||||
// the result instead of holding open an HTTP connection.
|
||||
pushDelegationResultToInbox(ctx, sourceID, delegationID, "completed", body.ResponsePreview, "")
|
||||
} else {
|
||||
h.broadcaster.RecordAndBroadcast(ctx, "DELEGATION_FAILED", sourceID, map[string]interface{}{
|
||||
"delegation_id": delegationID,
|
||||
"error": body.Error,
|
||||
})
|
||||
pushDelegationResultToInbox(ctx, sourceID, delegationID, "failed", "", body.Error)
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"status": body.Status, "delegation_id": delegationID})
|
||||
|
||||
@@ -0,0 +1,246 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// delegation_inbox_push_test.go — coverage for the RFC #2829 PR-2
|
||||
// result-push behavior. The push is feature-flagged via
|
||||
// DELEGATION_RESULT_INBOX_PUSH=1; default off keeps the existing
|
||||
// strict-sqlmock test surface unchanged.
|
||||
//
|
||||
// What we pin:
|
||||
// 1. Flag off (default) → no a2a_receive INSERT fires.
|
||||
// 2. Flag on, status=completed → a2a_receive row written with the
|
||||
// response_preview and no error_detail.
|
||||
// 3. Flag on, status=failed → a2a_receive row written with status=error
|
||||
// and the error_detail set.
|
||||
// 4. INSERT failure on inbox-push does NOT bubble up — UpdateStatus
|
||||
// still returns 200.
|
||||
|
||||
// ---------- pushDelegationResultToInbox in isolation ----------
|
||||
|
||||
func TestPushDelegationResultToInbox_FlagOff_NoSQL(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
t.Setenv("DELEGATION_RESULT_INBOX_PUSH", "")
|
||||
|
||||
pushDelegationResultToInbox(
|
||||
context.Background(),
|
||||
"caller", "deleg-1", "completed", "answer body", "",
|
||||
)
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("flag off must not fire SQL: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPushDelegationResultToInbox_FlagOn_CompletedInsertsA2AReceiveRow(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
t.Setenv("DELEGATION_RESULT_INBOX_PUSH", "1")
|
||||
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WithArgs(
|
||||
"caller-ws",
|
||||
"caller-ws", // source_id mirrors workspace_id
|
||||
"Delegation result delivered",
|
||||
sqlmock.AnyArg(), // request_body json
|
||||
sqlmock.AnyArg(), // response_body json
|
||||
"ok",
|
||||
"", // error_detail empty for completed
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
pushDelegationResultToInbox(
|
||||
context.Background(),
|
||||
"caller-ws", "deleg-1", "completed", "answer body", "",
|
||||
)
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPushDelegationResultToInbox_FlagOn_FailedInsertsErrorRow(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
t.Setenv("DELEGATION_RESULT_INBOX_PUSH", "1")
|
||||
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WithArgs(
|
||||
"caller-ws",
|
||||
"caller-ws",
|
||||
"Delegation failed",
|
||||
sqlmock.AnyArg(),
|
||||
sqlmock.AnyArg(),
|
||||
"error",
|
||||
"target unreachable",
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
pushDelegationResultToInbox(
|
||||
context.Background(),
|
||||
"caller-ws", "deleg-2", "failed", "", "target unreachable",
|
||||
)
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- UpdateStatus end-to-end ----------
|
||||
|
||||
func TestUpdateStatus_FlagOn_PushesA2AReceiveOnCompleted(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
t.Setenv("DELEGATION_RESULT_INBOX_PUSH", "1")
|
||||
|
||||
broadcaster := newTestBroadcaster()
|
||||
wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
dh := NewDelegationHandler(wh, broadcaster)
|
||||
|
||||
// 1. updateDelegationStatus — UPDATE activity_logs SET status='completed'
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("completed", "", "ws-source", "deleg-9").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// 2. existing delegate_result INSERT (caller-side dashboard view)
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WithArgs(
|
||||
"ws-source", "ws-source",
|
||||
sqlmock.AnyArg(), // summary
|
||||
sqlmock.AnyArg(), // response_body
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// 3. NEW: PR-2 a2a_receive row for inbox-poller
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WithArgs(
|
||||
"ws-source", "ws-source",
|
||||
"Delegation result delivered",
|
||||
sqlmock.AnyArg(),
|
||||
sqlmock.AnyArg(),
|
||||
"ok",
|
||||
"",
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{
|
||||
{Key: "id", Value: "ws-source"},
|
||||
{Key: "delegation_id", Value: "deleg-9"},
|
||||
}
|
||||
body := `{"status":"completed","response_preview":"all done"}`
|
||||
c.Request = httptest.NewRequest("POST",
|
||||
"/workspaces/ws-source/delegations/deleg-9/update",
|
||||
bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
dh.UpdateStatus(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateStatus_FlagOn_PushesA2AReceiveOnFailed(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
t.Setenv("DELEGATION_RESULT_INBOX_PUSH", "1")
|
||||
|
||||
broadcaster := newTestBroadcaster()
|
||||
wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
dh := NewDelegationHandler(wh, broadcaster)
|
||||
|
||||
// 1. updateDelegationStatus — UPDATE activity_logs
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("failed", "boom", "ws-source", "deleg-10").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// 2. NEW: PR-2 a2a_receive row for inbox-poller (failure path doesn't
|
||||
// have the existing delegate_result INSERT — only the new push).
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WithArgs(
|
||||
"ws-source", "ws-source",
|
||||
"Delegation failed",
|
||||
sqlmock.AnyArg(),
|
||||
sqlmock.AnyArg(),
|
||||
"error",
|
||||
"boom",
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{
|
||||
{Key: "id", Value: "ws-source"},
|
||||
{Key: "delegation_id", Value: "deleg-10"},
|
||||
}
|
||||
body := `{"status":"failed","error":"boom"}`
|
||||
c.Request = httptest.NewRequest("POST",
|
||||
"/workspaces/ws-source/delegations/deleg-10/update",
|
||||
bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
dh.UpdateStatus(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d", w.Code)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestUpdateStatus_FlagOff_NoNewSQL — sanity check that the existing
|
||||
// behavior is preserved when the flag is off. Critical for safe rollout.
|
||||
func TestUpdateStatus_FlagOff_NoNewSQL(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
// explicitly empty — flag off
|
||||
t.Setenv("DELEGATION_RESULT_INBOX_PUSH", "")
|
||||
|
||||
broadcaster := newTestBroadcaster()
|
||||
wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
dh := NewDelegationHandler(wh, broadcaster)
|
||||
|
||||
// Only the two pre-existing queries — no third (a2a_receive) INSERT.
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("completed", "", "ws-source", "deleg-11").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
WithArgs(
|
||||
"ws-source", "ws-source",
|
||||
sqlmock.AnyArg(),
|
||||
sqlmock.AnyArg(),
|
||||
).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{
|
||||
{Key: "id", Value: "ws-source"},
|
||||
{Key: "delegation_id", Value: "deleg-11"},
|
||||
}
|
||||
c.Request = httptest.NewRequest("POST",
|
||||
"/workspaces/ws-source/delegations/deleg-11/update",
|
||||
bytes.NewBufferString(`{"status":"completed","response_preview":"ok"}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
dh.UpdateStatus(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d", w.Code)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("flag-off must not fire extra SQL: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,222 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
)
|
||||
|
||||
// delegation_ledger.go — durable per-task ledger for A2A delegation
// (RFC #2829 PR-1).
//
// activity_logs is an event stream — one row per state transition. Replaying
// the stream gives you history. This file's table (delegations) is the
// folded current state — one row per delegation_id with a single status,
// last_heartbeat, deadline, and result_preview.
//
// Why both: PR-3 needs a sweeper that filters on
// (status='in_progress' AND last_heartbeat < now() - interval '10 minutes'),
// which is impossible to express against the event stream without a window
// function over every (delegation_id, latest event) pair — a planner-killing
// query at scale. The dedicated table makes the sweeper an indexed scan.
//
// Writes go to BOTH tables. activity_logs remains the audit-grade record
// for forensics; delegations is the queryable view for dashboards + sweeper
// joins. Symmetric-write pattern — same posture as tenant_resources (PR
// #2343), per memory `reference_tenant_resources_audit`.
|
||||
|
||||
// DelegationLedger writes the per-task durable row alongside the existing
// activity_logs event-stream writes.
//
// Insert and Heartbeat are best-effort: a failed write is logged and NOT
// returned, because activity_logs remains the audit-grade source of truth.
// SetStatus is the exception — it returns its error (including
// ErrInvalidTransition) so the caller can decide whether to ignore it.
//
// Same shape as the `tenant_resources` reconciler (PR #2343): orchestration
// continues even when the ledger write fails, and the next status update
// (or PR-3 reconciler) will heal the ledger.
type DelegationLedger struct {
	// db is the handle every ledger query runs against.
	db *sql.DB
}
|
||||
|
||||
// NewDelegationLedger returns a ledger backed by the package db handle.
|
||||
// Tests can construct one with a sqlmock-backed *sql.DB.
|
||||
func NewDelegationLedger(handle *sql.DB) *DelegationLedger {
|
||||
if handle == nil {
|
||||
handle = db.DB
|
||||
}
|
||||
return &DelegationLedger{db: handle}
|
||||
}
|
||||
|
||||
// previewCap is the maximum number of bytes truncatePreview lets through.
const previewCap = 4096

// truncatePreview caps a stored preview at previewCap bytes (4KB). The full
// prompt/response is already in activity_logs.{request,response}_body — this
// is the at-a-glance view for the dashboard, not a forensic record.
//
// Rune-safe: the previous byte-slice form (s[:previewCap]) split on a byte
// boundary, which on a multi-byte codepoint at byte 4096 produced invalid
// UTF-8 — Postgres JSONB rejects → ledger row not inserted → audit gap
// (issue #2962). This version walks the string by rune and cuts at the last
// rune-start index that is <= previewCap. ASCII-only strings hit the cap
// exactly; CJK/emoji strings stop slightly under the cap, never over.
//
// Strings of at most previewCap bytes are returned unchanged.
//
// Mirrors the truncatePreviewRunes fix from agent_message_writer.go
// (#2959). Both call sites should consume a shared helper after both
// fixes have landed — followup deduplication tracked in #2962's body.
func truncatePreview(s string) string {
	if len(s) <= previewCap {
		return s
	}
	// Ranging over a string yields the byte index at which each rune
	// starts. Remember the last such index that still fits inside the cap;
	// slicing there can never cut through a multi-byte rune.
	end := 0
	for i := range s {
		if i > previewCap {
			break
		}
		end = i
	}
	return s[:end]
}
|
||||
|
||||
// InsertOpts is the agent's record-of-intent passed to
// DelegationLedger.Insert.
//
// DelegationID, CallerID and CalleeID are required — Insert logs and skips
// the write when any of them is empty. TaskPreview, IdempotencyKey and
// Deadline are optional.
type InsertOpts struct {
	DelegationID string
	CallerID     string
	CalleeID     string
	// TaskPreview is stored after being passed through truncatePreview.
	TaskPreview    string
	IdempotencyKey string // empty → NULL
	// Deadline defaults to now + 6h when zero. Callers can pass a tighter
	// per-task deadline (cron, interactive request) by setting it.
	Deadline time.Time
}
|
||||
|
||||
// Insert writes the queued row. ON CONFLICT (delegation_id) DO NOTHING so
|
||||
// the agent's retry-on-restart codepath is naturally idempotent — a duplicate
|
||||
// Insert with the same delegation_id is a no-op. (Idempotency_key dedupe is
|
||||
// a separate UNIQUE index handled by the same DO NOTHING.)
|
||||
func (l *DelegationLedger) Insert(ctx context.Context, opts InsertOpts) {
|
||||
if opts.DelegationID == "" || opts.CallerID == "" || opts.CalleeID == "" {
|
||||
log.Printf("delegation_ledger Insert: missing required field, skipping")
|
||||
return
|
||||
}
|
||||
deadline := opts.Deadline
|
||||
if deadline.IsZero() {
|
||||
deadline = time.Now().Add(6 * time.Hour)
|
||||
}
|
||||
idemArg := sql.NullString{String: opts.IdempotencyKey, Valid: opts.IdempotencyKey != ""}
|
||||
_, err := l.db.ExecContext(ctx, `
|
||||
INSERT INTO delegations (
|
||||
delegation_id, caller_id, callee_id, task_preview,
|
||||
status, deadline, idempotency_key
|
||||
) VALUES ($1, $2, $3, $4, 'queued', $5, $6)
|
||||
ON CONFLICT (delegation_id) DO NOTHING
|
||||
`, opts.DelegationID, opts.CallerID, opts.CalleeID,
|
||||
truncatePreview(opts.TaskPreview), deadline, idemArg)
|
||||
if err != nil {
|
||||
log.Printf("delegation_ledger Insert(%s): %v", opts.DelegationID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// allowedTransitions enforces the lifecycle in code as defense-in-depth on
// the schema CHECK. It maps a current status to the set of statuses it may
// move to. Terminal states (completed, failed, stuck) have no entry, so any
// further status update from them is rejected — once a delegation is done,
// it stays done.
//
// The "queued → in_progress" jump (skipping dispatched) is allowed: lazy
// callers that don't ack the dispatched stage shouldn't be penalised,
// since the agent ultimately cares about whether work started, not which
// HTTP layer happened to ack first.
var allowedTransitions = map[string]map[string]bool{
	"queued":      {"dispatched": true, "in_progress": true, "failed": true},
	"dispatched":  {"in_progress": true, "completed": true, "failed": true},
	"in_progress": {"completed": true, "failed": true, "stuck": true},
}
|
||||
|
||||
// ErrInvalidTransition is returned by SetStatus when the requested status is
// not listed in allowedTransitions for the row's current status. That covers
// any attempt to move out of a terminal state as well as backwards or
// otherwise unlisted moves. Callers SHOULD ignore it when it stems from a
// duplicate terminal write; it is exported so tests can assert on it.
var ErrInvalidTransition = errors.New("delegation ledger: invalid status transition")
|
||||
|
||||
// SetStatus is the catch-all updater. Status MUST be one of the lifecycle
|
||||
// values. errorDetail is non-empty only for failed/stuck. resultPreview is
|
||||
// non-empty only for completed.
|
||||
//
|
||||
// Idempotent: re-applying the same terminal status with the same payload
|
||||
// returns nil; transitioning back out of a terminal state returns
|
||||
// ErrInvalidTransition. (Forward-only protection — once 'completed' you
|
||||
// don't get to revise to 'failed'.)
|
||||
func (l *DelegationLedger) SetStatus(ctx context.Context,
|
||||
delegationID, status, errorDetail, resultPreview string,
|
||||
) error {
|
||||
if delegationID == "" || status == "" {
|
||||
return errors.New("delegation ledger: missing required field")
|
||||
}
|
||||
|
||||
// Read current status to validate the transition. We accept the rare
|
||||
// race where two updaters both observe the same prior status — Postgres
|
||||
// CHECK constraint catches truly-invalid status values; our forward-only
|
||||
// check is best-effort.
|
||||
var current string
|
||||
err := l.db.QueryRowContext(ctx,
|
||||
`SELECT status FROM delegations WHERE delegation_id = $1`,
|
||||
delegationID,
|
||||
).Scan(¤t)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
// Insert was lost or wasn't called. Defensively NO-OP — the next
|
||||
// agent retry will re-Insert and the next SetStatus will land.
|
||||
log.Printf("delegation_ledger SetStatus(%s, %s): row missing, skipping",
|
||||
delegationID, status)
|
||||
return nil
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Same-status replay (e.g. duplicate completion notification): no-op,
|
||||
// don't bump updated_at, no error.
|
||||
if current == status {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Forward-only on terminal states.
|
||||
if next, ok := allowedTransitions[current]; !ok || !next[status] {
|
||||
// Terminal already — refuse to revise.
|
||||
return ErrInvalidTransition
|
||||
}
|
||||
|
||||
_, err = l.db.ExecContext(ctx, `
|
||||
UPDATE delegations
|
||||
SET status = $2,
|
||||
error_detail = NULLIF($3, ''),
|
||||
result_preview = NULLIF($4, ''),
|
||||
updated_at = now()
|
||||
WHERE delegation_id = $1
|
||||
`, delegationID, status, errorDetail, truncatePreview(resultPreview))
|
||||
return err
|
||||
}
|
||||
|
||||
// Heartbeat stamps last_heartbeat = now() for an in-flight delegation. Used
|
||||
// by the callee whenever it makes progress; PR-3's sweeper compares to
|
||||
// NOW() to decide stuckness. No-op on terminal-state delegations.
|
||||
//
|
||||
// Best-effort: failure logs but doesn't propagate.
|
||||
func (l *DelegationLedger) Heartbeat(ctx context.Context, delegationID string) {
|
||||
if delegationID == "" {
|
||||
return
|
||||
}
|
||||
_, err := l.db.ExecContext(ctx, `
|
||||
UPDATE delegations
|
||||
SET last_heartbeat = now(), updated_at = now()
|
||||
WHERE delegation_id = $1
|
||||
AND status NOT IN ('completed','failed','stuck')
|
||||
`, delegationID)
|
||||
if err != nil {
|
||||
log.Printf("delegation_ledger Heartbeat(%s): %v", delegationID, err)
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user