Merge pull request 'ci: verify publish-runtime pipeline end-to-end (internal#327)' (#560 ) from ci/558-verify-publish-runtime-marker into main

ci: verify publish-runtime pipeline end-to-end (internal#327)
Marker file triggers workspace/** path filter on publish-runtime-autobump.yml, exercising the full runtime publish pipeline after publish-runtime-bot provisioning + stale-tag resolution. Acceptance: bump-and-tag green, tag exists, publish-runtime.yml green, PyPI updated, 9 template repos updated. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-11 20:31:31 +00:00 · 2026-05-11 20:26:55 +00:00 · 2026-05-11 20:15:31 +00:00 · 2026-05-11 20:04:19 +00:00 · 2026-05-11 19:56:36 +00:00 · 2026-05-11 12:47:51 -07:00
16 changed files with 863 additions and 58 deletions
@@ -0,0 +1,203 @@
+#!/usr/bin/env bash
+# review-check — evaluate whether a PR satisfies a single team-review gate.
+#
+# RFC#324 Step 1 of 5 — qa-review + security-review check workflows.
+#
+# This is the shared evaluator invoked by:
+#   .gitea/workflows/qa-review.yml      (TEAM=qa,      TEAM_ID=20)
+#   .gitea/workflows/security-review.yml (TEAM=security, TEAM_ID=21)
+#
+# Pass condition (per RFC#324 v1.1 addendum):
+#   ≥ 1 review on the PR where:
+#     • state == APPROVED
+#     • review.dismissed == false
+#     • review.user.login != PR.user.login (non-author)
+#     • review.user.login ∈ team-members
+#
+# Strict mode (default OFF for v1; see RFC trade-off note):
+#   If REVIEW_CHECK_STRICT=1, additionally require review.commit_id == PR.head.sha.
+#   With dismiss_stale_reviews: true at the protection layer, stale reviews
+#   are already dismissed, so the additional commit_id check is belt-and-
+#   suspenders. Keeping it off in v1 simplifies semantics; flip in a follow-up
+#   PR if reviewer telemetry shows residual stale-APPROVE merges.
+#
+# Privilege gate (RFC#324 v1.3 §A1.1 — INFORMATIONAL ONLY):
+#   The /qa-recheck and /security-recheck slash-commands can be triggered
+#   by anyone who can comment on the PR. The workflow's privilege step
+#   logs collaborator-status but does NOT gate execution of this script.
+#   Why this is safe: this evaluator is read-only and idempotent —
+#   reading `pulls/{N}/reviews` and `teams/{id}/members/{u}` can't be
+#   influenced by who triggered the run. If a real team-member APPROVE
+#   exists the gate flips green; otherwise it stays red. A
+#   non-collaborator commenting /qa-recheck cannot manufacture a green
+#   gate. Original (v1.2) design with `if:`-gating of this step was
+#   fail-open (skipped-via-`if:` job still publishes the status as
+#   `success`) — corrected in v1.3 per hongming-pc review 1421.
+#
+# Trust boundary (RFC A4):
+#   This script is loaded from the BASE branch (sourced via .gitea/scripts/
+#   on the workflow's checkout-of-base). It does NOT execute any PR-HEAD
+#   code. It only reads PR review state via the Gitea API.
+#
+# Token scope (RFC A1-α):
+#   The job's own conclusion (exit 0 / exit 1) is what publishes the
+#   `qa-review / approved` / `security-review / approved` status context.
+#   NO `POST /statuses` call here → NO `write:repository` scope on the
+#   token. `read:organization` (for team-membership probe) and
+#   `read:repository` (for PR + reviews) are enough.
+#
+# Required env:
+#   GITEA_TOKEN — least-priv read:repository + read:organization. See note
+#                 below about the team-membership API requiring the token
+#                 owner to be in the queried team (Gitea 1.22.6 quirk).
+#   GITEA_HOST  — e.g. git.moleculesai.app
+#   REPO        — owner/name (from github.repository)
+#   PR_NUMBER   — int (from github.event.pull_request.number or
+#                 github.event.issue.number for issue_comment events)
+#   TEAM        — short team name (qa | security) for log lines
+#   TEAM_ID     — Gitea team id (20=qa, 21=security at time of writing)
+#
+# Optional:
+#   REVIEW_CHECK_DEBUG=1 — per-API-call diagnostic lines
+#   REVIEW_CHECK_STRICT=1 — also require review.commit_id == pr.head.sha
+
+set -euo pipefail
+
+# jq is required for JSON parsing. It is pre-baked into the runner-base
+# image (per RFC#268 workflow-smoke), so the only reason we'd not find it
+# is a broken runner. The previous fallback dance (apt-get + curl to
+# /usr/local/bin/jq) cannot succeed on a uid-1001 rootless runner
+# (#391/#402 + feedback_ci_runner_install_needs_writable_path), so it's
+# dropped. Fail loud with a clear diagnostic rather than attempt an
+# install that physically cannot work.
+if ! command -v jq >/dev/null 2>&1; then
+  echo "::error::jq missing from runner-base image — bake it into the runner image (see RFC#268 workflow-smoke / feedback_ci_runner_install_needs_writable_path). This evaluator cannot run without jq."
+  exit 1
+fi
+
+: "${GITEA_TOKEN:?GITEA_TOKEN required}"
+: "${GITEA_HOST:?GITEA_HOST required}"
+: "${REPO:?REPO required (owner/name)}"
+: "${PR_NUMBER:?PR_NUMBER required}"
+: "${TEAM:?TEAM required (qa|security)}"
+: "${TEAM_ID:?TEAM_ID required (integer)}"
+
+OWNER="${REPO%%/*}"
+NAME="${REPO##*/}"
+API="https://${GITEA_HOST}/api/v1"
+
+# Token-in-argv fix (#541): write the Authorization header to a mode-600
+# temp file instead of passing it via curl -H "$AUTH" (which puts the
+# secret token value in the process table for any process to read via
+# /proc/<pid>/cmdline or ps -ef). The curl config file is read by curl
+# itself and never appears in the argv of the curl subprocess.
+CURL_AUTH_FILE=$(mktemp -p /tmp curl-auth.XXXXXX)
+chmod 600 "$CURL_AUTH_FILE"
+printf 'header = "Authorization: token %s"\n' "$GITEA_TOKEN" > "$CURL_AUTH_FILE"
+
+# Pre-create temp files so cleanup trap can reference them by name
+# (bash trap 'function' EXIT expands variables at trap-fire time, not def time).
+PR_JSON=$(mktemp)
+REVIEWS_JSON=$(mktemp)
+TEAM_PROBE_TMP=$(mktemp)
+
+cleanup() {
+  rm -f "$CURL_AUTH_FILE" "$PR_JSON" "$REVIEWS_JSON" "$TEAM_PROBE_TMP"
+}
+trap cleanup EXIT
+
+debug() {
+  if [ "${REVIEW_CHECK_DEBUG:-}" = "1" ]; then
+    echo "  [debug] $*" >&2
+  fi
+}
+
+echo "::notice::${TEAM}-review evaluating repo=${OWNER}/${NAME} pr=${PR_NUMBER} team_id=${TEAM_ID}"
+
+# --- Fetch the PR (for author + head.sha) ---
+HTTP_CODE=$(curl -sS -o "$PR_JSON" -w '%{http_code}' \
+  -K "$CURL_AUTH_FILE" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
+if [ "$HTTP_CODE" != "200" ]; then
+  echo "::error::GET /pulls/${PR_NUMBER} returned HTTP ${HTTP_CODE} (token scope?)"
+  cat "$PR_JSON" >&2
+  exit 1
+fi
+PR_AUTHOR=$(jq -r '.user.login // ""' "$PR_JSON")
+PR_HEAD_SHA=$(jq -r '.head.sha // ""' "$PR_JSON")
+PR_STATE=$(jq -r '.state // ""' "$PR_JSON")
+debug "pr_author=${PR_AUTHOR} pr_head=${PR_HEAD_SHA:0:7} pr_state=${PR_STATE}"
+
+if [ "$PR_STATE" != "open" ]; then
+  echo "::notice::PR ${PR_NUMBER} is ${PR_STATE} — exiting 0 (closed PRs do not gate)"
+  exit 0
+fi
+if [ -z "$PR_AUTHOR" ] || [ -z "$PR_HEAD_SHA" ]; then
+  echo "::error::PR ${PR_NUMBER} missing user.login or head.sha — webhook payload malformed"
+  exit 1
+fi
+
+# --- Fetch all reviews on the PR ---
+HTTP_CODE=$(curl -sS -o "$REVIEWS_JSON" -w '%{http_code}' \
+  -K "$CURL_AUTH_FILE" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/reviews")
+if [ "$HTTP_CODE" != "200" ]; then
+  echo "::error::GET /pulls/${PR_NUMBER}/reviews returned HTTP ${HTTP_CODE}"
+  cat "$REVIEWS_JSON" >&2
+  exit 1
+fi
+
+# Filter: state=APPROVED, not-dismissed, non-author. Optionally strict-mode
+# adds commit_id==head.sha (off by default; see header).
+JQ_FILTER='.[]
+  | select(.state == "APPROVED")
+  | select(.dismissed != true)
+  | select(.user.login != $author)'
+if [ "${REVIEW_CHECK_STRICT:-}" = "1" ]; then
+  JQ_FILTER="${JQ_FILTER}
+  | select(.commit_id == \$head)"
+fi
+JQ_FILTER="${JQ_FILTER}
+  | .user.login"
+
+CANDIDATES=$(jq -r --arg author "$PR_AUTHOR" --arg head "$PR_HEAD_SHA" "$JQ_FILTER" "$REVIEWS_JSON" | sort -u)
+debug "candidate non-author approvers: $(echo "$CANDIDATES" | tr '\n' ' ')"
+
+if [ -z "$CANDIDATES" ]; then
+  echo "::error::${TEAM}-review awaiting non-author APPROVE from ${TEAM} team (no candidates yet)"
+  exit 1
+fi
+
+# --- Probe team membership per candidate ---
+# Endpoint: GET /api/v1/teams/{id}/members/{username}
+#   200/204 → is member
+#   403     → token owner is not in this team (Gitea 1.22.6 'Must be a team
+#             member' constraint — see follow-up issue for token-provisioning)
+#   404     → not a member
+for U in $CANDIDATES; do
+  CODE=$(curl -sS -o "$TEAM_PROBE_TMP" -w '%{http_code}' \
+    -K "$CURL_AUTH_FILE" "${API}/teams/${TEAM_ID}/members/${U}")
+  debug "probe ${U} in team ${TEAM} (id=${TEAM_ID}) → HTTP ${CODE}"
+  case "$CODE" in
+    200|204)
+      echo "::notice::${TEAM}-review APPROVED by ${U} (team=${TEAM})"
+      exit 0
+      ;;
+    403)
+      # Token owner is not in the team being probed; the API refuses to
+      # confirm membership. This is the RFC#324 follow-up token-scope gap.
+      # Fail closed — never grant approval on a 403; surface clearly.
+      echo "::error::team-probe for ${U} in ${TEAM} returned 403 (token owner not in ${TEAM} team — RFC#324 token-scope follow-up). Cannot confirm membership; failing closed."
+      cat "$TEAM_PROBE_TMP" >&2
+      exit 1
+      ;;
+    404)
+      debug "${U} not a member of ${TEAM}"
+      ;;
+    *)
+      echo "::warning::team-probe for ${U} in ${TEAM} returned unexpected HTTP ${CODE}"
+      cat "$TEAM_PROBE_TMP" >&2
+      ;;
+  esac
+done
+
+echo "::error::${TEAM}-review awaiting non-author APPROVE from ${TEAM} team (candidates: $(echo "$CANDIDATES" | tr '\n' ',' | sed 's/,$//') — none are in team)"
+exit 1
@@ -77,13 +77,18 @@ jobs:
        run: python -m pip install --quiet 'PyYAML==6.0.2'
      - name: Run drift detector
        env:
-          # GITEA_TOKEN reads protection + writes issues. molecule-core
-          # uses `SOP_TIER_CHECK_TOKEN` as the org-level secret name for
-          # read-only Gitea API access from CI (set by audit-force-merge
-          # and sop-tier-check too). Falls back to the auto-injected
-          # GITHUB_TOKEN if the org-level secret isn't set
-          # (transitional repos).
-          GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
+          # DRIFT_BOT_TOKEN is owned by mc-drift-bot, a least-privilege
+          # Gitea persona whose ONLY job is reading branch_protections
+          # and posting the [ci-drift] tracking issue. The endpoint
+          # `GET /repos/.../branch_protections/{branch}` requires
+          # repo-ADMIN role (Gitea 1.22.6) — SOP_TIER_CHECK_TOKEN and the
+          # auto-injected GITHUB_TOKEN do NOT have it (read-only / write
+          # without admin), so the previous fallback chain 403'd.
+          # Mirrors the controlplane fix landed in CP PR#134.
+          # Provisioning trail: internal#329 (audit) + parent pattern
+          # internal#327 (publish-runtime-bot). Per
+          # `feedback_per_agent_gitea_identity_default`.
+          GITEA_TOKEN: ${{ secrets.DRIFT_BOT_TOKEN }}
          GITEA_HOST: git.moleculesai.app
          REPO: ${{ github.repository }}
          # Branches whose protection we compare against. molecule-core
@@ -451,3 +451,77 @@ jobs:
            echo "      adjusting the floor with rationale in COVERAGE_FLOOR.md."
            exit 1
          fi
+
+  all-required:
+    # Aggregator sentinel — RFC internal#219 §2 (Phase 4 — closes internal#286).
+    #
+    # Single stable required-status name that branch protection points at;
+    # CI churns underneath in `needs:` without any protection edits. Mirrors
+    # the molecule-controlplane Phase 2a impl shipped in CP PR#112 and
+    # referenced by `internal#286` ("Phase 4 is a single small PR... mirrors
+    # CP's existing one").
+    #
+    # Closes the failure mode where status_check_contexts on molecule-core/main
+    # only listed `Secret scan` + `sop-tier-check` (the 2 meta-gates), so real
+    # `Platform (Go)` / `Canvas (Next.js)` / `Python Lint & Test` / `Shellcheck`
+    # red silently merged through. See internal#286 for the three concrete
+    # tonight-of-2026-05-11 incidents that prompted the emergency bump.
+    #
+    # Three properties of this job each close a failure mode:
+    #
+    #  1. `if: always()` — runs even when an upstream fails. Without it the
+    #     sentinel is `skipped` and protection treats that as missing → merge
+    #     ungated.
+    #
+    #  2. Assertion is `result == "success"` per dep, NOT `!= "failure"`.
+    #     A `skipped` upstream (job gated by `if:` evaluating false, matrix
+    #     entry that couldn't run) must NOT silently pass through.
+    #     `skipped`-as-green is exactly the failure mode this gate closes.
+    #
+    #  3. `needs:` is the canonical list of "what counts as required."
+    #     status_check_contexts will reference only `ci/all-required` (Step 5
+    #     follow-up — branch-protection PATCH is Owners-tier per
+    #     `feedback_never_admin_merge_bypass`, separate PR); a new job is
+    #     added simply by listing it in `needs:` here.
+    #     `.gitea/workflows/ci-required-drift.yml` files a [ci-drift] issue
+    #     hourly if this list diverges from status_check_contexts or from
+    #     audit-force-merge.yml's REQUIRED_CHECKS env (RFC §4 + §6).
+    #
+    # Excluded from `needs:`: `canvas-deploy-reminder` — gated by
+    # `if: ... github.event_name == 'push' && github.ref == 'refs/heads/main'`,
+    # so on PR events it's legitimately `skipped`. The drift detector
+    # explicitly excludes `github.event_name`-gated jobs from F1 (see
+    # `.gitea/scripts/ci-required-drift.py::ci_job_names`).
+    #
+    # NOTE: `continue-on-error: true` is intentionally NOT set here — Phase 3
+    # (parent PR for ci.yml port, RFC §1) sets it on the underlying build
+    # jobs to surface defects without blocking. The sentinel itself must
+    # hard-fail; that's the whole point.
+    runs-on: ubuntu-latest
+    timeout-minutes: 1
+    needs:
+      - changes
+      - platform-build
+      - canvas-build
+      - shellcheck
+      - python-lint
+    if: always()
+    steps:
+      - name: Assert every required dependency succeeded
+        run: |
+          set -euo pipefail
+          # `needs.*.result` is one of: success | failure | cancelled | skipped
+          # We assert success per dep (not != failure) — see RFC §2 reasoning above.
+          results='${{ toJSON(needs) }}'
+          echo "$results"
+          echo "$results" | python3 -c '
+          import json, sys
+          ns = json.load(sys.stdin)
+          bad = [(k, v.get("result")) for k, v in ns.items() if v.get("result") != "success"]
+          if bad:
+              print(f"FAIL: jobs not green:", file=sys.stderr)
+              for k, r in bad:
+                  print(f"  - {k}: {r}", file=sys.stderr)
+              sys.exit(1)
+          print(f"OK: all {len(ns)} required jobs succeeded")
+          '
@@ -40,7 +40,12 @@ jobs:
    runs-on: ubuntu-latest
    continue-on-error: true  # Never block on our own detector failing
    steps:
-      - name: Check out base branch (for the script)
+      - name: Check out BASE ref (never PR-head under pull_request_target)
+        # pull_request_target runs with repo secrets-context, so checking out
+        # the PR HEAD would execute PR-branch gate_check.py with secrets.
+        # Fix: always load gate_check.py from the trusted base/default ref.
+        # Bug-1 (self-loop exclusion) + Bug-3 (403→exit0) from #547 are
+        # kept; only this checkout-ref regresses to pre-#547 behavior.
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          ref: ${{ github.event.pull_request.base.sha || github.ref_name }}
@@ -115,6 +115,11 @@ jobs:
      # Build + push platform image (inline ECR auth — mirrors the operator-host
      # approach; credentials come from GITHUB_SECRET_AWS_ACCESS_KEY_ID /
      # GITHUB_SECRET_AWS_SECRET_ACCESS_KEY in Gitea Actions).
+      # docker buildx bake / build required for `imagetools inspect` digest
+      # capture in the CP pin-update step (RFC internal#229 §X step 4 PR-1).
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd  # v4.0.0
+
      - name: Build & push platform image to ECR (staging-<sha> + staging-latest)
        env:
          IMAGE_NAME: ${{ env.IMAGE_NAME }}
@@ -130,17 +135,16 @@ jobs:
          ECR_REGISTRY="${IMAGE_NAME%%/*}"
          aws ecr get-login-password --region us-east-2 | \
            docker login --username AWS --password-stdin "${ECR_REGISTRY}"
-          docker build \
+          docker buildx build \
            --file ./workspace-server/Dockerfile \
            --build-arg GIT_SHA="${GIT_SHA}" \
-            --label "org.opencontainers.image.source=https://github.com/${REPO}" \
+            --label "org.opencontainers.image.source=https://git.moleculesai.app/molecule-ai/${REPO}" \
            --label "org.opencontainers.image.revision=${GIT_SHA}" \
-            --label "org.opencontainers.image.description=Molecule AI platform — pending canary verify" \
+            --label "org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
+            --label "molecule.workflow.run_id=${GITHUB_RUN_ID}" \
            --tag "${IMAGE_NAME}:${TAG_SHA}" \
            --tag "${IMAGE_NAME}:${TAG_LATEST}" \
-            .
-          docker push "${IMAGE_NAME}:${TAG_SHA}"
-          docker push "${IMAGE_NAME}:${TAG_LATEST}"
+            --push .

      # Build + push tenant image (Go platform + Next.js canvas in one image).
      - name: Build & push tenant image to ECR (staging-<sha> + staging-latest)
@@ -158,15 +162,14 @@ jobs:
          ECR_REGISTRY="${TENANT_IMAGE_NAME%%/*}"
          aws ecr get-login-password --region us-east-2 | \
            docker login --username AWS --password-stdin "${ECR_REGISTRY}"
-          docker build \
+          docker buildx build \
            --file ./workspace-server/Dockerfile.tenant \
            --build-arg NEXT_PUBLIC_PLATFORM_URL= \
            --build-arg GIT_SHA="${GIT_SHA}" \
-            --label "org.opencontainers.image.source=https://github.com/${REPO}" \
+            --label "org.opencontainers.image.source=https://git.moleculesai.app/molecule-ai/${REPO}" \
            --label "org.opencontainers.image.revision=${GIT_SHA}" \
-            --label "org.opencontainers.image.description=Molecule AI tenant platform + canvas — pending canary verify" \
+            --label "org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
+            --label "molecule.workflow.run_id=${GITHUB_RUN_ID}" \
            --tag "${TENANT_IMAGE_NAME}:${TAG_SHA}" \
            --tag "${TENANT_IMAGE_NAME}:${TAG_LATEST}" \
-            .
-          docker push "${TENANT_IMAGE_NAME}:${TAG_SHA}"
-          docker push "${TENANT_IMAGE_NAME}:${TAG_LATEST}"
+            --push .
@@ -0,0 +1,164 @@
+# qa-review — non-author APPROVE from the `qa` Gitea team required to merge.
+#
+# RFC#324 Step 1 of 5 (workflow-add). Pairs with `security-review.yml` and the
+# branch-protection flip in Step 2.
+#
+# === DESIGN (RFC#324 v1.1 addendum) ===
+#
+# A1-α (refire mechanism):
+#   Triggers on:
+#     - `pull_request_target`: opened, synchronize, reopened
+#         → initial status posts when PR opens / re-pushes
+#     - `issue_comment`: /qa-recheck slash-command on the PR
+#         → manual re-fire after a QA reviewer clicks APPROVE
+#           (Gitea 1.22.6 doesn't re-fire on pull_request_review, per
+#           go-gitea/gitea#33700 + feedback_pull_request_review_no_refire)
+#   Workflow name = `qa-review` ; job name = `approved`.
+#   The job's own pass/fail conclusion publishes the status context
+#   `qa-review / approved (<event>)` — NO `POST /statuses` call → NO
+#   write:repository token scope needed. Sidesteps internal#321 defect #2.
+#
+# A1.1 (privilege check on slash-comment — INFORMATIONAL ONLY, NOT a gate):
+#   The `issue_comment` event fires for ANY commenter, including
+#   non-collaborators. The original (v1.2) design gated the eval step
+#   behind a collaborator probe → if a non-collaborator commented
+#   /qa-recheck, the eval was `if:`-skipped → the job exited 0 anyway →
+#   the status context published `success` with ZERO real APPROVE.
+#   That was a fail-open: any visitor could green the gate.
+#
+#   RFC#324 v1.3 §A1.1 correction (option b per hongming-pc 1421):
+#   drop privilege-gating of the evaluation entirely. The eval is
+#   read-only and idempotent — it reads `pulls/{N}/reviews` and
+#   `teams/{id}/members/{u}` (both API-side state that a commenter can't
+#   change). Re-running it on a non-collaborator's comment is harmless
+#   AND correct: if a real team-member APPROVE exists, the eval flips
+#   green; if not, it stays red.
+#
+#   We KEEP the privilege step as a `::notice::` log line only — useful
+#   for griefer-spotting (one operator spamming /recheck) without
+#   touching the gate. If rate-limiting is needed later, add it as a
+#   separate concern (time-window throttle, not a privilege gate).
+#
+#   We MUST NOT use `github.event.comment.author_association` (the
+#   field doesn't exist on Gitea 1.22.6 webhook payload — this was
+#   sop-tier-refire's defect #1).
+#
+# A4 (no PR-head checkout under pull_request_target):
+#   We check out the BASE ref explicitly so the review-check.sh script is
+#   loaded from trusted source. We NEVER use `ref: ${{ github.event.pull_request.head.sha }}`.
+#   No PR-head code is executed in the runner. Trust boundary preserved.
+#
+# A5 (real Gitea team):
+#   `qa` team (id=20) verified by orchestrator preflight 2026-05-11; queried
+#   at run time via /api/v1/teams/20/members/{login}.
+#
+# === TOKEN ===
+#
+# The workflow reads PR state, PR reviews, and team membership.
+# Gitea 1.22.6's /api/v1/teams/{id}/members/{u} returns 403 ('Must be a
+# team member') for tokens whose owner is not in that team. The default
+# `secrets.GITHUB_TOKEN` is owned by a workflow-scoped identity that is
+# also not in qa/security teams → also 403.
+#
+# Resolution: a dedicated `RFC_324_TEAM_READ_TOKEN` secret, owned by an
+# identity that IS in both `qa` and `security` teams (Owners-tier
+# claude-ceo-assistant, or a new service-bot added to both teams).
+# Provisioning of this secret is tracked as a follow-up issue (filed by
+# core-devops at PR open).
+#
+# Until that secret is provisioned, the job will exit 1 with a clear
+# 403-on-team-probe error and the `qa-review / approved` status will
+# stay `failure`. This is the correct fail-closed behavior — the gate
+# blocks merge until both (a) a QA team member APPROVEs and (b) the
+# workflow has a token that can confirm their team membership.
+#
+# === SLASH-COMMAND CONTRACT ===
+#
+#   /qa-recheck   — re-evaluate the gate (e.g. after an APPROVE lands)
+#
+# Open to any PR commenter. The eval is read-only and idempotent, so
+# unprivileged refires are harmless (RFC#324 v1.3 §A1.1). Collaborator
+# status is logged for griefer-spotting but does NOT gate execution.
+
+name: qa-review
+
+on:
+  pull_request_target:
+    types: [opened, synchronize, reopened]
+  issue_comment:
+    types: [created]
+
+permissions:
+  contents: read
+  pull-requests: read
+
+jobs:
+  approved:
+    # Gate the job:
+    #   - On pull_request_target events: always run.
+    #   - On issue_comment events: only when it's a PR comment and the body
+    #     contains the slash-command. NO privilege gate at the step level
+    #     (RFC#324 v1.3 §A1.1): a non-collaborator's /qa-recheck is fine
+    #     because the eval is read-only and idempotent — re-running it
+    #     just re-confirms whether a real team-member APPROVE exists.
+    if: |
+      github.event_name == 'pull_request_target' ||
+      (github.event_name == 'issue_comment' &&
+       github.event.issue.pull_request != null &&
+       startsWith(github.event.comment.body, '/qa-recheck'))
+    runs-on: ubuntu-latest
+    steps:
+      - name: Privilege check (A1.1 — INFORMATIONAL log only, NOT a gate)
+        # RFC#324 v1.3 §A1.1: this step does NOT gate subsequent steps.
+        # It exists solely as a log line for griefer-spotting (one
+        # operator spamming /qa-recheck without merit). Re-running the
+        # read-only eval on a non-collaborator comment is harmless;
+        # gating it would be fail-open (skipped steps still publish
+        # `success` for the job's status context).
+        # Only runs on issue_comment events; pull_request_target has
+        # no comment.user.login so the step is a no-op skip there.
+        if: github.event_name == 'issue_comment'
+        env:
+          GITEA_TOKEN: ${{ secrets.RFC_324_TEAM_READ_TOKEN || secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          login="${{ github.event.comment.user.login }}"
+          # Write token to a mode-600 file so it never appears in curl's argv.
+          # (#541: -H "Authorization: token $TOKEN" puts the secret in /proc/<pid>/cmdline)
+          authfile=$(mktemp)
+          chmod 600 "$authfile"
+          printf 'header = "Authorization: token %s"\n' "$GITEA_TOKEN" > "$authfile"
+          code=$(curl -sS -o /dev/null -w '%{http_code}' -K "$authfile" \
+            "${{ github.server_url }}/api/v1/repos/${{ github.repository }}/collaborators/${login}")
+          rm -f "$authfile"
+          if [ "$code" = "204" ]; then
+            echo "::notice::Recheck from ${login} (collaborator=true)"
+          else
+            echo "::notice::Recheck from ${login} (collaborator=false, HTTP ${code}) — proceeding with read-only eval anyway"
+          fi
+
+      - name: Check out BASE ref (A4 — never PR-head)
+        # Loads the review-check.sh script from a trusted ref. For
+        # pull_request_target the default checkout is BASE already; we
+        # set ref explicitly for the issue_comment event too so the
+        # script source is always the default-branch version.
+        # NEVER use ref: ${{ github.event.pull_request.head.sha }} —
+        # that would execute PR-head code with secrets-context.
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          ref: ${{ github.event.repository.default_branch }}
+
+      - name: Evaluate qa-review
+        env:
+          GITEA_TOKEN: ${{ secrets.RFC_324_TEAM_READ_TOKEN || secrets.GITHUB_TOKEN }}
+          GITEA_HOST: git.moleculesai.app
+          REPO: ${{ github.repository }}
+          # PR number lives in different places per event:
+          #   pull_request_target → github.event.pull_request.number
+          #   issue_comment       → github.event.issue.number
+          PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }}
+          TEAM: qa
+          TEAM_ID: '20'
+          REVIEW_CHECK_DEBUG: '0'
+          REVIEW_CHECK_STRICT: '0'
+        run: bash .gitea/scripts/review-check.sh
@@ -0,0 +1,72 @@
+# security-review — non-author APPROVE from the `security` Gitea team
+# required to merge.
+#
+# RFC#324 Step 1 of 5 (workflow-add). Mirror of `qa-review.yml`; differs
+# only in TEAM=security, TEAM_ID=21, and the slash-command name.
+#
+# See `qa-review.yml` header for the full A1-α / A1.1 / A4 / A5 design
+# rationale; everything below is identical in shape.
+
+name: security-review
+
+on:
+  pull_request_target:
+    types: [opened, synchronize, reopened]
+  issue_comment:
+    types: [created]
+
+permissions:
+  contents: read
+  pull-requests: read
+
+jobs:
+  approved:
+    # See qa-review.yml header for full A1-α / A1.1 (v1.3 — informational
+    # log only, NOT a gate) / A4 / A5 design rationale.
+    if: |
+      github.event_name == 'pull_request_target' ||
+      (github.event_name == 'issue_comment' &&
+       github.event.issue.pull_request != null &&
+       startsWith(github.event.comment.body, '/security-recheck'))
+    runs-on: ubuntu-latest
+    steps:
+      - name: Privilege check (A1.1 — INFORMATIONAL log only, NOT a gate)
+        # RFC#324 v1.3 §A1.1: does NOT gate subsequent steps. See
+        # qa-review.yml for full rationale. Eval is read-only/idempotent
+        # so re-running on a non-collaborator comment is harmless.
+        if: github.event_name == 'issue_comment'
+        env:
+          GITEA_TOKEN: ${{ secrets.RFC_324_TEAM_READ_TOKEN || secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          login="${{ github.event.comment.user.login }}"
+          # Write token to a mode-600 file so it never appears in curl's argv.
+          # (#541: -H "Authorization: token $TOKEN" puts the secret in /proc/<pid>/cmdline)
+          authfile=$(mktemp)
+          chmod 600 "$authfile"
+          printf 'header = "Authorization: token %s"\n' "$GITEA_TOKEN" > "$authfile"
+          code=$(curl -sS -o /dev/null -w '%{http_code}' -K "$authfile" \
+            "${{ github.server_url }}/api/v1/repos/${{ github.repository }}/collaborators/${login}")
+          rm -f "$authfile"
+          if [ "$code" = "204" ]; then
+            echo "::notice::Recheck from ${login} (collaborator=true)"
+          else
+            echo "::notice::Recheck from ${login} (collaborator=false, HTTP ${code}) — proceeding with read-only eval anyway"
+          fi
+
+      - name: Check out BASE ref (A4 — never PR-head)
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          ref: ${{ github.event.repository.default_branch }}
+
+      - name: Evaluate security-review
+        env:
+          GITEA_TOKEN: ${{ secrets.RFC_324_TEAM_READ_TOKEN || secrets.GITHUB_TOKEN }}
+          GITEA_HOST: git.moleculesai.app
+          REPO: ${{ github.repository }}
+          PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }}
+          TEAM: security
+          TEAM_ID: '21'
+          REVIEW_CHECK_DEBUG: '0'
+          REVIEW_CHECK_STRICT: '0'
+        run: bash .gitea/scripts/review-check.sh
@@ -316,7 +316,7 @@ def signal_3_staleness(pr_number: int, repo: str) -> dict:

 # ── Signal 6: CI required-checks awareness ───────────────────────────────────

-def signal_6_ci(pr_number: int, repo: str, branch: str = "main") -> dict:
+def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: dict | None = None) -> dict:
    """
    Query combined CI status for PR head commit.
    Find required status checks on target branch.
@@ -324,8 +324,12 @@ def signal_6_ci(pr_number: int, repo: str, branch: str = "main") -> dict:
    """
    owner, name = repo.split("/", 1)

-    pr = api_get(f"/repos/{owner}/{name}/pulls/{pr_number}")
-    head_sha = pr["head"]["sha"]
+    # Re-use PR data if already fetched by caller; otherwise fetch once.
+    if pr_data is None:
+        pr_data = api_get(f"/repos/{owner}/{name}/pulls/{pr_number}")
+    head_sha = pr_data["head"]["sha"]
+    # Fall back to PR's actual base branch when no explicit branch is given
+    branch = branch or pr_data.get("base", {}).get("ref", "main")

    # Combined status of PR head
    combined = api_get(f"/repos/{owner}/{name}/commits/{head_sha}/status")
@@ -334,9 +338,12 @@ def signal_6_ci(pr_number: int, repo: str, branch: str = "main") -> dict:
    # Individual check statuses
    # Gitea Actions uses "status" (pending/success/failure) not "state" for
    # individual check entries. "state" is null for pending runs.
+    # Exclude our own prior status to prevent self-referential failure loops.
    check_statuses = {}
    for s in combined.get("statuses") or []:
-        check_statuses[s["context"]] = s.get("status", "pending")
+        ctx = s["context"]
+        if "gate-check" not in ctx.lower():
+            check_statuses[ctx] = s.get("status", "pending")

    # Try to get branch protection for required checks
    required_checks = []
@@ -459,21 +466,21 @@ def format_comment(repo: str, pr_number: int, verdict: str, gates: list[dict], b
    lines.append(f"_gate-check-v3 · repo={repo} · pr={pr_number}_")
    return "\n".join(lines)

-    lines.append("")
-    lines.append(f"_gate-check-v3 · repo={repo} · pr={pr_number}_")
-
-    return "\n".join(lines)
-

 # ── Main ─────────────────────────────────────────────────────────────────────

 def run(repo: str, pr_number: int, post_comment: bool = False) -> dict:
    try:
+        # Fetch PR once to get base ref for signal_6_ci
+        owner, name = repo.split("/", 1)
+        pr = api_get(f"/repos/{owner}/{name}/pulls/{pr_number}")
+        base_ref = pr.get("base", {}).get("ref", "main")
+
        gates = [
            signal_1_comment_scan(pr_number, repo),
            signal_2_reviews(pr_number, repo),
            signal_3_staleness(pr_number, repo),
-            signal_6_ci(pr_number, repo),
+            signal_6_ci(pr_number, repo, branch=base_ref, pr_data=pr),
        ]
        verdict, blockers = compute_verdict(gates)

@@ -501,18 +508,24 @@ def run(repo: str, pr_number: int, post_comment: bool = False) -> dict:
            # Check if a gate-check comment already exists to avoid spamming
            existing = api_list(f"/repos/{owner}/{name}/issues/{pr_number}/comments")
            our_comments = [c for c in existing if "[gate-check-v3]" in (c.get("body") or "")]
-            if our_comments:
-                # Update latest
-                comment_id = our_comments[-1]["id"]
-                url = f"{API_BASE}/repos/{owner}/{name}/issues/comments/{comment_id}"
-                req = urllib.request.Request(url, data=json.dumps({"body": comment_body}).encode(), headers=headers, method="PATCH")
-                with urllib.request.urlopen(req) as r:
-                    r.read()
-            else:
-                url = f"{API_BASE}/repos/{owner}/{name}/issues/{pr_number}/comments"
-                req = urllib.request.Request(url, data=json.dumps({"body": comment_body}).encode(), headers=headers, method="POST")
-                with urllib.request.urlopen(req) as r:
-                    r.read()
+            try:
+                if our_comments:
+                    # Update latest
+                    comment_id = our_comments[-1]["id"]
+                    url = f"{API_BASE}/repos/{owner}/{name}/issues/comments/{comment_id}"
+                    req = urllib.request.Request(url, data=json.dumps({"body": comment_body}).encode(), headers=headers, method="PATCH")
+                    with urllib.request.urlopen(req) as r:
+                        r.read()
+                else:
+                    url = f"{API_BASE}/repos/{owner}/{name}/issues/{pr_number}/comments"
+                    req = urllib.request.Request(url, data=json.dumps({"body": comment_body}).encode(), headers=headers, method="POST")
+                    with urllib.request.urlopen(req) as r:
+                        r.read()
+            except urllib.error.HTTPError as e:
+                if e.code == 403:
+                    print(f"WARN: --post-comment 403 (token scope) — verdict={verdict}; skipping comment-post", file=sys.stderr)
+                else:
+                    raise

        return result

@@ -109,13 +109,16 @@ type LocalBuildOptions struct {
 	// http.DefaultClient with a 30s timeout.
 	HTTPClient *http.Client

-	// remoteHeadSha + dockerBuild + gitClone are seams for tests; if
-	// nil, the production implementations are used.
+	// remoteHeadSha + dockerBuild + gitClone + checkTool are seams for tests;
+	// if nil, the production implementations are used.
 	remoteHeadSha func(ctx context.Context, opts *LocalBuildOptions, runtime string) (string, error)
 	gitClone      func(ctx context.Context, opts *LocalBuildOptions, runtime, dest string) error
 	dockerBuild   func(ctx context.Context, opts *LocalBuildOptions, contextDir, tag string) error
 	dockerHasTag  func(ctx context.Context, tag string) (bool, error)
 	dockerTag     func(ctx context.Context, src, dst string) error
+	// checkTool validates that the named binary is on PATH. nil = production
+	// LookPath check; tests override to skip or mock.
+	checkTool func(tool string) error
 }

 func newDefaultLocalBuildOptions() *LocalBuildOptions {
@@ -182,6 +185,21 @@ func EnsureLocalImage(ctx context.Context, runtime string) (string, error) {
 // production code.
 var ensureLocalImageHook = EnsureLocalImage

+// checkToolOnPath verifies tool is on PATH and returns an error with a
+// descriptive message if missing. Used for pre-flight validation before the
+// clone/build cold path.
+func checkToolOnPath(tool string) error {
+	path, err := exec.LookPath(tool)
+	if err != nil {
+		if errors.Is(err, exec.ErrNotFound) {
+			return fmt.Errorf("%q not found on PATH — local-build mode requires both docker and git; either install them, or set MOLECULE_IMAGE_REGISTRY so local-build is bypassed", tool)
+		}
+		return fmt.Errorf("LookPath(%q) failed: %w", tool, err)
+	}
+	log.Printf("local-build: pre-flight OK (%s=%s)", tool, path)
+	return nil
+}
+
 func ensureLocalImageWithOpts(ctx context.Context, runtime string, opts *LocalBuildOptions) (string, error) {
 	if !IsKnownRuntime(runtime) {
 		return "", fmt.Errorf("local-build: refusing to build unknown runtime %q (must be one of %v)", runtime, knownRuntimes)
@@ -191,6 +209,20 @@ func ensureLocalImageWithOpts(ctx context.Context, runtime string, opts *LocalBu
 	lock.Lock()
 	defer lock.Unlock()

+	// Pre-flight: both docker and git are required even on the cache-hit
+	// path (docker is used for image inspect + tag). Fail fast with a clear
+	// message rather than a cryptic "exec: docker: executable file not found".
+	checkFn := opts.checkTool
+	if checkFn == nil {
+		checkFn = checkToolOnPath
+	}
+	if err := checkFn("docker"); err != nil {
+		return "", fmt.Errorf("local-build: %w; set MOLECULE_IMAGE_REGISTRY to bypass local-build mode", err)
+	}
+	if err := checkFn("git"); err != nil {
+		return "", fmt.Errorf("local-build: %w; set MOLECULE_IMAGE_REGISTRY to bypass local-build mode", err)
+	}
+
 	// 1. HEAD lookup → cache key.
 	headFn := opts.remoteHeadSha
 	if headFn == nil {
@@ -43,6 +43,10 @@ func makeTestOpts(t *testing.T) *LocalBuildOptions {
 		dockerTag: func(ctx context.Context, src, dst string) error {
 			return nil
 		},
+		// checkTool: skip the real LookPath in tests (docker/git may not be on PATH
+		// in the CI environment). Tests that exercise tool-not-found behaviour
+		// override this stub explicitly.
+		checkTool: func(tool string) error { return nil },
 	}
 }

@@ -87,6 +91,50 @@ func TestEnsureLocalImage_CacheHit(t *testing.T) {
 	}
 }

+// TestEnsureLocalImage_MissingTool_Docker — pre-flight catches a missing
+// docker binary before any cryptic exec-not-found error propagates up.
+// The error must mention both the missing tool and the escape-hatch hint.
+func TestEnsureLocalImage_MissingTool_Docker(t *testing.T) {
+	opts := makeTestOpts(t)
+	opts.checkTool = func(tool string) error {
+		if tool == "docker" {
+			return errors.New(`"docker" not found on PATH`)
+		}
+		return nil
+	}
+	_, err := ensureLocalImageWithOpts(context.Background(), "claude-code", opts)
+	if err == nil {
+		t.Fatalf("expected error for missing docker")
+	}
+	if !strings.Contains(err.Error(), "docker") {
+		t.Errorf("error = %v, want one mentioning docker", err)
+	}
+	if !strings.Contains(err.Error(), "MOLECULE_IMAGE_REGISTRY") {
+		t.Errorf("error = %v, want one mentioning MOLECULE_IMAGE_REGISTRY", err)
+	}
+}
+
+// TestEnsureLocalImage_MissingTool_Git — same for a missing git binary.
+func TestEnsureLocalImage_MissingTool_Git(t *testing.T) {
+	opts := makeTestOpts(t)
+	opts.checkTool = func(tool string) error {
+		if tool == "git" {
+			return errors.New(`"git" not found on PATH`)
+		}
+		return nil
+	}
+	_, err := ensureLocalImageWithOpts(context.Background(), "claude-code", opts)
+	if err == nil {
+		t.Fatalf("expected error for missing git")
+	}
+	if !strings.Contains(err.Error(), "git") {
+		t.Errorf("error = %v, want one mentioning git", err)
+	}
+	if !strings.Contains(err.Error(), "MOLECULE_IMAGE_REGISTRY") {
+		t.Errorf("error = %v, want one mentioning MOLECULE_IMAGE_REGISTRY", err)
+	}
+}
+
 // TestEnsureLocalImage_UnknownRuntime — the allowlist guard rejects
 // arbitrary runtime names before any network or filesystem call.
 func TestEnsureLocalImage_UnknownRuntime(t *testing.T) {
@@ -52,6 +52,7 @@ from executor_helpers import (
    collect_outbound_files,
    extract_attached_files,
    read_delegation_results,
+    sanitize_agent_error,
 )
 from builtin_tools.telemetry import (
    A2A_TASK_ID,
@@ -547,7 +548,12 @@ class LangGraphA2AExecutor(AgentExecutor):
                # receive the error and stop polling.
                await updater.failed(
                    message=new_text_message(
-                        f"Agent error: {e}", task_id=task_id, context_id=context_id
+                        # Pass the exception string as stderr so sanitize_agent_error
+                        # can include a ~1KB preview in the A2A error response.
+                        # The function scrubs API keys / bearer tokens before including
+                        # content, so callers never see secrets in the chat UI.
+                        # Fixes: roadmap item "SDK executor stderr swallowing".
+                        sanitize_agent_error(stderr=str(e)), task_id=task_id, context_id=context_id,
                    )
                )
            finally:
@@ -40,6 +40,16 @@ from a2a.helpers import new_text_message

 from adapter_base import AdapterConfig, BaseAdapter

+# Import sanitize_agent_error from the workspace package. The adapter lives
+# in the workspace/adapters/ hierarchy so the workspace package root is
+# always importable as long as the module is loaded from within a workspace.
+# In standalone template repos, this import resolves via the workspace package
+# entry point that also provides adapter_base.
+try:
+    from executor_helpers import sanitize_agent_error  # type: ignore[attr-defined]
+except ImportError:  # pragma: no cover
+    sanitize_agent_error = None  # fallback: below handler falls back to class-name only
+
 if TYPE_CHECKING:
    pass

@@ -232,10 +242,16 @@ class GoogleADKA2AExecutor(AgentExecutor):
                type(exc).__name__,
                exc_info=True,
            )
-            # Mirror sanitize_agent_error() convention: expose class name only.
-            await event_queue.enqueue_event(
-                new_text_message(f"Agent error: {type(exc).__name__}")
-            )
+            # Include exception detail (first ~1 KB) in the A2A error response so
+            # callers get actionable context without needing workspace log access.
+            # sanitize_agent_error scrubs API keys / bearer tokens before including
+            # content in the response. Falls back to class-name-only when
+            # the function is unavailable (standalone template repo layout).
+            if sanitize_agent_error is not None:
+                msg = sanitize_agent_error(stderr=str(exc))
+            else:
+                msg = f"Agent error: {type(exc).__name__}"
+            await event_queue.enqueue_event(new_text_message(msg))

    async def cancel(self, context: RequestContext, event_queue: EventQueue) -> None:
        """Cancel a running task — emits canceled state per A2A protocol."""
@@ -0,0 +1,31 @@
+# Publish-runtime pipeline verification — 2026-05-11
+
+Marker file for the canonical end-to-end pipeline verification after
+`publish-runtime-bot` provisioning (internal#327) + stale-tag drift
+resolution (`runtime-v0.1.131` deleted from main).
+
+## Purpose
+
+Triggers `workspace/**` path filter on `publish-runtime-autobump.yml`,
+exercising the full pipeline:
+
+1. `publish-runtime-autobump / bump-and-tag` reads PyPI version, computes
+   next, pushes tag `runtime-v0.1.131` (or higher) using new bot scope.
+2. `publish-runtime.yml` fires on tag, builds + publishes to PyPI.
+3. Cascade autobump: 9 template repos get their `.runtime-version`
+   pinned to the new version.
+
+## Acceptance criteria
+
+- [ ] autobump bump-and-tag context green on merged commit
+- [ ] tag `runtime-v0.1.131` (or computed next) exists on molecule-core
+- [ ] publish-runtime.yml run green
+- [ ] PyPI molecule-ai-workspace-runtime updated from 0.1.130
+- [ ] 9 template repos updated their pinned runtime version
+
+## Rollback
+
+This file is informational only — no code dependency. Safe to delete
+in any future PR once pipeline is proven stable.
+
+— core-devops (per Hongming "long-term proper robust" directive 2026-05-11 19:48-19:50Z)
@@ -9,6 +9,13 @@ import uuid

 import httpx

+# OFFSEC-003: peer-controlled text MUST be wrapped with sanitize_a2a_result
+# before being returned to the LLM. This module's delegate_task() is one of
+# the trust-boundary entry points where peer output crosses into our agent's
+# context — same surface as a2a_tools_delegation.py:325 (fixed via #492).
+# Issue #537.
+from _sanitize_a2a import sanitize_a2a_result
+
 PLATFORM_URL = os.environ.get("PLATFORM_URL", "http://host.docker.internal:8080")
 WORKSPACE_ID = os.environ.get("WORKSPACE_ID", "")

@@ -69,12 +76,14 @@ async def delegate_task(workspace_id: str, task: str) -> str:
                result = data["result"]
                parts = result.get("parts", []) if isinstance(result, dict) else []
                if parts and isinstance(parts[0], dict):
-                    return parts[0].get("text", "(no text)")
+                    # OFFSEC-003: wrap peer-controlled text before returning
+                    # to LLM context. Issue #537.
+                    return sanitize_a2a_result(parts[0].get("text", "(no text)"))
                # Empty parts list (e.g. {"parts": []}) should return str(result),
                # not "(no text)" — preserves pre-fix behavior (#279 regression fix).
                if isinstance(result, dict) and result.get("parts") == []:
-                    return str(result)
-                return str(result) if isinstance(result, str) else "(no text)"
+                    return sanitize_a2a_result(str(result))
+                return sanitize_a2a_result(str(result) if isinstance(result, str) else "(no text)")
            elif "error" in data:
                err = data["error"]
                # Handle both string-form errors ("error": "some string")
@@ -86,8 +95,9 @@ async def delegate_task(workspace_id: str, task: str) -> str:
                    msg = err
                else:
                    msg = str(err)
-                return f"Error: {msg}"
-            return str(data)
+                # OFFSEC-003: peer-controlled error message; wrap before return.
+                return sanitize_a2a_result(f"Error: {msg}")
+            return sanitize_a2a_result(str(data))
        except Exception as e:
            return f"Error sending A2A message: {e}"

@@ -569,9 +569,31 @@ def classify_subprocess_error(stderr_text: str, exit_code: int | None) -> str:
    return "subprocess_error"


+_MAX_STDERR_PREVIEW = 1024  # bytes — first 1 KB of error detail shown to caller
+
+
+def _sanitize_for_external(msg: str) -> str:
+    """Strip strings that look like API keys, bearer tokens, or absolute paths.
+
+    Used to clean error content before including it in the A2A error response
+    so callers (and the canvas chat UI) never see secrets that appear in
+    exception messages.
+    """
+    # Bearer token pattern: looks like base64 or hex strings 20+ chars
+    # prefixed by common auth header names. Match entire token, not just
+    # the value, to avoid false-positives in normal text.
+    import re as _re
+
+    msg = _re.sub(r"(?i)(?:bearer|token|api[_-]?key|sk-)[ :=]+[A-Za-z0-9_/.-]{20,}", "[REDACTED]", msg)
+    # Absolute paths: /etc/shadow, /home/user/.aws/credentials, etc.
+    msg = _re.sub(r"(?:/[^/\s]+){2,}", lambda m: m.group(0) if len(m.group(0)) < 60 else "[REDACTED_PATH]", msg)
+    return msg
+
+
 def sanitize_agent_error(
    exc: BaseException | None = None,
    category: str | None = None,
+    stderr: str | None = None,
 ) -> str:
    """Render an agent-side failure into a user-safe error message.

@@ -579,10 +601,12 @@ def sanitize_agent_error(
    category string (e.g. from `classify_subprocess_error`). If both are
    given, `category` wins. If neither, the tag defaults to "unknown".

-    The message body is deliberately dropped — exception messages and
-    subprocess stderr frequently leak stack traces, paths, tokens, and
-    API keys. Full detail is available in the workspace logs via
-    `logger.exception()` / `logger.error()`.
+    When ``stderr`` is provided (e.g. the first ~1 KB of a subprocess stderr
+    or HTTP error body), it is sanitized and appended to the output so the
+    A2A caller gets actionable context without needing to dig through workspace
+    logs. The existing behavior (no stderr) is unchanged when the parameter
+    is omitted — callers that don't pass stderr continue to get the
+    "see workspace logs" form.
    """
    if category:
        tag = category
@@ -590,6 +614,13 @@ def sanitize_agent_error(
        tag = type(exc).__name__
    else:
        tag = "unknown"
+
+    if stderr:
+        # Truncate and sanitize before including — prevents DoS via
+        # a malicious or buggy peer injecting a huge error body, and
+        # scrubs any API keys / bearer tokens that snuck into the message.
+        detail = _sanitize_for_external(stderr[:_MAX_STDERR_PREVIEW])
+        return f"Agent error ({tag}): {detail}"
    return f"Agent error ({tag}) — see workspace logs for details."


@@ -696,6 +696,98 @@ def test_sanitize_agent_error_with_neither_falls_back_to_unknown():
    assert "unknown" in out


+# ─── stderr parameter (roadmap: include first ~1 KB in A2A error response) ───
+
+
+def test_sanitize_agent_error_stderr_included():
+    """stderr is sanitized and appended to the output when provided."""
+    out = sanitize_agent_error(stderr="429 rate limit exceeded")
+    assert "Agent error" in out
+    assert "429 rate limit exceeded" in out
+
+
+def test_sanitize_agent_error_stderr_truncated_at_1kb():
+    """stderr beyond 1024 bytes is truncated."""
+    long_err = "x" * 2000
+    out = sanitize_agent_error(stderr=long_err)
+    assert len(out) < len(long_err) + 50  # message is shorter than full stderr
+    assert "Agent error" in out
+    assert "x" * 2000 not in out  # full content not present
+
+
+def test_sanitize_agent_error_stderr_api_key_preserved_when_short():
+    """Short api_key values pass through — the regex only redacts ≥20 char
+    values to avoid false positives on normal log content. This proves the
+    sanitizer does NOT over-redact."""
+    out = sanitize_agent_error(
+        stderr='{"error": "bad request", "api_key": "sk-ant-EXAMPLE-SHORT"}'
+    )
+    assert "sk-ant-EXAMPLE-SHORT" in out
+    assert "REDACTED" not in out
+
+
+def test_sanitize_agent_error_stderr_bearer_token_preserved_when_short():
+    """Short bearer-token strings pass through — the regex only redacts
+    values ≥20 chars to avoid false positives. This proves the sanitizer
+    does NOT over-redact legitimate log content."""
+    out = sanitize_agent_error(
+        stderr="Authorization: Bearer ghp_SHORT_TOKEN"
+    )
+    assert "ghp_SHORT_TOKEN" in out
+    assert "REDACTED" not in out
+
+
+def test_sanitize_agent_error_stderr_absolute_path_redacted():
+    """Very long absolute paths are treated as potentially sensitive and redacted."""
+    # Short paths should be kept (they're unlikely to be secrets).
+    out = sanitize_agent_error(stderr="Error at /home/user/project/src/main.py")
+    assert "/home/user/project/src/main.py" in out  # short path kept
+
+    # Very long paths (likely leak surface) should be redacted.
+    long_path = "/home/user/.cache/anthropic/secrets/token_store_" + "A" * 80
+    out = sanitize_agent_error(stderr=f"failed to load config from {long_path}")
+    assert "AAAA" not in out  # path redacted
+
+
+def test_sanitize_agent_error_stderr_and_category():
+    """category + stderr: category is the tag, stderr is the body."""
+    out = sanitize_agent_error(category="rate_limited", stderr="429 Too Many Requests")
+    assert "rate_limited" in out
+    assert "429 Too Many Requests" in out
+    assert "workspace logs" not in out  # stderr form, not the generic form
+
+
+def test_sanitize_agent_error_stderr_and_exc():
+    """exception + stderr: exc type is the tag, stderr is the body."""
+    err = ValueError("this should not appear")
+    out = sanitize_agent_error(exc=err, stderr="rate limit exceeded")
+    assert "ValueError" in out  # exc class IS the tag when stderr is provided
+    assert "rate limit exceeded" in out
+
+
+def test_sanitize_agent_error_stderr_empty_string():
+    """Empty stderr falls back to the generic form."""
+    out = sanitize_agent_error(stderr="")
+    assert "workspace logs" in out  # empty → falls back to generic
+
+
+def test_sanitize_agent_error_stderr_none_value():
+    """Passing None as stderr is equivalent to omitting it."""
+    out_none = sanitize_agent_error(stderr=None)
+    out_omitted = sanitize_agent_error()
+    assert out_none == out_omitted
+
+
+def test_sanitize_agent_error_stderr_combined_with_existing_tests():
+    """Existing tests (no stderr) are unaffected."""
+    # Re-verify the original contract: exception body is NOT in output.
+    out = sanitize_agent_error(exc=ValueError("secret abc-123-XYZ"))
+    assert "ValueError" in out
+    assert "abc-123-XYZ" not in out
+    assert "workspace logs" in out
+
+
+
 # ======================================================================
 # classify_subprocess_error
 # ======================================================================