Merge pull request #2211 from Molecule-AI/staging

staging to main
Merge pull request #2212 from Molecule-AI/feat/secret-pattern-drift-lint
2026-04-28 15:52:20 -07:00 · 2026-04-28 22:40:13 +00:00 · 2026-04-28 15:41:14 -07:00 · 2026-04-28 15:29:09 -07:00 · 2026-04-28 22:08:35 +00:00 · 2026-04-28 14:59:23 -07:00
22 changed files with 1749 additions and 119 deletions
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+"""Lint SECRET_PATTERNS drift across known consumers of molecule-core's canonical.
+
+The canonical SECRET_PATTERNS array in
+.github/workflows/secret-scan.yml is mirrored by every other side
+that scans for credentials: the workspace-runtime's bundled
+pre-commit hook, the molecule-controlplane inlined copy, etc. The
+mirror is enforced socially today — when someone adds a new pattern
+to canonical (e.g. the sk-cp- MiniMax token after F1088), the other
+sides are supposed to be updated in lockstep.
+
+This script automates the check: it diffs the canonical pattern set
+against each known public consumer and exits non-zero on any
+mismatch. It is wired into a daily cron + on-push gate via
+.github/workflows/secret-pattern-drift.yml.
+
+Private-repo consumers (currently molecule-controlplane's inlined
+copy) are out of scope here because the molecule-core workflow's
+GITHUB_TOKEN can't read other private repos in the org. They're
+expected to self-monitor via their own copy of this script — not a
+hard barrier, just a future expansion.
+"""
+
+from __future__ import annotations
+
+import re
+import sys
+import urllib.request
+from pathlib import Path
+
+CANONICAL_FILE = Path(".github/workflows/secret-scan.yml")
+
+# Public consumer mirrors. Each entry is (label, raw_url) — raw_url
+# points at the file's RAW content on the consumer's default branch
+# (or staging where applicable). Add an entry here when a new public
+# repo starts shipping its own SECRET_PATTERNS array.
+CONSUMERS: list[tuple[str, str]] = [
+    (
+        "molecule-ai-workspace-runtime/molecule_runtime/scripts/pre-commit-checks.sh",
+        "https://raw.githubusercontent.com/Molecule-AI/molecule-ai-workspace-runtime/main/molecule_runtime/scripts/pre-commit-checks.sh",
+    ),
+]
+
+# Matches the SECRET_PATTERNS=( ... ) array in either yaml-indented
+# (the canonical workflow's `run:` block) or shell-flat (runtime
+# hook) format. Patterns inside are single-quoted Bash strings; we
+# pull each via _PATTERN_RE.
+#
+# Closing `)` is anchored to the start of a line (possibly indented)
+# because pattern comments like `# GitHub PAT (classic)` contain
+# their own `)` mid-line — a non-anchored regex would match through
+# the comment's paren and capture only the first pattern.
+_ARRAY_RE = re.compile(r"SECRET_PATTERNS=\((.*?)^\s*\)", re.DOTALL | re.MULTILINE)
+_PATTERN_RE = re.compile(r"'([^']+)'")
+
+
+def extract_patterns(content: str, source_label: str) -> list[str]:
+    """Return the single-quoted entries of the SECRET_PATTERNS=( ... ) array.
+
+    Args:
+        content: Full text of the file to scan (workflow YAML or shell hook).
+        source_label: Human-readable name of the file, used in error output.
+
+    Returns:
+        The patterns in file order, duplicates included.
+
+    Raises:
+        SystemExit: If the array is not found, or if it is found but
+            contains no single-quoted entry.
+    """
+    m = _ARRAY_RE.search(content)
+    if not m:
+        raise SystemExit(f"::error::{source_label}: SECRET_PATTERNS=(...) array not found")
+    patterns = _PATTERN_RE.findall(m.group(1))
+    if not patterns:
+        # An array that parses to zero patterns means the quoting style
+        # changed or the array was emptied. Returning [] would let two
+        # equally-unparseable sides compare as "aligned (0 patterns)",
+        # so the gate would pass while checking nothing.
+        raise SystemExit(
+            f"::error::{source_label}: SECRET_PATTERNS=(...) array has no single-quoted patterns"
+        )
+    return patterns
+
+
+def fetch(url: str) -> str:
+    """Download ``url`` and return the response body decoded as UTF-8.
+
+    The request carries this lint's User-Agent and a 30-second timeout.
+    Errors from urllib or from decoding propagate to the caller.
+    """
+    headers = {"User-Agent": "secret-pattern-drift-lint/1"}
+    request = urllib.request.Request(url, headers=headers)
+    with urllib.request.urlopen(request, timeout=30) as response:
+        body = response.read()
+    return body.decode("utf-8")
+
+
+def diff_patterns(canonical: list[str], consumer: list[str]) -> tuple[list[str], list[str]]:
+    """Compare two pattern lists as sets.
+
+    Returns the pair ``(missing_from_consumer, extra_in_consumer)``, each
+    sorted. Ordering and duplicates in the inputs are ignored.
+    """
+    wanted = set(canonical)
+    found = set(consumer)
+    missing_from_consumer = sorted(wanted.difference(found))
+    extra_in_consumer = sorted(found.difference(wanted))
+    return missing_from_consumer, extra_in_consumer
+
+
+def main() -> int:
+    """Compare every consumer's SECRET_PATTERNS against the canonical file.
+
+    Returns:
+        0 when no checked consumer differs from the canonical set, 1 when
+        the canonical file is missing or any consumer has drifted.
+
+    Raises:
+        SystemExit: Propagated from extract_patterns() when it rejects a
+            file's contents.
+    """
+    if not CANONICAL_FILE.exists():
+        print(f"::error::canonical not found at {CANONICAL_FILE}")
+        return 1
+
+    # Decode explicitly as UTF-8, matching fetch(). The read_text()
+    # default follows the locale, so on a non-UTF-8 runner the two
+    # sides could be decoded differently.
+    canonical = extract_patterns(
+        CANONICAL_FILE.read_text(encoding="utf-8"), str(CANONICAL_FILE)
+    )
+    print(f"canonical ({CANONICAL_FILE}): {len(canonical)} patterns")
+
+    drift = False
+    skipped = 0
+    for label, url in CONSUMERS:
+        try:
+            content = fetch(url)
+        except Exception as e:
+            # Fetch failures are warnings, not errors. A consumer
+            # whose default branch was just renamed (or whose file
+            # moved) shouldn't fail the lint until someone updates
+            # the URL above. Real drift is the failure mode this
+            # gate exists to catch — fetch reliability isn't.
+            print(f"::warning::{label}: fetch failed ({e}) — skipping")
+            skipped += 1
+            continue
+
+        consumer = extract_patterns(content, label)
+        missing, extra = diff_patterns(canonical, consumer)
+        if not missing and not extra:
+            print(f"  ✓ {label}: aligned ({len(consumer)} patterns)")
+            continue
+
+        drift = True
+        print(f"::error::DRIFT in {label}:")
+        for p in missing:
+            print(f"  -  missing from consumer: {p!r}")
+        for p in extra:
+            print(f"  -  extra in consumer (not in canonical): {p!r}")
+
+    if drift:
+        print()
+        print("::error::SECRET_PATTERNS drift detected. Bring consumer(s) into")
+        print("alignment with the canonical SECRET_PATTERNS array in")
+        print(f"{CANONICAL_FILE} by adding the missing patterns and removing")
+        print("any extras. The two sides must stay byte-aligned on the pattern")
+        print("list — the runtime hook is the developer's local pre-commit,")
+        print("the canonical is the org-wide CI gate, divergence means a token")
+        print("can pass one but get rejected by the other.")
+        return 1
+
+    print()
+    if skipped:
+        # Still a pass (fetch failures are deliberately non-fatal), but
+        # don't claim alignment for consumers that were never compared.
+        checked = len(CONSUMERS) - skipped
+        print(
+            f"✓ No drift in {checked} checked consumer(s); "
+            f"{skipped} skipped (fetch failed)."
+        )
+    else:
+        print("✓ All known consumers aligned with canonical SECRET_PATTERNS.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,114 @@
+name: Auto-promote :latest on E2E green
+
+# Retags `ghcr.io/molecule-ai/{platform,platform-tenant}:staging-<sha>`
+# → `:latest` whenever E2E Staging SaaS passes for a `main` push.
+#
+# This is the doc-aligned alternative to the (deferred) Phase 2 canary
+# fleet — staging E2E catches ~90% of what canary would catch at 0%
+# ongoing infra cost. See `molecule-controlplane/docs/canary-tenants.md`
+# section "Do we actually need canary right now?" — recommended
+# sequencing for the current scale (≤20 paying tenants).
+#
+# Why a separate workflow rather than folding into e2e-staging-saas.yml:
+#   - Keeps test concerns separate from release concerns.
+#   - Disabling promote (e.g. during an incident) is one toggle, not an
+#     edit to the long E2E workflow file.
+#   - When Phase 2 canary work eventually lands, the canary path can
+#     replace this file's trigger without touching the E2E workflow.
+#
+# Why trigger on `main` only:
+#   - `:latest` is what prod tenants pull. We only want SHAs that have
+#     reached `main` (via auto-promote-staging) to advance `:latest`.
+#   - Triggering on staging would let a staging-only revert advance
+#     `:latest` to a SHA that never reaches `main`, breaking the
+#     "production runs what's on `main`" invariant.
+
+on:
+  workflow_run:
+    workflows: ['E2E Staging SaaS (full lifecycle)']
+    types: [completed]
+    branches: [main]
+  workflow_dispatch:
+    inputs:
+      sha:
+        description: 'Short sha to promote (override; defaults to upstream workflow_run head_sha)'
+        required: false
+        type: string
+
+permissions:
+  contents: read
+  packages: write
+
+env:
+  IMAGE_NAME: ghcr.io/molecule-ai/platform
+  TENANT_IMAGE_NAME: ghcr.io/molecule-ai/platform-tenant
+
+jobs:
+  promote:
+    # Skip if E2E failed — `:latest` stays on the prior known-good
+    # digest. Manual dispatch always proceeds (the operator already
+    # decided to promote).
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
+    runs-on: ubuntu-latest
+    steps:
+      - name: Compute short sha
+        id: sha
+        # Expression values reach the script through `env:` instead of
+        # being interpolated into the script text. `${{ }}` is expanded
+        # before bash parses the script, so a dispatch input containing
+        # quotes or `$(...)` would otherwise run as shell code in a job
+        # that holds `packages: write`.
+        env:
+          INPUT_SHA: ${{ github.event.inputs.sha }}
+          UPSTREAM_SHA: ${{ github.event.workflow_run.head_sha }}
+        run: |
+          set -euo pipefail
+          # Manual override wins; otherwise use the sha the upstream E2E
+          # run was built from.
+          FULL="${INPUT_SHA:-$UPSTREAM_SHA}"
+          # Reject empty or non-hex values here with a clear message
+          # rather than failing later on a malformed `staging-<sha>` tag.
+          if ! [[ "$FULL" =~ ^[0-9a-fA-F]{7,40}$ ]]; then
+            echo "::error::No usable commit sha: expected 7-40 hex characters from the dispatch input or the upstream workflow_run."
+            exit 1
+          fi
+          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
+          echo "full=${FULL}" >> "$GITHUB_OUTPUT"
+
+      - uses: imjasonh/setup-crane@v0.4
+
+      - name: GHCR login
+        run: |
+          echo "${{ secrets.GITHUB_TOKEN }}" | \
+            crane auth login ghcr.io -u "${{ github.actor }}" --password-stdin
+
+      - name: Verify :staging-<sha> exists for both images
+        # Better to fail fast with a clear message than to half-tag
+        # (platform retagged but platform-tenant missing → tenants pull
+        # a stale image).
+        run: |
+          set -euo pipefail
+          for img in "${IMAGE_NAME}" "${TENANT_IMAGE_NAME}"; do
+            tag="${img}:staging-${{ steps.sha.outputs.short }}"
+            if ! crane manifest "$tag" >/dev/null 2>&1; then
+              echo "::error::Missing tag: $tag"
+              echo "::error::publish-workspace-server-image must complete on this SHA before auto-promote-on-e2e can retag :latest."
+              exit 1
+            fi
+            echo "  ok: $tag exists"
+          done
+
+      - name: Retag platform :staging-<sha> → :latest
+        run: |
+          crane tag "${IMAGE_NAME}:staging-${{ steps.sha.outputs.short }}" latest
+
+      - name: Retag tenant :staging-<sha> → :latest
+        run: |
+          crane tag "${TENANT_IMAGE_NAME}:staging-${{ steps.sha.outputs.short }}" latest
+
+      - name: Summary
+        run: |
+          {
+            echo "## E2E green → :latest promoted"
+            echo
+            if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
+              echo "- Trigger: manual dispatch"
+            else
+              echo "- Upstream E2E run: ${{ github.event.workflow_run.html_url }}"
+            fi
+            echo "- platform:staging-${{ steps.sha.outputs.short }} → :latest"
+            echo "- platform-tenant:staging-${{ steps.sha.outputs.short }} → :latest"
+            echo
+            echo "Tenant fleet auto-pulls within 5 min via IMAGE_AUTO_REFRESH=true."
+            echo "Force immediate fanout: dispatch redeploy-tenants-on-main.yml."
+          } >> "$GITHUB_STEP_SUMMARY"
@@ -61,13 +61,30 @@ jobs:
        run: |
          set -euo pipefail

-          # Required gate workflow names. Must match the `name:` field
-          # in the respective .github/workflows/*.yml files.
+          # Required gate workflow files. Use file paths (relative to
+          # .github/workflows/) rather than display names because:
+          #
+          #   1. `gh run list --workflow=<name>` is ambiguous when two
+          #      workflows have the same `name:` — observed 2026-04-28
+          #      with "CodeQL" matching both `codeql.yml` (explicit) and
+          #      GitHub's UI-configured Code-quality default setup
+          #      (internal "codeql"). gh CLI returns "could not resolve
+          #      to a unique workflow" → empty result → gate evaluated
+          #      as missing/none → auto-promote dead-locked despite all
+          #      checks actually passing.
+          #
+          #   2. File paths are the unique identifier for workflows;
+          #      `name:` is just a display string and can collide.
+          #
+          # When adding/removing a gate, update this list AND the
+          # branch-protection required-checks list (which uses check-run
+          # display names, not workflow names; the two are decoupled and
+          # should be kept in sync manually).
          GATES=(
-            "CI"
-            "E2E Staging Canvas (Playwright)"
-            "E2E API Smoke Test"
-            "CodeQL"
+            "ci.yml"
+            "e2e-staging-canvas.yml"
+            "e2e-api.yml"
+            "codeql.yml"
          )

          echo "head_sha=${HEAD_SHA}" >> "$GITHUB_OUTPUT"
@@ -0,0 +1,149 @@
+name: Auto-sync main → staging
+
+# Reflects every push to `main` back onto `staging` so the
+# staging-as-superset-of-main invariant holds.
+#
+# Background:
+#
+# `auto-promote-staging.yml` advances main via `git merge --ff-only`
+# + `git push origin main` — that's a clean fast-forward, no merge
+# commit. But manual merges of `staging → main` PRs through the
+# GitHub UI / API create a merge commit on main that staging
+# doesn't have. The next `staging → main` PR then evaluates as
+# "BEHIND" because staging is missing that merge commit, requiring
+# a manual `gh pr update-branch` round-trip.
+#
+# This happened twice on 2026-04-28 (PRs #2202, #2205, both manual
+# bridges). Each time the bridge needed update-branch + a re-CI
+# round before merging. Operationally annoying and avoidable.
+#
+# This workflow closes the gap automatically:
+#
+#   1. Push to main fires (regardless of source: auto-promote, UI
+#      merge, API merge, direct push).
+#   2. Check whether main is already in staging's ancestry — if
+#      yes, no-op (auto-promote-staging already kept them in sync
+#      via fast-forward).
+#   3. If not, try fast-forward staging to main first (works when
+#      staging hasn't diverged with its own commits).
+#   4. If ff fails (staging has commits main doesn't — feature work
+#      in flight), do a real merge with a "chore: sync" commit so
+#      staging absorbs main's tip while keeping its own history.
+#   5. Push staging.
+#
+# Loop safety:
+#
+# `GITHUB_TOKEN`-authored pushes do NOT trigger downstream workflow
+# runs by default (GitHub Actions safety). So when this workflow
+# pushes the synced staging, `auto-promote-staging.yml` is NOT
+# triggered by that push. The next developer push to staging triggers
+# auto-promote normally. No loop is even theoretically possible.
+#
+# Concurrency:
+#
+# Two pushes to main in quick succession (e.g., manual UI merge
+# immediately followed by auto-promote-staging's ff-merge) would
+# otherwise race two auto-sync runs against the same staging branch
+# — second push fails non-fast-forward. The concurrency group
+# serializes them so the second run sees the first's result.
+
+on:
+  push:
+    branches: [main]
+
+permissions:
+  contents: write
+
+concurrency:
+  group: auto-sync-main-to-staging
+  cancel-in-progress: false
+
+jobs:
+  sync-staging:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout staging
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          ref: staging
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Configure git author
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+      - name: Check if staging already contains main
+        id: check
+        run: |
+          set -euo pipefail
+          git fetch origin main
+          if git merge-base --is-ancestor origin/main HEAD; then
+            echo "needs_sync=false" >> "$GITHUB_OUTPUT"
+            {
+              echo "## ✅ No-op"
+              echo
+              echo "staging already contains \`origin/main\` ($(git rev-parse --short=8 origin/main))."
+              echo "auto-promote-staging or a previous auto-sync run already kept them aligned."
+            } >> "$GITHUB_STEP_SUMMARY"
+          else
+            echo "needs_sync=true" >> "$GITHUB_OUTPUT"
+            echo "::notice::staging is missing main's tip — sync needed"
+          fi
+
+      - name: Fast-forward staging to main
+        if: steps.check.outputs.needs_sync == 'true'
+        id: ff
+        run: |
+          set -euo pipefail
+          if git merge --ff-only origin/main; then
+            echo "did_ff=true" >> "$GITHUB_OUTPUT"
+            echo "::notice::Fast-forwarded staging to origin/main"
+          else
+            echo "did_ff=false" >> "$GITHUB_OUTPUT"
+            echo "::notice::ff failed — staging has its own commits; will create merge"
+          fi
+
+      - name: Merge main into staging (when ff fails)
+        if: steps.check.outputs.needs_sync == 'true' && steps.ff.outputs.did_ff != 'true'
+        run: |
+          set -euo pipefail
+          # ff failed because staging has commits main doesn't — typical
+          # in-flight feature work. Create a merge commit so staging
+          # absorbs main's tip while keeping its own history.
+          if ! git merge --no-ff origin/main -m "chore: sync main → staging (auto)"; then
+            # Hygiene: leave the work tree clean before failing. Doesn't
+            # affect future runs (each gets a fresh checkout) but a
+            # half-merged tree is an unpleasant artifact to debug if
+            # anyone ever shells into the runner.
+            git merge --abort || true
+            {
+              echo "## ❌ Conflict"
+              echo
+              echo "Auto-merge \`main → staging\` failed with conflicts."
+              echo "A human needs to resolve manually:"
+              echo
+              echo "    git checkout staging"
+              echo "    git merge origin/main"
+              echo "    # resolve, commit, push"
+            } >> "$GITHUB_STEP_SUMMARY"
+            exit 1
+          fi
+
+      - name: Push staging
+        if: steps.check.outputs.needs_sync == 'true'
+        run: |
+          set -euo pipefail
+          git push origin staging
+          {
+            if [ "${{ steps.ff.outputs.did_ff }}" = "true" ]; then
+              echo "## ✅ staging fast-forwarded"
+              echo
+              echo "staging is now at \`$(git rev-parse --short=8 HEAD)\` (== origin/main)."
+            else
+              echo "## ✅ staging absorbed main"
+              echo
+              echo "staging is now at \`$(git rev-parse --short=8 HEAD)\` with a merge commit absorbing main's tip."
+            fi
+          } >> "$GITHUB_STEP_SUMMARY"
@@ -1,27 +1,73 @@
 name: E2E API Smoke Test
 # Extracted from ci.yml so workflow-level concurrency can protect this job
 # from run-level cancellation (issue #458).
+#
+# Trigger model (changed 2026-04-28 — see auto-promote gap below):
+#
+# This workflow always FIRES on push/pull_request to staging+main, but
+# only does real work when paths under `workspace-server/`,
+# `tests/e2e/`, or this workflow file changed. The detect-changes job
+# uses dorny/paths-filter to decide; the e2e-api job runs only if
+# changes match. Otherwise the no-op job emits success so the workflow
+# always produces a `completed/success` run record.
+#
+# Why: auto-promote-staging.yml's gate-check (line 99) treats "workflow
+# didn't run" as failure, which dead-locked any platform-only or
+# test-only push to staging that didn't touch workspace-server paths.
+# Dropping the path filter on the trigger and gating real work
+# internally guarantees the workflow always emits a result that the
+# auto-promote chain can read. Same pattern applied to
+# e2e-staging-canvas.yml in the same PR.

 on:
  push:
    branches: [main, staging]
-    paths:
-      - 'workspace-server/**'
-      - 'tests/e2e/**'
-      - '.github/workflows/e2e-api.yml'
  pull_request:
    branches: [main, staging]
-    paths:
-      - 'workspace-server/**'
-      - 'tests/e2e/**'
-      - '.github/workflows/e2e-api.yml'
+  workflow_dispatch:

 concurrency:
  group: e2e-api-${{ github.ref }}
  cancel-in-progress: false

 jobs:
+  detect-changes:
+    runs-on: ubuntu-latest
+    outputs:
+      api: ${{ steps.decide.outputs.api }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dorny/paths-filter@v3
+        id: filter
+        with:
+          filters: |
+            api:
+              - 'workspace-server/**'
+              - 'tests/e2e/**'
+              - '.github/workflows/e2e-api.yml'
+      - id: decide
+        # Always run real work for manual dispatch — no diff context to
+        # filter against and ops dispatching this expects the suite to
+        # actually exercise the platform.
+        run: |
+          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
+            echo "api=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "api=${{ steps.filter.outputs.api }}" >> "$GITHUB_OUTPUT"
+          fi
+
+  no-op:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.api != 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - run: |
+          echo "No workspace-server / tests/e2e / workflow changes — E2E API gate satisfied without running tests."
+          echo "::notice::E2E API Smoke Test no-op pass (paths filter excluded this commit)."
+
  e2e-api:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.api == 'true'
    name: E2E API Smoke Test
    runs-on: ubuntu-latest
    timeout-minutes: 15
@@ -13,16 +13,23 @@ name: E2E Staging Canvas (Playwright)
 # workflow — mirrors what PR #1891 does for e2e-api.yml.

 on:
+  # Trigger model (changed 2026-04-28 — see auto-promote gap below):
+  #
+  # Always fires on push/pull_request; only does real work when canvas/
+  # or this workflow file changed. The detect-changes job uses
+  # dorny/paths-filter to decide; the playwright job runs only if
+  # changes match. Otherwise no-op emits success so the workflow always
+  # produces a `completed/success` run record.
+  #
+  # Why: auto-promote-staging.yml's gate-check (line 99) treats
+  # "workflow didn't run" as failure, which dead-locked platform-only
+  # pushes to staging. Dropping the trigger path filter and gating real
+  # work internally guarantees a result the auto-promote chain can
+  # read. Same pattern applied to e2e-api.yml in the same PR.
  push:
    branches: [main, staging]
-    paths:
-      - 'canvas/**'
-      - '.github/workflows/e2e-staging-canvas.yml'
  pull_request:
    branches: [main, staging]
-    paths:
-      - 'canvas/**'
-      - '.github/workflows/e2e-staging-canvas.yml'
  workflow_dispatch:
  schedule:
    # Weekly on Sunday 08:00 UTC — catches Chrome / Playwright / Next.js
@@ -34,7 +41,41 @@ concurrency:
  cancel-in-progress: false

 jobs:
+  detect-changes:
+    runs-on: ubuntu-latest
+    outputs:
+      canvas: ${{ steps.decide.outputs.canvas }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dorny/paths-filter@v3
+        id: filter
+        with:
+          filters: |
+            canvas:
+              - 'canvas/**'
+              - '.github/workflows/e2e-staging-canvas.yml'
+      - id: decide
+        # Always run real tests for manual dispatch and the weekly cron —
+        # both exist precisely to exercise the suite, regardless of diff.
+        run: |
+          if [ "${{ github.event_name }}" = "workflow_dispatch" ] || [ "${{ github.event_name }}" = "schedule" ]; then
+            echo "canvas=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "canvas=${{ steps.filter.outputs.canvas }}" >> "$GITHUB_OUTPUT"
+          fi
+
+  no-op:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.canvas != 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - run: |
+          echo "No canvas / workflow changes — E2E Staging Canvas gate satisfied without running tests."
+          echo "::notice::E2E Staging Canvas no-op pass (paths filter excluded this commit)."
+
  playwright:
+    needs: detect-changes
+    if: needs.detect-changes.outputs.canvas == 'true'
    name: Canvas tabs E2E
    runs-on: ubuntu-latest
    timeout-minutes: 40
@@ -79,6 +79,7 @@ jobs:
      id-token: write   # PyPI Trusted Publisher (OIDC) — no PYPI_TOKEN needed
    outputs:
      version: ${{ steps.version.outputs.version }}
+      wheel_sha256: ${{ steps.wheel_hash.outputs.wheel_sha256 }}
    steps:
      - uses: actions/checkout@v4

@@ -129,6 +130,28 @@ jobs:
        working-directory: ${{ runner.temp }}/runtime-build
        run: python -m build

+      - name: Capture wheel SHA256 for cascade content-verification
+        # Recorded BEFORE upload so the cascade probe can verify the
+        # bytes Fastly serves under the new version's URL match what
+        # we built. Closes a hole left by #2197: that probe verified
+        # pip can resolve the version (catches propagation lag) but
+        # not that the wheel content matches (would silently pass a
+        # Fastly stale-content scenario where the new version's URL
+        # serves an old wheel binary).
+        id: wheel_hash
+        working-directory: ${{ runner.temp }}/runtime-build
+        run: |
+          set -eu
+          WHEEL=$(ls dist/*.whl 2>/dev/null | head -1)
+          if [ -z "$WHEEL" ]; then
+            # Single quotes around the command name, not backticks:
+            # inside a double-quoted bash string, backticks are command
+            # substitution and would re-run the build on this error path.
+            echo "::error::No .whl in dist/ — 'python -m build' must have failed silently"
+            exit 1
+          fi
+          HASH=$(sha256sum "$WHEEL" | awk '{print $1}')
+          echo "wheel_sha256=${HASH}" >> "$GITHUB_OUTPUT"
+          echo "Local wheel SHA256 (pre-upload): ${HASH}"
+          echo "Wheel filename: $(basename "$WHEEL")"
+
      - name: Verify package contents (sanity)
        working-directory: ${{ runner.temp }}/runtime-build
        run: |
@@ -212,6 +235,56 @@ jobs:
              default_output_modes=['text/plain', 'application/json'],
          )
          print('✓ AgentCard call-shape smoke passed')
+
+          # Well-known agent-card path probe alignment. main.py's
+          # _send_initial_prompt() polls AGENT_CARD_WELL_KNOWN_PATH
+          # to know when the local A2A server is ready. If the SDK
+          # ever splits the constant value from the path that
+          # create_agent_card_routes() actually mounts at, every
+          # workspace silently drops its initial_prompt:
+          #   - Probe gets 404 every attempt.
+          #   - Falls through to 'server not ready after 30s,
+          #     skipping' even though the server is fine.
+          #   - The user hits a fresh chat with no kickoff context.
+          # This was the #2193 incident class — the v0.x → v1.x
+          # rename of /.well-known/agent.json → /.well-known/agent-card.json
+          # plus the constant itself moving to a2a.utils.constants.
+          # source-tree pytest (test_agent_card_well_known_path.py)
+          # catches main.py-side regressions; this catches the
+          # SDK-side ones BEFORE PyPI upload.
+          from a2a.utils.constants import AGENT_CARD_WELL_KNOWN_PATH
+          from a2a.server.routes import create_agent_card_routes
+          mounted_paths = [
+              getattr(r, 'path', None)
+              for r in create_agent_card_routes(
+                  AgentCard(
+                      name='wk-smoke',
+                      description='well-known mount alignment',
+                      version='0.0.0-smoke',
+                  )
+              )
+          ]
+          assert AGENT_CARD_WELL_KNOWN_PATH in mounted_paths, (
+              f'AGENT_CARD_WELL_KNOWN_PATH ({AGENT_CARD_WELL_KNOWN_PATH!r}) '
+              f'is NOT among paths mounted by create_agent_card_routes '
+              f'({mounted_paths!r}). The SDK constant and its own route '
+              f'factory have drifted — workspace probes will 404 forever, '
+              f'silently dropping every workspace initial_prompt.'
+          )
+          print(f'✓ well-known mount alignment OK ({AGENT_CARD_WELL_KNOWN_PATH})')
+
+          # Message helper smoke. a2a-sdk renamed
+          # new_agent_text_message → new_text_message in the v1.x
+          # protobuf-flat migration (per the v0→v1 cheat sheet). main.py
+          # and a2a_executor.py call new_text_message in hot paths; if
+          # the import breaks, every reply errors with ImportError before
+          # the message even leaves the workspace. Importing here
+          # catches a future v2.x rename at publish time.
+          from a2a.helpers import new_text_message
+          msg = new_text_message('smoke')
+          assert msg is not None, 'new_text_message returned None'
+          print('✓ message helper import + call OK')
+
          print('✓ smoke import passed')
          "

@@ -239,28 +312,98 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Wait for PyPI to propagate the new version
-        # PyPI accepts the upload, then takes a few seconds to make it
-        # available via the package index. If the cascade fires too
-        # fast, downstream template builds run `pip install` against
-        # an index that hasn't seen the new version yet — they resolve
-        # to the previous one, and docker layer cache then locks that
-        # in for subsequent rebuilds (the cache trap that bit us five
-        # times tonight).
+        # PyPI accepts the upload, then takes a few seconds to make the
+        # new version visible across all THREE surfaces pip touches:
+        #   1. /pypi/<pkg>/<ver>/json — metadata endpoint
+        #   2. /simple/<pkg>/         — pip's primary download index
+        #   3. files.pythonhosted.org — CDN-fronted wheel binary
+        # Each has its own cache. The previous check polled only (1)
+        # and would let the cascade fire while (2) or (3) still served
+        # the previous version, so downstream `pip install` resolved
+        # to the old wheel. Docker layer cache then locked that stale
+        # resolution in for subsequent rebuilds (the cache trap that
+        # bit us five times in one night).
        #
-        # Poll PyPI's JSON API for up to 60s. Cheap (~50ms per poll),
-        # avoids over-trusting "publish job said success."
+        # Two-stage probe per poll:
+        #   (a) `pip install --no-cache-dir PACKAGE==VERSION` — succeeds
+        #       only when the version is resolvable. Catches surface (1)
+        #       and (2) propagation lag.
+        #   (b) `pip download` of the same wheel + SHA256 compare against
+        #       the just-built dist's hash. Catches surface (3) lag AND
+        #       Fastly serving stale content under the new version's URL
+        #       (a separate Fastly-corruption mode that pip-install alone
+        #       can't see, since pip install resolves+unpacks against
+        #       whatever bytes Fastly returns and never inspects them).
+        # Both must pass before the cascade fans out.
+        #
+        # The venv is reused across polls; only `pip install`/`pip
+        # download` run in the loop, with --force-reinstall +
+        # --no-cache-dir so the previous poll's cached state doesn't
+        # mask propagation lag.
        env:
          RUNTIME_VERSION: ${{ needs.publish.outputs.version }}
+          EXPECTED_SHA256: ${{ needs.publish.outputs.wheel_sha256 }}
        run: |
          set -eu
+          if [ -z "$EXPECTED_SHA256" ]; then
+            echo "::error::publish job did not expose wheel_sha256 — cannot verify wheel content. Refusing to fan out cascade."
+            exit 1
+          fi
+          python -m venv /tmp/propagation-probe
+          PROBE=/tmp/propagation-probe/bin
+          $PROBE/pip install --upgrade --quiet pip
+          # Poll budget: 30 attempts × (~3-5s pip install + ~3s pip
+          # download + 4s sleep) ≈ 5-6 min wall on a slow GH runner.
+          # Generous vs PyPI's typical few-seconds propagation;
+          # failures past this are signal of a real PyPI / Fastly
+          # issue, not just lag.
          for i in $(seq 1 30); do
-            if curl -fsS "https://pypi.org/pypi/molecule-ai-workspace-runtime/${RUNTIME_VERSION}/json" >/dev/null 2>&1; then
-              echo "::notice::✓ PyPI serving ${RUNTIME_VERSION} after ${i} polls"
-              exit 0
+            # Stage (a): can pip resolve and install the version?
+            if $PROBE/pip install \
+                  --quiet \
+                  --no-cache-dir \
+                  --force-reinstall \
+                  --no-deps \
+                  "molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \
+                  >/dev/null 2>&1; then
+              INSTALLED=$($PROBE/pip show molecule-ai-workspace-runtime 2>/dev/null \
+                          | awk -F': ' '/^Version:/{print $2}')
+              if [ "$INSTALLED" = "$RUNTIME_VERSION" ]; then
+                # Stage (b): does Fastly serve the bytes we uploaded?
+                # `pip download` writes the actual .whl file to disk so
+                # we can sha256sum it (vs `pip install` which unpacks
+                # and discards).
+                rm -rf /tmp/probe-dl
+                mkdir -p /tmp/probe-dl
+                if $PROBE/pip download \
+                      --quiet \
+                      --no-cache-dir \
+                      --no-deps \
+                      --dest /tmp/probe-dl \
+                      "molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \
+                      >/dev/null 2>&1; then
+                  WHEEL=$(ls /tmp/probe-dl/*.whl 2>/dev/null | head -1)
+                  if [ -n "$WHEEL" ]; then
+                    ACTUAL=$(sha256sum "$WHEEL" | awk '{print $1}')
+                    if [ "$ACTUAL" = "$EXPECTED_SHA256" ]; then
+                      echo "::notice::✓ pip resolves AND wheel content matches after ${i} poll(s) (sha256=${EXPECTED_SHA256})"
+                      exit 0
+                    fi
+                    # Hash mismatch: PyPI accepted our upload but Fastly
+                    # is serving different bytes under the version's URL.
+                    # Most often this is propagation lag of the BINARY
+                    # surface — the version is resolvable but the wheel
+                    # cache hasn't caught up. Retry.
+                    echo "::warning::poll ${i}: wheel content mismatch (got ${ACTUAL:0:12}…, want ${EXPECTED_SHA256:0:12}…) — Fastly likely still serving stale binary, retrying"
+                  fi
+                fi
+              fi
            fi
-            sleep 2
+            sleep 4
          done
-          echo "::error::PyPI never propagated ${RUNTIME_VERSION} within 60s — refusing to fan out cascade against stale index"
+          echo "::error::pip never resolved molecule-ai-workspace-runtime==${RUNTIME_VERSION} with matching wheel content within ~5 min."
+          echo "::error::Expected wheel SHA256: ${EXPECTED_SHA256}"
+          echo "::error::Refusing to fan out cascade against stale or corrupt PyPI surfaces."
          exit 1

      - name: Fan out repository_dispatch
@@ -10,15 +10,29 @@ name: Runtime Pin Compatibility
 #   4. Every tenant workspace crashed; the canary tenant caught it but
 #      only after 5 hours of degraded staging
 #
-# This workflow installs the runtime in a fresh Python venv from PyPI
-# and tries the same import the EC2 user-data does. If pip resolution
-# silently produces a broken combo, this gate fails before the tenant
-# image gets published.
+# This workflow installs the CURRENTLY PUBLISHED runtime from PyPI on
+# top of `workspace/requirements.txt` and smoke-imports. Catches:
+#   - Upstream PyPI yanks
+#   - Bad re-releases of molecule-ai-workspace-runtime
+#   - Already-shipped wheels that stop importing because a transitive
+#     dep moved underneath
+#
+# This is the "PyPI artifact health" half of pin compatibility. The
+# companion workflow `runtime-prbuild-compat.yml` covers the
+# "PR-introduced breakage" half by building the wheel from THIS PR's
+# workspace/ source. Splitting the two means each gets a narrow
+# `paths:` filter — the pypi-latest job no longer fires on doc-only
+# workspace/ edits whose content can't change what's currently on PyPI.

 on:
  push:
    branches: [main, staging]
    paths:
+      # Narrow filter: pypi-latest is sensitive only to changes that
+      # affect what we're INSTALLING (requirements.txt) or WHAT THE
+      # CHECK ITSELF DOES (this workflow file). Edits to workspace/
+      # source code don't change what's on PyPI right now, so they
+      # don't change this gate's verdict.
      - 'workspace/requirements.txt'
      - '.github/workflows/runtime-pin-compat.yml'
  pull_request:
@@ -42,8 +56,8 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  default-install:
-    name: Default install + import smoke
+  pypi-latest-install:
+    name: PyPI-latest install + import smoke
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
@@ -0,0 +1,100 @@
+name: Runtime PR-Built Compatibility
+
+# Companion to `runtime-pin-compat.yml`. That workflow tests what's
+# CURRENTLY PUBLISHED on PyPI; this workflow tests what WOULD BE
+# PUBLISHED if THIS PR merges.
+#
+# Why two workflows: the chicken-and-egg #128 fix added a "PR-built
+# wheel" job to the original runtime-pin-compat.yml, but both jobs
+# shared a `paths:` filter that was the union of their needs
+# (`workspace/**`). That meant the PyPI-latest job ran on every doc
+# edit even though the upstream PyPI artifact can't change with our
+# workspace/ source. Splitting the two means each gets a narrow
+# `paths:` filter that matches the inputs it actually depends on.
+#
+# Catches the failure mode where a PR adds an import requiring a newer
+# SDK than `workspace/requirements.txt` pins:
+#   1. Pip resolves the existing PyPI wheel + the old SDK pin → smoke
+#      passes (it imports the OLD main.py from the wheel, not the PR's
+#      new main.py).
+#   2. Merge → publish-runtime.yml ships a wheel WITH the new import.
+#   3. Tenant images redeploy → all crash on first boot with
+#      ImportError.
+#
+# By building from the PR's source and smoke-importing THAT wheel, we
+# fail at PR-time instead of after publish.
+
+on:
+  push:
+    branches: [main, staging]
+    paths:
+      # Broad filter: this workflow's verdict can change whenever any
+      # workspace/ source file changes (because the wheel we build is
+      # produced from those files), or when the build script itself
+      # changes (it controls the wheel layout).
+      - 'workspace/**'
+      - 'scripts/build_runtime_package.py'
+      - '.github/workflows/runtime-prbuild-compat.yml'
+  pull_request:
+    branches: [main, staging]
+    paths:
+      - 'workspace/**'
+      - 'scripts/build_runtime_package.py'
+      - '.github/workflows/runtime-prbuild-compat.yml'
+  workflow_dispatch:
+  # Required-check support: when this becomes a branch-protection gate,
+  # merge_group runs let the queue green-check this in addition to PRs.
+  merge_group:
+    types: [checks_requested]
+  # No cron: the same pre-merge run already covered the commit, and
+  # re-running daily wouldn't surface anything new (workspace/ doesn't
+  # change between cron firings unless a PR already passed this gate).
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  local-build-install:
+    # Builds the wheel from THIS PR's workspace/ + scripts/ and tests
+    # IT — the artifact that WOULD be published if this PR merges.
+    name: PR-built wheel + import smoke
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: pip
+          cache-dependency-path: workspace/requirements.txt
+      - name: Install build tooling
+        run: pip install build
+      - name: Build wheel from PR source (mirrors publish-runtime.yml)
+        # Use a fixed test version so the wheel filename is predictable.
+        # Doesn't reach PyPI — this build is local-only for the smoke.
+        # Use the SAME build script as publish-runtime.yml's build step.
+        # The output directory differs (`/tmp/runtime-build` here vs
+        # `${{ runner.temp }}/runtime-build` in publish-runtime.yml — on
+        # ubuntu-latest runner.temp is NOT /tmp, so the two call sites
+        # are not byte-identical). The smoke import is
+        # also intentionally narrower than publish's: this gate exists
+        # to catch SDK-version-import drift specifically; full invariant
+        # coverage lives in publish-runtime.yml's own pre-PyPI smoke.
+        run: |
+          python scripts/build_runtime_package.py \
+            --version "0.0.0.dev0+pin-compat" \
+            --out /tmp/runtime-build
+          cd /tmp/runtime-build && python -m build
+      - name: Install built wheel + workspace requirements
+        run: |
+          python -m venv /tmp/venv-built
+          /tmp/venv-built/bin/pip install --upgrade pip
+          /tmp/venv-built/bin/pip install /tmp/runtime-build/dist/*.whl
+          /tmp/venv-built/bin/pip install -r workspace/requirements.txt
+          /tmp/venv-built/bin/pip show molecule-ai-workspace-runtime a2a-sdk \
+            | grep -E '^(Name|Version):'
+      - name: Smoke import the PR-built wheel
+        env:
+          WORKSPACE_ID: 00000000-0000-0000-0000-000000000001
+        run: |
+          /tmp/venv-built/bin/python -c "from molecule_runtime.main import main_sync; print('PR-built runtime imports OK')"
@@ -0,0 +1,57 @@
+name: SECRET_PATTERNS drift lint
+
+# Detects when the canonical SECRET_PATTERNS array in
+# .github/workflows/secret-scan.yml diverges from known consumer
+# mirrors (workspace-runtime's bundled pre-commit hook today; more
+# can be added as the consumer set grows).
+#
+# Why this exists: every side that scans for credentials has its own
+# copy of the pattern list. They drift — most recently the runtime
+# hook lagged the canonical by one pattern (sk-cp- / MiniMax F1088),
+# so a developer's local pre-commit would let a sk-cp- token through
+# while the org-wide CI scan would refuse it. The cost of that drift
+# is dev confusion + delayed feedback; the fix is automated detection.
+#
+# Triggers:
+#   - schedule: daily 05:00 UTC. Catches drift introduced by edits
+#     to a consumer copy that didn't update canonical here.
+#   - push to main/staging where the canonical or this lint changed:
+#     catches the inverse — canonical updated but consumers not yet
+#     bumped. The lint will fail the push; that's intentional, the
+#     person editing canonical is the right person to also update
+#     the consumer.
+#   - workflow_dispatch: ad-hoc operator runs.
+
+on:
+  schedule:
+    # 05:00 UTC = 22:00 PT / 01:00 ET. Quiet hours so a failure
+    # email lands when humans are starting their day, not
+    # interrupting it.
+    - cron: "0 5 * * *"
+  push:
+    branches: [main, staging]
+    paths:
+      - ".github/workflows/secret-scan.yml"
+      - ".github/workflows/secret-pattern-drift.yml"
+      - ".github/scripts/lint_secret_pattern_drift.py"
+  workflow_dispatch:
+
+# GITHUB_TOKEN scoped to read-only. The lint only does git checkout
+# + HTTPS GETs to public consumer files; no writes to anything.
+permissions:
+  contents: read
+
+jobs:
+  lint:
+    name: Detect SECRET_PATTERNS drift
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Run drift lint
+        run: python3 .github/scripts/lint_secret_pattern_drift.py
@@ -148,7 +148,13 @@ jobs:
          SELF=".github/workflows/secret-scan.yml"

          OFFENDING=""
-          for f in $CHANGED; do
+          # `while IFS= read -r` (not `for f in $CHANGED`) so filenames
+          # containing whitespace don't word-split silently — a path
+          # with a space would otherwise produce two iterations on
+          # tokens that aren't real filenames, breaking the
+          # self-exclude + diff lookup.
+          while IFS= read -r f; do
+            [ -z "$f" ] && continue
            [ "$f" = "$SELF" ] && continue
            if [ -n "$DIFF_RANGE" ]; then
              ADDED=$(git diff --no-color --unified=0 "$BASE" "$HEAD" -- "$f" 2>/dev/null | grep -E '^\+[^+]' || true)
@@ -164,11 +170,18 @@ jobs:
                break
              fi
            done
-          done
+          done <<< "$CHANGED"

          if [ -n "$OFFENDING" ]; then
            echo "::error::Credential-shaped strings detected in diff additions:"
-            printf "$OFFENDING"
+            # `printf '%b' "$OFFENDING"` interprets backslash escapes
+            # (the literal `\n` we appended above becomes a newline)
+            # WITHOUT treating OFFENDING as a format string. Plain
+            # `printf "$OFFENDING"` is a format-string sink: a filename
+            # containing `%` would be interpreted as a conversion
+            # specifier, corrupting the error message (or printing
+            # `%(missing)` artifacts).
+            printf '%b' "$OFFENDING"
            echo ""
            echo "The actual matched values are NOT echoed here, deliberately —"
            echo "round-tripping a leaked credential into CI logs widens the blast"
@@ -1,6 +1,6 @@
 # Ecosystem Watch — Phase 30 Competitive Tracking
 **Created by:** PMM
-**Date:** 2026-04-21
+**Date:** 2026-04-27
 **Status:** ACTIVE — competitor monitoring in progress
 **Phase:** 30 — Remote Workspaces + Cross-Network Federation

@@ -118,7 +118,7 @@ Track competitor releases and market events that affect Phase 30 positioning. En
 - **Check frequency:** Every marketing cycle
 - **Trigger:** Any competitor shipping something that invalidates a Phase 30 positioning claim
 - **File location:** `docs/ecosystem-watch.md` (origin/main)
- **Last updated by:** PMM | 2026-04-23 (LangGraph PRs verified OPEN; new feat PRs #1730/#1702/#1731 logged; release note written)
+- **Last updated by:** PMM | 2026-04-27 (weekly refresh — all competitor versions, stars, and PRs verified current; Clawith v1.9.0 added)

 ---

@@ -7,20 +7,46 @@ This path is aligned to the current repository and current UI. It gets you from
 - Docker + Docker Compose v2
 - Node.js 20+
 - Go 1.25+
+- `jq` (for the template-registry clone in `setup.sh`)
 - One model/API key for the runtime you want to use
  - `ANTHROPIC_API_KEY`
  - `OPENAI_API_KEY`
  - `GOOGLE_API_KEY`
  - or another provider routed through LiteLLM

-## Step 1: Clone the repository
+## The one-command path
+
+```bash
+git clone https://github.com/Molecule-AI/molecule-monorepo.git
+cd molecule-monorepo
+./scripts/dev-start.sh
+```
+
+That single script:
+
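+1. Writes `MOLECULE_ENV=development` into `.env` if it isn't already set (an existing `.env` is preserved on re-runs) — this enables the dev-mode auth hatch, so no `ADMIN_TOKEN` is needed locally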
+2. Brings up Postgres, Redis, Langfuse, ClickHouse, and Temporal via `infra/scripts/setup.sh`
+3. Populates the workspace template + plugin registry from `manifest.json`
+4. Builds and starts the platform on `http://localhost:8080`
+5. Installs canvas deps (first run) and starts the canvas on `http://localhost:3000`
+6. Prints next-step instructions and waits on both processes (their logs go to `/tmp/molecule-platform.log` and `/tmp/molecule-canvas.log`) — `Ctrl-C` tears everything down
+
+Total wall-clock: ~30 seconds for a re-run, ~2 minutes for a first run (npm install + docker pulls).
+
+Once the canvas is up: open it, add your model API key in **Config → Secrets & API Keys → Global**, then click a template card or **+ Create blank workspace**.
+
+## Manual setup (advanced)
+
+If you'd rather run each component yourself — useful when you're iterating on the platform binary or the canvas in isolation — follow the steps below. Each section is what `dev-start.sh` does internally; running them by hand gives you per-component logs and lets you keep one piece running while you restart another.
+
+### Step 1: Clone the repository

 ```bash
 git clone https://github.com/Molecule-AI/molecule-monorepo.git
 cd molecule-monorepo
 ```

-## Step 2: Start the shared infrastructure
+### Step 2: Start the shared infrastructure

 Recommended:

@@ -28,7 +54,7 @@ Recommended:
 ./infra/scripts/setup.sh
 ```

-That brings up Postgres, Redis, and Langfuse.
+That brings up Postgres, Redis, Langfuse, ClickHouse, and Temporal.

 If you only want the raw compose flow:

@@ -36,7 +62,7 @@ If you only want the raw compose flow:
 docker compose -f docker-compose.infra.yml up -d
 ```

-## Step 3: Start the platform
+### Step 3: Start the platform

 ```bash
 cd workspace-server
@@ -45,7 +71,7 @@ go run ./cmd/server

 The control plane listens on `http://localhost:8080`.

-## Step 4: Start the canvas
+### Step 4: Start the canvas

 In a new terminal:

@@ -1,69 +1,182 @@
 #!/bin/sh
 # dev-start.sh — one-command local development environment.
 #
-# Starts: Postgres, Redis, Platform (Go :8080), Canvas (Next.js :3000)
-# Stops all on Ctrl-C.
+# What it does (in order):
+#   1. Writes MOLECULE_ENV=development into .env if missing (dev-mode auth
+#      hatch; see #684 fail-open). ADMIN_TOKEN is deliberately NOT generated.
+#   2. Runs infra/scripts/setup.sh (postgres + redis + langfuse + clickhouse
+#      + temporal + populates template/plugin registry from manifest.json)
+#   3. Starts the platform (Go :8080), waits for /health
+#   4. Starts the canvas (Next.js :3000), waits for HTTP 200
+#   5. Prints a readiness banner with API-key add instructions
+#   6. On Ctrl-C, kills both background processes and tears down infra
 #
 # Prerequisites:
-#   - Docker (for Postgres + Redis)
-#   - Go 1.25+ (for platform)
-#   - Node.js 20+ (for canvas)
+#   - Docker + Docker Compose v2  (for postgres/redis/langfuse/etc)
+#   - Go 1.25+                     (for the platform binary)
+#   - Node.js 20+                  (for the canvas)
+#   - jq                           (for setup.sh's manifest clone — optional;
+#                                   without it, template palette will be
+#                                   empty until you run clone-manifest.sh
+#                                   manually)
 #
 # Usage:
 #   ./scripts/dev-start.sh
-#   # Open http://localhost:3000
+#   # Open http://localhost:3000, add your model API key in
+#   # Config → Secrets & API Keys, then create your first workspace.
+#
+# Idempotent: re-running picks up where the last run left off (existing
+# .env is preserved, npm install skipped if node_modules present, etc).

 set -e

 ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+ENV_FILE="$ROOT/.env"

 cleanup() {
    echo ""
-    echo "Shutting down..."
+    echo "==> Shutting down..."
    kill $PLATFORM_PID $CANVAS_PID 2>/dev/null || true
+    # Use setup.sh's compose file (full infra) since that's what we
+    # brought up. `down` keeps named volumes by default — call with
+    # --volumes here only if you want a clean slate (we don't, since
+    # idempotent re-runs are the usual case).
    docker compose -f "$ROOT/docker-compose.infra.yml" down 2>/dev/null || true
-    echo "Done."
+    echo "    Done."
 }
 trap cleanup EXIT INT TERM

-echo "==> Starting infrastructure (Postgres, Redis)..."
-docker compose -f "$ROOT/docker-compose.infra.yml" up -d
+# ─────────────────────────────────────────────── 1. dev-mode auth posture

-echo "==> Waiting for Postgres..."
-until docker compose -f "$ROOT/docker-compose.infra.yml" exec -T postgres pg_isready -q 2>/dev/null; do
-    sleep 1
-done
-echo "    Postgres ready."
+# The AdminAuth middleware closes its fail-open the moment the first
+# workspace token lands in the DB — at which point /workspaces and
+# other admin routes 401 unless the caller has either ADMIN_TOKEN or
+# the dev-mode escape hatch. The canvas at localhost:3000 has no
+# bearer token to send, so without one of those two paths it can't
+# call admin endpoints after a workspace exists.
+#
+# For local dev the right posture is the dev-mode escape hatch:
+#
+#   MOLECULE_ENV=development AND ADMIN_TOKEN unset
+#
+# That makes middleware.isDevModeFailOpen() return true and lets the
+# canvas keep working without a bearer. Setting ADMIN_TOKEN here
+# would BREAK the canvas (it has no way to read that token in dev).
+#
+# For SaaS the platform is provisioned with ADMIN_TOKEN set AND
+# MOLECULE_ENV=production — either one closes the hatch. So the dev
+# mode signal here is safe (it's only active when both other knobs
+# are absent).
+if [ -f "$ENV_FILE" ] && grep -q '^MOLECULE_ENV=' "$ENV_FILE"; then
+    echo "==> Reusing MOLECULE_ENV from existing .env"
+else
+    echo "==> Setting MOLECULE_ENV=development in .env (dev-mode auth hatch)"
+    {
+        if [ -f "$ENV_FILE" ]; then
+            cat "$ENV_FILE"
+            echo ""
+        fi
+        echo "# Generated by scripts/dev-start.sh on $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+        echo "# Local-dev auth posture: dev-mode fail-open lets the canvas at"
+        echo "# localhost:3000 call admin endpoints without a bearer token."
+        echo "# DO NOT set ADMIN_TOKEN here in dev — it would close the hatch"
+        echo "# and the canvas would 401 on every admin call."
+        echo "MOLECULE_ENV=development"
+    } > "$ENV_FILE.tmp"
+    mv "$ENV_FILE.tmp" "$ENV_FILE"
+    echo "    Saved to $ENV_FILE"
+fi

-echo "==> Starting Platform (Go :8080)..."
+# Source .env so the platform inherits MOLECULE_ENV (and anything else
+# the user has added — e.g. ANTHROPIC_API_KEY for skipping the canvas
+# Secrets UI). `set -a` exports every assignment in the sourced file
+# without us having to know the var names.
+set -a
+# shellcheck disable=SC1090
+. "$ENV_FILE"
+set +a
+
+# ─────────────────────────────────────────────── 2. infra + templates
+
+# Use setup.sh (not raw docker-compose) so the template registry gets
+# populated from manifest.json. Without that, the canvas template
+# palette is empty and the user has to manually clone repos — exactly
+# the friction this script exists to eliminate.
+echo "==> Running infra/scripts/setup.sh (infra + template registry)"
+"$ROOT/infra/scripts/setup.sh"
+
+# ─────────────────────────────────────────────── 3. platform
+
+echo "==> Starting Platform (Go :8080)"
 cd "$ROOT/workspace-server"
-go run ./cmd/server &
+go run ./cmd/server > /tmp/molecule-platform.log 2>&1 &
 PLATFORM_PID=$!

-echo "==> Waiting for Platform health..."
-until curl -sf http://localhost:8080/health >/dev/null 2>&1; do
+echo "    Waiting for Platform /health..."
+PLATFORM_READY=0
+for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \
+         21 22 23 24 25 26 27 28 29 30; do
+    if curl -sf http://localhost:8080/health >/dev/null 2>&1; then
+        echo "    Platform ready (t+${i}s)"
+        PLATFORM_READY=1
+        break
+    fi
    sleep 1
 done
-echo "    Platform ready."
+if [ "$PLATFORM_READY" -ne 1 ]; then
+    echo "    ✗ Platform did not respond in 30s — check /tmp/molecule-platform.log"
+    exit 1
+fi

-echo "==> Starting Canvas (Next.js :3000)..."
+# ─────────────────────────────────────────────── 4. canvas
+
+echo "==> Starting Canvas (Next.js :3000)"
 cd "$ROOT/canvas"
 if [ ! -d node_modules ]; then
+    echo "    First-run: npm install (~30-60s)"
    npm install
 fi
-npm run dev &
+npm run dev > /tmp/molecule-canvas.log 2>&1 &
 CANVAS_PID=$!

-echo ""
-echo "============================================"
-echo "  Molecule AI dev environment running"
-echo ""
-echo "  Canvas:   http://localhost:3000"
-echo "  Platform: http://localhost:8080"
-echo "  Postgres: localhost:5432"
-echo "  Redis:    localhost:6379"
-echo ""
-echo "  Press Ctrl-C to stop all services"
-echo "============================================"
+echo "    Waiting for Canvas HTTP 200..."
+CANVAS_READY=0
+for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \
+         21 22 23 24 25 26 27 28 29 30; do
+    code=$(curl -sf -o /dev/null -w "%{http_code}" http://localhost:3000/ 2>/dev/null || echo "0")
+    if [ "$code" = "200" ]; then
+        echo "    Canvas ready (t+${i}s)"
+        CANVAS_READY=1
+        break
+    fi
+    sleep 1
+done
+if [ "$CANVAS_READY" -ne 1 ]; then
+    echo "    ✗ Canvas did not respond in 30s — check /tmp/molecule-canvas.log"
+    exit 1
+fi
+
+# ─────────────────────────────────────────────── 5. readiness banner
+
+cat <<EOF
+
+═══════════════════════════════════════════════════════════
+  Molecule AI dev environment ready
+
+  Canvas:   http://localhost:3000
+  Platform: http://localhost:8080
+  Logs:     /tmp/molecule-platform.log
+            /tmp/molecule-canvas.log
+
+  Next steps:
+    1. Open http://localhost:3000 in a browser.
+    2. Add your model API key in
+         Config → Secrets & API Keys → Global
+       (skip if ANTHROPIC_API_KEY / OPENAI_API_KEY is already
+        set in .env — the platform inherits it.)
+    3. Click a template card or "+ Create blank workspace".
+
+  Press Ctrl-C to stop all services.
+═══════════════════════════════════════════════════════════
+EOF

 wait
@@ -86,24 +86,47 @@ cleanup_org() {
  fi

  log "🧹 Tearing down org $SLUG..."
-  curl "${CURL_COMMON[@]}" -X DELETE "$CP_URL/cp/admin/tenants/$SLUG" \
+
+  # The DELETE handler runs the GDPR Art. 17 cascade synchronously
+  # (Stripe + Redis + EC2 terminate + CF tunnel + DNS + DB rows). Real
+  # observed wall-time on prod-shaped infra is ~30–90s — EC2 termination
+  # alone takes 30–60s. The 5–15s estimate in `purge.go`'s comment is
+  # the API-call cost, NOT the AWS-side time-to-termination it waits on.
+  #
+  # Two-part patience to match reality:
+  #   1. 120s curl timeout on the DELETE itself (was 30s) so the
+  #      synchronous cascade has room to complete in-band.
+  #   2. Poll up to 60s after for organizations.status='purged' (or row
+  #      gone) instead of one rigid 10s sleep — covers the case where
+  #      DELETE returns 5xx mid-cascade and the cascade finishes anyway,
+  #      and the case where DELETE legitimately exceeds 120s and we want
+  #      eventual-consistency confirmation.
+  curl "${CURL_COMMON[@]}" --max-time 120 -X DELETE "$CP_URL/cp/admin/tenants/$SLUG" \
    -H "Authorization: Bearer $ADMIN_TOKEN" \
    -H "Content-Type: application/json" \
    -d "{\"confirm\":\"$SLUG\"}" >/dev/null 2>&1 \
    && ok "Teardown request accepted" \
    || log "Teardown returned non-2xx (may already be gone)"

-  sleep 10
-  local leak_count
-  leak_count=$(curl "${CURL_COMMON[@]}" "$CP_URL/cp/admin/orgs" \
-    -H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
-    | python3 -c "import json,sys; d=json.load(sys.stdin); print(sum(1 for o in d.get('orgs', []) if o.get('slug')=='$SLUG' and o.get('status') != 'purged'))" \
-    2>/dev/null || echo 0)
+  local leak_count=1
+  local elapsed=0
+  while [ "$elapsed" -lt 60 ]; do
+    leak_count=$(curl "${CURL_COMMON[@]}" "$CP_URL/cp/admin/orgs" \
+      -H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
+      | python3 -c "import json,sys; d=json.load(sys.stdin); print(sum(1 for o in d.get('orgs', []) if o.get('slug')=='$SLUG' and o.get('status') != 'purged'))" \
+      2>/dev/null || echo 1)
+    if [ "$leak_count" = "0" ]; then
+      break
+    fi
+    sleep 5
+    elapsed=$((elapsed + 5))
+  done
+
  if [ "$leak_count" != "0" ]; then
-    echo "⚠️  LEAK: org $SLUG still present post-teardown (count=$leak_count)" >&2
+    echo "⚠️  LEAK: org $SLUG still present post-teardown after ${elapsed}s (count=$leak_count)" >&2
    exit 4
  fi
-  ok "Teardown clean — no orphan resources for $SLUG"
+  ok "Teardown clean — no orphan resources for $SLUG (${elapsed}s)"

  # Normalize unexpected upstream exit codes to 1 (generic failure). The
  # script's documented contract (header "Exit codes" section) only emits
@@ -112,12 +112,15 @@ func sweepOnce(parent context.Context, reaper OrphanReaper) {
 	ctx, cancel := context.WithTimeout(parent, orphanSweepDeadline)
 	defer cancel()

-	// Two independent passes. Each handles its own short-circuit; an
-	// empty result or transient error in one must NOT stop the other,
+	// Three independent passes. Each handles its own short-circuit; an
+	// empty result or transient error in one must NOT stop the others,
 	// since the wiped-DB pass exists precisely for cases where the
-	// removed-row pass finds zero candidates (DB has been dropped).
+	// removed-row pass finds zero candidates (DB has been dropped) and
+	// the stale-token pass exists for the mirror case (DB persists but
+	// /configs volume has been wiped).
 	sweepRemovedRows(ctx, reaper)
 	sweepLabeledOrphansWithoutRows(ctx, reaper)
+	sweepStaleTokensWithoutContainer(ctx, reaper)
 }

 // sweepRemovedRows is the original sweep: ws-* containers (by name
@@ -290,3 +293,181 @@ func sweepLabeledOrphansWithoutRows(ctx context.Context, reaper OrphanReaper) {
 		}
 	}
 }
+
+// staleTokenGrace bounds how recently a token must have been used (or
+// issued, if never used) for it to be considered "potentially live".
+// Anything quieter than this is fair game for the stale-token revoke
+// pass when there's no matching container.
+//
+// Sized vs the heartbeat cadence (30s) and provisioning latency: a
+// healthy workspace touches `last_used_at` every heartbeat, so 5min is
+// 10× the heartbeat interval — enough headroom that brief container
+// restarts (Stop → Start) don't trip the pass. A workspace that's been
+// silent past this window AND has no container is either a wiped-volume
+// orphan or a workspace nobody is using; either way, revoking is safe
+// because the next /registry/register mints a fresh token via the
+// no-live-tokens bootstrap branch in registry.go.
+const staleTokenGrace = 5 * time.Minute
+
+// sweepStaleTokensWithoutContainer revokes workspace_auth_tokens rows
+// for workspaces whose /configs volume must have been wiped — detected
+// as "live token in DB whose owning workspace has no live Docker
+// container". This heals the user-reported failure mode where
+// `docker compose down -v` (or any out-of-band volume removal) leaves
+// stale tokens in the DB while the recreated container has an empty
+// `/configs/.auth_token`. Without this pass, /registry/register on the
+// fresh container 401s forever (requireWorkspaceToken sees live tokens,
+// container can't present one), and the workspace is permanently
+// wedged until an operator manually revokes via SQL.
+//
+// The platform's restart endpoint already handles this case correctly
+// via wsauth.RevokeAllForWorkspace inside issueAndInjectToken — this
+// pass is the safety net for the equivalent action taken outside the
+// API (operator did `docker compose down -v`, host crashed mid-restart,
+// disk pressure evicted a volume, etc).
+//
+// Safety filters that bound the revoke radius:
+//
+//  1. Only runs in single-tenant Docker mode. The orphan sweeper is
+//     wired only when prov != nil (see cmd/server/main.go) — in CP/SaaS
+//     mode there is no Docker daemon and the sweeper doesn't run, so an
+//     empty container list cannot be confused with "no Docker at all"
+//     here (which would otherwise revoke every workspace's tokens).
+//     The function also short-circuits on a nil reaper as a belt-and-
+//     braces guard against a future refactor wiring it incorrectly.
+//
+//  2. staleTokenGrace skips tokens that were issued or used in the
+//     last 5 minutes. Bounds the race with mid-provisioning (token
+//     issued moments before docker run completes) and brief restart
+//     windows.
+//
+//  3. CRITICAL: the staleness predicate is enforced AT THE UPDATE,
+//     not just at the SELECT. This closes a TOCTOU race against
+//     workspace_provision.go:issueAndInjectToken — the platform's
+//     restart endpoint Stops the container synchronously then dispatches
+//     re-provisioning to a goroutine, so a stale-on-SELECT workspace
+//     can have a fresh token inserted by issueAndInjectToken between
+//     our SELECT and our UPDATE. A predicate-only `WHERE workspace_id
+//     = $1 AND revoked_at IS NULL` UPDATE would catch that fresh token
+//     too. Carrying COALESCE(last_used_at, created_at) < now() - grace
+//     in the UPDATE makes the operation idempotent against fresh
+//     inserts: a token created within the grace window cannot match.
+//
+//  4. The DB query joins on workspaces.status NOT IN ('removed',
+//     'provisioning') so deleted and mid-restart workspaces are not
+//     revoked here — those are handled at delete time and by
+//     issueAndInjectToken respectively. (`status = 'provisioning'` is
+//     set synchronously in workspace_restart.go before the async
+//     re-provision begins, so it's a reliable in-flight signal.)
+//
+//  5. Each revocation is logged with the workspace ID so operators can
+//     correlate "workspace just lost auth" with this sweeper, not blame
+//     a network blip.
+//
+// Failure mode: revoke fails for some reason (transient DB error). The
+// next sweep cycle (60s out) retries. Worst case: a workspace stays
+// 401-blocked an extra minute.
+func sweepStaleTokensWithoutContainer(ctx context.Context, reaper OrphanReaper) {
+	// Defence-in-depth (F2): a future refactor that wires the sweeper
+	// in CP/SaaS mode without checking prov would otherwise hit this
+	// pass with a nil reaper. The StartOrphanSweeper entry point
+	// already short-circuits on nil, but we don't want to depend on
+	// every future caller doing the same.
+	if reaper == nil {
+		return
+	}
+
+	prefixes, err := reaper.ListWorkspaceContainerIDPrefixes(ctx)
+	if err != nil {
+		log.Printf("Orphan sweeper: ListWorkspaceContainerIDPrefixes failed: %v — skipping stale-token pass", err)
+		return
+	}
+
+	// Same hex-and-dash filter as the other passes — anything that
+	// can't be a workspace UUID prefix doesn't belong in a SQL LIKE
+	// pattern.
+	//
+	// NOTE: an empty `likes` array is intentionally NOT a short-circuit.
+	// "No workspace containers" is the load-bearing case for this pass
+	// (operator nuked everything). The `cardinality($1) = 0` clause in
+	// the SELECT below treats empty likes as "no LIKE filter" → every
+	// stale-token workspace becomes a candidate. The first two passes'
+	// early-return-on-empty-prefixes pattern would defeat this entire
+	// pass's purpose.
+	likes := make([]string, 0, len(prefixes))
+	for _, p := range prefixes {
+		if !isLikelyWorkspaceID(p) {
+			continue
+		}
+		likes = append(likes, p+"%")
+	}
+
+	// Find workspaces with live tokens whose most-recent activity is
+	// past the grace window AND whose ID does NOT match any live
+	// container prefix. When `likes` is empty (no workspace containers
+	// running at all), every stale-activity workspace is a candidate —
+	// expressed via the `cardinality($1) = 0` short-circuit so the
+	// query has a single shape regardless of container count.
+	//
+	// make_interval(secs => $2) avoids the time.Duration.String() →
+	// `"5m0s"` mismatch with Postgres interval grammar; passing seconds
+	// as an int keeps the binding portable.
+	graceSeconds := int(staleTokenGrace.Seconds())
+	rows, qErr := db.DB.QueryContext(ctx, `
+		SELECT DISTINCT t.workspace_id::text
+		  FROM workspace_auth_tokens t
+		  JOIN workspaces w ON w.id = t.workspace_id
+		 WHERE t.revoked_at IS NULL
+		   AND w.status NOT IN ('removed', 'provisioning')
+		   AND COALESCE(t.last_used_at, t.created_at) < now() - make_interval(secs => $2)
+		   AND (
+		         cardinality($1::text[]) = 0
+		      OR NOT (t.workspace_id::text LIKE ANY($1::text[]))
+		   )
+	`, pq.Array(likes), graceSeconds)
+	if qErr != nil {
+		log.Printf("Orphan sweeper: stale-token query failed: %v — skipping stale-token pass", qErr)
+		return
+	}
+	defer rows.Close()
+
+	var staleWorkspaceIDs []string
+	for rows.Next() {
+		var id string
+		if scanErr := rows.Scan(&id); scanErr != nil {
+			log.Printf("Orphan sweeper: stale-token row scan failed: %v", scanErr)
+			continue
+		}
+		staleWorkspaceIDs = append(staleWorkspaceIDs, id)
+	}
+	if iterErr := rows.Err(); iterErr != nil {
+		log.Printf("Orphan sweeper: stale-token rows iteration failed: %v", iterErr)
+		return
+	}
+
+	// Per-workspace UPDATE with the SAME staleness predicate as the
+	// SELECT, so any token inserted between SELECT and UPDATE (e.g.
+	// issueAndInjectToken racing during a user-triggered restart of a
+	// long-idle workspace) is automatically excluded — its created_at
+	// is fresh and won't satisfy `< now() - grace`.
+	//
+	// We deliberately bypass wsauth.RevokeAllForWorkspace here because
+	// that helper revokes EVERY live token for the workspace; we want
+	// "every STALE live token", which is a different (safer) operation.
+	for _, wsID := range staleWorkspaceIDs {
+		log.Printf("Orphan sweeper: revoking stale tokens for workspace %s (no live container; volume likely wiped)", wsID)
+		_, revokeErr := db.DB.ExecContext(ctx, `
+			UPDATE workspace_auth_tokens
+			   SET revoked_at = now()
+			 WHERE workspace_id = $1
+			   AND revoked_at IS NULL
+			   AND COALESCE(last_used_at, created_at) < now() - make_interval(secs => $2)
+		`, wsID, graceSeconds)
+		if revokeErr != nil {
+			// Non-fatal — next sweep retries. Bail on the loop so a
+			// systemic DB error doesn't spam the log on every iteration.
+			log.Printf("Orphan sweeper: stale-token revoke for %s failed: %v — will retry next cycle", wsID, revokeErr)
+			return
+		}
+	}
+}
@@ -10,6 +10,24 @@ import (
 	"github.com/DATA-DOG/go-sqlmock"
 )

+// expectStaleTokenSweepNoOp registers the third-pass query
+// (sweepStaleTokensWithoutContainer) returning zero rows. The third
+// pass runs unconditionally on every sweepOnce, so every test that
+// doesn't specifically exercise stale-token revocation must register
+// this expectation or sqlmock will fail "unexpected query".
+//
+// Centralising the regex here keeps the existing test suite readable —
+// individual tests don't have to spell out a query they're not actually
+// asserting against.
+//
+// The regex is anchored at the start of the query AND requires the
+// status-filter to keep us from accidentally matching a future query
+// that opens with the same column name. R3 from the review.
+func expectStaleTokenSweepNoOp(mock sqlmock.Sqlmock) {
+	// (?s) lets `.*` span the newlines between the FROM clause and the
+	// status filter. No AddRow call follows, so the result set is empty
+	// and the revoke loop has nothing to iterate over.
+	mock.ExpectQuery(`(?s)^\s*SELECT DISTINCT t\.workspace_id::text\s+FROM workspace_auth_tokens.*status NOT IN \('removed', 'provisioning'\)`).
+		WillReturnRows(sqlmock.NewRows([]string{"workspace_id"}))
+}
+
 // fakeReaper is a hand-rolled OrphanReaper for the sweeper tests.
 // Records every Stop / RemoveVolume call so tests can assert which
 // workspace IDs got reconciled.
@@ -73,6 +91,7 @@ func TestSweepOnce_ReconcilesRunningRemovedRows(t *testing.T) {
 	mock.ExpectQuery(`SELECT id::text\s+FROM workspaces`).
 		WillReturnRows(sqlmock.NewRows([]string{"id"}).
 			AddRow("abc123def456-0000-0000-0000-000000000000"))
+	expectStaleTokenSweepNoOp(mock)

 	sweepOnce(context.Background(), reaper)

@@ -96,8 +115,11 @@ func TestSweepOnce_NoRunningContainers(t *testing.T) {

 	reaper := &fakeReaper{listResponse: nil}

-	// No DB query expected — if sweepOnce makes one anyway the
-	// sqlmock will fail "unexpected query".
+	// First two passes short-circuit on empty container lists. The
+	// third pass (stale-token sweep) DOES query — that's its whole
+	// reason for existing in the no-containers case (operator nuked
+	// everything). Mock it returning no stale tokens.
+	expectStaleTokenSweepNoOp(mock)
 	sweepOnce(context.Background(), reaper)

 	if len(reaper.stopCalls) != 0 {
@@ -145,6 +167,7 @@ func TestSweepOnce_StopFailureLeavesVolume(t *testing.T) {
 	mock.ExpectQuery(`SELECT id::text\s+FROM workspaces`).
 		WillReturnRows(sqlmock.NewRows([]string{"id"}).
 			AddRow("abc123def456-0000-0000-0000-000000000000"))
+	expectStaleTokenSweepNoOp(mock)

 	sweepOnce(context.Background(), reaper)

@@ -173,6 +196,7 @@ func TestSweepOnce_VolumeRemoveErrorIsNonFatal(t *testing.T) {
 		WillReturnRows(sqlmock.NewRows([]string{"id"}).
 			AddRow("aaa111bbb222-0000-0000-0000-000000000000").
 			AddRow("ccc333ddd444-0000-0000-0000-000000000000"))
+	expectStaleTokenSweepNoOp(mock)

 	sweepOnce(context.Background(), reaper)

@@ -205,9 +229,12 @@ func TestSweepOnce_FiltersNonWorkspacePrefixes(t *testing.T) {
 		},
 	}

-	// No DB query expected — every prefix is rejected before the
-	// query builds, so we short-circuit. sqlmock fails on any
-	// unexpected query.
+	// First-pass query is skipped — every prefix is rejected before
+	// the query builds. Third-pass query still runs (filtered prefixes
+	// + non-empty input list still produces an empty likes array,
+	// which the third-pass treats the same as "no containers running"
+	// → stale-token candidates with no LIKE filter). Mock it empty.
+	expectStaleTokenSweepNoOp(mock)
 	sweepOnce(context.Background(), reaper)

 	if len(reaper.stopCalls) != 0 {
@@ -289,6 +316,7 @@ func TestSweepOnce_WipedDBReapsLabeledOrphans(t *testing.T) {
 	// returns no rows — both prefixes are unknown.
 	mock.ExpectQuery(`SELECT lk\s+FROM unnest`).
 		WillReturnRows(sqlmock.NewRows([]string{"lk"}))
+	expectStaleTokenSweepNoOp(mock)

 	sweepOnce(context.Background(), reaper)

@@ -328,6 +356,7 @@ func TestSweepOnce_WipedDBSkipsLabeledContainersWithRows(t *testing.T) {
 		WillReturnRows(sqlmock.NewRows([]string{"lk"}).
 			AddRow("abc123def456%").
 			AddRow("ee0011223344%"))
+	expectStaleTokenSweepNoOp(mock)

 	sweepOnce(context.Background(), reaper)

@@ -355,6 +384,7 @@ func TestSweepOnce_WipedDBReapsOnlyTheUnknownOnes(t *testing.T) {
 	mock.ExpectQuery(`SELECT lk\s+FROM unnest`).
 		WillReturnRows(sqlmock.NewRows([]string{"lk"}).
 			AddRow(keep + "%"))
+	expectStaleTokenSweepNoOp(mock)

 	sweepOnce(context.Background(), reaper)

@@ -380,7 +410,10 @@ func TestSweepOnce_WipedDBSkippedOnDockerError(t *testing.T) {
 	}

 	// No DB query expected for the second pass since we error out
-	// before reaching SQL.
+	// before reaching SQL. The third pass (stale-token sweep) uses
+	// ListWorkspaceContainerIDPrefixes (which succeeded with empty
+	// here, not the same call that errored), so it DOES query.
+	expectStaleTokenSweepNoOp(mock)
 	sweepOnce(context.Background(), reaper)

 	if len(reaper.stopCalls) != 0 {
@@ -410,6 +443,7 @@ func TestSweepOnce_WipedDBSkipsNonUUIDPrefixes(t *testing.T) {
 	// that should appear in the unnest array.
 	mock.ExpectQuery(`SELECT lk\s+FROM unnest`).
 		WillReturnRows(sqlmock.NewRows([]string{"lk"}))
+	expectStaleTokenSweepNoOp(mock)

 	sweepOnce(context.Background(), reaper)

@@ -420,3 +454,204 @@ func TestSweepOnce_WipedDBSkipsNonUUIDPrefixes(t *testing.T) {
 		t.Errorf("unmet sqlmock expectations: %v", err)
 	}
 }
+
+// =============================================================================
+// Third pass: sweepStaleTokensWithoutContainer
+//
+// Heals the user-reported "auth token conflict after volume wipe" failure mode.
+// Scenario: operator runs `docker compose down -v` (or any out-of-band volume
+// removal); DB still has tokens for workspaces whose recreated containers boot
+// with empty /configs and 401 forever on /registry/register.
+// =============================================================================
+
+// TestSweepOnce_StaleTokenRevokeFiresWhenNoContainer — the headline
+// case. A workspace has live tokens in the DB but no live container
+// matches it (volume-wipe scenario). The third pass revokes.
+func TestSweepOnce_StaleTokenRevokeFiresWhenNoContainer(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	// One name-shaped container running, and it has a status='removed'
+	// row so first pass reaps it. Second pass finds nothing
+	// (managed list empty in this scenario). Third pass: even though
+	// the running container covers that prefix, an unrelated
+	// workspace ID (no live container, no name prefix match) has
+	// stale tokens — revoke it.
+	const orphanedID = "deadbeef-0000-0000-0000-000000000000"
+	reaper := &fakeReaper{listResponse: []string{"abc123def456"}}
+
+	mock.ExpectQuery(`SELECT id::text\s+FROM workspaces`).
+		WillReturnRows(sqlmock.NewRows([]string{"id"}).
+			AddRow("abc123def456-0000-0000-0000-000000000000"))
+
+	// Third-pass query returns the orphaned workspace.
+	// Tight regex pins the safety guards: status-filter excludes
+	// 'removed' and 'provisioning' (R2 + the C1 fix), and the
+	// staleness predicate appears in the SELECT.
+	mock.ExpectQuery(`(?s)^\s*SELECT DISTINCT t\.workspace_id::text\s+FROM workspace_auth_tokens.*status NOT IN \('removed', 'provisioning'\).*COALESCE\(t\.last_used_at, t\.created_at\) < now\(\) - make_interval`).
+		WillReturnRows(sqlmock.NewRows([]string{"workspace_id"}).
+			AddRow(orphanedID))
+
+	// Revoke executes one UPDATE — and the UPDATE itself MUST also
+	// carry the staleness predicate (closes the C1 TOCTOU race
+	// against issueAndInjectToken inserting a fresh token between
+	// our SELECT and our UPDATE).
+	mock.ExpectExec(`(?s)UPDATE workspace_auth_tokens\s+SET revoked_at = now\(\)\s+WHERE workspace_id = \$1\s+AND revoked_at IS NULL\s+AND COALESCE\(last_used_at, created_at\) < now\(\) - make_interval`).
+		WithArgs(orphanedID, 300).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	sweepOnce(context.Background(), reaper)
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestSweepOnce_StaleTokenSkippedWhenContainerExists — pin the safety
+// guarantee: a workspace with both live tokens AND a live container
+// must NOT be revoked. The query's NOT LIKE clause is the gate; this
+// test simulates that gate by having the third-pass query return zero
+// rows (the live-container workspace is filtered out) and registering
+// no UPDATE expectation.
+//
+// NOTE(review): sqlmock returns canned rows and does not evaluate SQL,
+// so the NOT LIKE filtering itself is presumably only provable against
+// a real Postgres — confirm an integration test covers it.
+func TestSweepOnce_StaleTokenSkippedWhenContainerExists(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	// One running container; first pass returns no removed-row matches.
+	reaper := &fakeReaper{listResponse: []string{"abc123def456"}}
+	mock.ExpectQuery(`SELECT id::text\s+FROM workspaces`).
+		WillReturnRows(sqlmock.NewRows([]string{"id"}))
+
+	// Third-pass query: the live workspace has live tokens but its
+	// prefix matches the running container, so the NOT LIKE excludes
+	// it. Result: zero stale tokens.
+	expectStaleTokenSweepNoOp(mock)
+
+	sweepOnce(context.Background(), reaper)
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestSweepOnce_StaleTokenRevokeFailureBailsLoop — a transient DB
+// error during revoke must not spam the log on every iteration.
+// Bail out of the loop; next 60s cycle retries.
+func TestSweepOnce_StaleTokenRevokeFailureBailsLoop(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	reaper := &fakeReaper{listResponse: nil}
+
+	// Third-pass returns two stale-token workspaces; the first revoke
+	// errors. Loop must bail without attempting the second.
+	mock.ExpectQuery(`(?s)^\s*SELECT DISTINCT t\.workspace_id::text\s+FROM workspace_auth_tokens.*status NOT IN \('removed', 'provisioning'\)`).
+		WillReturnRows(sqlmock.NewRows([]string{"workspace_id"}).
+			AddRow("aaaa1111-0000-0000-0000-000000000000").
+			AddRow("bbbb2222-0000-0000-0000-000000000000"))
+	mock.ExpectExec(`(?s)UPDATE workspace_auth_tokens\s+SET revoked_at = now\(\)\s+WHERE workspace_id = \$1\s+AND revoked_at IS NULL\s+AND COALESCE\(last_used_at, created_at\) < now\(\) - make_interval`).
+		WithArgs("aaaa1111-0000-0000-0000-000000000000", 300).
+		WillReturnError(errors.New("connection reset"))
+	// No second ExpectExec: if the loop tries it, sqlmock fails
+	// "unexpected call".
+	// NOTE(review): sqlmock presumably surfaces an unexpected call as
+	// an error returned to the caller, while ExpectationsWereMet only
+	// reports expectations that were never triggered — confirm this
+	// test really fails if the loop keeps going after the first error.
+
+	sweepOnce(context.Background(), reaper)
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestSweepOnce_StaleTokenQueryErrorIsNonFatal — a transient DB error
+// on the SELECT must not prevent the rest of sweepOnce from making
+// progress. (In this test there's no other progress to make either,
+// just verifying no panic + the cycle completes.)
+func TestSweepOnce_StaleTokenQueryErrorIsNonFatal(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	// Empty container list, so the failing SELECT below is the only
+	// query this test registers.
+	reaper := &fakeReaper{listResponse: nil}
+
+	// The SELECT fails; no ExpectExec is registered because no revoke
+	// should be attempted after a failed query.
+	mock.ExpectQuery(`(?s)^\s*SELECT DISTINCT t\.workspace_id::text\s+FROM workspace_auth_tokens.*status NOT IN \('removed', 'provisioning'\)`).
+		WillReturnError(errors.New("connection reset"))
+
+	sweepOnce(context.Background(), reaper)
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestSweepOnce_StaleTokenRevokeUsesStalenessPredicate — pin the C1
+// race fix: the per-workspace UPDATE must carry the staleness
+// predicate so a token inserted by issueAndInjectToken between our
+// SELECT and our UPDATE is automatically excluded (its created_at is
+// fresh and won't satisfy `< now() - grace`).
+//
+// This test asserts the SHAPE of the UPDATE (predicate present, grace
+// argument bound). A real-Postgres integration test would prove the
+// race resolution end-to-end; this catches the regression where
+// someone "simplifies" the UPDATE back to a predicate-less revoke.
+func TestSweepOnce_StaleTokenRevokeUsesStalenessPredicate(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	const orphanedID = "deadbeef-0000-0000-0000-000000000000"
+	reaper := &fakeReaper{listResponse: nil}
+
+	mock.ExpectQuery(`(?s)^\s*SELECT DISTINCT t\.workspace_id::text\s+FROM workspace_auth_tokens.*status NOT IN \('removed', 'provisioning'\)`).
+		WillReturnRows(sqlmock.NewRows([]string{"workspace_id"}).
+			AddRow(orphanedID))
+
+	// The UPDATE regex requires every guard: workspace_id binding,
+	// revoked_at IS NULL, AND the staleness predicate using the SAME
+	// COALESCE expression as the SELECT. Loosening any of these
+	// would re-open the C1 race, and this regex would no longer match.
+	mock.ExpectExec(`(?s)UPDATE workspace_auth_tokens\s+SET revoked_at = now\(\)\s+WHERE workspace_id = \$1\s+AND revoked_at IS NULL\s+AND COALESCE\(last_used_at, created_at\) < now\(\) - make_interval\(secs => \$2\)`).
+		WithArgs(orphanedID, 300).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	sweepOnce(context.Background(), reaper)
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestSweepStaleTokens_NilReaperEarlyExit — defence-in-depth (F2):
+// even though StartOrphanSweeper short-circuits on nil reaper, the
+// individual pass also early-exits. Protects against future refactors
+// that wire the pass without the outer guard.
+func TestSweepStaleTokens_NilReaperEarlyExit(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	// No DB queries expected. If the early-return is removed, sqlmock
+	// fails on the unexpected SELECT.
+	// The pass is called directly (not via sweepOnce) with a nil
+	// reaper, so only this pass's own guard is under test.
+	sweepStaleTokensWithoutContainer(context.Background(), nil)
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestSweepOnce_StaleTokenSkippedWhenDockerListFails — if the third
+// pass can't enumerate containers (Docker hiccup), it must skip the
+// query entirely. Otherwise it would query with empty likes and
+// revoke every stale-token workspace based on stale information.
+func TestSweepOnce_StaleTokenSkippedWhenDockerListFails(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	reaper := &fakeReaper{listErr: errors.New("daemon unreachable")}
+
+	// No DB queries expected: first pass bails on listErr, second
+	// pass uses managedList (also fails because we never set it),
+	// third pass also bails on listErr. Verify by NOT registering
+	// expectStaleTokenSweepNoOp — sqlmock fails on any unexpected
+	// query.
+	sweepOnce(context.Background(), reaper)
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
+	}
+}
@@ -437,13 +437,23 @@ async def main():  # pragma: no cover
            )
        async def _send_initial_prompt():
            """Wait for server to be ready, then send initial_prompt as self-message."""
-            # Wait for the A2A server to accept connections
+            # Wait for the A2A server to accept connections.
+            # Use the SDK's own constant for the well-known path so this
+            # probe and the route mounted by create_agent_card_routes()
+            # never drift apart. Pre-fix this hardcoded the pre-1.x
+            # well-known path string; a2a-sdk 1.x renamed it (the
+            # canonical value lives in a2a.utils.constants now), so
+            # the probe got 404 every attempt and fell through to
+            # "server not ready after 30s, skipping" even though the
+            # server was actually serving fine. Net effect: every
+            # workspace silently dropped its `initial_prompt`.
+            from a2a.utils.constants import AGENT_CARD_WELL_KNOWN_PATH
            ready = False
            for attempt in range(30):
                await asyncio.sleep(1)
                try:
                    async with httpx.AsyncClient(timeout=5.0) as client:
-                        resp = await client.get(f"http://127.0.0.1:{port}/.well-known/agent.json")
+                        resp = await client.get(f"http://127.0.0.1:{port}{AGENT_CARD_WELL_KNOWN_PATH}")
                        if resp.status_code == 200:
                            ready = True
                            break
@@ -2,6 +2,7 @@

 from __future__ import annotations

+import json
 from typing import Any

 from a2a.server.agent_execution import RequestContext
@@ -89,33 +90,46 @@ def append_peer_guidance(


 def summarize_peer_cards(peers: list[dict[str, Any]]) -> list[dict[str, Any]]:
-    """Return compact peer metadata for prompt rendering."""
+    """Return compact peer metadata for prompt rendering.
+
+    Falls back to the registry row's `name` and `role` when `agent_card` is
+    null or unparseable so peers stay visible to delegators even before
+    their A2A discovery roundtrip has populated a card. Without this
+    fallback a coordinator-tier workspace with N freshly-created worker
+    peers would render an empty `## Your Peers` section and refuse to
+    delegate (the regression behind the 2026-04-27 Design Director
+    discovery bug).
+    """
    summaries: list[dict[str, Any]] = []
    for peer in peers:
        agent_card = peer.get("agent_card")
-        if not agent_card:
-            continue
        if isinstance(agent_card, str):
            try:
-                import json
-
                agent_card = json.loads(agent_card)
            except Exception:
-                continue
+                agent_card = None
        if not isinstance(agent_card, dict):
-            continue
+            agent_card = None
+
+        if agent_card:
+            skills_raw = agent_card.get("skills") or []
+            skills = [
+                s.get("name", s.get("id", ""))
+                for s in skills_raw
+                if isinstance(s, dict)
+            ]
+            name = agent_card.get("name") or peer.get("name") or "Unknown"
+        else:
+            skills = []
+            name = peer.get("name") or "Unknown"

-        skills = agent_card.get("skills", [])
        summaries.append(
            {
                "id": peer.get("id", "unknown"),
-                "name": agent_card.get("name", peer.get("name", "Unknown")),
+                "name": name,
+                "role": peer.get("role") or "",
                "status": peer.get("status", "unknown"),
-                "skills": [
-                    s.get("name", s.get("id", ""))
-                    for s in skills
-                    if isinstance(s, dict)
-                ],
+                "skills": skills,
            }
        )
    return summaries
@@ -140,6 +154,8 @@ def build_peer_section(
        parts.append(f"- **{peer['name']}** (id: `{peer['id']}`, status: {peer['status']})")
        if peer["skills"]:
            parts.append(f"  Skills: {', '.join(peer['skills'])}")
+        elif peer.get("role"):
+            parts.append(f"  Role: {peer['role']}")
        parts.append("")
    parts.append(instruction)
    return "\n".join(parts)
@@ -0,0 +1,84 @@
+"""Pin the agent-card readiness probe to the SDK's canonical path.
+
+main.py's _send_initial_prompt() polls the local A2A server's
+well-known agent-card URL to know when it's safe to send the initial
+prompt as a self-message. Pre-fix the URL was hardcoded to the pre-1.x
+literal; a2a-sdk 1.x renamed the well-known path (the canonical value
+lives in `a2a.utils.constants.AGENT_CARD_WELL_KNOWN_PATH`), so the
+probe got 404 every attempt and silently fell through to "server not
+ready after 30s, skipping" — dropping every workspace's
+`initial_prompt` from config.yaml.
+
+The fix is to import the SDK's `AGENT_CARD_WELL_KNOWN_PATH` constant
+and use it directly in the probe URL. These tests pin the static
+invariants of that fix:
+
+  1. No hardcoded `/.well-known/agent.json` literal anywhere in
+     main.py (catches a future contributor reverting to a literal).
+  2. The probe URL f-string interpolates `AGENT_CARD_WELL_KNOWN_PATH`
+     (catches a "fix" that imports the constant for show but still
+     uses a literal in the actual GET).
+
+Note: we deliberately do not assert the constant's value or compare
+it against `create_agent_card_routes()` here. The runtime SDK is
+mocked in this directory's conftest for the executor-test path, so
+any test that imports the real `a2a.utils.constants` would either
+collide with the mock or require running in a separate pytest session.
+The two static invariants are sufficient: by always following whatever
+the SDK constant says, we pick up any rename automatically. The
+SDK's own contract that `create_agent_card_routes` mounts at the
+constant's value is the SDK's responsibility, not ours.
+"""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+
+WORKSPACE_ROOT = Path(__file__).resolve().parents[1]
+
+
+def test_main_uses_sdk_constant_for_agent_card_probe():
+    """No hardcoded `/.well-known/agent.json` literal anywhere in main.py.
+
+    The SDK constant (AGENT_CARD_WELL_KNOWN_PATH) is the single source
+    of truth — string-literal probes drift the moment the SDK renames.
+    """
+    main = (WORKSPACE_ROOT / "main.py").read_text()
+
+    # Scan line by line with 1-based numbering so a failure reports
+    # exactly which lines still carry the stale literal.
+    bad_literal = "/.well-known/agent.json"
+    offenders = [
+        (lineno, line)
+        for lineno, line in enumerate(main.splitlines(), 1)
+        if bad_literal in line
+    ]
+    assert not offenders, (
+        f"Found pre-1.x literal {bad_literal!r} in main.py — must use "
+        f"the SDK's AGENT_CARD_WELL_KNOWN_PATH constant instead. "
+        f"Offending lines: {offenders}"
+    )
+
+    # Plain substring check on the constant's name: it does not verify
+    # that an import statement exists, only that the name appears.
+    assert (
+        "AGENT_CARD_WELL_KNOWN_PATH" in main
+    ), "main.py must import a2a.utils.constants.AGENT_CARD_WELL_KNOWN_PATH"
+
+
+def test_probe_loop_uses_constant_in_url_format():
+    """Spot-check that the URL fstring in main.py interpolates the
+    constant, not a literal. Catches a future "fix" that imports the
+    constant for show but still uses a literal in the actual GET."""
+    main = (WORKSPACE_ROOT / "main.py").read_text()
+
+    # The probe pattern: `client.get(f"http://127.0.0.1:{port}{...}")`
+    # where `{...}` must be `{AGENT_CARD_WELL_KNOWN_PATH}`, not a
+    # hardcoded path.
+    pattern = re.compile(
+        r'client\.get\(f"http://127\.0\.0\.1:\{port\}\{(?P<expr>[^}]+)\}"\)'
+    )
+    # With a single capture group, findall returns the captured `expr`
+    # strings rather than the full matches.
+    matches = pattern.findall(main)
+    assert matches, "no readiness probe pattern found in main.py"
+    for expr in matches:
+        assert "AGENT_CARD_WELL_KNOWN_PATH" in expr, (
+            f"readiness probe URL uses {expr!r} instead of "
+            f"AGENT_CARD_WELL_KNOWN_PATH"
+        )
@@ -203,8 +203,11 @@ def test_peer_capabilities_format(tmp_path):
    assert "**Echo Agent** (id: `peer-1`, status: online)" in result
    assert "Skills: echo, repeat" in result
    assert "delegate_to_workspace" in result
-    # peer-2 has no agent_card so it's skipped
-    assert "Silent Agent" not in result
+    # peer-2 has no agent_card but DOES have a DB name + status — must
+    # still render so coordinators can delegate to freshly-created peers
+    # whose A2A discovery hasn't populated a card yet (regression test
+    # for the 2026-04-27 Design Director discovery bug).
+    assert "**Silent Agent** (id: `peer-2`, status: offline)" in result


 def test_peer_with_json_string_agent_card(tmp_path):
@@ -0,0 +1,111 @@
+"""Pin peer-summary fallback when agent_card is missing.
+
+Regression test for the 2026-04-27 Design Director discovery bug:
+`summarize_peer_cards()` previously skipped any peer whose `agent_card`
+was null or unparseable, so a coordinator with freshly-created workers
+saw an empty `## Your Peers` section in its system prompt and refused
+to delegate. The registry endpoint already returns DB `name` + `role`
+on every row regardless of agent_card state — falling back to those
+keeps peers visible while A2A discovery catches up.
+"""
+
+from __future__ import annotations
+
+from shared_runtime import build_peer_section, summarize_peer_cards
+
+
+def _peer(**overrides):
+    # Build a peer dict with DB-style name/role/status and a null
+    # agent_card by default; keyword overrides replace or add keys.
+    base = {
+        "id": "ws-1",
+        "name": "DB Name",
+        "role": "DB Role",
+        "status": "active",
+        "agent_card": None,
+    }
+    base.update(overrides)
+    return base
+
+
+def test_summarize_includes_peer_with_null_agent_card_using_db_fields():
+    # A peer whose agent_card is None must still produce one summary,
+    # populated from the row's own id/name/role/status with no skills.
+    summaries = summarize_peer_cards([_peer()])
+    assert len(summaries) == 1
+    assert summaries[0]["id"] == "ws-1"
+    assert summaries[0]["name"] == "DB Name"
+    assert summaries[0]["role"] == "DB Role"
+    assert summaries[0]["status"] == "active"
+    assert summaries[0]["skills"] == []
+
+
+def test_summarize_prefers_agent_card_name_over_db_name():
+    # When a card is present its name and skills win; role is still
+    # taken from the peer row, not from the card.
+    peer = _peer(
+        agent_card={"name": "Card Name", "skills": [{"name": "draft-spec"}]}
+    )
+    summaries = summarize_peer_cards([peer])
+    assert summaries[0]["name"] == "Card Name"
+    assert summaries[0]["skills"] == ["draft-spec"]
+    assert summaries[0]["role"] == "DB Role"
+
+
+def test_summarize_handles_string_agent_card_json():
+    # An agent_card supplied as a JSON string is parsed and its name used.
+    peer = _peer(agent_card='{"name": "JSON Name", "skills": []}')
+    summaries = summarize_peer_cards([peer])
+    assert summaries[0]["name"] == "JSON Name"
+
+
+def test_summarize_falls_back_when_agent_card_string_is_malformed():
+    # An unparseable card string must not drop the peer: the summary
+    # falls back to the row's name/role with an empty skills list.
+    peer = _peer(agent_card="not-valid-json")
+    summaries = summarize_peer_cards([peer])
+    assert len(summaries) == 1
+    assert summaries[0]["name"] == "DB Name"
+    assert summaries[0]["role"] == "DB Role"
+    assert summaries[0]["skills"] == []
+
+
+def test_summarize_falls_back_when_agent_card_is_wrong_type():
+    # A card that is neither a str nor a dict (here an int) is treated
+    # like a missing card; the peer is kept under its row name.
+    peer = _peer(agent_card=42)
+    summaries = summarize_peer_cards([peer])
+    assert len(summaries) == 1
+    assert summaries[0]["name"] == "DB Name"
+
+
+def test_summarize_handles_missing_role_and_name_with_unknown_default():
+    # Built by hand (not via _peer) so the name and role keys are
+    # absent: name defaults to "Unknown" and role to an empty string.
+    peer = {"id": "ws-2", "status": "active", "agent_card": None}
+    summaries = summarize_peer_cards([peer])
+    assert summaries[0]["name"] == "Unknown"
+    assert summaries[0]["role"] == ""
+
+
+def test_build_peer_section_renders_role_when_skills_empty():
+    # With no skills to show, the rendered section carries a Role line
+    # and no Skills line.
+    section = build_peer_section([_peer()])
+    assert "## Your Peers" in section
+    assert "**DB Name**" in section
+    assert "Role: DB Role" in section
+    assert "Skills:" not in section
+
+
+def test_build_peer_section_prefers_skills_over_role_when_card_present():
+    # When the card lists skills, the Skills line is rendered and the
+    # Role line is suppressed.
+    peer = _peer(
+        agent_card={"name": "Worker", "skills": [{"name": "design"}, {"name": "review"}]}
+    )
+    section = build_peer_section([peer])
+    assert "Skills: design, review" in section
+    assert "Role: DB Role" not in section
+
+
+def test_build_peer_section_mixed_peers():
+    # One card-less peer (ws-a) and one peer with a card (ws-b) in the
+    # same list: both ids render, ws-a via Role and ws-b via Skills.
+    peers = [
+        _peer(id="ws-a"),
+        _peer(
+            id="ws-b",
+            agent_card={"name": "Card B", "skills": [{"name": "build"}]},
+        ),
+    ]
+    section = build_peer_section(peers)
+    assert "id: `ws-a`" in section
+    assert "id: `ws-b`" in section
+    assert "Role: DB Role" in section
+    assert "Skills: build" in section
+
+
+def test_build_peer_section_empty_when_no_peers():
+    # An empty peer list renders as an empty string, not a bare header.
+    assert build_peer_section([]) == ""