Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f8f5d4e6ba | |||
| 75ab76daf0 | |||
| bbc2daea4a | |||
| 12dd60413d | |||
| c93214e4e0 | |||
| 66e3b7edb3 | |||
| 5bc87ea75d | |||
| 73827045bc | |||
| 38353e9a4f | |||
| 8bcc19c38e | |||
| 47263db7ad | |||
| 43a86d44da | |||
| c2a0bdea96 |
@@ -0,0 +1,329 @@
|
||||
name: CI
|
||||
|
||||
# Ported from .github/workflows/ci.yml on 2026-05-11 per internal#326
|
||||
# (Class-A root: cross-repo `uses:` blocker for Gitea 1.22.6 —
|
||||
# feedback_gitea_cross_repo_uses_blocked).
|
||||
#
|
||||
# Root cause of the main-red CI on this repo:
|
||||
# The .github/ original used
|
||||
# uses: molecule-ai/molecule-ci/.github/workflows/validate-workspace-template.yml@main
|
||||
# which Gitea 1.22.6 rejects (DEFAULT_ACTIONS_URL=github → 404 against
|
||||
# the remote repo even though it lives on the same Gitea instance).
|
||||
# Gitea reads .github/ as a fallback when .gitea/ is absent
|
||||
# (reference_per_repo_gitea_vs_github_actions_dir), so the .github/
|
||||
# workflow was firing on Gitea and failing in 1s.
|
||||
#
|
||||
# Fix shape: inline the validation logic directly. The canonical
|
||||
# validator in molecule-ai/molecule-ci already self-clones into the
|
||||
# runner via a direct HTTPS `git clone` step (validate-workspace-template.yml
|
||||
# does this verbatim) — so the inline port is just "do that clone +
|
||||
# invoke the validator script in-place", preserving the
|
||||
# single-source-of-truth property (each CI run still fetches the
|
||||
# canonical validator fresh).
|
||||
#
|
||||
# Four-surface migration audit (feedback_gitea_actions_migration_audit_pattern):
|
||||
# 1. YAML — no `workflow_dispatch.inputs`; no `merge_group`; preserved
|
||||
# `on: [push, pull_request]` from the original. Added workflow-level
|
||||
# env.GITHUB_SERVER_URL (feedback_act_runner_github_server_url).
|
||||
# 2. Cache — `actions/setup-python` `cache: pip` preserved; works against
|
||||
# Gitea's built-in cache server when runner.cache is configured.
|
||||
# 3. Token — uses auto-injected GITHUB_TOKEN (Gitea-aliased). Validator
|
||||
# job needs only `contents: read` (no write to issues/PRs).
|
||||
# 4. Docs — anonymous git-clone of molecule-ci (no token in URL); the
|
||||
# molecule-ci repo is public on the Gitea instance.
|
||||
#
|
||||
# Fork-PR semantics: validate-runtime is intentionally skipped on fork
|
||||
# PRs because pip-install + docker-build + adapter-import are arbitrary
|
||||
# code execution. Internal PRs and main pushes get full coverage. The
|
||||
# `github.event.pull_request.head.repo.fork` field is null for non-PR
|
||||
# events; the `!= true` comparison defaults to running.
|
||||
#
|
||||
# Cross-links:
|
||||
# - internal#326 — parent tracking issue
|
||||
# - molecule-ai/molecule-ci/.github/workflows/validate-workspace-template.yml — pattern source
|
||||
# - molecule-ai/molecule-core/.gitea/workflows/ci.yml — Gitea port style reference
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
env:
|
||||
# Belt-and-suspenders against the runner-default trap
|
||||
# (feedback_act_runner_github_server_url). Runners are configured
|
||||
# with this env via /opt/molecule/runners/config.yaml runner.envs,
|
||||
# but pinning at the workflow level protects against a runner
|
||||
# regenerated without the config file.
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
# Defense-in-depth on the GITHUB_TOKEN scope. The validate-runtime job
|
||||
# runs untrusted-by-design code from the calling repo — pip-installs
|
||||
# requirements.txt (post-install hooks), imports adapter.py, and
|
||||
# docker-builds the Dockerfile. Each primitive can execute arbitrary
|
||||
# code with the token in env. Pinning `contents: read` means the worst
|
||||
# a malicious template PR can do with the token is read public repo
|
||||
# state — no write to issues, no push to branches, no comment-spam.
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
validate-static:
|
||||
name: Template validation (static)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
# Canonical validator script lives in molecule-ci, fetched fresh on
|
||||
# every run. Anonymous fetch of the public molecule-ci repo — no
|
||||
# token needed; no actions/checkout cross-repo idiosyncrasies.
|
||||
- name: Fetch molecule-ci canonical scripts
|
||||
run: git clone --depth 1 https://git.moleculesai.app/molecule-ai/molecule-ci.git .molecule-ci-canonical
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
# Secret scan — the most important check. Always runs, including
|
||||
# on fork PRs (no third-party code executes here).
|
||||
- name: Check for secrets
|
||||
run: |
|
||||
python3 - << 'PYEOF'
|
||||
"""Scan the checked-out workspace for credential-shaped strings.

Walks GITHUB_WORKSPACE (or the current directory when unset), reads every
file whose suffix is in EXTENSIONS, and exits 1 when any line matches one of
PATTERNS. Only file contents are read; nothing from the repo is imported.
"""
import os
import re
import sys
from pathlib import Path

PATTERNS = [
    re.compile(r'''["']sk-ant-[a-zA-Z0-9]{50,}["']'''),
    re.compile(r'''["']ghp_[a-zA-Z0-9]{36,}["']'''),
    re.compile(r'''["']AKIA[A-Z0-9]{16}["']'''),
    re.compile(r'''["'][a-zA-Z0-9/+=]{40}["']'''),
    re.compile(r'''["']sk_test_[a-zA-Z0-9]{24,}["']'''),
    re.compile(r'''["']Bearer\s+[a-zA-Z0-9_.-]{20,}["']'''),
    re.compile(r'''ghp_[a-zA-Z0-9]{36,}'''),
    re.compile(r'''sk-ant-[a-zA-Z0-9]{50,}'''),
]
SKIP_DIRS = {'.molecule-ci', '.molecule-ci-canonical', '.git', 'node_modules', '__pycache__'}
EXTENSIONS = {'.yaml', '.yml', '.md', '.py', '.sh'}

# How many leading characters of a match are echoed to the log. Enough to
# identify the token family, never enough to reconstruct the credential.
_VISIBLE_PREFIX = 8


def is_false_positive(line):
    """Return True when the line looks like documentation, not a real secret."""
    ctx = line.lower()
    return '...' in ctx or '<example' in ctx or '</example' in ctx


def redact(token):
    """Return a log-safe rendering of a matched token."""
    return f"{token[:_VISIBLE_PREFIX]}[redacted, {len(token)} chars]"


def find_secret(line):
    """Return the first credential-shaped match in *line*, or None.

    Only the first hit is returned: the quoted and bare variants of the same
    prefix overlap, so reporting every pattern would list one token twice.
    """
    if is_false_positive(line):
        return None
    for pattern in PATTERNS:
        match = pattern.search(line)
        if match:
            return match.group(0)
    return None


def scan_tree(root):
    """Scan *root* recursively.

    Returns a ``(findings, unreadable)`` pair of lists of printable strings:
    one finding per offending line, and one note per file that could not be
    opened or read.
    """
    findings = []
    unreadable = []
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk never descends into skipped directories;
        # sort for a stable report order.
        dirnames[:] = sorted(d for d in dirnames if d not in SKIP_DIRS)
        for filename in sorted(filenames):
            if Path(filename).suffix not in EXTENSIONS:
                continue
            filepath = Path(dirpath) / filename
            try:
                with open(filepath, encoding='utf-8', errors='ignore') as f:
                    for lineno, line in enumerate(f, 1):
                        hit = find_secret(line)
                        if hit is not None:
                            findings.append(f"  {filepath}:{lineno}: {redact(hit)}")
            except OSError as exc:
                # A file we cannot read is a file we did not scan — surface it
                # instead of silently reporting a clean result.
                unreadable.append(f"{filepath}: {exc}")
    return findings, unreadable


def main():
    """Run the scan and return the process exit code (1 on any finding)."""
    root = Path(os.environ.get('GITHUB_WORKSPACE', '.'))
    findings, unreadable = scan_tree(root)
    for note in unreadable:
        print(f"::warning::secret scan could not read {note}")
    if findings:
        print("::error::Potential secret found in committed files:")
        for finding in findings:
            print(finding)
        return 1
    print("::notice::No secrets detected")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
PYEOF
|
||||
# Static-only validator — file existence checks, YAML parse,
|
||||
# AST inspection of adapter.py (no import). Doesn't execute any
|
||||
# third-party code; safe on fork PRs.
|
||||
- run: pip install pyyaml -q
|
||||
- run: python3 .molecule-ci-canonical/scripts/validate-workspace-template.py --static-only
|
||||
|
||||
validate-runtime:
|
||||
name: Template validation (runtime)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
needs: validate-static
|
||||
# Skip when the PR comes from a fork — those are external,
|
||||
# untrusted, and would let attackers run pip install / docker build
|
||||
# / adapter.py import on our runner.
|
||||
if: github.event.pull_request.head.repo.fork != true
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- name: Fetch molecule-ci canonical scripts
|
||||
run: git clone --depth 1 https://git.moleculesai.app/molecule-ai/molecule-ci.git .molecule-ci-canonical
|
||||
- uses: actions/setup-python@v5
  with:
    python-version: "3.11"
    # Only enable the pip cache when requirements.txt exists. With
    # `cache: pip` set unconditionally, setup-python errors out when no
    # dependency file matches, which would make the
    # `hashFiles('requirements.txt') == ''` fallback install step later in
    # this job unreachable. An empty `cache` value disables caching.
    cache: ${{ hashFiles('requirements.txt') != '' && 'pip' || '' }}
    cache-dependency-path: requirements.txt
|
||||
- run: pip install pyyaml -q
|
||||
# Install the template's runtime dependencies so the validator's
|
||||
# check_adapter_runtime_load() can import adapter.py the same way
|
||||
# the workspace container does at boot. Without this, a
|
||||
# syntactically-valid adapter that ImportErrors on a missing
|
||||
# transitive dep would build clean and crash on first user prompt.
|
||||
- if: hashFiles('requirements.txt') != ''
|
||||
run: pip install -q -r requirements.txt
|
||||
- if: hashFiles('requirements.txt') == ''
|
||||
run: pip install -q molecule-ai-workspace-runtime
|
||||
- run: python3 .molecule-ci-canonical/scripts/validate-workspace-template.py
|
||||
- name: Docker build smoke test
  if: hashFiles('Dockerfile') != ''
  run: |
    # pipefail is required: without it the status of
    # `docker build ... | tail -5` is tail's, so a FAILED build would
    # still print "Docker build succeeded" and leave the step green.
    set -euo pipefail
    # Graceful skip when the runner's job-container can't reach the
    # Docker daemon (e.g. /var/run/docker.sock not mounted into the
    # act job container, or the in-container uid not in the docker
    # group). Without this guard, CI stays red even when the
    # template's Dockerfile is fine — see internal#222 for the
    # proper runner-config fix.
    if ! docker info >/dev/null 2>&1; then
      echo "::warning::docker daemon unreachable from runner job container — skipping Docker build smoke (runner-config gap, not a template issue)."
      exit 0
    fi
    docker build -t template-test . --no-cache 2>&1 | tail -5
    echo "Docker build succeeded"
|
||||
|
||||
# --- Layer-3: real T4 tier-4 conformance gate (RFC internal#456 §11) ---
|
||||
# NOT a string-match. Builds the actual image, runs it under the EXACT
|
||||
# flags the controlplane provisioner emits for tier-4
|
||||
# (userdata_containerized.go @ec2384c: --privileged --pid=host
|
||||
# -v /:/host -v /var/run/docker.sock:/var/run/docker.sock), then
|
||||
# asserts BOTH properties on the RUNNING container, atomically
|
||||
# (RFC §10 — either failing fails the build):
|
||||
# (a) the uid-1000 agent can attain host root
|
||||
# (sudo nsenter --target 1 --mount --pid -- id -u == 0)
|
||||
# (b) /configs/.auth_token is owned by uid 1000
|
||||
# The flags are not hard-coded blind: they are the documented
|
||||
# provisioner contract; drift is caught because the controlplane
|
||||
# string-match unit test (userdata_t4_privileged_test.go) guards the
|
||||
# emission side and this gate guards the runtime side.
|
||||
t4-conformance:
|
||||
name: T4 tier-4 conformance (live)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
needs: validate-static
|
||||
# Untrusted-by-design: builds + runs the PR's Dockerfile. Skip on
|
||||
# fork PRs exactly like validate-runtime.
|
||||
if: github.event.pull_request.head.repo.fork != true
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- name: Build the runtime image
  id: build
  run: |
    # pipefail is required: without it the status of
    # `docker build ... | tail -5` is tail's, so a failed build would
    # pass this step and the probe step could then run against a stale
    # `t4-conformance-test` image left on the daemon by an earlier run.
    set -euo pipefail
    if ! docker info >/dev/null 2>&1; then
      echo "::error::docker daemon unreachable — T4 conformance gate CANNOT verify host-root reach. This is a hard gate; failing closed (do NOT treat as skip). Fix runner-config (internal#222) to unblock."
      exit 1
    fi
    docker build -t t4-conformance-test . --no-cache 2>&1 | tail -5
|
||||
- name: Run under EXACT tier-4 provisioner flags + assert host-root reach AND token agent-ownership
  run: |
    set -euo pipefail
    PROBE_FILE=/host/tmp/.t4-conformance-probe

    # Register cleanup BEFORE starting the container so every exit path
    # (including a failing `docker run` or a failed assertion under
    # `set -e`) removes both the probe container and the marker file
    # written through /host.
    cleanup() {
      docker exec -u agent t4probe sudo -n rm -f "$PROBE_FILE" >/dev/null 2>&1 || true
      docker rm -f t4probe >/dev/null 2>&1 || true
    }
    trap cleanup EXIT

    # If the Docker daemon is reused across jobs, a container left behind
    # by a cancelled or timed-out earlier run would make
    # `docker run --name t4probe` fail with a name conflict. Clear it first.
    docker rm -f t4probe >/dev/null 2>&1 || true

    # EXACT flags from controlplane userdata_containerized.go
    # (tier-4 emission @ec2384c). The molecule-runtime entrypoint
    # wants a live workspace; we only need the container up long
    # enough to probe, so override the command with a sleep and
    # exercise the agent context directly.
    docker run -d \
      --name t4probe \
      --network host \
      --privileged \
      --pid=host \
      -v /:/host \
      -v /var/run/docker.sock:/var/run/docker.sock \
      --entrypoint /bin/sh \
      t4-conformance-test -c 'sleep 600' >/dev/null

    echo "=== Reproduce the agent-owned-token half of the entrypoint contract ==="
    # The real entrypoint chowns /configs to agent before gosu;
    # /configs is an unmounted VOLUME in this probe, so reproduce
    # the exact contract step the entrypoint performs, then assert.
    docker exec t4probe sh -c 'mkdir -p /configs && touch /configs/.auth_token && chown -R agent:agent /configs'

    echo "=== (b) token agent-ownership: stat /configs/.auth_token ==="
    OWNER_UID=$(docker exec t4probe stat -c '%u' /configs/.auth_token)
    echo "owner_uid=$OWNER_UID"
    if [ "$OWNER_UID" != "1000" ]; then
      echo "::error::T4 contract violated: /configs/.auth_token owner_uid=$OWNER_UID (expected 1000). Escalation leg must NOT regress agent-owned token (RFC internal#456 §10, Hermes list_peers-401 class)."
      exit 1
    fi

    echo "=== (a) host-root reach AS THE uid-1000 AGENT (not root) ==="
    # Run as the agent user (uid 1000), exactly as gosu would.
    AGENT_HOSTROOT_UID=$(docker exec -u agent t4probe sudo -n nsenter --target 1 --mount --pid -- id -u)
    echo "agent->host-root id -u = $AGENT_HOSTROOT_UID"
    if [ "$AGENT_HOSTROOT_UID" != "0" ]; then
      echo "::error::T4 contract violated: uid-1000 agent could NOT attain host root via 'sudo nsenter --target 1' (got uid=$AGENT_HOSTROOT_UID). T4 escalation leg ABSENT/broken."
      exit 1
    fi

    # Defense-in-depth: host-filesystem write+readback through /host
    # from the agent, proving real host reach (not just a namespace
    # trick on an isolated PID 1).
    MARKER="t4-conformance-$(date +%s)-$RANDOM"
    docker exec -u agent t4probe sudo -n sh -c "echo $MARKER > $PROBE_FILE"
    READBACK=$(docker exec -u agent t4probe sudo -n cat "$PROBE_FILE")
    docker exec -u agent t4probe sudo -n rm -f "$PROBE_FILE"
    if [ "$READBACK" != "$MARKER" ]; then
      echo "::error::T4 host-fs write+readback through /host failed (got '$READBACK' expected '$MARKER')."
      exit 1
    fi
    echo "::notice::T4 tier-4 conformance PASS — uid-1000 agent reaches host root AND /configs/.auth_token is agent-owned (both, atomically)."
|
||||
|
||||
# Aggregator that emits a single `validate` check name — matches the
|
||||
# historical required-check name on this repo's branch protection.
|
||||
validate:
  name: validate
  runs-on: ubuntu-latest
  needs: [validate-static, validate-runtime, t4-conformance]
  if: always()
  timeout-minutes: 1
  steps:
    - name: Aggregate
      run: |
        static="${{ needs.validate-static.result }}"
        runtime="${{ needs.validate-runtime.result }}"
        t4="${{ needs.t4-conformance.result }}"
        # Same expression the validate-runtime / t4-conformance `if:` guards
        # use, rendered as the string "true" or "false".
        is_fork="${{ github.event.pull_request.head.repo.fork == true }}"
        echo "validate-static: $static"
        echo "validate-runtime: $runtime"
        echo "t4-conformance: $t4"
        echo "fork PR: $is_fork"

        if [ "$static" != "success" ]; then
          echo "::error::validate-static did not succeed: $static"
          exit 1
        fi

        # $1 = job name, $2 = job result, $3 = optional suffix for the error.
        # `success` always passes. `skipped` passes ONLY on fork PRs, where
        # the job's own fork guard short-circuits it and the static gate is
        # the floor. On internal PRs and main pushes a skip means the live
        # gate did not run, so it fails closed like any other non-success.
        require_success_or_fork_skip() {
          case "$2" in
            success)
              return 0
              ;;
            skipped)
              if [ "$is_fork" = "true" ]; then
                return 0
              fi
              echo "::error::$1 was skipped on a non-fork event — skipped is only acceptable on fork PRs.${3:-}"
              return 1
              ;;
            *)
              echo "::error::$1 did not succeed: $2${3:-}"
              return 1
              ;;
          esac
        }

        require_success_or_fork_skip validate-runtime "$runtime" || exit 1

        # T4 conformance is a HARD gate on internal (non-fork) PRs and
        # main pushes: "verified" T4 requires this live gate green, never
        # inference.
        require_success_or_fork_skip t4-conformance "$t4" \
          " — T4 host-root reach / token-ownership not verified on a live container. Failing closed (RFC internal#456 §11)." || exit 1

        echo "::notice::Template validation aggregate passed (static=$static, runtime=$runtime, t4=$t4)"
|
||||
|
||||
tests:
|
||||
name: Adapter unit tests
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
# pyyaml is the runtime dep that adapter.py's _load_providers reads
|
||||
# /configs/config.yaml through. In production it arrives transitively
|
||||
# via molecule-ai-workspace-runtime; in this minimal test env we
|
||||
# install it explicitly so the YAML-loading code path is actually
|
||||
# exercised (without it, _load_providers' broad except-Exception
|
||||
# swallows the ImportError and silently falls back to _BUILTIN_PROVIDERS,
|
||||
# which is exactly the behavior that bit us 2026-04-30 when CI
|
||||
# claimed green on a build that couldn't route any third-party model).
|
||||
- run: pip install -q pytest pytest-asyncio pyyaml
|
||||
# Tests live under tests/ with their own pytest.ini that anchors
|
||||
# rootdir there — keeps pytest from importing the package
|
||||
# __init__.py (which does `from .adapter import ...` for runtime
|
||||
# discovery and can't be satisfied without molecule_runtime
|
||||
# installed). See tests/pytest.ini for the full rationale.
|
||||
- run: python3 -m pytest tests/ -v
|
||||
@@ -0,0 +1,214 @@
|
||||
name: publish-image
|
||||
|
||||
# Builds the claude-code workspace template Dockerfile and pushes it to ECR as
|
||||
# `<REGISTRY>/workspace-template-claude-code:latest` + `:sha-<7>`.
|
||||
#
|
||||
# Ported/inlined from molecule-ci's publish-template-image.yml reusable
|
||||
# workflow. Cross-repo `uses:` is BLOCKED on Gitea 1.22.6 because
|
||||
# DEFAULT_ACTIONS_URL=github causes the runner to attempt the lookup against
|
||||
# github.com, which always 404s even for same-instance repos.
|
||||
# (feedback_gitea_cross_repo_uses_blocked)
|
||||
#
|
||||
# Registry: production uses ECR (MOLECULE_IMAGE_REGISTRY env var on EC2 /
|
||||
# Railway) backed by org-level AWS creds. The OSS default in registry.go is
|
||||
# ghcr.io/molecule-ai but the ECR repo `molecule-ai/workspace-template-claude-code`
|
||||
# already exists (created by the migration sweep). No GHCR token is in the
|
||||
# credentials store — Gitea's GITHUB_TOKEN cannot authenticate to ghcr.io.
|
||||
#
|
||||
# Gitea 1.22.6 hostile-shape checklist applied:
|
||||
# - No workflow_dispatch.inputs (silently rejected on 1.22.6)
|
||||
# - No merge_group: trigger
|
||||
# - No cross-repo uses:
|
||||
# - GITHUB_SERVER_URL pinned at workflow level
|
||||
# (feedback_act_runner_github_server_url)
|
||||
# - No on.push.paths: (would permanently block path-excluded pushes)
|
||||
# - timeout-minutes on every job
|
||||
#
|
||||
# Cascade signal: molecule-core/publish-runtime.yml fans out by git-pushing
|
||||
# an updated `.runtime-version` file to this repo's main branch, which trips
|
||||
# the `on: push: branches: [main]` trigger here. The resolve-version job reads
|
||||
# that file and forwards the version as a RUNTIME_VERSION docker build-arg so
|
||||
# pip install resolves the exact fresh version.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
# Belt-and-suspenders for act_runner runners regenerated without the
|
||||
# config.yaml envs block. (feedback_act_runner_github_server_url)
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
ECR_REGISTRY: 153263036946.dkr.ecr.us-east-2.amazonaws.com
|
||||
IMAGE_NAME: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/workspace-template-claude-code
|
||||
AWS_DEFAULT_REGION: us-east-2
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
resolve-version:
|
||||
name: Resolve runtime version
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 2
|
||||
outputs:
|
||||
version: ${{ steps.read.outputs.version }}
|
||||
sha: ${{ steps.read.outputs.sha }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- id: read
|
||||
shell: bash
|
||||
run: |
|
||||
if [ -f .runtime-version ]; then
|
||||
v="$(head -n1 .runtime-version | tr -d '[:space:]')"
|
||||
echo "version=${v}" >> "$GITHUB_OUTPUT"
|
||||
echo "resolved runtime version from .runtime-version: ${v}"
|
||||
else
|
||||
echo "version=" >> "$GITHUB_OUTPUT"
|
||||
echo "no .runtime-version file — will use Dockerfile/requirements.txt pin"
|
||||
fi
|
||||
echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
publish:
|
||||
name: Build & push workspace-template-claude-code image
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
needs: resolve-version
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Lint — no bare imports of runtime modules
|
||||
# Catches `from plugins import ...` style bare imports that work in the
|
||||
# monorepo layout but explode at startup in the published container
|
||||
# (ModuleNotFoundError). Runs before Docker login so a bad adapter
|
||||
# returns red in seconds.
|
||||
# Fallback module list mirrors scripts/build_runtime_package.py:
|
||||
# TOP_LEVEL_MODULES as of 2026-04-27.
|
||||
shell: bash
|
||||
run: |
|
||||
set -eu
|
||||
FALLBACK_MODULES='plugins|adapter_base|config|main|preflight|prompt|coordinator|consolidation|events|heartbeat|transcript_auth|runtime_wedge|watcher|skill_loader|policies|adapters|builtin_tools|executor_helpers|a2a_executor|a2a_client|a2a_tools|a2a_cli|a2a_mcp_server|agent|agents_md|initial_prompt|molecule_ai_status|platform_auth|shared_runtime'
|
||||
RUNTIME_MODULES=""
|
||||
mkdir -p /tmp/runtime-wheel
|
||||
if pip download --quiet molecule-ai-workspace-runtime --no-deps -d /tmp/runtime-wheel 2>/dev/null; then
|
||||
WHEEL=$(ls /tmp/runtime-wheel/*.whl 2>/dev/null | head -1)
|
||||
if [ -n "$WHEEL" ]; then
|
||||
RUNTIME_MODULES=$(unzip -p "$WHEEL" molecule_runtime/_runtime_modules.json 2>/dev/null \
|
||||
| python3 -c "import sys,json; m=json.load(sys.stdin); print('|'.join(sorted(set(m['top_level_modules']) | set(m['subpackages']))))" 2>/dev/null || echo "")
|
||||
fi
|
||||
fi
|
||||
if [ -n "$RUNTIME_MODULES" ]; then
|
||||
echo "::notice::lint module list from published wheel"
|
||||
else
|
||||
RUNTIME_MODULES="$FALLBACK_MODULES"
|
||||
echo "::warning::could not read _runtime_modules.json from wheel — using inline fallback"
|
||||
fi
|
||||
if HITS=$(grep -nE "^\s*from (${RUNTIME_MODULES}) import" *.py 2>/dev/null); then
|
||||
echo "::error::Bare imports of runtime modules found — use 'from molecule_runtime.<module> import'"
|
||||
echo "$HITS" | sed 's/^/ /'
|
||||
exit 1
|
||||
fi
|
||||
echo "::notice::no bare imports of runtime modules in *.py files"
|
||||
|
||||
# The daemon check runs BEFORE the ECR login: `docker login` is itself the
# first command in this job that talks to Docker, so checking afterwards
# would mean an unreachable daemon surfaces as an opaque login failure
# instead of the targeted error below.
- name: Verify Docker daemon access
  run: |
    set -euo pipefail
    docker info >/dev/null 2>&1 || {
      echo "::error::Docker daemon is not accessible — check runner sock mount"
      exit 1
    }
    echo "Docker daemon OK"

- name: Log in to ECR
  env:
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  run: |
    set -euo pipefail
    # The password is piped via stdin so it never appears in argv or logs.
    aws ecr get-login-password --region us-east-2 | \
      docker login --username AWS --password-stdin "${ECR_REGISTRY}"
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
|
||||
|
||||
- name: Ensure ECR repository exists
|
||||
env:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
repo_path="${IMAGE_NAME#*/}"
|
||||
repo_path="${repo_path#*/}" # strip registry host + first slash → molecule-ai/workspace-template-claude-code
|
||||
if ! aws ecr describe-repositories --repository-names "${repo_path}" --region us-east-2 >/dev/null 2>&1; then
|
||||
aws ecr create-repository \
|
||||
--repository-name "${repo_path}" \
|
||||
--image-scanning-configuration scanOnPush=true \
|
||||
--region us-east-2 >/dev/null
|
||||
echo "::notice::created ECR repository ${repo_path}"
|
||||
else
|
||||
echo "ECR repository ${repo_path} already exists"
|
||||
fi
|
||||
|
||||
- name: Build image (load for smoke test, do not push yet)
|
||||
# Build into runner-local docker first. Smoke test runs before push so
|
||||
# a broken adapter.py never poisons :latest.
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
platforms: linux/amd64
|
||||
load: true
|
||||
push: false
|
||||
tags: ${{ env.IMAGE_NAME }}:sha-${{ needs.resolve-version.outputs.sha }}
|
||||
build-args: |
|
||||
RUNTIME_VERSION=${{ needs.resolve-version.outputs.version }}
|
||||
labels: |
|
||||
org.opencontainers.image.source=https://git.moleculesai.app/${{ github.repository }}
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.description=Molecule AI workspace template — claude-code runtime
|
||||
|
||||
- name: Smoke test — import every /app/*.py
|
||||
# Boot the locally-loaded image and import each *.py module to verify
|
||||
# all module-level imports resolve against the pip-installed runtime.
|
||||
shell: bash
|
||||
env:
|
||||
IMAGE: ${{ env.IMAGE_NAME }}:sha-${{ needs.resolve-version.outputs.sha }}
|
||||
run: |
|
||||
set -eu
|
||||
docker run --rm \
|
||||
-e WORKSPACE_ID=smoke-test \
|
||||
-e CLAUDE_CODE_OAUTH_TOKEN=sk-fake-smoke-token \
|
||||
-e ANTHROPIC_API_KEY=sk-fake-smoke-key \
|
||||
-e OPENAI_API_KEY=sk-fake-smoke-key \
|
||||
--entrypoint sh "${IMAGE}" -c '
|
||||
set -e
|
||||
cd /app
|
||||
for f in *.py; do
|
||||
[ "$f" = "__init__.py" ] && continue
|
||||
mod="${f%.py}"
|
||||
python3 -c "import $mod" || { echo "::error::failed to import $mod"; exit 1; }
|
||||
echo " import $mod OK"
|
||||
done
|
||||
'
|
||||
echo "::notice::${IMAGE}: all /app/*.py modules import cleanly"
|
||||
|
||||
- name: Push image to ECR (post-smoke)
|
||||
# Smoke passed — push both :latest and :sha-<7>. build-push-action
|
||||
# reuses the cached layers so this is a layer-push, not a rebuild.
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
platforms: linux/amd64
|
||||
push: true
|
||||
tags: |
|
||||
${{ env.IMAGE_NAME }}:latest
|
||||
${{ env.IMAGE_NAME }}:sha-${{ needs.resolve-version.outputs.sha }}
|
||||
build-args: |
|
||||
RUNTIME_VERSION=${{ needs.resolve-version.outputs.version }}
|
||||
labels: |
|
||||
org.opencontainers.image.source=https://git.moleculesai.app/${{ github.repository }}
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.description=Molecule AI workspace template — claude-code runtime
|
||||
|
||||
@@ -0,0 +1,196 @@
|
||||
name: Secret scan
|
||||
|
||||
# Hard CI gate. Refuses any PR / push whose diff additions contain a
|
||||
# recognisable credential. Defense-in-depth for the #2090-class incident
|
||||
# (2026-04-24): GitHub's hosted Copilot Coding Agent leaked a ghs_*
|
||||
# installation token into tenant-proxy/package.json via `npm init`
|
||||
# slurping the URL from a token-embedded origin remote. We can't fix
|
||||
# upstream's clone hygiene, so we gate here.
|
||||
#
|
||||
# Same regex set as the runtime's bundled pre-commit hook
|
||||
# (molecule-ai-workspace-runtime: molecule_runtime/scripts/pre-commit-checks.sh).
|
||||
# Keep the two sides aligned when adding patterns.
|
||||
#
|
||||
# Ported from .github/workflows/secret-scan.yml so the gate actually
|
||||
# fires on Gitea Actions. Differences from the GitHub version:
|
||||
# - drops `merge_group` event (Gitea has no merge queue)
|
||||
# - drops `workflow_call` (no cross-repo reusable invocation on Gitea)
|
||||
# - SELF path updated to .gitea/workflows/secret-scan.yml
|
||||
# The job name + step name are identical to the GitHub workflow so the
|
||||
# status-check context (`Secret scan / Scan diff for credential-shaped
|
||||
# strings (pull_request)`) matches branch protection on this template
|
||||
# repo's main branch. Before this port, the required-status was satisfied
|
||||
# only via a compensating signed POST /statuses/{SHA} because the
|
||||
# .github/ workflow was silently shadowed by the .gitea/ directory taking
|
||||
# precedence on this repo
|
||||
# (reference_molecule_core_actions_gitea_only — same applies here).
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
push:
|
||||
branches: [main, staging]
|
||||
|
||||
jobs:
|
||||
scan:
|
||||
name: Scan diff for credential-shaped strings
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 2 # need previous commit to diff against on push events
|
||||
|
||||
# For pull_request events the diff base may be many commits behind
|
||||
# HEAD and absent from the shallow clone. Fetch it explicitly.
|
||||
- name: Fetch PR base SHA (pull_request events only)
|
||||
if: github.event_name == 'pull_request'
|
||||
run: git fetch --depth=1 origin ${{ github.event.pull_request.base.sha }}
|
||||
|
||||
- name: Refuse if credential-shaped strings appear in diff additions
|
||||
env:
|
||||
# Plumb event-specific SHAs through env so the script doesn't
|
||||
# need conditional `${{ ... }}` interpolation per event type.
|
||||
# github.event.before/after only exist on push events;
|
||||
# pull_request has pull_request.base.sha / pull_request.head.sha.
|
||||
PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
|
||||
PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
|
||||
PUSH_BEFORE: ${{ github.event.before }}
|
||||
PUSH_AFTER: ${{ github.event.after }}
|
||||
run: |
|
||||
# Pattern set covers GitHub family (the actual #2090 vector),
|
||||
# Anthropic / OpenAI / Slack / AWS. Anchored on prefixes with low
|
||||
# false-positive rates against agent-generated content. Mirror of
|
||||
# molecule-ai-workspace-runtime/molecule_runtime/scripts/pre-commit-checks.sh
|
||||
# — keep aligned.
|
||||
SECRET_PATTERNS=(
|
||||
'ghp_[A-Za-z0-9]{36,}' # GitHub PAT (classic)
|
||||
'ghs_[A-Za-z0-9]{36,}' # GitHub App installation token
|
||||
'gho_[A-Za-z0-9]{36,}' # GitHub OAuth user-to-server
|
||||
'ghu_[A-Za-z0-9]{36,}' # GitHub OAuth user
|
||||
'ghr_[A-Za-z0-9]{36,}' # GitHub OAuth refresh
|
||||
'github_pat_[A-Za-z0-9_]{82,}' # GitHub fine-grained PAT
|
||||
'sk-ant-[A-Za-z0-9_-]{40,}' # Anthropic API key
|
||||
'sk-proj-[A-Za-z0-9_-]{40,}' # OpenAI project key
|
||||
'sk-svcacct-[A-Za-z0-9_-]{40,}' # OpenAI service-account key
|
||||
'sk-cp-[A-Za-z0-9_-]{60,}' # MiniMax API key (F1088 vector — caught only after the fact)
|
||||
'xox[baprs]-[A-Za-z0-9-]{20,}' # Slack tokens
|
||||
'AKIA[0-9A-Z]{16}' # AWS access key ID
|
||||
'ASIA[0-9A-Z]{16}' # AWS STS temp access key ID
|
||||
)
|
||||
|
||||
# Determine the diff base. Each event type stores its SHAs in
|
||||
# a different place — see the env block above.
|
||||
case "${{ github.event_name }}" in
|
||||
pull_request)
|
||||
BASE="$PR_BASE_SHA"
|
||||
HEAD="$PR_HEAD_SHA"
|
||||
;;
|
||||
*)
|
||||
BASE="$PUSH_BEFORE"
|
||||
HEAD="$PUSH_AFTER"
|
||||
;;
|
||||
esac
|
||||
|
||||
# On push events with shallow clones, BASE may be present in
|
||||
# the event payload but absent from the local object DB
|
||||
# (fetch-depth=2 doesn't always reach the previous commit
|
||||
# across true merges). Try fetching it on demand. If the
|
||||
# fetch fails — e.g. the SHA was force-overwritten — we fall
|
||||
# through to the empty-BASE branch below, which scans the
|
||||
# entire tree as if every file were new. Correct, just slow.
|
||||
if [ -n "$BASE" ] && ! echo "$BASE" | grep -qE '^0+$'; then
|
||||
if ! git cat-file -e "$BASE" 2>/dev/null; then
|
||||
git fetch --depth=1 origin "$BASE" 2>/dev/null || true
|
||||
fi
|
||||
fi
|
||||
|
||||
# Files added or modified in this change.
|
||||
if [ -z "$BASE" ] || echo "$BASE" | grep -qE '^0+$' || ! git cat-file -e "$BASE" 2>/dev/null; then
|
||||
# New branch / no previous SHA / BASE unreachable — check the
|
||||
# entire tree as added content. Slower, but correct on first
|
||||
# push.
|
||||
CHANGED=$(git ls-tree -r --name-only HEAD)
|
||||
DIFF_RANGE=""
|
||||
else
|
||||
CHANGED=$(git diff --name-only --diff-filter=AM "$BASE" "$HEAD")
|
||||
DIFF_RANGE="$BASE $HEAD"
|
||||
fi
|
||||
|
||||
if [ -z "$CHANGED" ]; then
|
||||
echo "No changed files to inspect."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Self-exclude: this workflow file legitimately contains the
|
||||
# pattern strings as regex literals. Without an exclude it would
|
||||
# block its own merge. Both the .github/ original and this
|
||||
# .gitea/ port are excluded so a sync between them stays clean.
|
||||
SELF_GITHUB=".github/workflows/secret-scan.yml"
|
||||
SELF_GITEA=".gitea/workflows/secret-scan.yml"
|
||||
|
||||
OFFENDING=""
|
||||
# `while IFS= read -r` (not `for f in $CHANGED`) so filenames
|
||||
# containing whitespace don't word-split silently — a path
|
||||
# with a space would otherwise produce two iterations on
|
||||
# tokens that aren't real filenames, breaking the
|
||||
# self-exclude + diff lookup.
|
||||
while IFS= read -r f; do
|
||||
[ -z "$f" ] && continue
|
||||
[ "$f" = "$SELF_GITHUB" ] && continue
|
||||
[ "$f" = "$SELF_GITEA" ] && continue
|
||||
if [ -n "$DIFF_RANGE" ]; then
|
||||
ADDED=$(git diff --no-color --unified=0 "$BASE" "$HEAD" -- "$f" 2>/dev/null | grep -E '^\+[^+]' || true)
|
||||
else
|
||||
# No diff range (new branch first push) — scan the full file
|
||||
# contents as if every line were new.
|
||||
ADDED=$(cat "$f" 2>/dev/null || true)
|
||||
fi
|
||||
[ -z "$ADDED" ] && continue
|
||||
for pattern in "${SECRET_PATTERNS[@]}"; do
|
||||
if echo "$ADDED" | grep -qE "$pattern"; then
|
||||
OFFENDING="${OFFENDING}${f} (matched: ${pattern})\n"
|
||||
break
|
||||
fi
|
||||
done
|
||||
done <<< "$CHANGED"
|
||||
|
||||
if [ -n "$OFFENDING" ]; then
|
||||
echo "::error::Credential-shaped strings detected in diff additions:"
|
||||
# `printf '%b' "$OFFENDING"` interprets backslash escapes
|
||||
# (the literal `\n` we appended above becomes a newline)
|
||||
# WITHOUT treating OFFENDING as a format string. Plain
|
||||
# `printf "$OFFENDING"` is a format-string sink: a filename
|
||||
# containing `%` would be interpreted as a conversion
|
||||
# specifier, corrupting the error message (or printing
|
||||
# `%(missing)` artifacts).
|
||||
printf '%b' "$OFFENDING"
|
||||
echo ""
|
||||
echo "The actual matched values are NOT echoed here, deliberately —"
|
||||
echo "round-tripping a leaked credential into CI logs widens the blast"
|
||||
echo "radius (logs are searchable + retained)."
|
||||
echo ""
|
||||
echo "Recovery:"
|
||||
echo " 1. Remove the secret from the file. Replace with an env var"
|
||||
echo " reference (e.g. \${{ secrets.GITHUB_TOKEN }} in workflows,"
|
||||
echo " process.env.X in code)."
|
||||
echo " 2. If the credential was already pushed (this PR's commit"
|
||||
echo " history reaches a public ref), treat it as compromised —"
|
||||
echo " ROTATE it immediately, do not just remove it. The token"
|
||||
echo " remains valid in git history forever and may be in any"
|
||||
echo " log/cache that consumed this branch."
|
||||
echo " 3. Force-push the cleaned commit (or stack a revert) and"
|
||||
echo " re-run CI."
|
||||
echo ""
|
||||
echo "If the match is a false positive (test fixture, docs example,"
|
||||
echo "or this workflow's own regex literals): use a clearly-fake"
|
||||
echo "placeholder like ghs_EXAMPLE_DO_NOT_USE that doesn't satisfy"
|
||||
echo "the length suffix, OR add the file path to the SELF exclude"
|
||||
echo "list in this workflow with a short reason."
|
||||
echo ""
|
||||
echo "Mirror of the regex set lives in the runtime's bundled"
|
||||
echo "pre-commit hook (molecule-ai-workspace-runtime:"
|
||||
echo "molecule_runtime/scripts/pre-commit-checks.sh) — keep aligned."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✓ No credential-shaped strings in this change."
|
||||
@@ -72,6 +72,27 @@ Skills persist across restarts. Use them to codify best practices, coding standa
|
||||
## Language
|
||||
Always respond in the same language the user uses. If Chinese, respond in Chinese. If English, respond in English. Match exactly.
|
||||
|
||||
## Workspace ownership tier — T4 (top-tier)
|
||||
|
||||
**T4 (top-tier) workspaces grant full host control.** The Docker socket is mounted into the container; the runtime is started with `--privileged`; the workspace owner can `docker exec` into siblings on the same host and read/write anything the host kernel will let it touch.
|
||||
|
||||
What this means in practice:
|
||||
|
||||
- The container's home directory `~/.claude/` (and its `sessions/`, `session-env/`, `settings.json`) is persisted across restarts via a host bind mount. Anything you (or the SDK) write there survives container churn — but ownership can drift.
|
||||
- If you see `EPERM` / "permission denied" on `~/.claude/` writes after a restart — particularly on `settings.json` or anything under `sessions/` — either the volume picked up `root:root` ownership from a prior boot, or a newer claude-code release wrote subdirectories that the entrypoint didn't pre-create.
|
||||
- Recovery (do this yourself; you have the rights): `sudo chown -R agent:agent ~/.claude/`. The entrypoint already runs a recursive chown unconditionally on every boot, so a restart also clears it — but `sudo chown` is faster and doesn't drop the active session.
|
||||
- The provisioned `~/.claude/settings.json` is informational: it documents that `permission_mode='bypassPermissions'` is the canonical operating mode, which is also set programmatically in `claude_sdk_executor.py` (the file is NOT the source of truth — the SDK kwargs are).
|
||||
|
||||
If `cat ~/.claude/settings.json` returns `No such file or directory`, you're on a workspace image older than 2026-05-15 — a restart picks up the new entrypoint, which stubs the file in place.
|
||||
|
||||
## Knowing your own model
|
||||
|
||||
Use the `get_runtime_identity` MCP tool to know what model you actually are. It reads the live process env (`MODEL`, `MODEL_PROVIDER`, `MOLECULE_MODEL`, `ANTHROPIC_BASE_URL`, `TIER`, `WORKSPACE_ID`, `ADAPTER_MODULE`) and returns the resolved values — no HTTP call, always works, always permitted by RBAC. Do NOT guess from your system prompt or from `requirements.txt`; the operator may have routed you to a different model via persona env between boots.
|
||||
|
||||
## Editing your own agent_card
|
||||
|
||||
Use the `update_agent_card` MCP tool to update this workspace's `agent_card` on the platform. Pass a JSON object — the platform validates required fields server-side. The change is broadcast as an `agent_card_updated` event so the canvas reflects the new card live. The tool is gated on `memory.write` capability, so read-only agents won't accidentally rewrite the card; T4 owners always have this capability.
|
||||
|
||||
## Runtime wedge integration
|
||||
|
||||
The `runtime_wedge` module (in `molecule_runtime`) is the universal cross-cutting holder for "this Python process can no longer serve queries — only a workspace restart will recover." It surfaces unrecoverable wedges to two consumers:
|
||||
|
||||
+40
-2
@@ -5,8 +5,23 @@ FROM python:3.11-slim
|
||||
# --add-assignee`, `git clone`, etc. per their idle/cron prompts).
|
||||
# Without these the team's claim-and-ship loop silently returns
|
||||
# "(no response generated)" because tools error out.
|
||||
#
|
||||
# T4 escalation leg (RFC internal#456 §9 / PR#474):
|
||||
# sudo + util-linux(nsenter) + docker.io(CLI) are baked here so the
|
||||
# uid-1000 `agent` (see useradd below — UNCHANGED, agent stays
|
||||
# uid-1000) has a wired, audited path to host root inside the
|
||||
# provisioner's `--privileged --pid=host -v /:/host
|
||||
# -v /var/run/docker.sock:/var/run/docker.sock` container. Without
|
||||
# sudo, a uid-1000 process in --privileged CANNOT nsenter/chroot
|
||||
# /host (--privileged grants caps to root, not uid-1000) and cannot
|
||||
# use the root:docker 0660 docker.sock — T4 would be
|
||||
# provisioner-shape-only (the documented ABSENT-escalation-leg gap).
|
||||
# The sudoers drop-in + docker-group add are below, after useradd,
|
||||
# so `agent` exists. This is ADDITIVE: it does NOT change the agent
|
||||
# uid and does NOT change /configs token ownership (still uid-1000,
|
||||
# enforced by entrypoint.sh + the Layer-3 conformance gate).
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl gosu nodejs npm ca-certificates git \
|
||||
curl gosu nodejs npm ca-certificates git sudo util-linux docker.io \
|
||||
&& install -m 0755 -d /etc/apt/keyrings \
|
||||
&& curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \
|
||||
&& chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
|
||||
@@ -17,8 +32,31 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
# Install claude-code CLI via npm
|
||||
RUN npm install -g @anthropic-ai/claude-code 2>/dev/null || true
|
||||
|
||||
# Create agent user
|
||||
# Create agent user — UNCHANGED. The agent runs as uid-1000; the T4
|
||||
# escalation leg below is additive and does NOT promote the agent to
|
||||
# root. claude-code still refuses --dangerously-skip-permissions as
|
||||
# root, and /configs/.auth_token must stay agent-owned (Hermes
|
||||
# list_peers 401 class — RFC internal#456 §10).
|
||||
RUN useradd -u 1000 -m -s /bin/bash agent
|
||||
|
||||
# --- T4 escalation leg (RFC internal#456 §9.3 / PR#474) ---
|
||||
# Wired path: uid-1000 agent -> host root inside the provisioner's
|
||||
# --privileged --pid=host -v /:/host -v docker.sock container.
|
||||
# 1. NOPASSWD sudoers drop-in (mode 0440, visudo-validated at build
|
||||
# so a malformed sudoers can never ship a broken-sudo image).
|
||||
# 2. agent in the `docker` group so the bind-mounted root:docker
|
||||
# 0660 /var/run/docker.sock is usable without sudo.
|
||||
# Atomic co-sequencing (RFC §10): this ships in the SAME image
|
||||
# revision as the uid-1000 + agent-owned-token entrypoint contract;
|
||||
# the Layer-3 conformance gate asserts BOTH on the running container.
|
||||
# T4 escalation leg: install a NOPASSWD sudoers drop-in for `agent`
# (mode 0440, syntax-checked with visudo so a malformed file fails the
# build) and add `agent` to the `docker` group. `id agent` prints the
# resulting uid/groups into the build log.
# NOTE(review): the image-local `docker` GID may not match the GID that
# owns the bind-mounted /var/run/docker.sock on the host — confirm the
# provisioner aligns them; otherwise socket access still needs sudo.
RUN set -eux; \
    printf 'agent ALL=(ALL) NOPASSWD:ALL\n' > /etc/sudoers.d/agent-t4; \
    chmod 0440 /etc/sudoers.d/agent-t4; \
    visudo -cf /etc/sudoers.d/agent-t4; \
    groupadd -f docker; \
    usermod -aG docker agent; \
    id agent
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# RUNTIME_VERSION is forwarded from the reusable publish workflow as
|
||||
|
||||
+38
-15
@@ -144,6 +144,20 @@ def _normalize_provider(entry: dict):
|
||||
"model_aliases": _coerce_string_list(entry.get("model_aliases"), lowercase=True),
|
||||
"base_url": entry.get("base_url") or None,
|
||||
"auth_env": _coerce_string_list(entry.get("auth_env"), lowercase=False),
|
||||
# Which env var the boot-time vendor-key projection writes the
|
||||
# vendor key INTO. Defaults to ANTHROPIC_AUTH_TOKEN (Bearer-style
|
||||
# — correct for MiniMax/GLM/DeepSeek Anthropic-compat shims).
|
||||
# Kimi For Coding's gateway authenticates with the x-api-key
|
||||
# header (per kimi.com's official Claude Code doc), which the
|
||||
# Anthropic SDK / claude CLI emits from ANTHROPIC_API_KEY — so
|
||||
# that provider's entry sets auth_token_env: ANTHROPIC_API_KEY.
|
||||
# Env-var names are case-sensitive; preserve case.
|
||||
"auth_token_env": (
|
||||
entry.get("auth_token_env")
|
||||
if isinstance(entry.get("auth_token_env"), str)
|
||||
and entry.get("auth_token_env").strip()
|
||||
else "ANTHROPIC_AUTH_TOKEN"
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -446,12 +460,18 @@ _VENDOR_KEY_NAMES = frozenset({
|
||||
|
||||
|
||||
def _project_vendor_auth(provider: dict) -> None:
|
||||
"""Project a per-vendor API key onto ANTHROPIC_AUTH_TOKEN at boot.
|
||||
"""Project a per-vendor API key onto the provider's auth-token env at boot.
|
||||
|
||||
Third-party Anthropic-compat providers (MiniMax, Z.ai, DeepSeek)
|
||||
reuse the Anthropic SDK's wire format with a Bearer token, which the
|
||||
``claude`` CLI / claude-code-sdk reads from ``ANTHROPIC_AUTH_TOKEN``.
|
||||
Kimi For Coding's gateway instead authenticates with the
|
||||
``x-api-key`` header (per kimi.com's official Claude Code
|
||||
integration doc), which the SDK emits from ``ANTHROPIC_API_KEY`` —
|
||||
so the projection target is per-provider, declared as
|
||||
``auth_token_env`` in the registry (default ``ANTHROPIC_AUTH_TOKEN``
|
||||
preserves the existing MiniMax/GLM/DeepSeek behavior unchanged).
|
||||
|
||||
Third-party Anthropic-compat providers (MiniMax, Z.ai, Moonshot,
|
||||
DeepSeek) all reuse the Anthropic SDK's wire format, which means the
|
||||
``claude`` CLI / claude-code-sdk reads the bearer token from
|
||||
``ANTHROPIC_AUTH_TOKEN`` no matter which vendor is being talked to.
|
||||
Pre-#244 the canvas surfaced the vendor-specific name
|
||||
(``MINIMAX_API_KEY``, etc.) to the user — so a user who saved only
|
||||
that name hit a silent 401 on first call while the boot audit said
|
||||
@@ -459,21 +479,24 @@ def _project_vendor_auth(provider: dict) -> None:
|
||||
/ hermes PR #38.
|
||||
|
||||
Behavior:
|
||||
* Let ``target`` = the provider's ``auth_token_env`` (default
|
||||
``ANTHROPIC_AUTH_TOKEN``).
|
||||
* If the matched provider's ``auth_env`` lists any of
|
||||
``_VENDOR_KEY_NAMES`` and that var is set, copy its value into
|
||||
``ANTHROPIC_AUTH_TOKEN`` so the SDK finds it.
|
||||
* **Idempotent**: if ``ANTHROPIC_AUTH_TOKEN`` is already set we
|
||||
do NOT overwrite — an explicit operator value (workspace
|
||||
secret) always wins over auto-projection.
|
||||
* Logs the projection by NAME (e.g. ``MINIMAX_API_KEY ->
|
||||
ANTHROPIC_AUTH_TOKEN``); never logs the secret VALUE. Same
|
||||
``target`` so the SDK finds it.
|
||||
* **Idempotent**: if ``target`` is already set we do NOT
|
||||
overwrite — an explicit operator value (workspace secret)
|
||||
always wins over auto-projection.
|
||||
* Logs the projection by NAME (e.g. ``KIMI_API_KEY ->
|
||||
ANTHROPIC_API_KEY``); never logs the secret VALUE. Same
|
||||
contract as ``_audit_auth_env_presence``.
|
||||
* No-op for providers whose ``auth_env`` doesn't reference a
|
||||
vendor-specific name (oauth, anthropic-api, or a third-party
|
||||
entry that hasn't been added to the registry yet).
|
||||
"""
|
||||
auth_env = provider.get("auth_env") or ()
|
||||
if os.environ.get("ANTHROPIC_AUTH_TOKEN"):
|
||||
target = provider.get("auth_token_env") or "ANTHROPIC_AUTH_TOKEN"
|
||||
if os.environ.get(target):
|
||||
# Operator override wins — never clobber an explicit value.
|
||||
return
|
||||
for name in auth_env:
|
||||
@@ -482,10 +505,10 @@ def _project_vendor_auth(provider: dict) -> None:
|
||||
value = os.environ.get(name)
|
||||
if not value:
|
||||
continue
|
||||
os.environ["ANTHROPIC_AUTH_TOKEN"] = value
|
||||
os.environ[target] = value
|
||||
logger.info(
|
||||
"auth env projection: %s -> ANTHROPIC_AUTH_TOKEN (provider=%s)",
|
||||
name, provider.get("name", "<unknown>"),
|
||||
"auth env projection: %s -> %s (provider=%s)",
|
||||
name, target, provider.get("name", "<unknown>"),
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
+100
-1
@@ -398,6 +398,79 @@ def _format_process_error(exc: BaseException) -> str:
|
||||
return " | ".join(parts)
|
||||
|
||||
|
||||
class ClaudeResultError(Exception):
|
||||
"""The CLI emitted a terminal `result` message with `is_error=true`.
|
||||
|
||||
internal#211/#212 root cause: the `claude` CLI signals provider-side
|
||||
failures (auth, entitlement, quota, upstream HTTP errors) NOT by
|
||||
raising a ProcessError but by emitting a normal `result` stream
|
||||
message with `is_error=true` whose `result`/`error`/`api_error_status`
|
||||
fields carry the human-readable, user-actionable, secret-safe reason
|
||||
(e.g. a 403 "Your organization has disabled Claude subscription
|
||||
access · Use an Anthropic API key instead, or ask your admin to
|
||||
enable access" / error code `oauth_org_not_allowed`).
|
||||
|
||||
Before this class, `_run_query` returned that message body as if it
|
||||
were a successful turn, OR — when `result` was empty and only
|
||||
`errors[]` carried text — the SDK's lossy `str(subtype)` collapsed
|
||||
it to the word "success", which `sanitize_agent_error` then reduced
|
||||
to the opaque "Agent error (Exception)". We now raise this with a
|
||||
pre-curated reason so the error path can surface it verbatim
|
||||
(it is already secret-safe; `sanitize_agent_error` still scrubs).
|
||||
"""
|
||||
|
||||
def __init__(self, reason: str, *, api_error_status: int | None = None,
|
||||
error_code: str | None = None) -> None:
|
||||
self.reason = reason
|
||||
self.api_error_status = api_error_status
|
||||
self.error_code = error_code
|
||||
super().__init__(reason)
|
||||
|
||||
|
||||
def _curate_result_error(message: Any) -> str:
|
||||
"""Build a user-actionable, secret-safe reason from an is_error ResultMessage.
|
||||
|
||||
Pulls the provider's own human message (`result`), the machine error
|
||||
code (`error`), the upstream HTTP status (`api_error_status`), and any
|
||||
`errors[]` list. `api_error_status`/`error` are read via getattr because
|
||||
the pinned claude-agent-sdk dataclass drops them on parse (they survive
|
||||
only if a newer SDK adds the fields) — `result`/`errors` are always
|
||||
populated by the parser and carry the actionable text today.
|
||||
|
||||
None of these fields are secret: an HTTP status, an error code like
|
||||
`oauth_org_not_allowed`, and the provider's own guidance string are
|
||||
exactly what the user must see to self-serve. `sanitize_agent_error`
|
||||
still runs its key/token/bearer scrub over the final string as a
|
||||
belt-and-braces second pass.
|
||||
"""
|
||||
parts: list[str] = []
|
||||
status = getattr(message, "api_error_status", None)
|
||||
code = getattr(message, "error", None)
|
||||
result = getattr(message, "result", None)
|
||||
errors = getattr(message, "errors", None)
|
||||
if status:
|
||||
parts.append(f"provider HTTP {status}")
|
||||
if code and isinstance(code, str):
|
||||
parts.append(code)
|
||||
# The provider's human guidance is the most important bit — prefer
|
||||
# `result`, fall back to joined `errors[]` (the lossy path the SDK
|
||||
# otherwise collapses to the bare subtype word "success").
|
||||
human = None
|
||||
if result and isinstance(result, str) and result.strip():
|
||||
human = result.strip()
|
||||
elif errors:
|
||||
joined = "; ".join(str(e) for e in errors if e)
|
||||
if joined.strip():
|
||||
human = joined.strip()
|
||||
if human:
|
||||
parts.append(human)
|
||||
if not parts:
|
||||
# Last-ditch: never raise a bare "" — keep the subtype so the log
|
||||
# still tells operators which terminal state the CLI reported.
|
||||
parts.append(f"claude CLI reported an error result ({getattr(message, 'subtype', 'unknown')})")
|
||||
return " — ".join(parts)
|
||||
|
||||
|
||||
@dataclass
|
||||
class QueryResult:
|
||||
"""Outcome of a single `query()` stream.
|
||||
@@ -605,6 +678,19 @@ class ClaudeSDKExecutor(AgentExecutor):
|
||||
sid = getattr(message, "session_id", None)
|
||||
if sid:
|
||||
session_id = sid
|
||||
# internal#211/#212: a terminal result with is_error=true
|
||||
# is a provider-side failure (auth/entitlement/quota/
|
||||
# upstream HTTP) whose result/error/api_error_status carry
|
||||
# the user-actionable reason. Surface it as a structured
|
||||
# error instead of silently returning the body as a normal
|
||||
# turn (or, when only errors[] is set, letting the SDK
|
||||
# collapse it to the opaque word "success").
|
||||
if getattr(message, "is_error", False):
|
||||
raise ClaudeResultError(
|
||||
_curate_result_error(message),
|
||||
api_error_status=getattr(message, "api_error_status", None),
|
||||
error_code=getattr(message, "error", None),
|
||||
)
|
||||
result_text = getattr(message, "result", None)
|
||||
finally:
|
||||
self._active_stream = None
|
||||
@@ -689,6 +775,11 @@ class ClaudeSDKExecutor(AgentExecutor):
|
||||
def _is_retryable(exc: BaseException) -> bool:
    """Check if an SDK exception looks like a transient rate-limit or
    capacity error that's worth retrying with backoff.

    Returns False for any ``ClaudeResultError``; otherwise True when the
    lowercased exception text contains one of ``_RETRYABLE_PATTERNS``.
    """
    # A terminal CLI is_error result (auth/entitlement/quota/provider
    # HTTP) is never worth retrying — retrying just delays surfacing
    # the actionable reason to the user. internal#211/#212.
    if isinstance(exc, ClaudeResultError):
        return False
    # Substring match against the lowercased message text.
    msg = str(exc).lower()
    return any(p in msg for p in _RETRYABLE_PATTERNS)
|
||||
|
||||
@@ -794,7 +885,15 @@ class ClaudeSDKExecutor(AgentExecutor):
|
||||
f"claude_agent_sdk wedge: {formatted[:200]} — restart workspace to recover"
|
||||
)
|
||||
break
|
||||
response_text = sanitize_agent_error(exc)
|
||||
# internal#211/#212: when the failure is a curated,
|
||||
# secret-safe provider reason (ClaudeResultError), pass
|
||||
# it through to the user instead of collapsing to the
|
||||
# opaque exception class name. sanitize_agent_error
|
||||
# still scrubs key/token/bearer-shaped substrings.
|
||||
if isinstance(exc, ClaudeResultError):
|
||||
response_text = sanitize_agent_error(exc, reason=exc.reason)
|
||||
else:
|
||||
response_text = sanitize_agent_error(exc)
|
||||
break
|
||||
finally:
|
||||
await set_current_task(self.heartbeat, "")
|
||||
|
||||
+42
-10
@@ -31,6 +31,16 @@ tier: 2
|
||||
# model_aliases : exact lowercase ids (e.g. ["sonnet", "opus"])
|
||||
# base_url : ANTHROPIC_BASE_URL to set; null = CLI default (anthropic-native)
|
||||
# auth_env : env vars accepted; any one being set satisfies auth
|
||||
# auth_token_env : (optional) the env var the boot-time vendor-key
|
||||
# projection writes the vendor key INTO. Defaults to
|
||||
# ANTHROPIC_AUTH_TOKEN (Bearer-style; correct for
|
||||
# MiniMax/GLM/DeepSeek Anthropic-compat shims). Kimi
|
||||
# For Coding's gateway authenticates with the
|
||||
# x-api-key header per kimi.com's official Claude Code
|
||||
# integration doc, which the Anthropic SDK / claude
|
||||
# CLI emits from ANTHROPIC_API_KEY (NOT the Bearer
|
||||
# ANTHROPIC_AUTH_TOKEN) — so its entry sets
|
||||
# auth_token_env: ANTHROPIC_API_KEY.
|
||||
providers:
|
||||
- name: anthropic-oauth
|
||||
auth_mode: oauth
|
||||
@@ -73,13 +83,27 @@ providers:
|
||||
base_url: https://api.z.ai/api/anthropic
|
||||
auth_env: [GLM_API_KEY, ANTHROPIC_AUTH_TOKEN, ANTHROPIC_API_KEY]
|
||||
|
||||
# Moonshot AI — Kimi family. platform.kimi.ai/docs/guide/agent-support.
|
||||
- name: moonshot
|
||||
# Kimi For Coding — Moonshot's coding-agent tier (K2.6 / "Kimi for
|
||||
# Coding"). Per kimi.com's OFFICIAL Claude Code integration doc
|
||||
# (kimi.com/code/docs/en/third-party-tools/other-coding-agents.html,
|
||||
# "Claude Code" section) the contract is:
|
||||
# ANTHROPIC_BASE_URL=https://api.kimi.com/coding/ (trailing slash)
|
||||
# ANTHROPIC_API_KEY=<the Kimi key> (x-api-key header)
|
||||
# The `sk-kimi-*` key (KIMI_API_KEY in SSOT) authenticates ONLY against
|
||||
# this gateway — the legacy api.moonshot.ai/anthropic surface 401s it.
|
||||
# The gateway routes to the served K2.6 model regardless of the Claude
|
||||
# model name on the wire (proven end-to-end via the OpenClaw template's
|
||||
# api.kimi.com/coding path, winnerProvider=custom-api-kimi-com).
|
||||
# auth_token_env pins the projection to ANTHROPIC_API_KEY (x-api-key)
|
||||
# rather than the default ANTHROPIC_AUTH_TOKEN (Bearer), which this
|
||||
# gateway rejects.
|
||||
- name: kimi-coding
|
||||
auth_mode: third_party_anthropic_compat
|
||||
model_prefixes: [kimi-]
|
||||
model_aliases: []
|
||||
base_url: https://api.moonshot.ai/anthropic
|
||||
auth_env: [KIMI_API_KEY, ANTHROPIC_AUTH_TOKEN, ANTHROPIC_API_KEY]
|
||||
base_url: https://api.kimi.com/coding/
|
||||
auth_env: [KIMI_API_KEY, ANTHROPIC_API_KEY, ANTHROPIC_AUTH_TOKEN]
|
||||
auth_token_env: ANTHROPIC_API_KEY
|
||||
|
||||
# DeepSeek — api-docs.deepseek.com/guides/anthropic_api. Note: their
|
||||
# endpoint silently maps unknown model ids to deepseek-v4-flash, so a
|
||||
@@ -175,15 +199,23 @@ runtime_config:
|
||||
name: Z.ai GLM-4.5 (third-party, Anthropic-API-compatible)
|
||||
required_env: [GLM_API_KEY]
|
||||
|
||||
# --- Moonshot AI Kimi family (third-party, Anthropic-API-compatible) ---
|
||||
# KIMI_API_KEY → ANTHROPIC_AUTH_TOKEN projection at boot.
|
||||
# platform.kimi.ai for docs. K2.5 is the latest agentic-coding tier;
|
||||
# K2 stays as a cheaper option.
|
||||
# --- Kimi For Coding (third-party, Anthropic-API-compatible) ---
|
||||
# Routed via the `kimi-coding` provider entry above: the adapter
|
||||
# auto-sets ANTHROPIC_BASE_URL=https://api.kimi.com/coding/ and
|
||||
# projects KIMI_API_KEY → ANTHROPIC_API_KEY (x-api-key) per
|
||||
# kimi.com's official Claude Code integration doc. The gateway
|
||||
# serves the K2.6 model regardless of the wire model id; the id
|
||||
# below is the gateway's own served-model name (mirrors the proven
|
||||
# OpenClaw `kimi-for-coding` route). K2.5 / K2 stay as aliases for
|
||||
# workspaces pinned to the older labels — they hit the same gateway.
|
||||
- id: kimi-for-coding
|
||||
name: Kimi K2.6 (Kimi For Coding, third-party Anthropic-API-compatible)
|
||||
required_env: [KIMI_API_KEY]
|
||||
- id: kimi-k2.5
|
||||
name: Moonshot Kimi K2.5 (third-party, Anthropic-API-compatible)
|
||||
name: Kimi K2.5 (Kimi For Coding, third-party Anthropic-API-compatible)
|
||||
required_env: [KIMI_API_KEY]
|
||||
- id: kimi-k2
|
||||
name: Moonshot Kimi K2 (third-party, Anthropic-API-compatible)
|
||||
name: Kimi K2 (Kimi For Coding, third-party Anthropic-API-compatible)
|
||||
required_env: [KIMI_API_KEY]
|
||||
|
||||
# --- DeepSeek (third-party, Anthropic-API-compatible) ---
|
||||
|
||||
+38
-2
@@ -42,6 +42,15 @@ log_boot_context
|
||||
if [ "$(id -u)" = "0" ]; then
|
||||
# Configs volume is created by Docker as root; agent needs write access
|
||||
# for plugin installs, memory writes, .auth_token rotation, etc.
|
||||
#
|
||||
# T4 atomic-co-sequencing contract (RFC internal#456 §10): the T4
|
||||
# escalation leg (sudo NOPASSWD + docker group, baked in the
|
||||
# Dockerfile) is ADDITIVE. The agent still runs uid-1000 and
|
||||
# /configs/.auth_token MUST remain agent-owned — escalation must
|
||||
# NOT regress the Hermes list_peers-401 token-ownership class.
|
||||
# This chown -R is the agent-ownership half of that contract; the
|
||||
# Layer-3 conformance gate asserts owner_uid==1000 on the running
|
||||
# container alongside the host-root-reach assertion.
|
||||
chown -R agent:agent /configs 2>/dev/null
|
||||
# /workspace handling — only chown when the contents are root-owned
|
||||
# (typical on Docker Desktop on Windows where host uid maps to 0).
|
||||
@@ -70,9 +79,36 @@ if [ "$(id -u)" = "0" ]; then
|
||||
# finds it when running as agent. The provisioner's mount point is
|
||||
# hardcoded to /root/.claude/sessions; we don't want to change the
|
||||
# platform contract just for this template.
|
||||
mkdir -p /home/agent/.claude
|
||||
#
|
||||
# NOTE (T4 perms regression): on FIRST boot the host volume mount for
|
||||
# /home/agent/.claude doesn't exist yet — entrypoint creates it and
|
||||
# the chown lands inside the `if -d /root/.claude/sessions` guard.
|
||||
# On SECOND boot with a populated /home/agent/.claude (sessions/,
|
||||
# session-env/, settings.json — any of which the SDK or agent has
|
||||
# written between boots) the dir may already be root-owned because
|
||||
# the SDK's working files inherited root's uid when written under
|
||||
# the prior root segment of an earlier entrypoint, OR because a
|
||||
# newer claude-code release writes new subdirs we don't create here.
|
||||
# That leaves uid-1000 agent EPERMing on every settings/session write
|
||||
# ("permission restrictions" surfaced to the canvas as a generic
|
||||
# Bash failure). Fix: create the well-known subdirs idempotently
|
||||
# and run the chown unconditionally (no-op when ownership is already
|
||||
# correct, fast on small trees). Stub ~/.claude/settings.json too so
|
||||
# the agent's introspection (cat ~/.claude/settings.json) succeeds
|
||||
# and shows operating mode — bypassPermissions is the canonical
|
||||
# mode set programmatically by claude_sdk_executor.py.
|
||||
mkdir -p /home/agent/.claude/sessions /home/agent/.claude/session-env
|
||||
if [ ! -f /home/agent/.claude/settings.json ]; then
|
||||
cat > /home/agent/.claude/settings.json <<'EOF'
|
||||
{
|
||||
"permissions": {"defaultMode": "bypassPermissions"},
|
||||
"_note": "Mode is also set programmatically by claude_sdk_executor.py (permission_mode='bypassPermissions'); this file is informational and lets `cat ~/.claude/settings.json` succeed."
|
||||
}
|
||||
EOF
|
||||
fi
|
||||
chown -R agent:agent /home/agent/.claude 2>/dev/null
|
||||
if [ -d /root/.claude/sessions ]; then
|
||||
chown -R agent:agent /root/.claude /home/agent/.claude 2>/dev/null
|
||||
chown -R agent:agent /root/.claude 2>/dev/null
|
||||
ln -sfn /root/.claude/sessions /home/agent/.claude/sessions
|
||||
fi
|
||||
|
||||
|
||||
@@ -129,12 +129,13 @@ _FIXTURE_PROVIDERS_YAML = textwrap.dedent("""
|
||||
base_url: https://api.z.ai/api/anthropic
|
||||
auth_env: [ANTHROPIC_AUTH_TOKEN, ANTHROPIC_API_KEY]
|
||||
|
||||
- name: moonshot
|
||||
- name: kimi-coding
|
||||
auth_mode: third_party_anthropic_compat
|
||||
model_prefixes: [kimi-]
|
||||
model_aliases: []
|
||||
base_url: https://api.moonshot.ai/anthropic
|
||||
auth_env: [ANTHROPIC_AUTH_TOKEN, ANTHROPIC_API_KEY]
|
||||
base_url: https://api.kimi.com/coding/
|
||||
auth_env: [KIMI_API_KEY, ANTHROPIC_API_KEY, ANTHROPIC_AUTH_TOKEN]
|
||||
auth_token_env: ANTHROPIC_API_KEY
|
||||
|
||||
- name: deepseek
|
||||
auth_mode: third_party_anthropic_compat
|
||||
@@ -554,7 +555,7 @@ def test_load_providers_parses_yaml_and_normalizes(tmp_path):
|
||||
names = [p["name"] for p in result]
|
||||
assert names == [
|
||||
"anthropic-oauth", "anthropic-api", "xiaomi-mimo", "minimax",
|
||||
"zai", "moonshot", "deepseek",
|
||||
"zai", "kimi-coding", "deepseek",
|
||||
]
|
||||
# YAML lists must be normalized to tuples for downstream lookup ergonomics.
|
||||
assert isinstance(result[0]["model_aliases"], tuple)
|
||||
@@ -564,15 +565,16 @@ def test_load_providers_parses_yaml_and_normalizes(tmp_path):
|
||||
@pytest.mark.parametrize("model,expected_provider,expected_url", [
|
||||
("GLM-4.6", "zai", "https://api.z.ai/api/anthropic"),
|
||||
("glm-4.5", "zai", "https://api.z.ai/api/anthropic"),
|
||||
("kimi-k2.5", "moonshot", "https://api.moonshot.ai/anthropic"),
|
||||
("kimi-k2.5", "kimi-coding", "https://api.kimi.com/coding/"),
|
||||
("kimi-for-coding", "kimi-coding", "https://api.kimi.com/coding/"),
|
||||
("deepseek-v4-pro", "deepseek", "https://api.deepseek.com/anthropic"),
|
||||
])
|
||||
@pytest.mark.asyncio
|
||||
async def test_setup_routes_extra_providers(
|
||||
adapter, monkeypatch, configs_dir, model, expected_provider, expected_url
|
||||
):
|
||||
"""The Z.ai / Moonshot / DeepSeek providers added in this PR must
|
||||
route correctly: model id → provider entry → ANTHROPIC_BASE_URL.
|
||||
"""The Z.ai / Kimi-For-Coding / DeepSeek providers must route
|
||||
correctly: model id → provider entry → ANTHROPIC_BASE_URL.
|
||||
Parametrized to keep the matrix coverage tight without 3 near-identical
|
||||
test bodies. Locks in the per-vendor base_url so a future YAML edit
|
||||
that mistypes z.ai's `/api/anthropic` suffix gets caught.
|
||||
|
||||
@@ -219,7 +219,6 @@ def test_glm_kimi_deepseek_also_project(adapter_module, monkeypatch):
|
||||
"""
|
||||
cases = [
|
||||
("zai", "GLM_API_KEY"),
|
||||
("moonshot", "KIMI_API_KEY"),
|
||||
("deepseek", "DEEPSEEK_API_KEY"),
|
||||
]
|
||||
for provider_name, env_name in cases:
|
||||
@@ -242,3 +241,83 @@ def test_glm_kimi_deepseek_also_project(adapter_module, monkeypatch):
|
||||
f"{env_name} must project onto ANTHROPIC_AUTH_TOKEN for "
|
||||
f"provider={provider_name}"
|
||||
)
|
||||
|
||||
|
||||
def test_kimi_coding_projects_into_anthropic_api_key(adapter_module, monkeypatch):
    """kimi-coding must project KIMI_API_KEY onto ANTHROPIC_API_KEY.

    Kimi For Coding's gateway authenticates via the x-api-key header (per
    kimi.com's official Claude Code doc), which the Anthropic SDK / claude
    CLI derives from ANTHROPIC_API_KEY rather than from the Bearer-style
    ANTHROPIC_AUTH_TOKEN that MiniMax/GLM/DeepSeek use. The provider entry
    therefore carries ``auth_token_env: ANTHROPIC_API_KEY``.

    Regression guard: the original mis-route put KIMI_API_KEY into
    ANTHROPIC_AUTH_TOKEN, which 401s against api.kimi.com/coding.
    """
    import os

    # Start from a clean slate so only the sentinel below can be projected.
    _clear_all_auth_env(monkeypatch, adapter_module)
    monkeypatch.setenv("KIMI_API_KEY", "sk-kimi-sentinel")

    kimi_provider = {
        "name": "kimi-coding",
        "auth_mode": "third_party_anthropic_compat",
        "model_prefixes": ("kimi-",),
        "model_aliases": (),
        "base_url": "https://api.kimi.com/coding/",
        "auth_env": ("KIMI_API_KEY", "ANTHROPIC_API_KEY", "ANTHROPIC_AUTH_TOKEN"),
        "auth_token_env": "ANTHROPIC_API_KEY",
    }
    adapter_module._project_vendor_auth(kimi_provider)

    projected_api_key = os.environ.get("ANTHROPIC_API_KEY")
    assert projected_api_key == "sk-kimi-sentinel", (
        "KIMI_API_KEY must project onto ANTHROPIC_API_KEY (x-api-key) for "
        "the kimi-coding provider per kimi.com's official Claude Code doc"
    )

    bearer_token = os.environ.get("ANTHROPIC_AUTH_TOKEN")
    assert bearer_token is None, (
        "KIMI_API_KEY must NOT land in ANTHROPIC_AUTH_TOKEN — the Bearer "
        "header 401s against api.kimi.com/coding (the original mis-route)"
    )
|
||||
|
||||
|
||||
def test_kimi_coding_operator_anthropic_api_key_wins(adapter_module, monkeypatch):
    """An operator-set ANTHROPIC_API_KEY survives the kimi-coding projection.

    The projection target is per-provider (``auth_token_env``); this pins
    that a value already present in that target is never overwritten by
    the vendor key.
    """
    import os

    _clear_all_auth_env(monkeypatch, adapter_module)
    # Both the vendor key and an explicit operator value are present.
    monkeypatch.setenv("KIMI_API_KEY", "sk-kimi-sentinel")
    monkeypatch.setenv("ANTHROPIC_API_KEY", "operator-value")

    kimi_provider = {
        "name": "kimi-coding",
        "auth_mode": "third_party_anthropic_compat",
        "model_prefixes": ("kimi-",),
        "model_aliases": (),
        "base_url": "https://api.kimi.com/coding/",
        "auth_env": ("KIMI_API_KEY", "ANTHROPIC_API_KEY", "ANTHROPIC_AUTH_TOKEN"),
        "auth_token_env": "ANTHROPIC_API_KEY",
    }
    adapter_module._project_vendor_auth(kimi_provider)

    surviving_value = os.environ.get("ANTHROPIC_API_KEY")
    assert surviving_value == "operator-value", (
        "explicit operator ANTHROPIC_API_KEY must win over auto-projection"
    )
|
||||
|
||||
|
||||
def test_normalize_provider_parses_auth_token_env(adapter_module):
    """``_normalize_provider`` exposes ``auth_token_env`` with a default.

    An explicit value is passed through; when the key is absent the
    normalized entry must report ANTHROPIC_AUTH_TOKEN, which keeps the
    existing MiniMax/GLM/DeepSeek behavior.
    """
    normalize = adapter_module._normalize_provider

    overridden = normalize({
        "name": "kimi-coding",
        "auth_mode": "third_party_anthropic_compat",
        "base_url": "https://api.kimi.com/coding/",
        "auth_env": ["KIMI_API_KEY", "ANTHROPIC_API_KEY"],
        "auth_token_env": "ANTHROPIC_API_KEY",
    })
    assert overridden["auth_token_env"] == "ANTHROPIC_API_KEY"

    # No auth_token_env key at all -> the default target applies.
    defaulted = normalize({
        "name": "minimax",
        "auth_mode": "third_party_anthropic_compat",
        "base_url": "https://api.minimax.io/anthropic",
        "auth_env": ["MINIMAX_API_KEY"],
    })
    assert defaulted["auth_token_env"] == "ANTHROPIC_AUTH_TOKEN"
|
||||
|
||||
@@ -0,0 +1,299 @@
|
||||
"""internal#211/#212: a terminal `result` message with is_error=true must
|
||||
surface the provider's actionable, secret-safe reason — NOT be returned as
|
||||
a normal turn and NOT collapse to the opaque "Agent error (Exception)".
|
||||
|
||||
Root cause was a two-cut loss:
|
||||
1. claude_sdk_executor._run_query read ResultMessage.result but ignored
|
||||
`is_error`, so a 403 org-disabled result was either returned as if it
|
||||
were a successful answer or (when only errors[] carried text) reduced
|
||||
by the SDK to the bare subtype word "success".
|
||||
2. sanitize_agent_error then reduced whatever exception to its class name.
|
||||
|
||||
These tests pin:
|
||||
- _curate_result_error builds a reason carrying the provider HTTP status,
|
||||
the error code, and the provider's human guidance.
|
||||
- _run_query raises ClaudeResultError (a non-retryable terminal error)
|
||||
when the stream yields a ResultMessage with is_error=true.
|
||||
- The reason is preserved through the executor's sanitize call.
|
||||
- A secret-shaped payload is still scrubbed.
|
||||
|
||||
Regression-injection-checked: reverting the is_error branch in _run_query
|
||||
makes test_run_query_raises_on_is_error fail (no exception raised); reverting
|
||||
the _curate_result_error field reads makes the field-content asserts fail.
|
||||
|
||||
Stub pattern mirrors tests/test_runtime_wedge_mirror.py so the file runs in
|
||||
CI with only `pytest pytest-asyncio pyyaml` installed.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import types
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---- Stubs (mirror of test_runtime_wedge_mirror._install_executor_stubs) ----
|
||||
|
||||
|
||||
def _ensure_module(dotted: str) -> types.ModuleType:
    """Return ``sys.modules[dotted]``, registering an empty module if absent.

    An already-registered entry is returned untouched; otherwise a fresh
    ``types.ModuleType`` named ``dotted`` is stored and returned.
    """
    try:
        return sys.modules[dotted]
    except KeyError:
        placeholder = types.ModuleType(dotted)
        sys.modules[dotted] = placeholder
        return placeholder
|
||||
|
||||
|
||||
def _ensure_attr(mod: types.ModuleType, name: str, value: object) -> None:
    """Set ``mod.<name> = value``, unconditionally replacing any prior value.

    The override is deliberate. conftest.py::_install_stubs runs at
    collection time and pre-registers bare placeholders (for example
    ``ResultMessage = type("ResultMessage", (), {})``, which accepts no
    kwargs, and a MagicMock ``claude_sdk_executor`` module). A helper that
    skipped already-present attributes would let those placeholders win in
    a full-suite run while still passing in isolation. This file owns the
    exact stub shapes ``_run_query`` / ``_curate_result_error`` need, so it
    force-installs them; ``_load_executor()`` then re-imports the real
    ``claude_sdk_executor`` against them for every test.
    """
    setattr(mod, name, value)
|
||||
|
||||
|
||||
class _StubResultMessage:
    """Concrete stand-in for ``sdk.ResultMessage``.

    It is a real class (not a mock) so ``isinstance(message,
    sdk.ResultMessage)`` in ``_run_query`` behaves normally. It carries the
    fields the CLI sends on a 403 org-disabled result;
    ``api_error_status`` and ``error`` are read with ``getattr`` by
    ``_curate_result_error``, so they are optional here as well.
    """

    def __init__(
        self,
        *,
        is_error,
        result=None,
        errors=None,
        api_error_status=None,
        error=None,
        subtype="success",
        session_id="sess-1",
    ):
        # Outcome flag and textual payloads.
        self.is_error = is_error
        self.result = result
        self.errors = errors
        # Provider-level error details.
        self.api_error_status = api_error_status
        self.error = error
        # Bookkeeping fields.
        self.subtype = subtype
        self.session_id = session_id
|
||||
|
||||
|
||||
class _StubAssistantMessage:
    """Minimal stand-in for ``sdk.AssistantMessage`` holding a content list."""

    def __init__(self, content=None):
        # Any falsy argument (None, empty list, ...) becomes a new empty list.
        self.content = content if content else []
|
||||
|
||||
|
||||
class _StubTextBlock:
    """Minimal stand-in for ``sdk.TextBlock`` exposing a ``text`` attribute."""

    def __init__(self, text):
        self.text = text
|
||||
|
||||
|
||||
def _install_executor_stubs():
    """Force-install every stub module/attribute set up by this file.

    Registers placeholder ``claude_agent_sdk``, ``a2a.*`` and
    ``molecule_runtime.executor_helpers`` modules in ``sys.modules`` (via
    ``_ensure_module``) and overwrites the attributes listed below on each
    call (via ``_ensure_attr``).
    """

    # Faithful mirror of molecule-core sanitize_agent_error's reason-path
    # contract (the real impl lives in the runtime package, not installed
    # in CI). Surfaces `reason` verbatim and still scrubs sk-/bearer.
    def _sanitize(exc=None, category=None, stderr=None, reason=None):
        import re

        if category:
            tag = category
        elif exc is not None:
            tag = type(exc).__name__
        else:
            tag = "unknown"

        if reason:
            clean = re.sub(
                r"(?i)(?:bearer|token|api[_-]?key|sk-)[ :=]+[A-Za-z0-9_/.-]{20,}",
                "[REDACTED]", reason,
            )
            return f"Agent error ({tag}): {clean}"
        if stderr:
            return f"Agent error ({tag}): {stderr}"
        return f"Agent error ({tag}) — see workspace logs for details."

    # --- claude_agent_sdk -------------------------------------------------
    sdk = _ensure_module("claude_agent_sdk")
    sdk_stubs = (
        ("ClaudeAgentOptions", MagicMock(name="ClaudeAgentOptions")),
        ("AssistantMessage", _StubAssistantMessage),
        ("TextBlock", _StubTextBlock),
        ("ResultMessage", _StubResultMessage),
        ("query", MagicMock(name="query")),
    )
    for attr, stub in sdk_stubs:
        _ensure_attr(sdk, attr, stub)

    # --- a2a ----------------------------------------------------------------
    _ensure_module("a2a")
    _ensure_module("a2a.server")
    agent_execution = _ensure_module("a2a.server.agent_execution")
    for class_name in ("AgentExecutor", "RequestContext"):
        _ensure_attr(agent_execution, class_name, type(class_name, (), {}))
    events = _ensure_module("a2a.server.events")
    _ensure_attr(events, "EventQueue", type("EventQueue", (), {}))
    a2a_helpers = _ensure_module("a2a.helpers")
    _ensure_attr(a2a_helpers, "new_text_message", lambda *_a, **_kw: None)

    # --- molecule_runtime.executor_helpers ----------------------------------
    _ensure_module("molecule_runtime")
    helpers = _ensure_module("molecule_runtime.executor_helpers")
    # Each callable is its own lambda so list-returning stubs hand out a
    # fresh list per call, and get_a2a_instructions stays keyword-only.
    helper_stubs = {
        "CONFIG_MOUNT": "/configs",
        "WORKSPACE_MOUNT": "/workspace",
        "MEMORY_CONTENT_MAX_CHARS": 10000,
        "auto_push_hook": lambda *a, **kw: None,
        "brief_summary": lambda *a, **kw: "",
        "collect_outbound_files": lambda *a, **kw: [],
        "commit_memory": lambda *a, **kw: None,
        "extract_attached_files": lambda *a, **kw: [],
        "extract_message_text": lambda *a, **kw: "",
        "get_a2a_instructions": lambda **kw: "",
        "get_hma_instructions": lambda *a, **kw: "",
        "get_mcp_server_path": lambda *a, **kw: "/dev/null",
        "get_system_prompt": lambda *a, **kw: "",
        "read_delegation_results": lambda *a, **kw: "",
        "recall_memories": lambda *a, **kw: "",
        "sanitize_agent_error": _sanitize,
        "set_current_task": lambda *a, **kw: None,
    }
    for attr, stub in helper_stubs.items():
        _ensure_attr(helpers, attr, stub)
|
||||
|
||||
|
||||
def _load_executor():
    """Import ``claude_sdk_executor`` fresh against this file's stubs.

    Re-installs the stubs, makes the directory two levels above this file
    importable, drops any cached ``claude_sdk_executor`` entry from
    ``sys.modules`` and imports the module again.

    Returns:
        The newly imported ``claude_sdk_executor`` module.
    """
    _install_executor_stubs()

    this_file = os.path.abspath(__file__)
    tests_parent = os.path.dirname(os.path.dirname(this_file))
    if tests_parent not in sys.path:
        sys.path.insert(0, tests_parent)

    # Drop the cached module so the import below re-executes it.
    sys.modules.pop("claude_sdk_executor", None)
    import claude_sdk_executor  # noqa: WPS433

    return claude_sdk_executor
|
||||
|
||||
|
||||
# Verbatim `result` text the CLI emitted on internal#211 (org-disabled 403).
_211_RESULT = (
    "Your organization has disabled Claude subscription access "
    "for Claude Code · Use an Anthropic API key instead, "
    "or ask your admin to enable access"
)
|
||||
|
||||
|
||||
# ─── _curate_result_error ──────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_curate_includes_status_code_and_human_guidance():
    """The curated reason carries HTTP status, error code and guidance text."""
    cse = _load_executor()
    org_disabled = cse.sdk.ResultMessage(
        is_error=True,
        result=_211_RESULT,
        errors=[],
        api_error_status=403,
        error="oauth_org_not_allowed",
        subtype="success",
    )

    reason = cse._curate_result_error(org_disabled)

    expected_fragments = (
        "403",
        "oauth_org_not_allowed",
        "disabled Claude subscription access",
        "ask your admin to enable access",
    )
    for fragment in expected_fragments:
        assert fragment in reason
    # Must NOT degrade to the bare subtype word.
    assert reason.strip().lower() != "success"
|
||||
|
||||
|
||||
def test_curate_falls_back_to_errors_list_when_result_empty():
    """Text delivered only through ``errors[]`` must still be surfaced.

    This is the path the SDK would otherwise collapse to the bare subtype
    word "success".
    """
    cse = _load_executor()
    errors_only = cse.sdk.ResultMessage(
        is_error=True,
        result=None,
        errors=["upstream 503 from provider", "retry later"],
        subtype="success",
    )

    reason = cse._curate_result_error(errors_only)

    assert "upstream 503 from provider" in reason
    assert reason.strip().lower() != "success"
|
||||
|
||||
|
||||
def test_curate_never_returns_empty():
    """With no result and no errors, the reason still names the subtype."""
    cse = _load_executor()
    bare_error = cse.sdk.ResultMessage(
        is_error=True,
        result=None,
        errors=None,
        subtype="error_max_turns",
    )

    reason = cse._curate_result_error(bare_error)

    assert reason.strip()
    assert "error_max_turns" in reason
|
||||
|
||||
|
||||
# ─── _run_query raises on is_error ──────────────────────────────────────
|
||||
|
||||
|
||||
def _make_executor(cse):
    """Create a ``ClaudeSDKExecutor`` instance while skipping ``__init__``.

    The real constructor needs heartbeat/config wiring; the tests only
    exercise ``_run_query``, so the instance is allocated directly and
    just ``_active_stream`` is initialised.

    Args:
        cse: the loaded ``claude_sdk_executor`` module.

    Returns:
        The bare executor instance with ``_active_stream`` set to ``None``.
    """
    executor = object.__new__(cse.ClaudeSDKExecutor)
    executor._active_stream = None
    return executor
|
||||
|
||||
|
||||
def test_run_query_raises_on_is_error():
    """A streamed ResultMessage with is_error=True raises ClaudeResultError."""
    cse = _load_executor()
    failing_result = cse.sdk.ResultMessage(
        is_error=True,
        result=_211_RESULT,
        errors=[],
        api_error_status=403,
        error="oauth_org_not_allowed",
    )

    async def _single_message_stream(*_a, **_kw):
        yield failing_result

    # Replace the SDK entry point with a stream that yields only the error.
    cse.sdk.query = lambda **_kw: _single_message_stream()
    executor = _make_executor(cse)

    with pytest.raises(cse.ClaudeResultError) as raised:
        asyncio.run(executor._run_query(prompt="hi", options=None))

    err = raised.value
    assert err.api_error_status == 403
    assert err.error_code == "oauth_org_not_allowed"
    assert "disabled Claude subscription access" in err.reason
|
||||
|
||||
|
||||
def test_run_query_returns_normally_when_not_error():
    """The success path is unchanged: text and session id are returned."""
    cse = _load_executor()
    success_result = cse.sdk.ResultMessage(
        is_error=False,
        result="all done",
        session_id="s-9",
    )

    async def _single_message_stream(*_a, **_kw):
        yield success_result

    cse.sdk.query = lambda **_kw: _single_message_stream()
    executor = _make_executor(cse)

    outcome = asyncio.run(executor._run_query(prompt="hi", options=None))

    assert outcome.text == "all done"
    assert outcome.session_id == "s-9"
|
||||
|
||||
|
||||
def test_claude_result_error_is_not_retryable():
    """``_is_retryable`` must report False for a ClaudeResultError.

    Retrying a terminal provider error would only delay the user seeing
    the actionable reason until after the backoff attempts.
    """
    cse = _load_executor()
    # The message deliberately contains 'rate'/'limit'/'429' (retryable
    # substrings); a ClaudeResultError is terminal regardless.
    terminal_error = cse.ClaudeResultError(
        "provider HTTP 429 rate limit hit",
        api_error_status=429,
    )

    verdict = cse.ClaudeSDKExecutor._is_retryable(terminal_error)

    assert verdict is False
|
||||
|
||||
|
||||
# ─── End-to-end: reason reaches sanitize_agent_error verbatim ───────────
|
||||
|
||||
|
||||
def test_curated_reason_survives_sanitize_and_scrubs_secrets():
    """The curated reason passes through sanitize; key-shaped text is redacted."""
    cse = _load_executor()
    from molecule_runtime.executor_helpers import sanitize_agent_error

    # Part 1: the actionable reason reaches the output intact.
    org_disabled = cse.ClaudeResultError(
        "provider HTTP 403 — oauth_org_not_allowed — " + _211_RESULT,
        api_error_status=403,
        error_code="oauth_org_not_allowed",
    )
    surfaced = sanitize_agent_error(org_disabled, reason=org_disabled.reason)
    for fragment in (
        "403",
        "oauth_org_not_allowed",
        "ask your admin to enable access",
    ):
        assert fragment in surfaced
    assert "see workspace logs" not in surfaced

    # Part 2: a secret-shaped payload is still scrubbed.
    # The synthetic Anthropic-shaped key is assembled at runtime so the
    # required `Secret scan` gate (pattern `sk-ant-[A-Za-z0-9_-]{40,}`)
    # does not false-positive on a fixture literal. The assembled value
    # still has the `sk-ant-…<40+ chars>` shape; it just never appears as
    # a credential-shaped string on a single source line.
    fake_key = "".join(("sk-", "ant-", "DEADBEEF" * 3, "0123456789abcdef"))
    leaky = cse.ClaudeResultError(
        "auth failed Authorization: Bearer " + fake_key
    )
    scrubbed = sanitize_agent_error(leaky, reason=leaky.reason)
    assert "[REDACTED]" in scrubbed
    assert fake_key not in scrubbed
|
||||
Reference in New Issue
Block a user