Merge pull request 'harden(provisioner): denylist SCM-write tokens from tenant workspace env (forensic #145)' (#1277) from harden/provisioner-scm-token-denylist into staging
Block internal-flavored paths / Block forbidden paths (push) Successful in 17s
Harness Replays / detect-changes (push) Successful in 19s
CI / Detect changes (push) Successful in 1m14s
E2E Chat / detect-changes (push) Successful in 1m25s
E2E API Smoke Test / detect-changes (push) Successful in 1m28s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 26s
Handlers Postgres Integration / detect-changes (push) Successful in 1m18s
Runtime PR-Built Compatibility / detect-changes (push) Successful in 1m9s
CI / Shellcheck (E2E scripts) (push) Successful in 8s
Harness Replays / Harness Replays (push) Successful in 8s
CI / Python Lint & Test (push) Successful in 10s
E2E Chat / E2E Chat (push) Failing after 44s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (push) Failing after 1m15s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 2m17s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 5m57s
CI / Canvas (Next.js) (push) Successful in 18m50s
CI / Platform (Go) (push) Successful in 19m21s
CI / Canvas Deploy Reminder (push) Successful in 6s
CI / all-required (push) Has been cancelled
Block internal-flavored paths / Block forbidden paths (push) Successful in 17s
Harness Replays / detect-changes (push) Successful in 19s
CI / Detect changes (push) Successful in 1m14s
E2E Chat / detect-changes (push) Successful in 1m25s
E2E API Smoke Test / detect-changes (push) Successful in 1m28s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 26s
Handlers Postgres Integration / detect-changes (push) Successful in 1m18s
Runtime PR-Built Compatibility / detect-changes (push) Successful in 1m9s
CI / Shellcheck (E2E scripts) (push) Successful in 8s
Harness Replays / Harness Replays (push) Successful in 8s
CI / Python Lint & Test (push) Successful in 10s
E2E Chat / E2E Chat (push) Failing after 44s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (push) Failing after 1m15s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 2m17s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 5m57s
CI / Canvas (Next.js) (push) Successful in 18m50s
CI / Platform (Go) (push) Successful in 19m21s
CI / Canvas Deploy Reminder (push) Successful in 6s
CI / all-required (push) Has been cancelled
This commit was merged in pull request #1277.
This commit is contained in:
@@ -178,12 +178,21 @@ func (p *CPProvisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string,
|
||||
// /admin/liveness and other admin-gated platform endpoints (core#831).
|
||||
// p.adminToken is read from os.Getenv("ADMIN_TOKEN") at provisioner creation;
|
||||
// it is also used for CP→platform HTTP auth but those are separate concerns.
|
||||
env := cfg.EnvVars
|
||||
if p.adminToken != "" {
|
||||
env = make(map[string]string, len(cfg.EnvVars)+1)
|
||||
for k, v := range cfg.EnvVars {
|
||||
env[k] = v
|
||||
//
|
||||
// Forensic #145 hardening: tenant workspaces run on EC2 via this path, so
|
||||
// the SCM-write-token denylist (see buildContainerEnv) is enforced here
|
||||
// too. Always build a filtered copy — never pass cfg.EnvVars through
|
||||
// verbatim — so a latent persona-merged GITEA_TOKEN can't reach the
|
||||
// tenant container regardless of whether ADMIN_TOKEN is set.
|
||||
env := make(map[string]string, len(cfg.EnvVars)+1)
|
||||
for k, v := range cfg.EnvVars {
|
||||
if isSCMWriteTokenKey(k) {
|
||||
log.Printf("CPProvisioner.Start: dropped SCM-write credential %q from tenant workspace env (forensic #145 guard)", k)
|
||||
continue
|
||||
}
|
||||
env[k] = v
|
||||
}
|
||||
if p.adminToken != "" {
|
||||
env["ADMIN_TOKEN"] = p.adminToken
|
||||
}
|
||||
// Collect template files and generated configs, with OFFSEC-010 guards:
|
||||
@@ -343,6 +352,7 @@ func collectCPConfigFiles(cfg WorkspaceConfig) (map[string]string, error) {
|
||||
}
|
||||
return files, nil
|
||||
}
|
||||
|
||||
// Stop terminates the workspace's EC2 instance via the control plane.
|
||||
//
|
||||
// Looks up the actual EC2 instance_id from the workspaces table before
|
||||
@@ -497,7 +507,9 @@ func (p *CPProvisioner) IsRunning(ctx context.Context, workspaceID string) (bool
|
||||
// Don't leak the body — upstream errors may echo headers.
|
||||
return true, fmt.Errorf("cp provisioner: status: unexpected %d", resp.StatusCode)
|
||||
}
|
||||
var result struct{ State string `json:"state"` }
|
||||
var result struct {
|
||||
State string `json:"state"`
|
||||
}
|
||||
// Cap body read at 64 KiB for parity with Start — a misconfigured
|
||||
// or compromised CP streaming a huge body could otherwise exhaust
|
||||
// memory in this hot path (called reactively per-request from
|
||||
|
||||
@@ -591,6 +591,28 @@ func ValidateWorkspaceAccess(access, workspacePath string) error {
|
||||
}
|
||||
}
|
||||
|
||||
// scmWriteTokenKeys is the explicit denylist of environment variable names
|
||||
// that carry a Git SCM *write* credential (push / merge / approve). These
|
||||
// must never reach a tenant workspace container — see the forensic #145
|
||||
// rationale in buildContainerEnv. Kept as an exact-match set rather than a
|
||||
// substring/prefix heuristic so the guard is auditable and can't silently
|
||||
// over-strip a legitimately-named var.
|
||||
var scmWriteTokenKeys = map[string]struct{}{
|
||||
"GITEA_TOKEN": {},
|
||||
"GITHUB_TOKEN": {},
|
||||
"GH_TOKEN": {}, // gh CLI honours GH_TOKEN as a GITHUB_TOKEN alias
|
||||
"GITLAB_TOKEN": {},
|
||||
"GL_TOKEN": {}, // glab CLI alias
|
||||
"BITBUCKET_TOKEN": {},
|
||||
}
|
||||
|
||||
// isSCMWriteTokenKey reports whether an env var name is a known Git SCM
|
||||
// write credential that must be stripped from tenant workspace env.
|
||||
func isSCMWriteTokenKey(key string) bool {
|
||||
_, ok := scmWriteTokenKeys[key]
|
||||
return ok
|
||||
}
|
||||
|
||||
// buildContainerEnv assembles the initial environment variables injected
|
||||
// into every workspace container.
|
||||
//
|
||||
@@ -627,6 +649,21 @@ func buildContainerEnv(cfg WorkspaceConfig) []string {
|
||||
env = append(env, fmt.Sprintf("AWARENESS_URL=%s", cfg.AwarenessURL))
|
||||
}
|
||||
for k, v := range cfg.EnvVars {
|
||||
// Forensic #145 hardening: tenant workspace containers run
|
||||
// agent-controlled code and must NEVER receive a Git SCM *write*
|
||||
// credential. Without merge/approve creds in-container the
|
||||
// two-eyes review gate is structurally self-bypass-proof — an
|
||||
// agent that forges an approval has no token to act on it. A
|
||||
// latent path exists (loadPersonaEnvFile merges a per-role
|
||||
// persona `GITEA_TOKEN` into cfg.EnvVars when MOLECULE_PERSONA_ROOT
|
||||
// is set on a tenant host); it is inert today (persona dirs are
|
||||
// operator-host-only) but unguarded. Strip SCM-write tokens here
|
||||
// by construction so the invariant holds regardless of whether
|
||||
// that path ever becomes reachable.
|
||||
if isSCMWriteTokenKey(k) {
|
||||
log.Printf("buildContainerEnv: dropped SCM-write credential %q from workspace env (forensic #145 guard)", k)
|
||||
continue
|
||||
}
|
||||
env = append(env, fmt.Sprintf("%s=%s", k, v))
|
||||
}
|
||||
// Inject ADMIN_TOKEN from the platform server's environment so workspace
|
||||
|
||||
@@ -636,10 +636,15 @@ func TestBuildContainerEnv_AwarenessOnlyWhenBothSet(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestBuildContainerEnv_CustomEnvVarsAppended(t *testing.T) {
|
||||
// NOTE: this test previously asserted GITHUB_TOKEN passed through
|
||||
// verbatim. That assertion encoded the forensic #145 latent leak as
|
||||
// expected behavior. Post-guard, ordinary custom env still flows but
|
||||
// SCM-write credentials are stripped — see
|
||||
// TestBuildContainerEnv_StripsSCMWriteTokens for the negative assertion.
|
||||
cfg := WorkspaceConfig{
|
||||
WorkspaceID: "ws-x",
|
||||
PlatformURL: "http://localhost:8080",
|
||||
EnvVars: map[string]string{"CUSTOM": "value", "GITHUB_TOKEN": "fake-token-for-test"},
|
||||
EnvVars: map[string]string{"CUSTOM": "value", "ANTHROPIC_API_KEY": "sk-not-an-scm-token"},
|
||||
}
|
||||
env := buildContainerEnv(cfg)
|
||||
seen := map[string]string{}
|
||||
@@ -652,8 +657,8 @@ func TestBuildContainerEnv_CustomEnvVarsAppended(t *testing.T) {
|
||||
if seen["CUSTOM"] != "value" {
|
||||
t.Errorf("CUSTOM env missing, got env=%v", env)
|
||||
}
|
||||
if seen["GITHUB_TOKEN"] != "fake-token-for-test" {
|
||||
t.Errorf("GITHUB_TOKEN env missing, got env=%v", env)
|
||||
if seen["ANTHROPIC_API_KEY"] != "sk-not-an-scm-token" {
|
||||
t.Errorf("non-SCM custom env must still pass through, got env=%v", env)
|
||||
}
|
||||
// Built-in defaults still present
|
||||
if seen["MOLECULE_URL"] == "" {
|
||||
@@ -661,6 +666,129 @@ func TestBuildContainerEnv_CustomEnvVarsAppended(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- forensic #145: SCM-write-token denylist guard ----------
|
||||
|
||||
// TestBuildContainerEnv_StripsSCMWriteTokens is the core negative
|
||||
// assertion: a tenant workspace env constructed via buildContainerEnv MUST
|
||||
// NOT contain any Git SCM *write* credential, regardless of how it got into
|
||||
// cfg.EnvVars. This proves the two-eyes review gate stays structurally
|
||||
// self-bypass-proof — an agent in-container has no merge/approve token to
|
||||
// act on a forged approval. See forensic #145.
|
||||
//
|
||||
// This test FAILS on the pre-guard code (where buildContainerEnv passed
|
||||
// cfg.EnvVars through verbatim) and PASSES once the denylist filter is in
|
||||
// place — i.e. the guard is proven by construction, not by environment
|
||||
// accident.
|
||||
func TestBuildContainerEnv_StripsSCMWriteTokens(t *testing.T) {
|
||||
scmTokens := []string{
|
||||
"GITEA_TOKEN", "GITHUB_TOKEN", "GH_TOKEN",
|
||||
"GITLAB_TOKEN", "GL_TOKEN", "BITBUCKET_TOKEN",
|
||||
}
|
||||
|
||||
t.Run("normal path — SCM tokens explicitly set in EnvVars", func(t *testing.T) {
|
||||
envVars := map[string]string{"CUSTOM": "ok", "ANTHROPIC_API_KEY": "sk-keep"}
|
||||
for _, k := range scmTokens {
|
||||
envVars[k] = "leaked-write-credential-" + k
|
||||
}
|
||||
cfg := WorkspaceConfig{
|
||||
WorkspaceID: "ws-tenant",
|
||||
PlatformURL: "http://localhost:8080",
|
||||
Tier: 2,
|
||||
EnvVars: envVars,
|
||||
}
|
||||
assertNoSCMWriteToken(t, buildContainerEnv(cfg), scmTokens)
|
||||
|
||||
// Sanity: non-SCM custom env is NOT collateral-damaged by the filter.
|
||||
if !envContains(buildContainerEnv(cfg), "CUSTOM=ok") {
|
||||
t.Errorf("filter must not strip non-SCM custom env")
|
||||
}
|
||||
if !envContains(buildContainerEnv(cfg), "ANTHROPIC_API_KEY=sk-keep") {
|
||||
t.Errorf("filter must not strip non-SCM API keys")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("persona-file path — simulates loadPersonaEnvFile merge", func(t *testing.T) {
|
||||
// The latent path: handlers.loadPersonaEnvFile() merges a per-role
|
||||
// persona env file (carrying GITEA_USER, GITEA_TOKEN, …) into the
|
||||
// workspace env map when MOLECULE_PERSONA_ROOT is set on a tenant
|
||||
// host. We can't invoke that cross-package helper here, but its
|
||||
// observable effect is exactly "a GITEA_TOKEN appears in
|
||||
// cfg.EnvVars". Constructing that condition directly proves the
|
||||
// guard holds even if the latent path becomes reachable.
|
||||
cfg := WorkspaceConfig{
|
||||
WorkspaceID: "ws-tenant",
|
||||
PlatformURL: "http://localhost:8080",
|
||||
Tier: 2,
|
||||
EnvVars: map[string]string{
|
||||
// Persona identity fields that are SAFE to keep (read-only
|
||||
// identity, not a write credential):
|
||||
"GITEA_USER": "backend-engineer",
|
||||
"GITEA_USER_EMAIL": "backend-engineer@agents.moleculesai.app",
|
||||
// The credential that must be stripped:
|
||||
"GITEA_TOKEN": "persona-merged-write-pat",
|
||||
"GITEA_TOKEN_SCOPES": "write:repository",
|
||||
},
|
||||
}
|
||||
got := buildContainerEnv(cfg)
|
||||
assertNoSCMWriteToken(t, got, scmTokens)
|
||||
// Non-credential persona identity may still flow through — only the
|
||||
// write token is the denied surface.
|
||||
if !envContains(got, "GITEA_USER=backend-engineer") {
|
||||
t.Errorf("non-credential persona identity (GITEA_USER) should not be stripped")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestCPProvisionerEnv_StripsSCMWriteTokens covers the tenant-EC2 path:
|
||||
// CPProvisioner.Start builds the env map the control plane forwards to the
|
||||
// EC2 workspace container. The same forensic #145 denylist must hold there.
|
||||
func TestCPProvisionerEnv_StripsSCMWriteTokens(t *testing.T) {
|
||||
// isSCMWriteTokenKey is the single source of truth shared by both
|
||||
// buildContainerEnv (local Docker) and CPProvisioner.Start (tenant EC2).
|
||||
// Assert it classifies every known SCM-write var as denied and leaves
|
||||
// ordinary / read-only-identity vars alone.
|
||||
for _, k := range []string{
|
||||
"GITEA_TOKEN", "GITHUB_TOKEN", "GH_TOKEN",
|
||||
"GITLAB_TOKEN", "GL_TOKEN", "BITBUCKET_TOKEN",
|
||||
} {
|
||||
if !isSCMWriteTokenKey(k) {
|
||||
t.Errorf("isSCMWriteTokenKey(%q) = false, want true (SCM-write credential must be denied)", k)
|
||||
}
|
||||
}
|
||||
for _, k := range []string{
|
||||
"GITEA_USER", "GITEA_USER_EMAIL", "ANTHROPIC_API_KEY",
|
||||
"CUSTOM", "PLATFORM_URL", "ADMIN_TOKEN", "",
|
||||
} {
|
||||
if isSCMWriteTokenKey(k) {
|
||||
t.Errorf("isSCMWriteTokenKey(%q) = true, want false (must not over-strip non-SCM env)", k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func assertNoSCMWriteToken(t *testing.T, env []string, scmTokens []string) {
|
||||
t.Helper()
|
||||
for _, e := range env {
|
||||
key := e
|
||||
if i := strings.IndexByte(e, '='); i >= 0 {
|
||||
key = e[:i]
|
||||
}
|
||||
for _, banned := range scmTokens {
|
||||
if key == banned {
|
||||
t.Errorf("SCM-write credential %q leaked into workspace env (forensic #145 invariant violated): %q", banned, e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func envContains(env []string, want string) bool {
|
||||
for _, e := range env {
|
||||
if e == want {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ---------- buildWorkspaceMount — #65 workspace_access ----------
|
||||
|
||||
func TestBuildWorkspaceMount_SelectionMatrix(t *testing.T) {
|
||||
|
||||
Reference in New Issue
Block a user