diff --git a/workspace/executor_helpers.py b/workspace/executor_helpers.py index daf93c60..95e5aa81 100644 --- a/workspace/executor_helpers.py +++ b/workspace/executor_helpers.py @@ -599,6 +599,28 @@ def _sanitize_for_external(msg: str) -> str: import re as _re msg = _re.sub(r"(?i)(?:bearer|token|api[_-]?key|sk-)[ :=]+[A-Za-z0-9_/.-]{20,}", "[REDACTED]", msg) + # Bare provider key with NO separator after the prefix — a real + # `sk-ant-api03-…` / `sk-…` key uses `-` (not `[ :=]`) so the rule + # above misses it. Require ≥24 key-ish chars after `sk-` (an `ant-` + # prefix counts toward the 24) so curated examples like + # `sk-ant-EXAMPLE-SHORT` (17 chars after `sk-`) still pass un-redacted. + msg = _re.sub(r"(?i)\bsk-(?:ant-)?[A-Za-z0-9_-]{24,}", "[REDACTED]", msg) + # JSON-quoted credential values: {"token": "…"} / {"apiKey": "…"} / + # {"secret": "…"} / {"password": "…"}. Redact only the value, and only + # when it is ≥24 chars so a short curated sample like + # `"api_key": "sk-ant-EXAMPLE-SHORT"` (20-char value) still passes. + msg = _re.sub( + r'(?i)("(?:token|api[_-]?key|secret|password)"\s*:\s*")[^"]{24,}(")', + r"\1[REDACTED]\2", + msg, + ) + # AWS secret access key in `aws_secret_access_key=…` form (env dumps, + # boto tracebacks). `\S+` redacts up to the next whitespace, quotes included. + msg = _re.sub( + r"(?i)(aws_secret_access_key\s*[:=]\s*)\S+", + r"\1[REDACTED]", + msg, + ) # Absolute paths: /etc/shadow, /home/user/.aws/credentials, etc.
msg = _re.sub(r"(?:/[^/\s]+){2,}", lambda m: m.group(0) if len(m.group(0)) < 60 else "[REDACTED_PATH]", msg) return msg diff --git a/workspace/tests/test_executor_helpers.py b/workspace/tests/test_executor_helpers.py index 70ee5011..8ae3c967 100644 --- a/workspace/tests/test_executor_helpers.py +++ b/workspace/tests/test_executor_helpers.py @@ -829,6 +829,62 @@ def test_sanitize_agent_error_reason_still_scrubs_secrets(): assert "please re-auth" in out +def test_sanitize_agent_error_reason_scrubs_all_secret_formats(): + """The scrubber must redact every realistic credential shape — not just + the `Bearer ` form the original test happened to exercise + (internal#212 review finding: bare `sk-ant-api03-…` keys, JSON-quoted + "token"/"apiKey" values, and `aws_secret_access_key=` all leaked). + All curated/actionable guidance must still survive the scrub. + """ + # 1. Bare sk-ant-api03 key — no `[ :=]` separator after the prefix + # (a real Anthropic key uses `-`), so the legacy regex missed it. + bare = ( + "provider HTTP 401 — auth failed — invalid key " + "sk-ant-api03-AbCdEf0123456789AbCdEf0123456789AbCdEf0123456789xyz " + "please re-auth" + ) + out = sanitize_agent_error(reason=bare) + assert "sk-ant-api03-AbCdEf0123456789AbCdEf0123456789AbCdEf0123456789xyz" not in out + assert "[REDACTED]" in out + assert "401" in out # actionable status survives + assert "please re-auth" in out # actionable guidance survives + + # 2. JSON-quoted "token" / "apiKey" values. + jblob = ( + 'provider error — config dump {"token": ' + '"abcDEF0123456789ghIJKL0123456789mnopQRST", "apiKey": ' + '"sk-ant-api03-ZZZZ1111ZZZZ2222ZZZZ3333ZZZZ4444ZZZZ"} — ' + "use an API key instead" + ) + out = sanitize_agent_error(reason=jblob) + assert "abcDEF0123456789ghIJKL0123456789mnopQRST" not in out + assert "sk-ant-api03-ZZZZ1111ZZZZ2222ZZZZ3333ZZZZ4444ZZZZ" not in out + assert "[REDACTED]" in out + assert "use an API key instead" in out # actionable guidance survives + + # 3. 
aws_secret_access_key=… form. + awsblob = ( + "provider HTTP 403 — boto credential error " + "aws_secret_access_key=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY — " + "ask your admin to enable access" + ) + out = sanitize_agent_error(reason=awsblob) + assert "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" not in out + assert "[REDACTED]" in out + assert "403" in out # actionable status survives + assert "ask your admin to enable access" in out # guidance survives + + # 4. Regression: the original Bearer form still redacts. + bearer = ( + "provider HTTP 401 — Authorization: Bearer " + "sk-ant-DEADBEEFDEADBEEFDEADBEEF0123456789abcdef re-auth" + ) + out = sanitize_agent_error(reason=bearer) + assert "sk-ant-DEADBEEFDEADBEEFDEADBEEF0123456789abcdef" not in out + assert "[REDACTED]" in out + assert "re-auth" in out + + def test_sanitize_agent_error_reason_wins_over_stderr(): """When both reason and stderr are passed, the curated reason wins.""" out = sanitize_agent_error(