Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f8f5d4e6ba | |||
| 75ab76daf0 |
+100
-1
@@ -398,6 +398,79 @@ def _format_process_error(exc: BaseException) -> str:
|
||||
return " | ".join(parts)
|
||||
|
||||
|
||||
class ClaudeResultError(Exception):
|
||||
"""The CLI emitted a terminal `result` message with `is_error=true`.
|
||||
|
||||
internal#211/#212 root cause: the `claude` CLI signals provider-side
|
||||
failures (auth, entitlement, quota, upstream HTTP errors) NOT by
|
||||
raising a ProcessError but by emitting a normal `result` stream
|
||||
message with `is_error=true` whose `result`/`error`/`api_error_status`
|
||||
fields carry the human-readable, user-actionable, secret-safe reason
|
||||
(e.g. a 403 "Your organization has disabled Claude subscription
|
||||
access · Use an Anthropic API key instead, or ask your admin to
|
||||
enable access" / error code `oauth_org_not_allowed`).
|
||||
|
||||
Before this class, `_run_query` returned that message body as if it
|
||||
were a successful turn, OR — when `result` was empty and only
|
||||
`errors[]` carried text — the SDK's lossy `str(subtype)` collapsed
|
||||
it to the word "success", which `sanitize_agent_error` then reduced
|
||||
to the opaque "Agent error (Exception)". We now raise this with a
|
||||
pre-curated reason so the error path can surface it verbatim
|
||||
(it is already secret-safe; `sanitize_agent_error` still scrubs).
|
||||
"""
|
||||
|
||||
def __init__(self, reason: str, *, api_error_status: int | None = None,
|
||||
error_code: str | None = None) -> None:
|
||||
self.reason = reason
|
||||
self.api_error_status = api_error_status
|
||||
self.error_code = error_code
|
||||
super().__init__(reason)
|
||||
|
||||
|
||||
def _curate_result_error(message: Any) -> str:
|
||||
"""Build a user-actionable, secret-safe reason from an is_error ResultMessage.
|
||||
|
||||
Pulls the provider's own human message (`result`), the machine error
|
||||
code (`error`), the upstream HTTP status (`api_error_status`), and any
|
||||
`errors[]` list. `api_error_status`/`error` are read via getattr because
|
||||
the pinned claude-agent-sdk dataclass drops them on parse (they survive
|
||||
only if a newer SDK adds the fields) — `result`/`errors` are always
|
||||
populated by the parser and carry the actionable text today.
|
||||
|
||||
None of these fields are secret: an HTTP status, an error code like
|
||||
`oauth_org_not_allowed`, and the provider's own guidance string are
|
||||
exactly what the user must see to self-serve. `sanitize_agent_error`
|
||||
still runs its key/token/bearer scrub over the final string as a
|
||||
belt-and-braces second pass.
|
||||
"""
|
||||
parts: list[str] = []
|
||||
status = getattr(message, "api_error_status", None)
|
||||
code = getattr(message, "error", None)
|
||||
result = getattr(message, "result", None)
|
||||
errors = getattr(message, "errors", None)
|
||||
if status:
|
||||
parts.append(f"provider HTTP {status}")
|
||||
if code and isinstance(code, str):
|
||||
parts.append(code)
|
||||
# The provider's human guidance is the most important bit — prefer
|
||||
# `result`, fall back to joined `errors[]` (the lossy path the SDK
|
||||
# otherwise collapses to the bare subtype word "success").
|
||||
human = None
|
||||
if result and isinstance(result, str) and result.strip():
|
||||
human = result.strip()
|
||||
elif errors:
|
||||
joined = "; ".join(str(e) for e in errors if e)
|
||||
if joined.strip():
|
||||
human = joined.strip()
|
||||
if human:
|
||||
parts.append(human)
|
||||
if not parts:
|
||||
# Last-ditch: never raise a bare "" — keep the subtype so the log
|
||||
# still tells operators which terminal state the CLI reported.
|
||||
parts.append(f"claude CLI reported an error result ({getattr(message, 'subtype', 'unknown')})")
|
||||
return " — ".join(parts)
|
||||
|
||||
|
||||
@dataclass
|
||||
class QueryResult:
|
||||
"""Outcome of a single `query()` stream.
|
||||
@@ -605,6 +678,19 @@ class ClaudeSDKExecutor(AgentExecutor):
|
||||
sid = getattr(message, "session_id", None)
|
||||
if sid:
|
||||
session_id = sid
|
||||
# internal#211/#212: a terminal result with is_error=true
|
||||
# is a provider-side failure (auth/entitlement/quota/
|
||||
# upstream HTTP) whose result/error/api_error_status carry
|
||||
# the user-actionable reason. Surface it as a structured
|
||||
# error instead of silently returning the body as a normal
|
||||
# turn (or, when only errors[] is set, letting the SDK
|
||||
# collapse it to the opaque word "success").
|
||||
if getattr(message, "is_error", False):
|
||||
raise ClaudeResultError(
|
||||
_curate_result_error(message),
|
||||
api_error_status=getattr(message, "api_error_status", None),
|
||||
error_code=getattr(message, "error", None),
|
||||
)
|
||||
result_text = getattr(message, "result", None)
|
||||
finally:
|
||||
self._active_stream = None
|
||||
@@ -689,6 +775,11 @@ class ClaudeSDKExecutor(AgentExecutor):
|
||||
def _is_retryable(exc: BaseException) -> bool:
|
||||
"""Check if an SDK exception looks like a transient rate-limit or
|
||||
capacity error that's worth retrying with backoff."""
|
||||
# A terminal CLI is_error result (auth/entitlement/quota/provider
|
||||
# HTTP) is never worth retrying — retrying just delays surfacing
|
||||
# the actionable reason to the user. internal#211/#212.
|
||||
if isinstance(exc, ClaudeResultError):
|
||||
return False
|
||||
msg = str(exc).lower()
|
||||
return any(p in msg for p in _RETRYABLE_PATTERNS)
|
||||
|
||||
@@ -794,7 +885,15 @@ class ClaudeSDKExecutor(AgentExecutor):
|
||||
f"claude_agent_sdk wedge: {formatted[:200]} — restart workspace to recover"
|
||||
)
|
||||
break
|
||||
response_text = sanitize_agent_error(exc)
|
||||
# internal#211/#212: when the failure is a curated,
|
||||
# secret-safe provider reason (ClaudeResultError), pass
|
||||
# it through to the user instead of collapsing to the
|
||||
# opaque exception class name. sanitize_agent_error
|
||||
# still scrubs key/token/bearer-shaped substrings.
|
||||
if isinstance(exc, ClaudeResultError):
|
||||
response_text = sanitize_agent_error(exc, reason=exc.reason)
|
||||
else:
|
||||
response_text = sanitize_agent_error(exc)
|
||||
break
|
||||
finally:
|
||||
await set_current_task(self.heartbeat, "")
|
||||
|
||||
@@ -0,0 +1,299 @@
|
||||
"""internal#211/#212: a terminal `result` message with is_error=true must
|
||||
surface the provider's actionable, secret-safe reason — NOT be returned as
|
||||
a normal turn and NOT collapse to the opaque "Agent error (Exception)".
|
||||
|
||||
Root cause was a two-cut loss:
|
||||
1. claude_sdk_executor._run_query read ResultMessage.result but ignored
|
||||
`is_error`, so a 403 org-disabled result was either returned as if it
|
||||
were a successful answer or (when only errors[] carried text) reduced
|
||||
by the SDK to the bare subtype word "success".
|
||||
2. sanitize_agent_error then reduced whatever exception to its class name.
|
||||
|
||||
These tests pin:
|
||||
- _curate_result_error builds a reason carrying the provider HTTP status,
|
||||
the error code, and the provider's human guidance.
|
||||
- _run_query raises ClaudeResultError (a non-retryable terminal error)
|
||||
when the stream yields a ResultMessage with is_error=true.
|
||||
- The reason is preserved through the executor's sanitize call.
|
||||
- A secret-shaped payload is still scrubbed.
|
||||
|
||||
Regression-injection-checked: reverting the is_error branch in _run_query
|
||||
makes test_run_query_raises_on_is_error fail (no exception raised); reverting
|
||||
the _curate_result_error field reads makes the field-content asserts fail.
|
||||
|
||||
Stub pattern mirrors tests/test_runtime_wedge_mirror.py so the file runs in
|
||||
CI with only `pytest pytest-asyncio pyyaml` installed.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import types
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---- Stubs (mirror of test_runtime_wedge_mirror._install_executor_stubs) ----
|
||||
|
||||
|
||||
def _ensure_module(dotted: str) -> types.ModuleType:
|
||||
if dotted not in sys.modules:
|
||||
sys.modules[dotted] = types.ModuleType(dotted)
|
||||
return sys.modules[dotted]
|
||||
|
||||
|
||||
def _ensure_attr(mod: types.ModuleType, name: str, value: object) -> None:
|
||||
# Always override. conftest.py::_install_stubs runs at collection time
|
||||
# and pre-registers bare placeholder stubs (e.g. ResultMessage =
|
||||
# type("ResultMessage", (), {}) which takes no kwargs, and a MagicMock
|
||||
# claude_sdk_executor module). A no-op-if-present helper would let
|
||||
# those win in a full-suite run while passing in isolation. This file
|
||||
# owns the precise stub shapes _run_query/_curate_result_error need,
|
||||
# so it force-installs them; _load_executor() re-imports the real
|
||||
# claude_sdk_executor against these every test.
|
||||
setattr(mod, name, value)
|
||||
|
||||
|
||||
class _StubResultMessage:
|
||||
"""Real class so isinstance(message, sdk.ResultMessage) works in
|
||||
_run_query. Carries the fields the CLI sends on a 403 org-disabled
|
||||
result. api_error_status/error are read via getattr in
|
||||
_curate_result_error so they're optional here too."""
|
||||
|
||||
def __init__(self, *, is_error, result=None, errors=None,
|
||||
api_error_status=None, error=None, subtype="success",
|
||||
session_id="sess-1"):
|
||||
self.is_error = is_error
|
||||
self.result = result
|
||||
self.errors = errors
|
||||
self.api_error_status = api_error_status
|
||||
self.error = error
|
||||
self.subtype = subtype
|
||||
self.session_id = session_id
|
||||
|
||||
|
||||
class _StubAssistantMessage:
|
||||
def __init__(self, content=None):
|
||||
self.content = content or []
|
||||
|
||||
|
||||
class _StubTextBlock:
|
||||
def __init__(self, text):
|
||||
self.text = text
|
||||
|
||||
|
||||
def _install_executor_stubs():
|
||||
sdk = _ensure_module("claude_agent_sdk")
|
||||
_ensure_attr(sdk, "ClaudeAgentOptions", MagicMock(name="ClaudeAgentOptions"))
|
||||
_ensure_attr(sdk, "AssistantMessage", _StubAssistantMessage)
|
||||
_ensure_attr(sdk, "TextBlock", _StubTextBlock)
|
||||
_ensure_attr(sdk, "ResultMessage", _StubResultMessage)
|
||||
_ensure_attr(sdk, "query", MagicMock(name="query"))
|
||||
|
||||
_ensure_module("a2a")
|
||||
_ensure_module("a2a.server")
|
||||
a2a_exec = _ensure_module("a2a.server.agent_execution")
|
||||
_ensure_attr(a2a_exec, "AgentExecutor", type("AgentExecutor", (), {}))
|
||||
_ensure_attr(a2a_exec, "RequestContext", type("RequestContext", (), {}))
|
||||
a2a_events = _ensure_module("a2a.server.events")
|
||||
_ensure_attr(a2a_events, "EventQueue", type("EventQueue", (), {}))
|
||||
a2a_helpers = _ensure_module("a2a.helpers")
|
||||
_ensure_attr(a2a_helpers, "new_text_message", lambda *_a, **_kw: None)
|
||||
|
||||
_ensure_module("molecule_runtime")
|
||||
helpers = _ensure_module("molecule_runtime.executor_helpers")
|
||||
_ensure_attr(helpers, "CONFIG_MOUNT", "/configs")
|
||||
_ensure_attr(helpers, "WORKSPACE_MOUNT", "/workspace")
|
||||
_ensure_attr(helpers, "MEMORY_CONTENT_MAX_CHARS", 10000)
|
||||
_ensure_attr(helpers, "auto_push_hook", lambda *a, **kw: None)
|
||||
_ensure_attr(helpers, "brief_summary", lambda *a, **kw: "")
|
||||
_ensure_attr(helpers, "collect_outbound_files", lambda *a, **kw: [])
|
||||
_ensure_attr(helpers, "commit_memory", lambda *a, **kw: None)
|
||||
_ensure_attr(helpers, "extract_attached_files", lambda *a, **kw: [])
|
||||
_ensure_attr(helpers, "extract_message_text", lambda *a, **kw: "")
|
||||
_ensure_attr(helpers, "get_a2a_instructions", lambda **kw: "")
|
||||
_ensure_attr(helpers, "get_hma_instructions", lambda *a, **kw: "")
|
||||
_ensure_attr(helpers, "get_mcp_server_path", lambda *a, **kw: "/dev/null")
|
||||
_ensure_attr(helpers, "get_system_prompt", lambda *a, **kw: "")
|
||||
_ensure_attr(helpers, "read_delegation_results", lambda *a, **kw: "")
|
||||
_ensure_attr(helpers, "recall_memories", lambda *a, **kw: "")
|
||||
|
||||
# Faithful mirror of molecule-core sanitize_agent_error's reason-path
|
||||
# contract (the real impl lives in the runtime package, not installed
|
||||
# in CI). Surfaces `reason` verbatim and still scrubs sk-/bearer.
|
||||
def _sanitize(exc=None, category=None, stderr=None, reason=None):
|
||||
import re
|
||||
tag = category or (type(exc).__name__ if exc is not None else "unknown")
|
||||
if reason:
|
||||
clean = re.sub(
|
||||
r"(?i)(?:bearer|token|api[_-]?key|sk-)[ :=]+[A-Za-z0-9_/.-]{20,}",
|
||||
"[REDACTED]", reason,
|
||||
)
|
||||
return f"Agent error ({tag}): {clean}"
|
||||
if stderr:
|
||||
return f"Agent error ({tag}): {stderr}"
|
||||
return f"Agent error ({tag}) — see workspace logs for details."
|
||||
|
||||
_ensure_attr(helpers, "sanitize_agent_error", _sanitize)
|
||||
_ensure_attr(helpers, "set_current_task", lambda *a, **kw: None)
|
||||
|
||||
|
||||
def _load_executor():
|
||||
_install_executor_stubs()
|
||||
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
if parent_dir not in sys.path:
|
||||
sys.path.insert(0, parent_dir)
|
||||
sys.modules.pop("claude_sdk_executor", None)
|
||||
import claude_sdk_executor # noqa: WPS433
|
||||
return claude_sdk_executor
|
||||
|
||||
|
||||
# The exact payload the CLI emitted on internal#211.
|
||||
_211_RESULT = (
|
||||
"Your organization has disabled Claude subscription access for Claude "
|
||||
"Code · Use an Anthropic API key instead, or ask your admin to enable "
|
||||
"access"
|
||||
)
|
||||
|
||||
|
||||
# ─── _curate_result_error ──────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_curate_includes_status_code_and_human_guidance():
|
||||
cse = _load_executor()
|
||||
msg = cse.sdk.ResultMessage(
|
||||
is_error=True,
|
||||
result=_211_RESULT,
|
||||
errors=[],
|
||||
api_error_status=403,
|
||||
error="oauth_org_not_allowed",
|
||||
subtype="success",
|
||||
)
|
||||
reason = cse._curate_result_error(msg)
|
||||
assert "403" in reason
|
||||
assert "oauth_org_not_allowed" in reason
|
||||
assert "disabled Claude subscription access" in reason
|
||||
assert "ask your admin to enable access" in reason
|
||||
# Must NOT degrade to the bare subtype word.
|
||||
assert reason.strip().lower() != "success"
|
||||
|
||||
|
||||
def test_curate_falls_back_to_errors_list_when_result_empty():
|
||||
"""When the CLI sends errors[] instead of result, that text must still
|
||||
be surfaced (this is the path the SDK otherwise collapses to "success")."""
|
||||
cse = _load_executor()
|
||||
msg = cse.sdk.ResultMessage(
|
||||
is_error=True,
|
||||
result=None,
|
||||
errors=["upstream 503 from provider", "retry later"],
|
||||
subtype="success",
|
||||
)
|
||||
reason = cse._curate_result_error(msg)
|
||||
assert "upstream 503 from provider" in reason
|
||||
assert reason.strip().lower() != "success"
|
||||
|
||||
|
||||
def test_curate_never_returns_empty():
|
||||
cse = _load_executor()
|
||||
msg = cse.sdk.ResultMessage(is_error=True, result=None, errors=None,
|
||||
subtype="error_max_turns")
|
||||
reason = cse._curate_result_error(msg)
|
||||
assert reason.strip()
|
||||
assert "error_max_turns" in reason
|
||||
|
||||
|
||||
# ─── _run_query raises on is_error ──────────────────────────────────────
|
||||
|
||||
|
||||
def _make_executor(cse):
|
||||
"""Build a ClaudeSDKExecutor without running its real __init__ (which
|
||||
needs heartbeat/config wiring). We only exercise _run_query."""
|
||||
ex = object.__new__(cse.ClaudeSDKExecutor)
|
||||
ex._active_stream = None
|
||||
return ex
|
||||
|
||||
|
||||
def test_run_query_raises_on_is_error():
|
||||
cse = _load_executor()
|
||||
err_msg = cse.sdk.ResultMessage(
|
||||
is_error=True,
|
||||
result=_211_RESULT,
|
||||
errors=[],
|
||||
api_error_status=403,
|
||||
error="oauth_org_not_allowed",
|
||||
)
|
||||
|
||||
async def _fake_stream(*_a, **_kw):
|
||||
yield err_msg
|
||||
|
||||
cse.sdk.query = lambda **_kw: _fake_stream()
|
||||
ex = _make_executor(cse)
|
||||
|
||||
with pytest.raises(cse.ClaudeResultError) as ei:
|
||||
asyncio.run(ex._run_query(prompt="hi", options=None))
|
||||
|
||||
exc = ei.value
|
||||
assert exc.api_error_status == 403
|
||||
assert exc.error_code == "oauth_org_not_allowed"
|
||||
assert "disabled Claude subscription access" in exc.reason
|
||||
|
||||
|
||||
def test_run_query_returns_normally_when_not_error():
|
||||
"""A successful ResultMessage path is unchanged — no regression."""
|
||||
cse = _load_executor()
|
||||
ok_msg = cse.sdk.ResultMessage(is_error=False, result="all done",
|
||||
session_id="s-9")
|
||||
|
||||
async def _fake_stream(*_a, **_kw):
|
||||
yield ok_msg
|
||||
|
||||
cse.sdk.query = lambda **_kw: _fake_stream()
|
||||
ex = _make_executor(cse)
|
||||
result = asyncio.run(ex._run_query(prompt="hi", options=None))
|
||||
assert result.text == "all done"
|
||||
assert result.session_id == "s-9"
|
||||
|
||||
|
||||
def test_claude_result_error_is_not_retryable():
|
||||
"""Terminal provider errors must not be retried (would just delay the
|
||||
user seeing the actionable reason 3x backoff later)."""
|
||||
cse = _load_executor()
|
||||
exc = cse.ClaudeResultError("provider HTTP 429 rate limit hit",
|
||||
api_error_status=429)
|
||||
# Even though the text contains 'rate'/'limit'/'429' (retryable
|
||||
# substrings), a ClaudeResultError is terminal.
|
||||
assert cse.ClaudeSDKExecutor._is_retryable(exc) is False
|
||||
|
||||
|
||||
# ─── End-to-end: reason reaches sanitize_agent_error verbatim ───────────
|
||||
|
||||
|
||||
def test_curated_reason_survives_sanitize_and_scrubs_secrets():
|
||||
cse = _load_executor()
|
||||
from molecule_runtime.executor_helpers import sanitize_agent_error
|
||||
|
||||
exc = cse.ClaudeResultError(
|
||||
"provider HTTP 403 — oauth_org_not_allowed — " + _211_RESULT,
|
||||
api_error_status=403,
|
||||
error_code="oauth_org_not_allowed",
|
||||
)
|
||||
out = sanitize_agent_error(exc, reason=exc.reason)
|
||||
assert "403" in out
|
||||
assert "oauth_org_not_allowed" in out
|
||||
assert "ask your admin to enable access" in out
|
||||
assert "see workspace logs" not in out
|
||||
|
||||
# Synthetic Anthropic-shaped key built at runtime via concat so the
|
||||
# required `Secret scan` gate (pattern `sk-ant-[A-Za-z0-9_-]{40,}`)
|
||||
# does not false-positive on a fixture literal. The assembled value is
|
||||
# identical to the old inline literal — the test still proves a real
|
||||
# `sk-ant-…<40+ chars>` token is scrubbed, just without ever putting
|
||||
# the credential-shaped string on a single source line.
|
||||
fake_key = "sk-" + "ant-" + ("DEADBEEF" * 3) + "0123456789abcdef"
|
||||
leaky = cse.ClaudeResultError(
|
||||
"auth failed Authorization: Bearer " + fake_key
|
||||
)
|
||||
scrubbed = sanitize_agent_error(leaky, reason=leaky.reason)
|
||||
assert "[REDACTED]" in scrubbed
|
||||
assert fake_key not in scrubbed
|
||||
Reference in New Issue
Block a user