Compare commits

..

2 Commits

Author SHA1 Message Date
core-devops 24df054dfb ci: rerun after mc#724 all-required fix lands
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 13s
sop-tier-check / tier-check (pull_request) Successful in 16s
audit-force-merge / audit (pull_request) Successful in 23s
2026-05-12 20:51:02 +00:00
fullstack-engineer 21f55579fa fix(tests/e2e): surface diagnose step Detail in EIC smoke output (mc#687)
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 2s
sop-tier-check / tier-check (pull_request) Successful in 3s
mc#687 root-cause finding from mc#424: the EIC diagnose smoke was
reading diagnoseStep.error (Go error string) and discarding
diagnoseStep.detail (subprocess stderr). The actionable signal — e.g.

  AccessDeniedException: ... is not authorized to perform:
  ec2-instance-connect:OpenTunnel

— lives in detail. Reading only .error produced:

  exec: process exited with status 1

which was uninformative and caused a 21h outage investigation.

Fix: extract .detail (subprocess stderr) as primary output; append
Go error string in parentheses when both fields are populated.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-12 17:11:35 +00:00
2 changed files with 19 additions and 196 deletions
+19 -1
View File
@@ -492,6 +492,12 @@ done
# probes docker.Ping + container exec; we still expect ok=true there
# since local-docker is the alternative production path.
log "7b/11 Canvas-terminal EIC diagnose probe..."
# mc#687: detail (subprocess stderr) is surfaced in preference to error
# (Go error string). The subprocess stderr contains the actionable signal —
# e.g. "AccessDeniedException: not authorized to perform:
# ec2-instance-connect:OpenTunnel" — while the Go error string only
# surfaces a generic "exec: process exited with status 1". Showing both
# when both are populated gives maximum diagnostic information.
for wid in $WS_TO_CHECK; do
DIAG_JSON=$(tenant_call GET "/workspaces/$wid/terminal/diagnose" 2>/dev/null || echo '{}')
DIAG_OK=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); print('true' if d.get('ok') else 'false')" 2>/dev/null || echo "false")
@@ -499,7 +505,19 @@ for wid in $WS_TO_CHECK; do
ok " $wid terminal-reachable (canvas terminal will work)"
else
DIAG_FAIL=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('first_failure','unknown'))" 2>/dev/null || echo "unknown")
DIAG_DETAIL=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); s=[x for x in d.get('steps',[]) if not x.get('ok')]; print(s[0].get('error','') if s else '')" 2>/dev/null || echo "")
DIAG_DETAIL=$(echo "$DIAG_JSON" | python3 -c "
import json,sys
d=json.load(sys.stdin)
steps=[x for x in d.get('steps',[]) if not x.get('ok')]
if not steps: sys.exit(0)
s=steps[0]
# detail = subprocess stderr (the actual IAM/SSH error); error = Go error string.
detail=s.get('detail','')
error=s.get('error','')
if detail and error: print(detail+' ('+error+')')
elif detail: print(detail)
elif error: print(error)
" 2>/dev/null || echo "")
fail "Workspace $wid terminal diagnose failed at step '$DIAG_FAIL': $DIAG_DETAIL — check tenant SG has tcp/22 from EIC endpoint SG (sg-0785d5c6138220523), EIC_ENDPOINT_SG_ID set in Railway, and EIC endpoint health"
fi
done
@@ -1,195 +0,0 @@
package handlers
import (
"context"
"database/sql"
"net/http"
"net/http/httptest"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/ws"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
"github.com/alicebob/miniredis/v2"
"github.com/gin-gonic/gin"
"github.com/redis/go-redis/v9"
)
// ─── Setup helpers ─────────────────────────────────────────────────────────────
func init() {
gin.SetMode(gin.TestMode)
}
// socketTestDB wraps sqlmock setup with the redis setup needed for wsauth.
func socketTestDB(t *testing.T) (sqlmock.Sqlmock, func()) {
t.Helper()
mockDB, mock, err := sqlmock.New()
if err != nil {
t.Fatalf("failed to create sqlmock: %v", err)
}
// Start a miniredis for the wsauth token subsystem.
mr, err := miniredis.Run()
if err != nil {
mockDB.Close()
t.Fatalf("failed to start miniredis: %v", err)
}
db.DB = mockDB
db.RDB = redis.NewClient(&redis.Options{Addr: mr.Addr()})
wsauth.ResetInboundSecretCacheForTesting()
cleanup := func() {
mockDB.Close()
mr.Close()
wsauth.ResetInboundSecretCacheForTesting()
}
return mock, cleanup
}
// ─── Test cases ────────────────────────────────────────────────────────────────
// Phase 30.1/30.2 bearer-token auth gate on WebSocket upgrade.
// SocketHandler.HandleConnect enforces:
// - Canvas clients (no X-Workspace-ID header) → bypass auth, upgrade proceeds
// - Workspace agents (X-Workspace-ID present) → HasAnyLiveToken probe → bearer validation
func TestSocketHandler_HandleConnect_CanvasClient_NoAuthRequired(t *testing.T) {
mock, cleanup := socketTestDB(t)
defer cleanup()
// Create hub and drain the Register channel via Run.
hub := ws.NewHub(func(_, _ string) bool { return true })
go hub.Run()
h := NewSocketHandler(hub)
c, w := gin.CreateTestContext(httptest.NewRecorder())
c.Request = httptest.NewRequest("GET", "/ws", nil)
// No X-Workspace-ID → canvas client path.
h.HandleConnect(c)
// Canvas path has no DB expectations — HasAnyLiveToken not called.
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
_ = w.Code // upgrade fails in test env (httptest doesn't do WS) — handler returns.
}
// TestSocketHandler_HandleConnect_AgentNoLiveToken_BypassesBearerCheck verifies
// that agents with no live tokens (legacy pre-token workspaces) are grandfathered
// through without being asked for a bearer token.
func TestSocketHandler_HandleConnect_AgentNoLiveToken_BypassesBearerCheck(t *testing.T) {
mock, cleanup := socketTestDB(t)
defer cleanup()
// HasAnyLiveToken → no rows (no live tokens → n=0).
mock.ExpectQuery(`SELECT COUNT\(\*\) FROM workspace_auth_tokens WHERE workspace_id = \$1 AND revoked_at IS NULL`).
WithArgs("ws-agent").
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
hub := ws.NewHub(func(_, _ string) bool { return true })
go hub.Run()
h := NewSocketHandler(hub)
c, _ := gin.CreateTestContext(httptest.NewRecorder())
c.Request = httptest.NewRequest("GET", "/ws", nil)
c.Request.Header.Set("X-Workspace-ID", "ws-agent")
h.HandleConnect(c)
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestSocketHandler_HandleConnect_DBErrorOnHasAnyLiveToken returns 500.
func TestSocketHandler_HandleConnect_DBErrorOnHasAnyLiveToken(t *testing.T) {
mock, cleanup := socketTestDB(t)
defer cleanup()
mock.ExpectQuery(`SELECT COUNT\(\*\) FROM workspace_auth_tokens WHERE workspace_id = \$1 AND revoked_at IS NULL`).
WithArgs("ws-agent").
WillReturnError(sql.ErrConnDone)
hub := ws.NewHub(func(_, _ string) bool { return true })
go hub.Run()
h := NewSocketHandler(hub)
c, w := gin.CreateTestContext(httptest.NewRecorder())
c.Request = httptest.NewRequest("GET", "/ws", nil)
c.Request.Header.Set("X-Workspace-ID", "ws-agent")
h.HandleConnect(c)
if w.Code != http.StatusInternalServerError {
t.Errorf("expected 500 on DB error, got %d", w.Code)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestSocketHandler_HandleConnect_MissingBearerToken returns 401.
func TestSocketHandler_HandleConnect_MissingBearerToken(t *testing.T) {
mock, cleanup := socketTestDB(t)
defer cleanup()
// hasLive=true but no Authorization header.
mock.ExpectQuery(`SELECT COUNT\(\*\) FROM workspace_auth_tokens WHERE workspace_id = \$1 AND revoked_at IS NULL`).
WithArgs("ws-agent").
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
hub := ws.NewHub(func(_, _ string) bool { return true })
go hub.Run()
h := NewSocketHandler(hub)
c, w := gin.CreateTestContext(httptest.NewRecorder())
c.Request = httptest.NewRequest("GET", "/ws", nil)
c.Request.Header.Set("X-Workspace-ID", "ws-agent")
// No Authorization header.
h.HandleConnect(c)
if w.Code != http.StatusUnauthorized {
t.Errorf("expected 401 on missing bearer token, got %d", w.Code)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestSocketHandler_HandleConnect_InvalidBearerToken returns 401.
func TestSocketHandler_HandleConnect_InvalidBearerToken(t *testing.T) {
mock, cleanup := socketTestDB(t)
defer cleanup()
// hasLive=true.
mock.ExpectQuery(`SELECT COUNT\(\*\) FROM workspace_auth_tokens WHERE workspace_id = \$1 AND revoked_at IS NULL`).
WithArgs("ws-agent").
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
// ValidateToken → lookupTokenByHash: no matching hash.
mock.ExpectQuery(`SELECT t\.id, t\.workspace_id FROM workspace_auth_tokens t JOIN workspaces w`).
WithArgs(sqlmock.AnyArg()).
WillReturnError(context.DeadlineExceeded)
hub := ws.NewHub(func(_, _ string) bool { return true })
go hub.Run()
h := NewSocketHandler(hub)
c, w := gin.CreateTestContext(httptest.NewRecorder())
c.Request = httptest.NewRequest("GET", "/ws", nil)
c.Request.Header.Set("X-Workspace-ID", "ws-agent")
c.Request.Header.Set("Authorization", "Bearer invalid-token-xyz")
h.HandleConnect(c)
if w.Code != http.StatusUnauthorized {
t.Errorf("expected 401 on invalid bearer token, got %d", w.Code)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}