forked from molecule-ai/molecule-core
Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 9cb5b0a182 | |||
| fabf45216d | |||
| a50cda1a85 | |||
| a526dabf04 | |||
| 4534e922c8 | |||
| 427d5b04ed | |||
| a93c4ce177 | |||
| b3041c13d3 | |||
| e1214ca0b4 | |||
| bfefcb315b |
@@ -0,0 +1,170 @@
|
||||
# sop-tier-check — canonical Gitea Actions workflow for §SOP-6 enforcement.
|
||||
#
|
||||
# Copy this file to `.gitea/workflows/sop-tier-check.yml` in any repo that
|
||||
# wants the §SOP-6 PR gate enforced. Pair with branch protection on the
|
||||
# protected branch:
|
||||
# required_status_checks: ["sop-tier-check"]
|
||||
# required_approving_reviews: 1
|
||||
# approving_review_teams: ["ceo", "managers", "engineers"]
|
||||
#
|
||||
# What it does:
|
||||
# 1. Reads the PR's `tier:*` label (low | medium | high). Fails if absent
|
||||
# or ambiguous.
|
||||
# 2. Reads every approving review on the PR.
|
||||
# 3. For each approver, queries Gitea team membership.
|
||||
# 4. Marks the check success only if at least one approver is in a team
|
||||
# whose tier-tag covers the PR's tier label, AND the approver is not
|
||||
# the author.
|
||||
#
|
||||
# Tier → eligible-team mapping (mirror of dev-sop §SOP-6):
|
||||
# tier:low → engineers, managers, ceo
|
||||
# tier:medium → managers, ceo
|
||||
# tier:high → ceo
|
||||
#
|
||||
# Author identity is excluded automatically; Gitea's review system already
|
||||
# rejects self-reviews, but this workflow re-checks defensively in case the
|
||||
# native rule is bypassed (admin override, branch-protection edit, etc.).
|
||||
#
|
||||
# Force-merge: Owners-team override remains available out-of-band via the
|
||||
# Gitea merge API; force-merge writes `incident.force_merge` to
|
||||
# structure_events per §Persistent structured logging gate (Phase 3).
|
||||
|
||||
name: sop-tier-check

# Re-evaluate on every PR change that can affect the outcome: label edits
# (tier label added/removed) and review activity (approval submitted,
# edited, or dismissed).
on:
  pull_request:
    types: [opened, edited, synchronize, reopened, labeled, unlabeled]
  pull_request_review:
    types: [submitted, dismissed, edited]

jobs:
  tier-check:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: read
    steps:
      - name: Verify tier label + reviewer team membership
        env:
          # SOP_TIER_CHECK_TOKEN is the read-only `sop-tier-bot` PAT,
          # provisioned with read:org scope and added to ceo/managers/
          # engineers teams (a Gitea team-membership probe requires the
          # caller to be a member of the team being probed). The auto-
          # injected GITHUB_TOKEN's scope is repo-level only and cannot
          # query org team membership, hence the dedicated secret.
          # Falls back to GITHUB_TOKEN so the workflow at least starts and
          # surfaces a clear error when the secret is missing.
          GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
          GITEA_HOST: git.moleculesai.app
          REPO: ${{ github.repository }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
        run: |
          set -euo pipefail

          if [ -z "${GITEA_TOKEN:-}" ]; then
            echo "::error::No API token available: neither SOP_TIER_CHECK_TOKEN nor GITHUB_TOKEN resolved. Add a SOP_TIER_CHECK_TOKEN secret with org-membership read scope to enable team-based approval gating."
            exit 1
          fi

          OWNER="${REPO%%/*}"
          NAME="${REPO##*/}"
          API="https://${GITEA_HOST}/api/v1"
          AUTH="Authorization: token ${GITEA_TOKEN}"
          echo "::notice::tier-check start: repo=$OWNER/$NAME pr=$PR_NUMBER author=$PR_AUTHOR"

          # Sanity-check the token resolves a user; surfaces token-scope problems
          # early instead of failing on a downstream call with no context.
          # The `|| WHOAMI=""` keeps errexit/pipefail from aborting here when
          # curl or jq fails, so the explicit error below is what gets reported.
          WHOAMI=$(curl -sS -H "$AUTH" "${API}/user" | jq -r '.login // ""') || WHOAMI=""
          if [ -z "$WHOAMI" ]; then
            echo "::error::GITEA_TOKEN cannot resolve a user via /api/v1/user — check the token scope and that the secret is wired correctly."
            exit 1
          fi
          echo "::notice::token resolves to user: $WHOAMI"

          # 1. Read tier label
          # -f turns HTTP errors into a curl failure so an error payload is
          # never parsed as if it were the label list.
          LABELS=$(curl -fsS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/issues/${PR_NUMBER}/labels" | jq -r '.[].name') || {
            echo "::error::Could not read labels for PR #${PR_NUMBER} (API call or JSON parse failed)."
            exit 1
          }
          TIER=""
          # Read one label per line: label names may contain spaces, and word
          # splitting would let a label such as "needs tier:high" match.
          while IFS= read -r L; do
            case "$L" in
              tier:low|tier:medium|tier:high)
                if [ -n "$TIER" ]; then
                  echo "::error::Multiple tier labels: $TIER + $L. Apply exactly one."
                  exit 1
                fi
                TIER="$L"
                ;;
            esac
          done <<<"$LABELS"
          if [ -z "$TIER" ]; then
            echo "::error::PR has no tier:low|tier:medium|tier:high label. Apply one before merge."
            exit 1
          fi
          echo "tier=$TIER"

          # 2. Tier → eligible teams
          case "$TIER" in
            tier:low)    ELIGIBLE="engineers managers ceo" ;;
            tier:medium) ELIGIBLE="managers ceo" ;;
            tier:high)   ELIGIBLE="ceo" ;;
            *)
              # Unreachable given the label filter above; kept so ELIGIBLE can
              # never be unset under `set -u` if the filter is ever extended.
              echo "::error::Unrecognised tier label: $TIER"
              exit 1
              ;;
          esac
          echo "eligible_teams=$ELIGIBLE"

          # Resolve team-name → team-id once. The /orgs/{org}/teams/{slug}/...
          # endpoints don't exist on Gitea 1.22; we have to use /teams/{id}.
          # Fail loud on missing team rather than treating it as "user not in
          # team" — that'd mask a misconfigured deployment.
          ORG_TEAMS_FILE=$(mktemp)
          # Remove the scratch file on every exit path.
          trap 'rm -f -- "$ORG_TEAMS_FILE"' EXIT
          HTTP_CODE=$(curl -sS -o "$ORG_TEAMS_FILE" -w '%{http_code}' -H "$AUTH" \
            "${API}/orgs/${OWNER}/teams")
          echo "teams-list HTTP=$HTTP_CODE size=$(wc -c <"$ORG_TEAMS_FILE")"
          echo "teams-list body (first 300 chars):"
          head -c 300 "$ORG_TEAMS_FILE"; echo
          if [ "$HTTP_CODE" != "200" ]; then
            echo "::error::GET /orgs/${OWNER}/teams returned HTTP $HTTP_CODE — token likely lacks read:org scope. Add a SOP_TIER_CHECK_TOKEN secret with read:organization scope."
            exit 1
          fi
          declare -A TEAM_ID
          # ELIGIBLE is a fixed space-separated list set above, so unquoted
          # word splitting is intentional here.
          for T in $ELIGIBLE; do
            ID=$(jq -r --arg t "$T" '.[] | select(.name==$t) | .id' <"$ORG_TEAMS_FILE" | head -1)
            if [ -z "$ID" ] || [ "$ID" = "null" ]; then
              VISIBLE=$(jq -r '.[]?.name? // empty' <"$ORG_TEAMS_FILE" 2>/dev/null | tr '\n' ' ')
              echo "::error::Team \"$T\" not found in org $OWNER. Teams visible: $VISIBLE"
              exit 1
            fi
            TEAM_ID[$T]="$ID"
            echo "team-id: $T → $ID"
          done

          # 3. Read approving reviewers
          REVIEWS=$(curl -fsS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/reviews") || {
            echo "::error::Could not read reviews for PR #${PR_NUMBER}."
            exit 1
          }
          # Ignore approvals that were later dismissed: this workflow re-runs
          # on the `dismissed` review event, so a dismissed approval must stop
          # satisfying the gate. A missing `dismissed` field counts as active.
          APPROVERS=$(jq -r '[.[] | select(.state=="APPROVED" and (.dismissed != true)) | .user.login] | unique | .[]' <<<"$REVIEWS")
          if [ -z "$APPROVERS" ]; then
            echo "::error::No approving reviews (dismissed approvals are ignored). Tier $TIER requires approval from {$ELIGIBLE} (non-author)."
            exit 1
          fi
          mapfile -t APPROVER_LIST <<<"$APPROVERS"
          echo "approvers: ${APPROVER_LIST[*]}"

          # 4. For each approver: check non-author + team membership (by id)
          OK=""
          for U in "${APPROVER_LIST[@]}"; do
            if [ "$U" = "$PR_AUTHOR" ]; then
              echo "skip self-review by $U"
              continue
            fi
            for T in $ELIGIBLE; do
              ID="${TEAM_ID[$T]}"
              CODE=$(curl -sS -o /dev/null -w '%{http_code}' -H "$AUTH" \
                "${API}/teams/${ID}/members/${U}")
              echo "  probe: $U in team $T (id=$ID) → HTTP $CODE"
              if [ "$CODE" = "200" ] || [ "$CODE" = "204" ]; then
                echo "::notice::approver $U is in team $T (eligible for $TIER)"
                OK="yes"
                break
              fi
            done
            # One eligible approver is enough; stop probing the rest.
            if [ -n "$OK" ]; then
              break
            fi
          done

          if [ -z "$OK" ]; then
            # Join approvers with spaces so the annotation stays on one line.
            echo "::error::Tier $TIER requires approval from a non-author member of {$ELIGIBLE}. Got approvers: ${APPROVER_LIST[*]} — none of them satisfied team membership (probe HTTP codes above)."
            exit 1
          fi
          echo "::notice::sop-tier-check passed: $TIER, approver in {$ELIGIBLE}"
|
||||
@@ -26,6 +26,14 @@ func TestExtended_WorkspaceDelete(t *testing.T) {
|
||||
WithArgs(wsDelID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}))
|
||||
|
||||
// CascadeDelete walks descendants unconditionally (the 0-children
|
||||
// optimization in the old inline path was dropped during the
|
||||
// CascadeDelete extraction — descendant CTE returns 0 rows here,
|
||||
// same end state, one extra cheap query).
|
||||
mock.ExpectQuery("WITH RECURSIVE descendants").
|
||||
WithArgs(wsDelID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}))
|
||||
|
||||
// #73: batch UPDATE happens BEFORE any container teardown.
|
||||
// Uses ANY($1::uuid[]) even with a single ID for consistency.
|
||||
mock.ExpectExec("UPDATE workspaces SET status =").
|
||||
|
||||
@@ -589,12 +589,6 @@ func (h *OrgHandler) Import(c *gin.Context) {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
|
||||
return
|
||||
}
|
||||
importStart := time.Now()
|
||||
emitOrgEvent(c.Request.Context(), "org.import.started", map[string]any{
|
||||
"name": body.Template.Name,
|
||||
"dir": body.Dir,
|
||||
"mode": body.Mode,
|
||||
})
|
||||
|
||||
var tmpl OrgTemplate
|
||||
var orgBaseDir string // base directory for files_dir resolution
|
||||
@@ -635,6 +629,19 @@ func (h *OrgHandler) Import(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Emit started AFTER the YAML is loaded so payload.name carries the
|
||||
// resolved template name (was: empty when caller passed `dir` instead
|
||||
// of inline `template`). Pre-parse error paths above return without
|
||||
// emitting — semantically "we couldn't even start an import" — so
|
||||
// every started event is guaranteed a paired completed/failed below
|
||||
// (no orphan started rows in structure_events).
|
||||
importStart := time.Now()
|
||||
emitOrgEvent(c.Request.Context(), "org.import.started", map[string]any{
|
||||
"name": tmpl.Name,
|
||||
"dir": body.Dir,
|
||||
"mode": body.Mode,
|
||||
})
|
||||
|
||||
// Required-env preflight — refuses import when any required_env is
|
||||
// missing from global_secrets. No bypass: the prior `force: true`
|
||||
// escape hatch was removed (issue #2290) because it was the silent
|
||||
@@ -787,14 +794,14 @@ func (h *OrgHandler) Import(c *gin.Context) {
|
||||
rows.Close()
|
||||
|
||||
for _, oid := range orphanIDs {
|
||||
cascadeCount, stopErrs, err := h.workspace.CascadeDelete(ctx, oid)
|
||||
descendantIDs, stopErrs, err := h.workspace.CascadeDelete(ctx, oid)
|
||||
if err != nil {
|
||||
log.Printf("Org import reconcile: CascadeDelete(%s) failed: %v", oid, err)
|
||||
reconcileErrs = append(reconcileErrs, fmt.Sprintf("delete %s: %v", oid, err))
|
||||
reconcileSkipped++
|
||||
continue
|
||||
}
|
||||
reconcileRemovedCount += 1 + cascadeCount
|
||||
reconcileRemovedCount += 1 + len(descendantIDs)
|
||||
if len(stopErrs) > 0 {
|
||||
log.Printf("Org import reconcile: %s had %d stop errors (orphan sweeper will retry)", oid, len(stopErrs))
|
||||
}
|
||||
|
||||
@@ -323,161 +323,19 @@ func (h *WorkspaceHandler) Delete(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Cascade delete: collect ALL descendants (not just direct children) via
|
||||
// recursive CTE, then stop each container and remove each volume.
|
||||
// Previous bug: only direct children's containers were stopped, leaving
|
||||
// grandchildren as orphan running containers after a cascade delete.
|
||||
descendantIDs := []string{}
|
||||
if len(children) > 0 {
|
||||
descRows, err := db.DB.QueryContext(ctx, `
|
||||
WITH RECURSIVE descendants AS (
|
||||
SELECT id FROM workspaces WHERE parent_id = $1 AND status != 'removed'
|
||||
UNION ALL
|
||||
SELECT w.id FROM workspaces w JOIN descendants d ON w.parent_id = d.id WHERE w.status != 'removed'
|
||||
)
|
||||
SELECT id FROM descendants
|
||||
`, id)
|
||||
if err != nil {
|
||||
log.Printf("Delete: descendant query error for %s: %v", id, err)
|
||||
} else {
|
||||
for descRows.Next() {
|
||||
var descID string
|
||||
if descRows.Scan(&descID) == nil {
|
||||
descendantIDs = append(descendantIDs, descID)
|
||||
}
|
||||
}
|
||||
descRows.Close()
|
||||
}
|
||||
// Delegate the cascade to CascadeDelete so the HTTP path and the
|
||||
// OrgImport reconcile path share one teardown sequence (#73 race
|
||||
// guard, container stop, volume removal, token revocation, schedule
|
||||
// disable, broadcast). The HTTP-specific bits — direct-children 409
|
||||
// gate above, ?purge=true hard-delete below, response shaping —
|
||||
// stay in this handler.
|
||||
descendantIDs, stopErrs, err := h.CascadeDelete(ctx, id)
|
||||
if err != nil {
|
||||
log.Printf("Delete: CascadeDelete(%s) failed: %v", id, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
// #73 fix: mark rows 'removed' in the DB FIRST, BEFORE stopping containers
|
||||
// or removing volumes. Previously the sequence was stop → update-status,
|
||||
// which left a gap where:
|
||||
// - the container's last pre-teardown heartbeat could resurrect the row
|
||||
// via the register-handler UPSERT (now also guarded in #73)
|
||||
// - the liveness monitor could observe 'online' status + expired Redis
|
||||
// TTL and trigger RestartByID, recreating a container we're trying
|
||||
// to destroy
|
||||
// Marking 'removed' first makes both of those paths no-op via their
|
||||
// existing `status NOT IN ('removed', ...)` guards.
|
||||
allIDs := append([]string{id}, descendantIDs...)
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET status = $1, updated_at = now() WHERE id = ANY($2::uuid[])`,
|
||||
models.StatusRemoved, pq.Array(allIDs)); err != nil {
|
||||
log.Printf("Delete status update error for %s: %v", id, err)
|
||||
}
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`DELETE FROM canvas_layouts WHERE workspace_id = ANY($1::uuid[])`,
|
||||
pq.Array(allIDs)); err != nil {
|
||||
log.Printf("Delete canvas_layouts error for %s: %v", id, err)
|
||||
}
|
||||
// Revoke all auth tokens for the deleted workspaces. Once the workspace is
|
||||
// gone its tokens are meaningless; leaving them alive would keep
|
||||
// HasAnyLiveTokenGlobal = true even after the platform is otherwise empty,
|
||||
// which prevents AdminAuth from returning to fail-open and breaks the E2E
|
||||
// test's count-zero assertion (and local re-run cleanup).
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspace_auth_tokens SET revoked_at = now()
|
||||
WHERE workspace_id = ANY($1::uuid[]) AND revoked_at IS NULL`,
|
||||
pq.Array(allIDs)); err != nil {
|
||||
log.Printf("Delete token revocation error for %s: %v", id, err)
|
||||
}
|
||||
// #1027: cascade-disable all schedules for the deleted workspaces so
|
||||
// the scheduler never fires a cron into a removed container.
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspace_schedules SET enabled = false, updated_at = now()
|
||||
WHERE workspace_id = ANY($1::uuid[]) AND enabled = true`,
|
||||
pq.Array(allIDs)); err != nil {
|
||||
log.Printf("Delete schedule disable error for %s: %v", id, err)
|
||||
}
|
||||
|
||||
// Now stop containers + remove volumes for all descendants (any depth).
|
||||
// Any concurrent heartbeat / registration / liveness-triggered restart
|
||||
// will see status='removed' and bail out early.
|
||||
//
|
||||
// Combines two concerns:
|
||||
//
|
||||
// 1. Detach cleanup from the request ctx via WithoutCancel + a 30s
|
||||
// timeout, so when the canvas's `api.del` resolves on our 200
|
||||
// (and gin cancels c.Request.Context()), in-flight Docker
|
||||
// stop/remove calls don't get cancelled mid-operation. The
|
||||
// previous shape leaked containers every time the canvas hung
|
||||
// up promptly: Stop returned "context canceled", the container
|
||||
// stayed up, and the next RemoveVolume failed with
|
||||
// "volume in use". 30s is generous for Docker daemon round-
|
||||
// trips (typical: <2s) and bounds a stuck daemon.
|
||||
//
|
||||
// 2. #1843: aggregate Stop() failures into stopErrs so the
|
||||
// post-deletion block surfaces them as 500. On the CP/EC2
|
||||
// backend, Stop() calls control plane's DELETE endpoint to
|
||||
// terminate the EC2; if that errors (transient 5xx, network),
|
||||
// the EC2 stays running with no DB row to track it (the
|
||||
// "orphan EC2 on a 0-customer account" scenario). Loud-fail
|
||||
// instead of silent-leak — clients retry, Stop's instance_id
|
||||
// lookup is idempotent against status='removed'. RemoveVolume
|
||||
// errors stay log-and-continue (local cleanup, not infra-leak).
|
||||
cleanupCtx, cleanupCancel := context.WithTimeout(
|
||||
context.WithoutCancel(ctx), 30*time.Second)
|
||||
defer cleanupCancel()
|
||||
|
||||
var stopErrs []error
|
||||
stopAndRemove := func(wsID string) {
|
||||
// Stop the workload first via the backend dispatcher (CP for
|
||||
// SaaS, Docker for self-hosted). Pre-2026-05-05 this gate was
|
||||
// `if h.provisioner == nil { return }` — early-returning on
|
||||
// every SaaS tenant left the EC2 running with no DB row to
|
||||
// track it (issue #2814; the comment below claimed "loud-fail
|
||||
// instead of silent-leak" but the early-return made it the
|
||||
// silent path on SaaS).
|
||||
//
|
||||
// Check Stop's error before any volume cleanup — the previous
|
||||
// code discarded it and immediately tried RemoveVolume, which
|
||||
// always fails with "volume in use" when Stop didn't actually
|
||||
// kill the container. The orphan sweeper
|
||||
// (registry/orphan_sweeper.go) catches what we skip here on
|
||||
// the next reconcile pass.
|
||||
if err := h.StopWorkspaceAuto(cleanupCtx, wsID); err != nil {
|
||||
log.Printf("Delete %s stop failed: %v — leaving cleanup for orphan sweeper", wsID, err)
|
||||
stopErrs = append(stopErrs, fmt.Errorf("stop %s: %w", wsID, err))
|
||||
return
|
||||
}
|
||||
// Volume cleanup is Docker-only — CP-managed workspaces have
|
||||
// no host-bind volumes to remove. Skip silently when no Docker
|
||||
// provisioner is wired (the SaaS path already terminated the
|
||||
// EC2 above; nothing left to do).
|
||||
if h.provisioner != nil {
|
||||
if err := h.provisioner.RemoveVolume(cleanupCtx, wsID); err != nil {
|
||||
log.Printf("Delete %s volume removal warning: %v", wsID, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, descID := range descendantIDs {
|
||||
stopAndRemove(descID)
|
||||
db.ClearWorkspaceKeys(cleanupCtx, descID)
|
||||
// #2269: drop the per-workspace restartState entry so it
|
||||
// doesn't accumulate across the platform's lifetime. The
|
||||
// LoadOrStore that creates the entry (workspace_restart.go)
|
||||
// has no companion remove path; without this Delete, every
|
||||
// short-lived workspace leaks ~16 bytes forever.
|
||||
restartStates.Delete(descID)
|
||||
// Detach broadcaster ctx for the same reason as the cleanup
|
||||
// above — RecordAndBroadcast does an INSERT INTO
|
||||
// structure_events + Redis Publish. If the canvas hangs up,
|
||||
// a request-ctx-bound INSERT can be cancelled mid-write,
|
||||
// leaving other WS clients ignorant of the cascade. The DB
|
||||
// row is already 'removed' so it's recoverable, but the
|
||||
// inconsistency is avoidable.
|
||||
h.broadcaster.RecordAndBroadcast(cleanupCtx, string(events.EventWorkspaceRemoved), descID, map[string]interface{}{})
|
||||
}
|
||||
|
||||
stopAndRemove(id)
|
||||
db.ClearWorkspaceKeys(cleanupCtx, id)
|
||||
restartStates.Delete(id) // #2269: same as descendants above
|
||||
|
||||
h.broadcaster.RecordAndBroadcast(cleanupCtx, string(events.EventWorkspaceRemoved), id, map[string]interface{}{
|
||||
"cascade_deleted": len(descendantIDs),
|
||||
})
|
||||
|
||||
// If any Stop call failed, surface 500 so the client retries. The DB
|
||||
// row is already 'removed' (idempotent), and Stop's instance_id
|
||||
@@ -549,16 +407,17 @@ func (h *WorkspaceHandler) Delete(c *gin.Context) {
|
||||
// remove volumes, revoke tokens, disable schedules, broadcast events.
|
||||
//
|
||||
// Idempotent against already-removed rows (the descendant CTE and all UPDATE
|
||||
// guards skip status='removed'). Returns the number of cascaded descendants
|
||||
// (not including id itself) and any per-workspace stop errors so callers can
|
||||
// surface a retryable failure instead of a silent-leak.
|
||||
// guards skip status='removed'). Returns the descendant id list so the HTTP
|
||||
// caller can drive the optional `?purge=true` hard-delete path against the
|
||||
// same set the cascade just touched, plus any per-workspace stop errors so
|
||||
// callers can surface a retryable failure instead of a silent-leak.
|
||||
//
|
||||
// Caller is responsible for the children-confirmation gate (the HTTP handler
|
||||
// returns 409 when children exist + ?confirm=true is missing); this helper
|
||||
// always cascades.
|
||||
func (h *WorkspaceHandler) CascadeDelete(ctx context.Context, id string) (int, []error, error) {
|
||||
func (h *WorkspaceHandler) CascadeDelete(ctx context.Context, id string) ([]string, []error, error) {
|
||||
if err := validateWorkspaceID(id); err != nil {
|
||||
return 0, nil, err
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
descendantIDs := []string{}
|
||||
@@ -571,7 +430,7 @@ func (h *WorkspaceHandler) CascadeDelete(ctx context.Context, id string) (int, [
|
||||
SELECT id FROM descendants
|
||||
`, id)
|
||||
if err != nil {
|
||||
return 0, nil, fmt.Errorf("descendant query: %w", err)
|
||||
return nil, nil, fmt.Errorf("descendant query: %w", err)
|
||||
}
|
||||
for descRows.Next() {
|
||||
var descID string
|
||||
@@ -637,7 +496,7 @@ func (h *WorkspaceHandler) CascadeDelete(ctx context.Context, id string) (int, [
|
||||
"cascade_deleted": len(descendantIDs),
|
||||
})
|
||||
|
||||
return len(descendantIDs), stopErrs, nil
|
||||
return descendantIDs, stopErrs, nil
|
||||
}
|
||||
|
||||
// validateWorkspaceID returns an error when id is not a valid UUID.
|
||||
|
||||
@@ -813,6 +813,12 @@ func TestWorkspaceDelete_DisablesSchedules(t *testing.T) {
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}))
|
||||
|
||||
// CascadeDelete walks descendants unconditionally — 0-children case
|
||||
// returns 0 rows here.
|
||||
mock.ExpectQuery("WITH RECURSIVE descendants").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}))
|
||||
|
||||
// Mark workspace as removed
|
||||
mock.ExpectExec("UPDATE workspaces SET status =").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
@@ -935,6 +941,12 @@ func TestWorkspaceDelete_ScheduleDisableOnlyTargetsDeletedWorkspace(t *testing.T
|
||||
WithArgs(wsA).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}))
|
||||
|
||||
// CascadeDelete walks descendants unconditionally — 0-children case
|
||||
// returns 0 rows here.
|
||||
mock.ExpectQuery("WITH RECURSIVE descendants").
|
||||
WithArgs(wsA).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}))
|
||||
|
||||
// Mark only workspace A as removed
|
||||
mock.ExpectExec("UPDATE workspaces SET status =").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
@@ -46,7 +46,11 @@
|
||||
# 2. Fetch fresh token from platform API.
|
||||
# 3. If platform is unreachable, fall back to GITHUB_TOKEN / GH_TOKEN
|
||||
# env var (set at container start, valid for up to 60 min).
|
||||
# 4. If all fail, exit 1 so git falls through to the next credential
|
||||
# 4. If env var is unset, read static-token file at
|
||||
# ${CONFIGS_DIR}/.github-token. Operator escape hatch for incidents
|
||||
# when the platform endpoint is broken; not managed by the platform.
|
||||
# Never auto-cached, so API recovery is detected immediately.
|
||||
# 5. If all fail, exit 1 so git falls through to the next credential
|
||||
# helper in the chain (if any).
|
||||
#
|
||||
# # gh CLI integration
|
||||
@@ -197,7 +201,25 @@ _fetch_token_from_api() {
|
||||
echo "${token}"
|
||||
}
|
||||
|
||||
# _fetch_token — return a fresh token using cache > API > env fallback chain.
|
||||
# _read_static_token — output static-token-file contents if present and
|
||||
# non-empty. Returns 1 if file missing or empty. Never writes to cache —
|
||||
# operator escape hatch; we want API recovery to be detected on the very
|
||||
# next call without 50-min stale-cache stickiness on the workaround.
|
||||
#######################################
# Print the operator-provided static token, with all whitespace removed.
# Globals:   CONFIGS_DIR (read) - directory holding the `.github-token` file
# Arguments: none
# Outputs:   the token followed by a newline on stdout
# Returns:   0 when a non-empty token was printed; 1 when the file is
#            missing, unreadable, or contains only whitespace
#######################################
_read_static_token() {
  local static_file="${CONFIGS_DIR}/.github-token"
  # Treat an unreadable file the same as a missing one.
  if [ ! -f "${static_file}" ] || [ ! -r "${static_file}" ]; then
    return 1
  fi
  local static_token
  # Redirect instead of `cat |` so a read failure becomes a plain return 1
  # rather than a pipeline failure in the caller's shell.
  static_token=$(tr -d '[:space:]' 2>/dev/null <"${static_file}") || return 1
  if [ -z "${static_token}" ]; then
    return 1
  fi
  # printf, not echo: echo would swallow a value that looks like an option.
  printf '%s\n' "${static_token}"
  return 0
}
|
||||
|
||||
# _fetch_token — return a fresh token using cache > API > env > static fallback chain.
|
||||
# Outputs the raw token string on success; exits non-zero if all sources fail.
|
||||
_fetch_token() {
|
||||
# 1. Try cache first.
|
||||
@@ -222,6 +244,16 @@ _fetch_token() {
|
||||
return 0
|
||||
fi
|
||||
|
||||
# 4. Static-token file fallback — operator escape hatch for when
|
||||
# the platform API is broken AND no env var is set.
|
||||
# Manually written by infra; never auto-cached so API recovery
|
||||
# is detected on the very next call.
|
||||
static_token=$(_read_static_token 2>/dev/null) && {
|
||||
echo "[molecule-git-token-helper] API + env exhausted, using static-token file" >&2
|
||||
echo "${static_token}"
|
||||
return 0
|
||||
}
|
||||
|
||||
echo "[molecule-git-token-helper] all token sources exhausted" >&2
|
||||
return 1
|
||||
}
|
||||
@@ -240,20 +272,38 @@ case "${ACTION}" in
|
||||
# No-op — the platform manages token lifecycle.
|
||||
;;
|
||||
_fetch_token)
|
||||
# Return raw token (cache > API > env fallback).
|
||||
# Return raw token (cache > API > env > static fallback).
|
||||
_fetch_token
|
||||
;;
|
||||
_refresh_gh)
|
||||
# Refresh cache AND update gh CLI auth in one shot.
|
||||
# Called by molecule-gh-token-refresh.sh background daemon.
|
||||
# Force-bypass cache to get a definitely fresh token.
|
||||
api_token=$(_fetch_token_from_api) || {
|
||||
echo "[molecule-git-token-helper] _refresh_gh: API fetch failed" >&2
|
||||
exit 1
|
||||
}
|
||||
_write_cache "${api_token}"
|
||||
# On API failure, fall through env → static-file like _fetch_token does,
|
||||
# but do NOT write the cache (those aren't API-issued tokens).
|
||||
api_token=$(_fetch_token_from_api) || api_token=""
|
||||
chosen_token=""
|
||||
if [ -n "${api_token}" ]; then
|
||||
_write_cache "${api_token}"
|
||||
chosen_token="${api_token}"
|
||||
else
|
||||
env_token="${GITHUB_TOKEN:-${GH_TOKEN:-}}"
|
||||
if [ -n "${env_token}" ]; then
|
||||
chosen_token="${env_token}"
|
||||
echo "[molecule-git-token-helper] _refresh_gh: API failed, using env GITHUB_TOKEN" >&2
|
||||
else
|
||||
static_token=$(_read_static_token 2>/dev/null) && {
|
||||
chosen_token="${static_token}"
|
||||
echo "[molecule-git-token-helper] _refresh_gh: API failed + env unset, using static-token file" >&2
|
||||
}
|
||||
fi
|
||||
if [ -z "${chosen_token}" ]; then
|
||||
echo "[molecule-git-token-helper] _refresh_gh: API fetch failed and no fallback available" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
# Update gh CLI auth — gh auth login reads token from stdin.
|
||||
echo "${api_token}" | gh auth login --hostname github.com --with-token 2>/dev/null || {
|
||||
echo "${chosen_token}" | gh auth login --hostname github.com --with-token 2>/dev/null || {
|
||||
echo "[molecule-git-token-helper] _refresh_gh: gh auth login failed (non-fatal)" >&2
|
||||
}
|
||||
# Also update GH_TOKEN file for scripts that source it.
|
||||
@@ -265,7 +315,7 @@ case "${ACTION}" in
|
||||
# function); shadow with a uniquely-named global instead.
|
||||
_gh_prev_umask=$(umask)
|
||||
umask 077
|
||||
printf '%s' "${api_token}" > "${gh_token_file}.tmp"
|
||||
printf '%s' "${chosen_token}" > "${gh_token_file}.tmp"
|
||||
mv -f "${gh_token_file}.tmp" "${gh_token_file}"
|
||||
umask "${_gh_prev_umask}"
|
||||
unset _gh_prev_umask
|
||||
|
||||
Reference in New Issue
Block a user