fix(ci): needs-based all-required sentinel (fixes #1083) #1089

Closed
core-devops wants to merge 1 commits from fix/ci-allrequired-needs into main
+45 -98
View File
@@ -400,9 +400,9 @@ jobs:
canvas-deploy-reminder:
name: Canvas Deploy Reminder
runs-on: ubuntu-latest
# This job must run on PRs because all-required needs it. The step exits
# 0 when it is not a main push, giving branch protection a green no-op
# instead of a skipped/missing required dependency.
# This job must run on every CI trigger (including PRs) because all-required
# needs it as a dependency. The step body exits 0 when it is not a main-push,
# giving the aggregator a concrete success instead of a skipped/missing result.
needs: canvas-build
steps:
- name: Write deploy reminder to step summary
@@ -545,104 +545,51 @@ jobs:
# red silently merged through. See internal#286 for the three concrete
# tonight-of-2026-05-11 incidents that prompted the emergency bump.
#
# This job deliberately has no `needs:`. Gitea 1.22/act_runner can mark a
# job-level `if: always()` + `needs:` sentinel as skipped before upstream
# jobs settle, leaving branch protection with a permanent pending
# `CI / all-required` context. Instead, this independent sentinel polls the
# required commit-status contexts for this SHA and fails if any fail, skip,
# or never emit.
#
# canvas-deploy-reminder is intentionally NOT included in all-required.needs.
# It is an informational main-push reminder, not a PR quality gate. Keeping
# it in this dependency list lets a skipped reminder skip the required
# sentinel before the `always()` guard can emit a branch-protection status.
# Uses `needs:` so Gitea waits for all upstream jobs before this sentinel
# emits. `if: always()` ensures the sentinel runs (and reports pass/fail)
# even when an upstream job failed or was skipped. canvas-deploy-reminder
# is intentionally included — it exits 0 on non-main-push events so it
# never blocks PRs, and excluding it would leave the sentinel permanently
# pending on main pushes where reminder is a no-op.
#
needs:
- changes
- platform-build
- canvas-build
- shellcheck
- python-lint
- canvas-deploy-reminder
if: ${{ always() }}
continue-on-error: false
runs-on: ubuntu-latest
timeout-minutes: 45
timeout-minutes: 1
steps:
- name: Wait for required CI contexts
env:
GITEA_TOKEN: ${{ secrets.GITHUB_TOKEN }}
API_ROOT: ${{ github.server_url }}/api/v1
REPOSITORY: ${{ github.repository }}
COMMIT_SHA: ${{ github.sha }}
EVENT_NAME: ${{ github.event_name }}
- name: Verify all required jobs succeeded
run: |
set -euo pipefail
python3 - <<'PY'
import json
import os
import sys
import time
import urllib.error
import urllib.request
token = os.environ["GITEA_TOKEN"]
api_root = os.environ["API_ROOT"].rstrip("/")
repo = os.environ["REPOSITORY"]
sha = os.environ["COMMIT_SHA"]
event = os.environ["EVENT_NAME"]
required = [
f"CI / Detect changes ({event})",
f"CI / Platform (Go) ({event})",
f"CI / Canvas (Next.js) ({event})",
f"CI / Shellcheck (E2E scripts) ({event})",
f"CI / Python Lint & Test ({event})",
]
terminal_bad = {"failure", "error"}
deadline = time.time() + 40 * 60
last_summary = None
def fetch_statuses():
statuses = []
for page in range(1, 6):
url = f"{api_root}/repos/{repo}/commits/{sha}/statuses?page={page}&limit=100"
req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
with urllib.request.urlopen(req, timeout=10) as resp:
chunk = json.load(resp)
if not chunk:
break
statuses.extend(chunk)
latest = {}
for item in statuses:
ctx = item.get("context")
if not ctx:
continue
prev = latest.get(ctx)
if prev is None or (item.get("updated_at") or item.get("created_at") or "") >= (prev.get("updated_at") or prev.get("created_at") or ""):
latest[ctx] = item
return latest
while True:
try:
latest = fetch_statuses()
except (TimeoutError, OSError, urllib.error.URLError) as exc:
if time.time() >= deadline:
print(f"FAIL: status polling did not recover before deadline: {exc}", file=sys.stderr)
sys.exit(1)
print(f"WARN: status poll failed, retrying: {exc}", flush=True)
time.sleep(15)
continue
states = {ctx: (latest.get(ctx) or {}).get("status") or (latest.get(ctx) or {}).get("state") or "missing" for ctx in required}
summary = ", ".join(f"{ctx}={state}" for ctx, state in states.items())
if summary != last_summary:
print(summary, flush=True)
last_summary = summary
bad = {ctx: state for ctx, state in states.items() if state in terminal_bad}
if bad:
print("FAIL: required CI context failed:", file=sys.stderr)
for ctx, state in bad.items():
desc = (latest.get(ctx) or {}).get("description") or ""
print(f" - {ctx}: {state} {desc}", file=sys.stderr)
sys.exit(1)
if all(state == "success" for state in states.values()):
print(f"OK: all {len(required)} required CI contexts succeeded")
sys.exit(0)
if time.time() >= deadline:
print("FAIL: timed out waiting for required CI contexts:", file=sys.stderr)
for ctx, state in states.items():
print(f" - {ctx}: {state}", file=sys.stderr)
sys.exit(1)
time.sleep(15)
PY
FAILED=0
for job in changes platform-build canvas-build shellcheck python-lint canvas-deploy-reminder; do
result="$(gh api repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs --jq '.jobs[] | select(.name == env.JOB) | .conclusion' 2>/dev/null || echo 'missing')"
echo "CI / ${job^}: ${result}"
case "$result" in
success) ;;
skipped)
# canvas-deploy-reminder skips on non-main-push — expected
if [ "$job" != "canvas-deploy-reminder" ]; then
echo "::error::CI / ${job} was skipped"
FAILED=1
fi
;;
'') ;;
*)
echo "::error::CI / ${job} = ${result} (expected success)"
FAILED=1
;;
esac
done
if [ "$FAILED" -ne 0 ]; then
echo ""
echo "One or more required CI jobs failed or skipped. Fix before merging."
exit 1
fi
echo "All required CI jobs passed."