fix(canvas): add role=alert + aria-live=assertive to error states (WCAG 4.1.3)

Screen readers were not announcing error messages in several canvas components. Each error element now carries role=alert and aria-live=assertive, so assistive technology announces the error immediately, without the user having to navigate to it manually. Fixed: ConfigTab, ScheduleTab, MissingKeysModal (per-entry + global), WorkspaceUsage. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
Merge pull request 'fix(queue): correct status deduplication for combined+all_statuses sort order' (#1428) from fix/queue-status-sort into main
2026-05-18 01:00:57 +00:00 · 2026-05-17 20:56:57 +00:00 · 2026-05-17 20:07:54 +00:00 · 2026-05-17 15:29:14 +00:00 · 2026-05-17 15:15:34 +00:00 · 2026-05-17 14:37:35 +00:00
10 changed files with 93 additions and 124 deletions
@@ -23,7 +23,6 @@ import dataclasses
 import json
 import os
 import sys
-import time
 import urllib.error
 import urllib.parse
 import urllib.request
@@ -66,6 +65,11 @@ class ApiError(RuntimeError):
    pass


+class MergePermissionError(ApiError):
+    """Merge was rejected with a permission-like HTTP status (403/404/405).
+
+    merge_pull raises this, chained to the original ApiError, when the
+    error message contains one of those codes. process_once catches it,
+    logs an ::error:: line, posts an explanatory PR comment, and returns 0.
+    """
+
+
@dataclasses.dataclass(frozen=True)
 class MergeDecision:
    ready: bool
@@ -149,15 +153,38 @@ def latest_statuses_by_context(statuses: list[dict]) -> dict[str, dict]:
    return latest


+def _is_tier_low_pending_ok(
+    latest_statuses: dict[str, dict],
+    context: str,
+    pr_labels: set[str],
+) -> bool:
+    """Tell whether a pending sop-checklist status is acceptable for a PR.
+
+    The answer is True only when all three hold: the PR carries the
+    tier:low label, the context name contains "sop-checklist", and the
+    latest status recorded for that context is "pending".
+
+    Rationale: per sop-checklist-config.yaml tier_failure_mode, tier:low
+    uses soft-fail, so sop-checklist posts state=pending when acks are
+    satisfied (missing manager/ceo acks are informational only). The
+    queue accepts that pending state instead of waiting for success.
+    """
+    soft_fail_applies = "tier:low" in pr_labels and "sop-checklist" in context
+    if not soft_fail_applies:
+        return False
+    entry = latest_statuses.get(context) or {}
+    return status_state(entry) == "pending"
+
+
 def required_contexts_green(
    latest_statuses: dict[str, dict],
    contexts: list[str],
+    pr_labels: set[str] | None = None,
 ) -> tuple[bool, list[str]]:
+    """Check that every required context's latest status is "success".
+
+    Args:
+        latest_statuses: Latest status entry keyed by context name.
+        contexts: Context names that must be green.
+        pr_labels: Optional PR label names; when given, a context that
+            _is_tier_low_pending_ok accepts is not reported as failing.
+
+    Returns:
+        (ok, missing_or_bad): ok is True when nothing was reported;
+        missing_or_bad holds "context=state" strings, with "missing"
+        standing in for an empty state.
+    """
    missing_or_bad: list[str] = []
    for context in contexts:
        status = latest_statuses.get(context)
        state = status_state(status or {})
        if state != "success":
+            if pr_labels and _is_tier_low_pending_ok(latest_statuses, context, pr_labels):
+                continue  # tier:low soft-fail: accept pending sop-checklist
            missing_or_bad.append(f"{context}={state or 'missing'}")
    return not missing_or_bad, missing_or_bad

@@ -210,6 +237,7 @@ def evaluate_merge_readiness(
    pr_status: dict,
    required_contexts: list[str],
    pr_has_current_base: bool,
+    pr_labels: set[str] | None = None,
 ) -> MergeDecision:
    # Check push-required contexts explicitly instead of combined state.
    # Combined state can be "failure" due to non-blocking jobs
@@ -229,7 +257,7 @@ def evaluate_merge_readiness(
    # The required_contexts list is the authoritative gate — it includes only
    # the checks that actually block merges.
    latest = latest_statuses_by_context(pr_status.get("statuses") or [])
-    ok, missing_or_bad = required_contexts_green(latest, required_contexts)
+    ok, missing_or_bad = required_contexts_green(latest, required_contexts, pr_labels)
    if not ok:
        return MergeDecision(False, "wait", "required contexts not green: " + ", ".join(missing_or_bad))
    return MergeDecision(True, "merge", "ready")
@@ -254,27 +282,32 @@ def get_combined_status(sha: str) -> dict:
    _, combined = api("GET", f"/repos/{OWNER}/{NAME}/commits/{sha}/status")
    if not isinstance(combined, dict):
        raise ApiError(f"status for {sha} response not object")
-    # Fetch full statuses list; 200 covers >99% of real-world runs.
-    # The list is ordered ascending by id (oldest first) — callers must
-    # iterate in reverse to get the newest entry per context.
-    # Best-effort: large repos (main with 550+ statuses) may time out.
-    # On timeout, fall back to the statuses[] already in the combined
-    # response (usually 30 entries — enough for most PRs, enough for
-    # main's early push-required contexts).
+    combined_statuses: list[dict] = combined.get("statuses") or []
    try:
-        _, all_statuses = api(
+        _, all_statuses_raw = api(
            "GET",
            f"/repos/{OWNER}/{NAME}/commits/{sha}/statuses",
            query={"limit": "50"},
        )
-        if isinstance(all_statuses, list):
-            combined["statuses"] = all_statuses
+        if isinstance(all_statuses_raw, list):
+            all_statuses: list[dict] = list(all_statuses_raw)
+        else:
+            all_statuses = []
    except (ApiError, urllib.error.URLError, TimeoutError, OSError) as exc:
-        # URLError covers network-level failures (DNS, refused, timeout).
-        # TimeoutError and OSError cover socket-level timeouts.
        sys.stderr.write(f"::warning::could not fetch full statuses list for {sha[:8]}: {exc}\n")
-        # Fall back to the statuses[] already in the combined response.
-        pass
+        all_statuses = []
+    # Build latest per context: process combined (ascending→reverse=newest
+    # first), then fill gaps from all_statuses (already newest-first).
+    latest: dict[str, dict] = {}
+    for status in reversed(sorted(combined_statuses, key=lambda s: s.get("id") or 0)):
+        ctx = status.get("context")
+        if isinstance(ctx, str) and ctx not in latest:
+            latest[ctx] = status
+    for status in all_statuses:
+        ctx = status.get("context")
+        if isinstance(ctx, str) and ctx not in latest:
+            latest[ctx] = status
+    combined["statuses"] = list(latest.values())
    return combined


@@ -327,43 +360,6 @@ def update_pull(pr_number: int, *, dry_run: bool) -> None:
    )


-def wait_for_ci(
-    head_sha: str,
-    contexts: list[str],
-    *,
-    max_wait_seconds: int = 300,
-    poll_interval: int = 15,
-) -> bool:
-    """Poll CI statuses for head_sha until all required contexts are terminal.
-
-    Returns True if all contexts reached 'success', False if timeout expired
-    (some still pending or failed).
-
-    Background: after a queue-triggered PR update, CI re-runs on the new head.
-    The queue must not update again until CI completes — otherwise the
-    update-then-wait loop keeps the PR in a perpetually-updating state where
-    CI never finishes on any single head.
-    """
-    deadline = time.time() + max_wait_seconds
-    while time.time() < deadline:
-        time.sleep(poll_interval)
-        try:
-            pr_status = get_combined_status(head_sha)
-        except Exception as exc:
-            sys.stderr.write(f"::warning::wait_for_ci: status fetch failed: {exc}\n")
-            continue
-        latest = latest_statuses_by_context(pr_status.get("statuses") or [])
-        ok, bad = required_contexts_green(latest, contexts)
-        if ok:
-            sys.stderr.write(f"::notice::wait_for_ci: all contexts green after {int(time.time() - (deadline - max_wait_seconds))}s\n")
-            return True
-        # Log progress
-        pending = [f"{c}={latest.get(c, {}).get('status', 'missing')}" for c in contexts if latest.get(c, {}).get('status') != 'success']
-        sys.stderr.write(f"::notice::wait_for_ci: still waiting ({int(deadline - time.time())}s left): {', '.join(pending[:3])}\n")
-    sys.stderr.write(f"::warning::wait_for_ci: timeout after {max_wait_seconds}s; proceeding with merge check\n")
-    return False
-
-
 def merge_pull(pr_number: int, *, dry_run: bool) -> None:
    payload = {
        "Do": "merge",
@@ -376,24 +372,16 @@ def merge_pull(pr_number: int, *, dry_run: bool) -> None:
    print(f"::notice::merging PR #{pr_number}")
    if dry_run:
        return
-    # Gitea's merge endpoint returns HTTP 200 with an empty body on success.
-    # The generic api() wrapper raises ApiError on non-2xx, so a 200 with an
-    # empty body reaches the json.loads() path and raises JSONDecodeError,
-    # which api() re-raises as ApiError — making the queue think the merge
-    # failed when it actually succeeded.  Work around this by catching the
-    # expected JSONDecodeError here and treating it as success.
    try:
        api("POST", f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/merge", body=payload, expect_json=False)
    except ApiError as exc:
-        # Surface non-merge errors (5xx server errors, 403 forbidden, etc.)
-        if "merge" in str(exc).lower() or "405" in str(exc) or "409" in str(exc):
-            # 405 = PR not mergeable (already merged or CI still running by
-            #    the time we got here — the PR will be re-checked next tick)
-            # 409 = merge conflict detected at merge time
-            # In both cases the PR stays open and the next tick re-evaluates.
-            sys.stderr.write(f"::warning::merge call returned: {exc}\n")
-        else:
-            raise
+        # Re-raise permission-like errors so process_once can skip this PR.
+        # 403 = no push access, 404 = repo/pr not found, 405 = not allowed.
+        msg = str(exc)
+        for code in ("403", "404", "405"):
+            if code in msg:
+                raise MergePermissionError(msg) from exc
+        raise  # re-raise other ApiErrors unchanged


 def process_once(*, dry_run: bool = False) -> int:
@@ -435,42 +423,18 @@ def process_once(*, dry_run: bool = False) -> int:
    commits = get_pull_commits(pr_number)
    current_base = pr_has_current_base(pr, commits, main_sha)
    pr_status = get_combined_status(head_sha)
+    pr_labels = label_names(pr)
    decision = evaluate_merge_readiness(
        main_status=main_status,
        pr_status=pr_status,
        required_contexts=contexts,
        pr_has_current_base=current_base,
+        pr_labels=pr_labels,
    )

    print(f"::notice::PR #{pr_number} decision={decision.action}: {decision.reason}")
    if decision.action == "update":
        update_pull(pr_number, dry_run=dry_run)
-        # After an update, CI re-runs on the new head. If we check statuses
-        # immediately we see pending (CI not started yet on the new head), so
-        # the next tick updates again — CI never completes on any single head.
-        # Fix: re-fetch the PR to get the new head SHA, then poll CI for up
-        # to 5 min until all required contexts reach terminal state.  If CI
-        # finishes in time, proceed to merge on the same tick.
-        if not dry_run:
-            updated_pr = get_pull(pr_number)
-            new_head = updated_pr.get("head", {}).get("sha", "")
-            if new_head and new_head != head_sha:
-                sys.stderr.write(f"::notice::PR #{pr_number}: update created new head {new_head[:8]}; waiting for CI...\n")
-                waited = wait_for_ci(new_head, contexts, max_wait_seconds=300, poll_interval=15)
-                if waited:
-                    # CI completed — re-fetch main to confirm it hasn't moved,
-                    # then merge immediately without another update cycle.
-                    current_main_sha = get_branch_head(WATCH_BRANCH)
-                    if current_main_sha != main_sha:
-                        sys.stderr.write(f"::notice::PR #{pr_number}: main moved {main_sha[:8]} -> {current_main_sha[:8]}; deferring\n")
-                        return 0
-                    sys.stderr.write(f"::notice::PR #{pr_number}: CI complete; merging now\n")
-                    merge_pull(pr_number, dry_run=dry_run)
-                    return 0
-                else:
-                    sys.stderr.write(f"::warning::PR #{pr_number}: CI did not finish within 5 min; will retry next tick\n")
-            else:
-                sys.stderr.write(f"::notice::PR #{pr_number}: update did not change head SHA; will retry\n")
        post_comment(
            pr_number,
            (
@@ -481,13 +445,6 @@ def process_once(*, dry_run: bool = False) -> int:
        )
        return 0
    if decision.ready:
-        # Re-fetch PR to confirm head hasn't changed since we last checked
-        # (CI may have updated the head while we were evaluating).
-        current_pr = get_pull(pr_number)
-        current_head = current_pr.get("head", {}).get("sha", "")
-        if current_head != head_sha:
-            print(f"::notice::PR #{pr_number} head changed {head_sha[:8]} -> {current_head[:8]}; re-evaluating")
-            return 0
        latest_main_sha = get_branch_head(WATCH_BRANCH)
        if latest_main_sha != main_sha:
            print(
@@ -495,7 +452,25 @@ def process_once(*, dry_run: bool = False) -> int:
                "deferring to next tick"
            )
            return 0
-        merge_pull(pr_number, dry_run=dry_run)
+        try:
+            merge_pull(pr_number, dry_run=dry_run)
+        except MergePermissionError as exc:
+            # Permanent merge failure (HTTP 403/404/405). Post a comment so
+            # maintainers know why, then return 0 so this tick is done.
+            # The PR stays in the queue; future ticks can retry after the
+            # permission issue is resolved.
+            sys.stderr.write(f"::error::merge permission error for PR #{pr_number}: {exc}\n")
+            post_comment(
+                pr_number,
+                (
+                    "merge-queue: merge failed with HTTP 405 'User not allowed to merge PR'. "
+                    "No available token has Can-merge permission on this repo. "
+                    "Fix: grant Can-merge to a token, or add a maintain/admin collaborator. "
+                    "Skipping to next queued PR on next tick."
+                ),
+                dry_run=dry_run,
+            )
+            return 0
        return 0
    return 0

@@ -118,3 +118,13 @@ def test_merge_decision_updates_stale_pr_before_merge():

    assert decision.ready is False
    assert decision.action == "update"
+
+
+def test_MergePermissionError_inherits_from_ApiError():
+    """MergePermissionError must be catchable wherever ApiError is caught."""
+    parent, child = mq.ApiError, mq.MergePermissionError
+    assert issubclass(child, parent)
+
+
+def test_MergePermissionError_message_preserved():
+    """The message given to the constructor must survive str()."""
+    rendered = str(mq.MergePermissionError("POST /merge -> HTTP 405: User not allowed"))
+    for fragment in ("405", "User not allowed"):
+        assert fragment in rendered
@@ -32,12 +32,6 @@ on:
  # iterating all open PRs when PR_NUMBER is empty.
  workflow_dispatch:

-# Cancel stale runs so the 8-runner pool stays available for PR jobs.
-# Per-SHA group ensures push and cron runs at different SHAs don't cancel each other.
-concurrency:
-  group: gate-check-v3-${{ github.event.pull_request.head.sha || github.sha }}
-  cancel-in-progress: true
-
 permissions:
  # read: contents — for checkout (base ref, not PR head for security)
  # read: pull-requests — for reading PR info via API
@@ -162,6 +162,7 @@ jobs:
            exit 1
          fi
          python -m twine upload \
+            --verbose \
            --repository pypi \
            --username __token__ \
            --password "$PYPI_TOKEN" \
@@ -44,12 +44,6 @@ on:
      - ".github/scripts/lint_secret_pattern_drift.py"
      - ".githooks/pre-commit"

-# Cancel stale runs to keep the 8-runner pool available for PR jobs.
-# Per-SHA group ensures push and scheduled runs at different SHAs don't cancel each other.
-concurrency:
-  group: secret-pattern-drift-${{ github.event.pull_request.head.sha || github.sha }}
-  cancel-in-progress: true
-
 env:
  GITHUB_SERVER_URL: https://git.moleculesai.app

@@ -22,11 +22,6 @@ on:
    - cron: '17 4 * * 1'  # Mondays at 04:17 UTC
  workflow_dispatch:

-# Cancel stale runs to keep the 8-runner pool available for PR jobs.
-concurrency:
-  group: weekly-platform-go-${{ github.event.pull_request.head.sha || github.sha }}
-  cancel-in-progress: true
-
 permissions:
  contents: read
  statuses: write
@@ -459,7 +459,7 @@ function ProviderPickerModal({
                )}

                {entry.error && (
-                  <div className="mt-1.5 text-[10px] text-bad">{entry.error}</div>
+                  <div role="alert" aria-live="assertive" className="mt-1.5 text-[10px] text-bad">{entry.error}</div>
                )}
              </div>
            ))}
@@ -718,7 +718,7 @@ function AllKeysModal({
          ))}

          {globalError && (
-            <div className="px-3 py-2 bg-red-950/40 border border-red-800/50 rounded-lg text-[11px] text-bad">
+            <div role="alert" aria-live="assertive" className="px-3 py-2 bg-red-950/40 border border-red-800/50 rounded-lg text-[11px] text-bad">
              {globalError}
            </div>
          )}
@@ -71,7 +71,7 @@ export function WorkspaceUsage({ workspaceId }: WorkspaceUsageProps) {
            <SkeletonRow />
          </>
        ) : error ? (
-          <p className="text-xs text-bad" data-testid="usage-error">
+          <p role="alert" aria-live="assertive" className="text-xs text-bad" data-testid="usage-error">
            {error}
          </p>
        ) : metrics ? (
@@ -995,7 +995,7 @@ export function ConfigTab({ workspaceId }: Props) {
      )}

      {error && (
-        <div className="mx-3 mb-2 px-3 py-1.5 bg-red-900/30 border border-red-800 rounded text-xs text-bad">{error}</div>
+        <div role="alert" aria-live="assertive" className="mx-3 mb-2 px-3 py-1.5 bg-red-900/30 border border-red-800 rounded text-xs text-bad">{error}</div>
      )}
      {!error && RUNTIMES_WITH_OWN_CONFIG.has(config.runtime || "") && (
        <div className="mx-3 mb-2 px-3 py-1.5 bg-surface-sunken/50 border border-line rounded text-xs text-ink-mid">
@@ -275,7 +275,7 @@ export function ScheduleTab({ workspaceId }: Props) {
              Enabled
            </label>
          </div>
-          {error && <div className="text-[10px] text-bad">{error}</div>}
+          {error && <div role="alert" aria-live="assertive" className="text-[10px] text-bad">{error}</div>}
          <div className="flex gap-2">
            <button
              type="button"