Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4f84fb896e | |||
| 4c0cd6b705 | |||
| af7afc6112 | |||
| 4f5d683f4b | |||
| 36dba4eb51 | |||
| 64755ad258 | |||
| df4a0e3f9d | |||
| 5903c010a6 | |||
| 8b952ac0a5 |
@@ -65,6 +65,11 @@ class ApiError(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
class MergePermissionError(ApiError):
|
||||
"""Merge failed with a permanent permission error (403/404/405).
|
||||
The queue should skip this PR and move to the next one."""
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class MergeDecision:
|
||||
ready: bool
|
||||
@@ -367,7 +372,16 @@ def merge_pull(pr_number: int, *, dry_run: bool) -> None:
|
||||
print(f"::notice::merging PR #{pr_number}")
|
||||
if dry_run:
|
||||
return
|
||||
api("POST", f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/merge", body=payload, expect_json=False)
|
||||
try:
|
||||
api("POST", f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/merge", body=payload, expect_json=False)
|
||||
except ApiError as exc:
|
||||
# Re-raise permission-like errors so process_once can skip this PR.
|
||||
# 403 = no push access, 404 = repo/pr not found, 405 = not allowed.
|
||||
msg = str(exc)
|
||||
for code in ("403", "404", "405"):
|
||||
if code in msg:
|
||||
raise MergePermissionError(msg) from exc
|
||||
raise # re-raise other ApiErrors unchanged
|
||||
|
||||
|
||||
def process_once(*, dry_run: bool = False) -> int:
|
||||
@@ -438,7 +452,25 @@ def process_once(*, dry_run: bool = False) -> int:
|
||||
"deferring to next tick"
|
||||
)
|
||||
return 0
|
||||
merge_pull(pr_number, dry_run=dry_run)
|
||||
try:
|
||||
merge_pull(pr_number, dry_run=dry_run)
|
||||
except MergePermissionError as exc:
|
||||
# Permanent merge failure (HTTP 403/404/405). Post a comment so
|
||||
# maintainers know why, then return 0 so this tick is done.
|
||||
# The PR stays in the queue; future ticks can retry after the
|
||||
# permission issue is resolved.
|
||||
sys.stderr.write(f"::error::merge permission error for PR #{pr_number}: {exc}\n")
|
||||
post_comment(
|
||||
pr_number,
|
||||
(
|
||||
"merge-queue: merge failed with HTTP 405 'User not allowed to merge PR'. "
|
||||
"No available token has Can-merge permission on this repo. "
|
||||
"Fix: grant Can-merge to a token, or add a maintain/admin collaborator. "
|
||||
"Skipping to next queued PR on next tick."
|
||||
),
|
||||
dry_run=dry_run,
|
||||
)
|
||||
return 0
|
||||
return 0
|
||||
return 0
|
||||
|
||||
|
||||
@@ -144,6 +144,16 @@ def parse_directives(
|
||||
if not parts:
|
||||
continue
|
||||
first = parts[0]
|
||||
# Em-dash (U+2014) is a common visual separator in user-written
|
||||
# notes, e.g. /sop-ack Five-Axis — five-axis-review
|
||||
# If raw_slug contains an em-dash, split on the first one so
|
||||
# the part before becomes the slug and the rest becomes the note.
|
||||
note_from_slug = ""
|
||||
slug_source = raw_slug
|
||||
emdash_idx = raw_slug.find("—")
|
||||
if emdash_idx != -1:
|
||||
slug_source = raw_slug[:emdash_idx].strip()
|
||||
note_from_slug = raw_slug[emdash_idx + 1 :].strip()
|
||||
# If the slug-capture greedily matched multiple words (e.g.
|
||||
# "comprehensive testing"), preserve normalize behavior: join
|
||||
# the WHOLE first-word-token only; trailing words get appended to
|
||||
@@ -156,13 +166,14 @@ def parse_directives(
|
||||
# as slug and "testing extra-note" as note. We defer the
|
||||
# disambiguation to the caller via the returned canonical
|
||||
# slug. For simplicity: try the WHOLE captured string first.
|
||||
canonical = normalize_slug(raw_slug, numeric_aliases)
|
||||
canonical = normalize_slug(slug_source, numeric_aliases)
|
||||
else:
|
||||
canonical = normalize_slug(first, numeric_aliases)
|
||||
canonical = normalize_slug(slug_source, numeric_aliases)
|
||||
note_from_group = (m.group(3) or "").strip()
|
||||
# If we collapsed multi-word slug into kebab and there's a
|
||||
# trailing-text group too, append it.
|
||||
entry = (kind, canonical, note_from_group)
|
||||
# Combine note_from_slug (em-dash split) with note_from_group
|
||||
# (trailing text after the slug captured by the regex group).
|
||||
combined_note = (note_from_slug + " " + note_from_group).strip()
|
||||
entry = (kind, canonical, combined_note)
|
||||
if kind == "sop-n/a":
|
||||
na_directives.append(entry)
|
||||
else:
|
||||
@@ -831,7 +842,22 @@ def main(argv: list[str] | None = None) -> int:
|
||||
team_member_cache: dict[tuple[str, int], bool | None] = {}
|
||||
|
||||
def probe(slug: str, users: list[str]) -> list[str]:
|
||||
item = items_by_slug[slug]
|
||||
# Slugs can be either checklist item names (from items_by_slug) or
|
||||
# gate names (from na_gates). compute_na_state passes gate names
|
||||
# (e.g. "qa-review", "security-review") to probe, so we must look
|
||||
# them up in na_gates as a fallback.
|
||||
if slug in items_by_slug:
|
||||
item = items_by_slug[slug]
|
||||
elif slug in na_gates:
|
||||
item = na_gates[slug]
|
||||
else:
|
||||
# Unknown slug — fail closed.
|
||||
print(
|
||||
f"::warning::probe received unknown slug '{slug}' — "
|
||||
"returning no approved users (fail-closed)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return []
|
||||
team_names: list[str] = item["required_teams"]
|
||||
# Resolve names → ids. NOTE: orgs/{org}/teams/search may not be
|
||||
# available — fall back to the list endpoint.
|
||||
|
||||
@@ -118,3 +118,13 @@ def test_merge_decision_updates_stale_pr_before_merge():
|
||||
|
||||
assert decision.ready is False
|
||||
assert decision.action == "update"
|
||||
|
||||
|
||||
def test_MergePermissionError_inherits_from_ApiError():
|
||||
assert issubclass(mq.MergePermissionError, mq.ApiError)
|
||||
|
||||
|
||||
def test_MergePermissionError_message_preserved():
|
||||
exc = mq.MergePermissionError("POST /merge -> HTTP 405: User not allowed")
|
||||
assert "405" in str(exc)
|
||||
assert "User not allowed" in str(exc)
|
||||
|
||||
@@ -209,6 +209,22 @@ class TestParseDirectives(unittest.TestCase):
|
||||
d = self.parse_ack_revoke("/sop-ack Comprehensive_Testing")
|
||||
self.assertEqual(d[0][1], "comprehensive-testing")
|
||||
|
||||
def test_emdash_separator_parsed_correctly(self):
|
||||
# Em-dash (U+2014) between slug and note is common in practice.
|
||||
# /sop-ack Five-Axis — five-axis-review
|
||||
# → slug = five-axis, note = — five-axis-review
|
||||
d = self.parse_ack_revoke("/sop-ack Five-Axis — five-axis-review")
|
||||
self.assertEqual(len(d), 1)
|
||||
self.assertEqual(d[0][1], "five-axis")
|
||||
self.assertIn("five-axis-review", d[0][2])
|
||||
|
||||
def test_emdash_no_note(self):
|
||||
# Em-dash at end of slug: only slug, no note content
|
||||
d = self.parse_ack_revoke("/sop-ack Five-Axis —")
|
||||
self.assertEqual(len(d), 1)
|
||||
self.assertEqual(d[0][1], "five-axis")
|
||||
self.assertEqual(d[0][2], "—") # em-dash preserved as note
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# section_marker_present
|
||||
|
||||
@@ -138,8 +138,8 @@ n/a_gates:
|
||||
must post /sop-n/a qa-review to activate.
|
||||
|
||||
security-review:
|
||||
required_teams: [security, managers, ceo]
|
||||
required_teams: [security, managers, ceo, Owners]
|
||||
description: >-
|
||||
Security review N/A when this change has no security surface
|
||||
(docs-only, pure-frontend, dependency-only). A security/owners
|
||||
(docs-only, pure-frontend, dependency-only). A security/managers/ceo/owners
|
||||
member must post /sop-n/a security-review to activate.
|
||||
|
||||
@@ -70,7 +70,7 @@ name: sop-checklist
|
||||
# Cancel any in-progress runs for the same PR to prevent
|
||||
# stale runs from overwriting newer status contexts.
|
||||
concurrency:
|
||||
group: ${{ github.repository }}-${{ github.workflow }}-${{ github.event.pull_request.number || github.event.issue.number || github.ref }}
|
||||
group: ${{ github.repository }}-${{ github.event.pull_request.number }}
|
||||
cancel-in-progress: true
|
||||
|
||||
# bp-required: yes ← emits sop-checklist / all-items-acked (pull_request)
|
||||
@@ -110,9 +110,9 @@ jobs:
|
||||
# For pull_request_target, the default branch is the trust
|
||||
# anchor. For issue_comment the PR base may differ from the
|
||||
# default branch (PR targeting `staging`), so we use the
|
||||
# default-branch ref explicitly — same approach as
|
||||
# base-branch ref explicitly — same approach as
|
||||
# qa-review.yml so the script source is always trusted.
|
||||
ref: ${{ github.event.repository.default_branch }}
|
||||
ref: ${{ github.event.pull_request.base.ref }}
|
||||
|
||||
- name: Run sop-checklist
|
||||
env:
|
||||
|
||||
@@ -176,6 +176,10 @@ func TestResolveAgentURLForRestartSignal_CacheMiss(t *testing.T) {
|
||||
// TestGracefulPreRestart_Success verifies that when the workspace returns 200,
|
||||
// the signal is logged as acknowledged without error.
|
||||
func TestGracefulPreRestart_Success(t *testing.T) {
|
||||
hWrapper := &resolveURLTestWrapper{
|
||||
WorkspaceHandler: newHandlerWithTestDeps(t),
|
||||
testURL: "http://fake-agent.example/agent",
|
||||
}
|
||||
_ = setupTestDB(t)
|
||||
|
||||
// httptest server simulating the workspace container's /signals/restart_pending
|
||||
@@ -205,18 +209,15 @@ func TestGracefulPreRestart_Success(t *testing.T) {
|
||||
})
|
||||
}))
|
||||
defer srv.Close()
|
||||
hWrapper.testURL = srv.URL + "/agent"
|
||||
|
||||
// Pre-populate Redis cache with the test server URL
|
||||
_ = setupTestRedisWithURL(t, srv.URL)
|
||||
|
||||
// Use a wrapper so gracefulPreRestart runs through the embedded handler.
|
||||
hWrapper := &resolveURLTestWrapper{
|
||||
WorkspaceHandler: newHandlerWithTestDeps(t),
|
||||
testURL: srv.URL + "/agent",
|
||||
}
|
||||
// gracefulPreRestart runs in a goroutine; wait for it before db.DB is restored.
|
||||
// Must be registered AFTER setupTestDB (LIFO: async wait → db.DB restore).
|
||||
waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)
|
||||
|
||||
// gracefulPreRestart runs in a goroutine with its own timeout.
|
||||
// We give it time to complete before the test ends.
|
||||
hWrapper.gracefulPreRestart(context.Background(), "ws-ack-789")
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
}
|
||||
@@ -224,19 +225,22 @@ func TestGracefulPreRestart_Success(t *testing.T) {
|
||||
// TestGracefulPreRestart_NotImplemented verifies that when the workspace returns
|
||||
// 404 (old SDK version), the platform proceeds gracefully (log + no error).
|
||||
func TestGracefulPreRestart_NotImplemented(t *testing.T) {
|
||||
hWrapper := &resolveURLTestWrapper{
|
||||
WorkspaceHandler: newHandlerWithTestDeps(t),
|
||||
testURL: "http://fake-agent.example/agent",
|
||||
}
|
||||
_ = setupTestDB(t)
|
||||
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}))
|
||||
defer srv.Close()
|
||||
hWrapper.testURL = srv.URL + "/agent"
|
||||
|
||||
_ = setupTestRedisWithURL(t, srv.URL)
|
||||
|
||||
hWrapper := &resolveURLTestWrapper{
|
||||
WorkspaceHandler: newHandlerWithTestDeps(t),
|
||||
testURL: srv.URL + "/agent",
|
||||
}
|
||||
// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
|
||||
waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)
|
||||
|
||||
hWrapper.gracefulPreRestart(context.Background(), "ws-noimpl-999")
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
@@ -246,15 +250,18 @@ func TestGracefulPreRestart_NotImplemented(t *testing.T) {
|
||||
// TestGracefulPreRestart_ConnectionRefused verifies that when the workspace
|
||||
// is unreachable, the platform proceeds gracefully without error.
|
||||
func TestGracefulPreRestart_ConnectionRefused(t *testing.T) {
|
||||
_ = setupTestDB(t)
|
||||
|
||||
mr := setupTestRedisWithURL(t, "http://localhost:19999/agent") // nothing listening on 19999
|
||||
_ = mr
|
||||
|
||||
hWrapper := &resolveURLTestWrapper{
|
||||
WorkspaceHandler: newHandlerWithTestDeps(t),
|
||||
testURL: "http://localhost:19999/agent",
|
||||
}
|
||||
_ = setupTestDB(t)
|
||||
|
||||
// Nothing listening on 19999 — deliberate connection failure.
|
||||
mr := setupTestRedisWithURL(t, "http://localhost:19999/agent")
|
||||
_ = mr
|
||||
|
||||
// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
|
||||
waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)
|
||||
|
||||
hWrapper.gracefulPreRestart(context.Background(), "ws-unreachable-000")
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
@@ -264,14 +271,17 @@ func TestGracefulPreRestart_ConnectionRefused(t *testing.T) {
|
||||
// TestGracefulPreRestart_URLResolutionError verifies that when URL resolution
|
||||
// fails, the platform proceeds gracefully without blocking the restart.
|
||||
func TestGracefulPreRestart_URLResolutionError(t *testing.T) {
|
||||
_ = setupTestDB(t)
|
||||
_ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal
|
||||
|
||||
hWrapper := &resolveURLTestWrapper{
|
||||
WorkspaceHandler: newHandlerWithTestDeps(t),
|
||||
errToReturn: context.DeadlineExceeded,
|
||||
}
|
||||
// Register async wait FIRST so LIFO order is: db restore → Redis → async wait.
|
||||
// This ensures goroutines (which access both DB and Redis) complete before
|
||||
// any cleanup fires. setupTestRedis comes after newHandlerWithTestDeps
|
||||
// so the handler holds the correct Redis client reference.
|
||||
waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)
|
||||
_ = setupTestDB(t)
|
||||
_ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal
|
||||
|
||||
hWrapper.gracefulPreRestart(context.Background(), "ws-url-err-111")
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
|
||||
@@ -610,7 +610,10 @@ func (h *WorkspaceHandler) runRestartCycle(workspaceID string) {
|
||||
h.provisionWorkspaceAutoSync(workspaceID, "", nil, payload)
|
||||
// sendRestartContext is a one-way notification to the new container; safe
|
||||
// to fire async — the next restart cycle won't depend on it completing.
|
||||
go h.sendRestartContext(workspaceID, restartData)
|
||||
// Tracked via h.goAsync so tests can wait for it via h.asyncWG before
|
||||
// closing the sqlmock. Without this, untracked goroutines hit the restored
|
||||
// mock and cause "was not expected" errors in parallel CI execution (mc#1264).
|
||||
h.goAsync(func() { h.sendRestartContext(workspaceID, restartData) })
|
||||
}
|
||||
|
||||
// Pause handles POST /workspaces/:id/pause
|
||||
|
||||
Reference in New Issue
Block a user