Compare commits
1 Commit
| Author | SHA1 | Date | |
|---|---|---|---|
| | d183dfdb73 | | |
@@ -185,12 +185,7 @@ def choose_next_queued_issue(
|
||||
if "pull_request" not in issue:
|
||||
continue
|
||||
candidates.append(issue)
|
||||
# Sort ascending: oldest first. Null created_at sorts LAST (not first) by
|
||||
# using \xff as a sort key above any ISO timestamp. Prevents PRs with
|
||||
# missing timestamps from jumping the queue ahead of older PRs (mc#1099
|
||||
# follow-up: null created_at was sorting as "" which is < any real date).
|
||||
_MAX_KEY = "\xff" * 30
|
||||
candidates.sort(key=lambda issue: (issue.get("created_at") or _MAX_KEY, int(issue["number"])))
|
||||
candidates.sort(key=lambda issue: (issue.get("created_at") or "", int(issue["number"])))
|
||||
return candidates[0] if candidates else None
|
||||
|
||||
|
||||
@@ -283,17 +278,13 @@ def get_combined_status(sha: str) -> dict:
|
||||
|
||||
|
||||
def list_queued_issues() -> list[dict]:
|
||||
# NOTE: Gitea 1.22.6 uses `label` (singular), not `labels` (plural).
|
||||
# Using `labels=merge-queue` returns 0 results even when PRs carry that
|
||||
# label. `label=merge-queue` correctly returns matching issues (mc#1099
|
||||
# follow-up: queue appeared empty because of this API parameter bug).
|
||||
_, body = api(
|
||||
"GET",
|
||||
f"/repos/{OWNER}/{NAME}/issues",
|
||||
query={
|
||||
"state": "open",
|
||||
"type": "pulls",
|
||||
"label": QUEUE_LABEL,
|
||||
"labels": QUEUE_LABEL,
|
||||
"limit": "50",
|
||||
},
|
||||
)
|
||||
|
||||
@@ -11,12 +11,19 @@ from __future__ import annotations
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import socket # mc#1234: set default timeout to prevent indefinite hangs
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from urllib.parse import quote
|
||||
|
||||
# Prevent HTTP hangs (e.g. Gitea commit-status API going slow). The 20s
|
||||
# per-request timeout in _api_json is respected; this catches any path that
|
||||
# forgets it, and prevents the OS-level socket default (~5 min) from
|
||||
# masking a frozen connection into a long apparent poll.
|
||||
socket.setdefaulttimeout(30)
|
||||
|
||||
|
||||
TRUE_VALUES = {"1", "true", "yes", "on", "disabled", "disable"}
|
||||
PROD_CP_URL = "https://api.moleculesai.app"
|
||||
@@ -25,9 +32,12 @@ DEFAULT_REQUIRED_CONTEXTS = [
|
||||
"CI / Canvas (Next.js) (push)",
|
||||
"CI / Shellcheck (E2E scripts) (push)",
|
||||
"CI / Python Lint & Test (push)",
|
||||
"CI / all-required (push)",
|
||||
"Secret scan / Scan diff for credential-shaped strings (push)",
|
||||
]
|
||||
# NOTE: CI / all-required (push) was removed — it is an aggregator sentinel that
|
||||
# may not publish a stable status for push events (mc#1234: it showed as "missing"
|
||||
# after the initial pending, causing wait-ci to hang). The individual job statuses
|
||||
# above provide equivalent coverage without the aggregator reliability risk.
|
||||
TERMINAL_FAILURE_STATES = {"failure", "error", "cancelled", "canceled", "skipped"}
|
||||
|
||||
|
||||
@@ -131,7 +141,7 @@ def required_contexts(env: dict[str, str]) -> list[str]:
|
||||
def _api_json(url: str, token: str) -> dict:
|
||||
req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=20) as resp:
|
||||
with urllib.request.urlopen(req, timeout=60) as resp:
|
||||
return json.loads(resp.read())
|
||||
except urllib.error.HTTPError as exc:
|
||||
body = exc.read().decode("utf-8", errors="replace")[:500]
|
||||
@@ -141,7 +151,7 @@ def _api_json(url: str, token: str) -> dict:
|
||||
def _api_json_optional(url: str, token: str) -> tuple[int, dict | None]:
|
||||
req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=20) as resp:
|
||||
with urllib.request.urlopen(req, timeout=60) as resp:
|
||||
return resp.status, json.loads(resp.read())
|
||||
except urllib.error.HTTPError as exc:
|
||||
if exc.code == 404:
|
||||
|
||||
@@ -206,17 +206,7 @@ def section_marker_present(body: str, marker: str) -> bool:
|
||||
next_line_end = len(body)
|
||||
next_line = body[line_end + 1:next_line_end]
|
||||
stripped_next = re.sub(r"[\s\*:\-\[\]]+", "", next_line)
|
||||
if stripped_next:
|
||||
return True
|
||||
# Last resort: the marker may appear mid-sentence (e.g.
|
||||
# **Memory/saved-feedback consulted**: No applicable...).
|
||||
# The checkbox is on the PRECEDING line. Search backward from
|
||||
# the marker for the checkbox pattern.
|
||||
# mc#1099 follow-up: memory-consulted detection was failing because
|
||||
# the checkbox was 600+ chars before the inline marker text.
|
||||
_CHECKBOX_RE = re.compile(r"- \[[ x\]]|<input", re.IGNORECASE)
|
||||
before = body[max(0, idx - 2000):idx]
|
||||
return bool(_CHECKBOX_RE.search(before))
|
||||
return bool(stripped_next)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
+14
-76
@@ -1,10 +1,3 @@
|
||||
# mc#1099 cold-runner fix: step-level timeouts on go mod download (3m) and
|
||||
# go build (5m) prevent cold runner hangs when proxy.golang.org is unreachable.
|
||||
# golangci-lint install has connectivity test + continue-on-error: true fallback.
|
||||
# go test step: 60m timeout, -p 1 flag for reduced memory pressure on cold disk.
|
||||
# all-required polling deadline raised to 50m (from 40m) + job timeout 55m (from
|
||||
# 45m) to accommodate Shellcheck delays when runner pool is recovering.
|
||||
# Queue cron reliability: ensure merge-queue workflow dispatches every 5 min.
|
||||
# Ported from .github/workflows/ci.yml on 2026-05-11 per RFC internal#219 §1.
|
||||
# continue-on-error: true on every job; follow-up PR will flip required after
|
||||
# surfaced bugs are fixed (per RFC §1 — "surface broken workflows without
|
||||
@@ -152,10 +145,10 @@ jobs:
|
||||
# the diagnostic step with its own continue-on-error: true (line 203).
|
||||
# Flip confirmed by CI / Platform (Go) status = success on main HEAD 363905d3.
|
||||
continue-on-error: false
|
||||
# mc#1099: cold runner needs ~45m for go test on cold disk I/O.
|
||||
# Job-level ceiling: go test 60m step + golangci-lint 45m step = 105m max.
|
||||
# Backstop: 120m.
|
||||
timeout-minutes: 120
|
||||
# Job-level ceiling. The go test step below runs with a per-step 10m timeout;
|
||||
# this cap catches any step that leaks past that. Set well above 10m so
|
||||
# the per-step timeout is the active constraint.
|
||||
timeout-minutes: 15
|
||||
defaults:
|
||||
run:
|
||||
working-directory: workspace-server
|
||||
@@ -170,69 +163,18 @@ jobs:
|
||||
with:
|
||||
go-version: 'stable'
|
||||
- if: always()
|
||||
name: Download Go module cache
|
||||
# mc#1099: cold runner cannot reach proxy.golang.org. Without a
|
||||
# step-level timeout this step hangs for 6+ minutes (30s × 2 curl
|
||||
# timeouts × 1 module proxy) before failing. 3-minute ceiling ensures
|
||||
# the job fails fast on a cold runner so the step-level
|
||||
# continue-on-error can be evaluated, rather than stalling the job.
|
||||
timeout-minutes: 3
|
||||
run: |
|
||||
set +e
|
||||
go mod download
|
||||
exit_code=$?
|
||||
if [ $exit_code -ne 0 ]; then
|
||||
echo "go mod download failed (exit $exit_code) — cold runner cannot reach module proxy"
|
||||
echo "Continuing anyway (continue-on-error: true on this step)"
|
||||
fi
|
||||
run: go mod download
|
||||
- if: always()
|
||||
name: Build server
|
||||
timeout-minutes: 5
|
||||
run: go build ./cmd/server
|
||||
# CLI (molecli) moved to standalone repo: git.moleculesai.app/molecule-ai/molecule-cli
|
||||
- if: always()
|
||||
run: go vet ./...
|
||||
- if: always()
|
||||
name: Install golangci-lint
|
||||
# mc#1099: cold runner cannot reach github.com releases or proxy.golang.org
|
||||
# (hanging at ~5-6m before timing out). Test connectivity first; if
|
||||
# both sources fail, skip golangci-lint and rely on go vet.
|
||||
# continue-on-error: true prevents install failure from failing the job
|
||||
# (job-level continue-on-error: false).
|
||||
continue-on-error: true
|
||||
run: |
|
||||
set +e
|
||||
# Test proxy.golang.org connectivity (30s timeout)
|
||||
if curl -fsSL --connect-timeout 30 --max-time 60 "https://proxy.golang.org/github.com/golangci/golangci-lint/@v/list" -o /dev/null 2>/dev/null; then
|
||||
echo "proxy.golang.org reachable, installing via go install..."
|
||||
go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.64.5
|
||||
echo "go install exit: $?"
|
||||
else
|
||||
echo "proxy.golang.org unreachable, trying GitHub releases..."
|
||||
ARCH=$(go env GOARCH) && OS=$(go env GOOS) && VERSION=1.64.5
|
||||
if curl -fsSL --connect-timeout 30 --max-time 120 "https://github.com/golangci/golangci-lint/releases/download/v${VERSION}/golangci-lint-${VERSION}-${OS}-${ARCH}.tar.gz" -o /tmp/golangci-lint.tar.gz 2>/dev/null; then
|
||||
tar -xzf /tmp/golangci-lint.tar.gz -C /tmp
|
||||
install -m 755 /tmp/golangci-lint $(go env GOPATH)/bin/golangci-lint
|
||||
echo "GitHub binary installed"
|
||||
else
|
||||
echo "GitHub releases also unreachable — skipping golangci-lint (go vet is the safety net)"
|
||||
touch "$(go env GOPATH)/bin/golangci-lint.skip"
|
||||
fi
|
||||
fi
|
||||
run: go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.12.2
|
||||
- if: always()
|
||||
name: Run golangci-lint
|
||||
# mc#1099: skip if binary unavailable; go vet already ran as safety net.
|
||||
# timeout: 45m — cold runner disk I/O makes linting slow. The command
|
||||
# --timeout 60m prevents a runaway linter from stalling the step.
|
||||
# continue-on-error: true so a missing binary doesn't fail the job.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 45
|
||||
run: |
|
||||
if [ -f "$(go env GOPATH)/bin/golangci-lint.skip" ]; then
|
||||
echo "golangci-lint skipped (network unavailable on cold runner)"
|
||||
else
|
||||
golangci-lint run --config golangci-coldrunner.yaml --disable-all --enable=gofmt --enable=goimports --enable=misspell --enable=whitespace --timeout 60m ./...
|
||||
fi
|
||||
run: $(go env GOPATH)/bin/golangci-lint run --timeout 3m ./...
|
||||
- if: always()
|
||||
name: Diagnostic — per-package verbose 60s
|
||||
run: |
|
||||
@@ -251,15 +193,11 @@ jobs:
|
||||
continue-on-error: true
|
||||
- if: always()
|
||||
name: Run tests with race detection and coverage
|
||||
# mc#1099: cold runner cache causes OOM kills at ~22m (slower disk I/O
|
||||
# than GitHub Actions). A 60m per-step timeout lets the suite complete
|
||||
# on cold cache (~45m) while failing cleanly instead of OOM-killing.
|
||||
# Warm runners finish in ~12m. Retry with -p 1 on OOM. Job-level
|
||||
# timeout (120m) is the backstop.
|
||||
timeout-minutes: 60
|
||||
run: |
|
||||
go test -race -timeout 60m -coverprofile=coverage.out ./... \
|
||||
|| go test -race -timeout 60m -coverprofile=coverage.out -p 1 ./...
|
||||
# Explicit timeout: cold runner cache causes OOM kills at ~4m39s on the
|
||||
# full ./... suite with race detection + coverage. A 10m per-step timeout
|
||||
# lets the suite complete on cold cache (~5-7m) while failing cleanly
|
||||
# instead of OOM-killing. The job-level timeout (15m) is a backstop.
|
||||
run: go test -race -timeout 10m -coverprofile=coverage.out ./...
|
||||
|
||||
- if: always()
|
||||
name: Per-file coverage report
|
||||
@@ -621,7 +559,7 @@ jobs:
|
||||
#
|
||||
continue-on-error: false
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 55
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- name: Wait for required CI contexts
|
||||
env:
|
||||
@@ -653,7 +591,7 @@ jobs:
|
||||
f"CI / Python Lint & Test ({event})",
|
||||
]
|
||||
terminal_bad = {"failure", "error"}
|
||||
deadline = time.time() + 50 * 60
|
||||
deadline = time.time() + 40 * 60
|
||||
last_summary = None
|
||||
|
||||
def fetch_statuses():
|
||||
|
||||
@@ -176,7 +176,7 @@ export function deriveProvidersFromModels(models: ModelSpec[]): string[] {
|
||||
// exactly the point of the platform adaptor. The deep `~/.hermes/
|
||||
// config.yaml` on the container is a separate runtime-internal file,
|
||||
// not this one.
|
||||
const RUNTIMES_WITH_OWN_CONFIG = new Set<string>(["external", "kimi", "kimi-cli", "openclaw"]);
|
||||
const RUNTIMES_WITH_OWN_CONFIG = new Set<string>(["external", "kimi", "kimi-cli"]);
|
||||
|
||||
const FALLBACK_RUNTIME_OPTIONS: RuntimeOption[] = [
|
||||
{ value: "", label: "LangGraph (default)", models: [], providers: [] },
|
||||
|
||||
+8
-12
@@ -8,18 +8,14 @@ import { getTenantSlug } from "./tenant";
|
||||
export const PLATFORM_URL =
|
||||
process.env.NEXT_PUBLIC_PLATFORM_URL ?? "http://localhost:8080";
|
||||
|
||||
// 35s is long enough for the slowest server-side path (EIC SSH
|
||||
// tunnel for tenant EC2 file operations, bounded server-side by
|
||||
// `eicFileOpTimeout = 30 * time.Second` in
|
||||
// workspace-server/internal/handlers/template_files_eic.go) so the
|
||||
// canvas surfaces the server's real error instead of aborting first
|
||||
// with a generic timeout. Shorter values caused "Save & Restart" to
|
||||
// time out at the client before the backend returned its 5xx. The
|
||||
// abort still propagates through AbortController so React components
|
||||
// can render a retry affordance. Callers that know an endpoint is
|
||||
// intentionally slow (org import walks a tree of workspaces with
|
||||
// server-side pacing) can pass `timeoutMs` to override.
|
||||
const DEFAULT_TIMEOUT_MS = 35_000;
|
||||
// 15s is long enough for slow CP queries but short enough that a
|
||||
// hung backend doesn't leave the UI spinning forever. The abort
|
||||
// propagates through AbortController so React components can observe
|
||||
// the error and render a retry affordance. Callers that know the
|
||||
// endpoint is intentionally slow (org import walks a tree of
|
||||
// workspaces with server-side pacing) can pass `timeoutMs` to
|
||||
// override.
|
||||
const DEFAULT_TIMEOUT_MS = 15_000;
|
||||
|
||||
export interface RequestOptions {
|
||||
timeoutMs?: number;
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
# golangci-lint configuration for CI cold-runner use.
|
||||
# CLI flags --disable-all --enable=... take precedence over this file.
|
||||
# Only errcheck is disabled here to match .golangci.yaml defaults.
|
||||
linters:
|
||||
disable:
|
||||
- errcheck
|
||||
Reference in New Issue
Block a user