Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 738e54593c | |||
| b331747f1c | |||
| 03e7a2d8a5 | |||
| f3b01ceefb | |||
| eee83dfb94 | |||
| 381c710f8a | |||
| 06af0bbeb3 | |||
| b8ccd21c8c | |||
| 290773ecbc |
@@ -0,0 +1,113 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Lint workflow bash for curl status-code capture pollution.
|
||||
|
||||
The bad shape is:
|
||||
|
||||
HTTP_CODE=$(curl ... -w '%{http_code}' ... || echo "000")
|
||||
|
||||
`curl -w` writes the HTTP code to stdout before returning non-zero, so
|
||||
fallback output inside the same command substitution appends another code.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import NamedTuple
|
||||
|
||||
|
||||
SELF = ".gitea/workflows/lint-curl-status-capture.yml"
|
||||
|
||||
|
||||
class Finding(NamedTuple):
|
||||
path: str
|
||||
snippet: str
|
||||
|
||||
|
||||
BAD_STATUS_CAPTURE = re.compile(
|
||||
r"""
|
||||
\$\(\s*
|
||||
curl\b
|
||||
[^)]*
|
||||
-w\s*['"]%\{http_code\}['"]
|
||||
[^)]*
|
||||
\|\|\s*
|
||||
(?:
|
||||
echo\s+['"]?000['"]?
|
||||
|
|
||||
printf\s+['"]000['"]
|
||||
)
|
||||
\s*\)
|
||||
""",
|
||||
re.DOTALL | re.VERBOSE,
|
||||
)
|
||||
|
||||
|
||||
def _logical_shell(content: str) -> str:
|
||||
"""Collapse bash line continuations so one curl command is one string."""
|
||||
return re.sub(r"\\\s*\n\s*", " ", content)
|
||||
|
||||
|
||||
def scan_content(path: str, content: str) -> list[Finding]:
|
||||
flat = _logical_shell(content)
|
||||
return [
|
||||
Finding(path=path, snippet=re.sub(r"\s+", " ", match.group(0)).strip()[:160])
|
||||
for match in BAD_STATUS_CAPTURE.finditer(flat)
|
||||
]
|
||||
|
||||
|
||||
def scan_paths(paths: list[str]) -> list[Finding]:
|
||||
findings: list[Finding] = []
|
||||
for path in paths:
|
||||
if path == SELF:
|
||||
continue
|
||||
content = Path(path).read_text(encoding="utf-8")
|
||||
findings.extend(scan_content(path, content))
|
||||
return findings
|
||||
|
||||
|
||||
def default_paths() -> list[str]:
|
||||
return sorted(glob.glob(".gitea/workflows/*.yml"))
|
||||
|
||||
|
||||
def print_report(findings: list[Finding]) -> None:
|
||||
if not findings:
|
||||
print("OK No curl-status-capture pollution patterns detected")
|
||||
return
|
||||
|
||||
print(f"::error::Found {len(findings)} curl-status-capture pollution site(s):")
|
||||
for finding in findings:
|
||||
print(
|
||||
f"::error file={finding.path}::Curl status-capture pollution: "
|
||||
"'|| echo/printf 000' inside a $(curl ... -w '%{http_code}' ...) "
|
||||
"subshell. On non-2xx or connection failure, curl's -w writes a "
|
||||
"status, then exits non-zero, then the fallback appends another "
|
||||
"status. Fix: route -w into a tempfile so the exit code cannot "
|
||||
"pollute stdout."
|
||||
)
|
||||
print(f" matched: {finding.snippet}...")
|
||||
|
||||
print()
|
||||
print("Fix template:")
|
||||
print(" set +e")
|
||||
print(" curl ... -w '%{http_code}' >code.txt 2>/dev/null")
|
||||
print(" set -e")
|
||||
print(' HTTP_CODE=$(cat code.txt 2>/dev/null)')
|
||||
print(' [ -z "$HTTP_CODE" ] && HTTP_CODE="000"')
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("paths", nargs="*", help="workflow files to scan")
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
paths = args.paths or default_paths()
|
||||
findings = scan_paths(paths)
|
||||
print_report(findings)
|
||||
return 1 if findings else 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -169,10 +169,10 @@ jobs:
|
||||
run: go build ./cmd/server
|
||||
# CLI (molecli) moved to standalone repo: git.moleculesai.app/molecule-ai/molecule-cli
|
||||
- if: needs.changes.outputs.platform == 'true'
|
||||
run: go vet ./... || true
|
||||
run: go vet ./...
|
||||
- if: needs.changes.outputs.platform == 'true'
|
||||
name: Run golangci-lint
|
||||
run: golangci-lint run --timeout 3m ./... || true
|
||||
run: golangci-lint run --timeout 3m ./...
|
||||
- if: needs.changes.outputs.platform == 'true'
|
||||
name: Diagnostic — per-package verbose 60s
|
||||
run: |
|
||||
|
||||
@@ -30,10 +30,16 @@ name: Lint curl status-code capture
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths: ['.gitea/workflows/**']
|
||||
paths:
|
||||
- '.gitea/workflows/**'
|
||||
- '.gitea/scripts/lint-curl-status-capture.py'
|
||||
- 'tests/test_lint_curl_status_capture.py'
|
||||
push:
|
||||
branches: [main, staging]
|
||||
paths: ['.gitea/workflows/**']
|
||||
paths:
|
||||
- '.gitea/workflows/**'
|
||||
- '.gitea/scripts/lint-curl-status-capture.py'
|
||||
- 'tests/test_lint_curl_status_capture.py'
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
@@ -51,55 +57,4 @@ jobs:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- name: Find curl ... -w '%{http_code}' ... || echo "000" subshells
|
||||
run: |
|
||||
set -uo pipefail
|
||||
# Multi-line aware: look for `$(curl ... -w '%{http_code}' ... || echo "000")`
|
||||
# subshell where the entire command-substitution wraps a curl that
|
||||
# ends with `|| echo "000"`. Must distinguish from the SAFE shape
|
||||
# `$(cat tempfile 2>/dev/null || echo "000")` — `cat` with a missing
|
||||
# tempfile produces empty stdout, no pollution.
|
||||
python3 <<'PY'
|
||||
import os, re, sys, glob
|
||||
|
||||
BAD_FILES = []
|
||||
|
||||
# Match the buggy substitution across newlines: $(curl ... -w '%{http_code}' ... || echo "000")
|
||||
# The `\\n` is the bash line-continuation that lets curl flags span lines.
|
||||
# We collapse continuation lines first, then look for the single-line bad pattern.
|
||||
PATTERN = re.compile(
|
||||
r'\$\(\s*curl\b[^)]*-w\s*[\'"]%\{http_code\}[\'"][^)]*\|\|\s*echo\s+"000"\s*\)',
|
||||
re.DOTALL,
|
||||
)
|
||||
|
||||
# Self-skip: this lint workflow contains the literal anti-pattern in
|
||||
# its own docstring — that's intentional, not a bug.
|
||||
SELF = ".gitea/workflows/lint-curl-status-capture.yml"
|
||||
|
||||
for f in sorted(glob.glob(".gitea/workflows/*.yml")):
|
||||
if f == SELF:
|
||||
continue
|
||||
with open(f) as fh:
|
||||
content = fh.read()
|
||||
# Collapse bash line-continuations (\\\n + leading whitespace)
|
||||
# into a single logical line so the regex can see the full
|
||||
# curl invocation as one chunk.
|
||||
flat = re.sub(r'\\\s*\n\s*', ' ', content)
|
||||
for m in PATTERN.finditer(flat):
|
||||
BAD_FILES.append((f, m.group(0)[:120]))
|
||||
|
||||
if not BAD_FILES:
|
||||
print("OK No curl-status-capture pollution patterns detected")
|
||||
sys.exit(0)
|
||||
|
||||
print(f"::error::Found {len(BAD_FILES)} curl-status-capture pollution site(s):")
|
||||
for f, snippet in BAD_FILES:
|
||||
print(f"::error file={f}::Curl status-capture pollution: '|| echo \"000\"' inside a $(curl ... -w '%{{http_code}}' ...) subshell. On non-2xx or connection failure, curl's -w writes a status, then exits non-zero, then the || echo appends another '000' — producing 'HTTP 000000' or '409000' that fails comparisons silently. Fix: route -w into a tempfile so the exit code can't pollute stdout. See memory feedback_curl_status_capture_pollution.md.")
|
||||
print(f" matched: {snippet}...")
|
||||
print()
|
||||
print("Fix template:")
|
||||
print(' set +e')
|
||||
print(' curl ... -w \'%{http_code}\' >code.txt 2>/dev/null')
|
||||
print(' set -e')
|
||||
print(' HTTP_CODE=$(cat code.txt 2>/dev/null)')
|
||||
print(' [ -z "$HTTP_CODE" ] && HTTP_CODE="000"')
|
||||
sys.exit(1)
|
||||
PY
|
||||
python3 .gitea/scripts/lint-curl-status-capture.py
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
"""Tests for `.gitea/scripts/lint-curl-status-capture.py`.
|
||||
|
||||
Run:
|
||||
python3 -m pytest tests/test_lint_curl_status_capture.py -v
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
SCRIPT_PATH = (
|
||||
Path(__file__).resolve().parent.parent
|
||||
/ ".gitea"
|
||||
/ "scripts"
|
||||
/ "lint-curl-status-capture.py"
|
||||
)
|
||||
|
||||
|
||||
def _load_module():
|
||||
spec = importlib.util.spec_from_file_location("lint_curl_status_capture", SCRIPT_PATH)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
|
||||
|
||||
def test_finds_quoted_echo_fallback_pollution():
|
||||
lint = _load_module()
|
||||
content = """
|
||||
HTTP_CODE=$(curl -sS -o /tmp/body -w "%{http_code}" https://example.test || echo "000")
|
||||
"""
|
||||
|
||||
findings = lint.scan_content("workflow.yml", content)
|
||||
|
||||
assert len(findings) == 1
|
||||
assert "echo" in findings[0].snippet
|
||||
|
||||
|
||||
def test_finds_unquoted_echo_fallback_pollution():
|
||||
lint = _load_module()
|
||||
content = """
|
||||
HTTP_CODE=$(curl -sS -o /tmp/body -w '%{http_code}' https://example.test || echo 000)
|
||||
"""
|
||||
|
||||
findings = lint.scan_content("workflow.yml", content)
|
||||
|
||||
assert len(findings) == 1
|
||||
assert "echo" in findings[0].snippet
|
||||
|
||||
|
||||
def test_finds_printf_fallback_pollution():
|
||||
lint = _load_module()
|
||||
content = """
|
||||
HTTP_CODE=$(curl -sS -o /tmp/body -w '%{http_code}' https://example.test || printf '000')
|
||||
"""
|
||||
|
||||
findings = lint.scan_content("workflow.yml", content)
|
||||
|
||||
assert len(findings) == 1
|
||||
assert "printf" in findings[0].snippet
|
||||
|
||||
|
||||
def test_ignores_tempfile_fallback_after_curl():
|
||||
lint = _load_module()
|
||||
content = """
|
||||
set +e
|
||||
curl -sS -o /tmp/body -w '%{http_code}' https://example.test >/tmp/code
|
||||
rc=$?
|
||||
set -e
|
||||
HTTP_CODE=$(cat /tmp/code 2>/dev/null || echo "000")
|
||||
[ -z "$HTTP_CODE" ] && HTTP_CODE="000"
|
||||
"""
|
||||
|
||||
assert lint.scan_content("workflow.yml", content) == []
|
||||
|
||||
|
||||
def test_collapses_bash_line_continuations():
|
||||
lint = _load_module()
|
||||
content = """
|
||||
HTTP_CODE=$(curl -sS -o /tmp/body \\
|
||||
-w "%{http_code}" \\
|
||||
https://example.test \\
|
||||
|| echo "000")
|
||||
"""
|
||||
|
||||
findings = lint.scan_content("workflow.yml", content)
|
||||
|
||||
assert len(findings) == 1
|
||||
@@ -35,7 +35,22 @@ RUN CGO_ENABLED=0 GOOS=linux go build \
|
||||
-o /memory-plugin ./cmd/memory-plugin-postgres
|
||||
|
||||
FROM alpine:3.20@sha256:c64c687cbea9300178b30c95835354e34c4e4febc4badfe27102879de0483b5e
|
||||
RUN apk add --no-cache ca-certificates git tzdata wget
|
||||
# docker-cli is required by internal/provisioner/localbuild.go which
|
||||
# shells out via exec.Command("docker", "image", "inspect"/"build"/"tag", ...)
|
||||
# whenever Resolve().Mode == RegistryModeLocal — which is the permanent
|
||||
# mode post-2026-05-06 (Molecule-AI GitHub org suspended → GHCR
|
||||
# unreachable → MOLECULE_IMAGE_REGISTRY unset → registry_mode.go falls
|
||||
# through to RegistryModeLocal). Without docker-cli here the platform
|
||||
# fails every workspace re-provision with `local-build: image inspect
|
||||
# for molecule-local/workspace-template-<runtime>:<sha> failed
|
||||
# (exec: "docker": executable file not found in $PATH)` and the
|
||||
# workspace stays status=failed. The Docker SOCKET is already mounted
|
||||
# (entrypoint.sh adds the platform user to the docker group) — only
|
||||
# the CLI binary was missing. Caught after sdk-lead + CP-QA went down
|
||||
# this way during the MiniMax-switch attempt + after-Class-A audit.
|
||||
# Related: Task #194 / Issue #63 (local-build path added);
|
||||
# `feedback_workspace_image_ghcr_dead`.
|
||||
RUN apk add --no-cache ca-certificates docker-cli git tzdata wget
|
||||
COPY --from=builder /platform /platform
|
||||
COPY --from=builder /memory-plugin /memory-plugin
|
||||
COPY workspace-server/migrations /migrations
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"go/parser"
|
||||
"go/token"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
|
||||
@@ -71,6 +72,8 @@ func TestPreflight_ContainerRunning_ReturnsNil(t *testing.T) {
|
||||
// triggers the offline-flip + WORKSPACE_OFFLINE broadcast + async restart.
|
||||
// This is the load-bearing case — saves the caller 2-30s of network timeout.
|
||||
func TestPreflight_ContainerNotRunning_StructuredFastFail(t *testing.T) {
|
||||
const wsID = "ws-dead-456"
|
||||
resetRestartStatesFor(wsID)
|
||||
mock := setupTestDB(t)
|
||||
_ = setupTestRedis(t)
|
||||
stub := &preflightLocalProv{running: false, err: nil}
|
||||
@@ -79,14 +82,14 @@ func TestPreflight_ContainerNotRunning_StructuredFastFail(t *testing.T) {
|
||||
|
||||
// Expect the offline-flip UPDATE.
|
||||
mock.ExpectExec(`UPDATE workspaces SET status =`).
|
||||
WithArgs(models.StatusOffline, "ws-dead-456").
|
||||
WithArgs(models.StatusOffline, wsID).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
// Broadcaster's INSERT INTO structure_events fires too — best-effort
|
||||
// log entry for the WORKSPACE_OFFLINE event. Match permissively.
|
||||
mock.ExpectExec(`INSERT INTO structure_events`).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
proxyErr := h.preflightContainerHealth(context.Background(), "ws-dead-456")
|
||||
proxyErr := h.preflightContainerHealth(context.Background(), wsID)
|
||||
if proxyErr == nil {
|
||||
t.Fatal("preflight should return *proxyA2AError when container not running")
|
||||
}
|
||||
@@ -107,6 +110,32 @@ func TestPreflight_ContainerNotRunning_StructuredFastFail(t *testing.T) {
|
||||
// h.broadcaster.RecordAndBroadcast call but not asserted here — the
|
||||
// real *events.Broadcaster doesn't expose received events for inspection.
|
||||
// The DB UPDATE expectation is sufficient to pin the offline-flip path.
|
||||
waitRestartByIDGoroutineIdle(t, wsID)
|
||||
}
|
||||
|
||||
func waitRestartByIDGoroutineIdle(t *testing.T, wsID string) {
|
||||
t.Helper()
|
||||
deadline := time.Now().Add(2 * time.Second)
|
||||
sawState := false
|
||||
for time.Now().Before(deadline) {
|
||||
sv, ok := restartStates.Load(wsID)
|
||||
if ok {
|
||||
sawState = true
|
||||
st := sv.(*restartState)
|
||||
st.mu.Lock()
|
||||
running := st.running
|
||||
st.mu.Unlock()
|
||||
if !running {
|
||||
resetRestartStatesFor(wsID)
|
||||
return
|
||||
}
|
||||
}
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
if !sawState {
|
||||
t.Fatalf("preflight did not start RestartByID goroutine for %s", wsID)
|
||||
}
|
||||
t.Fatalf("RestartByID goroutine for %s did not drain before test cleanup", wsID)
|
||||
}
|
||||
|
||||
// TestPreflight_TransientError_FailsSoftAsAlive — IsRunning(true,err): the
|
||||
|
||||
Reference in New Issue
Block a user