mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 05:51:15 +08:00
fix(e2e): stop interrupted docker builds
This commit is contained in:
@@ -65,6 +65,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Release/CI/E2E: reject oversized ClickClack fixture request bodies before release journey smokes can accumulate unbounded payloads.
|
||||
- Release/CI/E2E: reject oversized OpenAI image-auth mock request bodies before Docker proof runs can accumulate unbounded payloads.
|
||||
- Release/CI/E2E: require the Kitchen Sink RPC walk to prove every expected plugin tool is cataloged and effective before invoking tool fixtures.
|
||||
- Release/CI/E2E: stop tracked Docker build commands when centralized build wrappers receive shutdown signals.
|
||||
- Release/CI/E2E: fail secret-provider proof runs when temporary state cleanup still fails after retries instead of hiding the cleanup error.
|
||||
- Release/CI/E2E: fail package-candidate ref proofs when temporary source worktree cleanup fails instead of leaving stale worktrees behind.
|
||||
- Release/CI/E2E: remove package tarball extract directories when tar extraction fails before validation can continue.
|
||||
|
||||
@@ -73,6 +73,15 @@ docker_build_timeout_required() {
|
||||
return 1
|
||||
}
|
||||
|
||||
# Report whether an exit status is one of the signal-interruption codes used
# by the build wrappers: 129 (HUP), 130 (INT) or 143 (TERM).
#
# Arguments:
#   $1 - exit status to classify.
# Returns:
#   0 when the status is 129, 130 or 143; 1 otherwise.
docker_build_signal_exit_status() {
  local candidate_status="$1"
  # Plain string comparison, so only the exact spellings match.
  if [ "$candidate_status" = "129" ] ||
    [ "$candidate_status" = "130" ] ||
    [ "$candidate_status" = "143" ]; then
    return 0
  fi
  return 1
}
|
||||
|
||||
docker_build_heartbeat_seconds() {
|
||||
local configured="${OPENCLAW_DOCKER_BUILD_HEARTBEAT_SECONDS:-30}"
|
||||
if [[ "$configured" =~ ^[0-9]+$ ]] && [ "$configured" -ge 1 ]; then
|
||||
@@ -104,11 +113,76 @@ docker_build_run_logged() {
|
||||
local started_at="$SECONDS"
|
||||
local next_heartbeat=$heartbeat_seconds
|
||||
local build_status=0
|
||||
local build_pid=""
|
||||
local previous_int_trap
|
||||
local previous_term_trap
|
||||
local previous_hup_trap
|
||||
local heartbeat_sleep_pid=""
|
||||
|
||||
previous_int_trap="$(trap -p INT || true)"
|
||||
previous_term_trap="$(trap -p TERM || true)"
|
||||
previous_hup_trap="$(trap -p HUP || true)"
|
||||
|
||||
# Put the INT, TERM and HUP traps back to what they were before the build
# wrapper installed its own handlers.
#
# Reads the caller's previous_int_trap / previous_term_trap /
# previous_hup_trap variables, each holding the output of `trap -p <SIGNAL>`.
# A non-empty value is replayed with eval; an empty value means no trap was
# set, so the signal is reset to its default disposition.
docker_build_restore_signal_traps() {
  local trap_signal
  local saved_trap_command
  for trap_signal in INT TERM HUP; do
    case "$trap_signal" in
      INT) saved_trap_command="$previous_int_trap" ;;
      TERM) saved_trap_command="$previous_term_trap" ;;
      HUP) saved_trap_command="$previous_hup_trap" ;;
    esac
    if [ -z "$saved_trap_command" ]; then
      trap - "$trap_signal"
    else
      # The saved text is a complete `trap -- '...' SIG` statement.
      eval "$saved_trap_command"
    fi
  done
}
|
||||
|
||||
# Deliver a signal to a process and to the descendants that pgrep can find.
#
# Arguments:
#   $1 - signal name passed to `kill -s` (for example TERM or HUP).
#   $2 - pid at the root of the tree to signal.
#
# Descendants are signalled first (depth-first) so they are visited before
# their parent is signalled. When pgrep is unavailable only the root is
# signalled. Always returns 0; delivery failures are ignored on purpose.
docker_build_signal_process_tree() {
  local signal_name="$1"
  local root_pid="$2"
  local descendant_pid
  if command -v pgrep >/dev/null 2>&1; then
    while IFS= read -r descendant_pid; do
      if [ -z "$descendant_pid" ]; then
        continue
      fi
      docker_build_signal_process_tree "$signal_name" "$descendant_pid"
    done < <(pgrep -P "$root_pid" 2>/dev/null || true)
  fi
  # Try the process group led by this pid first; fall back to the single pid
  # when no such group exists.
  if kill -s "$signal_name" -- "-$root_pid" 2>/dev/null; then
    return 0
  fi
  kill -s "$signal_name" "$root_pid" 2>/dev/null || true
}
|
||||
|
||||
# Signal-handler body: tear down the heartbeat sleep and the tracked build,
# restore the caller's traps, and hand back the requested exit status.
#
# Arguments:
#   $1 - signal name to forward to the build's process tree.
#   $2 - status this function returns.
#
# Reads heartbeat_sleep_pid and build_pid from the enclosing scope; either
# may be empty or refer to a process that has already exited.
docker_build_stop_tracked_build() {
  local forwarded_signal="$1"
  local requested_status="$2"

  # Stop the in-flight heartbeat sleep and reap it.
  if [ -n "$heartbeat_sleep_pid" ]; then
    if kill -0 "$heartbeat_sleep_pid" 2>/dev/null; then
      kill "$heartbeat_sleep_pid" 2>/dev/null || true
      wait "$heartbeat_sleep_pid" 2>/dev/null || true
    fi
  fi

  # Forward the signal to the build and wait for it to finish.
  if [ -n "$build_pid" ]; then
    if kill -0 "$build_pid" 2>/dev/null; then
      docker_build_signal_process_tree "$forwarded_signal" "$build_pid"
      wait "$build_pid" 2>/dev/null || true
    fi
  fi

  docker_build_restore_signal_traps
  return "$requested_status"
}
|
||||
|
||||
trap 'docker_build_stop_tracked_build TERM 130; return 130' INT
|
||||
trap 'docker_build_stop_tracked_build TERM 143; return 143' TERM
|
||||
trap 'docker_build_stop_tracked_build HUP 129; return 129' HUP
|
||||
|
||||
docker_build_run_command "$timeout_value" "$@" >"$log_file" 2>&1 &
|
||||
local build_pid="$!"
|
||||
build_pid="$!"
|
||||
while kill -0 "$build_pid" 2>/dev/null; do
|
||||
/bin/sleep 1
|
||||
/bin/sleep 1 &
|
||||
heartbeat_sleep_pid="$!"
|
||||
wait "$heartbeat_sleep_pid" 2>/dev/null || true
|
||||
heartbeat_sleep_pid=""
|
||||
local elapsed_seconds=$((SECONDS - started_at))
|
||||
if [ "$elapsed_seconds" -ge "$next_heartbeat" ] && kill -0 "$build_pid" 2>/dev/null; then
|
||||
local log_bytes="0"
|
||||
@@ -122,6 +196,7 @@ docker_build_run_logged() {
|
||||
done
|
||||
|
||||
wait "$build_pid" || build_status="$?"
|
||||
docker_build_restore_signal_traps
|
||||
return "$build_status"
|
||||
}
|
||||
|
||||
@@ -134,6 +209,7 @@ docker_build_with_retries() {
|
||||
local max_attempts=$((retries + 1))
|
||||
local log_file
|
||||
local command=()
|
||||
local build_status=0
|
||||
while IFS= read -r -d '' part; do
|
||||
command+=("$part")
|
||||
done < <(docker_build_command "$@")
|
||||
@@ -144,6 +220,13 @@ docker_build_with_retries() {
|
||||
if docker_build_run_logged "$label" "$timeout_value" "$log_file" "${command[@]}"; then
|
||||
rm -f "$log_file"
|
||||
return 0
|
||||
else
|
||||
build_status="$?"
|
||||
fi
|
||||
|
||||
if docker_build_signal_exit_status "$build_status"; then
|
||||
rm -f "$log_file"
|
||||
return "$build_status"
|
||||
fi
|
||||
|
||||
if [ "$attempt" -ge "$max_attempts" ] || ! docker_build_transient_failure "$log_file"; then
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { execFileSync, spawn } from "node:child_process";
|
||||
import {
|
||||
chmodSync,
|
||||
existsSync,
|
||||
mkdtempSync,
|
||||
mkdirSync,
|
||||
readdirSync,
|
||||
@@ -10,6 +11,7 @@ import {
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { setTimeout as delay } from "node:timers/promises";
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
const HELPER_PATH = "scripts/lib/docker-build.sh";
|
||||
@@ -293,6 +295,105 @@ output="$(docker_build_run e2e-build -t demo-image .)"
|
||||
}
|
||||
});
|
||||
|
||||
it("stops the tracked build command without retrying when interrupted", async () => {
  // Scratch directory holding the fake docker binary, the runner script and
  // the marker files the two scripts write.
  const workDir = mkdtempSync(join(tmpdir(), "openclaw-docker-build-signal-"));

  try {
    const binDir = join(workDir, "bin");
    mkdirSync(binDir);
    // Fake `docker`: counts its invocations, records its pid, prints an error
    // line, then loops until it receives TERM (which it records in docker.term).
    writeFileSync(
      join(binDir, "docker"),
      `#!/bin/bash
set -euo pipefail
count=0
if [ -f "$TMPDIR/docker-count" ]; then
count="$(<"$TMPDIR/docker-count")"
fi
count="$((count + 1))"
printf '%s\\n' "$count" >"$TMPDIR/docker-count"
printf '%s\\n' "$$" >"$TMPDIR/docker.pid"
printf 'rpc error: code = Unavailable\\n'
trap 'printf "term\\n" >"$TMPDIR/docker.term"; exit 0' TERM
while true; do
/bin/sleep 1
done
`,
    );
    chmodSync(join(binDir, "docker"), 0o755);
    const rootDir = process.cwd();
    // Runner: sources the helper with retries enabled and starts one build
    // with the fake docker first on PATH.
    writeFileSync(
      join(workDir, "runner.sh"),
      `#!/bin/bash
set -euo pipefail
ROOT_DIR=${shellQuote(rootDir)}
TMPDIR=${shellQuote(workDir)}
export ROOT_DIR TMPDIR
export PATH="$TMPDIR/bin:$PATH"
export OPENCLAW_DOCKER_BUILD_RETRIES=3
source "$ROOT_DIR/scripts/lib/docker-build.sh"
docker_build_run e2e-build -t demo-image .
`,
    );
    chmodSync(join(workDir, "runner.sh"), 0o755);

    /** Polls (50 x 100 ms) until `filePath` exists; throws if it never appears. */
    const waitForFile = async (filePath: string) => {
      for (let attempt = 0; attempt < 50; attempt += 1) {
        if (existsSync(filePath)) {
          return;
        }
        await delay(100);
      }
      throw new Error(`file was not written: ${filePath}`);
    };
    /** Resolves with the child's exit code and signal once it emits `exit`. */
    const waitForExit = async (child: ReturnType<typeof spawn>) =>
      await new Promise<{ code: number | null; signal: NodeJS.Signals | null }>((resolve) => {
        child.once("exit", (code, signal) => resolve({ code, signal }));
      });
    /** Polls (50 x 100 ms) until signal 0 can no longer be sent to `pid`. */
    const waitForDead = async (pid: number) => {
      for (let attempt = 0; attempt < 50; attempt += 1) {
        try {
          process.kill(pid, 0);
        } catch {
          return;
        }
        await delay(100);
      }
      throw new Error(`process stayed alive: ${pid}`);
    };
    /**
     * Starts the runner, interrupts it with `signal` once the fake docker has
     * recorded its pid, and checks that the runner exits with `expectedCode`,
     * that docker received TERM, ran exactly once, and is no longer alive.
     */
    const runInterruptedBuild = async (signal: NodeJS.Signals, expectedCode: number) => {
      rmSync(join(workDir, "docker.pid"), { force: true });
      rmSync(join(workDir, "docker.term"), { force: true });
      rmSync(join(workDir, "docker-count"), { force: true });
      const runner = spawn(join(workDir, "runner.sh"), {
        env: { ...process.env, TMPDIR: workDir },
        stdio: "ignore",
      });
      // Subscribe to `exit` right away. If the runner quits on its own before
      // the signal is sent, the event would otherwise already have fired and
      // the test would hang instead of failing on the exit-code assertion.
      const runnerExit = waitForExit(runner);
      let buildPid: number | undefined;
      let buildConfirmedDead = false;
      try {
        const pidPath = join(workDir, "docker.pid");
        await waitForFile(pidPath);
        buildPid = Number.parseInt(readFileSync(pidPath, "utf8"), 10);

        runner.kill(signal);
        const exit = await runnerExit;

        expect(exit).toEqual({ code: expectedCode, signal: null });
        await waitForFile(join(workDir, "docker.term"));
        expect(readFileSync(join(workDir, "docker-count"), "utf8").trim()).toBe("1");
        await waitForDead(buildPid);
        buildConfirmedDead = true;
      } finally {
        if (runner.exitCode === null && runner.signalCode === null) {
          runner.kill("SIGKILL");
        }
        // On a failed run the fake docker loops forever; make sure it does not
        // outlive the test. Skipped once it was confirmed dead so a recycled
        // pid is never signalled.
        if (
          buildPid !== undefined &&
          Number.isInteger(buildPid) &&
          buildPid > 0 &&
          !buildConfirmedDead
        ) {
          try {
            process.kill(buildPid, "SIGKILL");
          } catch {
            // Already gone.
          }
        }
      }
    };

    await runInterruptedBuild("SIGTERM", 143);
    await runInterruptedBuild("SIGINT", 130);
  } finally {
    rmSync(workDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
it("does not delay fast successful centralized Docker builds until the next heartbeat", () => {
|
||||
const workDir = mkdtempSync(join(tmpdir(), "openclaw-docker-build-fast-heartbeat-"));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user