ci(release): fail fast on red release children

(cherry picked from commit 8d7038775f0a0a1bb5354ba6b6b708c6b2c3167b)
This commit is contained in:
Peter Steinberger
2026-06-01 22:41:49 +01:00
parent afdf9aaea0
commit 4e45010203
7 changed files with 144 additions and 27 deletions

View File

@@ -380,6 +380,21 @@ jobs:
gh_with_retry api --paginate "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/jobs?per_page=100" --jq '.jobs[]'
}
fail_fast_failed_jobs() {
local failed_jobs_json
failed_jobs_json="$(
fetch_child_jobs |
jq -s '[.[] | select(.status == "completed" and .conclusion != "success" and .conclusion != "skipped")]'
)"
if jq -e 'length > 0' <<< "$failed_jobs_json" >/dev/null; then
echo "::error::${workflow} has failed child jobs before the workflow completed; cancelling the remaining matrix."
jq '.[] | {name, conclusion, url: .html_url}' <<< "$failed_jobs_json"
cancel_child
trap - EXIT INT TERM
exit 1
fi
}
cancel_child() {
if [[ -n "${run_id:-}" ]]; then
echo "Cancelling child workflow ${workflow}: ${run_id}" >&2
@@ -395,6 +410,9 @@ jobs:
break
fi
poll_count=$((poll_count + 1))
if (( poll_count % 2 == 0 )); then
fail_fast_failed_jobs
fi
if (( poll_count % 10 == 0 )); then
echo "Still waiting on ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
fetch_child_jobs | jq 'select(.status != "completed") | {name, status, url: .html_url}' || true
@@ -510,6 +528,21 @@ jobs:
gh_with_retry api --paginate "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/jobs?per_page=100" --jq '.jobs[]'
}
fail_fast_failed_jobs() {
local failed_jobs_json
failed_jobs_json="$(
fetch_child_jobs |
jq -s '[.[] | select(.status == "completed" and .conclusion != "success" and .conclusion != "skipped")]'
)"
if jq -e 'length > 0' <<< "$failed_jobs_json" >/dev/null; then
echo "::error::${workflow} has failed child jobs before the workflow completed; cancelling the remaining matrix."
jq '.[] | {name, conclusion, url: .html_url}' <<< "$failed_jobs_json"
cancel_child
trap - EXIT INT TERM
exit 1
fi
}
cancel_child() {
if [[ -n "${run_id:-}" ]]; then
echo "Cancelling child workflow ${workflow}: ${run_id}" >&2
@@ -525,6 +558,9 @@ jobs:
break
fi
poll_count=$((poll_count + 1))
if (( poll_count % 2 == 0 )); then
fail_fast_failed_jobs
fi
if (( poll_count % 10 == 0 )); then
echo "Still waiting on ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
fetch_child_jobs | jq 'select(.status != "completed") | {name, status, url: .html_url}' || true
@@ -690,6 +726,24 @@ jobs:
[[ "$saw_advisory" == "1" && "$failed" == "0" ]]
}
fail_fast_failed_jobs() {
local failed_jobs_json
if [[ "$workflow" == "openclaw-release-checks.yml" && "$CHILD_WORKFLOW_REF" =~ ^tideclaw/alpha/[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{4}Z$ ]]; then
return 0
fi
failed_jobs_json="$(
fetch_child_jobs |
jq -s '[.[] | select(.status == "completed" and .conclusion != "success" and .conclusion != "skipped")]'
)"
if jq -e 'length > 0' <<< "$failed_jobs_json" >/dev/null; then
echo "::error::${workflow} has failed child jobs before the workflow completed; cancelling the remaining matrix."
jq '.[] | {name, conclusion, url: .html_url}' <<< "$failed_jobs_json"
cancel_child
trap - EXIT INT TERM
exit 1
fi
}
cancel_child() {
if [[ -n "${run_id:-}" ]]; then
echo "Cancelling child workflow ${workflow}: ${run_id}" >&2
@@ -705,6 +759,9 @@ jobs:
break
fi
poll_count=$((poll_count + 1))
if (( poll_count % 2 == 0 )); then
fail_fast_failed_jobs
fi
if (( poll_count % 10 == 0 )); then
echo "Still waiting on ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
fetch_child_jobs | jq 'select(.status != "completed") | {name, status, url: .html_url}' || true
@@ -962,6 +1019,21 @@ jobs:
}
trap cancel_child EXIT INT TERM
fail_fast_failed_jobs() {
local failed_jobs_json
failed_jobs_json="$(
gh_with_retry run view "$run_id" --json jobs \
--jq '[.jobs[] | select(.status == "completed" and .conclusion != "success" and .conclusion != "skipped")]'
)"
if jq -e 'length > 0' <<< "$failed_jobs_json" >/dev/null; then
echo "::error::npm-telegram-beta-e2e.yml has failed child jobs before the workflow completed; cancelling the remaining run."
jq '.[] | {name, conclusion, url}' <<< "$failed_jobs_json"
cancel_child
trap - EXIT INT TERM
exit 1
fi
}
poll_count=0
while true; do
status="$(gh_with_retry run view "$run_id" --json status --jq '.status')"
@@ -969,6 +1041,9 @@ jobs:
break
fi
poll_count=$((poll_count + 1))
if (( poll_count % 2 == 0 )); then
fail_fast_failed_jobs
fi
if (( poll_count % 10 == 0 )); then
echo "Still waiting on npm-telegram-beta-e2e.yml: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
gh_with_retry run view "$run_id" --json jobs --jq '.jobs[] | select(.status != "completed") | {name, status, url}' || true

View File

@@ -46,11 +46,12 @@ on:
default: true
type: boolean
release_profile:
description: Release coverage profile used for release evidence summaries
description: Release coverage profile used for release evidence summaries; default reads it from the validation manifest
required: false
default: beta
default: from-validation
type: choice
options:
- from-validation
- beta
- stable
- full
@@ -135,9 +136,9 @@ jobs:
exit 1
fi
case "$RELEASE_PROFILE" in
beta|stable|full) ;;
from-validation|beta|stable|full) ;;
*)
echo "release_profile must be one of: beta, stable, full" >&2
echo "release_profile must be one of: from-validation, beta, stable, full" >&2
exit 1
;;
esac
@@ -259,6 +260,7 @@ jobs:
echo "sha=$release_sha" >> "$GITHUB_OUTPUT"
- name: Validate full release validation manifest
id: full_manifest
if: ${{ inputs.publish_openclaw_npm }}
env:
GH_TOKEN: ${{ github.token }}
@@ -289,7 +291,7 @@ jobs:
echo "Full release validation target SHA mismatch: expected $EXPECTED_SHA, got $target_sha" >&2
exit 1
fi
if [[ "$release_profile" != "$EXPECTED_RELEASE_PROFILE" ]]; then
if [[ "$EXPECTED_RELEASE_PROFILE" != "from-validation" && "$release_profile" != "$EXPECTED_RELEASE_PROFILE" ]]; then
echo "Full release validation profile mismatch: expected $EXPECTED_RELEASE_PROFILE, got $release_profile" >&2
exit 1
fi
@@ -297,6 +299,7 @@ jobs:
echo "Full release validation must run rerun_group=all before npm publish; got $rerun_group" >&2
exit 1
fi
echo "release_profile=$release_profile" >> "$GITHUB_OUTPUT"
- name: Validate release tag is reachable from a trusted release branch
env:
@@ -332,7 +335,7 @@ jobs:
env:
RELEASE_TAG: ${{ inputs.tag }}
TARGET_SHA: ${{ steps.manifest.outputs.sha || steps.ref.outputs.sha }}
RELEASE_PROFILE: ${{ inputs.release_profile }}
RELEASE_PROFILE: ${{ steps.full_manifest.outputs.release_profile || inputs.release_profile }}
FULL_RELEASE_VALIDATION_RUN_ID: ${{ inputs.full_release_validation_run_id }}
run: |
{
@@ -501,7 +504,7 @@ jobs:
wait_for_run() {
local workflow="$1"
local run_id="$2"
local status conclusion url updated_at created_at duration_seconds duration_label last_state
local status conclusion url updated_at created_at duration_seconds duration_label last_state failed_json
last_state=""
while true; do
@@ -510,6 +513,14 @@ jobs:
if [[ "$status" == "completed" ]]; then
break
fi
failed_json="$(gh run view --repo "$GITHUB_REPOSITORY" "$run_id" --json jobs \
--jq '[.jobs[] | select(.status == "completed" and .conclusion != "success" and .conclusion != "skipped")]' || true)"
if [[ -n "${failed_json}" ]] && jq -e 'length > 0' <<< "$failed_json" >/dev/null; then
echo "${workflow} has failed jobs before the workflow completed: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}" >&2
jq '.[] | {name, conclusion, url}' <<< "$failed_json" >&2 || true
print_failed_run_summary "${run_id}"
return 1
fi
url="$(printf '%s' "$run_json" | jq -r '.url')"
updated_at="$(printf '%s' "$run_json" | jq -r '.updatedAt')"
state="${status}:${updated_at}"