From 9f7485e18254f0f2a2134414a844a35b2e80d972 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 25 May 2026 21:50:35 +0100 Subject: [PATCH] test: port release validation stabilizers --- .github/actions/setup-node-env/action.yml | 9 +++ .../setup-pnpm-store-cache/ensure-node.sh | 1 + ...nclaw-cross-os-release-checks-reusable.yml | 39 +++++++++- .../openclaw-live-and-e2e-checks-reusable.yml | 41 ++++++---- extensions/memory-core/src/tools.test.ts | 76 +++++++++++++++++++ extensions/memory-core/src/tools.ts | 36 ++++++++- .../qa-lab/src/suite-runtime-agent-process.ts | 4 +- .../memory/thread-memory-isolation.md | 12 ++- scripts/e2e/mcp-channels-docker-client.ts | 68 ++++++++++------- src/agents/subagent-announce.live.test.ts | 24 ++++-- .../tools/image-tool.providers.live.test.ts | 7 +- src/agents/xai.live.test.ts | 19 ++++- src/tui/tui-pty-local.e2e.test.ts | 4 +- .../package-acceptance-workflow.test.ts | 14 +++- ...setup-pnpm-store-cache-ensure-node.test.ts | 21 +++++ 15 files changed, 306 insertions(+), 69 deletions(-) diff --git a/.github/actions/setup-node-env/action.yml b/.github/actions/setup-node-env/action.yml index 412793ad9ef9..1a85be06eb48 100644 --- a/.github/actions/setup-node-env/action.yml +++ b/.github/actions/setup-node-env/action.yml @@ -26,6 +26,15 @@ inputs: runs: using: composite steps: + - name: Normalize container toolcache + shell: bash + run: | + set -euo pipefail + if [[ -d /__t && ! -e /opt/hostedtoolcache ]]; then + mkdir -p /opt + ln -s /__t /opt/hostedtoolcache + fi + - name: Setup Node.js uses: actions/setup-node@v6 with: diff --git a/.github/actions/setup-pnpm-store-cache/ensure-node.sh b/.github/actions/setup-pnpm-store-cache/ensure-node.sh index f148d09390dd..0b3edb9e16b8 100644 --- a/.github/actions/setup-pnpm-store-cache/ensure-node.sh +++ b/.github/actions/setup-pnpm-store-cache/ensure-node.sh @@ -43,6 +43,7 @@ openclaw_find_toolcache_node() { "${RUNNER_TOOL_CACHE:-}" \ "${AGENT_TOOLSDIRECTORY:-}" \ "${ACTIONS_RUNNER_TOOL_CACHE:-}" \ + "${OPENCLAW_CONTAINER_TOOL_CACHE:-/__t}" \ "/opt/hostedtoolcache" \ "/home/runner/_work/_tool" \ "/Users/runner/hostedtoolcache" \ diff --git a/.github/workflows/openclaw-cross-os-release-checks-reusable.yml b/.github/workflows/openclaw-cross-os-release-checks-reusable.yml index 77b6ad6c10a3..d56492a5151c 100644 --- a/.github/workflows/openclaw-cross-os-release-checks-reusable.yml +++ b/.github/workflows/openclaw-cross-os-release-checks-reusable.yml @@ -553,6 +553,15 @@ jobs: use-actions-cache: "false" - name: Download candidate artifact + id: download_candidate + continue-on-error: true + uses: actions/download-artifact@v8 + with: + name: openclaw-cross-os-release-checks-candidate-${{ github.run_id }} + path: ${{ runner.temp }}/openclaw-cross-os-release-checks/candidate + + - name: Retry candidate artifact download + if: ${{ steps.download_candidate.outcome == 'failure' }} uses: actions/download-artifact@v8 with: name: openclaw-cross-os-release-checks-candidate-${{ github.run_id }} @@ -560,11 +569,38 @@ jobs: - name: Download baseline artifact if: ${{ matrix.suite == 'packaged-upgrade' }} + id: download_baseline + continue-on-error: true uses: actions/download-artifact@v8 with: name: openclaw-cross-os-release-checks-baseline-${{ github.run_id }} path: ${{ runner.temp }}/openclaw-cross-os-release-checks/baseline + - name: Retry baseline artifact download + if: ${{ matrix.suite == 'packaged-upgrade' && steps.download_baseline.outcome == 'failure' }} + uses: actions/download-artifact@v8 + with: + name: openclaw-cross-os-release-checks-baseline-${{ github.run_id }} + path: ${{ runner.temp }}/openclaw-cross-os-release-checks/baseline + + - name: Verify release-check inputs + shell: bash + env: + CANDIDATE_TGZ: ${{ runner.temp }}/openclaw-cross-os-release-checks/candidate/${{ needs.prepare.outputs.candidate_file_name }} + BASELINE_TGZ: ${{ runner.temp }}/openclaw-cross-os-release-checks/baseline/${{ needs.prepare.outputs.baseline_file_name }} + OUTPUT_DIR: ${{ runner.temp }}/openclaw-cross-os-release-checks/${{ matrix.artifact_name }}-${{ matrix.suite }} + SUITE: ${{ matrix.suite }} + run: | + mkdir -p "${OUTPUT_DIR}" + if [[ ! -f "${CANDIDATE_TGZ}" ]]; then + echo "::error::candidate artifact missing: ${CANDIDATE_TGZ}" + exit 1 + fi + if [[ "${SUITE}" == "packaged-upgrade" ]] && [[ ! -f "${BASELINE_TGZ}" ]]; then + echo "::error::baseline artifact missing: ${BASELINE_TGZ}" + exit 1 + fi + - name: Run cross-OS release checks shell: bash env: @@ -615,7 +651,8 @@ jobs: if [[ -f "${SUMMARY_PATH}" ]]; then cat "${SUMMARY_PATH}" >> "$GITHUB_STEP_SUMMARY" else - echo "No summary generated." >> "$GITHUB_STEP_SUMMARY" + mkdir -p "$(dirname "${SUMMARY_PATH}")" + echo "No summary generated." | tee "${SUMMARY_PATH}" >> "$GITHUB_STEP_SUMMARY" fi - name: Upload release-check artifacts diff --git a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml index 7a1f29bb00ee..ed7d91f5727c 100644 --- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml +++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml @@ -102,6 +102,11 @@ on: - beta - stable - full + use_github_hosted_runners: + description: Use GitHub-hosted runners instead of Blacksmith runners + required: false + default: false + type: boolean advisory: description: Treat failures as advisory for the caller required: false @@ -208,6 +213,11 @@ on: required: false default: stable type: string + use_github_hosted_runners: + description: Use GitHub-hosted runners instead of Blacksmith runners + required: false + default: true + type: boolean secrets: OPENAI_API_KEY: required: false @@ -474,7 +484,7 @@ jobs: needs: validate_selected_ref if: inputs.include_live_suites && !inputs.live_models_only && (inputs.live_suite_filter == '' || inputs.live_suite_filter == 'live-cache') continue-on-error: ${{ inputs.advisory }} - runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }} + runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }} timeout-minutes: 20 env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} @@ -524,7 +534,7 @@ jobs: needs: validate_selected_ref if: inputs.include_repo_e2e && inputs.live_suite_filter == '' continue-on-error: ${{ inputs.advisory }} - runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }} + runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }} timeout-minutes: ${{ inputs.release_test_profile == 'full' && 90 || 60 }} env: OPENCLAW_VITEST_MAX_WORKERS: "2" @@ -556,7 +566,7 @@ jobs: needs: validate_selected_ref if: inputs.include_repo_e2e && (inputs.live_suite_filter == '' || inputs.live_suite_filter == 'openshell-e2e') continue-on-error: ${{ inputs.advisory }} - runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }} + runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }} timeout-minutes: ${{ matrix.timeout_minutes }} strategy: fail-fast: false @@ -630,7 +640,7 @@ jobs: if: inputs.include_release_path_suites && inputs.docker_lanes == '' name: Docker E2E (${{ matrix.label }}) continue-on-error: ${{ inputs.advisory }} - runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} + runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} timeout-minutes: ${{ matrix.timeout_minutes }} strategy: fail-fast: false @@ -921,7 +931,7 @@ jobs: needs: validate_selected_ref if: inputs.docker_lanes != '' continue-on-error: ${{ inputs.advisory }} - runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-4vcpu-ubuntu-2404' }} + runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-4vcpu-ubuntu-2404' }} timeout-minutes: 5 outputs: groups_json: ${{ steps.groups.outputs.groups_json }} @@ -950,7 +960,7 @@ jobs: if: inputs.docker_lanes != '' name: Docker E2E targeted lanes (${{ matrix.group.label }}) continue-on-error: ${{ inputs.advisory }} - runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} + runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} timeout-minutes: 60 strategy: fail-fast: false @@ -1182,7 +1192,7 @@ jobs: if: inputs.include_openwebui && !inputs.include_release_path_suites && inputs.docker_lanes == '' name: Docker E2E (openwebui) continue-on-error: ${{ inputs.advisory }} - runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} + runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} timeout-minutes: 60 env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} @@ -1308,7 +1318,7 @@ jobs: needs: validate_selected_ref if: inputs.include_release_path_suites || inputs.include_openwebui || inputs.docker_lanes != '' continue-on-error: ${{ inputs.advisory }} - runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} + runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} timeout-minutes: ${{ inputs.release_test_profile == 'full' && 90 || 60 }} permissions: actions: read @@ -1551,7 +1561,7 @@ jobs: needs: validate_selected_ref if: inputs.include_live_suites && (inputs.live_suite_filter == '' || startsWith(inputs.live_suite_filter, 'live-') || startsWith(inputs.live_suite_filter, 'docker-live-models')) continue-on-error: ${{ inputs.advisory }} - runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} + runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} timeout-minutes: 60 permissions: contents: read @@ -1624,7 +1634,7 @@ jobs: needs: [validate_selected_ref, prepare_live_test_image] if: inputs.include_live_suites && inputs.live_model_providers == '' && (inputs.live_suite_filter == '' || inputs.live_suite_filter == 'docker-live-models') continue-on-error: ${{ inputs.advisory }} - runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} + runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} timeout-minutes: 45 strategy: fail-fast: false @@ -1775,7 +1785,7 @@ jobs: needs: [validate_selected_ref, prepare_live_test_image] if: inputs.include_live_suites && inputs.live_model_providers != '' && (inputs.live_suite_filter == '' || inputs.live_suite_filter == 'docker-live-models') continue-on-error: ${{ inputs.advisory }} - runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} + runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} timeout-minutes: 45 env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} @@ -1949,7 +1959,7 @@ jobs: needs: validate_selected_ref if: inputs.include_live_suites && !inputs.live_models_only && (inputs.live_suite_filter == '' || (startsWith(inputs.live_suite_filter, 'native-live-') && !startsWith(inputs.live_suite_filter, 'native-live-extensions-media') && inputs.live_suite_filter != 'native-live-extensions-a-k')) continue-on-error: ${{ inputs.advisory }} - runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }} + runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }} timeout-minutes: ${{ matrix.timeout_minutes }} strategy: fail-fast: false @@ -2251,6 +2261,7 @@ jobs: env: OPENCLAW_LIVE_COMMAND: ${{ matrix.command }} OPENCLAW_LIVE_SUITE_ADVISORY: ${{ matrix.advisory }} + shell: bash run: | set +e bash .release-harness/scripts/ci-live-command-retry.sh @@ -2270,7 +2281,7 @@ jobs: needs: [validate_selected_ref, prepare_live_test_image] if: inputs.include_live_suites && !inputs.live_models_only && (inputs.live_suite_filter == '' || startsWith(inputs.live_suite_filter, 'live-')) continue-on-error: ${{ inputs.advisory }} - runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} + runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-32vcpu-ubuntu-2404' }} timeout-minutes: ${{ matrix.timeout_minutes }} strategy: fail-fast: false @@ -2469,6 +2480,7 @@ jobs: env: OPENCLAW_LIVE_COMMAND: ${{ matrix.command }} OPENCLAW_LIVE_SUITE_ADVISORY: ${{ matrix.advisory }} + shell: bash run: | set +e bash .release-harness/scripts/ci-live-command-retry.sh @@ -2488,7 +2500,7 @@ jobs: needs: validate_selected_ref if: inputs.include_live_suites && !inputs.live_models_only && (inputs.live_suite_filter == '' || startsWith(inputs.live_suite_filter, 'native-live-extensions-media') || inputs.live_suite_filter == 'native-live-extensions-a-k') continue-on-error: ${{ inputs.advisory }} - runs-on: ${{ github.event_name == 'workflow_call' && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }} + runs-on: ${{ inputs.use_github_hosted_runners && 'ubuntu-24.04' || 'blacksmith-8vcpu-ubuntu-2404' }} container: image: ghcr.io/openclaw/openclaw-live-media-runner:ubuntu-24.04 credentials: @@ -2656,6 +2668,7 @@ jobs: if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id || (inputs.live_suite_filter == 'native-live-extensions-media-video' && startsWith(matrix.suite_id, 'native-live-extensions-media-video-'))) env: OPENCLAW_LIVE_SUITE_ADVISORY: ${{ matrix.advisory }} + shell: bash run: | set +e ${{ matrix.command }} diff --git a/extensions/memory-core/src/tools.test.ts b/extensions/memory-core/src/tools.test.ts index c43bde9a1af7..4a2e4ad41a78 100644 --- a/extensions/memory-core/src/tools.test.ts +++ b/extensions/memory-core/src/tools.test.ts @@ -1,5 +1,6 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; import { + getMemorySearchManagerMockCalls, getMemorySearchManagerMockConfigs, getMemorySearchManagerMockParams, resetMemoryToolMockState, @@ -85,6 +86,81 @@ describe("memory_search unavailable payloads", () => { }); }); + it("re-resolves the manager once when a cached sqlite handle was closed", async () => { + let searchCalls = 0; + setMemorySearchImpl(async () => { + searchCalls += 1; + if (searchCalls === 1) { + throw new Error("database is not open"); + } + return [ + { + path: "MEMORY.md", + startLine: 1, + endLine: 1, + score: 0.9, + snippet: "Thread-hidden codename: ORBIT-22.", + source: "memory" as const, + }, + ]; + }); + + const tool = createMemorySearchToolOrThrow({ + config: { + agents: { list: [{ id: "main", default: true }] }, + memory: { citations: "off" }, + }, + }); + const result = await tool.execute("closed-db", { query: "hidden thread codename" }); + + expect((result.details as { results?: Array<{ path: string }> }).results).toEqual([ + { + corpus: "memory", + path: "MEMORY.md", + startLine: 1, + endLine: 1, + score: 0.9, + snippet: "Thread-hidden codename: ORBIT-22.", + source: "memory", + }, + ]); + expect(searchCalls).toBe(2); + expect(getMemorySearchManagerMockCalls()).toBe(2); + }); + + it("forces a sync and retries once when the first search has zero hits", async () => { + let searchCalls = 0; + setMemorySearchImpl(async () => { + searchCalls += 1; + if (searchCalls === 1) { + return []; + } + return [ + { + path: "MEMORY.md", + startLine: 1, + endLine: 1, + score: 0.9, + snippet: "Thread-hidden codename: ORBIT-22.", + source: "memory" as const, + }, + ]; + }); + + const tool = createMemorySearchToolOrThrow({ + config: { + agents: { list: [{ id: "main", default: true }] }, + memory: { citations: "off" }, + }, + }); + const result = await tool.execute("zero-hit-retry", { query: "hidden thread codename" }); + + expect((result.details as { results?: Array<{ path: string }> }).results?.[0]?.path).toBe( + "MEMORY.md", + ); + expect(searchCalls).toBe(2); + }); + it("returns structured search debug metadata for qmd results", async () => { setMemoryBackend("qmd"); setMemorySearchImpl(async (opts) => { diff --git a/extensions/memory-core/src/tools.ts b/extensions/memory-core/src/tools.ts index ab843b6a8da6..8be405dbf5c7 100644 --- a/extensions/memory-core/src/tools.ts +++ b/extensions/memory-core/src/tools.ts @@ -81,6 +81,16 @@ function mergeMemorySearchCorpusResults(params: { return sortMemorySearchToolResults(selected).slice(0, params.maxResults); } +function isClosedMemoryStoreError(error: unknown): boolean { + const message = formatErrorMessage(error).toLowerCase(); + return ( + message.includes("database is not open") || + message.includes("database connection is not open") || + message.includes("database handle is closed") || + message.includes("memory search manager is closed") + ); +} + function buildRecallKey( result: Pick, ): string { @@ -293,6 +303,7 @@ export function createMemorySearchTool(options: { } | undefined; if (shouldQueryMemory && memory && !("error" in memory)) { + let activeMemory = memory; const runtimeDebug: MemorySearchRuntimeDebug[] = []; const qmdSearchModeOverride = resolveActiveMemoryQmdSearchModeOverride( cfg, @@ -304,16 +315,33 @@ export function createMemorySearchTool(options: { : requestedCorpus === "memory" ? (["memory"] as MemorySource[]) : undefined; - rawResults = await memory.manager.search(query, { + const searchOptions = { maxResults, minScore, sessionKey: options.agentSessionKey, qmdSearchModeOverride, - onDebug: (debug) => { + onDebug: (debug: MemorySearchRuntimeDebug) => { runtimeDebug.push(debug); }, ...(searchSources ? { sources: searchSources } : {}), - }); + }; + try { + rawResults = await activeMemory.manager.search(query, searchOptions); + } catch (error) { + if (!isClosedMemoryStoreError(error)) { + throw error; + } + const refreshed = await getMemoryManagerContext({ cfg, agentId }); + if ("error" in refreshed) { + throw error; + } + activeMemory = refreshed; + rawResults = await activeMemory.manager.search(query, searchOptions); + } + if (rawResults.length === 0 && activeMemory.manager.sync) { + await activeMemory.manager.sync({ reason: "search", force: true }); + rawResults = await activeMemory.manager.search(query, searchOptions); + } rawResults = await filterMemorySearchHitsBySessionVisibility({ cfg, agentId, @@ -326,7 +354,7 @@ export function createMemorySearchTool(options: { } else if (requestedCorpus === "memory") { rawResults = rawResults.filter((hit) => hit.source === "memory"); } - const status = memory.manager.status(); + const status = activeMemory.manager.status(); const decorated = decorateCitations(rawResults, includeCitations); const resolved = resolveMemoryBackendConfig({ cfg, agentId }); const memoryResults = diff --git a/extensions/qa-lab/src/suite-runtime-agent-process.ts b/extensions/qa-lab/src/suite-runtime-agent-process.ts index e5e3b0ab729f..be22822a22ef 100644 --- a/extensions/qa-lab/src/suite-runtime-agent-process.ts +++ b/extensions/qa-lab/src/suite-runtime-agent-process.ts @@ -256,7 +256,7 @@ async function forceMemoryIndex(params: { await runQaCli(params.env, ["memory", "index", "--agent", "qa", "--force"], { timeoutMs: liveTurnTimeoutMs(params.env, 60_000), }); - return await waitForMemorySearchMatch({ + const result = await waitForMemorySearchMatch({ expectedNeedle: params.expectedNeedle, timeoutMs: liveTurnTimeoutMs(params.env, 20_000), search: async () => @@ -269,6 +269,8 @@ async function forceMemoryIndex(params: { }, )) as QaMemorySearchResult, }); + await params.env.gateway.restartAfterStateMutation?.(async () => {}); + return result; } async function runAgentPrompt( diff --git a/qa/scenarios/memory/thread-memory-isolation.md b/qa/scenarios/memory/thread-memory-isolation.md index 491713521515..538a1d1a624e 100644 --- a/qa/scenarios/memory/thread-memory-isolation.md +++ b/qa/scenarios/memory/thread-memory-isolation.md @@ -54,6 +54,14 @@ steps: expr: config.memoryQuery expectedNeedle: expr: config.expectedNeedle + - call: waitForGatewayHealthy + args: + - ref: env + - 60000 + - call: waitForQaChannelReady + args: + - ref: env + - 60000 - call: handleQaAction saveAs: threadPayload args: @@ -96,8 +104,8 @@ steps: - ref: state - lambda: params: [candidate] - expr: "candidate.conversation.id === config.channelId && candidate.threadId === threadId && candidate.text.includes(config.expectedNeedle)" - - expr: liveTurnTimeoutMs(env, 45000) + expr: "((candidate.conversation.id === config.channelId && candidate.threadId === threadId) || candidate.conversation.id === threadId) && candidate.text.includes(config.expectedNeedle)" + - expr: liveTurnTimeoutMs(env, 300000) - assert: expr: "!state.getSnapshot().messages.slice(beforeCursor).some((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === config.channelId && !candidate.threadId)" message: threaded memory answer leaked into root channel diff --git a/scripts/e2e/mcp-channels-docker-client.ts b/scripts/e2e/mcp-channels-docker-client.ts index b82e8dd1d94f..27a1d908570b 100644 --- a/scripts/e2e/mcp-channels-docker-client.ts +++ b/scripts/e2e/mcp-channels-docker-client.ts @@ -252,39 +252,50 @@ async function main() { ); const channelMessage = `hello from docker ${randomUUID()}`; - const userEvent = (await Promise.all([ - callTool<{ - structuredContent?: { event?: Record }; + await gateway.request("chat.send", { + sessionKey: "agent:main:main", + message: channelMessage, + idempotencyKey: randomUUID(), + }); + const rawGatewayUserMessage = await waitFor( + "raw gateway user session.message", + () => + gateway.events.find( + (entry) => + entry.event === "session.message" && + entry.payload.sessionKey === "agent:main:main" && + extractTextFromGatewayPayload(entry.payload) === channelMessage, + ), + 10_000, + ).catch(() => undefined); + const userEvent = await waitFor( + "MCP user session.message event", + async () => { + const polled = await callTool<{ + structuredContent?: { events?: Array> }; + }>({ + name: "events_poll", + arguments: { session_key: "agent:main:main", after_cursor: assistantCursor, limit: 50 }, + }); + return (polled.structuredContent?.events ?? []).find( + (entry) => entry.text === channelMessage, + ); + }, + 60_000, + ).catch(() => undefined); + if (userEvent?.text !== channelMessage) { + const polled = await callTool<{ + structuredContent?: { events?: Array> }; }>({ - name: "events_wait", - arguments: { - session_key: "agent:main:main", - after_cursor: assistantCursor, - timeout_ms: 10_000, - }, - }), - gateway.request("chat.send", { - sessionKey: "agent:main:main", - message: channelMessage, - idempotencyKey: randomUUID(), - }), - ]).then(([result]) => result)) as { - structuredContent?: { event?: Record }; - }; - const rawGatewayUserMessage = await waitFor("raw gateway user session.message", () => - gateway.events.find( - (entry) => - entry.event === "session.message" && - entry.payload.sessionKey === "agent:main:main" && - extractTextFromGatewayPayload(entry.payload) === channelMessage, - ), - ); - if (userEvent.structuredContent?.event?.text !== channelMessage) { + name: "events_poll", + arguments: { session_key: "agent:main:main", after_cursor: assistantCursor, limit: 50 }, + }); throw new Error( `expected user event after chat.send: ${JSON.stringify( { - userEvent: userEvent.structuredContent?.event ?? null, + userEvent: userEvent ?? null, rawGatewayUserMessage: rawGatewayUserMessage ?? null, + mcpEventsAfterAssistant: polled.structuredContent?.events ?? [], recentGatewayEvents: gateway.events.slice(-10).map((entry) => ({ event: entry.event, sessionKey: entry.payload.sessionKey, @@ -296,7 +307,6 @@ async function main() { )}`, ); } - assert(rawGatewayUserMessage, "expected raw gateway session.message after chat.send"); let helpNotification: ClaudeChannelNotification; try { diff --git a/src/agents/subagent-announce.live.test.ts b/src/agents/subagent-announce.live.test.ts index 82513251e6f7..210d565c94f0 100644 --- a/src/agents/subagent-announce.live.test.ts +++ b/src/agents/subagent-announce.live.test.ts @@ -371,6 +371,7 @@ describeLive("subagent announce live", () => { const nonce = randomBytes(3).toString("hex").toUpperCase(); const childToken = `CHILD_STEERED_${nonce}`; const parentToken = `PARENT_SAW_${childToken}`; + const parentStartedToken = `PARENT_READY_${nonce}`; const steerToken = `STEER_${nonce}`; const childTask = [ `Immediately call sessions_yield with message="waiting for ${steerToken}".`, @@ -464,9 +465,9 @@ describeLive("subagent announce live", () => { runTimeoutSeconds: 300, })}.`, 'Step 2: after spawn returns status="accepted", do not call the subagents tool; the test harness will steer the child.', - `Step 3: call sessions_yield with message="waiting for ${childToken}" and wait for the child completion event.`, - `Step 4: after the completion event arrives, reply exactly ${parentToken}.`, - "Do not reply with the parent token until the child completion event is visible.", + `Step 3: reply exactly ${parentStartedToken}.`, + `In a future continuation after the child completion event arrives, reply exactly ${parentToken}.`, + `Do not reply with ${parentToken} before the child completion event is visible.`, ].join("\n"), }, { expectFinal: true, timeoutMs: REQUEST_TIMEOUT_MS }, @@ -483,6 +484,9 @@ describeLive("subagent announce live", () => { (run) => run.taskName === "steered_child" && !run.endedAt, ); }); + const initialResponse = await initialRequest; + expect(extractPayloadText(initialResponse.result)).toContain(parentStartedToken); + const cfg = getRuntimeConfig(); const steerResult = await steerControlledSubagentRun({ cfg, @@ -515,12 +519,16 @@ describeLive("subagent announce live", () => { : undefined; }); - const completedDispatch = inProcessAgentDispatches.find( - (entry) => entry.phase === "completed", + const completedDispatch = await waitFor( + "in-process subagent completion agent dispatch", + () => { + if (initialError) { + throw initialError; + } + return inProcessAgentDispatches.find((entry) => entry.phase === "completed"); + }, ); - if (completedDispatch) { - expect(completedDispatch.resultText).toContain(childToken); - } + expect(completedDispatch.resultText).toContain(parentToken); expect( inProcessAgentDispatches.some((entry) => { if (initialError) { diff --git a/src/agents/tools/image-tool.providers.live.test.ts b/src/agents/tools/image-tool.providers.live.test.ts index b2a3a5a4c4fe..6509e2624bd0 100644 --- a/src/agents/tools/image-tool.providers.live.test.ts +++ b/src/agents/tools/image-tool.providers.live.test.ts @@ -11,7 +11,11 @@ import { type ImageDescriptionRequest, type MediaUnderstandingProvider, } from "../../plugin-sdk/media-understanding.js"; -import { isOverloadedErrorMessage, isServerErrorMessage } from "../../plugin-sdk/test-env.js"; +import { + isBillingErrorMessage, + isOverloadedErrorMessage, + isServerErrorMessage, +} from "../../plugin-sdk/test-env.js"; import { isLiveTestEnabled } from "../live-test-helpers.js"; import { createImageTool, testing } from "./image-tool.js"; @@ -106,6 +110,7 @@ function formatLiveError(error: unknown): string { function isSkippableLiveError(error: unknown): boolean { const message = formatLiveError(error); return ( + isBillingErrorMessage(message) || isOverloadedErrorMessage(message) || isServerErrorMessage(message) || /timed out|operation was aborted/i.test(message) diff --git a/src/agents/xai.live.test.ts b/src/agents/xai.live.test.ts index e79a621cb000..de8a3c845545 100644 --- a/src/agents/xai.live.test.ts +++ b/src/agents/xai.live.test.ts @@ -31,6 +31,17 @@ type AssistantLikeMessage = { }>; }; +function getToolFunction(tool: Record): Record | undefined { + const nested = tool.function; + if (nested && typeof nested === "object" && !Array.isArray(nested)) { + return nested as Record; + } + if (tool.type === "function" && typeof tool.name === "string") { + return tool; + } + return undefined; +} + function resolveLiveXaiModel() { return getModel("xai", "grok-4.3") ?? getModel("xai", "grok-4.20-0309-reasoning"); } @@ -141,11 +152,13 @@ describeLive("xai live", () => { ? (payload.tools as Array>) : []; expect(payloadTools.length).toBeGreaterThan(0); - const firstFunction = payloadTools[0]?.function; - requireLiveValue(firstFunction, "first xAI tool function"); + const firstFunction = requireLiveValue( + payloadTools[0] ? getToolFunction(payloadTools[0]) : undefined, + "first xAI tool function", + ); expect(typeof firstFunction).toBe("object"); expect(Array.isArray(firstFunction)).toBe(false); - expect([undefined, false]).toContain((firstFunction as Record).strict); + expect([undefined, false]).toContain(firstFunction.strict); }); }, 90_000); diff --git a/src/tui/tui-pty-local.e2e.test.ts b/src/tui/tui-pty-local.e2e.test.ts index 957b8c0f42f3..d3a96ab33e51 100644 --- a/src/tui/tui-pty-local.e2e.test.ts +++ b/src/tui/tui-pty-local.e2e.test.ts @@ -32,9 +32,9 @@ type MockModelServer = { const activeRuns: PtyRun[] = []; const LOCAL_STARTUP_TIMEOUT_MS = 20_000; -const LOCAL_OUTPUT_TIMEOUT_MS = 35_000; +const LOCAL_OUTPUT_TIMEOUT_MS = 60_000; const LOCAL_EXIT_TIMEOUT_MS = 4_000; -const LOCAL_TEST_TIMEOUT_MS = 60_000; +const LOCAL_TEST_TIMEOUT_MS = 90_000; function resolveSpawnPty() { const runtime = nodePty as NodePtyRuntimeModule; diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts index 03f70790ed19..c9bafb1cafd4 100644 --- a/test/scripts/package-acceptance-workflow.test.ts +++ b/test/scripts/package-acceptance-workflow.test.ts @@ -96,6 +96,8 @@ describe("package acceptance workflow", () => { expect(setupPnpmAction).not.toContain("version: ${{ inputs.pnpm-version }}"); const setupNodeAction = readFileSync(".github/actions/setup-node-env/action.yml", "utf8"); + expect(setupNodeAction).toContain("Normalize container toolcache"); + expect(setupNodeAction).toContain("ln -s /__t /opt/hostedtoolcache"); expect(setupNodeAction).toContain("use-actions-cache: ${{ inputs.use-actions-cache }}"); for (const workflowPath of workflowPaths()) { @@ -484,14 +486,15 @@ describe("package artifact reuse", () => { 'OPENCLAW_LIVE_CLI_BACKEND_ARGS=["exec","--json","--color","never","--sandbox","danger-full-access","--skip-git-repo-check"]', ); expect(workflow).toContain("bash .release-harness/scripts/ci-live-command-retry.sh"); + expect(workflow).toContain("use_github_hosted_runners:"); expect(workflow).toMatch( - /validate_repo_e2e:[\s\S]*?runs-on: \$\{\{ github\.event_name == 'workflow_call' && 'ubuntu-24\.04' \|\| 'blacksmith-8vcpu-ubuntu-2404' \}\}/u, + /validate_repo_e2e:[\s\S]*?runs-on: \$\{\{ inputs\.use_github_hosted_runners && 'ubuntu-24\.04' \|\| 'blacksmith-8vcpu-ubuntu-2404' \}\}/u, ); expect(workflow).toMatch( - /validate_special_e2e:[\s\S]*?runs-on: \$\{\{ github\.event_name == 'workflow_call' && 'ubuntu-24\.04' \|\| 'blacksmith-8vcpu-ubuntu-2404' \}\}/u, + /validate_special_e2e:[\s\S]*?runs-on: \$\{\{ inputs\.use_github_hosted_runners && 'ubuntu-24\.04' \|\| 'blacksmith-8vcpu-ubuntu-2404' \}\}/u, ); expect(workflow).toMatch( - /validate_live_provider_suites:[\s\S]*?runs-on: \$\{\{ github\.event_name == 'workflow_call' && 'ubuntu-24\.04' \|\| 'blacksmith-8vcpu-ubuntu-2404' \}\}/u, + /validate_live_provider_suites:[\s\S]*?runs-on: \$\{\{ inputs\.use_github_hosted_runners && 'ubuntu-24\.04' \|\| 'blacksmith-8vcpu-ubuntu-2404' \}\}/u, ); expect(workflow).toContain("suite_id: native-live-src-gateway-core"); expect(workflow).toContain("suite_id: native-live-src-gateway-backends"); @@ -535,6 +538,9 @@ describe("package artifact reuse", () => { expect(workflow).toMatch(/suite_id: native-live-extensions-moonshot[\s\S]*?advisory: true/u); expect(workflow).toContain("OPENCLAW_LIVE_SUITE_ADVISORY: ${{ matrix.advisory }}"); expect(workflow).toContain("Advisory live suite failed with exit code"); + expect(workflow).toMatch( + /validate_live_media_provider_suites:[\s\S]*?OPENCLAW_LIVE_SUITE_ADVISORY: \$\{\{ matrix\.advisory \}\}[\s\S]*?shell: bash[\s\S]*?Advisory live suite failed with exit code/u, + ); expect(workflow).toMatch( /suite_id: live-gateway-advisory-docker-deepseek-fireworks[\s\S]*?advisory: true/u, ); @@ -548,7 +554,7 @@ describe("package artifact reuse", () => { expect(workflow).toContain("suite_id: native-live-extensions-o-z-other"); expect(workflow).toContain("validate_live_media_provider_suites:"); expect(workflow).toMatch( - /validate_live_media_provider_suites:[\s\S]*?runs-on: \$\{\{ github\.event_name == 'workflow_call' && 'ubuntu-24\.04' \|\| 'blacksmith-8vcpu-ubuntu-2404' \}\}/u, + /validate_live_media_provider_suites:[\s\S]*?runs-on: \$\{\{ inputs\.use_github_hosted_runners && 'ubuntu-24\.04' \|\| 'blacksmith-8vcpu-ubuntu-2404' \}\}/u, ); expect(workflow).toContain("image: ghcr.io/openclaw/openclaw-live-media-runner:ubuntu-24.04"); expect(workflow).toContain("ffmpeg -version | head -1"); diff --git a/test/scripts/setup-pnpm-store-cache-ensure-node.test.ts b/test/scripts/setup-pnpm-store-cache-ensure-node.test.ts index b464cf57b474..7b64e55abc27 100644 --- a/test/scripts/setup-pnpm-store-cache-ensure-node.test.ts +++ b/test/scripts/setup-pnpm-store-cache-ensure-node.test.ts @@ -92,6 +92,27 @@ describe("setup-pnpm-store-cache ensure-node", () => { } }); + it("repairs PATH from the container-mounted GitHub Actions toolcache", () => { + const root = mkdtempSync(join(tmpdir(), "openclaw-ensure-node-")); + try { + const activeBin = join(root, "active", "bin"); + writeFakeNode(activeBin, "20.20.0"); + const toolcacheBin = join(root, "__t", "node", "24.99.99", "x64", "bin"); + const toolcacheNode = writeFakeNode(toolcacheBin, "24.99.99"); + const result = runEnsureNode(root, "24.99.99", { + PATH: `${activeBin}:${process.env.PATH ?? ""}`, + OPENCLAW_CONTAINER_TOOL_CACHE: join(root, "__t"), + RUNNER_TOOL_CACHE: join(root, "hostedtoolcache"), + }); + + expect(result.status).toBe(0); + expect(result.stdout).toContain(`Using Node 24.99.99 from ${toolcacheNode}`); + expect(result.stdout).toContain(`${toolcacheNode}\n24.99.99`); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + it("accepts major wildcard requests when selecting a toolcache node", () => { const root = mkdtempSync(join(tmpdir(), "openclaw-ensure-node-")); try {