feat(qa-lab): add jsonl replay harness

This commit is contained in:
Vincent Koc
2026-05-21 15:27:24 +08:00
parent 66dcc4ee8f
commit cf0657852f
14 changed files with 641 additions and 0 deletions

View File

@@ -9,6 +9,7 @@ Docs: https://docs.openclaw.ai
- Gateway/plugins: reuse a compatible Gateway startup plugin registry during dispatch so safe plugin dispatches avoid redundant registry loading. (#84324) Thanks @ai-hpc.
- Dependencies: refresh provider, plugin, UI, and tooling packages, update `protobufjs` to 8.4.0 to clear the current npm advisory, and carry the Claude ACP completion patch forward to `@agentclientprotocol/claude-agent-acp` 0.36.1.
- Agents/tools: remove the old sender-owner tool gating path so configured tools stay visible for trusted sessions while command and channel-action auth still carry real sender identity.
- QA-Lab: add curated mock JSONL replay fixtures and first-drift reporting for runtime-parity audits. (#80323, refs #80176) Thanks @100yenadmin.
- Tests/perf: isolate doctor core health check unit coverage from real skills/workspace discovery so `doctor-core-checks` no longer dominates unit perf while keeping one real skills-readiness smoke. (#84493) Thanks @frankekn.
### Fixes

View File

@@ -76,6 +76,7 @@ import {
runQaDockerUpCommand,
runQaCharacterEvalCommand,
runQaCoverageReportCommand,
runQaJsonlReplayCommand,
runQaManualLaneCommand,
runQaParityReportCommand,
runQaSuiteCommand,
@@ -1084,6 +1085,44 @@ describe("qa cli runtime", () => {
expectWriteContains(stdoutWrite, "codex-native-workspace");
});
it("writes a curated mock JSONL replay report and summary", async () => {
  // A throwaway repo root keeps the generated artifacts out of the real checkout.
  const scratchRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-jsonl-replay-cli-"));
  const artifactDir = path.join(scratchRoot, "jsonl-output");
  try {
    await runQaJsonlReplayCommand({
      repoRoot: scratchRoot,
      transcripts: path.resolve("qa/scenarios/jsonl-replay"),
      outputDir: "jsonl-output",
      runtimePair: "pi,codex",
    });

    const markdown = await fs.readFile(path.join(artifactDir, "qa-jsonl-replay-report.md"), "utf8");
    const summaryText = await fs.readFile(
      path.join(artifactDir, "qa-jsonl-replay-summary.json"),
      "utf8",
    );
    const summary = JSON.parse(summaryText) as {
      transcripts?: Array<{ userTurnCount?: number }>;
    };

    expect(markdown).toContain("# OpenClaw JSONL Replay Report - pi vs codex");
    expect(markdown).toContain("| plan-mode-boundaries.jsonl | 3 | | none, none, none |");
    expect(summary.transcripts).toHaveLength(7);
  } finally {
    await fs.rm(scratchRoot, { recursive: true, force: true });
  }
});
it("keeps JSONL replay mock-only until real runtime cell replay is wired", async () => {
  // Any provider mode other than mock-openai must be refused by the command.
  const liveReplay = runQaJsonlReplayCommand({
    repoRoot: process.cwd(),
    providerMode: "live-frontier",
  });
  await expect(liveReplay).rejects.toThrow(
    "qa jsonl-replay currently supports mock-openai curated fixtures only.",
  );
});
it("exits nonzero when tool coverage summary is missing a required runtime tool call", async () => {
const priorExitCode = process.exitCode;
const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-tool-coverage-"));

View File

@@ -16,6 +16,12 @@ import { buildQaCoverageInventory, renderQaCoverageMarkdownReport } from "./cove
import { buildQaDockerHarnessImage, writeQaDockerHarnessFiles } from "./docker-harness.js";
import { runQaDockerUp } from "./docker-up.runtime.js";
import type { QaCliBackendAuthMode } from "./gateway-child.js";
import {
createMockJsonlReplayCellRunner,
renderJsonlReplayMarkdownReport,
runJsonlReplay,
type JsonlReplayInput,
} from "./jsonl-replay.js";
import { startQaLabServer } from "./lab-server.js";
import { runQaManualLane } from "./manual-lane.runtime.js";
import { runQaMultipass } from "./multipass.runtime.js";
@@ -820,6 +826,50 @@ export async function runQaCoverageReportCommand(opts: {
process.stdout.write(body);
}
/**
 * Runs the `qa jsonl-replay` command: replays every JSONL transcript found under
 * the transcript directory with the mock cell runner, then writes a markdown
 * report and a JSON summary into the output directory and prints both paths.
 *
 * @param opts.repoRoot Repository root; defaults to the current working directory.
 * @param opts.transcripts Transcript directory, resolved against the repo root;
 *   defaults to `qa/scenarios/jsonl-replay`.
 * @param opts.outputDir Artifact directory; when it does not resolve, a
 *   timestamped directory under `.artifacts/qa-e2e` is used instead.
 * @param opts.runtimePair Runtime pair string; only `pi,codex` is accepted.
 * @param opts.providerMode Provider mode; only `mock-openai` is accepted.
 * @throws Error when the runtime pair is not `pi,codex` or the provider mode is
 *   not `mock-openai`.
 */
export async function runQaJsonlReplayCommand(opts: {
  repoRoot?: string;
  transcripts?: string;
  outputDir?: string;
  runtimePair?: string;
  providerMode?: QaProviderModeInput;
}) {
  const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());

  const requestedPair = parseQaRuntimePair(opts.runtimePair) ?? ["pi", "codex"];
  const isPiThenCodex = requestedPair[0] === "pi" && requestedPair[1] === "codex";
  if (!isPiThenCodex) {
    throw new Error('--runtime-pair for jsonl-replay must be "pi,codex".');
  }
  // Validated above, so the pair can be handed to the replay harness as its tuple type.
  const runtimePair = requestedPair as JsonlReplayInput["runtimePair"];

  const providerMode = normalizeQaProviderMode(opts.providerMode ?? "mock-openai");
  if (providerMode !== "mock-openai") {
    throw new Error("qa jsonl-replay currently supports mock-openai curated fixtures only.");
  }

  const transcriptDir = path.resolve(repoRoot, opts.transcripts ?? "qa/scenarios/jsonl-replay");
  const fallbackOutputDir = () =>
    path.join(repoRoot, ".artifacts", "qa-e2e", `jsonl-replay-${Date.now().toString(36)}`);
  const outputDir = resolveRepoRelativeOutputDir(repoRoot, opts.outputDir) ?? fallbackOutputDir();
  await fs.mkdir(outputDir, { recursive: true });

  const replay = await runJsonlReplay(
    { directory: transcriptDir, runtimePair, providerMode },
    { runCell: createMockJsonlReplayCellRunner() },
  );

  const markdown = renderJsonlReplayMarkdownReport({
    generatedAt: new Date().toISOString(),
    providerMode,
    runtimePair,
    transcripts: replay.transcripts,
  });

  const reportPath = path.join(outputDir, "qa-jsonl-replay-report.md");
  const summaryPath = path.join(outputDir, "qa-jsonl-replay-summary.json");
  await fs.writeFile(reportPath, markdown, "utf8");
  await fs.writeFile(summaryPath, `${JSON.stringify(replay, null, 2)}\n`, "utf8");

  process.stdout.write(`QA JSONL replay report: ${reportPath}\n`);
  process.stdout.write(`QA JSONL replay summary: ${summaryPath}\n`);
}
export async function runQaCharacterEvalCommand(opts: {
repoRoot?: string;
outputDir?: string;

View File

@@ -45,6 +45,7 @@ const {
runQaCredentialsListCommand,
runQaCredentialsRemoveCommand,
runQaCoverageReportCommand,
runQaJsonlReplayCommand,
runQaProviderServerCommand,
runQaSuiteCommand,
runQaTelegramCommand,
@@ -58,6 +59,7 @@ const {
runQaCredentialsListCommand: vi.fn(),
runQaCredentialsRemoveCommand: vi.fn(),
runQaCoverageReportCommand: vi.fn(),
runQaJsonlReplayCommand: vi.fn(),
runQaProviderServerCommand: vi.fn(),
runQaSuiteCommand: vi.fn(),
runQaTelegramCommand: vi.fn(),
@@ -113,6 +115,7 @@ vi.mock("./cli.runtime.js", () => ({
runQaCredentialsListCommand,
runQaCredentialsRemoveCommand,
runQaCoverageReportCommand,
runQaJsonlReplayCommand,
runQaProviderServerCommand,
runQaSuiteCommand,
}));
@@ -128,6 +131,7 @@ describe("qa cli registration", () => {
runQaCredentialsListCommand.mockReset();
runQaCredentialsRemoveCommand.mockReset();
runQaCoverageReportCommand.mockReset();
runQaJsonlReplayCommand.mockReset();
runQaProviderServerCommand.mockReset();
runQaSuiteCommand.mockReset();
runQaTelegramCommand.mockReset();
@@ -480,6 +484,33 @@ describe("qa cli registration", () => {
});
});
it("routes JSONL replay flags into the qa runtime command", async () => {
  const argv = [
    "node",
    "openclaw",
    "qa",
    "jsonl-replay",
    "--repo-root",
    "/tmp/openclaw-repo",
    "--transcripts",
    "qa/scenarios/jsonl-replay",
    "--runtime-pair",
    "pi,codex",
    "--provider-mode",
    "mock-openai",
    "--output-dir",
    ".artifacts/qa-e2e/jsonl-replay-test",
  ];
  // Every flag should arrive at the runtime command under its camelCase option name.
  const expectedOptions = {
    repoRoot: "/tmp/openclaw-repo",
    transcripts: "qa/scenarios/jsonl-replay",
    runtimePair: "pi,codex",
    providerMode: "mock-openai",
    outputDir: ".artifacts/qa-e2e/jsonl-replay-test",
  };

  await program.parseAsync(argv);

  expect(runQaJsonlReplayCommand).toHaveBeenCalledWith(expectedOptions);
});
it("delegates discovered qa runner registration through the generic host seam", () => {
const [{ registration }] = listQaRunnerCliContributions.mock.results[0]?.value;
expect(registration.register).toHaveBeenCalledTimes(1);

View File

@@ -83,6 +83,17 @@ async function runQaCoverageReport(opts: {
await runtime.runQaCoverageReportCommand(opts);
}
/**
 * Loads the QA lab CLI runtime and forwards the parsed `jsonl-replay` options
 * to its `runQaJsonlReplayCommand` unchanged.
 */
async function runQaJsonlReplay(opts: {
  repoRoot?: string;
  transcripts?: string;
  outputDir?: string;
  runtimePair?: string;
  providerMode?: QaProviderModeInput;
}) {
  const qaLabRuntime = await loadQaLabCliRuntime();
  await qaLabRuntime.runQaJsonlReplayCommand(opts);
}
async function runQaCharacterEval(opts: {
repoRoot?: string;
outputDir?: string;
@@ -402,6 +413,33 @@ export function registerQaLabCli(program: Command) {
},
);
// Register the `qa jsonl-replay` subcommand. Every option is optional on the command line;
// --transcripts, --runtime-pair and --provider-mode carry defaults declared here.
qa.command("jsonl-replay")
.description("Replay curated JSONL transcripts through the runtime parity replay harness")
.option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
.option(
"--transcripts <path>",
"Directory of curated JSONL transcripts",
"qa/scenarios/jsonl-replay",
)
.option("--runtime-pair <pair>", "Runtime pair label, e.g. pi,codex", "pi,codex")
.option(
"--provider-mode <mode>",
`Provider mode (${formatQaProviderModeHelp()})`,
"mock-openai",
)
.option("--output-dir <path>", "Artifact directory for the JSONL replay report")
.action(
// The parsed options object is handed to runQaJsonlReplay as-is.
async (opts: {
repoRoot?: string;
transcripts?: string;
runtimePair?: string;
providerMode?: QaProviderModeInput;
outputDir?: string;
}) => {
await runQaJsonlReplay(opts);
},
);
qa.command("character-eval")
.description("Run the character QA scenario across live models and write a judged report")
.option("--repo-root <path>", "Repository root to target when running from a neutral cwd")

View File

@@ -0,0 +1,169 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it } from "vitest";
import {
createMockJsonlReplayCellRunner,
extractJsonlReplayUserTurns,
renderJsonlReplayMarkdownReport,
runJsonlReplay,
type JsonlReplayCellRunner,
} from "./jsonl-replay.js";
import type { RuntimeId, RuntimeParityCell, RuntimeParityToolCall } from "./runtime-parity.js";
const tempRoots: string[] = [];
/**
 * Builds a runtime parity cell for tests. The defaults are the same for every
 * runtime apart from `runtime` itself and the runtime name embedded in
 * `transcriptBytes`; `overrides` replaces any top-level field.
 */
function makeCell(
  runtime: RuntimeId,
  overrides: Partial<RuntimeParityCell> = {},
): RuntimeParityCell {
  const baseline: RuntimeParityCell = {
    runtime,
    transcriptBytes: `{"message":{"role":"assistant","content":"${runtime} reply"}}\n`,
    toolCalls: [],
    finalText: "same reply",
    usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
    wallClockMs: 12,
    bootStateLines: [],
  };
  return { ...baseline, ...overrides };
}
/** Builds a `read` tool-call record for tests; `overrides` replaces any field. */
function makeToolCall(overrides: Partial<RuntimeParityToolCall> = {}): RuntimeParityToolCall {
  const baseline: RuntimeParityToolCall = {
    tool: "read",
    argsHash: "args-a",
    resultHash: "result-a",
  };
  return { ...baseline, ...overrides };
}
/** Creates a fresh temp directory and records it in `tempRoots` for later removal. */
async function makeTempDir() {
  const prefix = path.join(os.tmpdir(), "jsonl-replay-");
  const createdDir = await fs.mkdtemp(prefix);
  tempRoots.push(createdDir);
  return createdDir;
}
// Drain the recorded temp directories after each test and delete them in parallel.
afterEach(async () => {
  const pendingRoots = tempRoots.splice(0);
  const removals = pendingRoots.map((dir) => fs.rm(dir, { recursive: true, force: true }));
  await Promise.all(removals);
});
// Unit coverage for the JSONL replay harness: turn extraction, first-drift
// reporting with a stubbed cell runner, and the curated fixture set.
describe("jsonl replay", () => {
it("extracts user-turn boundaries while ignoring system, tool-only, empty, and malformed rows", () => {
// Rows 1-5 (system, tool, blank user, malformed JSON, assistant) must not yield turns.
// Row 6 wraps the user message in `message`; row 7 puts `role`/`content` at the top level.
const turns = extractJsonlReplayUserTurns(
[
`{"message":{"role":"system","content":"System setup"}}`,
`{"message":{"role":"tool","content":"tool-only prelude"}}`,
`{"message":{"role":"user","content":" "}}`,
`{not-json`,
`{"message":{"role":"assistant","content":"Ready."}}`,
`{"message":{"role":"user","content":[{"type":"text","text":"Plan the release"},{"type":"tool_result","content":"ignored"}]}}`,
`{"role":"user","content":[{"type":"input_text","text":"Check the follow-up"}]}`,
].join("\n"),
);
expect(turns).toEqual([
expect.objectContaining({
turn: 1,
lineNumber: 6,
userText: "Plan the release",
}),
expect.objectContaining({
turn: 2,
lineNumber: 7,
userText: "Check the follow-up",
}),
]);
// The prefix keeps earlier parseable rows but never the malformed one.
expect(turns[0]?.transcriptPrefix).toContain(`"role":"system"`);
expect(turns[0]?.transcriptPrefix).not.toContain("{not-json");
});
it("reports the earliest divergent turn using runtime parity drift classes", async () => {
const transcriptDir = await makeTempDir();
await fs.writeFile(
path.join(transcriptDir, "three-turns.jsonl"),
[
`{"message":{"role":"user","content":"Turn one"}}`,
`{"message":{"role":"assistant","content":"Ready"}}`,
`{"message":{"role":"user","content":"Turn two"}}`,
`{"message":{"role":"assistant","content":"Using a tool"}}`,
`{"message":{"role":"user","content":"Turn three"}}`,
].join("\n"),
"utf8",
);
// Stub runner: turn 2 differs between runtimes only in the tool call's argsHash,
// turn 3 differs only in finalText, and turn 1 is identical for both runtimes.
const runCell: JsonlReplayCellRunner = async ({ runtime, turn }) => {
if (turn.turn === 2) {
return {
scenarioStatus: "pass",
cell: makeCell(runtime, {
toolCalls: [makeToolCall(runtime === "pi" ? {} : { argsHash: "args-codex" })],
}),
};
}
if (turn.turn === 3) {
return {
scenarioStatus: "pass",
cell: makeCell(runtime, {
finalText: runtime === "pi" ? "pi wording" : "codex wording",
}),
};
}
return {
scenarioStatus: "pass",
cell: makeCell(runtime),
};
};
const result = await runJsonlReplay(
{
directory: transcriptDir,
runtimePair: ["pi", "codex"],
providerMode: "mock-openai",
},
{ runCell },
);
expect(result.transcripts).toHaveLength(1);
// Replay continues past the first drift, so all three turns are classified.
expect(result.transcripts[0]).toEqual(
expect.objectContaining({
userTurnCount: 3,
drift: ["none", "tool-call-shape", "text-only"],
firstDriftAtTurn: 2,
}),
);
expect(result.transcripts[0]?.cells.pi).toHaveLength(3);
expect(result.transcripts[0]?.cells.codex).toHaveLength(3);
});
it("runs the curated replay fixture set in mock-openai mode", async () => {
// The relative fixture path is resolved against the current working directory.
const fixtureDir = path.resolve("qa/scenarios/jsonl-replay");
const result = await runJsonlReplay(
{
directory: fixtureDir,
runtimePair: ["pi", "codex"],
providerMode: "mock-openai",
},
{ runCell: createMockJsonlReplayCellRunner() },
);
expect(result.transcripts).toHaveLength(7);
expect(result.transcripts.map((entry) => entry.userTurnCount)).toEqual([2, 2, 3, 2, 2, 2, 2]);
// With the mock runner no fixture is expected to report a first-drift turn.
expect(result.transcripts.every((entry) => entry.firstDriftAtTurn === undefined)).toBe(true);
expect(
renderJsonlReplayMarkdownReport({
generatedAt: "2026-05-10T00:00:00.000Z",
providerMode: "mock-openai",
runtimePair: ["pi", "codex"],
transcripts: result.transcripts,
}),
).toContain("| plan-mode-boundaries.jsonl | 3 | | none, none, none |");
});
});

View File

@@ -0,0 +1,270 @@
import fs from "node:fs/promises";
import path from "node:path";
import {
runRuntimeParityScenario,
type RuntimeId,
type RuntimeParityCell,
type RuntimeParityResult,
type RuntimeParityScenarioExecution,
} from "./runtime-parity.js";
/** Input for `runJsonlReplay`. */
export type JsonlReplayInput = {
/** Directory searched (including subdirectories) for `.jsonl` transcripts. */
directory: string;
/** Runtime pair to compare; only the `pi`/`codex` ordering is accepted. */
runtimePair: ["pi", "codex"];
/** Provider mode handed through to the cell runner. */
providerMode: "mock-openai" | "live-frontier";
};
/** One replayable user turn extracted from a transcript. */
export type JsonlReplayTurn = {
/** 1-based position among the extracted user turns. */
turn: number;
/** 1-based line number of the user row within the transcript text. */
lineNumber: number;
/** Trimmed text content of the user message. */
userText: string;
/**
 * Every parseable JSON-object row up to and including this turn's row,
 * joined with newlines and terminated by a trailing newline.
 */
transcriptPrefix: string;
};
/**
 * Callback that executes a single turn for one runtime. `runJsonlReplay`
 * passes it to `runRuntimeParityScenario` for each extracted user turn.
 */
export type JsonlReplayCellRunner = (params: {
runtime: RuntimeId;
transcriptPath: string;
turn: JsonlReplayTurn;
turns: readonly JsonlReplayTurn[];
providerMode: JsonlReplayInput["providerMode"];
}) => Promise<RuntimeParityScenarioExecution>;
/** Result of `runJsonlReplay`: one entry per transcript file. */
export type JsonlReplayResult = {
transcripts: Array<{
transcriptPath: string;
/** Number of user turns extracted from the transcript. */
userTurnCount: number;
/** Per-runtime cells, one per user turn, in turn order. */
cells: { pi: RuntimeParityCell[]; codex: RuntimeParityCell[] };
/** Drift classification per user turn, in turn order. */
drift: Array<RuntimeParityResult["drift"]>;
/** Turn number of the first drift other than "none"; omitted when every turn is "none". */
firstDriftAtTurn?: number;
}>;
};
/** Options for `runJsonlReplay`. */
export type JsonlReplayOptions = {
/** Cell runner to use; when omitted, the fallback runner fails with an error. */
runCell?: JsonlReplayCellRunner;
};
/** Data rendered by `renderJsonlReplayMarkdownReport`. */
export type JsonlReplayMarkdownReport = {
generatedAt: string;
providerMode: JsonlReplayInput["providerMode"];
runtimePair: JsonlReplayInput["runtimePair"];
transcripts: JsonlReplayResult["transcripts"];
};
/** Type guard: true for any non-null object that is not an array. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
/** Returns the trimmed string, or `undefined` for non-strings and blank strings. */
function readString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
/**
 * Finds the message object for a transcript row: the nested `message` record
 * when present, otherwise the row itself if it carries a non-blank `role`.
 */
function readReplayMessage(record: Record<string, unknown>): Record<string, unknown> | undefined {
  const nested = record.message;
  if (isRecord(nested)) {
    return nested;
  }
  if (readString(record.role) === undefined) {
    return undefined;
  }
  return record;
}
/** Returns the message role trimmed and lower-cased, or `undefined` when absent or blank. */
function readRole(message: Record<string, unknown>) {
  const role = readString(message.role);
  return role === undefined ? undefined : role.toLowerCase();
}
/**
 * Decides whether a content block contributes text: blocks without a usable
 * `type` count as text, as do the listed text-style types (case-insensitive).
 */
function isTextLikeContentBlock(block: Record<string, unknown>) {
  const blockType = readString(block.type)?.toLowerCase();
  switch (blockType) {
    case undefined:
    case "text":
    case "input_text":
    case "message":
    case "output_text":
    case "user_text":
      return true;
    default:
      return false;
  }
}
/**
 * Flattens message content into plain text. A string is trimmed and returned;
 * an array contributes its non-blank string entries plus the `text` (or, failing
 * that, `content`) of each text-like block, joined by newlines. Anything else
 * yields an empty string.
 */
function extractTextContent(content: unknown): string {
  if (typeof content === "string") {
    return content.trim();
  }
  if (!Array.isArray(content)) {
    return "";
  }
  const fragments = content.flatMap((block: unknown): string[] => {
    if (typeof block === "string") {
      const trimmed = block.trim();
      return trimmed ? [trimmed] : [];
    }
    if (isRecord(block) && isTextLikeContentBlock(block)) {
      const text = readString(block.text) ?? readString(block.content);
      return text ? [text] : [];
    }
    // Non-text blocks (e.g. tool results) and non-object entries contribute nothing.
    return [];
  });
  return fragments.join("\n").trim();
}
/**
 * Scans JSONL transcript text and returns one entry per user message that has
 * non-blank text. Blank lines, unparseable lines and rows that are not JSON
 * objects are skipped and never appear in any `transcriptPrefix`.
 *
 * @param transcriptBytes Transcript text with LF or CRLF line endings.
 * @returns User turns in file order, numbered from 1.
 */
export function extractJsonlReplayUserTurns(transcriptBytes: string): JsonlReplayTurn[] {
  const userTurns: JsonlReplayTurn[] = [];
  const keptRows: string[] = [];

  transcriptBytes.split(/\r?\n/u).forEach((rawLine, lineIndex) => {
    const row = rawLine.trim();
    if (row === "") {
      return;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(row) as unknown;
    } catch {
      return;
    }
    if (!isRecord(parsed)) {
      return;
    }

    // Every parseable object row joins the prefix, whatever its role.
    keptRows.push(row);

    const message = readReplayMessage(parsed);
    if (message === undefined || readRole(message) !== "user") {
      return;
    }
    const userText = extractTextContent(message.content);
    if (userText === "") {
      return;
    }

    userTurns.push({
      turn: userTurns.length + 1,
      lineNumber: lineIndex + 1,
      userText,
      transcriptPrefix: `${keptRows.join("\n")}\n`,
    });
  });

  return userTurns;
}
/**
 * Recursively collects the paths of `.jsonl` files under `directory`. Entries
 * are visited in `localeCompare` name order, and a subdirectory's files are
 * emitted at the subdirectory's own position in that order.
 */
async function listJsonlFiles(directory: string): Promise<string[]> {
  const dirents = await fs.readdir(directory, { withFileTypes: true });
  const ordered = dirents.toSorted((a, b) => a.name.localeCompare(b.name));
  const collected: string[] = [];
  for (const dirent of ordered) {
    const fullPath = path.join(directory, dirent.name);
    if (dirent.isDirectory()) {
      const nested = await listJsonlFiles(fullPath);
      collected.push(...nested);
    } else if (dirent.isFile() && dirent.name.endsWith(".jsonl")) {
      collected.push(fullPath);
    }
  }
  return collected;
}
/**
 * Fallback cell runner used when `runJsonlReplay` is called without
 * `options.runCell`. It never produces a result: it always fails with an error
 * telling the caller to supply a runner.
 *
 * Declared `async` so the failure arrives as a rejected promise, matching the
 * declared `Promise` return type, rather than as a synchronous throw that a
 * caller chaining `.catch()` would miss.
 *
 * @returns A promise that always rejects.
 */
async function defaultRunCell(): Promise<RuntimeParityScenarioExecution> {
  throw new Error(
    "jsonl replay requires a runtime cell runner; CLI/suite wiring should provide the Phase 1 runtime parity runner",
  );
}
/**
 * Guards the replay entry point: only the `pi` then `codex` ordering is accepted.
 *
 * @throws Error naming the rejected pair when it is anything else.
 */
function assertSupportedRuntimePair(runtimePair: JsonlReplayInput["runtimePair"]) {
  const isPiThenCodex = runtimePair[0] === "pi" && runtimePair[1] === "codex";
  if (isPiThenCodex) {
    return;
  }
  throw new Error(`unsupported jsonl replay runtime pair: ${runtimePair.join(",")}`);
}
/**
 * Creates a cell runner that performs no real runtime work. Each call reports a
 * passing scenario whose cell echoes the turn's transcript prefix, has no tool
 * calls, and uses a final text derived only from the turn number, so the output
 * differs between runtimes only in the `runtime` field.
 */
export function createMockJsonlReplayCellRunner(): JsonlReplayCellRunner {
  const outputTokens = 8;
  return async ({ runtime, turn }) => {
    // Input token estimate: about four characters per token, never below one.
    const inputTokens = Math.max(1, Math.ceil(turn.transcriptPrefix.length / 4));
    return {
      scenarioStatus: "pass",
      cell: {
        runtime,
        transcriptBytes: turn.transcriptPrefix,
        toolCalls: [],
        finalText: `Replayed curated turn ${turn.turn}.`,
        usage: {
          inputTokens,
          outputTokens,
          totalTokens: inputTokens + outputTokens,
        },
        wallClockMs: 1,
        bootStateLines: [],
      },
    };
  };
}
/**
 * Replays every `.jsonl` transcript under `input.directory`. Each user turn is
 * run through `runRuntimeParityScenario`; the per-runtime cells and the drift
 * value for that turn are recorded. Transcripts and turns are processed one at
 * a time, and replay continues past the first drifting turn.
 *
 * @param input Transcript directory, runtime pair and provider mode.
 * @param options Optional cell runner; without one the fallback runner fails.
 * @returns One result entry per transcript, in listing order.
 * @throws Error when the runtime pair is not `pi`,`codex`.
 */
export async function runJsonlReplay(
  input: JsonlReplayInput,
  options: JsonlReplayOptions = {},
): Promise<JsonlReplayResult> {
  assertSupportedRuntimePair(input.runtimePair);

  const runCell = options.runCell ?? defaultRunCell;
  const transcriptPaths = await listJsonlFiles(path.resolve(input.directory));
  const transcripts: JsonlReplayResult["transcripts"] = [];

  for (const transcriptPath of transcriptPaths) {
    const turns = extractJsonlReplayUserTurns(await fs.readFile(transcriptPath, "utf8"));
    const scenarioPrefix = path.basename(transcriptPath);
    const piCells: RuntimeParityCell[] = [];
    const codexCells: RuntimeParityCell[] = [];
    const drift: Array<RuntimeParityResult["drift"]> = [];
    let firstDriftAtTurn: number | undefined;

    for (const turn of turns) {
      const parity = await runRuntimeParityScenario({
        scenarioId: `${scenarioPrefix}#turn-${turn.turn}`,
        // Kept async so a runner that throws synchronously still surfaces as a rejection.
        runCell: async (runtime) =>
          runCell({
            runtime,
            transcriptPath,
            turn,
            turns,
            providerMode: input.providerMode,
          }),
      });
      piCells.push(parity.cells.pi);
      codexCells.push(parity.cells.codex);
      drift.push(parity.drift);
      if (parity.drift !== "none") {
        firstDriftAtTurn ??= turn.turn;
      }
    }

    const entry: JsonlReplayResult["transcripts"][number] = {
      transcriptPath,
      userTurnCount: turns.length,
      cells: { pi: piCells, codex: codexCells },
      drift,
    };
    // Leave the key out entirely when nothing drifted.
    if (firstDriftAtTurn !== undefined) {
      entry.firstDriftAtTurn = firstDriftAtTurn;
    }
    transcripts.push(entry);
  }

  return { transcripts };
}
/**
 * Renders the replay result as markdown: a title naming both runtimes, a
 * bullet summary (timestamp, provider mode, transcript / user-turn / drifted
 * counts) and a table with one row per transcript. The first-drift cell is
 * left empty for transcripts without drift, and transcripts are shown by file
 * name only.
 *
 * @returns Markdown text ending in exactly one trailing newline.
 */
export function renderJsonlReplayMarkdownReport(report: JsonlReplayMarkdownReport): string {
  let userTurnTotal = 0;
  let driftedCount = 0;
  for (const entry of report.transcripts) {
    userTurnTotal += entry.userTurnCount;
    if (entry.firstDriftAtTurn !== undefined) {
      driftedCount += 1;
    }
  }

  const header = [
    `# OpenClaw JSONL Replay Report - ${report.runtimePair[0]} vs ${report.runtimePair[1]}`,
    "",
    `- Generated at: ${report.generatedAt}`,
    `- Provider mode: ${report.providerMode}`,
    `- Transcripts: ${report.transcripts.length}`,
    `- User turns: ${userTurnTotal}`,
    `- Drifted transcripts: ${driftedCount}`,
    "",
    "| Transcript | User turns | First drift turn | Drift sequence |",
    "| --- | ---: | ---: | --- |",
  ];

  const rows = report.transcripts.map((entry) => {
    const fileName = path.basename(entry.transcriptPath);
    const firstDrift = entry.firstDriftAtTurn ?? "";
    const driftSequence = entry.drift.join(", ");
    return `| ${fileName} | ${entry.userTurnCount} | ${firstDrift} | ${driftSequence} |`;
  });

  const body = [...header, ...rows].join("\n");
  return `${body.trimEnd()}\n`;
}

View File

@@ -0,0 +1,6 @@
{"message":{"role":"system","content":"Curated replay fixture: approval denial then safe retry."}}
{"message":{"role":"user","content":"Read the guarded config file, but do not modify anything without approval."}}
{"message":{"role":"assistant","content":"I will request the read and stop before any write."}}
{"message":{"role":"tool","content":"approval denied for write attempt; read-only path remains allowed"}}
{"message":{"role":"user","content":"Retry with read-only inspection and summarize the safe path."}}
{"message":{"role":"assistant","content":"The safe path is read-only inspection followed by a summary."}}

View File

@@ -0,0 +1,6 @@
{"message":{"role":"system","content":"Curated replay fixture: gateway restart recovery."}}
{"message":{"role":"user","content":"Start a long workspace check and record the marker RESTART-RECOVERY-A."}}
{"message":{"role":"assistant","content":"Started the check and recorded RESTART-RECOVERY-A."}}
{"message":{"role":"tool","content":"gateway restarted before final delivery; session state recovered"}}
{"message":{"role":"user","content":"Continue after the restart and confirm no duplicate marker was sent."}}
{"message":{"role":"assistant","content":"Recovered after restart and did not duplicate the marker."}}

View File

@@ -0,0 +1,8 @@
{"message":{"role":"system","content":"Curated JSONL replay fixture: plan-mode boundary smoke. Synthetic data only."}}
{"message":{"role":"tool","toolName":"status","content":"prelude status row before any user turn"}}
{"message":{"role":"user","content":"Enter plan mode and report the mode fields only."}}
{"message":{"role":"assistant","content":"inPlanMode=true mode=plan"}}
{"message":{"role":"user","content":[{"type":"text","text":"Attempt a write while plan approval is pending."},{"type":"tool_result","content":"ignored tool result block"}]}}
{"message":{"role":"assistant","content":"The write is blocked while plan approval is pending."}}
{"message":{"role":"user","content":"Exit plan mode and stop after reporting status."}}
{"message":{"role":"assistant","content":"inPlanMode=false mode=normal"}}

View File

@@ -0,0 +1,6 @@
{"message":{"role":"system","content":"Curated replay fixture: plugin lifecycle with searchable tools."}}
{"message":{"role":"user","content":"Install the demo plugin and discover its OpenClaw integration tool before calling it."}}
{"message":{"role":"assistant","content":"The plugin is installed; I will discover the integration tool before use."}}
{"message":{"role":"tool","content":"searchable tool discovery returned openclaw.demo.status"}}
{"message":{"role":"user","content":"Call the discovered status tool and summarize the result."}}
{"message":{"role":"assistant","content":"The discovered plugin status tool returned healthy."}}

View File

@@ -0,0 +1,4 @@
{"message":{"role":"system","content":"Curated JSONL replay fixture: partial recovery session. Synthetic data only."}}
{"message":{"role":"user","content":"A runtime produced an empty assistant response after a safe read. Summarize the recovery plan."}}
{"message":{"role":"assistant","content":"Retry once with the same safe read, preserve tool results, and avoid mutating actions."}}
{"message":{"role":"user","content":"Now replay the follow-up with only the safe read context and no network dependency."}}

View File

@@ -0,0 +1,7 @@
{"message":{"role":"system","content":"Curated JSONL replay fixture: repository triage. Synthetic data only; no private transcript content."}}
{"message":{"role":"user","content":"Review the QA fixture index and identify one missing runtime coverage row."}}
{"message":{"role":"assistant","content":[{"type":"tool_use","id":"fixture_tool_1","name":"read","input":{"path":"qa/scenarios/index.md"}}]}}
{"message":{"role":"tool","toolName":"read","content":"Runtime coverage index includes basic channel and model rows."}}
{"message":{"role":"assistant","content":"The index has channel and model rows; runtime replay coverage is a good follow-up."}}
{"message":{"role":"user","content":"Draft the smallest next test without editing production wiring."}}
{"message":{"role":"assistant","content":"Add a fixture-only replay test and leave CLI wiring for the parent branch."}}

View File

@@ -0,0 +1,6 @@
{"message":{"role":"system","content":"Curated replay fixture: workspace edit loop."}}
{"message":{"role":"user","content":"Read README.md, patch the typo in the local draft, and show the diff summary."}}
{"message":{"role":"assistant","content":"I read README.md, applied the local typo patch, and summarized the diff."}}
{"message":{"role":"tool","content":"patch applied to draft.md with one changed line"}}
{"message":{"role":"user","content":"Run the lightweight check and tell me whether the patch is safe."}}
{"message":{"role":"assistant","content":"The lightweight check passed and the patch is safe."}}