Compare commits

..

1 Commits

Author SHA1 Message Date
Peter Steinberger
a28dce4bdf fix(slack): stop logging inbound message previews 2026-06-23 23:30:26 -07:00
36 changed files with 223 additions and 1228 deletions

1
.github/labeler.yml vendored
View File

@@ -118,7 +118,6 @@
- any-glob-to-any-file:
- "extensions/qa-lab/**"
- "qa/scenarios/**"
- "docs/maturity/**"
- "docs/concepts/qa-e2e-automation.md"
- "docs/concepts/personal-agent-benchmark-pack.md"
- "docs/channels/qa-channel.md"

View File

@@ -24,14 +24,6 @@ This directory owns docs authoring, Mintlify link rules, and docs i18n policy.
- `scripts/docs-sync-publish.mjs` excludes and prunes `docs/internal/**` from the public `openclaw/docs` publish repo if a page is force-added later.
- Internal docs may mention repo paths, private app names, 1Password item names, and runbooks, but never include secret values.
## Maturity Scorecard Editing
`taxonomy.yaml` and `qa/maturity-scores.yaml` are the source inputs; generated maturity docs under `docs/maturity/` are projections and should not be hand-edited for score, LTS, taxonomy, QA profile, or evidence tables.
`scripts/qa/render-maturity-docs.ts` owns generation; use `pnpm maturity:render` to refresh committed docs and `pnpm maturity:check` to verify them.
`.github/workflows/maturity-scorecard.yml` renders artifact previews and can open generated-doc PRs; `.github/workflows/openclaw-release-checks.yml` dispatches it for release QA.
Keep deterministic `qa-evidence.json.scorecard` data in GitHub Actions artifacts unless a maintainer explicitly asks for a sanitized committed projection.
Human overrides must change source state in a PR and explain the reason plus public or redacted evidence.
## Docs i18n
- Foreign-language docs are not maintained in this repo. The generated publish output lives in the separate `openclaw/docs` repo (often cloned locally as `../openclaw-docs`).

View File

@@ -68,7 +68,7 @@ Slim evidence omits per-entry `execution` and sets `evidenceMode: "slim"`;
```bash
pnpm openclaw qa run \
--qa-profile smoke-ci \
--category channel-framework.conversation-routing-and-delivery \
--category agent-runtime-and-provider-execution.agent-turn-execution \
--provider-mode mock-openai \
--output-dir .artifacts/qa-e2e/smoke-ci-profile-dispatch
```
@@ -966,7 +966,6 @@ output and whose artifact paths are resolved relative to that producer
`qa run --qa-profile`, the same `qa-evidence.json` also includes the profile
scorecard summary for the selected taxonomy categories.
Treat it as a discovery aid, not a gate replacement; the selected scenario still needs the right provider mode, live transport, Multipass, Testbox, or release lane for the behavior under test.
For scorecard context, see [Maturity scorecard](/maturity/scorecard).
For character and style checks, run the same scenario across multiple live model
refs and write a judged Markdown report:
@@ -1024,7 +1023,6 @@ When no `--judge-model` is passed, the judges default to
## Related docs
- [Matrix QA](/concepts/qa-matrix)
- [Maturity scorecard](/maturity/scorecard)
- [Personal agent benchmark pack](/concepts/personal-agent-benchmark-pack)
- [QA Channel](/channels/qa-channel)
- [Testing](/help/testing)

View File

@@ -20,7 +20,6 @@ of Docker runners. This doc is a "how we test" guide:
- [QA overview](/concepts/qa-e2e-automation) - architecture, command surface, scenario authoring.
- [Matrix QA](/concepts/qa-matrix) - reference for `pnpm openclaw qa matrix`.
- [Maturity scorecard](/maturity/scorecard) - how release QA evidence supports stability and LTS decisions.
- [QA channel](/channels/qa-channel) - the synthetic transport plugin used by repo-backed scenarios.
This page covers running the regular test suites and Docker/Parallels runners. The QA-specific runners section below ([QA-specific runners](#qa-specific-runners)) lists the concrete `qa` invocations and points back at the references above.

View File

@@ -121,46 +121,6 @@ describe("compileMemoryWikiVault", () => {
).resolves.toContain('"text":"Alpha is the canonical source page."');
});
it("discovers pages in nested subdirectories during compile", async () => {
const { rootDir, config } = await createVault({
rootDir: nextCaseRoot(),
initialize: true,
});
await fs.mkdir(path.join(rootDir, "sources", "sub"), { recursive: true });
await fs.writeFile(
path.join(rootDir, "sources", "top.md"),
renderWikiMarkdown({
frontmatter: { pageType: "source", id: "source.top", title: "Top Source" },
body: "# Top Source\n",
}),
"utf8",
);
await fs.writeFile(
path.join(rootDir, "sources", "sub", "nested.md"),
renderWikiMarkdown({
frontmatter: { pageType: "source", id: "source.nested", title: "Nested Source" },
body: "# Nested Source\n",
}),
"utf8",
);
const result = await compileMemoryWikiVault(config);
expect(result.pageCounts.source).toBe(2);
// Root index should link to both
await expect(fs.readFile(path.join(rootDir, "index.md"), "utf8")).resolves.toContain(
"[Top Source](sources/top.md)",
);
await expect(fs.readFile(path.join(rootDir, "index.md"), "utf8")).resolves.toContain(
"[Nested Source](sources/sub/nested.md)",
);
// Sources index should link to nested file
await expect(fs.readFile(path.join(rootDir, "sources", "index.md"), "utf8")).resolves.toContain(
"[Nested Source](sub/nested.md)",
);
});
it("renders native directory index links relative to each generated index", async () => {
const { rootDir, config } = await createVault({
rootDir: nextCaseRoot(),

View File

@@ -364,15 +364,10 @@ export type RefreshMemoryWikiIndexesResult = {
async function collectMarkdownFiles(rootDir: string, relativeDir: string): Promise<string[]> {
const dirPath = path.join(rootDir, relativeDir);
const entries = await fs
.readdir(dirPath, { withFileTypes: true, recursive: true })
.catch(() => []);
const entries = await fs.readdir(dirPath, { withFileTypes: true }).catch(() => []);
return entries
.filter((entry) => entry.isFile() && entry.name.endsWith(".md"))
.map((entry) => {
const absPath = path.join(entry.parentPath ?? dirPath, entry.name);
return path.relative(rootDir, absPath).split(path.sep).join("/");
})
.map((entry) => path.join(relativeDir, entry.name))
.filter((relativePath) => path.basename(relativePath) !== "index.md")
.toSorted((left, right) => left.localeCompare(right));
}

View File

@@ -1067,35 +1067,6 @@ describe("searchMemoryWiki", () => {
]);
});
it("discovers pages in nested subdirectories", async () => {
const { rootDir, config } = await createQueryVault({
initialize: true,
});
await fs.mkdir(path.join(rootDir, "sources", "sub"), { recursive: true });
await fs.writeFile(
path.join(rootDir, "sources", "top.md"),
renderWikiMarkdown({
frontmatter: { pageType: "source", id: "source.top", title: "Top Source" },
body: "# Top Source\n",
}),
"utf8",
);
await fs.writeFile(
path.join(rootDir, "sources", "sub", "nested.md"),
renderWikiMarkdown({
frontmatter: { pageType: "source", id: "source.nested", title: "Nested Source" },
body: "# Nested Source\n",
}),
"utf8",
);
const results = await searchMemoryWiki({ config, query: "Source" });
expect(results).toHaveLength(2);
const paths = results.map((r) => r.path).toSorted();
expect(paths).toEqual(["sources/sub/nested.md", "sources/top.md"]);
});
it("drops gateway-style owner-qualified session hits that collide with the scoped store", async () => {
const { config } = await createQueryVault({
initialize: true,

View File

@@ -245,17 +245,12 @@ async function listWikiMarkdownFiles(rootDir: string): Promise<string[]> {
await Promise.all(
QUERY_DIRS.map(async (relativeDir) => {
const dirPath = path.join(rootDir, relativeDir);
const entries = await fs
.readdir(dirPath, { withFileTypes: true, recursive: true })
.catch(() => []);
const entries = await fs.readdir(dirPath, { withFileTypes: true }).catch(() => []);
return entries
.filter(
(entry) => entry.isFile() && entry.name.endsWith(".md") && entry.name !== "index.md",
)
.map((entry) => {
const absPath = path.join(entry.parentPath ?? dirPath, entry.name);
return path.relative(rootDir, absPath).split(path.sep).join("/");
});
.map((entry) => path.join(relativeDir, entry.name));
}),
)
).flat();

View File

@@ -92,39 +92,6 @@ describe("resolveMemoryWikiStatus", () => {
expect(status.warnings.map((warning) => warning.code)).toContain("bridge-artifacts-missing");
});
it("discovers pages in nested subdirectories", async () => {
const { rootDir, config } = await createVault({
prefix: "memory-wiki-nested-",
initialize: true,
});
await fs.mkdir(path.join(rootDir, "sources", "sub"), { recursive: true });
await fs.writeFile(
path.join(rootDir, "sources", "top.md"),
renderWikiMarkdown({
frontmatter: { pageType: "source", id: "source.top", title: "Top Source" },
body: "# Top Source\n",
}),
"utf8",
);
await fs.writeFile(
path.join(rootDir, "sources", "sub", "nested.md"),
renderWikiMarkdown({
frontmatter: { pageType: "source", id: "source.nested", title: "Nested Source" },
body: "# Nested Source\n",
}),
"utf8",
);
const status = await resolveMemoryWikiStatus(config, {
pathExists: async () => true,
resolveCommand: async () => null,
});
expect(status.pageCounts.source).toBe(2);
expect(status.sourceCounts.native).toBe(2);
});
it("counts source provenance from the vault", async () => {
const { rootDir, config } = await createVault({
prefix: "memory-wiki-status-",

View File

@@ -87,28 +87,26 @@ async function collectVaultCounts(vaultPath: string): Promise<{
};
const dirs = ["entities", "concepts", "sources", "syntheses", "reports"] as const;
for (const dir of dirs) {
const dirPath = path.join(vaultPath, dir);
const entries = await fs
.readdir(dirPath, { withFileTypes: true, recursive: true })
.readdir(path.join(vaultPath, dir), { withFileTypes: true })
.catch(() => []);
for (const entry of entries) {
if (!entry.isFile() || !entry.name.endsWith(".md") || entry.name === "index.md") {
continue;
}
const absolutePath = path.join(entry.parentPath ?? dirPath, entry.name);
const relativeToVault = path.relative(vaultPath, absolutePath).split(path.sep).join("/");
const kind = inferWikiPageKind(relativeToVault);
const kind = inferWikiPageKind(path.join(dir, entry.name));
if (kind) {
pageCounts[kind] += 1;
}
if (dir === "sources") {
const absolutePath = path.join(vaultPath, dir, entry.name);
const raw = await fs.readFile(absolutePath, "utf8").catch(() => null);
if (!raw) {
continue;
}
const page = toWikiPageSummary({
absolutePath,
relativePath: relativeToVault,
relativePath: path.join(dir, entry.name),
raw,
});
if (!page) {

View File

@@ -85,15 +85,6 @@ describe("buildWelcomeCard", () => {
expect(actions[0]?.title).toBe("What can you do?");
});
it("styles the heading with valid PascalCase Adaptive Card enum values", () => {
// Lowercase weight/size fall back to Default in the Teams renderer, so the heading must use the
// schema's PascalCase enums to render bold/medium.
const card = buildWelcomeCard();
const heading = (card.body as Array<{ weight?: string; size?: string }>)[0];
expect(heading?.weight).toBe("Bolder");
expect(heading?.size).toBe("Medium");
});
it("uses custom bot name", () => {
const card = buildWelcomeCard({ botName: "TestBot" });
const body = card.body as Array<{ text: string }>;

View File

@@ -31,10 +31,8 @@ export function buildWelcomeCard(options?: WelcomeCardOptions): Record<string, u
{
type: "TextBlock",
text: `Hi! I'm ${botName}.`,
// Adaptive Card TextWeight/TextSize enums are PascalCase ("Bolder"/"Medium"); lowercase
// values fall back to Default, so the greeting rendered unstyled (matches polls/presentation).
weight: "Bolder",
size: "Medium",
weight: "bolder",
size: "medium",
},
{
type: "TextBlock",

View File

@@ -382,7 +382,7 @@ function buildOpenShellPolicyYaml(params: {
filesystem_policy:
include_workdir: true
read_only: [/usr, /lib, /proc, /dev/urandom, /app, /etc, /var/log, /opt]
read_only: [/usr, /lib, /proc, /dev/urandom, /app, /etc, /var/log]
read_write: [/sandbox, /tmp, /dev/null]
landlock:

View File

@@ -45,21 +45,6 @@ type PendingExec = {
};
const MATERIALIZED_SKILLS_REMOTE_PARTS = [".openclaw", "sandbox-skills"] as const;
export function buildOpenShellDirectoryUploadArgs(params: {
sandboxName: string;
localPath: string;
remotePath: string;
}): string[] {
return [
"sandbox",
"upload",
"--no-git-ignore",
params.sandboxName,
params.localPath,
normalizeRemotePath(params.remotePath),
];
}
export const PINNED_REMOTE_PATH_MUTATION_SCRIPT = [
"set -eu",
'die() { echo "$1" >&2; exit 1; }',
@@ -753,26 +738,26 @@ class OpenShellSandboxBackendImpl {
async ({ dir: tmpDir }) => {
// Stage a symlink-free snapshot so upload never dereferences host paths
// outside the mirrored workspace tree.
const remoteRootName = path.posix.basename(normalizeRemotePath(remotePath));
const remoteRootName = path.posix.basename(remotePath);
const stagedRoot = path.join(tmpDir, remoteRootName);
await stageDirectoryContents({
sourceDir: localPath,
targetDir: stagedRoot,
});
const stagedEntries = (await fs.readdir(stagedRoot)).toSorted();
for (const entry of stagedEntries) {
const result = await runOpenShellCli({
context: this.params.execContext,
args: buildOpenShellDirectoryUploadArgs({
sandboxName: this.params.execContext.sandboxName,
localPath: path.join(stagedRoot, entry),
remotePath,
}),
cwd: this.params.createParams.workspaceDir,
});
if (result.code !== 0) {
throw new Error(result.stderr.trim() || "openshell sandbox upload failed");
}
const result = await runOpenShellCli({
context: this.params.execContext,
args: [
"sandbox",
"upload",
"--no-git-ignore",
this.params.execContext.sandboxName,
stagedRoot,
path.posix.dirname(remotePath),
],
cwd: this.params.createParams.workspaceDir,
});
if (result.code !== 0) {
throw new Error(result.stderr.trim() || "openshell sandbox upload failed");
}
},
);

View File

@@ -29,7 +29,6 @@ const cliMocks = vi.hoisted(() => ({
let createOpenShellSandboxBackendManager: typeof import("./backend.js").createOpenShellSandboxBackendManager;
let createOpenShellSandboxBackendFactory: typeof import("./backend.js").createOpenShellSandboxBackendFactory;
let buildOpenShellDirectoryUploadArgs: typeof import("./backend.js").buildOpenShellDirectoryUploadArgs;
let ensureOpenShellRemoteRealDirectoryScript: typeof import("./backend.js").ENSURE_OPEN_SHELL_REMOTE_REAL_DIRECTORY_SCRIPT;
describe("openshell cli helpers", () => {
@@ -173,7 +172,6 @@ describe("openshell backend manager", () => {
};
});
({
buildOpenShellDirectoryUploadArgs,
ENSURE_OPEN_SHELL_REMOTE_REAL_DIRECTORY_SCRIPT: ensureOpenShellRemoteRealDirectoryScript,
createOpenShellSandboxBackendFactory,
createOpenShellSandboxBackendManager,
@@ -189,30 +187,6 @@ describe("openshell backend manager", () => {
vi.clearAllMocks();
});
it("uploads staged directory snapshots to the managed remote directory itself", () => {
expect(
buildOpenShellDirectoryUploadArgs({
sandboxName: "openclaw-session",
localPath: "/tmp/openclaw-upload/sandbox/seed.txt",
remotePath: "/sandbox",
}),
).toEqual([
"sandbox",
"upload",
"--no-git-ignore",
"openclaw-session",
"/tmp/openclaw-upload/sandbox/seed.txt",
"/sandbox",
]);
expect(
buildOpenShellDirectoryUploadArgs({
sandboxName: "openclaw-session",
localPath: "/tmp/openclaw-upload/project",
remotePath: "/sandbox/./project",
}).at(-1),
).toBe("/sandbox/project");
});
it.runIf(process.platform !== "win32")(
"preserves caller positional args after OpenShell remote directory validation",
async () => {

View File

@@ -431,8 +431,8 @@ describe("qa cli runtime", () => {
repoRoot: "/tmp/openclaw-repo",
outputDir: ".artifacts/qa-e2e/smoke-ci",
profile: "smoke-ci",
surface: "channel-framework",
category: "channel-framework.conversation-routing-and-delivery",
surface: "agent-runtime-and-provider-execution",
category: "agent-runtime-and-provider-execution.agent-turn-execution",
scenarioIds: ["dm-chat-baseline"],
transportId: "qa-channel",
fastMode: true,
@@ -482,7 +482,7 @@ describe("qa cli runtime", () => {
expect(evidence.scorecard).not.toHaveProperty("profile");
expect(evidence.scorecard?.features?.fulfilled).toBe(0);
expect(evidence.scorecard?.categoryReports?.[0]).toMatchObject({
id: "channel-framework.conversation-routing-and-delivery",
id: "agent-runtime-and-provider-execution.agent-turn-execution",
features: {
fulfilled: 0,
},
@@ -595,11 +595,11 @@ describe("qa cli runtime", () => {
runQaProfileCommand({
repoRoot: "/tmp/openclaw-repo",
profile: "smoke-ci",
category: "channel-framework.conversation-routing-and-delivery",
category: "agent-runtime-and-provider-execution.agent-turn-execution",
scenarioIds: ["not-a-real-scenario"],
}),
).rejects.toThrow(
"qa run did not find taxonomy scenarios for --qa-profile smoke-ci --category channel-framework.conversation-routing-and-delivery --scenario not-a-real-scenario.",
"qa run did not find taxonomy scenarios for --qa-profile smoke-ci --category agent-runtime-and-provider-execution.agent-turn-execution --scenario not-a-real-scenario.",
);
expect(runQaSuite).not.toHaveBeenCalled();
});

View File

@@ -214,9 +214,9 @@ describe("qa cli registration", () => {
"--qa-profile",
"smoke-ci",
"--surface",
"channel-framework",
"agent-runtime-and-provider-execution",
"--category",
"channel-framework.conversation-routing-and-delivery",
"agent-runtime-and-provider-execution.agent-turn-execution",
"--scenario",
"dm-chat-baseline",
"--evidence-mode",
@@ -239,8 +239,8 @@ describe("qa cli registration", () => {
repoRoot: "/tmp/openclaw-repo",
outputDir: ".artifacts/qa-e2e/smoke-ci",
profile: "smoke-ci",
surface: "channel-framework",
category: "channel-framework.conversation-routing-and-delivery",
surface: "agent-runtime-and-provider-execution",
category: "agent-runtime-and-provider-execution.agent-turn-execution",
scenarioIds: ["dm-chat-baseline"],
evidenceMode: "slim",
transportId: "qa-channel",
@@ -257,7 +257,7 @@ describe("qa cli registration", () => {
it.each([
["--output-dir", [".artifacts/qa-e2e/smoke-ci"]],
["--surface", ["agent-runtime-and-provider-execution"]],
["--category", ["channel-framework.conversation-routing-and-delivery"]],
["--category", ["agent-runtime-and-provider-execution.agent-turn-execution"]],
["--scenario", ["dm-chat-baseline"]],
["--evidence-mode", ["slim"]],
["--exclude-test-execution-evidence", []],

View File

@@ -167,32 +167,6 @@ describe("qa suite", () => {
expect(qaSuiteProgressTesting.sanitizeQaSuiteProgressValue("\u0000\u0001")).toBe("<empty>");
});
it("includes effective channel driver in run start progress logs", () => {
expect(
qaSuiteProgressTesting.formatQaSuiteRunStartProgress({
selectedScenarioCount: 80,
concurrency: 8,
transportId: "qa-channel",
}),
).toBe("run start: scenarios=80 concurrency=8 transport=qa-channel");
expect(
qaSuiteProgressTesting.formatQaSuiteRunStartProgress({
selectedScenarioCount: 80,
concurrency: 1,
transportId: "qa-channel",
channelDriverSelection: {
capabilityMatrixPath: "crabline-fake-provider-capabilities.json",
channel: "telegram",
channelDriver: "crabline",
smokeArtifactPath: "crabline-fake-provider-smoke.json",
},
}),
).toBe(
"run start: scenarios=80 concurrency=1 transport=qa-channel channelDriver=crabline channel=telegram",
);
});
it("records gateway RSS peak and trace samples", () => {
expect(
qaSuiteProgressTesting.buildQaSuiteRuntimeMetrics({

View File

@@ -200,29 +200,6 @@ function writeQaSuiteProgress(enabled: boolean, message: string) {
process.stderr.write(`[qa-suite] ${message}\n`);
}
function formatQaSuiteRunStartProgress(params: {
selectedScenarioCount: number;
concurrency: number;
transportId: QaTransportId;
channelDriver?: QaScorecardChannelDriver | null;
channelDriverSelection?: OpenClawCrablineChannelDriverSelection | null;
}) {
const channelDriver = params.channelDriver ?? params.channelDriverSelection?.channelDriver;
const channel = params.channelDriverSelection?.channel;
const parts = [
`run start: scenarios=${params.selectedScenarioCount}`,
`concurrency=${params.concurrency}`,
`transport=${sanitizeQaSuiteProgressValue(params.transportId)}`,
];
if (channelDriver) {
parts.push(`channelDriver=${sanitizeQaSuiteProgressValue(channelDriver)}`);
}
if (channel) {
parts.push(`channel=${sanitizeQaSuiteProgressValue(channel)}`);
}
return parts.join(" ");
}
async function waitForQaLabReady(baseUrl: string, timeoutMs = 10_000) {
const startedAt = Date.now();
while (Date.now() - startedAt < timeoutMs) {
@@ -1208,13 +1185,7 @@ export async function runQaFlowSuite(params?: QaSuiteRunParams): Promise<QaSuite
const gatewayHeapCheckpointsEnabled = shouldCaptureGatewayHeapCheckpoints();
writeQaSuiteProgress(
progressEnabled,
formatQaSuiteRunStartProgress({
selectedScenarioCount: selectedScenarios.length,
concurrency,
transportId,
channelDriver: params?.channelDriver,
channelDriverSelection: params?.channelDriverSelection,
}),
`run start: scenarios=${selectedScenarios.length} concurrency=${concurrency} transport=${transportId}`,
);
const useIsolatedScenarioWorkers = shouldRunQaSuiteWithIsolatedScenarioWorkers({
scenarios: selectedScenarios,
@@ -1796,7 +1767,6 @@ export const qaSuiteProgressTesting = {
buildQaGatewayHeapCheckpointRuntimeEnvPatch,
buildQaIsolatedScenarioWorkerParams,
buildQaSuiteRuntimeMetrics,
formatQaSuiteRunStartProgress,
buildQaRuntimeEnvPatch,
mergeQaRuntimeEnvPatches,
parseQaSuiteBooleanEnv,

View File

@@ -31,7 +31,20 @@ import {
} from "./prepare.test-helpers.js";
import { clearSlackSubteamMentionCacheForTest } from "./subteam-mentions.js";
const enqueueSystemEventMock = vi.hoisted(() => vi.fn());
const { enqueueSystemEventMock, logVerboseMock, shouldLogVerboseMock } = vi.hoisted(() => ({
enqueueSystemEventMock: vi.fn(),
logVerboseMock: vi.fn(),
shouldLogVerboseMock: vi.fn(() => false),
}));
vi.mock("openclaw/plugin-sdk/runtime-env", async (importOriginal) => {
const actual = await importOriginal<typeof import("openclaw/plugin-sdk/runtime-env")>();
return {
...actual,
logVerbose: (...args: unknown[]) => logVerboseMock(...args),
shouldLogVerbose: () => shouldLogVerboseMock(),
};
});
vi.mock("openclaw/plugin-sdk/system-event-runtime", async (importOriginal) => {
const actual = await importOriginal<typeof import("openclaw/plugin-sdk/system-event-runtime")>();
@@ -54,6 +67,9 @@ describe("slack prepareSlackMessage inbound contract", () => {
clearSlackAllowFromCacheForTest();
clearSlackSubteamMentionCacheForTest();
enqueueSystemEventMock.mockClear();
logVerboseMock.mockClear();
shouldLogVerboseMock.mockReset();
shouldLogVerboseMock.mockReturnValue(false);
});
afterAll(() => {
@@ -171,6 +187,28 @@ describe("slack prepareSlackMessage inbound contract", () => {
expect(prepared.ctxPayload.BodyForAgent).toContain(body);
});
it("logs inbound metadata without logging message content", async () => {
const body = "confidential acquisition target: northstar; do not include this text in logs";
shouldLogVerboseMock.mockReturnValue(true);
const prepared = await prepareWithDefaultCtx(createSlackMessage({ text: body }));
assertPrepared(prepared);
const inboundLog = logVerboseMock.mock.calls
.map(([entry]) => entry)
.find((entry) => typeof entry === "string" && entry.startsWith("slack inbound:"));
const verboseOutput = logVerboseMock.mock.calls
.flat()
.filter((entry): entry is string => typeof entry === "string")
.join("\n");
expect(inboundLog).toBe(
`slack inbound: account=${prepared.route.accountId} agent=${prepared.route.agentId} channel=D123 message_ts=1.000 thread_ts=none from=slack:U1 chat=direct chars=${body.length}`,
);
expect(verboseOutput).not.toContain(body);
expect(verboseOutput).not.toContain("confidential acquisition target");
expect(verboseOutput).not.toContain("preview=");
});
it("prepares wildcard open-policy account DMs", async () => {
const ctx = createInboundSlackCtx({
cfg: {

View File

@@ -1386,7 +1386,9 @@ export async function prepareSlackMessage(params: {
}
if (shouldLogVerbose()) {
logVerbose(`slack inbound: channel=${message.channel} from=${slackFrom} preview="${preview}"`);
logVerbose(
`slack inbound: account=${route.accountId} agent=${route.agentId} channel=${message.channel} message_ts=${message.ts ?? "unknown"} thread_ts=${effectiveMessageThreadId ?? "none"} from=${slackFrom} chat=${chatType} chars=${rawBody.length}`,
);
}
const updateLastRouteSessionKey = resolveInboundLastRouteSessionKey({ route, sessionKey });

View File

@@ -1,19 +0,0 @@
// Error-format helper tests cover the non-Error cause stringifier contract.
import { describe, expect, it } from "vitest";
import { stringifyNonErrorCause } from "./error-format.js";
describe("stringifyNonErrorCause", () => {
it("returns a string for values JSON.stringify serializes to undefined", () => {
// JSON.stringify(fn|symbol|undefined) is undefined; the `string`-typed helper must not leak it.
expect(stringifyNonErrorCause(() => {})).toBe("[object Function]");
expect(stringifyNonErrorCause(Symbol("x"))).toBe("[object Symbol]");
expect(stringifyNonErrorCause(undefined)).toBe("[object Undefined]");
});
it("stringifies ordinary scalar and object causes", () => {
expect(stringifyNonErrorCause({ a: 1 })).toBe('{"a":1}');
expect(stringifyNonErrorCause("hi")).toBe("hi");
expect(stringifyNonErrorCause(42)).toBe("42");
expect(stringifyNonErrorCause(null)).toBe("null");
});
});

View File

@@ -75,9 +75,7 @@ export function stringifyNonErrorCause(value: unknown): string {
return String(value);
}
try {
// JSON.stringify returns undefined (not a string) for functions/symbols/undefined; fall back to
// a tag string so this `string`-typed helper never leaks undefined (matches src/infra/errors.ts).
return JSON.stringify(value) ?? Object.prototype.toString.call(value);
return JSON.stringify(value);
} catch {
return Object.prototype.toString.call(value);
}

View File

@@ -9,7 +9,7 @@ scenario:
- telemetry.prometheus-authenticated-gateway-export
secondary:
- harness.qa-lab
- docker.runtime-validation
- docker.e2e
objective: Verify a QA-lab gateway run emits protected, bounded Prometheus diagnostics metrics through the diagnostics-prometheus plugin.
successCriteria:
- The diagnostics-prometheus plugin exposes the protected scrape route.

View File

@@ -6,7 +6,7 @@ scenario:
runtimeParityTier: standard
coverage:
primary:
- runtime.multi-turn-continuity
- runtime.first-hour-20
secondary:
- runtime.long-context
objective: Verify both runtimes preserve a same-session conversation across the required 20-turn maintainer gate.

View File

@@ -6,7 +6,7 @@ scenario:
runtimeParityTier: soak
coverage:
primary:
- runtime.long-run-stability
- runtime.soak-100
secondary:
- runtime.long-context
objective: Provide an optional long-run soak that can be scheduled or run in Testbox without entering the maintainer default gate.

View File

@@ -117,18 +117,6 @@ export function resolvePrepackCommandTimeoutMs(env: NodeJS.ProcessEnv = process.
);
}
export function resolvePrepackCommandStdio(
options: SpawnSyncOptions,
env: NodeJS.ProcessEnv = process.env,
): SpawnSyncOptions["stdio"] {
const requestedStdio = options.stdio ?? "inherit";
const npmJsonOutput = env.npm_config_json === "true" || env.npm_config_json === "1";
if (npmJsonOutput && requestedStdio === "inherit") {
return ["inherit", 2, "inherit"];
}
return requestedStdio;
}
export function runPrepackCommand(
command: string,
args: string[],
@@ -136,10 +124,10 @@ export function runPrepackCommand(
): ReturnType<typeof spawnSync> {
const env = options.env ?? process.env;
return spawnSync(command, args, {
stdio: "inherit",
...options,
env,
killSignal: options.killSignal ?? "SIGKILL",
stdio: resolvePrepackCommandStdio(options, env),
timeout: options.timeout ?? resolvePrepackCommandTimeoutMs(env),
});
}

View File

@@ -4,7 +4,6 @@ import http from "node:http";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it, vi } from "vitest";
import { cleanupTempDirs, makeTempDir } from "../../test/helpers/temp-dir.js";
import { createBundleMcpJsonSchemaValidator } from "./agent-bundle-mcp-runtime.js";
import { cleanupBundleMcpHarness } from "./agent-bundle-mcp-test-harness.js";
import {
@@ -27,8 +26,6 @@ vi.mock("./embedded-agent-mcp.js", () => ({
}),
}));
const tempDirs: string[] = [];
type RuntimeFactoryOptions = NonNullable<
Parameters<typeof testing.createSessionMcpRuntimeManager>[0]
>;
@@ -40,12 +37,10 @@ async function writeListToolsMcpServer(params: {
filePath: string;
logPath: string;
delayMs?: number;
initializeDelayMs?: number;
hang?: boolean;
inputSchema?: unknown;
tools?: Array<{ name: string; description?: string; inputSchema?: unknown }>;
capabilities?: Record<string, unknown>;
notifyListChangedOnInitialized?: boolean;
listToolsMethodNotFound?: boolean;
callToolIsError?: boolean;
callToolJsonRpcError?: boolean;
@@ -58,10 +53,8 @@ import fs from "node:fs/promises";
const logPath = ${JSON.stringify(params.logPath)};
const delayMs = ${params.delayMs ?? 0};
const initializeDelayMs = ${params.initializeDelayMs ?? 0};
const hang = ${params.hang === true};
const capabilities = ${JSON.stringify(params.capabilities ?? { tools: {} })};
const notifyListChangedOnInitialized = ${params.notifyListChangedOnInitialized === true};
const listToolsMethodNotFound = ${params.listToolsMethodNotFound === true};
const tools = ${JSON.stringify(
params.tools ?? [
@@ -91,7 +84,7 @@ function handle(message) {
}
log("recv " + String(message.method ?? "unknown"));
if (message.method === "initialize") {
const response = {
send({
jsonrpc: "2.0",
id: message.id,
result: {
@@ -99,19 +92,10 @@ function handle(message) {
capabilities,
serverInfo: { name: "test-list-tools", version: "1.0.0" },
},
};
if (initializeDelayMs > 0) {
setTimeout(() => send(response), initializeDelayMs);
} else {
send(response);
}
});
return;
}
if (message.method === "notifications/initialized") {
if (notifyListChangedOnInitialized) {
log("notify tools/list_changed");
send({ jsonrpc: "2.0", method: "notifications/tools/list_changed" });
}
return;
}
if (message.method === "tools/list") {
@@ -297,7 +281,6 @@ function makeRuntime(
}
afterEach(async () => {
cleanupTempDirs(tempDirs);
await cleanupBundleMcpHarness();
});
@@ -2052,529 +2035,4 @@ process.stdin.on("end", () => {
}
},
);
it(
"parallelizes MCP server catalog loading across multiple slow servers",
{ timeout: LIST_TOOLS_TEST_DEADLINE_MS },
async () => {
const tempDir = makeTempDir(tempDirs, "bundle-mcp-parallel-");
const delays = [200, 400, 600];
const serverPaths = delays.map((delay, i) => {
const serverPath = path.join(tempDir, `slow-server-${i}.mjs`);
const logPath = path.join(tempDir, `server-${i}.log`);
return { serverPath, logPath, delay, serverName: `slowServer${i}` };
});
await Promise.all(
serverPaths.map(({ serverPath, logPath, delay }) =>
writeListToolsMcpServer({ filePath: serverPath, logPath, delayMs: delay }),
),
);
testing.setBundleMcpCatalogListTimeoutMsForTest(4_000);
const runtime = await getOrCreateSessionMcpRuntime({
sessionId: "session-parallel-catalog-test",
sessionKey: "agent:test:session-parallel-catalog-test",
workspaceDir: "/workspace",
cfg: {
mcp: {
servers: Object.fromEntries(
serverPaths.map(({ serverName, serverPath }) => [
serverName,
{
command: process.execPath,
args: [serverPath],
connectionTimeoutMs: 2_000,
},
]),
),
},
},
});
try {
const sumDelays = delays.reduce((a, b) => a + b, 0);
const maxDelay = Math.max(...delays);
const parallelBudgetMs = maxDelay + 500;
const t0 = performance.now();
const catalog = await runtime.getCatalog();
const wallTime = performance.now() - t0;
// Must have successfully connected to all servers
expect(Object.keys(catalog.servers)).toHaveLength(delays.length);
expect(catalog.tools.map((t) => t.toolName)).toEqual([
"slow_tool",
"slow_tool",
"slow_tool",
]);
// Sequential listing would have to wait roughly sumDelays before overhead;
// parallel listing should stay near the slowest server plus launch overhead.
expect(wallTime).toBeLessThan(parallelBudgetMs);
expect(parallelBudgetMs).toBeLessThan(sumDelays);
expect(wallTime).toBeGreaterThanOrEqual(maxDelay * 0.7);
} finally {
await runtime.dispose();
}
},
);
// Regression test: a catalog request made while an earlier request is still
// pending (and after a tools/list_changed notification was logged) must still
// report every configured server with no diagnostics.
it(
"awaits in-progress MCP session connections after catalog invalidation",
{ timeout: LIST_TOOLS_TEST_DEADLINE_MS },
async () => {
const tempDir = makeTempDir(tempDirs, "bundle-mcp-inflight-connect-");
const invalidatingServer = {
serverName: "invalidatingServer",
serverPath: path.join(tempDir, "invalidating-server.mjs"),
logPath: path.join(tempDir, "invalidating-server.log"),
};
const slowConnectServer = {
serverName: "slowConnectServer",
serverPath: path.join(tempDir, "slow-connect-server.mjs"),
logPath: path.join(tempDir, "slow-connect-server.log"),
};
// Server 1 advertises tools.listChanged and is asked to notify on initialized.
// NOTE(review): the exact behavior of these options lives in
// writeListToolsMcpServer, which is outside this view — confirm there.
await writeListToolsMcpServer({
filePath: invalidatingServer.serverPath,
logPath: invalidatingServer.logPath,
capabilities: { tools: { listChanged: true } },
notifyListChangedOnInitialized: true,
});
// Server 2 is configured with a 500 ms initialize delay (well below the
// 2_000 ms connectionTimeoutMs used below), presumably so its connect is
// still in flight when the other server's notification arrives.
await writeListToolsMcpServer({
filePath: slowConnectServer.serverPath,
logPath: slowConnectServer.logPath,
initializeDelayMs: 500,
});
testing.setBundleMcpCatalogListTimeoutMsForTest(4_000);
const runtime = await getOrCreateSessionMcpRuntime({
sessionId: "session-inflight-connect-test",
sessionKey: "agent:test:session-inflight-connect-test",
workspaceDir: "/workspace",
cfg: {
mcp: {
servers: Object.fromEntries(
[invalidatingServer, slowConnectServer].map(({ serverName, serverPath }) => [
serverName,
{
command: process.execPath,
args: [serverPath],
connectionTimeoutMs: 2_000,
},
]),
),
},
},
});
try {
// Deliberately not awaited: the first catalog build must still be pending
// when the second request is made.
const firstCatalog = runtime.getCatalog();
// Wait until the invalidating server has logged its list_changed notification.
await waitForFileText(
invalidatingServer.logPath,
"notify tools/list_changed",
LIST_TOOLS_SERVER_LOG_TIMEOUT_MS,
);
const secondCatalog = await runtime.getCatalog();
// Settle the first request too; a rejection here fails the test.
await firstCatalog;
// Both servers must be present and the second catalog must be diagnostic-free.
expect(Object.keys(secondCatalog.servers).toSorted()).toEqual([
invalidatingServer.serverName,
slowConnectServer.serverName,
]);
expect(secondCatalog.diagnostics ?? []).toEqual([]);
} finally {
// Always tear down the runtime so spawned server processes do not leak.
await runtime.dispose();
}
},
);
// Regression test: a server whose first connect exceeds its connection timeout
// is reported as a diagnostic, and a later catalog rebuild (triggered by a
// tools/list_changed notification) reconnects to it successfully.
//
// Two stdio servers are generated below:
// - "trigger": advertises tools.listChanged, emits notifications/tools/list_changed
//   after notifications/initialized and again on every tools/call, and exposes
//   a single tool named "poke".
// - "slow": uses an exclusively-created marker file to detect the very first
//   initialize across processes; that first response is delayed by 600 ms,
//   later ones are sent immediately. It exposes a single tool "slow_tool".
it(
"retires timed-out shared MCP sessions before later catalog retries",
{ timeout: 8_000 },
async () => {
const tempDir = makeTempDir(tempDirs, "bundle-mcp-timeout-retire-");
const triggerServerPath = path.join(tempDir, "trigger-server.mjs");
const triggerLogPath = path.join(tempDir, "trigger.log");
const slowServerPath = path.join(tempDir, "slow-server.mjs");
const slowLogPath = path.join(tempDir, "slow.log");
const firstConnectMarkerPath = path.join(tempDir, "first-connect.marker");
// The script text below is runtime data; it is written to disk verbatim.
await writeExecutable(
triggerServerPath,
`#!/usr/bin/env node
import fs from "node:fs/promises";
const logPath = ${JSON.stringify(triggerLogPath)};
let buffer = "";
function log(line) {
void fs.appendFile(logPath, line + "\\n", "utf8").catch(() => {});
}
function send(message) {
process.stdout.write(JSON.stringify(message) + "\\n");
}
function handle(message) {
if (!message || typeof message !== "object") {
return;
}
log("recv " + String(message.method ?? "unknown"));
if (message.method === "initialize") {
send({
jsonrpc: "2.0",
id: message.id,
result: {
protocolVersion: message.params?.protocolVersion ?? "2025-03-26",
capabilities: { tools: { listChanged: true } },
serverInfo: { name: "timeout-trigger", version: "1.0.0" },
},
});
return;
}
if (message.method === "notifications/initialized") {
send({ jsonrpc: "2.0", method: "notifications/tools/list_changed" });
log("sent initial tools/list_changed");
return;
}
if (message.method === "tools/list") {
send({
jsonrpc: "2.0",
id: message.id,
result: {
tools: [{ name: "poke", inputSchema: { type: "object", properties: {} } }],
},
});
return;
}
if (message.method === "tools/call") {
send({ jsonrpc: "2.0", method: "notifications/tools/list_changed" });
log("sent call tools/list_changed");
send({
jsonrpc: "2.0",
id: message.id,
result: { isError: false, content: [{ type: "text", text: "poked" }] },
});
}
}
process.stdin.setEncoding("utf8");
function shutdown() {
process.exit(0);
}
process.stdin.on("data", (chunk) => {
buffer += chunk;
while (true) {
const newline = buffer.indexOf("\\n");
if (newline < 0) {
return;
}
const line = buffer.slice(0, newline).replace(/\\r$/, "");
buffer = buffer.slice(newline + 1);
if (line.trim()) {
handle(JSON.parse(line));
}
}
});
process.stdin.on("end", shutdown);
process.on("SIGTERM", shutdown);
process.on("SIGINT", shutdown);`,
);
await writeExecutable(
slowServerPath,
`#!/usr/bin/env node
import fs from "node:fs/promises";
const logPath = ${JSON.stringify(slowLogPath)};
const markerPath = ${JSON.stringify(firstConnectMarkerPath)};
let buffer = "";
function log(line) {
void fs.appendFile(logPath, line + "\\n", "utf8").catch(() => {});
}
function send(message) {
process.stdout.write(JSON.stringify(message) + "\\n");
}
async function isFirstConnect() {
try {
const handle = await fs.open(markerPath, "wx");
await handle.close();
return true;
} catch {
return false;
}
}
async function handle(message) {
if (!message || typeof message !== "object") {
return;
}
log("recv " + String(message.method ?? "unknown"));
if (message.method === "initialize") {
const response = {
jsonrpc: "2.0",
id: message.id,
result: {
protocolVersion: message.params?.protocolVersion ?? "2025-03-26",
capabilities: { tools: {} },
serverInfo: { name: "timeout-slow", version: "1.0.0" },
},
};
if (await isFirstConnect()) {
log("slow first initialize");
setTimeout(() => send(response), 600);
} else {
log("fast retry initialize");
send(response);
}
return;
}
if (message.method === "tools/list") {
send({
jsonrpc: "2.0",
id: message.id,
result: {
tools: [{ name: "slow_tool", inputSchema: { type: "object", properties: {} } }],
},
});
}
}
process.stdin.setEncoding("utf8");
function shutdown() {
process.exit(0);
}
process.stdin.on("data", (chunk) => {
buffer += chunk;
while (true) {
const newline = buffer.indexOf("\\n");
if (newline < 0) {
return;
}
const line = buffer.slice(0, newline).replace(/\\r$/, "");
buffer = buffer.slice(newline + 1);
if (line.trim()) {
void handle(JSON.parse(line));
}
}
});
process.stdin.on("end", shutdown);
process.on("SIGTERM", shutdown);
process.on("SIGINT", shutdown);`,
);
// "slow" gets a 150 ms connection timeout, shorter than the 600 ms delay the
// script applies to its first initialize response.
const runtime = await getOrCreateSessionMcpRuntime({
sessionId: "session-timeout-retire-test",
sessionKey: "agent:test:session-timeout-retire-test",
workspaceDir: "/workspace",
cfg: {
mcp: {
servers: {
trigger: {
command: process.execPath,
args: [triggerServerPath],
connectionTimeoutMs: 2_000,
},
slow: {
command: process.execPath,
args: [slowServerPath],
connectionTimeoutMs: 150,
},
},
},
},
});
try {
// Phase 1: start a catalog build without awaiting it, wait for the trigger
// server's initial list_changed, then request a second catalog.
const firstCatalog = runtime.getCatalog();
await waitForFileText(
triggerLogPath,
"sent initial tools/list_changed",
LIST_TOOLS_SERVER_LOG_TIMEOUT_MS,
);
const secondCatalog = await runtime.getCatalog();
await firstCatalog;
// The trigger server is listed; the slow server shows up only as a diagnostic.
expect(secondCatalog.servers.trigger).toBeDefined();
expect(secondCatalog.diagnostics?.some((diag) => diag.serverName === "slow")).toBe(true);
// Confirm the slow server really took its delayed first-connect path.
await waitForFileText(
slowLogPath,
"slow first initialize",
LIST_TOOLS_SERVER_LOG_TIMEOUT_MS,
);
// Phase 2: calling "poke" makes the trigger server emit another list_changed.
await expect(runtime.callTool("trigger", "poke", {})).resolves.toMatchObject({
content: [{ type: "text", text: "poked" }],
isError: false,
});
await waitForFileText(
triggerLogPath,
"sent call tools/list_changed",
LIST_TOOLS_SERVER_LOG_TIMEOUT_MS,
);
// Wait until the cached catalog has been dropped (peekCatalog() returns null).
await waitForPredicate(
() => runtime.peekCatalog() === null,
"manual list_changed to retry timed-out server",
LIST_TOOLS_SERVER_LOG_TIMEOUT_MS,
);
// Phase 3: the rebuilt catalog must include the slow server with no
// diagnostic for it, and expose the tools of both servers.
const retriedCatalog = await runtime.getCatalog();
expect(retriedCatalog.diagnostics?.some((diag) => diag.serverName === "slow")).not.toBe(
true,
);
expect(retriedCatalog.servers.slow).toBeDefined();
expect(retriedCatalog.tools.map((tool) => tool.toolName).toSorted()).toEqual([
"poke",
"slow_tool",
]);
// The retry must have gone through the script's non-delayed initialize branch.
await waitForFileText(
slowLogPath,
"fast retry initialize",
LIST_TOOLS_SERVER_LOG_TIMEOUT_MS,
);
} finally {
// Always tear down the runtime so spawned server processes do not leak.
await runtime.dispose();
}
},
);
// Regression test: when an older catalog build fails while a newer build is
// using the same server, the server must remain usable afterwards.
//
// The generated server advertises tools.listChanged and emits
// notifications/tools/list_changed right after notifications/initialized.
// Its first tools/list reply is delayed by 100 ms and carries an array as
// `inputSchema`; every later tools/list reply is immediate and carries an
// object schema. The assertions below expect a diagnostic only for the
// catalog built from the first reply.
it(
"does not dispose sessions shared with a newer catalog generation",
{ timeout: LIST_TOOLS_TEST_DEADLINE_MS },
async () => {
const tempDir = makeTempDir(tempDirs, "bundle-mcp-overlap-generation-");
const serverPath = path.join(tempDir, "overlap-server.mjs");
const logPath = path.join(tempDir, "server.log");
// The script text below is runtime data; it is written to disk verbatim.
await writeExecutable(
serverPath,
`#!/usr/bin/env node
import fs from "node:fs/promises";
const logPath = ${JSON.stringify(logPath)};
let buffer = "";
let listCount = 0;
function log(line) {
void fs.appendFile(logPath, line + "\\n", "utf8").catch(() => {});
}
function send(message) {
process.stdout.write(JSON.stringify(message) + "\\n");
}
function handle(message) {
if (!message || typeof message !== "object") {
return;
}
log("recv " + String(message.method ?? "unknown"));
if (message.method === "initialize") {
send({
jsonrpc: "2.0",
id: message.id,
result: {
protocolVersion: message.params?.protocolVersion ?? "2025-03-26",
capabilities: { tools: { listChanged: true } },
serverInfo: { name: "overlap-generation", version: "1.0.0" },
},
});
return;
}
if (message.method === "notifications/initialized") {
send({ jsonrpc: "2.0", method: "notifications/tools/list_changed" });
log("sent tools/list_changed");
return;
}
if (message.method === "tools/list") {
listCount += 1;
const currentList = listCount;
log("tools/list " + currentList);
if (currentList === 1) {
setTimeout(() => {
send({
jsonrpc: "2.0",
id: message.id,
result: {
tools: [{ name: "ok_tool", inputSchema: [] }],
},
});
}, 100);
return;
}
send({
jsonrpc: "2.0",
id: message.id,
result: {
tools: [{ name: "ok_tool", inputSchema: { type: "object", properties: {} } }],
},
});
return;
}
if (message.method === "tools/call") {
send({
jsonrpc: "2.0",
id: message.id,
result: { isError: false, content: [{ type: "text", text: "still connected" }] },
});
}
}
process.stdin.setEncoding("utf8");
function shutdown() {
process.exit(0);
}
process.stdin.on("data", (chunk) => {
buffer += chunk;
while (true) {
const newline = buffer.indexOf("\\n");
if (newline < 0) {
return;
}
const line = buffer.slice(0, newline).replace(/\\r$/, "");
buffer = buffer.slice(newline + 1);
if (line.trim()) {
handle(JSON.parse(line));
}
}
});
process.stdin.on("end", shutdown);
process.on("SIGTERM", shutdown);
process.on("SIGINT", shutdown);`,
);
const runtime = await getOrCreateSessionMcpRuntime({
sessionId: "session-overlap-generation-test",
sessionKey: "agent:test:session-overlap-generation-test",
workspaceDir: "/workspace",
cfg: {
mcp: {
servers: {
overlap: {
command: process.execPath,
args: [serverPath],
},
},
},
},
});
try {
// Start the first catalog build without awaiting it.
const firstCatalog = runtime.getCatalog();
// Wait until the server has both sent its list_changed notification and
// received the first tools/list request (whose reply is still delayed).
await waitForFileText(logPath, "sent tools/list_changed", LIST_TOOLS_SERVER_LOG_TIMEOUT_MS);
await waitForFileText(logPath, "tools/list 1", LIST_TOOLS_SERVER_LOG_TIMEOUT_MS);
// Request a second catalog while the first build is still pending.
const secondCatalog = await runtime.getCatalog();
const firstCatalogResult = await firstCatalog;
// The first build reports a diagnostic for "overlap"; the second is clean.
expect(firstCatalogResult.diagnostics?.[0]?.serverName).toBe("overlap");
expect(secondCatalog.diagnostics ?? []).toEqual([]);
expect(secondCatalog.tools.map((tool) => tool.toolName)).toEqual(["ok_tool"]);
// The server must still answer tool calls after the first build failed.
await expect(runtime.callTool("overlap", "ok_tool", {})).resolves.toMatchObject({
content: [{ type: "text", text: "still connected" }],
isError: false,
});
} finally {
// Always tear down the runtime so the spawned server process does not leak.
await runtime.dispose();
}
},
);
});

View File

@@ -23,7 +23,6 @@ import {
findJsonSchemaShapeError,
normalizeJsonSchemaForTypeBox,
} from "../shared/json-schema-defaults.js";
import { runTasksWithConcurrency } from "../utils/run-with-concurrency.js";
import { sanitizeServerName } from "./agent-bundle-mcp-names.js";
import type {
McpCatalogTool,
@@ -44,11 +43,6 @@ type BundleMcpSession = {
transportType: "stdio" | "sse" | "streamable-http";
requestTimeoutMs: number;
supportsParallelToolCalls: boolean;
connected: boolean;
retiring: boolean;
catalogUseCount: number;
sharedAcrossCatalogGenerations: boolean;
connectPromise?: Promise<void>;
detachStderr?: () => void;
};
@@ -65,7 +59,6 @@ const SESSION_MCP_RUNTIME_SWEEP_INTERVAL_MS = 60 * 1000;
const BUNDLE_MCP_FAILURE_THRESHOLD = 3;
const BUNDLE_MCP_FAILURE_COOLDOWN_MS = 60_000;
const BUNDLE_MCP_CATALOG_LIST_TIMEOUT_MS = 1_500;
const BUNDLE_MCP_CATALOG_CONNECT_CONCURRENCY = 6;
const BUNDLE_MCP_METADATA_TEXT_LIMIT = 1_200;
let bundleMcpCatalogListTimeoutMs: number | undefined;
@@ -540,41 +533,6 @@ export function createSessionMcpRuntime(params: {
throw createDisposedError(params.sessionId);
}
};
/**
 * Connects `session` if it is not connected yet, sharing one connect attempt
 * between concurrent callers.
 *
 * @param session - Session whose client/transport pair should be connected.
 * @param connectionTimeoutMs - Timeout forwarded to `connectWithTimeout`.
 * @throws If the session is marked as retiring, or if the connect attempt rejects.
 */
const ensureSessionConnected = async (
session: BundleMcpSession,
connectionTimeoutMs: number,
): Promise<void> => {
if (session.retiring) {
throw new Error(`bundle-mcp server "${session.serverName}" is retiring`);
}
if (session.connected) {
return;
}
// `??=` only starts a new attempt when none is pending, so callers that
// arrive while a connect is in flight await the same promise.
session.connectPromise ??= connectWithTimeout(
session.client,
session.transport,
connectionTimeoutMs,
)
.then(() => {
session.connected = true;
})
.finally(() => {
// Clear the pending marker on success and on failure alike, so a later
// call can start a fresh attempt after a failed one.
session.connectPromise = undefined;
});
await session.connectPromise;
};
/**
 * Retires and disposes `session`, but only while it is still the session
 * registered under `serverName`.
 *
 * @returns `true` when the session was retired, `false` when a different
 *   session (or none) is registered under that name and nothing was touched.
 */
const retireSessionIfCurrent = async (
  serverName: string,
  session: BundleMcpSession,
): Promise<boolean> => {
  const isRegistered = sessions.get(serverName) === session;
  if (isRegistered) {
    // Flag and unregister before the asynchronous dispose so nothing picks
    // the session up again while it is being closed.
    session.retiring = true;
    sessions.delete(serverName);
    await disposeSession(session);
  }
  return isRegistered;
};
const getCatalog = async (): Promise<McpToolCatalog> => {
failIfDisposed();
@@ -601,13 +559,6 @@ export function createSessionMcpRuntime(params: {
const usedServerNames = new Set<string>();
try {
// Pre-compute safe server names sequentially (synchronous, fast — no I/O)
const preparedEntries: Array<{
serverName: string;
rawServer: (typeof loaded.mcpServers)[string];
resolved: NonNullable<ReturnType<typeof resolveMcpTransport>>;
safeServerName: string;
}> = [];
for (const [serverName, rawServer] of Object.entries(loaded.mcpServers)) {
failIfDisposed();
const resolved = resolveMcpTransport(serverName, rawServer);
@@ -620,209 +571,137 @@ export function createSessionMcpRuntime(params: {
`bundle-mcp: server key "${serverName}" registered as "${safeServerName}" for provider-safe tool names.`,
);
}
preparedEntries.push({ serverName, rawServer, resolved, safeServerName });
}
// Bounded fan-out keeps common 4-5 server setups parallel without letting
// large configs spawn/connect every MCP transport at once.
type ServerResult = {
serverName: string;
serverEntry: McpServerCatalog | null;
toolEntries: McpCatalogTool[];
diagnostics: McpToolCatalogDiagnostic[];
};
const tasks = preparedEntries.map(
({ serverName, rawServer, resolved, safeServerName }) =>
async (): Promise<ServerResult> => {
failIfDisposed();
let session = sessions.get(serverName);
if (session?.retiring) {
session = undefined;
}
const reusedSession = Boolean(session);
if (!session) {
const client = new Client(
{
name: "openclaw-bundle-mcp",
version: "0.0.0",
},
{
jsonSchemaValidator: createBundleMcpJsonSchemaValidator(),
listChanged: {
tools: {
autoRefresh: false,
debounceMs: 0,
onChanged: (error) => {
if (error) {
logWarn(
`bundle-mcp: failed to refresh changed tool list for server "${serverName}": ${redactErrorUrls(error)}`,
);
}
catalogInvalidationGeneration += 1;
catalog = null;
catalogInFlight = undefined;
},
},
let session = sessions.get(serverName);
const reusedSession = Boolean(session);
let connected = Boolean(session);
if (!session) {
const client = new Client(
{
name: "openclaw-bundle-mcp",
version: "0.0.0",
},
{
jsonSchemaValidator: createBundleMcpJsonSchemaValidator(),
listChanged: {
tools: {
autoRefresh: false,
debounceMs: 0,
onChanged: (error) => {
if (error) {
logWarn(
`bundle-mcp: failed to refresh changed tool list for server "${serverName}": ${redactErrorUrls(error)}`,
);
}
catalogInvalidationGeneration += 1;
catalog = null;
catalogInFlight = undefined;
},
},
);
session = {
serverName,
client,
transport: resolved.transport,
transportType: resolved.transportType,
requestTimeoutMs: resolved.requestTimeoutMs,
supportsParallelToolCalls: resolved.supportsParallelToolCalls,
connected: false,
retiring: false,
catalogUseCount: 0,
sharedAcrossCatalogGenerations: false,
detachStderr: resolved.detachStderr,
};
sessions.set(serverName, session);
}
},
},
);
session = {
serverName,
client,
transport: resolved.transport,
transportType: resolved.transportType,
requestTimeoutMs: resolved.requestTimeoutMs,
supportsParallelToolCalls: resolved.supportsParallelToolCalls,
detachStderr: resolved.detachStderr,
};
sessions.set(serverName, session);
}
if (session.catalogUseCount === 0) {
session.sharedAcrossCatalogGenerations = false;
}
if (reusedSession && session.catalogUseCount > 0) {
session.sharedAcrossCatalogGenerations = true;
}
session.catalogUseCount += 1;
let connectedForCatalog = false;
try {
failIfDisposed();
await ensureSessionConnected(session, resolved.connectionTimeoutMs);
connectedForCatalog = true;
failIfDisposed();
const capabilities = summarizeServerCapabilities(
session.client.getServerCapabilities(),
);
const listedTools = await listAllToolsBestEffort({
client: session.client,
timeoutMs: getCatalogListTimeoutMs(rawServer, resolved.requestTimeoutMs),
suppressUnsupported: Boolean(
!capabilities.tools && (capabilities.resources || capabilities.prompts),
),
});
failIfDisposed();
const selection = getMcpToolSelection(rawServer);
const exposedTools = listedTools.filter((tool) =>
shouldExposeMcpTool(selection, tool.name.trim()),
);
const serverEntry: McpServerCatalog = {
serverName,
safeServerName,
launchSummary: resolved.description,
toolCount: exposedTools.length,
requestTimeoutMs: resolved.requestTimeoutMs,
supportsParallelToolCalls: resolved.supportsParallelToolCalls,
...(capabilities.resources ? { resources: capabilities.resources } : {}),
...(capabilities.prompts ? { prompts: capabilities.prompts } : {}),
...(capabilities.tools
? {
tools: {
...capabilities.tools,
...(exposedTools.length !== listedTools.length
? { filteredCount: listedTools.length - exposedTools.length }
: {}),
},
}
: {}),
...(selection.include || selection.exclude
? {
toolFilter: {
...(selection.include ? { include: [...selection.include] } : {}),
...(selection.exclude ? { exclude: [...selection.exclude] } : {}),
},
}
: {}),
};
const toolEntries: McpCatalogTool[] = [];
for (const tool of exposedTools) {
const toolName = tool.name.trim();
if (!toolName) {
continue;
try {
failIfDisposed();
if (!connected) {
await connectWithTimeout(
session.client,
session.transport,
resolved.connectionTimeoutMs,
);
connected = true;
}
failIfDisposed();
const capabilities = summarizeServerCapabilities(
session.client.getServerCapabilities(),
);
const listedTools = await listAllToolsBestEffort({
client: session.client,
timeoutMs: getCatalogListTimeoutMs(rawServer, resolved.requestTimeoutMs),
suppressUnsupported: Boolean(
!capabilities.tools && (capabilities.resources || capabilities.prompts),
),
});
failIfDisposed();
const selection = getMcpToolSelection(rawServer);
const exposedTools = listedTools.filter((tool) =>
shouldExposeMcpTool(selection, tool.name.trim()),
);
servers[serverName] = {
serverName,
safeServerName,
launchSummary: resolved.description,
toolCount: exposedTools.length,
requestTimeoutMs: resolved.requestTimeoutMs,
supportsParallelToolCalls: resolved.supportsParallelToolCalls,
...(capabilities.resources ? { resources: capabilities.resources } : {}),
...(capabilities.prompts ? { prompts: capabilities.prompts } : {}),
...(capabilities.tools
? {
tools: {
...capabilities.tools,
...(exposedTools.length !== listedTools.length
? { filteredCount: listedTools.length - exposedTools.length }
: {}),
},
}
toolEntries.push({
serverName,
safeServerName,
toolName,
title: tool.title,
description: sanitizeMcpMetadataText(tool.description),
inputSchema: tool.inputSchema,
fallbackDescription: `Provided by bundle MCP server "${serverName}" (${resolved.description}).`,
});
}
return {
serverName,
serverEntry,
toolEntries,
diagnostics: [] as McpToolCatalogDiagnostic[],
};
} catch (error) {
const message = redactErrorUrls(error);
if (!disposed) {
const action = reusedSession ? "refresh" : "start";
logWarn(
`bundle-mcp: failed to ${action} server "${serverName}" (${resolved.description}): ${message}`,
);
}
const diags: McpToolCatalogDiagnostic[] = [
{
serverName,
safeServerName,
launchSummary: resolved.description,
message,
},
];
const sharedWithNewerGeneration =
session.sharedAcrossCatalogGenerations || session.catalogUseCount > 1;
if (!connectedForCatalog && !session.connected) {
// Timed-out connects can still leave the SDK client bound to a
// transport. Delete before async close so future catalogs start fresh.
await retireSessionIfCurrent(serverName, session);
} else if (!reusedSession && !sharedWithNewerGeneration) {
// Catalog invalidation can overlap generations; an older failed
// generation must not dispose a session a newer one already reused.
await retireSessionIfCurrent(serverName, session);
}
failIfDisposed();
return {
serverName,
serverEntry: null,
toolEntries: [],
diagnostics: diags,
} as ServerResult;
} finally {
session.catalogUseCount -= 1;
if (session.catalogUseCount === 0) {
session.sharedAcrossCatalogGenerations = false;
}
: {}),
...(selection.include || selection.exclude
? {
toolFilter: {
...(selection.include ? { include: [...selection.include] } : {}),
...(selection.exclude ? { exclude: [...selection.exclude] } : {}),
},
}
: {}),
};
for (const tool of exposedTools) {
const toolName = tool.name.trim();
if (!toolName) {
continue;
}
},
);
const { results, firstError, hasError } = await runTasksWithConcurrency({
tasks,
limit: BUNDLE_MCP_CATALOG_CONNECT_CONCURRENCY,
errorMode: "continue",
});
if (hasError) {
throw firstError;
}
for (const result of results) {
if (!result) {
continue;
tools.push({
serverName,
safeServerName,
toolName,
title: tool.title,
description: sanitizeMcpMetadataText(tool.description),
inputSchema: tool.inputSchema,
fallbackDescription: `Provided by bundle MCP server "${serverName}" (${resolved.description}).`,
});
}
} catch (error) {
const message = redactErrorUrls(error);
if (!disposed) {
const action = reusedSession ? "refresh" : "start";
logWarn(
`bundle-mcp: failed to ${action} server "${serverName}" (${resolved.description}): ${message}`,
);
}
diagnostics.push({
serverName,
safeServerName,
launchSummary: resolved.description,
message,
});
if (!reusedSession) {
await disposeSession(session);
sessions.delete(serverName);
}
failIfDisposed();
}
const { serverEntry, toolEntries, diagnostics: serverDiags } = result;
if (serverEntry) {
servers[result.serverName] = serverEntry;
}
tools.push(...toolEntries);
diagnostics.push(...serverDiags);
}
failIfDisposed();

View File

@@ -109,101 +109,6 @@ describe("commitments command", () => {
);
});
it("keeps fixed-width columns aligned when an id or scope is truncated", async () => {
  // Oversized on both axes: a 29-char id (ID column is 16 wide) and a joined
  // agentId/channel/to scope longer than the 28-char Scope column, so both
  // cells go through truncate().
  const oversized = commitment({
    id: "cm_abcdefghijklmnopqrstuvwxyz",
    agentId: "averylongagentidentifier",
    channel: "telegram",
    to: "+15551234567890",
  });
  mocks.listCommitments.mockResolvedValue([oversized]);

  const { runtime, logs } = createRuntime();
  await commitmentsListCommand({}, runtime);

  const plainLines = logs.map(stripAnsi);
  const headerLine = plainLines.find((line) => line.startsWith("ID"));
  const dataRow = plainLines.find((line) => line.startsWith("cm_"));
  expect(headerLine).toBeDefined();
  expect(dataRow).toBeDefined();

  // A truncated id occupies exactly its 16-char column: 15 characters of
  // content followed by one ellipsis character (a 3-char "..." would overflow).
  expect(dataRow?.slice(0, 16)).toBe("cm_abcdefghijkl…");

  // Because every truncated cell keeps its width, later cells start at the
  // same offset as their header labels instead of drifting 2 chars right.
  expect(dataRow?.indexOf("pending")).toBe(headerLine?.indexOf("Status"));
  expect(dataRow?.indexOf("event_check_in")).toBe(headerLine?.indexOf("Kind"));
});
it("keeps the Scope column aligned when only the scope is truncated", async () => {
  // Short id (untouched) but a scope longer than its 28-char column, so only
  // the scope cell is truncated. Isolates the second truncation site.
  mocks.listCommitments.mockResolvedValue([
    commitment({
      id: "cm_short", // 8 chars, fits the ID column untouched
      agentId: "averylongagentidentifier",
      channel: "telegram",
      to: "+15551234567890", // joined scope exceeds 28 chars
    }),
  ]);

  const { runtime, logs } = createRuntime();
  await commitmentsListCommand({}, runtime);

  const lines = logs.map(stripAnsi);
  const header = lines.find((line) => line.startsWith("ID"));
  const row = lines.find((line) => line.startsWith("cm_"));
  expect(header).toBeDefined();
  expect(row).toBeDefined();

  // The short id is rendered in full (no ellipsis) and padded out to the
  // 16-char ID column. The padding is spelled with padEnd() rather than as
  // literal trailing spaces so the expected width is explicit and cannot be
  // silently altered by whitespace-collapsing tools.
  expect(row?.slice(0, 16)).toBe("cm_short".padEnd(16));

  // The 28-char Scope cell ends in a single-char ellipsis and holds its width,
  // so the trailing Suggested text column still begins under its header label.
  const scopeCell = row?.slice(70, 98);
  expect(scopeCell?.length).toBe(28);
  expect(scopeCell?.endsWith("…")).toBe(true);
  expect(row?.indexOf("How did it go?")).toBe(header?.indexOf("Suggested text"));
});
it("does not truncate an id that exactly fills the ID column", async () => {
  // Boundary case: the id is 16 chars, equal to maxChars, so the
  // `value.length <= maxChars` branch returns it unchanged without an ellipsis.
  const exactFit = commitment({ id: "cm_exactly16char" });
  mocks.listCommitments.mockResolvedValue([exactFit]);

  const { runtime, logs } = createRuntime();
  await commitmentsListCommand({}, runtime);

  const plainLines = logs.map(stripAnsi);
  const headerLine = plainLines.find((line) => line.startsWith("ID"));
  const dataRow = plainLines.find((line) => line.startsWith("cm_"));

  expect(dataRow?.slice(0, 16)).toBe("cm_exactly16char");
  expect(dataRow).not.toContain("…");
  expect(dataRow?.indexOf("pending")).toBe(headerLine?.indexOf("Status"));
});
it("truncates an id one character past the column width to a single ellipsis", async () => {
  // Boundary case: 17 chars is maxChars + 1, the shortest id that triggers
  // truncation. Expected cell: 15 chars of content + one ellipsis = 16, which
  // holds the column (a 3-char "..." suffix would make it 18 and overflow).
  const oneOver = commitment({ id: "cm_0123456789abcd" });
  mocks.listCommitments.mockResolvedValue([oneOver]);

  const { runtime, logs } = createRuntime();
  await commitmentsListCommand({}, runtime);

  const plainLines = logs.map(stripAnsi);
  const headerLine = plainLines.find((line) => line.startsWith("ID"));
  const dataRow = plainLines.find((line) => line.startsWith("cm_"));

  expect(dataRow?.slice(0, 16)).toBe("cm_0123456789ab…");
  expect(dataRow?.indexOf("pending")).toBe(headerLine?.indexOf("Status"));
});
it("writes list JSON to runtime stdout instead of log output", async () => {
const { runtime, logs, stdout } = createRuntime();

View File

@@ -24,7 +24,7 @@ const STATUS_VALUES = new Set<CommitmentStatus>([
]);
/**
 * Shortens `value` so it never occupies more than `maxChars` characters.
 *
 * Values that already fit are returned unchanged. Longer values are cut to
 * `maxChars - 1` characters and suffixed with a single-character ellipsis
 * (U+2026), so the result is exactly `maxChars` wide and fixed-width table
 * columns stay aligned. A three-character "..." suffix would overflow the
 * column by two characters.
 *
 * @param value - Text to fit into the column.
 * @param maxChars - Maximum number of UTF-16 code units allowed in the result.
 * @returns `value` itself, or its truncated form ending in an ellipsis; an
 *   empty string when `maxChars` leaves no room for any character.
 */
function truncate(value: string, maxChars: number): string {
  if (value.length <= maxChars) {
    return value;
  }
  if (maxChars <= 0) {
    // No room even for the ellipsis; slice(0, -1) would otherwise keep almost
    // the whole value and exceed the limit.
    return "";
  }
  return `${value.slice(0, maxChars - 1)}\u2026`;
}
function safe(value: string): string {

View File

@@ -926,7 +926,6 @@ describe("test-projects args", () => {
config: "test/vitest/vitest.agents.config.ts",
forwardedArgs: [],
includePatterns: [
"src/agents/agent-bundle-mcp-runtime.test.ts",
"src/agents/models-config.file-mode.test.ts",
"src/agents/sandbox/ssh.test.ts",
],

View File

@@ -10,12 +10,6 @@ describe("shared/model-param-b", () => {
expect(inferParamBFromIdOrName("(70b) + m1.5b + qwen-14b")).toBe(70);
});
it("matches both tokens when two are separated by a single delimiter", () => {
  // Each entry pairs an input holding two adjacent `<n>b` tokens that share
  // one delimiter with the larger of the two sizes, which must be returned.
  const cases: Array<[string, number]> = [
    ["8b 70b", 70],
    ["8b-70b", 70],
    ["7b-13b", 13],
  ];
  for (const [text, largest] of cases) {
    expect(inferParamBFromIdOrName(text)).toBe(largest);
  }
});
it("ignores malformed, zero, and non-delimited matches", () => {
expect(inferParamBFromIdOrName("abc70beta 0b x70b2")).toBeNull();
expect(inferParamBFromIdOrName("model 0b")).toBeNull();

View File

@@ -4,9 +4,7 @@ import { normalizeLowercaseStringOrEmpty } from "@openclaw/normalization-core/st
/** Infers the largest `<number>b` parameter-size token from a model id or display name. */
export function inferParamBFromIdOrName(text: string): number | null {
const raw = normalizeLowercaseStringOrEmpty(text);
// Trailing boundary is a lookahead so two adjacent `<num>b` tokens sharing one delimiter (e.g.
// "8b 70b" / "8b-70b") both match; a consuming boundary ate the delimiter and skipped the second.
const matches = raw.matchAll(/(?:^|[^a-z0-9])[a-z]?(\d+(?:\.\d+)?)b(?=[^a-z0-9]|$)/g);
const matches = raw.matchAll(/(?:^|[^a-z0-9])[a-z]?(\d+(?:\.\d+)?)b(?:[^a-z0-9]|$)/g);
let best: number | null = null;
for (const match of matches) {
const numRaw = match[1];

View File

@@ -12,11 +12,13 @@ profiles:
evidenceMode: slim
channelDriver: crabline
categoryIds:
- agent-runtime-and-provider-execution.agent-turn-execution
- agent-runtime-and-provider-execution.model-and-runtime-selection
- agent-runtime-and-provider-execution.provider-auth
- agent-runtime-and-provider-execution.streaming-and-progress
- agent-runtime-and-provider-execution.tool-calls-and-response-handling
- agent-runtime-and-provider-execution.tool-execution-controls
- session-memory-and-context-engine.token-management
- session-memory-and-context-engine.context-engine
- session-memory-and-context-engine.cross-client-history-and-session-parity
- session-memory-and-context-engine.core-prompts-and-context
@@ -34,6 +36,7 @@ profiles:
- channel-framework.outbound-delivery-and-reply-pipeline
- channel-framework.group-thread-and-ambient-room-behavior
- channel-framework.status-health-and-operator-controls
- session-memory-and-context-engine.memory
- session-memory-and-context-engine.diagnostics-maintenance-and-recovery
- automation-cron-hooks-tasks-polling.cron-jobs
- plugin-sdk-and-bundled-plugin-architecture.installing-and-running-plugins
@@ -42,6 +45,7 @@ profiles:
- media-understanding-and-media-generation.media-understanding
- media-understanding-and-media-generation.media-generation
- browser-control-ui-and-webchat.browser-ui
- security-auth-pairing-and-secrets.credential-and-secret-hygiene
- id: release
description: Stable/LTS proof selector for live providers, live channels, package artifacts,
upgrade paths, and platform proof where the claim depends on real upstreams or release
@@ -1301,13 +1305,13 @@ surfaces:
id: agent-turn-execution
features:
- name: Turn startup and runtime choice
coverageIds: [agents.create, agents.instructions, channels.discord-config, config.crestodian-setup, runtime.first-action, runtime.multi-turn-continuity, runtime.long-context]
coverageIds: [agents.create, agents.instructions, channels.discord-config, config.crestodian-setup, runtime.first-action, runtime.first-hour-20, runtime.long-context]
description: Starting an agent turn and choosing gateway versus embedded runtime execution.
- name: Session and run coordination
coverageIds: [agents.subagents, channels.dedup, channels.dm, channels.qa-channel, channels.reconnect, channels.streaming, channels.threads, commitments.heartbeat-target-none, commitments.scope, personal.channel-replies, runtime.codex-plugin.lifecycle, runtime.delivery, runtime.fallback-delivery, runtime.gateway-restart, runtime.restart-recovery, runtime.turn-ordering]
description: Establishing session and run ids, queue locks, and related execution coordination.
- name: Abort and terminal outcomes
coverageIds: [channels.streaming, runtime.delivery, runtime.fallback-delivery, runtime.long-context, runtime.long-run-stability]
coverageIds: [channels.streaming, runtime.delivery, runtime.fallback-delivery, runtime.long-context, runtime.soak-100]
description: Honoring aborts, timing provider/model work, and emitting terminal outcomes.
docs:
- docs/concepts/agent-loop.md
@@ -1589,7 +1593,7 @@ surfaces:
coverageIds: [session.pruning]
description: Covers Pruning across manual and automatic compaction, preemptive overflow checks, context-window estimation, session pruning, tool-result trimming, compaction providers, retry/timeout behavior, and compacted transcript checkpoints.
- name: Token Pressure
coverageIds: [runtime.codex-app-server, runtime.multi-turn-continuity, runtime.gateway-log-sentinel.codex-progress, runtime.long-context, runtime.long-run-stability]
coverageIds: [runtime.codex-app-server, runtime.first-hour-20, runtime.gateway-log-sentinel.codex-progress, runtime.long-context, runtime.soak-100]
description: Covers Token Pressure across manual and automatic compaction, preemptive overflow checks, context-window estimation, session pruning, tool-result trimming, compaction providers, retry/timeout behavior, and compacted transcript checkpoints.
docs:
- docs/concepts/compaction.md
@@ -2414,7 +2418,7 @@ surfaces:
coverageIds: [telemetry.trusted-trace-context]
description: Trusted trace context, W3C traceparent propagation to model calls, file-log correlation, content-capture controls, and redacted/bounded attributes
- name: Model and runtime telemetry
coverageIds: [docker.runtime-validation, harness.qa-lab, harness.tool-trace-visibility, personal.failure-recovery, personal.no-fake-progress, personal.task-followthrough, runtime.qa-bus, telemetry.otel, telemetry.prometheus, tools.evidence, tools.trace]
coverageIds: [docker.e2e, harness.qa-lab, harness.tool-trace-visibility, personal.failure-recovery, personal.no-fake-progress, personal.task-followthrough, runtime.qa-bus, telemetry.otel, telemetry.prometheus, tools.evidence, tools.trace]
description: Model, tool, message, session, queue, Talk, exec, webhook, context assembly, harness, and exporter-health signals
- name: diagnostics-prometheus plugin install
coverageIds: [telemetry.diagnostics-prometheus-plugin-install]
@@ -2423,7 +2427,7 @@ surfaces:
coverageIds: [telemetry.prometheus-authenticated-gateway-export]
description: Gateway-authenticated GET /api/diagnostics/prometheus behavior, status, and operator-visible verification.
- name: Prometheus text exposition
coverageIds: [docker.runtime-validation, harness.qa-lab, telemetry.prometheus]
coverageIds: [docker.e2e, harness.qa-lab, telemetry.prometheus]
description: Prometheus text exposition, counters, gauges, histograms, label policy, series cap, and overflow metric
- name: Trusted diagnostic event subscription
coverageIds: [telemetry.trusted-diagnostic-event-subscription]
@@ -6441,7 +6445,7 @@ surfaces:
coverageIds: [docker.package-artifact-generation]
description: Docker E2E package artifact generation and shared build helpers
- name: Docker E2E plan/scheduler scripts
coverageIds: [docker.runtime-validation, harness.qa-lab, telemetry.prometheus]
coverageIds: [docker.e2e, harness.qa-lab, telemetry.prometheus]
description: Docker E2E plan/scheduler scripts, lane metadata, targeted grouping, package artifact generation, and GitHub hydration action
- name: Release-path install
coverageIds: [docker.release-path-install]

View File

@@ -2,7 +2,6 @@
import { describe, expect, it } from "vitest";
import {
collectPreparedPrepackErrors,
resolvePrepackCommandStdio,
resolvePrepackCommandTimeoutMs,
runPrepackCommand,
} from "../scripts/openclaw-prepack.ts";
@@ -27,20 +26,6 @@ describe("collectPreparedPrepackErrors", () => {
});
describe("runPrepackCommand", () => {
it("keeps prepack child stdout off npm pack JSON stdout", () => {
expect(resolvePrepackCommandStdio({ stdio: "inherit" }, { npm_config_json: "true" })).toEqual([
"inherit",
2,
"inherit",
]);
expect(
resolvePrepackCommandStdio(
{ stdio: ["ignore", "pipe", "pipe"] },
{ npm_config_json: "true" },
),
).toEqual(["ignore", "pipe", "pipe"]);
});
it("returns captured output for successful commands", () => {
const result = runPrepackCommand(process.execPath, ["--eval", "process.stdout.write('ok')"], {
encoding: "utf8",