fix: derive plugin media trust from metadata (#86410)

This commit is contained in:
Peter Steinberger
2026-05-25 14:18:36 +01:00
committed by GitHub
parent 75c72360ad
commit e761eb8f3e
13 changed files with 179 additions and 47 deletions

View File

@@ -17,6 +17,7 @@ Docs: https://docs.openclaw.ai
- Discord/OpenAI voice: accept leading fuzzy wake-name transcripts such as "Monty" or "Moti" for a Molty agent while keeping ambient speech gated.
- Media understanding: convert HEIC and HEIF images to JPEG before image description providers run so iPhone photos work in direct and configured image-description flows. (#86037)
- Discord/OpenAI voice: rotate Realtime sessions at provider max duration without logging the expected session-expiry event as an error.
- Agents/media: derive bundled plugin local-media trust from plugin tool metadata instead of importing the full plugin registry on subscription paths. (#84409) Thanks @samzong.
- Memory/local embeddings: run local GGUF embeddings in an isolated worker sidecar and degrade to configured fallback or keyword search on worker failure so native embedding crashes do not take down the Gateway. (#85348) Thanks @osolmaz.
- Gateway: clear the runtime config snapshot before `SIGUSR1` in-process restarts so config changes survive the next gateway loop. (#86388) Thanks @XuZehan-iCenter.
- Models: show OAuth delegation markers as configured `models.json` auth while keeping runtime route usability checks strict. (#86378) Thanks @rohitjavvadi.

View File

@@ -124,6 +124,7 @@ import {
} from "../../pi-embedded-helpers.js";
import { countActiveToolExecutions } from "../../pi-embedded-subscribe.handlers.tools.js";
import { subscribeEmbeddedPiSession } from "../../pi-embedded-subscribe.js";
import { isCoreToolResultMediaTrustedName } from "../../pi-embedded-subscribe.tools.js";
import { createPreparedEmbeddedPiSettingsManager } from "../../pi-project-settings.js";
import {
applyPiAutoCompactionGuard,
@@ -448,6 +449,35 @@ export {
resolveEmbeddedAgentStreamFn,
};
/**
 * Gathers the names of tools whose plugin metadata carries
 * `trustedLocalMedia === true`.
 *
 * Tools whose name is missing or blank after trimming are ignored. Names are
 * stored in their trimmed form.
 *
 * @param params.tools Candidate tools to inspect.
 * @returns The set of trimmed tool names flagged as trusted local-media tools.
 */
function collectTrustedPluginLocalMediaToolNames(params: {
  tools: Array<{ name?: string }>;
}): Set<string> {
  const trustedNames = params.tools.flatMap((candidate) => {
    const trimmedName = candidate.name?.trim();
    if (!trimmedName) {
      // Unnamed tools are never looked up in the plugin metadata.
      return [];
    }
    const pluginMeta = getPluginToolMeta(candidate as Parameters<typeof getPluginToolMeta>[0]);
    return pluginMeta?.trustedLocalMedia === true ? [trimmedName] : [];
  });
  return new Set<string>(trustedNames);
}
/**
 * Builds the combined set of tool names trusted for local media in a run.
 *
 * Core built-in names are kept only when `isCoreToolResultMediaTrustedName`
 * accepts them; every name in `trustedPluginToolNames` is included as-is.
 *
 * @param params.coreBuiltinToolNames Names of the core built-in tools to filter.
 * @param params.trustedPluginToolNames Plugin tool names already deemed trusted.
 * @returns A new set with the accepted core names followed by the plugin names.
 */
function collectTrustedLocalMediaToolNames(params: {
  coreBuiltinToolNames: ReadonlySet<string>;
  trustedPluginToolNames: ReadonlySet<string>;
}): Set<string> {
  const merged = new Set<string>();
  for (const coreName of params.coreBuiltinToolNames) {
    if (isCoreToolResultMediaTrustedName(coreName)) {
      merged.add(coreName);
    }
  }
  for (const pluginName of params.trustedPluginToolNames) {
    merged.add(pluginName);
  }
  return merged;
}
const MAX_BTW_SNAPSHOT_MESSAGES = 100;
const TOOL_SEARCH_CONTROL_ALLOWLIST_NAMES = [
TOOL_SEARCH_CODE_MODE_TOOL_NAME,
@@ -1651,6 +1681,11 @@ export async function runEmbeddedAttempt(
modelApi: params.model.api,
model: params.model,
};
const pluginMetadataSnapshot = getCurrentPluginMetadataSnapshot({
config: params.config,
env: process.env,
workspaceDir: effectiveWorkspace,
});
const tools = normalizeAgentRuntimeTools({
runtimePlan: params.runtimePlan,
tools: toolsEnabled ? toolsRaw : [],
@@ -1721,6 +1756,9 @@ export async function runEmbeddedAttempt(
senderE164: params.senderE164,
warn: (message) => log.warn(message),
});
const trustedPluginLocalMediaToolNames = collectTrustedPluginLocalMediaToolNames({
tools: toolsEnabled ? [...toolsRaw, ...filteredBundledTools] : [],
});
const normalizedBundledTools =
filteredBundledTools.length > 0
? normalizeAgentRuntimeTools({
@@ -2202,11 +2240,7 @@ export async function runEmbeddedAttempt(
cwd: effectiveWorkspace,
agentDir,
cfg: params.config,
pluginMetadataSnapshot: getCurrentPluginMetadataSnapshot({
config: params.config,
env: process.env,
workspaceDir: effectiveWorkspace,
}),
pluginMetadataSnapshot,
contextTokenBudget: params.contextTokenBudget,
});
const piAutoCompactionGuardArgs = {
@@ -2285,10 +2319,8 @@ export async function runEmbeddedAttempt(
cfg: params.config,
agentId: sessionAgentId,
});
// Exact raw names of every tool registered for this run, including
// bundled/plugin tools. Used as the raw-name set for the trusted local
// MEDIA: passthrough gate: a normalized alias is not sufficient — the
// emitted tool name must match an exact registration of this run.
// Exact raw names of every tool registered for this run. This remains
// available for diagnostics; local MEDIA: trust is narrower below.
const builtinToolNames = new Set(
uncompactedEffectiveTools.flatMap((tool) => {
const name = (tool.name ?? "").trim();
@@ -2304,6 +2336,10 @@ export async function runEmbeddedAttempt(
isPluginTool: (tool) =>
Boolean(getPluginToolMeta(tool as Parameters<typeof getPluginToolMeta>[0])),
});
const trustedLocalMediaToolNames = collectTrustedLocalMediaToolNames({
coreBuiltinToolNames,
trustedPluginToolNames: trustedPluginLocalMediaToolNames,
});
const clientToolNameConflicts = findClientToolNameConflicts({
tools: clientTools ?? [],
existingToolNames: [...coreBuiltinToolNames, ...PI_RESERVED_TOOL_NAMES],
@@ -3303,6 +3339,7 @@ export async function runEmbeddedAttempt(
sessionId: params.sessionId,
agentId: sessionAgentId,
builtinToolNames,
trustedLocalMediaToolNames,
internalEvents: params.internalEvents,
}),
);

View File

@@ -45,6 +45,8 @@ export function createSubscribedSessionHarness(
const mergedSession = Object.assign(session, sessionExtras ?? {});
const subscription = subscribeEmbeddedPiSession({
...subscribeParams,
trustedLocalMediaToolNames:
subscribeParams.trustedLocalMediaToolNames ?? subscribeParams.builtinToolNames,
session: mergedSession,
});
return { emit, session: mergedSession, subscription };

View File

@@ -11,6 +11,7 @@ function createMockContext(overrides?: {
onToolResult?: ReturnType<typeof vi.fn>;
toolResultFormat?: "markdown" | "plain";
builtinToolNames?: ReadonlySet<string>;
trustedLocalMediaToolNames?: ReadonlySet<string>;
}): EmbeddedPiSubscribeContext {
const onToolResult = overrides?.onToolResult ?? vi.fn();
return {
@@ -44,6 +45,8 @@ function createMockContext(overrides?: {
},
log: { debug: vi.fn(), info: vi.fn(), warn: vi.fn() },
builtinToolNames: overrides?.builtinToolNames,
trustedLocalMediaToolNames:
overrides?.trustedLocalMediaToolNames ?? overrides?.builtinToolNames,
shouldEmitToolResult: vi.fn(() => false),
shouldEmitToolOutput: vi.fn(() => overrides?.shouldEmitToolOutput ?? false),
emitToolSummary: vi.fn(),
@@ -465,6 +468,37 @@ describe("handleToolExecutionEnd media emission", () => {
expect(ctx.state.pendingToolMediaUrls).toStrictEqual([]);
});
// Scenario: tool output emission is enabled in "plain" format and
// `meeting_notes` is listed in the run's trusted local-media tool names. The
// tool result carries the same local path twice: once as a MEDIA: line in the
// text content and once in the structured `details.media.mediaUrls` list.
it("does not queue trusted bundled plugin media already emitted in plain verbose output", async () => {
const ctx = createMockContext({
shouldEmitToolOutput: true,
toolResultFormat: "plain",
trustedLocalMediaToolNames: new Set(["meeting_notes"]),
});
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "meeting_notes",
toolCallId: "tc-1",
isError: false,
result: {
content: [
{
type: "text",
text: "Meeting audio attached.\nMEDIA:/tmp/meeting.wav",
},
],
details: {
media: {
mediaUrls: ["/tmp/meeting.wav"],
},
},
},
});
// The tool output is emitted exactly once, and nothing is left queued in the
// pending media list afterwards.
expect(ctx.emitToolOutput).toHaveBeenCalledTimes(1);
expect(ctx.state.pendingToolMediaUrls).toStrictEqual([]);
});
it("queues structured media once for markdown verbose output", async () => {
const ctx = await handleVerboseGeneratedImage("markdown");

View File

@@ -537,13 +537,14 @@ async function collectEmittedToolOutputMediaUrls(
toolName: string,
outputText: string,
result: unknown,
trustedLocalMediaToolNames?: ReadonlySet<string>,
): Promise<string[]> {
const { splitMediaFromOutput } = await loadMediaParse();
const mediaUrls = splitMediaFromOutput(outputText).mediaUrls ?? [];
if (mediaUrls.length === 0) {
return [];
}
return filterToolResultMediaUrls(toolName, mediaUrls, result);
return filterToolResultMediaUrls(toolName, mediaUrls, result, trustedLocalMediaToolNames);
}
function readExecApprovalPendingDetails(result: unknown): {
@@ -712,7 +713,12 @@ async function emitToolResultOutput(params: {
const outputText = extractToolResultText(sanitizedResult);
const mediaReply = isToolError ? undefined : extractToolResultMediaArtifact(result);
const mediaUrls = mediaReply
? filterToolResultMediaUrls(rawToolName, mediaReply.mediaUrls, result, ctx.builtinToolNames)
? filterToolResultMediaUrls(
rawToolName,
mediaReply.mediaUrls,
result,
ctx.trustedLocalMediaToolNames,
)
: [];
const shouldEmitOutput =
!shouldSuppressStructuredMediaToolOutput({
@@ -730,6 +736,7 @@ async function emitToolResultOutput(params: {
rawToolName,
outputText,
result,
ctx.trustedLocalMediaToolNames,
);
}
}

View File

@@ -132,6 +132,7 @@ export type EmbeddedPiSubscribeContext = {
blockChunker: EmbeddedBlockChunker | null;
hookRunner?: HookRunner;
builtinToolNames?: ReadonlySet<string>;
trustedLocalMediaToolNames?: ReadonlySet<string>;
noteLastAssistant: (msg: AgentMessage) => void;
shouldEmitToolResult: () => boolean;
@@ -244,6 +245,7 @@ export type ToolHandlerContext = {
log: EmbeddedSubscribeLogger;
hookRunner?: HookRunner;
builtinToolNames?: ReadonlySet<string>;
trustedLocalMediaToolNames?: ReadonlySet<string>;
flushBlockReplyBuffer: () => void | Promise<void>;
shouldEmitToolResult: () => boolean;
shouldEmitToolOutput: () => boolean;

View File

@@ -53,6 +53,7 @@ describe("subscribeEmbeddedPiSession", () => {
subscribeEmbeddedPiSession({
session,
...options,
trustedLocalMediaToolNames: options.trustedLocalMediaToolNames ?? options.builtinToolNames,
});
return { emit };
}

View File

@@ -340,8 +340,14 @@ describe("extractToolResultMediaPaths", () => {
expect(isToolResultMediaTrusted("video_generate")).toBe(true);
});
it("trusts bundled plugin tool local MEDIA paths", () => {
expect(isToolResultMediaTrusted("music_generate")).toBe(true);
it("does not trust bundled plugin tool names without run-local metadata", () => {
expect(isToolResultMediaTrusted("meeting_notes")).toBe(false);
});
it("trusts bundled plugin tool names carried by run-local metadata", () => {
expect(isToolResultMediaTrusted("meeting_notes", undefined, new Set(["meeting_notes"]))).toBe(
true,
);
});
it("blocks trusted-media aliases that are not exact registered built-ins", () => {
@@ -382,18 +388,15 @@ describe("extractToolResultMediaPaths", () => {
).toEqual(["/tmp/reply.opus"]);
});
it("keeps local media for bundled plugin tool names registered in this run", () => {
// music_generate is a bundled-plugin trusted tool; when the runner
// registers it for this run, its raw name must be allowed through the
// exact-name gate just like a core built-in.
it("keeps local media for bundled plugin tool names trusted in this run", () => {
expect(
filterToolResultMediaUrls(
"music_generate",
["/tmp/song.mp3"],
"meeting_notes",
["/tmp/meeting.wav"],
undefined,
new Set(["music_generate"]),
new Set(["meeting_notes"]),
),
).toEqual(["/tmp/song.mp3"]);
).toEqual(["/tmp/meeting.wav"]);
});
it("strips local media for plugin-name collisions when the plugin is not registered", () => {

View File

@@ -2,7 +2,6 @@ import { getChannelPlugin, normalizeChannelId } from "../channels/plugins/index.
import { normalizeTargetForProvider } from "../infra/outbound/target-normalization.js";
import { redactSensitiveFieldValue, redactToolPayloadText } from "../logging/redact.js";
import { splitMediaFromOutput } from "../media/parse.js";
import { pluginRegistrationContractRegistry } from "../plugins/contracts/registry.js";
import {
normalizeOptionalLowercaseString,
normalizeOptionalString,
@@ -277,8 +276,8 @@ export function extractToolResultText(result: unknown): string | undefined {
return texts.join("\n");
}
// Core tool names that are allowed to emit local MEDIA: paths.
// Plugin/MCP tools are intentionally excluded to prevent untrusted file reads.
// Core tool names that are allowed to emit local MEDIA: paths. Plugin tools
// must be explicitly passed as trusted run-local names by the caller.
const TRUSTED_TOOL_RESULT_MEDIA = new Set([
"agents_list",
"apply_patch",
@@ -310,11 +309,15 @@ const TRUSTED_TOOL_RESULT_MEDIA = new Set([
"x_search",
"write",
]);
const TRUSTED_BUNDLED_PLUGIN_MEDIA_TOOLS = new Set(
pluginRegistrationContractRegistry.flatMap((entry) => entry.toolNames),
);
const HTTP_URL_RE = /^https?:\/\//i;
/**
 * Reports whether a tool name, once normalized, is one of the core tool names
 * in `TRUSTED_TOOL_RESULT_MEDIA`.
 *
 * @param toolName Tool name to check; a missing or empty name is never trusted.
 * @returns `true` when the normalized name is in the core trusted set.
 */
export function isCoreToolResultMediaTrustedName(toolName?: string): boolean {
  return toolName ? TRUSTED_TOOL_RESULT_MEDIA.has(normalizeToolName(toolName)) : false;
}
function readToolResultDetails(result: unknown): Record<string, unknown> | undefined {
if (!result || typeof result !== "object") {
return undefined;
@@ -338,20 +341,29 @@ function isExternalToolResult(result: unknown): boolean {
return typeof details.mcpServer === "string" || typeof details.mcpTool === "string";
}
export function isToolResultMediaTrusted(toolName?: string, result?: unknown): boolean {
export function isToolResultMediaTrusted(
toolName?: string,
result?: unknown,
trustedLocalMediaToolNames?: ReadonlySet<string>,
): boolean {
if (!toolName || isExternalToolResult(result)) {
return false;
}
const normalized = normalizeToolName(toolName);
return (
TRUSTED_TOOL_RESULT_MEDIA.has(normalized) || TRUSTED_BUNDLED_PLUGIN_MEDIA_TOOLS.has(normalized)
);
const registeredName = toolName.trim();
if (registeredName && trustedLocalMediaToolNames?.has(registeredName) === true) {
return true;
}
return isCoreToolResultMediaTrustedName(toolName);
}
function isTrustedOwnedTtsLocalMedia(toolName: string | undefined, result: unknown): boolean {
function isTrustedOwnedTtsLocalMedia(
toolName: string | undefined,
result: unknown,
trustedLocalMediaToolNames?: ReadonlySet<string>,
): boolean {
if (
!toolName ||
!isToolResultMediaTrusted(toolName, result) ||
!isToolResultMediaTrusted(toolName, result, trustedLocalMediaToolNames) ||
normalizeToolName(toolName) !== "tts"
) {
return false;
@@ -367,25 +379,29 @@ export function filterToolResultMediaUrls(
toolName: string | undefined,
mediaUrls: string[],
result?: unknown,
builtinToolNames?: ReadonlySet<string>,
trustedLocalMediaToolNames?: ReadonlySet<string>,
): string[] {
if (mediaUrls.length === 0) {
return mediaUrls;
}
const trustedOwnedTtsLocalMedia = isTrustedOwnedTtsLocalMedia(toolName, result);
if (isToolResultMediaTrusted(toolName, result)) {
// When the current run provides its exact registered tool names (core
// built-ins plus bundled/trusted plugin tools), require the raw emitted
// tool name to match one of them before allowing local MEDIA: paths.
const trustedOwnedTtsLocalMedia = isTrustedOwnedTtsLocalMedia(
toolName,
result,
trustedLocalMediaToolNames,
);
if (isToolResultMediaTrusted(toolName, result, trustedLocalMediaToolNames)) {
// When the current run provides its exact trusted local-media tool names,
// require the raw emitted tool name to match one of them before allowing
// local MEDIA: paths.
// This blocks normalized aliases and case-variant collisions such as
// "Bash" -> "bash" or "Web_Search" -> "web_search" from inheriting a
// registered tool's media trust. TTS-generated local files carry a
// separate trusted-media flag from the owned tool result, so they can
// survive runs whose exact built-in set omitted the raw tts name.
if (builtinToolNames !== undefined) {
// survive runs whose exact trusted set omitted the raw tts name.
if (trustedLocalMediaToolNames !== undefined) {
if (!trustedOwnedTtsLocalMedia) {
const registeredName = toolName?.trim();
if (!registeredName || !builtinToolNames.has(registeredName)) {
if (!registeredName || !trustedLocalMediaToolNames.has(registeredName)) {
return mediaUrls.filter((url) => HTTP_URL_RE.test(url.trim()));
}
}

View File

@@ -565,7 +565,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
toolName,
mediaUrls ?? [],
result,
params.builtinToolNames,
params.trustedLocalMediaToolNames,
);
if (
params.sourceReplyDeliveryMode === "message_tool_only" &&
@@ -1010,6 +1010,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
blockChunker,
hookRunner: params.hookRunner,
builtinToolNames: params.builtinToolNames,
trustedLocalMediaToolNames: params.trustedLocalMediaToolNames,
noteLastAssistant,
shouldEmitToolResult,
shouldEmitToolOutput,

View File

@@ -70,10 +70,14 @@ export type SubscribeEmbeddedPiSessionParams = {
/** Agent identity for hook context — resolved from session config in attempt.ts. */
agentId?: string;
/**
* Exact raw names of non-plugin OpenClaw tools registered for this run.
* When provided, MEDIA: passthrough requires an exact match instead of only
* a normalized-name collision with a trusted built-in.
* Exact raw names of OpenClaw tools registered for this run.
*/
builtinToolNames?: ReadonlySet<string>;
/**
* Exact raw names allowed to emit local MEDIA: paths for this run.
* Includes core trusted tools plus bundled plugin tools proven from the
* startup metadata snapshot.
*/
trustedLocalMediaToolNames?: ReadonlySet<string>;
internalEvents?: AgentInternalEvent[];
};

View File

@@ -1364,8 +1364,11 @@ describe("resolvePluginTools optional tools", () => {
expectResolvedToolNames(first, ["other_tool", "optional_tool"]);
expectResolvedToolNames(second, ["other_tool", "optional_tool"]);
expect(getPluginToolMeta(first[0])?.optional).toBe(false);
expect(getPluginToolMeta(first[0])?.trustedLocalMedia).toBe(true);
expect(getPluginToolMeta(first[1])?.optional).toBe(true);
expect(getPluginToolMeta(first[1])?.trustedLocalMedia).toBe(true);
expect(getPluginToolMeta(second[1])?.optional).toBe(true);
expect(getPluginToolMeta(second[1])?.trustedLocalMedia).toBe(true);
expect(factory).toHaveBeenCalledTimes(1);
});

View File

@@ -38,6 +38,7 @@ export {
export type PluginToolMeta = {
pluginId: string;
optional: boolean;
trustedLocalMedia?: boolean;
};
type PluginToolFactoryTimingResult = "array" | "error" | "null" | "single";
@@ -139,6 +140,16 @@ function isPluginToolOptional(params: {
);
}
/**
 * Decides whether a plugin tool is marked as a trusted local-media tool based
 * on its manifest record.
 *
 * A tool qualifies only when the manifest's `origin` is `"bundled"` and the
 * manifest's `contracts.tools` list contains the tool name.
 *
 * @param params.manifestPlugin Manifest record of the owning plugin, if known.
 * @param params.toolName Tool name to look up in the manifest's tool contracts.
 * @returns `true` only when both conditions hold; `false` otherwise, including
 *   when no manifest record is available.
 */
function isTrustedManifestLocalMediaTool(params: {
  manifestPlugin: PluginManifestRecord | undefined;
  toolName: string;
}): boolean {
  const { manifestPlugin, toolName } = params;
  if (manifestPlugin?.origin !== "bundled") {
    return false;
  }
  const declaredTools = manifestPlugin.contracts?.tools;
  return declaredTools?.includes(toolName) === true;
}
function isOptionalToolAllowed(params: {
toolName: string;
pluginId: string;
@@ -530,6 +541,7 @@ function cachedDescriptorsCoverToolNames(params: {
function createCachedDescriptorPluginTool(params: {
descriptor: CachedPluginToolDescriptor;
plugin: PluginManifestRecord;
ctx: OpenClawPluginToolContext;
loadContext: ReturnType<typeof resolvePluginRuntimeLoadContext>;
runtimeOptions: PluginLoadOptions["runtimeOptions"];
@@ -601,6 +613,10 @@ function createCachedDescriptorPluginTool(params: {
setPluginToolMeta(tool, {
pluginId,
optional: params.descriptor.optional,
trustedLocalMedia: isTrustedManifestLocalMediaTool({
manifestPlugin: params.plugin,
toolName,
}),
});
return tool;
}
@@ -728,6 +744,7 @@ function resolveCachedPluginTools(params: {
pluginTools.push(
createCachedDescriptorPluginTool({
descriptor: cachedDescriptor,
plugin,
ctx: params.ctx,
loadContext: params.loadContext,
runtimeOptions: params.runtimeOptions,
@@ -1195,6 +1212,10 @@ export function resolvePluginTools(params: {
pluginToolMeta.set(tool, {
pluginId: entry.pluginId,
optional,
trustedLocalMedia: isTrustedManifestLocalMediaTool({
manifestPlugin,
toolName: tool.name,
}),
});
if (manifestPlugin) {
const capturedDescriptors = capturedDescriptorsByPluginId.get(entry.pluginId) ?? [];