mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 05:51:15 +08:00
Adapt image compression quality by model (#85742)
* feat: adapt image compression quality
* refactor: move image limits into model metadata
* test: cover adaptive image downscaling
* test: cover image tool live providers
* fix: apply media metadata to all image paths
* fix: align providerless image compression
* fix: add chutes runtime image limits
* fix: optimize image data urls with model limits
* fix: type media metadata merge
* fix: optimize data url byte limits after decode
* fix: preserve data url optimizer fallback
* fix: keep low-side image compression fallbacks
* fix: enforce data url image compression policy
* fix: preserve gif data url media policy
* fix: satisfy adaptive image type checks
* test: keep cron provider-runtime mock current
This commit is contained in:
committed by
GitHub
parent
00388134c4
commit
4c210e22fa
@@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Gateway/perf: lazy-load startup-idle plugin work, core gateway method handlers, and the embedded ACPX runtime so Gateway health and ready signals no longer wait on unused handler trees or ACPX probes.
|
||||
- Gateway/perf: cache plugin SDK public-surface alias maps and skip irrelevant macOS Linuxbrew PATH probes so Gateway startup avoids repeated filesystem walks and slow missing-directory stats.
|
||||
- Image tool: add adaptive model-aware image compression with an `agents.defaults.imageQuality` preference for choosing token-efficient, balanced, or high-detail media handling.
|
||||
- Meeting Notes: add a source-only external meeting-notes plugin and SDK source-provider contract outside the core npm package, with auto-start capture config, manual transcript imports, read-only `openclaw meeting-notes` CLI access, and Discord voice as the first live source.
|
||||
- Docs/channels/config: add Signal `configPath`, Telegram wildcard topic defaults, local-time backup archive names, Termux home fallback, include-path validation, secret-scanner-safe placeholder guidance, Gemini CLI/Antigravity media guidance, and macOS VM auto-login guidance. Thanks @NorseGaud, @yudistiraashadi, @huangqian8, @VibhorGautam, @maweibin, @tianxingleo, @IgnacioPro, and @xzcxzcyy-claw.
|
||||
- Docs: clarify model-usage portability, Codex migration prerequisites, status bootstrap wording, thread-bound subagent limits, hook ownership, and config-preserving safety guidance. Thanks @aniruddhaadak80, @leno23, @TomDjerry, @matthewxmurphy, @vincentkoc, and @stablegenius49.
|
||||
|
||||
@@ -327,6 +327,26 @@ Higher values preserve more visual detail.
|
||||
}
|
||||
```
|
||||
|
||||
### `agents.defaults.imageQuality`
|
||||
|
||||
Image-tool compression/detail preference for images loaded from file paths, URLs, and media references.
|
||||
Default: `auto`.
|
||||
|
||||
OpenClaw adapts the resize ladder to the selected image model. For example, Claude Opus 4.7, OpenAI GPT-5.5, Qwen VL, and hosted Llama 4 vision models can use larger images than older models or the default high-detail vision path. In `auto` mode, turns that contain multiple images are compressed more aggressively to control token and latency cost.
|
||||
|
||||
Values:
|
||||
|
||||
- `auto`: adapt to model limits and image count.
|
||||
- `efficient`: prefer smaller images for lower token and byte usage.
|
||||
- `balanced`: use the standard middle-ground ladder.
|
||||
- `high`: preserve more detail for screenshots, diagrams, and document images.
|
||||
|
||||
```json5
|
||||
{
|
||||
agents: { defaults: { imageQuality: "auto" } },
|
||||
}
|
||||
```
|
||||
|
||||
### `agents.defaults.userTimezone`
|
||||
|
||||
Timezone for system prompt context (not message timestamps). Falls back to host timezone.
|
||||
|
||||
@@ -10,6 +10,17 @@ const CLAUDE_CLI_MODEL_LABELS: Record<string, string> = {
|
||||
"claude-sonnet-4-6": "Claude Sonnet 4.6 (Claude CLI)",
|
||||
};
|
||||
|
||||
/**
 * Builds the image media-input metadata attached to a Claude CLI catalog entry.
 *
 * The id `claude-opus-4-7` gets a 2576px side limit; every other id gets
 * 1568px. The preferred side always equals the maximum side.
 *
 * @param id - Claude CLI model id.
 * @returns Media-input metadata containing only an `image` section.
 */
function resolveClaudeCliImageMediaInput(id: string): ModelCatalogEntry["mediaInput"] {
  let sideLimitPx = 1568;
  if (id === "claude-opus-4-7") {
    sideLimitPx = 2576;
  }
  return {
    image: {
      maxSidePx: sideLimitPx,
      preferredSidePx: sideLimitPx,
      tokenMode: "provider",
    },
  };
}
|
||||
|
||||
function extractClaudeCliModelIds(): string[] {
|
||||
const ids: string[] = [];
|
||||
const seen = new Set<string>();
|
||||
@@ -34,6 +45,7 @@ export function buildClaudeCliCatalogEntries(): ModelCatalogEntry[] {
|
||||
provider: CLAUDE_CLI_BACKEND_ID,
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
mediaInput: resolveClaudeCliImageMediaInput(id),
|
||||
contextWindow: CLAUDE_CLI_DEFAULT_CONTEXT_WINDOW,
|
||||
}));
|
||||
}
|
||||
|
||||
@@ -541,6 +541,34 @@ describe("anthropic provider replay hooks", () => {
|
||||
} as never);
|
||||
|
||||
expect(normalized?.input).toEqual(["text", "image"]);
|
||||
expect(normalized?.mediaInput).toEqual({
|
||||
image: { maxSidePx: 1568, preferredSidePx: 1568, tokenMode: "provider" },
|
||||
});
|
||||
});
|
||||
|
||||
it("merges partial Claude image media metadata with provider limits", async () => {
|
||||
const provider = await registerSingleProviderPlugin(anthropicPlugin);
|
||||
|
||||
const normalized = provider.normalizeResolvedModel?.({
|
||||
provider: "anthropic",
|
||||
modelId: "claude-opus-4-7",
|
||||
model: {
|
||||
id: "claude-opus-4-7",
|
||||
name: "Claude Opus 4.7",
|
||||
provider: "anthropic",
|
||||
api: "anthropic-messages",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 200_000,
|
||||
maxTokens: 64_000,
|
||||
mediaInput: { image: { maxBytes: 1 } },
|
||||
},
|
||||
} as never);
|
||||
|
||||
expect(normalized?.mediaInput).toEqual({
|
||||
image: { maxBytes: 1, maxSidePx: 2576, preferredSidePx: 2576, tokenMode: "provider" },
|
||||
});
|
||||
});
|
||||
|
||||
it("normalizes GA 1M Claude variants to 1M context", async () => {
|
||||
@@ -577,6 +605,29 @@ describe("anthropic provider replay hooks", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("does not normalize legacy Claude 4.5 models to 1M context", async () => {
|
||||
const provider = await registerSingleProviderPlugin(anthropicPlugin);
|
||||
|
||||
const normalized = provider.normalizeResolvedModel?.({
|
||||
provider: "anthropic",
|
||||
modelId: "claude-sonnet-4-5",
|
||||
model: {
|
||||
id: "claude-sonnet-4-5",
|
||||
name: "Claude Sonnet 4.5",
|
||||
provider: "anthropic",
|
||||
api: "anthropic-messages",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 200_000,
|
||||
contextTokens: 200_000,
|
||||
maxTokens: 32_000,
|
||||
},
|
||||
} as never);
|
||||
|
||||
expect(normalized).toBeUndefined();
|
||||
});
|
||||
|
||||
it("resolves claude-cli synthetic oauth auth", async () => {
|
||||
readClaudeCliCredentialsForRuntimeMock.mockReset();
|
||||
readClaudeCliCredentialsForRuntimeMock.mockReturnValue({
|
||||
|
||||
@@ -381,6 +381,25 @@ function supportsAnthropicImageInput(modelId: string, modelName?: string): boole
|
||||
.some((candidate) => matchesAnthropicModernModel(candidate));
|
||||
}
|
||||
|
||||
/**
 * Resolves image media-input limits for an Anthropic model.
 *
 * Returns `undefined` when `supportsAnthropicImageInput` rejects the model.
 * Otherwise the side limit is 2576px when the normalized model id or name
 * starts with one of the Opus 4.7 id prefixes, and 1568px for everything else.
 *
 * @param modelId - Model id to inspect.
 * @param modelName - Optional display name, checked the same way as the id.
 * @returns Image limits with `tokenMode: "provider"`, or `undefined`.
 */
function resolveAnthropicImageMediaInput(modelId: string, modelName?: string) {
  if (!supportsAnthropicImageInput(modelId, modelName)) {
    return undefined;
  }
  const opus47Prefixes = [ANTHROPIC_OPUS_47_MODEL_ID, ANTHROPIC_OPUS_47_DOT_MODEL_ID];
  let isOpus47 = false;
  for (const candidate of [modelId, modelName]) {
    if (typeof candidate !== "string") {
      continue;
    }
    // Normalize once per reference, then compare against every known prefix.
    const normalized = normalizeLowercaseStringOrEmpty(candidate);
    if (opus47Prefixes.some((prefix) => normalized.startsWith(prefix))) {
      isOpus47 = true;
      break;
    }
  }
  const sidePx = isOpus47 ? 2576 : 1568;
  return {
    image: {
      maxSidePx: sidePx,
      preferredSidePx: sidePx,
      tokenMode: "provider" as const,
    },
  };
}
|
||||
|
||||
function applyAnthropicImageInputCapability(params: {
|
||||
modelId: string;
|
||||
model: ProviderRuntimeModel;
|
||||
@@ -401,13 +420,27 @@ function normalizeAnthropicResolvedModel(
|
||||
ctx: ProviderNormalizeResolvedModelContext,
|
||||
): ProviderRuntimeModel | undefined {
|
||||
const imageCapableModel = applyAnthropicImageInputCapability(ctx) ?? ctx.model;
|
||||
const mediaInput = resolveAnthropicImageMediaInput(ctx.modelId, imageCapableModel.name);
|
||||
const mediaInputModel = mediaInput
|
||||
? {
|
||||
...imageCapableModel,
|
||||
mediaInput: {
|
||||
...mediaInput,
|
||||
...imageCapableModel.mediaInput,
|
||||
image: {
|
||||
...mediaInput.image,
|
||||
...imageCapableModel.mediaInput?.image,
|
||||
},
|
||||
},
|
||||
}
|
||||
: imageCapableModel;
|
||||
const contextWindowModel =
|
||||
applyAnthropicGa1MContextWindow({
|
||||
config: ctx.config,
|
||||
provider: ctx.provider,
|
||||
modelId: ctx.modelId,
|
||||
model: imageCapableModel,
|
||||
}) ?? imageCapableModel;
|
||||
model: mediaInputModel,
|
||||
}) ?? mediaInputModel;
|
||||
return contextWindowModel === ctx.model ? undefined : contextWindowModel;
|
||||
}
|
||||
|
||||
|
||||
@@ -85,6 +85,20 @@ describe("chutes-models", () => {
|
||||
expect(def.compat.supportsUsageInStreaming).toBe(false);
|
||||
});
|
||||
|
||||
it("keeps Qwen VL image limits in the runtime catalog", () => {
|
||||
const visionModelIds = ["Qwen/Qwen2.5-VL-32B-Instruct", "Qwen/Qwen3-VL-235B-A22B-Instruct"];
|
||||
for (const id of visionModelIds) {
|
||||
const model = CHUTES_MODEL_CATALOG.find((candidate) => candidate.id === id);
|
||||
expect(model).toBeDefined();
|
||||
if (!model) {
|
||||
throw new Error(`expected ${id}`);
|
||||
}
|
||||
expect(buildChutesModelDefinition(model).mediaInput).toEqual({
|
||||
image: { maxPixels: 12845056, preferredSidePx: 2048, tokenMode: "provider" },
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
it("discoverChutesModels returns static catalog when accessToken is empty", async () => {
|
||||
const models = await discoverChutesModels("");
|
||||
expect(models).toHaveLength(CHUTES_MODEL_CATALOG.length);
|
||||
|
||||
@@ -349,6 +349,9 @@ export const CHUTES_MODEL_CATALOG: ModelDefinitionConfig[] = [
|
||||
name: "Qwen/Qwen2.5-VL-32B-Instruct",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
mediaInput: {
|
||||
image: { maxPixels: 12845056, preferredSidePx: 2048, tokenMode: "provider" },
|
||||
},
|
||||
contextWindow: 16384,
|
||||
maxTokens: 16384,
|
||||
cost: { input: 0.05, output: 0.22, cacheRead: 0, cacheWrite: 0 },
|
||||
@@ -358,6 +361,9 @@ export const CHUTES_MODEL_CATALOG: ModelDefinitionConfig[] = [
|
||||
name: "Qwen/Qwen3-VL-235B-A22B-Instruct",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
mediaInput: {
|
||||
image: { maxPixels: 12845056, preferredSidePx: 2048, tokenMode: "provider" },
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 262144,
|
||||
cost: { input: 0.3, output: 1.2, cacheRead: 0, cacheWrite: 0 },
|
||||
|
||||
@@ -567,6 +567,9 @@
|
||||
"name": "Qwen/Qwen2.5-VL-32B-Instruct",
|
||||
"reasoning": false,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxPixels": 12845056, "preferredSidePx": 2048, "tokenMode": "provider" }
|
||||
},
|
||||
"contextWindow": 16384,
|
||||
"maxTokens": 16384,
|
||||
"cost": {
|
||||
@@ -581,6 +584,9 @@
|
||||
"name": "Qwen/Qwen3-VL-235B-A22B-Instruct",
|
||||
"reasoning": false,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxPixels": 12845056, "preferredSidePx": 2048, "tokenMode": "provider" }
|
||||
},
|
||||
"contextWindow": 262144,
|
||||
"maxTokens": 262144,
|
||||
"cost": {
|
||||
|
||||
@@ -94,6 +94,9 @@
|
||||
"name": "Llama 4 Scout 17B",
|
||||
"reasoning": false,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxPixels": 33177600, "preferredSidePx": 2048, "tokenMode": "provider" }
|
||||
},
|
||||
"contextWindow": 131072,
|
||||
"maxTokens": 8192,
|
||||
"cost": {
|
||||
|
||||
@@ -73,6 +73,9 @@
|
||||
"name": "GPT-5.4",
|
||||
"reasoning": true,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxSidePx": 2048, "preferredSidePx": 2048, "tokenMode": "detail" }
|
||||
},
|
||||
"contextWindow": 272000,
|
||||
"maxTokens": 128000,
|
||||
"cost": { "input": 2.5, "output": 15, "cacheRead": 0.25, "cacheWrite": 0 }
|
||||
@@ -82,6 +85,9 @@
|
||||
"name": "GPT-5.4 mini",
|
||||
"reasoning": true,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxSidePx": 2048, "preferredSidePx": 2048, "tokenMode": "detail" }
|
||||
},
|
||||
"contextWindow": 400000,
|
||||
"maxTokens": 128000,
|
||||
"cost": { "input": 0.75, "output": 4.5, "cacheRead": 0.075, "cacheWrite": 0 }
|
||||
@@ -91,6 +97,9 @@
|
||||
"name": "GPT-5.4 nano",
|
||||
"reasoning": true,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxSidePx": 2048, "preferredSidePx": 2048, "tokenMode": "detail" }
|
||||
},
|
||||
"contextWindow": 400000,
|
||||
"maxTokens": 128000,
|
||||
"cost": { "input": 0.2, "output": 1.25, "cacheRead": 0.02, "cacheWrite": 0 }
|
||||
@@ -100,6 +109,9 @@
|
||||
"name": "GPT-5.4 Pro",
|
||||
"reasoning": true,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxSidePx": 2048, "preferredSidePx": 2048, "tokenMode": "detail" }
|
||||
},
|
||||
"contextWindow": 1050000,
|
||||
"maxTokens": 128000,
|
||||
"cost": { "input": 30, "output": 180, "cacheRead": 0, "cacheWrite": 0 }
|
||||
@@ -109,6 +121,9 @@
|
||||
"name": "GPT-5.5",
|
||||
"reasoning": true,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxSidePx": 6000, "preferredSidePx": 2048, "tokenMode": "detail" }
|
||||
},
|
||||
"contextWindow": 272000,
|
||||
"maxTokens": 128000,
|
||||
"cost": { "input": 5, "output": 30, "cacheRead": 0.5, "cacheWrite": 0 }
|
||||
@@ -190,6 +205,9 @@
|
||||
"name": "gpt-5.5-pro",
|
||||
"reasoning": true,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxSidePx": 6000, "preferredSidePx": 2048, "tokenMode": "detail" }
|
||||
},
|
||||
"contextWindow": 1000000,
|
||||
"maxTokens": 128000,
|
||||
"cost": { "input": 30, "output": 180, "cacheRead": 0, "cacheWrite": 0 }
|
||||
@@ -205,6 +223,9 @@
|
||||
"name": "gpt-5.5",
|
||||
"reasoning": true,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxSidePx": 6000, "preferredSidePx": 2048, "tokenMode": "detail" }
|
||||
},
|
||||
"contextWindow": 400000,
|
||||
"contextTokens": 272000,
|
||||
"maxTokens": 128000,
|
||||
@@ -215,6 +236,9 @@
|
||||
"name": "gpt-5.4",
|
||||
"reasoning": true,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxSidePx": 2048, "preferredSidePx": 2048, "tokenMode": "detail" }
|
||||
},
|
||||
"contextWindow": 1050000,
|
||||
"contextTokens": 272000,
|
||||
"maxTokens": 128000,
|
||||
@@ -225,6 +249,9 @@
|
||||
"name": "gpt-5.4-pro",
|
||||
"reasoning": true,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxSidePx": 2048, "preferredSidePx": 2048, "tokenMode": "detail" }
|
||||
},
|
||||
"contextWindow": 1050000,
|
||||
"contextTokens": 272000,
|
||||
"maxTokens": 128000,
|
||||
@@ -235,6 +262,9 @@
|
||||
"name": "gpt-5.4-mini",
|
||||
"reasoning": true,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxSidePx": 2048, "preferredSidePx": 2048, "tokenMode": "detail" }
|
||||
},
|
||||
"contextWindow": 400000,
|
||||
"contextTokens": 272000,
|
||||
"maxTokens": 128000,
|
||||
@@ -245,6 +275,9 @@
|
||||
"name": "gpt-5.5-pro",
|
||||
"reasoning": true,
|
||||
"input": ["text", "image"],
|
||||
"mediaInput": {
|
||||
"image": { "maxSidePx": 6000, "preferredSidePx": 2048, "tokenMode": "detail" }
|
||||
},
|
||||
"contextWindow": 1000000,
|
||||
"contextTokens": 272000,
|
||||
"maxTokens": 128000,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import type { ModelCompatConfig } from "../config/types.models.js";
|
||||
import type { ModelCompatConfig, ModelMediaInputConfig } from "../config/types.models.js";
|
||||
|
||||
export type ModelInputType = "text" | "image" | "audio" | "video" | "document";
|
||||
|
||||
@@ -12,4 +12,5 @@ export type ModelCatalogEntry = {
|
||||
reasoning?: boolean;
|
||||
input?: ModelInputType[];
|
||||
compat?: ModelCompatConfig;
|
||||
mediaInput?: ModelMediaInputConfig;
|
||||
};
|
||||
|
||||
@@ -548,7 +548,7 @@ function resolveFallbackSoonestCooldownExpiry(params: {
|
||||
return soonest;
|
||||
}
|
||||
|
||||
function resolveImageFallbackCandidates(
|
||||
export function resolveImageFallbackCandidates(
|
||||
params: {
|
||||
cfg: OpenClawConfig | undefined;
|
||||
defaultProvider: string;
|
||||
@@ -605,7 +605,7 @@ function resolveImageFallbackCandidates(
|
||||
return candidates;
|
||||
}
|
||||
|
||||
function resolveImageFallbackDefaultProvider(cfg: OpenClawConfig | undefined): string {
|
||||
export function resolveImageFallbackDefaultProvider(cfg: OpenClawConfig | undefined): string {
|
||||
const configuredPrimary = resolveAgentModelPrimaryValue(cfg?.agents?.defaults?.imageModel);
|
||||
if (configuredPrimary?.trim()) {
|
||||
const aliasIndex = buildModelAliasIndex({
|
||||
|
||||
@@ -41,6 +41,9 @@ function createMistralManifestPlugin(overrides?: {
|
||||
contextWindow: 262144,
|
||||
maxTokens: 8192,
|
||||
cost: { input: 1.5, output: 7.5, cacheRead: 0, cacheWrite: 0 },
|
||||
mediaInput: {
|
||||
image: { maxSidePx: 2048, preferredSidePx: 1536, tokenMode: "provider" },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
@@ -80,6 +83,9 @@ describe("resolveBundledStaticCatalogModel", () => {
|
||||
id: "mistral-medium-3-5",
|
||||
input: ["text", "image"],
|
||||
maxTokens: 8192,
|
||||
mediaInput: {
|
||||
image: { maxSidePx: 2048, preferredSidePx: 1536, tokenMode: "provider" },
|
||||
},
|
||||
name: "Mistral Medium 3.5",
|
||||
provider: "mistral",
|
||||
reasoning: true,
|
||||
|
||||
@@ -39,6 +39,7 @@ function modelFromStaticCatalogRow(row: NormalizedModelCatalogRow): Model<Api> {
|
||||
maxTokens: row.maxTokens,
|
||||
headers: row.headers,
|
||||
compat: row.compat,
|
||||
mediaInput: row.mediaInput,
|
||||
} as Model<Api>;
|
||||
}
|
||||
|
||||
|
||||
@@ -498,6 +498,9 @@ describe("resolveModel", () => {
|
||||
input: ["text", "image"],
|
||||
contextWindow: 262144,
|
||||
maxTokens: 8192,
|
||||
mediaInput: {
|
||||
image: { maxSidePx: 2048, preferredSidePx: 1536, tokenMode: "provider" },
|
||||
},
|
||||
});
|
||||
const cfg = {
|
||||
models: {
|
||||
@@ -539,6 +542,101 @@ describe("resolveModel", () => {
|
||||
expect(discoverModels).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("merges bundled static media input into resolved models when opted in", async () => {
|
||||
mockDiscoveredModel(discoverModels, {
|
||||
provider: "openai",
|
||||
modelId: "gpt-5.5-pro",
|
||||
templateModel: {
|
||||
id: "gpt-5.5-pro",
|
||||
name: "GPT-5.5 Pro",
|
||||
provider: "openai",
|
||||
api: "openai-responses",
|
||||
baseUrl: "https://api.openai.com/v1",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 272_000,
|
||||
maxTokens: 128_000,
|
||||
},
|
||||
});
|
||||
resolveBundledStaticCatalogModelMock.mockReturnValueOnce({
|
||||
provider: "openai",
|
||||
id: "gpt-5.5-pro",
|
||||
name: "GPT-5.5 Pro",
|
||||
api: "openai-responses",
|
||||
baseUrl: "https://api.openai.com/v1",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 272_000,
|
||||
maxTokens: 128_000,
|
||||
mediaInput: {
|
||||
image: { maxSidePx: 6000, preferredSidePx: 2048, tokenMode: "detail" },
|
||||
},
|
||||
});
|
||||
|
||||
const result = await resolveModelAsync("openai", "gpt-5.5-pro", "/tmp/agent", undefined, {
|
||||
allowBundledStaticCatalogFallback: true,
|
||||
authStorage: { mocked: true } as never,
|
||||
modelRegistry: discoverModels({ mocked: true } as never, "/tmp/agent"),
|
||||
runtimeHooks: createRuntimeHooks(),
|
||||
skipPiDiscovery: true,
|
||||
});
|
||||
|
||||
expect((expectResolvedModel(result) as { mediaInput?: unknown }).mediaInput).toEqual({
|
||||
image: { maxSidePx: 6000, preferredSidePx: 2048, tokenMode: "detail" },
|
||||
});
|
||||
expect(resolveBundledStaticCatalogModelMock).toHaveBeenCalledWith({
|
||||
provider: "openai",
|
||||
modelId: "gpt-5.5-pro",
|
||||
cfg: undefined,
|
||||
workspaceDir: undefined,
|
||||
});
|
||||
});
|
||||
|
||||
it("merges configured media input with discovered model metadata", () => {
|
||||
mockDiscoveredModel(discoverModels, {
|
||||
provider: "custom",
|
||||
modelId: "vision-model",
|
||||
templateModel: {
|
||||
id: "vision-model",
|
||||
name: "Vision Model",
|
||||
provider: "custom",
|
||||
api: "openai-responses",
|
||||
baseUrl: "https://models.example.com/v1",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 8192,
|
||||
maxTokens: 1024,
|
||||
mediaInput: {
|
||||
image: { maxSidePx: 2048, preferredSidePx: 1536, tokenMode: "provider" },
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const result = resolveModelForTest("custom", "vision-model", "/tmp/agent", {
|
||||
models: {
|
||||
providers: {
|
||||
custom: {
|
||||
baseUrl: "https://models.example.com/v1",
|
||||
models: [
|
||||
{
|
||||
id: "vision-model",
|
||||
name: "Vision Model",
|
||||
mediaInput: { image: { maxBytes: 1 } },
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
} as unknown as OpenClawConfig);
|
||||
|
||||
expect((expectResolvedModel(result) as { mediaInput?: unknown }).mediaInput).toEqual({
|
||||
image: { maxBytes: 1, maxSidePx: 2048, preferredSidePx: 1536, tokenMode: "provider" },
|
||||
});
|
||||
});
|
||||
|
||||
it("does not use bundled static catalog rows unless the caller opts in", async () => {
|
||||
const result = await resolveModelAsync(
|
||||
"mistral",
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
type AuthStorage,
|
||||
type ModelRegistry,
|
||||
} from "@earendil-works/pi-coding-agent";
|
||||
import type { ModelMediaInputConfig } from "../../config/types.models.js";
|
||||
import type { OpenClawConfig } from "../../config/types.openclaw.js";
|
||||
import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js";
|
||||
import {
|
||||
@@ -363,6 +364,29 @@ function resolveProviderRequestTimeoutMs(timeoutSeconds: unknown): number | unde
|
||||
return Math.floor(timeoutSeconds) * 1000;
|
||||
}
|
||||
|
||||
/**
 * Merges two media-input configs, with `override` taking precedence.
 *
 * When either side is missing, the other is returned as-is (same reference).
 * Otherwise top-level keys are shallow-merged and the `image` sections are
 * merged field by field, so an override that only sets one image field keeps
 * the remaining fields from `base`. When neither side has an `image` section,
 * the result carries `image: undefined`.
 *
 * @param base - Lower-priority metadata.
 * @param override - Higher-priority metadata.
 * @returns The merged config, or `undefined` when both inputs are missing.
 */
function mergeModelMediaInput(
  base: ModelMediaInputConfig | undefined,
  override: ModelMediaInputConfig | undefined,
): ModelMediaInputConfig | undefined {
  if (!base || !override) {
    return base ? base : override;
  }
  const hasImageSection = Boolean(base.image || override.image);
  const image = hasImageSection ? { ...base.image, ...override.image } : undefined;
  return { ...base, ...override, image };
}
|
||||
|
||||
function matchesProviderScopedModelId(params: {
|
||||
candidateId?: string;
|
||||
provider: string;
|
||||
@@ -702,6 +726,10 @@ function applyConfiguredProviderOverrides(params: {
|
||||
...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
|
||||
headers: requestConfig.headers,
|
||||
compat: metadataOverrideModel?.compat ?? discoveredModel.compat,
|
||||
mediaInput: mergeModelMediaInput(
|
||||
discoveredModel.mediaInput,
|
||||
metadataOverrideModel?.mediaInput,
|
||||
),
|
||||
},
|
||||
providerRequest,
|
||||
),
|
||||
@@ -913,7 +941,10 @@ function resolveConfiguredFallbackModel(params: {
|
||||
}
|
||||
const fallbackTransport = resolveProviderTransport({
|
||||
provider,
|
||||
api: normalizeResolvedTransportApi(configuredModel?.api) ?? resolveConfiguredProviderDefaultApi(providerConfig) ?? "openai-responses",
|
||||
api:
|
||||
normalizeResolvedTransportApi(configuredModel?.api) ??
|
||||
resolveConfiguredProviderDefaultApi(providerConfig) ??
|
||||
"openai-responses",
|
||||
baseUrl: configuredModel?.baseUrl ?? providerConfig?.baseUrl,
|
||||
cfg,
|
||||
workspaceDir,
|
||||
@@ -968,6 +999,7 @@ function resolveConfiguredFallbackModel(params: {
|
||||
...(resolvedParams ? { params: resolvedParams } : {}),
|
||||
...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
|
||||
headers: requestConfig.headers,
|
||||
mediaInput: configuredModel?.mediaInput,
|
||||
} as Model<Api>,
|
||||
providerRequest,
|
||||
),
|
||||
@@ -1270,6 +1302,20 @@ export async function resolveModelAsync(
|
||||
});
|
||||
}
|
||||
}
|
||||
if (model && options?.allowBundledStaticCatalogFallback) {
|
||||
const staticCatalogModel = resolveBundledStaticCatalogModel({
|
||||
provider: normalizedRef.provider,
|
||||
modelId: normalizedRef.model,
|
||||
cfg,
|
||||
workspaceDir,
|
||||
});
|
||||
const staticMediaInput = (staticCatalogModel as ProviderRuntimeModel | undefined)?.mediaInput;
|
||||
const resolvedMediaInput = (model as ProviderRuntimeModel).mediaInput;
|
||||
const mediaInput = mergeModelMediaInput(staticMediaInput, resolvedMediaInput);
|
||||
if (mediaInput) {
|
||||
model = { ...(model as ProviderRuntimeModel), mediaInput } as typeof model;
|
||||
}
|
||||
}
|
||||
if (model) {
|
||||
return { model, authStorage, modelRegistry };
|
||||
}
|
||||
|
||||
253
src/agents/tools/image-tool.providers.live.test.ts
Normal file
253
src/agents/tools/image-tool.providers.live.test.ts
Normal file
@@ -0,0 +1,253 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import type { ModelApi } from "../../config/types.models.js";
|
||||
import type { OpenClawConfig } from "../../config/types.openclaw.js";
|
||||
import { resizeToJpeg } from "../../media/media-services.js";
|
||||
import { encodePngRgba, fillPixel } from "../../media/png-encode.js";
|
||||
import {
|
||||
describeImageWithModel,
|
||||
type ImageDescriptionRequest,
|
||||
type MediaUnderstandingProvider,
|
||||
} from "../../plugin-sdk/media-understanding.js";
|
||||
import { isOverloadedErrorMessage, isServerErrorMessage } from "../../plugin-sdk/test-env.js";
|
||||
import { isLiveTestEnabled } from "../live-test-helpers.js";
|
||||
import { createImageTool, testing } from "./image-tool.js";
|
||||
|
||||
const OPENAI_API_KEY = process.env.OPENAI_API_KEY?.trim() ?? "";
|
||||
const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY?.trim() ?? "";
|
||||
const LIVE_IMAGE_TOOL_ENABLED = isLiveTestEnabled(["OPENCLAW_LIVE_IMAGE_TOOL_TEST"]);
|
||||
const LIVE_OPENAI_MODEL =
|
||||
process.env.OPENCLAW_LIVE_IMAGE_TOOL_OPENAI_MODEL?.trim() ||
|
||||
process.env.OPENCLAW_LIVE_IMAGE_TOOL_MODEL?.trim() ||
|
||||
"gpt-4.1-mini";
|
||||
const LIVE_ANTHROPIC_MODEL =
|
||||
process.env.OPENCLAW_LIVE_IMAGE_TOOL_ANTHROPIC_MODEL?.trim() || "claude-sonnet-4-6";
|
||||
const MODEL_SIDE_LIMIT = 512;
|
||||
|
||||
type LiveProviderCase = {
|
||||
provider: "openai" | "anthropic";
|
||||
model: string;
|
||||
apiKey: string;
|
||||
api: ModelApi;
|
||||
baseUrl: string;
|
||||
contextWindow: number;
|
||||
maxTokens: number;
|
||||
reasoning: boolean;
|
||||
live: boolean;
|
||||
};
|
||||
|
||||
const OPENAI_LIVE_CASE: LiveProviderCase = {
|
||||
provider: "openai",
|
||||
model: LIVE_OPENAI_MODEL,
|
||||
apiKey: OPENAI_API_KEY,
|
||||
api: "openai-responses",
|
||||
baseUrl: "https://api.openai.com/v1",
|
||||
contextWindow: 1_047_576,
|
||||
maxTokens: 32_768,
|
||||
reasoning: false,
|
||||
live: LIVE_IMAGE_TOOL_ENABLED && OPENAI_API_KEY.length > 0,
|
||||
};
|
||||
|
||||
const ANTHROPIC_LIVE_CASE: LiveProviderCase = {
|
||||
provider: "anthropic",
|
||||
model: LIVE_ANTHROPIC_MODEL,
|
||||
apiKey: ANTHROPIC_API_KEY,
|
||||
api: "anthropic-messages",
|
||||
baseUrl: "https://api.anthropic.com/v1",
|
||||
contextWindow: 200_000,
|
||||
maxTokens: 8192,
|
||||
reasoning: true,
|
||||
live: LIVE_IMAGE_TOOL_ENABLED && ANTHROPIC_API_KEY.length > 0,
|
||||
};
|
||||
|
||||
/**
 * Builds a square PNG test fixture whose central region differs in colour
 * from its border.
 *
 * Pixels whose x and y both fall in `[floor(size * 0.25), floor(size * 0.75))`
 * are filled with the values (230, 40, 35); all other pixels are filled with
 * (30, 110, 220). The buffer is pre-filled with 255 before any pixel is written.
 * NOTE(review): the values are presumably R, G, B as interpreted by
 * `fillPixel` — confirm against that helper.
 *
 * @param size - Width and height of the image in pixels.
 * @returns The buffer returned by `encodePngRgba` for the filled pixels.
 */
function createLargeCenterRedPng(size: number): Buffer {
  const rgba = Buffer.alloc(size * size * 4, 255);
  const lowerBound = Math.floor(size * 0.25);
  const upperBound = Math.floor(size * 0.75);
  const withinCenterBand = (value: number): boolean => value >= lowerBound && value < upperBound;

  for (let row = 0; row < size; row += 1) {
    for (let col = 0; col < size; col += 1) {
      if (withinCenterBand(col) && withinCenterBand(row)) {
        fillPixel(rgba, col, row, size, 230, 40, 35);
      } else {
        fillPixel(rgba, col, row, size, 30, 110, 220);
      }
    }
  }
  return encodePngRgba(rgba, size, size);
}
|
||||
|
||||
/**
 * Scans a JPEG byte stream for the first start-of-frame (SOF) segment and
 * returns the dimensions stored in it.
 *
 * The scan starts after the two-byte SOI marker and walks marker segments:
 * - bytes that are not `0xFF` are skipped one at a time;
 * - `0xFF` fill bytes preceding a marker are skipped, so padded streams do not
 *   have a fill byte misread as a marker code followed by a bogus length;
 * - standalone markers (TEM, SOI, EOI, RST0-RST7) carry no length field;
 * - any other segment is skipped using its big-endian length field.
 *
 * @param buffer - Encoded JPEG bytes.
 * @returns Width and height read from the first SOF segment.
 * @throws Error when no SOF segment is found before the end of the buffer.
 */
function readJpegDimensions(buffer: Buffer): { width: number; height: number } {
  // Skip the two-byte SOI marker at the start of the stream.
  let offset = 2;
  while (offset + 9 < buffer.length) {
    if (buffer[offset] !== 0xff) {
      offset += 1;
      continue;
    }
    const marker = buffer[offset + 1];
    if (marker === 0xff) {
      // Fill byte: the real marker code follows, so advance one byte only.
      offset += 1;
      continue;
    }
    offset += 2;
    const isStandaloneMarker =
      marker === 0x01 || marker === 0xd8 || marker === 0xd9 || (marker >= 0xd0 && marker <= 0xd7);
    if (isStandaloneMarker) {
      continue;
    }
    const segmentLength = buffer.readUInt16BE(offset);
    // SOF0-SOF15, excluding DHT (0xC4), JPG (0xC8) and DAC (0xCC), which share the range.
    const isStartOfFrame =
      marker >= 0xc0 && marker <= 0xcf && marker !== 0xc4 && marker !== 0xc8 && marker !== 0xcc;
    if (isStartOfFrame) {
      // Segment layout: length (2 bytes), sample precision (1), height (2), width (2).
      return {
        height: buffer.readUInt16BE(offset + 3),
        width: buffer.readUInt16BE(offset + 5),
      };
    }
    offset += segmentLength;
  }
  throw new Error("JPEG dimensions not found");
}
|
||||
|
||||
/**
 * Turns a thrown value into a plain message string.
 *
 * @param error - Any thrown value.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function formatLiveError(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
||||
|
||||
/**
 * Decides whether a live-test failure should be skipped rather than reported.
 *
 * The formatted error message is skippable when `isOverloadedErrorMessage` or
 * `isServerErrorMessage` accepts it, or when it contains "timed out" or
 * "operation was aborted" (case-insensitive).
 *
 * @param error - Any thrown value.
 * @returns `true` when the error matches one of the skippable categories.
 */
function isSkippableLiveError(error: unknown): boolean {
  const text = formatLiveError(error);
  if (isOverloadedErrorMessage(text)) {
    return true;
  }
  if (isServerErrorMessage(text)) {
    return true;
  }
  return /timed out|operation was aborted/i.test(text);
}
|
||||
|
||||
/**
 * Builds the OpenClaw config used by a live image-tool provider case.
 *
 * The config selects `<provider>/<model>` as the image model with
 * `imageQuality: "high"`, registers a single provider containing one model
 * whose image side limits are both `MODEL_SIDE_LIMIT`, and sets a 90 second
 * timeout on the image media tool and on its model entry.
 *
 * @param testCase - Provider, model, credentials and model limits for the case.
 * @returns A config object for that single provider/model pair.
 */
function createLiveConfig(testCase: LiveProviderCase): OpenClawConfig {
  const { provider, model, apiKey, baseUrl, api } = testCase;
  const timeoutSeconds = 90;
  return {
    agents: {
      defaults: {
        imageModel: { primary: `${provider}/${model}` },
        imageQuality: "high",
      },
    },
    models: {
      providers: {
        [provider]: {
          apiKey,
          baseUrl,
          api,
          models: [
            {
              id: model,
              name: model,
              reasoning: testCase.reasoning,
              input: ["text", "image"],
              contextWindow: testCase.contextWindow,
              maxTokens: testCase.maxTokens,
              cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
              mediaInput: {
                image: { maxSidePx: MODEL_SIDE_LIMIT, preferredSidePx: MODEL_SIDE_LIMIT },
              },
            },
          ],
        },
      },
    },
    tools: {
      media: {
        image: {
          timeoutSeconds,
          models: [{ provider, model, timeoutSeconds }],
        },
      },
    },
  };
}
|
||||
|
||||
/**
 * Runs `run` inside a throwaway workspace that contains one oversized JPEG.
 *
 * A temp root is created with `agent` and `workspace` subdirectories. A
 * 2200px fixture is generated, re-encoded as JPEG, asserted to exceed
 * `MODEL_SIDE_LIMIT` on its longer side, and written to
 * `workspace/large-center-red.jpg`. The temp root is removed once `run`
 * settles, whether it resolves or rejects.
 *
 * @param run - Callback that receives the agent dir, workspace dir and image path.
 * @returns Whatever `run` resolves to.
 */
async function withLiveWorkspace<T>(
  run: (ctx: { agentDir: string; workspaceDir: string; imagePath: string }) => Promise<T>,
) {
  const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-image-tool-live-"));
  try {
    const agentDir = path.join(tempRoot, "agent");
    const workspaceDir = path.join(tempRoot, "workspace");
    for (const dir of [agentDir, workspaceDir]) {
      await fs.mkdir(dir, { recursive: true });
    }

    const fixtureSidePx = 2200;
    const jpegBytes = await resizeToJpeg({
      buffer: createLargeCenterRedPng(fixtureSidePx),
      maxSide: fixtureSidePx,
      quality: 92,
      withoutEnlargement: true,
    });

    // Sanity check: the fixture must be larger than the model limit, otherwise
    // the downscale assertions made by callers would pass vacuously.
    const { width, height } = readJpegDimensions(jpegBytes);
    expect(Math.max(width, height)).toBeGreaterThan(MODEL_SIDE_LIMIT);

    const imagePath = path.join(workspaceDir, "large-center-red.jpg");
    await fs.writeFile(imagePath, jpegBytes);
    return await run({ agentDir, workspaceDir, imagePath });
  } finally {
    await fs.rm(tempRoot, { recursive: true, force: true });
  }
}
|
||||
|
||||
// Call the test hook with no overrides after every test; presumably this restores
// the default provider dependencies so stubs do not leak between tests.
afterEach(function resetProviderDeps() {
  testing.setProviderDepsForTest();
});
|
||||
|
||||
/**
 * Executes the image tool against a live provider with an oversized local JPEG
 * and checks two things: the buffer handed to the model is a JPEG whose longest
 * side is within MODEL_SIDE_LIMIT, and the model's text answer names a reddish
 * center color.
 *
 * Errors recognised by `isSkippableLiveError` are logged and end the case early
 * without failing it; any other error is rethrown.
 *
 * @param testCase Live provider/model under test.
 */
async function runLiveDownscaleCase(testCase: LiveProviderCase) {
  let observedDimensions: { width: number; height: number } | undefined;

  // Inspect the outgoing request, then forward it to the real model call.
  const inspectThenDescribe = async (params: ImageDescriptionRequest) => {
    expect(params.provider).toBe(testCase.provider);
    expect(params.model).toBe(testCase.model);
    expect(params.mime).toBe("image/jpeg");
    const size = readJpegDimensions(params.buffer);
    observedDimensions = size;
    expect(Math.max(size.width, size.height)).toBeLessThanOrEqual(MODEL_SIDE_LIMIT);
    return await describeImageWithModel(params);
  };

  testing.setProviderDepsForTest({
    // No media-understanding provider is returned for any id.
    getMediaUnderstandingProvider: (
      _id: string,
      _registry: Map<string, MediaUnderstandingProvider>,
    ) => undefined,
    describeImageWithModel: inspectThenDescribe,
  });

  await withLiveWorkspace(async ({ agentDir, workspaceDir, imagePath }) => {
    const tool = createImageTool({
      config: createLiveConfig(testCase),
      agentDir,
      workspaceDir,
    });
    if (!tool) {
      throw new Error("expected image tool");
    }

    const callId = `live-${testCase.provider}-large-image`;
    let result: unknown;
    try {
      result = await tool.execute(callId, {
        prompt:
          "Look at the center of the image. Reply with one lowercase word naming that center color.",
        image: imagePath,
      });
    } catch (err) {
      if (!isSkippableLiveError(err)) {
        throw err;
      }
      console.warn(`[live:image-tool:${testCase.provider}] skipped: ${formatLiveError(err)}`);
      return;
    }

    const blocks = (result as { content?: Array<{ type?: string; text?: string }> }).content;
    // Stays undefined when the result carries no content, so the match below fails.
    const text =
      blocks == null
        ? undefined
        : blocks
            .filter((block) => block.type === "text")
            .map((block) => block.text?.toLowerCase() ?? "")
            .join(" ");
    expect(text).toMatch(/red|crimson|orange/);
    expect(observedDimensions).toBeDefined();
  });
}
|
||||
|
||||
// Live OpenAI coverage; the whole suite is skipped unless OPENAI_LIVE_CASE.live is truthy.
describe.skipIf(!OPENAI_LIVE_CASE.live)("image tool OpenAI live", () => {
  const LIVE_TEST_TIMEOUT = 180_000;

  it(
    "downscales a large local image before sending it to the live vision model",
    () => runLiveDownscaleCase(OPENAI_LIVE_CASE),
    LIVE_TEST_TIMEOUT,
  );
});
|
||||
|
||||
// Live Anthropic coverage; the whole suite is skipped unless ANTHROPIC_LIVE_CASE.live is truthy.
describe.skipIf(!ANTHROPIC_LIVE_CASE.live)("image tool Anthropic live", () => {
  const LIVE_TEST_TIMEOUT = 180_000;

  it(
    "downscales a large local image before sending it to the live vision model",
    () => runLiveDownscaleCase(ANTHROPIC_LIVE_CASE),
    LIVE_TEST_TIMEOUT,
  );
});
|
||||
@@ -5,6 +5,7 @@ import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import type { ModelDefinitionConfig } from "../../config/types.models.js";
|
||||
import { encodePngRgba, fillPixel } from "../../media/png-encode.js";
|
||||
import type {
|
||||
ImageDescriptionRequest,
|
||||
ImagesDescriptionRequest,
|
||||
@@ -223,7 +224,53 @@ async function withTempAgentDir<T>(run: (agentDir: string) => Promise<T>): Promi
|
||||
const ONE_PIXEL_PNG_B64 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAIAAAAlC+aJAAAAIGNIUk0AAHomAACAhAAA+gAAAIDoAAB1MAAA6mAAADqYAAAXcJy6UTwAAAAGYktHRAD/AP8A/6C9p5MAAAAHdElNRQfqBBsGAQr00ED3AAAAJXRFWHRkYXRlOmNyZWF0ZQAyMDI2LTA0LTI3VDA2OjAxOjEwKzAwOjAwPU3tXwAAACV0RVh0ZGF0ZTptb2RpZnkAMjAyNi0wNC0yN1QwNjowMToxMCswMDowMEwQVeMAAAAodEVYdGRhdGU6dGltZXN0YW1wADIwMjYtMDQtMjdUMDY6MDE6MTArMDA6MDAbBXQ8AAAAeElEQVRo3u3awQnDQBAEwT2Q8w/YAikIP5rF1RFMca+FO8/s7rrnqjcA1BsA6g0A9QaAesOfA77zqTf8Blj/AgAAAAAAAJsDqAOoA6gDqAOoc9TXAdQB1AHUAdQB1AHUAdQB1AHU7Qc46gEAAAAANrcecGZ2f8B/ASYSQPlKoEJ/AAAAAElFTkSuQmCC";
|
||||
const ONE_PIXEL_GIF_B64 = "R0lGODlhAQABAIABAP///wAAACwAAAAAAQABAAACAkQBADs=";
|
||||
const ONE_PIXEL_JPEG_B64 = "QUJDRA==";
|
||||
|
||||
/**
 * Encodes a square `size` x `size` PNG with two flat colors: RGB (230, 40, 35)
 * inside the centered square spanning [25%, 75%) of each axis, and
 * RGB (30, 110, 220) everywhere else.
 *
 * @param size Width and height of the image in pixels.
 * @returns The encoded PNG bytes.
 */
function createLargeColorBlockPng(size: number): Buffer {
  // Every byte starts at 255 before the pixels are filled.
  const rgba = Buffer.alloc(size * size * 4, 255);
  const lower = Math.floor(size * 0.25);
  const upper = Math.floor(size * 0.75);
  const withinBand = (value: number): boolean => value >= lower && value < upper;

  for (let row = 0; row < size; row += 1) {
    for (let col = 0; col < size; col += 1) {
      if (withinBand(col) && withinBand(row)) {
        fillPixel(rgba, col, row, size, 230, 40, 35);
      } else {
        fillPixel(rgba, col, row, size, 30, 110, 220);
      }
    }
  }
  return encodePngRgba(rgba, size, size);
}
|
||||
|
||||
/**
 * Reads the pixel dimensions from the first start-of-frame (SOF) segment of a
 * JPEG byte stream.
 *
 * The stream must begin with the SOI marker (0xFFD8). Segments are walked by
 * their declared length until an SOF marker (0xC0-0xCF, excluding DHT 0xC4,
 * JPG 0xC8 and DAC 0xCC) is found. 0xFF fill bytes in front of a marker are
 * skipped, and markers that carry no length field are stepped over.
 *
 * @param buffer Encoded JPEG bytes.
 * @returns Width and height taken from the frame header.
 * @throws Error("JPEG dimensions not found") when the buffer is not a JPEG or
 *   no frame header is found within the readable range.
 */
function readJpegDimensions(buffer: Buffer): { width: number; height: number } {
  // Reject non-JPEG input up front instead of scanning arbitrary bytes for a marker.
  if (buffer.length < 2 || buffer[0] !== 0xff || buffer[1] !== 0xd8) {
    throw new Error("JPEG dimensions not found");
  }

  let offset = 2;
  // The bound guarantees every read below (at most offset + 8) stays inside the buffer.
  while (offset + 9 < buffer.length) {
    if (buffer[offset] !== 0xff) {
      offset += 1;
      continue;
    }
    const marker = buffer[offset + 1];
    if (marker === 0xff) {
      // Fill byte: any number of 0xFF may precede a marker, so treat the next
      // byte as the marker prefix instead of consuming it as a marker code.
      offset += 1;
      continue;
    }
    offset += 2;

    // Standalone markers have no length field: stuffed 0x00, TEM (0x01),
    // RST0-RST7 (0xD0-0xD7), SOI (0xD8) and EOI (0xD9).
    const isStandalone = marker === 0x00 || marker === 0x01 || (marker >= 0xd0 && marker <= 0xd9);
    if (isStandalone) {
      continue;
    }

    const segmentLength = buffer.readUInt16BE(offset);
    const isStartOfFrame =
      marker >= 0xc0 && marker <= 0xcf && marker !== 0xc4 && marker !== 0xc8 && marker !== 0xcc;
    if (isStartOfFrame) {
      // Frame header layout: length (2 bytes), precision (1), height (2), width (2).
      return {
        height: buffer.readUInt16BE(offset + 3),
        width: buffer.readUInt16BE(offset + 5),
      };
    }
    offset += segmentLength;
  }
  throw new Error("JPEG dimensions not found");
}
|
||||
|
||||
/**
 * Reads width and height from a PNG's IHDR chunk.
 *
 * The buffer must be at least 24 bytes long and carry the ASCII chunk type
 * "IHDR" at bytes 12-15; width and height are the big-endian 32-bit values at
 * offsets 16 and 20.
 *
 * @param buffer Encoded PNG bytes.
 * @throws Error("PNG dimensions not found") when the IHDR chunk is not where expected.
 */
function readPngDimensions(buffer: Buffer): { width: number; height: number } {
  const hasIhdr = buffer.length >= 24 && buffer.toString("ascii", 12, 16) === "IHDR";
  if (!hasIhdr) {
    throw new Error("PNG dimensions not found");
  }
  const width = buffer.readUInt32BE(16);
  const height = buffer.readUInt32BE(20);
  return { width, height };
}
|
||||
|
||||
async function withTempWorkspacePng(
|
||||
cb: (args: { workspaceDir: string; imagePath: string }) => Promise<void>,
|
||||
@@ -1370,7 +1417,7 @@ describe("image tool implicit imageModel config", () => {
|
||||
).toBe(true);
|
||||
expect(userContent.some((block) => block.type === "image_url")).toBe(true);
|
||||
expect(userContent.find((block) => block.type === "image_url")?.image_url?.url).toContain(
|
||||
"data:image/png;base64,",
|
||||
"data:image/",
|
||||
);
|
||||
expect(bodyRaw).not.toContain('"role":"developer"');
|
||||
expectToolText(result, "ok moonshot");
|
||||
@@ -1783,6 +1830,136 @@ describe("image tool data URL support", () => {
|
||||
bufferFromSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
|
||||
// A model that declares a 1-byte image limit must make the data URL path reject
// the image rather than pass it through unchanged.
it("applies model image maxBytes to data URLs", async () => {
  await withTempAgentDir(async (agentDir) => {
    installImageUnderstandingProviderStubs();

    const tinyVision = {
      ...makeModelDefinition("tiny-vision", ["text", "image"]),
      mediaInput: { image: { maxBytes: 1 } },
    } satisfies ModelDefinitionConfig;
    const config: OpenClawConfig = {
      agents: {
        defaults: {
          imageModel: { primary: "openai/tiny-vision" },
        },
      },
      models: {
        providers: {
          openai: {
            api: "openai-responses",
            baseUrl: "https://api.openai.com/v1",
            models: [tinyVision],
          },
        },
      },
    };
    const tool = createRequiredImageTool({ config, agentDir });

    const execution = tool.execute("t1", {
      prompt: "Describe this image.",
      image: `data:image/png;base64,${ONE_PIXEL_PNG_B64}`,
    });
    await expect(execution).rejects.toThrow(/could not be reduced below/i);
  });
});
|
||||
|
||||
// A 1600px data URL image sent to a model with a 512px side limit must reach the
// provider with its longest side at or below 512px.
it("downscales data URL images to the resolved model side limit", async () => {
  await withTempAgentDir(async (agentDir) => {
    let observedDimensions: { width: number; height: number } | undefined;
    installImageUnderstandingProviderStubs({
      id: "openai",
      capabilities: ["image"],
      describeImage: async (params) => {
        // Record the size of whatever encoding the tool finally sends.
        if (params.mime === "image/png") {
          observedDimensions = readPngDimensions(params.buffer);
        } else {
          observedDimensions = readJpegDimensions(params.buffer);
        }
        return { text: "ok", model: params.model };
      },
    });

    const tinyVision = {
      ...makeModelDefinition("tiny-vision", ["text", "image"]),
      mediaInput: { image: { maxSidePx: 512, preferredSidePx: 512 } },
    } satisfies ModelDefinitionConfig;
    const config: OpenClawConfig = {
      agents: {
        defaults: {
          imageModel: { primary: "openai/tiny-vision" },
          imageQuality: "high",
        },
      },
      models: {
        providers: {
          openai: {
            api: "openai-responses",
            apiKey: "test-key",
            baseUrl: "https://api.openai.com/v1",
            models: [tinyVision],
          },
        },
      },
    };
    const tool = createRequiredImageTool({ config, agentDir });

    const sourceBase64 = createLargeColorBlockPng(1600).toString("base64");
    await expectImageToolExecOk(tool, `data:image/png;base64,${sourceBase64}`);

    expect(observedDimensions).toBeDefined();
    if (!observedDimensions) {
      throw new Error("expected observed data URL dimensions");
    }
    const longestSide = Math.max(observedDimensions.width, observedDimensions.height);
    expect(longestSide).toBeLessThanOrEqual(512);
  });
});
|
||||
|
||||
// With no mediaInput metadata on the model, the configured "efficient" quality
// alone must bring a 1600px data URL image down to at most 1280px on its longest side.
it("applies configured image quality to data URLs without model media metadata", async () => {
  await withTempAgentDir(async (agentDir) => {
    let observedDimensions: { width: number; height: number } | undefined;
    installImageUnderstandingProviderStubs({
      id: "openai",
      capabilities: ["image"],
      describeImage: async (params) => {
        // Record the size of whatever encoding the tool finally sends.
        if (params.mime === "image/png") {
          observedDimensions = readPngDimensions(params.buffer);
        } else {
          observedDimensions = readJpegDimensions(params.buffer);
        }
        return { text: "ok", model: params.model };
      },
    });

    const config: OpenClawConfig = {
      agents: {
        defaults: {
          imageModel: { primary: "openai/plain-vision" },
          imageQuality: "efficient",
        },
      },
      models: {
        providers: {
          openai: {
            api: "openai-responses",
            apiKey: "test-key",
            baseUrl: "https://api.openai.com/v1",
            models: [makeModelDefinition("plain-vision", ["text", "image"])],
          },
        },
      },
    };
    const tool = createRequiredImageTool({ config, agentDir });

    const sourceBase64 = createLargeColorBlockPng(1600).toString("base64");
    await expectImageToolExecOk(tool, `data:image/png;base64,${sourceBase64}`);

    expect(observedDimensions).toBeDefined();
    if (!observedDimensions) {
      throw new Error("expected observed data URL dimensions");
    }
    const longestSide = Math.max(observedDimensions.width, observedDimensions.height);
    expect(longestSide).toBeLessThanOrEqual(1280);
  });
});
|
||||
});
|
||||
|
||||
describe("image tool MiniMax VLM routing", () => {
|
||||
@@ -1832,7 +2009,7 @@ describe("image tool MiniMax VLM routing", () => {
|
||||
expect(init?.method).toBe("POST");
|
||||
expect((init?.headers as Record<string, string>)?.Authorization).toBe("Bearer minimax-test");
|
||||
expect(String(init?.body)).toContain('"prompt":"Describe the image."');
|
||||
expect(String(init?.body)).toContain('"image_url":"data:image/png;base64,');
|
||||
expect(String(init?.body)).toContain('"image_url":"data:image/');
|
||||
|
||||
const text = res.content?.find((b) => b.type === "text")?.text ?? "";
|
||||
expect(text).toBe("ok");
|
||||
@@ -1840,10 +2017,11 @@ describe("image tool MiniMax VLM routing", () => {
|
||||
|
||||
it("accepts images[] for multi-image requests", async () => {
|
||||
const { fetch, tool } = await createMinimaxVlmFixture({ status_code: 0, status_msg: "" });
|
||||
const secondPngB64 = createLargeColorBlockPng(2).toString("base64");
|
||||
|
||||
const res = await tool.execute("t1", {
|
||||
prompt: "Compare these images.",
|
||||
images: [`data:image/png;base64,${pngB64}`, `data:image/jpeg;base64,${ONE_PIXEL_JPEG_B64}`],
|
||||
images: [`data:image/png;base64,${pngB64}`, `data:image/png;base64,${secondPngB64}`],
|
||||
});
|
||||
|
||||
expect(fetch).toHaveBeenCalledTimes(2);
|
||||
@@ -1857,14 +2035,15 @@ describe("image tool MiniMax VLM routing", () => {
|
||||
|
||||
it("combines image + images with dedupe and enforces maxImages", async () => {
|
||||
const { fetch, tool } = await createMinimaxVlmFixture({ status_code: 0, status_msg: "" });
|
||||
const secondPngB64 = createLargeColorBlockPng(2).toString("base64");
|
||||
|
||||
const deduped = await tool.execute("t1", {
|
||||
prompt: "Compare these images.",
|
||||
image: `data:image/png;base64,${pngB64}`,
|
||||
images: [
|
||||
`data:image/png;base64,${pngB64}`,
|
||||
`data:image/jpeg;base64,${ONE_PIXEL_JPEG_B64}`,
|
||||
`data:image/jpeg;base64,${ONE_PIXEL_JPEG_B64}`,
|
||||
`data:image/png;base64,${secondPngB64}`,
|
||||
`data:image/png;base64,${secondPngB64}`,
|
||||
],
|
||||
});
|
||||
|
||||
@@ -2166,3 +2345,135 @@ describe("image tool response validation", () => {
|
||||
expect(testing.hasImageReasoningOnlyResponse(message as never)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
// Covers testing.resolveImageCompressionPolicy: which models' image limits end up
// in the policy for a primary model, its fallbacks, and explicit overrides.
describe("image compression policy", () => {
  // Builds one image-capable reasoning model definition with zero cost and the given limits.
  const visionModel = (
    id: string,
    name: string,
    limits: { contextWindow: number; maxTokens: number },
    image: { maxSidePx: number; preferredSidePx: number; tokenMode: "tile" | "detail" | "provider" },
  ): ModelDefinitionConfig => ({
    id,
    name,
    reasoning: true,
    input: ["text", "image"],
    contextWindow: limits.contextWindow,
    maxTokens: limits.maxTokens,
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    mediaInput: { image },
  });

  const anthropicLimits = { contextWindow: 1_000_000, maxTokens: 64_000 };

  const cfgWithImageModelMetadata = {
    agents: {
      defaults: {
        imageQuality: "high",
      },
    },
    models: {
      providers: {
        anthropic: {
          baseUrl: "https://api.anthropic.com",
          api: "anthropic-messages",
          models: [
            visionModel("claude-opus-4-7", "Claude Opus 4.7", anthropicLimits, {
              maxSidePx: 2576,
              preferredSidePx: 2576,
              tokenMode: "provider",
            }),
            visionModel("claude-opus-4-6", "Claude Opus 4.6", anthropicLimits, {
              maxSidePx: 1568,
              preferredSidePx: 1568,
              tokenMode: "provider",
            }),
          ],
        },
        openai: {
          baseUrl: "https://api.openai.com/v1",
          api: "openai-responses",
          models: [
            visionModel(
              "gpt-5.5",
              "GPT-5.5",
              { contextWindow: 272_000, maxTokens: 128_000 },
              { maxSidePx: 6000, preferredSidePx: 2048, tokenMode: "detail" },
            ),
          ],
        },
      },
    },
  } satisfies OpenClawConfig;

  it("derives model metadata, quality preference, and image count from config", async () => {
    const cfg = {
      ...cfgWithImageModelMetadata,
    } satisfies OpenClawConfig;

    const policy = testing.resolveImageCompressionPolicy({
      cfg,
      imageModelConfig: { primary: "anthropic/claude-opus-4-7" },
      imageCount: 2,
    });

    await expect(policy).resolves.toEqual({
      quality: "high",
      imageCount: 2,
      models: [{ maxSidePx: 2576, preferredSidePx: 2576, tokenMode: "provider" }],
    });
  });

  it("keeps unset image quality as adaptive auto behavior and includes fallback models", async () => {
    // Drop the agents section so no imageQuality preference is configured.
    const { agents: _agents, ...cfg } = cfgWithImageModelMetadata;

    const policy = testing.resolveImageCompressionPolicy({
      cfg,
      imageModelConfig: {
        primary: "openai/gpt-5.5",
        fallbacks: ["anthropic/claude-opus-4-6", "unknown/custom-image"],
      },
      imageCount: 1,
    });

    // The unknown fallback contributes an empty entry rather than being dropped.
    await expect(policy).resolves.toEqual({
      imageCount: 1,
      models: [
        { maxSidePx: 6000, preferredSidePx: 2048, tokenMode: "detail" },
        { maxSidePx: 1568, preferredSidePx: 1568, tokenMode: "provider" },
        {},
      ],
    });
  });

  it("uses a model override as the compression candidate", async () => {
    const policy = testing.resolveImageCompressionPolicy({
      cfg: cfgWithImageModelMetadata,
      imageModelConfig: {
        primary: "openai/gpt-5.5",
        fallbacks: ["anthropic/claude-opus-4-6"],
      },
      modelOverride: "anthropic/claude-opus-4-6",
      imageCount: 1,
    });

    await expect(policy).resolves.toMatchObject({
      models: [{ maxSidePx: 1568, preferredSidePx: 1568, tokenMode: "provider" }],
    });
  });

  it("resolves providerless overrides before reading compression metadata", async () => {
    const policy = testing.resolveImageCompressionPolicy({
      cfg: cfgWithImageModelMetadata,
      imageModelConfig: {
        primary: "anthropic/claude-opus-4-6",
      },
      modelOverride: "gpt-5.5",
      imageCount: 1,
    });

    await expect(policy).resolves.toMatchObject({
      models: [{ maxSidePx: 6000, preferredSidePx: 2048, tokenMode: "detail" }],
    });
  });
});
|
||||
|
||||
@@ -16,15 +16,26 @@ import {
|
||||
classifyMediaReferenceSource,
|
||||
normalizeMediaReferenceSource,
|
||||
} from "../../media/media-reference.js";
|
||||
import { loadWebMedia } from "../../media/web-media.js";
|
||||
import {
|
||||
loadWebMedia,
|
||||
optimizeImageBufferForWebMedia,
|
||||
type ImageCompressionModelPolicy,
|
||||
type ImageCompressionPolicy,
|
||||
} from "../../media/web-media.js";
|
||||
import {
|
||||
describeImageWithModel,
|
||||
describeImagesWithModel,
|
||||
type MediaUnderstandingProvider,
|
||||
} from "../../plugin-sdk/media-understanding.js";
|
||||
import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js";
|
||||
import { resolveUserPath } from "../../utils.js";
|
||||
import type { AuthProfileStore } from "../auth-profiles/types.js";
|
||||
import { isMinimaxVlmProvider } from "../minimax-vlm.js";
|
||||
import {
|
||||
resolveImageFallbackCandidates,
|
||||
resolveImageFallbackDefaultProvider,
|
||||
} from "../model-fallback.js";
|
||||
import { resolveModelAsync } from "../pi-embedded-runner/model.js";
|
||||
import {
|
||||
coerceImageAssistantText,
|
||||
coerceImageModelConfig,
|
||||
@@ -117,6 +128,7 @@ export const testing = {
|
||||
coerceImageAssistantText,
|
||||
hasImageReasoningOnlyResponse,
|
||||
resolveImageToolMaxTokens,
|
||||
resolveImageCompressionPolicy,
|
||||
setProviderDepsForTest(overrides?: {
|
||||
buildProviderRegistry?: typeof buildProviderRegistry;
|
||||
getMediaUnderstandingProvider?: typeof getMediaUnderstandingProvider;
|
||||
@@ -272,6 +284,68 @@ function pickMaxBytes(cfg?: OpenClawConfig, maxBytesMb?: number): number | undef
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
 * Lists the provider/model pairs whose image limits should shape compression.
 *
 * An image model config resolved from `modelOverride` takes precedence over the
 * configured `imageModelConfig`. Whichever is present is applied onto `cfg`
 * before the image fallback candidates are resolved; with neither, `cfg` is
 * used unchanged.
 *
 * @param params.cfg Base config, if any.
 * @param params.imageModelConfig Configured image model (primary plus fallbacks), if any.
 * @param params.modelOverride Optional per-call model override.
 * @returns Candidates as returned by `resolveImageFallbackCandidates`.
 */
function resolveCompressionModelCandidates(params: {
  cfg?: OpenClawConfig;
  imageModelConfig?: ImageModelConfig | null;
  modelOverride?: string;
}): Array<{ provider: string; model: string }> {
  const { cfg, imageModelConfig, modelOverride } = params;

  const fromOverride = resolveImageModelConfigForOverride({
    cfg,
    modelOverride,
  });
  const fromConfig = imageModelConfig
    ? resolveConfiguredImageModelRefs({
        cfg,
        imageModelConfig,
      })
    : null;
  const effectiveModelConfig = fromOverride ?? fromConfig;

  let effectiveCfg = cfg;
  if (effectiveModelConfig) {
    effectiveCfg = applyImageModelConfigDefaults(cfg, effectiveModelConfig);
  }

  const defaultProvider = resolveImageFallbackDefaultProvider(effectiveCfg);
  return resolveImageFallbackCandidates({
    cfg: effectiveCfg,
    defaultProvider,
  });
}
|
||||
|
||||
/**
 * Assembles the compression policy handed to the media loaders.
 *
 * Each compression candidate is resolved concurrently to its runtime model and
 * contributes that model's `mediaInput.image` limits. A candidate that cannot
 * be resolved, or has no image metadata, contributes an empty entry so the
 * list stays aligned with the candidate order.
 *
 * @param params.imageCount Number of images in the current request.
 * @returns `imageCount`, plus `models` when at least one candidate exists and
 *   `quality` when `agents.defaults.imageQuality` is set.
 */
async function resolveImageCompressionPolicy(params: {
  cfg?: OpenClawConfig;
  imageModelConfig?: ImageModelConfig | null;
  modelOverride?: string;
  imageCount: number;
  agentDir?: string;
  workspaceDir?: string;
}): Promise<ImageCompressionPolicy> {
  // Best effort by design: any resolution failure degrades to "no known limits".
  const readImageLimits = async (candidate: {
    provider: string;
    model: string;
  }): Promise<ImageCompressionModelPolicy> => {
    try {
      const resolved = await resolveModelAsync(
        candidate.provider,
        candidate.model,
        params.agentDir,
        params.cfg,
        {
          allowBundledStaticCatalogFallback: true,
          skipPiDiscovery: true,
          workspaceDir: params.workspaceDir,
        },
      );
      const runtimeModel = resolved.model as ProviderRuntimeModel | undefined;
      return runtimeModel?.mediaInput?.image ?? {};
    } catch {
      return {};
    }
  };

  const candidates = resolveCompressionModelCandidates(params);
  const quality = params.cfg?.agents?.defaults?.imageQuality;
  const models: ImageCompressionModelPolicy[] = await Promise.all(
    candidates.map((candidate) => readImageLimits(candidate)),
  );

  const modelsPart = models.length > 0 ? { models } : {};
  const qualityPart = quality ? { quality } : {};
  return {
    imageCount: params.imageCount,
    ...modelsPart,
    ...qualityPart,
  };
}
|
||||
|
||||
function matchesImageTimeoutEntry(params: {
|
||||
entry: MediaUnderstandingModelConfig;
|
||||
source: "capability" | "shared";
|
||||
@@ -574,6 +648,31 @@ export function createImageTool(options?: {
|
||||
);
|
||||
const maxBytesMb = typeof record.maxBytesMb === "number" ? record.maxBytesMb : undefined;
|
||||
const maxBytes = pickMaxBytes(options?.config, maxBytesMb);
|
||||
const imageModelConfig =
|
||||
resolvedImageModelConfig ??
|
||||
resolveImageModelConfigForOverride({
|
||||
cfg: options?.config,
|
||||
modelOverride,
|
||||
}) ??
|
||||
resolveImageModelConfigForTool({
|
||||
cfg: options?.config,
|
||||
agentDir,
|
||||
workspaceDir: options?.workspaceDir,
|
||||
authStore: options?.authProfileStore,
|
||||
});
|
||||
if (!imageModelConfig) {
|
||||
throw new Error(
|
||||
"No image model is configured. Set agents.defaults.imageModel or configure an image-capable provider.",
|
||||
);
|
||||
}
|
||||
const imageCompression = await resolveImageCompressionPolicy({
|
||||
cfg: options?.config,
|
||||
imageModelConfig,
|
||||
modelOverride,
|
||||
imageCount: imageInputs.length,
|
||||
agentDir,
|
||||
workspaceDir: options?.workspaceDir,
|
||||
});
|
||||
|
||||
const sandboxConfig: SandboxedBridgeMediaPathConfig | null =
|
||||
options?.sandbox && options?.sandbox.root.trim()
|
||||
@@ -672,26 +771,39 @@ export function createImageTool(options?: {
|
||||
);
|
||||
|
||||
const media = isDataUrl
|
||||
? decodeDataUrl(resolvedImage, { maxBytes })
|
||||
? await (async () => {
|
||||
const decoded = decodeDataUrl(resolvedImage, { maxBytes });
|
||||
return await optimizeImageBufferForWebMedia({
|
||||
buffer: decoded.buffer,
|
||||
contentType: decoded.mimeType,
|
||||
maxBytes,
|
||||
imageCompression,
|
||||
});
|
||||
})()
|
||||
: sandboxConfig
|
||||
? await loadWebMedia(resolvedPath ?? resolvedImage, {
|
||||
maxBytes,
|
||||
sandboxValidated: true,
|
||||
readFile: createSandboxBridgeReadFile({ sandbox: sandboxConfig }),
|
||||
imageCompression,
|
||||
})
|
||||
: await loadWebMedia(resolvedPath ?? resolvedImage, {
|
||||
maxBytes,
|
||||
localRoots: mediaLocalRoots,
|
||||
ssrfPolicy: remoteMediaSsrfPolicy,
|
||||
imageCompression,
|
||||
});
|
||||
if (media.kind !== "image") {
|
||||
throw new Error(`Unsupported media type: ${media.kind}`);
|
||||
}
|
||||
|
||||
const mimeType =
|
||||
("contentType" in media && media.contentType) ||
|
||||
("mimeType" in media && media.mimeType) ||
|
||||
"image/png";
|
||||
const contentType =
|
||||
"contentType" in media && typeof media.contentType === "string"
|
||||
? media.contentType
|
||||
: undefined;
|
||||
const legacyMimeType =
|
||||
"mimeType" in media && typeof media.mimeType === "string" ? media.mimeType : undefined;
|
||||
const mimeType = contentType ?? legacyMimeType ?? "image/png";
|
||||
loadedImages.push({
|
||||
buffer: media.buffer,
|
||||
mimeType,
|
||||
@@ -703,23 +815,6 @@ export function createImageTool(options?: {
|
||||
}
|
||||
|
||||
// MARK: - Run image prompt with all loaded images
|
||||
const imageModelConfig =
|
||||
resolvedImageModelConfig ??
|
||||
resolveImageModelConfigForOverride({
|
||||
cfg: options?.config,
|
||||
modelOverride,
|
||||
}) ??
|
||||
resolveImageModelConfigForTool({
|
||||
cfg: options?.config,
|
||||
agentDir,
|
||||
workspaceDir: options?.workspaceDir,
|
||||
authStore: options?.authProfileStore,
|
||||
});
|
||||
if (!imageModelConfig) {
|
||||
throw new Error(
|
||||
"No image model is configured. Set agents.defaults.imageModel or configure an image-capable provider.",
|
||||
);
|
||||
}
|
||||
const result = await runImagePrompt({
|
||||
cfg: options?.config,
|
||||
agentDir,
|
||||
|
||||
@@ -1052,6 +1052,20 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
"Optional low-level agent runtime policy for this specific model. Model runtime policy overrides the provider runtime policy.",
|
||||
"models.providers.*.models[].agentRuntime.id":
|
||||
'Model agent runtime id: "pi", "auto", a registered plugin harness id such as "codex", or a supported CLI backend alias such as "claude-cli".',
|
||||
"models.providers.*.models[].mediaInput":
|
||||
"Optional model media capability metadata used by tools to choose conservative image compression defaults.",
|
||||
"models.providers.*.models[].mediaInput.image":
|
||||
"Optional image input limits for this model, such as maximum side length, maximum pixels, and preferred compression side.",
|
||||
"models.providers.*.models[].mediaInput.image.maxBytes":
|
||||
"Maximum encoded image payload size accepted by the provider for this model.",
|
||||
"models.providers.*.models[].mediaInput.image.maxPixels":
|
||||
"Maximum image pixel count accepted by the provider for this model.",
|
||||
"models.providers.*.models[].mediaInput.image.maxSidePx":
|
||||
"Maximum image width or height accepted by the provider for this model.",
|
||||
"models.providers.*.models[].mediaInput.image.preferredSidePx":
|
||||
"Preferred image resize side for balanced compression. Leave unset to use OpenClaw's conservative default.",
|
||||
"models.providers.*.models[].mediaInput.image.tokenMode":
|
||||
'Provider image token accounting style: "tile", "detail", or "provider".',
|
||||
auth: "Authentication profile root used for multi-profile provider credentials and cooldown-based failover ordering. Keep profiles minimal and explicit so automatic failover behavior stays auditable.",
|
||||
"channels.googlechat.botLoopProtection":
|
||||
"Sliding-window guard for accepted Google Chat bot-to-bot loops. Defaults to the shared bot loop protection budget when allowBots lets bot-authored messages reach dispatch.",
|
||||
@@ -1431,6 +1445,8 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
"Maximum number of PDF pages to process for the PDF tool (default: 20).",
|
||||
"agents.defaults.imageMaxDimensionPx":
|
||||
"Max image side length in pixels when sanitizing transcript/tool-result image payloads (default: 1200).",
|
||||
"agents.defaults.imageQuality":
|
||||
'Image-tool media compression preference: "auto" adapts to provider/model limits and image count, "efficient" saves tokens and bytes, "balanced" keeps the current middle ground, and "high" preserves more detail for screenshots and document images.',
|
||||
"agents.defaults.cliBackends": "Optional CLI backends for text-only fallback (claude-cli, etc.).",
|
||||
"agents.defaults.compaction":
|
||||
"Compaction tuning for when context nears token limits, including history share, reserve headroom, and pre-compaction memory flush behavior. Use this when long-running sessions need stable continuity under tight context windows.",
|
||||
|
||||
@@ -626,6 +626,13 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"models.providers.*.models": "Model Provider Model List",
|
||||
"models.providers.*.models[].agentRuntime": "Model Runtime",
|
||||
"models.providers.*.models[].agentRuntime.id": "Model Runtime ID",
|
||||
"models.providers.*.models[].mediaInput": "Model Media Input",
|
||||
"models.providers.*.models[].mediaInput.image": "Model Image Input",
|
||||
"models.providers.*.models[].mediaInput.image.maxBytes": "Model Image Max Bytes",
|
||||
"models.providers.*.models[].mediaInput.image.maxPixels": "Model Image Max Pixels",
|
||||
"models.providers.*.models[].mediaInput.image.maxSidePx": "Model Image Max Side",
|
||||
"models.providers.*.models[].mediaInput.image.preferredSidePx": "Model Image Preferred Side",
|
||||
"models.providers.*.models[].mediaInput.image.tokenMode": "Model Image Token Mode",
|
||||
"auth.cooldowns.billingBackoffHours": "Billing Backoff (hours)",
|
||||
"auth.cooldowns.billingBackoffHoursByProvider": "Billing Backoff Overrides",
|
||||
"auth.cooldowns.billingMaxHours": "Billing Backoff Cap (hours)",
|
||||
@@ -656,6 +663,7 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"agents.defaults.pdfMaxBytesMb": "PDF Max Size (MB)",
|
||||
"agents.defaults.pdfMaxPages": "PDF Max Pages",
|
||||
"agents.defaults.imageMaxDimensionPx": "Image Max Dimension (px)",
|
||||
"agents.defaults.imageQuality": "Image Quality",
|
||||
"agents.defaults.humanDelay.mode": "Human Delay Mode",
|
||||
"agents.defaults.humanDelay.minMs": "Human Delay Min (ms)",
|
||||
"agents.defaults.humanDelay.maxMs": "Human Delay Max (ms)",
|
||||
|
||||
@@ -18,6 +18,7 @@ export type AgentContextInjection = "always" | "continuation-skip" | "never";
|
||||
export type OptionalBootstrapFileName = "SOUL.md" | "USER.md" | "HEARTBEAT.md" | "IDENTITY.md";
|
||||
export type EmbeddedPiExecutionContract = "default" | "strict-agentic";
|
||||
export type SubagentDelegationMode = "suggest" | "prefer";
|
||||
/**
 * Image-tool media compression preference:
 * - "auto" adapts to provider/model limits and image count
 * - "efficient" saves tokens and bytes
 * - "balanced" keeps the middle ground
 * - "high" preserves more detail
 */
export type AgentImageQualityPreference =
  | "auto"
  | "efficient"
  | "balanced"
  | "high";
|
||||
|
||||
export type Gpt5PromptOverlayConfig = {
|
||||
/** Friendly interaction-style layer for GPT-5-family models (default: friendly). */
|
||||
@@ -365,6 +366,11 @@ export type AgentDefaultsConfig = {
|
||||
* Default: 1200.
|
||||
*/
|
||||
imageMaxDimensionPx?: number;
|
||||
/**
|
||||
* Image compression/detail preference for image-tool media loading.
|
||||
* Default: auto, which adapts to provider/model limits and image count.
|
||||
*/
|
||||
imageQuality?: AgentImageQualityPreference;
|
||||
typingIntervalSeconds?: number;
|
||||
/** Typing indicator start mode (never|instant|thinking|message). */
|
||||
typingMode?: TypingMode;
|
||||
|
||||
@@ -89,6 +89,23 @@ export type ModelCompatConfig = SupportedOpenAICompatFields &
|
||||
requiresOpenAiAnthropicToolPayload?: boolean;
|
||||
};
|
||||
|
||||
/**
 * Image input limits declared for a single model. Every field is optional.
 */
export type ModelImageInputConfig = {
  /** Maximum encoded image payload size the provider documents for this model. */
  maxBytes?: number;
  /** Maximum total input pixel count the provider documents for this model. */
  maxPixels?: number;
  /** Maximum width or height, in pixels, the provider documents for this model. */
  maxSidePx?: number;
  /** Resize side preferred by the default balanced compression policy. */
  preferredSidePx?: number;
  /** Token accounting style; documents the provider-owned policy. */
  tokenMode?:
    | "tile"
    | "detail"
    | "provider";
};
|
||||
|
||||
/** Media input metadata for a model; only image limits are described here. */
export type ModelMediaInputConfig = {
  /** Optional image input limits for the model. */
  image?: ModelImageInputConfig;
};
|
||||
|
||||
export type ModelProviderAuthMode = "api-key" | "aws-sdk" | "oauth" | "token";
|
||||
|
||||
export type ModelProviderLocalServiceConfig = {
|
||||
@@ -140,6 +157,7 @@ export type ModelDefinitionConfig = {
|
||||
agentRuntime?: AgentRuntimePolicyConfig;
|
||||
headers?: Record<string, string>;
|
||||
compat?: ModelCompatConfig;
|
||||
mediaInput?: ModelMediaInputConfig;
|
||||
metadataSource?: "models-add";
|
||||
};
|
||||
|
||||
|
||||
@@ -246,6 +246,7 @@ export const AgentDefaultsSchema = z
|
||||
timeoutSeconds: z.number().int().positive().optional(),
|
||||
mediaMaxMb: z.number().positive().optional(),
|
||||
imageMaxDimensionPx: z.number().int().positive().optional(),
|
||||
imageQuality: z.enum(["auto", "efficient", "balanced", "high"]).optional(),
|
||||
typingIntervalSeconds: z.number().int().positive().optional(),
|
||||
typingMode: TypingModeSchema.optional(),
|
||||
heartbeat: HeartbeatSchema,
|
||||
|
||||
@@ -307,6 +307,22 @@ const ModelAgentRuntimePolicySchema = z
|
||||
.strict()
|
||||
.optional();
|
||||
|
||||
/**
 * Validation schema for a model's image input metadata.
 * Numeric limits must be positive integers; unknown keys are rejected (`strict`).
 */
const ModelImageInputSchema = z
  .object({
    maxBytes: z.number().int().positive().optional(),
    maxPixels: z.number().int().positive().optional(),
    maxSidePx: z.number().int().positive().optional(),
    preferredSidePx: z.number().int().positive().optional(),
    tokenMode: z
      .union([z.literal("tile"), z.literal("detail"), z.literal("provider")])
      .optional(),
  })
  .strict();
|
||||
|
||||
/**
 * Validation schema for a model's media input metadata.
 * Only the optional `image` section is accepted; unknown keys are rejected (`strict`).
 */
const ModelMediaInputSchema = z
  .object({
    image: ModelImageInputSchema.optional(),
  })
  .strict();
|
||||
|
||||
const ModelDefinitionSchema = z
|
||||
.object({
|
||||
id: z.string().min(1),
|
||||
@@ -348,6 +364,7 @@ const ModelDefinitionSchema = z
|
||||
agentRuntime: ModelAgentRuntimePolicySchema,
|
||||
headers: z.record(z.string(), z.string()).optional(),
|
||||
compat: ModelCompatSchema,
|
||||
mediaInput: ModelMediaInputSchema.optional(),
|
||||
metadataSource: z.literal("models-add").optional(),
|
||||
})
|
||||
.strict();
|
||||
|
||||
@@ -13,7 +13,8 @@ import {
|
||||
} from "./isolated-agent.test-harness.js";
|
||||
import { setupIsolatedAgentTurnMocks } from "./isolated-agent.test-setup.js";
|
||||
|
||||
vi.mock("../plugins/provider-runtime.js", () => ({
|
||||
vi.mock("../plugins/provider-runtime.js", async (importOriginal) => ({
|
||||
...(await importOriginal<typeof import("../plugins/provider-runtime.js")>()),
|
||||
resolveExternalAuthProfilesWithPlugins: () => [],
|
||||
}));
|
||||
|
||||
|
||||
@@ -536,7 +536,7 @@ function readJpegMetadata(buffer: Buffer): ImageMetadata | null {
|
||||
return null;
|
||||
}
|
||||
|
||||
function readImageMetadataFromHeader(buffer: Buffer): ImageMetadata | null {
|
||||
export function readImageMetadataFromHeader(buffer: Buffer): ImageMetadata | null {
|
||||
return (
|
||||
readPngMetadata(buffer) ??
|
||||
readGifMetadata(buffer) ??
|
||||
@@ -1235,6 +1235,7 @@ export async function resizeToPng(params: ResizeToPngParams): Promise<Buffer> {
|
||||
export async function optimizeImageToPng(
|
||||
buffer: Buffer,
|
||||
maxBytes: number,
|
||||
options?: { sides?: readonly number[] },
|
||||
): Promise<{
|
||||
buffer: Buffer;
|
||||
optimizedSize: number;
|
||||
@@ -1243,7 +1244,7 @@ export async function optimizeImageToPng(
|
||||
}> {
|
||||
// Try a grid of sizes/compression levels until under the limit.
|
||||
// PNG uses compression levels 0-9 (higher = smaller but slower).
|
||||
const sides = [2048, 1536, 1280, 1024, 800];
|
||||
const sides = options?.sides?.length ? [...options.sides] : [2048, 1536, 1280, 1024, 800];
|
||||
const compressionLevels = [6, 7, 8, 9];
|
||||
let smallest: {
|
||||
buffer: Buffer;
|
||||
|
||||
@@ -7,11 +7,15 @@ import { resolveStateDir } from "../config/paths.js";
|
||||
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
|
||||
import { createEmptyPluginRegistry } from "../plugins/registry-empty.js";
|
||||
import { resetPluginRuntimeStateForTest, setActivePluginRegistry } from "../plugins/runtime.js";
|
||||
import { resizeToJpeg } from "./media-services.js";
|
||||
import { encodePngRgba, fillPixel } from "./png-encode.js";
|
||||
|
||||
let effectiveImageBytesCap: typeof import("./web-media.js").effectiveImageBytesCap;
|
||||
let LocalMediaAccessError: typeof import("./web-media.js").LocalMediaAccessError;
|
||||
let loadWebMedia: typeof import("./web-media.js").loadWebMedia;
|
||||
let loadWebMediaRaw: typeof import("./web-media.js").loadWebMediaRaw;
|
||||
let optimizeImageToJpeg: typeof import("./web-media.js").optimizeImageToJpeg;
|
||||
let resolveImageCompressionGrid: typeof import("./web-media.js").resolveImageCompressionGrid;
|
||||
|
||||
const TINY_PNG_BASE64 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
|
||||
@@ -40,8 +44,14 @@ function installCanvasMediaResolver() {
|
||||
}
|
||||
|
||||
beforeAll(async () => {
|
||||
({ LocalMediaAccessError, loadWebMedia, loadWebMediaRaw, optimizeImageToJpeg } =
|
||||
await import("./web-media.js"));
|
||||
({
|
||||
effectiveImageBytesCap,
|
||||
LocalMediaAccessError,
|
||||
loadWebMedia,
|
||||
loadWebMediaRaw,
|
||||
optimizeImageToJpeg,
|
||||
resolveImageCompressionGrid,
|
||||
} = await import("./web-media.js"));
|
||||
fixtureRoot = await fs.mkdtemp(path.join(resolvePreferredOpenClawTmpDir(), "web-media-core-"));
|
||||
tinyPngFile = path.join(fixtureRoot, "tiny.png");
|
||||
await fs.writeFile(tinyPngFile, Buffer.from(TINY_PNG_BASE64, "base64"));
|
||||
@@ -77,6 +87,83 @@ afterAll(async () => {
|
||||
});
|
||||
|
||||
describe("loadWebMedia", () => {
|
||||
/**
 * Builds a square PNG fixture of `size` x `size` pixels: a differently
 * coloured centre block (the middle half of each axis) on a contrasting border.
 */
function createLargeColorBlockPng(size: number): Buffer {
  const pixels = Buffer.alloc(size * size * 4, 255);
  const blockStart = Math.floor(size * 0.25);
  const blockEnd = Math.floor(size * 0.75);
  const insideBlock = (coord: number): boolean => coord >= blockStart && coord < blockEnd;

  for (let row = 0; row < size; row += 1) {
    const rowInside = insideBlock(row);
    for (let col = 0; col < size; col += 1) {
      if (insideBlock(col) && rowInside) {
        fillPixel(pixels, col, row, size, 230, 40, 35);
      } else {
        fillPixel(pixels, col, row, size, 30, 110, 220);
      }
    }
  }
  return encodePngRgba(pixels, size, size);
}
|
||||
|
||||
/**
 * Builds a square PNG fixture of `size` x `size` pixels with an alpha channel
 * in use: the centre block is written with alpha 255 and the surrounding
 * border with alpha 96.
 */
function createLargeTransparentColorBlockPng(size: number): Buffer {
  const pixels = Buffer.alloc(size * size * 4, 0);
  const blockStart = Math.floor(size * 0.25);
  const blockEnd = Math.floor(size * 0.75);
  const insideBlock = (coord: number): boolean => coord >= blockStart && coord < blockEnd;

  for (let row = 0; row < size; row += 1) {
    const rowInside = insideBlock(row);
    for (let col = 0; col < size; col += 1) {
      if (insideBlock(col) && rowInside) {
        fillPixel(pixels, col, row, size, 230, 40, 35, 255);
      } else {
        fillPixel(pixels, col, row, size, 30, 110, 220, 96);
      }
    }
  }
  return encodePngRgba(pixels, size, size);
}
|
||||
|
||||
/**
 * Reads width and height from a PNG buffer's IHDR chunk.
 *
 * @throws Error when the buffer is shorter than 24 bytes or bytes 12..15 are not "IHDR".
 */
function readPngDimensions(buffer: Buffer): { width: number; height: number } {
  const longEnough = buffer.length >= 24;
  const chunkType = longEnough ? buffer.toString("ascii", 12, 16) : undefined;
  if (chunkType !== "IHDR") {
    throw new Error("PNG dimensions not found");
  }
  // Width and height are the first two big-endian 32-bit fields after the chunk type.
  const width = buffer.readUInt32BE(16);
  const height = buffer.readUInt32BE(20);
  return { width, height };
}
|
||||
|
||||
/**
 * Builds a 10-byte GIF header fixture: the "GIF89a" signature followed by
 * the width and height as little-endian 16-bit values.
 */
function createGifHeader(width: number, height: number): Buffer {
  const signature = Buffer.from("GIF89a", "ascii");
  const dimensions = Buffer.alloc(4);
  dimensions.writeUInt16LE(width, 0);
  dimensions.writeUInt16LE(height, 2);
  return Buffer.concat([signature, dimensions]);
}
|
||||
|
||||
/**
 * Scans JPEG segments, starting after the first two bytes, for a
 * start-of-frame marker and returns the frame's width and height.
 *
 * @throws Error when no start-of-frame segment is found.
 */
function readJpegDimensions(buffer: Buffer): { width: number; height: number } {
  // Markers in 0xC0..0xCF that are not start-of-frame segments.
  const nonFrameMarkers = [0xc4, 0xc8, 0xcc];
  let cursor = 2;
  while (cursor + 9 < buffer.length) {
    if (buffer[cursor] !== 0xff) {
      cursor += 1;
      continue;
    }
    const marker = buffer[cursor + 1];
    const payloadStart = cursor + 2;
    // SOI, EOI and RSTn markers carry no length field; just step past them.
    const standalone = marker === 0xd8 || marker === 0xd9 || (marker >= 0xd0 && marker <= 0xd7);
    if (standalone) {
      cursor = payloadStart;
      continue;
    }
    const isFrame = marker >= 0xc0 && marker <= 0xcf && !nonFrameMarkers.includes(marker);
    if (isFrame) {
      // Frame header layout after the marker: length(2) precision(1) height(2) width(2).
      const height = buffer.readUInt16BE(payloadStart + 3);
      const width = buffer.readUInt16BE(payloadStart + 5);
      return { height, width };
    }
    cursor = payloadStart + buffer.readUInt16BE(payloadStart);
  }
  throw new Error("JPEG dimensions not found");
}
|
||||
|
||||
function makeStallingFetch(firstChunk: Uint8Array) {
|
||||
return vi.fn(
|
||||
async () =>
|
||||
@@ -282,9 +369,82 @@ describe("loadWebMedia", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("uses model metadata-aware image compression grids", () => {
  type ModelLimits = {
    maxBytes?: number;
    maxPixels?: number;
    maxSidePx?: number;
    preferredSidePx?: number;
  };
  // Resolves the grid for the given models and returns its side ladder.
  const sidesFor = (quality: "high" | "balanced", models: ModelLimits[]): number[] =>
    resolveImageCompressionGrid({ models, quality }).sides;

  // "high" starts at the hard side limit.
  expect(sidesFor("high", [{ maxSidePx: 2576, preferredSidePx: 2576 }])[0]).toBe(2576);
  expect(sidesFor("high", [{ maxSidePx: 1568, preferredSidePx: 1568 }])[0]).toBe(1568);
  expect(sidesFor("high", [{ maxSidePx: 6000, preferredSidePx: 2048 }])[0]).toBe(6000);
  // "balanced" starts at the preferred side.
  expect(sidesFor("balanced", [{ maxSidePx: 6000, preferredSidePx: 2048 }])[0]).toBe(2048);
  // A pixel budget caps the side at the square root of the budget.
  expect(
    sidesFor("high", [{ maxSidePx: 6000, maxPixels: 12845056, preferredSidePx: 2048 }])[0],
  ).toBe(3584);
  expect(sidesFor("high", [{ maxPixels: 33177600, preferredSidePx: 2048 }])[0]).toBe(5760);
  // With several candidate models the strictest limit wins.
  expect(
    sidesFor("high", [
      { maxSidePx: 6000, preferredSidePx: 2048 },
      { maxSidePx: 1568, preferredSidePx: 1568 },
    ])[0],
  ).toBe(1568);
  // Small limits still get a descending ladder of fallback sides.
  expect(
    sidesFor("balanced", [{ maxSidePx: 512, preferredSidePx: 512, maxBytes: 64 * 1024 }]),
  ).toEqual([512, 384, 256, 192, 128]);
});
|
||||
|
||||
it("adapts automatic image compression for many-image turns", () => {
  // Same model limits for both calls; only the image count differs.
  const autoGridFor = (imageCount: number) =>
    resolveImageCompressionGrid({
      models: [{ maxSidePx: 2576, preferredSidePx: 2576 }],
      quality: "auto",
      imageCount,
    });
  const single = autoGridFor(1);
  const many = autoGridFor(8);

  expect(single.sides[0]).toBe(2576);
  expect(single.qualities).toEqual([80, 70, 60, 50, 40]);
  expect(many.sides[0]).toBe(1280);
  expect(many.qualities).toEqual([70, 60, 50, 40]);
});
|
||||
|
||||
async function withUnavailableImageOptimizer<T>(fn: () => Promise<T>): Promise<T> {
|
||||
vi.resetModules();
|
||||
vi.doMock("./media-services.js", () => ({
|
||||
vi.doMock("./media-services.js", async (importOriginal) => ({
|
||||
...(await importOriginal<typeof import("./media-services.js")>()),
|
||||
convertHeicToJpeg: vi.fn(async (buffer: Buffer) => buffer),
|
||||
hasAlphaChannel: vi.fn(async () => {
|
||||
throw new Error(
|
||||
@@ -335,6 +495,155 @@ describe("loadWebMedia", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("sends an in-limit data URL image when optional sharp optimization is unavailable", async () => {
  await withUnavailableImageOptimizer(async () => {
    const { optimizeImageBufferForWebMedia } = await import("./web-media.js");
    const tinyPng = Buffer.from(TINY_PNG_BASE64, "base64");

    const result = await optimizeImageBufferForWebMedia({
      buffer: tinyPng,
      contentType: "image/png",
      maxBytes: 1024,
      imageCompression: { models: [{ maxSidePx: 1024 }] },
    });

    // The original bytes are passed through untouched.
    expect(result.kind).toBe("image");
    expect(result.contentType).toBe("image/png");
    expect(result.buffer.equals(tinyPng)).toBe(true);
  });
});
|
||||
|
||||
it("does not bypass the data URL image cap when optional sharp optimization is unavailable", async () => {
  await withUnavailableImageOptimizer(async () => {
    const { optimizeImageBufferForWebMedia } = await import("./web-media.js");
    // The byte cap is smaller than the image, so the pass-through fallback must not apply.
    const attempt = optimizeImageBufferForWebMedia({
      buffer: Buffer.from(TINY_PNG_BASE64, "base64"),
      contentType: "image/png",
      maxBytes: 8,
      imageCompression: { models: [{ maxSidePx: 1024 }] },
    });
    await expect(attempt).rejects.toThrow(/Optional dependency sharp is required/);
  });
});
|
||||
|
||||
it("does not bypass model dimensions when optional sharp optimization is unavailable", async () => {
  await withUnavailableImageOptimizer(async () => {
    const { optimizeImageBufferForWebMedia } = await import("./web-media.js");
    // 1600px source against a 512px hard side limit: within the byte cap, but too large.
    const attempt = optimizeImageBufferForWebMedia({
      buffer: createLargeColorBlockPng(1600),
      contentType: "image/png",
      maxBytes: 16 * 1024 * 1024,
      imageCompression: { models: [{ maxSidePx: 512 }] },
    });
    await expect(attempt).rejects.toThrow(/Optional dependency sharp is required/);
  });
});
|
||||
|
||||
it("preserves in-limit GIF buffers when optimizing direct image buffers", async () => {
  const { optimizeImageBufferForWebMedia } = await import("./web-media.js");
  const gif = createGifHeader(16, 16);

  const result = await optimizeImageBufferForWebMedia({
    buffer: gif,
    contentType: "image/gif",
    maxBytes: 1024,
    imageCompression: { models: [{ maxSidePx: 64 }] },
  });

  // GIFs within the limits are returned byte-for-byte.
  expect(result.kind).toBe("image");
  expect(result.contentType).toBe("image/gif");
  expect(result.buffer.equals(gif)).toBe(true);
});
|
||||
|
||||
it("does not bypass model dimensions for GIF buffers", async () => {
  const { optimizeImageBufferForWebMedia } = await import("./web-media.js");
  // The header declares 1600x1600, which exceeds the 512px hard side limit.
  const attempt = optimizeImageBufferForWebMedia({
    buffer: createGifHeader(1600, 1600),
    contentType: "image/gif",
    maxBytes: 1024,
    imageCompression: { models: [{ maxSidePx: 512 }] },
  });
  await expect(attempt).rejects.toThrow(/dimensions exceed model image limits/i);
});
|
||||
|
||||
it("applies model image maxBytes to the effective image cap", async () => {
  // The caller allows 1 MiB, but the model policy allows only 8 bytes.
  const attempt = loadWebMediaRaw(tinyPngFile, {
    maxBytes: 1024 * 1024,
    localRoots: [fixtureRoot],
    imageCompression: {
      models: [{ maxBytes: 8 }],
    },
  });
  await expect(attempt).rejects.toThrow(/exceeds/i);
});
|
||||
|
||||
it("uses the strictest model image maxBytes across fallback candidates", () => {
  const MIB = 1024 * 1024;
  // Models without a maxBytes entry are ignored; the smallest limit wins.
  const strictest = effectiveImageBytesCap(16 * MIB, {
    models: [{ maxBytes: 8 * MIB }, {}, { maxBytes: 2 * MIB }],
  });
  expect(strictest).toBe(2 * MIB);

  // Without a base cap the policy cap is used on its own.
  const policyOnly = effectiveImageBytesCap(undefined, { models: [{ maxBytes: 1024 }] });
  expect(policyOnly).toBe(1024);
});
|
||||
|
||||
it("downscales oversized JPEGs to the resolved model side limit before returning media", async () => {
  // Build a 1600px JPEG fixture from the colour-block PNG.
  const sourceJpeg = await resizeToJpeg({
    buffer: createLargeColorBlockPng(1600),
    maxSide: 1600,
    quality: 92,
    withoutEnlargement: true,
  });
  const sourceDimensions = readJpegDimensions(sourceJpeg);
  expect(Math.max(...Object.values(sourceDimensions))).toBe(1600);

  const jpegPath = path.join(fixtureRoot, "large-center-red.jpg");
  await fs.writeFile(jpegPath, sourceJpeg);

  const result = await loadWebMedia(jpegPath, {
    maxBytes: 16 * 1024 * 1024,
    localRoots: [fixtureRoot],
    imageCompression: {
      quality: "high",
      models: [{ maxSidePx: 512, preferredSidePx: 512 }],
    },
  });

  expect(result.kind).toBe("image");
  expect(result.contentType).toBe("image/jpeg");
  const { width, height } = readJpegDimensions(result.buffer);
  expect(Math.max(width, height)).toBeLessThanOrEqual(512);
});
|
||||
|
||||
it("downscales alpha PNGs to the resolved model side limit before returning media", async () => {
  const transparentPng = createLargeTransparentColorBlockPng(1600);
  const sourceDimensions = readPngDimensions(transparentPng);
  expect(Math.max(...Object.values(sourceDimensions))).toBe(1600);

  const pngPath = path.join(fixtureRoot, "large-transparent.png");
  await fs.writeFile(pngPath, transparentPng);

  const result = await loadWebMedia(pngPath, {
    maxBytes: 16 * 1024 * 1024,
    localRoots: [fixtureRoot],
    imageCompression: {
      quality: "high",
      models: [{ maxSidePx: 512, preferredSidePx: 512 }],
    },
  });

  // The result stays PNG and fits inside the 512px side limit.
  expect(result.kind).toBe("image");
  expect(result.contentType).toBe("image/png");
  const { width, height } = readPngDimensions(result.buffer);
  expect(Math.max(width, height)).toBeLessThanOrEqual(512);
});
|
||||
|
||||
it("uses low default dimensions when model metadata is unavailable", async () => {
  // A model entry with no limits falls back to the default 2048px side.
  const grid = resolveImageCompressionGrid({
    quality: "high",
    models: [{}],
  });
  expect(grid.sides[0]).toBe(2048);
});
|
||||
|
||||
it("does not send original HEIC media when optional sharp conversion is unavailable", async () => {
|
||||
await withUnavailableImageOptimizer(async () => {
|
||||
const heicFile = path.join(fixtureRoot, "photo.heic");
|
||||
|
||||
@@ -21,6 +21,7 @@ import {
|
||||
hasAlphaChannel,
|
||||
isImageProcessorUnavailableError,
|
||||
optimizeImageToPng,
|
||||
readImageMetadataFromHeader,
|
||||
resizeToJpeg,
|
||||
} from "./media-services.js";
|
||||
import {
|
||||
@@ -45,6 +46,7 @@ export type WebMediaResult = {
|
||||
type WebMediaOptions = {
|
||||
maxBytes?: number;
|
||||
optimizeImages?: boolean;
|
||||
imageCompression?: ImageCompressionPolicy;
|
||||
ssrfPolicy?: SsrFPolicy;
|
||||
proxyUrl?: string;
|
||||
fetchImpl?: (input: RequestInfo | URL, init?: RequestInit) => Promise<Response>;
|
||||
@@ -61,6 +63,21 @@ type WebMediaOptions = {
|
||||
hostReadCapability?: boolean;
|
||||
};
|
||||
|
||||
/**
 * Requested image compression/detail level. "auto" is resolved to
 * "efficient" or "balanced" from the number of images in the turn.
 */
export type ImageQualityPreference =
  | "auto"
  | "efficient"
  | "balanced"
  | "high";
|
||||
|
||||
/** Image limits contributed by one candidate model; every field is optional. */
export type ImageCompressionModelPolicy = {
  /** Byte limit; the smallest value across models bounds the image byte cap. */
  maxBytes?: number;
  /** Hard limit on total pixels (width x height). */
  maxPixels?: number;
  /** Hard limit on the longest side, in pixels. */
  maxSidePx?: number;
  /** Side used as the starting point for balanced/efficient compression. */
  preferredSidePx?: number;
};
|
||||
|
||||
/** Inputs used to resolve image compression limits and the resize/quality grid. */
export type ImageCompressionPolicy = {
  /** Requested quality preference; treated as "auto" when omitted. */
  quality?: ImageQualityPreference;
  /** Limits of each candidate model; the strictest limits are applied. */
  models?: ImageCompressionModelPolicy[];
  /** Number of images in the turn; used to resolve the "auto" preference. */
  imageCount?: number;
};
|
||||
|
||||
async function resolveMediaStoreUriToPath(mediaUrl: string): Promise<string | null> {
|
||||
if (!/^media:\/\//i.test(mediaUrl)) {
|
||||
return null;
|
||||
@@ -335,6 +352,200 @@ type OptimizedImage = {
|
||||
compressionLevel?: number;
|
||||
};
|
||||
|
||||
/** Resize sides (px) tried, largest first, by the balanced compression grid. */
const DEFAULT_JPEG_SIDES = [2048, 1536, 1280, 1024, 800] as const;

/** JPEG quality levels tried, highest first, by the balanced compression grid. */
const DEFAULT_JPEG_QUALITIES = [80, 70, 60, 50, 40] as const;

/** Side limit (px) assumed when a model supplies no limits of its own. */
const DEFAULT_VISION_MAX_SIDE = 2048;

/** Extra small sides (px) appended to every ladder as low-end fallbacks. */
const LOW_IMAGE_SIDE_FALLBACKS = [640, 512, 384, 256, 192, 128] as const;
|
||||
|
||||
/**
 * Maps an arbitrary string to a known quality preference.
 *
 * @returns the input when it is "efficient", "balanced" or "high"; otherwise "auto".
 */
function normalizeImageQualityPreference(value?: string): ImageQualityPreference {
  if (value === "efficient" || value === "balanced" || value === "high") {
    return value;
  }
  // Anything else (including undefined and "auto" itself) resolves to "auto".
  return "auto";
}
|
||||
|
||||
/**
 * Side length (rounded down) of the largest square whose area fits in
 * `pixelBudget` pixels.
 */
function squareLongSideForPixelBudget(pixelBudget: number): number {
  const exactSide = Math.sqrt(pixelBudget);
  return Math.floor(exactSide);
}
|
||||
|
||||
/**
 * Normalizes an optional numeric limit to a positive integer.
 *
 * The value is floored before the positivity check, so a fraction in (0, 1)
 * yields `undefined` rather than `0` (a zero "limit" would otherwise be
 * treated by callers as a real, impossible-to-satisfy limit).
 *
 * @param value candidate limit; may be undefined, non-finite or fractional.
 * @returns the floored value when it is a finite number >= 1, otherwise undefined.
 */
function positiveInteger(value: number | undefined): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  const floored = Math.floor(value);
  return floored > 0 ? floored : undefined;
}
|
||||
|
||||
/**
 * Resolves the policy's quality preference to a concrete level.
 *
 * An explicit "efficient", "balanced" or "high" is returned as-is. "auto"
 * (or a missing value) becomes "efficient" when the turn carries six or
 * more images and "balanced" otherwise.
 */
function effectiveImageQualityPreference(
  policy?: ImageCompressionPolicy,
): Exclude<ImageQualityPreference, "auto"> {
  const requested = normalizeImageQualityPreference(policy?.quality);
  if (requested === "auto") {
    // A missing image count is treated as a single image.
    const count = Math.max(1, Math.floor(policy?.imageCount ?? 1));
    return count >= 6 ? "efficient" : "balanced";
  }
  return requested;
}
|
||||
|
||||
/**
 * Largest side (px) allowed for one model.
 *
 * Hard limits come from `maxSidePx` and from `maxPixels` (converted to the
 * side of an equal-area square); the smaller of the two wins. Without any
 * hard limit the preferred side is used, then the default vision side.
 */
function maxSideForModel(model: ImageCompressionModelPolicy | undefined): number {
  const sideLimit = positiveInteger(model?.maxSidePx);
  const pixelBudget = positiveInteger(model?.maxPixels);
  const pixelSideLimit = pixelBudget ? squareLongSideForPixelBudget(pixelBudget) : undefined;

  if (sideLimit !== undefined && pixelSideLimit !== undefined) {
    return Math.min(sideLimit, pixelSideLimit);
  }
  return (
    sideLimit ??
    pixelSideLimit ??
    positiveInteger(model?.preferredSidePx) ??
    DEFAULT_VISION_MAX_SIDE
  );
}
|
||||
|
||||
/**
 * Preferred resize side (px) for one model: its `preferredSidePx` when set,
 * otherwise the model's maximum side capped at the default vision side.
 */
function preferredSideForModel(model: ImageCompressionModelPolicy | undefined): number {
  const preferred = positiveInteger(model?.preferredSidePx);
  if (preferred !== undefined) {
    return preferred;
  }
  return Math.min(maxSideForModel(model), DEFAULT_VISION_MAX_SIDE);
}
|
||||
|
||||
/**
 * Strictest maximum and preferred sides across all models in the policy.
 * With no models, a single model without limits is assumed. The preferred
 * side never exceeds the maximum side.
 */
function policyModelSides(policy: ImageCompressionPolicy | undefined): {
  maxSide: number;
  preferredSide: number;
} {
  const candidates = policy?.models?.length ? policy.models : [undefined];
  let maxSide = Number.POSITIVE_INFINITY;
  let preferredSide = Number.POSITIVE_INFINITY;
  for (const candidate of candidates) {
    maxSide = Math.min(maxSide, maxSideForModel(candidate));
    preferredSide = Math.min(preferredSide, preferredSideForModel(candidate));
  }
  return {
    maxSide,
    preferredSide: Math.min(preferredSide, maxSide),
  };
}
|
||||
|
||||
/**
 * Starting (largest) resize side for a concrete quality preference:
 * "high" uses the hard maximum, "balanced" the preferred side, and
 * "efficient" the preferred side capped at 1280px.
 */
function sideForPreference(
  preference: Exclude<ImageQualityPreference, "auto">,
  policy?: ImageCompressionPolicy,
): number {
  const { maxSide, preferredSide } = policyModelSides(policy);
  const balancedSide = Math.min(preferredSide, maxSide);
  if (preference === "high") {
    return maxSide;
  }
  if (preference === "efficient") {
    return Math.min(balancedSide, 1280);
  }
  return balancedSide;
}
|
||||
|
||||
/**
 * Smallest `maxBytes` declared by any model in the policy, or undefined
 * when no model declares a usable byte limit.
 */
function imageMaxBytesForPolicy(policy?: ImageCompressionPolicy): number | undefined {
  let smallest: number | undefined;
  for (const model of policy?.models ?? []) {
    const limit = positiveInteger(model.maxBytes);
    if (limit !== undefined && (smallest === undefined || limit < smallest)) {
      smallest = limit;
    }
  }
  return smallest;
}
|
||||
|
||||
/**
 * Checks an encoded image against the policy's hard dimension limits
 * (`maxSidePx` and `maxPixels`, strictest across models).
 *
 * @returns true when no hard limits exist or the image fits all of them;
 *   false when limits exist and the header cannot be read or a limit is exceeded.
 */
function imageSatisfiesHardDimensionPolicy(
  buffer: Buffer,
  policy?: ImageCompressionPolicy,
): boolean {
  let strictestSide: number | undefined;
  let strictestPixels: number | undefined;
  for (const model of policy?.models ?? []) {
    const side = positiveInteger(model.maxSidePx);
    if (side !== undefined && (strictestSide === undefined || side < strictestSide)) {
      strictestSide = side;
    }
    const pixels = positiveInteger(model.maxPixels);
    if (pixels !== undefined && (strictestPixels === undefined || pixels < strictestPixels)) {
      strictestPixels = pixels;
    }
  }
  if (strictestSide === undefined && strictestPixels === undefined) {
    return true;
  }

  // Limits exist, so the dimensions must be readable to prove compliance.
  const dimensions = readImageMetadataFromHeader(buffer);
  if (!dimensions) {
    return false;
  }
  const longSide = Math.max(dimensions.width, dimensions.height);
  const pixelCount = dimensions.width * dimensions.height;
  const sideOk = strictestSide === undefined || longSide <= strictestSide;
  const pixelsOk = strictestPixels === undefined || pixelCount <= strictestPixels;
  return sideOk && pixelsOk;
}
|
||||
|
||||
/**
 * Throws when the image does not satisfy the policy's hard dimension limits.
 *
 * @throws Error whose message includes "WIDTHxHEIGHT" when the header is readable.
 */
function assertImageSatisfiesHardDimensionPolicy(
  buffer: Buffer,
  policy?: ImageCompressionPolicy,
): void {
  if (!imageSatisfiesHardDimensionPolicy(buffer, policy)) {
    const dimensions = readImageMetadataFromHeader(buffer);
    const suffix = dimensions ? `: ${dimensions.width}x${dimensions.height}` : "";
    throw new Error(`Image dimensions exceed model image limits${suffix}`);
  }
}
|
||||
|
||||
/**
 * Combines a caller-supplied byte cap with the policy's model byte limits.
 *
 * @param baseCap caller cap, if any.
 * @param policy compression policy whose models may declare `maxBytes`.
 * @returns the smaller of the two when both exist, whichever one exists
 *   otherwise, or undefined when neither is set.
 */
export function effectiveImageBytesCap(
  baseCap: number | undefined,
  policy?: ImageCompressionPolicy,
): number | undefined {
  const policyCap = imageMaxBytesForPolicy(policy);
  if (baseCap === undefined || policyCap === undefined) {
    return baseCap ?? policyCap;
  }
  return Math.min(baseCap, policyCap);
}
|
||||
|
||||
/**
 * Builds the descending list of resize sides to try.
 *
 * Candidates are `maxSide`, the given `values` and the low-side fallbacks,
 * each clamped to `maxSide`, de-duplicated and sorted largest first. When
 * that collapses to a single entry (a very small `maxSide`), a ladder of
 * 100% / 75% / 50% / 25% of `maxSide` is returned instead.
 */
function buildDescendingLadder(maxSide: number, values: readonly number[]): number[] {
  const topSide = Math.max(1, Math.floor(maxSide));
  // Keep positive entries only, drop duplicates, and order largest first.
  const distinctDescending = (candidates: readonly number[]): number[] =>
    Array.from(new Set(candidates.filter((candidate) => candidate > 0))).sort(
      (left, right) => right - left,
    );

  const clamped = [topSide, ...values, ...LOW_IMAGE_SIDE_FALLBACKS].map((candidate) =>
    Math.min(topSide, candidate),
  );
  const ladder = distinctDescending(clamped);
  if (ladder.length > 1 || topSide <= 1) {
    return ladder;
  }

  return distinctDescending([
    topSide,
    Math.floor(topSide * 0.75),
    Math.floor(topSide * 0.5),
    Math.floor(topSide * 0.25),
  ]);
}
|
||||
|
||||
/**
 * Resolves the resize sides and JPEG qualities to try for a policy.
 *
 * @param policy quality preference, model limits and image count.
 * @returns `sides` (px, largest first) and `qualities` (highest first) for
 *   the effective preference: "efficient", "high", or "balanced".
 */
export function resolveImageCompressionGrid(policy?: ImageCompressionPolicy): {
  sides: number[];
  qualities: number[];
} {
  const preference = effectiveImageQualityPreference(policy);
  const topSide = sideForPreference(preference, policy);

  if (preference === "efficient") {
    return {
      sides: buildDescendingLadder(topSide, [1024, 800]),
      qualities: [70, 60, 50, 40],
    };
  }
  if (preference === "high") {
    return {
      sides: buildDescendingLadder(topSide, [3072, 2576, 2048, 1800, 1536, 1280, 1024, 800]),
      qualities: [92, 85, 78, 70, 62, 52, 42],
    };
  }
  // "balanced" uses the default grid.
  return {
    sides: buildDescendingLadder(topSide, [...DEFAULT_JPEG_SIDES]),
    qualities: [...DEFAULT_JPEG_QUALITIES],
  };
}
|
||||
|
||||
function logOptimizedImage(params: { originalSize: number; optimized: OptimizedImage }): void {
|
||||
if (!shouldLogVerbose()) {
|
||||
return;
|
||||
@@ -357,13 +568,15 @@ async function optimizeImageWithFallback(params: {
|
||||
buffer: Buffer;
|
||||
cap: number;
|
||||
meta?: { contentType?: string; fileName?: string };
|
||||
imageCompression?: ImageCompressionPolicy;
|
||||
}): Promise<OptimizedImage> {
|
||||
const { buffer, cap, meta } = params;
|
||||
const isPng = meta?.contentType === "image/png" || meta?.fileName?.toLowerCase().endsWith(".png");
|
||||
const hasAlpha = isPng && (await hasAlphaChannel(buffer));
|
||||
|
||||
if (hasAlpha) {
|
||||
const optimized = await optimizeImageToPng(buffer, cap);
|
||||
const grid = resolveImageCompressionGrid(params.imageCompression);
|
||||
const optimized = await optimizeImageToPng(buffer, cap, { sides: grid.sides });
|
||||
if (optimized.buffer.length <= cap) {
|
||||
return { ...optimized, format: "png" };
|
||||
}
|
||||
@@ -374,10 +587,79 @@ async function optimizeImageWithFallback(params: {
|
||||
}
|
||||
}
|
||||
|
||||
const optimized = await optimizeImageToJpeg(buffer, cap, meta);
|
||||
const optimized = await optimizeImageToJpeg(buffer, cap, {
|
||||
...meta,
|
||||
...(params.imageCompression ? { imageCompression: params.imageCompression } : {}),
|
||||
});
|
||||
return { ...optimized, format: "jpeg" };
|
||||
}
|
||||
|
||||
/**
 * Optimizes an in-memory image buffer for sending, honouring the byte cap
 * and the model image limits in `imageCompression`.
 *
 * - GIFs are never re-encoded: they are returned unchanged when within the
 *   cap and the hard dimension limits, and rejected otherwise.
 * - Other images go through the optimizer. If the image processor is
 *   unavailable, the original is sent only when it is not HEIC, fits the cap
 *   and satisfies the hard dimension limits; otherwise the error is rethrown.
 *
 * @throws Error when the GIF or the optimized result exceeds the cap, when a
 *   GIF violates the hard dimension limits, or when optimization fails and
 *   the original cannot be sent as-is.
 */
export async function optimizeImageBufferForWebMedia(params: {
  buffer: Buffer;
  contentType?: string;
  fileName?: string;
  maxBytes?: number;
  imageCompression?: ImageCompressionPolicy;
}): Promise<WebMediaResult> {
  const { buffer, contentType, fileName, imageCompression } = params;
  const baseCap = params.maxBytes ?? maxBytesForKind("image");
  const cap = effectiveImageBytesCap(baseCap, imageCompression) ?? baseCap;
  // Result used whenever the original bytes are sent untouched.
  const passThrough = (): WebMediaResult => ({
    buffer,
    contentType,
    kind: "image",
    fileName,
  });

  if (contentType === "image/gif") {
    if (buffer.length > cap) {
      throw new Error(formatCapLimit("GIF", cap, buffer.length));
    }
    assertImageSatisfiesHardDimensionPolicy(buffer, imageCompression);
    return passThrough();
  }

  const meta = { contentType, fileName };
  let optimized: OptimizedImage;
  try {
    optimized = await optimizeImageWithFallback({
      buffer,
      cap,
      meta,
      imageCompression,
    });
  } catch (err) {
    const canSendOriginal =
      isImageProcessorUnavailableError(err) &&
      !isHeicSource(meta) &&
      buffer.length <= cap &&
      imageSatisfiesHardDimensionPolicy(buffer, imageCompression);
    if (!canSendOriginal) {
      throw err;
    }
    if (shouldLogVerbose()) {
      logVerbose(
        `Image optimizer unavailable; sending original ${formatMb(buffer.length)}MB media without optimization`,
      );
    }
    return passThrough();
  }

  logOptimizedImage({ originalSize: buffer.length, optimized });
  if (optimized.buffer.length > cap) {
    throw new Error(formatCapReduce("Media", cap, optimized.buffer.length));
  }
  // A HEIC source converted to JPEG gets a JPEG file name.
  const convertedFromHeic = optimized.format === "jpeg" && isHeicSource(params);
  return {
    buffer: optimized.buffer,
    contentType: optimized.format === "png" ? "image/png" : "image/jpeg",
    kind: "image",
    fileName: convertedFromHeic ? toJpegFileName(fileName) : fileName,
  };
}
|
||||
|
||||
async function loadWebMediaInternal(
|
||||
mediaUrl: string,
|
||||
options: WebMediaOptions = {},
|
||||
@@ -396,6 +678,7 @@ async function loadWebMediaInternal(
|
||||
sandboxValidated = false,
|
||||
readFile: readFileOverride,
|
||||
hostReadCapability = false,
|
||||
imageCompression,
|
||||
} = options;
|
||||
// Strip MEDIA: prefix used by agent tools (e.g. TTS) to tag media paths.
|
||||
// Be lenient: LLM output may add extra whitespace (e.g. " MEDIA : /tmp/x.png").
|
||||
@@ -421,12 +704,18 @@ async function loadWebMediaInternal(
|
||||
const originalSize = buffer.length;
|
||||
let optimized: OptimizedImage;
|
||||
try {
|
||||
optimized = await optimizeImageWithFallback({ buffer, cap, meta });
|
||||
optimized = await optimizeImageWithFallback({
|
||||
buffer,
|
||||
cap,
|
||||
meta,
|
||||
...(imageCompression ? { imageCompression } : {}),
|
||||
});
|
||||
} catch (err) {
|
||||
if (
|
||||
isImageProcessorUnavailableError(err) &&
|
||||
!isHeicSource(meta ?? {}) &&
|
||||
buffer.length <= cap
|
||||
buffer.length <= cap &&
|
||||
imageSatisfiesHardDimensionPolicy(buffer, imageCompression)
|
||||
) {
|
||||
if (shouldLogVerbose()) {
|
||||
logVerbose(
|
||||
@@ -472,11 +761,13 @@ async function loadWebMediaInternal(
|
||||
// Otherwise fall back to per-kind defaults.
|
||||
const cap = maxBytes !== undefined ? maxBytes : maxBytesForKind(params.kind ?? "document");
|
||||
if (params.kind === "image") {
|
||||
const imageCap = effectiveImageBytesCap(cap, imageCompression) ?? cap;
|
||||
const isGif = params.contentType === "image/gif";
|
||||
if (isGif || !optimizeImages) {
|
||||
if (params.buffer.length > cap) {
|
||||
throw new Error(formatCapLimit(isGif ? "GIF" : "Media", cap, params.buffer.length));
|
||||
if (params.buffer.length > imageCap) {
|
||||
throw new Error(formatCapLimit(isGif ? "GIF" : "Media", imageCap, params.buffer.length));
|
||||
}
|
||||
assertImageSatisfiesHardDimensionPolicy(params.buffer, imageCompression);
|
||||
return {
|
||||
buffer: params.buffer,
|
||||
contentType: params.contentType,
|
||||
@@ -485,7 +776,7 @@ async function loadWebMediaInternal(
|
||||
};
|
||||
}
|
||||
return {
|
||||
...(await optimizeAndClampImage(params.buffer, cap, {
|
||||
...(await optimizeAndClampImage(params.buffer, imageCap, {
|
||||
contentType: params.contentType,
|
||||
fileName: params.fileName,
|
||||
})),
|
||||
@@ -643,7 +934,11 @@ export async function loadWebMediaRaw(
|
||||
export async function optimizeImageToJpeg(
|
||||
buffer: Buffer,
|
||||
maxBytes: number,
|
||||
opts: { contentType?: string; fileName?: string } = {},
|
||||
opts: {
|
||||
contentType?: string;
|
||||
fileName?: string;
|
||||
imageCompression?: ImageCompressionPolicy;
|
||||
} = {},
|
||||
): Promise<{
|
||||
buffer: Buffer;
|
||||
optimizedSize: number;
|
||||
@@ -659,8 +954,7 @@ export async function optimizeImageToJpeg(
|
||||
throw new Error(`HEIC image conversion failed: ${String(err)}`, { cause: err });
|
||||
}
|
||||
}
|
||||
const sides = [2048, 1536, 1280, 1024, 800];
|
||||
const qualities = [80, 70, 60, 50, 40];
|
||||
const { sides, qualities } = resolveImageCompressionGrid(opts.imageCompression);
|
||||
let smallest: {
|
||||
buffer: Buffer;
|
||||
size: number;
|
||||
|
||||
@@ -3,6 +3,8 @@ import {
|
||||
isModelThinkingFormat,
|
||||
type ModelApi,
|
||||
type ModelCompatConfig,
|
||||
type ModelImageInputConfig,
|
||||
type ModelMediaInputConfig,
|
||||
} from "../config/types.models.js";
|
||||
import { isBlockedObjectKey } from "../infra/prototype-keys.js";
|
||||
import { normalizeOptionalString } from "../shared/string-coerce.js";
|
||||
@@ -237,6 +239,33 @@ function normalizeModelCatalogStatus(value: unknown): ModelCatalogStatus | undef
|
||||
return MODEL_CATALOG_STATUSES.has(status) ? (status as ModelCatalogStatus) : undefined;
|
||||
}
|
||||
|
||||
/**
 * Validates a raw catalog value as an image token mode.
 *
 * The value is first passed through `normalizeOptionalString`; a missing result
 * is treated as the empty string. The normalized string is returned only when it
 * is exactly `"tile"`, `"detail"`, or `"provider"`.
 *
 * @param value - Untrusted catalog value to validate.
 * @returns The recognized token mode, or `undefined` for anything else.
 */
function normalizeModelCatalogImageTokenMode(value: unknown): ModelImageInputConfig["tokenMode"] {
  const candidate = normalizeOptionalString(value) ?? "";
  switch (candidate) {
    case "tile":
    case "detail":
    case "provider":
      return candidate;
    default:
      // Unknown or empty modes are dropped rather than passed through.
      return undefined;
  }
}
|
||||
|
||||
/**
 * Normalizes the `mediaInput` section of a raw model catalog entry.
 *
 * Only the `image` sub-record is read. Each numeric limit (`maxBytes`,
 * `maxPixels`, `maxSidePx`, `preferredSidePx`) goes through
 * `normalizePositiveInteger`, and `tokenMode` goes through
 * `normalizeModelCatalogImageTokenMode`; fields that normalize to `undefined`
 * (or, for `tokenMode`, to a falsy value) are omitted from the result.
 *
 * @param value - Untrusted `mediaInput` value from the catalog.
 * @returns `{ image }` holding the fields that survived normalization, or
 *   `undefined` when `value` or `value.image` is not a record, or when no
 *   field survived.
 */
function normalizeModelCatalogMediaInput(value: unknown): ModelMediaInputConfig | undefined {
  if (!isRecord(value)) {
    return undefined;
  }
  const rawImage = value.image;
  if (!isRecord(rawImage)) {
    return undefined;
  }

  // Normalize every field up front, in the same order the keys are emitted below.
  const maxBytes = normalizePositiveInteger(rawImage.maxBytes);
  const maxPixels = normalizePositiveInteger(rawImage.maxPixels);
  const maxSidePx = normalizePositiveInteger(rawImage.maxSidePx);
  const preferredSidePx = normalizePositiveInteger(rawImage.preferredSidePx);
  const tokenMode = normalizeModelCatalogImageTokenMode(rawImage.tokenMode);

  const image: NonNullable<ModelMediaInputConfig["image"]> = {};
  if (maxBytes !== undefined) {
    image.maxBytes = maxBytes;
  }
  if (maxPixels !== undefined) {
    image.maxPixels = maxPixels;
  }
  if (maxSidePx !== undefined) {
    image.maxSidePx = maxSidePx;
  }
  if (preferredSidePx !== undefined) {
    image.preferredSidePx = preferredSidePx;
  }
  if (tokenMode) {
    image.tokenMode = tokenMode;
  }

  // An image record with no usable fields is reported as "no media input".
  if (Object.keys(image).length === 0) {
    return undefined;
  }
  return { image };
}
|
||||
|
||||
function normalizeModelCatalogModel(value: unknown): ModelCatalogModel | undefined {
|
||||
if (!isRecord(value)) {
|
||||
return undefined;
|
||||
@@ -256,6 +285,7 @@ function normalizeModelCatalogModel(value: unknown): ModelCatalogModel | undefin
|
||||
const maxTokens = normalizePositiveNumber(value.maxTokens);
|
||||
const cost = normalizeModelCatalogCost(value.cost);
|
||||
const compat = normalizeModelCatalogCompat(value.compat);
|
||||
const mediaInput = normalizeModelCatalogMediaInput(value.mediaInput);
|
||||
const status = normalizeModelCatalogStatus(value.status);
|
||||
const statusReason = normalizeOptionalString(value.statusReason) ?? "";
|
||||
const replaces = normalizeTrimmedStringList(value.replaces);
|
||||
@@ -274,6 +304,7 @@ function normalizeModelCatalogModel(value: unknown): ModelCatalogModel | undefin
|
||||
...(maxTokens !== undefined ? { maxTokens } : {}),
|
||||
...(cost ? { cost } : {}),
|
||||
...(compat ? { compat } : {}),
|
||||
...(mediaInput ? { mediaInput } : {}),
|
||||
...(status ? { status } : {}),
|
||||
...(statusReason ? { statusReason } : {}),
|
||||
...(replaces.length > 0 ? { replaces } : {}),
|
||||
@@ -468,6 +499,7 @@ export function normalizeModelCatalogProviderRows(params: {
|
||||
const maxTokens = normalizePositiveNumber(model.maxTokens);
|
||||
const cost = normalizeModelCatalogCost(model.cost);
|
||||
const compat = normalizeModelCatalogCompat(model.compat);
|
||||
const mediaInput = normalizeModelCatalogMediaInput(model.mediaInput);
|
||||
const statusReason = normalizeOptionalString(model.statusReason) ?? "";
|
||||
const replacedBy = normalizeOptionalString(model.replacedBy) ?? "";
|
||||
const replaces = normalizeStringList(model.replaces);
|
||||
@@ -490,6 +522,7 @@ export function normalizeModelCatalogProviderRows(params: {
|
||||
...(maxTokens !== undefined ? { maxTokens } : {}),
|
||||
...(cost ? { cost } : {}),
|
||||
...(compat ? { compat } : {}),
|
||||
...(mediaInput ? { mediaInput } : {}),
|
||||
...(statusReason ? { statusReason } : {}),
|
||||
...(replaces ? { replaces } : {}),
|
||||
...(replacedBy ? { replacedBy } : {}),
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import type { ModelApi, ModelCompatConfig } from "../config/types.models.js";
|
||||
import type { ModelApi, ModelCompatConfig, ModelMediaInputConfig } from "../config/types.models.js";
|
||||
|
||||
export type ModelCatalogInput = "text" | "image" | "document";
|
||||
export type ModelCatalogDiscovery = "static" | "refreshable" | "runtime";
|
||||
@@ -71,6 +71,7 @@ export type ModelCatalogModel = {
|
||||
maxTokens?: number;
|
||||
cost?: ModelCatalogCost;
|
||||
compat?: ModelCompatConfig;
|
||||
mediaInput?: ModelMediaInputConfig;
|
||||
status?: ModelCatalogStatus;
|
||||
statusReason?: string;
|
||||
replaces?: string[];
|
||||
@@ -127,6 +128,7 @@ export type NormalizedModelCatalogRow = {
|
||||
maxTokens?: number;
|
||||
cost?: ModelCatalogCost;
|
||||
compat?: ModelCompatConfig;
|
||||
mediaInput?: ModelMediaInputConfig;
|
||||
statusReason?: string;
|
||||
replaces?: string[];
|
||||
replacedBy?: string;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { Api, Model } from "@earendil-works/pi-ai";
|
||||
import type { ModelCompatConfig } from "../config/types.models.js";
|
||||
import type { ModelCompatConfig, ModelMediaInputConfig } from "../config/types.models.js";
|
||||
|
||||
/**
|
||||
* Fully-resolved runtime model shape used after provider/plugin-owned
|
||||
@@ -10,4 +10,5 @@ export type ProviderRuntimeModel = Omit<Model<Api>, "compat"> & {
|
||||
contextTokens?: number;
|
||||
params?: Record<string, unknown>;
|
||||
requestTimeoutMs?: number;
|
||||
mediaInput?: ModelMediaInputConfig;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user