diff --git a/config/knip.config.ts b/config/knip.config.ts index dc7492375030..aa3d74b58d96 100644 --- a/config/knip.config.ts +++ b/config/knip.config.ts @@ -50,7 +50,7 @@ const bundledPluginIgnoredRuntimeDependencies = [ "lit", "linkedom", "openclaw", - "pdfjs-dist", + "clawpdf", ] as const; const rootBundledPluginRuntimeDependencies = [ @@ -70,7 +70,7 @@ const rootBundledPluginRuntimeDependencies = [ "minimatch", "node-edge-tts", "openshell", - "pdfjs-dist", + "clawpdf", "tokenjuice", ] as const; diff --git a/docs/gateway/openresponses-http-api.md b/docs/gateway/openresponses-http-api.md index a9a07b45f5de..84c25cb45d0e 100644 --- a/docs/gateway/openresponses-http-api.md +++ b/docs/gateway/openresponses-http-api.md @@ -175,9 +175,9 @@ Current behavior: rasterized into images and passed to the model, and the injected file block uses the placeholder `[PDF content rendered to images]`. -PDF parsing is provided by the bundled `document-extract` plugin, which uses the -Node-friendly `pdfjs-dist` legacy build (no worker). The modern PDF.js build -expects browser workers/DOM globals, so it is not used in the Gateway. +PDF parsing is provided by the bundled `document-extract` plugin, which uses +`clawpdf` and its packaged PDFium WebAssembly runtime for text extraction and +page rendering. URL fetch defaults: diff --git a/docs/tools/pdf.md b/docs/tools/pdf.md index ae37743aa50a..9acc791a0103 100644 --- a/docs/tools/pdf.md +++ b/docs/tools/pdf.md @@ -113,8 +113,8 @@ Fallback details: text-only model, OpenClaw drops the rendered images and continues with the extracted text. - Extraction fallback uses the bundled `document-extract` plugin. The plugin owns - `pdfjs-dist`; `@napi-rs/canvas` is used only when image rendering fallback is - available. + `clawpdf`, which provides text extraction and image rendering through PDFium + WebAssembly. ## Config diff --git a/extensions/document-extract/document-extractor.test.ts b/extensions/document-extract/document-extractor.test.ts index 5e9dab77a947..9f21de69d0e3 100644 --- a/extensions/document-extract/document-extractor.test.ts +++ b/extensions/document-extract/document-extractor.test.ts @@ -1,61 +1,45 @@ -import { existsSync } from "node:fs"; -import { createRequire } from "node:module"; -import path from "node:path"; import { afterAll, beforeEach, describe, expect, it, vi } from "vitest"; -const { canvasSizes, getDocumentMock, pdfDocument } = vi.hoisted(() => ({ - canvasSizes: [] as Array<{ width: number; height: number }>, - getDocumentMock: vi.fn(), +const { createEngineMock, openPdfMock, pdfDocument } = vi.hoisted(() => ({ + createEngineMock: vi.fn(), + openPdfMock: vi.fn(), pdfDocument: { - numPages: 2, - getPage: vi.fn(async () => ({ - getTextContent: vi.fn(async () => ({ items: [] })), - getViewport: vi.fn(({ scale }: { scale: number }) => ({ - width: 1000 * scale, - height: 1000 * scale, - })), - render: vi.fn(() => ({ promise: Promise.resolve() })), - })), + pageCount: 2, + extract: vi.fn(), + destroy: vi.fn(), }, })); -vi.mock("pdfjs-dist/legacy/build/pdf.mjs", () => ({ - getDocument: getDocumentMock, -})); - -vi.mock("@napi-rs/canvas", () => ({ - createCanvas: vi.fn((width: number, height: number) => { - canvasSizes.push({ width, height }); - return { - toBuffer: vi.fn(() => Buffer.from("png")), - }; - }), +vi.mock("clawpdf", () => ({ + createEngine: createEngineMock, })); import { createPdfDocumentExtractor } from "./document-extractor.js"; -const require = createRequire(import.meta.url); - -function requireFirstMockArg(mock: ReturnType, label: string) { - const [call] = mock.mock.calls; - if (!call) { - throw new Error(`Expected ${label}`); - } - return call[0]; +function request(overrides = {}) { + return { + buffer: Buffer.from("%PDF-1.4"), + mimeType: "application/pdf", + maxPages: 2, + maxPixels: 100, + minTextChars: 10, + ...overrides, + }; } describe("PDF document extractor", () => { afterAll(() => { - vi.doUnmock("pdfjs-dist/legacy/build/pdf.mjs"); - vi.doUnmock("@napi-rs/canvas"); + vi.doUnmock("clawpdf"); vi.resetModules(); }); beforeEach(() => { - canvasSizes.length = 0; - getDocumentMock.mockReset(); - getDocumentMock.mockReturnValue({ promise: Promise.resolve(pdfDocument) }); - pdfDocument.getPage.mockClear(); + createEngineMock.mockResolvedValue({ open: openPdfMock }); + openPdfMock.mockReset(); + openPdfMock.mockResolvedValue(pdfDocument); + pdfDocument.pageCount = 2; + pdfDocument.extract.mockReset(); + pdfDocument.destroy.mockReset(); }); it("declares PDF support", () => { @@ -70,55 +54,90 @@ describe("PDF document extractor", () => { }); }); - it("treats maxPixels as a hard total image rendering budget", async () => { + it("extracts text first and renders fallback images through clawpdf", async () => { + pdfDocument.extract.mockResolvedValueOnce({ text: "", images: [] }).mockResolvedValueOnce({ + text: "", + images: [ + { + type: "image", + bytes: Uint8Array.from(Buffer.from("png")), + mimeType: "image/png", + page: 1, + width: 10, + height: 10, + }, + ], + }); const extractor = createPdfDocumentExtractor(); - const result = await extractor.extract({ - buffer: Buffer.from("%PDF-1.4"), - mimeType: "application/pdf", - maxPages: 2, - maxPixels: 100, - minTextChars: 10, - }); + const result = await extractor.extract(request()); if (!result) { throw new Error("Expected PDF extraction result"); } - expect(result.images).toHaveLength(1); - expect(canvasSizes).toEqual([{ width: 10, height: 10 }]); + expect(openPdfMock).toHaveBeenCalledWith(expect.any(Uint8Array)); + expect(pdfDocument.extract).toHaveBeenNthCalledWith(1, { + mode: "text", + maxPages: 2, + maxTextChars: 200_000, + }); + expect(pdfDocument.extract).toHaveBeenNthCalledWith(2, { + mode: "images", + maxPages: 2, + image: { + maxDimension: 10_000, + maxPixels: 100, + forms: true, + }, + }); + expect(result).toEqual({ + text: "", + images: [{ type: "image", data: "cG5n", mimeType: "image/png" }], + }); + expect(pdfDocument.destroy).toHaveBeenCalledTimes(1); }); - it("passes standardFontDataUrl to pdfjs getDocument as a package-root filesystem path", async () => { + it("skips image fallback when enough text is extracted", async () => { + pdfDocument.extract.mockResolvedValueOnce({ text: "enough text", images: [] }); const extractor = createPdfDocumentExtractor(); - await extractor.extract({ - buffer: Buffer.from("%PDF-1.4"), - mimeType: "application/pdf", - maxPages: 1, - maxPixels: 4_000_000, - minTextChars: 200, - }); + const result = await extractor.extract(request({ minTextChars: 5 })); - expect(getDocumentMock).toHaveBeenCalledTimes(1); - const params = requireFirstMockArg(getDocumentMock, "pdfjs getDocument call"); - const { data, standardFontDataUrl, ...stableParams } = params as { - data: Uint8Array; - disableWorker: boolean; - standardFontDataUrl: string; - }; - expect(stableParams).toEqual({ - disableWorker: true, - }); - expect(data).toBeInstanceOf(Uint8Array); - expect(typeof standardFontDataUrl).toBe("string"); + expect(result).toEqual({ text: "enough text", images: [] }); + expect(pdfDocument.extract).toHaveBeenCalledTimes(1); + expect(pdfDocument.destroy).toHaveBeenCalledTimes(1); + }); - const expectedStandardFontDataUrl = - path.join(path.dirname(require.resolve("pdfjs-dist/package.json")), "standard_fonts") + "/"; - expect(standardFontDataUrl).toBe(expectedStandardFontDataUrl); - expect(path.isAbsolute(standardFontDataUrl)).toBe(true); - expect(standardFontDataUrl.endsWith("/")).toBe(true); - expect(standardFontDataUrl.startsWith("file://")).toBe(false); - expect(existsSync(standardFontDataUrl)).toBe(true); - expect(existsSync(path.join(standardFontDataUrl, "LiberationSans-Regular.ttf"))).toBe(true); + it("filters selected pages before passing them to clawpdf", async () => { + pdfDocument.extract + .mockResolvedValueOnce({ text: "", images: [] }) + .mockResolvedValueOnce({ text: "", images: [] }); + const extractor = createPdfDocumentExtractor(); + + await extractor.extract(request({ pageNumbers: [3, 2, 0, 1], maxPages: 2 })); + + expect(pdfDocument.extract).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ pages: [2, 1] }), + ); + expect(pdfDocument.extract).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ pages: [2, 1] }), + ); + }); + + it("reports image fallback failures and returns extracted text", async () => { + const onImageExtractionError = vi.fn(); + const failure = new Error("render failed"); + pdfDocument.extract + .mockResolvedValueOnce({ text: "short", images: [] }) + .mockRejectedValueOnce(failure); + const extractor = createPdfDocumentExtractor(); + + const result = await extractor.extract(request({ onImageExtractionError })); + + expect(result).toEqual({ text: "short", images: [] }); + expect(onImageExtractionError).toHaveBeenCalledWith(failure); + expect(pdfDocument.destroy).toHaveBeenCalledTimes(1); }); }); diff --git a/extensions/document-extract/document-extractor.ts b/extensions/document-extract/document-extractor.ts index 37cb7b03bec8..70859aeeaad8 100644 --- a/extensions/document-extract/document-extractor.ts +++ b/extensions/document-extract/document-extractor.ts @@ -1,219 +1,80 @@ -import { createRequire } from "node:module"; -import path from "node:path"; +import type { PdfEngine, PdfImage } from "clawpdf"; import type { DocumentExtractedImage, DocumentExtractionRequest, DocumentExtractionResult, DocumentExtractorPlugin, } from "openclaw/plugin-sdk/document-extractor"; -import type * as PdfJsLegacy from "pdfjs-dist/legacy/build/pdf.mjs"; -type CanvasLike = { - toBuffer(type: "image/png"): Buffer; -}; - -type CanvasModule = { - createCanvas(width: number, height: number): CanvasLike; -}; - -type PdfTextItem = { - str: string; -}; - -type PdfTextContent = { - items: Array; -}; - -type PdfViewport = { - width: number; - height: number; -}; - -type PdfPage = { - getTextContent(): Promise; - getViewport(params: { scale: number }): PdfViewport; - render(params: { canvas: unknown; viewport: PdfViewport }): { promise: Promise }; -}; - -type PdfDocument = { - numPages: number; - getPage(pageNumber: number): Promise; -}; - -type PdfJsModule = typeof PdfJsLegacy; - -const CANVAS_MODULE = "@napi-rs/canvas"; -const PDFJS_MODULE = "pdfjs-dist/legacy/build/pdf.mjs"; const MAX_EXTRACTED_TEXT_CHARS = 200_000; const MAX_RENDER_DIMENSION = 10_000; -const require = createRequire(import.meta.url); -let canvasModulePromise: Promise | null = null; -let pdfJsModulePromise: Promise | null = null; -let pdfJsStandardFontDataPath: string | null = null; +let pdfEnginePromise: Promise | null = null; -async function loadCanvasModule(): Promise { - if (!canvasModulePromise) { - canvasModulePromise = (import(CANVAS_MODULE) as Promise).catch((err) => { - canvasModulePromise = null; - throw new Error("Optional dependency @napi-rs/canvas is required for PDF image extraction", { - cause: err, +async function loadPdfEngine(): Promise { + if (!pdfEnginePromise) { + pdfEnginePromise = import("clawpdf") + .then(({ createEngine }) => createEngine()) + .catch((err) => { + pdfEnginePromise = null; + throw new Error("Dependency clawpdf is required for PDF extraction", { + cause: err, + }); }); - }); } - return canvasModulePromise; + return pdfEnginePromise; } -async function loadPdfJsModule(): Promise { - if (!pdfJsModulePromise) { - pdfJsModulePromise = (import(PDFJS_MODULE) as Promise).catch((err) => { - pdfJsModulePromise = null; - throw new Error("Optional dependency pdfjs-dist is required for PDF extraction", { - cause: err, - }); - }); - } - return pdfJsModulePromise; -} - -function resolvePdfJsStandardFontDataPath(): string { - if (!pdfJsStandardFontDataPath) { - const pdfJsPackageJsonPath = require.resolve("pdfjs-dist/package.json"); - pdfJsStandardFontDataPath = - path.join(path.dirname(pdfJsPackageJsonPath), "standard_fonts") + "/"; - } - return pdfJsStandardFontDataPath; -} - -function appendTextWithinLimit(parts: string[], pageText: string, currentLength: number): number { - if (!pageText) { - return currentLength; - } - const remaining = MAX_EXTRACTED_TEXT_CHARS - currentLength; - if (remaining <= 0) { - return currentLength; - } - const nextText = pageText.length > remaining ? pageText.slice(0, remaining) : pageText; - parts.push(nextText); - return currentLength + nextText.length; -} - -function resolveRenderPlan( - viewport: PdfViewport, - remainingPixels: number, -): { scale: number; width: number; height: number; pixels: number } | null { - if ( - remainingPixels <= 0 || - !Number.isFinite(viewport.width) || - !Number.isFinite(viewport.height) || - viewport.width <= 0 || - viewport.height <= 0 - ) { - return null; - } - - const pagePixels = Math.max(1, viewport.width * viewport.height); - const maxScale = Math.min( - 1, - Math.sqrt(remainingPixels / pagePixels), - MAX_RENDER_DIMENSION / viewport.width, - MAX_RENDER_DIMENSION / viewport.height, - ); - if (!Number.isFinite(maxScale) || maxScale <= 0) { - return null; - } - - let best: { scale: number; width: number; height: number; pixels: number } | null = null; - let low = 0; - let high = maxScale; - for (let i = 0; i < 32; i += 1) { - const scale = (low + high) / 2; - const width = Math.max(1, Math.ceil(viewport.width * scale)); - const height = Math.max(1, Math.ceil(viewport.height * scale)); - const pixels = width * height; - if ( - width <= MAX_RENDER_DIMENSION && - height <= MAX_RENDER_DIMENSION && - pixels <= remainingPixels - ) { - best = { scale, width, height, pixels }; - low = scale; - } else { - high = scale; - } - } - return best; +function toDocumentImage(image: PdfImage): DocumentExtractedImage { + return { + type: "image", + data: Buffer.from(image.bytes).toString("base64"), + mimeType: image.mimeType, + }; } async function extractPdfContent( request: DocumentExtractionRequest, ): Promise { - const pdfJsModule = await loadPdfJsModule(); - const pdf = (await pdfJsModule.getDocument({ - data: new Uint8Array(request.buffer), - disableWorker: true, - standardFontDataUrl: resolvePdfJsStandardFontDataPath(), - }).promise) as PdfDocument; - - const effectivePages: number[] = request.pageNumbers - ? request.pageNumbers.filter((p) => p >= 1 && p <= pdf.numPages).slice(0, request.maxPages) - : Array.from({ length: Math.min(pdf.numPages, request.maxPages) }, (_, i) => i + 1); - - const textParts: string[] = []; - let extractedTextLength = 0; - for (const pageNum of effectivePages) { - const page = await pdf.getPage(pageNum); - const textContent = await page.getTextContent(); - const pageText = textContent.items - .map((item) => ("str" in item ? item.str : "")) - .filter(Boolean) - .join(" "); - if (pageText) { - extractedTextLength = appendTextWithinLimit(textParts, pageText, extractedTextLength); - if (extractedTextLength >= MAX_EXTRACTED_TEXT_CHARS) { - break; - } - } - } - - const text = textParts.join("\n\n"); - if (text.trim().length >= request.minTextChars) { - return { text, images: [] }; - } - - let canvasModule: CanvasModule; + const engine = await loadPdfEngine(); + const pdf = await engine.open(new Uint8Array(request.buffer)); try { - canvasModule = await loadCanvasModule(); - } catch (err) { - request.onImageExtractionError?.(err); - return { text, images: [] }; - } + const pages = request.pageNumbers + ? request.pageNumbers + .filter((p) => Number.isInteger(p) && p >= 1 && p <= pdf.pageCount) + .slice(0, request.maxPages) + : undefined; + const pageSelection = pages ? { pages } : { maxPages: request.maxPages }; - const images: DocumentExtractedImage[] = []; - let remainingPixels = Math.max(1, Math.floor(request.maxPixels)); + const textResult = await pdf.extract({ + mode: "text", + ...pageSelection, + maxTextChars: MAX_EXTRACTED_TEXT_CHARS, + }); + const text = textResult.text; - for (const pageNum of effectivePages) { - if (remainingPixels <= 0) { - break; + if (text.trim().length >= request.minTextChars) { + return { text, images: [] }; } - const page = await pdf.getPage(pageNum); - const viewport = page.getViewport({ scale: 1 }); - const plan = resolveRenderPlan(viewport, remainingPixels); - if (!plan) { - break; - } - const scaled = page.getViewport({ scale: plan.scale }); - const canvas = canvasModule.createCanvas(plan.width, plan.height); - await page.render({ - canvas: canvas as unknown as HTMLCanvasElement, - viewport: scaled, - }).promise; - const png = canvas.toBuffer("image/png"); - images.push({ type: "image", data: png.toString("base64"), mimeType: "image/png" }); - remainingPixels -= plan.pixels; - } - return { text, images }; + try { + const imageResult = await pdf.extract({ + mode: "images", + ...pageSelection, + image: { + maxDimension: MAX_RENDER_DIMENSION, + maxPixels: request.maxPixels, + forms: true, + }, + }); + return { text, images: imageResult.images.map(toDocumentImage) }; + } catch (err) { + request.onImageExtractionError?.(err); + return { text, images: [] }; + } + } finally { + pdf.destroy(); + } } export function createPdfDocumentExtractor(): DocumentExtractorPlugin { diff --git a/extensions/document-extract/package.json b/extensions/document-extract/package.json index 016de3b10310..6d4090f223e4 100644 --- a/extensions/document-extract/package.json +++ b/extensions/document-extract/package.json @@ -5,19 +5,11 @@ "description": "OpenClaw local document extraction plugin", "type": "module", "dependencies": { - "pdfjs-dist": "5.7.284" + "clawpdf": "0.2.0" }, "devDependencies": { "@openclaw/plugin-sdk": "workspace:*" }, - "peerDependencies": { - "@napi-rs/canvas": "^0.1.89" - }, - "peerDependenciesMeta": { - "@napi-rs/canvas": { - "optional": true - } - }, "openclaw": { "extensions": [ "./index.ts" diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json index 017f6d824ecc..c35274b05249 100644 --- a/npm-shrinkwrap.json +++ b/npm-shrinkwrap.json @@ -28,6 +28,7 @@ "@silvia-odwyer/photon-node": "0.3.4", "chalk": "5.6.2", "chokidar": "5.0.0", + "clawpdf": "0.2.0", "commander": "14.0.3", "croner": "10.0.1", "cross-spawn": "7.0.6", @@ -53,7 +54,6 @@ "node-edge-tts": "1.2.10", "openai": "6.39.0", "partial-json": "0.1.7", - "pdfjs-dist": "5.7.284", "playwright-core": "1.60.0", "proper-lockfile": "4.1.2", "qrcode": "1.5.4", @@ -900,256 +900,6 @@ "node": ">=14.0.0" } }, - "node_modules/@napi-rs/canvas": { - "version": "0.1.100", - "resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.100.tgz", - "integrity": "sha512-xglYA6q3XO5P3BNJYxVZ1IV7DLVjp1Py6nwag88YntrS+3vKHyYcMqXVS4ZztJmwz2uGvz1FWhI/4LgbR5uQDA==", - "license": "MIT", - "optional": true, - "workspaces": [ - "e2e/*" - ], - "engines": { - "node": ">= 10" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/Brooooooklyn" - }, - "optionalDependencies": { - "@napi-rs/canvas-android-arm64": "0.1.100", - "@napi-rs/canvas-darwin-arm64": "0.1.100", - "@napi-rs/canvas-darwin-x64": "0.1.100", - "@napi-rs/canvas-linux-arm-gnueabihf": "0.1.100", - "@napi-rs/canvas-linux-arm64-gnu": "0.1.100", - "@napi-rs/canvas-linux-arm64-musl": "0.1.100", - "@napi-rs/canvas-linux-riscv64-gnu": "0.1.100", - "@napi-rs/canvas-linux-x64-gnu": "0.1.100", - "@napi-rs/canvas-linux-x64-musl": "0.1.100", - "@napi-rs/canvas-win32-arm64-msvc": "0.1.100", - "@napi-rs/canvas-win32-x64-msvc": "0.1.100" - } - }, - "node_modules/@napi-rs/canvas-android-arm64": { - "version": "0.1.100", - "resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.100.tgz", - "integrity": "sha512-hjhCKhntPv9+t4ckHymdx0phYNcVW+GKQR6Lzw2zE+pOVjOplSmtx9nNNknTjbEDLcuLZqA1y8ufKg1XfgftzQ==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">= 10" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/Brooooooklyn" - } - }, - "node_modules/@napi-rs/canvas-darwin-arm64": { - "version": "0.1.100", - "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.100.tgz", - "integrity": "sha512-2PcswRaC7Ly645DGt88///zuFDhJxJYdKAs1uU3mfk1atYkXufgcgLfBpk6Tm12nCQBaNt1wpybuPZ4qOhTo8A==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">= 10" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/Brooooooklyn" - } - }, - "node_modules/@napi-rs/canvas-darwin-x64": { - "version": "0.1.100", - "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.100.tgz", - "integrity": "sha512-ePNZtj7pNIva/siZMg+HmbeozkIjqUIYdoymH8HaA3qK7LfzFN4WMBM8G6HQ9ZC+H3+Dnn5pqtiXpgLykaPOhw==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">= 10" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/Brooooooklyn" - } - }, - "node_modules/@napi-rs/canvas-linux-arm-gnueabihf": { - "version": "0.1.100", - "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.100.tgz", - "integrity": "sha512-d5cDB48oWFGU8/XPhUOFAlySgb/VAu7D+s8fi55K1Pcfg8aPplHWqMgibhVLU8ky7Pyg/fuiVLz4Nf3JrSTuUA==", - "cpu": [ - "arm" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 10" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/Brooooooklyn" - } - }, - "node_modules/@napi-rs/canvas-linux-arm64-gnu": { - "version": "0.1.100", - "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.100.tgz", - "integrity": "sha512-rDxgxRu69RvDlX/bh9o22DxLsGr8EqsNgotL9+RwQE1S0b0cqeatqsw6aW45mukm0B42DIAaAacKaYQ8cqS1nw==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 10" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/Brooooooklyn" - } - }, - "node_modules/@napi-rs/canvas-linux-arm64-musl": { - "version": "0.1.100", - "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.100.tgz", - "integrity": "sha512-K3mDW66N+xT2/V439u1alFANiBUjdEx2gLiNYnCmUsva5jZMxWTjafBYwTzYK+EMFMHrUoabuU+T1BIP5CgbYQ==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 10" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/Brooooooklyn" - } - }, - "node_modules/@napi-rs/canvas-linux-riscv64-gnu": { - "version": "0.1.100", - "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-riscv64-gnu/-/canvas-linux-riscv64-gnu-0.1.100.tgz", - "integrity": "sha512-mooqUBTIsccZpnoQC4NgrC1v6C1vof39etLNMnBwCY+p0gajWJvAHLGQ6g/gGyS5YrpDW+GefSN4+Cvcr08UWw==", - "cpu": [ - "riscv64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 10" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/Brooooooklyn" - } - }, - "node_modules/@napi-rs/canvas-linux-x64-gnu": { - "version": "0.1.100", - "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.100.tgz", - "integrity": "sha512-1eCvkDCazm7FFhsT7DfGOdSaHgZVK3bt/dSBl5EWHOWmnz+I7j8tPseJqqD81NF+MH21jKUK4wQSDjN0mdhnTg==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 10" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/Brooooooklyn" - } - }, - "node_modules/@napi-rs/canvas-linux-x64-musl": { - "version": "0.1.100", - "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.100.tgz", - "integrity": "sha512-20arT6lnI19S68qNlii73TSEDbECNgzMz2EpldC1V3mZFuRkeujXkcebRk0LRJe9SEUAooYiLokfMViY8IX7yA==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 10" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/Brooooooklyn" - } - }, - "node_modules/@napi-rs/canvas-win32-arm64-msvc": { - "version": "0.1.100", - "resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-arm64-msvc/-/canvas-win32-arm64-msvc-0.1.100.tgz", - "integrity": "sha512-DZFFT1wIAg37LJw37yhMRFfjATd3vTQzjZ1Yki8u2vhO6Hi5VE6BVaGQ1aaDu7xb4iMErz+9EOwjpS7xcxFeBw==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">= 10" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/Brooooooklyn" - } - }, - "node_modules/@napi-rs/canvas-win32-x64-msvc": { - "version": "0.1.100", - "resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.100.tgz", - "integrity": "sha512-MyT1j3mHC2+Lu4pBi9mKyMJhtP6U7k7EldY7sj/uS5gJA65gTXt8MefJQXLJo5d/vZbuWmfxzkEUNc/urV3pHA==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">= 10" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/Brooooooklyn" - } - }, "node_modules/@openclaw/fs-safe": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/@openclaw/fs-safe/-/fs-safe-0.3.0.tgz", @@ -1512,6 +1262,15 @@ "node": ">=18" } }, + "node_modules/clawpdf": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/clawpdf/-/clawpdf-0.2.0.tgz", + "integrity": "sha512-Za4HD3CMRHNqOXOOyVJiQLnEuezRZR/oXiBzraTwL5XEQZuBwFxnyC1UzN4AjQWV2JrLN3ItbzfPRGE0gGOVwg==", + "license": "MIT", + "engines": { + "node": ">=20" + } + }, "node_modules/cliui": { "version": "8.0.1", "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", @@ -3204,18 +2963,6 @@ "url": "https://opencollective.com/express" } }, - "node_modules/pdfjs-dist": { - "version": "5.7.284", - "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-5.7.284.tgz", - "integrity": "sha512-h4EdYQczmGhbOlqc3PPZwxevn7ApdWPbovAuWXOB/DjIyigSnwfy2oze7c6mRcSr9XgLp3eN3EeL4DyySTPMFw==", - "license": "Apache-2.0", - "engines": { - "node": ">=22.13.0 || >=24" - }, - "optionalDependencies": { - "@napi-rs/canvas": "^0.1.100" - } - }, "node_modules/pkce-challenge": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/pkce-challenge/-/pkce-challenge-5.0.1.tgz", diff --git a/package.json b/package.json index 316e606151ff..1500702fbacd 100644 --- a/package.json +++ b/package.json @@ -1834,6 +1834,7 @@ "@openclaw/proxyline": "0.3.3", "@silvia-odwyer/photon-node": "0.3.4", "chalk": "5.6.2", + "clawpdf": "0.2.0", "chokidar": "5.0.0", "commander": "14.0.3", "croner": "10.0.1", @@ -1860,7 +1861,6 @@ "node-edge-tts": "1.2.10", "openai": "6.39.0", "partial-json": "0.1.7", - "pdfjs-dist": "5.7.284", "playwright-core": "1.60.0", "proper-lockfile": "4.1.2", "qrcode": "1.5.4", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a10fc669ae04..3a8a58a1da78 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -95,6 +95,9 @@ importers: chokidar: specifier: 5.0.0 version: 5.0.0 + clawpdf: + specifier: 0.2.0 + version: 0.2.0 commander: specifier: 14.0.3 version: 14.0.3 @@ -170,9 +173,6 @@ importers: partial-json: specifier: 0.1.7 version: 0.1.7 - pdfjs-dist: - specifier: 5.7.284 - version: 5.7.284 playwright-core: specifier: 1.60.0 version: 1.60.0 @@ -691,12 +691,9 @@ importers: extensions/document-extract: dependencies: - '@napi-rs/canvas': - specifier: ^0.1.89 - version: 0.1.99 - pdfjs-dist: - specifier: 5.7.284 - version: 5.7.284 + clawpdf: + specifier: 0.2.0 + version: 0.2.0 devDependencies: '@openclaw/plugin-sdk': specifier: workspace:* @@ -2962,156 +2959,6 @@ packages: resolution: {integrity: sha512-juG5VWh4qAivzTAeMzvY9xs9HY5rAcr2E4I7tiSSCokRFi7XIZCAu92ZkSTsIj1OPceCifL3cpfteP3pDT9/QQ==} engines: {node: '>=14.0.0'} - '@napi-rs/canvas-android-arm64@0.1.100': - resolution: {integrity: sha512-hjhCKhntPv9+t4ckHymdx0phYNcVW+GKQR6Lzw2zE+pOVjOplSmtx9nNNknTjbEDLcuLZqA1y8ufKg1XfgftzQ==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [android] - - '@napi-rs/canvas-android-arm64@0.1.99': - resolution: {integrity: sha512-9OCRt8VVxA17m32NWZKyNC2qamdaS/SC5CEOIQwFngRq0DIeVm4PDal+6Ljnhqm2whZiC63DNuKZ4xSp2nbj9w==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [android] - - '@napi-rs/canvas-darwin-arm64@0.1.100': - resolution: {integrity: sha512-2PcswRaC7Ly645DGt88///zuFDhJxJYdKAs1uU3mfk1atYkXufgcgLfBpk6Tm12nCQBaNt1wpybuPZ4qOhTo8A==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [darwin] - - '@napi-rs/canvas-darwin-arm64@0.1.99': - resolution: {integrity: sha512-lupMDMy1+H38dhyCcLirOKKVUyzzlxi7j7rGPLI3vViMHOoPjcXO1b10ivy+ad+q6MiwHfoLjKTCoLke5ySOBg==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [darwin] - - '@napi-rs/canvas-darwin-x64@0.1.100': - resolution: {integrity: sha512-ePNZtj7pNIva/siZMg+HmbeozkIjqUIYdoymH8HaA3qK7LfzFN4WMBM8G6HQ9ZC+H3+Dnn5pqtiXpgLykaPOhw==} - engines: {node: '>= 10'} - cpu: [x64] - os: [darwin] - - '@napi-rs/canvas-darwin-x64@0.1.99': - resolution: {integrity: sha512-fdz02t4w8n6Ii/rYhWig6STb/zcTmCC/6YZTGmjoDeidDwn9Wf0ukQVynhCPEs29vqUc66wHZKsuIgMs9tycCg==} - engines: {node: '>= 10'} - cpu: [x64] - os: [darwin] - - '@napi-rs/canvas-linux-arm-gnueabihf@0.1.100': - resolution: {integrity: sha512-d5cDB48oWFGU8/XPhUOFAlySgb/VAu7D+s8fi55K1Pcfg8aPplHWqMgibhVLU8ky7Pyg/fuiVLz4Nf3JrSTuUA==} - engines: {node: '>= 10'} - cpu: [arm] - os: [linux] - - '@napi-rs/canvas-linux-arm-gnueabihf@0.1.99': - resolution: {integrity: sha512-w4FwVwlNo00ezeRhfY62IVIyt6G3u8wodkPtiqWc52BUHx+VDBUM2vkS3ogfANaLI7hnf3s6WK4LyZVUjBg1lA==} - engines: {node: '>= 10'} - cpu: [arm] - os: [linux] - - '@napi-rs/canvas-linux-arm64-gnu@0.1.100': - resolution: {integrity: sha512-rDxgxRu69RvDlX/bh9o22DxLsGr8EqsNgotL9+RwQE1S0b0cqeatqsw6aW45mukm0B42DIAaAacKaYQ8cqS1nw==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [linux] - libc: [glibc] - - '@napi-rs/canvas-linux-arm64-gnu@0.1.99': - resolution: {integrity: sha512-8JvHeexKQ8c7g0q7YJ29NVQwnf1ePghP9ys9ZN0R0qzyqJQ9Uw6N9qnDINArlm3IYHexB7LjzArIfhQiqSDGvQ==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [linux] - libc: [glibc] - - '@napi-rs/canvas-linux-arm64-musl@0.1.100': - resolution: {integrity: sha512-K3mDW66N+xT2/V439u1alFANiBUjdEx2gLiNYnCmUsva5jZMxWTjafBYwTzYK+EMFMHrUoabuU+T1BIP5CgbYQ==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [linux] - libc: [musl] - - '@napi-rs/canvas-linux-arm64-musl@0.1.99': - resolution: {integrity: sha512-Z+6nyLdJXWzLPVxi4H6g9TJop4DwN3KSgHWto5JCbZV5/uKoVqcSynPs0tGlUHOoWI8S8tEvJspz51GQkvr07w==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [linux] - libc: [musl] - - '@napi-rs/canvas-linux-riscv64-gnu@0.1.100': - resolution: {integrity: sha512-mooqUBTIsccZpnoQC4NgrC1v6C1vof39etLNMnBwCY+p0gajWJvAHLGQ6g/gGyS5YrpDW+GefSN4+Cvcr08UWw==} - engines: {node: '>= 10'} - cpu: [riscv64] - os: [linux] - libc: [glibc] - - '@napi-rs/canvas-linux-riscv64-gnu@0.1.99': - resolution: {integrity: sha512-jAnfOUv4IO1l8Levk5t85oVtEBOXLa07KnIUgWo1CDlPxiqpxS3uBfiE38Lvj/CQgHaNF6Nxk/SaemwLgsVJgw==} - engines: {node: '>= 10'} - cpu: [riscv64] - os: [linux] - libc: [glibc] - - '@napi-rs/canvas-linux-x64-gnu@0.1.100': - resolution: {integrity: sha512-1eCvkDCazm7FFhsT7DfGOdSaHgZVK3bt/dSBl5EWHOWmnz+I7j8tPseJqqD81NF+MH21jKUK4wQSDjN0mdhnTg==} - engines: {node: '>= 10'} - cpu: [x64] - os: [linux] - libc: [glibc] - - '@napi-rs/canvas-linux-x64-gnu@0.1.99': - resolution: {integrity: sha512-mIkXw3fGmbYyFjSmfWEvty4jN+rwEOmv0+Dy9bRvvTzLYWCgm3RMgUEQVfAKFw96nIRFnyNZiK83KNQaVVFjng==} - engines: {node: '>= 10'} - cpu: [x64] - os: [linux] - libc: [glibc] - - '@napi-rs/canvas-linux-x64-musl@0.1.100': - resolution: {integrity: sha512-20arT6lnI19S68qNlii73TSEDbECNgzMz2EpldC1V3mZFuRkeujXkcebRk0LRJe9SEUAooYiLokfMViY8IX7yA==} - engines: {node: '>= 10'} - cpu: [x64] - os: [linux] - libc: [musl] - - '@napi-rs/canvas-linux-x64-musl@0.1.99': - resolution: {integrity: sha512-f3Uz2P0RgrtBHISxZqr6yiYXJlTDyCVBumDacxo+4AmSg7z0HiqYZKGWC/gszq3fbPhyQUya1W2AEteKxT9Y6A==} - engines: {node: '>= 10'} - cpu: [x64] - os: [linux] - libc: [musl] - - '@napi-rs/canvas-win32-arm64-msvc@0.1.100': - resolution: {integrity: sha512-DZFFT1wIAg37LJw37yhMRFfjATd3vTQzjZ1Yki8u2vhO6Hi5VE6BVaGQ1aaDu7xb4iMErz+9EOwjpS7xcxFeBw==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [win32] - - '@napi-rs/canvas-win32-arm64-msvc@0.1.99': - resolution: {integrity: sha512-XE6KUkfqRsCNejcoRMiMr3RaUeObxNf6y7dut3hrq2rn7PzfRTZgrjF1F/B2C7FcdgqY/vSHWpQeMuNz1vTNHg==} - engines: {node: '>= 10'} - cpu: [arm64] - os: [win32] - - '@napi-rs/canvas-win32-x64-msvc@0.1.100': - resolution: {integrity: sha512-MyT1j3mHC2+Lu4pBi9mKyMJhtP6U7k7EldY7sj/uS5gJA65gTXt8MefJQXLJo5d/vZbuWmfxzkEUNc/urV3pHA==} - engines: {node: '>= 10'} - cpu: [x64] - os: [win32] - - '@napi-rs/canvas-win32-x64-msvc@0.1.99': - resolution: {integrity: sha512-plMYGVbc/vmmPF9MtmHbwNk1rL1Aj53vQZt+Gnv1oZn6gmd9jEHHJ0n9Nd2nxa5sKH7TS5IjkCDM6289O0d6PQ==} - engines: {node: '>= 10'} - cpu: [x64] - os: [win32] - - '@napi-rs/canvas@0.1.100': - resolution: {integrity: sha512-xglYA6q3XO5P3BNJYxVZ1IV7DLVjp1Py6nwag88YntrS+3vKHyYcMqXVS4ZztJmwz2uGvz1FWhI/4LgbR5uQDA==} - engines: {node: '>= 10'} - - '@napi-rs/canvas@0.1.99': - resolution: {integrity: sha512-zN4eQlK3eBf7aJBcTHZilpBH3tDekBzPMIWC8r0s94Ecl73XfOyFi4w7yKFMRVUT0lvNQjtOL8YSrwqQj6mZFg==} - engines: {node: '>= 10'} - '@napi-rs/wasm-runtime@1.1.4': resolution: {integrity: sha512-3NQNNgA1YSlJb/kMH1ildASP9HW7/7kYnRI2szWJaofaS1hWmbGI4H+d3+22aGzXXN9IJ+n+GiFVcGipJP18ow==} peerDependencies: @@ -4674,6 +4521,10 @@ packages: cjs-module-lexer@2.2.0: resolution: {integrity: sha512-4bHTS2YuzUvtoLjdy+98ykbNB5jS0+07EvFNXerqZQJ89F7DI6ET7OQo/HJuW6K0aVsKA9hj9/RVb2kQVOrPDQ==} + clawpdf@0.2.0: + resolution: {integrity: sha512-Za4HD3CMRHNqOXOOyVJiQLnEuezRZR/oXiBzraTwL5XEQZuBwFxnyC1UzN4AjQWV2JrLN3ItbzfPRGE0gGOVwg==} + engines: {node: '>=20'} + cli-table3@0.6.5: resolution: {integrity: sha512-+W/5efTR7y5HRD7gACw9yQjqMVvEMLBHmboM/kPWam+H+Hmyrgjh6YncVKK122YZkXrLudzTuAukUw9FnMf7IQ==} engines: {node: 10.* || >= 12.*} @@ -6224,10 +6075,6 @@ packages: pathe@2.0.3: resolution: {integrity: sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==} - pdfjs-dist@5.7.284: - resolution: {integrity: sha512-h4EdYQczmGhbOlqc3PPZwxevn7ApdWPbovAuWXOB/DjIyigSnwfy2oze7c6mRcSr9XgLp3eN3EeL4DyySTPMFw==} - engines: {node: '>=22.13.0 || >=24'} - picocolors@1.1.1: resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==} @@ -8704,101 +8551,6 @@ snapshots: '@mozilla/readability@0.6.0': {} - '@napi-rs/canvas-android-arm64@0.1.100': - optional: true - - '@napi-rs/canvas-android-arm64@0.1.99': - optional: true - - '@napi-rs/canvas-darwin-arm64@0.1.100': - optional: true - - '@napi-rs/canvas-darwin-arm64@0.1.99': - optional: true - - '@napi-rs/canvas-darwin-x64@0.1.100': - optional: true - - '@napi-rs/canvas-darwin-x64@0.1.99': - optional: true - - '@napi-rs/canvas-linux-arm-gnueabihf@0.1.100': - optional: true - - '@napi-rs/canvas-linux-arm-gnueabihf@0.1.99': - optional: true - - '@napi-rs/canvas-linux-arm64-gnu@0.1.100': - optional: true - - '@napi-rs/canvas-linux-arm64-gnu@0.1.99': - optional: true - - '@napi-rs/canvas-linux-arm64-musl@0.1.100': - optional: true - - '@napi-rs/canvas-linux-arm64-musl@0.1.99': - optional: true - - '@napi-rs/canvas-linux-riscv64-gnu@0.1.100': - optional: true - - '@napi-rs/canvas-linux-riscv64-gnu@0.1.99': - optional: true - - '@napi-rs/canvas-linux-x64-gnu@0.1.100': - optional: true - - '@napi-rs/canvas-linux-x64-gnu@0.1.99': - optional: true - - '@napi-rs/canvas-linux-x64-musl@0.1.100': - optional: true - - '@napi-rs/canvas-linux-x64-musl@0.1.99': - optional: true - - '@napi-rs/canvas-win32-arm64-msvc@0.1.100': - optional: true - - '@napi-rs/canvas-win32-arm64-msvc@0.1.99': - optional: true - - '@napi-rs/canvas-win32-x64-msvc@0.1.100': - optional: true - - '@napi-rs/canvas-win32-x64-msvc@0.1.99': - optional: true - - '@napi-rs/canvas@0.1.100': - optionalDependencies: - '@napi-rs/canvas-android-arm64': 0.1.100 - '@napi-rs/canvas-darwin-arm64': 0.1.100 - '@napi-rs/canvas-darwin-x64': 0.1.100 - '@napi-rs/canvas-linux-arm-gnueabihf': 0.1.100 - '@napi-rs/canvas-linux-arm64-gnu': 0.1.100 - '@napi-rs/canvas-linux-arm64-musl': 0.1.100 - '@napi-rs/canvas-linux-riscv64-gnu': 0.1.100 - '@napi-rs/canvas-linux-x64-gnu': 0.1.100 - '@napi-rs/canvas-linux-x64-musl': 0.1.100 - '@napi-rs/canvas-win32-arm64-msvc': 0.1.100 - '@napi-rs/canvas-win32-x64-msvc': 0.1.100 - optional: true - - '@napi-rs/canvas@0.1.99': - optionalDependencies: - '@napi-rs/canvas-android-arm64': 0.1.99 - '@napi-rs/canvas-darwin-arm64': 0.1.99 - '@napi-rs/canvas-darwin-x64': 0.1.99 - '@napi-rs/canvas-linux-arm-gnueabihf': 0.1.99 - '@napi-rs/canvas-linux-arm64-gnu': 0.1.99 - '@napi-rs/canvas-linux-arm64-musl': 0.1.99 - '@napi-rs/canvas-linux-riscv64-gnu': 0.1.99 - '@napi-rs/canvas-linux-x64-gnu': 0.1.99 - '@napi-rs/canvas-linux-x64-musl': 0.1.99 - '@napi-rs/canvas-win32-arm64-msvc': 0.1.99 - '@napi-rs/canvas-win32-x64-msvc': 0.1.99 - '@napi-rs/wasm-runtime@1.1.4(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0)': dependencies: '@emnapi/core': 1.10.0 @@ -10307,6 +10059,8 @@ snapshots: cjs-module-lexer@2.2.0: {} + clawpdf@0.2.0: {} + cli-table3@0.6.5: dependencies: string-width: 4.2.3 @@ -12178,10 +11932,6 @@ snapshots: pathe@2.0.3: {} - pdfjs-dist@5.7.284: - optionalDependencies: - '@napi-rs/canvas': 0.1.100 - picocolors@1.1.1: {} picomatch@2.3.2: {} diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index d777a8b07e8d..bb798f1a0614 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -54,6 +54,7 @@ minimumReleaseAgeExclude: - "sqlite-vec" - "sqlite-vec-*" - "rastermill" + - "clawpdf" nodeLinker: hoisted blockExoticSubdeps: true @@ -89,7 +90,6 @@ allowBuilds: "@google/genai": true "@lydell/node-pty": true "@matrix-org/matrix-sdk-crypto-nodejs": true - "@napi-rs/canvas": true "@tloncorp/api": true "@tloncorp/tlon-skill": true baileys: true @@ -103,6 +103,7 @@ allowBuilds: tree-sitter-bash: false openclaw: true "@openclaw/proxyline": true + clawpdf: true rastermill: true packageExtensions: diff --git a/scripts/lib/dependency-ownership.json b/scripts/lib/dependency-ownership.json index 1781a6d0eed8..ae796a764852 100644 --- a/scripts/lib/dependency-ownership.json +++ b/scripts/lib/dependency-ownership.json @@ -32,15 +32,6 @@ "activation": ["tools.web.fetch.readability", "plugins.entries.web-readability.enabled"], "risk": ["parser", "untrusted-html"] }, - "@napi-rs/canvas": { - "owner": "plugin:document-extract", - "class": "optional-peer-runtime", - "activation": [ - "input_file.application_pdf.image_fallback", - "plugins.entries.document-extract.enabled" - ], - "risk": ["native", "parser", "untrusted-files"] - }, "ajv": { "owner": "core:json-schema-validation", "class": "core-runtime", @@ -137,11 +128,11 @@ "class": "core-runtime", "risk": ["browser-automation", "cdp"] }, - "pdfjs-dist": { + "clawpdf": { "owner": "plugin:document-extract", "class": "plugin-runtime", "activation": ["input_file.application_pdf", "plugins.entries.document-extract.enabled"], - "risk": ["parser", "untrusted-files"] + "risk": ["wasm", "parser", "untrusted-files"] }, "proxy-agent": { "owner": "core:proxy", diff --git a/src/types/pdfjs-dist-legacy.d.ts b/src/types/pdfjs-dist-legacy.d.ts deleted file mode 100644 index 1657b3246c25..000000000000 --- a/src/types/pdfjs-dist-legacy.d.ts +++ /dev/null @@ -1,26 +0,0 @@ -declare module "pdfjs-dist/legacy/build/pdf.mjs" { - import type { - DocumentInitParameters, - PDFDocumentLoadingTask, - TypedArray, - } from "pdfjs-dist/types/src/display/api.js"; - - export type LegacyDocumentInitParameters = DocumentInitParameters & { - disableWorker?: boolean; - }; - - export function getDocument( - src?: string | URL | TypedArray | ArrayBuffer | LegacyDocumentInitParameters, - ): PDFDocumentLoadingTask; - - export type { - DocumentInitParameters, - PDFDocumentLoadingTask, - PDFDocumentProxy, - PDFPageProxy, - TextContent, - TextItem, - TypedArray, - } from "pdfjs-dist/types/src/display/api.js"; - export type { PageViewport } from "pdfjs-dist/types/src/display/display_utils.js"; -} diff --git a/test/scripts/root-dependency-ownership-audit.test.ts b/test/scripts/root-dependency-ownership-audit.test.ts index 66651cc8c537..c01adad1bc22 100644 --- a/test/scripts/root-dependency-ownership-audit.test.ts +++ b/test/scripts/root-dependency-ownership-audit.test.ts @@ -45,20 +45,15 @@ describe("collectModuleSpecifiers", () => { expect([ ...collectModuleSpecifiers(` const READABILITY_MODULE = "@mozilla/readability"; - const PDFJS_MODULE = "pdfjs-dist/legacy/build/pdf.mjs"; + const CLAWPDF_MODULE = "clawpdf"; const CIAO_MODULE_ID = "@homebridge/ciao"; let SQLITE_VEC_MODULE_ID = "sqlite-vec"; import(READABILITY_MODULE); - import(PDFJS_MODULE); + import(CLAWPDF_MODULE); require(CIAO_MODULE_ID); require.resolve(SQLITE_VEC_MODULE_ID); `), - ]).toEqual([ - "@mozilla/readability", - "pdfjs-dist/legacy/build/pdf.mjs", - "@homebridge/ciao", - "sqlite-vec", - ]); + ]).toEqual(["@mozilla/readability", "clawpdf", "@homebridge/ciao", "sqlite-vec"]); }); }); @@ -154,15 +149,15 @@ describe("collectRootDependencyOwnershipCheckErrors", () => { writeRepoFile( repoRoot, "package.json", - JSON.stringify({ dependencies: { "pdfjs-dist": "^5.0.0", "sqlite-vec": "0.1.9" } }), + JSON.stringify({ dependencies: { clawpdf: "^0.2.0", "sqlite-vec": "0.1.9" } }), ); writeRepoFile( repoRoot, "src/media/pdf-extract.ts", ` - const PDFJS_MODULE = "pdfjs-dist/legacy/build/pdf.mjs"; + const CLAWPDF_MODULE = "clawpdf"; export async function loadPdf() { - return import(PDFJS_MODULE); + return import(CLAWPDF_MODULE); } `, ); @@ -186,13 +181,13 @@ describe("collectRootDependencyOwnershipCheckErrors", () => { { category: "core_runtime", declaredInExtensions: [], - depName: "pdfjs-dist", + depName: "clawpdf", fileCount: 1, internalizedBundledRuntimeOwners: [], recommendation: "keep at root", sampleFiles: ["src/media/pdf-extract.ts"], sections: ["src"], - spec: "^5.0.0", + spec: "^0.2.0", }, { category: "core_runtime",