ci: report memory metrics

2026-06-06 05:51:15 +08:00 · 2026-05-25 22:49:05 +01:00
parent 5b6d03e3e2
commit 3b0805414e
7 changed files with 439 additions and 63 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -513,7 +513,24 @@ jobs:
        run: pnpm test:build:singleton

      - name: Check CLI startup memory
-        run: pnpm test:startup:memory
+        shell: bash
+        run: |
+          set +e  # keep going after a failed check so the summary can still be published
+          pnpm test:startup:memory
+          status=$?  # exit code of the memory check, replayed by the final exit
+          if [[ -f .artifacts/startup-memory/summary.md ]]; then
+            cat .artifacts/startup-memory/summary.md >> "$GITHUB_STEP_SUMMARY"
+          fi
+          exit "$status"
+
+      - name: Upload startup memory report
+        if: always()  # upload even when the preceding memory check failed
+        uses: actions/upload-artifact@v7
+        with:
+          name: startup-memory
+          path: .artifacts/startup-memory/
+          if-no-files-found: ignore  # a missing report directory does not fail this step
+          retention-days: 7

      - name: Run built artifact checks
        id: built_artifact_checks
--- a/.github/workflows/openclaw-performance.yml
+++ b/.github/workflows/openclaw-performance.yml
@@ -348,6 +348,41 @@ jobs:
            exit "$status"
          fi

+      - name: Fetch previous source performance baseline
+        if: ${{ steps.lane.outputs.run == 'true' && matrix.lane == 'mock-provider' && steps.clawgrit.outputs.present == 'true' }}
+        env:
+          CLAWGRIT_REPORTS_TOKEN: ${{ secrets.CLAWGRIT_REPORTS_TOKEN }}
+        shell: bash
+        run: |
+          set -euo pipefail
+          reports_root=".artifacts/clawgrit-baseline"
+          mkdir -p "$reports_root"
+          git -C "$reports_root" init -b main  # scratch repo; no remote is configured, so no credential lands in .git/config
+          reports_url="https://x-access-token:${CLAWGRIT_REPORTS_TOKEN}@github.com/openclaw/clawgrit-reports.git"
+          if ! git -C "$reports_root" fetch --depth=1 "$reports_url" main; then  # token stays in this shell variable only
+            echo "No previous source performance baseline could be fetched." >> "$GITHUB_STEP_SUMMARY"
+            exit 0  # a missing baseline is not an error
+          fi
+          git -C "$reports_root" checkout -B main FETCH_HEAD
+          ref_slug="$(printf '%s' "${TESTED_REF}" | tr -c 'A-Za-z0-9._-' '-')"  # every other character becomes '-'
+          pointer="${reports_root}/openclaw-performance/${ref_slug}/latest-mock-provider.json"
+          if [[ ! -f "$pointer" ]]; then
+            echo "No previous source performance baseline exists for ${TESTED_REF}." >> "$GITHUB_STEP_SUMMARY"
+            exit 0
+          fi
+          if ! latest_path="$(node -e "const fs=require('node:fs'); const data=JSON.parse(fs.readFileSync(process.argv[1],'utf8')); const value=String(data.path || ''); if (!/^openclaw-performance\\/[A-Za-z0-9._-]+\\/[0-9]+-[0-9]+\\/mock-provider$/u.test(value)) process.exit(1); process.stdout.write(value);" "$pointer")"; then
+            echo "Previous source performance baseline pointer is invalid." >> "$GITHUB_STEP_SUMMARY"
+            exit 0  # the pointer path is only trusted when it matches the strict pattern above
+          fi
+          baseline_source="${reports_root}/${latest_path}/source"
+          if [[ -d "$baseline_source" ]]; then
+            baseline_source="$(realpath "$baseline_source")"  # export an absolute path
+            echo "SOURCE_PERF_BASELINE_DIR=$baseline_source" >> "$GITHUB_ENV"
+            echo "Using source performance baseline: ${latest_path}/source" >> "$GITHUB_STEP_SUMMARY"
+          else
+            echo "Previous source performance baseline has no source directory." >> "$GITHUB_STEP_SUMMARY"
+          fi
+
      - name: Run OpenClaw source performance probes
        if: ${{ steps.lane.outputs.run == 'true' && matrix.lane == 'mock-provider' }}
        shell: bash
@@ -359,7 +394,7 @@ jobs:
          fi

          mkdir -p "$SOURCE_PERF_DIR/mock-hello"
-          if ! node -e "const fs=require('node:fs'); const scripts=require('./package.json').scripts||{}; process.exit(scripts['test:gateway:cpu-scenarios'] && scripts.openclaw && fs.existsSync('scripts/bench-cli-startup.ts') ? 0 : 1)"; then
+          if ! node -e "const fs=require('node:fs'); const scripts=require('./package.json').scripts||{}; process.exit(scripts['test:gateway:cpu-scenarios'] && scripts['test:extensions:memory'] && scripts.openclaw && fs.existsSync('scripts/bench-cli-startup.ts') && fs.existsSync('scripts/profile-extension-memory.mjs') ? 0 : 1)"; then
            cat > "$SOURCE_PERF_DIR/index.md" <<EOF
          # OpenClaw Source Performance

@@ -371,7 +406,7 @@ jobs:

          - Tested ref: ${TESTED_REF}
          - Tested SHA: ${TESTED_SHA}
-          - Required scripts: test:gateway:cpu-scenarios, openclaw, scripts/bench-cli-startup.ts
+          - Required scripts: test:gateway:cpu-scenarios, test:extensions:memory, openclaw, scripts/bench-cli-startup.ts, scripts/profile-extension-memory.mjs
          EOF
            cat "$SOURCE_PERF_DIR/index.md" >> "$GITHUB_STEP_SUMMARY"
            exit 0
@@ -391,6 +426,9 @@ jobs:
            --startup-case fiftyPlugins \
            --startup-case fiftyStartupLazyPlugins

+          pnpm test:extensions:memory \
+            -- --json "$SOURCE_PERF_DIR/extension-memory.json"
+
          for run_index in $(seq 1 "$source_runs"); do
            run_dir="$SOURCE_PERF_DIR/mock-hello/run-$(printf '%03d' "$run_index")"
            pnpm openclaw qa suite \
@@ -460,9 +498,13 @@ jobs:
          cleanup_gateway
          trap - EXIT

-          node "$PERFORMANCE_HELPER_DIR/scripts/openclaw-performance-source-summary.mjs" \
+          summary_args=(node "$PERFORMANCE_HELPER_DIR/scripts/openclaw-performance-source-summary.mjs" \
            --source-dir "$SOURCE_PERF_DIR" \
-            --output "$SOURCE_PERF_DIR/index.md"
+            --output "$SOURCE_PERF_DIR/index.md")
+          if [[ -n "${SOURCE_PERF_BASELINE_DIR:-}" && -d "$SOURCE_PERF_BASELINE_DIR" ]]; then
+            summary_args+=(--baseline-source-dir "$SOURCE_PERF_BASELINE_DIR")
+          fi
+          "${summary_args[@]}"

          cat "$SOURCE_PERF_DIR/index.md" >> "$GITHUB_STEP_SUMMARY"

--- a/docs/ci.md
+++ b/docs/ci.md
@@ -43,7 +43,7 @@ OpenClaw CI runs on every push to `main` and every pull request. The `preflight`

 GitHub may mark superseded jobs as `cancelled` when a newer push lands on the same PR or `main` ref. Treat that as CI noise unless the newest run for the same ref is also failing. Matrix jobs use `fail-fast: false`, and `build-artifacts` reports embedded channel, core-support-boundary, and gateway-watch failures directly instead of queuing tiny verifier jobs. The automatic CI concurrency key is versioned (`CI-v7-*`) so a GitHub-side zombie in an old queue group cannot indefinitely block newer main runs. Manual full-suite runs use `CI-manual-v1-*` and do not cancel in-progress runs.

-The `ci-timings-summary` job uploads a compact `ci-timings-summary` artifact for each non-draft CI run. It records wall time, queue time, slowest jobs, and failed jobs for the current run, so CI health checks do not need to scrape the full Actions payload repeatedly.
+The `ci-timings-summary` job uploads a compact `ci-timings-summary` artifact for each non-draft CI run. It records wall time, queue time, slowest jobs, and failed jobs for the current run, so CI health checks do not need to scrape the full Actions payload repeatedly. The `build-artifacts` job also runs the blocking startup-memory smoke and uploads a `startup-memory` artifact with per-command RSS values for `--help`, `status --json`, and `gateway status`.

 ## Real behavior proof

@@ -157,6 +157,8 @@ node scripts/ci-run-timings.mjs --latest-main # ignore issue/comment noise and c
 node scripts/ci-run-timings.mjs --recent 10   # compare recent successful main CI runs
 pnpm test:perf:groups --full-suite --allow-failures --output .artifacts/test-perf/baseline-before.json
 pnpm test:perf:groups:compare .artifacts/test-perf/baseline-before.json .artifacts/test-perf/after-agent.json
+pnpm test:startup:memory
+pnpm test:extensions:memory -- --json .artifacts/openclaw-performance/source/mock-provider/extension-memory.json
 pnpm perf:kova:summary --report .artifacts/kova/reports/mock-provider/report.json --output .artifacts/kova/summary.md
 ```

@@ -178,7 +180,7 @@ The workflow installs OCM from a pinned release and Kova from `openclaw/Kova` at
 - `mock-deep-profile`: CPU/heap/trace profiling for startup, gateway, and agent-turn hotspots.
 - `live-openai-candidate`: a real OpenAI `openai/gpt-5.5` agent turn, skipped when `OPENAI_API_KEY` is unavailable.

-The mock-provider lane also runs OpenClaw-native source probes after the Kova pass: gateway boot timing and memory across default, hook, and 50-plugin startup cases; repeated mock-OpenAI `channel-chat-baseline` hello loops; and CLI startup commands against the booted gateway. The source probe Markdown summary lives at `source/index.md` in the report bundle, with raw JSON beside it.
+The mock-provider lane also runs OpenClaw-native source probes after the Kova pass: gateway boot timing and memory across default, hook, and 50-plugin startup cases; bundled plugin import RSS; repeated mock-OpenAI `channel-chat-baseline` hello loops; and CLI startup commands against the booted gateway. When the previous published mock-provider source report is available for the tested ref, the source summary compares current RSS and heap values against that baseline and marks large RSS increases as `watch`. The source probe Markdown summary lives at `source/index.md` in the report bundle, with raw JSON beside it.

 Every lane uploads GitHub artifacts. When `CLAWGRIT_REPORTS_TOKEN` is configured, the workflow also commits `report.json`, `report.md`, bundles, `index.md`, and source-probe artifacts into `openclaw/clawgrit-reports` under `openclaw-performance/<tested-ref>/<run-id>-<attempt>/<lane>/`. The current tested-ref pointer is written as `openclaw-performance/<tested-ref>/latest-<lane>.json`.

@@ -503,7 +505,7 @@ The `Docs Agent` workflow is an event-driven Codex maintenance lane for keeping

 ### Test Performance Agent

-The `Test Performance Agent` workflow is an event-driven Codex maintenance lane for slow tests. It has no pure schedule: a successful non-bot push CI run on `main` can trigger it, but it skips if another workflow-run invocation already ran or is running that UTC day. Manual dispatch bypasses that daily activity gate. The lane builds a full-suite grouped Vitest performance report, lets Codex make only small coverage-preserving test performance fixes instead of broad refactors, then reruns the full-suite report and rejects changes that reduce the passing baseline test count. If the baseline has failing tests, Codex may fix only obvious failures and the after-agent full-suite report must pass before anything is committed. When `main` advances before the bot push lands, the lane rebases the validated patch, reruns `pnpm check:changed`, and retries the push; conflicting stale patches are skipped. It uses GitHub-hosted Ubuntu so the Codex action can keep the same drop-sudo safety posture as the docs agent.
+The `Test Performance Agent` workflow is an event-driven Codex maintenance lane for slow tests. It has no pure schedule: a successful non-bot push CI run on `main` can trigger it, but it skips if another workflow-run invocation already ran or is running that UTC day. Manual dispatch bypasses that daily activity gate. The lane builds a full-suite grouped Vitest performance report, lets Codex make only small coverage-preserving test performance fixes instead of broad refactors, then reruns the full-suite report and rejects changes that reduce the passing baseline test count. The grouped report records per-config wall time and max RSS on Linux and macOS, so the before/after comparison surfaces test memory deltas beside duration deltas. If the baseline has failing tests, Codex may fix only obvious failures and the after-agent full-suite report must pass before anything is committed. When `main` advances before the bot push lands, the lane rebases the validated patch, reruns `pnpm check:changed`, and retries the push; conflicting stale patches are skipped. It uses GitHub-hosted Ubuntu so the Codex action can keep the same drop-sudo safety posture as the docs agent.

 ### Duplicate PRs After Merge

--- a/scripts/check-cli-startup-memory.mjs
+++ b/scripts/check-cli-startup-memory.mjs
@@ -1,7 +1,7 @@
 #!/usr/bin/env node

 import { spawnSync } from "node:child_process";
-import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
+import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
 import os from "node:os";
 import path from "node:path";

@@ -19,6 +19,46 @@ const tmpDir = process.env.TMPDIR || process.env.TEMP || process.env.TMP || os.t
 const rssHookPath = path.join(tmpHome, "measure-rss.mjs");
 const MAX_RSS_MARKER = "__OPENCLAW_MAX_RSS_KB__=";

+/**
+ * Resolves the JSON report and Markdown summary destinations.
+ * Defaults come from the environment, then `.artifacts/startup-memory/` under the repo root.
+ */
+function parseArgs(argv) {
+  const artifactDir = path.join(repoRoot, ".artifacts", "startup-memory");
+  const options = {
+    jsonPath:
+      process.env.OPENCLAW_STARTUP_MEMORY_JSON_PATH ||
+      path.join(artifactDir, "startup-memory.json"),
+    summaryPath:
+      process.env.OPENCLAW_STARTUP_MEMORY_SUMMARY_PATH || path.join(artifactDir, "summary.md"),
+  };
+  // Flags that consume the following argument, keyed to the option they populate.
+  const pathFlags = new Map([["--json", "jsonPath"], ["--summary", "summaryPath"]]);
+  let cursor = 0;
+  while (cursor < argv.length) {
+    const flag = argv[cursor];
+    const optionKey = pathFlags.get(flag);
+    if (optionKey) {
+      const target = argv[cursor + 1];
+      if (!target) {
+        throw new Error(`${flag} requires a path`);
+      }
+      options[optionKey] = path.resolve(target);
+      cursor += 2;
+      continue;
+    }
+    // Anything else is either a help request or an error.
+    if (flag === "--help") {
+      console.log(
+        "Usage: node scripts/check-cli-startup-memory.mjs [--json <path>] [--summary <path>]",
+      );
+      process.exit(0);
+    }
+    throw new Error(`Unknown option: ${flag}`);
+  }
+  return options;
+}
+
 writeFileSync(
  rssHookPath,
  [
@@ -97,6 +137,14 @@ function parseMaxRssMb(stderr) {
  return Number(lastMatch[1]) / 1024;
 }

+function formatMb(value) {
+  return Number.isFinite(value) ? `${value.toFixed(1)} MB` : "n/a"; // non-numbers are never finite
+}
+
+function formatCaseCommand(testCase) {
+  return "node " + testCase.args.join(" "); // command string recorded for the case
+}
+
 function buildBenchEnv() {
  const env = {
    HOME: tmpHome,
@@ -143,42 +191,104 @@ function runCase(testCase) {
  const stderr = result.stderr ?? "";
  const maxRssMb = parseMaxRssMb(stderr);
  const matrixBootstrapWarning = /matrix: crypto runtime bootstrap failed/i.test(stderr);
+  const report = {
+    id: testCase.id,
+    label: testCase.label,
+    command: formatCaseCommand(testCase),
+    limitMb: testCase.limitMb,
+    maxRssMb,
+    status: "pass",
+    exitCode: result.status,
+    error: null,
+  };

  if (result.status !== 0) {
-    throw new Error(
-      formatFailure(
-        testCase,
-        `${testCase.label} exited with ${String(result.status)}`,
-        stderr.trim() || result.stdout || "",
-      ),
-    );
+    report.status = "fail";
+    report.error = `${testCase.label} exited with ${String(result.status)}`;
+    return Object.assign(report, {
+      failureMessage: formatFailure(testCase, report.error, stderr.trim() || result.stdout || ""),
+    });
  }
  if (maxRssMb == null) {
-    throw new Error(formatFailure(testCase, `${testCase.label} did not report max RSS`, stderr));
+    report.status = "fail";
+    report.error = `${testCase.label} did not report max RSS`;
+    return Object.assign(report, {
+      failureMessage: formatFailure(testCase, report.error, stderr),
+    });
  }
  if (matrixBootstrapWarning) {
-    throw new Error(
-      formatFailure(testCase, `${testCase.label} triggered Matrix crypto bootstrap during startup`),
-    );
+    report.status = "fail";
+    report.error = `${testCase.label} triggered Matrix crypto bootstrap during startup`;
+    return Object.assign(report, {
+      failureMessage: formatFailure(testCase, report.error),
+    });
  }
  if (maxRssMb > testCase.limitMb) {
-    throw new Error(
-      formatFailure(
-        testCase,
-        `${testCase.label} used ${maxRssMb.toFixed(1)} MB RSS (limit ${testCase.limitMb} MB)`,
-      ),
-    );
+    report.status = "fail";
+    report.error = `${testCase.label} used ${maxRssMb.toFixed(1)} MB RSS (limit ${
+      testCase.limitMb
+    } MB)`;
+    return Object.assign(report, {
+      failureMessage: formatFailure(testCase, report.error),
+    });
  }

  console.log(
    `[startup-memory] ${testCase.label}: ${maxRssMb.toFixed(1)} MB RSS (limit ${testCase.limitMb} MB)`,
  );
+  return report;
 }

+function writeReport(options, results) { // writes the run as JSON (options.jsonPath) and Markdown (options.summaryPath)
+  const failed = results.filter((result) => result.status !== "pass");
+  const report = {
+    generatedAt: new Date().toISOString(),
+    platform: process.platform,
+    repoRoot,
+    status: failed.length === 0 ? "pass" : "fail",
+    results: results.map(({ failureMessage: _failureMessage, ...result }) => result), // failureMessage is left out of the JSON
+  };
+  const lines = [
+    "# OpenClaw Startup Memory",
+    "",
+    `Generated: ${report.generatedAt}`,
+    "",
+    `Status: ${report.status}`,
+    "",
+    ...results.map(
+      (result) =>
+        `- ${result.label}: ${result.status} RSS ${formatMb(result.maxRssMb)} / ${formatMb(
+          result.limitMb,
+        )}`,
+    ),
+    "",
+  ];
+  if (failed.length > 0) { // the failures section only appears when at least one case failed
+    lines.push(
+      "## Failures",
+      "",
+      ...failed.map((result) => `- ${result.label}: ${result.error ?? "unknown failure"}`),
+      "",
+    );
+  }
+  mkdirSync(path.dirname(options.jsonPath), { recursive: true }); // parent directories are created on demand
+  mkdirSync(path.dirname(options.summaryPath), { recursive: true });
+  writeFileSync(options.jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8");
+  writeFileSync(options.summaryPath, `${lines.join("\n")}\n`, "utf8");
+}
+
+const options = parseArgs(process.argv.slice(2));
+const results = [];
 try {
   for (const testCase of cases) {
-    runCase(testCase);
+    results.push(runCase(testCase)); // failures come back as result objects, so later cases still run
   }
 } finally {
+  writeReport(options, results); // the report is written even if a case threw unexpectedly
   rmSync(tmpHome, { recursive: true, force: true });
 }
+
+const failure = results.find((result) => result.status !== "pass"); // first failing case, if any
+if (failure?.failureMessage) {
+  throw new Error(failure.failureMessage);
+}
--- a/scripts/openclaw-performance-source-summary.mjs
+++ b/scripts/openclaw-performance-source-summary.mjs
@@ -5,7 +5,7 @@ import path from "node:path";
 import process from "node:process";

 function parseArgs(argv) {
-  const options = { sourceDir: null, output: null };
+  const options = { baselineSourceDir: null, sourceDir: null, output: null };
  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];
    const readValue = () => {
@@ -20,6 +20,9 @@ function parseArgs(argv) {
      case "--source-dir":
        options.sourceDir = path.resolve(readValue());
        break;
+      case "--baseline-source-dir":
+        options.baselineSourceDir = path.resolve(readValue());
+        break;
      case "--output":
        options.output = path.resolve(readValue());
        break;
@@ -38,7 +41,7 @@ function parseArgs(argv) {
 }

 function printHelp() {
-  console.log(`Usage: node scripts/openclaw-performance-source-summary.mjs --source-dir <dir> [--output <summary.md>]
+  console.log(`Usage: node scripts/openclaw-performance-source-summary.mjs --source-dir <dir> [--baseline-source-dir <dir>] [--output <summary.md>]

 Summarizes OpenClaw-native performance probe artifacts for CI reports.`);
 }
@@ -72,6 +75,42 @@ function metric(stats, key = "p50") {
  return stats && typeof stats[key] === "number" ? stats[key] : null;
 }

+function percentDelta(before, after) { // signed change from `before` to `after` in percent, or null when undefined
+  if (typeof before !== "number" || typeof after !== "number") {
+    return null;
+  }
+  if (before === 0) {
+    return after === 0 ? 0 : null; // growth from zero has no finite percentage
+  }
+  return ((after - before) / Math.abs(before)) * 100; // abs() keeps the sign of the change when the baseline is negative
+}
+
+function formatDeltaMb(before, after) { // renders "<signed MB> (<signed %>)", or "n/a" for non-numeric input
+  if (!(typeof before === "number" && typeof after === "number")) {
+    return "n/a";
+  }
+  const plusIf = (value) => (value > 0 ? "+" : "");
+  const change = after - before;
+  const ratio = percentDelta(before, after);
+  const ratioLabel = ratio == null ? "new" : plusIf(ratio) + ratio.toFixed(1) + "%";
+  return plusIf(change) + formatMb(change) + " (" + ratioLabel + ")";
+}
+
+function memoryRisk(before, after) {
+  // "watch": up by >=20% and >=10 units; "improved": down by >=10% and >=10 units; else "stable".
+  const ratio = percentDelta(before, after);
+  if (ratio == null || typeof before !== "number" || typeof after !== "number") {
+    return "n/a";
+  }
+  const change = after - before;
+  const grewALot = ratio >= 20 && change >= 10;
+  const shrankALot = ratio <= -10 && change <= -10;
+  if (grewALot) {
+    return "watch";
+  }
+  return shrankALot ? "improved" : "stable";
+}
+
 function escapeCell(value) {
  return String(value).replaceAll("|", "\\|");
 }
@@ -104,6 +143,18 @@ function loadMockHelloSummaries(sourceDir) {
    .toSorted((a, b) => a.id.localeCompare(b.id));
 }

+function loadSourceArtifacts(sourceDir) { // null for an unset or missing directory, else the parsed probe artifacts
+  const readArtifact = (...segments) => readJsonIfExists(path.join(sourceDir, ...segments));
+  return sourceDir && fs.existsSync(sourceDir)
+    ? {
+        startup: readArtifact("gateway-cpu", "gateway-startup-bench.json"),
+        cli: readArtifact("cli-startup.json"),
+        extensionMemory: readArtifact("extension-memory.json"),
+        mockHelloSummaries: loadMockHelloSummaries(sourceDir),
+      }
+    : null;
+}
+
 function buildStartupRows(startup) {
  return (startup?.results ?? []).map((result) => [
    result.id ?? "unknown",
@@ -165,6 +216,117 @@ function buildCliRows(cli) {
  ]);
 }

+function buildStartupMemoryDeltaRows(current, baseline) {
+  // Pairs gateway boot cases by id and reports RSS p95 and ready-heap p95 changes.
+  const previousById = new Map();
+  for (const entry of baseline?.results ?? []) {
+    previousById.set(entry.id, entry);
+  }
+  const rssP95 = (entry) => metric(entry.summary?.maxRssMb, "p95");
+  const readyHeapP95 = (entry) =>
+    metric(entry.summary?.startupTrace?.["memory.ready.heapUsedMb"], "p95");
+  const rows = [];
+  for (const result of current?.results ?? []) {
+    const previous = previousById.get(result.id);
+    if (!previous) {
+      // Cases without a baseline counterpart produce no row.
+      continue;
+    }
+    const rssBefore = rssP95(previous);
+    const rssNow = rssP95(result);
+    rows.push([
+      "gateway boot",
+      result.id ?? "unknown",
+      formatMb(rssBefore),
+      formatMb(rssNow),
+      formatDeltaMb(rssBefore, rssNow),
+      formatDeltaMb(readyHeapP95(previous), readyHeapP95(result)),
+      memoryRisk(rssBefore, rssNow),
+    ]);
+  }
+  return rows;
+}
+
+function buildCliMemoryDeltaRows(current, baseline) {
+  // Pairs CLI cases by id; a case missing from the baseline yields no row.
+  const previousById = new Map();
+  for (const entry of baseline?.primary?.cases ?? []) {
+    previousById.set(entry.id, entry);
+  }
+  const rssP95 = (entry) => metric(entry.summary?.maxRssMb, "p95");
+  const rows = [];
+  for (const entry of current?.primary?.cases ?? []) {
+    const previous = previousById.get(entry.id);
+    if (!previous) {
+      continue;
+    }
+    const rssBefore = rssP95(previous);
+    const rssNow = rssP95(entry);
+    const delta = formatDeltaMb(rssBefore, rssNow);
+    // No heap figure is read for CLI cases, hence the fixed "n/a" heap cell.
+    const row = ["cli", entry.id ?? "unknown", formatMb(rssBefore), formatMb(rssNow), delta];
+    rows.push([...row, "n/a", memoryRisk(rssBefore, rssNow)]);
+  }
+  return rows;
+}
+
+function average(values) {
+  // Arithmetic mean of the finite numeric entries of `values`.
+  // Returns null when no entry qualifies (non-numbers, NaN and infinities are ignored).
+  const finite = values.filter((candidate) => Number.isFinite(candidate));
+  const total = finite.reduce((running, entry) => running + entry, 0);
+  return finite.length > 0 ? total / finite.length : null;
+}
+
+function buildMockHelloMemoryDeltaRows(current, baseline) {
+  // Averages gatewayProcessRssDeltaBytes (converted to MB) over each side's summaries.
+  // Summaries without that metric turn into NaN here and are ignored by average().
+  const averageRssDeltaMb = (summaries) =>
+    average(
+      (summaries ?? []).map(
+        (entry) => entry.summary?.metrics?.gatewayProcessRssDeltaBytes / 1024 / 1024,
+      ),
+    );
+  const previousMb = averageRssDeltaMb(baseline);
+  const currentMb = averageRssDeltaMb(current);
+  // Without a usable average on both sides there is nothing to compare.
+  if (previousMb == null || currentMb == null) {
+    return [];
+  }
+  const row = [
+    "mock hello",
+    "gateway RSS delta avg",
+    formatMb(previousMb),
+    formatMb(currentMb),
+    formatDeltaMb(previousMb, currentMb),
+    "n/a",
+    memoryRisk(previousMb, currentMb),
+  ];
+  return [row];
+}
+
+function buildExtensionMemoryRows(extensionMemory) {
+  // Renders at most the first ten `topByDeltaMb` entries, in the order given.
+  const entries = extensionMemory?.topByDeltaMb ?? [];
+  return entries.slice(0, 10).map(({ dir, maxRssMb, deltaFromBaselineMb, status }) => [
+    dir ?? "unknown",
+    formatMb(maxRssMb),
+    formatMb(deltaFromBaselineMb),
+    status ?? "unknown",
+  ]);
+}
+
+function buildMemoryDeltaRows(current, baseline) {
+  // Rows are emitted per surface in a fixed order: gateway boot, CLI, mock hello.
+  if (!baseline) {
+    return [];
+  }
+  return buildStartupMemoryDeltaRows(current.startup, baseline.startup).concat(
+    buildCliMemoryDeltaRows(current.cli, baseline.cli),
+    buildMockHelloMemoryDeltaRows(current.mockHelloSummaries, baseline.mockHelloSummaries),
+  );
+}
+
 function formatExitSummary(value) {
  if (typeof value !== "string" || !value) {
    return "n/a";
@@ -181,13 +343,16 @@ function buildObservationRows(summary) {
  ]);
 }

-function buildMarkdown(sourceDir) {
+function buildMarkdown(sourceDir, baselineSourceDir) {
+  const current = loadSourceArtifacts(sourceDir) ?? {
+    startup: null,
+    cli: null,
+    extensionMemory: null,
+    mockHelloSummaries: [],
+  };
+  const baseline = loadSourceArtifacts(baselineSourceDir);
  const gatewaySummary = readJsonIfExists(path.join(sourceDir, "gateway-cpu", "summary.json"));
-  const startup = readJsonIfExists(
-    path.join(sourceDir, "gateway-cpu", "gateway-startup-bench.json"),
-  );
-  const cli = readJsonIfExists(path.join(sourceDir, "cli-startup.json"));
-  const mockHelloSummaries = loadMockHelloSummaries(sourceDir);
+  const memoryDeltaRows = buildMemoryDeltaRows(current, baseline);

  const lines = [
    "# OpenClaw Source Performance",
@@ -209,11 +374,35 @@ function buildMarkdown(sourceDir) {
        "RSS p95",
        "CPU core p95",
      ],
-      buildStartupRows(startup),
+      buildStartupRows(current.startup),
+    ),
+    "## Memory Trend",
+    "",
+    baseline
+      ? "Compared with the latest published mock-provider source probe for this tested ref."
+      : "No published source baseline was available for this tested ref.",
+    "",
+    ...table(
+      [
+        "surface",
+        "case",
+        "baseline RSS p95",
+        "current RSS p95",
+        "RSS delta",
+        "heap delta",
+        "state",
+      ],
+      memoryDeltaRows,
+    ),
+    "## Bundled Plugin Import Memory",
+    "",
+    ...table(
+      ["plugin", "max RSS", "delta from empty process", "status"],
+      buildExtensionMemoryRows(current.extensionMemory),
    ),
    "## Startup Hotspots",
    "",
-    ...table(["case", "phase", "p50", "p95"], buildTraceRows(startup)),
+    ...table(["case", "phase", "p50", "p95"], buildTraceRows(current.startup)),
    "## Fake Model Hello Loops",
    "",
    ...table(
@@ -228,13 +417,13 @@ function buildMarkdown(sourceDir) {
        "RSS delta",
        "model",
      ],
-      buildMockHelloRows(mockHelloSummaries),
+      buildMockHelloRows(current.mockHelloSummaries),
    ),
    "## CLI Against Booted Gateway",
    "",
    ...table(
      ["case", "command", "duration p50", "duration p95", "RSS p95", "exits"],
-      buildCliRows(cli),
+      buildCliRows(current.cli),
    ),
    "## Observations",
    "",
@@ -246,7 +435,7 @@ function buildMarkdown(sourceDir) {

 async function main() {
  const options = parseArgs(process.argv.slice(2));
-  const markdown = buildMarkdown(options.sourceDir);
+  const markdown = buildMarkdown(options.sourceDir, options.baselineSourceDir);
  if (options.output) {
    fs.mkdirSync(path.dirname(options.output), { recursive: true });
    fs.writeFileSync(options.output, markdown, "utf8");
--- a/scripts/test-group-report.mjs
+++ b/scripts/test-group-report.mjs
@@ -35,7 +35,7 @@ function usage() {
    "  --limit <count>       Number of groups/configs to print (default: 25)",
    "  --top-files <count>   Number of files to print (default: 25)",
    "  --allow-failures      Write a report even when a Vitest run exits non-zero",
-    "  --no-rss              Skip macOS max RSS measurement",
+    "  --no-rss              Skip max RSS measurement",
    "  --help                Show this help",
    "",
    "Examples:",
@@ -60,7 +60,7 @@ export function parseTestGroupReportArgs(argv) {
    limit: 25,
    output: null,
    reports: [],
-    rss: process.platform === "darwin",
+    rss: process.platform !== "win32",
    topFiles: 25,
    vitestArgs: [],
  };
@@ -156,9 +156,26 @@ function sanitizePathSegment(value) {
  );
 }

+function resolveTimeArgs(command) {
+  // Wrap `command` in /usr/bin/time so max RSS is reported: `-l` on macOS, `-v` on Linux.
+  const verboseFlag =
+    process.platform === "darwin" ? "-l" : process.platform === "linux" ? "-v" : null;
+  // Run the command unwrapped on other platforms or when /usr/bin/time is not installed.
+  return verboseFlag && fs.existsSync("/usr/bin/time")
+    ? { command: "/usr/bin/time", args: [verboseFlag, ...command] }
+    : { command: command[0], args: command.slice(1) };
+}
+
 function parseMaxRssBytes(output) {
-  const match = output.match(/(\d+)\s+maximum resident set size/u);
-  return match ? Number.parseInt(match[1], 10) : null;
+  // First shape: "<digits> maximum resident set size" (emitted by the `-l` invocation);
+  // the figure is returned unscaled.
+  const plainFigure = output.match(/(\d+)\s+maximum resident set size/u)?.[1];
+  if (plainFigure !== undefined) {
+    return Number.parseInt(plainFigure, 10);
+  }
+  // Second shape labels the figure "(kbytes)", so it is scaled up to bytes.
+  const kbytesFigure = output.match(/Maximum resident set size \(kbytes\):\s*(\d+)/u)?.[1];
+  return kbytesFigure === undefined ? null : Number.parseInt(kbytesFigure, 10) * 1024;
 }

 function runVitestJsonReport(params) {
@@ -177,24 +194,23 @@ function runVitestJsonReport(params) {
    ...params.vitestArgs,
  ];
  const startedAt = process.hrtime.bigint();
-  const result = spawnSync(
-    params.rss ? "/usr/bin/time" : command[0],
-    params.rss ? ["-l", ...command] : command.slice(1),
-    {
-      cwd: process.cwd(),
-      encoding: "utf8",
-      env: {
-        ...process.env,
-        NODE_OPTIONS: [
-          process.env.NODE_OPTIONS?.trim(),
-          ...resolveVitestNodeArgs(process.env).filter((arg) => arg !== "--no-maglev"),
-        ]
-          .filter(Boolean)
-          .join(" "),
-      },
-      maxBuffer: 1024 * 1024 * 64,
+  const spawnCommand = params.rss
+    ? resolveTimeArgs(command)
+    : { command: command[0], args: command.slice(1) };
+  const result = spawnSync(spawnCommand.command, spawnCommand.args, {
+    cwd: process.cwd(),
+    encoding: "utf8",
+    env: {
+      ...process.env,
+      NODE_OPTIONS: [
+        process.env.NODE_OPTIONS?.trim(),
+        ...resolveVitestNodeArgs(process.env).filter((arg) => arg !== "--no-maglev"),
+      ]
+        .filter(Boolean)
+        .join(" "),
    },
-  );
+    maxBuffer: 1024 * 1024 * 64,
+  });
  const elapsedMs = Number.parseFloat(String(process.hrtime.bigint() - startedAt)) / 1_000_000;
  const output = `${result.stdout ?? ""}${result.stderr ?? ""}`;
  fs.writeFileSync(params.logPath, output, "utf8");
--- a/test/scripts/test-group-report.test.ts
+++ b/test/scripts/test-group-report.test.ts
@@ -240,7 +240,7 @@ describe("scripts/test-group-report arg parsing", () => {
      limit: 25,
      output: null,
      reports: [],
-      rss: process.platform === "darwin",
+      rss: process.platform !== "win32",
      topFiles: 25,
      vitestArgs: ["--maxWorkers=1"],
    });
@@ -266,7 +266,7 @@ describe("scripts/test-group-report arg parsing", () => {
      limit: 5,
      output: null,
      reports: [],
-      rss: process.platform === "darwin",
+      rss: process.platform !== "win32",
      topFiles: 3,
      vitestArgs: [],
    });