perf: speed up agent transcript lookup

This commit is contained in:
Peter Steinberger
2026-05-25 15:40:16 +01:00
parent 2e3b59bc58
commit e7ad116b9b
2 changed files with 34 additions and 13 deletions

View File

@@ -17,6 +17,7 @@ Best-effort local-only provenance for OpenClaw PR/issue bodies. Use during agent
- Fail closed on unresolved secrets, private keys, browser/session/cookie details, or auth URLs.
- Drop system/developer prompts, raw tool outputs, reasoning, env, cookies, tokens, and broad local paths.
- Keep user prompts, assistant visible decisions, terse tool summaries, and test/proof outcomes.
- Remove session turns unrelated to the PR/issue work. Use the PR/issue title, branch name, changed files, and stated goal as scope; omit earlier/later unrelated tasks even when they are in the same session log.
- Best effort only: PR/issue creation must continue if no safe transcript is found.
- Use a collapsed `<details>` section and update existing markers instead of duplicating sections.
@@ -35,6 +36,8 @@ Find a likely local session:
--since-days 14
```
`find` scores only the 400 most recently modified local JSONL logs within the `--since-days` window by default; the query is applied after that cut. Use `--max-files N` for a wider local search.
Render a PR/issue body section:
```bash
@@ -68,9 +71,10 @@ Append/update a body file before `gh pr create --body-file` or connector PR crea
3. If a high-confidence session is found, ask:
`Include a redacted agent transcript? It helps reviewers and can make the PR easier to prioritize. I can open a local preview first.`
4. If the user wants preview, run `preview`, open the HTML with `open`, and wait for confirmation.
5. If the user approves, run `append-body`.
6. Use the enriched body file for creation/update.
7. If no safe session is found, say nothing and continue without transcript. If the user declines, continue without transcript.
5. Before insertion, trim unrelated session turns from the generated section. Keep only turns that explain this PR/issue's goal, implementation choices, files, tests, proof, blockers, and final outcome.
6. If the user approves, run `append-body`.
7. Use the enriched body file for creation/update.
8. If no safe session is found, say nothing and continue without transcript. If the user declines, continue without transcript.
## Review Artifacts

View File

@@ -11,7 +11,7 @@ const DEFAULT_ENTRY_MAX_CHARS = 6000;
function usage() {
console.log(`Usage:
agent-transcript find --query TEXT [--cwd PATH] [--since-days N] [--root PATH...]
agent-transcript find --query TEXT [--cwd PATH] [--since-days N] [--max-files N] [--root PATH...]
agent-transcript render --session FILE [--out FILE] [--max-chars N] [--entry-max-chars N] [--title TEXT] [--url URL]
agent-transcript preview --session FILE [--out FILE] [--max-chars N] [--entry-max-chars N] [--title TEXT] [--url URL]
agent-transcript append-body --body FILE --session FILE [--out FILE] [--max-chars N] [--entry-max-chars N]
@@ -425,17 +425,14 @@ function readBoundedText(file, maxBytes = 220000) {
}
}
function sessionScanRecord(file) {
function sessionScanRecord(file, maxBytes) {
const stat = fs.statSync(file);
let agent = "agent";
try {
agent = detectAgent(file, readJsonl(file, 50));
} catch {}
const agent = detectAgent(file, []);
return {
file,
agent,
mtime: new Date(stat.mtimeMs).toISOString(),
haystack: `${file}\n${readBoundedText(file)}`.toLowerCase(),
haystack: `${file}\n${readBoundedText(file, maxBytes)}`.toLowerCase(),
};
}
@@ -461,6 +458,25 @@ function scoreScanRecord(record, terms, cwd) {
return { file: record.file, score, reasons, mtime: record.mtime, agent: record.agent };
}
/**
 * Pick the most recently modified files from a list of paths.
 *
 * @param {string[]} files - Candidate file paths.
 * @param {number} maxFiles - Upper bound on how many paths to return.
 * @returns {string[]} Paths ordered by modification time, newest first.
 */
function recentFiles(files, maxFiles) {
  const stamped = [];
  for (const file of files) {
    let mtimeMs;
    try {
      mtimeMs = fs.statSync(file).mtimeMs;
    } catch {
      // The path could not be stat'ed; leave it out rather than failing the whole lookup.
      continue;
    }
    stamped.push({ file, mtimeMs });
  }
  // Descending by mtime so the slice below keeps the newest entries.
  stamped.sort((left, right) => right.mtimeMs - left.mtimeMs);
  return stamped.slice(0, maxFiles).map(({ file }) => file);
}
/**
 * Collect the session logs that should be scored for a lookup.
 *
 * Gathers JSONL files from every root via `walkJsonl` and keeps only the most
 * recently modified ones, so a large local history is not read in full.
 *
 * @param {string[]} roots - Directories handed to `walkJsonl`.
 * @param {string[]} terms - Query terms. Not used here: candidates are chosen by recency only.
 * @param {number} sinceMs - Passed through to `walkJsonl` unchanged.
 * @param {object} [options] - Parsed CLI options; `options["max-files"]` caps the result (default 400).
 * @returns {string[]} Candidate paths, newest first, as ordered by `recentFiles`.
 */
function candidateFiles(roots, terms, sinceMs, options = {}) {
  const defaultMaxFiles = 400;
  const requested = Number(options["max-files"] || defaultMaxFiles);
  // A NaN cap makes slice() return nothing and a negative cap drops files from
  // the tail instead of limiting the count, so both fall back to the default.
  // (NaN fails the comparison; Infinity passes and means "no cap".)
  const maxFiles = requested >= 0 ? requested : defaultMaxFiles;
  const files = roots.flatMap((root) => walkJsonl(root, sinceMs));
  return recentFiles(files, maxFiles);
}
function findSessions(options) {
const sinceDays = Number(options["since-days"] || 14);
const sinceMs = Date.now() - sinceDays * 24 * 60 * 60 * 1000;
@@ -470,9 +486,10 @@ function findSessions(options) {
.split(/\s+/)
.concat(query.match(/https?:\/\/\S+/g) || [])
.filter(Boolean);
const files = roots.flatMap((root) => walkJsonl(root, sinceMs));
const files = candidateFiles(roots, terms, sinceMs, options);
const scanBytes = Number(options["scan-bytes"] || 60000);
const results = files
.map((file) => scoreScanRecord(sessionScanRecord(file), terms, options.cwd))
.map((file) => scoreScanRecord(sessionScanRecord(file, scanBytes), terms, options.cwd))
.filter((result) => result.score > 0)
.sort((a, b) => b.score - a.score || b.mtime.localeCompare(a.mtime))
.slice(0, Number(options.limit || 10));
@@ -487,7 +504,7 @@ function sessionScanRecords(options) {
return roots
.flatMap((root) => walkJsonl(root, sinceMs))
.filter((file) => !excluded.has(path.resolve(file)))
.map(sessionScanRecord);
.map((file) => sessionScanRecord(file, Number(options["scan-bytes"] || 90000)));
}
function replaceSection(body, section) {