fix: make autoreview progress visible

2026-06-06 05:51:15 +08:00 · 2026-05-25 08:49:52 +01:00
parent 84ab206887
commit 236edb267d
2 changed files with 38 additions and 4 deletions
--- a/.agents/skills/autoreview/SKILL.md
+++ b/.agents/skills/autoreview/SKILL.md
@@ -26,6 +26,8 @@ Use when:
 - If a review-triggered fix changes code, rerun focused tests and rerun the structured review helper.
 - For security-audit suppression changes, verify accepted findings remain auditable: suppressed findings stay in structured output, active output keeps an unsuppressible suppression notice, and aggregate findings cannot hide unrelated active risk.
 - Never switch or override the requested review engine/model. If the review hits model capacity, retry the same command a few times with the same engine/model.
+- Be patient with large bundles. Structured review can be silent for several minutes while the model call is active, especially with Codex tools or web search. Treat `review still running: ... elapsed=... pid=...` as healthy progress, not a hang.
+- Do not kill a review just because it has been quiet for 2-5 minutes. Inspect the process only after multiple heartbeat intervals or an obviously failed subprocess; prefer letting the same helper command finish.
 - Tools are useful in review mode. The helper allows read-only inspection tools and web search by default so reviewers can check dependency contracts, upstream docs, and current behavior.
 - Security perspective is always included, but it should not cripple legitimate functionality. Report security findings only when the change creates a concrete, actionable risk or removes an important safety check.
 - Do not invoke built-in `codex review`, nested reviewers, or reviewer panels from inside the review. The helper builds one bundle, calls one selected engine, validates one structured result, and stops.
@@ -169,6 +171,7 @@ The helper:
 - supports `--dry-run`, `--parallel-tests`, `--prompt`, `--prompt-file`, `--dataset`, `--no-tools`, `--no-web-search`, and commit refs
 - supports opt-in review panels with `--panel` / `--reviewers`, plus per-engine `--model` and `--thinking`
 - allows read-only tools and web search by default where the selected CLI supports them; forbids nested review in the prompt; Codex is run through `codex exec` with read-only sandbox and structured output
+- prints `review still running: <engine> elapsed=<seconds>s pid=<pid>` to stderr at long-running intervals while waiting for the selected review engine
 - prints `autoreview clean: no accepted/actionable findings reported` when the selected review command exits 0
 - exits nonzero when accepted/actionable findings are present

--- a/.agents/skills/autoreview/scripts/autoreview
+++ b/.agents/skills/autoreview/scripts/autoreview
@@ -93,6 +93,37 @@ def run(args: list[str], cwd: Path, *, input_text: str | None = None, check: boo
    return result


+def run_with_heartbeat(
+    args: list[str],
+    cwd: Path,
+    *,
+    input_text: str | None = None,
+    label: str,
+    heartbeat_seconds: int = 60,
+) -> subprocess.CompletedProcess[str]:
+    started = time.monotonic()
+    proc = subprocess.Popen(
+        args,
+        cwd=cwd,
+        stdin=subprocess.PIPE if input_text is not None else None,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+    )
+    first_communicate = True
+    while True:
+        try:
+            stdout, stderr = proc.communicate(
+                input=input_text if first_communicate else None,
+                timeout=heartbeat_seconds,
+            )
+            return subprocess.CompletedProcess(args, int(proc.returncode or 0), stdout, stderr)
+        except subprocess.TimeoutExpired:
+            first_communicate = False
+            elapsed = int(time.monotonic() - started)
+            print(f"review still running: {label} elapsed={elapsed}s pid={proc.pid}", file=sys.stderr, flush=True)
+
+
 def git(repo: Path, *args: str, check: bool = True) -> str:
    return run(["git", *args], repo, check=check).stdout

@@ -320,7 +351,7 @@ def run_codex(args: argparse.Namespace, repo: Path, prompt: str) -> str:
            "-",
        ]
    )
-    result = run(cmd, repo, input_text=prompt, check=False)
+    result = run_with_heartbeat(cmd, repo, input_text=prompt, label="codex")
    try:
        output = output_path.read_text()
    finally:
@@ -349,7 +380,7 @@ def run_claude(args: argparse.Namespace, repo: Path, prompt: str) -> str:
        cmd.extend(["--model", args.model])
    if args.thinking:
        cmd.extend(["--effort", args.thinking])
-    result = run(cmd, repo, input_text=prompt, check=False)
+    result = run_with_heartbeat(cmd, repo, input_text=prompt, label="claude")
    if result.returncode != 0:
        raise SystemExit(f"claude engine failed ({result.returncode})\n{result.stderr or result.stdout}")
    return result.stdout
@@ -374,7 +405,7 @@ def run_droid(args: argparse.Namespace, repo: Path, prompt: str) -> str:
        cmd.extend(["--model", args.model])
    if not args.tools:
        cmd.extend(["--disabled-tools", "*"])
-    result = run(cmd, repo, check=False)
+    result = run_with_heartbeat(cmd, repo, label="droid")
    prompt_path.unlink(missing_ok=True)
    if result.returncode != 0:
        raise SystemExit(f"droid engine failed ({result.returncode})\n{result.stderr or result.stdout}")
@@ -416,7 +447,7 @@ def run_copilot(args: argparse.Namespace, repo: Path, prompt: str) -> str:
        )
        if args.web_search:
            cmd.append("--allow-all-urls")
-        result = run(cmd, Path(tempdir), check=False)
+        result = run_with_heartbeat(cmd, Path(tempdir), label="copilot")
    if result.returncode != 0:
        raise SystemExit(f"copilot engine failed ({result.returncode})\n{result.stderr or result.stdout}")
    return result.stdout