You are a senior code reviewer. Review the following code changes.

## Specification

No specification provided. Focus on correctness, tests, and integration.





## Code Changes

```diff
diff --git a/expert_build/cli.py b/expert_build/cli.py
index 2cb73b9..5fab97b 100644
--- a/expert_build/cli.py
+++ b/expert_build/cli.py
@@ -182,6 +182,11 @@ def main():
     except Exception as e:
         print(f"error: {e}", file=sys.stderr)
         sys.exit(1)
+    finally:
+        from .llm import format_cost_summary
+        cost = format_cost_summary()
+        if cost:
+            print(f"\n{cost}", file=sys.stderr)
 
 
 if __name__ == "__main__":
diff --git a/expert_build/llm.py b/expert_build/llm.py
index 1ca48b2..8d7ecbc 100644
--- a/expert_build/llm.py
+++ b/expert_build/llm.py
@@ -1,4 +1,8 @@
-"""Model invocation for expert agent builder."""
+"""Model invocation for expert agent builder.
+
+Cost tracking: CLIs run in JSON mode (claude: --output-format json; gemini: -o json)
+to capture token counts and any reported cost. Use get_cost_summary() for totals.
+"""
 
 import asyncio
 import json
@@ -6,12 +10,101 @@
 import shutil
 
 MODEL_COMMANDS: dict[str, list[str]] = {
-    "claude": ["claude", "-p"],
-    "gemini": ["gemini", "-p", ""],
+    "claude": ["claude", "-p", "--output-format", "json"],
+    "gemini": ["gemini", "--skip-trust", "-o", "json", "-p", ""],
 }
 
 DEFAULT_TIMEOUT = 300
 
+_cost_tracker = {
+    "calls": 0,
+    "input_tokens": 0,
+    "output_tokens": 0,
+    "total_cost_usd": 0.0,
+    "by_model": {},
+}
+
+
+def reset_cost_tracker():
+    """Reset accumulated cost/token stats."""
+    _cost_tracker["calls"] = 0
+    _cost_tracker["input_tokens"] = 0
+    _cost_tracker["output_tokens"] = 0
+    _cost_tracker["total_cost_usd"] = 0.0
+    _cost_tracker["by_model"] = {}
+
+
+def get_cost_summary() -> dict:
+    """Return accumulated cost/token stats across all LLM calls."""
+    return dict(_cost_tracker)
+
+
+def format_cost_summary() -> str:
+    """Format cost summary as a human-readable string."""
+    s = _cost_tracker
+    if s["calls"] == 0:
+        return ""
+    parts = []
+    if s["total_cost_usd"] > 0:
+        parts.append(f"${s['total_cost_usd']:.4f}")
+    parts.append(f"{s['input_tokens']:,} input + {s['output_tokens']:,} output tokens")
+    parts.append(f"{s['calls']} call(s)")
+    return "Cost: " + " | ".join(parts)
+
+
+def _record_cost(model: str, input_tokens: int, output_tokens: int, cost_usd: float):
+    """Record token/cost stats from one LLM call."""
+    _cost_tracker["calls"] += 1
+    _cost_tracker["input_tokens"] += input_tokens
+    _cost_tracker["output_tokens"] += output_tokens
+    _cost_tracker["total_cost_usd"] += cost_usd
+
+    if model not in _cost_tracker["by_model"]:
+        _cost_tracker["by_model"][model] = {
+            "calls": 0, "input_tokens": 0, "output_tokens": 0, "total_cost_usd": 0.0,
+        }
+    m = _cost_tracker["by_model"][model]
+    m["calls"] += 1
+    m["input_tokens"] += input_tokens
+    m["output_tokens"] += output_tokens
+    m["total_cost_usd"] += cost_usd
+
+
+def _parse_cli_json(output: str, model: str) -> str:
+    """Parse JSON output from CLI, extract response text and record costs.
+
+    Returns the raw output if it is not a JSON object or has no response text.
+    """
+    try:
+        data = json.loads(output)
+    except (json.JSONDecodeError, ValueError):
+        return output
+
+    if not isinstance(data, dict):
+        return output
+
+    if model.startswith("gemini"):
+        text = data.get("response") or output
+        stats = data.get("stats", {})
+        input_tokens = 0
+        output_tokens = 0
+        for model_stats in stats.get("models", {}).values():
+            tokens = model_stats.get("tokens", {})
+            input_tokens += tokens.get("input", 0)
+            output_tokens += tokens.get("candidates", 0)
+        _record_cost(model, input_tokens, output_tokens, 0.0)
+        return text
+
+    text = data.get("result") or output
+    usage = data.get("usage", {})
+    input_tokens = (usage.get("input_tokens", 0)
+                    + usage.get("cache_creation_input_tokens", 0)
+                    + usage.get("cache_read_input_tokens", 0))
+    output_tokens = usage.get("output_tokens", 0)
+    cost_usd = data.get("total_cost_usd", 0.0)
+    _record_cost(model, input_tokens, output_tokens, cost_usd)
+    return text
+
 
 def check_model_available(model: str) -> bool:
     """Check if a model's CLI is available."""
@@ -22,7 +115,11 @@ def check_model_available(model: str) -> bool:
 
 
 async def invoke(prompt: str, model: str = "claude", timeout: int = DEFAULT_TIMEOUT) -> str:
-    """Invoke model via CLI, piping prompt through stdin."""
+    """Invoke model via CLI, piping prompt through stdin.
+
+    Runs the CLI in JSON output mode (flags in MODEL_COMMANDS) to capture
+    token/cost data; accumulated stats are available via get_cost_summary().
+    """
     if model not in MODEL_COMMANDS:
         raise ValueError(f"Unknown model: {model}. Available: {list(MODEL_COMMANDS.keys())}")
 
@@ -51,7 +148,7 @@ async def invoke(prompt: str, model: str = "claude", timeout: int = DEFAULT_TIME
     if proc.returncode != 0:
         raise RuntimeError(f"Model {model} failed: {stderr.decode()}")
 
-    return stdout.decode()
+    return _parse_cli_json(stdout.decode(), model)
 
 
 def invoke_sync(prompt: str, model: str = "claude", timeout: int = DEFAULT_TIMEOUT) -> str:
diff --git a/tests/test_llm.py b/tests/test_llm.py
new file mode 100644
index 0000000..3f41900
--- /dev/null
+++ b/tests/test_llm.py
@@ -0,0 +1,161 @@
+"""Tests for expert_build.llm — cost tracking and JSON output parsing."""
+
+import json
+
+import pytest
+
+from expert_build.llm import (
+    _parse_cli_json,
+    _record_cost,
+    reset_cost_tracker,
+    get_cost_summary,
+    format_cost_summary,
+)
+
+
+@pytest.fixture(autouse=True)
+def clean_tracker():
+    """Reset cost tracker before each test."""
+    reset_cost_tracker()
+    yield
+    reset_cost_tracker()
+
+
+# --- _parse_cli_json ---
+
+def test_parse_claude_json():
+    data = {
+        "result": "The answer is 4.",
+        "total_cost_usd": 0.21,
+        "usage": {
+            "input_tokens": 100,
+            "output_tokens": 10,
+            "cache_creation_input_tokens": 500,
+            "cache_read_input_tokens": 0,
+        },
+    }
+    text = _parse_cli_json(json.dumps(data), "claude")
+    assert text == "The answer is 4."
+    s = get_cost_summary()
+    assert s["calls"] == 1
+    assert s["input_tokens"] == 600
+    assert s["output_tokens"] == 10
+    assert s["total_cost_usd"] == 0.21
+
+
+def test_parse_gemini_json():
+    data = {
+        "response": "4",
+        "stats": {
+            "models": {
+                "gemini-2.5-flash": {
+                    "tokens": {"input": 200, "candidates": 5, "total": 205},
+                },
+            },
+        },
+    }
+    text = _parse_cli_json(json.dumps(data), "gemini")
+    assert text == "4"
+    s = get_cost_summary()
+    assert s["calls"] == 1
+    assert s["input_tokens"] == 200
+    assert s["output_tokens"] == 5
+    assert s["total_cost_usd"] == 0.0
+
+
+def test_parse_non_json_falls_back():
+    text = _parse_cli_json("Just plain text", "claude")
+    assert text == "Just plain text"
+    s = get_cost_summary()
+    assert s["calls"] == 0
+
+
+def test_parse_claude_null_result_falls_back():
+    data = {"result": None, "total_cost_usd": 0.01, "usage": {"input_tokens": 10, "output_tokens": 0}}
+    raw = json.dumps(data)
+    text = _parse_cli_json(raw, "claude")
+    assert text == raw
+    s = get_cost_summary()
+    assert s["calls"] == 1
+
+
+def test_parse_non_dict_json_falls_back():
+    text = _parse_cli_json("[1, 2, 3]", "claude")
+    assert text == "[1, 2, 3]"
+    s = get_cost_summary()
+    assert s["calls"] == 0
+
+
+def test_parse_gemini_multi_model():
+    data = {
+        "response": "answer",
+        "stats": {
+            "models": {
+                "gemini-2.5-flash-lite": {
+                    "tokens": {"input": 100, "candidates": 10},
+                },
+                "gemini-3-flash-preview": {
+                    "tokens": {"input": 500, "candidates": 20},
+                },
+            },
+        },
+    }
+    text = _parse_cli_json(json.dumps(data), "gemini")
+    assert text == "answer"
+    s = get_cost_summary()
+    assert s["input_tokens"] == 600
+    assert s["output_tokens"] == 30
+
+
+# --- cost accumulation ---
+
+def test_accumulates_across_calls():
+    _record_cost("claude", 100, 10, 0.10)
+    _record_cost("claude", 200, 20, 0.20)
+    s = get_cost_summary()
+    assert s["calls"] == 2
+    assert s["input_tokens"] == 300
+    assert s["output_tokens"] == 30
+    assert abs(s["total_cost_usd"] - 0.30) < 0.001
+
+
+def test_tracks_by_model():
+    _record_cost("claude", 100, 10, 0.10)
+    _record_cost("gemini", 200, 20, 0.0)
+    s = get_cost_summary()
+    assert s["by_model"]["claude"]["calls"] == 1
+    assert s["by_model"]["gemini"]["calls"] == 1
+    assert s["by_model"]["claude"]["total_cost_usd"] == 0.10
+
+
+def test_reset_clears_all():
+    _record_cost("claude", 100, 10, 0.10)
+    reset_cost_tracker()
+    s = get_cost_summary()
+    assert s["calls"] == 0
+    assert s["input_tokens"] == 0
+    assert s["by_model"] == {}
+
+
+# --- format_cost_summary ---
+
+def test_format_no_calls():
+    assert format_cost_summary() == ""
+
+
+def test_format_with_cost():
+    _record_cost("claude", 1000, 50, 0.1234)
+    result = format_cost_summary()
+    assert "Cost:" in result
+    assert "$0.1234" in result
+    assert "1,000 input" in result
+    assert "50 output" in result
+    assert "1 call(s)" in result
+
+
+def test_format_without_cost():
+    _record_cost("gemini", 500, 25, 0.0)
+    result = format_cost_summary()
+    assert "Cost:" in result
+    assert "$" not in result
+    assert "500 input" in result

```

## Observation Results

You previously requested observations. Here are the results:

```json
{
  "main_full_body": {
    "function": "main",
    "file": "expert_build/cli.py",
    "start_line": 17,
    "end_line": 189,
    "source": "def main():\n    parser = argparse.ArgumentParser(\n        prog=\"expert-build\",\n        description=\"Build expert agents from documented domains\",\n    )\n    parser.add_argument(\"--version\", action=\"version\", version=f\"%(prog)s {__version__}\")\n\n    sub = parser.add_subparsers(dest=\"command\")\n\n    # -- init --\n    init_p = sub.add_parser(\"init\", help=\"Bootstrap a new expert agent repo\")\n    init_p.add_argument(\"name\", help=\"Domain name (e.g., rhcsa, kubernetes)\")\n    init_p.add_argument(\"--domain\", help=\"One-line domain description\")\n    init_p.add_argument(\"--no-git\", action=\"store_true\", help=\"Skip git init (for subdirectories of existing repos)\")\n\n    # -- fetch-docs --\n    fetch_p = sub.add_parser(\"fetch-docs\", help=\"Fetch documentation from URLs\")\n    fetch_p.add_argument(\"url\", help=\"Starting URL to fetch\")\n    fetch_p.add_argument(\"--depth\", type=int, default=1, help=\"Crawl depth (default: 1)\")\n    fetch_p.add_argument(\"--output-dir\", default=\"sources\", help=\"Output directory (default: sources)\")\n    fetch_p.add_argument(\"--selector\", default=\"main,article,.content,body\",\n                         help=\"CSS selectors for content (comma-separated, default: main,article,.content,body)\")\n    fetch_p.add_argument(\"--sitemap\", action=\"store_true\", help=\"Use sitemap.xml for URL discovery\")\n    fetch_p.add_argument(\"--include\", help=\"URL pattern to include (glob)\")\n    fetch_p.add_argument(\"--exclude\", help=\"URL pattern to exclude (glob)\")\n    fetch_p.add_argument(\"--delay\", type=float, default=1.0, help=\"Delay between requests in seconds (default: 1.0)\")\n\n    # -- chunk-pdf --\n    chunk_p = sub.add_parser(\"chunk-pdf\", help=\"Chunk a PDF paper into section entries\")\n    chunk_p.add_argument(\"pdf\", help=\"Path to PDF file\")\n    chunk_p.add_argument(\"--prefix\", help=\"Entry filename prefix (e.g., 'doyle-1979')\")\n    
chunk_p.add_argument(\"--source-label\", help=\"Citation label for Source line\")\n    chunk_p.add_argument(\"--dry-run\", action=\"store_true\", help=\"Show sections without creating entries\")\n\n    # -- chunk-docs --\n    chunkd_p = sub.add_parser(\"chunk-docs\", help=\"Chunk large documents into entry-sized pieces\")\n    chunkd_p.add_argument(\"--input-dir\", default=\"sources\", help=\"Source directory (default: sources)\")\n    chunkd_p.add_argument(\"--threshold\", type=int, default=30000,\n                          help=\"Only chunk files larger than this (default: 30000)\")\n    chunkd_p.add_argument(\"--dry-run\", action=\"store_true\", help=\"Show chunks without creating entries\")\n\n    # -- summarize --\n    sum_p = sub.add_parser(\"summarize\", help=\"Generate entries from source documents\")\n    sum_p.add_argument(\"--input-dir\", default=\"sources\", help=\"Source directory (default: sources)\")\n    sum_p.add_argument(\"--limit\", type=int, help=\"Max files to process\")\n    sum_p.add_argument(\"--model\", default=\"claude\", help=\"Model to use (default: claude)\")\n\n    # -- propose-beliefs --\n    prop_p = sub.add_parser(\"propose-beliefs\", help=\"Extract candidate beliefs from entries\")\n    prop_p.add_argument(\"--input-dir\", default=\"entries\", help=\"Entries directory (default: entries)\")\n    prop_p.add_argument(\"--output\", default=\"proposed-beliefs.md\",\n                        help=\"Output file (default: proposed-beliefs.md)\")\n    prop_p.add_argument(\"--model\", default=\"claude\", help=\"Model to use (default: claude)\")\n    prop_p.add_argument(\"--batch-size\", type=int, default=5,\n                        help=\"Entries per LLM batch (default: 5)\")\n    prop_p.add_argument(\"--entry\", action=\"append\",\n                        help=\"Process specific entry file(s) instead of all entries\")\n    prop_p.add_argument(\"--all\", action=\"store_true\",\n                        help=\"Re-process all entries (ignore 
processed tracking)\")\n\n    # -- accept-beliefs --\n    accept_p = sub.add_parser(\"accept-beliefs\", help=\"Import accepted beliefs from proposals\")\n    accept_p.add_argument(\"--file\", default=\"proposed-beliefs.md\",\n                          help=\"Proposals file (default: proposed-beliefs.md)\")\n\n    # -- cert-coverage --\n    cert_p = sub.add_parser(\"cert-coverage\", help=\"Map cert objectives to beliefs\")\n    cert_p.add_argument(\"objectives_file\", help=\"Path to certification objectives\")\n    cert_p.add_argument(\"--beliefs-file\", type=Path, default=Path(\"reasons.db\"))\n    cert_p.add_argument(\"--model\", default=None, help=\"Use LLM for semantic matching\")\n\n    # -- exam --\n    exam_p = sub.add_parser(\"exam\", help=\"Run practice questions, discover gaps\")\n    exam_p.add_argument(\"questions_file\", help=\"Path to practice questions\")\n    exam_p.add_argument(\"--model\", default=\"claude\", help=\"Model to use (default: claude)\")\n    exam_p.add_argument(\"--beliefs-file\", type=Path, default=Path(\"reasons.db\"))\n    exam_p.add_argument(\"--limit\", type=int, help=\"Max questions to process\")\n    exam_p.add_argument(\"--output\", \"-o\", type=Path, default=None,\n                        help=\"Save results to file (markdown)\")\n    exam_p.add_argument(\"--no-judge\", action=\"store_true\",\n                        help=\"Disable LLM judge for open-ended questions (use string matching)\")\n\n    # -- pipeline --\n    pipe_p = sub.add_parser(\"pipeline\", help=\"Run end-to-end EEM construction pipeline\")\n    pipe_p.add_argument(\"--url\", help=\"Starting URL for doc fetching\")\n    pipe_p.add_argument(\"--pdf\", action=\"append\", help=\"PDF files to chunk (repeatable)\")\n    pipe_p.add_argument(\"--sources-dir\", default=\"sources\", help=\"Source directory (default: sources)\")\n    pipe_p.add_argument(\"--model\", default=\"claude\", help=\"Model for LLM calls (default: claude)\")\n    pipe_p.add_argument(\"--rounds\", 
type=int, default=3,\n                        help=\"Max derive/review/repair cycles (default: 3)\")\n    pipe_p.add_argument(\"--max-derive-rounds\", type=int, default=10,\n                        help=\"Max derive exhaust rounds per cycle (default: 10)\")\n    pipe_p.add_argument(\"--no-auto-accept\", action=\"store_true\",\n                        help=\"Stop after propose-beliefs for human review\")\n    pipe_p.add_argument(\"--no-fetch\", action=\"store_true\",\n                        help=\"Skip fetch-docs (use existing sources/)\")\n    pipe_p.add_argument(\"--depth\", type=int, default=2,\n                        help=\"Crawl depth for fetch-docs (default: 2)\")\n    pipe_p.add_argument(\"--timeout\", type=int, default=600,\n                        help=\"LLM timeout in seconds (default: 600)\")\n    pipe_p.add_argument(\"--domain\", help=\"Domain description for derive context\")\n    pipe_p.add_argument(\"--resume\", action=\"store_true\",\n                        help=\"Resume from last saved pipeline state\")\n\n    # -- derive-review-repair --\n    drr_p = sub.add_parser(\"derive-review-repair\",\n                           help=\"Run derive/review/repair loop on existing belief network\")\n    drr_p.add_argument(\"--model\", default=\"claude\", help=\"Model to use (default: claude)\")\n    drr_p.add_argument(\"--rounds\", type=int, default=3,\n                       help=\"Max derive/review/repair cycles (default: 3)\")\n    drr_p.add_argument(\"--max-derive-rounds\", type=int, default=10,\n                       help=\"Max derive exhaust rounds per cycle (default: 10)\")\n    drr_p.add_argument(\"--timeout\", type=int, default=600,\n                       help=\"LLM timeout in seconds (default: 600)\")\n    drr_p.add_argument(\"--domain\", help=\"Domain description for derive context\")\n\n    # -- status --\n    sub.add_parser(\"status\", help=\"Show pipeline progress\")\n\n    # -- install-skill --\n    skill_p = sub.add_parser(\"install-skill\", 
help=\"Install Claude Code skill file\")\n    skill_p.add_argument(\"--skill-dir\", type=Path, default=Path(\".claude/skills\"),\n                         help=\"Target skills directory\")\n\n    args = parser.parse_args()\n\n    if not args.command:\n        parser.print_help()\n        sys.exit(1)\n\n    commands = {\n        \"init\": lambda a: _lazy(\"init_cmd\", \"cmd_init\")(a),\n        \"chunk-pdf\": lambda a: _lazy(\"chunk_pdf\", \"cmd_chunk_pdf\")(a),\n        \"chunk-docs\": lambda a: _lazy(\"chunk_docs\", \"cmd_chunk_docs\")(a),\n        \"fetch-docs\": lambda a: _lazy(\"fetch\", \"cmd_fetch_docs\")(a),\n        \"summarize\": lambda a: _lazy(\"summarize\", \"cmd_summarize\")(a),\n        \"propose-beliefs\": lambda a: _lazy(\"propose\", \"cmd_propose_beliefs\")(a),\n        \"accept-beliefs\": lambda a: _lazy(\"propose\", \"cmd_accept_beliefs\")(a),\n        \"cert-coverage\": lambda a: _lazy(\"coverage\", \"cmd_cert_coverage\")(a),\n        \"exam\": lambda a: _lazy(\"exam\", \"cmd_exam\")(a),\n        \"pipeline\": lambda a: _lazy(\"pipeline\", \"cmd_pipeline\")(a),\n        \"derive-review-repair\": lambda a: _lazy(\"pipeline\", \"cmd_derive_review_repair\")(a),\n        \"status\": lambda a: _lazy(\"init_cmd\", \"cmd_status\")(a),\n        \"install-skill\": lambda a: _lazy(\"init_cmd\", \"cmd_install_skill\")(a),\n    }\n\n    subparser_names = set(sub.choices.keys())\n    command_names = set(commands.keys())\n    if subparser_names != command_names:\n        missing_dispatch = subparser_names - command_names\n        missing_parser = command_names - subparser_names\n        parts = []\n        if missing_dispatch:\n            parts.append(f\"subcommands without dispatch: {sorted(missing_dispatch)}\")\n        if missing_parser:\n            parts.append(f\"dispatch keys without subcommand: {sorted(missing_parser)}\")\n        print(f\"CLI registration error: {'; '.join(parts)}\", file=sys.stderr)\n        sys.exit(1)\n\n    try:\n        
commands[args.command](args)\n    except KeyboardInterrupt:\n        sys.exit(130)\n    except SystemExit:\n        raise\n    except Exception as e:\n        print(f\"error: {e}\", file=sys.stderr)\n        sys.exit(1)\n    finally:\n        from .llm import format_cost_summary\n        cost = format_cost_summary()\n        if cost:\n            print(f\"\\n{cost}\", file=sys.stderr)"
  },
  "invoke_callers": {
    "symbol": "invoke",
    "production_callers": [
      {
        "file": ".venv/lib/python3.14/site-packages/bs4/filter.py",
        "line": 295,
        "text": "# No need to invoke the test function."
      },
      {
        "file": ".venv/lib/python3.14/site-packages/bs4/builder/_htmlparser.py",
        "line": 341,
        "text": "invoked."
      },
      {
        "file": ".venv/lib/python3.14/site-packages/bs4/builder/_htmlparser.py",
        "line": 344,
        "text": "invoked."
      },
      {
        "file": ".venv/lib/python3.14/site-packages/pypdf/_writer.py",
        "line": 835,
        "text": "parameter which is invoked after pages are appended to the writer."
      },
      {
        "file": ".venv/lib/python3.14/site-packages/pypdf/_writer.py",
        "line": 844,
        "text": "Callback function that is invoked after each page is appended to"
      },
      {
        "file": ".venv/lib/python3.14/site-packages/pypdf/_writer.py",
        "line": 1234,
        "text": "Callback function that is invoked after each page is appended to"
      },
      {
        "file": ".venv/lib/python3.14/site-packages/typing_extensions.py",
        "line": 3152,
        "text": "# If we somehow get invoked from outside typing.py,"
      },
      {
        "file": "expert_build/exam.py",
        "line": 9,
        "text": "from .llm import check_model_available, extract_json, invoke_sync, RETRY_JSON"
      },
      {
        "file": "expert_build/exam.py",
        "line": 114,
        "text": "retry_response = invoke_sync("
      },
      {
        "file": "expert_build/exam.py",
        "line": 132,
        "text": "response = invoke_sync(prompt, model=model, timeout=60)"
      },
      {
        "file": "expert_build/exam.py",
        "line": 143,
        "text": "retry_response = invoke_sync("
      },
      {
        "file": "expert_build/exam.py",
        "line": 202,
        "text": "response = invoke_sync(prompt, model=args.model, timeout=120)"
      },
      {
        "file": "expert_build/propose.py",
        "line": 12,
        "text": "from .llm import check_model_available, extract_json, invoke_sync, RETRY_JSON"
      },
      {
        "file": "expert_build/propose.py",
        "line": 389,
        "text": "result = invoke_sync(prompt, model=args.model, timeout=600)"
      },
      {
        "file": "expert_build/propose.py",
        "line": 399,
        "text": "retry_response = invoke_sync("
      },
      {
        "file": "expert_build/llm.py",
        "line": 117,
        "text": "async def invoke(prompt: str, model: str = \"claude\", timeout: int = DEFAULT_TIMEOUT) -> str:"
      },
      {
        "file": "expert_build/llm.py",
        "line": 154,
        "text": "def invoke_sync(prompt: str, model: str = \"claude\", timeout: int = DEFAULT_TIMEOUT) -> str:"
      },
      {
        "file": "expert_build/llm.py",
        "line": 155,
        "text": "\"\"\"Synchronous wrapper for invoke.\"\"\""
      },
      {
        "file": "expert_build/llm.py",
        "line": 156,
        "text": "return asyncio.run(invoke(prompt, model, timeout))"
      },
      {
        "file": "expert_build/coverage.py",
        "line": 9,
        "text": "from .llm import check_model_available, extract_json, invoke_sync, RETRY_JSON"
      },
      {
        "file": "expert_build/coverage.py",
        "line": 115,
        "text": "result = invoke_sync(prompt, model=args.model, timeout=120)"
      },
      {
        "file": "expert_build/coverage.py",
        "line": 118,
        "text": "retry_response = invoke_sync("
      },
      {
        "file": "expert_build/summarize.py",
        "line": 7,
        "text": "from .llm import check_model_available, invoke_sync"
      },
      {
        "file": "expert_build/summarize.py",
        "line": 92,
        "text": "summary = invoke_sync(prompt, model=args.model)"
      },
      {
        "file": "expert_build/pipeline.py",
        "line": 10,
        "text": "from .llm import check_model_available, invoke_sync"
      },
      {
        "file": "expert_build/pipeline.py",
        "line": 193,
        "text": "response = invoke_sync(prompt, model=args.model, timeout=args.timeout)"
      }
    ],
    "test_callers": [
      {
        "file": "tests/test_exam.py",
        "line": 80,
        "text": "with patch(\"expert_build.exam.invoke_sync\",",
        "context_function": "test_extract_answer_retries_on_bad_json",
        "context_snippet": "   77: def test_extract_answer_retries_on_bad_json():\n   78:     bad_response = \"I think the answer is B because of reasons\"\n   79: \n>> 80:     with patch(\"expert_build.exam.invoke_sync\",\n   81:                return_value='{\"answer\": \"b\", \"explanation\": \"reasons\"}') as mock_llm:\n   82:         result = extract_answer(bad_response, model=\"test\", prompt=\"original prompt\")\n   83: "
      },
      {
        "file": "tests/test_exam.py",
        "line": 91,
        "text": "with patch(\"expert_build.exam.invoke_sync\", return_value=\"Still no format\"):",
        "context_function": "test_extract_answer_fallback_after_failed_retry",
        "context_snippet": "   88: def test_extract_answer_fallback_after_failed_retry():\n   89:     bad_response = \"No format at all\"\n   90: \n>> 91:     with patch(\"expert_build.exam.invoke_sync\", return_value=\"Still no format\"):\n   92:         result = extract_answer(bad_response, model=\"test\", prompt=\"original prompt\")\n   93: \n   94:     assert result == \"No format at all\""
      },
      {
        "file": "tests/test_exam.py",
        "line": 110,
        "text": "with patch(\"expert_build.exam.invoke_sync\",",
        "context_function": "test_judge_correct",
        "context_snippet": "   107: # --- judge_answer ---\n   108: \n   109: def test_judge_correct():\n>> 110:     with patch(\"expert_build.exam.invoke_sync\",\n   111:                return_value='{\"verdict\": \"CORRECT\", \"explanation\": \"matches\"}'):\n   112:         is_correct, explanation = judge_answer(\"q\", \"expected\", \"got\", \"test\")\n   113: "
      },
      {
        "file": "tests/test_exam.py",
        "line": 119,
        "text": "with patch(\"expert_build.exam.invoke_sync\",",
        "context_function": "test_judge_wrong",
        "context_snippet": "   116: \n   117: \n   118: def test_judge_wrong():\n>> 119:     with patch(\"expert_build.exam.invoke_sync\",\n   120:                return_value='{\"verdict\": \"WRONG\", \"explanation\": \"missed key point\"}'):\n   121:         is_correct, explanation = judge_answer(\"q\", \"expected\", \"got\", \"test\")\n   122: "
      },
      {
        "file": "tests/test_exam.py",
        "line": 136,
        "text": "with patch(\"expert_build.exam.invoke_sync\", side_effect=side_effect):",
        "context_function": "side_effect",
        "context_snippet": "   133:             return \"I think this is correct because it matches\"\n   134:         return '{\"verdict\": \"CORRECT\", \"explanation\": \"matches expected\"}'\n   135: \n>> 136:     with patch(\"expert_build.exam.invoke_sync\", side_effect=side_effect):\n   137:         is_correct, explanation = judge_answer(\"q\", \"expected\", \"got\", \"test\")\n   138: \n   139:     assert is_correct is True"
      },
      {
        "file": "tests/test_exam.py",
        "line": 144,
        "text": "with patch(\"expert_build.exam.invoke_sync\", return_value=\"No JSON at all\"):",
        "context_function": "test_judge_fallback_after_failed_retry",
        "context_snippet": "   141: \n   142: \n   143: def test_judge_fallback_after_failed_retry():\n>> 144:     with patch(\"expert_build.exam.invoke_sync\", return_value=\"No JSON at all\"):\n   145:         is_correct, explanation = judge_answer(\"q\", \"expected\", \"got\", \"test\")\n   146: \n   147:     assert is_correct is False"
      },
      {
        "file": "tests/test_exam.py",
        "line": 152,
        "text": "with patch(\"expert_build.exam.invoke_sync\",",
        "context_function": "test_judge_handles_llm_error",
        "context_snippet": "   149: \n   150: \n   151: def test_judge_handles_llm_error():\n>> 152:     with patch(\"expert_build.exam.invoke_sync\",\n   153:                side_effect=RuntimeError(\"timeout\")):\n   154:         is_correct, explanation = judge_answer(\"q\", \"expected\", \"got\", \"test\")\n   155: "
      },
      {
        "file": "tests/test_exam.py",
        "line": 169,
        "text": "with patch(\"expert_build.exam.invoke_sync\", side_effect=side_effect):",
        "context_function": "side_effect",
        "context_snippet": "   166:             return \"Not JSON\"\n   167:         raise RuntimeError(\"retry timeout\")\n   168: \n>> 169:     with patch(\"expert_build.exam.invoke_sync\", side_effect=side_effect):\n   170:         is_correct, explanation = judge_answer(\"q\", \"expected\", \"got\", \"test\")\n   171: \n   172:     assert is_correct is False"
      },
      {
        "file": "tests/test_exam.py",
        "line": 178,
        "text": "with patch(\"expert_build.exam.invoke_sync\",",
        "context_function": "test_judge_case_insensitive_verdict",
        "context_snippet": "   175: \n   176: \n   177: def test_judge_case_insensitive_verdict():\n>> 178:     with patch(\"expert_build.exam.invoke_sync\",\n   179:                return_value='{\"verdict\": \"correct\", \"explanation\": \"ok\"}'):\n   180:         is_correct, _ = judge_answer(\"q\", \"expected\", \"got\", \"test\")\n   181: "
      },
      {
        "file": "tests/test_coverage.py",
        "line": 47,
        "text": "def invoke_side_effect(prompt, model=None, timeout=None):",
        "context_function": "test_json_matching",
        "context_snippet": "   44:     args = make_args(objectives_file, beliefs_db)\n   45: \n   46:     call_count = 0\n>> 47:     def invoke_side_effect(prompt, model=None, timeout=None):\n   48:         nonlocal call_count\n   49:         call_count += 1\n   50:         if \"Configure local storage\" in prompt:"
      },
      {
        "file": "tests/test_coverage.py",
        "line": 55,
        "text": "patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   52:         return json.dumps({\"matching_ids\": [\"lvm-basics\"]})\n   53: \n   54:     with patch(\"expert_build.coverage.check_model_available\", return_value=True), \\\n>> 55:          patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\\n   56:          patch(\"expert_build.coverage.load_beliefs\", return_value=FAKE_BELIEFS):\n   57:         cmd_cert_coverage(args)\n   58: "
      },
      {
        "file": "tests/test_coverage.py",
        "line": 68,
        "text": "def invoke_side_effect(prompt, model=None, timeout=None):",
        "context_function": "test_json_empty_matches",
        "context_snippet": "   65:     \"\"\"LLM returns empty matching_ids array \u2014 falls back to keyword.\"\"\"\n   66:     args = make_args(objectives_file, beliefs_db)\n   67: \n>> 68:     def invoke_side_effect(prompt, model=None, timeout=None):\n   69:         return json.dumps({\"matching_ids\": []})\n   70: \n   71:     with patch(\"expert_build.coverage.check_model_available\", return_value=True), \\"
      },
      {
        "file": "tests/test_coverage.py",
        "line": 72,
        "text": "patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   69:         return json.dumps({\"matching_ids\": []})\n   70: \n   71:     with patch(\"expert_build.coverage.check_model_available\", return_value=True), \\\n>> 72:          patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\\n   73:          patch(\"expert_build.coverage.load_beliefs\", return_value=FAKE_BELIEFS):\n   74:         cmd_cert_coverage(args)\n   75: "
      },
      {
        "file": "tests/test_coverage.py",
        "line": 85,
        "text": "def invoke_side_effect(prompt, model=None, timeout=None):",
        "context_function": "test_json_retry_on_bad_response",
        "context_snippet": "   82:     args = make_args(objectives_file, beliefs_db)\n   83: \n   84:     call_count = 0\n>> 85:     def invoke_side_effect(prompt, model=None, timeout=None):\n   86:         nonlocal call_count\n   87:         call_count += 1\n   88:         if call_count <= 2:"
      },
      {
        "file": "tests/test_coverage.py",
        "line": 93,
        "text": "patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   90:         return json.dumps({\"matching_ids\": [\"local-storage-config\"]})\n   91: \n   92:     with patch(\"expert_build.coverage.check_model_available\", return_value=True), \\\n>> 93:          patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\\n   94:          patch(\"expert_build.coverage.load_beliefs\", return_value=FAKE_BELIEFS):\n   95:         cmd_cert_coverage(args)\n   96: "
      },
      {
        "file": "tests/test_coverage.py",
        "line": 105,
        "text": "def invoke_side_effect(prompt, model=None, timeout=None):",
        "context_function": "test_json_with_code_fence",
        "context_snippet": "   102:     \"\"\"LLM response wrapped in code fences is parsed correctly.\"\"\"\n   103:     args = make_args(objectives_file, beliefs_db)\n   104: \n>> 105:     def invoke_side_effect(prompt, model=None, timeout=None):\n   106:         return '```json\\n{\"matching_ids\": [\"local-storage-config\"]}\\n```'\n   107: \n   108:     with patch(\"expert_build.coverage.check_model_available\", return_value=True), \\"
      },
      {
        "file": "tests/test_coverage.py",
        "line": 109,
        "text": "patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   106:         return '```json\\n{\"matching_ids\": [\"local-storage-config\"]}\\n```'\n   107: \n   108:     with patch(\"expert_build.coverage.check_model_available\", return_value=True), \\\n>> 109:          patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\\n   110:          patch(\"expert_build.coverage.load_beliefs\", return_value=FAKE_BELIEFS):\n   111:         cmd_cert_coverage(args)\n   112: "
      },
      {
        "file": "tests/test_coverage.py",
        "line": 121,
        "text": "def invoke_side_effect(prompt, model=None, timeout=None):",
        "context_function": "test_invalid_belief_ids_ignored",
        "context_snippet": "   118:     \"\"\"Belief IDs not in the known beliefs list are silently ignored.\"\"\"\n   119:     args = make_args(objectives_file, beliefs_db)\n   120: \n>> 121:     def invoke_side_effect(prompt, model=None, timeout=None):\n   122:         return json.dumps({\"matching_ids\": [\"nonexistent-belief\", \"local-storage-config\"]})\n   123: \n   124:     with patch(\"expert_build.coverage.check_model_available\", return_value=True), \\"
      },
      {
        "file": "tests/test_coverage.py",
        "line": 125,
        "text": "patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   122:         return json.dumps({\"matching_ids\": [\"nonexistent-belief\", \"local-storage-config\"]})\n   123: \n   124:     with patch(\"expert_build.coverage.check_model_available\", return_value=True), \\\n>> 125:          patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\\n   126:          patch(\"expert_build.coverage.load_beliefs\", return_value=FAKE_BELIEFS):\n   127:         cmd_cert_coverage(args)\n   128: "
      },
      {
        "file": "tests/test_propose.py",
        "line": 56,
        "text": "def invoke_side_effect(prompt, model=None, timeout=None):",
        "context_function": "test_proposals_written_after_each_batch",
        "context_snippet": "   53:     args = make_args(entries_dir, output=str(output), batch_size=2)\n   54: \n   55:     call_count = 0\n>> 56:     def invoke_side_effect(prompt, model=None, timeout=None):\n   57:         nonlocal call_count\n   58:         call_count += 1\n   59:         if call_count == 2:"
      },
      {
        "file": "tests/test_propose.py",
        "line": 64,
        "text": "patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\"
      },
      {
        "file": "tests/test_propose.py",
        "line": 82,
        "text": "def invoke_side_effect(prompt, model=None, timeout=None):"
      },
      {
        "file": "tests/test_propose.py",
        "line": 88,
        "text": "patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\"
      },
      {
        "file": "tests/test_propose.py",
        "line": 106,
        "text": "def invoke_side_effect(prompt, model=None, timeout=None):"
      },
      {
        "file": "tests/test_propose.py",
        "line": 113,
        "text": "patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\"
      },
      {
        "file": "tests/test_propose.py",
        "line": 132,
        "text": "def invoke_side_effect(prompt, model=None, timeout=None):"
      },
      {
        "file": "tests/test_propose.py",
        "line": 140,
        "text": "patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\"
      },
      {
        "file": "tests/test_propose.py",
        "line": 163,
        "text": "def invoke_side_effect(prompt, model=None, timeout=None):"
      },
      {
        "file": "tests/test_propose.py",
        "line": 171,
        "text": "patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\"
      },
      {
        "file": "tests/test_propose.py",
        "line": 188,
        "text": "def invoke_side_effect(prompt, model=None, timeout=None):"
      }
    ],
    "production_count": 26,
    "test_count": 60,
    "total_count": 86
  },
  "invoke_sync_callers": {
    "symbol": "invoke_sync",
    "production_callers": [
      {
        "file": "expert_build/exam.py",
        "line": 9,
        "text": "from .llm import check_model_available, extract_json, invoke_sync, RETRY_JSON"
      },
      {
        "file": "expert_build/exam.py",
        "line": 114,
        "text": "retry_response = invoke_sync("
      },
      {
        "file": "expert_build/exam.py",
        "line": 132,
        "text": "response = invoke_sync(prompt, model=model, timeout=60)"
      },
      {
        "file": "expert_build/exam.py",
        "line": 143,
        "text": "retry_response = invoke_sync("
      },
      {
        "file": "expert_build/exam.py",
        "line": 202,
        "text": "response = invoke_sync(prompt, model=args.model, timeout=120)"
      },
      {
        "file": "expert_build/propose.py",
        "line": 12,
        "text": "from .llm import check_model_available, extract_json, invoke_sync, RETRY_JSON"
      },
      {
        "file": "expert_build/propose.py",
        "line": 389,
        "text": "result = invoke_sync(prompt, model=args.model, timeout=600)"
      },
      {
        "file": "expert_build/propose.py",
        "line": 399,
        "text": "retry_response = invoke_sync("
      },
      {
        "file": "expert_build/llm.py",
        "line": 154,
        "text": "def invoke_sync(prompt: str, model: str = \"claude\", timeout: int = DEFAULT_TIMEOUT) -> str:"
      },
      {
        "file": "expert_build/coverage.py",
        "line": 9,
        "text": "from .llm import check_model_available, extract_json, invoke_sync, RETRY_JSON"
      },
      {
        "file": "expert_build/coverage.py",
        "line": 115,
        "text": "result = invoke_sync(prompt, model=args.model, timeout=120)"
      },
      {
        "file": "expert_build/coverage.py",
        "line": 118,
        "text": "retry_response = invoke_sync("
      },
      {
        "file": "expert_build/summarize.py",
        "line": 7,
        "text": "from .llm import check_model_available, invoke_sync"
      },
      {
        "file": "expert_build/summarize.py",
        "line": 92,
        "text": "summary = invoke_sync(prompt, model=args.model)"
      },
      {
        "file": "expert_build/pipeline.py",
        "line": 10,
        "text": "from .llm import check_model_available, invoke_sync"
      },
      {
        "file": "expert_build/pipeline.py",
        "line": 193,
        "text": "response = invoke_sync(prompt, model=args.model, timeout=args.timeout)"
      }
    ],
    "test_callers": [
      {
        "file": "tests/test_exam.py",
        "line": 80,
        "text": "with patch(\"expert_build.exam.invoke_sync\",",
        "context_function": "test_extract_answer_retries_on_bad_json",
        "context_snippet": "   77: def test_extract_answer_retries_on_bad_json():\n   78:     bad_response = \"I think the answer is B because of reasons\"\n   79: \n>> 80:     with patch(\"expert_build.exam.invoke_sync\",\n   81:                return_value='{\"answer\": \"b\", \"explanation\": \"reasons\"}') as mock_llm:\n   82:         result = extract_answer(bad_response, model=\"test\", prompt=\"original prompt\")\n   83: "
      },
      {
        "file": "tests/test_exam.py",
        "line": 91,
        "text": "with patch(\"expert_build.exam.invoke_sync\", return_value=\"Still no format\"):",
        "context_function": "test_extract_answer_fallback_after_failed_retry",
        "context_snippet": "   88: def test_extract_answer_fallback_after_failed_retry():\n   89:     bad_response = \"No format at all\"\n   90: \n>> 91:     with patch(\"expert_build.exam.invoke_sync\", return_value=\"Still no format\"):\n   92:         result = extract_answer(bad_response, model=\"test\", prompt=\"original prompt\")\n   93: \n   94:     assert result == \"No format at all\""
      },
      {
        "file": "tests/test_exam.py",
        "line": 110,
        "text": "with patch(\"expert_build.exam.invoke_sync\",",
        "context_function": "test_judge_correct",
        "context_snippet": "   107: # --- judge_answer ---\n   108: \n   109: def test_judge_correct():\n>> 110:     with patch(\"expert_build.exam.invoke_sync\",\n   111:                return_value='{\"verdict\": \"CORRECT\", \"explanation\": \"matches\"}'):\n   112:         is_correct, explanation = judge_answer(\"q\", \"expected\", \"got\", \"test\")\n   113: "
      },
      {
        "file": "tests/test_exam.py",
        "line": 119,
        "text": "with patch(\"expert_build.exam.invoke_sync\",",
        "context_function": "test_judge_wrong",
        "context_snippet": "   116: \n   117: \n   118: def test_judge_wrong():\n>> 119:     with patch(\"expert_build.exam.invoke_sync\",\n   120:                return_value='{\"verdict\": \"WRONG\", \"explanation\": \"missed key point\"}'):\n   121:         is_correct, explanation = judge_answer(\"q\", \"expected\", \"got\", \"test\")\n   122: "
      },
      {
        "file": "tests/test_exam.py",
        "line": 136,
        "text": "with patch(\"expert_build.exam.invoke_sync\", side_effect=side_effect):",
        "context_function": "side_effect",
        "context_snippet": "   133:             return \"I think this is correct because it matches\"\n   134:         return '{\"verdict\": \"CORRECT\", \"explanation\": \"matches expected\"}'\n   135: \n>> 136:     with patch(\"expert_build.exam.invoke_sync\", side_effect=side_effect):\n   137:         is_correct, explanation = judge_answer(\"q\", \"expected\", \"got\", \"test\")\n   138: \n   139:     assert is_correct is True"
      },
      {
        "file": "tests/test_exam.py",
        "line": 144,
        "text": "with patch(\"expert_build.exam.invoke_sync\", return_value=\"No JSON at all\"):",
        "context_function": "test_judge_fallback_after_failed_retry",
        "context_snippet": "   141: \n   142: \n   143: def test_judge_fallback_after_failed_retry():\n>> 144:     with patch(\"expert_build.exam.invoke_sync\", return_value=\"No JSON at all\"):\n   145:         is_correct, explanation = judge_answer(\"q\", \"expected\", \"got\", \"test\")\n   146: \n   147:     assert is_correct is False"
      },
      {
        "file": "tests/test_exam.py",
        "line": 152,
        "text": "with patch(\"expert_build.exam.invoke_sync\",",
        "context_function": "test_judge_handles_llm_error",
        "context_snippet": "   149: \n   150: \n   151: def test_judge_handles_llm_error():\n>> 152:     with patch(\"expert_build.exam.invoke_sync\",\n   153:                side_effect=RuntimeError(\"timeout\")):\n   154:         is_correct, explanation = judge_answer(\"q\", \"expected\", \"got\", \"test\")\n   155: "
      },
      {
        "file": "tests/test_exam.py",
        "line": 169,
        "text": "with patch(\"expert_build.exam.invoke_sync\", side_effect=side_effect):",
        "context_function": "side_effect",
        "context_snippet": "   166:             return \"Not JSON\"\n   167:         raise RuntimeError(\"retry timeout\")\n   168: \n>> 169:     with patch(\"expert_build.exam.invoke_sync\", side_effect=side_effect):\n   170:         is_correct, explanation = judge_answer(\"q\", \"expected\", \"got\", \"test\")\n   171: \n   172:     assert is_correct is False"
      },
      {
        "file": "tests/test_exam.py",
        "line": 178,
        "text": "with patch(\"expert_build.exam.invoke_sync\",",
        "context_function": "test_judge_case_insensitive_verdict",
        "context_snippet": "   175: \n   176: \n   177: def test_judge_case_insensitive_verdict():\n>> 178:     with patch(\"expert_build.exam.invoke_sync\",\n   179:                return_value='{\"verdict\": \"correct\", \"explanation\": \"ok\"}'):\n   180:         is_correct, _ = judge_answer(\"q\", \"expected\", \"got\", \"test\")\n   181: "
      },
      {
        "file": "tests/test_coverage.py",
        "line": 55,
        "text": "patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   52:         return json.dumps({\"matching_ids\": [\"lvm-basics\"]})\n   53: \n   54:     with patch(\"expert_build.coverage.check_model_available\", return_value=True), \\\n>> 55:          patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\\n   56:          patch(\"expert_build.coverage.load_beliefs\", return_value=FAKE_BELIEFS):\n   57:         cmd_cert_coverage(args)\n   58: "
      },
      {
        "file": "tests/test_coverage.py",
        "line": 72,
        "text": "patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   69:         return json.dumps({\"matching_ids\": []})\n   70: \n   71:     with patch(\"expert_build.coverage.check_model_available\", return_value=True), \\\n>> 72:          patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\\n   73:          patch(\"expert_build.coverage.load_beliefs\", return_value=FAKE_BELIEFS):\n   74:         cmd_cert_coverage(args)\n   75: "
      },
      {
        "file": "tests/test_coverage.py",
        "line": 93,
        "text": "patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   90:         return json.dumps({\"matching_ids\": [\"local-storage-config\"]})\n   91: \n   92:     with patch(\"expert_build.coverage.check_model_available\", return_value=True), \\\n>> 93:          patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\\n   94:          patch(\"expert_build.coverage.load_beliefs\", return_value=FAKE_BELIEFS):\n   95:         cmd_cert_coverage(args)\n   96: "
      },
      {
        "file": "tests/test_coverage.py",
        "line": 109,
        "text": "patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   106:         return '```json\\n{\"matching_ids\": [\"local-storage-config\"]}\\n```'\n   107: \n   108:     with patch(\"expert_build.coverage.check_model_available\", return_value=True), \\\n>> 109:          patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\\n   110:          patch(\"expert_build.coverage.load_beliefs\", return_value=FAKE_BELIEFS):\n   111:         cmd_cert_coverage(args)\n   112: "
      },
      {
        "file": "tests/test_coverage.py",
        "line": 125,
        "text": "patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   122:         return json.dumps({\"matching_ids\": [\"nonexistent-belief\", \"local-storage-config\"]})\n   123: \n   124:     with patch(\"expert_build.coverage.check_model_available\", return_value=True), \\\n>> 125:          patch(\"expert_build.coverage.invoke_sync\", side_effect=invoke_side_effect), \\\n   126:          patch(\"expert_build.coverage.load_beliefs\", return_value=FAKE_BELIEFS):\n   127:         cmd_cert_coverage(args)\n   128: "
      },
      {
        "file": "tests/test_propose.py",
        "line": 64,
        "text": "patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   61:         return _json_beliefs((f\"belief-from-batch-{call_count}\", \"A belief.\"))\n   62: \n   63:     with patch(\"expert_build.propose.check_model_available\", return_value=True), \\\n>> 64:          patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\\n   65:          patch(\"expert_build.propose._load_existing_beliefs\", return_value=[]), \\\n   66:          patch(\"expert_build.propose._has_embeddings\", return_value=False):\n   67:         cmd_propose_beliefs(args)"
      },
      {
        "file": "tests/test_propose.py",
        "line": 88,
        "text": "patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   85:         return _json_beliefs((f\"belief-{call_count}\", \"A belief.\"))\n   86: \n   87:     with patch(\"expert_build.propose.check_model_available\", return_value=True), \\\n>> 88:          patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\\n   89:          patch(\"expert_build.propose._load_existing_beliefs\", return_value=[]), \\\n   90:          patch(\"expert_build.propose._has_embeddings\", return_value=False):\n   91:         cmd_propose_beliefs(args)"
      },
      {
        "file": "tests/test_propose.py",
        "line": 113,
        "text": "patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   110:         )\n   111: \n   112:     with patch(\"expert_build.propose.check_model_available\", return_value=True), \\\n>> 113:          patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\\n   114:          patch(\"expert_build.propose._load_existing_beliefs\", return_value=existing), \\\n   115:          patch(\"expert_build.propose._has_embeddings\", return_value=False):\n   116:         cmd_propose_beliefs(args)"
      },
      {
        "file": "tests/test_propose.py",
        "line": 140,
        "text": "patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   137:         return _json_beliefs((f\"belief-{call_count}\", \"A belief.\"))\n   138: \n   139:     with patch(\"expert_build.propose.check_model_available\", return_value=True), \\\n>> 140:          patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\\n   141:          patch(\"expert_build.propose._load_existing_beliefs\", return_value=[]), \\\n   142:          patch(\"expert_build.propose._has_embeddings\", return_value=False):\n   143:         cmd_propose_beliefs(args)"
      },
      {
        "file": "tests/test_propose.py",
        "line": 171,
        "text": "patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   168:         return _json_beliefs((\"retried-belief\", \"A belief from retry.\"))\n   169: \n   170:     with patch(\"expert_build.propose.check_model_available\", return_value=True), \\\n>> 171:          patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\\n   172:          patch(\"expert_build.propose._load_existing_beliefs\", return_value=[]), \\\n   173:          patch(\"expert_build.propose._has_embeddings\", return_value=False):\n   174:         cmd_propose_beliefs(args)"
      },
      {
        "file": "tests/test_propose.py",
        "line": 192,
        "text": "patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\",
        "context_function": "invoke_side_effect",
        "context_snippet": "   189:         return '```json\\n' + _json_beliefs((\"fenced-belief\", \"A belief.\")) + '\\n```'\n   190: \n   191:     with patch(\"expert_build.propose.check_model_available\", return_value=True), \\\n>> 192:          patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\\n   193:          patch(\"expert_build.propose._load_existing_beliefs\", return_value=[]), \\\n   194:          patch(\"expert_build.propose._has_embeddings\", return_value=False):\n   195:         cmd_propose_beliefs(args)"
      },
      {
        "file": "tests/test_propose.py",
        "line": 216,
        "text": "patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\"
      },
      {
        "file": "tests/test_propose.py",
        "line": 239,
        "text": "patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\"
      },
      {
        "file": "tests/test_propose.py",
        "line": 262,
        "text": "patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\"
      },
      {
        "file": "tests/test_propose.py",
        "line": 282,
        "text": "patch(\"expert_build.propose.invoke_sync\", side_effect=invoke_side_effect), \\"
      },
      {
        "file": "tests/test_summarize.py",
        "line": 50,
        "text": "patch(\"expert_build.summarize.invoke_sync\", return_value=\"## Topic Title\\nSummary\"):"
      },
      {
        "file": "tests/test_summarize.py",
        "line": 62,
        "text": "patch(\"expert_build.summarize.invoke_sync\", return_value=\"## Module\\nSummary\"):"
      },
      {
        "file": "tests/test_summarize.py",
        "line": 75,
        "text": "patch(\"expert_build.summarize.invoke_sync\", return_value=\"## Title\\nSummary\") as mock_llm:"
      },
      {
        "file": "tests/test_summarize.py",
        "line": 89,
        "text": "patch(\"expert_build.summarize.invoke_sync\") as mock_llm:"
      },
      {
        "file": "tests/test_summarize.py",
        "line": 102,
        "text": "patch(\"expert_build.summarize.invoke_sync\", return_value=\"## Module\\nSummary\") as mock_llm:"
      },
      {
        "file": "tests/test_summarize.py",
        "line": 114,
        "text": "patch(\"expert_build.summarize.invoke_sync\", return_value=\"## Doc Title\\nSummary\") as mock_llm:"
      }
    ],
    "production_count": 16,
    "test_count": 45,
    "total_count": 61
  },
  "invoke_sync_body": {
    "function": "invoke_sync",
    "file": "expert_build/llm.py",
    "start_line": 154,
    "end_line": 156,
    "source": "def invoke_sync(prompt: str, model: str = \"claude\", timeout: int = DEFAULT_TIMEOUT) -> str:\n    \"\"\"Synchronous wrapper for invoke.\"\"\"\n    return asyncio.run(invoke(prompt, model, timeout))"
  },
  "invoke_full_body": {
    "function": "invoke",
    "file": "expert_build/llm.py",
    "start_line": 117,
    "end_line": 151,
    "source": "async def invoke(prompt: str, model: str = \"claude\", timeout: int = DEFAULT_TIMEOUT) -> str:\n    \"\"\"Invoke model via CLI, piping prompt through stdin.\n\n    Uses --output-format json to capture token/cost data.\n    Accumulated stats available via get_cost_summary().\n    \"\"\"\n    if model not in MODEL_COMMANDS:\n        raise ValueError(f\"Unknown model: {model}. Available: {list(MODEL_COMMANDS.keys())}\")\n\n    cmd = MODEL_COMMANDS[model]\n\n    # Remove CLAUDECODE env var to allow nested claude invocation\n    env = {k: v for k, v in os.environ.items() if k != \"CLAUDECODE\"}\n\n    proc = await asyncio.create_subprocess_exec(\n        *cmd,\n        stdin=asyncio.subprocess.PIPE,\n        stdout=asyncio.subprocess.PIPE,\n        stderr=asyncio.subprocess.PIPE,\n        env=env,\n    )\n\n    try:\n        stdout, stderr = await asyncio.wait_for(\n            proc.communicate(prompt.encode()),\n            timeout=timeout,\n        )\n    except TimeoutError:\n        proc.kill()\n        raise TimeoutError(f\"Model {model} timed out after {timeout}s\") from None\n\n    if proc.returncode != 0:\n        raise RuntimeError(f\"Model {model} failed: {stderr.decode()}\")\n\n    return _parse_cli_json(stdout.decode(), model)"
  },
  "format_cost_summary_usages": {
    "symbol": "format_cost_summary",
    "usages": [
      {
        "file": "tests/test_llm.py",
        "line": 12,
        "text": "format_cost_summary,"
      },
      {
        "file": "tests/test_llm.py",
        "line": 140,
        "text": "# --- format_cost_summary ---"
      },
      {
        "file": "tests/test_llm.py",
        "line": 143,
        "text": "assert format_cost_summary() == \"\""
      },
      {
        "file": "tests/test_llm.py",
        "line": 148,
        "text": "result = format_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 158,
        "text": "result = format_cost_summary()"
      },
      {
        "file": "expert_build/llm.py",
        "line": 42,
        "text": "def format_cost_summary() -> str:"
      },
      {
        "file": "expert_build/cli.py",
        "line": 186,
        "text": "from .llm import format_cost_summary"
      },
      {
        "file": "expert_build/cli.py",
        "line": 187,
        "text": "cost = format_cost_summary()"
      }
    ],
    "production_usages": [
      {
        "file": "expert_build/llm.py",
        "line": 42,
        "text": "def format_cost_summary() -> str:"
      },
      {
        "file": "expert_build/cli.py",
        "line": 186,
        "text": "from .llm import format_cost_summary"
      },
      {
        "file": "expert_build/cli.py",
        "line": 187,
        "text": "cost = format_cost_summary()"
      }
    ],
    "test_usages": [
      {
        "file": "tests/test_llm.py",
        "line": 12,
        "text": "format_cost_summary,"
      },
      {
        "file": "tests/test_llm.py",
        "line": 140,
        "text": "# --- format_cost_summary ---"
      },
      {
        "file": "tests/test_llm.py",
        "line": 143,
        "text": "assert format_cost_summary() == \"\""
      },
      {
        "file": "tests/test_llm.py",
        "line": 148,
        "text": "result = format_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 158,
        "text": "result = format_cost_summary()"
      }
    ],
    "production_count": 3,
    "test_count": 5,
    "total_count": 8
  },
  "get_cost_summary_usages": {
    "symbol": "get_cost_summary",
    "usages": [
      {
        "file": "tests/test_llm.py",
        "line": 11,
        "text": "get_cost_summary,"
      },
      {
        "file": "tests/test_llm.py",
        "line": 39,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 59,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 69,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 78,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 85,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 105,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 115,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 125,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 134,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "expert_build/llm.py",
        "line": 4,
        "text": "counts and costs. Use get_cost_summary() to retrieve accumulated stats."
      },
      {
        "file": "expert_build/llm.py",
        "line": 37,
        "text": "def get_cost_summary() -> dict:"
      },
      {
        "file": "expert_build/llm.py",
        "line": 121,
        "text": "Accumulated stats available via get_cost_summary()."
      }
    ],
    "production_usages": [
      {
        "file": "expert_build/llm.py",
        "line": 4,
        "text": "counts and costs. Use get_cost_summary() to retrieve accumulated stats."
      },
      {
        "file": "expert_build/llm.py",
        "line": 37,
        "text": "def get_cost_summary() -> dict:"
      },
      {
        "file": "expert_build/llm.py",
        "line": 121,
        "text": "Accumulated stats available via get_cost_summary()."
      }
    ],
    "test_usages": [
      {
        "file": "tests/test_llm.py",
        "line": 11,
        "text": "get_cost_summary,"
      },
      {
        "file": "tests/test_llm.py",
        "line": 39,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 59,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 69,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 78,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 85,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 105,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 115,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 125,
        "text": "s = get_cost_summary()"
      },
      {
        "file": "tests/test_llm.py",
        "line": 134,
        "text": "s = get_cost_summary()"
      }
    ],
    "production_count": 3,
    "test_count": 10,
    "total_count": 13
  }
}
```

Use these results to inform your review. Do not request the same observations again.


## Instructions

For each significant change (new file, modified function, etc.), provide a structured verdict.

Use this exact format for each change:

### <file_path or file_path:function_name>
VERDICT: PASS | CONCERN | BLOCK
CORRECTNESS: VALID | QUESTIONABLE | BROKEN
SPEC_COMPLIANCE: MEETS | PARTIAL | VIOLATES | N/A
ISSUE_COMPLIANCE: ADDRESSES | PARTIAL | UNRELATED | N/A
BELIEF_COMPLIANCE: CONSISTENT | VIOLATES | N/A
TEST_COVERAGE: COVERED | PARTIAL | UNTESTED
INTEGRATION: WIRED | PARTIAL | MISSING
REASONING: <brief explanation of your assessment>
---

## Review Criteria

1. **CORRECTNESS**: Does the code do what it claims? Is the logic sound?
   - VALID: Logic is correct, no bugs apparent
   - QUESTIONABLE: Logic may have edge cases or unclear behavior
   - BROKEN: Clear bugs or incorrect behavior

2. **SPEC_COMPLIANCE**: Does it meet the MUST requirements from the spec?
   - MEETS: All relevant spec requirements satisfied
   - PARTIAL: Some requirements met, others missing or incomplete
   - VIOLATES: Contradicts spec requirements
   - N/A: No spec provided or not applicable

3. **ISSUE_COMPLIANCE** (only when an issue is provided): Do the changes address the problem or feature described in the issue?
   - ADDRESSES: Changes directly solve the issue's stated problem or implement the requested feature
   - PARTIAL: Changes partially address the issue but leave some aspects unresolved
   - UNRELATED: Changes do not appear related to the issue
   - N/A: No issue provided

4. **TEST_COVERAGE**: Are there tests for the new/changed code?
   - COVERED: Tests exist and cover the changes
   - PARTIAL: Some tests exist but coverage is incomplete
   - UNTESTED: No tests for the changes

5. **INTEGRATION**: Are callers updated? Is the feature usable end-to-end?
   - WIRED: Feature is fully integrated and usable
   - PARTIAL: Interface exists but callers not updated, or integration incomplete
   - MISSING: No integration with existing code

6. **BELIEF_COMPLIANCE** (only when beliefs are provided): Do the changes respect known architectural invariants, contracts, and rules?
   - CONSISTENT: Changes align with or reinforce known beliefs
   - VIOLATES: Changes contradict a specific belief — cite the belief ID
   - N/A: No beliefs provided or no relevant beliefs apply

## Verdict Guidelines

- **BLOCK**: Security issues, broken functionality, spec violations, or missing critical integration
- **CONCERN**: Missing tests, partial integration, questionable patterns, or unclear logic
- **PASS**: Correct, tested, well-integrated code

## Important

- Full function bodies for modified functions may be available in the observations section — use them to verify the complete logic, not just the diff hunks
- Related test files (prefixed with ``related_test:``) may be included in observations — check whether existing test assertions still match modified return types, signatures, or behavior. Flag any test that would break due to the changes
- If duplicate test coverage is detected (multiple test files covering the same source), note it in your review
- Focus on actual issues, not style preferences
- If a method signature is added but callers aren't updated, that's PARTIAL integration
- Be specific in reasoning — reference line numbers or function names
- When in doubt, use CONCERN rather than PASS

## Self-Review

After completing your review, add a brief self-assessment:

### SELF_REVIEW
LIMITATIONS: <what context were you missing that affected review quality?>
---

Examples of limitations:
- "Could not see full class to verify no other methods access the modified field"
- "Test file not included in diff - cannot verify coverage claims"
- "Spec file referenced but not provided"


## Feature Requests

If this review tool could be improved to help you do a better job, suggest features:

### FEATURE_REQUESTS
- <suggestion 1>
- <suggestion 2>
---

Examples:
- "Include full file context for modified functions, not just diff hunks"
- "Show callers of modified methods to verify integration"
- "Include test file alongside implementation changes"

Only include this section if you have specific suggestions. Skip if none.
