You are a senior code reviewer preparing to review code changes.

## Code Changes

```diff
diff --git a/expert_build/chunk_docs.py b/expert_build/chunk_docs.py
new file mode 100644
index 0000000..7d4368a
--- /dev/null
+++ b/expert_build/chunk_docs.py
@@ -0,0 +1,191 @@
+"""Chunk large documents into entry-sized pieces."""
+
+import re
+import sys
+from datetime import date
+from pathlib import Path
+
+
+def chunk_markdown(text, max_chars=25000):
+    """Split markdown by heading boundaries, merging small sections."""
+    parts = re.split(r"(?=^#{1,2} )", text, flags=re.MULTILINE)
+    parts = [p for p in parts if p.strip()]
+
+    if len(parts) <= 1:
+        return chunk_fixed(text, max_chars)
+
+    chunks = []
+    current = ""
+    for part in parts:
+        if len(current) + len(part) > max_chars and current:
+            chunks.append(current)
+            current = part
+        else:
+            current += part
+    if current:
+        chunks.append(current)
+
+    return chunks
+
+
+def chunk_python(text, max_chars=25000):
+    """Split Python by top-level class/def boundaries, keeping imports."""
+    lines = text.split("\n")
+
+    preamble_end = 0
+    for i, line in enumerate(lines):
+        if re.match(r"^(class |def )", line) or (line.startswith("@") and i + 1 < len(lines) and re.match(r"^(class |def |@)", lines[i + 1])):
+            preamble_end = i
+            break
+    else:
+        return chunk_fixed(text, max_chars)
+
+    preamble = "\n".join(lines[:preamble_end]).rstrip() + "\n\n"
+
+    boundaries = []
+    for i, line in enumerate(lines[preamble_end:], start=preamble_end):
+        if re.match(r"^(class |def )", line):
+            # Include preceding decorator lines
+            start = i
+            while start > preamble_end and lines[start - 1].startswith("@"):
+                start -= 1
+            if not boundaries or boundaries[-1] != start:
+                boundaries.append(start)
+
+    if not boundaries:
+        return chunk_fixed(text, max_chars)
+
+    sections = []
+    for j, start in enumerate(boundaries):
+        end = boundaries[j + 1] if j + 1 < len(boundaries) else len(lines)
+        section = "\n".join(lines[start:end])
+        sections.append(section)
+
+    chunks = []
+    current = preamble
+    for section in sections:
+        if len(current) + len(section) > max_chars and current.strip() != preamble.strip():
+            chunks.append(current)
+            current = preamble + section
+        else:
+            current += section + "\n"
+    if current.strip():
+        chunks.append(current)
+
+    return chunks
+
+
+def chunk_fixed(text, max_chars=25000, overlap=500):
+    """Split text into fixed-size windows with overlap."""
+    if len(text) <= max_chars:
+        return [text]
+
+    overlap = min(overlap, max_chars // 4)
+    step = max(max_chars - overlap, 1)
+
+    chunks = []
+    start = 0
+    while start < len(text):
+        end = start + max_chars
+        chunks.append(text[start:end])
+        start += step
+    return chunks
+
+
+def _strip_frontmatter(text):
+    """Strip YAML frontmatter and return (frontmatter_dict, content)."""
+    meta = {}
+    content = text
+    if text.startswith("---"):
+        end = text.find("---", 3)
+        if end != -1:
+            for line in text[3:end].splitlines():
+                if ":" in line:
+                    key, _, val = line.partition(":")
+                    meta[key.strip()] = val.strip()
+            content = text[end + 3:].strip()
+    return meta, content
+
+
+def cmd_chunk_docs(args):
+    """Chunk large documents into entry-sized pieces."""
+    input_dir = Path(args.input_dir)
+    if not input_dir.exists():
+        print(f"Source directory not found: {input_dir}")
+        sys.exit(1)
+
+    threshold = args.threshold
+
+    sources = sorted(
+        [*input_dir.glob("*.md"), *input_dir.glob("*.py")],
+        key=lambda p: p.name,
+    )
+    if not sources:
+        print(f"No .md or .py files in {input_dir}")
+        return
+
+    manifest = Path(".chunked-docs")
+    done = set()
+    if manifest.exists():
+        done = set(manifest.read_text().strip().split("\n"))
+
+    total_chunked = 0
+    total_skipped = 0
+
+    for source_path in sources:
+        if str(source_path) in done:
+            total_skipped += 1
+            continue
+
+        raw = source_path.read_text()
+        meta, content = _strip_frontmatter(raw)
+
+        if len(content) <= threshold:
+            continue
+
+        print(f"Chunking: {source_path.name} ({len(content)} chars)")
+
+        max_chars = threshold
+        if source_path.suffix == ".py":
+            chunks = chunk_python(content, max_chars=max_chars)
+        elif source_path.suffix == ".md":
+            chunks = chunk_markdown(content, max_chars=max_chars)
+        else:
+            chunks = chunk_fixed(content, max_chars=max_chars)
+
+        if args.dry_run:
+            for i, chunk in enumerate(chunks, 1):
+                print(f"  chunk {i}: {len(chunk)} chars")
+            continue
+
+        today = date.today()
+        entry_dir = Path("entries") / str(today.year) / f"{today.month:02d}" / f"{today.day:02d}"
+        entry_dir.mkdir(parents=True, exist_ok=True)
+
+        source_url = meta.get("source_url") or meta.get("source", "")
+        if source_url and not source_url.startswith(("http://", "https://")):
+            source_url = ""
+        source_id = meta.get("source_id", "")
+
+        for i, chunk in enumerate(chunks, 1):
+            entry_name = f"{source_path.stem}-chunk-{i}.md"
+            entry_path = entry_dir / entry_name
+
+            fm_lines = [f"source: {source_path}"]
+            if source_url:
+                fm_lines.append(f"source_url: {source_url}")
+            if source_id:
+                fm_lines.append(f"source_id: {source_id}")
+            fm_lines.append(f"chunk: {i}/{len(chunks)}")
+            frontmatter = "---\n" + "\n".join(fm_lines) + "\n---\n\n"
+
+            entry_path.write_text(frontmatter + chunk + "\n")
+            print(f"  -> {entry_path}")
+
+        total_chunked += 1
+
+        with manifest.open("a") as f:
+            f.write(f"{source_path}\n")
+        done.add(str(source_path))
+
+    print(f"\nChunked {total_chunked} files ({total_skipped} already done)")
diff --git a/expert_build/cli.py b/expert_build/cli.py
index 0f99b53..2cb73b9 100644
--- a/expert_build/cli.py
+++ b/expert_build/cli.py
@@ -48,6 +48,13 @@ def main():
     chunk_p.add_argument("--source-label", help="Citation label for Source line")
     chunk_p.add_argument("--dry-run", action="store_true", help="Show sections without creating entries")
 
+    # -- chunk-docs --
+    chunkd_p = sub.add_parser("chunk-docs", help="Chunk large documents into entry-sized pieces")
+    chunkd_p.add_argument("--input-dir", default="sources", help="Source directory (default: sources)")
+    chunkd_p.add_argument("--threshold", type=int, default=30000,
+                          help="Only chunk files larger than this (default: 30000)")
+    chunkd_p.add_argument("--dry-run", action="store_true", help="Show chunks without creating entries")
+
     # -- summarize --
     sum_p = sub.add_parser("summarize", help="Generate entries from source documents")
     sum_p.add_argument("--input-dir", default="sources", help="Source directory (default: sources)")
@@ -140,6 +147,7 @@ def main():
     commands = {
         "init": lambda a: _lazy("init_cmd", "cmd_init")(a),
         "chunk-pdf": lambda a: _lazy("chunk_pdf", "cmd_chunk_pdf")(a),
+        "chunk-docs": lambda a: _lazy("chunk_docs", "cmd_chunk_docs")(a),
         "fetch-docs": lambda a: _lazy("fetch", "cmd_fetch_docs")(a),
         "summarize": lambda a: _lazy("summarize", "cmd_summarize")(a),
         "propose-beliefs": lambda a: _lazy("propose", "cmd_propose_beliefs")(a),
diff --git a/expert_build/summarize.py b/expert_build/summarize.py
index 840d7de..29d8bcb 100644
--- a/expert_build/summarize.py
+++ b/expert_build/summarize.py
@@ -83,7 +83,7 @@ def cmd_summarize(args):
                       f"Consider: expert-build chunk-pdf {source_path}")
             else:
                 print(f"  WARN: truncated from {original_len} to 30000 chars. "
-                      f"Large documents may lose tail content.")
+                      f"Consider: expert-build chunk-docs")
 
         template = SUMMARIZE_CODE if source_path.suffix == ".py" else SUMMARIZE
         prompt = template.format(content=content)
diff --git a/tests/test_chunk_docs.py b/tests/test_chunk_docs.py
new file mode 100644
index 0000000..571e52e
--- /dev/null
+++ b/tests/test_chunk_docs.py
@@ -0,0 +1,237 @@
+"""Tests for expert_build.chunk_docs — document chunking."""
+
+import types
+from pathlib import Path
+
+import pytest
+
+from expert_build.chunk_docs import (
+    chunk_markdown,
+    chunk_python,
+    chunk_fixed,
+    cmd_chunk_docs,
+)
+
+
+# --- chunk_markdown ---
+
+def test_chunk_markdown_by_headings():
+    text = "# Section 1\nContent one.\n\n# Section 2\nContent two.\n"
+    chunks = chunk_markdown(text, max_chars=50)
+    assert len(chunks) == 2
+    assert "Section 1" in chunks[0]
+    assert "Section 2" in chunks[1]
+
+
+def test_chunk_markdown_merges_small_sections():
+    text = "# A\nShort.\n\n# B\nAlso short.\n\n# C\nStill short.\n"
+    chunks = chunk_markdown(text, max_chars=1000)
+    assert len(chunks) == 1
+    assert "A" in chunks[0]
+    assert "C" in chunks[0]
+
+
+def test_chunk_markdown_h2_headings():
+    text = "## First\nContent.\n\n## Second\nMore content.\n"
+    chunks = chunk_markdown(text, max_chars=30)
+    assert len(chunks) == 2
+
+
+def test_chunk_markdown_no_headings_falls_back():
+    text = "Just a plain text document with no headings at all. " * 100
+    chunks = chunk_markdown(text, max_chars=200)
+    assert len(chunks) > 1
+
+
+# --- chunk_python ---
+
+def test_chunk_python_by_definitions():
+    text = (
+        "import os\n\n"
+        "def foo():\n    pass\n\n"
+        "def bar():\n    pass\n"
+    )
+    chunks = chunk_python(text, max_chars=40)
+    assert len(chunks) == 2
+    assert "def foo" in chunks[0]
+    assert "def bar" in chunks[1]
+
+
+def test_chunk_python_keeps_imports():
+    text = (
+        "import os\nimport sys\n\n"
+        "def first():\n    pass\n\n"
+        "def second():\n    pass\n"
+    )
+    chunks = chunk_python(text, max_chars=60)
+    assert len(chunks) == 2
+    for chunk in chunks:
+        assert "import os" in chunk
+
+
+def test_chunk_python_class_boundary():
+    text = (
+        "import x\n\n"
+        "class Foo:\n    pass\n\n"
+        "class Bar:\n    pass\n"
+    )
+    chunks = chunk_python(text, max_chars=40)
+    assert len(chunks) == 2
+    assert "class Foo" in chunks[0]
+    assert "class Bar" in chunks[1]
+
+
+def test_chunk_python_decorator_stays_with_function():
+    text = (
+        "import os\n\n"
+        "@decorator\n"
+        "def foo():\n    pass\n\n"
+        "def bar():\n    pass\n"
+    )
+    chunks = chunk_python(text, max_chars=60)
+    assert len(chunks) == 2
+    assert "@decorator" in chunks[0]
+    assert "def foo" in chunks[0]
+
+
+def test_chunk_python_no_defs_falls_back():
+    text = "x = 1\ny = 2\nz = 3\n" * 100
+    chunks = chunk_python(text, max_chars=100)
+    assert len(chunks) > 1
+
+
+# --- chunk_fixed ---
+
+def test_chunk_fixed_small_text_single_chunk():
+    text = "Short text."
+    chunks = chunk_fixed(text, max_chars=100)
+    assert len(chunks) == 1
+    assert chunks[0] == text
+
+
+def test_chunk_fixed_with_overlap():
+    text = "a" * 1000
+    chunks = chunk_fixed(text, max_chars=400, overlap=100)
+    assert len(chunks) >= 3
+    assert chunks[0][-100:] == chunks[1][:100]
+
+
+# --- cmd_chunk_docs ---
+
+@pytest.fixture
+def source_dir(tmp_path):
+    d = tmp_path / "sources"
+    d.mkdir()
+    return d
+
+
+@pytest.fixture
+def work_dir(tmp_path, monkeypatch):
+    wd = tmp_path / "work"
+    wd.mkdir()
+    monkeypatch.chdir(wd)
+    return wd
+
+
+def make_args(input_dir, threshold=100, dry_run=False):
+    return types.SimpleNamespace(
+        input_dir=str(input_dir),
+        threshold=threshold,
+        dry_run=dry_run,
+    )
+
+
+def test_skips_small_files(source_dir, work_dir, capsys):
+    (source_dir / "small.md").write_text("# Short\nContent")
+    args = make_args(source_dir, threshold=30000)
+    cmd_chunk_docs(args)
+    captured = capsys.readouterr()
+    assert "Chunked 0 files" in captured.out
+    entries = list((work_dir / "entries").rglob("*.md")) if (work_dir / "entries").exists() else []
+    assert len(entries) == 0
+
+
+def test_chunks_large_markdown(source_dir, work_dir):
+    text = "# Section 1\n" + "x" * 200 + "\n\n# Section 2\n" + "y" * 200
+    (source_dir / "big.md").write_text(text)
+    args = make_args(source_dir, threshold=100)
+    cmd_chunk_docs(args)
+    entries = list((work_dir / "entries").rglob("*.md"))
+    assert len(entries) == 2
+    contents = [e.read_text() for e in sorted(entries)]
+    assert "Section 1" in contents[0]
+    assert "Section 2" in contents[1]
+
+
+def test_chunks_large_python(source_dir, work_dir):
+    text = "import os\n\n" + "def foo():\n    " + "x = 1\n    " * 50 + "\n\ndef bar():\n    " + "y = 2\n    " * 50
+    (source_dir / "big.py").write_text(text)
+    args = make_args(source_dir, threshold=100)
+    cmd_chunk_docs(args)
+    entries = list((work_dir / "entries").rglob("*.md"))
+    assert len(entries) == 2
+
+
+def test_dry_run_no_files_created(source_dir, work_dir, capsys):
+    text = "# A\n" + "x" * 200 + "\n\n# B\n" + "y" * 200
+    (source_dir / "big.md").write_text(text)
+    args = make_args(source_dir, threshold=100, dry_run=True)
+    cmd_chunk_docs(args)
+    entries_dir = work_dir / "entries"
+    assert not entries_dir.exists() or len(list(entries_dir.rglob("*.md"))) == 0
+    captured = capsys.readouterr()
+    assert "chunk 1" in captured.out
+
+    manifest = work_dir / ".chunked-docs"
+    assert not manifest.exists()
+
+
+def test_dry_run_does_not_poison_manifest(source_dir, work_dir):
+    """Dry-run should not prevent subsequent real runs."""
+    text = "# A\n" + "x" * 200 + "\n\n# B\n" + "y" * 200
+    (source_dir / "big.md").write_text(text)
+
+    dry_args = make_args(source_dir, threshold=100, dry_run=True)
+    cmd_chunk_docs(dry_args)
+
+    real_args = make_args(source_dir, threshold=100, dry_run=False)
+    cmd_chunk_docs(real_args)
+
+    entries = list((work_dir / "entries").rglob("*.md"))
+    assert len(entries) == 2
+
+
+def test_provenance_frontmatter(source_dir, work_dir):
+    fm = "---\nsource_url: https://example.com/doc\nsource_id: abc\n---\n\n"
+    text = fm + "# Part 1\n" + "x" * 200 + "\n\n# Part 2\n" + "y" * 200
+    (source_dir / "doc.md").write_text(text)
+    args = make_args(source_dir, threshold=100)
+    cmd_chunk_docs(args)
+    entries = sorted((work_dir / "entries").rglob("*.md"))
+    content = entries[0].read_text()
+    assert "source_url: https://example.com/doc" in content
+    assert "source_id: abc" in content
+    assert "chunk: 1/" in content
+
+
+def test_manifest_tracking(source_dir, work_dir):
+    text = "# A\n" + "x" * 200 + "\n\n# B\n" + "y" * 200
+    (source_dir / "big.md").write_text(text)
+    args = make_args(source_dir, threshold=100)
+    cmd_chunk_docs(args)
+    entries_count_1 = len(list((work_dir / "entries").rglob("*.md")))
+
+    cmd_chunk_docs(args)
+    entries_count_2 = len(list((work_dir / "entries").rglob("*.md")))
+    assert entries_count_1 == entries_count_2
+
+
+def test_chunk_names_include_stem(source_dir, work_dir):
+    text = "# A\n" + "x" * 200 + "\n\n# B\n" + "y" * 200
+    (source_dir / "my-doc.md").write_text(text)
+    args = make_args(source_dir, threshold=100)
+    cmd_chunk_docs(args)
+    entries = list((work_dir / "entries").rglob("*.md"))
+    names = [e.name for e in entries]
+    assert any("my-doc-chunk-1" in n for n in names)
+    assert any("my-doc-chunk-2" in n for n in names)
diff --git a/tests/test_summarize.py b/tests/test_summarize.py
index ff3457d..7f000be 100644
--- a/tests/test_summarize.py
+++ b/tests/test_summarize.py
@@ -130,7 +130,7 @@ def test_truncation_warning_for_large_file(source_dir, work_dir, capsys):
 
     captured = capsys.readouterr()
     assert "WARN: truncated from 50000 to 30000 chars" in captured.out
-    assert "Large documents may lose tail content" in captured.out
+    assert "Consider: expert-build chunk-docs" in captured.out
 
 
 def test_truncation_content_is_capped(source_dir, work_dir):

```

## Your Task

Analyze the diff and identify what additional information you need to render confident verdicts.
Do NOT render verdicts yet. Only request observations.

## Available Observation Tools

| Tool | Purpose | When to use |
|------|---------|-------------|
| `exception_hierarchy` | Show exception MRO and subclasses | Retry logic, exception handling |
| `raises_analysis` | What exceptions a function raises | New function calls, error paths |
| `call_graph` | What a function calls | Impact analysis |
| `find_usages` | Where a symbol is used (with prod/test split) | Quick integration lookup |
| `find_callers` | Caller analysis with prod/test split and calling context | Method signature changes, return type changes, constructor modifications, integration verification |
| `test_coverage` | Find tests for a file (uses coverage-map if available) | Test coverage claims |
| `coverage_map_tests` | Find tests covering a file (from coverage-map.json) | Precise test coverage from actual execution |
| `coverage_map_files` | Find files covered by tests matching a pattern | Impact analysis for test changes |
| `function_body` | Full source of a function/method | Need complete function context beyond diff hunks |
| `file_imports` | Extract imports from a file | Verify import changes, check dependencies |
| `project_dependencies` | Get pyproject.toml/requirements.txt | Verify new imports have dependencies |
| `related_test_files` | Find test files for a source file | Discover tests by naming, imports, and coverage map |
| `class_hierarchy` | Show base classes and their `__init__` signatures | Class changes its parent, modifies `__init__`, or uses `super()` |
| `symbol_migration` | Check if a rename is complete across the repo | Symbol renamed in diff — verify old name is fully removed |
| `generator_info` | Report whether a function uses `yield` | Function might be a generator — affects return value semantics |

## What to Look For

1. **Exception handling**: Any `retry_if_exception_type`, `except`, or exception class references
2. **New dependencies**: Calls to external libraries where you don't know the error behavior
3. **Behavioral changes**: Modified logic where you need to verify callers/callees
4. **Test claims**: References to tests you can't see in the diff
5. **Inheritance changes**: Class definition changes, new base classes, `super()` calls
6. **Renames**: Symbols that appear to have been renamed in the diff
7. **Factory methods**: Calls to `@classmethod` / `@staticmethod` constructors (e.g. `Result.error(...)`) — request `function_body` to see their implementation

## Output Format

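Output a JSON array of observation requests, one object per request (the `...` line in the template below is a placeholder for further requests, not literal output):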

```json
[
  {"name": "descriptive_name", "tool": "tool_name", "params": {"param": "value"}},
  ...
]
```

If you don't need any observations (simple changes, all context is in the diff), output:

```json
[]
```

## Examples

For a diff containing `retry_if_exception_type((OSError, httpx.TransportError))`:
```json
[
  {"name": "oserror_subclasses", "tool": "exception_hierarchy", "params": {"class_name": "builtins.OSError"}},
  {"name": "transport_errors", "tool": "exception_hierarchy", "params": {"class_name": "httpx.TransportError"}}
]
```

For a diff adding a new function that calls `oauth_client.get_access_token()`:
```json
[
  {"name": "oauth_exceptions", "tool": "raises_analysis", "params": {"file_path": "src/auth/oauth.py", "function_name": "get_access_token"}}
]
```

For a diff modifying a method but you need the full function to verify:
```json
[
  {"name": "full_getattr", "tool": "function_body", "params": {"file_path": "src/proxy.py", "function_name": "__getattr__"}}
]
```

For a diff changing a method signature or return type (verify all callers):
```json
[
  {"name": "handle_request_callers", "tool": "find_callers", "params": {"symbol": "handle_request"}}
]
```

For a diff adding new imports (e.g., `import httpx`):
```json
[
  {"name": "file_imports", "tool": "file_imports", "params": {"file_path": "src/client.py"}},
  {"name": "project_deps", "tool": "project_dependencies", "params": {}}
]
```

For a diff calling a factory method like `ModuleResult.error_result(msg)`:
```json
[
  {"name": "error_result_body", "tool": "function_body", "params": {"file_path": "src/models.py", "function_name": "error_result"}}
]
```

For a diff where a class changes its parent class:
```json
[
  {"name": "client_hierarchy", "tool": "class_hierarchy", "params": {"class_name": "MyClient", "file_path": "src/client.py"}}
]
```

For a diff that renames a symbol (e.g., `OldClient` to `NewClient`):
```json
[
  {"name": "client_rename", "tool": "symbol_migration", "params": {"old_name": "OldClient", "new_name": "NewClient"}}
]
```

For a diff modifying a function that might be a generator:
```json
[
  {"name": "process_gen", "tool": "generator_info", "params": {"file_path": "src/pipeline.py", "function_name": "process_items"}}
]
```

Now analyze the diff above and output your observation requests as JSON:
