You are a senior code reviewer preparing to review code changes.

## Code Changes

```diff
diff --git a/expert_build/cli.py b/expert_build/cli.py
index 0ac957f..37c0171 100644
--- a/expert_build/cli.py
+++ b/expert_build/cli.py
@@ -138,6 +138,19 @@ def main():
                        help="LLM timeout in seconds (default: 600)")
     drr_p.add_argument("--domain", help="Domain description for derive context")
 
+    # -- index-sources --
+    idx_p = sub.add_parser("index-sources", help="Build FTS5 chunks database from source documents")
+    idx_p.add_argument("--input-dir", default="sources", help="Source directory (default: sources)")
+    idx_p.add_argument("--recursive", "-r", action="store_true",
+                       help="Recursively search subdirectories")
+    idx_p.add_argument("--db", default="rag_fts.db", help="Output database path (default: rag_fts.db)")
+    idx_p.add_argument("--type", default="source", choices=["source", "summary", "chunked-summary"],
+                       help="Chunk type metadata (default: source)")
+    idx_p.add_argument("--chunk-size", type=int, default=2000,
+                       help="Target chunk size in chars (default: 2000)")
+    idx_p.add_argument("--rebuild", action="store_true",
+                       help="Drop and rebuild the index from scratch")
+
     # -- status --
     sub.add_parser("status", help="Show pipeline progress")
 
@@ -162,6 +175,7 @@ def main():
         "accept-beliefs": lambda a: _lazy("propose", "cmd_accept_beliefs")(a),
         "cert-coverage": lambda a: _lazy("coverage", "cmd_cert_coverage")(a),
         "exam": lambda a: _lazy("exam", "cmd_exam")(a),
+        "index-sources": lambda a: _lazy("index_sources", "cmd_index_sources")(a),
         "pipeline": lambda a: _lazy("pipeline", "cmd_pipeline")(a),
         "derive-review-repair": lambda a: _lazy("pipeline", "cmd_derive_review_repair")(a),
         "status": lambda a: _lazy("init_cmd", "cmd_status")(a),
diff --git a/expert_build/index_sources.py b/expert_build/index_sources.py
new file mode 100644
index 0000000..3e0b634
--- /dev/null
+++ b/expert_build/index_sources.py
@@ -0,0 +1,115 @@
+"""Build FTS5 chunks database from source documents."""
+
+import sqlite3
+import sys
+from pathlib import Path
+
+from .chunk_docs import chunk_markdown, chunk_python, chunk_fixed, _strip_frontmatter
+
+
+DEFAULT_DB = "rag_fts.db"
+DEFAULT_CHUNK_SIZE = 2000
+
+
+def _init_db(db_path, rebuild=False):
+    """Create the chunks and FTS5 tables."""
+    conn = sqlite3.connect(db_path)
+    if rebuild:
+        conn.execute("DROP TABLE IF EXISTS chunks_fts")
+        conn.execute("DROP TABLE IF EXISTS chunks")
+
+    conn.execute("""
+        CREATE TABLE IF NOT EXISTS chunks (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            text TEXT NOT NULL,
+            cluster TEXT DEFAULT '',
+            filename TEXT NOT NULL,
+            section TEXT DEFAULT '',
+            chunk_type TEXT DEFAULT 'source',
+            source_url TEXT DEFAULT ''
+        )
+    """)
+    conn.execute("""
+        CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts
+        USING fts5(text, content=chunks, content_rowid=id,
+                   tokenize="porter unicode61")
+    """)
+    conn.commit()
+    return conn
+
+
+def _insert_chunks(conn, chunks, filename, chunk_type="source", source_url=""):
+    """Insert chunks into the database and FTS5 index."""
+    for i, chunk_text in enumerate(chunks):
+        section = f"chunk {i + 1}/{len(chunks)}" if len(chunks) > 1 else ""
+        conn.execute(
+            "INSERT INTO chunks (text, filename, section, chunk_type, source_url) "
+            "VALUES (?, ?, ?, ?, ?)",
+            (chunk_text.strip(), str(filename), section, chunk_type, source_url),
+        )
+    conn.commit()
+    conn.execute("INSERT INTO chunks_fts(chunks_fts) VALUES('rebuild')")
+    conn.commit()
+
+
+def cmd_index_sources(args):
+    """Build FTS5 chunks database from source documents."""
+    input_dir = Path(args.input_dir)
+    if not input_dir.exists():
+        print(f"Source directory not found: {input_dir}")
+        sys.exit(1)
+
+    db_path = args.db
+    rebuild = getattr(args, "rebuild", False)
+    chunk_type = getattr(args, "type", "source")
+    max_chars = getattr(args, "chunk_size", DEFAULT_CHUNK_SIZE)
+
+    glob = input_dir.rglob if getattr(args, "recursive", False) else input_dir.glob
+    sources = sorted(
+        [*glob("*.md"), *glob("*.py"), *glob("*.txt")],
+        key=lambda p: p.name,
+    )
+    if not sources:
+        print(f"No .md, .py, or .txt files in {input_dir}")
+        return
+
+    print(f"Indexing {len(sources)} files into {db_path}")
+    conn = _init_db(db_path, rebuild=rebuild)
+
+    existing = set()
+    if not rebuild:
+        cur = conn.execute("SELECT DISTINCT filename FROM chunks")
+        existing = {row[0] for row in cur.fetchall()}
+
+    indexed = 0
+    skipped = 0
+
+    for source_path in sources:
+        if str(source_path) in existing:
+            skipped += 1
+            continue
+
+        raw = source_path.read_text()
+        meta, content = _strip_frontmatter(raw)
+
+        if not content.strip():
+            continue
+
+        source_url = meta.get("source_url") or meta.get("source", "")
+        if source_url and not source_url.startswith(("http://", "https://")):
+            source_url = ""
+
+        if source_path.suffix == ".py":
+            chunks = chunk_python(content, max_chars=max_chars)
+        elif source_path.suffix == ".md":
+            chunks = chunk_markdown(content, max_chars=max_chars)
+        else:
+            chunks = chunk_fixed(content, max_chars=max_chars)
+
+        _insert_chunks(conn, chunks, source_path, chunk_type=chunk_type,
+                       source_url=source_url)
+        indexed += 1
+        print(f"  {source_path.name} -> {len(chunks)} chunk(s)")
+
+    conn.close()
+    print(f"\nIndexed {indexed} files ({skipped} already indexed)")
diff --git a/tests/test_index_sources.py b/tests/test_index_sources.py
new file mode 100644
index 0000000..88c87e6
--- /dev/null
+++ b/tests/test_index_sources.py
@@ -0,0 +1,207 @@
+"""Tests for expert_build.index_sources — FTS5 chunk indexing."""
+
+import sqlite3
+import types
+from pathlib import Path
+
+import pytest
+
+from expert_build.index_sources import cmd_index_sources, _init_db, _insert_chunks
+
+
+@pytest.fixture
+def source_dir(tmp_path):
+    d = tmp_path / "sources"
+    d.mkdir()
+    return d
+
+
+@pytest.fixture
+def work_dir(tmp_path, monkeypatch):
+    wd = tmp_path / "work"
+    wd.mkdir()
+    monkeypatch.chdir(wd)
+    return wd
+
+
+def make_args(input_dir, db="rag_fts.db", rebuild=False, recursive=False,
+              chunk_type="source", chunk_size=2000):
+    return types.SimpleNamespace(
+        input_dir=str(input_dir),
+        db=db,
+        rebuild=rebuild,
+        recursive=recursive,
+        type=chunk_type,
+        chunk_size=chunk_size,
+    )
+
+
+# --- _init_db ---
+
+def test_init_db_creates_tables(tmp_path):
+    db_path = str(tmp_path / "test.db")
+    conn = _init_db(db_path)
+    cur = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
+    tables = {row[0] for row in cur.fetchall()}
+    assert "chunks" in tables
+    assert "chunks_fts" in tables
+    conn.close()
+
+
+def test_init_db_rebuild_clears_data(tmp_path):
+    db_path = str(tmp_path / "test.db")
+    conn = _init_db(db_path)
+    _insert_chunks(conn, ["test content"], "test.md")
+    conn.close()
+
+    conn = _init_db(db_path, rebuild=True)
+    cur = conn.execute("SELECT COUNT(*) FROM chunks")
+    assert cur.fetchone()[0] == 0
+    conn.close()
+
+
+# --- cmd_index_sources ---
+
+def test_indexes_markdown_files(source_dir, work_dir):
+    (source_dir / "doc.md").write_text("# Hello\nSome content about testing.")
+    db_path = str(work_dir / "test.db")
+    args = make_args(source_dir, db=db_path)
+    cmd_index_sources(args)
+
+    conn = sqlite3.connect(db_path)
+    cur = conn.execute("SELECT COUNT(*) FROM chunks")
+    assert cur.fetchone()[0] >= 1
+    cur = conn.execute("SELECT filename FROM chunks")
+    filenames = [row[0] for row in cur.fetchall()]
+    assert any("doc.md" in f for f in filenames)
+    conn.close()
+
+
+def test_indexes_python_files(source_dir, work_dir):
+    (source_dir / "module.py").write_text("import os\n\ndef hello():\n    pass\n")
+    db_path = str(work_dir / "test.db")
+    args = make_args(source_dir, db=db_path)
+    cmd_index_sources(args)
+
+    conn = sqlite3.connect(db_path)
+    cur = conn.execute("SELECT COUNT(*) FROM chunks")
+    assert cur.fetchone()[0] >= 1
+    conn.close()
+
+
+def test_skips_already_indexed(source_dir, work_dir, capsys):
+    (source_dir / "doc.md").write_text("# Hello\nContent")
+    db_path = str(work_dir / "test.db")
+    args = make_args(source_dir, db=db_path)
+
+    cmd_index_sources(args)
+    cmd_index_sources(args)
+
+    captured = capsys.readouterr()
+    assert "already indexed" in captured.out
+
+
+def test_rebuild_reindexes(source_dir, work_dir):
+    (source_dir / "doc.md").write_text("# Hello\nContent")
+    db_path = str(work_dir / "test.db")
+
+    args = make_args(source_dir, db=db_path)
+    cmd_index_sources(args)
+
+    args = make_args(source_dir, db=db_path, rebuild=True)
+    cmd_index_sources(args)
+
+    conn = sqlite3.connect(db_path)
+    cur = conn.execute("SELECT COUNT(*) FROM chunks")
+    assert cur.fetchone()[0] >= 1
+    conn.close()
+
+
+def test_recursive_indexes_nested(source_dir, work_dir):
+    subdir = source_dir / "sub"
+    subdir.mkdir()
+    (subdir / "nested.md").write_text("# Nested\nContent here")
+    (source_dir / "top.md").write_text("# Top\nContent here")
+    db_path = str(work_dir / "test.db")
+    args = make_args(source_dir, db=db_path, recursive=True)
+    cmd_index_sources(args)
+
+    conn = sqlite3.connect(db_path)
+    cur = conn.execute("SELECT COUNT(DISTINCT filename) FROM chunks")
+    assert cur.fetchone()[0] == 2
+    conn.close()
+
+
+def test_non_recursive_skips_nested(source_dir, work_dir):
+    subdir = source_dir / "sub"
+    subdir.mkdir()
+    (subdir / "nested.md").write_text("# Nested\nContent here")
+    (source_dir / "top.md").write_text("# Top\nContent here")
+    db_path = str(work_dir / "test.db")
+    args = make_args(source_dir, db=db_path, recursive=False)
+    cmd_index_sources(args)
+
+    conn = sqlite3.connect(db_path)
+    cur = conn.execute("SELECT COUNT(DISTINCT filename) FROM chunks")
+    assert cur.fetchone()[0] == 1
+    conn.close()
+
+
+def test_chunk_type_stored(source_dir, work_dir):
+    (source_dir / "doc.md").write_text("# Hello\nContent")
+    db_path = str(work_dir / "test.db")
+    args = make_args(source_dir, db=db_path, chunk_type="summary")
+    cmd_index_sources(args)
+
+    conn = sqlite3.connect(db_path)
+    cur = conn.execute("SELECT chunk_type FROM chunks")
+    types = [row[0] for row in cur.fetchall()]
+    assert all(t == "summary" for t in types)
+    conn.close()
+
+
+def test_fts5_search_works(source_dir, work_dir):
+    (source_dir / "doc.md").write_text("# Kubernetes\nPod scheduling and node affinity rules.")
+    db_path = str(work_dir / "test.db")
+    args = make_args(source_dir, db=db_path)
+    cmd_index_sources(args)
+
+    conn = sqlite3.connect(db_path)
+    conn.row_factory = sqlite3.Row
+    cur = conn.execute("""
+        SELECT c.text, c.filename
+        FROM chunks_fts
+        JOIN chunks c ON c.id = chunks_fts.rowid
+        WHERE chunks_fts MATCH 'kubernetes'
+    """)
+    results = [dict(row) for row in cur.fetchall()]
+    assert len(results) >= 1
+    assert "Kubernetes" in results[0]["text"]
+    conn.close()
+
+
+def test_large_file_chunked(source_dir, work_dir):
+    text = "# Section 1\n" + "x" * 3000 + "\n\n# Section 2\n" + "y" * 3000
+    (source_dir / "big.md").write_text(text)
+    db_path = str(work_dir / "test.db")
+    args = make_args(source_dir, db=db_path, chunk_size=2000)
+    cmd_index_sources(args)
+
+    conn = sqlite3.connect(db_path)
+    cur = conn.execute("SELECT COUNT(*) FROM chunks")
+    assert cur.fetchone()[0] == 2
+    conn.close()
+
+
+def test_source_url_from_frontmatter(source_dir, work_dir):
+    fm = "---\nsource_url: https://example.com/doc\n---\n\n# Hello\nContent"
+    (source_dir / "doc.md").write_text(fm)
+    db_path = str(work_dir / "test.db")
+    args = make_args(source_dir, db=db_path)
+    cmd_index_sources(args)
+
+    conn = sqlite3.connect(db_path)
+    cur = conn.execute("SELECT source_url FROM chunks")
+    urls = [row[0] for row in cur.fetchall()]
+    assert all(u == "https://example.com/doc" for u in urls)
+    conn.close()

```

## Your Task

Analyze the diff and identify what additional information you need to render confident verdicts.
Do NOT render verdicts yet. Only request observations.

## Available Observation Tools

| Tool | Purpose | When to use |
|------|---------|-------------|
| `exception_hierarchy` | Show exception MRO and subclasses | Retry logic, exception handling |
| `raises_analysis` | What exceptions a function raises | New function calls, error paths |
| `call_graph` | What a function calls | Impact analysis |
| `find_usages` | Where a symbol is used (with prod/test split) | Quick integration lookup |
| `find_callers` | Caller analysis with prod/test split and calling context | Method signature changes, return type changes, constructor modifications, integration verification |
| `test_coverage` | Find tests for a file (uses coverage-map if available) | Test coverage claims |
| `coverage_map_tests` | Find tests covering a file (from coverage-map.json) | Precise test coverage from actual execution |
| `coverage_map_files` | Find files covered by tests matching a pattern | Impact analysis for test changes |
| `function_body` | Full source of a function/method | Need complete function context beyond diff hunks |
| `file_imports` | Extract imports from a file | Verify import changes, check dependencies |
| `project_dependencies` | Get pyproject.toml/requirements.txt | Verify new imports have dependencies |
| `related_test_files` | Find test files for a source file | Discover tests by naming, imports, and coverage map |
| `class_hierarchy` | Show base classes and their `__init__` signatures | Class changes its parent, modifies `__init__`, or uses `super()` |
| `symbol_migration` | Check if a rename is complete across the repo | Symbol renamed in diff — verify old name is fully removed |
| `generator_info` | Report whether a function uses `yield` | Function might be a generator — affects return value semantics |

## What to Look For

1. **Exception handling**: Any `retry_if_exception_type`, `except`, or exception class references
2. **New dependencies**: Calls to external libraries where you don't know the error behavior
3. **Behavioral changes**: Modified logic where you need to verify callers/callees
4. **Test claims**: References to tests you can't see in the diff
5. **Inheritance changes**: Class definition changes, new base classes, `super()` calls
6. **Renames**: Symbols that appear to have been renamed in the diff
7. **Factory methods**: Calls to `@classmethod` / `@staticmethod` constructors (e.g. `Result.error(...)`) — request `function_body` to see their implementation

## Output Format

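Output a JSON array of observation requests, one object per request (the trailing `...` in the template below is a placeholder for further entries, not literal output):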

```json
[
  {"name": "descriptive_name", "tool": "tool_name", "params": {"param": "value"}},
  ...
]
```

If you don't need any observations (for example, the change is simple and all the context you need is already in the diff), output:

```json
[]
```

## Examples

For a diff containing `retry_if_exception_type((OSError, httpx.TransportError))`:
```json
[
  {"name": "oserror_subclasses", "tool": "exception_hierarchy", "params": {"class_name": "builtins.OSError"}},
  {"name": "transport_errors", "tool": "exception_hierarchy", "params": {"class_name": "httpx.TransportError"}}
]
```

For a diff adding a new function that calls `oauth_client.get_access_token()`:
```json
[
  {"name": "oauth_exceptions", "tool": "raises_analysis", "params": {"file_path": "src/auth/oauth.py", "function_name": "get_access_token"}}
]
```

For a diff modifying a method but you need the full function to verify:
```json
[
  {"name": "full_getattr", "tool": "function_body", "params": {"file_path": "src/proxy.py", "function_name": "__getattr__"}}
]
```

For a diff changing a method signature or return type (verify all callers):
```json
[
  {"name": "handle_request_callers", "tool": "find_callers", "params": {"symbol": "handle_request"}}
]
```

For a diff adding new imports (e.g., `import httpx`):
```json
[
  {"name": "file_imports", "tool": "file_imports", "params": {"file_path": "src/client.py"}},
  {"name": "project_deps", "tool": "project_dependencies", "params": {}}
]
```

For a diff calling a factory method like `ModuleResult.error_result(msg)`:
```json
[
  {"name": "error_result_body", "tool": "function_body", "params": {"file_path": "src/models.py", "function_name": "error_result"}}
]
```

For a diff where a class changes its parent class:
```json
[
  {"name": "client_hierarchy", "tool": "class_hierarchy", "params": {"class_name": "MyClient", "file_path": "src/client.py"}}
]
```

For a diff that renames a symbol (e.g., `OldClient` to `NewClient`):
```json
[
  {"name": "client_rename", "tool": "symbol_migration", "params": {"old_name": "OldClient", "new_name": "NewClient"}}
]
```

For a diff modifying a function that might be a generator:
```json
[
  {"name": "process_gen", "tool": "generator_info", "params": {"file_path": "src/pipeline.py", "function_name": "process_items"}}
]
```

Now analyze the diff above and output your observation requests as JSON:
