You are a senior code reviewer preparing to review code changes.

## Code Changes

```diff
diff --git a/expert_build/prompts.py b/expert_build/prompts.py
index 2526135..9110d81 100644
--- a/expert_build/prompts.py
+++ b/expert_build/prompts.py
@@ -6,8 +6,10 @@
 Given the following documentation page, create a concise summary suitable for \
 building domain expertise. Structure your output as:
 
-## Overview
-One paragraph summarizing what this page covers.
+## <Descriptive Title>
+Start with a short, specific title that names the topic (e.g., \
+"IAM Role Configuration", "Network Policy Rules", "Cluster Autoscaling"). \
+Then one paragraph summarizing what this page covers.
 
 ## Key Concepts
 Bulleted list of the most important facts, definitions, and concepts.
@@ -28,6 +30,39 @@
 {content}
 """
 
+SUMMARIZE_CODE = """\
+You are an expert technical writer creating structured notes from source code.
+
+Given the following source code file, create a concise summary focused on how \
+this code is used in practice. Structure your output as:
+
+## <Descriptive Title>
+Start with a short, specific title that names the module or component (e.g., \
+"CLI Entry Point", "PDF Chunker", "LLM Invocation Layer"). Then one paragraph \
+summarizing what this code does and its role in the project.
+
+## Usage Patterns
+How this code is meant to be called or used — entry points, key functions, \
+typical invocations. Include code snippets where helpful.
+
+## API and Configuration
+Key parameters, options, environment variables, config files, or arguments \
+this code accepts.
+
+## Key Behaviors
+Important behaviors, error handling, edge cases, or gotchas a user should know about.
+
+## Relationships
+How this code connects to other components — what it imports, what calls it, \
+what services or systems it interacts with.
+
+---
+
+SOURCE CODE:
+
+{content}
+"""
+
 PROPOSE_BELIEFS = """\
 You are extracting factual claims from study notes to build a belief registry.
 
diff --git a/expert_build/summarize.py b/expert_build/summarize.py
index 5fb718c..e0d6e0b 100644
--- a/expert_build/summarize.py
+++ b/expert_build/summarize.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 
 from .llm import check_model_available, invoke_sync
-from .prompts import SUMMARIZE
+from .prompts import SUMMARIZE, SUMMARIZE_CODE
 
 
 def cmd_summarize(args):
@@ -24,9 +24,12 @@ def cmd_summarize(args):
         print("Install claude CLI or specify --model")
         sys.exit(1)
 
-    sources = sorted(input_dir.glob("*.md"))
+    sources = sorted(
+        [*input_dir.glob("*.md"), *input_dir.glob("*.py")],
+        key=lambda p: p.name,
+    )
     if not sources:
-        print(f"No .md files in {input_dir}")
+        print(f"No .md or .py files in {input_dir}")
         return
 
     if args.limit:
@@ -70,9 +73,17 @@ def cmd_summarize(args):
 
         # Truncate very long documents
         if len(content) > 30000:
+            original_len = len(content)
             content = content[:30000] + "\n\n[Truncated — original was longer]"
+            if source_path.suffix == ".pdf":
+                print(f"  WARN: truncated from {original_len} to 30000 chars. "
+                      f"Consider: expert-build chunk-pdf {source_path}")
+            else:
+                print(f"  WARN: truncated from {original_len} to 30000 chars. "
+                      f"Large documents may lose tail content.")
 
-        prompt = SUMMARIZE.format(content=content)
+        template = SUMMARIZE_CODE if source_path.suffix == ".py" else SUMMARIZE
+        prompt = template.format(content=content)
 
         try:
             summary = invoke_sync(prompt, model=args.model)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_summarize.py b/tests/test_summarize.py
new file mode 100644
index 0000000..5d4d0c1
--- /dev/null
+++ b/tests/test_summarize.py
@@ -0,0 +1,243 @@
+"""Tests for expert_build.summarize."""
+
+import types
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+from expert_build.summarize import cmd_summarize
+from expert_build.prompts import SUMMARIZE, SUMMARIZE_CODE
+
+
+# --- Fixtures ---
+
+@pytest.fixture
+def source_dir(tmp_path):
+    """Create a temp directory with sample source files."""
+    src = tmp_path / "sources"
+    src.mkdir()
+    return src
+
+
+@pytest.fixture
+def work_dir(tmp_path, monkeypatch):
+    """Set working directory to tmp_path so .summarized manifest is isolated."""
+    wd = tmp_path / "work"
+    wd.mkdir()
+    monkeypatch.chdir(wd)
+    return wd
+
+
+def make_args(input_dir, model="test-model", limit=None):
+    return types.SimpleNamespace(input_dir=str(input_dir), model=model, limit=limit)
+
+
+# --- File discovery tests ---
+
+def test_discovers_md_files(source_dir, work_dir):
+    (source_dir / "doc.md").write_text("# Hello\nSome content")
+    args = make_args(source_dir)
+
+    with patch("expert_build.summarize.check_model_available", return_value=True), \
+         patch("expert_build.summarize.invoke_sync", return_value="## Topic Title\nSummary"), \
+         patch("subprocess.run") as mock_run:
+        mock_run.return_value = MagicMock(returncode=0, stdout="Created entries/doc.md", stderr="")
+        cmd_summarize(args)
+
+    assert mock_run.called
+
+
+def test_discovers_py_files(source_dir, work_dir):
+    (source_dir / "module.py").write_text("def hello(): pass")
+    args = make_args(source_dir)
+
+    with patch("expert_build.summarize.check_model_available", return_value=True), \
+         patch("expert_build.summarize.invoke_sync", return_value="## Module\nSummary") as mock_llm, \
+         patch("subprocess.run") as mock_run:
+        mock_run.return_value = MagicMock(returncode=0, stdout="Created entries/module.md", stderr="")
+        cmd_summarize(args)
+
+    assert mock_llm.called
+
+
+def test_discovers_both_md_and_py(source_dir, work_dir):
+    (source_dir / "alpha.md").write_text("# Alpha\nContent")
+    (source_dir / "beta.py").write_text("x = 1")
+    args = make_args(source_dir)
+
+    calls = []
+    with patch("expert_build.summarize.check_model_available", return_value=True), \
+         patch("expert_build.summarize.invoke_sync", return_value="## Title\nSummary") as mock_llm, \
+         patch("subprocess.run") as mock_run:
+        mock_run.return_value = MagicMock(returncode=0, stdout="Created entries/x.md", stderr="")
+        cmd_summarize(args)
+
+    assert mock_llm.call_count == 2
+
+
+def test_ignores_other_extensions(source_dir, work_dir):
+    (source_dir / "data.json").write_text("{}")
+    (source_dir / "notes.txt").write_text("hello")
+    args = make_args(source_dir)
+
+    with patch("expert_build.summarize.check_model_available", return_value=True), \
+         patch("expert_build.summarize.invoke_sync") as mock_llm:
+        cmd_summarize(args)
+
+    assert not mock_llm.called
+
+
+# --- Template selection tests ---
+
+def test_uses_summarize_code_for_py(source_dir, work_dir):
+    (source_dir / "module.py").write_text("def hello(): pass")
+    args = make_args(source_dir)
+
+    with patch("expert_build.summarize.check_model_available", return_value=True), \
+         patch("expert_build.summarize.invoke_sync", return_value="## Module\nSummary") as mock_llm, \
+         patch("subprocess.run") as mock_run:
+        mock_run.return_value = MagicMock(returncode=0, stdout="Created entries/module.md", stderr="")
+        cmd_summarize(args)
+
+    prompt = mock_llm.call_args[0][0]
+    assert "source code" in prompt.lower()
+
+
+def test_uses_summarize_for_md(source_dir, work_dir):
+    (source_dir / "doc.md").write_text("# Hello\nSome content")
+    args = make_args(source_dir)
+
+    with patch("expert_build.summarize.check_model_available", return_value=True), \
+         patch("expert_build.summarize.invoke_sync", return_value="## Doc Title\nSummary") as mock_llm, \
+         patch("subprocess.run") as mock_run:
+        mock_run.return_value = MagicMock(returncode=0, stdout="Created entries/doc.md", stderr="")
+        cmd_summarize(args)
+
+    prompt = mock_llm.call_args[0][0]
+    assert "documentation page" in prompt.lower()
+
+
+# --- Truncation tests ---
+
+def test_truncation_warning_for_large_file(source_dir, work_dir, capsys):
+    (source_dir / "big.md").write_text("x" * 50000)
+    args = make_args(source_dir)
+
+    with patch("expert_build.summarize.check_model_available", return_value=True), \
+         patch("expert_build.summarize.invoke_sync", return_value="## Big Doc\nSummary") as mock_llm, \
+         patch("subprocess.run") as mock_run:
+        mock_run.return_value = MagicMock(returncode=0, stdout="Created entries/big.md", stderr="")
+        cmd_summarize(args)
+
+    captured = capsys.readouterr()
+    assert "WARN: truncated from 50000 to 30000 chars" in captured.out
+    assert "Large documents may lose tail content" in captured.out
+
+
+def test_truncation_content_is_capped(source_dir, work_dir):
+    (source_dir / "big.md").write_text("x" * 50000)
+    args = make_args(source_dir)
+
+    with patch("expert_build.summarize.check_model_available", return_value=True), \
+         patch("expert_build.summarize.invoke_sync", return_value="## Big\nSummary") as mock_llm, \
+         patch("subprocess.run") as mock_run:
+        mock_run.return_value = MagicMock(returncode=0, stdout="Created entries/big.md", stderr="")
+        cmd_summarize(args)
+
+    prompt = mock_llm.call_args[0][0]
+    assert "[Truncated" in prompt
+    assert len(prompt) < 50000
+
+
+def test_no_truncation_warning_for_small_file(source_dir, work_dir, capsys):
+    (source_dir / "small.md").write_text("Short content")
+    args = make_args(source_dir)
+
+    with patch("expert_build.summarize.check_model_available", return_value=True), \
+         patch("expert_build.summarize.invoke_sync", return_value="## Small\nSummary") as mock_llm, \
+         patch("subprocess.run") as mock_run:
+        mock_run.return_value = MagicMock(returncode=0, stdout="Created entries/small.md", stderr="")
+        cmd_summarize(args)
+
+    captured = capsys.readouterr()
+    assert "WARN" not in captured.out
+
+
+# --- Manifest / idempotency tests ---
+
+def test_skips_already_summarized(source_dir, work_dir):
+    (source_dir / "doc.md").write_text("# Hello\nContent")
+    manifest = work_dir / ".summarized"
+    manifest.write_text(f"{source_dir / 'doc.md'}\n")
+    args = make_args(source_dir)
+
+    with patch("expert_build.summarize.check_model_available", return_value=True), \
+         patch("expert_build.summarize.invoke_sync") as mock_llm:
+        cmd_summarize(args)
+
+    assert not mock_llm.called
+
+
+def test_manifest_records_processed_file(source_dir, work_dir):
+    (source_dir / "doc.md").write_text("# Hello\nContent")
+    args = make_args(source_dir)
+
+    with patch("expert_build.summarize.check_model_available", return_value=True), \
+         patch("expert_build.summarize.invoke_sync", return_value="## Title\nSummary"), \
+         patch("subprocess.run") as mock_run:
+        mock_run.return_value = MagicMock(returncode=0, stdout="Created entries/doc.md", stderr="")
+        cmd_summarize(args)
+
+    manifest = work_dir / ".summarized"
+    assert manifest.exists()
+    assert str(source_dir / "doc.md") in manifest.read_text()
+
+
+# --- Frontmatter stripping tests ---
+
+def test_strips_frontmatter_before_summarizing(source_dir, work_dir):
+    content = "---\nsource_url: https://example.com\n---\n\nActual content here"
+    (source_dir / "doc.md").write_text(content)
+    args = make_args(source_dir)
+
+    with patch("expert_build.summarize.check_model_available", return_value=True), \
+         patch("expert_build.summarize.invoke_sync", return_value="## Title\nSummary") as mock_llm, \
+         patch("subprocess.run") as mock_run:
+        mock_run.return_value = MagicMock(returncode=0, stdout="Created entries/doc.md", stderr="")
+        cmd_summarize(args)
+
+    prompt = mock_llm.call_args[0][0]
+    assert "source_url" not in prompt
+    assert "Actual content here" in prompt
+
+
+def test_skips_empty_content_after_frontmatter(source_dir, work_dir, capsys):
+    (source_dir / "empty.md").write_text("---\nsource_url: https://example.com\n---\n\n")
+    args = make_args(source_dir)
+
+    with patch("expert_build.summarize.check_model_available", return_value=True), \
+         patch("expert_build.summarize.invoke_sync") as mock_llm:
+        cmd_summarize(args)
+
+    assert not mock_llm.called
+    captured = capsys.readouterr()
+    assert "SKIP" in captured.out
+
+
+# --- Prompt template tests ---
+
+def test_summarize_template_requests_descriptive_title():
+    assert "<Descriptive Title>" in SUMMARIZE
+
+
+def test_summarize_code_template_requests_descriptive_title():
+    assert "<Descriptive Title>" in SUMMARIZE_CODE
+
+
+def test_summarize_template_has_content_placeholder():
+    assert "{content}" in SUMMARIZE
+
+
+def test_summarize_code_template_has_content_placeholder():
+    assert "{content}" in SUMMARIZE_CODE

```

## Your Task

Analyze the diff and identify what additional information you need to render confident verdicts.
Do NOT render verdicts yet. Only request observations.

## Available Observation Tools

| Tool | Purpose | When to use |
|------|---------|-------------|
| `exception_hierarchy` | Show exception MRO and subclasses | Retry logic, exception handling |
| `raises_analysis` | What exceptions a function raises | New function calls, error paths |
| `call_graph` | What a function calls | Impact analysis |
| `find_usages` | Where a symbol is used (with prod/test split) | Quick integration lookup |
| `find_callers` | Caller analysis with prod/test split and calling context | Method signature changes, return type changes, constructor modifications, integration verification |
| `test_coverage` | Find tests for a file (uses coverage-map if available) | Test coverage claims |
| `coverage_map_tests` | Find tests covering a file (from coverage-map.json) | Precise test coverage from actual execution |
| `coverage_map_files` | Find files covered by tests matching a pattern | Impact analysis for test changes |
| `function_body` | Full source of a function/method | Need complete function context beyond diff hunks |
| `file_imports` | Extract imports from a file | Verify import changes, check dependencies |
| `project_dependencies` | Get pyproject.toml/requirements.txt | Verify new imports have dependencies |
| `related_test_files` | Find test files for a source file | Discover tests by naming, imports, and coverage map |
| `class_hierarchy` | Show base classes and their `__init__` signatures | Class changes its parent, modifies `__init__`, or uses `super()` |
| `symbol_migration` | Check if a rename is complete across the repo | Symbol renamed in diff — verify old name is fully removed |
| `generator_info` | Report whether a function uses `yield` | Function might be a generator — affects return value semantics |

## What to Look For

1. **Exception handling**: Any `retry_if_exception_type`, `except`, or exception class references
2. **New dependencies**: Calls to external libraries where you don't know the error behavior
3. **Behavioral changes**: Modified logic where you need to verify callers/callees
4. **Test claims**: References to tests you can't see in the diff
5. **Inheritance changes**: Class definition changes, new base classes, `super()` calls
6. **Renames**: Symbols that appear to have been renamed in the diff
7. **Factory methods**: Calls to `@classmethod` / `@staticmethod` constructors (e.g. `Result.error(...)`) — request `function_body` to see their implementation

## Output Format

Output a JSON array of observation requests. Each request is an object with a descriptive `name`, the `tool` to run, and that tool's `params`:

```json
[
  {"name": "descriptive_name", "tool": "tool_name", "params": {"param": "value"}},
  ...
]
```

If you don't need any observations (for example, the changes are simple and all the context you need is already in the diff), output an empty array:

```json
[]
```

## Examples

For a diff containing `retry_if_exception_type((OSError, httpx.TransportError))`:
```json
[
  {"name": "oserror_subclasses", "tool": "exception_hierarchy", "params": {"class_name": "builtins.OSError"}},
  {"name": "transport_errors", "tool": "exception_hierarchy", "params": {"class_name": "httpx.TransportError"}}
]
```

For a diff adding a new function that calls `oauth_client.get_access_token()`:
```json
[
  {"name": "oauth_exceptions", "tool": "raises_analysis", "params": {"file_path": "src/auth/oauth.py", "function_name": "get_access_token"}}
]
```

For a diff modifying a method but you need the full function to verify:
```json
[
  {"name": "full_getattr", "tool": "function_body", "params": {"file_path": "src/proxy.py", "function_name": "__getattr__"}}
]
```

For a diff changing a method signature or return type (verify all callers):
```json
[
  {"name": "handle_request_callers", "tool": "find_callers", "params": {"symbol": "handle_request"}}
]
```

For a diff adding new imports (e.g., `import httpx`):
```json
[
  {"name": "file_imports", "tool": "file_imports", "params": {"file_path": "src/client.py"}},
  {"name": "project_deps", "tool": "project_dependencies", "params": {}}
]
```

For a diff calling a factory method like `ModuleResult.error_result(msg)`:
```json
[
  {"name": "error_result_body", "tool": "function_body", "params": {"file_path": "src/models.py", "function_name": "error_result"}}
]
```

For a diff where a class changes its parent class:
```json
[
  {"name": "client_hierarchy", "tool": "class_hierarchy", "params": {"class_name": "MyClient", "file_path": "src/client.py"}}
]
```

For a diff that renames a symbol (e.g., `OldClient` to `NewClient`):
```json
[
  {"name": "client_rename", "tool": "symbol_migration", "params": {"old_name": "OldClient", "new_name": "NewClient"}}
]
```

For a diff modifying a function that might be a generator:
```json
[
  {"name": "process_gen", "tool": "generator_info", "params": {"file_path": "src/pipeline.py", "function_name": "process_items"}}
]
```

Now analyze the diff above and output your observation requests as JSON:
