# Planning / design docs that live only on the local machine (never committed)
_planning/

# Python
# -- compiled / generated files
*$py.class
*.py[cod]
*.so
.Python
__pycache__/
# -- packaging and install output
*.egg
*.egg-info/
.installed.cfg
build/
dist/
wheels/

# Virtual environment directories
ENV/
env/
venv/
.venv/

# Test-runner caches and coverage output
.hypothesis/
.pytest_cache/
*.cover
.coverage
.coverage.*
coverage.xml
htmlcov/

# Static-analysis tool caches (mypy / dmypy, pyre, pytype, ruff)
.dmypy.json
dmypy.json
.mypy_cache/
.pyre/
.pytype/
.ruff_cache/

# Editor / IDE settings, swap and backup files, OS metadata
.idea/
.vscode/
*.swo
*.swp
*~
.DS_Store

# Build artifacts
*.log
*.tmp
# TypeScript incremental build info. The wildcard form matches
# <config-name>.tsbuildinfo files at any depth as well as a bare
# .tsbuildinfo (the previous literal pattern matched only the latter).
*.tsbuildinfo

# Benchmark outputs — exclude ad-hoc runs but TRACK named baselines.
# The negated rule re-includes only baseline_*.json and must stay AFTER the
# *.json rule it overrides; CSV and PNG outputs are always ignored.
benchmarks/results/*.json
benchmarks/results/*.csv
benchmarks/results/*.png
!benchmarks/results/baseline_*.json
benchmarks/cache/

# Synthetic Vietnamese PDF fixtures — built from the system DejaVuSans font,
# so they can be regenerated. The .gt.txt ground truth is committed; the
# .pdf files are rebuilt via benchmarks/data/synthetic_pdf_vi/_generate.py.
benchmarks/data/synthetic_pdf_vi/*.pdf

# Training datasets and checkpoints — multi-GB; regeneratable via
# training/diacritic/prep_data.py + train.py. Stats are committed.
# The checkpoints rule applies to every direct subdirectory of training/.
training/*/checkpoints/
training/diacritic/data/*.jsonl
training/spell_correction/data/*.jsonl

# Local model and dataset caches — these are not shipped
data/raw/
models/
*.bin
*.gguf
*.safetensors

# Secrets — never check these in.
# .env.example is re-included by the negated rule, which must stay AFTER
# the .env.* rule it overrides.
.env
.env.*
!.env.example
*.pem
*.key
secrets.toml

# Local state kept by editors / agents — never tracked
.playwright-mcp/
.claude/

# Research notes that stay local (shipping documentation belongs in docs/)
docs/research/
research/

# Installed JavaScript dependencies
ui/node_modules/
node_modules/

# VitePress site build output
docs/.vitepress/dist/
docs/.vitepress/cache/

# Frontend build artifacts. ui/dist/ and src/nom/chat/ui_dist/ are
# regenerated by `scripts/build_ui.sh` and bundled into the wheel via
# `[tool.hatch.build.targets.wheel] artifacts`; never check the built
# JS/CSS into git history.
ui/dist/
ui/.vite/
ui/*.tsbuildinfo
src/nom/chat/ui_dist/

# Data directories used by the server at runtime (the default ~/.nom
# layout and the demo dir)
nom-demo/
.nom/

# Large RAG benchmark fixtures (sampled from public Hugging Face datasets).
# A mirror is hosted at https://huggingface.co/datasets/nrl-ai/vn-rag-bench.
# To regenerate locally, run benchmarks/rag/fixtures/_build_zalo_legal.py.
benchmarks/rag/fixtures/vn_legal_zalo_*.json

# OCR fixture images (regenerate via benchmarks/data/vn_ocr_subset/_build.py).
# Mirrored at https://huggingface.co/datasets/nrl-ai/vn-rag-bench.
benchmarks/data/vn_ocr_subset/images/

# int8 export directories under training/onnx_export/ (spell-correction and
# diacritic variants). NOTE(review): presumably regenerated by the export
# step — confirm.
training/onnx_export/vn-spell-correction-*-int8*/
training/onnx_export/vn-diacritic-*-int8*/

# OCR-correction training artifacts (data, checkpoints, run log) —
# regenerated locally
training/ocr_correction/run.log
training/ocr_correction/checkpoints/
training/ocr_correction/data/*.jsonl
# Local QA screenshot directories (dated, e.g. .qa-screenshots-2026-05-03/).
# The wildcard keeps future dated runs out of `git status` as well.
.qa-screenshots-*/
