# Training run logs (already matched by the packages/training/output/ rule below)
packages/training/output/generate-v1.log
packages/training/output/train-v1.log
# Project-specific
CLAUDE.md
**/.claude/*
!**/.claude/skills/
.mcp.json
.compound-engineering/

# Training outputs — reproducible, only keep scores and configs
packages/training/output/
output/

# Trained model archives — large, regenerate via `make train-hf-v02-tiny`
packages/training/models/*.tar.gz

# Experiment logs — ignore only the autotrain session logs; log.jsonl is not matched and stays tracked
packages/training/experiments/autotrain_*.log

# HuggingFace datasets (Arrow format) — reproducible via `make convert-hf-v02`
packages/training/data/hf/

# Local JSONL dump of the private 0xhikae/pii-masking-300k-ja split (#168).
# Re-dumpable via the snippet in scripts/train_supervised_300k_ja.py header.
packages/training/data/raw/ja-300k-supervised/

# Local JSONL dump of the public ai4privacy/pii-masking-300k EN slice.
# Re-dumpable via `make dump-supervised-en`.
packages/training/data/raw/en-300k-supervised/

# External hard-negative mining outputs (#64) — reproducible via
# `make mine-external-hardneg-ja`; megabyte-scale files, so not committed.
packages/training/data/raw/ja-v02/external_hardneg.json
packages/training/data/raw/ja-v02/generated_with_external.json
packages/training/data/raw/ja-v02/augmented_with_external.json

# Rust/WASM build output (target/) and the Cargo lockfile
**/target/
Cargo.lock

# Benchmark data — reproducible via `make benchmark-v02-generate`, only keep scores
packages/training/data/benchmark/**/raw.json
packages/training/data/benchmark/**/*.spacy

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Virtual environments
venv/
env/
ENV/
.venv

# IDE
.vscode/
.idea/
*.swp
*.swo
*~

# OS
.DS_Store
Thumbs.db

# npm (project uses pnpm)
package-lock.json

# Environment files and secret keys — never commit
.env
.env.local
.env.keys
*.key
# Python coverage data file (coverage.py) — not an environment file
.coverage
