# Python
__pycache__/
*.py[cod]
*.egg-info/
dist/
build/

# Environments
.venv/
.env
.envrc

# Tools
.ruff_cache/
.pytest_cache/
.mypy_cache/
node_modules/

# IDE
.idea/
.vscode/
*.swp

# OS
.DS_Store
Thumbs.db

# Project
.tmp/

# Input data (SCB CSVs, Socialstyrelsen metadata, source PDFs — not committed).
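# The pattern ignores the directory's *contents* (`input_data/*`) rather than
# the directory itself, so the maintainer-curated classifications/ folder can
# be re-included with the negation below. Ignoring `input_data/` at the
# directory level would not work: git doesn't traverse into an ignored parent
# directory, so a `!` re-include underneath it would have no effect.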
regmeta/input_data/*
!regmeta/input_data/classifications/

# mock-data-wizard generated output
mock_data/
mock_output/
mdw_runner.py
mdw_step1_discovery.json
mdw_step2_config.json
mdw_step3_stats.json
extract_stats*.R

# mock-data-wizard runtime artifacts (transient — fcntl sidecar, run logs)
.mock_data_config.lock
mdw_log_*.txt

# Local test workspaces (real user data, not part of the toolkit)
/covid-education-immigrants-test/

# Personal exploration scripts (not part of the toolkit's curated scripts/)
scripts/sample_*.py

# MONA probe artifacts — can contain workspace metadata (paths,
# hostnames, DSNs); inspect before sharing. The findings that matter
# live in mock_data_wizard/DESIGN.md.
mdw_probe_*.log
mdw_python_probe_*.log
mdw_py_probe_*.csv
mdw_upload_probe_*.txt

# regmeta database (built from SCB exports, not committed)
*.db

# SCB source PDFs (binary, copyrighted, not committed)
*.pdf

# Marker raw output (regenerable from PDFs + parser)
regmeta/docs/_raw/

# Archive (concluded investigations, ad-hoc scripts, internal notes)
archive/
*.har

# Claude Code harness state (transient locks, per-user settings, worktree shims).
# `.claude/skills/` is intentionally tracked for shared skills, so ignore the
# transient pieces individually rather than blanket-ignoring `.claude/`.
.claude/scheduled_tasks.lock
.claude/worktrees/
.claude/settings.local.json
