# Environment variables
# `.env.*` already matches `.env.local`; the explicit entry is kept for readability.
# NOTE(review): `.env.*` also matches template files such as `.env.example` —
# add a `!.env.example` rule if a template is ever meant to be tracked.
.env
.env.local
.env.*
# Data dumps and serialized objects: ignored by extension at every depth.
# Individual JSON files are re-included by the `!` rules later in this file.
*.csv
*.json
*.pkl
# No slash in the pattern, so this matches any file or directory named `env`
# at any depth (presumably a virtualenv directory — confirm).
env

# Gmail Assistant tokens, logs, and reports
# (token.json is also matched by the global `*.json` rule; it is listed explicitly as well.)
# These paths contain a middle slash, so they are anchored to the directory
# that holds this .gitignore.
Gmail_Assistant/token.json
Gmail_Assistant/logs/
Gmail_Assistant/reports/
Gmail_Assistant/config/backups/

# Windows reserved device names (created accidentally by 2>NUL in bash)
NUL
# A leading `**/` matches in every directory, so this covers nul.ipynb at any depth.
**/nul.ipynb

# ---- Sensitive credentials and secrets ----
# Private keys / certificates
# (extension globs without a slash match at any depth)
*.pem
*.key
*.p12
*.pfx
*.jks

# Auth tokens and session cookies
*token*.txt
*.token
*cookie*.txt
# Already matched by `*cookie*.txt` above (`*` also matches a leading dot);
# kept as an explicit entry.
.saleroom_cookie_header.txt

# AWS credentials
# `.aws/credentials` contains a middle slash, so it is anchored to the directory
# holding this .gitignore and does NOT match nested `.aws/` folders.
# NOTE(review): use `**/.aws/credentials` if nested copies must be covered too.
.aws/credentials
aws_credentials*

# Project-specific: Cartier deliverable (contains client-embedded AWS/API keys pending rotation)
# Only a trailing slash: matches a directory with this name at any depth.
Cartier_Search_Engine/

# Claude Code worktrees (may duplicate credentials)
# Anchored to this .gitignore's directory because of the middle slash.
.claude/worktrees/

# Virtualenv / caches / build artifacts
# Trailing slash = directories only; no other slash = matched at any depth.
.venv/
venv/
__pycache__/
*.pyc
*.pyo

# Logs
*.log
# Contains a middle slash, so it is anchored to this .gitignore's directory.
Scheduled_Local_Tasks/Pipeline_Status_Logs/

# Downloaded / temporary data
downloaded_files/

# Large binary tools bundled locally (not source)
ffmpeg-master-latest-win64-gpl/

# Downloaded media (scraping artifacts)
# `**/youtube_videos/` matches a directory named youtube_videos at any depth.
**/youtube_videos/
*.mp4
*.mov
*.webm

# Large pipeline output / test fixtures (regenerated, not source)
# `test_*.jsonl` has no slash, so it matches at any depth; the `**/Dir/*.jsonl`
# rules match only .jsonl files directly inside a directory with that name.
test_*.jsonl
**/Pipeline_Output/*.jsonl
**/Merged/*.jsonl

# Gemini reclassification notebook outputs (large generated data)
# Anchored paths; the trailing `*` also catches suffixed copies of each notebook.
AI_Models/Gemini/1_gemini_output_watches_reclassify*.ipynb
AI_Models/Gemini/2_gemini_merge_output_watches*.ipynb

# Failed-log artifacts
# Needed in addition to `*.log`: a name like `x.log.failed` does not end in `.log`.
*.log.failed*

# ---- Direct-vendor scraping (Bidpath, EasyLive, etc.) ----
# Whitelist vendor configs that should be tracked (override global *.json rule)
# ORDER MATTERS: the last matching pattern wins, so these `!` rules must stay
# below the global `*.json` rule.
# The directory negation currently has nothing to undo (no rule in this file
# ignores the directory itself); the per-file negation is what re-includes the
# JSON files. A file can only be re-included while its parent directory is not ignored.
!Data Collection/vendor-configs/
!Data Collection/vendor-configs/*.json

# Whitelist small reference outputs (top-100 vendor list, discovery results, probe results)
# Each entry re-includes exactly one file that `*.json` would otherwise ignore.
!Data Collection/vendor_top100.json
!Data Collection/vendor_directory.json
!Data Collection/bidpath_probe_results.json
!Data Collection/step5_jewelry_targets_filtered.json

# Block large local caches (regenerable from S3)
# The .json and .csv entries below are already covered by the global `*.json` /
# `*.csv` rules and are not re-included by any `!` rule in this file, so they
# are redundant but harmless. Only the .txt and .sql entries add new matches.
Data Collection/_cache_*.json
Data Collection/cross_platform_matches.json
Data Collection/cross_platform_merge_manifest.json
Data Collection/saleroom_split_manifest.json
Data Collection/bonhams_enrichment_candidates*.json
Data Collection/bonhams_enrichment_strict.json
Data Collection/bonhams_enrichment_summary.json
Data Collection/bonhams_enrichment_execution_summary.json
Data Collection/bonhams_categories_enrichment_summary.json
Data Collection/bonhams_jewelry_v2_summary.json
Data Collection/bonhams_gwh_v2_summary.json
Data Collection/enrichment_strategy_comparison.json
# Not whitelisted: only the `_filtered` variant of this file is re-included by a `!` rule.
Data Collection/step5_jewelry_targets.json
Data Collection/saleroom_vendors_captured.txt
Data Collection/cross_platform_matches.csv
Data Collection/enrich_bonhams_*.sql

# Auth files, listed explicitly. The cookie header is already matched by the
# `*cookie*.txt` / `.saleroom_cookie_header.txt` rules and the cookies JSON by
# the global `*.json` rule; the user-agent file is matched only here.
Data Collection/.saleroom_cookie_header.txt
Data Collection/.saleroom_user_agent.txt
Data Collection/.saleroom_cookies.json
# Not an auth file: notebooks ending in `_out.ipynb` directly inside Tajan/
# (presumably executed-notebook outputs — confirm).
Data Collection/Tajan/*_out.ipynb

# Repeat-detection cache: keep only small artifacts; skip ALL the giant parquet datasets.
# A single wildcard covers every .parquet file directly inside the cache directory,
# including jewelry_lots.parquet and jewelry_verified.parquet (previously listed
# individually). `*` does not cross `/`, so parquet files in subdirectories are
# not matched by this rule.
repeat_detection_cache/*.parquet

# Barrett-Jackson raw API dump (large, regenerable from S3/API)
# Anchored path to one specific file.
Data Collection/Barrett-Jackson/bj_api_pre2023.jsonl

# Branding storybook/Figma BUILD artifacts (~956MB of node_modules + storybook-static,
# regenerable; the storybooks deploy to the altfinancedbstatuspage S3 bucket).
# NOTE: Branding/AltFinance is intentionally NOT ignored — it's source (docs, email_system, covers).
# NOTE(review): each rule below ignores the ENTIRE brand directory, not only its
# node_modules / storybook-static subfolders — confirm no hand-written source
# that should be tracked lives in these directories.
Branding/Bloomberg/
Branding/BusinessInsider/
Branding/AlphaSense/
Branding/LoversByShan/
Branding/Figma_Projects/
Branding/JPMorgan/

# Safety net: never track node_modules anywhere (a JPMorgan storybook leaked ~7.5k untracked files)
# `**/node_modules/` matches a node_modules directory at any depth.
**/node_modules/
