# Memgar AI Security System - .gitignore

# ============================================================================
# ML model binaries (large files; regenerate with: python rebuild_model.py)
# ============================================================================
# Serialized model / weight files, ignored by extension anywhere in the tree.
# NOTE(review): `*.model` matches any file with that extension, which would
# include tokenizer vocabularies named `*.model` — confirm none are expected
# among the committed tokenizer files.
*.ckpt
*.h5
*.model
*.pkl
*.pth
*.weights

# Transformer / ONNX artifacts.
# The model directories themselves (ml/artifacts/transformer_model*/) are
# never ignored as a whole: once a directory is ignored git does not descend
# into it, so `!` exceptions for files inside it have no effect. Instead we
# ignore the unwanted files inside them by glob; the inference-time artifacts
# listed at the end of this section stay tracked simply because no pattern
# matches them.
# Directory-level rules are used only for directories in which nothing at all
# should be committed (transformer_checkpoints/ and transformer_model/pytorch/).

# General rule: ignore safetensors weights everywhere in the repository.
# (No pattern in this section ignores *.onnx.)
*.safetensors

# Ignore training-time checkpoints (large optimizer + scheduler state)
ml/artifacts/transformer_checkpoints/

# Ignore the training-time PyTorch dump inside transformer_model/
# (we ship the ONNX export only — pytorch/*.bin is ~44MB and not used at inference)
ml/artifacts/transformer_model/pytorch/

# Ignore training-format weight files at any depth under BOTH model dirs
# (transformer_model/ and transformer_model_int8/). The `*` after
# `transformer_model` is what extends the rule to the int8 directory; `**/`
# also matches zero directories, so files directly inside the model dir are
# covered too. No parent directory is itself ignored, so a later `!` rule
# could still re-include an individual file if that ever becomes necessary.
ml/artifacts/transformer_model*/**/*.bin
ml/artifacts/transformer_model*/**/*.pt
ml/artifacts/transformer_model*/**/*.pth
ml/artifacts/transformer_model*/**/*.safetensors

# Explicit inference artifacts we DO commit:
#   transformer_model/model.onnx        (ONNX FP32, ~45MB)
#   transformer_model/config.json       (model metadata, <1KB)
#   transformer_model/tokenizer/*       (~950KB)
#   transformer_model_int8/model.onnx   (ONNX int8, ~12MB)
#   transformer_model_int8/config.json
#   transformer_model_int8/tokenizer.json + friends (when present)

# Keep current model config, ignore backups and versioned snapshots.
# `*.pkl` only matches names that END in `.pkl`, so `<name>.pkl.config.json`
# is not caught by any rule above in this file; the negation below is a
# defensive no-op with respect to those rules.
!*.pkl.config.json
*.pkl.backup
# Versioned snapshots: file names containing `_v`, exactly eight digits, then
# `_`. The first pattern is a subset of the second.
# NOTE(review): the later `!*.json` rule in the "Keep These" section
# re-includes every JSON file, so these two patterns are overridden unless a
# rule after it ignores the file again (as the ml/artifacts and ml/data
# patterns near the end of the file do).
*_v[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_*.pkl.config.json
*_v[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_*.json

# ============================================================================
# Training Data (Optional - uncomment if too large)
# ============================================================================
# training_data.json

# ============================================================================
# Python
# ============================================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
# NOTE: none of these patterns contains a leading or middle slash, so each one
# matches at any depth — e.g. `lib/` ignores every directory named `lib`, not
# only one at the repository root.
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
*.manifest
*.spec

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Jupyter Notebook
# `*.ipynb` ignores every notebook, not only the checkpoint copies.
.ipynb_checkpoints
*.ipynb

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# Environments
# `.env` and `.venv` have no trailing slash, so they match either a file or a
# directory with that name.
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# ============================================================================
# Editors and IDEs
# ============================================================================
# Visual Studio Code: settings directory and workspace files
*.code-workspace
.vscode/

# PyCharm / JetBrains: project metadata
*.iml
*.iws
.idea/

# Sublime Text: project and workspace files
*.sublime-workspace
*.sublime-project

# Vim swap files
*.swo
*.swp

# Backup files ending in `~` (left behind by both Vim and Emacs)
*~

# Emacs `#name#` and `.#name` files; the leading `#` is backslash-escaped so
# the line is not parsed as a comment
\#*\#
.\#*

# ============================================================================
# Operating-system clutter
# ============================================================================
# macOS
.LSOverride
.AppleDouble
.DS_Store

# Windows
$RECYCLE.BIN/
Desktop.ini
ehthumbs.db
Thumbs.db

# Linux: backup files ending in `~`
*~

# ============================================================================
# Project-specific
# ============================================================================
# Scratch directories, temporary files and logs
tmp/
temp/
*.log
*.tmp

# Output written by test runs
test_output/
test_results/

# Machine-local configuration and secrets
.env.local
secrets.json
local_config.json

# Private keys — NEVER commit these. `*.pem` already covers the feed signing
# key; it is also listed by name for emphasis.
*.pem
feed_private.pem

# Profiler output
*.lprof
*.prof

# Database files (if any)
*.sqlite3
*.sqlite
*.db

# ============================================================================
# Git LFS (if using)
# ============================================================================
# Uncomment if using Git LFS for models:
# !.gitattributes

# ============================================================================
# Documentation build output
# ============================================================================
# Generated documentation directories; never committed.
site/
docs/_build/

# ============================================================================
# Keep These
# ============================================================================
# How the `!` rules below behave:
#   * Within this file the LAST matching pattern wins, so a blanket negation
#     such as `!*.json` re-includes files that earlier sections ignored by
#     name. Anything that must stay ignored has to be listed again AFTER the
#     negation.
#   * A negation cannot re-include a file whose parent directory is ignored.
#     Files under ignored directories (build/, venv/, site/, ...) therefore
#     stay ignored regardless of these rules.

# Keep directory structure
!.gitkeep

# feeds/ — signed threat-intelligence bundle (tracked, published via publish-feed.yml)
# feeds/ is intentionally NOT ignored

# Keep important configs
!requirements.txt
!setup.py
!pyproject.toml

# Keep all markdown docs
!*.md

# Keep all Python source...
!*.py
# ...except the per-user IPython config, which `!*.py` would otherwise
# re-include despite being ignored in the Python section.
ipython_config.py

# Keep JSON configs, except secrets and machine-local settings. Both are
# ignored earlier in the file, but `!*.json` overrides those rules, so they
# are re-ignored here.
!*.json
secrets.json
local_config.json

# Generated ML artifacts: versioned model configs and training data snapshots.
# These patterns sit after the `!*.json` negation above, and the last matching
# pattern wins, so the versioned JSON files they match stay ignored. The
# version tag is `_v` followed by exactly eight digits and `_`.
ml/artifacts/*_v[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_*.pkl.config.json
ml/data/training_data_v[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_*.json

# Continuous learning runtime data (predictions, metrics, feedback, drift reports)
# Whole directory is ignored, so the `!` rules above do not apply inside it.
ml/continuous_learning/storage/

# Public corpus download cache — regenerated by scripts/import_public_corpora.py
ml/data/_corpus_cache/

# mkdocs build output (same pattern also appears under "Documentation Build")
site/
