.axolotl-complete.bash
.bandit
.coderabbit.yaml
.coveragerc
.editorconfig
.gitattributes
.gitignore
.mypy.ini
.pre-commit-config.yaml
AGENTS.md
CITATION.cff
CLAUDE.md
CNAME
FAQS.md
LICENSE
MANIFEST.in
README.md
VERSION
_quarto.yml
codecov.yml
docker-compose.yaml
favicon.jpg
index.qmd
pyproject.toml
styles.css
.github/CODE_OF_CONDUCT.md
.github/CONTRIBUTING.md
.github/FUNDING.yml
.github/PULL_REQUEST_TEMPLATE.md
.github/SECURITY.md
.github/SUPPORT.md
.github/release-drafter.yml
.github/ISSUE_TEMPLATE/bug-report.yaml
.github/ISSUE_TEMPLATE/config.yml
.github/ISSUE_TEMPLATE/docs.yml
.github/ISSUE_TEMPLATE/feature-request.yaml
.github/workflows/base.yml
.github/workflows/docs.yml
.github/workflows/lint.yml
.github/workflows/main.yml
.github/workflows/multi-gpu-e2e.yml
.github/workflows/nightlies.yml
.github/workflows/precommit-autoupdate.yml
.github/workflows/preview-docs.yml
.github/workflows/pypi.yml
.github/workflows/tests-nightly.yml
.github/workflows/tests.yml
.runpod/.gitignore
.runpod/Dockerfile
.runpod/README.md
.runpod/hub.json
.runpod/requirements.txt
.runpod/test-input.json
.runpod/tests.json
.runpod/src/handler.py
.runpod/src/test_input.json
.runpod/src/train.py
.runpod/src/utils.py
.runpod/src/config/config.yaml
.vscode/README.md
.vscode/launch.json
.vscode/tasks.json
benchmarks/bench_entropy.py
benchmarks/bench_scattermoe_lora.py
benchmarks/bench_selective_logsoftmax.py
cicd/Dockerfile-uv.jinja
cicd/__init__.py
cicd/cicd.sh
cicd/cleanup.py
cicd/cleanup.sh
cicd/e2e_tests.py
cicd/multigpu.py
cicd/multigpu.sh
cicd/single_gpu.py
deepspeed_configs/zero1.json
deepspeed_configs/zero1_torch_compile.json
deepspeed_configs/zero2.json
deepspeed_configs/zero2_torch_compile.json
deepspeed_configs/zero3.json
deepspeed_configs/zero3_bf16.json
deepspeed_configs/zero3_bf16_cpuoffload_all.json
deepspeed_configs/zero3_bf16_cpuoffload_params.json
devtools/README.md
devtools/dev_chat_template.yml
docker/Dockerfile
docker/Dockerfile-base
docker/Dockerfile-base-next
docker/Dockerfile-base-nightly
docker/Dockerfile-cloud
docker/Dockerfile-cloud-no-tmux
docker/Dockerfile-cloud-uv
docker/Dockerfile-tests
docker/Dockerfile-uv
docker/Dockerfile-uv-base
docs/.gitignore
docs/1_58bit_finetuning.qmd
docs/amd_hpc.qmd
docs/attention.qmd
docs/batch_vs_grad.qmd
docs/checkpoint_saving.qmd
docs/choosing_method.qmd
docs/cli.qmd
docs/custom_integrations.qmd
docs/dataset_loading.qmd
docs/dataset_preprocessing.qmd
docs/debugging.qmd
docs/docker.qmd
docs/ebft.qmd
docs/expert_quantization.qmd
docs/faq.qmd
docs/fsdp_qlora.qmd
docs/getting-started.qmd
docs/gradient_checkpointing.qmd
docs/grpo.qmd
docs/inference.qmd
docs/input_output.qmd
docs/installation.qmd
docs/lora_optims.qmd
docs/lr_groups.qmd
docs/mac.qmd
docs/mixed_precision.qmd
docs/multi-gpu.qmd
docs/multi-node.qmd
docs/multimodal.qmd
docs/multimodal_assistant_mask.md
docs/multipack.qmd
docs/nccl.qmd
docs/nd_parallelism.qmd
docs/optimizations.qmd
docs/optimizers.qmd
docs/qat.qmd
docs/quantize.qmd
docs/ray-integration.qmd
docs/reward_modelling.qmd
docs/rlhf.qmd
docs/sequence_parallelism.qmd
docs/streaming.qmd
docs/telemetry.qmd
docs/torchao.qmd
docs/training_stability.qmd
docs/vllm_serving.qmd
docs/agents/grpo.md
docs/agents/model_architectures.md
docs/agents/new_model_support.md
docs/agents/preference_tuning.md
docs/agents/pretraining.md
docs/agents/reward_modelling.md
docs/agents/sft.md
docs/dataset-formats/conversation.qmd
docs/dataset-formats/index.qmd
docs/dataset-formats/inst_tune.qmd
docs/dataset-formats/pretraining.qmd
docs/dataset-formats/stepwise_supervised.qmd
docs/dataset-formats/template_free.qmd
docs/dataset-formats/tokenized.qmd
docs/images/4d-mask.png
docs/images/ray-cluster-dashboard.png
docs/scripts/examples-allowlist.yml
docs/scripts/generate_config_docs.py
docs/scripts/generate_examples_docs.py
examples/LiquidAI/README.md
examples/LiquidAI/lfm2-350m-fft.yaml
examples/LiquidAI/lfm2-8b-a1b-lora.yaml
examples/LiquidAI/lfm2-vl-lora.yaml
examples/alst/README.md
examples/alst/llama3-8b-deepspeed-alst.yaml
examples/alst/llama3-8b-fsdp2-alst.yaml
examples/apertus/README.md
examples/apertus/apertus-8b-qlora.yaml
examples/arcee/README.md
examples/arcee/afm-4.5b-qlora.yaml
examples/archived/README.md
examples/archived/cerebras/btlm-ft.yml
examples/archived/cerebras/qlora.yml
examples/archived/code-llama/README.md
examples/archived/code-llama/13b/lora.yml
examples/archived/code-llama/13b/qlora.yml
examples/archived/code-llama/34b/lora.yml
examples/archived/code-llama/34b/qlora.yml
examples/archived/code-llama/7b/lora.yml
examples/archived/code-llama/7b/qlora.yml
examples/archived/dbrx/16bit-lora.yaml
examples/archived/dbrx/8bit-lora.yaml
examples/archived/dbrx/README.md
examples/archived/dbrx/fft-ds-zero3.yaml
examples/archived/deepcoder/deepcoder-14B-preview-lora.yml
examples/archived/falcon/config-7b-lora.yml
examples/archived/falcon/config-7b-qlora.yml
examples/archived/falcon/config-7b.yml
examples/archived/gemma/qlora.yml
examples/archived/gptj/qlora.yml
examples/archived/jeopardy-bot/config.yml
examples/archived/mpt-7b/README.md
examples/archived/mpt-7b/config.yml
examples/archived/openllama-3b/README.md
examples/archived/openllama-3b/config.yml
examples/archived/openllama-3b/lora.yml
examples/archived/openllama-3b/qlora.yml
examples/archived/pythia/lora.yml
examples/archived/pythia-12b/README.md
examples/archived/pythia-12b/config.yml
examples/archived/qwen/README.md
examples/archived/qwen/lora.yml
examples/archived/qwen/qlora.yml
examples/archived/qwen/qwen2-moe-lora.yaml
examples/archived/qwen/qwen2-moe-qlora.yaml
examples/archived/redpajama/README.md
examples/archived/redpajama/config-3b.yml
examples/archived/replit-3b/config-lora.yml
examples/archived/stablelm-2/README.md
examples/archived/stablelm-2/1.6b/fft.yml
examples/archived/stablelm-2/1.6b/lora.yml
examples/archived/starcoder2/qlora.yml
examples/archived/tiny-llama/README.md
examples/archived/tiny-llama/lora-mps.yml
examples/archived/tiny-llama/lora.yml
examples/archived/tiny-llama/pretrain.yml
examples/archived/tiny-llama/qlora.yml
examples/archived/xgen-7b/xgen-7b-8k-qlora.yml
examples/archived/yi-34B-chat/README.md
examples/archived/yi-34B-chat/qlora.yml
examples/cloud/baseten.yaml
examples/cloud/modal.yaml
examples/cohere/command-r-7b-qlora.yml
examples/colab-notebooks/colab-axolotl-example.ipynb
examples/deepcogito/cogito-v1-preview-llama-3B-lora.yml
examples/deepcogito/cogito-v1-preview-qwen-14B-lora.yml
examples/deepseek-v2/fft-fsdp-16b.yaml
examples/deepseek-v2/qlora-fsdp-2_5.yaml
examples/devstral/README.md
examples/devstral/devstral-small-qlora.yml
examples/distributed-parallel/README.md
examples/distributed-parallel/llama-3_1-8b-hsdp-tp.yaml
examples/distributed-parallel/qwen3-8b-fsdp-tp-cp.yaml
examples/eaft/eaft-example.yml
examples/ebft/README.md
examples/ebft/ebft_opencode.py
examples/ebft/ebft_pretrain.py
examples/ebft/ebft_strided_structured.py
examples/ebft/llama-1b-ebft-opencode-novllm.yaml
examples/ebft/llama-1b-ebft-opencode.yaml
examples/ebft/llama-1b-ebft-strided-structured.yaml
examples/ebft/llama-1b-ebft-strided.yaml
examples/ebft/llama-3b-ebft-strided-fft.yaml
examples/ebft/llama-8b-ebft-strided-fft.yaml
examples/ebft/qwen35-4b-ebft-structured-async.yaml
examples/ebft/qwen35-4b-ebft-structured.yaml
examples/ebft/qwen35-9b-ebft-structured.yaml
examples/expert_parallel/qwen3_30ba3b_ep_fft_4gpu.yaml
examples/expert_parallel/qwen3_30ba3b_ep_fsdp_fft_4gpu.yaml
examples/expert_parallel/qwen3_30ba3b_ep_lora_4gpu.yaml
examples/falcon-e/falcon-e-3b-dpo.yaml
examples/falcon-e/falcon-e-3b-ft.yaml
examples/falcon-h1/falcon-h1-1b-deep-qlora.yaml
examples/falcon-h1/falcon-h1-1b-qlora-cp.yaml
examples/falcon-h1/falcon-h1-1b-qlora.yaml
examples/falcon-h1/falcon-h1-34b-qlora.yaml
examples/falcon-h1/falcon-h1-3b-qlora.yaml
examples/falcon-h1/falcon-h1-500m-qlora.yaml
examples/falcon-h1/falcon-h1-7b-qlora.yaml
examples/gemma2/qlora.yml
examples/gemma2/reward-model.yaml
examples/gemma3/gemma-3-1b-qlora.yml
examples/gemma3/gemma-3-270m-qlora.yml
examples/gemma3/gemma-3-4b-qlora.yml
examples/gemma3/gemma-3-4b-vision-qlora.yml
examples/gemma3n/README.md
examples/gemma3n/gemma-3n-e2b-qlora.yml
examples/gemma3n/gemma-3n-e2b-vision-audio-qlora.yml
examples/gemma3n/gemma-3n-e2b-vision-qlora.yml
examples/gemma4/26b-a4b-moe-qlora.yaml
examples/gemma4/31b-qlora-flex.yaml
examples/gemma4/31b-qlora.yaml
examples/gemma4/README.md
examples/gemma4/e2b-vision-lora.yaml
examples/glm4/qlora-32b.yaml
examples/glm45/README.md
examples/glm45/glm-45-air-qlora.yaml
examples/glm46v/README.md
examples/glm46v/glm-4-6v-flash-ddp.yaml
examples/glm46v/glm-4-6v-flash-qlora.yaml
examples/glm47-flash/README.md
examples/glm47-flash/lora.yaml
examples/glm47-flash/lora_fsdp.yaml
examples/glm47-flash/qlora.yaml
examples/glm47-flash/qlora_fsdp.yaml
examples/gpt-oss/README.md
examples/gpt-oss/gpt-oss-120b-fft-fsdp2-offload.yaml
examples/gpt-oss/gpt-oss-20b-fft-deepspeed-zero3.yaml
examples/gpt-oss/gpt-oss-20b-fft-fsdp2-offload.yaml
examples/gpt-oss/gpt-oss-20b-fft-fsdp2.yaml
examples/gpt-oss/gpt-oss-20b-sft-lora-singlegpu.yaml
examples/gpt-oss/gpt-oss-safeguard-20b-sft-lora-singlegpu.yaml
examples/granite4/README.md
examples/granite4/granite-4.0-tiny-fft.yaml
examples/hunyuan/README.md
examples/hunyuan/hunyuan-v1-dense-qlora.yaml
examples/internvl3_5/README.md
examples/internvl3_5/internvl3_5-8b-qlora.yml
examples/jamba/README.md
examples/jamba/qlora.yaml
examples/jamba/qlora_deepspeed.yaml
examples/jamba/qlora_fsdp_large.yaml
examples/kimi-linear/README.md
examples/kimi-linear/kimi-48b-lora.yaml
examples/llama-2/README.md
examples/llama-2/fft_optimized.yml
examples/llama-2/gptq-lora.yml
examples/llama-2/lisa.yml
examples/llama-2/loftq.yml
examples/llama-2/lora.yml
examples/llama-2/qlora-fsdp.yml
examples/llama-2/qlora.yml
examples/llama-2/relora.yml
examples/llama-3/3b-fp8-fsdp2.yaml
examples/llama-3/3b-qat-fsdp2.yaml
examples/llama-3/3b-qat-mxfp4.yaml
examples/llama-3/3b-qat-nvfp4.yaml
examples/llama-3/README.md
examples/llama-3/fft-8b-liger-fsdp.yaml
examples/llama-3/fft-8b.yaml
examples/llama-3/instruct-dpo-lora-8b.yml
examples/llama-3/instruct-lora-8b.yml
examples/llama-3/lora-1b-deduplicate-dpo.yml
examples/llama-3/lora-1b-deduplicate-sft.yml
examples/llama-3/lora-1b-kernels.yml
examples/llama-3/lora-1b-ray.yml
examples/llama-3/lora-1b-sample-packing-sequentially.yml
examples/llama-3/lora-1b.yml
examples/llama-3/lora-8b.yml
examples/llama-3/opentelemetry-qlora.yml
examples/llama-3/qlora-1b-gdpo.yaml
examples/llama-3/qlora-1b-kto.yaml
examples/llama-3/qlora-1b.yml
examples/llama-3/qlora-fsdp-405b.yaml
examples/llama-3/qlora-fsdp-70b.yaml
examples/llama-3/qlora.yml
examples/llama-3/sparse-finetuning.yaml
examples/llama-3-vision/lora-11b.yaml
examples/llama-3/diffusion/pretrain-1b.yaml
examples/llama-3/diffusion/sft-1b.yaml
examples/llama-4/README.md
examples/llama-4/scout-qlora-flexattn-fsdp2.yaml
examples/llama-4/scout-qlora-single-h100-flex.yaml
examples/llama-4/scout-vision-qlora-fsdp2-flex.yaml
examples/llama-4/do-no-use-fa2/maverick-qlora-fsdp1.yaml
examples/llama-4/do-no-use-fa2/scout-qlora-fsdp1.yaml
examples/llama-4/do-no-use-fa2/scout-qlora-single-h100.yaml
examples/llama-4/do-no-use-fa2/scout-vision-qlora-fsdp.yaml
examples/llava/lora-7b.yaml
examples/magistral/README.md
examples/magistral/magistral-small-fsdp-qlora.yaml
examples/magistral/magistral-small-qlora.yaml
examples/magistral/think/README.md
examples/magistral/think/magistral-small-think-qlora.yaml
examples/magistral/vision/README.md
examples/magistral/vision/magistral-small-vision-24B-qlora.yml
examples/mamba/config.yml
examples/mimo/README.md
examples/mimo/mimo-7b-qlora.yaml
examples/ministral/README.md
examples/ministral/ministral-small-qlora.yaml
examples/ministral3/README.md
examples/ministral3/ministral3-3b-qlora.yaml
examples/ministral3/think/README.md
examples/ministral3/think/ministral3-3b-think-qlora.yaml
examples/ministral3/vision/README.md
examples/ministral3/vision/ministral3-3b-vision-qlora.yml
examples/mistral/README.md
examples/mistral/config.yml
examples/mistral/lora.yml
examples/mistral/mistral-qlora-fsdp.yml
examples/mistral/qlora.yml
examples/mistral-medium-3_5/README.md
examples/mistral-medium-3_5/qlora-text.yml
examples/mistral-medium-3_5/qlora-vision.yml
examples/mistral-small/README.md
examples/mistral-small/mistral-small-3.1-24B-lora.yml
examples/mistral/bigstral/bigstral-ds-zero3.yaml
examples/mistral/dpo/mistral-dpo-qlora.yml
examples/mistral/mixtral/mixtral-8x22b-qlora-fsdp.yml
examples/mistral/mixtral/mixtral-qlora-fsdp.yml
examples/mistral/mixtral/mixtral.yml
examples/mistral/mixtral/mixtral_22.yml
examples/mistral/mps/lora-mps.yml
examples/mistral/orpo/mistral-qlora-orpo.yml
examples/mistral4/README.md
examples/mistral4/fft-text.yml
examples/mistral4/fft-vision.yml
examples/mistral4/qlora-text.yml
examples/mistral4/qlora-vision.yml
examples/nemotron/nemotron-mini-4b-qlora.yaml
examples/nemotron-h/120b-a12b-qlora.yaml
examples/nemotron-h/README.md
examples/nemotron-h/nano-30b-a3b-qlora-cp.yaml
examples/nemotron-h/nano-30b-a3b-qlora.yaml
examples/olmo3/README.md
examples/olmo3/olmo3-7b-qlora.yaml
examples/orpheus/README.md
examples/orpheus/finetune.yml
examples/phi/README.md
examples/phi/lora-3.5.yaml
examples/phi/phi-ft.yml
examples/phi/phi-qlora.yml
examples/phi/phi2-ft.yml
examples/phi/phi3-ft-fsdp.yml
examples/phi/phi3-ft.yml
examples/pixtral/lora-12b.yml
examples/plano/README.md
examples/plano/plano-4b-qlora.yaml
examples/qat_nvfp4/Gemma3-12B_baseline.yml
examples/qat_nvfp4/Gemma3-12B_qat.yml
examples/qat_nvfp4/Math-Gemma3-12B_baseline.yml
examples/qat_nvfp4/Math-Gemma3-12B_qat.yml
examples/qat_nvfp4/Math-Gemma3-27B_baseline.yml
examples/qat_nvfp4/Math-Gemma3-27B_qat.yml
examples/qat_nvfp4/Math-Qwen2.5-72B_baseline.yml
examples/qat_nvfp4/Math-Qwen2.5-72B_qat.yml
examples/qat_nvfp4/Qwen2.5-72B_baseline.yml
examples/qat_nvfp4/Qwen2.5-72B_qat.yml
examples/qwen2/adamw-pretrain-fsdp2.yaml
examples/qwen2/dpo.yaml
examples/qwen2/muon-pretrain-fsdp2.yaml
examples/qwen2/prm.yaml
examples/qwen2/qlora-fsdp.yaml
examples/qwen2/reward-model.yaml
examples/qwen2-vl/lora-7b.yaml
examples/qwen2_5-vl/lora-7b.yaml
examples/qwen3/32b-qlora.yaml
examples/qwen3/8b-lora-fused-attn.yaml
examples/qwen3/8b-qat-fsdp2.yml
examples/qwen3/README.md
examples/qwen3/qlora-fsdp.yaml
examples/qwen3/reward-model.yaml
examples/qwen3-next/README.md
examples/qwen3-next/qwen3-next-80b-a3b-qlora.yaml
examples/qwen3.5/122b-a10b-moe-qlora-fsdp.yaml
examples/qwen3.5/122b-a10b-moe-qlora.yaml
examples/qwen3.5/27b-fft.yaml
examples/qwen3.5/27b-qlora-fsdp.yaml
examples/qwen3.5/27b-qlora.yaml
examples/qwen3.5/35b-a3b-moe-qlora-fsdp.yaml
examples/qwen3.5/35b-a3b-moe-qlora.yaml
examples/qwen3.5/35b-a3b-moe-vision-lora.yaml
examples/qwen3.5/9b-fft-vision.yaml
examples/qwen3.5/9b-lora-vision.yaml
examples/qwen3.5/README.md
examples/seed-oss/README.md
examples/seed-oss/seed-oss-36b-qlora.yaml
examples/slurm/README.md
examples/slurm/axolotl.slurm
examples/smolvlm2/README.md
examples/smolvlm2/smolvlm2-2B-lora.yaml
examples/streaming/README.md
examples/streaming/pretrain.yaml
examples/streaming/sft.yaml
examples/swanlab/README.md
examples/swanlab/custom_trainer_profiling.py
examples/swanlab/dpo-swanlab-completions.yml
examples/swanlab/dpo-swanlab-full-featured.yml
examples/swanlab/lora-swanlab-profiling.yml
examples/trinity/README.md
examples/trinity/trinity-nano-preview-qlora.yaml
examples/voxtral/README.md
examples/voxtral/voxtral-mini-audio-qlora.yml
examples/voxtral/voxtral-mini-qlora.yml
image/axolotl-badge-web-legacy.png
image/axolotl-badge-web.png
image/axolotl.png
image/axolotl_logo_digital_black.svg
image/axolotl_logo_digital_white.svg
image/axolotl_symbol_digital_black.svg
image/axolotl_symbol_digital_white.svg
image/axolotl_wordmark_digital_black.svg
image/axolotl_wordmark_digital_white.svg
image/sticker_fixed.png
scripts/analyze_profile.py
scripts/chat_datasets.py
scripts/cloud-entrypoint-term.sh
scripts/cloud-entrypoint.sh
scripts/cuda13_env.sh
scripts/cutcrossentropy_install.py
scripts/motd
scripts/uv-entrypoint.sh
src/axolotl/__init__.py
src/axolotl/convert.py
src/axolotl/datasets.py
src/axolotl/evaluate.py
src/axolotl/logging_config.py
src/axolotl/processing_strategies.py
src/axolotl/prompt_tokenizers.py
src/axolotl/prompters.py
src/axolotl/train.py
src/axolotl.egg-info/PKG-INFO
src/axolotl.egg-info/SOURCES.txt
src/axolotl.egg-info/dependency_links.txt
src/axolotl.egg-info/entry_points.txt
src/axolotl.egg-info/requires.txt
src/axolotl.egg-info/top_level.txt
src/axolotl/cli/__init__.py
src/axolotl/cli/args.py
src/axolotl/cli/art.py
src/axolotl/cli/checks.py
src/axolotl/cli/config.py
src/axolotl/cli/delinearize_llama4.py
src/axolotl/cli/evaluate.py
src/axolotl/cli/inference.py
src/axolotl/cli/main.py
src/axolotl/cli/merge_lora.py
src/axolotl/cli/merge_sharded_fsdp_weights.py
src/axolotl/cli/preprocess.py
src/axolotl/cli/quantize.py
src/axolotl/cli/train.py
src/axolotl/cli/vllm_serve.py
src/axolotl/cli/agent_docs/__init__.py
src/axolotl/cli/cloud/__init__.py
src/axolotl/cli/cloud/base.py
src/axolotl/cli/cloud/modal_.py
src/axolotl/cli/cloud/baseten/__init__.py
src/axolotl/cli/cloud/baseten/template/run.sh
src/axolotl/cli/cloud/baseten/template/train_sft.py
src/axolotl/cli/utils/__init__.py
src/axolotl/cli/utils/args.py
src/axolotl/cli/utils/diffusion.py
src/axolotl/cli/utils/fetch.py
src/axolotl/cli/utils/load.py
src/axolotl/cli/utils/lora_merge.py
src/axolotl/cli/utils/sweeps.py
src/axolotl/cli/utils/train.py
src/axolotl/common/__init__.py
src/axolotl/common/architectures.py
src/axolotl/common/const.py
src/axolotl/common/datasets.py
src/axolotl/core/__init__.py
src/axolotl/core/training_args.py
src/axolotl/core/training_args_base.py
src/axolotl/core/attention/__init__.py
src/axolotl/core/builders/__init__.py
src/axolotl/core/builders/base.py
src/axolotl/core/builders/causal.py
src/axolotl/core/builders/rl.py
src/axolotl/core/chat/__init__.py
src/axolotl/core/chat/messages.py
src/axolotl/core/chat/format/__init__.py
src/axolotl/core/chat/format/chatml.py
src/axolotl/core/chat/format/llama3x.py
src/axolotl/core/chat/format/shared.py
src/axolotl/core/datasets/__init__.py
src/axolotl/core/datasets/chat.py
src/axolotl/core/datasets/transforms/__init__.py
src/axolotl/core/datasets/transforms/chat_builder.py
src/axolotl/core/trainers/__init__.py
src/axolotl/core/trainers/base.py
src/axolotl/core/trainers/constants.py
src/axolotl/core/trainers/mamba.py
src/axolotl/core/trainers/trl.py
src/axolotl/core/trainers/utils.py
src/axolotl/core/trainers/dpo/__init__.py
src/axolotl/core/trainers/dpo/args.py
src/axolotl/core/trainers/dpo/trainer.py
src/axolotl/core/trainers/ebft/__init__.py
src/axolotl/core/trainers/ebft/args.py
src/axolotl/core/trainers/ebft/kernels.py
src/axolotl/core/trainers/ebft/rewards.py
src/axolotl/core/trainers/ebft/strided.py
src/axolotl/core/trainers/ebft/trainer.py
src/axolotl/core/trainers/grpo/__init__.py
src/axolotl/core/trainers/grpo/args.py
src/axolotl/core/trainers/grpo/async_trainer.py
src/axolotl/core/trainers/grpo/fast_async_trainer.py
src/axolotl/core/trainers/grpo/replay_buffer.py
src/axolotl/core/trainers/grpo/sampler.py
src/axolotl/core/trainers/grpo/trainer.py
src/axolotl/core/trainers/mixins/__init__.py
src/axolotl/core/trainers/mixins/activation_checkpointing.py
src/axolotl/core/trainers/mixins/checkpoints.py
src/axolotl/core/trainers/mixins/distributed_parallel.py
src/axolotl/core/trainers/mixins/layer_offloading.py
src/axolotl/core/trainers/mixins/optimizer.py
src/axolotl/core/trainers/mixins/packing.py
src/axolotl/core/trainers/mixins/rng_state_loader.py
src/axolotl/core/trainers/mixins/scheduler.py
src/axolotl/integrations/LICENSE.md
src/axolotl/integrations/__init__.py
src/axolotl/integrations/base.py
src/axolotl/integrations/config.py
src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.md
src/axolotl/integrations/cut_cross_entropy/LICENSE
src/axolotl/integrations/cut_cross_entropy/README.md
src/axolotl/integrations/cut_cross_entropy/__init__.py
src/axolotl/integrations/cut_cross_entropy/args.py
src/axolotl/integrations/densemixer/README.md
src/axolotl/integrations/densemixer/__init__.py
src/axolotl/integrations/densemixer/args.py
src/axolotl/integrations/densemixer/plugin.py
src/axolotl/integrations/diffusion/README.md
src/axolotl/integrations/diffusion/__init__.py
src/axolotl/integrations/diffusion/args.py
src/axolotl/integrations/diffusion/callbacks.py
src/axolotl/integrations/diffusion/generation.py
src/axolotl/integrations/diffusion/plugin.py
src/axolotl/integrations/diffusion/trainer.py
src/axolotl/integrations/diffusion/utils.py
src/axolotl/integrations/expert_parallel/README.md
src/axolotl/integrations/expert_parallel/__init__.py
src/axolotl/integrations/expert_parallel/args.py
src/axolotl/integrations/expert_parallel/buffer.py
src/axolotl/integrations/expert_parallel/experts_fn.py
src/axolotl/integrations/expert_parallel/plugin.py
src/axolotl/integrations/expert_parallel/shard.py
src/axolotl/integrations/grokfast/LICENSE
src/axolotl/integrations/grokfast/README.md
src/axolotl/integrations/grokfast/__init__.py
src/axolotl/integrations/grokfast/args.py
src/axolotl/integrations/grokfast/optimizer.py
src/axolotl/integrations/hatchery/__init__.py
src/axolotl/integrations/hatchery/args.py
src/axolotl/integrations/hatchery/data.py
src/axolotl/integrations/hatchery/plugin.py
src/axolotl/integrations/hatchery/rl_trainer.py
src/axolotl/integrations/hatchery/trainer.py
src/axolotl/integrations/hatchery/examples/prep_math_rl.py
src/axolotl/integrations/hatchery/examples/tinker_rl.yaml
src/axolotl/integrations/hatchery/examples/tinker_sft.yaml
src/axolotl/integrations/hatchery/rewards/__init__.py
src/axolotl/integrations/hatchery/rewards/math_reward.py
src/axolotl/integrations/kd/README.md
src/axolotl/integrations/kd/__init__.py
src/axolotl/integrations/kd/args.py
src/axolotl/integrations/kd/callbacks.py
src/axolotl/integrations/kd/chat_template.py
src/axolotl/integrations/kd/collator.py
src/axolotl/integrations/kd/collator_online_teacher.py
src/axolotl/integrations/kd/trainer.py
src/axolotl/integrations/kd/utils.py
src/axolotl/integrations/kd/kernels/__init__.py
src/axolotl/integrations/kd/kernels/liger.py
src/axolotl/integrations/kd/topk_logprob/__init__.py
src/axolotl/integrations/kd/topk_logprob/forward_kl.py
src/axolotl/integrations/kernels/README.md
src/axolotl/integrations/kernels/__init__.py
src/axolotl/integrations/kernels/args.py
src/axolotl/integrations/kernels/autotune_callback.py
src/axolotl/integrations/kernels/autotune_collector.py
src/axolotl/integrations/kernels/constants.py
src/axolotl/integrations/kernels/plugin.py
src/axolotl/integrations/kernels/libs/__init__.py
src/axolotl/integrations/kernels/libs/scattermoe_lora/__init__.py
src/axolotl/integrations/kernels/libs/scattermoe_lora/experts.py
src/axolotl/integrations/kernels/libs/scattermoe_lora/layers.py
src/axolotl/integrations/kernels/libs/scattermoe_lora/lora_ops.py
src/axolotl/integrations/kernels/libs/scattermoe_lora/mx_weights.py
src/axolotl/integrations/kernels/libs/scattermoe_lora/parallel_experts.py
src/axolotl/integrations/kernels/libs/scattermoe_lora/parallel_linear_lora.py
src/axolotl/integrations/kernels/libs/scattermoe_lora/selective_dequant.py
src/axolotl/integrations/kernels/libs/scattermoe_lora/selective_dequant_kernel.py
src/axolotl/integrations/kernels/libs/scattermoe_lora/kernels/__init__.py
src/axolotl/integrations/kernels/libs/scattermoe_lora/kernels/lora_ops.py
src/axolotl/integrations/kernels/libs/scattermoe_lora/kernels/ops.py
src/axolotl/integrations/kernels/libs/scattermoe_lora/kernels/single.py
src/axolotl/integrations/kernels/libs/sonicmoe/__init__.py
src/axolotl/integrations/kernels/libs/sonicmoe/experts.py
src/axolotl/integrations/kernels/libs/sonicmoe/lora.py
src/axolotl/integrations/liger/LICENSE
src/axolotl/integrations/liger/README.md
src/axolotl/integrations/liger/__init__.py
src/axolotl/integrations/liger/args.py
src/axolotl/integrations/liger/plugin.py
src/axolotl/integrations/liger/utils.py
src/axolotl/integrations/liger/models/__init__.py
src/axolotl/integrations/liger/models/base.py
src/axolotl/integrations/liger/models/deepseekv2.py
src/axolotl/integrations/liger/models/jamba.py
src/axolotl/integrations/liger/models/llama4.py
src/axolotl/integrations/liger/models/qwen3.py
src/axolotl/integrations/liger/models/qwen3_5.py
src/axolotl/integrations/liger/models/qwen3_5_moe.py
src/axolotl/integrations/liger/models/qwen3_moe.py
src/axolotl/integrations/llm_compressor/README.md
src/axolotl/integrations/llm_compressor/__init__.py
src/axolotl/integrations/llm_compressor/args.py
src/axolotl/integrations/llm_compressor/plugin.py
src/axolotl/integrations/llm_compressor/utils.py
src/axolotl/integrations/lm_eval/README.md
src/axolotl/integrations/lm_eval/__init__.py
src/axolotl/integrations/lm_eval/args.py
src/axolotl/integrations/lm_eval/cli.py
src/axolotl/integrations/mora/__init__.py
src/axolotl/integrations/mora/args.py
src/axolotl/integrations/mora/plugin.py
src/axolotl/integrations/nemo_gym/README.md
src/axolotl/integrations/nemo_gym/__init__.py
src/axolotl/integrations/nemo_gym/args.py
src/axolotl/integrations/nemo_gym/data_producer.py
src/axolotl/integrations/nemo_gym/dataset.py
src/axolotl/integrations/nemo_gym/multi_turn.py
src/axolotl/integrations/nemo_gym/plugin.py
src/axolotl/integrations/nemo_gym/rewards.py
src/axolotl/integrations/nemo_gym/server.py
src/axolotl/integrations/nemo_gym/examples/nemo_gym_multi_env.yaml
src/axolotl/integrations/nemo_gym/examples/nemo_gym_multi_turn.yaml
src/axolotl/integrations/nemo_gym/examples/nemo_gym_sudoku.yaml
src/axolotl/integrations/spectrum/LICENSE
src/axolotl/integrations/spectrum/README.md
src/axolotl/integrations/spectrum/__init__.py
src/axolotl/integrations/spectrum/args.py
src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-1.5B-Instruct.json
src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-1.5B.json
src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-3B-Instruct.json
src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-3B.json
src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-7B-Instruct.json
src/axolotl/integrations/spectrum/model_snr_results/snr_results_Qwen-Qwen2.5-7B.json
src/axolotl/integrations/spectrum/model_snr_results/snr_results_google-gemma-2-2b.json
src/axolotl/integrations/spectrum/model_snr_results/snr_results_meta-llama-Llama-3.2-1B-Instruct.json
src/axolotl/integrations/spectrum/model_snr_results/snr_results_meta-llama-Llama-3.2-1B.json
src/axolotl/integrations/spectrum/model_snr_results/snr_results_meta-llama-Llama-3.2-3B-Instruct.json
src/axolotl/integrations/spectrum/model_snr_results/snr_results_meta-llama-Llama-3.2-3B.json
src/axolotl/integrations/swanlab/README.md
src/axolotl/integrations/swanlab/__init__.py
src/axolotl/integrations/swanlab/args.py
src/axolotl/integrations/swanlab/callbacks.py
src/axolotl/integrations/swanlab/completion_logger.py
src/axolotl/integrations/swanlab/plugins.py
src/axolotl/integrations/swanlab/profiling.py
src/axolotl/kernels/__init__.py
src/axolotl/kernels/autotune_telemetry.py
src/axolotl/kernels/dora.py
src/axolotl/kernels/geglu.py
src/axolotl/kernels/gemma4_fused_rope.py
src/axolotl/kernels/lora.py
src/axolotl/kernels/quantize.py
src/axolotl/kernels/rms_norm_gated.py
src/axolotl/kernels/swiglu.py
src/axolotl/kernels/utils.py
src/axolotl/loaders/__init__.py
src/axolotl/loaders/adapter.py
src/axolotl/loaders/constants.py
src/axolotl/loaders/model.py
src/axolotl/loaders/patch_manager.py
src/axolotl/loaders/processor.py
src/axolotl/loaders/tokenizer.py
src/axolotl/loaders/utils.py
src/axolotl/loaders/adapters/__init__.py
src/axolotl/models/__init__.py
src/axolotl/models/mamba/__init__.py
src/axolotl/models/mamba/configuration_mamba.py
src/axolotl/models/mamba/modeling_mamba.py
src/axolotl/monkeypatch/__init__.py
src/axolotl/monkeypatch/btlm_attn_hijack_flash.py
src/axolotl/monkeypatch/deepspeed_utils.py
src/axolotl/monkeypatch/fsdp2_qlora.py
src/axolotl/monkeypatch/gemma4_hybrid_mask.py
src/axolotl/monkeypatch/gemma4_kernelize.py
src/axolotl/monkeypatch/llama_attn_hijack_flash.py
src/axolotl/monkeypatch/llama_attn_hijack_xformers.py
src/axolotl/monkeypatch/lora_kernels.py
src/axolotl/monkeypatch/mistral_attn_hijack_flash.py
src/axolotl/monkeypatch/moe_quant.py
src/axolotl/monkeypatch/multipack.py
src/axolotl/monkeypatch/relora.py
src/axolotl/monkeypatch/scaled_softmax_attn.py
src/axolotl/monkeypatch/stablelm_attn_hijack_flash.py
src/axolotl/monkeypatch/torchao_optim.py
src/axolotl/monkeypatch/trainer_accelerator_args.py
src/axolotl/monkeypatch/trainer_fsdp_optim.py
src/axolotl/monkeypatch/transformers_fa_utils.py
src/axolotl/monkeypatch/utils.py
src/axolotl/monkeypatch/accelerate/__init__.py
src/axolotl/monkeypatch/accelerate/fsdp2.py
src/axolotl/monkeypatch/accelerate/parallelism_config.py
src/axolotl/monkeypatch/attention/__init__.py
src/axolotl/monkeypatch/attention/flash_attn_4.py
src/axolotl/monkeypatch/attention/flex_attn.py
src/axolotl/monkeypatch/attention/fp8_attn.py
src/axolotl/monkeypatch/attention/sage_attn.py
src/axolotl/monkeypatch/attention/xformers.py
src/axolotl/monkeypatch/data/__init__.py
src/axolotl/monkeypatch/data/batch_dataset_fetcher.py
src/axolotl/monkeypatch/gradient_checkpointing/__init__.py
src/axolotl/monkeypatch/gradient_checkpointing/offload_cpu.py
src/axolotl/monkeypatch/gradient_checkpointing/offload_disk.py
src/axolotl/monkeypatch/loss/__init__.py
src/axolotl/monkeypatch/loss/chunked.py
src/axolotl/monkeypatch/loss/eaft.py
src/axolotl/monkeypatch/mixtral/__init__.py
src/axolotl/monkeypatch/models/__init__.py
src/axolotl/monkeypatch/models/mamba_utils.py
src/axolotl/monkeypatch/models/apertus/__init__.py
src/axolotl/monkeypatch/models/apertus/activation.py
src/axolotl/monkeypatch/models/falcon_h1/__init__.py
src/axolotl/monkeypatch/models/falcon_h1/modeling.py
src/axolotl/monkeypatch/models/gemma4/fused_attn.py
src/axolotl/monkeypatch/models/granitemoehybrid/__init__.py
src/axolotl/monkeypatch/models/granitemoehybrid/modeling.py
src/axolotl/monkeypatch/models/kimi_linear/__init__.py
src/axolotl/monkeypatch/models/kimi_linear/configuration_kimi.py
src/axolotl/monkeypatch/models/kimi_linear/modeling_kimi.py
src/axolotl/monkeypatch/models/kimi_linear/patch_kimi_linear.py
src/axolotl/monkeypatch/models/kimi_linear/tokenization_kimi.py
src/axolotl/monkeypatch/models/llama4/__init__.py
src/axolotl/monkeypatch/models/llama4/modeling.py
src/axolotl/monkeypatch/models/mistral3/__init__.py
src/axolotl/monkeypatch/models/mistral3/mistral_common_tokenizer.py
src/axolotl/monkeypatch/models/nemotron_h/__init__.py
src/axolotl/monkeypatch/models/nemotron_h/modeling.py
src/axolotl/monkeypatch/models/pixtral/__init__.py
src/axolotl/monkeypatch/models/pixtral/modeling_flash_attention_utils.py
src/axolotl/monkeypatch/models/qwen3/__init__.py
src/axolotl/monkeypatch/models/qwen3/fused_attn.py
src/axolotl/monkeypatch/models/qwen3_5/__init__.py
src/axolotl/monkeypatch/models/qwen3_5/fused_attn.py
src/axolotl/monkeypatch/models/qwen3_5/modeling.py
src/axolotl/monkeypatch/models/qwen3_5_moe/__init__.py
src/axolotl/monkeypatch/models/qwen3_5_moe/fused_attn.py
src/axolotl/monkeypatch/models/qwen3_moe/__init__.py
src/axolotl/monkeypatch/models/qwen3_moe/fused_attn.py
src/axolotl/monkeypatch/models/qwen3_next/__init__.py
src/axolotl/monkeypatch/models/qwen3_next/modeling.py
src/axolotl/monkeypatch/models/qwen3_vl/__init__.py
src/axolotl/monkeypatch/models/qwen3_vl/fused_attn.py
src/axolotl/monkeypatch/models/voxtral/__init__.py
src/axolotl/monkeypatch/models/voxtral/modeling.py
src/axolotl/monkeypatch/peft/__init__.py
src/axolotl/monkeypatch/peft/utils.py
src/axolotl/monkeypatch/ring_attn/__init__.py
src/axolotl/monkeypatch/ring_attn/patch.py
src/axolotl/monkeypatch/ring_attn/adapters/__init__.py
src/axolotl/monkeypatch/ring_attn/adapters/batch.py
src/axolotl/monkeypatch/tiled_mlp/__init__.py
src/axolotl/monkeypatch/tiled_mlp/base.py
src/axolotl/monkeypatch/tiled_mlp/patch.py
src/axolotl/monkeypatch/trainer/__init__.py
src/axolotl/monkeypatch/trainer/lr.py
src/axolotl/monkeypatch/trainer/trl.py
src/axolotl/monkeypatch/trainer/trl_vllm.py
src/axolotl/monkeypatch/trainer/utils.py
src/axolotl/monkeypatch/transformers/__init__.py
src/axolotl/monkeypatch/transformers/trainer_loss_calc.py
src/axolotl/monkeypatch/xformers_/__init__.py
src/axolotl/prompt_strategies/__init__.py
src/axolotl/prompt_strategies/_synthetic.py
src/axolotl/prompt_strategies/alpaca_chat.py
src/axolotl/prompt_strategies/alpaca_instruct.py
src/axolotl/prompt_strategies/alpaca_w_system.py
src/axolotl/prompt_strategies/base.py
src/axolotl/prompt_strategies/chat_template.py
src/axolotl/prompt_strategies/completion.py
src/axolotl/prompt_strategies/context_qa.py
src/axolotl/prompt_strategies/creative_acr.py
src/axolotl/prompt_strategies/input_output.py
src/axolotl/prompt_strategies/jinja_template_analyzer.py
src/axolotl/prompt_strategies/llama2_chat.py
src/axolotl/prompt_strategies/metharme.py
src/axolotl/prompt_strategies/orcamini.py
src/axolotl/prompt_strategies/pretrain.py
src/axolotl/prompt_strategies/pygmalion.py
src/axolotl/prompt_strategies/stepwise_supervised.py
src/axolotl/prompt_strategies/user_defined.py
src/axolotl/prompt_strategies/bradley_terry/README.md
src/axolotl/prompt_strategies/bradley_terry/__init__.py
src/axolotl/prompt_strategies/bradley_terry/chat_template.py
src/axolotl/prompt_strategies/bradley_terry/llama3.py
src/axolotl/prompt_strategies/dpo/__init__.py
src/axolotl/prompt_strategies/dpo/chat_template.py
src/axolotl/prompt_strategies/dpo/chatml.py
src/axolotl/prompt_strategies/dpo/llama3.py
src/axolotl/prompt_strategies/dpo/passthrough.py
src/axolotl/prompt_strategies/dpo/user_defined.py
src/axolotl/prompt_strategies/dpo/zephyr.py
src/axolotl/prompt_strategies/ebft/__init__.py
src/axolotl/prompt_strategies/ebft/ebft_chat_multiturn.py
src/axolotl/prompt_strategies/ebft/ebft_opencode.py
src/axolotl/prompt_strategies/ebft/ebft_reasoning.py
src/axolotl/prompt_strategies/ebft/ebft_strided_chat.py
src/axolotl/prompt_strategies/ebft/ebft_strided_structured.py
src/axolotl/prompt_strategies/kto/__init__.py
src/axolotl/prompt_strategies/kto/chatml.py
src/axolotl/prompt_strategies/kto/llama3.py
src/axolotl/prompt_strategies/kto/user_defined.py
src/axolotl/prompt_strategies/messages/__init__.py
src/axolotl/prompt_strategies/messages/chat.py
src/axolotl/prompt_strategies/orpo/__init__.py
src/axolotl/prompt_strategies/orpo/chat_template.py
src/axolotl/scripts/__init__.py
src/axolotl/scripts/process_cleanup.py
src/axolotl/scripts/vllm_serve_lora.py
src/axolotl/scripts/vllm_worker_ext.py
src/axolotl/telemetry/__init__.py
src/axolotl/telemetry/callbacks.py
src/axolotl/telemetry/errors.py
src/axolotl/telemetry/manager.py
src/axolotl/telemetry/runtime_metrics.py
src/axolotl/telemetry/whitelist.yaml
src/axolotl/utils/__init__.py
src/axolotl/utils/bench.py
src/axolotl/utils/comet_.py
src/axolotl/utils/cuda13.py
src/axolotl/utils/datasets.py
src/axolotl/utils/dict.py
src/axolotl/utils/distributed.py
src/axolotl/utils/environment.py
src/axolotl/utils/fp32_norms.py
src/axolotl/utils/freeze.py
src/axolotl/utils/import_helper.py
src/axolotl/utils/logging.py
src/axolotl/utils/lora.py
src/axolotl/utils/mlflow_.py
src/axolotl/utils/model_shard_quant.py
src/axolotl/utils/quantization.py
src/axolotl/utils/schedulers.py
src/axolotl/utils/tee.py
src/axolotl/utils/tokenization.py
src/axolotl/utils/trackio_.py
src/axolotl/utils/train.py
src/axolotl/utils/trainer.py
src/axolotl/utils/wandb_.py
src/axolotl/utils/weight_serde.py
src/axolotl/utils/callbacks/__init__.py
src/axolotl/utils/callbacks/comet_.py
src/axolotl/utils/callbacks/dynamic_checkpoint.py
src/axolotl/utils/callbacks/generation.py
src/axolotl/utils/callbacks/lisa.py
src/axolotl/utils/callbacks/mlflow_.py
src/axolotl/utils/callbacks/models.py
src/axolotl/utils/callbacks/opentelemetry.py
src/axolotl/utils/callbacks/perplexity.py
src/axolotl/utils/callbacks/profiler.py
src/axolotl/utils/callbacks/qat.py
src/axolotl/utils/callbacks/swanlab.py
src/axolotl/utils/callbacks/tokens_per_second.py
src/axolotl/utils/callbacks/trackio_.py
src/axolotl/utils/chat_templates/__init__.py
src/axolotl/utils/chat_templates/base.py
src/axolotl/utils/chat_templates/templates/alpaca.jinja
src/axolotl/utils/chat_templates/templates/aya.jinja
src/axolotl/utils/chat_templates/templates/chatml.jinja
src/axolotl/utils/chat_templates/templates/cohere.jinja
src/axolotl/utils/chat_templates/templates/command_a.jinja
src/axolotl/utils/chat_templates/templates/command_a_rag.jinja
src/axolotl/utils/chat_templates/templates/command_a_tool_use.jinja
src/axolotl/utils/chat_templates/templates/deepseek_v2.jinja
src/axolotl/utils/chat_templates/templates/deepseek_v3.jinja
src/axolotl/utils/chat_templates/templates/exaone.jinja
src/axolotl/utils/chat_templates/templates/exaone4.jinja
src/axolotl/utils/chat_templates/templates/falcon_h1.jinja
src/axolotl/utils/chat_templates/templates/gemma.jinja
src/axolotl/utils/chat_templates/templates/gemma3.jinja
src/axolotl/utils/chat_templates/templates/gemma3n.jinja
src/axolotl/utils/chat_templates/templates/gemma4.jinja
src/axolotl/utils/chat_templates/templates/jamba.jinja
src/axolotl/utils/chat_templates/templates/llama3.jinja
src/axolotl/utils/chat_templates/templates/llama3_2_vision.jinja
src/axolotl/utils/chat_templates/templates/llama4.jinja
src/axolotl/utils/chat_templates/templates/llava.jinja
src/axolotl/utils/chat_templates/templates/metharme.jinja
src/axolotl/utils/chat_templates/templates/mistral_v1.jinja
src/axolotl/utils/chat_templates/templates/mistral_v2v3.jinja
src/axolotl/utils/chat_templates/templates/mistral_v3_tekken.jinja
src/axolotl/utils/chat_templates/templates/mistral_v7_tekken.jinja
src/axolotl/utils/chat_templates/templates/nemotron_h.jinja
src/axolotl/utils/chat_templates/templates/phi_3.jinja
src/axolotl/utils/chat_templates/templates/phi_35.jinja
src/axolotl/utils/chat_templates/templates/phi_4.jinja
src/axolotl/utils/chat_templates/templates/pixtral.jinja
src/axolotl/utils/chat_templates/templates/qwen2_vl.jinja
src/axolotl/utils/chat_templates/templates/qwen3.jinja
src/axolotl/utils/chat_templates/templates/qwen3_5.jinja
src/axolotl/utils/chat_templates/templates/qwen_25.jinja
src/axolotl/utils/collators/__init__.py
src/axolotl/utils/collators/batching.py
src/axolotl/utils/collators/core.py
src/axolotl/utils/collators/dpo.py
src/axolotl/utils/collators/mamba.py
src/axolotl/utils/collators/mm_chat.py
src/axolotl/utils/config/__init__.py
src/axolotl/utils/config/models/__init__.py
src/axolotl/utils/ctx_managers/__init__.py
src/axolotl/utils/ctx_managers/sequence_parallel.py
src/axolotl/utils/data/__init__.py
src/axolotl/utils/data/lock.py
src/axolotl/utils/data/rl.py
src/axolotl/utils/data/sft.py
src/axolotl/utils/data/shared.py
src/axolotl/utils/data/streaming.py
src/axolotl/utils/data/utils.py
src/axolotl/utils/data/wrappers.py
src/axolotl/utils/generation/__init__.py
src/axolotl/utils/generation/sft.py
src/axolotl/utils/mistral/__init__.py
src/axolotl/utils/mistral/mistral3_processor.py
src/axolotl/utils/mistral/mistral_tokenizer.py
src/axolotl/utils/optimizers/__init__.py
src/axolotl/utils/optimizers/adopt.py
src/axolotl/utils/optimizers/qgalore.py
src/axolotl/utils/samplers/__init__.py
src/axolotl/utils/samplers/multipack.py
src/axolotl/utils/samplers/utils.py
src/axolotl/utils/schemas/__init__.py
src/axolotl/utils/schemas/config.py
src/axolotl/utils/schemas/datasets.py
src/axolotl/utils/schemas/deprecated.py
src/axolotl/utils/schemas/dynamic_checkpoint.py
src/axolotl/utils/schemas/enums.py
src/axolotl/utils/schemas/fsdp.py
src/axolotl/utils/schemas/integrations.py
src/axolotl/utils/schemas/model.py
src/axolotl/utils/schemas/multimodal.py
src/axolotl/utils/schemas/peft.py
src/axolotl/utils/schemas/quantization.py
src/axolotl/utils/schemas/training.py
src/axolotl/utils/schemas/trl.py
src/axolotl/utils/schemas/utils.py
src/axolotl/utils/schemas/validation.py
src/axolotl/utils/schemas/vllm.py
src/axolotl/utils/schemas/internal/__init__.py
tests/__init__.py
tests/conftest.py
tests/constants.py
tests/hf_offline_utils.py
tests/test_attn_implementation.py
tests/test_chunked_xentropy.py
tests/test_context_parallel_batch_size.py
tests/test_convert.py
tests/test_data.py
tests/test_datasets.py
tests/test_dict.py
tests/test_ebft_kernels.py
tests/test_ebft_strided_structured.py
tests/test_exact_deduplication.py
tests/test_fp32_norms.py
tests/test_freeze.py
tests/test_http_weight_sync.py
tests/test_loaders.py
tests/test_logging_config_file_capture.py
tests/test_lora.py
tests/test_mm_chat_collator.py
tests/test_no_legacy_attn_reads.py
tests/test_normalize_config.py
tests/test_opentelemetry_callback.py
tests/test_packed_batch_sampler.py
tests/test_packed_dataset.py
tests/test_packed_pretraining.py
tests/test_perplexity.py
tests/test_processing_strategies.py
tests/test_prompt_tokenizers.py
tests/test_prompters.py
tests/test_revision_parameter.py
tests/test_save_deduplicated.py
tests/test_schedulers.py
tests/test_streaming.py
tests/test_tensor_parallel_batch_size.py
tests/test_tokenizers.py
tests/test_train.py
tests/test_triton_kernels.py
tests/test_utils_tee.py
tests/test_validation_dataset.py
tests/cli/__init__.py
tests/cli/conftest.py
tests/cli/test_cli_base.py
tests/cli/test_cli_evaluate.py
tests/cli/test_cli_fetch.py
tests/cli/test_cli_inference.py
tests/cli/test_cli_interface.py
tests/cli/test_cli_merge_lora.py
tests/cli/test_cli_merge_sharded_fsdp_weights.py
tests/cli/test_cli_preprocess.py
tests/cli/test_cli_sweeps.py
tests/cli/test_cli_train.py
tests/cli/test_cli_version.py
tests/cli/test_load_cfg_capabilities.py
tests/cli/test_nested_options.py
tests/cli/test_utils.py
tests/core/test_async_grpo.py
tests/core/test_builders.py
tests/core/chat/__init__.py
tests/core/chat/test_messages.py
tests/core/chat/format/__init__.py
tests/e2e/.gitignore
tests/e2e/__init__.py
tests/e2e/test_activation_offloading.py
tests/e2e/test_deepseekv3.py
tests/e2e/test_diffusion.py
tests/e2e/test_dpo.py
tests/e2e/test_embeddings_lr.py
tests/e2e/test_evaluate.py
tests/e2e/test_falcon.py
tests/e2e/test_gemma2.py
tests/e2e/test_gemma3_text.py
tests/e2e/test_imports.py
tests/e2e/test_llama.py
tests/e2e/test_llama_pretrain.py
tests/e2e/test_llama_vision.py
tests/e2e/test_load_model.py
tests/e2e/test_lora_llama.py
tests/e2e/test_mamba.py
tests/e2e/test_mistral.py
tests/e2e/test_mixtral.py
tests/e2e/test_optimizers.py
tests/e2e/test_packing_loss.py
tests/e2e/test_phi.py
tests/e2e/test_preprocess.py
tests/e2e/test_process_reward_model_smollm2.py
tests/e2e/test_profiler.py
tests/e2e/test_qat.py
tests/e2e/test_quantization.py
tests/e2e/test_qwen.py
tests/e2e/test_save_first_step.py
tests/e2e/test_schedulers.py
tests/e2e/test_streaming.py
tests/e2e/test_tokenizer.py
tests/e2e/utils.py
tests/e2e/integrations/test_cut_cross_entropy.py
tests/e2e/integrations/test_fp8.py
tests/e2e/integrations/test_hooks.py
tests/e2e/integrations/test_kd.py
tests/e2e/integrations/test_liger.py
tests/e2e/integrations/test_llm_compressor.py
tests/e2e/integrations/test_scattermoe_lora_kernels.py
tests/e2e/integrations/test_scattermoe_lora_olmoe.py
tests/e2e/integrations/test_sonicmoe.py
tests/e2e/integrations/test_sonicmoe_lora.py
tests/e2e/kernels/test_geglu.py
tests/e2e/kernels/test_lora.py
tests/e2e/kernels/test_lora_features.py
tests/e2e/kernels/test_quantize.py
tests/e2e/kernels/test_swiglu.py
tests/e2e/multigpu/__init__.py
tests/e2e/multigpu/_fp32_norms_dtype_capture.py
tests/e2e/multigpu/test_dist_muon_fsdp2.py
tests/e2e/multigpu/test_eval.py
tests/e2e/multigpu/test_fp8_fsdp2.py
tests/e2e/multigpu/test_fsdp1.py
tests/e2e/multigpu/test_fsdp2.py
tests/e2e/multigpu/test_fsdp2_fp32_norms.py
tests/e2e/multigpu/test_fsdp2_lora_kernels.py
tests/e2e/multigpu/test_gemma3.py
tests/e2e/multigpu/test_llama.py
tests/e2e/multigpu/test_locking.py
tests/e2e/multigpu/test_ray.py
tests/e2e/multigpu/test_tiled_mlp_fsdp2.py
tests/e2e/multigpu/test_tp.py
tests/e2e/multigpu/patched/__init__.py
tests/e2e/multigpu/patched/test_sp.py
tests/e2e/multigpu/solo/__init__.py
tests/e2e/multigpu/solo/test_flex.py
tests/e2e/multigpu/solo/test_gdpo.py
tests/e2e/multigpu/solo/test_grpo.py
tests/e2e/patched/__init__.py
tests/e2e/patched/test_4d_multipack_llama.py
tests/e2e/patched/test_activation_checkpointing.py
tests/e2e/patched/test_cli_integrations.py
tests/e2e/patched/test_fa_xentropy.py
tests/e2e/patched/test_falcon_samplepack.py
tests/e2e/patched/test_flattening.py
tests/e2e/patched/test_fsdp2_qlora.py
tests/e2e/patched/test_fused_llama.py
tests/e2e/patched/test_lora_llama_multipack.py
tests/e2e/patched/test_mistral_samplepack.py
tests/e2e/patched/test_mixtral_samplepack.py
tests/e2e/patched/test_model_patches.py
tests/e2e/patched/test_peft_embeddings.py
tests/e2e/patched/test_phi_multipack.py
tests/e2e/patched/test_resume.py
tests/e2e/patched/lora_kernels/__init__.py
tests/e2e/patched/lora_kernels/test_lora_kernel_patching.py
tests/e2e/solo/__init__.py
tests/e2e/solo/test_batch_flattening.py
tests/e2e/solo/test_flex.py
tests/e2e/solo/test_relora_llama.py
tests/e2e/solo/test_reward_model_smollm2.py
tests/e2e/solo/test_trainer_loss_calc.py
tests/fixtures/conversation.json
tests/fixtures/conversation.missingturns.json
tests/fixtures/conversation.tokenized.json
tests/fixtures/conversation.tokenized_llama2chat.json
tests/fixtures/alpaca/alpaca.json
tests/integrations/__init__.py
tests/integrations/test_adapter_plugin_registry.py
tests/integrations/test_diffusion.py
tests/integrations/test_diffusion_callback.py
tests/integrations/test_expert_parallel.py
tests/integrations/test_gemma4_moe.py
tests/integrations/test_kd_chat_template.py
tests/integrations/test_kd_liger.py
tests/integrations/test_kd_trainer_direct_loss.py
tests/integrations/test_liger.py
tests/integrations/test_liger_qwen_vl_rope_default.py
tests/integrations/test_nemo_gym.py
tests/integrations/test_scattermoe_autotune_telemetry.py
tests/integrations/test_scattermoe_lora.py
tests/integrations/test_scattermoe_lora_kernels.py
tests/integrations/test_sonicmoe.py
tests/integrations/test_sonicmoe_lora.py
tests/integrations/test_swanlab.py
tests/integrations/kernels/__init__.py
tests/integrations/kernels/scattermoe_lora/__init__.py
tests/integrations/kernels/scattermoe_lora/bench_int64_kernel.py
tests/integrations/kernels/scattermoe_lora/bench_int64_kernel_results.md
tests/integrations/kernels/scattermoe_lora/bench_mxfp4.py
tests/integrations/kernels/scattermoe_lora/bench_mxfp4_results.md
tests/integrations/kernels/scattermoe_lora/conftest.py
tests/integrations/kernels/scattermoe_lora/test_mxfp4_expert_weights.py
tests/integrations/kernels/scattermoe_lora/test_mxfp4_integration.py
tests/integrations/kernels/scattermoe_lora/test_parallel_experts_large_batch_repro.py
tests/integrations/kernels/scattermoe_lora/test_scattermoe_lora_int64_indices.py
tests/integrations/kernels/scattermoe_lora/test_scattermoe_lora_m_bucket.py
tests/integrations/kernels/scattermoe_lora/test_shared_dequant_helper.py
tests/integrations/monkeypatch/__init__.py
tests/integrations/monkeypatch/test_tiled_mlp_moe.py
tests/integrations/mora/test_mora.py
tests/kernels/test_fused_rope_autotune_telemetry.py
tests/kernels/test_gemma4_fused_rope.py
tests/kernels/test_gemma4_fused_rope_compile.py
tests/kernels/test_gemma4_fused_rope_unit_offset.py
tests/kernels/test_rms_norm_gated.py
tests/monkeypatch/test_gemma4_fused_attn.py
tests/monkeypatch/test_gemma4_fused_attn_patch.py
tests/monkeypatch/test_gemma4_hybrid_mask.py
tests/monkeypatch/test_gemma4_kernelize.py
tests/monkeypatch/test_llama_attn_hijack_flash.py
tests/monkeypatch/test_mamba_utils.py
tests/monkeypatch/test_pixtral_flash_attention_patch.py
tests/monkeypatch/test_qwen3_5_fused_attn.py
tests/monkeypatch/test_qwen3_fused_attn.py
tests/monkeypatch/test_qwen3_fused_attn_defensive.py
tests/monkeypatch/test_qwen3_fused_attn_robustness.py
tests/monkeypatch/test_qwen3_next_modeling_patch.py
tests/monkeypatch/test_qwen3_vl_fused_attn.py
tests/monkeypatch/test_relora.py
tests/monkeypatch/test_trainer_accelerator_args.py
tests/monkeypatch/test_trl_vllm.py
tests/monkeypatch/test_voxtral_modeling_patch.py
tests/patched/test_validation.py
tests/prompt_strategies/__init__.py
tests/prompt_strategies/conftest.py
tests/prompt_strategies/test_alpaca.py
tests/prompt_strategies/test_chat_template_ds_schema_unification.py
tests/prompt_strategies/test_chat_template_utils.py
tests/prompt_strategies/test_chat_templates.py
tests/prompt_strategies/test_chat_templates_advanced.py
tests/prompt_strategies/test_chat_templates_mistral.py
tests/prompt_strategies/test_chat_templates_thinking.py
tests/prompt_strategies/test_chat_templates_tool_call_string_arguments.py
tests/prompt_strategies/test_dpo_chat_templates.py
tests/prompt_strategies/test_dpo_chatml.py
tests/prompt_strategies/test_jinja_template_analyzer.py
tests/prompt_strategies/test_raw_io.py
tests/prompt_strategies/test_stepwise.py
tests/prompt_strategies/test_synthetic.py
tests/prompt_strategies/messages/__init__.py
tests/prompt_strategies/messages/test_chat.py
tests/telemetry/__init__.py
tests/telemetry/conftest.py
tests/telemetry/test_callbacks.py
tests/telemetry/test_errors.py
tests/telemetry/test_manager.py
tests/telemetry/test_runtime_metrics.py
tests/utils/test_cuda13.py
tests/utils/test_grpo_rw_fnc.py
tests/utils/test_import_helper.py
tests/utils/test_mistral3_processor.py
tests/utils/test_train.py
tests/utils/callbacks/test_dynamic_checkpoint.py
tests/utils/callbacks/test_gc_callback.py
tests/utils/callbacks/test_skip_eval_on_resume.py
tests/utils/data/test_hash.py
tests/utils/data/test_rl.py
tests/utils/data/test_utils.py
tests/utils/lora/test_config_validation_lora.py
tests/utils/lora/test_freeze_lora.py
tests/utils/lora/test_merge_lora.py
tests/utils/schemas/validation/test_activation_offloading.py
tests/utils/schemas/validation/test_config_validators.py
tests/utils/schemas/validation/test_default_values.py
tests/utils/schemas/validation/test_fsdp.py
tests/utils/schemas/validation/test_moe_quant.py
tests/utils/schemas/validation/test_qgalore.py
tests/utils/schemas/validation/mora/test_mora_validation.py