.gitignore
LICENSE
README.md
mkdocs.yml
pyproject.toml
pytest.ini
.github/workflows/docker-image.yml
.github/workflows/docs.yml
.github/workflows/publish.yml
.github/workflows/test.yml
configs/eval/default.yaml
configs/eval/quick.yaml
configs/eval/quick_pretrained.yaml
configs/metrics/default.yaml
configs/metrics/tinystories_test.yaml
configs/prepare_data/default.yaml
configs/prepare_data/tokenizer/gpt2.yaml
configs/prepare_data/tokenizer/llama2.yaml
configs/prepare_data/tokenizer/llama3.yaml
configs/serve/char_shakespeare.yaml
configs/serve/qwen.yaml
configs/serve/tinyllama.yaml
configs/train/defaults.yaml
configs/train/train_llama.yaml
configs/train/train_llama_efficient.yaml
configs/train/train_llama_pretokenized.yaml
configs/train/train_llama_shakespeare.yaml
configs/train/criterion/cross_entropy.yaml
configs/train/loggers/basic.yaml
configs/train/loggers/none.yaml
configs/train/lr_scheduler/cosine.yaml
configs/train/lr_scheduler/wsd.yaml
configs/train/model/llama2-1b.yaml
configs/train/model/llama2-7b.yaml
configs/train/model/llama2-x-lite.yaml
configs/train/model/llama2.yaml
configs/train/optimization/amp/bfloat16.yaml
configs/train/optimization/amp/disabled.yaml
configs/train/optimization/amp/float16.yaml
docker/Dockerfile
docker/Dockerfile.raw
docker/build.sh
docker/docker-bake.hcl
docker/packages.txt
docker/requirements-dev.txt
docker/deps/clone_torch.sh
docker/deps/install_dcgm.sh
docker/deps/install_flash.sh
docker/deps/install_gdrcopy.sh
docker/deps/install_libfabric.sh
docker/deps/install_nccl.sh
docker/deps/install_nvshmem
docker/deps/install_ompi.sh
docker/deps/install_torch.sh
docker/deps/install_ucx.sh
docs/gen_doc_stubs.py
docs/index.md
docs/javascripts/mathjax.js
docs/user-guide/configuration.md
docs/user-guide/data-prep.md
docs/user-guide/data.md
docs/user-guide/docker.md
docs/user-guide/metrics.md
docs/user-guide/models.md
docs/user-guide/optimizers.md
docs/user-guide/quick-start.md
docs/user-guide/serving.md
optimus_dl/__init__.py
optimus_dl/_version.py
optimus_dl/py.typed
optimus_dl.egg-info/PKG-INFO
optimus_dl.egg-info/SOURCES.txt
optimus_dl.egg-info/dependency_links.txt
optimus_dl.egg-info/entry_points.txt
optimus_dl.egg-info/requires.txt
optimus_dl.egg-info/top_level.txt
optimus_dl/core/__init__.py
optimus_dl/core/bootstrap.py
optimus_dl/core/device.py
optimus_dl/core/env.py
optimus_dl/core/log.py
optimus_dl/core/model_utils.py
optimus_dl/core/numerical.py
optimus_dl/core/omegaconf.py
optimus_dl/core/profile.py
optimus_dl/core/registry.py
optimus_dl/core/seed.py
optimus_dl/modules/__init__.py
optimus_dl/modules/checkpoint/__init__.py
optimus_dl/modules/checkpoint/checkpoint_manager.py
optimus_dl/modules/checkpoint/load_strategy.py
optimus_dl/modules/criterion/__init__.py
optimus_dl/modules/criterion/base.py
optimus_dl/modules/criterion/config.py
optimus_dl/modules/criterion/cross_entropy.py
optimus_dl/modules/data/__init__.py
optimus_dl/modules/data/config.py
optimus_dl/modules/data/profiling.py
optimus_dl/modules/data/datasets/__init__.py
optimus_dl/modules/data/datasets/base.py
optimus_dl/modules/data/datasets/composite.py
optimus_dl/modules/data/datasets/huggingface.py
optimus_dl/modules/data/datasets/loop_dataset.py
optimus_dl/modules/data/datasets/tokenized_dataset.py
optimus_dl/modules/data/datasets/tokenized_flat_dataset.py
optimus_dl/modules/data/datasets/txt_lines.py
optimus_dl/modules/data/datasets/strategies/__init__.py
optimus_dl/modules/data/datasets/strategies/base.py
optimus_dl/modules/data/datasets/strategies/concat_random.py
optimus_dl/modules/data/datasets/strategies/document.py
optimus_dl/modules/data/presets/__init__.py
optimus_dl/modules/data/presets/fineweb-edu.py
optimus_dl/modules/data/presets/slimpajama.py
optimus_dl/modules/data/presets/tinyshakespeare.py
optimus_dl/modules/data/transforms/__init__.py
optimus_dl/modules/data/transforms/base.py
optimus_dl/modules/data/transforms/basic_batcher.py
optimus_dl/modules/data/transforms/chunk_tokens.py
optimus_dl/modules/data/transforms/composite.py
optimus_dl/modules/data/transforms/flat_tokens_batcher.py
optimus_dl/modules/data/transforms/prefetch.py
optimus_dl/modules/data/transforms/shuffle.py
optimus_dl/modules/data/transforms/skip.py
optimus_dl/modules/data/transforms/to_device.py
optimus_dl/modules/data/transforms/tokenize.py
optimus_dl/modules/distributed/__init__.py
optimus_dl/modules/distributed/base.py
optimus_dl/modules/distributed/config.py
optimus_dl/modules/distributed/fake.py
optimus_dl/modules/distributed/mesh.py
optimus_dl/modules/eval/__init__.py
optimus_dl/modules/eval/model.py
optimus_dl/modules/loggers/__init__.py
optimus_dl/modules/loggers/base.py
optimus_dl/modules/loggers/config.py
optimus_dl/modules/loggers/jsonl.py
optimus_dl/modules/loggers/wandb.py
optimus_dl/modules/lr_scheduler/__init__.py
optimus_dl/modules/lr_scheduler/base.py
optimus_dl/modules/lr_scheduler/cosine_annealing.py
optimus_dl/modules/lr_scheduler/linear_warmup.py
optimus_dl/modules/lr_scheduler/wsd_scheduler.py
optimus_dl/modules/metrics/__init__.py
optimus_dl/modules/metrics/base.py
optimus_dl/modules/metrics/common.py
optimus_dl/modules/metrics/engine.py
optimus_dl/modules/metrics/metrics.py
optimus_dl/modules/metrics/source.py
optimus_dl/modules/metrics/sources/__init__.py
optimus_dl/modules/metrics/sources/causal_lm.py
optimus_dl/modules/metrics/sources/generation.py
optimus_dl/modules/metrics/sources/model_info.py
optimus_dl/modules/model/__init__.py
optimus_dl/modules/model/base.py
optimus_dl/modules/model/config.py
optimus_dl/modules/model/gpt2.py
optimus_dl/modules/model/llama2.py
optimus_dl/modules/model/olmo3.py
optimus_dl/modules/model/qwen3.py
optimus_dl/modules/model/blocks/__init__.py
optimus_dl/modules/model/blocks/attention.py
optimus_dl/modules/model/blocks/layer_norms.py
optimus_dl/modules/model/blocks/mlp.py
optimus_dl/modules/model/blocks/rope.py
optimus_dl/modules/model/blocks/transformer.py
optimus_dl/modules/model/presets/__init__.py
optimus_dl/modules/model/presets/hf_llama.py
optimus_dl/modules/model/presets/hf_olmo3.py
optimus_dl/modules/model/presets/hf_qwen3.py
optimus_dl/modules/model/presets/utils.py
optimus_dl/modules/model_transforms/__init__.py
optimus_dl/modules/model_transforms/base.py
optimus_dl/modules/model_transforms/checkpoint.py
optimus_dl/modules/model_transforms/compile.py
optimus_dl/modules/model_transforms/config.py
optimus_dl/modules/model_transforms/distributed.py
optimus_dl/modules/model_transforms/load_weights.py
optimus_dl/modules/model_transforms/tensor_parallel.py
optimus_dl/modules/optim/__init__.py
optimus_dl/modules/optim/adamw.py
optimus_dl/modules/optim/config.py
optimus_dl/modules/optim/muon.py
optimus_dl/modules/optim/soap.py
optimus_dl/modules/tokenizer/__init__.py
optimus_dl/modules/tokenizer/base.py
optimus_dl/modules/tokenizer/config.py
optimus_dl/modules/tokenizer/implementations/__init__.py
optimus_dl/modules/tokenizer/implementations/char.py
optimus_dl/modules/tokenizer/implementations/huggingface.py
optimus_dl/modules/tokenizer/implementations/inline_tokens.py
optimus_dl/modules/tokenizer/implementations/tiktoken.py
optimus_dl/recipe/__init__.py
optimus_dl/recipe/eval/__init__.py
optimus_dl/recipe/eval/base.py
optimus_dl/recipe/eval/config.py
optimus_dl/recipe/metrics/__init__.py
optimus_dl/recipe/metrics/base.py
optimus_dl/recipe/metrics/config.py
optimus_dl/recipe/mixins/__init__.py
optimus_dl/recipe/mixins/model_builder.py
optimus_dl/recipe/pretokenize/__init__.py
optimus_dl/recipe/pretokenize/checkpoint.py
optimus_dl/recipe/pretokenize/config.py
optimus_dl/recipe/pretokenize/processor.py
optimus_dl/recipe/pretokenize/recipe.py
optimus_dl/recipe/pretokenize/sharder.py
optimus_dl/recipe/pretokenize/source.py
optimus_dl/recipe/serve/__init__.py
optimus_dl/recipe/serve/base.py
optimus_dl/recipe/serve/config.py
optimus_dl/recipe/serve/models.py
optimus_dl/recipe/train/__init__.py
optimus_dl/recipe/train/base.py
optimus_dl/recipe/train/config.py
optimus_dl/recipe/train/builders/__init__.py
optimus_dl/recipe/train/builders/criterion_builder.py
optimus_dl/recipe/train/builders/data_builder.py
optimus_dl/recipe/train/builders/optimizer_builder.py
optimus_dl/recipe/train/builders/scheduler_builder.py
optimus_dl/recipe/train/mixins/__init__.py
optimus_dl/recipe/train/mixins/execution/__init__.py
optimus_dl/recipe/train/mixins/execution/context_mixin.py
optimus_dl/recipe/train/mixins/execution/interruption_mixin.py
optimus_dl/recipe/train/mixins/execution/iteration_mixin.py
optimus_dl/recipe/train/mixins/managers/__init__.py
optimus_dl/recipe/train/mixins/managers/evaluation_manager.py
optimus_dl/recipe/train/mixins/managers/logger_manager.py
scripts/eval.py
scripts/metrics.py
scripts/pretokenize.py
scripts/serve.py
scripts/train.py
tests/conftest.py
tests/checkpoint/test_checkpoint_manager.py
tests/core/test_registry_validation.py
tests/criterion/test_cross_entropy.py
tests/criterion/test_flat_criterion.py
tests/criterion/test_flat_criterion_consistency.py
tests/data/test_basic_batcher.py
tests/data/test_char_tokenize.py
tests/data/test_composite_dataset_v2.py
tests/data/test_datasets_totality.py
tests/data/test_hf_dataset.py
tests/data/test_inline_tokens_tokenizer.py
tests/data/test_profiling.py
tests/data/test_skip_transforms.py
tests/data/test_to_device.py
tests/data/test_token_based_batching.py
tests/data/test_tokenized_dataset.py
tests/data/test_tokenized_dataset_v2.py
tests/data/test_tokenized_flat_dataset.py
tests/data/test_txt_lines.py
tests/integration/__init__.py
tests/integration/test_checkpoint_resumption.py
tests/integration/test_data_pipeline.py
tests/integration/test_training_pipeline.py
tests/integration/liger_kernel/test_liger_cross_entropy.py
tests/integration/liger_kernel/test_liger_equivalence.py
tests/loggers/test_loggers.py
tests/lr_scheduler/test_base.py
tests/lr_scheduler/test_cosine_annealing.py
tests/lr_scheduler/test_linear_warmup.py
tests/metrics/test_generation_source.py
tests/metrics/test_metric_engine_advanced.py
tests/metrics/test_metric_engine_extended.py
tests/metrics/test_metric_engine_external.py
tests/metrics/test_metrics_system.py
tests/model/test_attention.py
tests/model/test_attention_modes.py
tests/model/test_flat_consistency.py
tests/model/test_gpt2.py
tests/model/test_hf_llama_loading.py
tests/model/test_layer_norms.py
tests/model/test_llama2.py
tests/model/test_logits_matching.py
tests/model/test_olmo3.py
tests/model/test_olmo3_integration.py
tests/model/test_qwen3_integration.py
tests/model/test_sliding_window.py
tests/model/test_tp.py
tests/model/test_tp_collectives.py
tests/model/test_tp_correctness.py
tests/model/test_tp_memory.py
tests/model/test_varlen_gqa.py
tests/model/test_varlen_sliding_window.py
tests/model_transforms/test_checkpoint.py
tests/optim/test_adamw.py
tests/optim/test_optimizing.py
tests/recipe/pretokenize/test_pretok_checkpoint_manager.py
tests/recipe/pretokenize/test_processor_and_recipe.py
tests/recipe/pretokenize/test_sharder.py
tests/recipe/pretokenize/test_source.py