LICENSE
README.md
pyproject.toml
assets/version.txt
torchtitan/__init__.py
torchtitan/train.py
torchtitan.egg-info/PKG-INFO
torchtitan.egg-info/SOURCES.txt
torchtitan.egg-info/dependency_links.txt
torchtitan.egg-info/requires.txt
torchtitan.egg-info/top_level.txt
torchtitan/components/checkpoint.py
torchtitan/components/dataloader.py
torchtitan/components/loss.py
torchtitan/components/lr_scheduler.py
torchtitan/components/metrics.py
torchtitan/components/optimizer.py
torchtitan/components/tokenizer.py
torchtitan/components/validate.py
torchtitan/components/ft/__init__.py
torchtitan/components/ft/manager.py
torchtitan/components/ft/config/__init__.py
torchtitan/components/ft/config/job_config.py
torchtitan/components/ft/diloco/__init__.py
torchtitan/components/ft/diloco/protocol.py
torchtitan/components/ft/diloco/utils.py
torchtitan/components/quantization/__init__.py
torchtitan/components/quantization/float8.py
torchtitan/components/quantization/mx.py
torchtitan/components/quantization/utils.py
torchtitan/config/__init__.py
torchtitan/config/job_config.py
torchtitan/config/manager.py
torchtitan/distributed/__init__.py
torchtitan/distributed/activation_checkpoint.py
torchtitan/distributed/context_parallel.py
torchtitan/distributed/dual_pipe_v.py
torchtitan/distributed/expert_parallel.py
torchtitan/distributed/parallel_dims.py
torchtitan/distributed/pipeline_parallel.py
torchtitan/distributed/tensor_parallel.py
torchtitan/distributed/utils.py
torchtitan/distributed/deepep/__init__.py
torchtitan/distributed/deepep/deepep.py
torchtitan/experiments/__init__.py
torchtitan/experiments/autoparallel/job_config.py
torchtitan/experiments/autoparallel/deepseek_v3/__init__.py
torchtitan/experiments/autoparallel/deepseek_v3/parallelize_deepseekv3.py
torchtitan/experiments/autoparallel/llama3/__init__.py
torchtitan/experiments/autoparallel/llama3/parallelize_llama.py
torchtitan/experiments/autoparallel/local_map_deepseek_v3/__init__.py
torchtitan/experiments/autoparallel/local_map_deepseek_v3/args.py
torchtitan/experiments/autoparallel/local_map_deepseek_v3/model.py
torchtitan/experiments/autoparallel/local_map_deepseek_v3/parallelize_deepseekv3.py
torchtitan/experiments/autoparallel/tests/__init__.py
torchtitan/experiments/autoparallel/tests/integration_tests.py
torchtitan/experiments/compiler_toolkit/common_utils.py
torchtitan/experiments/compiler_toolkit/cudagraph.py
torchtitan/experiments/compiler_toolkit/graph_utils.py
torchtitan/experiments/compiler_toolkit/job_config.py
torchtitan/experiments/compiler_toolkit/passes.py
torchtitan/experiments/compiler_toolkit/train.py
torchtitan/experiments/compiler_toolkit/deepseek_v3/__init__.py
torchtitan/experiments/compiler_toolkit/deepseek_v3/parallelize.py
torchtitan/experiments/compiler_toolkit/llama3/__init__.py
torchtitan/experiments/compiler_toolkit/llama3/parallelize.py
torchtitan/experiments/compiler_toolkit/scripts/check_numerics.py
torchtitan/experiments/compiler_toolkit/tests/__init__.py
torchtitan/experiments/compiler_toolkit/tests/integration_tests.py
torchtitan/experiments/compiler_toolkit/tests/numerics_utils.py
torchtitan/experiments/compiler_toolkit/tests/test_numerics.py
torchtitan/experiments/compiler_toolkit/tests/test_passes.py
torchtitan/experiments/forge/__init__.py
torchtitan/experiments/forge/engine.py
torchtitan/experiments/forge/example_train.py
torchtitan/experiments/forge/job_config.py
torchtitan/experiments/forge/train_spec.py
torchtitan/experiments/ft/train.py
torchtitan/experiments/moe_symm_mem_kernels/combine.py
torchtitan/experiments/moe_symm_mem_kernels/dispatch.py
torchtitan/experiments/rl/unified/__init__.py
torchtitan/experiments/rl/unified/infer.py
torchtitan/experiments/rl/unified/simple_rl_multiprocess.py
torchtitan/experiments/rl/unified/actors/generator.py
torchtitan/experiments/rl/unified/actors/trainer.py
torchtitan/experiments/rl/unified/infra/parallelism_utils.py
torchtitan/experiments/rl/unified/infra/parallelize.py
torchtitan/experiments/rl/unified/models/attention.py
torchtitan/experiments/rl/unified/models/utils.py
torchtitan/experiments/rl/unified/models/vllm_wrapper.py
torchtitan/experiments/rl/vllm_compat/__init__.py
torchtitan/experiments/rl/vllm_compat/batch_invariant_backward.py
torchtitan/experiments/rl/vllm_compat/simple_rl.py
torchtitan/experiments/rl/vllm_compat/weights_vllm_compat.py
torchtitan/experiments/rl/vllm_compat/models/__init__.py
torchtitan/experiments/rl/vllm_compat/models/attention.py
torchtitan/experiments/rl/vllm_compat/models/qwen3/__init__.py
torchtitan/experiments/rl/vllm_compat/models/qwen3/model_vllm_compat.py
torchtitan/experiments/rl/vllm_compat/tests/__init__.py
torchtitan/experiments/rl/vllm_compat/tests/test_batch_invariant_backward.py
torchtitan/experiments/rl/vllm_compat/tests/test_exact_determinism.py
torchtitan/experiments/rl/vllm_compat/weights/__init__.py
torchtitan/experiments/rl/vllm_compat/weights/converter.py
torchtitan/experiments/simple_fsdp/backend.py
torchtitan/experiments/simple_fsdp/job_config.py
torchtitan/experiments/simple_fsdp/reshard_after_forward.py
torchtitan/experiments/simple_fsdp/simple_fsdp.py
torchtitan/experiments/simple_fsdp/deepseek_v3/__init__.py
torchtitan/experiments/simple_fsdp/deepseek_v3/model.py
torchtitan/experiments/simple_fsdp/deepseek_v3/parallelize.py
torchtitan/experiments/simple_fsdp/llama3/__init__.py
torchtitan/experiments/simple_fsdp/llama3/model.py
torchtitan/experiments/simple_fsdp/llama3/parallelize.py
torchtitan/experiments/simple_fsdp/tests/__init__.py
torchtitan/experiments/simple_fsdp/tests/integration_tests.py
torchtitan/experiments/simple_fsdp/tests/test_numerics.py
torchtitan/experiments/torchcomms/integration_tests.py
torchtitan/experiments/torchcomms/parallel_dims.py
torchtitan/experiments/torchcomms/train.py
torchtitan/experiments/transformers_modeling_backend/__init__.py
torchtitan/experiments/transformers_modeling_backend/job_config.py
torchtitan/experiments/transformers_modeling_backend/infra/parallelize.py
torchtitan/experiments/transformers_modeling_backend/infra/pipeline.py
torchtitan/experiments/transformers_modeling_backend/model/args.py
torchtitan/experiments/transformers_modeling_backend/model/model.py
torchtitan/experiments/transformers_modeling_backend/tests/integration_tests.py
torchtitan/experiments/vlm/__init__.py
torchtitan/experiments/vlm/job_config.py
torchtitan/experiments/vlm/datasets/mm_collator_nld.py
torchtitan/experiments/vlm/datasets/mm_datasets.py
torchtitan/experiments/vlm/datasets/utils/image.py
torchtitan/experiments/vlm/datasets/utils/packing.py
torchtitan/experiments/vlm/datasets/utils/text.py
torchtitan/experiments/vlm/infra/parallelize.py
torchtitan/experiments/vlm/model/args.py
torchtitan/experiments/vlm/model/model.py
torchtitan/experiments/vlm/model/siglip2.py
torchtitan/experiments/vlm/tests/integration_tests.py
torchtitan/hf_datasets/__init__.py
torchtitan/hf_datasets/text_datasets.py
torchtitan/models/__init__.py
torchtitan/models/attention.py
torchtitan/models/utils.py
torchtitan/models/deepseek_v3/__init__.py
torchtitan/models/deepseek_v3/infra/parallelize.py
torchtitan/models/deepseek_v3/model/args.py
torchtitan/models/deepseek_v3/model/model.py
torchtitan/models/deepseek_v3/model/state_dict_adapter.py
torchtitan/models/flux/__init__.py
torchtitan/models/flux/flux_datasets.py
torchtitan/models/flux/job_config.py
torchtitan/models/flux/tokenizer.py
torchtitan/models/flux/train.py
torchtitan/models/flux/utils.py
torchtitan/models/flux/validate.py
torchtitan/models/flux/inference/infer.py
torchtitan/models/flux/inference/sampling.py
torchtitan/models/flux/infra/parallelize.py
torchtitan/models/flux/model/args.py
torchtitan/models/flux/model/autoencoder.py
torchtitan/models/flux/model/hf_embedder.py
torchtitan/models/flux/model/layers.py
torchtitan/models/flux/model/model.py
torchtitan/models/flux/model/state_dict_adapter.py
torchtitan/models/gpt_oss/__init__.py
torchtitan/models/gpt_oss/infra/expert_parallel.py
torchtitan/models/gpt_oss/infra/parallelize.py
torchtitan/models/gpt_oss/model/args.py
torchtitan/models/gpt_oss/model/model.py
torchtitan/models/gpt_oss/model/moe.py
torchtitan/models/gpt_oss/model/state_dict_adapter.py
torchtitan/models/llama3/__init__.py
torchtitan/models/llama3/infra/parallelize.py
torchtitan/models/llama3/model/args.py
torchtitan/models/llama3/model/model.py
torchtitan/models/llama3/model/state_dict_adapter.py
torchtitan/models/llama3_ft/__init__.py
torchtitan/models/llama4/__init__.py
torchtitan/models/llama4/infra/parallelize.py
torchtitan/models/llama4/model/args.py
torchtitan/models/llama4/model/model.py
torchtitan/models/llama4/model/state_dict_adapter.py
torchtitan/models/moe/__init__.py
torchtitan/models/moe/kernels.py
torchtitan/models/moe/moe.py
torchtitan/models/moe/moe_deepep.py
torchtitan/models/moe/utils.py
torchtitan/models/qwen3/__init__.py
torchtitan/models/qwen3/infra/parallelize.py
torchtitan/models/qwen3/model/args.py
torchtitan/models/qwen3/model/model.py
torchtitan/models/qwen3/model/state_dict_adapter.py
torchtitan/protocols/__init__.py
torchtitan/protocols/model.py
torchtitan/protocols/model_converter.py
torchtitan/protocols/state_dict_adapter.py
torchtitan/protocols/train_spec.py
torchtitan/tools/logging.py
torchtitan/tools/profiling.py
torchtitan/tools/utils.py