LICENSE
README.md
pyproject.toml
setup.py
src/aligntune/__init__.py
src/aligntune/__main__.py
src/aligntune/_fallbacks.py
src/aligntune/_imports.py
src/aligntune/main.py
src/aligntune/py.typed
src/aligntune.egg-info/PKG-INFO
src/aligntune.egg-info/SOURCES.txt
src/aligntune.egg-info/dependency_links.txt
src/aligntune.egg-info/entry_points.txt
src/aligntune.egg-info/requires.txt
src/aligntune.egg-info/top_level.txt
src/aligntune/backends/__init__.py
src/aligntune/backends/trl/__init__.py
src/aligntune/backends/trl/eval/__init__.py
src/aligntune/backends/trl/rewards/__init__.py
src/aligntune/backends/trl/rewards/training.py
src/aligntune/backends/trl/rl/__init__.py
src/aligntune/backends/trl/rl/counterfact_grpo/__init__.py
src/aligntune/backends/trl/rl/counterfact_grpo/counterfact_grpo.py
src/aligntune/backends/trl/rl/counterfact_grpo/custom_trainer.py
src/aligntune/backends/trl/rl/dapo/__init__.py
src/aligntune/backends/trl/rl/dapo/dapo.py
src/aligntune/backends/trl/rl/dpo/__init__.py
src/aligntune/backends/trl/rl/dpo/dpo.py
src/aligntune/backends/trl/rl/dr_grpo/__init__.py
src/aligntune/backends/trl/rl/dr_grpo/drgrpo.py
src/aligntune/backends/trl/rl/gbmpo/__init__.py
src/aligntune/backends/trl/rl/gbmpo/gbmpo.py
src/aligntune/backends/trl/rl/gbmpo/gbmpo_trainer.py
src/aligntune/backends/trl/rl/grpo/__init__.py
src/aligntune/backends/trl/rl/grpo/grpo.py
src/aligntune/backends/trl/rl/gspo/__init__.py
src/aligntune/backends/trl/rl/gspo/gspo.py
src/aligntune/backends/trl/rl/meta_es/es_utils.py
src/aligntune/backends/trl/rl/meta_es/logging_config.py
src/aligntune/backends/trl/rl/meta_es/meta_es_trainer.py
src/aligntune/backends/trl/rl/meta_es/meta_es_trainer_utils.py
src/aligntune/backends/trl/rl/meta_es/neural_mirror_grpo.py
src/aligntune/backends/trl/rl/meta_es/train_nmdrgrpo_code.py
src/aligntune/backends/trl/rl/meta_es/train_nmdrgrpo_es_math.py
src/aligntune/backends/trl/rl/meta_es/vllm_evaluator.py
src/aligntune/backends/trl/rl/meta_es/vllm_evaluator_math.py
src/aligntune/backends/trl/rl/neural_mirror_grpo/NMGrpo.py
src/aligntune/backends/trl/rl/neural_mirror_grpo/neural_mirror_grpo.py
src/aligntune/backends/trl/rl/pace/__init__.py
src/aligntune/backends/trl/rl/pace/baseline.py
src/aligntune/backends/trl/rl/pace/curriculum.py
src/aligntune/backends/trl/rl/pace/pace.py
src/aligntune/backends/trl/rl/ppo/__init__.py
src/aligntune/backends/trl/rl/ppo/ppo.py
src/aligntune/backends/trl/sft/Classification_trainer.py
src/aligntune/backends/trl/sft/__init__.py
src/aligntune/backends/trl/sft/sft.py
src/aligntune/backends/unsloth/__init__.py
src/aligntune/backends/unsloth/eval/__init__.py
src/aligntune/backends/unsloth/rl/__init__.py
src/aligntune/backends/unsloth/rl/counterfact_grpo/__init__.py
src/aligntune/backends/unsloth/rl/counterfact_grpo/counterfact_grpo.py
src/aligntune/backends/unsloth/rl/dapo/__init__.py
src/aligntune/backends/unsloth/rl/dapo/dapo.py
src/aligntune/backends/unsloth/rl/dpo/__init__.py
src/aligntune/backends/unsloth/rl/dpo/dpo.py
src/aligntune/backends/unsloth/rl/dr_grpo/__init__.py
src/aligntune/backends/unsloth/rl/dr_grpo/drgrpo.py
src/aligntune/backends/unsloth/rl/gbmpo/__init__.py
src/aligntune/backends/unsloth/rl/gbmpo/gbmpo.py
src/aligntune/backends/unsloth/rl/gbmpo/gbmpo_trainer.py
src/aligntune/backends/unsloth/rl/grpo/__init__.py
src/aligntune/backends/unsloth/rl/grpo/grpo.py
src/aligntune/backends/unsloth/rl/gspo/__init__.py
src/aligntune/backends/unsloth/rl/gspo/gspo.py
src/aligntune/backends/unsloth/rl/neural_mirror_grpo/NMGrpo.py
src/aligntune/backends/unsloth/rl/neural_mirror_grpo/neural_mirror_grpo.py
src/aligntune/backends/unsloth/rl/pace/__init__.py
src/aligntune/backends/unsloth/rl/pace/pace.py
src/aligntune/backends/unsloth/rl/ppo/__init__.py
src/aligntune/backends/unsloth/rl/ppo/ppo.py
src/aligntune/backends/unsloth/rl/ppo/unsloth_patches.py
src/aligntune/backends/unsloth/sft/__init__.py
src/aligntune/backends/unsloth/sft/sft.py
src/aligntune/cli/__init__.py
src/aligntune/cli/__main__.py
src/aligntune/cli/arg_parser.py
src/aligntune/cli/config_builders.py
src/aligntune/cli/diagnose.py
src/aligntune/cli/finetune.py
src/aligntune/cli/recipes.py
src/aligntune/cli/unified-old.py
src/aligntune/cli/unified.py
src/aligntune/cli/validate.py
src/aligntune/core/backend_factory.py
src/aligntune/core/dataset_adapters.py
src/aligntune/core/optimization.py
src/aligntune/core/precision_handler.py
src/aligntune/core/callbacks/__init__.py
src/aligntune/core/callbacks/base.py
src/aligntune/core/rl/__init__.py
src/aligntune/core/rl/caching.py
src/aligntune/core/rl/config.py
src/aligntune/core/rl/config_loader.py
src/aligntune/core/rl/distributed.py
src/aligntune/core/rl/evaluator.py
src/aligntune/core/rl/function_based_reward_model.py
src/aligntune/core/rl/logging.py
src/aligntune/core/rl/models.py
src/aligntune/core/rl/registries.py
src/aligntune/core/rl/reward_model_wrapper.py
src/aligntune/core/rl/rollout.py
src/aligntune/core/rl/sample_logger.py
src/aligntune/core/rl/trainer_base.py
src/aligntune/core/rl/trainer_factory.py
src/aligntune/core/sft/__init__.py
src/aligntune/core/sft/config.py
src/aligntune/core/sft/config_loader.py
src/aligntune/core/sft/evaluator.py
src/aligntune/core/sft/logging.py
src/aligntune/core/sft/trainer_base.py
src/aligntune/core/sft/trainer_factory.py
src/aligntune/data/__init__.py
src/aligntune/data/full_requirements_test.py
src/aligntune/data/manager.py
src/aligntune/data/processors.py
src/aligntune/data/schemas.py
src/aligntune/data/loaders/__init__.py
src/aligntune/data/loaders/base.py
src/aligntune/data/loaders/csv_loader.py
src/aligntune/data/loaders/directory_loader.py
src/aligntune/data/loaders/hf_loader.py
src/aligntune/data/loaders/json_loader.py
src/aligntune/data/loaders/parquet_loader.py
src/aligntune/data/loaders/resolver.py
src/aligntune/eval/__init__.py
src/aligntune/eval/caching.py
src/aligntune/eval/cli.py
src/aligntune/eval/core.py
src/aligntune/eval/eval_gsm8k.py
src/aligntune/eval/evaluator.py
src/aligntune/eval/full_evaluation_test.py
src/aligntune/eval/lm_eval_integration.py
src/aligntune/eval/registry.py
src/aligntune/eval/rl_evaluator.py
src/aligntune/eval/runner.py
src/aligntune/eval/safe_executor.py
src/aligntune/eval/metrics/__init__.py
src/aligntune/eval/metrics/base.py
src/aligntune/eval/metrics/code.py
src/aligntune/eval/metrics/dpo.py
src/aligntune/eval/metrics/generic.py
src/aligntune/eval/metrics/math.py
src/aligntune/eval/metrics/rl.py
src/aligntune/eval/metrics/text.py
src/aligntune/recipes/__init__.py
src/aligntune/recipes/config.py
src/aligntune/rewards/__init__.py
src/aligntune/rewards/core.py
src/aligntune/rewards/factory.py
src/aligntune/rewards/registry.py
src/aligntune/rewards/training.py
src/aligntune/rl/__init__.py
src/aligntune/rl/core/__init__.py
src/aligntune/scripts/__init__.py
src/aligntune/scripts/precompute_baseline.py
src/aligntune/sft/__init__.py
src/aligntune/sft/core/__init__.py
src/aligntune/utils/__init__.py
src/aligntune/utils/auth.py
src/aligntune/utils/checkpointing.py
src/aligntune/utils/colored_logging.py
src/aligntune/utils/config_extractor.py
src/aligntune/utils/config_utils.py
src/aligntune/utils/device.py
src/aligntune/utils/diagnostics.py
src/aligntune/utils/environment.py
src/aligntune/utils/errors.py
src/aligntune/utils/inference_utils.py
src/aligntune/utils/logging.py
src/aligntune/utils/math_grading.py
src/aligntune/utils/model_loader.py
src/aligntune/utils/validation.py