Metadata-Version: 2.4
Name: skyrl
Version: 0.2.0
Summary: Unified API for training and inference
Requires-Python: >=3.11
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: datasets>=4.0.0
Requires-Dist: pillow>=11.3.0
Requires-Dist: rich>=14.1.0
Requires-Dist: safetensors>=0.6.2
Requires-Dist: tokenizers>=0.21.2
Requires-Dist: transformers<=5.3.0,>=5.0.0
Requires-Dist: typer>=0.17.4
Requires-Dist: peft
Requires-Dist: hf_transfer
Requires-Dist: cloudpathlib>=0.23.0
Provides-Extra: gpu
Requires-Dist: jax[cuda12]>=0.7.2; sys_platform == "linux" and extra == "gpu"
Provides-Extra: tpu
Requires-Dist: jax[tpu]>=0.7.2; sys_platform == "linux" and extra == "tpu"
Provides-Extra: tinker
Requires-Dist: tinker>=0.3.0; extra == "tinker"
Requires-Dist: fastapi[standard]; extra == "tinker"
Requires-Dist: sqlmodel; extra == "tinker"
Requires-Dist: sqlalchemy[asyncio]; extra == "tinker"
Requires-Dist: aiosqlite; extra == "tinker"
Requires-Dist: asyncpg; extra == "tinker"
Requires-Dist: psycopg2-binary; extra == "tinker"
Provides-Extra: aws
Requires-Dist: cloudpathlib[s3]; extra == "aws"
Provides-Extra: gcp
Requires-Dist: cloudpathlib[gs]; extra == "gcp"
Provides-Extra: azure
Requires-Dist: cloudpathlib[azure]; extra == "azure"
Provides-Extra: jax
Requires-Dist: jax<1.0,>=0.8; extra == "jax"
Requires-Dist: flax>=0.12.2; extra == "jax"
Requires-Dist: optax>=0.2.5; extra == "jax"
Provides-Extra: skyrl-train
Requires-Dist: loguru; extra == "skyrl-train"
Requires-Dist: tqdm; extra == "skyrl-train"
Requires-Dist: ninja; extra == "skyrl-train"
Requires-Dist: tensorboard; extra == "skyrl-train"
Requires-Dist: func_timeout; extra == "skyrl-train"
Requires-Dist: hydra-core==1.3.2; extra == "skyrl-train"
Requires-Dist: accelerate; extra == "skyrl-train"
Requires-Dist: torchdata; extra == "skyrl-train"
Requires-Dist: omegaconf; extra == "skyrl-train"
Requires-Dist: ray==2.51.1; extra == "skyrl-train"
Requires-Dist: peft; extra == "skyrl-train"
Requires-Dist: debugpy==1.8.0; extra == "skyrl-train"
Requires-Dist: hf_transfer; extra == "skyrl-train"
Requires-Dist: wandb; extra == "skyrl-train"
Requires-Dist: datasets>=4.0.0; extra == "skyrl-train"
Requires-Dist: tensordict; extra == "skyrl-train"
Requires-Dist: jaxtyping; extra == "skyrl-train"
Requires-Dist: skyrl-gym==0.3.0; extra == "skyrl-train"
Requires-Dist: flash-attn==2.8.3; sys_platform == "linux" and extra == "skyrl-train"
Requires-Dist: polars; extra == "skyrl-train"
Requires-Dist: s3fs; extra == "skyrl-train"
Requires-Dist: fastapi; extra == "skyrl-train"
Requires-Dist: uvicorn; extra == "skyrl-train"
Requires-Dist: vllm-router; sys_platform == "linux" and extra == "skyrl-train"
Requires-Dist: pybind11; extra == "skyrl-train"
Requires-Dist: setuptools; extra == "skyrl-train"
Requires-Dist: nixl; sys_platform == "linux" and extra == "skyrl-train"
Provides-Extra: fsdp
Requires-Dist: skyrl[skyrl-train]; extra == "fsdp"
Requires-Dist: vllm==0.19.0; sys_platform == "linux" and extra == "fsdp"
Requires-Dist: vllm-router; sys_platform == "linux" and extra == "fsdp"
Requires-Dist: nixl; sys_platform == "linux" and extra == "fsdp"
Requires-Dist: flash-linear-attention; sys_platform == "linux" and extra == "fsdp"
Requires-Dist: causal-conv1d; sys_platform == "linux" and extra == "fsdp"
Requires-Dist: flash-attn==2.8.3; sys_platform == "linux" and extra == "fsdp"
Requires-Dist: torch==2.10.0; sys_platform == "linux" and extra == "fsdp"
Requires-Dist: flashinfer-python==0.6.6; (sys_platform == "linux" and platform_machine == "x86_64") and extra == "fsdp"
Requires-Dist: flashinfer-jit-cache==0.6.6; (sys_platform == "linux" and platform_machine == "x86_64") and extra == "fsdp"
Requires-Dist: torchvision; sys_platform == "linux" and extra == "fsdp"
Provides-Extra: megatron
Requires-Dist: skyrl[skyrl-train]; extra == "megatron"
Requires-Dist: transformer-engine[pytorch]==2.10.0; sys_platform == "linux" and extra == "megatron"
Requires-Dist: flash-attn==2.8.3; sys_platform == "linux" and extra == "megatron"
Requires-Dist: flash-linear-attention; sys_platform == "linux" and extra == "megatron"
Requires-Dist: causal-conv1d; sys_platform == "linux" and extra == "megatron"
Requires-Dist: vllm==0.19.0; sys_platform == "linux" and extra == "megatron"
Requires-Dist: vllm-router; sys_platform == "linux" and extra == "megatron"
Requires-Dist: nixl; sys_platform == "linux" and extra == "megatron"
Requires-Dist: torch==2.10.0; sys_platform == "linux" and extra == "megatron"
Requires-Dist: flashinfer-python==0.6.6; (sys_platform == "linux" and platform_machine == "x86_64") and extra == "megatron"
Requires-Dist: torchvision; sys_platform == "linux" and extra == "megatron"
Requires-Dist: megatron-bridge; (sys_platform == "linux" and python_version >= "3.12") and extra == "megatron"
Requires-Dist: megatron-core; (sys_platform == "linux" and python_version >= "3.12") and extra == "megatron"
Requires-Dist: flashinfer-jit-cache==0.6.6; (sys_platform == "linux" and platform_machine == "x86_64") and extra == "megatron"
Requires-Dist: nvidia-modelopt; sys_platform == "linux" and extra == "megatron"
Provides-Extra: mamba
Requires-Dist: mamba-ssm>=2.3.0; sys_platform == "linux" and extra == "mamba"
Provides-Extra: flashrl
Requires-Dist: skyrl[skyrl-train]; extra == "flashrl"
Requires-Dist: flash-attn==2.8.3; sys_platform == "linux" and extra == "flashrl"
Requires-Dist: torch==2.7.0; sys_platform == "linux" and extra == "flashrl"
Requires-Dist: flashinfer-python; sys_platform == "linux" and extra == "flashrl"
Requires-Dist: torchvision; sys_platform == "linux" and extra == "flashrl"
Provides-Extra: miniswe
Requires-Dist: skyrl[skyrl-train]; extra == "miniswe"
Requires-Dist: mini-swe-agent>=1.12.0; extra == "miniswe"
Requires-Dist: litellm; extra == "miniswe"
Provides-Extra: harbor
Requires-Dist: harbor[daytona,modal]; python_version >= "3.12" and extra == "harbor"
Provides-Extra: dev
Requires-Dist: mkdocs; extra == "dev"
Requires-Dist: mkdocs-material; extra == "dev"
Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "dev"
Requires-Dist: pymdown-extensions>=10.7; extra == "dev"
Requires-Dist: pytest; extra == "dev"
Requires-Dist: pytest-forked; extra == "dev"
Requires-Dist: pytest-asyncio; extra == "dev"
Requires-Dist: pre-commit; extra == "dev"
Requires-Dist: litellm; extra == "dev"
Requires-Dist: torch; extra == "dev"
Requires-Dist: ty; extra == "dev"
Requires-Dist: cloudpathlib[s3]; extra == "dev"
Requires-Dist: alembic; extra == "dev"
Requires-Dist: griffe2md; extra == "dev"
Dynamic: license-file

<div align="center">

# SkyRL: A Modular Full-stack RL Library for LLMs

<p align="center">
| <a href="https://docs.skyrl.ai/docs/"><b>Documentation</b></a> | <a href="https://x.com/NovaSkyAI"><b>Twitter/X</b></a> | <a href="https://huggingface.co/NovaSky-AI"><b>Huggingface</b></a> | <a href="https://join.slack.com/t/skyrl/shared_invite/zt-3f6ncn5b8-QawzK3uks6ka3KWoLwsi5Q"><b>Slack Workspace</b></a> |
</p>

</div>

---

# Overview

> [!IMPORTANT]
> **Note:** SkyRL is undergoing a repo reorganization into the [`skyrl/`](./skyrl) folder, which unifies the skyrl libraries below into a single package. The existing packages below are fully functional but will be migrated to new paths shortly. For full [Tinker API](https://docs.skyrl.ai/docs/tinker/overview) support please use the `skyrl/` folder. See the [Tinker Quickstart docs](https://docs.skyrl.ai/docs/tinker/quickstart) to get started. See issue: https://github.com/NovaSky-AI/SkyRL/issues/1145

SkyRL is a full-stack RL library that provides the following components:

- [skyrl](./skyrl): Our new unified library for RL on your own hardware, with support for the [Tinker API](https://docs.skyrl.ai/docs/tinker/overview). `skyrl` combines our previous work:

  * [`skyrl-train`](./skyrl-train): A modular, performant training framework for RL.  
  * [`skyrl-tx`](./skyrl-tx): A cross-platform library implementing a backend for the [Tinker API](https://docs.skyrl.ai/docs/tinker/overview), with a unified engine for training and inference.
- [`skyrl-agent`](./skyrl-agent): Our agent layer for training long-horizon, real-world agents. For exact reproduction of [SkyRL-v0](https://novasky-ai.notion.site/skyrl-v0) results, please checkout to commit a0d50c482436af7fac8caffa4533616a78431d66.
- [`skyrl-gym`](./skyrl-gym): Our gymnasium of tool-use tasks, including a library of math, coding, search and SQL environments implemented in the Gymnasium API.


# Getting Started

For a guide on developing with SkyRL, take at look at our [Development Guide](https://docs.skyrl.ai/docs/getting-started/development) docs.

For model training, checkout [`skyrl`](./skyrl) to start using, modifying, or building on top of the SkyRL training stack. See our [quickstart docs](https://docs.skyrl.ai/docs/index) to ramp up!

For building environments, checkout [`skyrl-gym`](./skyrl-gym) to integrate your task in the simple gymnasium interface.

For agentic pipelines, check out [`skyrl-agent`](./skyrl-agent) for our work on optimizing and scaling pipelines for multi-turn tool use LLMs on long-horizon, real-environment tasks.


# News
- **[2026/02/17]** 🎉 SkyRL is officially integrated with Harbor! Train your terminal-use agent! [[Blog](https://novasky-ai.notion.site/skyrl-harbor)]
- **[2026/02/13]** 🎉 SkyRL now implements the Tinker API! Run any training script written in the Tinker API on your local GPUs with SkyRL! [[Blog](https://novasky-ai.notion.site/skyrl-tinker)]
- **[2025/11/26]** 🎉 We released SkyRL-Agent: An agent layer for efficient, multi-turn, long-horizon agent training and evaluation. [[Paper](https://arxiv.org/pdf/2511.16108)]
- **[2025/10/06]** 🎉 We released SkyRL tx: An open implementation of a backend for the Tinker API to run a Tinker-like service on their own hardware. [[Blog](https://novasky-ai.notion.site/skyrl-tx)]
- **[2025/06/26]** 🎉 We released SkyRL-v0.1: A highly-modular, performant RL training framework. [[Blog](https://novasky-ai.notion.site/skyrl-v01)]
- **[2025/06/26]** 🎉 We released SkyRL-Gym: A library of RL environments for LLMs implemented with the Gymnasium API. [[Blog](https://novasky-ai.notion.site/skyrl-v01)]
- **[2025/05/20]** 🎉 We released SkyRL-SQL: a multi-turn RL training pipeline for Text-to-SQL, along with SkyRL-SQL-7B — a model trained on just 653 samples that outperforms both GPT-4o and o4-mini!
- **[2025/05/06]** 🎉 We released SkyRL-v0: our open RL training pipeline for multi-turn tool use LLMs, optimized for long-horizon, real-environment tasks like SWE-Bench!

# Links
- 📜 [Train Your Terminal-Use Agent with SkyRL + Harbor](https://novasky-ai.notion.site/skyrl-harbor)
- 📜 [SkyRL Brings Tinker to Your GPUs](https://novasky-ai.notion.site/skyrl-tinker)
- 📜 [Fully Async RL with In-Flight Weight Updates in SkyRL](https://docs.skyrl.ai/docs/tutorials/fully_async)
- 📜 [Open Recipes on SkyRL](https://docs.skyrl.ai/docs/recipes/overview)
- 📜 [SkyRL-Agent Paper](https://arxiv.org/pdf/2511.16108)
- 📜 [On-Policy Distillation on SkyRL Blog Post](https://novasky-ai.notion.site/on-policy-distillation)
- 📜 [Search-R1 on SkyRL Blog Post](https://novasky-ai.notion.site/skyrl-searchr1)
- 📜 [SkyRL-v0.1 Blog Post](https://novasky-ai.notion.site/skyrl-v01)
- 📜 [SkyRL-SQL Blog Post](https://novasky-ai.notion.site/skyrl-sql)
- 📜 [SkyRL-v0 Blog Post](https://novasky-ai.notion.site/skyrl-v0)

# Projects using SkyRL
- [Biomni-R0](https://biomni.stanford.edu/blog/biomni-r0-technical-report/): Using RL to Hill-Climb Biomedical Reasoning Agents to Expert-Level ![GitHub Repo stars](https://img.shields.io/github/stars/snap-stanford/Biomni)
- [How to Train Your Advisor](https://github.com/az1326/advisor-models): Steering Black-Box LLMs with Advisor Models ![GitHub Repo stars](https://img.shields.io/github/stars/az1326/advisor-models)
- [OpenThoughts-Agent](https://github.com/open-thoughts/OpenThoughts-Agent): Data recipes and robust infrastructure for training AI agents ![GitHub Repo stars](https://img.shields.io/github/stars/open-thoughts/OpenThoughts-Agent)
- [Endless Terminals](https://arxiv.org/abs/2601.16443): A fully autonomous pipeline that procedurally generates terminal tasks for RL training with no human annotation needed ![GitHub Repo stars](https://img.shields.io/github/stars/kanishkg/endless-terminals)
- [CodeScout](https://arxiv.org/abs/2603.17829): Open-source SoTA code localization on SWE-Bench via RL ![GitHub Repo stars](https://img.shields.io/github/stars/OpenHands/codescout)

# Acknowledgement

This work is done at [**Berkeley Sky Computing Lab**](https://sky.cs.berkeley.edu/) in collaboration with [**Anyscale**](https://www.anyscale.com/), with generous compute support from [**Anyscale**](https://www.anyscale.com/), [**Databricks**](https://www.databricks.com/), [**NVIDIA**](https://developer.nvidia.com/brev), [**Lambda Labs**](https://lambdalabs.com/service/gpu-cloud?srsltid=AfmBOop5FnmEFTkavVtdZDsLWvHWNg6peXtat-OXJ9MW5GMNsk756PE5), [**AMD**](https://www.amd.com/en), [**AWS**](https://aws.amazon.com/), [**Modal**](https://modal.com/), and [**Daytona**](https://www.daytona.io/).

We adopt many lessons and code from several great projects such as [veRL](https://github.com/volcengine/verl), [OpenRLHF](https://github.com/OpenRLHF/OpenRLHF), [Search-R1](https://github.com/PeterGriffinJin/Search-R1), [OpenReasonerZero](https://github.com/Open-Reasoner-Zero/Open-Reasoner-Zero), and [NeMo-RL](https://github.com/NVIDIA-NeMo/RL). We appreciate each of these teams and their contributions to open-source research!


# Citation

If you find the work in this repository helpful, please consider citing:

```bibtex
@misc{cao2025skyrl,
  title     = {SkyRL-v0: Train Real-World Long-Horizon Agents via Reinforcement Learning},
  author    = {Shiyi Cao and Sumanth Hegde and Dacheng Li and Tyler Griggs and Shu Liu and Eric Tang and Jiayi Pan and Xingyao Wang and Akshay Malik and Graham Neubig and Kourosh Hakhamaneshi and Richard Liaw and Philipp Moritz and Matei Zaharia and Joseph E. Gonzalez and Ion Stoica},
  year      = {2025},
}
```

```bibtex
@misc{liu2025skyrlsql,
      title={SkyRL-SQL: Matching GPT-4o and o4-mini on Text2SQL with Multi-Turn RL},
      author={Shu Liu and Sumanth Hegde and Shiyi Cao and Alan Zhu and Dacheng Li and Tyler Griggs and Eric Tang and Akshay Malik and Kourosh Hakhamaneshi and Richard Liaw and Philipp Moritz and Matei Zaharia and Joseph E. Gonzalez and Ion Stoica},
      year={2025},
}
```

```bibtex
@misc{griggs2025skrylv01,
      title={Evolving SkyRL into a Highly-Modular RL Framework},
      author={Tyler Griggs and Sumanth Hegde and Eric Tang and Shu Liu and Shiyi Cao and Dacheng Li and Charlie Ruan and Philipp Moritz and Kourosh Hakhamaneshi and Richard Liaw and Akshay Malik and Matei Zaharia and Joseph E. Gonzalez and Ion Stoica},
      year={2025},
      note={Notion Blog}
}
```
