Source code for jeevesagent.governance.budget

"""Token / call / cost budgets.

:class:`StandardBudget` enforces hard limits on tokens, cost, and
wall clock, and emits a soft warning once total tokens or cost reach
a configurable fraction of their limit.
:class:`NoBudget` is the always-allow stub used when the user has
opted out of governance entirely.
"""

from __future__ import annotations

from dataclasses import dataclass
from datetime import UTC, datetime, timedelta

import anyio

from ..core.types import BudgetStatus


class NoBudget:
    """Budget stub that approves every step and records nothing."""

    async def allows_step(self) -> BudgetStatus:
        """Return the ``ok`` status unconditionally."""
        return BudgetStatus.ok_()

    async def consume(
        self,
        *,
        tokens_in: int,
        tokens_out: int,
        cost_usd: float,
    ) -> None:
        """Accept a usage report and discard it.

        All arguments are keyword-only and are ignored; nothing is stored.
        """
[docs] @dataclass(slots=True) class BudgetConfig: max_tokens: int | None = None max_input_tokens: int | None = None max_output_tokens: int | None = None max_cost_usd: float | None = None max_wall_clock: timedelta | None = None soft_warning_at: float = 0.8 # 80% triggers a warning
[docs] class StandardBudget: """Hard-limited, thread-safe budget tracker.""" def __init__(self, cfg: BudgetConfig | None = None) -> None: self._cfg = cfg or BudgetConfig() self._tokens_in = 0 self._tokens_out = 0 self._cost = 0.0 self._started_at = datetime.now(UTC) self._lock = anyio.Lock()
[docs] async def allows_step(self) -> BudgetStatus: async with self._lock: blocked = self._first_block_reason() if blocked is not None: return BudgetStatus.blocked_(blocked) warn = self._first_warning_reason() if warn is not None: return BudgetStatus.warn_(warn) return BudgetStatus.ok_()
[docs] async def consume( self, *, tokens_in: int, tokens_out: int, cost_usd: float, ) -> None: async with self._lock: self._tokens_in += tokens_in self._tokens_out += tokens_out self._cost += cost_usd
# ---- helpers --------------------------------------------------------- def _total_tokens(self) -> int: return self._tokens_in + self._tokens_out def _elapsed(self) -> timedelta: return datetime.now(UTC) - self._started_at def _first_block_reason(self) -> str | None: c = self._cfg if c.max_tokens is not None and self._total_tokens() >= c.max_tokens: return "max_tokens" if c.max_input_tokens is not None and self._tokens_in >= c.max_input_tokens: return "max_input_tokens" if c.max_output_tokens is not None and self._tokens_out >= c.max_output_tokens: return "max_output_tokens" if c.max_cost_usd is not None and self._cost >= c.max_cost_usd: return "max_cost_usd" if c.max_wall_clock is not None and self._elapsed() >= c.max_wall_clock: return "max_wall_clock" return None def _first_warning_reason(self) -> str | None: c = self._cfg threshold = c.soft_warning_at if c.max_tokens is not None and self._total_tokens() >= c.max_tokens * threshold: return f"tokens at {self._total_tokens() / c.max_tokens:.0%}" if c.max_cost_usd is not None and self._cost >= c.max_cost_usd * threshold: return f"cost at {self._cost / c.max_cost_usd:.0%}" return None