# Source code for jeevesagent.model.echo

"""A trivial model that echoes the last user message back, in chunks.

Useful for proving the loop end-to-end without API keys or network. It
emits one ``text`` chunk per word followed by a single ``finish`` chunk
with a synthetic usage record.
"""

from __future__ import annotations

from collections.abc import AsyncIterator

import anyio

from ..core.types import Message, ModelChunk, Role, ToolCall, ToolDef, Usage


class EchoModel:
    """Echo-style model for tests and demos.

    Replays the most recent ``USER`` message, prefixed with *prefix*.
    :meth:`complete` returns the echo in one shot; :meth:`stream` yields it
    one whitespace-separated word at a time (optionally sleeping between
    chunks) and terminates with a single ``finish`` chunk carrying a
    synthetic :class:`Usage` record.
    """

    # Model identifier reported to the surrounding agent machinery.
    name: str = "echo"

    def __init__(
        self,
        *,
        prefix: str = "Echo: ",
        chunk_delay_s: float = 0.0,
        cost_per_token: float = 0.0,
    ) -> None:
        """Configure the echo behavior.

        Args:
            prefix: String prepended to the echoed user content.
            chunk_delay_s: Artificial delay before each streamed chunk;
                ``0.0`` disables sleeping entirely.
            cost_per_token: Synthetic USD cost applied per (input + output)
                token when building the :class:`Usage` record.
        """
        self._prefix = prefix
        self._chunk_delay = chunk_delay_s
        self._cost_per_token = cost_per_token

    def _echo_text(self, messages: list[Message]) -> str:
        """Return prefix + content of the last USER message ('' if none)."""
        last_user = next(
            (m for m in reversed(messages) if m.role == Role.USER),
            None,
        )
        return f"{self._prefix}{last_user.content if last_user else ''}"

    @staticmethod
    def _input_tokens(messages: list[Message]) -> int:
        """Crude token estimate: whitespace-separated words across all messages."""
        return sum(len(m.content.split()) for m in messages)

    def _usage(self, input_tokens: int, output_tokens: int) -> Usage:
        """Build a synthetic :class:`Usage` at the configured per-token cost."""
        cost = (input_tokens + output_tokens) * self._cost_per_token
        return Usage(
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cost_usd=cost,
        )

    async def complete(
        self,
        messages: list[Message],
        *,
        tools: list[ToolDef] | None = None,
        temperature: float = 1.0,
        max_tokens: int | None = None,
    ) -> tuple[str, list[ToolCall], Usage, str]:
        """Single-shot echo.

        Returns the echoed user prompt as one string with synthetic usage.
        No per-token chunking — used by the non-streaming hot path
        (``agent.run()``). ``tools``, ``temperature`` and ``max_tokens``
        are accepted for interface compatibility and ignored.

        Returns:
            ``(text, tool_calls, usage, finish_reason)`` — tool_calls is
            always empty and finish_reason is always ``"stop"``.
        """
        text = self._echo_text(messages)
        # At least 1 so a blank echo still bills a minimal output token.
        output_tokens = max(1, len(text.split()))
        usage = self._usage(self._input_tokens(messages), output_tokens)
        return text, [], usage, "stop"

    async def stream(
        self,
        messages: list[Message],
        *,
        tools: list[ToolDef] | None = None,
        temperature: float = 1.0,
        max_tokens: int | None = None,
    ) -> AsyncIterator[ModelChunk]:
        """Yield the echo one word per ``text`` chunk, then a ``finish`` chunk.

        ``tools``, ``temperature`` and ``max_tokens`` are accepted for
        interface compatibility and ignored.
        """
        text = self._echo_text(messages)
        # str.split(" ") always yields at least one element (possibly ""),
        # so at least one text chunk is always emitted.
        words = text.split(" ")
        emitted = 0
        for i, word in enumerate(words):
            # Re-insert the separating space on every chunk after the first
            # so concatenating the pieces reproduces `text` exactly.
            piece = word if i == 0 else " " + word
            if self._chunk_delay > 0:
                await anyio.sleep(self._chunk_delay)
            yield ModelChunk(kind="text", text=piece)
            emitted += 1
        # Cheap-and-cheerful usage estimate: 1 token per whitespace-separated
        # word in the input, one per emitted chunk in the output.
        yield ModelChunk(
            kind="finish",
            finish_reason="stop",
            usage=self._usage(self._input_tokens(messages), emitted),
        )