# Source code for scitex_agent_container.config._types

"""Dataclass definitions for agent configuration."""

from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict


@dataclass
class ContainerSpec:
    """Container settings for an agent: runtime, image, volumes, network."""

    # Accepted values: none | docker | apptainer
    runtime: str = "none"
    image: str = "scitex-agent-container:latest"
    volumes: list[str] = field(default_factory=list)
    network: str = "host"

    # Opt-in auto-mount of the host's ``~/.claude`` directory at
    # ``/home/agent/.claude:ro`` inside the container.
    #
    # Off by default: the container is the isolation boundary, and
    # auto-mounting would leak host identity/skills/MCP/memory into every
    # agent, which is a surprising default. Set ``mount_host_claude: true``
    # in the YAML only when the agent actually needs host-agent
    # identity/memory/skills from ``~/.claude``.
    mount_host_claude: bool = False
@dataclass
class ClaudeSpec:
    """Claude launch options: channels, CLI flags and session-restart policy."""

    channels: list[str] = field(default_factory=list)
    flags: list[str] = field(default_factory=list)

    # Session restart strategy. One of:
    #   continue-or-new  try --continue, fall back to a fresh launch if no
    #                    prior session (default)
    #   continue         always pass --continue (fails if no prior session
    #                    exists)
    #   new              never pass --continue
    #   resume           pass --resume <resume_id> (explicit session ID)
    session: str = "continue-or-new"

    # Only resume if the most recent session jsonl is newer than this many
    # minutes. None = no age check (always resume if a session exists).
    continue_max_age_minutes: int | None = None

    # Explicit session ID to pass to --resume. Only used when
    # session="resume".
    resume_id: str = ""

    auto_accept: bool = True
@dataclass
class HealthSpec:
    """Health-check settings; disabled unless ``enabled`` is set."""

    enabled: bool = False
    interval: int = 30
    timeout: int = 5
    method: str = "multiplexer-alive"
# Parsed for backward compat but not interpreted by runtime.
# Watchdog lifecycle is managed externally via hooks.
@dataclass
class WatchdogSpec:
    """Legacy watchdog block, kept so older YAML files still parse."""

    enabled: bool = False
    interval: float = 1.5
    resp_y_n: str = "1"
    resp_y_y_n: str = "2"
    resp_waiting: str = "/speak-and-call"
# F-CS18 — apptainer-specific extension hook.
#
# Apptainer reads OCI images natively (`apptainer build sif docker://...`),
# so for the no-extras case spec.image alone is enough — sac just
# `apptainer build`s the SIF and runs it. For HPC-specific layering
# (extra pip packages, system libs, env vars), the operator can either:
#
#   * declare `spec.apptainer.post` — sac synthesises a `.def` with
#     `Bootstrap: docker` + `%post` + `%environment` and builds from it.
#   * declare `spec.apptainer.def_file` — sac runs `apptainer build`
#     against the operator's hand-written `.def` (full control).
#
# All fields are optional; an `apptainer:` block with no fields set is
# equivalent to none at all.
@dataclass
class ApptainerSpec:
    """Apptainer-specific image-build extensions (F-CS18)."""

    post: str = ""
    """Shell snippet run inside the SIF build (apptainer's `%post`).
    Lines are concatenated verbatim. Empty = no extension."""

    environment: dict = field(default_factory=dict)
    """Env vars baked into the SIF (apptainer's `%environment`).
    Same shape as ``spec.env`` — KEY: VALUE pairs."""

    def_file: str = ""
    """Path to a hand-authored ``.def`` file (apptainer's native build
    language). Mutually exclusive with `post`/`environment`: when set,
    sac uses this file verbatim and ignores `post`."""

    nv: bool = False
    """Forward host NVIDIA driver/libs into the container (apptainer's
    ``--nv``). Required for CUDA workloads on GPU nodes; harmless on
    CPU-only hosts but only set when needed."""

    rocm: bool = False
    """Forward host AMD ROCm libs (apptainer's ``--rocm``). Mutually
    exclusive with ``nv`` in practice (no host has both)."""


# F-CS3 — autonomous drive-until-done.
#
# claude-session runners do ONE turn and idle by default; multi-turn
# tasks have to wrap externally with a2a peer post-turn loops, and
# every project ends up rewriting that scaffolding. The autonomous
# block lets the runner natively:
#
#   1. Watch each assistant turn for a text match (``drive_until``);
#      hitting it exits the runner with code 0.
#   2. After ``idle_kick_after_s`` of no tool activity AND no match,
#      post ``kick_text`` so the conversation keeps moving.
#   3. Cap at ``max_turns`` to prevent runaway loops.
#
# Phase 1 (this dataclass + parser + validator) lands the schema so
# yamls can author the contract today; the runner-side enforcement
# (consume these fields in _runners.claude_session) lands in phase 2.
# An ``enabled`` row authored under the schema before phase 2 ships
# is harmless — the runner just ignores it for now.
@dataclass
class AutonomousSpec:
    """Schema for the autonomous drive-until-done block (F-CS3)."""

    enabled: bool = False
    drive_until: str = "DONE"
    max_turns: int = 50
    idle_kick_after_s: int = 120
    kick_text: str = "Continue. Print DONE when finished."
@dataclass
class RestartSpec:
    """Restart policy plus retry/backoff parameters."""

    # Accepted values: never | on-failure | always
    policy: str = "never"
    max_retries: int = 3
    backoff_initial: int = 30
    backoff_max: int = 300
    backoff_multiplier: int = 2
# Parsed for backward compat but not interpreted by runtime.
# Telegram setup is managed externally via hooks.
@dataclass
class TelegramSpec:
    """Legacy Telegram block, kept so older YAML files still parse."""

    bot_token_env: str = "SCITEX_AGENT_CONTAINER_TELEGRAM_BOT_TOKEN"
    allowed_users: list[str] = field(default_factory=list)
    auto_connect: bool = True
    greeting: str = ""
@dataclass
class OrochiSpec:
    """Orochi connection settings; disabled unless ``enabled`` is set."""

    enabled: bool = False
    hosts: list[str] = field(default_factory=list)
    port: int = 8559
    token_env: str = "SCITEX_OROCHI_TOKEN"
    channels: list[str] = field(default_factory=list)
    heartbeat_interval: int = 60
@dataclass
class RemoteSpec:
    """SSH deployment target, in chain (``hops``) or legacy dict form."""

    # Chain-based remote: list of SSH config aliases (new format).
    # Populated when spec.remote is a str or list[str].
    # Empty when using the legacy dict format.
    hops: list = field(default_factory=list)

    # SSH host (hostname or IP)
    host: str = ""
    # SSH user
    user: str = ""
    # Path to SSH key (optional)
    key: str = ""
    # SSH port
    port: int = 22
    # SSH command timeout in seconds
    timeout: int = 60
    # Use bash -l -c (needed for PATH on most hosts)
    login_shell: bool = True
    # Skip preflight checks (HPC with module loads)
    no_preflight: bool = False

    @property
    def is_remote(self) -> bool:
        """Return True if this agent should be deployed via SSH.

        That is the case when either ``hops`` or ``host`` is non-empty.
        """
        has_chain = bool(self.hops)
        has_host = bool(self.host)
        return has_chain or has_host
@dataclass
class ContextManagementConfig:
    """Context-lifecycle policy for an agent.

    The defaults correspond to ``strategy="noop"``, so omitting the
    ``context_management`` block keeps existing behavior (sensor disabled).
    """

    trigger_at_percent: float = 70.0
    # Accepted values: "compact" | "restart" | "noop"
    strategy: str = "noop"
    warn_before_n_checks: int = 0
    check_interval_seconds: int = 300
    state_file: str = "~/.scitex/agent-container/state/<agent>.json"

    @property
    def enabled(self) -> bool:
        """Return True for any strategy other than ``"noop"``."""
        is_noop = self.strategy == "noop"
        return not is_noop
@dataclass
class SkillsSpec:
    """Skill lists for an agent and how names are injected and resolved."""

    # Auto-loaded at startup
    required: list[str] = field(default_factory=list)
    # Available but not auto-loaded
    available: list[str] = field(default_factory=list)

    # How sac materializes the skill list into the agent's CLAUDE.md:
    #   "at-import" — resolve each name to file paths and emit `@<path>`
    #                 lines so Claude Code inlines the content at session
    #                 start (default — eager loading per Anthropic
    #                 @-import).
    #   "block"     — emit a ```skills <name>``` block (legacy lazy form).
    injection_mode: str = "at-import"

    # Strategies used to resolve a skill name → file paths in at-import
    # mode. Each entry runs independently; results are unioned + deduped.
    #   "skill-id" — Anthropic-canonical: walk skill roots, for each
    #                ``<dir>/SKILL.md`` resolve identity as
    #                ``frontmatter.name`` (if set) ELSE ``<dir>.name``.
    #                Match if identity equals the requested value.
    #                See https://docs.claude.com/en/docs/claude-code/skills.
    #   "tag"      — files where frontmatter ``tags:`` contains the value
    #                (orchestration extension; not in Anthropic spec but
    #                used by ywatanabe ``tags-expand`` pattern).
    #   "filename" — files whose basename (without ``.md``) matches
    #                (opt-in; broader than ``skill-id``, can over-match).
    match_by: list[str] = field(default_factory=lambda: ["skill-id", "tag"])

    # Comparison style for ``match_by`` strategies.
    #   "exact"   — value == candidate (default)
    #   "partial" — value substring of candidate (case-sensitive)
    match_style: str = "exact"
@dataclass
class HostsSpec:
    """Where an agent should run, in either singleton or multi-instance form.

    Mutually exclusive — exactly one of ``host`` or ``hosts`` may be set:

    * ``host`` (singular) — exactly one instance runs:

      - empty / absent: local singleton (runs wherever sac is invoked)
      - string: pinned to that host
      - list: priority order; first available host wins (fallback chain)

    * ``hosts`` (plural) — multiple instances run, one per host:

      - "all": one per fleet host (replaces the old per-host mode)
      - list of host names: one per listed host (subset)

    Validator (in ``_validation.py``) enforces mutual exclusion + types.
    Loader composes effective ids: ``hosts`` triggers the
    ``<name>-<HOST>`` suffix; ``host`` keeps the bare name.
    """

    host: str | list[str] = ""
    hosts: str | list[str] = field(default_factory=list)
@dataclass
class SchedulingSpec:
    """Fleet-wide scheduling policy for an agent (shared-host layout).

    ``mode`` controls effective-id composition and launch-skip behavior:

    * ``per-host`` (default): agent is started on every host that runs
      ``sac agent start <name>``; the effective id is
      ``<metadata.name>-<HOST>`` unless the name already ends with
      ``-<HOST>``.
    * ``singleton``: exactly one instance fleet-wide. The effective id
      stays as the bare ``<metadata.name>``. Only launched on
      ``preferred-host``; on other hosts the launch is a no-op.
      ``fallback-hosts`` is recorded for observability but not acted on
      automatically — manual failover today.
    """

    mode: str = "per-host"
    preferred_host: str = ""
    fallback_hosts: list[str] = field(default_factory=list)
@dataclass
class ListenPort:
    """Declaration of a port/socket an external tool binds on behalf of an agent.

    The container NEVER binds these — it just validates the shape and
    echoes them in ``status --json`` so orchestrators can see what
    sidecars are expected to exist. ``owner`` is free-form (e.g.
    ``"orochi"``) to identify the plugin that actually listens.
    """

    port: int = 0
    # Accepted values: tcp | udp | unix
    proto: str = "tcp"
    # unix-socket path (when proto == "unix")
    path: str = ""
    name: str = ""
    owner: str = ""
@dataclass
class HookSpec:
    """All hook points supported by the container.

    Each entry is a list of opaque commands — shell strings or http(s)
    URLs. The container executes them fire-and-forget; errors are logged
    but never raised to the caller. Absent keys default to empty lists
    (feature disabled).
    """

    pre_start: list[str] = field(default_factory=list)
    post_start: list[str] = field(default_factory=list)
    pre_stop: list[str] = field(default_factory=list)
    post_stop: list[str] = field(default_factory=list)
    on_compact: list[str] = field(default_factory=list)
    on_restart: list[str] = field(default_factory=list)
    on_diff: list[str] = field(default_factory=list)

    def counts(self) -> dict[str, int]:
        """Return the number of commands registered per hook point.

        Keys are the hook-point names, in declaration order.
        """
        hook_points = (
            "pre_start",
            "post_start",
            "pre_stop",
            "post_stop",
            "on_compact",
            "on_restart",
            "on_diff",
        )
        return {point: len(getattr(self, point)) for point in hook_points}
@dataclass
class StartupCommand:
    """A single startup command and the delay before it is sent."""

    # seconds after startup
    delay: int = 0
    command: str = ""
@dataclass
class ReadyPattern:
    """One regex that the pane content has to match before the agent counts as ready."""

    regex: str = ""
@dataclass
class StartupSpec:
    """Opt-in ready-state gate for startup commands (todo#291).

    With an empty ``ready_patterns`` the legacy fire-and-hope behavior is
    preserved. Otherwise ``agent_start`` polls the tmux pane content and
    only dispatches ``commands`` once all patterns match against the tail
    of the capture AND the pane has been byte-identical for
    ``ready_idle_ticks`` consecutive polls.
    """

    ready_patterns: list[ReadyPattern] = field(default_factory=list)
    ready_idle_ticks: int = 3
    ready_poll_interval_seconds: float = 0.5
    ready_timeout_seconds: float = 60.0
    # Accepted values: "capture_and_fail" | "capture_and_proceed"
    on_timeout: str = "capture_and_proceed"
    commands: list[StartupCommand] = field(default_factory=list)
@dataclass
class AgentConfig:
    """Parsed agent configuration from a YAML definition file."""

    name: str
    runtime: str = "claude-code"

    # F-CS16 phase 2a — top-level fields that flatten the old
    # spec.container.{image, dockerfile} block. Empty string means
    # "use the default" (resolved by phase 2d's auto-build path
    # against ContainerSpec.image / containers/Dockerfile.<target>).
    image: str = ""
    dockerfile: str = ""

    model: str = "sonnet"
    workdir: str = "~/proj"
    # resolved venv path (post _resolve_python_venv)
    python_venv: str = ""
    env: dict[str, str] = field(default_factory=dict)
    # .env file paths (workspace-relative ok)
    env_files: list[str] = field(default_factory=list)
    # Filled in by __post_init__ when left empty.
    screen_name: str = ""
    labels: dict[str, str] = field(default_factory=dict)

    container: ContainerSpec = field(default_factory=ContainerSpec)
    claude: ClaudeSpec = field(default_factory=ClaudeSpec)
    health: HealthSpec = field(default_factory=HealthSpec)
    watchdog: WatchdogSpec = field(default_factory=WatchdogSpec)
    restart: RestartSpec = field(default_factory=RestartSpec)
    autonomous: AutonomousSpec = field(default_factory=AutonomousSpec)
    apptainer: ApptainerSpec = field(default_factory=ApptainerSpec)
    hooks: dict[str, list[str]] = field(default_factory=dict)
    listen: list[ListenPort] = field(default_factory=list)
    extensions: Dict[str, Any] = field(default_factory=dict)
    telegram: TelegramSpec = field(default_factory=TelegramSpec)
    remote: RemoteSpec = field(default_factory=RemoteSpec)
    skills: SkillsSpec = field(default_factory=SkillsSpec)
    context_management: ContextManagementConfig = field(
        default_factory=ContextManagementConfig
    )
    startup_commands: list[StartupCommand] = field(default_factory=list)
    startup: "StartupSpec" = field(default_factory=lambda: StartupSpec())
    mcp_servers: dict[str, dict] = field(default_factory=dict)
    # "tmux" (default) or "screen"
    multiplexer: str = "tmux"
    hosts_spec: HostsSpec = field(default_factory=HostsSpec)
    scheduling: SchedulingSpec = field(default_factory=SchedulingSpec)
    orochi: OrochiSpec = field(default_factory=OrochiSpec)
    config_path: str = ""

    def __post_init__(self) -> None:
        """Default ``screen_name`` to ``cld-<name>`` when it was left empty."""
        self.screen_name = self.screen_name or f"cld-{self.name}"

    @property
    def expanded_workdir(self) -> str:
        """Return ``workdir`` as a string with a leading ``~`` expanded."""
        resolved = Path(self.workdir).expanduser()
        return str(resolved)